arize-phoenix 4.12.1rc1__tar.gz → 4.15.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (293)
  1. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/PKG-INFO +10 -6
  2. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/README.md +2 -0
  3. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/pyproject.toml +15 -12
  4. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/bulk_inserter.py +5 -4
  5. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/engines.py +2 -1
  6. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/evaluators/base.py +4 -0
  7. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/evaluators/code_evaluators.py +80 -0
  8. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/evaluators/llm_evaluators.py +77 -1
  9. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/evaluators/utils.py +70 -21
  10. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/functions.py +17 -16
  11. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/context.py +5 -3
  12. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/__init__.py +2 -0
  13. arize_phoenix-4.15.0/src/phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  14. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/dataset_example_revisions.py +2 -4
  15. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/dataset_example_spans.py +2 -4
  16. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -4
  17. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/document_evaluations.py +2 -4
  18. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/document_retrieval_metrics.py +2 -4
  19. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/evaluation_summaries.py +2 -4
  20. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/experiment_annotation_summaries.py +2 -4
  21. arize_phoenix-4.15.0/src/phoenix/server/api/dataloaders/experiment_error_rates.py +61 -0
  22. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/experiment_run_counts.py +20 -9
  23. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/experiment_sequence_number.py +2 -4
  24. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/latency_ms_quantile.py +2 -3
  25. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/min_start_or_max_end_times.py +2 -4
  26. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/project_by_name.py +3 -3
  27. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/record_counts.py +2 -4
  28. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/span_annotations.py +2 -4
  29. arize_phoenix-4.15.0/src/phoenix/server/api/dataloaders/span_dataset_examples.py +36 -0
  30. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/span_descendants.py +2 -4
  31. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/span_evaluations.py +2 -4
  32. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/span_projects.py +3 -3
  33. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/token_counts.py +2 -4
  34. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/trace_evaluations.py +2 -4
  35. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/trace_row_ids.py +2 -4
  36. arize_phoenix-4.12.1rc1/src/phoenix/server/api/input_types/CreateSpanAnnotationsInput.py → arize_phoenix-4.15.0/src/phoenix/server/api/input_types/CreateSpanAnnotationInput.py +4 -2
  37. arize_phoenix-4.12.1rc1/src/phoenix/server/api/input_types/CreateTraceAnnotationsInput.py → arize_phoenix-4.15.0/src/phoenix/server/api/input_types/CreateTraceAnnotationInput.py +4 -2
  38. arize_phoenix-4.12.1rc1/src/phoenix/server/api/input_types/PatchAnnotationsInput.py → arize_phoenix-4.15.0/src/phoenix/server/api/input_types/PatchAnnotationInput.py +4 -2
  39. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/mutations/span_annotations_mutations.py +20 -9
  40. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/mutations/trace_annotations_mutations.py +20 -9
  41. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/datasets.py +132 -10
  42. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/evaluations.py +3 -5
  43. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/experiments.py +1 -1
  44. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Experiment.py +2 -2
  45. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Inferences.py +1 -2
  46. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Model.py +1 -2
  47. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Span.py +5 -0
  48. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/utils.py +4 -4
  49. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/app.py +21 -18
  50. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/grpc_server.py +2 -2
  51. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/main.py +5 -9
  52. arize_phoenix-4.15.0/src/phoenix/server/static/.vite/manifest.json +78 -0
  53. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/components-C8sm_r1F.js → arize_phoenix-4.15.0/src/phoenix/server/static/assets/components-kGgeFkHp.js +150 -110
  54. arize_phoenix-4.15.0/src/phoenix/server/static/assets/index-BctFO6S7.js +100 -0
  55. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/pages-bN7juCjh.js → arize_phoenix-4.15.0/src/phoenix/server/static/assets/pages-DabDCmVd.js +432 -255
  56. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/vendor-CUDAPm8e.js → arize_phoenix-4.15.0/src/phoenix/server/static/assets/vendor-CP0b0YG0.js +2 -2
  57. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/vendor-arizeai-Do2HOmcL.js → arize_phoenix-4.15.0/src/phoenix/server/static/assets/vendor-arizeai-B5Hti8OB.js +27 -27
  58. arize_phoenix-4.15.0/src/phoenix/server/static/assets/vendor-codemirror-DtdPDzrv.js +15 -0
  59. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/vendor-recharts-PKRvByVe.js → arize_phoenix-4.15.0/src/phoenix/server/static/assets/vendor-recharts-A0DA1O99.js +1 -1
  60. arize_phoenix-4.15.0/src/phoenix/server/types.py +18 -0
  61. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/session/client.py +9 -6
  62. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/session/session.py +2 -2
  63. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/dsl/filter.py +40 -25
  64. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/fixtures.py +17 -23
  65. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/utils.py +23 -0
  66. arize_phoenix-4.15.0/src/phoenix/utilities/client.py +116 -0
  67. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/utilities/project.py +1 -1
  68. arize_phoenix-4.15.0/src/phoenix/version.py +1 -0
  69. arize_phoenix-4.12.1rc1/src/phoenix/server/api/dataloaders/average_experiment_run_latency.py +0 -54
  70. arize_phoenix-4.12.1rc1/src/phoenix/server/api/dataloaders/experiment_error_rates.py +0 -43
  71. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/dataset_examples.py +0 -157
  72. arize_phoenix-4.12.1rc1/src/phoenix/server/static/.vite/manifest.json +0 -78
  73. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/index-BEKPzgQs.js +0 -100
  74. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/vendor-codemirror-CrdxOlMs.js +0 -12
  75. arize_phoenix-4.12.1rc1/src/phoenix/version.py +0 -1
  76. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/.gitignore +0 -0
  77. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/IP_NOTICE +0 -0
  78. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/LICENSE +0 -0
  79. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/chat-service/chat/__init__.py +0 -0
  80. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/chat-service/chat/app.py +0 -0
  81. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/chat-service/chat/types.py +0 -0
  82. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/frontend/Dockerfile +0 -0
  83. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/frontend/Makefile +0 -0
  84. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/frontend/__init__.py +0 -0
  85. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/frontend/pyproject.toml +0 -0
  86. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/frontend/requirements.txt +0 -0
  87. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/examples/manually-instrumented-chatbot/frontend/schema.json +0 -0
  88. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/__init__.py +0 -0
  89. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/config.py +0 -0
  90. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/core/__init__.py +0 -0
  91. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/core/embedding_dimension.py +0 -0
  92. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/core/model.py +0 -0
  93. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/core/model_schema.py +0 -0
  94. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/core/model_schema_adapter.py +0 -0
  95. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/datetime_utils.py +0 -0
  96. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/README.md +0 -0
  97. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/__init__.py +0 -0
  98. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/alembic.ini +0 -0
  99. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/helpers.py +0 -0
  100. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/insertion/__init__.py +0 -0
  101. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/insertion/dataset.py +0 -0
  102. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/insertion/evaluation.py +0 -0
  103. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/insertion/helpers.py +0 -0
  104. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/insertion/span.py +0 -0
  105. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/migrate.py +0 -0
  106. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/migrations/__init__.py +0 -0
  107. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/migrations/env.py +0 -0
  108. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/migrations/script.py.mako +0 -0
  109. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/migrations/types.py +0 -0
  110. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -0
  111. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +0 -0
  112. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/db/models.py +0 -0
  113. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/exceptions.py +0 -0
  114. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/__init__.py +0 -0
  115. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/evaluators/__init__.py +0 -0
  116. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/tracing.py +0 -0
  117. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/types.py +0 -0
  118. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/experiments/utils.py +0 -0
  119. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/inferences/__init__.py +0 -0
  120. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/inferences/errors.py +0 -0
  121. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/inferences/fixtures.py +0 -0
  122. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/inferences/inferences.py +0 -0
  123. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/inferences/schema.py +0 -0
  124. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/inferences/validation.py +0 -0
  125. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/metrics/README.md +0 -0
  126. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/metrics/__init__.py +0 -0
  127. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/metrics/binning.py +0 -0
  128. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/metrics/metrics.py +0 -0
  129. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/metrics/mixins.py +0 -0
  130. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/metrics/retrieval_metrics.py +0 -0
  131. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/metrics/timeseries.py +0 -0
  132. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/metrics/wrappers.py +0 -0
  133. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/pointcloud/__init__.py +0 -0
  134. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/pointcloud/clustering.py +0 -0
  135. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/pointcloud/pointcloud.py +0 -0
  136. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/pointcloud/projectors.py +0 -0
  137. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  138. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/py.typed +0 -0
  139. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/__init__.py +0 -0
  140. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/__init__.py +0 -0
  141. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/cache/__init__.py +0 -0
  142. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/dataloaders/cache/two_tier_cache.py +0 -0
  143. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/helpers/__init__.py +0 -0
  144. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/helpers/dataset_helpers.py +0 -0
  145. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -0
  146. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -0
  147. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/ClearProjectInput.py +0 -0
  148. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  149. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  150. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/CreateDatasetInput.py +0 -0
  151. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  152. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DatasetExampleInput.py +0 -0
  153. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DatasetSort.py +0 -0
  154. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DatasetVersionSort.py +0 -0
  155. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DeleteAnnotationsInput.py +0 -0
  156. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -0
  157. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DeleteDatasetInput.py +0 -0
  158. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -0
  159. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  160. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  161. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  162. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -0
  163. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/PatchDatasetInput.py +0 -0
  164. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  165. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  166. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  167. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/input_types/__init__.py +0 -0
  168. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/interceptor.py +0 -0
  169. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/mutations/__init__.py +0 -0
  170. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/mutations/auth.py +0 -0
  171. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/mutations/dataset_mutations.py +0 -0
  172. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/mutations/experiment_mutations.py +0 -0
  173. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/mutations/export_events_mutations.py +0 -0
  174. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/mutations/project_mutations.py +0 -0
  175. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/openapi/__init__.py +0 -0
  176. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/openapi/main.py +0 -0
  177. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/openapi/schema.py +0 -0
  178. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/queries.py +0 -0
  179. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/__init__.py +0 -0
  180. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/utils.py +0 -0
  181. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/__init__.py +0 -0
  182. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/experiment_evaluations.py +0 -0
  183. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/experiment_runs.py +0 -0
  184. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/pydantic_compat.py +0 -0
  185. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/spans.py +0 -0
  186. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/traces.py +0 -0
  187. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/routers/v1/utils.py +0 -0
  188. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/schema.py +0 -0
  189. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Annotation.py +0 -0
  190. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/AnnotatorKind.py +0 -0
  191. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Cluster.py +0 -0
  192. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/CreateDatasetPayload.py +0 -0
  193. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  194. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Dataset.py +0 -0
  195. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DatasetExample.py +0 -0
  196. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DatasetExampleRevision.py +0 -0
  197. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  198. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DatasetVersion.py +0 -0
  199. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Dimension.py +0 -0
  200. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  201. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  202. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DimensionType.py +0 -0
  203. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  204. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
  205. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
  206. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  207. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  208. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Evaluation.py +0 -0
  209. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
  210. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Event.py +0 -0
  211. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  212. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/ExampleRevisionInterface.py +0 -0
  213. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -0
  214. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/ExperimentComparison.py +0 -0
  215. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/ExperimentRun.py +0 -0
  216. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/ExperimentRunAnnotation.py +0 -0
  217. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  218. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Functionality.py +0 -0
  219. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/InferencesRole.py +0 -0
  220. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/MimeType.py +0 -0
  221. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/NumericRange.py +0 -0
  222. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  223. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Project.py +0 -0
  224. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  225. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Retrieval.py +0 -0
  226. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  227. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Segments.py +0 -0
  228. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/SortDir.py +0 -0
  229. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/SpanAnnotation.py +0 -0
  230. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  231. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/Trace.py +0 -0
  232. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/TraceAnnotation.py +0 -0
  233. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  234. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  235. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  236. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/__init__.py +0 -0
  237. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/node.py +0 -0
  238. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/api/types/pagination.py +0 -0
  239. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/openapi/__init__.py +0 -0
  240. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/prometheus.py +0 -0
  241. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  242. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  243. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  244. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  245. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  246. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  247. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  248. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  249. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/assets/vendor-DxkFTwjz.css +0 -0
  250. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/assets/vendor-three-DwGkEfCM.js +0 -0
  251. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/favicon.ico +0 -0
  252. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/static/modernizr.js +0 -0
  253. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/telemetry.py +0 -0
  254. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/templates/__init__.py +0 -0
  255. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/templates/index.html +0 -0
  256. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/server/thread_server.py +0 -0
  257. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/services.py +0 -0
  258. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/session/__init__.py +0 -0
  259. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/session/data_extractor.py +0 -0
  260. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/session/evaluation.py +0 -0
  261. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/settings.py +0 -0
  262. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/__init__.py +0 -0
  263. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/attributes.py +0 -0
  264. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/dsl/README.md +0 -0
  265. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/dsl/__init__.py +0 -0
  266. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/dsl/helpers.py +0 -0
  267. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/dsl/query.py +0 -0
  268. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/errors.py +0 -0
  269. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/evaluation_conventions.py +0 -0
  270. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/exporter.py +0 -0
  271. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/langchain/__init__.py +0 -0
  272. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/langchain/instrumentor.py +0 -0
  273. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/llama_index/__init__.py +0 -0
  274. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/llama_index/callback.py +0 -0
  275. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/openai/__init__.py +0 -0
  276. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/openai/instrumentor.py +0 -0
  277. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/otel.py +0 -0
  278. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/projects.py +0 -0
  279. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/schemas.py +0 -0
  280. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/span_evaluations.py +0 -0
  281. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/span_json_decoder.py +0 -0
  282. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/span_json_encoder.py +0 -0
  283. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/trace_dataset.py +0 -0
  284. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/v1/__init__.py +0 -0
  285. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
  286. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
  287. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/utilities/__init__.py +0 -0
  288. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/utilities/deprecation.py +0 -0
  289. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/utilities/error_handling.py +0 -0
  290. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/utilities/json.py +0 -0
  291. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/utilities/logging.py +0 -0
  292. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/utilities/re.py +0 -0
  293. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.15.0}/src/phoenix/utilities/span_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.12.1rc1
3
+ Version: 4.15.0
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -48,7 +48,7 @@ Requires-Dist: scipy
48
48
  Requires-Dist: sqlalchemy[asyncio]<3,>=2.0.4
49
49
  Requires-Dist: sqlean-py>=3.45.1
50
50
  Requires-Dist: starlette
51
- Requires-Dist: strawberry-graphql==0.235.0
51
+ Requires-Dist: strawberry-graphql==0.236.0
52
52
  Requires-Dist: tqdm
53
53
  Requires-Dist: typing-extensions>=4.5; python_version < '3.12'
54
54
  Requires-Dist: typing-extensions>=4.6; python_version >= '3.12'
@@ -65,11 +65,12 @@ Requires-Dist: opentelemetry-sdk; extra == 'container'
65
65
  Requires-Dist: opentelemetry-semantic-conventions; extra == 'container'
66
66
  Requires-Dist: prometheus-client; extra == 'container'
67
67
  Requires-Dist: py-grpc-prometheus; extra == 'container'
68
- Requires-Dist: strawberry-graphql[opentelemetry]==0.235.0; extra == 'container'
68
+ Requires-Dist: strawberry-graphql[opentelemetry]==0.236.0; extra == 'container'
69
69
  Requires-Dist: uvloop; (platform_system != 'Windows') and extra == 'container'
70
70
  Provides-Extra: dev
71
71
  Requires-Dist: anthropic; extra == 'dev'
72
72
  Requires-Dist: arize[autoembeddings,llm-evaluation]; extra == 'dev'
73
+ Requires-Dist: asgi-lifespan; extra == 'dev'
73
74
  Requires-Dist: asyncpg; extra == 'dev'
74
75
  Requires-Dist: gcsfs; extra == 'dev'
75
76
  Requires-Dist: google-cloud-aiplatform>=1.3; extra == 'dev'
@@ -78,6 +79,7 @@ Requires-Dist: jupyter; extra == 'dev'
78
79
  Requires-Dist: langchain>=0.0.334; extra == 'dev'
79
80
  Requires-Dist: litellm>=1.0.3; extra == 'dev'
80
81
  Requires-Dist: llama-index>=0.10.3; extra == 'dev'
82
+ Requires-Dist: mypy==1.11.0; extra == 'dev'
81
83
  Requires-Dist: nbqa; extra == 'dev'
82
84
  Requires-Dist: pandas-stubs==2.0.3.230814; (python_version < '3.9') and extra == 'dev'
83
85
  Requires-Dist: pandas-stubs==2.2.2.240603; (python_version >= '3.9') and extra == 'dev'
@@ -88,9 +90,9 @@ Requires-Dist: psycopg[binary]; extra == 'dev'
88
90
  Requires-Dist: pytest-asyncio; extra == 'dev'
89
91
  Requires-Dist: pytest-cov; extra == 'dev'
90
92
  Requires-Dist: pytest-postgresql; extra == 'dev'
91
- Requires-Dist: pytest==8.2.2; extra == 'dev'
92
- Requires-Dist: ruff==0.4.9; extra == 'dev'
93
- Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.235.0; extra == 'dev'
93
+ Requires-Dist: pytest==8.3.1; extra == 'dev'
94
+ Requires-Dist: ruff==0.5.4; extra == 'dev'
95
+ Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.236.0; extra == 'dev'
94
96
  Requires-Dist: tabulate; extra == 'dev'
95
97
  Requires-Dist: types-tabulate; extra == 'dev'
96
98
  Provides-Extra: evals
@@ -138,6 +140,8 @@ Phoenix is an open-source AI observability platform designed for experimentation
138
140
 
139
141
  - **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
140
142
  - **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
143
+ - **_Datasets_** - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.
144
+ - **_Experiments_** - Track and evaluate changes to prompts, LLMs, and retrieval.
141
145
  - **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
142
146
 
143
147
  Phoenix is vendor and language agnostic with out-of-the-box support for popular frameworks (🦙LlamaIndex, 🦜⛓LangChain, 🧩DSPy) and LLM providers (OpenAI, Bedrock, and more). For details on auto-instrumentation, check out the [OpenInference](https://github.com/Arize-ai/openinference) project.
@@ -31,6 +31,8 @@ Phoenix is an open-source AI observability platform designed for experimentation
31
31
 
32
32
  - **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
33
33
  - **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
34
+ - **_Datasets_** - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.
35
+ - **_Experiments_** - Track and evaluate changes to prompts, LLMs, and retrieval.
34
36
  - **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
35
37
 
36
38
  Phoenix is vendor and language agnostic with out-of-the-box support for popular frameworks (🦙LlamaIndex, 🦜⛓LangChain, 🧩DSPy) and LLM providers (OpenAI, Bedrock, and more). For details on auto-instrumentation, check out the [OpenInference](https://github.com/Arize-ai/openinference) project.
@@ -31,7 +31,7 @@ dependencies = [
31
31
  "starlette",
32
32
  "uvicorn",
33
33
  "psutil",
34
- "strawberry-graphql==0.235.0", # need to pin version because we're monkey-patching
34
+ "strawberry-graphql==0.236.0", # need to pin version because we're monkey-patching
35
35
  "pyarrow",
36
36
  "typing-extensions>=4.5; python_version<'3.12'",
37
37
  # A minimum version of typing-extensions==4.6.0 is needed to avoid this issue on Python 3.12: https://github.com/Azure/azure-sdk-for-python/issues/33442#issuecomment-1847886784
@@ -70,19 +70,20 @@ dev = [
70
70
  "hatch",
71
71
  "jupyter",
72
72
  "nbqa",
73
- "ruff==0.4.9",
73
+ "ruff==0.5.4",
74
+ "mypy==1.11.0",
74
75
  "pandas>=1.0",
75
76
  "tabulate", # used by DataFrame.to_markdown()
76
77
  "types-tabulate",
77
78
  "pandas-stubs==2.2.2.240603; python_version>='3.9'",
78
79
  "pandas-stubs==2.0.3.230814; python_version<'3.9'",
79
- "pytest==8.2.2",
80
+ "pytest==8.3.1",
80
81
  "pytest-asyncio",
81
82
  "pytest-cov",
82
83
  "pytest-postgresql",
83
84
  "asyncpg",
84
85
  "psycopg[binary]",
85
- "strawberry-graphql[debug-server,opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
86
+ "strawberry-graphql[debug-server,opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
86
87
  "pre-commit",
87
88
  "arize[AutoEmbeddings, LLM_Evaluation]",
88
89
  "llama-index>=0.10.3",
@@ -91,6 +92,7 @@ dev = [
91
92
  "google-cloud-aiplatform>=1.3",
92
93
  "anthropic",
93
94
  "prometheus_client",
95
+ "asgi-lifespan",
94
96
  ]
95
97
  evals = []
96
98
  experimental = []
@@ -114,7 +116,7 @@ container = [
114
116
  "opentelemetry-instrumentation-sqlalchemy",
115
117
  "opentelemetry-instrumentation-grpc",
116
118
  "py-grpc-prometheus",
117
- "strawberry-graphql[opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
119
+ "strawberry-graphql[opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
118
120
  "uvloop; platform_system != 'Windows'",
119
121
  ]
120
122
 
@@ -147,7 +149,7 @@ dependencies = [
147
149
  "numpy",
148
150
  "pandas==2.2.2; python_version>='3.9'",
149
151
  "pandas==1.4.0; python_version<'3.9'",
150
- "pytest==8.2.2",
152
+ "pytest==8.3.1",
151
153
  "pytest-asyncio",
152
154
  "pytest-cov",
153
155
  "pytest-postgresql",
@@ -168,11 +170,12 @@ dependencies = [
168
170
  "respx", # For OpenAI testing
169
171
  "nest-asyncio", # for executor testing
170
172
  "astunparse; python_version<'3.9'", # `ast.unparse(...)` is only available starting with Python 3.9
173
+ "asgi-lifespan",
171
174
  ]
172
175
 
173
176
  [tool.hatch.envs.type]
174
177
  dependencies = [
175
- "mypy==1.10.0",
178
+ "mypy==1.11.0",
176
179
  "tenacity",
177
180
  "pandas>=1.0",
178
181
  "pandas-stubs==2.0.3.230814",
@@ -194,7 +197,7 @@ dependencies = [
194
197
  "opentelemetry-instrumentation-sqlalchemy",
195
198
  "opentelemetry-instrumentation-grpc",
196
199
  "py-grpc-prometheus",
197
- "strawberry-graphql[opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
200
+ "strawberry-graphql[opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
198
201
  "requests", # this is needed to type-check third-party packages
199
202
  "pydantic==1.10.17; python_version=='3.8'", # lower minor versions of pydantic break strawberry mypy plugin
200
203
  "pydantic==1.10.17; python_version=='3.9'", # lower minor versions of pydantic break strawberry mypy plugin
@@ -207,7 +210,7 @@ python = ["3.8", "3.9", "3.12"]
207
210
  [tool.hatch.envs.style]
208
211
  detached = true
209
212
  dependencies = [
210
- "ruff==0.4.9",
213
+ "ruff==0.5.4",
211
214
  ]
212
215
 
213
216
  [[tool.hatch.envs.style.matrix]]
@@ -289,11 +292,11 @@ dependencies = [
289
292
 
290
293
  [tool.hatch.envs.publish.scripts]
291
294
  testpypi = [
292
- #"check-wheel-contents dist/",
295
+ "check-wheel-contents dist/",
293
296
  "twine upload --verbose --repository testpypi dist/*",
294
297
  ]
295
298
  pypi = [
296
- #"check-wheel-contents dist/",
299
+ "check-wheel-contents dist/",
297
300
  "twine upload --verbose dist/*",
298
301
  ]
299
302
 
@@ -304,7 +307,7 @@ check = [
304
307
 
305
308
  [tool.hatch.envs.gql]
306
309
  dependencies = [
307
- "strawberry-graphql[cli]==0.235.0", # need to pin version because we're monkey-patching
310
+ "strawberry-graphql[cli]==0.236.0", # need to pin version because we're monkey-patching
308
311
  "requests",
309
312
  ]
310
313
 
@@ -7,7 +7,6 @@ from itertools import islice
7
7
  from time import perf_counter
8
8
  from typing import (
9
9
  Any,
10
- AsyncContextManager,
11
10
  Awaitable,
12
11
  Callable,
13
12
  Iterable,
@@ -19,7 +18,6 @@ from typing import (
19
18
  )
20
19
 
21
20
  from cachetools import LRUCache
22
- from sqlalchemy.ext.asyncio import AsyncSession
23
21
  from typing_extensions import TypeAlias
24
22
 
25
23
  import phoenix.trace.v1 as pb
@@ -31,6 +29,7 @@ from phoenix.db.insertion.evaluation import (
31
29
  from phoenix.db.insertion.helpers import DataManipulation, DataManipulationEvent
32
30
  from phoenix.db.insertion.span import SpanInsertionEvent, insert_span
33
31
  from phoenix.server.api.dataloaders import CacheForDataLoaders
32
+ from phoenix.server.types import DbSessionFactory
34
33
  from phoenix.trace.schemas import Span
35
34
 
36
35
  logger = logging.getLogger(__name__)
@@ -46,7 +45,7 @@ class TransactionResult:
46
45
  class BulkInserter:
47
46
  def __init__(
48
47
  self,
49
- db: Callable[[], AsyncContextManager[AsyncSession]],
48
+ db: DbSessionFactory,
50
49
  *,
51
50
  cache_for_dataloaders: Optional[CacheForDataLoaders] = None,
52
51
  initial_batch_of_operations: Iterable[DataManipulation] = (),
@@ -105,8 +104,10 @@ class BulkInserter:
105
104
  )
106
105
 
107
106
  async def __aexit__(self, *args: Any) -> None:
108
- self._operations = None
109
107
  self._running = False
108
+ if self._task:
109
+ self._task.cancel()
110
+ self._task = None
110
111
 
111
112
  def _enqueue_operation(self, operation: DataManipulation) -> None:
112
113
  cast("Queue[DataManipulation]", self._operations).put_nowait(operation)
@@ -8,7 +8,7 @@ from typing import Any
8
8
  import aiosqlite
9
9
  import numpy as np
10
10
  import sqlean
11
- from sqlalchemy import URL, event, make_url
11
+ from sqlalchemy import URL, StaticPool, event, make_url
12
12
  from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
13
13
  from typing_extensions import assert_never
14
14
 
@@ -105,6 +105,7 @@ def aio_sqlite_engine(
105
105
  echo=echo,
106
106
  json_serializer=_dumps,
107
107
  async_creator=async_creator,
108
+ poolclass=StaticPool,
108
109
  )
109
110
  event.listen(engine.sync_engine, "connect", set_sqlite_pragma)
110
111
  if not migrate:
@@ -90,11 +90,15 @@ class Evaluator(ABC):
90
90
  if super_cls in (LLMEvaluator, Evaluator):
91
91
  break
92
92
  if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
93
+ if isinstance(evaluate, classmethod):
94
+ evaluate = evaluate.__func__
93
95
  assert callable(evaluate), "`evaluate()` method should be callable"
94
96
  # need to remove the first param, i.e. `self`
95
97
  _validate_sig(functools.partial(evaluate, None), "evaluate")
96
98
  return
97
99
  if async_evaluate := super_cls.__dict__.get(Evaluator.async_evaluate.__name__):
100
+ if isinstance(async_evaluate, classmethod):
101
+ async_evaluate = async_evaluate.__func__
98
102
  assert callable(async_evaluate), "`async_evaluate()` method should be callable"
99
103
  # need to remove the first param, i.e. `self`
100
104
  _validate_sig(functools.partial(async_evaluate, None), "async_evaluate")
@@ -9,6 +9,19 @@ from phoenix.experiments.types import EvaluationResult, TaskOutput
9
9
 
10
10
 
11
11
  class JSONParsable(CodeEvaluator):
12
+ """
13
+ An evaluator that checks if the output of an experiment run is a JSON-parsable string.
14
+
15
+ Example:
16
+
17
+ .. code-block:: python
18
+ from phoenix.experiments import run_experiment
19
+ from phoenix.experiments.evaluators import JSONParsable
20
+
21
+ run_experiment(dataset, task, evaluators=[JSONParsable])
22
+ """
23
+
24
+ @classmethod
12
25
  def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
13
26
  assert isinstance(output, str), "Experiment run output must be a string"
14
27
  try:
@@ -22,6 +35,22 @@ class JSONParsable(CodeEvaluator):
22
35
 
23
36
 
24
37
  class ContainsKeyword(CodeEvaluator):
38
+ """
39
+ An evaluator that checks if a keyword is present in the output of an experiment run.
40
+
41
+ Args:
42
+ keyword (str): The keyword to search for in the output.
43
+ name (str, optional): An optional name for the evaluator. Defaults to "Contains(<keyword>)".
44
+
45
+ Example:
46
+
47
+ .. code-block:: python
48
+ from phoenix.experiments import run_experiment
49
+ from phoenix.experiments.evaluators import ContainsKeyword
50
+
51
+ run_experiment(dataset, task, evaluators=[ContainsKeyword("foo")])
52
+ """
53
+
25
54
  def __init__(self, keyword: str, name: Optional[str] = None) -> None:
26
55
  self.keyword = keyword
27
56
  self._name = name or f"Contains({repr(keyword)})"
@@ -39,6 +68,23 @@ class ContainsKeyword(CodeEvaluator):
39
68
 
40
69
 
41
70
  class ContainsAnyKeyword(CodeEvaluator):
71
+ """
72
+ An evaluator that checks if any of the keywords are present in the output of an experiment run.
73
+
74
+ Args:
75
+ keywords (List[str]): The keywords to search for in the output.
76
+ name (str, optional): An optional name for the evaluator. Defaults to
77
+ "ContainsAny(<keywords>)".
78
+
79
+ Example:
80
+
81
+ .. code-block:: python
82
+ from phoenix.experiments import run_experiment
83
+ from phoenix.experiments.evaluators import ContainsAnyKeyword
84
+
85
+ run_experiment(dataset, task, evaluators=[ContainsAnyKeyword(["foo", "bar"])])
86
+ """
87
+
42
88
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
43
89
  self.keywords = keywords
44
90
  self._name = name or f"ContainsAny({keywords})"
@@ -57,6 +103,23 @@ class ContainsAnyKeyword(CodeEvaluator):
57
103
 
58
104
 
59
105
  class ContainsAllKeywords(CodeEvaluator):
106
+ """
107
+ An evaluator that checks if all of the keywords are present in the output of an experiment run.
108
+
109
+ Args:
110
+ keywords (List[str]): The keywords to search for in the output.
111
+ name (str, optional): An optional name for the evaluator. Defaults to
112
+ "ContainsAll(<keywords>)".
113
+
114
+ Example:
115
+ .. code-block:: python
116
+
117
+ from phoenix.experiments import run_experiment
118
+ from phoenix.experiments.evaluators import ContainsAllKeywords
119
+
120
+ run_experiment(dataset, task, evaluators=[ContainsAllKeywords(["foo", "bar"])])
121
+ """
122
+
60
123
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
61
124
  self.keywords = keywords
62
125
  self._name = name or f"ContainsAll({keywords})"
@@ -77,6 +140,23 @@ class ContainsAllKeywords(CodeEvaluator):
77
140
 
78
141
 
79
142
  class MatchesRegex(CodeEvaluator):
143
+ r"""
144
+ An experiment evaluator that checks if the output of an experiment run matches a regex pattern.
145
+
146
+ Args:
147
+ pattern (Union[str, re.Pattern[str]]): The regex pattern to match the output against.
148
+ name (str, optional): An optional name for the evaluator. Defaults to "matches_({pattern})".
149
+
150
+ Example:
151
+ .. code-block:: python
152
+
153
+ from phoenix.experiments import run_experiment
154
+ from phoenix.experiments.evaluators import MatchesRegex
155
+
156
+ phone_number_evaluator = MatchesRegex(r"\d{3}-\d{3}-\d{4}", name="valid-phone-number")
157
+ run_experiment(dataset, task, evaluators=[phone_number_evaluator])
158
+ """
159
+
80
160
  def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
81
161
  if isinstance(pattern, str):
82
162
  pattern = re.compile(pattern)
@@ -18,6 +18,31 @@ from phoenix.experiments.types import (
18
18
 
19
19
 
20
20
  class LLMCriteriaEvaluator(LLMEvaluator):
21
+ """
22
+ An experiment evaluator that uses an LLM to evaluate whether the text meets a custom criteria.
23
+
24
+ This evaluator uses the chain-of-thought technique to perform a binary evaluation of text based
25
+ on a custom criteria and description. When used as an experiment evaluator,
26
+ `LLMCriteriaEvaluator` will return a score of 1.0 if the text meets the criteria and a score of
27
+ 0.0 if not. The explanation produced by the chain-of-thought technique will be included in the
28
+ experiment evaluation as well.
29
+
30
+ Example criteria and descriptions:
31
+ - "thoughtfulness" - "shows careful consideration and fair judgement"
32
+ - "clarity" - "is easy to understand and follow"
33
+ - "professionalism" - "is respectful and appropriate for a formal setting"
34
+
35
+ Args:
36
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
37
+ the `phoenix.evals` module.
38
+ criteria: The criteria to evaluate the text against, the criteria should be able to be used
39
+ as a noun in a sentence.
40
+ description (str): A description of the criteria, used to clarify instructions to the LLM.
41
+ The description should complete this sentence: "{criteria} means the text
42
+ {description}".
43
+ name (str): The name of the evaluator
44
+ """
45
+
21
46
  _base_template = (
22
47
  "Determine if the following text is {criteria}. {description}"
23
48
  "First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
@@ -117,6 +142,14 @@ ConcisenessEvaluator = criteria_evaluator_factory(
117
142
  description="is just a few sentences and easy to follow",
118
143
  default_name="Conciseness",
119
144
  )
145
+ """
146
+ An experiment evaluator that uses an LLM to evaluate whether the text is concise.
147
+
148
+ Args:
149
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
150
+ the `phoenix.evals` module.
151
+ name (str, optional): The name of the evaluator, defaults to "Conciseness".
152
+ """
120
153
 
121
154
 
122
155
  HelpfulnessEvaluator = criteria_evaluator_factory(
@@ -125,6 +158,14 @@ HelpfulnessEvaluator = criteria_evaluator_factory(
125
158
  description="provides useful information",
126
159
  default_name="Helpfulness",
127
160
  )
161
+ """
162
+ An experiment evaluator that uses an LLM to evaluate whether the text is helpful.
163
+
164
+ Args:
165
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
166
+ the `phoenix.evals` module.
167
+ name (str, optional): The name of the evaluator, defaults to "Helpfulness".
168
+ """
128
169
 
129
170
 
130
171
  CoherenceEvaluator = criteria_evaluator_factory(
@@ -133,6 +174,14 @@ CoherenceEvaluator = criteria_evaluator_factory(
133
174
  description="is coherent, well-structured, and logically sound",
134
175
  default_name="Coherence",
135
176
  )
177
+ """
178
+ An experiment evaluator that uses an LLM to evaluate whether the text is coherent.
179
+
180
+ Args:
181
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
182
+ the `phoenix.evals` module.
183
+ name (str, optional): The name of the evaluator, defaults to "Coherence".
184
+ """
136
185
 
137
186
 
138
187
  def _parse_label_from_explanation(raw_string: str) -> str:
@@ -149,6 +198,33 @@ def _parse_label_from_explanation(raw_string: str) -> str:
149
198
 
150
199
 
151
200
  class RelevanceEvaluator(LLMEvaluator):
201
+ """
202
+ An experiment evaluator that uses an LLM to evaluate whether a response is relevant to a query.
203
+
204
+ This evaluator uses the chain-of-thought technique to perform a binary evaluation of whether
205
+ the output "response" of an experiment is relevant to its input "query". When used as an
206
+ experiment evaluator, `RelevanceEvaluator` will return a score of 1.0 if the response is
207
+ relevant to the query and a score of 0.0 if not. The explanation produced by the
208
+ chain-of-thought technique will be included in the experiment evaluation as well.
209
+
210
+ Optionally, you can provide custom functions to extract the query and response from the input
211
+ and output of the experiment task. By default, the evaluator will use the dataset example as
212
+ the input and the output of the experiment task as the response.
213
+
214
+ Args:
215
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
216
+ the `phoenix.evals` module.
217
+ get_query (callable, optional): A function that extracts the query from the input of the
218
+ experiment task. The function should take the input and metadata of the dataset example
219
+ and return a string. By default, the function will return the string representation of
220
+ the input.
221
+ get_response (callable, optional): A function that extracts the response from the output of
222
+ the experiment task. The function should take the output and metadata of the experiment
223
+ task and return a string. By default, the function will return the string representation
224
+ of the output.
225
+ name (str, optional): The name of the evaluator. Defaults to "Relevance".
226
+ """
227
+
152
228
  template = (
153
229
  "Determine if the following response is relevant to the query. In this context, "
154
230
  "'relevance' means that the response directly addresses the core question or topic of the "
@@ -174,7 +250,7 @@ class RelevanceEvaluator(LLMEvaluator):
174
250
  model: LLMBaseModel,
175
251
  get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
176
252
  get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
177
- name: str = "RelevanceEvaluator",
253
+ name: str = "Relevance",
178
254
  ):
179
255
  self.model = model
180
256
  self._name = name
@@ -1,6 +1,5 @@
1
1
  import functools
2
2
  import inspect
3
- from itertools import chain, islice, repeat
4
3
  from typing import TYPE_CHECKING, Any, Callable, Optional, Union
5
4
 
6
5
  from phoenix.experiments.types import (
@@ -75,6 +74,72 @@ def create_evaluator(
75
74
  name: Optional[str] = None,
76
75
  scorer: Optional[Callable[[Any], EvaluationResult]] = None,
77
76
  ) -> Callable[[Callable[..., Any]], "Evaluator"]:
77
+ """
78
+ A decorator that configures a sync or async function to be used as an experiment evaluator.
79
+
80
+ If the `evaluator` is a function of one argument then that argument will be
81
+ bound to the `output` of an experiment task. Alternatively, the `evaluator` can be a function
82
+ of any combination of specific argument names that will be bound to special values:
83
+ `input`: The input field of the dataset example
84
+ `output`: The output of an experiment task
85
+ `expected`: The expected or reference output of the dataset example
86
+ `reference`: An alias for `expected`
87
+ `metadata`: Metadata associated with the dataset example
88
+
89
+ Args:
90
+ kind (str | AnnotatorKind): Broadly indicates how the evaluator scores an experiment run.
91
+ Valid kinds are: "CODE", "LLM". Defaults to "CODE".
92
+ name (str, optional): The name of the evaluator. If not provided, the name of the function
93
+ will be used.
94
+ scorer (callable, optional): An optional function that converts the output of the wrapped
95
+ function into an `EvaluationResult`. This allows configuring the evaluation
96
+ payload by setting a label, score and explanation. By default, numeric outputs will
97
+ be recorded as scores, boolean outputs will be recorded as scores and labels, and
98
+ string outputs will be recorded as labels. If the output is a 2-tuple, the first item
99
  will be recorded as the score and the second item will be recorded as the explanation.
100
+
101
+ Examples:
102
+ Configuring an evaluator that returns a boolean
103
+
104
+ .. code-block:: python
105
+ @create_evaluator(kind="CODE", name="exact-match")
106
+ def match(output: str, expected: str) -> bool:
107
+ return output == expected
108
+
109
+ Configuring an evaluator that returns a label
110
+
111
+ .. code-block:: python
112
+ client = openai.Client()
113
+
114
+ @create_evaluator(kind="LLM")
115
+ def label(output: str) -> str:
116
+ res = client.chat.completions.create(
117
+ model = "gpt-4",
118
+ messages = [
119
+ {
120
+ "role": "user",
121
+ "content": (
122
+ "in one word, characterize the sentiment of the following customer "
123
+ f"request: {output}"
124
+ )
125
+ },
126
+ ],
127
+ )
128
+ label = res.choices[0].message.content
129
+ return label
130
+
131
+ Configuring an evaluator that returns a score and explanation
132
+
133
+ .. code-block:: python
134
+ from textdistance import levenshtein
135
+
136
+ @create_evaluator(kind="CODE", name="levenshtein-distance")
137
+ def ld(output: str, expected: str) -> Tuple[float, str]:
138
+ return (
139
+ levenshtein(output, expected),
140
+ f"Levenshtein distance between {output} and {expected}"
141
+ )
142
+ """
78
143
  if scorer is None:
79
144
  scorer = _default_eval_scorer
80
145
 
@@ -163,24 +228,8 @@ def _default_eval_scorer(result: Any) -> EvaluationResult:
163
228
  return EvaluationResult(score=float(result))
164
229
  if isinstance(result, str):
165
230
  return EvaluationResult(label=result)
166
- if isinstance(result, (tuple, list)) and 0 < len(result) <= 3:
167
- # Possible interpretations are:
168
- # - 3-tuple: (Score, Label, Explanation)
169
- # - 2-tuple: (Score, Explanation) or (Label, Explanation)
170
- # - 1-tuple: (Score, ) or (Label, )
171
- # Note that (Score, Label) conflicts with (Score, Explanation) and we
172
- # pick the latter because it's probably more prevalent. To get
173
- # (Score, Label), use a 3-tuple instead, i.e. (Score, Label, None).
174
- a, b, c = islice(chain(result, repeat(None)), 3)
175
- score, label, explanation = None, a, b
176
- if hasattr(a, "__float__"):
177
- try:
178
- score = float(a)
179
- except ValueError:
180
- pass
181
- else:
182
- label, explanation = (None, b) if len(result) < 3 else (b, c)
183
- return EvaluationResult(score=score, label=label, explanation=explanation)
184
- if result is None:
185
- return EvaluationResult(score=0)
231
+ if isinstance(result, (tuple, list)) and len(result) == 2:
232
+ # If the result is a 2-tuple, the first item will be recorded as the score
233
+ # and the second item will be recorded as the explanation.
234
+ return EvaluationResult(score=float(result[0]), explanation=str(result[1]))
186
235
  raise ValueError(f"Unsupported evaluation result type: {type(result)}")