arize-phoenix 4.12.1rc1__tar.gz → 4.14.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (298)
  1. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/PKG-INFO +12 -9
  2. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/README.md +2 -0
  3. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/pyproject.toml +22 -27
  4. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/bulk_inserter.py +3 -1
  5. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/base.py +4 -0
  6. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/code_evaluators.py +80 -0
  7. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/llm_evaluators.py +77 -1
  8. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/utils.py +70 -21
  9. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/functions.py +14 -14
  10. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/context.py +7 -3
  11. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/average_experiment_run_latency.py +23 -23
  12. arize_phoenix-4.14.1/src/phoenix/server/api/dataloaders/experiment_error_rates.py +63 -0
  13. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/experiment_run_counts.py +18 -5
  14. arize_phoenix-4.12.1rc1/src/phoenix/server/api/input_types/CreateSpanAnnotationsInput.py → arize_phoenix-4.14.1/src/phoenix/server/api/input_types/CreateSpanAnnotationInput.py +4 -2
  15. arize_phoenix-4.12.1rc1/src/phoenix/server/api/input_types/CreateTraceAnnotationsInput.py → arize_phoenix-4.14.1/src/phoenix/server/api/input_types/CreateTraceAnnotationInput.py +4 -2
  16. arize_phoenix-4.12.1rc1/src/phoenix/server/api/input_types/PatchAnnotationsInput.py → arize_phoenix-4.14.1/src/phoenix/server/api/input_types/PatchAnnotationInput.py +4 -2
  17. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/span_annotations_mutations.py +12 -6
  18. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/trace_annotations_mutations.py +12 -6
  19. arize_phoenix-4.14.1/src/phoenix/server/api/openapi/main.py +6 -0
  20. arize_phoenix-4.14.1/src/phoenix/server/api/openapi/schema.py +16 -0
  21. arize_phoenix-4.14.1/src/phoenix/server/api/routers/v1/__init__.py +89 -0
  22. arize_phoenix-4.14.1/src/phoenix/server/api/routers/v1/dataset_examples.py +178 -0
  23. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/datasets.py +506 -390
  24. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/evaluations.py +66 -73
  25. arize_phoenix-4.14.1/src/phoenix/server/api/routers/v1/experiment_evaluations.py +136 -0
  26. arize_phoenix-4.14.1/src/phoenix/server/api/routers/v1/experiment_runs.py +217 -0
  27. arize_phoenix-4.14.1/src/phoenix/server/api/routers/v1/experiments.py +301 -0
  28. arize_phoenix-4.14.1/src/phoenix/server/api/routers/v1/spans.py +275 -0
  29. arize_phoenix-4.14.1/src/phoenix/server/api/routers/v1/traces.py +228 -0
  30. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Experiment.py +2 -2
  31. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Inferences.py +1 -2
  32. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Model.py +1 -2
  33. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/app.py +177 -152
  34. arize_phoenix-4.14.1/src/phoenix/server/openapi/docs.py +221 -0
  35. arize_phoenix-4.14.1/src/phoenix/server/static/.vite/manifest.json +78 -0
  36. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/components-C8sm_r1F.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/components-DeS0YEmv.js +2 -2
  37. arize_phoenix-4.14.1/src/phoenix/server/static/assets/index-CQgXRwU0.js +100 -0
  38. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/pages-bN7juCjh.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/pages-hdjlFZhO.js +275 -198
  39. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/vendor-CUDAPm8e.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/vendor-DPvSDRn3.js +1 -1
  40. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/vendor-arizeai-Do2HOmcL.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/vendor-arizeai-CkvPT67c.js +2 -2
  41. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/vendor-codemirror-CrdxOlMs.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/vendor-codemirror-Cqwpwlua.js +1 -1
  42. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/vendor-recharts-PKRvByVe.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/vendor-recharts-5jlNaZuF.js +1 -1
  43. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/thread_server.py +2 -2
  44. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/session/client.py +9 -8
  45. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/filter.py +40 -25
  46. arize_phoenix-4.14.1/src/phoenix/version.py +1 -0
  47. arize_phoenix-4.12.1rc1/src/phoenix/server/api/dataloaders/experiment_error_rates.py +0 -43
  48. arize_phoenix-4.12.1rc1/src/phoenix/server/api/openapi/main.py +0 -22
  49. arize_phoenix-4.12.1rc1/src/phoenix/server/api/openapi/schema.py +0 -16
  50. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/__init__.py +0 -42
  51. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/dataset_examples.py +0 -157
  52. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/experiment_evaluations.py +0 -113
  53. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/experiment_runs.py +0 -160
  54. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/experiments.py +0 -252
  55. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/pydantic_compat.py +0 -78
  56. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/spans.py +0 -246
  57. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/traces.py +0 -215
  58. arize_phoenix-4.12.1rc1/src/phoenix/server/api/routers/v1/utils.py +0 -95
  59. arize_phoenix-4.12.1rc1/src/phoenix/server/static/.vite/manifest.json +0 -78
  60. arize_phoenix-4.12.1rc1/src/phoenix/server/static/assets/index-BEKPzgQs.js +0 -100
  61. arize_phoenix-4.12.1rc1/src/phoenix/version.py +0 -1
  62. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/.gitignore +0 -0
  63. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/IP_NOTICE +0 -0
  64. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/LICENSE +0 -0
  65. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/chat-service/chat/__init__.py +0 -0
  66. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/chat-service/chat/app.py +0 -0
  67. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/chat-service/chat/types.py +0 -0
  68. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/Dockerfile +0 -0
  69. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/Makefile +0 -0
  70. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/__init__.py +0 -0
  71. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/pyproject.toml +0 -0
  72. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/requirements.txt +0 -0
  73. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/schema.json +0 -0
  74. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/__init__.py +0 -0
  75. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/config.py +0 -0
  76. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/core/__init__.py +0 -0
  77. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/core/embedding_dimension.py +0 -0
  78. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/core/model.py +0 -0
  79. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/core/model_schema.py +0 -0
  80. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/core/model_schema_adapter.py +0 -0
  81. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/datetime_utils.py +0 -0
  82. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/README.md +0 -0
  83. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/__init__.py +0 -0
  84. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/alembic.ini +0 -0
  85. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/engines.py +0 -0
  86. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/helpers.py +0 -0
  87. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/__init__.py +0 -0
  88. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/dataset.py +0 -0
  89. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/evaluation.py +0 -0
  90. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/helpers.py +0 -0
  91. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/span.py +0 -0
  92. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrate.py +0 -0
  93. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/__init__.py +0 -0
  94. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/env.py +0 -0
  95. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/script.py.mako +0 -0
  96. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/types.py +0 -0
  97. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -0
  98. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +0 -0
  99. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/db/models.py +0 -0
  100. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/exceptions.py +0 -0
  101. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/__init__.py +0 -0
  102. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/__init__.py +0 -0
  103. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/tracing.py +0 -0
  104. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/types.py +0 -0
  105. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/utils.py +0 -0
  106. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/__init__.py +0 -0
  107. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/errors.py +0 -0
  108. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/fixtures.py +0 -0
  109. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/inferences.py +0 -0
  110. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/schema.py +0 -0
  111. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/validation.py +0 -0
  112. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/README.md +0 -0
  113. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/__init__.py +0 -0
  114. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/binning.py +0 -0
  115. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/metrics.py +0 -0
  116. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/mixins.py +0 -0
  117. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/retrieval_metrics.py +0 -0
  118. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/timeseries.py +0 -0
  119. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/wrappers.py +0 -0
  120. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/__init__.py +0 -0
  121. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/clustering.py +0 -0
  122. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/pointcloud.py +0 -0
  123. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/projectors.py +0 -0
  124. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  125. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/py.typed +0 -0
  126. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/__init__.py +0 -0
  127. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/__init__.py +0 -0
  128. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/__init__.py +0 -0
  129. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/cache/__init__.py +0 -0
  130. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/cache/two_tier_cache.py +0 -0
  131. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -0
  132. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/dataset_example_spans.py +0 -0
  133. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/document_evaluation_summaries.py +0 -0
  134. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/document_evaluations.py +0 -0
  135. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/document_retrieval_metrics.py +0 -0
  136. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/evaluation_summaries.py +0 -0
  137. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -0
  138. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -0
  139. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/latency_ms_quantile.py +0 -0
  140. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/min_start_or_max_end_times.py +0 -0
  141. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/project_by_name.py +0 -0
  142. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/record_counts.py +0 -0
  143. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/span_annotations.py +0 -0
  144. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/span_descendants.py +0 -0
  145. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/span_evaluations.py +0 -0
  146. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/span_projects.py +0 -0
  147. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/token_counts.py +0 -0
  148. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/trace_evaluations.py +0 -0
  149. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/trace_row_ids.py +0 -0
  150. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/helpers/__init__.py +0 -0
  151. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/helpers/dataset_helpers.py +0 -0
  152. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -0
  153. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -0
  154. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/ClearProjectInput.py +0 -0
  155. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  156. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  157. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/CreateDatasetInput.py +0 -0
  158. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  159. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DatasetExampleInput.py +0 -0
  160. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DatasetSort.py +0 -0
  161. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DatasetVersionSort.py +0 -0
  162. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DeleteAnnotationsInput.py +0 -0
  163. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -0
  164. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DeleteDatasetInput.py +0 -0
  165. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -0
  166. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  167. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  168. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  169. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -0
  170. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/PatchDatasetInput.py +0 -0
  171. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  172. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  173. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  174. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/__init__.py +0 -0
  175. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/interceptor.py +0 -0
  176. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/__init__.py +0 -0
  177. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/auth.py +0 -0
  178. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/dataset_mutations.py +0 -0
  179. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/experiment_mutations.py +0 -0
  180. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/export_events_mutations.py +0 -0
  181. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/project_mutations.py +0 -0
  182. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/openapi/__init__.py +0 -0
  183. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/queries.py +0 -0
  184. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/__init__.py +0 -0
  185. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/utils.py +0 -0
  186. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/schema.py +0 -0
  187. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Annotation.py +0 -0
  188. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/AnnotatorKind.py +0 -0
  189. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Cluster.py +0 -0
  190. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/CreateDatasetPayload.py +0 -0
  191. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  192. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Dataset.py +0 -0
  193. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DatasetExample.py +0 -0
  194. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DatasetExampleRevision.py +0 -0
  195. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  196. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DatasetVersion.py +0 -0
  197. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Dimension.py +0 -0
  198. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  199. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  200. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DimensionType.py +0 -0
  201. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  202. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
  203. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
  204. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  205. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  206. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Evaluation.py +0 -0
  207. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
  208. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Event.py +0 -0
  209. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  210. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExampleRevisionInterface.py +0 -0
  211. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -0
  212. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExperimentComparison.py +0 -0
  213. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExperimentRun.py +0 -0
  214. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExperimentRunAnnotation.py +0 -0
  215. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  216. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Functionality.py +0 -0
  217. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/InferencesRole.py +0 -0
  218. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/MimeType.py +0 -0
  219. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/NumericRange.py +0 -0
  220. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  221. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Project.py +0 -0
  222. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  223. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Retrieval.py +0 -0
  224. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  225. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Segments.py +0 -0
  226. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/SortDir.py +0 -0
  227. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Span.py +0 -0
  228. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/SpanAnnotation.py +0 -0
  229. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  230. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Trace.py +0 -0
  231. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/TraceAnnotation.py +0 -0
  232. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  233. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  234. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  235. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/__init__.py +0 -0
  236. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/node.py +0 -0
  237. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/pagination.py +0 -0
  238. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/utils.py +0 -0
  239. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/grpc_server.py +0 -0
  240. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/main.py +0 -0
  241. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/openapi/__init__.py +0 -0
  242. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/prometheus.py +0 -0
  243. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  244. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  245. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  246. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  247. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  248. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  249. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  250. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  251. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/assets/vendor-DxkFTwjz.css +0 -0
  252. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/assets/vendor-three-DwGkEfCM.js +0 -0
  253. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/favicon.ico +0 -0
  254. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/modernizr.js +0 -0
  255. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/telemetry.py +0 -0
  256. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/templates/__init__.py +0 -0
  257. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/server/templates/index.html +0 -0
  258. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/services.py +0 -0
  259. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/session/__init__.py +0 -0
  260. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/session/data_extractor.py +0 -0
  261. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/session/evaluation.py +0 -0
  262. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/session/session.py +0 -0
  263. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/settings.py +0 -0
  264. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/__init__.py +0 -0
  265. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/attributes.py +0 -0
  266. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/README.md +0 -0
  267. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/__init__.py +0 -0
  268. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/helpers.py +0 -0
  269. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/query.py +0 -0
  270. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/errors.py +0 -0
  271. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/evaluation_conventions.py +0 -0
  272. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/exporter.py +0 -0
  273. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/fixtures.py +0 -0
  274. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/langchain/__init__.py +0 -0
  275. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/langchain/instrumentor.py +0 -0
  276. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/llama_index/__init__.py +0 -0
  277. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/llama_index/callback.py +0 -0
  278. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/openai/__init__.py +0 -0
  279. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/openai/instrumentor.py +0 -0
  280. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/otel.py +0 -0
  281. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/projects.py +0 -0
  282. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/schemas.py +0 -0
  283. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/span_evaluations.py +0 -0
  284. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/span_json_decoder.py +0 -0
  285. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/span_json_encoder.py +0 -0
  286. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/trace_dataset.py +0 -0
  287. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/utils.py +0 -0
  288. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/v1/__init__.py +0 -0
  289. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
  290. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
  291. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/__init__.py +0 -0
  292. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/deprecation.py +0 -0
  293. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/error_handling.py +0 -0
  294. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/json.py +0 -0
  295. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/logging.py +0 -0
  296. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/project.py +0 -0
  297. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/re.py +0 -0
  298. {arize_phoenix-4.12.1rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/span_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.12.1rc1
3
+ Version: 4.14.1
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -22,7 +22,6 @@ Requires-Dist: aiosqlite
22
22
  Requires-Dist: alembic<2,>=1.3.0
23
23
  Requires-Dist: arize-phoenix-evals>=0.13.1
24
24
  Requires-Dist: cachetools
25
- Requires-Dist: fastapi
26
25
  Requires-Dist: grpcio
27
26
  Requires-Dist: hdbscan>=0.8.33
28
27
  Requires-Dist: httpx
@@ -41,14 +40,14 @@ Requires-Dist: pandas>=1.0
41
40
  Requires-Dist: protobuf<6.0,>=3.20
42
41
  Requires-Dist: psutil
43
42
  Requires-Dist: pyarrow
44
- Requires-Dist: pydantic!=2.0.*,<3,>=1.0
45
43
  Requires-Dist: python-multipart
44
+ Requires-Dist: pyyaml
46
45
  Requires-Dist: scikit-learn
47
46
  Requires-Dist: scipy
48
47
  Requires-Dist: sqlalchemy[asyncio]<3,>=2.0.4
49
48
  Requires-Dist: sqlean-py>=3.45.1
50
49
  Requires-Dist: starlette
51
- Requires-Dist: strawberry-graphql==0.235.0
50
+ Requires-Dist: strawberry-graphql==0.236.0
52
51
  Requires-Dist: tqdm
53
52
  Requires-Dist: typing-extensions>=4.5; python_version < '3.12'
54
53
  Requires-Dist: typing-extensions>=4.6; python_version >= '3.12'
@@ -57,19 +56,20 @@ Requires-Dist: uvicorn
57
56
  Requires-Dist: wrapt
58
57
  Provides-Extra: container
59
58
  Requires-Dist: opentelemetry-exporter-otlp; extra == 'container'
60
- Requires-Dist: opentelemetry-instrumentation-fastapi; extra == 'container'
61
59
  Requires-Dist: opentelemetry-instrumentation-grpc; extra == 'container'
62
60
  Requires-Dist: opentelemetry-instrumentation-sqlalchemy; extra == 'container'
61
+ Requires-Dist: opentelemetry-instrumentation-starlette; extra == 'container'
63
62
  Requires-Dist: opentelemetry-proto>=1.12.0; extra == 'container'
64
63
  Requires-Dist: opentelemetry-sdk; extra == 'container'
65
64
  Requires-Dist: opentelemetry-semantic-conventions; extra == 'container'
66
65
  Requires-Dist: prometheus-client; extra == 'container'
67
66
  Requires-Dist: py-grpc-prometheus; extra == 'container'
68
- Requires-Dist: strawberry-graphql[opentelemetry]==0.235.0; extra == 'container'
67
+ Requires-Dist: strawberry-graphql[opentelemetry]==0.236.0; extra == 'container'
69
68
  Requires-Dist: uvloop; (platform_system != 'Windows') and extra == 'container'
70
69
  Provides-Extra: dev
71
70
  Requires-Dist: anthropic; extra == 'dev'
72
71
  Requires-Dist: arize[autoembeddings,llm-evaluation]; extra == 'dev'
72
+ Requires-Dist: asgi-lifespan; extra == 'dev'
73
73
  Requires-Dist: asyncpg; extra == 'dev'
74
74
  Requires-Dist: gcsfs; extra == 'dev'
75
75
  Requires-Dist: google-cloud-aiplatform>=1.3; extra == 'dev'
@@ -78,6 +78,7 @@ Requires-Dist: jupyter; extra == 'dev'
78
78
  Requires-Dist: langchain>=0.0.334; extra == 'dev'
79
79
  Requires-Dist: litellm>=1.0.3; extra == 'dev'
80
80
  Requires-Dist: llama-index>=0.10.3; extra == 'dev'
81
+ Requires-Dist: mypy==1.11.0; extra == 'dev'
81
82
  Requires-Dist: nbqa; extra == 'dev'
82
83
  Requires-Dist: pandas-stubs==2.0.3.230814; (python_version < '3.9') and extra == 'dev'
83
84
  Requires-Dist: pandas-stubs==2.2.2.240603; (python_version >= '3.9') and extra == 'dev'
@@ -88,9 +89,9 @@ Requires-Dist: psycopg[binary]; extra == 'dev'
88
89
  Requires-Dist: pytest-asyncio; extra == 'dev'
89
90
  Requires-Dist: pytest-cov; extra == 'dev'
90
91
  Requires-Dist: pytest-postgresql; extra == 'dev'
91
- Requires-Dist: pytest==8.2.2; extra == 'dev'
92
- Requires-Dist: ruff==0.4.9; extra == 'dev'
93
- Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.235.0; extra == 'dev'
92
+ Requires-Dist: pytest==8.3.1; extra == 'dev'
93
+ Requires-Dist: ruff==0.5.4; extra == 'dev'
94
+ Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.236.0; extra == 'dev'
94
95
  Requires-Dist: tabulate; extra == 'dev'
95
96
  Requires-Dist: types-tabulate; extra == 'dev'
96
97
  Provides-Extra: evals
@@ -138,6 +139,8 @@ Phoenix is an open-source AI observability platform designed for experimentation
138
139
 
139
140
  - **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
140
141
  - **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
142
+ - **_Datasets_** - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.
143
+ - **_Experiments_** - Track and evaluate changes to prompts, LLMs, and retrieval.
141
144
  - **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
142
145
 
143
146
  Phoenix is vendor and language agnostic with out-of-the-box support for popular frameworks (🦙LlamaIndex, 🦜⛓LangChain, 🧩DSPy) and LLM providers (OpenAI, Bedrock, and more). For details on auto-instrumentation, check out the [OpenInference](https://github.com/Arize-ai/openinference) project.
@@ -31,6 +31,8 @@ Phoenix is an open-source AI observability platform designed for experimentation
31
31
 
32
32
  - **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
33
33
  - **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
34
+ - **_Datasets_** - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.
35
+ - **_Experiments_** - Track and evaluate changes to prompts, LLMs, and retrieval.
34
36
  - **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
35
37
 
36
38
  Phoenix is vendor and language agnostic with out-of-the-box support for popular frameworks (🦙LlamaIndex, 🦜⛓LangChain, 🧩DSPy) and LLM providers (OpenAI, Bedrock, and more). For details on auto-instrumentation, check out the [OpenInference](https://github.com/Arize-ai/openinference) project.
@@ -31,7 +31,7 @@ dependencies = [
31
31
  "starlette",
32
32
  "uvicorn",
33
33
  "psutil",
34
- "strawberry-graphql==0.235.0", # need to pin version because we're monkey-patching
34
+ "strawberry-graphql==0.236.0", # need to pin version because we're monkey-patching
35
35
  "pyarrow",
36
36
  "typing-extensions>=4.5; python_version<'3.12'",
37
37
  # A minimum version of typing-extensions==4.6.0 is needed to avoid this issue on Python 3.12: https://github.com/Azure/azure-sdk-for-python/issues/33442#issuecomment-1847886784
@@ -59,8 +59,7 @@ dependencies = [
59
59
  "cachetools",
60
60
  "python-multipart", # see https://www.starlette.io/#dependencies
61
61
  "arize-phoenix-evals>=0.13.1",
62
- "fastapi",
63
- "pydantic>=1.0,!=2.0.*,<3,", # exclude 2.0.* since it does not support the `json_encoders` configuration setting
62
+ "pyyaml", # for OpenAPI
64
63
  ]
65
64
  dynamic = ["version"]
66
65
 
@@ -70,19 +69,20 @@ dev = [
70
69
  "hatch",
71
70
  "jupyter",
72
71
  "nbqa",
73
- "ruff==0.4.9",
72
+ "ruff==0.5.4",
73
+ "mypy==1.11.0",
74
74
  "pandas>=1.0",
75
75
  "tabulate", # used by DataFrame.to_markdown()
76
76
  "types-tabulate",
77
77
  "pandas-stubs==2.2.2.240603; python_version>='3.9'",
78
78
  "pandas-stubs==2.0.3.230814; python_version<'3.9'",
79
- "pytest==8.2.2",
79
+ "pytest==8.3.1",
80
80
  "pytest-asyncio",
81
81
  "pytest-cov",
82
82
  "pytest-postgresql",
83
83
  "asyncpg",
84
84
  "psycopg[binary]",
85
- "strawberry-graphql[debug-server,opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
85
+ "strawberry-graphql[debug-server,opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
86
86
  "pre-commit",
87
87
  "arize[AutoEmbeddings, LLM_Evaluation]",
88
88
  "llama-index>=0.10.3",
@@ -91,6 +91,7 @@ dev = [
91
91
  "google-cloud-aiplatform>=1.3",
92
92
  "anthropic",
93
93
  "prometheus_client",
94
+ "asgi-lifespan",
94
95
  ]
95
96
  evals = []
96
97
  experimental = []
@@ -110,11 +111,11 @@ container = [
110
111
  "opentelemetry-proto>=1.12.0",
111
112
  "opentelemetry-exporter-otlp",
112
113
  "opentelemetry-semantic-conventions",
113
- "opentelemetry-instrumentation-fastapi",
114
+ "opentelemetry-instrumentation-starlette",
114
115
  "opentelemetry-instrumentation-sqlalchemy",
115
116
  "opentelemetry-instrumentation-grpc",
116
117
  "py-grpc-prometheus",
117
- "strawberry-graphql[opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
118
+ "strawberry-graphql[opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
118
119
  "uvloop; platform_system != 'Windows'",
119
120
  ]
120
121
 
@@ -147,7 +148,7 @@ dependencies = [
147
148
  "numpy",
148
149
  "pandas==2.2.2; python_version>='3.9'",
149
150
  "pandas==1.4.0; python_version<'3.9'",
150
- "pytest==8.2.2",
151
+ "pytest==8.3.1",
151
152
  "pytest-asyncio",
152
153
  "pytest-cov",
153
154
  "pytest-postgresql",
@@ -160,19 +161,18 @@ dependencies = [
160
161
  "protobuf==3.20", # version minimum (for tests)
161
162
  "responses",
162
163
  "tiktoken",
163
- "typing-extensions==4.5.0; python_version=='3.8'",
164
- "typing-extensions==4.6.0; python_version=='3.9'",
165
- "pydantic==1.9.0; python_version<='3.9'", # minimum version of pydantic compatible with openai
166
- "pydantic==2.8.2; python_version=='3.12'",
164
+ "typing-extensions==4.5.0; python_version<'3.12'",
165
+ "typing-extensions==4.6.0; python_version>='3.12'",
167
166
  "httpx", # For OpenAI testing
168
167
  "respx", # For OpenAI testing
169
168
  "nest-asyncio", # for executor testing
170
169
  "astunparse; python_version<'3.9'", # `ast.unparse(...)` is only available starting with Python 3.9
170
+ "asgi-lifespan",
171
171
  ]
172
172
 
173
173
  [tool.hatch.envs.type]
174
174
  dependencies = [
175
- "mypy==1.10.0",
175
+ "mypy==1.11.0",
176
176
  "tenacity",
177
177
  "pandas>=1.0",
178
178
  "pandas-stubs==2.0.3.230814",
@@ -190,15 +190,12 @@ dependencies = [
190
190
  "opentelemetry-proto>=1.12.0",
191
191
  "opentelemetry-exporter-otlp",
192
192
  "opentelemetry-semantic-conventions",
193
- "opentelemetry-instrumentation-fastapi",
193
+ "opentelemetry-instrumentation-starlette",
194
194
  "opentelemetry-instrumentation-sqlalchemy",
195
195
  "opentelemetry-instrumentation-grpc",
196
196
  "py-grpc-prometheus",
197
- "strawberry-graphql[opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
197
+ "strawberry-graphql[opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
198
198
  "requests", # this is needed to type-check third-party packages
199
- "pydantic==1.10.17; python_version=='3.8'", # lower minor versions of pydantic break strawberry mypy plugin
200
- "pydantic==1.10.17; python_version=='3.9'", # lower minor versions of pydantic break strawberry mypy plugin
201
- "pydantic==2.8.2; python_version=='3.12'",
202
199
  ]
203
200
 
204
201
  [[tool.hatch.envs.type.matrix]]
@@ -207,7 +204,7 @@ python = ["3.8", "3.9", "3.12"]
207
204
  [tool.hatch.envs.style]
208
205
  detached = true
209
206
  dependencies = [
210
- "ruff==0.4.9",
207
+ "ruff==0.5.4",
211
208
  ]
212
209
 
213
210
  [[tool.hatch.envs.style.matrix]]
@@ -289,11 +286,11 @@ dependencies = [
289
286
 
290
287
  [tool.hatch.envs.publish.scripts]
291
288
  testpypi = [
292
- #"check-wheel-contents dist/",
289
+ "check-wheel-contents dist/",
293
290
  "twine upload --verbose --repository testpypi dist/*",
294
291
  ]
295
292
  pypi = [
296
- #"check-wheel-contents dist/",
293
+ "check-wheel-contents dist/",
297
294
  "twine upload --verbose dist/*",
298
295
  ]
299
296
 
@@ -304,7 +301,7 @@ check = [
304
301
 
305
302
  [tool.hatch.envs.gql]
306
303
  dependencies = [
307
- "strawberry-graphql[cli]==0.235.0", # need to pin version because we're monkey-patching
304
+ "strawberry-graphql[cli]==0.236.0", # need to pin version because we're monkey-patching
308
305
  "requests",
309
306
  ]
310
307
 
@@ -313,12 +310,11 @@ build = 'strawberry export-schema phoenix.server.api.schema:schema > app/schema.
313
310
 
314
311
  [tool.hatch.envs.openapi]
315
312
  dependencies = [
316
- "pydantic==2.8.2",
317
- "fastapi==0.111.0",
313
+ "pyyaml",
318
314
  ]
319
315
 
320
316
  [tool.hatch.envs.openapi.scripts]
321
- build = "python -m phoenix.server.api.openapi.main > schemas/openapi.json"
317
+ build = 'python -m phoenix.server.api.openapi.main > schemas/openapi.yaml'
322
318
 
323
319
  [tool.hatch.envs.proto]
324
320
  detached = true
@@ -383,7 +379,6 @@ module = [
383
379
  "sqlean",
384
380
  "grpc.*",
385
381
  "py_grpc_prometheus.*",
386
- "orjson", # suppress fastapi internal type errors
387
382
  ]
388
383
  ignore_missing_imports = true
389
384
 
@@ -105,8 +105,10 @@ class BulkInserter:
105
105
  )
106
106
 
107
107
  async def __aexit__(self, *args: Any) -> None:
108
- self._operations = None
109
108
  self._running = False
109
+ if self._task:
110
+ self._task.cancel()
111
+ self._task = None
110
112
 
111
113
  def _enqueue_operation(self, operation: DataManipulation) -> None:
112
114
  cast("Queue[DataManipulation]", self._operations).put_nowait(operation)
@@ -90,11 +90,15 @@ class Evaluator(ABC):
90
90
  if super_cls in (LLMEvaluator, Evaluator):
91
91
  break
92
92
  if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
93
+ if isinstance(evaluate, classmethod):
94
+ evaluate = evaluate.__func__
93
95
  assert callable(evaluate), "`evaluate()` method should be callable"
94
96
  # need to remove the first param, i.e. `self`
95
97
  _validate_sig(functools.partial(evaluate, None), "evaluate")
96
98
  return
97
99
  if async_evaluate := super_cls.__dict__.get(Evaluator.async_evaluate.__name__):
100
+ if isinstance(async_evaluate, classmethod):
101
+ async_evaluate = async_evaluate.__func__
98
102
  assert callable(async_evaluate), "`async_evaluate()` method should be callable"
99
103
  # need to remove the first param, i.e. `self`
100
104
  _validate_sig(functools.partial(async_evaluate, None), "async_evaluate")
@@ -9,6 +9,19 @@ from phoenix.experiments.types import EvaluationResult, TaskOutput
9
9
 
10
10
 
11
11
  class JSONParsable(CodeEvaluator):
12
+ """
13
+ An evaluator that checks if the output of an experiment run is a JSON-parsable string.
14
+
15
+ Example:
16
+
17
+ .. code-block:: python
18
+ from phoenix.experiments import run_experiment
19
+ from phoenix.experiments.evaluators import JSONParsable
20
+
21
+ run_experiment(dataset, task, evaluators=[JSONParsable])
22
+ """
23
+
24
+ @classmethod
12
25
  def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
13
26
  assert isinstance(output, str), "Experiment run output must be a string"
14
27
  try:
@@ -22,6 +35,22 @@ class JSONParsable(CodeEvaluator):
22
35
 
23
36
 
24
37
  class ContainsKeyword(CodeEvaluator):
38
+ """
39
+ An evaluator that checks if a keyword is present in the output of an experiment run.
40
+
41
+ Args:
42
+ keyword (str): The keyword to search for in the output.
43
+ name (str, optional): An optional name for the evaluator. Defaults to "Contains(<keyword>)".
44
+
45
+ Example:
46
+
47
+ .. code-block:: python
48
+ from phoenix.experiments import run_experiment
49
+ from phoenix.experiments.evaluators import ContainsKeyword
50
+
51
+ run_experiment(dataset, task, evaluators=[ContainsKeyword("foo")])
52
+ """
53
+
25
54
  def __init__(self, keyword: str, name: Optional[str] = None) -> None:
26
55
  self.keyword = keyword
27
56
  self._name = name or f"Contains({repr(keyword)})"
@@ -39,6 +68,23 @@ class ContainsKeyword(CodeEvaluator):
39
68
 
40
69
 
41
70
  class ContainsAnyKeyword(CodeEvaluator):
71
+ """
72
+ An evaluator that checks if any of the keywords are present in the output of an experiment run.
73
+
74
+ Args:
75
+ keywords (List[str]): The keywords to search for in the output.
76
+ name (str, optional): An optional name for the evaluator. Defaults to
77
+ "ContainsAny(<keywords>)".
78
+
79
+ Example:
80
+
81
+ .. code-block:: python
82
+ from phoenix.experiments import run_experiment
83
+ from phoenix.experiments.evaluators import ContainsAnyKeyword
84
+
85
+ run_experiment(dataset, task, evaluators=[ContainsAnyKeyword(["foo", "bar"])])
86
+ """
87
+
42
88
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
43
89
  self.keywords = keywords
44
90
  self._name = name or f"ContainsAny({keywords})"
@@ -57,6 +103,23 @@ class ContainsAnyKeyword(CodeEvaluator):
57
103
 
58
104
 
59
105
  class ContainsAllKeywords(CodeEvaluator):
106
+ """
107
+ An evaluator that checks if all of the keywords are present in the output of an experiment run.
108
+
109
+ Args:
110
+ keywords (List[str]): The keywords to search for in the output.
111
+ name (str, optional): An optional name for the evaluator. Defaults to
112
+ "ContainsAll(<keywords>)".
113
+
114
+ Example:
115
+ .. code-block:: python
116
+
117
+ from phoenix.experiments import run_experiment
118
+ from phoenix.experiments.evaluators import ContainsAllKeywords
119
+
120
+ run_experiment(dataset, task, evaluators=[ContainsAllKeywords(["foo", "bar"])])
121
+ """
122
+
60
123
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
61
124
  self.keywords = keywords
62
125
  self._name = name or f"ContainsAll({keywords})"
@@ -77,6 +140,23 @@ class ContainsAllKeywords(CodeEvaluator):
77
140
 
78
141
 
79
142
  class MatchesRegex(CodeEvaluator):
143
+ r"""
144
+ An experiment evaluator that checks if the output of an experiment run matches a regex pattern.
145
+
146
+ Args:
147
+ pattern (Union[str, re.Pattern[str]]): The regex pattern to match the output against.
148
+ name (str, optional): An optional name for the evaluator. Defaults to "matches_({pattern})".
149
+
150
+ Example:
151
+ .. code-block:: python
152
+
153
+ from phoenix.experiments import run_experiment
154
+ from phoenix.experiments.evaluators import MatchesRegex
155
+
156
+ phone_number_evaluator = MatchesRegex(r"\d{3}-\d{3}-\d{4}", name="valid-phone-number")
157
+ run_experiment(dataset, task, evaluators=[phone_number_evaluator])
158
+ """
159
+
80
160
  def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
81
161
  if isinstance(pattern, str):
82
162
  pattern = re.compile(pattern)
@@ -18,6 +18,31 @@ from phoenix.experiments.types import (
18
18
 
19
19
 
20
20
  class LLMCriteriaEvaluator(LLMEvaluator):
21
+ """
22
+ An experiment evaluator that uses an LLM to evaluate whether the text meets a custom criteria.
23
+
24
+ This evaluator uses the chain-of-thought technique to perform a binary evaluation of text based
25
+ on a custom criteria and description. When used as an experiment evaluator,
26
+ `LLMCriteriaEvaluator` will return a score of 1.0 if the text meets the criteria and a score of
27
+ 0.0 if not. The explanation produced by the chain-of-thought technique will be included in the
28
+ experiment evaluation as well.
29
+
30
+ Example criteria and descriptions:
31
+ - "thoughtfulness" - "shows careful consideration and fair judgement"
32
+ - "clarity" - "is easy to understand and follow"
33
+ - "professionalism" - "is respectful and appropriate for a formal setting"
34
+
35
+ Args:
36
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
37
+ the `phoenix.evals` module.
38
+ criteria: The criteria to evaluate the text against, the criteria should be able to be used
39
+ as a noun in a sentence.
40
+ description (str): A description of the criteria, used to clarify instructions to the LLM.
41
+ The description should complete this sentence: "{criteria} means the text
42
+ {description}".
43
+ name (str): The name of the evaluator
44
+ """
45
+
21
46
  _base_template = (
22
47
  "Determine if the following text is {criteria}. {description}"
23
48
  "First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
@@ -117,6 +142,14 @@ ConcisenessEvaluator = criteria_evaluator_factory(
117
142
  description="is just a few sentences and easy to follow",
118
143
  default_name="Conciseness",
119
144
  )
145
+ """
146
+ An experiment evaluator that uses an LLM to evaluate whether the text is concise.
147
+
148
+ Args:
149
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
150
+ the `phoenix.evals` module.
151
+ name (str, optional): The name of the evaluator, defaults to "Conciseness".
152
+ """
120
153
 
121
154
 
122
155
  HelpfulnessEvaluator = criteria_evaluator_factory(
@@ -125,6 +158,14 @@ HelpfulnessEvaluator = criteria_evaluator_factory(
125
158
  description="provides useful information",
126
159
  default_name="Helpfulness",
127
160
  )
161
+ """
162
+ An experiment evaluator that uses an LLM to evaluate whether the text is helpful.
163
+
164
+ Args:
165
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
166
+ the `phoenix.evals` module.
167
+ name (str, optional): The name of the evaluator, defaults to "Helpfulness".
168
+ """
128
169
 
129
170
 
130
171
  CoherenceEvaluator = criteria_evaluator_factory(
@@ -133,6 +174,14 @@ CoherenceEvaluator = criteria_evaluator_factory(
133
174
  description="is coherent, well-structured, and logically sound",
134
175
  default_name="Coherence",
135
176
  )
177
+ """
178
+ An experiment evaluator that uses an LLM to evaluate whether the text is coherent.
179
+
180
+ Args:
181
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
182
+ the `phoenix.evals` module.
183
+ name (str, optional): The name of the evaluator, defaults to "Coherence".
184
+ """
136
185
 
137
186
 
138
187
  def _parse_label_from_explanation(raw_string: str) -> str:
@@ -149,6 +198,33 @@ def _parse_label_from_explanation(raw_string: str) -> str:
149
198
 
150
199
 
151
200
  class RelevanceEvaluator(LLMEvaluator):
201
+ """
202
+ An experiment evaluator that uses an LLM to evaluate whether a response is relevant to a query.
203
+
204
+ This evaluator uses the chain-of-thought technique to perform a binary evaluation of whether
205
+ the output "response" of an experiment is relevant to its input "query". When used as an
206
+ experiment evaluator, `RelevanceEvaluator` will return a score of 1.0 if the response is
207
+ relevant to the query and a score of 0.0 if not. The explanation produced by the
208
+ chain-of-thought technique will be included in the experiment evaluation as well.
209
+
210
+ Optionally, you can provide custom functions to extract the query and response from the input
211
+ and output of the experiment task. By default, the evaluator will use the dataset example as
212
+ the input and the output of the experiment task as the response.
213
+
214
+ Args:
215
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
216
+ the `phoenix.evals` module.
217
+ get_query (callable, optional): A function that extracts the query from the input of the
218
+ experiment task. The function should take the input and metadata of the dataset example
219
+ and return a string. By default, the function will return the string representation of
220
+ the input.
221
+ get_response (callable, optional): A function that extracts the response from the output of
222
+ the experiment task. The function should take the output and metadata of the experiment
223
+ task and return a string. By default, the function will return the string representation
224
+ of the output.
225
+ name (str, optional): The name of the evaluator. Defaults to "Relevance".
226
+ """
227
+
152
228
  template = (
153
229
  "Determine if the following response is relevant to the query. In this context, "
154
230
  "'relevance' means that the response directly addresses the core question or topic of the "
@@ -174,7 +250,7 @@ class RelevanceEvaluator(LLMEvaluator):
174
250
  model: LLMBaseModel,
175
251
  get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
176
252
  get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
177
- name: str = "RelevanceEvaluator",
253
+ name: str = "Relevance",
178
254
  ):
179
255
  self.model = model
180
256
  self._name = name
@@ -1,6 +1,5 @@
1
1
  import functools
2
2
  import inspect
3
- from itertools import chain, islice, repeat
4
3
  from typing import TYPE_CHECKING, Any, Callable, Optional, Union
5
4
 
6
5
  from phoenix.experiments.types import (
@@ -75,6 +74,72 @@ def create_evaluator(
75
74
  name: Optional[str] = None,
76
75
  scorer: Optional[Callable[[Any], EvaluationResult]] = None,
77
76
  ) -> Callable[[Callable[..., Any]], "Evaluator"]:
77
+ """
78
+ A decorator that configures a sync or async function to be used as an experiment evaluator.
79
+
80
+ If the `evaluator` is a function of one argument then that argument will be
81
+ bound to the `output` of an experiment task. Alternatively, the `evaluator` can be a function
82
+ of any combination of specific argument names that will be bound to special values:
83
+ `input`: The input field of the dataset example
84
+ `output`: The output of an experiment task
85
+ `expected`: The expected or reference output of the dataset example
86
+ `reference`: An alias for `expected`
87
+ `metadata`: Metadata associated with the dataset example
88
+
89
+ Args:
90
+ kind (str | AnnotatorKind): Broadly indicates how the evaluator scores an experiment run.
91
+ Valid kinds are: "CODE", "LLM". Defaults to "CODE".
92
+ name (str, optional): The name of the evaluator. If not provided, the name of the function
93
+ will be used.
94
+ scorer (callable, optional): An optional function that converts the output of the wrapped
95
+ function into an `EvaluationResult`. This allows configuring the evaluation
96
+ payload by setting a label, score and explanation. By default, numeric outputs will
97
+ be recorded as scores, boolean outputs will be recorded as scores and labels, and
98
+ string outputs will be recorded as labels. If the output is a 2-tuple, the first item
99
+ will be recorded as the score and the second item will be recorded as the explanation.
100
+
101
+ Examples:
102
+ Configuring an evaluator that returns a boolean
103
+
104
+ .. code-block:: python
105
+ @create_evaluator(kind="CODE", name="exact-match")
106
+ def match(output: str, expected: str) -> bool:
107
+ return output == expected
108
+
109
+ Configuring an evaluator that returns a label
110
+
111
+ .. code-block:: python
112
+ client = openai.Client()
113
+
114
+ @create_evaluator(kind="LLM")
115
+ def label(output: str) -> str:
116
+ res = client.chat.completions.create(
117
+ model = "gpt-4",
118
+ messages = [
119
+ {
120
+ "role": "user",
121
+ "content": (
122
+ "in one word, characterize the sentiment of the following customer "
123
+ f"request: {output}"
124
+ )
125
+ },
126
+ ],
127
+ )
128
+ label = res.choices[0].message.content
129
+ return label
130
+
131
+ Configuring an evaluator that returns a score and explanation
132
+
133
+ .. code-block:: python
134
+ from textdistance import levenshtein
135
+
136
+ @create_evaluator(kind="CODE", name="levenshtein-distance")
137
+ def ld(output: str, expected: str) -> Tuple[float, str]:
138
+ return (
139
+ levenshtein(output, expected),
140
+ f"Levenshtein distance between {output} and {expected}"
141
+ )
142
+ """
78
143
  if scorer is None:
79
144
  scorer = _default_eval_scorer
80
145
 
@@ -163,24 +228,8 @@ def _default_eval_scorer(result: Any) -> EvaluationResult:
163
228
  return EvaluationResult(score=float(result))
164
229
  if isinstance(result, str):
165
230
  return EvaluationResult(label=result)
166
- if isinstance(result, (tuple, list)) and 0 < len(result) <= 3:
167
- # Possible interpretations are:
168
- # - 3-tuple: (Score, Label, Explanation)
169
- # - 2-tuple: (Score, Explanation) or (Label, Explanation)
170
- # - 1-tuple: (Score, ) or (Label, )
171
- # Note that (Score, Label) conflicts with (Score, Explanation) and we
172
- # pick the latter because it's probably more prevalent. To get
173
- # (Score, Label), use a 3-tuple instead, i.e. (Score, Label, None).
174
- a, b, c = islice(chain(result, repeat(None)), 3)
175
- score, label, explanation = None, a, b
176
- if hasattr(a, "__float__"):
177
- try:
178
- score = float(a)
179
- except ValueError:
180
- pass
181
- else:
182
- label, explanation = (None, b) if len(result) < 3 else (b, c)
183
- return EvaluationResult(score=score, label=label, explanation=explanation)
184
- if result is None:
185
- return EvaluationResult(score=0)
231
+ if isinstance(result, (tuple, list)) and len(result) == 2:
232
+ # If the result is a 2-tuple, the first item will be recorded as the score
233
+ # and the second item will be recorded as the explanation.
234
+ return EvaluationResult(score=float(result[0]), explanation=str(result[1]))
186
235
  raise ValueError(f"Unsupported evaluation result type: {type(result)}")