arize-phoenix 4.12.0rc1__tar.gz → 4.14.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (287)
  1. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/PKG-INFO +10 -6
  2. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/README.md +2 -0
  3. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/pyproject.toml +13 -10
  4. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/bulk_inserter.py +3 -1
  5. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/base.py +4 -0
  6. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/code_evaluators.py +80 -0
  7. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/llm_evaluators.py +77 -1
  8. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/utils.py +70 -21
  9. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/functions.py +14 -14
  10. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/average_experiment_run_latency.py +23 -23
  11. arize_phoenix-4.14.1/src/phoenix/server/api/dataloaders/experiment_error_rates.py +63 -0
  12. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/experiment_run_counts.py +18 -5
  13. arize_phoenix-4.12.0rc1/src/phoenix/server/api/input_types/CreateSpanAnnotationsInput.py → arize_phoenix-4.14.1/src/phoenix/server/api/input_types/CreateSpanAnnotationInput.py +4 -2
  14. arize_phoenix-4.12.0rc1/src/phoenix/server/api/input_types/CreateTraceAnnotationsInput.py → arize_phoenix-4.14.1/src/phoenix/server/api/input_types/CreateTraceAnnotationInput.py +4 -2
  15. arize_phoenix-4.12.0rc1/src/phoenix/server/api/input_types/PatchAnnotationsInput.py → arize_phoenix-4.14.1/src/phoenix/server/api/input_types/PatchAnnotationInput.py +4 -2
  16. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/span_annotations_mutations.py +12 -6
  17. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/trace_annotations_mutations.py +12 -6
  18. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Experiment.py +2 -2
  19. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Inferences.py +1 -2
  20. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Model.py +1 -2
  21. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/app.py +3 -7
  22. arize_phoenix-4.14.1/src/phoenix/server/static/.vite/manifest.json +78 -0
  23. arize_phoenix-4.12.0rc1/src/phoenix/server/static/assets/components-C8sm_r1F.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/components-DeS0YEmv.js +2 -2
  24. arize_phoenix-4.14.1/src/phoenix/server/static/assets/index-CQgXRwU0.js +100 -0
  25. arize_phoenix-4.12.0rc1/src/phoenix/server/static/assets/pages-bN7juCjh.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/pages-hdjlFZhO.js +275 -198
  26. arize_phoenix-4.12.0rc1/src/phoenix/server/static/assets/vendor-CUDAPm8e.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/vendor-DPvSDRn3.js +1 -1
  27. arize_phoenix-4.12.0rc1/src/phoenix/server/static/assets/vendor-arizeai-Do2HOmcL.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/vendor-arizeai-CkvPT67c.js +2 -2
  28. arize_phoenix-4.12.0rc1/src/phoenix/server/static/assets/vendor-codemirror-CrdxOlMs.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/vendor-codemirror-Cqwpwlua.js +1 -1
  29. arize_phoenix-4.12.0rc1/src/phoenix/server/static/assets/vendor-recharts-PKRvByVe.js → arize_phoenix-4.14.1/src/phoenix/server/static/assets/vendor-recharts-5jlNaZuF.js +1 -1
  30. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/templates/index.html +51 -43
  31. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/session/client.py +7 -5
  32. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/filter.py +40 -25
  33. arize_phoenix-4.14.1/src/phoenix/version.py +1 -0
  34. arize_phoenix-4.12.0rc1/src/phoenix/server/api/dataloaders/experiment_error_rates.py +0 -43
  35. arize_phoenix-4.12.0rc1/src/phoenix/server/static/.vite/manifest.json +0 -78
  36. arize_phoenix-4.12.0rc1/src/phoenix/server/static/assets/index-BEKPzgQs.js +0 -100
  37. arize_phoenix-4.12.0rc1/src/phoenix/version.py +0 -1
  38. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/.gitignore +0 -0
  39. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/IP_NOTICE +0 -0
  40. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/LICENSE +0 -0
  41. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/chat-service/chat/__init__.py +0 -0
  42. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/chat-service/chat/app.py +0 -0
  43. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/chat-service/chat/types.py +0 -0
  44. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/Dockerfile +0 -0
  45. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/Makefile +0 -0
  46. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/__init__.py +0 -0
  47. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/pyproject.toml +0 -0
  48. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/requirements.txt +0 -0
  49. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/examples/manually-instrumented-chatbot/frontend/schema.json +0 -0
  50. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/__init__.py +0 -0
  51. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/config.py +0 -0
  52. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/core/__init__.py +0 -0
  53. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/core/embedding_dimension.py +0 -0
  54. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/core/model.py +0 -0
  55. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/core/model_schema.py +0 -0
  56. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/core/model_schema_adapter.py +0 -0
  57. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/datetime_utils.py +0 -0
  58. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/README.md +0 -0
  59. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/__init__.py +0 -0
  60. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/alembic.ini +0 -0
  61. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/engines.py +0 -0
  62. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/helpers.py +0 -0
  63. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/__init__.py +0 -0
  64. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/dataset.py +0 -0
  65. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/evaluation.py +0 -0
  66. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/helpers.py +0 -0
  67. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/insertion/span.py +0 -0
  68. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrate.py +0 -0
  69. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/__init__.py +0 -0
  70. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/env.py +0 -0
  71. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/script.py.mako +0 -0
  72. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/types.py +0 -0
  73. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -0
  74. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +0 -0
  75. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/db/models.py +0 -0
  76. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/exceptions.py +0 -0
  77. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/__init__.py +0 -0
  78. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/evaluators/__init__.py +0 -0
  79. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/tracing.py +0 -0
  80. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/types.py +0 -0
  81. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/experiments/utils.py +0 -0
  82. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/__init__.py +0 -0
  83. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/errors.py +0 -0
  84. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/fixtures.py +0 -0
  85. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/inferences.py +0 -0
  86. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/schema.py +0 -0
  87. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/inferences/validation.py +0 -0
  88. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/README.md +0 -0
  89. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/__init__.py +0 -0
  90. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/binning.py +0 -0
  91. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/metrics.py +0 -0
  92. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/mixins.py +0 -0
  93. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/retrieval_metrics.py +0 -0
  94. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/timeseries.py +0 -0
  95. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/metrics/wrappers.py +0 -0
  96. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/__init__.py +0 -0
  97. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/clustering.py +0 -0
  98. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/pointcloud.py +0 -0
  99. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/projectors.py +0 -0
  100. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  101. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/py.typed +0 -0
  102. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/__init__.py +0 -0
  103. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/__init__.py +0 -0
  104. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/context.py +0 -0
  105. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/__init__.py +0 -0
  106. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/cache/__init__.py +0 -0
  107. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/cache/two_tier_cache.py +0 -0
  108. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -0
  109. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/dataset_example_spans.py +0 -0
  110. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/document_evaluation_summaries.py +0 -0
  111. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/document_evaluations.py +0 -0
  112. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/document_retrieval_metrics.py +0 -0
  113. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/evaluation_summaries.py +0 -0
  114. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -0
  115. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -0
  116. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/latency_ms_quantile.py +0 -0
  117. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/min_start_or_max_end_times.py +0 -0
  118. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/project_by_name.py +0 -0
  119. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/record_counts.py +0 -0
  120. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/span_annotations.py +0 -0
  121. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/span_descendants.py +0 -0
  122. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/span_evaluations.py +0 -0
  123. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/span_projects.py +0 -0
  124. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/token_counts.py +0 -0
  125. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/trace_evaluations.py +0 -0
  126. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/dataloaders/trace_row_ids.py +0 -0
  127. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/helpers/__init__.py +0 -0
  128. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/helpers/dataset_helpers.py +0 -0
  129. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -0
  130. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -0
  131. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/ClearProjectInput.py +0 -0
  132. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  133. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  134. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/CreateDatasetInput.py +0 -0
  135. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  136. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DatasetExampleInput.py +0 -0
  137. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DatasetSort.py +0 -0
  138. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DatasetVersionSort.py +0 -0
  139. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DeleteAnnotationsInput.py +0 -0
  140. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -0
  141. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DeleteDatasetInput.py +0 -0
  142. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -0
  143. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  144. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  145. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  146. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -0
  147. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/PatchDatasetInput.py +0 -0
  148. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  149. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  150. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  151. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/input_types/__init__.py +0 -0
  152. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/interceptor.py +0 -0
  153. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/__init__.py +0 -0
  154. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/auth.py +0 -0
  155. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/dataset_mutations.py +0 -0
  156. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/experiment_mutations.py +0 -0
  157. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/export_events_mutations.py +0 -0
  158. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/mutations/project_mutations.py +0 -0
  159. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/openapi/__init__.py +0 -0
  160. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/openapi/main.py +0 -0
  161. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/openapi/schema.py +0 -0
  162. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/queries.py +0 -0
  163. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/__init__.py +0 -0
  164. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/utils.py +0 -0
  165. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/__init__.py +0 -0
  166. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/dataset_examples.py +0 -0
  167. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/datasets.py +0 -0
  168. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/evaluations.py +0 -0
  169. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/experiment_evaluations.py +0 -0
  170. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/experiment_runs.py +0 -0
  171. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/experiments.py +0 -0
  172. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/spans.py +0 -0
  173. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/routers/v1/traces.py +0 -0
  174. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/schema.py +0 -0
  175. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Annotation.py +0 -0
  176. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/AnnotatorKind.py +0 -0
  177. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Cluster.py +0 -0
  178. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/CreateDatasetPayload.py +0 -0
  179. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  180. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Dataset.py +0 -0
  181. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DatasetExample.py +0 -0
  182. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DatasetExampleRevision.py +0 -0
  183. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  184. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DatasetVersion.py +0 -0
  185. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Dimension.py +0 -0
  186. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  187. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  188. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DimensionType.py +0 -0
  189. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  190. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
  191. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
  192. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  193. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  194. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Evaluation.py +0 -0
  195. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
  196. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Event.py +0 -0
  197. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  198. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExampleRevisionInterface.py +0 -0
  199. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -0
  200. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExperimentComparison.py +0 -0
  201. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExperimentRun.py +0 -0
  202. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExperimentRunAnnotation.py +0 -0
  203. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  204. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Functionality.py +0 -0
  205. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/InferencesRole.py +0 -0
  206. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/MimeType.py +0 -0
  207. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/NumericRange.py +0 -0
  208. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  209. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Project.py +0 -0
  210. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  211. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Retrieval.py +0 -0
  212. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  213. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Segments.py +0 -0
  214. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/SortDir.py +0 -0
  215. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Span.py +0 -0
  216. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/SpanAnnotation.py +0 -0
  217. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  218. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/Trace.py +0 -0
  219. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/TraceAnnotation.py +0 -0
  220. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  221. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  222. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  223. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/__init__.py +0 -0
  224. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/node.py +0 -0
  225. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/types/pagination.py +0 -0
  226. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/api/utils.py +0 -0
  227. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/grpc_server.py +0 -0
  228. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/main.py +0 -0
  229. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/openapi/__init__.py +0 -0
  230. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/openapi/docs.py +0 -0
  231. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/prometheus.py +0 -0
  232. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  233. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  234. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  235. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  236. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  237. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  238. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  239. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  240. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/assets/vendor-DxkFTwjz.css +0 -0
  241. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/assets/vendor-three-DwGkEfCM.js +0 -0
  242. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/favicon.ico +0 -0
  243. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/static/modernizr.js +0 -0
  244. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/telemetry.py +0 -0
  245. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/templates/__init__.py +0 -0
  246. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/server/thread_server.py +0 -0
  247. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/services.py +0 -0
  248. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/session/__init__.py +0 -0
  249. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/session/data_extractor.py +0 -0
  250. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/session/evaluation.py +0 -0
  251. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/session/session.py +0 -0
  252. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/settings.py +0 -0
  253. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/__init__.py +0 -0
  254. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/attributes.py +0 -0
  255. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/README.md +0 -0
  256. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/__init__.py +0 -0
  257. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/helpers.py +0 -0
  258. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/dsl/query.py +0 -0
  259. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/errors.py +0 -0
  260. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/evaluation_conventions.py +0 -0
  261. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/exporter.py +0 -0
  262. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/fixtures.py +0 -0
  263. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/langchain/__init__.py +0 -0
  264. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/langchain/instrumentor.py +0 -0
  265. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/llama_index/__init__.py +0 -0
  266. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/llama_index/callback.py +0 -0
  267. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/openai/__init__.py +0 -0
  268. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/openai/instrumentor.py +0 -0
  269. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/otel.py +0 -0
  270. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/projects.py +0 -0
  271. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/schemas.py +0 -0
  272. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/span_evaluations.py +0 -0
  273. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/span_json_decoder.py +0 -0
  274. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/span_json_encoder.py +0 -0
  275. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/trace_dataset.py +0 -0
  276. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/utils.py +0 -0
  277. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/v1/__init__.py +0 -0
  278. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
  279. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
  280. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/__init__.py +0 -0
  281. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/deprecation.py +0 -0
  282. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/error_handling.py +0 -0
  283. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/json.py +0 -0
  284. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/logging.py +0 -0
  285. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/project.py +0 -0
  286. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/re.py +0 -0
  287. {arize_phoenix-4.12.0rc1 → arize_phoenix-4.14.1}/src/phoenix/utilities/span_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.12.0rc1
3
+ Version: 4.14.1
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -47,7 +47,7 @@ Requires-Dist: scipy
47
47
  Requires-Dist: sqlalchemy[asyncio]<3,>=2.0.4
48
48
  Requires-Dist: sqlean-py>=3.45.1
49
49
  Requires-Dist: starlette
50
- Requires-Dist: strawberry-graphql==0.235.0
50
+ Requires-Dist: strawberry-graphql==0.236.0
51
51
  Requires-Dist: tqdm
52
52
  Requires-Dist: typing-extensions>=4.5; python_version < '3.12'
53
53
  Requires-Dist: typing-extensions>=4.6; python_version >= '3.12'
@@ -64,11 +64,12 @@ Requires-Dist: opentelemetry-sdk; extra == 'container'
64
64
  Requires-Dist: opentelemetry-semantic-conventions; extra == 'container'
65
65
  Requires-Dist: prometheus-client; extra == 'container'
66
66
  Requires-Dist: py-grpc-prometheus; extra == 'container'
67
- Requires-Dist: strawberry-graphql[opentelemetry]==0.235.0; extra == 'container'
67
+ Requires-Dist: strawberry-graphql[opentelemetry]==0.236.0; extra == 'container'
68
68
  Requires-Dist: uvloop; (platform_system != 'Windows') and extra == 'container'
69
69
  Provides-Extra: dev
70
70
  Requires-Dist: anthropic; extra == 'dev'
71
71
  Requires-Dist: arize[autoembeddings,llm-evaluation]; extra == 'dev'
72
+ Requires-Dist: asgi-lifespan; extra == 'dev'
72
73
  Requires-Dist: asyncpg; extra == 'dev'
73
74
  Requires-Dist: gcsfs; extra == 'dev'
74
75
  Requires-Dist: google-cloud-aiplatform>=1.3; extra == 'dev'
@@ -77,6 +78,7 @@ Requires-Dist: jupyter; extra == 'dev'
77
78
  Requires-Dist: langchain>=0.0.334; extra == 'dev'
78
79
  Requires-Dist: litellm>=1.0.3; extra == 'dev'
79
80
  Requires-Dist: llama-index>=0.10.3; extra == 'dev'
81
+ Requires-Dist: mypy==1.11.0; extra == 'dev'
80
82
  Requires-Dist: nbqa; extra == 'dev'
81
83
  Requires-Dist: pandas-stubs==2.0.3.230814; (python_version < '3.9') and extra == 'dev'
82
84
  Requires-Dist: pandas-stubs==2.2.2.240603; (python_version >= '3.9') and extra == 'dev'
@@ -87,9 +89,9 @@ Requires-Dist: psycopg[binary]; extra == 'dev'
87
89
  Requires-Dist: pytest-asyncio; extra == 'dev'
88
90
  Requires-Dist: pytest-cov; extra == 'dev'
89
91
  Requires-Dist: pytest-postgresql; extra == 'dev'
90
- Requires-Dist: pytest==8.2.2; extra == 'dev'
91
- Requires-Dist: ruff==0.4.9; extra == 'dev'
92
- Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.235.0; extra == 'dev'
92
+ Requires-Dist: pytest==8.3.1; extra == 'dev'
93
+ Requires-Dist: ruff==0.5.4; extra == 'dev'
94
+ Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.236.0; extra == 'dev'
93
95
  Requires-Dist: tabulate; extra == 'dev'
94
96
  Requires-Dist: types-tabulate; extra == 'dev'
95
97
  Provides-Extra: evals
@@ -137,6 +139,8 @@ Phoenix is an open-source AI observability platform designed for experimentation
137
139
 
138
140
  - **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
139
141
  - **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
142
+ - **_Datasets_** - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.
143
+ - **_Experiments_** - Track and evaluate changes to prompts, LLMs, and retrieval.
140
144
  - **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
141
145
 
142
146
  Phoenix is vendor and language agnostic with out-of-the-box support for popular frameworks (🦙LlamaIndex, 🦜⛓LangChain, 🧩DSPy) and LLM providers (OpenAI, Bedrock, and more). For details on auto-instrumentation, check out the [OpenInference](https://github.com/Arize-ai/openinference) project.
@@ -31,6 +31,8 @@ Phoenix is an open-source AI observability platform designed for experimentation
31
31
 
32
32
  - **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
33
33
  - **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
34
+ - **_Datasets_** - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.
35
+ - **_Experiments_** - Track and evaluate changes to prompts, LLMs, and retrieval.
34
36
  - **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
35
37
 
36
38
  Phoenix is vendor and language agnostic with out-of-the-box support for popular frameworks (🦙LlamaIndex, 🦜⛓LangChain, 🧩DSPy) and LLM providers (OpenAI, Bedrock, and more). For details on auto-instrumentation, check out the [OpenInference](https://github.com/Arize-ai/openinference) project.
@@ -31,7 +31,7 @@ dependencies = [
31
31
  "starlette",
32
32
  "uvicorn",
33
33
  "psutil",
34
- "strawberry-graphql==0.235.0", # need to pin version because we're monkey-patching
34
+ "strawberry-graphql==0.236.0", # need to pin version because we're monkey-patching
35
35
  "pyarrow",
36
36
  "typing-extensions>=4.5; python_version<'3.12'",
37
37
  # A minimum version of typing-extensions==4.6.0 is needed to avoid this issue on Python 3.12: https://github.com/Azure/azure-sdk-for-python/issues/33442#issuecomment-1847886784
@@ -69,19 +69,20 @@ dev = [
69
69
  "hatch",
70
70
  "jupyter",
71
71
  "nbqa",
72
- "ruff==0.4.9",
72
+ "ruff==0.5.4",
73
+ "mypy==1.11.0",
73
74
  "pandas>=1.0",
74
75
  "tabulate", # used by DataFrame.to_markdown()
75
76
  "types-tabulate",
76
77
  "pandas-stubs==2.2.2.240603; python_version>='3.9'",
77
78
  "pandas-stubs==2.0.3.230814; python_version<'3.9'",
78
- "pytest==8.2.2",
79
+ "pytest==8.3.1",
79
80
  "pytest-asyncio",
80
81
  "pytest-cov",
81
82
  "pytest-postgresql",
82
83
  "asyncpg",
83
84
  "psycopg[binary]",
84
- "strawberry-graphql[debug-server,opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
85
+ "strawberry-graphql[debug-server,opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
85
86
  "pre-commit",
86
87
  "arize[AutoEmbeddings, LLM_Evaluation]",
87
88
  "llama-index>=0.10.3",
@@ -90,6 +91,7 @@ dev = [
90
91
  "google-cloud-aiplatform>=1.3",
91
92
  "anthropic",
92
93
  "prometheus_client",
94
+ "asgi-lifespan",
93
95
  ]
94
96
  evals = []
95
97
  experimental = []
@@ -113,7 +115,7 @@ container = [
113
115
  "opentelemetry-instrumentation-sqlalchemy",
114
116
  "opentelemetry-instrumentation-grpc",
115
117
  "py-grpc-prometheus",
116
- "strawberry-graphql[opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
118
+ "strawberry-graphql[opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
117
119
  "uvloop; platform_system != 'Windows'",
118
120
  ]
119
121
 
@@ -146,7 +148,7 @@ dependencies = [
146
148
  "numpy",
147
149
  "pandas==2.2.2; python_version>='3.9'",
148
150
  "pandas==1.4.0; python_version<'3.9'",
149
- "pytest==8.2.2",
151
+ "pytest==8.3.1",
150
152
  "pytest-asyncio",
151
153
  "pytest-cov",
152
154
  "pytest-postgresql",
@@ -165,11 +167,12 @@ dependencies = [
165
167
  "respx", # For OpenAI testing
166
168
  "nest-asyncio", # for executor testing
167
169
  "astunparse; python_version<'3.9'", # `ast.unparse(...)` is only available starting with Python 3.9
170
+ "asgi-lifespan",
168
171
  ]
169
172
 
170
173
  [tool.hatch.envs.type]
171
174
  dependencies = [
172
- "mypy==1.10.0",
175
+ "mypy==1.11.0",
173
176
  "tenacity",
174
177
  "pandas>=1.0",
175
178
  "pandas-stubs==2.0.3.230814",
@@ -191,7 +194,7 @@ dependencies = [
191
194
  "opentelemetry-instrumentation-sqlalchemy",
192
195
  "opentelemetry-instrumentation-grpc",
193
196
  "py-grpc-prometheus",
194
- "strawberry-graphql[opentelemetry]==0.235.0", # need to pin version because we're monkey-patching
197
+ "strawberry-graphql[opentelemetry]==0.236.0", # need to pin version because we're monkey-patching
195
198
  "requests", # this is needed to type-check third-party packages
196
199
  ]
197
200
 
@@ -201,7 +204,7 @@ python = ["3.8", "3.9", "3.12"]
201
204
  [tool.hatch.envs.style]
202
205
  detached = true
203
206
  dependencies = [
204
- "ruff==0.4.9",
207
+ "ruff==0.5.4",
205
208
  ]
206
209
 
207
210
  [[tool.hatch.envs.style.matrix]]
@@ -298,7 +301,7 @@ check = [
298
301
 
299
302
  [tool.hatch.envs.gql]
300
303
  dependencies = [
301
- "strawberry-graphql[cli]==0.235.0", # need to pin version because we're monkey-patching
304
+ "strawberry-graphql[cli]==0.236.0", # need to pin version because we're monkey-patching
302
305
  "requests",
303
306
  ]
304
307
 
@@ -105,8 +105,10 @@ class BulkInserter:
105
105
  )
106
106
 
107
107
  async def __aexit__(self, *args: Any) -> None:
108
- self._operations = None
109
108
  self._running = False
109
+ if self._task:
110
+ self._task.cancel()
111
+ self._task = None
110
112
 
111
113
  def _enqueue_operation(self, operation: DataManipulation) -> None:
112
114
  cast("Queue[DataManipulation]", self._operations).put_nowait(operation)
@@ -90,11 +90,15 @@ class Evaluator(ABC):
90
90
  if super_cls in (LLMEvaluator, Evaluator):
91
91
  break
92
92
  if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
93
+ if isinstance(evaluate, classmethod):
94
+ evaluate = evaluate.__func__
93
95
  assert callable(evaluate), "`evaluate()` method should be callable"
94
96
  # need to remove the first param, i.e. `self`
95
97
  _validate_sig(functools.partial(evaluate, None), "evaluate")
96
98
  return
97
99
  if async_evaluate := super_cls.__dict__.get(Evaluator.async_evaluate.__name__):
100
+ if isinstance(async_evaluate, classmethod):
101
+ async_evaluate = async_evaluate.__func__
98
102
  assert callable(async_evaluate), "`async_evaluate()` method should be callable"
99
103
  # need to remove the first param, i.e. `self`
100
104
  _validate_sig(functools.partial(async_evaluate, None), "async_evaluate")
@@ -9,6 +9,19 @@ from phoenix.experiments.types import EvaluationResult, TaskOutput
9
9
 
10
10
 
11
11
  class JSONParsable(CodeEvaluator):
12
+ """
13
+ An evaluator that checks if the output of an experiment run is a JSON-parsable string.
14
+
15
+ Example:
16
+
17
+ .. code-block:: python
18
+ from phoenix.experiments import run_experiment
19
+ from phoenix.experiments.evaluators import JSONParsable
20
+
21
+ run_experiment(dataset, task, evaluators=[JSONParsable])
22
+ """
23
+
24
+ @classmethod
12
25
  def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
13
26
  assert isinstance(output, str), "Experiment run output must be a string"
14
27
  try:
@@ -22,6 +35,22 @@ class JSONParsable(CodeEvaluator):
22
35
 
23
36
 
24
37
  class ContainsKeyword(CodeEvaluator):
38
+ """
39
+ An evaluator that checks if a keyword is present in the output of an experiment run.
40
+
41
+ Args:
42
+ keyword (str): The keyword to search for in the output.
43
+ name (str, optional): An optional name for the evaluator. Defaults to "Contains(<keyword>)".
44
+
45
+ Example:
46
+
47
+ .. code-block:: python
48
+ from phoenix.experiments import run_experiment
49
+ from phoenix.experiments.evaluators import ContainsKeyword
50
+
51
+ run_experiment(dataset, task, evaluators=[ContainsKeyword("foo")])
52
+ """
53
+
25
54
  def __init__(self, keyword: str, name: Optional[str] = None) -> None:
26
55
  self.keyword = keyword
27
56
  self._name = name or f"Contains({repr(keyword)})"
@@ -39,6 +68,23 @@ class ContainsKeyword(CodeEvaluator):
39
68
 
40
69
 
41
70
  class ContainsAnyKeyword(CodeEvaluator):
71
+ """
72
+ An evaluator that checks if any of the keywords are present in the output of an experiment run.
73
+
74
+ Args:
75
+ keywords (List[str]): The keywords to search for in the output.
76
+ name (str, optional): An optional name for the evaluator. Defaults to
77
+ "ContainsAny(<keywords>)".
78
+
79
+ Example:
80
+
81
+ .. code-block:: python
82
+ from phoenix.experiments import run_experiment
83
+ from phoenix.experiments.evaluators import ContainsAnyKeyword
84
+
85
+ run_experiment(dataset, task, evaluators=[ContainsAnyKeyword(["foo", "bar"])])
86
+ """
87
+
42
88
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
43
89
  self.keywords = keywords
44
90
  self._name = name or f"ContainsAny({keywords})"
@@ -57,6 +103,23 @@ class ContainsAnyKeyword(CodeEvaluator):
57
103
 
58
104
 
59
105
  class ContainsAllKeywords(CodeEvaluator):
106
+ """
107
+ An evaluator that checks if all of the keywords are present in the output of an experiment run.
108
+
109
+ Args:
110
+ keywords (List[str]): The keywords to search for in the output.
111
+ name (str, optional): An optional name for the evaluator. Defaults to
112
+ "ContainsAll(<keywords>)".
113
+
114
+ Example:
115
+ .. code-block:: python
116
+
117
+ from phoenix.experiments import run_experiment
118
+ from phoenix.experiments.evaluators import ContainsAllKeywords
119
+
120
+ run_experiment(dataset, task, evaluators=[ContainsAllKeywords(["foo", "bar"])])
121
+ """
122
+
60
123
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
61
124
  self.keywords = keywords
62
125
  self._name = name or f"ContainsAll({keywords})"
@@ -77,6 +140,23 @@ class ContainsAllKeywords(CodeEvaluator):
77
140
 
78
141
 
79
142
  class MatchesRegex(CodeEvaluator):
143
+ r"""
144
+ An experiment evaluator that checks if the output of an experiment run matches a regex pattern.
145
+
146
+ Args:
147
+ pattern (Union[str, re.Pattern[str]]): The regex pattern to match the output against.
148
+ name (str, optional): An optional name for the evaluator. Defaults to "matches_({pattern})".
149
+
150
+ Example:
151
+ .. code-block:: python
152
+
153
+ from phoenix.experiments import run_experiment
154
+ from phoenix.experiments.evaluators import MatchesRegex
155
+
156
+ phone_number_evaluator = MatchesRegex(r"\d{3}-\d{3}-\d{4}", name="valid-phone-number")
157
+ run_experiment(dataset, task, evaluators=[phone_number_evaluator])
158
+ """
159
+
80
160
  def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
81
161
  if isinstance(pattern, str):
82
162
  pattern = re.compile(pattern)
@@ -18,6 +18,31 @@ from phoenix.experiments.types import (
18
18
 
19
19
 
20
20
  class LLMCriteriaEvaluator(LLMEvaluator):
21
+ """
22
+ An experiment evaluator that uses an LLM to evaluate whether the text meets a custom criteria.
23
+
24
+ This evaluator uses the chain-of-thought technique to perform a binary evaluation of text based
25
+ on a custom criteria and description. When used as an experiment evaluator,
26
+ `LLMCriteriaEvaluator` will return a score of 1.0 if the text meets the criteria and a score of
27
+ 0.0 if not. The explanation produced by the chain-of-thought technique will be included in the
28
+ experiment evaluation as well.
29
+
30
+ Example criteria and descriptions:
31
+ - "thoughtfulness" - "shows careful consideration and fair judgement"
32
+ - "clarity" - "is easy to understand and follow"
33
+ - "professionalism" - "is respectful and appropriate for a formal setting"
34
+
35
+ Args:
36
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
37
+ the `phoenix.evals` module.
38
+ criteria: The criteria to evaluate the text against, the criteria should be able to be used
39
+ as a noun in a sentence.
40
+ description (str): A description of the criteria, used to clarify instructions to the LLM.
41
+ The description should complete this sentence: "{criteria} means the text
42
+ {description}".
43
+ name (str): The name of the evaluator
44
+ """
45
+
21
46
  _base_template = (
22
47
  "Determine if the following text is {criteria}. {description}"
23
48
  "First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
@@ -117,6 +142,14 @@ ConcisenessEvaluator = criteria_evaluator_factory(
117
142
  description="is just a few sentences and easy to follow",
118
143
  default_name="Conciseness",
119
144
  )
145
+ """
146
+ An experiment evaluator that uses an LLM to evaluate whether the text is concise.
147
+
148
+ Args:
149
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
150
+ the `phoenix.evals` module.
151
+ name (str, optional): The name of the evaluator, defaults to "Conciseness".
152
+ """
120
153
 
121
154
 
122
155
  HelpfulnessEvaluator = criteria_evaluator_factory(
@@ -125,6 +158,14 @@ HelpfulnessEvaluator = criteria_evaluator_factory(
125
158
  description="provides useful information",
126
159
  default_name="Helpfulness",
127
160
  )
161
+ """
162
+ An experiment evaluator that uses an LLM to evaluate whether the text is helpful.
163
+
164
+ Args:
165
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
166
+ the `phoenix.evals` module.
167
+ name (str, optional): The name of the evaluator, defaults to "Helpfulness".
168
+ """
128
169
 
129
170
 
130
171
  CoherenceEvaluator = criteria_evaluator_factory(
@@ -133,6 +174,14 @@ CoherenceEvaluator = criteria_evaluator_factory(
133
174
  description="is coherent, well-structured, and logically sound",
134
175
  default_name="Coherence",
135
176
  )
177
+ """
178
+ An experiment evaluator that uses an LLM to evaluate whether the text is coherent.
179
+
180
+ Args:
181
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
182
+ the `phoenix.evals` module.
183
+ name (str, optional): The name of the evaluator, defaults to "Coherence".
184
+ """
136
185
 
137
186
 
138
187
  def _parse_label_from_explanation(raw_string: str) -> str:
@@ -149,6 +198,33 @@ def _parse_label_from_explanation(raw_string: str) -> str:
149
198
 
150
199
 
151
200
  class RelevanceEvaluator(LLMEvaluator):
201
+ """
202
+ An experiment evaluator that uses an LLM to evaluate whether a response is relevant to a query.
203
+
204
+ This evaluator uses the chain-of-thought technique to perform a binary evaluation of whether
205
+ the output "response" of an experiment is relevant to its input "query". When used as an
206
+ experiment evaluator, `RelevanceEvaluator` will return a score of 1.0 if the response is
207
+ relevant to the query and a score of 0.0 if not. The explanation produced by the
208
+ chain-of-thought technique will be included in the experiment evaluation as well.
209
+
210
+ Optionally, you can provide custom functions to extract the query and response from the input
211
+ and output of the experiment task. By default, the evaluator will use the dataset example as
212
+ the input and the output of the experiment task as the response.
213
+
214
+ Args:
215
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
216
+ the `phoenix.evals` module.
217
+ get_query (callable, optional): A function that extracts the query from the input of the
218
+ experiment task. The function should take the input and metadata of the dataset example
219
+ and return a string. By default, the function will return the string representation of
220
+ the input.
221
+ get_response (callable, optional): A function that extracts the response from the output of
222
+ the experiment task. The function should take the output and metadata of the experiment
223
+ task and return a string. By default, the function will return the string representation
224
+ of the output.
225
+ name (str, optional): The name of the evaluator. Defaults to "Relevance".
226
+ """
227
+
152
228
  template = (
153
229
  "Determine if the following response is relevant to the query. In this context, "
154
230
  "'relevance' means that the response directly addresses the core question or topic of the "
@@ -174,7 +250,7 @@ class RelevanceEvaluator(LLMEvaluator):
174
250
  model: LLMBaseModel,
175
251
  get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
176
252
  get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
177
- name: str = "RelevanceEvaluator",
253
+ name: str = "Relevance",
178
254
  ):
179
255
  self.model = model
180
256
  self._name = name
@@ -1,6 +1,5 @@
1
1
  import functools
2
2
  import inspect
3
- from itertools import chain, islice, repeat
4
3
  from typing import TYPE_CHECKING, Any, Callable, Optional, Union
5
4
 
6
5
  from phoenix.experiments.types import (
@@ -75,6 +74,72 @@ def create_evaluator(
75
74
  name: Optional[str] = None,
76
75
  scorer: Optional[Callable[[Any], EvaluationResult]] = None,
77
76
  ) -> Callable[[Callable[..., Any]], "Evaluator"]:
77
+ """
78
+ A decorator that configures a sync or async function to be used as an experiment evaluator.
79
+
80
+ If the `evaluator` is a function of one argument then that argument will be
81
+ bound to the `output` of an experiment task. Alternatively, the `evaluator` can be a function
82
+ of any combination of specific argument names that will be bound to special values:
83
+ `input`: The input field of the dataset example
84
+ `output`: The output of an experiment task
85
+ `expected`: The expected or reference output of the dataset example
86
+ `reference`: An alias for `expected`
87
+ `metadata`: Metadata associated with the dataset example
88
+
89
+ Args:
90
+ kind (str | AnnotatorKind): Broadly indicates how the evaluator scores an experiment run.
91
+ Valid kinds are: "CODE", "LLM". Defaults to "CODE".
92
+ name (str, optional): The name of the evaluator. If not provided, the name of the function
93
+ will be used.
94
+ scorer (callable, optional): An optional function that converts the output of the wrapped
95
+ function into an `EvaluationResult`. This allows configuring the evaluation
96
+ payload by setting a label, score and explanation. By default, numeric outputs will
97
+ be recorded as scores, boolean outputs will be recorded as scores and labels, and
98
+ string outputs will be recorded as labels. If the output is a 2-tuple, the first item
99
+ will be recorded as the score and the second item will be recorded as the explanation.
100
+
101
+ Examples:
102
+ Configuring an evaluator that returns a boolean
103
+
104
+ .. code-block:: python
105
+ @create_evaluator(kind="CODE", name="exact-match")
106
+ def match(output: str, expected: str) -> bool:
107
+ return output == expected
108
+
109
+ Configuring an evaluator that returns a label
110
+
111
+ .. code-block:: python
112
+ client = openai.Client()
113
+
114
+ @create_evaluator(kind="LLM")
115
+ def label(output: str) -> str:
116
+ res = client.chat.completions.create(
117
+ model = "gpt-4",
118
+ messages = [
119
+ {
120
+ "role": "user",
121
+ "content": (
122
+ "in one word, characterize the sentiment of the following customer "
123
+ f"request: {output}"
124
+ )
125
+ },
126
+ ],
127
+ )
128
+ label = res.choices[0].message.content
129
+ return label
130
+
131
+ Configuring an evaluator that returns a score and explanation
132
+
133
+ .. code-block:: python
134
+ from textdistance import levenshtein
135
+
136
+ @create_evaluator(kind="CODE", name="levenshtein-distance")
137
+ def ld(output: str, expected: str) -> Tuple[float, str]:
138
+ return (
139
+ levenshtein(output, expected),
140
+ f"Levenshtein distance between {output} and {expected}"
141
+ )
142
+ """
78
143
  if scorer is None:
79
144
  scorer = _default_eval_scorer
80
145
 
@@ -163,24 +228,8 @@ def _default_eval_scorer(result: Any) -> EvaluationResult:
163
228
  return EvaluationResult(score=float(result))
164
229
  if isinstance(result, str):
165
230
  return EvaluationResult(label=result)
166
- if isinstance(result, (tuple, list)) and 0 < len(result) <= 3:
167
- # Possible interpretations are:
168
- # - 3-tuple: (Score, Label, Explanation)
169
- # - 2-tuple: (Score, Explanation) or (Label, Explanation)
170
- # - 1-tuple: (Score, ) or (Label, )
171
- # Note that (Score, Label) conflicts with (Score, Explanation) and we
172
- # pick the latter because it's probably more prevalent. To get
173
- # (Score, Label), use a 3-tuple instead, i.e. (Score, Label, None).
174
- a, b, c = islice(chain(result, repeat(None)), 3)
175
- score, label, explanation = None, a, b
176
- if hasattr(a, "__float__"):
177
- try:
178
- score = float(a)
179
- except ValueError:
180
- pass
181
- else:
182
- label, explanation = (None, b) if len(result) < 3 else (b, c)
183
- return EvaluationResult(score=score, label=label, explanation=explanation)
184
- if result is None:
185
- return EvaluationResult(score=0)
231
+ if isinstance(result, (tuple, list)) and len(result) == 2:
232
+ # If the result is a 2-tuple, the first item will be recorded as the score
233
+ # and the second item will be recorded as the explanation.
234
+ return EvaluationResult(score=float(result[0]), explanation=str(result[1]))
186
235
  raise ValueError(f"Unsupported evaluation result type: {type(result)}")
@@ -120,21 +120,23 @@ def run_experiment(
120
120
  output. If the `task` is a function of one argument then that argument will be bound to the
121
121
  `input` field of the dataset example. Alternatively, the `task` can be a function of any
122
122
  combination of specific argument names that will be bound to special values:
123
- `input`: The input field of the dataset example
124
- `expected`: The expected or reference output of the dataset example
125
- `reference`: An alias for `expected`
126
- `metadata`: Metadata associated with the dataset example
127
- `example`: The dataset `Example` object with all associated fields
123
+
124
+ - `input`: The input field of the dataset example
125
+ - `expected`: The expected or reference output of the dataset example
126
+ - `reference`: An alias for `expected`
127
+ - `metadata`: Metadata associated with the dataset example
128
+ - `example`: The dataset `Example` object with all associated fields
128
129
 
129
130
  An `evaluator` is either a synchronous or asynchronous function that returns either a boolean
130
131
  or numeric "score". If the `evaluator` is a function of one argument then that argument will be
131
132
  bound to the `output` of the task. Alternatively, the `evaluator` can be a function of any
132
133
  combination of specific argument names that will be bound to special values:
133
- `input`: The input field of the dataset example
134
- `output`: The output of the task
135
- `expected`: The expected or reference output of the dataset example
136
- `reference`: An alias for `expected`
137
- `metadata`: Metadata associated with the dataset example
134
+
135
+ - `input`: The input field of the dataset example
136
+ - `output`: The output of the task
137
+ - `expected`: The expected or reference output of the dataset example
138
+ - `reference`: An alias for `expected`
139
+ - `metadata`: Metadata associated with the dataset example
138
140
 
139
141
  Phoenix also provides pre-built evaluators in the `phoenix.experiments.evaluators` module.
140
142
 
@@ -366,10 +368,9 @@ def run_experiment(
366
368
  return exp_run
367
369
 
368
370
  _errors: Tuple[Type[BaseException], ...]
369
- if not hasattr(rate_limit_errors, "__iter__"):
371
+ if not isinstance(rate_limit_errors, Sequence):
370
372
  _errors = (rate_limit_errors,) if rate_limit_errors is not None else ()
371
373
  else:
372
- rate_limit_errors = cast(Sequence[Type[BaseException]], rate_limit_errors)
373
374
  _errors = tuple(filter(None, rate_limit_errors))
374
375
  rate_limiters = [RateLimiter(rate_limit_error=rate_limit_error) for rate_limit_error in _errors]
375
376
 
@@ -606,10 +607,9 @@ def evaluate_experiment(
606
607
  return eval_run
607
608
 
608
609
  _errors: Tuple[Type[BaseException], ...]
609
- if not hasattr(rate_limit_errors, "__iter__"):
610
+ if not isinstance(rate_limit_errors, Sequence):
610
611
  _errors = (rate_limit_errors,) if rate_limit_errors is not None else ()
611
612
  else:
612
- rate_limit_errors = cast(Sequence[Type[BaseException]], rate_limit_errors)
613
613
  _errors = tuple(filter(None, rate_limit_errors))
614
614
  rate_limiters = [RateLimiter(rate_limit_error=rate_limit_error) for rate_limit_error in _errors]
615
615
 
@@ -1,8 +1,4 @@
1
- from typing import (
2
- AsyncContextManager,
3
- Callable,
4
- List,
5
- )
1
+ from typing import AsyncContextManager, Callable, List, Optional
6
2
 
7
3
  from sqlalchemy import func, select
8
4
  from sqlalchemy.ext.asyncio import AsyncSession
@@ -12,7 +8,7 @@ from typing_extensions import TypeAlias
12
8
  from phoenix.db import models
13
9
 
14
10
  ExperimentID: TypeAlias = int
15
- RunLatency: TypeAlias = float
11
+ RunLatency: TypeAlias = Optional[float]
16
12
  Key: TypeAlias = ExperimentID
17
13
  Result: TypeAlias = RunLatency
18
14
 
@@ -27,26 +23,30 @@ class AverageExperimentRunLatencyDataLoader(DataLoader[Key, Result]):
27
23
 
28
24
  async def _load_fn(self, keys: List[Key]) -> List[Result]:
29
25
  experiment_ids = keys
26
+ resolved_experiment_ids = (
27
+ select(models.Experiment.id)
28
+ .where(models.Experiment.id.in_(set(experiment_ids)))
29
+ .subquery()
30
+ )
31
+ query = (
32
+ select(
33
+ resolved_experiment_ids.c.id,
34
+ func.avg(
35
+ func.extract("epoch", models.ExperimentRun.end_time)
36
+ - func.extract("epoch", models.ExperimentRun.start_time)
37
+ ),
38
+ )
39
+ .outerjoin_from(
40
+ from_=resolved_experiment_ids,
41
+ target=models.ExperimentRun,
42
+ onclause=resolved_experiment_ids.c.id == models.ExperimentRun.experiment_id,
43
+ )
44
+ .group_by(resolved_experiment_ids.c.id)
45
+ )
30
46
  async with self._db() as session:
31
47
  avg_latencies = {
32
48
  experiment_id: avg_latency
33
- async for experiment_id, avg_latency in await session.stream(
34
- select(
35
- models.ExperimentRun.experiment_id,
36
- func.avg(
37
- func.extract(
38
- "epoch",
39
- models.ExperimentRun.end_time,
40
- )
41
- - func.extract(
42
- "epoch",
43
- models.ExperimentRun.start_time,
44
- )
45
- ),
46
- )
47
- .where(models.ExperimentRun.experiment_id.in_(set(experiment_ids)))
48
- .group_by(models.ExperimentRun.experiment_id)
49
- )
49
+ async for experiment_id, avg_latency in await session.stream(query)
50
50
  }
51
51
  return [
52
52
  avg_latencies.get(experiment_id, ValueError(f"Unknown experiment: {experiment_id}"))