arize-phoenix 4.4.4rc4__tar.gz → 4.4.4rc5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (263)
  1. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/PKG-INFO +2 -2
  2. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/pyproject.toml +2 -1
  3. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/datasets/evaluators/code_evaluators.py +25 -53
  4. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/datasets/evaluators/llm_evaluators.py +63 -32
  5. arize_phoenix-4.4.4rc5/src/phoenix/datasets/evaluators/utils.py +292 -0
  6. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/datasets/experiments.py +147 -82
  7. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/datasets/tracing.py +19 -0
  8. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/datasets/types.py +18 -52
  9. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/insertion/dataset.py +19 -16
  10. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/migrations/versions/10460e46d750_datasets.py +2 -2
  11. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/models.py +8 -3
  12. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/context.py +2 -0
  13. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/__init__.py +2 -0
  14. arize_phoenix-4.4.4rc5/src/phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
  15. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/helpers/dataset_helpers.py +8 -7
  16. arize_phoenix-4.4.4rc5/src/phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  17. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/mutations/project_mutations.py +9 -4
  18. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/datasets.py +146 -42
  19. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/experiment_evaluations.py +1 -0
  20. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/experiment_runs.py +2 -2
  21. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Experiment.py +5 -0
  22. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/ExperimentRun.py +1 -1
  23. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/ExperimentRunAnnotation.py +1 -1
  24. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/app.py +2 -0
  25. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/index.js +610 -564
  26. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/session/client.py +124 -2
  27. arize_phoenix-4.4.4rc5/src/phoenix/version.py +1 -0
  28. arize_phoenix-4.4.4rc4/src/phoenix/datasets/evaluators/_utils.py +0 -13
  29. arize_phoenix-4.4.4rc4/src/phoenix/version.py +0 -1
  30. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/.gitignore +0 -0
  31. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/IP_NOTICE +0 -0
  32. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/LICENSE +0 -0
  33. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/README.md +0 -0
  34. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/chat-service/chat/__init__.py +0 -0
  35. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/chat-service/chat/app.py +0 -0
  36. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/chat-service/chat/types.py +0 -0
  37. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/frontend/Dockerfile +0 -0
  38. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/frontend/Makefile +0 -0
  39. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/frontend/__init__.py +0 -0
  40. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/frontend/pyproject.toml +0 -0
  41. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/frontend/requirements.txt +0 -0
  42. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/examples/manually-instrumented-chatbot/frontend/schema.json +0 -0
  43. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/__init__.py +0 -0
  44. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/config.py +0 -0
  45. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/core/__init__.py +0 -0
  46. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/core/embedding_dimension.py +0 -0
  47. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/core/model.py +0 -0
  48. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/core/model_schema.py +0 -0
  49. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/core/model_schema_adapter.py +0 -0
  50. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/datasets/__init__.py +0 -0
  51. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/datasets/evaluators/__init__.py +0 -0
  52. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/datetime_utils.py +0 -0
  53. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/README.md +0 -0
  54. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/__init__.py +0 -0
  55. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/alembic.ini +0 -0
  56. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/bulk_inserter.py +0 -0
  57. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/engines.py +0 -0
  58. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/helpers.py +0 -0
  59. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/insertion/__init__.py +0 -0
  60. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/insertion/evaluation.py +0 -0
  61. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/insertion/helpers.py +0 -0
  62. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/insertion/span.py +0 -0
  63. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/migrate.py +0 -0
  64. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/migrations/__init__.py +0 -0
  65. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/migrations/env.py +0 -0
  66. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/migrations/script.py.mako +0 -0
  67. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/migrations/types.py +0 -0
  68. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +0 -0
  69. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/exceptions.py +0 -0
  70. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/inferences/__init__.py +0 -0
  71. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/inferences/errors.py +0 -0
  72. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/inferences/fixtures.py +0 -0
  73. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/inferences/inferences.py +0 -0
  74. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/inferences/schema.py +0 -0
  75. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/inferences/validation.py +0 -0
  76. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/metrics/README.md +0 -0
  77. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/metrics/__init__.py +0 -0
  78. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/metrics/binning.py +0 -0
  79. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/metrics/metrics.py +0 -0
  80. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/metrics/mixins.py +0 -0
  81. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/metrics/retrieval_metrics.py +0 -0
  82. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/metrics/timeseries.py +0 -0
  83. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/metrics/wrappers.py +0 -0
  84. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/pointcloud/__init__.py +0 -0
  85. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/pointcloud/clustering.py +0 -0
  86. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/pointcloud/pointcloud.py +0 -0
  87. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/pointcloud/projectors.py +0 -0
  88. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  89. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/py.typed +0 -0
  90. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/__init__.py +0 -0
  91. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/__init__.py +0 -0
  92. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/cache/__init__.py +0 -0
  93. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/cache/two_tier_cache.py +0 -0
  94. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -0
  95. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/dataset_example_spans.py +0 -0
  96. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/document_evaluation_summaries.py +0 -0
  97. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/document_evaluations.py +0 -0
  98. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/document_retrieval_metrics.py +0 -0
  99. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/evaluation_summaries.py +0 -0
  100. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -0
  101. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/experiment_error_rates.py +0 -0
  102. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -0
  103. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/latency_ms_quantile.py +0 -0
  104. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/min_start_or_max_end_times.py +0 -0
  105. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/project_by_name.py +0 -0
  106. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/record_counts.py +0 -0
  107. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/span_descendants.py +0 -0
  108. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/span_evaluations.py +0 -0
  109. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/span_projects.py +0 -0
  110. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/token_counts.py +0 -0
  111. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/trace_evaluations.py +0 -0
  112. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/dataloaders/trace_row_ids.py +0 -0
  113. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/helpers/__init__.py +0 -0
  114. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -0
  115. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -0
  116. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  117. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  118. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/CreateDatasetInput.py +0 -0
  119. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  120. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DatasetExampleInput.py +0 -0
  121. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DatasetSort.py +0 -0
  122. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DatasetVersionSort.py +0 -0
  123. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -0
  124. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DeleteDatasetInput.py +0 -0
  125. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -0
  126. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  127. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  128. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  129. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -0
  130. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/PatchDatasetInput.py +0 -0
  131. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  132. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  133. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  134. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/input_types/__init__.py +0 -0
  135. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/interceptor.py +0 -0
  136. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/mutations/__init__.py +0 -0
  137. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/mutations/auth.py +0 -0
  138. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/mutations/dataset_mutations.py +0 -0
  139. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/mutations/experiment_mutations.py +0 -0
  140. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/mutations/export_events_mutations.py +0 -0
  141. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/openapi/__init__.py +0 -0
  142. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/openapi/main.py +0 -0
  143. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/openapi/schema.py +0 -0
  144. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/queries.py +0 -0
  145. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/__init__.py +0 -0
  146. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/utils.py +0 -0
  147. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/__init__.py +0 -0
  148. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/dataset_examples.py +0 -0
  149. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/evaluations.py +0 -0
  150. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/experiments.py +0 -0
  151. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/spans.py +0 -0
  152. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/routers/v1/traces.py +0 -0
  153. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/schema.py +0 -0
  154. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/AnnotatorKind.py +0 -0
  155. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Cluster.py +0 -0
  156. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/CreateDatasetPayload.py +0 -0
  157. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  158. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Dataset.py +0 -0
  159. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DatasetExample.py +0 -0
  160. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DatasetExampleRevision.py +0 -0
  161. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  162. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DatasetVersion.py +0 -0
  163. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Dimension.py +0 -0
  164. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  165. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  166. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DimensionType.py +0 -0
  167. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  168. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
  169. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
  170. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  171. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  172. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Evaluation.py +0 -0
  173. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
  174. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Event.py +0 -0
  175. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  176. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/ExampleRevisionInterface.py +0 -0
  177. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -0
  178. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/ExperimentComparison.py +0 -0
  179. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  180. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Functionality.py +0 -0
  181. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Inferences.py +0 -0
  182. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/InferencesRole.py +0 -0
  183. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/MimeType.py +0 -0
  184. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Model.py +0 -0
  185. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/NumericRange.py +0 -0
  186. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  187. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Project.py +0 -0
  188. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  189. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Retrieval.py +0 -0
  190. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  191. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Segments.py +0 -0
  192. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/SortDir.py +0 -0
  193. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Span.py +0 -0
  194. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  195. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/Trace.py +0 -0
  196. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  197. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  198. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  199. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/__init__.py +0 -0
  200. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/node.py +0 -0
  201. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/api/types/pagination.py +0 -0
  202. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/grpc_server.py +0 -0
  203. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/main.py +0 -0
  204. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/openapi/__init__.py +0 -0
  205. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/openapi/docs.py +0 -0
  206. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/prometheus.py +0 -0
  207. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  208. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  209. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  210. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  211. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  212. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  213. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  214. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  215. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/favicon.ico +0 -0
  216. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/index.css +0 -0
  217. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/static/modernizr.js +0 -0
  218. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/telemetry.py +0 -0
  219. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/templates/__init__.py +0 -0
  220. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/templates/index.html +0 -0
  221. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/server/thread_server.py +0 -0
  222. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/services.py +0 -0
  223. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/session/__init__.py +0 -0
  224. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/session/data_extractor.py +0 -0
  225. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/session/evaluation.py +0 -0
  226. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/session/session.py +0 -0
  227. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/settings.py +0 -0
  228. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/__init__.py +0 -0
  229. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/attributes.py +0 -0
  230. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/dsl/README.md +0 -0
  231. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/dsl/__init__.py +0 -0
  232. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/dsl/filter.py +0 -0
  233. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/dsl/helpers.py +0 -0
  234. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/dsl/query.py +0 -0
  235. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/errors.py +0 -0
  236. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/evaluation_conventions.py +0 -0
  237. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/exporter.py +0 -0
  238. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/fixtures.py +0 -0
  239. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/langchain/__init__.py +0 -0
  240. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/langchain/instrumentor.py +0 -0
  241. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/llama_index/__init__.py +0 -0
  242. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/llama_index/callback.py +0 -0
  243. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/openai/__init__.py +0 -0
  244. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/openai/instrumentor.py +0 -0
  245. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/otel.py +0 -0
  246. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/projects.py +0 -0
  247. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/schemas.py +0 -0
  248. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/span_evaluations.py +0 -0
  249. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/span_json_decoder.py +0 -0
  250. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/span_json_encoder.py +0 -0
  251. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/trace_dataset.py +0 -0
  252. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/utils.py +0 -0
  253. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/v1/__init__.py +0 -0
  254. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
  255. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
  256. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/utilities/__init__.py +0 -0
  257. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/utilities/deprecation.py +0 -0
  258. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/utilities/error_handling.py +0 -0
  259. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/utilities/json.py +0 -0
  260. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/utilities/logging.py +0 -0
  261. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/utilities/project.py +0 -0
  262. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/utilities/re.py +0 -0
  263. {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc5}/src/phoenix/utilities/span_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.4.4rc4
3
+ Version: 4.4.4rc5
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -31,7 +31,7 @@ Requires-Dist: openinference-instrumentation
31
31
  Requires-Dist: openinference-instrumentation-langchain>=0.1.12
32
32
  Requires-Dist: openinference-instrumentation-llama-index>=1.2.0
33
33
  Requires-Dist: openinference-instrumentation-openai>=0.1.4
34
- Requires-Dist: openinference-semantic-conventions>=0.1.5
34
+ Requires-Dist: openinference-semantic-conventions>=0.1.9
35
35
  Requires-Dist: opentelemetry-exporter-otlp
36
36
  Requires-Dist: opentelemetry-proto>=1.12.0
37
37
  Requires-Dist: opentelemetry-sdk
@@ -46,7 +46,7 @@ dependencies = [
46
46
  "opentelemetry-proto>=1.12.0", # needed to avoid this issue: https://github.com/Arize-ai/phoenix/issues/2695
47
47
  "opentelemetry-exporter-otlp",
48
48
  "opentelemetry-semantic-conventions",
49
- "openinference-semantic-conventions>=0.1.5",
49
+ "openinference-semantic-conventions>=0.1.9",
50
50
  "openinference-instrumentation",
51
51
  "openinference-instrumentation-langchain>=0.1.12",
52
52
  "openinference-instrumentation-llama-index>=1.2.0",
@@ -206,6 +206,7 @@ dependencies = [
206
206
  [tool.hatch.envs.docs]
207
207
  detached = true
208
208
  dependencies = [
209
+ "pyment",
209
210
  "interrogate",
210
211
  ]
211
212
 
@@ -2,19 +2,14 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import re
5
- from typing import TYPE_CHECKING, List, Optional, Union
5
+ from typing import Any, List, Optional, Union
6
6
 
7
- from phoenix.datasets.evaluators._utils import _unwrap_json
8
- from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
7
+ from phoenix.datasets.evaluators.utils import Evaluator
8
+ from phoenix.datasets.types import EvaluationResult, TaskOutput
9
9
 
10
10
 
11
- class JSONParsable:
12
- annotator_kind = "CODE"
13
- name = "JSONParsable"
14
-
15
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
16
- assert exp_run.output is not None
17
- output = _unwrap_json(exp_run.output.result)
11
+ class JSONParsable(Evaluator):
12
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
18
13
  assert isinstance(output, str), "Experiment run output must be a string"
19
14
  try:
20
15
  json.loads(output)
@@ -26,18 +21,14 @@ class JSONParsable:
26
21
  )
27
22
 
28
23
 
29
- class ContainsKeyword:
30
- annotator_kind = "CODE"
31
-
24
+ class ContainsKeyword(Evaluator):
32
25
  def __init__(self, keyword: str, name: Optional[str] = None) -> None:
33
26
  self.keyword = keyword
34
- self.name = name or f"Contains({repr(keyword)})"
27
+ self._name = name or f"Contains({repr(keyword)})"
35
28
 
36
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
37
- assert exp_run.output is not None
38
- result = _unwrap_json(exp_run.output.result)
39
- assert isinstance(result, str), "Experiment run output must be a string"
40
- found = self.keyword in result
29
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
30
+ assert isinstance(output, str), "Experiment run output must be a string"
31
+ found = self.keyword in output
41
32
  return EvaluationResult(
42
33
  score=float(found),
43
34
  explanation=(
@@ -47,18 +38,14 @@ class ContainsKeyword:
47
38
  )
48
39
 
49
40
 
50
- class ContainsAnyKeyword:
51
- annotator_kind = "CODE"
52
-
41
+ class ContainsAnyKeyword(Evaluator):
53
42
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
54
43
  self.keywords = keywords
55
- self.name = name or f"ContainsAny({keywords})"
44
+ self._name = name or f"ContainsAny({keywords})"
56
45
 
57
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
58
- assert exp_run.output is not None
59
- result = _unwrap_json(exp_run.output.result)
60
- assert isinstance(result, str), "Experiment run output must be a string"
61
- found = [keyword for keyword in self.keywords if keyword in result]
46
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
47
+ assert isinstance(output, str), "Experiment run output must be a string"
48
+ found = [keyword for keyword in self.keywords if keyword in output]
62
49
  if found:
63
50
  explanation = f"the keywords {found} were found in the output"
64
51
  else:
@@ -69,18 +56,14 @@ class ContainsAnyKeyword:
69
56
  )
70
57
 
71
58
 
72
- class ContainsAllKeywords:
73
- annotator_kind = "CODE"
74
-
59
+ class ContainsAllKeywords(Evaluator):
75
60
  def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
76
61
  self.keywords = keywords
77
- self.name = name or f"ContainsAll({keywords})"
62
+ self._name = name or f"ContainsAll({keywords})"
78
63
 
79
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
80
- assert exp_run.output is not None
81
- result = _unwrap_json(exp_run.output.result)
82
- assert isinstance(result, str), "Experiment run output must be a string"
83
- not_found = [keyword for keyword in self.keywords if keyword not in result]
64
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
65
+ assert isinstance(output, str), "Experiment run output must be a string"
66
+ not_found = [keyword for keyword in self.keywords if keyword not in output]
84
67
  if not_found:
85
68
  contains_all = False
86
69
  explanation = f"the keywords {not_found} were not found in the output"
@@ -93,21 +76,17 @@ class ContainsAllKeywords:
93
76
  )
94
77
 
95
78
 
96
- class MatchesRegex:
97
- annotator_kind = "CODE"
98
-
79
+ class MatchesRegex(Evaluator):
99
80
  def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
100
81
  if isinstance(pattern, str):
101
82
  pattern = re.compile(pattern)
102
83
  self.pattern = pattern
103
84
  assert isinstance(pattern, re.Pattern)
104
- self.name = name or f"matches_({pattern})"
85
+ self._name = name or f"matches_({pattern})"
105
86
 
106
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
107
- assert exp_run.output is not None
108
- result = _unwrap_json(exp_run.output.result)
109
- assert isinstance(result, str), "Experiment run output must be a string"
110
- matches = self.pattern.findall(result)
87
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
88
+ assert isinstance(output, str), "Experiment run output must be a string"
89
+ matches = self.pattern.findall(output)
111
90
  if matches:
112
91
  explanation = (
113
92
  f"the substrings {matches} matched the regex pattern {self.pattern.pattern}"
@@ -118,10 +97,3 @@ class MatchesRegex:
118
97
  score=float(bool(matches)),
119
98
  explanation=explanation,
120
99
  )
121
-
122
-
123
- # Someday we'll do typing checking in unit tests.
124
- if TYPE_CHECKING:
125
- _: ExperimentEvaluator
126
- _ = JSONParsable()
127
- _ = ContainsKeyword("test")
@@ -1,14 +1,23 @@
1
1
  import re
2
- from typing import Callable, Optional, Type
3
-
4
- from phoenix.datasets.evaluators._utils import _unwrap_json
5
- from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
2
+ from types import MappingProxyType
3
+ from typing import Any, Callable, Optional, Type
4
+
5
+ from phoenix.datasets.evaluators.utils import (
6
+ ExampleInput,
7
+ ExampleMetadata,
8
+ ExperimentEvaluator,
9
+ LLMEvaluator,
10
+ _unwrap_json,
11
+ )
12
+ from phoenix.datasets.types import (
13
+ EvaluationResult,
14
+ TaskOutput,
15
+ )
6
16
  from phoenix.evals.models.base import BaseModel as LLMBaseModel
7
17
  from phoenix.evals.utils import snap_to_rail
8
18
 
9
19
 
10
- class LLMCriteriaEvaluator:
11
- annotator_kind = "LLM"
20
+ class LLMCriteriaEvaluator(LLMEvaluator):
12
21
  _base_template = (
13
22
  "Determine if the following text is {criteria}. {description}"
14
23
  "First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
@@ -37,21 +46,23 @@ class LLMCriteriaEvaluator:
37
46
  self.criteria = criteria
38
47
  self.description = description
39
48
  self.template = self._format_base_template(self.criteria, self.description)
40
- self.name = name
49
+ self._name = name
41
50
 
42
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
43
- formatted_template = self._format_eval_template(exp_run)
51
+ def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
52
+ formatted_template = self._format_eval_template(output)
44
53
  unparsed_response = self.model._generate(formatted_template)
45
54
  return self._parse_eval_output(unparsed_response)
46
55
 
47
- async def async_evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
48
- formatted_template = self._format_eval_template(exp_run)
56
+ async def async_evaluate(
57
+ self, *, output: Optional[TaskOutput] = None, **_: Any
58
+ ) -> EvaluationResult:
59
+ formatted_template = self._format_eval_template(output)
49
60
  unparsed_response = await self.model._async_generate(formatted_template)
50
61
  return self._parse_eval_output(unparsed_response)
51
62
 
52
- def _format_eval_template(self, experiment_run: ExperimentRun) -> str:
53
- assert experiment_run.output is not None
54
- result = _unwrap_json(experiment_run.output.result)
63
+ def _format_eval_template(self, output: TaskOutput) -> str:
64
+ assert output is not None
65
+ result = _unwrap_json(output)
55
66
  return self.template.format(text=str(result))
56
67
 
57
68
  def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
@@ -137,8 +148,7 @@ def _parse_label_from_explanation(raw_string: str) -> str:
137
148
  return raw_string
138
149
 
139
150
 
140
- class RelevanceEvaluator:
141
- annotator_kind = "LLM"
151
+ class RelevanceEvaluator(LLMEvaluator):
142
152
  template = (
143
153
  "Determine if the following response is relevant to the query. In this context, "
144
154
  "'relevance' means that the response directly addresses the core question or topic of the "
@@ -162,19 +172,24 @@ class RelevanceEvaluator:
162
172
  def __init__(
163
173
  self,
164
174
  model: LLMBaseModel,
165
- get_query: Optional[Callable[[Example, ExperimentRun], str]] = None,
166
- get_response: Optional[Callable[[Example, ExperimentRun], str]] = None,
175
+ get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
176
+ get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
167
177
  name: str = "RelevanceEvaluator",
168
178
  ):
169
179
  self.model = model
170
- self.name = name
180
+ self._name = name
171
181
  self.get_query = get_query or self._default_get_query
172
182
  self.get_response = get_response or self._default_get_response
173
183
 
174
- def _format_eval_template(self, example: Example, experiment_run: ExperimentRun) -> str:
175
- assert experiment_run.output is not None
176
- query = self.get_query(example, experiment_run)
177
- response = self.get_response(example, experiment_run)
184
+ def _format_eval_template(
185
+ self,
186
+ output: Optional[TaskOutput] = None,
187
+ input: ExampleInput = MappingProxyType({}),
188
+ metadata: ExampleMetadata = MappingProxyType({}),
189
+ ) -> str:
190
+ assert output is not None
191
+ query = self.get_query(input, metadata)
192
+ response = self.get_response(output, metadata)
178
193
  return self.template.format(query=query, response=response)
179
194
 
180
195
  def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
@@ -195,19 +210,35 @@ class RelevanceEvaluator:
195
210
  metadata={},
196
211
  )
197
212
 
198
- def _default_get_query(self, example: Example, experiment_run: ExperimentRun) -> str:
199
- return str(example.input)
213
+ def _default_get_query(self, input: ExampleInput, *args: Any, **kwargs: Any) -> str:
214
+ return str(input)
200
215
 
201
- def _default_get_response(self, example: Example, experiment_run: ExperimentRun) -> str:
202
- assert experiment_run.output is not None
203
- return str(_unwrap_json(experiment_run.output.result))
216
+ def _default_get_response(
217
+ self, output: Optional[TaskOutput] = None, *args: Any, **kwargs: Any
218
+ ) -> str:
219
+ assert output is not None
220
+ return str(_unwrap_json(output))
204
221
 
205
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
206
- formatted_template = self._format_eval_template(example, exp_run)
222
+ def evaluate(
223
+ self,
224
+ *,
225
+ output: Optional[TaskOutput] = None,
226
+ metadata: ExampleMetadata = MappingProxyType({}),
227
+ input: ExampleInput = MappingProxyType({}),
228
+ **_: Any,
229
+ ) -> EvaluationResult:
230
+ formatted_template = self._format_eval_template(output, input, metadata)
207
231
  unparsed_response = self.model._generate(formatted_template)
208
232
  return self._parse_eval_output(unparsed_response)
209
233
 
210
- async def async_evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
211
- formatted_template = self._format_eval_template(example, exp_run)
234
+ async def async_evaluate(
235
+ self,
236
+ *,
237
+ output: Optional[TaskOutput] = None,
238
+ metadata: ExampleMetadata = MappingProxyType({}),
239
+ input: ExampleInput = MappingProxyType({}),
240
+ **_: Any,
241
+ ) -> EvaluationResult:
242
+ formatted_template = self._format_eval_template(output, input, metadata)
212
243
  unparsed_response = await self.model._async_generate(formatted_template)
213
244
  return self._parse_eval_output(unparsed_response)
@@ -0,0 +1,292 @@
1
+ import functools
2
+ import inspect
3
+ from abc import ABC
4
+ from types import MappingProxyType
5
+ from typing import Any, Awaitable, Callable, Mapping, Optional, Union
6
+
7
+ from typing_extensions import TypeAlias
8
+
9
+ from phoenix.datasets.types import (
10
+ AnnotatorKind,
11
+ EvaluationResult,
12
+ JSONSerializable,
13
+ TaskOutput,
14
+ )
15
+
16
+
17
def _unwrap_json(obj: JSONSerializable) -> JSONSerializable:
    """
    Unwrap a dict that holds exactly one entry.

    If ``obj`` is a dict with a single key, the value stored under that key is
    returned. Anything else (non-dict values, empty dicts, dicts with two or
    more keys) is returned unchanged.

    Raises:
        AssertionError: if the single wrapped value is not one of the basic
            JSON types (dict, list, str, int, float, bool, None).
    """
    # Guard clause: only single-entry dicts are unwrapped.
    if not isinstance(obj, dict) or len(obj) != 1:
        return obj
    (inner,) = obj.values()
    assert isinstance(
        inner, (dict, list, str, int, float, bool, type(None))
    ), "Output must be JSON serializable"
    return inner
27
+
28
+
29
def validate_signature(sig: inspect.Signature) -> None:
    """
    Check that a function signature is usable as an evaluator.

    The check is meant to fail early, before any evaluation is run.

    Rules:
      * The function must declare at least one parameter.
      * A function with exactly one parameter may use any parameter name.
      * A function with several parameters may only use the names ``input``,
        ``output``, ``expected`` and ``metadata``. Other parameters are
        tolerated only if they have a default value or are the variadic
        keyword parameter (``**kwargs``).

    Args:
        sig: the signature of the candidate evaluation function.

    Raises:
        ValueError: if the signature has no parameters, or if a
            multi-parameter signature has unrecognized required parameters.
            The message lists every offending parameter name.
    """
    params = sig.parameters
    valid_named_params = {"input", "output", "expected", "metadata"}
    if len(params) == 0:
        raise ValueError("Evaluation function must have at least one parameter.")
    if len(params) == 1:
        # A lone parameter is bound positionally, so its name is irrelevant.
        return
    # Collect *all* offenders, in declaration order, so the error message is
    # complete and deterministic. (Joining a single name with ", " would split
    # it into characters.)
    invalid_names = [
        param_name
        for param_name, param in params.items()
        if param_name not in valid_named_params
        and param.kind is not inspect.Parameter.VAR_KEYWORD
        and param.default is inspect.Parameter.empty
    ]
    if invalid_names:
        raise ValueError(
            f"Invalid parameter names in evaluation function: {', '.join(invalid_names)}. "
            "Parameters names for multi-argument functions must be "
            f"any of: {', '.join(sorted(valid_named_params))}."
        )
51
+
52
+
53
+ def _bind_signature(sig: inspect.Signature, **kwargs: Any) -> inspect.BoundArguments:
54
+ parameter_mapping = {
55
+ "input": kwargs.get("input"),
56
+ "output": kwargs.get("output"),
57
+ "expected": kwargs.get("expected"),
58
+ "metadata": kwargs.get("metadata"),
59
+ }
60
+ params = sig.parameters
61
+ if len(params) == 1:
62
+ parameter_name = next(iter(params))
63
+ if parameter_name in parameter_mapping:
64
+ return sig.bind(parameter_mapping[parameter_name])
65
+ else:
66
+ return sig.bind(parameter_mapping["output"])
67
+ return sig.bind_partial(
68
+ **{name: parameter_mapping[name] for name in set(parameter_mapping).intersection(params)}
69
+ )
70
+
71
+
72
def create_evaluator(
    kind: Union[str, AnnotatorKind] = AnnotatorKind.CODE,
    name: Optional[str] = None,
    scorer: Optional[Callable[[Any], EvaluationResult]] = None,
) -> Callable[[Callable[..., Any]], "Evaluator"]:
    """
    Build a decorator that turns a plain function into an `Evaluator`.

    Args:
        kind: the annotator kind, either an `AnnotatorKind` member or its
            string value (case-insensitive, since it is upper-cased before
            conversion).
        name: the evaluator name. When empty, a name is derived from each
            decorated callable: the class name of the object it is bound to
            if it has ``__self__``, else its ``__name__``, else ``str(func)``.
        scorer: converts the raw return value of the function into an
            `EvaluationResult`. Defaults to `_default_eval_scorer`.

    Returns:
        A decorator that accepts a sync or async function and returns an
        `Evaluator` wrapping it.

    Raises:
        ValueError: from `validate_signature`, at decoration time, when the
            decorated function has an unsupported signature.
    """
    if scorer is None:
        scorer = _default_eval_scorer

    if isinstance(kind, str):
        kind = AnnotatorKind(kind.upper())

    def wrapper(func: Callable[..., Any]) -> Evaluator:
        # Resolve the name into a local variable. Writing it back to the
        # enclosing `name` (via `nonlocal`) would make a reused decorator give
        # every later function the name derived from the first one.
        evaluator_name = name
        if not evaluator_name:
            if hasattr(func, "__self__"):
                evaluator_name = func.__self__.__class__.__name__
            elif hasattr(func, "__name__"):
                evaluator_name = func.__name__
            else:
                evaluator_name = str(func)
        assert evaluator_name is not None

        wrapped_signature = inspect.signature(func)
        # Fail early, before any evaluation is run.
        validate_signature(wrapped_signature)

        if inspect.iscoroutinefunction(func):
            return _wrap_coroutine_evaluation_function(
                evaluator_name, kind, wrapped_signature, scorer
            )(func)
        else:
            return _wrap_sync_evaluation_function(
                evaluator_name, kind, wrapped_signature, scorer
            )(func)

    return wrapper
103
+
104
+
105
def _wrap_coroutine_evaluation_function(
    name: str,
    annotator_kind: AnnotatorKind,
    sig: inspect.Signature,
    convert_to_score: Callable[[Any], EvaluationResult],
) -> Callable[[Callable[..., Any]], "Evaluator"]:
    """
    Return a decorator that wraps a coroutine function in an `Evaluator`.

    The resulting evaluator awaits the wrapped function in `async_evaluate`,
    binding the evaluation keyword arguments according to ``sig`` and passing
    the awaited result through ``convert_to_score``. Calling the evaluator
    object directly forwards the call to the wrapped coroutine function.
    """

    def decorate(func: Callable[..., Any]) -> "Evaluator":
        class AsyncEvaluator(Evaluator):
            def __init__(self) -> None:
                self._name = name
                self._kind = annotator_kind

            @functools.wraps(func)
            async def __call__(self, *args: Any, **kwargs: Any) -> Any:
                # Plain pass-through: no binding or scoring.
                return await func(*args, **kwargs)

            async def async_evaluate(self, **kwargs: Any) -> EvaluationResult:
                bound = _bind_signature(sig, **kwargs)
                raw_result = await func(*bound.args, **bound.kwargs)
                return convert_to_score(raw_result)

        evaluator = AsyncEvaluator()
        return evaluator

    return decorate
129
+
130
+
131
def _wrap_sync_evaluation_function(
    name: str,
    annotator_kind: AnnotatorKind,
    sig: inspect.Signature,
    convert_to_score: Callable[[Any], EvaluationResult],
) -> Callable[[Callable[..., Any]], "Evaluator"]:
    """
    Return a decorator that wraps a synchronous function in an `Evaluator`.

    The resulting evaluator calls the wrapped function in `evaluate`, binding
    the evaluation keyword arguments according to ``sig`` and passing the
    result through ``convert_to_score``. Calling the evaluator object directly
    forwards the call to the wrapped function.
    """

    def decorate(func: Callable[..., Any]) -> "Evaluator":
        class SyncEvaluator(Evaluator):
            def __init__(self) -> None:
                self._name = name
                self._kind = annotator_kind

            @functools.wraps(func)
            def __call__(self, *args: Any, **kwargs: Any) -> Any:
                # Plain pass-through: no binding or scoring.
                return func(*args, **kwargs)

            def evaluate(self, **kwargs: Any) -> EvaluationResult:
                bound = _bind_signature(sig, **kwargs)
                raw_result = func(*bound.args, **bound.kwargs)
                return convert_to_score(raw_result)

        evaluator = SyncEvaluator()
        return evaluator

    return decorate
155
+
156
+
157
def _default_eval_scorer(result: Any) -> EvaluationResult:
    """
    Convert the raw return value of an evaluation function to an `EvaluationResult`.

    * ``bool``: score is ``float(result)`` and label is ``str(result)``.
    * ``int`` / ``float``: score is ``float(result)``.
    * `EvaluationResult`: returned unchanged.

    Raises:
        ValueError: for any other type.
    """
    # `bool` is checked first because it is a subclass of `int`.
    if isinstance(result, bool):
        return EvaluationResult(score=float(result), label=str(result))
    if isinstance(result, (int, float)):
        return EvaluationResult(score=float(result))
    if isinstance(result, EvaluationResult):
        return result
    raise ValueError(f"Unsupported evaluation result type: {type(result)}")
166
+
167
+
168
# Aliases for the mapping-shaped values passed to evaluators. All three share
# the same structure: string keys mapped to JSON-serializable values.
ExampleOutput: TypeAlias = Mapping[str, JSONSerializable]
ExampleMetadata: TypeAlias = Mapping[str, JSONSerializable]
ExampleInput: TypeAlias = Mapping[str, JSONSerializable]

# Plain-string aliases used as the return types of `Evaluator.name` and
# `Evaluator.kind`.
EvaluatorName: TypeAlias = str
EvaluatorKind: TypeAlias = str
# Value types an evaluation callable is annotated as returning.
# NOTE(review): `_default_eval_scorer` in this file raises ValueError for `str`
# results; confirm whether `str` is intended to be supported here.
EvaluatorOutput: TypeAlias = Union[EvaluationResult, bool, int, float, str]
175
+
176
+
177
class Evaluator(ABC):
    """
    Helper base class that guides the implementation of an evaluator object.

    A concrete subclass must define `evaluate`, `async_evaluate`, or both
    (defining both is recommended). This is enforced when the subclass is
    created, see `__init_subclass__`.

    This class is meant to be subclassed; instantiating it directly raises
    a `TypeError`.
    """

    _kind: AnnotatorKind
    _name: EvaluatorName

    @functools.cached_property
    def name(self) -> EvaluatorName:
        # Fall back to the class name when no `_name` has been assigned.
        return getattr(self, "_name", self.__class__.__name__)

    @functools.cached_property
    def kind(self) -> EvaluatorKind:
        # Evaluators are of kind CODE unless `_kind` says otherwise.
        annotator_kind = getattr(self, "_kind", AnnotatorKind.CODE)
        return annotator_kind.value

    def __new__(cls, *args: Any, **kwargs: Any) -> "Evaluator":
        if cls is not Evaluator:
            return object.__new__(cls)
        raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")

    def evaluate(
        self,
        *,
        output: Optional[TaskOutput] = None,
        expected: Optional[ExampleOutput] = None,
        metadata: ExampleMetadata = MappingProxyType({}),
        input: ExampleInput = MappingProxyType({}),
        **kwargs: Any,
    ) -> EvaluationResult:
        # Subclasses override this method, the async variant, or (preferably)
        # both. The base implementation is deliberately unusable.
        raise NotImplementedError

    async def async_evaluate(
        self,
        *,
        output: Optional[TaskOutput] = None,
        expected: Optional[ExampleOutput] = None,
        metadata: ExampleMetadata = MappingProxyType({}),
        input: ExampleInput = MappingProxyType({}),
        **kwargs: Any,
    ) -> EvaluationResult:
        # Subclasses override this method, the sync variant, or (preferably)
        # both. By default the async path delegates to the sync `evaluate`.
        forwarded = dict(
            output=output,
            expected=expected,
            metadata=metadata,
            input=input,
        )
        return self.evaluate(**forwarded, **kwargs)

    def __init_subclass__(cls, is_abstract: bool = False, **kwargs: Any) -> None:
        """
        Verify that a concrete subclass supplies a usable evaluation method.

        The MRO is walked up to (but excluding) `LLMEvaluator` / `Evaluator`.
        The first `evaluate` or `async_evaluate` found on a class along the
        way has its signature validated, which ends the check. `evaluate` is
        looked up before `async_evaluate` on each class. Subclasses declared
        with ``is_abstract=True`` are exempt.

        Raises:
            ValueError: if no class below the base classes defines either
                method, or if the method found has an invalid signature.
        """
        super().__init_subclass__(**kwargs)
        if is_abstract:
            return
        method_names = (Evaluator.evaluate.__name__, Evaluator.async_evaluate.__name__)
        for klass in inspect.getmro(cls):
            if klass in (LLMEvaluator, Evaluator):
                break
            for method_name in method_names:
                method = klass.__dict__.get(method_name)
                if not method:
                    continue
                assert callable(method), f"`{method_name}()` method should be callable"
                # Pre-fill the first parameter (`self`) so that only the
                # caller-facing parameters are validated.
                _validate_sig(functools.partial(method, None), method_name)
                return
        evaluate_fn_signature = inspect.signature(Evaluator.evaluate)
        raise ValueError(
            f"Evaluator must implement either "
            f"`def evaluate{evaluate_fn_signature}` or "
            f"`async def async_evaluate{evaluate_fn_signature}`"
        )
261
+
262
+
263
def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
    """
    Validate the signature of an evaluator method.

    The signature must pass `validate_signature` and must also declare a
    variadic keyword parameter (``**kwargs``).

    Args:
        fn: the callable to inspect.
        fn_name: the method name to use in the error message.

    Raises:
        ValueError: if the signature is invalid or lacks ``**kwargs``.
    """
    signature = inspect.signature(fn)
    validate_signature(signature)
    accepts_var_keyword = any(
        parameter.kind is inspect.Parameter.VAR_KEYWORD
        for parameter in signature.parameters.values()
    )
    if not accepts_var_keyword:
        raise ValueError(f"`{fn_name}` should allow variadic keyword arguments `**kwargs`")
271
+
272
+
273
class LLMEvaluator(Evaluator, ABC, is_abstract=True):
    """
    Convenience base class whose `kind` is LLM.

    This class is meant to be subclassed; instantiating it directly raises
    a `TypeError`.
    """

    _kind = AnnotatorKind.LLM

    def __new__(cls, *args: Any, **kwargs: Any) -> "LLMEvaluator":
        if cls is not LLMEvaluator:
            return object.__new__(cls)
        raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
286
+
287
+
288
# Anything accepted as an experiment evaluator: an `Evaluator` instance, or a
# plain synchronous or asynchronous callable returning an `EvaluatorOutput`.
ExperimentEvaluator: TypeAlias = Union[
    Evaluator,
    Callable[..., EvaluatorOutput],
    Callable[..., Awaitable[EvaluatorOutput]],
]