arize-phoenix 4.4.4rc2__tar.gz → 4.4.4rc4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (260)
  1. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/PKG-INFO +1 -1
  2. arize_phoenix-4.4.4rc4/src/phoenix/datasets/evaluators/__init__.py +18 -0
  3. arize_phoenix-4.4.4rc4/src/phoenix/datasets/evaluators/_utils.py +13 -0
  4. arize_phoenix-4.4.4rc4/src/phoenix/datasets/evaluators/code_evaluators.py +127 -0
  5. arize_phoenix-4.4.4rc2/src/phoenix/datasets/evaluators.py → arize_phoenix-4.4.4rc4/src/phoenix/datasets/evaluators/llm_evaluators.py +19 -81
  6. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/datasets/experiments.py +20 -4
  7. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/openapi/schema.py +2 -1
  8. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Span.py +1 -0
  9. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/index.js +519 -515
  10. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/schemas.py +1 -2
  11. arize_phoenix-4.4.4rc4/src/phoenix/version.py +1 -0
  12. arize_phoenix-4.4.4rc2/src/phoenix/version.py +0 -1
  13. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/.gitignore +0 -0
  14. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/IP_NOTICE +0 -0
  15. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/LICENSE +0 -0
  16. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/README.md +0 -0
  17. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/chat-service/chat/__init__.py +0 -0
  18. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/chat-service/chat/app.py +0 -0
  19. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/chat-service/chat/types.py +0 -0
  20. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/frontend/Dockerfile +0 -0
  21. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/frontend/Makefile +0 -0
  22. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/frontend/__init__.py +0 -0
  23. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/frontend/pyproject.toml +0 -0
  24. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/frontend/requirements.txt +0 -0
  25. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/examples/manually-instrumented-chatbot/frontend/schema.json +0 -0
  26. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/pyproject.toml +0 -0
  27. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/__init__.py +0 -0
  28. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/config.py +0 -0
  29. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/core/__init__.py +0 -0
  30. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/core/embedding_dimension.py +0 -0
  31. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/core/model.py +0 -0
  32. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/core/model_schema.py +0 -0
  33. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/core/model_schema_adapter.py +0 -0
  34. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/datasets/__init__.py +0 -0
  35. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/datasets/tracing.py +0 -0
  36. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/datasets/types.py +0 -0
  37. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/datetime_utils.py +0 -0
  38. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/README.md +0 -0
  39. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/__init__.py +0 -0
  40. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/alembic.ini +0 -0
  41. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/bulk_inserter.py +0 -0
  42. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/engines.py +0 -0
  43. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/helpers.py +0 -0
  44. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/insertion/__init__.py +0 -0
  45. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/insertion/dataset.py +0 -0
  46. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/insertion/evaluation.py +0 -0
  47. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/insertion/helpers.py +0 -0
  48. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/insertion/span.py +0 -0
  49. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/migrate.py +0 -0
  50. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/migrations/__init__.py +0 -0
  51. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/migrations/env.py +0 -0
  52. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/migrations/script.py.mako +0 -0
  53. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/migrations/types.py +0 -0
  54. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -0
  55. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +0 -0
  56. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/db/models.py +0 -0
  57. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/exceptions.py +0 -0
  58. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/inferences/__init__.py +0 -0
  59. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/inferences/errors.py +0 -0
  60. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/inferences/fixtures.py +0 -0
  61. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/inferences/inferences.py +0 -0
  62. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/inferences/schema.py +0 -0
  63. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/inferences/validation.py +0 -0
  64. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/metrics/README.md +0 -0
  65. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/metrics/__init__.py +0 -0
  66. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/metrics/binning.py +0 -0
  67. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/metrics/metrics.py +0 -0
  68. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/metrics/mixins.py +0 -0
  69. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/metrics/retrieval_metrics.py +0 -0
  70. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/metrics/timeseries.py +0 -0
  71. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/metrics/wrappers.py +0 -0
  72. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/pointcloud/__init__.py +0 -0
  73. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/pointcloud/clustering.py +0 -0
  74. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/pointcloud/pointcloud.py +0 -0
  75. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/pointcloud/projectors.py +0 -0
  76. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  77. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/py.typed +0 -0
  78. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/__init__.py +0 -0
  79. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/__init__.py +0 -0
  80. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/context.py +0 -0
  81. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/__init__.py +0 -0
  82. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/cache/__init__.py +0 -0
  83. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/cache/two_tier_cache.py +0 -0
  84. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -0
  85. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/dataset_example_spans.py +0 -0
  86. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/document_evaluation_summaries.py +0 -0
  87. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/document_evaluations.py +0 -0
  88. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/document_retrieval_metrics.py +0 -0
  89. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/evaluation_summaries.py +0 -0
  90. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -0
  91. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/experiment_error_rates.py +0 -0
  92. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -0
  93. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/latency_ms_quantile.py +0 -0
  94. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/min_start_or_max_end_times.py +0 -0
  95. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/project_by_name.py +0 -0
  96. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/record_counts.py +0 -0
  97. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/span_descendants.py +0 -0
  98. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/span_evaluations.py +0 -0
  99. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/span_projects.py +0 -0
  100. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/token_counts.py +0 -0
  101. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/trace_evaluations.py +0 -0
  102. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/dataloaders/trace_row_ids.py +0 -0
  103. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/helpers/__init__.py +0 -0
  104. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/helpers/dataset_helpers.py +0 -0
  105. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -0
  106. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -0
  107. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  108. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  109. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/CreateDatasetInput.py +0 -0
  110. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  111. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DatasetExampleInput.py +0 -0
  112. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DatasetSort.py +0 -0
  113. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DatasetVersionSort.py +0 -0
  114. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -0
  115. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DeleteDatasetInput.py +0 -0
  116. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -0
  117. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  118. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  119. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  120. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -0
  121. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/PatchDatasetInput.py +0 -0
  122. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  123. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  124. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  125. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/input_types/__init__.py +0 -0
  126. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/interceptor.py +0 -0
  127. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/mutations/__init__.py +0 -0
  128. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/mutations/auth.py +0 -0
  129. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/mutations/dataset_mutations.py +0 -0
  130. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/mutations/experiment_mutations.py +0 -0
  131. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/mutations/export_events_mutations.py +0 -0
  132. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/mutations/project_mutations.py +0 -0
  133. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/openapi/__init__.py +0 -0
  134. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/openapi/main.py +0 -0
  135. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/queries.py +0 -0
  136. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/__init__.py +0 -0
  137. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/utils.py +0 -0
  138. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/__init__.py +0 -0
  139. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/dataset_examples.py +0 -0
  140. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/datasets.py +0 -0
  141. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/evaluations.py +0 -0
  142. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/experiment_evaluations.py +0 -0
  143. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/experiment_runs.py +0 -0
  144. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/experiments.py +0 -0
  145. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/spans.py +0 -0
  146. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/routers/v1/traces.py +0 -0
  147. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/schema.py +0 -0
  148. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/AnnotatorKind.py +0 -0
  149. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Cluster.py +0 -0
  150. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/CreateDatasetPayload.py +0 -0
  151. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  152. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Dataset.py +0 -0
  153. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DatasetExample.py +0 -0
  154. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DatasetExampleRevision.py +0 -0
  155. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  156. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DatasetVersion.py +0 -0
  157. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Dimension.py +0 -0
  158. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  159. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  160. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DimensionType.py +0 -0
  161. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  162. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
  163. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
  164. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  165. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  166. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Evaluation.py +0 -0
  167. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
  168. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Event.py +0 -0
  169. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  170. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/ExampleRevisionInterface.py +0 -0
  171. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Experiment.py +0 -0
  172. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -0
  173. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/ExperimentComparison.py +0 -0
  174. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/ExperimentRun.py +0 -0
  175. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/ExperimentRunAnnotation.py +0 -0
  176. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  177. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Functionality.py +0 -0
  178. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Inferences.py +0 -0
  179. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/InferencesRole.py +0 -0
  180. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/MimeType.py +0 -0
  181. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Model.py +0 -0
  182. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/NumericRange.py +0 -0
  183. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  184. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Project.py +0 -0
  185. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  186. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Retrieval.py +0 -0
  187. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  188. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Segments.py +0 -0
  189. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/SortDir.py +0 -0
  190. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  191. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/Trace.py +0 -0
  192. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  193. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  194. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  195. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/__init__.py +0 -0
  196. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/node.py +0 -0
  197. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/api/types/pagination.py +0 -0
  198. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/app.py +0 -0
  199. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/grpc_server.py +0 -0
  200. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/main.py +0 -0
  201. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/openapi/__init__.py +0 -0
  202. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/openapi/docs.py +0 -0
  203. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/prometheus.py +0 -0
  204. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  205. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  206. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  207. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  208. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  209. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  210. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  211. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  212. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/favicon.ico +0 -0
  213. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/index.css +0 -0
  214. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/static/modernizr.js +0 -0
  215. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/telemetry.py +0 -0
  216. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/templates/__init__.py +0 -0
  217. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/templates/index.html +0 -0
  218. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/server/thread_server.py +0 -0
  219. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/services.py +0 -0
  220. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/session/__init__.py +0 -0
  221. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/session/client.py +0 -0
  222. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/session/data_extractor.py +0 -0
  223. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/session/evaluation.py +0 -0
  224. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/session/session.py +0 -0
  225. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/settings.py +0 -0
  226. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/__init__.py +0 -0
  227. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/attributes.py +0 -0
  228. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/dsl/README.md +0 -0
  229. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/dsl/__init__.py +0 -0
  230. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/dsl/filter.py +0 -0
  231. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/dsl/helpers.py +0 -0
  232. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/dsl/query.py +0 -0
  233. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/errors.py +0 -0
  234. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/evaluation_conventions.py +0 -0
  235. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/exporter.py +0 -0
  236. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/fixtures.py +0 -0
  237. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/langchain/__init__.py +0 -0
  238. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/langchain/instrumentor.py +0 -0
  239. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/llama_index/__init__.py +0 -0
  240. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/llama_index/callback.py +0 -0
  241. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/openai/__init__.py +0 -0
  242. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/openai/instrumentor.py +0 -0
  243. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/otel.py +0 -0
  244. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/projects.py +0 -0
  245. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/span_evaluations.py +0 -0
  246. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/span_json_decoder.py +0 -0
  247. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/span_json_encoder.py +0 -0
  248. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/trace_dataset.py +0 -0
  249. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/utils.py +0 -0
  250. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/v1/__init__.py +0 -0
  251. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
  252. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
  253. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/utilities/__init__.py +0 -0
  254. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/utilities/deprecation.py +0 -0
  255. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/utilities/error_handling.py +0 -0
  256. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/utilities/json.py +0 -0
  257. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/utilities/logging.py +0 -0
  258. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/utilities/project.py +0 -0
  259. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/utilities/re.py +0 -0
  260. {arize_phoenix-4.4.4rc2 → arize_phoenix-4.4.4rc4}/src/phoenix/utilities/span_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.4.4rc2
3
+ Version: 4.4.4rc4
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -0,0 +1,18 @@
1
+ from phoenix.datasets.evaluators.code_evaluators import ContainsKeyword, JSONParsable
2
+ from phoenix.datasets.evaluators.llm_evaluators import (
3
+ CoherenceEvaluator,
4
+ ConcisenessEvaluator,
5
+ HelpfulnessEvaluator,
6
+ LLMCriteriaEvaluator,
7
+ RelevanceEvaluator,
8
+ )
9
+
10
+ __all__ = [
11
+ "ContainsKeyword",
12
+ "JSONParsable",
13
+ "CoherenceEvaluator",
14
+ "ConcisenessEvaluator",
15
+ "LLMCriteriaEvaluator",
16
+ "HelpfulnessEvaluator",
17
+ "RelevanceEvaluator",
18
+ ]
@@ -0,0 +1,13 @@
1
+ from phoenix.datasets.types import JSONSerializable
2
+
3
+
4
+ def _unwrap_json(obj: JSONSerializable) -> JSONSerializable:
5
+ if isinstance(obj, dict):
6
+ if len(obj) == 1:
7
+ key = next(iter(obj.keys()))
8
+ output = obj[key]
9
+ assert isinstance(
10
+ output, (dict, list, str, int, float, bool, type(None))
11
+ ), "Output must be JSON serializable"
12
+ return output
13
+ return obj
@@ -0,0 +1,127 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from typing import TYPE_CHECKING, List, Optional, Union
6
+
7
+ from phoenix.datasets.evaluators._utils import _unwrap_json
8
+ from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
9
+
10
+
11
+ class JSONParsable:
12
+ annotator_kind = "CODE"
13
+ name = "JSONParsable"
14
+
15
+ def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
16
+ assert exp_run.output is not None
17
+ output = _unwrap_json(exp_run.output.result)
18
+ assert isinstance(output, str), "Experiment run output must be a string"
19
+ try:
20
+ json.loads(output)
21
+ json_parsable = True
22
+ except BaseException:
23
+ json_parsable = False
24
+ return EvaluationResult(
25
+ score=int(json_parsable),
26
+ )
27
+
28
+
29
+ class ContainsKeyword:
30
+ annotator_kind = "CODE"
31
+
32
+ def __init__(self, keyword: str, name: Optional[str] = None) -> None:
33
+ self.keyword = keyword
34
+ self.name = name or f"Contains({repr(keyword)})"
35
+
36
+ def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
37
+ assert exp_run.output is not None
38
+ result = _unwrap_json(exp_run.output.result)
39
+ assert isinstance(result, str), "Experiment run output must be a string"
40
+ found = self.keyword in result
41
+ return EvaluationResult(
42
+ score=float(found),
43
+ explanation=(
44
+ f"the string {repr(self.keyword)} was "
45
+ f"{'found' if found else 'not found'} in the output"
46
+ ),
47
+ )
48
+
49
+
50
+ class ContainsAnyKeyword:
51
+ annotator_kind = "CODE"
52
+
53
+ def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
54
+ self.keywords = keywords
55
+ self.name = name or f"ContainsAny({keywords})"
56
+
57
+ def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
58
+ assert exp_run.output is not None
59
+ result = _unwrap_json(exp_run.output.result)
60
+ assert isinstance(result, str), "Experiment run output must be a string"
61
+ found = [keyword for keyword in self.keywords if keyword in result]
62
+ if found:
63
+ explanation = f"the keywords {found} were found in the output"
64
+ else:
65
+ explanation = f"none of the keywords {self.keywords} were found in the output"
66
+ return EvaluationResult(
67
+ score=float(bool(found)),
68
+ explanation=explanation,
69
+ )
70
+
71
+
72
+ class ContainsAllKeywords:
73
+ annotator_kind = "CODE"
74
+
75
+ def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
76
+ self.keywords = keywords
77
+ self.name = name or f"ContainsAll({keywords})"
78
+
79
+ def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
80
+ assert exp_run.output is not None
81
+ result = _unwrap_json(exp_run.output.result)
82
+ assert isinstance(result, str), "Experiment run output must be a string"
83
+ not_found = [keyword for keyword in self.keywords if keyword not in result]
84
+ if not_found:
85
+ contains_all = False
86
+ explanation = f"the keywords {not_found} were not found in the output"
87
+ else:
88
+ contains_all = True
89
+ explanation = f"all of the keywords {self.keywords} were found in the output"
90
+ return EvaluationResult(
91
+ score=float(contains_all),
92
+ explanation=explanation,
93
+ )
94
+
95
+
96
+ class MatchesRegex:
97
+ annotator_kind = "CODE"
98
+
99
+ def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
100
+ if isinstance(pattern, str):
101
+ pattern = re.compile(pattern)
102
+ self.pattern = pattern
103
+ assert isinstance(pattern, re.Pattern)
104
+ self.name = name or f"matches_({pattern})"
105
+
106
+ def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
107
+ assert exp_run.output is not None
108
+ result = _unwrap_json(exp_run.output.result)
109
+ assert isinstance(result, str), "Experiment run output must be a string"
110
+ matches = self.pattern.findall(result)
111
+ if matches:
112
+ explanation = (
113
+ f"the substrings {matches} matched the regex pattern {self.pattern.pattern}"
114
+ )
115
+ else:
116
+ explanation = f"no substrings matched the regex pattern {self.pattern.pattern}"
117
+ return EvaluationResult(
118
+ score=float(bool(matches)),
119
+ explanation=explanation,
120
+ )
121
+
122
+
123
+ # Someday we'll do typing checking in unit tests.
124
+ if TYPE_CHECKING:
125
+ _: ExperimentEvaluator
126
+ _ = JSONParsable()
127
+ _ = ContainsKeyword("test")
@@ -1,70 +1,12 @@
1
- import json
2
1
  import re
3
- from typing import TYPE_CHECKING, Callable, Optional, Type
2
+ from typing import Callable, Optional, Type
4
3
 
5
- from phoenix.datasets.types import (
6
- EvaluationResult,
7
- Example,
8
- ExperimentEvaluator,
9
- ExperimentRun,
10
- JSONSerializable,
11
- )
4
+ from phoenix.datasets.evaluators._utils import _unwrap_json
5
+ from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
12
6
  from phoenix.evals.models.base import BaseModel as LLMBaseModel
13
7
  from phoenix.evals.utils import snap_to_rail
14
8
 
15
9
 
16
- def _unwrap_json(obj: JSONSerializable) -> JSONSerializable:
17
- if isinstance(obj, dict):
18
- if len(obj) == 1:
19
- key = next(iter(obj.keys()))
20
- output = obj[key]
21
- assert isinstance(
22
- output, (dict, list, str, int, float, bool, type(None))
23
- ), "Output must be JSON serializable"
24
- return output
25
- return obj
26
-
27
-
28
- class JSONParsable:
29
- annotator_kind = "CODE"
30
- name = "JSONParsable"
31
-
32
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
33
- assert exp_run.output is not None
34
- output = _unwrap_json(exp_run.output.result)
35
- assert isinstance(output, str), "Experiment run output must be a string"
36
- try:
37
- json.loads(output)
38
- json_parsable = True
39
- except BaseException:
40
- json_parsable = False
41
- return EvaluationResult(
42
- score=int(json_parsable),
43
- )
44
-
45
-
46
- class ContainsKeyword:
47
- annotator_kind = "CODE"
48
-
49
- def __init__(self, keyword: str) -> None:
50
- super().__init__()
51
- self.keyword = keyword
52
- self.name = f"ContainsKeyword({keyword})"
53
-
54
- def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
55
- assert exp_run.output is not None
56
- result = _unwrap_json(exp_run.output.result)
57
- assert isinstance(result, str), "Experiment run output must be a string"
58
- found = self.keyword in result
59
- return EvaluationResult(
60
- score=float(found),
61
- explanation=(
62
- f"the string {repr(self.keyword)} was "
63
- f"{'found' if found else 'not found'} in the output"
64
- ),
65
- )
66
-
67
-
68
10
  class LLMCriteriaEvaluator:
69
11
  annotator_kind = "LLM"
70
12
  _base_template = (
@@ -77,7 +19,7 @@ class LLMCriteriaEvaluator:
77
19
  "EXPLANATION: *a step by step explanation of your reasoning for whether the text meets "
78
20
  "the criteria*\n"
79
21
  "LABEL: *true or false*\n\n"
80
- "Follow this template for the following text:\n\n"
22
+ "Follow this template for the following example:\n\n"
81
23
  "CRITERIA: the text is '{criteria}'\n"
82
24
  "TEXT: {text}\n"
83
25
  "EXPLANATION: "
@@ -142,40 +84,43 @@ class LLMCriteriaEvaluator:
142
84
 
143
85
 
144
86
  def criteria_evaluator_factory(
145
- class_name: str, criteria: str, description: str
87
+ class_name: str, criteria: str, description: str, default_name: str
146
88
  ) -> Type[ExperimentEvaluator]:
89
+ def _init(self, model: LLMBaseModel, name: str = default_name) -> None: # type: ignore
90
+ LLMCriteriaEvaluator.__init__(self, model, criteria, description, name=name)
91
+
147
92
  return type(
148
93
  class_name,
149
94
  (LLMCriteriaEvaluator,),
150
95
  {
151
- "__init__": lambda self, model: LLMCriteriaEvaluator.__init__(
152
- self, model, criteria, description, name=class_name
153
- ),
96
+ "__init__": _init,
154
97
  "__module__": __name__,
155
- "name": class_name,
156
98
  "template": LLMCriteriaEvaluator._format_base_template(criteria, description),
157
99
  },
158
100
  )
159
101
 
160
102
 
161
- LLMConcisenessEvaluator = criteria_evaluator_factory(
162
- class_name="LLMConcisenessEvaluator",
103
+ ConcisenessEvaluator = criteria_evaluator_factory(
104
+ class_name="ConcisenessEvaluator",
163
105
  criteria="concise",
164
106
  description="is just a few sentences and easy to follow",
107
+ default_name="Conciseness",
165
108
  )
166
109
 
167
110
 
168
- LLMHelpfulnessEvaluator = criteria_evaluator_factory(
169
- class_name="LLMHelpfulnessEvaluator",
111
+ HelpfulnessEvaluator = criteria_evaluator_factory(
112
+ class_name="HelpfulnessEvaluator",
170
113
  criteria="helpful",
171
114
  description="provides useful information",
115
+ default_name="Helpfulness",
172
116
  )
173
117
 
174
118
 
175
- LLMCoherenceEvaluator = criteria_evaluator_factory(
176
- class_name="LLMCoherenceEvaluator",
119
+ CoherenceEvaluator = criteria_evaluator_factory(
120
+ class_name="CoherenceEvaluator",
177
121
  criteria="coherent",
178
- description="is coherent, well-structured, and organized",
122
+ description="is coherent, well-structured, and logically sound",
123
+ default_name="Coherence",
179
124
  )
180
125
 
181
126
 
@@ -266,10 +211,3 @@ class RelevanceEvaluator:
266
211
  formatted_template = self._format_eval_template(example, exp_run)
267
212
  unparsed_response = await self.model._async_generate(formatted_template)
268
213
  return self._parse_eval_output(unparsed_response)
269
-
270
-
271
- # Someday we'll do typing checking in unit tests.
272
- if TYPE_CHECKING:
273
- _: ExperimentEvaluator
274
- _ = JSONParsable()
275
- _ = ContainsKeyword("test")
@@ -61,6 +61,7 @@ from phoenix.datasets.types import (
61
61
  from phoenix.evals.executors import get_executor_on_sync_context
62
62
  from phoenix.evals.models.rate_limiters import RateLimiter
63
63
  from phoenix.evals.utils import get_tqdm_progress_bar_formatter
64
+ from phoenix.session.session import active_session
64
65
  from phoenix.trace.attributes import flatten
65
66
  from phoenix.utilities.json import jsonify
66
67
 
@@ -78,12 +79,23 @@ def _get_base_url() -> str:
78
79
  return base_url if base_url.endswith("/") else base_url + "/"
79
80
 
80
81
 
82
def _get_web_base_url() -> str:
    """Return the web UI base URL, always terminated by a slash.

    When a session is active its URL is used; otherwise this falls back to
    ``_get_base_url()``.

    Returns:
        str: the web UI base URL
    """
    if session := active_session():
        url = session.url
        # Callers append paths such as "datasets/..." directly, so make the
        # session branch honour the same trailing-slash guarantee that
        # `_get_base_url` provides.
        return url if url.endswith("/") else url + "/"
    return _get_base_url()
91
+
92
+
81
93
  def _get_experiment_url(*, dataset_id: str, experiment_id: str) -> str:
82
- return f"{_get_base_url()}datasets/{dataset_id}/compare?experimentId={experiment_id}"
94
+ return f"{_get_web_base_url()}datasets/{dataset_id}/compare?experimentId={experiment_id}"
83
95
 
84
96
 
85
97
  def _get_dataset_experiments_url(*, dataset_id: str) -> str:
86
- return f"{_get_base_url()}datasets/{dataset_id}/experiments"
98
+ return f"{_get_web_base_url()}datasets/{dataset_id}/experiments"
87
99
 
88
100
 
89
101
  def _phoenix_client() -> httpx.Client:
@@ -134,7 +146,9 @@ def run_experiment(
134
146
 
135
147
  dataset_experiments_url = _get_dataset_experiments_url(dataset_id=dataset.id)
136
148
  experiment_compare_url = _get_experiment_url(dataset_id=dataset.id, experiment_id=experiment_id)
137
- print(f"🧪 Experiment started: {experiment_compare_url}")
149
+ print("🧪 Experiment started.")
150
+ print(f"📺 View dataset experiments: {dataset_experiments_url}")
151
+ print(f"🔗 View this experiment: {experiment_compare_url}")
138
152
 
139
153
  errors: Tuple[Optional[Type[BaseException]], ...]
140
154
  if not hasattr(rate_limit_errors, "__iter__"):
@@ -278,7 +292,8 @@ def run_experiment(
278
292
  project_name=project_name,
279
293
  )
280
294
 
281
- print(f"✅ Task runs completed. View all experiments: {dataset_experiments_url}")
295
+ print("✅ Task runs completed.")
296
+ print("🧠 Evaluation started.")
282
297
 
283
298
  if evaluators is not None:
284
299
  _evaluate_experiment(experiment, evaluators, dataset.examples, client)
@@ -443,6 +458,7 @@ def _evaluate_experiment(
443
458
  max_retries=0,
444
459
  exit_on_error=False,
445
460
  fallback_return_value=None,
461
+ tqdm_bar_format=get_tqdm_progress_bar_formatter("running experiment evaluations"),
446
462
  )
447
463
  evaluation_payloads, _execution_details = executor.run(evaluation_inputs)
448
464
  for payload in evaluation_payloads:
@@ -1,8 +1,9 @@
1
1
  from typing import Any
2
2
 
3
- from phoenix.server.api.routers.v1 import V1_ROUTES
4
3
  from starlette.schemas import SchemaGenerator
5
4
 
5
+ from phoenix.server.api.routers.v1 import V1_ROUTES
6
+
6
7
  OPENAPI_SCHEMA_GENERATOR = SchemaGenerator(
7
8
  {"openapi": "3.0.0", "info": {"title": "Arize-Phoenix API", "version": "1.0"}}
8
9
  )
@@ -59,6 +59,7 @@ class SpanKind(Enum):
59
59
  embedding = "EMBEDDING"
60
60
  agent = "AGENT"
61
61
  reranker = "RERANKER"
62
+ evaluator = "EVALUATOR"
62
63
  unknown = "UNKNOWN"
63
64
 
64
65
  @classmethod