arize-phoenix 2.5.0__tar.gz → 2.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/PKG-INFO +1 -1
  2. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/config.py +32 -7
  3. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/evals.py +53 -0
  4. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/fixtures.py +46 -0
  5. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/evaluators.py +4 -0
  6. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/classify.py +16 -6
  7. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/generate.py +6 -3
  8. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/anthropic.py +3 -4
  9. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/base.py +1 -0
  10. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/bedrock.py +4 -2
  11. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/openai.py +2 -0
  12. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/vertex.py +6 -0
  13. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/templates/default_templates.py +0 -7
  14. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/index.js +1 -1
  15. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/session/evaluation.py +16 -10
  16. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/session/session.py +19 -0
  17. arize_phoenix-2.7.0/src/phoenix/trace/errors.py +5 -0
  18. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/span_evaluations.py +46 -61
  19. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/trace_dataset.py +111 -4
  20. arize_phoenix-2.7.0/src/phoenix/version.py +1 -0
  21. arize_phoenix-2.5.0/src/phoenix/version.py +0 -1
  22. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/.gitignore +0 -0
  23. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/IP_NOTICE +0 -0
  24. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/LICENSE +0 -0
  25. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/README.md +0 -0
  26. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/pyproject.toml +0 -0
  27. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/__init__.py +0 -0
  28. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/__init__.py +0 -0
  29. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/embedding_dimension.py +0 -0
  30. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/model.py +0 -0
  31. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/model_schema.py +0 -0
  32. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/model_schema_adapter.py +0 -0
  33. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/traces.py +0 -0
  34. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/__init__.py +0 -0
  35. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/dataset.py +0 -0
  36. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/errors.py +0 -0
  37. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/schema.py +0 -0
  38. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/validation.py +0 -0
  39. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datetime_utils.py +0 -0
  40. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/exceptions.py +0 -0
  41. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/__init__.py +0 -0
  42. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/__init__.py +0 -0
  43. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/__init__.py +0 -0
  44. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/executor.py +0 -0
  45. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/processing.py +0 -0
  46. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/__init__.py +0 -0
  47. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/litellm.py +0 -0
  48. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/rate_limiters.py +0 -0
  49. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/vertexai.py +0 -0
  50. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/retrievals.py +0 -0
  51. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/templates/__init__.py +0 -0
  52. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/templates/template.py +0 -0
  53. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/utils/__init__.py +0 -0
  54. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/utils/threads.py +0 -0
  55. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/README.md +0 -0
  56. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/__init__.py +0 -0
  57. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/binning.py +0 -0
  58. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/metrics.py +0 -0
  59. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/mixins.py +0 -0
  60. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/retrieval_metrics.py +0 -0
  61. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/timeseries.py +0 -0
  62. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/wrappers.py +0 -0
  63. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/__init__.py +0 -0
  64. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/clustering.py +0 -0
  65. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/pointcloud.py +0 -0
  66. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/projectors.py +0 -0
  67. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  68. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/py.typed +0 -0
  69. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/__init__.py +0 -0
  70. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/__init__.py +0 -0
  71. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/context.py +0 -0
  72. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/helpers.py +0 -0
  73. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  74. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  75. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  76. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  77. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  78. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  79. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  80. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  81. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  82. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/__init__.py +0 -0
  83. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/interceptor.py +0 -0
  84. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/schema.py +0 -0
  85. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Cluster.py +0 -0
  86. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  87. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Dataset.py +0 -0
  88. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DatasetInfo.py +0 -0
  89. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DatasetRole.py +0 -0
  90. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  91. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Dimension.py +0 -0
  92. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  93. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  94. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DimensionType.py +0 -0
  95. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  96. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
  97. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
  98. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  99. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  100. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Evaluation.py +0 -0
  101. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
  102. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Event.py +0 -0
  103. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  104. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
  105. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  106. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Functionality.py +0 -0
  107. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/MimeType.py +0 -0
  108. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Model.py +0 -0
  109. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/NumericRange.py +0 -0
  110. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  111. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  112. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Retrieval.py +0 -0
  113. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  114. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Segments.py +0 -0
  115. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/SortDir.py +0 -0
  116. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Span.py +0 -0
  117. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  118. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  119. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  120. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  121. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/__init__.py +0 -0
  122. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/node.py +0 -0
  123. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/pagination.py +0 -0
  124. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/app.py +0 -0
  125. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/evaluation_handler.py +0 -0
  126. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/main.py +0 -0
  127. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/span_handler.py +0 -0
  128. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  129. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  130. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  131. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  132. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  133. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  134. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  135. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  136. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/favicon.ico +0 -0
  137. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/index.css +0 -0
  138. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/modernizr.js +0 -0
  139. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/templates/__init__.py +0 -0
  140. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/templates/index.html +0 -0
  141. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/thread_server.py +0 -0
  142. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/trace_handler.py +0 -0
  143. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/services.py +0 -0
  144. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/session/__init__.py +0 -0
  145. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/__init__.py +0 -0
  146. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/__init__.py +0 -0
  147. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/filter.py +0 -0
  148. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/helpers.py +0 -0
  149. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/missing.py +0 -0
  150. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/query.py +0 -0
  151. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/evaluation_conventions.py +0 -0
  152. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/exporter.py +0 -0
  153. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/fixtures.py +0 -0
  154. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/langchain/__init__.py +0 -0
  155. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/langchain/instrumentor.py +0 -0
  156. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/langchain/tracer.py +0 -0
  157. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/llama_index/__init__.py +0 -0
  158. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/llama_index/callback.py +0 -0
  159. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/llama_index/debug_callback.py +0 -0
  160. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/llama_index/streaming.py +0 -0
  161. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/openai/__init__.py +0 -0
  162. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/openai/instrumentor.py +0 -0
  163. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/otel.py +0 -0
  164. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/schemas.py +0 -0
  165. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/semantic_conventions.py +0 -0
  166. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/span_json_decoder.py +0 -0
  167. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/span_json_encoder.py +0 -0
  168. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/tracer.py +0 -0
  169. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/utils.py +0 -0
  170. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/v1/__init__.py +0 -0
  171. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
  172. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
  173. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/utilities/__init__.py +0 -0
  174. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/utilities/error_handling.py +0 -0
  175. {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/utilities/logging.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arize-phoenix
-Version: 2.5.0
+Version: 2.7.0
 Summary: ML Observability in your notebook
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
src/phoenix/config.py
@@ -12,6 +12,11 @@ ENV_PHOENIX_COLLECTOR_ENDPOINT = "PHOENIX_COLLECTOR_ENDPOINT"
 The endpoint traces and evals are sent to. This must be set if the Phoenix
 server is running on a remote instance.
 """
+ENV_WORKING_DIR = "PHOENIX_WORKING_DIR"
+"""
+The directory in which to save, load, and export datasets. This directory must
+be accessible by both the Phoenix server and the notebook environment.
+"""


 def _get_temp_path() -> Path:
@@ -36,13 +41,16 @@ def get_running_pid() -> Optional[int]:
     return None


-for path in (
-    ROOT_DIR := Path.home().resolve() / ".phoenix",
-    EXPORT_DIR := ROOT_DIR / "exports",
-    DATASET_DIR := ROOT_DIR / "datasets",
-    TRACE_DATASET_DIR := ROOT_DIR / "trace_datasets",
-):
-    path.mkdir(parents=True, exist_ok=True)
+def get_working_dir() -> Path:
+    """
+    Get the working directory for saving, loading, and exporting datasets.
+    """
+    working_dir_str = os.getenv(ENV_WORKING_DIR)
+    if working_dir_str is not None:
+        return Path(working_dir_str)
+    # Fall back to ~/.phoenix if PHOENIX_WORKING_DIR is not set
+    return Path.home().resolve() / ".phoenix"
+

 PHOENIX_DIR = Path(__file__).resolve().parent
 # Server config
@@ -53,6 +61,23 @@ HOST = "0.0.0.0"
 PORT = 6006
 # The prefix of datasets that are auto-assigned a name
 GENERATED_DATASET_NAME_PREFIX = "phoenix_dataset_"
+# The work directory for saving, loading, and exporting datasets
+WORKING_DIR = get_working_dir()
+
+try:
+    for path in (
+        ROOT_DIR := WORKING_DIR,
+        EXPORT_DIR := ROOT_DIR / "exports",
+        DATASET_DIR := ROOT_DIR / "datasets",
+        TRACE_DATASET_DIR := ROOT_DIR / "trace_datasets",
+    ):
+        path.mkdir(parents=True, exist_ok=True)
+except Exception as e:
+    print(
+        f"⚠️ Failed to initialize the working directory at {WORKING_DIR} due to an error: {str(e)}"
+    )
+    print("⚠️ While phoenix will still run, you will not be able to save, load, or export data")
+    print("ℹ️ To change, set the `{ENV_WORKING_DIR}` environment variable before importing phoenix.")


 def get_exported_files(directory: Path) -> List[Path]:
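
The net effect of these config.py changes is that the working directory, previously hard-coded to `~/.phoenix`, is now overridable. A minimal sketch of the override (the path is illustrative, and the variable must be set before phoenix is imported, since the directories are created at import time):

    import os

    # Illustrative path; it must be writable by both the Phoenix server
    # and the notebook environment.
    os.environ["PHOENIX_WORKING_DIR"] = "/mnt/shared/phoenix"

    import phoenix as px  # exports/, datasets/, trace_datasets/ now live under the new path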
src/phoenix/core/evals.py
@@ -9,10 +9,12 @@ from typing import DefaultDict, Dict, List, Optional, Set, Tuple

 import numpy as np
 from google.protobuf.json_format import MessageToDict
+from pandas import DataFrame, Index, MultiIndex
 from typing_extensions import TypeAlias, assert_never

 import phoenix.trace.v1 as pb
 from phoenix.trace.schemas import SpanID, TraceID
+from phoenix.trace.span_evaluations import DocumentEvaluations, Evaluations, SpanEvaluations

 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
@@ -171,3 +173,54 @@ class Evals:
             if result.HasField("score") and document_position < num_documents:
                 scores[document_position] = result.score.value
         return scores
+
+    def export_evaluations(self) -> List[Evaluations]:
+        evaluations: List[Evaluations] = []
+        evaluations.extend(self._export_span_evaluations())
+        evaluations.extend(self._export_document_evaluations())
+        return evaluations
+
+    def _export_span_evaluations(self) -> List[SpanEvaluations]:
+        span_evaluations = []
+        with self._lock:
+            span_evaluations_by_name = tuple(self._span_evaluations_by_name.items())
+        for eval_name, _span_evaluations_by_id in span_evaluations_by_name:
+            span_ids = []
+            rows = []
+            with self._lock:
+                span_evaluations_by_id = tuple(_span_evaluations_by_id.items())
+            for span_id, pb_eval in span_evaluations_by_id:
+                span_ids.append(span_id)
+                rows.append(MessageToDict(pb_eval.result))
+            dataframe = DataFrame(rows, index=Index(span_ids, name="context.span_id"))
+            span_evaluations.append(SpanEvaluations(eval_name, dataframe))
+        return span_evaluations
+
+    def _export_document_evaluations(self) -> List[DocumentEvaluations]:
+        evaluations = []
+        with self._lock:
+            document_evaluations_by_name = tuple(self._document_evaluations_by_name.items())
+        for eval_name, _document_evaluations_by_id in document_evaluations_by_name:
+            span_ids = []
+            document_positions = []
+            rows = []
+            with self._lock:
+                document_evaluations_by_id = tuple(_document_evaluations_by_id.items())
+            for span_id, _document_evaluations_by_position in document_evaluations_by_id:
+                with self._lock:
+                    document_evaluations_by_position = sorted(
+                        _document_evaluations_by_position.items()
+                    )  # ensure the evals are sorted by document position
+                for document_position, pb_eval in document_evaluations_by_position:
+                    span_ids.append(span_id)
+                    document_positions.append(document_position)
+                    rows.append(MessageToDict(pb_eval.result))
+            dataframe = DataFrame(
+                rows,
+                index=MultiIndex.from_arrays(
+                    (span_ids, document_positions),
+                    names=("context.span_id", "document_position"),
+                ),
+            )
+            evaluations.append(DocumentEvaluations(eval_name, dataframe))
+        return evaluations
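
The new `export_evaluations` method turns the protobuf records held in the `Evals` store back into dataframe-backed `Evaluations` objects. A sketch of what a consumer gets back, assuming `evals` is a populated `Evals` instance (in practice this store is reached through the session API shown further below):

    for evaluations in evals.export_evaluations():
        # SpanEvaluations dataframes are indexed by "context.span_id";
        # DocumentEvaluations dataframes use a
        # ("context.span_id", "document_position") MultiIndex.
        print(evaluations.eval_name, evaluations.dataframe.index.names)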
src/phoenix/datasets/fixtures.py
@@ -240,6 +240,51 @@ click_through_rate_fixture = Fixture(
     reference_file_name="click_through_rate_train.parquet",
 )

+chatbot_queries_schema = Schema(
+    prediction_id_column_name="id",
+    prompt_column_names=RetrievalEmbeddingColumnNames(
+        vector_column_name="prompt",
+        raw_data_column_name="prompt_text",
+        context_retrieval_ids_column_name="document_ids",
+        context_retrieval_scores_column_name="document_scores",
+    ),
+    response_column_names="response",
+    tag_column_names=[
+        "answer_relevancy",
+        "context_relevancy",
+        "faithfulness",
+        "document_similarity_0",
+        "document_similarity_1",
+        "openai_relevance_0",
+        "openai_relevance_1",
+        "user_feedback",
+    ],
+)
+
+chatbot_database_schema = Schema(
+    prediction_id_column_name="document_id",
+    prompt_column_names=EmbeddingColumnNames(
+        vector_column_name="text_vector",
+        raw_data_column_name="text",
+    ),
+)
+
+chatbot_fixture = Fixture(
+    name="chatbot",
+    description="""
+    Investigate RAG performance for a chatbot built on top of Arize's documentation.
+    This use-case highlights how embedding visualizations for a RAG application can
+    highlight issues with the application's retrieval and performance.
+
+    The data contains relevance metrics generated by LLM Evals as well as RAGAS.
+    """,
+    primary_schema=chatbot_queries_schema,
+    corpus_schema=chatbot_database_schema,
+    prefix="unstructured/llm/chatbot",
+    primary_file_name="chatbot_queries_with_ragas.parquet",
+    corpus_file_name="chatbot_database_ds.parquet",
+)
+
 wide_data_primary_schema = Schema(
     actual_label_column_name="actual_label",
     prediction_label_column_name="predicted_label",
@@ -363,6 +408,7 @@ FIXTURES: Tuple[Fixture, ...] = (
     deep_data_fixture,
     llm_summarization_fixture,
     wikipedia_fixture,
+    chatbot_fixture,
 )
 NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}

src/phoenix/experimental/evals/evaluators.py
@@ -36,6 +36,10 @@ class LLMEvaluator:
         self._model = model
         self._template = template

+    @property
+    def default_concurrency(self) -> int:
+        return self._model.default_concurrency
+
     def reload_client(self) -> None:
         self._model.reload_client()

src/phoenix/experimental/evals/functions/classify.py
@@ -73,7 +73,7 @@ def llm_classify(
     include_prompt: bool = False,
     include_response: bool = False,
     run_sync: bool = False,
-    concurrency: int = 20,
+    concurrency: Optional[int] = None,
 ) -> pd.DataFrame:
     """Classifies each input row of the dataframe using an LLM. Returns a pandas.DataFrame
     where the first column is named `label` and contains the classification labels. An optional
@@ -116,8 +116,9 @@ def llm_classify(
         run_sync (bool, default=False): If True, forces synchronous request submission. Otherwise
             evaluations will be run asynchronously if possible.

-        concurrency (int, default=20): The number of concurrent evals if async submission is
-            possible.
+        concurrency (Optional[int], default=None): The number of concurrent evals if async
+            submission is possible. If not provided, a recommended default concurrency is set on a
+            per-model basis.

     Returns:
         pandas.DataFrame: A dataframe where the `label` column (at column position 0) contains
@@ -127,6 +128,7 @@ def llm_classify(
         from the entries in the rails argument or "NOT_PARSABLE" if the model's output could
         not be parsed.
     """
+    concurrency = concurrency or model.default_concurrency
     # clients need to be reloaded to ensure that async evals work properly
     model.reload_client()
@@ -353,7 +355,7 @@ def run_evals(
     provide_explanation: bool = False,
     use_function_calling_if_available: bool = True,
     verbose: bool = False,
-    concurrency: int = 20,
+    concurrency: Optional[int] = None,
 ) -> List[DataFrame]:
     """
     Applies a list of evaluators to a dataframe. Outputs a list of dataframes in
@@ -381,13 +383,21 @@ def run_evals(
             as model invocation parameters and details about retries and snapping to
             rails.

-        concurrency (int, optional): The number of concurrent evals if async
-            submission is possible.
+        concurrency (Optional[int], default=None): The number of concurrent evals if async
+            submission is possible. If not provided, a recommended default concurrency is set on a
+            per-model basis.

     Returns:
         List[DataFrame]: A list of dataframes, one for each evaluator, all of
             which have the same number of rows as the input dataframe.
     """
+    # use the minimum default concurrency of all the models
+    if concurrency is None:
+        if len(evaluators) == 0:
+            concurrency = 1
+        else:
+            concurrency = min(evaluator.default_concurrency for evaluator in evaluators)
+
     # clients need to be reloaded to ensure that async evals work properly
     for evaluator in evaluators:
         evaluator.reload_client()
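
With `concurrency` now defaulting to `None`, callers no longer need to tune it: `llm_classify` and `llm_generate` fall back to `model.default_concurrency`, and `run_evals` takes the minimum across its evaluators' models, so a single `GeminiModel` caps the whole batch at 5. A sketch, assuming `df` and `template` already exist and `OPENAI_API_KEY` is set; the model construction is illustrative:

    from phoenix.experimental.evals import OpenAIModel, llm_classify

    model = OpenAIModel()  # inherits default_concurrency=20 from BaseEvalModel
    result = llm_classify(
        dataframe=df,
        model=model,
        template=template,
        rails=["factual", "hallucinated"],
        # concurrency omitted -> resolves to model.default_concurrency
    )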
src/phoenix/experimental/evals/functions/generate.py
@@ -31,7 +31,7 @@ def llm_generate(
     include_prompt: bool = False,
     include_response: bool = False,
     run_sync: bool = False,
-    concurrency: int = 20,
+    concurrency: Optional[int] = None,
 ) -> pd.DataFrame:
     """
     Generates a text using a template using an LLM. This function is useful
@@ -70,14 +70,17 @@ def llm_generate(
         run_sync (bool, default=False): If True, forces synchronous request submission. Otherwise
             evaluations will be run asynchronously if possible.

-        concurrency (int, default=20): The number of concurrent evals if async submission is
-            possible.
+        concurrency (Optional[int], default=None): The number of concurrent evals if async
+            submission is possible. If not provided, a recommended default concurrency is set on a
+            per-model basis.

     Returns:
         generations_dataframe (pandas.DataFrame): A dataframe where each row
             represents the generated output

     """
+    concurrency = concurrency or model.default_concurrency
+
     # clients need to be reloaded to ensure that async evals work properly
     model.reload_client()
src/phoenix/experimental/evals/models/anthropic.py
@@ -1,4 +1,3 @@
-import logging
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Dict, List, Optional

@@ -8,8 +7,6 @@ from phoenix.experimental.evals.models.rate_limiters import RateLimiter
 if TYPE_CHECKING:
     from tiktoken import Encoding

-logger = logging.getLogger(__name__)
-
 MODEL_TOKEN_LIMIT_MAPPING = {
     "claude-2.1": 200000,
     "claude-2.0": 100000,
@@ -80,7 +77,6 @@ class AnthropicModel(BaseEvalModel):
         try:
             encoding = self._tiktoken.encoding_for_model(self.model)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
             encoding = self._tiktoken.get_encoding("cl100k_base")
         self._tiktoken_encoding = encoding

@@ -149,6 +145,9 @@ class AnthropicModel(BaseEvalModel):
         return _completion_with_retry(**kwargs)

     async def _async_generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
+        # instruction is an invalid input to Anthropic models, it is passed in by
+        # BaseEvalModel.__call__ and needs to be removed
+        kwargs.pop("instruction", None)
         invocation_parameters = self.invocation_parameters()
         invocation_parameters.update(kwargs)
         response = await self._async_generate_with_retry(
src/phoenix/experimental/evals/models/base.py
@@ -58,6 +58,7 @@ def set_verbosity(

 @dataclass
 class BaseEvalModel(ABC):
+    default_concurrency: int = 20
     _verbose: bool = False
     _rate_limiter: RateLimiter = field(default_factory=RateLimiter)
src/phoenix/experimental/evals/models/bedrock.py
@@ -87,7 +87,6 @@ class BedrockModel(BaseEvalModel):
         try:
             encoding = self._tiktoken.encoding_for_model(self.model_id)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
             encoding = self._tiktoken.get_encoding("cl100k_base")
         self._tiktoken_encoding = encoding

@@ -165,7 +164,7 @@ class BedrockModel(BaseEvalModel):
                 "temperature": self.temperature,
                 "topP": self.top_p,
                 "maxTokens": self.max_tokens,
-                "stopSequences": [self.stop_sequences],
+                "stopSequences": self.stop_sequences,
             },
             **self.extra_parameters,
         }

@@ -204,6 +203,9 @@ class BedrockModel(BaseEvalModel):
         elif self.model_id.startswith("anthropic"):
             body = json.loads(response.get("body").read().decode())
             return body.get("completion")
+        elif self.model_id.startswith("amazon"):
+            body = json.loads(response.get("body").read())
+            return body.get("results")[0].get("outputText")
         else:
             body = json.loads(response.get("body").read())
             return body.get("results")[0].get("data").get("outputText")
src/phoenix/experimental/evals/models/openai.py
@@ -31,6 +31,8 @@ MODEL_TOKEN_LIMIT_MAPPING = {
     "gpt-4-0613": 8192,  # Current gpt-4 default
     "gpt-4-32k-0314": 32768,
     "gpt-4-32k-0613": 32768,
+    "gpt-4-1106-preview": 128000,
+    "gpt-4-vision-preview": 128000,
 }
 LEGACY_COMPLETION_API_MODELS = ("gpt-3.5-turbo-instruct",)
 logger = logging.getLogger(__name__)
src/phoenix/experimental/evals/models/vertex.py
@@ -21,6 +21,9 @@ MODEL_TOKEN_LIMIT_MAPPING = {

 @dataclass
 class GeminiModel(BaseEvalModel):
+    # The vertex SDK runs into connection pool limits at high concurrency
+    default_concurrency: int = 5
+
     model: str = "gemini-pro"
     """The model name to use."""
     temperature: float = 0.0
@@ -50,6 +53,9 @@ class GeminiModel(BaseEvalModel):
             max_retries=self.max_retries,
         )

+    def reload_client(self) -> None:
+        self._init_client()
+
     def _init_client(self) -> None:
         try:
             from google.api_core import exceptions  # type:ignore
src/phoenix/experimental/evals/templates/default_templates.py
@@ -73,13 +73,6 @@ your response.
 [END DATA]

 Is the answer above factual or hallucinated based on the query and reference text?
-
-Your response should be a single word: either "factual" or "hallucinated", and
-it should not include any other text or characters. "hallucinated" indicates that the answer
-provides factually inaccurate information to the query based on the reference text. "factual"
-indicates that the answer to the question is correct relative to the reference text, and does not
-contain made up information. Please read the query and reference text carefully before determining
-your response.
 """
 HALLUCINATION_PROMPT_TEMPLATE_WITH_EXPLANATION = """
 In this task, you will be presented with a query, a reference text and an answer. The answer is
src/phoenix/server/static/index.js
@@ -6717,7 +6717,7 @@ fragment SpanEvaluationsTable_evals on Span {
     gap: var(--ac-global-dimension-static-size-200);
 `,children:i.map((o,l)=>x("li",{children:_(ft,{padding:"size-200",backgroundColor:"purple-100",borderColor:"purple-700",borderWidth:"thin",borderRadius:"medium",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"embedded text"}),x("pre",{css:ee`
     margin: var(--ac-global-dimension-static-size-100) 0;
-`,children:o[mtt]})]})},l))})}):null})}function Xxn(t){let{spanAttributes:e}=t,n=(0,br.useMemo)(()=>{let l=e[wr.tool];return typeof l=="object"?l:{}},[e]);if(!(Object.keys(n).length>0))return null;let r=n[vB.name],a=n[vB.description],o=n[vB.parameters];return x(Be,{direction:"column",gap:"size-200",children:x(uu,{title:"Tool"+(typeof r=="string"?`: ${r}`:""),...eg,children:_(Be,{direction:"column",children:[a!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",backgroundColor:"light",children:_(Be,{direction:"column",alignItems:"start",gap:"size-50",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Description"}),x(Me,{children:a})]})}):null,o!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",children:_(Be,{direction:"column",alignItems:"start",width:"100%",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Parameters"}),x(Tc,{value:JSON.stringify(o),mimeType:"json"})]})}):null]})})})}var Sxn=["irrelevant"];function Gse({document:t,documentEvaluations:e,backgroundColor:n,borderColor:i,labelColor:r}){let a=t[htt],o=e&&e.length;return x(ft,{borderRadius:"medium",backgroundColor:n,borderColor:i,borderWidth:"thin",children:_(Be,{direction:"column",children:[x(ft,{width:"100%",borderBottomWidth:"thin",borderBottomColor:i,children:_(Be,{direction:"row",justifyContent:"space-between",margin:"size-200",alignItems:"center",children:[_(Be,{direction:"row",gap:"size-50",alignItems:"center",children:[x(pt,{svg:x(Et.FileOutline,{})}),_(Nn,{level:4,children:["document ",t[Itt]]})]}),typeof t[Wse]=="number"&&x(Zs,{color:r,children:`score ${mh(t[Wse])}`})]})}),x("pre",{css:ee`
+`,children:o[mtt]})]})},l))})}):null})}function Xxn(t){let{spanAttributes:e}=t,n=(0,br.useMemo)(()=>{let l=e[wr.tool];return typeof l=="object"?l:{}},[e]);if(!(Object.keys(n).length>0))return null;let r=n[vB.name],a=n[vB.description],o=n[vB.parameters];return x(Be,{direction:"column",gap:"size-200",children:x(uu,{title:"Tool"+(typeof r=="string"?`: ${r}`:""),...eg,children:_(Be,{direction:"column",children:[a!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",backgroundColor:"light",children:_(Be,{direction:"column",alignItems:"start",gap:"size-50",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Description"}),x(Me,{children:a})]})}):null,o!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",children:_(Be,{direction:"column",alignItems:"start",width:"100%",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Parameters"}),x(Tc,{value:JSON.stringify(o),mimeType:"json"})]})}):null]})})})}var Sxn=["irrelevant","unrelated"];function Gse({document:t,documentEvaluations:e,backgroundColor:n,borderColor:i,labelColor:r}){let a=t[htt],o=e&&e.length;return x(ft,{borderRadius:"medium",backgroundColor:n,borderColor:i,borderWidth:"thin",children:_(Be,{direction:"column",children:[x(ft,{width:"100%",borderBottomWidth:"thin",borderBottomColor:i,children:_(Be,{direction:"row",justifyContent:"space-between",margin:"size-200",alignItems:"center",children:[_(Be,{direction:"row",gap:"size-50",alignItems:"center",children:[x(pt,{svg:x(Et.FileOutline,{})}),_(Nn,{level:4,children:["document ",t[Itt]]})]}),typeof t[Wse]=="number"&&x(Zs,{color:r,children:`score ${mh(t[Wse])}`})]})}),x("pre",{css:ee`
     padding: var(--ac-global-dimension-static-size-200);
     white-space: normal;
     margin: 0;
src/phoenix/session/evaluation.py
@@ -9,6 +9,7 @@ import math
 from time import sleep
 from typing import (
     Any,
+    Iterator,
     Optional,
     Sequence,
     Tuple,
@@ -33,24 +34,29 @@ __all__ = [
 from phoenix.trace.span_evaluations import Evaluations


-def add_evaluations(
-    exporter: HttpExporter,
-    evaluations: pd.DataFrame,
-    evaluation_name: str,
-) -> None:
-    index_names = evaluations.index.names
-    for index, row in evaluations.iterrows():
+def encode_evaluations(evaluations: Evaluations) -> Iterator[pb.Evaluation]:
+    dataframe = evaluations.dataframe
+    eval_name = evaluations.eval_name
+    index_names = dataframe.index.names
+    for index, row in dataframe.iterrows():
         subject_id = _extract_subject_id_from_index(
             index_names,
             cast(Union[str, Tuple[Any]], index),
         )
         if (result := _extract_result(row)) is None:
             continue
-        evaluation = pb.Evaluation(
-            name=evaluation_name,
+        yield pb.Evaluation(
+            name=eval_name,
             result=result,
             subject_id=subject_id,
         )
+
+
+def add_evaluations(
+    exporter: HttpExporter,
+    evaluations: Evaluations,
+) -> None:
+    for evaluation in encode_evaluations(evaluations):
         exporter.export(evaluation)


@@ -130,7 +136,7 @@ def log_evaluations(
         return
     exporter = HttpExporter(endpoint=endpoint, host=host, port=port)
     for eval in filter(bool, evals):
-        add_evaluations(exporter, eval.dataframe, eval.eval_name)
+        add_evaluations(exporter, eval)
     with tqdm(total=n, desc="Sending Evaluations") as pbar:
         while n:
             sleep(0.1)
src/phoenix/session/session.py
@@ -30,6 +30,7 @@ from phoenix.pointcloud.umap_parameters import get_umap_parameters
 from phoenix.server.app import create_app
 from phoenix.server.thread_server import ThreadServer
 from phoenix.services import AppService
+from phoenix.session.evaluation import encode_evaluations
 from phoenix.trace.dsl import SpanFilter
 from phoenix.trace.dsl.query import SpanQuery
 from phoenix.trace.otel import encode
@@ -46,6 +47,8 @@ logger = logging.getLogger(__name__)
 # type workaround
 # https://github.com/python/mypy/issues/5264#issuecomment-399407428
 if TYPE_CHECKING:
+    from phoenix.trace import Evaluations
+
     _BaseList = UserList[pd.DataFrame]
 else:
     _BaseList = UserList
@@ -123,6 +126,10 @@ class Session(ABC):
             self.traces.put(encode(span))

         self.evals: Evals = Evals()
+        if trace_dataset:
+            for evaluations in trace_dataset.evaluations:
+                for pb_evaluation in encode_evaluations(evaluations):
+                    self.evals.put(pb_evaluation)

         self.host = host or get_env_host()
         self.port = port or get_env_port()
@@ -213,6 +220,15 @@ class Session(ABC):
             return None
         return pd.json_normalize(data, max_level=1).set_index("context.span_id", drop=False)

+    def get_evaluations(self) -> List["Evaluations"]:
+        return self.evals.export_evaluations()
+
+    def get_trace_dataset(self) -> Optional[TraceDataset]:
+        if (dataframe := self.get_spans_dataframe()) is None:
+            return None
+        evaluations = self.get_evaluations()
+        return TraceDataset(dataframe=dataframe, evaluations=evaluations)
+

 _session: Optional[Session] = None

@@ -479,6 +495,9 @@ def _get_url(host: str, port: int, notebook_env: NotebookEnvironment) -> str:
     if notebook_env == NotebookEnvironment.DATABRICKS:
         context = _get_databricks_context()
         return f"{_get_databricks_notebook_base_url(context)}/{port}/"
+    if host == "0.0.0.0" or host == "127.0.0.1":
+        # The app is running locally, so use localhost
+        return f"http://localhost:{port}/"
     return f"http://{host}:{port}/"
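
Taken together, `get_evaluations` and `get_trace_dataset` let a notebook pull spans and evals back out of a running session, for example to persist them across restarts. A minimal sketch (assumes a session is already running and has ingested spans):

    import phoenix as px

    session = px.active_session()
    evals = session.get_evaluations()  # List[Evaluations]
    tds = session.get_trace_dataset()  # None if no spans have been ingested
    if tds is not None:
        print(len(tds.dataframe), [e.eval_name for e in tds.evaluations])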
src/phoenix/trace/errors.py (new file)
@@ -0,0 +1,5 @@
+from phoenix.exceptions import PhoenixException
+
+
+class InvalidParquetMetadataError(PhoenixException):
+    pass