arize-phoenix 0.0.50rc0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (159) hide show
  1. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/PKG-INFO +13 -7
  2. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/README.md +7 -2
  3. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/pyproject.toml +21 -28
  4. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/__init__.py +1 -1
  5. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/__init__.py +11 -10
  6. arize_phoenix-1.1.1/src/phoenix/experimental/evals/evaluators.py +139 -0
  7. arize_phoenix-1.1.1/src/phoenix/experimental/evals/functions/__init__.py +4 -0
  8. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/classify.py +125 -76
  9. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/generate.py +32 -9
  10. arize_phoenix-1.1.1/src/phoenix/experimental/evals/models/__init__.py +6 -0
  11. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/base.py +10 -8
  12. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/openai.py +144 -77
  13. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/vertexai.py +1 -1
  14. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/retrievals.py +6 -3
  15. arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/__init__.py +38 -0
  16. arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/default_templates.py +343 -0
  17. arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/template.py +177 -0
  18. arize_phoenix-1.1.1/src/phoenix/server/static/index.js +6845 -0
  19. arize_phoenix-1.1.1/src/phoenix/trace/evaluation_conventions.py +26 -0
  20. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/instrumentor.py +1 -1
  21. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/tracer.py +39 -32
  22. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/callback.py +160 -50
  23. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/openai/instrumentor.py +49 -40
  24. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/semantic_conventions.py +2 -35
  25. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/utils.py +13 -1
  26. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/__init__.py +14 -8
  27. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/utilities/logging.py +3 -1
  28. arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/functions/__init__.py +0 -4
  29. arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/models/__init__.py +0 -5
  30. arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/templates/__init__.py +0 -26
  31. arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/templates/default_templates.py +0 -128
  32. arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/templates/template.py +0 -138
  33. arize_phoenix-0.0.50rc0/src/phoenix/server/static/index.js +0 -6829
  34. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/.gitignore +0 -0
  35. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/IP_NOTICE +0 -0
  36. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/LICENSE +0 -0
  37. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/config.py +0 -0
  38. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/__init__.py +0 -0
  39. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/embedding_dimension.py +0 -0
  40. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/model.py +0 -0
  41. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/model_schema.py +0 -0
  42. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/model_schema_adapter.py +0 -0
  43. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/traces.py +0 -0
  44. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/__init__.py +0 -0
  45. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/dataset.py +0 -0
  46. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/errors.py +0 -0
  47. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/fixtures.py +0 -0
  48. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/schema.py +0 -0
  49. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/validation.py +0 -0
  50. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datetime_utils.py +0 -0
  51. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/__init__.py +0 -0
  52. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/processing.py +0 -0
  53. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/bedrock.py +0 -0
  54. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/__init__.py +0 -0
  55. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/downloads.py +0 -0
  56. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/threads.py +0 -0
  57. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/types.py +0 -0
  58. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils.py +0 -0
  59. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/README.md +0 -0
  60. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/__init__.py +0 -0
  61. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/binning.py +0 -0
  62. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/metrics.py +0 -0
  63. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/mixins.py +0 -0
  64. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/timeseries.py +0 -0
  65. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/wrappers.py +0 -0
  66. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/__init__.py +0 -0
  67. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/clustering.py +0 -0
  68. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/pointcloud.py +0 -0
  69. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/projectors.py +0 -0
  70. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/umap_parameters.py +0 -0
  71. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/py.typed +0 -0
  72. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/__init__.py +0 -0
  73. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/__init__.py +0 -0
  74. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/context.py +0 -0
  75. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/helpers.py +0 -0
  76. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
  77. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
  78. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
  79. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
  80. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
  81. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/Granularity.py +0 -0
  82. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
  83. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
  84. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
  85. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/__init__.py +0 -0
  86. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/interceptor.py +0 -0
  87. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/schema.py +0 -0
  88. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Cluster.py +0 -0
  89. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
  90. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Dataset.py +0 -0
  91. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetInfo.py +0 -0
  92. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetRole.py +0 -0
  93. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetValues.py +0 -0
  94. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Dimension.py +0 -0
  95. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
  96. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionShape.py +0 -0
  97. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionType.py +0 -0
  98. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
  99. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
  100. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
  101. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Event.py +0 -0
  102. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EventMetadata.py +0 -0
  103. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
  104. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ExportedFile.py +0 -0
  105. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Functionality.py +0 -0
  106. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/MimeType.py +0 -0
  107. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Model.py +0 -0
  108. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/NumericRange.py +0 -0
  109. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
  110. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/PromptResponse.py +0 -0
  111. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Retrieval.py +0 -0
  112. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
  113. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Segments.py +0 -0
  114. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/SortDir.py +0 -0
  115. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Span.py +0 -0
  116. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/TimeSeries.py +0 -0
  117. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
  118. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ValidationResult.py +0 -0
  119. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
  120. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/__init__.py +0 -0
  121. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/node.py +0 -0
  122. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/pagination.py +0 -0
  123. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/app.py +0 -0
  124. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/main.py +0 -0
  125. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/span_handler.py +0 -0
  126. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
  127. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
  128. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
  129. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
  130. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
  131. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
  132. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
  133. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon.png +0 -0
  134. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/favicon.ico +0 -0
  135. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/index.css +0 -0
  136. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/modernizr.js +0 -0
  137. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/templates/__init__.py +0 -0
  138. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/templates/index.html +0 -0
  139. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/thread_server.py +0 -0
  140. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/services.py +0 -0
  141. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/session/__init__.py +0 -0
  142. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/session/session.py +0 -0
  143. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/__init__.py +0 -0
  144. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/exporter.py +0 -0
  145. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/filter.py +0 -0
  146. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/fixtures.py +0 -0
  147. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/__init__.py +0 -0
  148. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/__init__.py +0 -0
  149. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/debug_callback.py +0 -0
  150. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/openai/__init__.py +0 -0
  151. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/schemas.py +0 -0
  152. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/span_json_decoder.py +0 -0
  153. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/span_json_encoder.py +0 -0
  154. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/trace_dataset.py +0 -0
  155. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/tracer.py +0 -0
  156. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/trace_pb2.py +0 -0
  157. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/trace_pb2.pyi +0 -0
  158. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/utilities/__init__.py +0 -0
  159. {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/utilities/error_handling.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arize-phoenix
3
- Version: 0.0.50rc0
3
+ Version: 1.1.1
4
4
  Summary: ML Observability in your notebook
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -35,22 +35,23 @@ Requires-Dist: uvicorn
35
35
  Requires-Dist: wrapt
36
36
  Provides-Extra: dev
37
37
  Requires-Dist: arize[autoembeddings,llm-evaluation]; extra == 'dev'
38
- Requires-Dist: black[jupyter]; extra == 'dev'
39
38
  Requires-Dist: gcsfs; extra == 'dev'
40
39
  Requires-Dist: hatch; extra == 'dev'
41
40
  Requires-Dist: jupyter; extra == 'dev'
42
- Requires-Dist: langchain>=0.0.293; extra == 'dev'
43
- Requires-Dist: llama-index>=0.8.29; extra == 'dev'
41
+ Requires-Dist: langchain>=0.0.334; extra == 'dev'
42
+ Requires-Dist: llama-index>=0.9.0; extra == 'dev'
44
43
  Requires-Dist: nbqa; extra == 'dev'
45
44
  Requires-Dist: pandas-stubs<=2.0.2.230605; extra == 'dev'
46
45
  Requires-Dist: pre-commit; extra == 'dev'
47
46
  Requires-Dist: pytest; extra == 'dev'
48
47
  Requires-Dist: pytest-cov; extra == 'dev'
49
48
  Requires-Dist: pytest-lazy-fixture; extra == 'dev'
50
- Requires-Dist: ruff==0.0.290; extra == 'dev'
49
+ Requires-Dist: ruff==0.1.5; extra == 'dev'
51
50
  Requires-Dist: strawberry-graphql[debug-server]==0.208.2; extra == 'dev'
52
51
  Provides-Extra: experimental
53
52
  Requires-Dist: tenacity; extra == 'experimental'
53
+ Provides-Extra: llama-index
54
+ Requires-Dist: llama-index~=0.9.0; extra == 'llama-index'
54
55
  Description-Content-Type: text/markdown
55
56
 
56
57
  <p align="center">
@@ -102,6 +103,7 @@ Phoenix provides MLOps and LLMOps insights at lightning speed with zero-config o
102
103
  - [Exportable Clusters](#exportable-clusters)
103
104
  - [Retrieval-Augmented Generation Analysis](#retrieval-augmented-generation-analysis)
104
105
  - [Structured Data Analysis](#structured-data-analysis)
106
+ - [Breaking Changes](#breaking-changes)
105
107
  - [Community](#community)
106
108
  - [Thanks](#thanks)
107
109
  - [Copyright, Patent, and License](#copyright-patent-and-license)
@@ -267,7 +269,7 @@ pip install arize-phoenix[experimental] ipython matplotlib openai pycm scikit-le
267
269
 
268
270
  ```python
269
271
  from phoenix.experimental.evals import (
270
- RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
272
+ RAG_RELEVANCY_PROMPT_TEMPLATE,
271
273
  RAG_RELEVANCY_PROMPT_RAILS_MAP,
272
274
  OpenAIModel,
273
275
  download_benchmark_dataset,
@@ -292,7 +294,7 @@ model = OpenAIModel(
292
294
  temperature=0.0,
293
295
  )
294
296
  rails =list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
295
- df["eval_relevance"] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE_STR, rails)
297
+ df[["eval_relevance"]] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE, rails)
296
298
  #Golden dataset has True/False map to -> "irrelevant" / "relevant"
297
299
  #we can then scikit compare to output of template - same format
298
300
  y_true = df["relevant"].map({True: "relevant", False: "irrelevant"})
@@ -419,6 +421,10 @@ train_ds = px.Dataset(dataframe=train_df, schema=schema, name="training")
419
421
  session = px.launch_app(primary=prod_ds, reference=train_ds)
420
422
  ```
421
423
 
424
+ ## Breaking Changes
425
+
426
+ - **v1.0.0** - Phoenix now exclusively supports the `openai>=1.0.0` SDK. If you are using an older version of the OpenAI SDK, you can continue to use `arize-phoenix==0.1.1`. However, we recommend upgrading to the latest version of the OpenAI SDK as it contains many improvements. If you are using Phoenix with LlamaIndex and LangChain, you will have to upgrade to the versions of these packages that support the OpenAI `1.0.0` SDK as well (`llama-index>=0.8.64`, `langchain>=0.0.334`).
427
+
422
428
  ## Community
423
429
 
424
430
  Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
@@ -47,6 +47,7 @@ Phoenix provides MLOps and LLMOps insights at lightning speed with zero-config o
47
47
  - [Exportable Clusters](#exportable-clusters)
48
48
  - [Retrieval-Augmented Generation Analysis](#retrieval-augmented-generation-analysis)
49
49
  - [Structured Data Analysis](#structured-data-analysis)
50
+ - [Breaking Changes](#breaking-changes)
50
51
  - [Community](#community)
51
52
  - [Thanks](#thanks)
52
53
  - [Copyright, Patent, and License](#copyright-patent-and-license)
@@ -212,7 +213,7 @@ pip install arize-phoenix[experimental] ipython matplotlib openai pycm scikit-le
212
213
 
213
214
  ```python
214
215
  from phoenix.experimental.evals import (
215
- RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
216
+ RAG_RELEVANCY_PROMPT_TEMPLATE,
216
217
  RAG_RELEVANCY_PROMPT_RAILS_MAP,
217
218
  OpenAIModel,
218
219
  download_benchmark_dataset,
@@ -237,7 +238,7 @@ model = OpenAIModel(
237
238
  temperature=0.0,
238
239
  )
239
240
  rails =list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
240
- df["eval_relevance"] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE_STR, rails)
241
+ df[["eval_relevance"]] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE, rails)
241
242
  #Golden dataset has True/False map to -> "irrelevant" / "relevant"
242
243
  #we can then scikit compare to output of template - same format
243
244
  y_true = df["relevant"].map({True: "relevant", False: "irrelevant"})
@@ -364,6 +365,10 @@ train_ds = px.Dataset(dataframe=train_df, schema=schema, name="training")
364
365
  session = px.launch_app(primary=prod_ds, reference=train_ds)
365
366
  ```
366
367
 
368
+ ## Breaking Changes
369
+
370
+ - **v1.0.0** - Phoenix now exclusively supports the `openai>=1.0.0` SDK. If you are using an older version of the OpenAI SDK, you can continue to use `arize-phoenix==0.1.1`. However, we recommend upgrading to the latest version of the OpenAI SDK as it contains many improvements. If you are using Phoenix with LlamaIndex and LangChain, you will have to upgrade to the versions of these packages that support the OpenAI `1.0.0` SDK as well (`llama-index>=0.8.64`, `langchain>=0.0.334`).
371
+
367
372
  ## Community
368
373
 
369
374
  Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
@@ -43,12 +43,11 @@ dynamic = ["version"]
43
43
 
44
44
  [project.optional-dependencies]
45
45
  dev = [
46
- "black[jupyter]",
47
46
  "gcsfs",
48
47
  "hatch",
49
48
  "jupyter",
50
49
  "nbqa",
51
- "ruff==0.0.290",
50
+ "ruff==0.1.5",
52
51
  "pandas-stubs<=2.0.2.230605", # version 2.0.3.230814 is causing a dependency conflict.
53
52
  "pytest",
54
53
  "pytest-cov",
@@ -56,12 +55,15 @@ dev = [
56
55
  "strawberry-graphql[debug-server]==0.208.2",
57
56
  "pre-commit",
58
57
  "arize[AutoEmbeddings, LLM_Evaluation]",
59
- "llama-index>=0.8.29",
60
- "langchain>=0.0.293",
58
+ "llama-index>=0.9.0",
59
+ "langchain>=0.0.334",
61
60
  ]
62
61
  experimental = [
63
62
  "tenacity",
64
63
  ]
64
+ llama-index = [
65
+ "llama-index~=0.9.0",
66
+ ]
65
67
 
66
68
  [project.urls]
67
69
  Documentation = "https://docs.arize.com/phoenix/"
@@ -82,16 +84,6 @@ artifacts = ["src/phoenix/server/static"]
82
84
  [tool.hatch.build]
83
85
  only-packages = true
84
86
 
85
- [tool.hatch.build.hooks.jupyter-builder]
86
- dependencies = ["hatch-jupyter-builder"]
87
- build-function = "hatch_jupyter_builder.npm_builder"
88
- ensured-targets = ["src/phoenix/server/static/index.js"]
89
- skip-if-exists = ["src/phoenix/server/static/index.js"]
90
-
91
- [tool.hatch.build.hooks.jupyter-builder.build-kwargs]
92
- path = "app"
93
- source_dir = "app"
94
-
95
87
  [tool.hatch.build.targets.sdist]
96
88
  artifacts = ["src/phoenix/server/static"]
97
89
 
@@ -102,9 +94,9 @@ dependencies = [
102
94
  "pytest-cov",
103
95
  "pytest-lazy-fixture",
104
96
  "arize",
105
- "langchain>=0.0.293",
106
- "llama-index>=0.8.29",
107
- "openai",
97
+ "langchain>=0.0.334",
98
+ "llama-index>=0.9.0",
99
+ "openai>=1.0.0",
108
100
  "tenacity",
109
101
  "nltk==3.8.1",
110
102
  "sentence-transformers==2.2.2",
@@ -114,25 +106,26 @@ dependencies = [
114
106
  "responses",
115
107
  "tiktoken",
116
108
  "typing-extensions<4.6.0", # for Colab
109
+ "httpx", # For OpenAI testing
110
+ "respx", # For OpenAI testing
117
111
  ]
118
112
 
119
113
  [tool.hatch.envs.type]
120
114
  dependencies = [
121
115
  "mypy==1.5.1",
122
- "llama-index>=0.8.29",
116
+ "llama-index>=0.9.0",
123
117
  "pandas-stubs<=2.0.2.230605", # version 2.0.3.230814 is causing a dependency conflict.
124
118
  "types-psutil",
125
119
  "types-tqdm",
126
120
  "types-requests",
127
121
  "types-protobuf",
122
+ "openai>=1.0.0",
128
123
  ]
129
124
 
130
125
  [tool.hatch.envs.style]
131
126
  detached = true
132
127
  dependencies = [
133
- "black~=23.3.0",
134
- "black[jupyter]~=23.3.0",
135
- "ruff~=0.0.290",
128
+ "ruff~=0.1.5",
136
129
  ]
137
130
 
138
131
  [tool.hatch.envs.notebooks]
@@ -188,11 +181,11 @@ check = [
188
181
 
189
182
  [tool.hatch.envs.style.scripts]
190
183
  check = [
191
- "black --check --diff --color .",
192
184
  "ruff .",
185
+ "ruff format --check --diff .",
193
186
  ]
194
187
  fix = [
195
- "black .",
188
+ "ruff format .",
196
189
  "ruff --fix .",
197
190
  ]
198
191
 
@@ -217,10 +210,6 @@ pypi = [
217
210
  "twine upload --verbose dist/*",
218
211
  ]
219
212
 
220
- [tool.black]
221
- line-length = 100
222
- exclude = '_pb2\.pyi?$'
223
-
224
213
  [tool.hatch.envs.docs.scripts]
225
214
  check = [
226
215
  "interrogate -vv src/",
@@ -288,11 +277,15 @@ module = [
288
277
  ignore_missing_imports = true
289
278
 
290
279
  [tool.ruff]
291
- exclude = [".git", "__pycache__", "docs/source/conf.py", "*_pb2.py*"]
280
+ exclude = [".git", "__pycache__", "docs/source/conf.py", "*_pb2.py*", "*.pyi"]
281
+ extend-include = ["*.ipynb"]
292
282
  ignore-init-module-imports = true
293
283
  line-length = 100
294
284
  select = ["E", "F", "W", "I"]
295
285
  target-version = "py38"
296
286
 
287
+ [tool.ruff.lint.per-file-ignores]
288
+ "*.ipynb" = ["E402", "E501"]
289
+
297
290
  [tool.ruff.isort]
298
291
  force-single-line = false
@@ -5,7 +5,7 @@ from .session.session import Session, active_session, close_app, launch_app
5
5
  from .trace.fixtures import load_example_traces
6
6
  from .trace.trace_dataset import TraceDataset
7
7
 
8
- __version__ = "0.0.50rc"
8
+ __version__ = "1.1.1"
9
9
 
10
10
  # module level doc-string
11
11
  __doc__ = """
@@ -1,16 +1,17 @@
1
- from .functions import llm_classify, llm_eval_binary, llm_generate, run_relevance_eval
1
+ from .functions import llm_classify, llm_generate, run_relevance_eval
2
2
  from .models import OpenAIModel, VertexAIModel
3
3
  from .retrievals import compute_precisions_at_k
4
4
  from .templates import (
5
5
  CODE_READABILITY_PROMPT_RAILS_MAP,
6
- CODE_READABILITY_PROMPT_TEMPLATE_STR,
6
+ CODE_READABILITY_PROMPT_TEMPLATE,
7
7
  HALLUCINATION_PROMPT_RAILS_MAP,
8
- HALLUCINATION_PROMPT_TEMPLATE_STR,
8
+ HALLUCINATION_PROMPT_TEMPLATE,
9
9
  NOT_PARSABLE,
10
10
  RAG_RELEVANCY_PROMPT_RAILS_MAP,
11
- RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
11
+ RAG_RELEVANCY_PROMPT_TEMPLATE,
12
12
  TOXICITY_PROMPT_RAILS_MAP,
13
- TOXICITY_PROMPT_TEMPLATE_STR,
13
+ TOXICITY_PROMPT_TEMPLATE,
14
+ ClassificationTemplate,
14
15
  PromptTemplate,
15
16
  )
16
17
  from .utils.downloads import download_benchmark_dataset
@@ -19,19 +20,19 @@ __all__ = [
19
20
  "compute_precisions_at_k",
20
21
  "download_benchmark_dataset",
21
22
  "llm_classify",
22
- "llm_eval_binary",
23
23
  "llm_generate",
24
24
  "OpenAIModel",
25
25
  "VertexAIModel",
26
26
  "PromptTemplate",
27
+ "ClassificationTemplate",
27
28
  "CODE_READABILITY_PROMPT_RAILS_MAP",
28
- "CODE_READABILITY_PROMPT_TEMPLATE_STR",
29
+ "CODE_READABILITY_PROMPT_TEMPLATE",
29
30
  "HALLUCINATION_PROMPT_RAILS_MAP",
30
- "HALLUCINATION_PROMPT_TEMPLATE_STR",
31
+ "HALLUCINATION_PROMPT_TEMPLATE",
31
32
  "RAG_RELEVANCY_PROMPT_RAILS_MAP",
32
- "RAG_RELEVANCY_PROMPT_TEMPLATE_STR",
33
- "TOXICITY_PROMPT_TEMPLATE_STR",
33
+ "RAG_RELEVANCY_PROMPT_TEMPLATE",
34
34
  "TOXICITY_PROMPT_RAILS_MAP",
35
+ "TOXICITY_PROMPT_TEMPLATE",
35
36
  "NOT_PARSABLE",
36
37
  "run_relevance_eval",
37
38
  ]
@@ -0,0 +1,139 @@
1
+ from typing import List, Optional
2
+
3
+ from phoenix.experimental.evals import PromptTemplate
4
+ from phoenix.experimental.evals.models import BaseEvalModel
5
+
6
+
7
+ class MapReducer:
8
+ """
9
+ Evaluates data that is too large to fit into a single context window using a
10
+ map-reduce strategy. The data must first be divided into "chunks" that
11
+ individually fit into an LLM's context window. Each chunk of data is
12
+ individually evaluated (the "map" step), producing intermediate outputs that
13
+ are combined into a single result (the "reduce" step).
14
+
15
+ This is the simplest strategy for evaluating long-context data.
16
+ """
17
+
18
+ def __init__(
19
+ self,
20
+ model: BaseEvalModel,
21
+ map_prompt_template: PromptTemplate,
22
+ reduce_prompt_template: PromptTemplate,
23
+ ) -> None:
24
+ """Initializes an instance.
25
+
26
+ Args:
27
+ model (BaseEvalModel): The LLM model to use for evaluation.
28
+
29
+ map_prompt_template (PromptTemplate): The template that is mapped
30
+ over each chunk to produce intermediate outputs. Must contain the
31
+ {chunk} placeholder.
32
+
33
+ reduce_prompt_template (PromptTemplate): The template that combines
34
+ the intermediate outputs into a single result. Must contain the
35
+ {mapped} placeholder, which will be formatted as a list of the
36
+ intermediate outputs produced by the map step.
37
+ """
38
+ self._model = model
39
+ self._map_prompt_template = map_prompt_template
40
+ self._reduce_prompt_template = reduce_prompt_template
41
+
42
+ def evaluate(self, chunks: List[str]) -> str:
43
+ """Evaluates a list of two or more chunks.
44
+
45
+ Args:
46
+ chunks (List[str]): A list of chunks to be evaluated. Each chunk is
47
+ inserted into the map_prompt_template and must therefore fit within
48
+ the LLM's context window and still leave room for the rest of the
49
+ prompt.
50
+
51
+ Returns:
52
+ str: The output of the map-reduce process.
53
+ """
54
+ if len(chunks) < 2:
55
+ raise ValueError(
56
+ "The map-reduce strategy is not needed to evaluate data "
57
+ "that fits within a single context window. "
58
+ "Consider using llm_classify instead."
59
+ )
60
+ model = self._model
61
+ mapped_records = []
62
+ for chunk in chunks:
63
+ map_prompt = self._map_prompt_template.format({"chunk": chunk})
64
+ intermediate_output = model(map_prompt)
65
+ mapped_records.append(intermediate_output)
66
+ reduce_prompt = self._reduce_prompt_template.format({"mapped": repr(mapped_records)})
67
+ return model(reduce_prompt)
68
+
69
+
70
+ class Refiner:
71
+ """
72
+ Evaluates data that is too large to fit into a single context window using a
73
+ refine strategy. The data must first be divided into "chunks" that
74
+ individually fit into an LLM's context window. An initial "accumulator" is
75
+ generated from the first chunk of data. The accumulator is subsequently
76
+ refined by iteratively updating and incorporating new information from each
77
+ subsequent chunk. An optional synthesis step can be used to synthesize the
78
+ final accumulator into a desired format.
79
+ """
80
+
81
+ def __init__(
82
+ self,
83
+ model: BaseEvalModel,
84
+ initial_prompt_template: PromptTemplate,
85
+ refine_prompt_template: PromptTemplate,
86
+ synthesize_prompt_template: Optional[PromptTemplate] = None,
87
+ ) -> None:
88
+ """Initializes an instance.
89
+
90
+ Args:
91
+ model (BaseEvalModel): The LLM model to use for evaluation.
92
+
93
+ initial_prompt_template (PromptTemplate): The template for the
94
+ initial invocation of the model that will generate the initial
95
+ accumulator. Should contain the {chunk} placeholder.
96
+
97
+ refine_prompt_template (PromptTemplate): The template for refining
98
+ the accumulator across all subsequent chunks. Must contain the
99
+ {chunk} and {accumulator} placeholders.
100
+
101
+ synthesize_prompt_template (Optional[PromptTemplate], optional): An
102
+ optional template to synthesize the final version of the
103
+ accumulator. Must contain the {accumulator} placeholder.
104
+ """
105
+ self._model = model
106
+ self._initial_prompt_template = initial_prompt_template
107
+ self._refine_prompt_template = refine_prompt_template
108
+ self._synthesize_prompt_template = synthesize_prompt_template
109
+
110
+ def evaluate(self, chunks: List[str]) -> str:
111
+ """Evaluates a list of two or more chunks.
112
+
113
+ Args:
114
+ chunks (List[str]): A list of chunks to be evaluated. Each chunk is
115
+ inserted into the initial_prompt_template and refine_prompt_template
116
+ and must therefore fit within the LLM's context window and still
117
+ leave room for the rest of the prompt.
118
+
119
+ Returns:
120
+ str: The output of the refine process.
121
+ """
122
+ if len(chunks) < 2:
123
+ raise ValueError(
124
+ "The refine strategy is not needed to evaluate data "
125
+ "that fits within a single context window. "
126
+ "Consider using llm_classify instead."
127
+ )
128
+ model = self._model
129
+ initial_prompt = self._initial_prompt_template.format({"chunk": chunks[0]})
130
+ accumulator = model(initial_prompt)
131
+ for chunk in chunks[1:]:
132
+ refine_prompt = self._refine_prompt_template.format(
133
+ {"accumulator": accumulator, "chunk": chunk}
134
+ )
135
+ accumulator = model(refine_prompt)
136
+ if not self._synthesize_prompt_template:
137
+ return accumulator
138
+ reduce_prompt = self._synthesize_prompt_template.format({"accumulator": accumulator})
139
+ return model(reduce_prompt)
@@ -0,0 +1,4 @@
1
+ from .classify import llm_classify, run_relevance_eval
2
+ from .generate import llm_generate
3
+
4
+ __all__ = ["llm_classify", "run_relevance_eval", "llm_generate"]