arize-phoenix 0.0.50rc0__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/PKG-INFO +13 -7
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/README.md +7 -2
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/pyproject.toml +21 -28
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/__init__.py +1 -1
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/__init__.py +11 -10
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/evaluators.py +139 -0
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/functions/__init__.py +4 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/classify.py +125 -76
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/generate.py +32 -9
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/models/__init__.py +6 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/base.py +10 -8
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/openai.py +144 -77
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/vertexai.py +1 -1
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/retrievals.py +6 -3
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/__init__.py +38 -0
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/default_templates.py +343 -0
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/template.py +177 -0
- arize_phoenix-1.1.1/src/phoenix/server/static/index.js +6845 -0
- arize_phoenix-1.1.1/src/phoenix/trace/evaluation_conventions.py +26 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/instrumentor.py +1 -1
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/tracer.py +39 -32
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/callback.py +160 -50
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/openai/instrumentor.py +49 -40
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/semantic_conventions.py +2 -35
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/utils.py +13 -1
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/__init__.py +14 -8
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/utilities/logging.py +3 -1
- arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/functions/__init__.py +0 -4
- arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/models/__init__.py +0 -5
- arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/templates/__init__.py +0 -26
- arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/templates/default_templates.py +0 -128
- arize_phoenix-0.0.50rc0/src/phoenix/experimental/evals/templates/template.py +0 -138
- arize_phoenix-0.0.50rc0/src/phoenix/server/static/index.js +0 -6829
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/.gitignore +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/IP_NOTICE +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/LICENSE +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/config.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/embedding_dimension.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/model.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/model_schema.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/model_schema_adapter.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/core/traces.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/dataset.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/errors.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/fixtures.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/schema.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datasets/validation.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/datetime_utils.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/processing.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/bedrock.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/downloads.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/threads.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/types.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/README.md +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/binning.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/metrics.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/mixins.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/timeseries.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/metrics/wrappers.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/clustering.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/pointcloud.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/projectors.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/umap_parameters.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/py.typed +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/context.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/helpers.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/Granularity.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/interceptor.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/schema.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Cluster.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Dataset.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetInfo.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetRole.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetValues.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Dimension.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionShape.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionType.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Event.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EventMetadata.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ExportedFile.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Functionality.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/MimeType.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Model.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/NumericRange.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/PromptResponse.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Retrieval.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Segments.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/SortDir.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Span.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/TimeSeries.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ValidationResult.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/node.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/pagination.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/app.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/main.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/span_handler.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon.png +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/favicon.ico +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/index.css +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/static/modernizr.js +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/templates/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/templates/index.html +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/server/thread_server.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/services.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/session/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/session/session.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/exporter.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/filter.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/fixtures.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/debug_callback.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/openai/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/schemas.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/span_json_decoder.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/span_json_encoder.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/trace_dataset.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/tracer.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/trace_pb2.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/trace_pb2.pyi +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/utilities/__init__.py +0 -0
- {arize_phoenix-0.0.50rc0 → arize_phoenix-1.1.1}/src/phoenix/utilities/error_handling.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arize-phoenix
|
|
3
|
-
Version:
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: ML Observability in your notebook
|
|
5
5
|
Project-URL: Documentation, https://docs.arize.com/phoenix/
|
|
6
6
|
Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
|
|
@@ -35,22 +35,23 @@ Requires-Dist: uvicorn
|
|
|
35
35
|
Requires-Dist: wrapt
|
|
36
36
|
Provides-Extra: dev
|
|
37
37
|
Requires-Dist: arize[autoembeddings,llm-evaluation]; extra == 'dev'
|
|
38
|
-
Requires-Dist: black[jupyter]; extra == 'dev'
|
|
39
38
|
Requires-Dist: gcsfs; extra == 'dev'
|
|
40
39
|
Requires-Dist: hatch; extra == 'dev'
|
|
41
40
|
Requires-Dist: jupyter; extra == 'dev'
|
|
42
|
-
Requires-Dist: langchain>=0.0.
|
|
43
|
-
Requires-Dist: llama-index>=0.
|
|
41
|
+
Requires-Dist: langchain>=0.0.334; extra == 'dev'
|
|
42
|
+
Requires-Dist: llama-index>=0.9.0; extra == 'dev'
|
|
44
43
|
Requires-Dist: nbqa; extra == 'dev'
|
|
45
44
|
Requires-Dist: pandas-stubs<=2.0.2.230605; extra == 'dev'
|
|
46
45
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
47
46
|
Requires-Dist: pytest; extra == 'dev'
|
|
48
47
|
Requires-Dist: pytest-cov; extra == 'dev'
|
|
49
48
|
Requires-Dist: pytest-lazy-fixture; extra == 'dev'
|
|
50
|
-
Requires-Dist: ruff==0.
|
|
49
|
+
Requires-Dist: ruff==0.1.5; extra == 'dev'
|
|
51
50
|
Requires-Dist: strawberry-graphql[debug-server]==0.208.2; extra == 'dev'
|
|
52
51
|
Provides-Extra: experimental
|
|
53
52
|
Requires-Dist: tenacity; extra == 'experimental'
|
|
53
|
+
Provides-Extra: llama-index
|
|
54
|
+
Requires-Dist: llama-index~=0.9.0; extra == 'llama-index'
|
|
54
55
|
Description-Content-Type: text/markdown
|
|
55
56
|
|
|
56
57
|
<p align="center">
|
|
@@ -102,6 +103,7 @@ Phoenix provides MLOps and LLMOps insights at lightning speed with zero-config o
|
|
|
102
103
|
- [Exportable Clusters](#exportable-clusters)
|
|
103
104
|
- [Retrieval-Augmented Generation Analysis](#retrieval-augmented-generation-analysis)
|
|
104
105
|
- [Structured Data Analysis](#structured-data-analysis)
|
|
106
|
+
- [Breaking Changes](#breaking-changes)
|
|
105
107
|
- [Community](#community)
|
|
106
108
|
- [Thanks](#thanks)
|
|
107
109
|
- [Copyright, Patent, and License](#copyright-patent-and-license)
|
|
@@ -267,7 +269,7 @@ pip install arize-phoenix[experimental] ipython matplotlib openai pycm scikit-le
|
|
|
267
269
|
|
|
268
270
|
```python
|
|
269
271
|
from phoenix.experimental.evals import (
|
|
270
|
-
|
|
272
|
+
RAG_RELEVANCY_PROMPT_TEMPLATE,
|
|
271
273
|
RAG_RELEVANCY_PROMPT_RAILS_MAP,
|
|
272
274
|
OpenAIModel,
|
|
273
275
|
download_benchmark_dataset,
|
|
@@ -292,7 +294,7 @@ model = OpenAIModel(
|
|
|
292
294
|
temperature=0.0,
|
|
293
295
|
)
|
|
294
296
|
rails =list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
|
|
295
|
-
df["eval_relevance"] = llm_classify(df, model,
|
|
297
|
+
df[["eval_relevance"]] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE, rails)
|
|
296
298
|
#Golden dataset has True/False map to -> "irrelevant" / "relevant"
|
|
297
299
|
#we can then scikit compare to output of template - same format
|
|
298
300
|
y_true = df["relevant"].map({True: "relevant", False: "irrelevant"})
|
|
@@ -419,6 +421,10 @@ train_ds = px.Dataset(dataframe=train_df, schema=schema, name="training")
|
|
|
419
421
|
session = px.launch_app(primary=prod_ds, reference=train_ds)
|
|
420
422
|
```
|
|
421
423
|
|
|
424
|
+
## Breaking Changes
|
|
425
|
+
|
|
426
|
+
- **v1.0.0** - Phoenix now exclusively supports the `openai>=1.0.0` sdk. If you are using an older version of the OpenAI SDK, you can continue to use `arize-phoenix==0.1.1`. However, we recommend upgrading to the latest version of the OpenAI SDK as it contains many improvements. If you are using Phoenix with LlamaIndex and and LangChain, you will have to upgrade to the versions of these packages that support the OpenAI `1.0.0` SDK as well (`llama-index>=0.8.64`, `langchain>=0.0.334`)
|
|
427
|
+
|
|
422
428
|
## Community
|
|
423
429
|
|
|
424
430
|
Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
|
|
@@ -47,6 +47,7 @@ Phoenix provides MLOps and LLMOps insights at lightning speed with zero-config o
|
|
|
47
47
|
- [Exportable Clusters](#exportable-clusters)
|
|
48
48
|
- [Retrieval-Augmented Generation Analysis](#retrieval-augmented-generation-analysis)
|
|
49
49
|
- [Structured Data Analysis](#structured-data-analysis)
|
|
50
|
+
- [Breaking Changes](#breaking-changes)
|
|
50
51
|
- [Community](#community)
|
|
51
52
|
- [Thanks](#thanks)
|
|
52
53
|
- [Copyright, Patent, and License](#copyright-patent-and-license)
|
|
@@ -212,7 +213,7 @@ pip install arize-phoenix[experimental] ipython matplotlib openai pycm scikit-le
|
|
|
212
213
|
|
|
213
214
|
```python
|
|
214
215
|
from phoenix.experimental.evals import (
|
|
215
|
-
|
|
216
|
+
RAG_RELEVANCY_PROMPT_TEMPLATE,
|
|
216
217
|
RAG_RELEVANCY_PROMPT_RAILS_MAP,
|
|
217
218
|
OpenAIModel,
|
|
218
219
|
download_benchmark_dataset,
|
|
@@ -237,7 +238,7 @@ model = OpenAIModel(
|
|
|
237
238
|
temperature=0.0,
|
|
238
239
|
)
|
|
239
240
|
rails =list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
|
|
240
|
-
df["eval_relevance"] = llm_classify(df, model,
|
|
241
|
+
df[["eval_relevance"]] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE, rails)
|
|
241
242
|
#Golden dataset has True/False map to -> "irrelevant" / "relevant"
|
|
242
243
|
#we can then scikit compare to output of template - same format
|
|
243
244
|
y_true = df["relevant"].map({True: "relevant", False: "irrelevant"})
|
|
@@ -364,6 +365,10 @@ train_ds = px.Dataset(dataframe=train_df, schema=schema, name="training")
|
|
|
364
365
|
session = px.launch_app(primary=prod_ds, reference=train_ds)
|
|
365
366
|
```
|
|
366
367
|
|
|
368
|
+
## Breaking Changes
|
|
369
|
+
|
|
370
|
+
- **v1.0.0** - Phoenix now exclusively supports the `openai>=1.0.0` sdk. If you are using an older version of the OpenAI SDK, you can continue to use `arize-phoenix==0.1.1`. However, we recommend upgrading to the latest version of the OpenAI SDK as it contains many improvements. If you are using Phoenix with LlamaIndex and and LangChain, you will have to upgrade to the versions of these packages that support the OpenAI `1.0.0` SDK as well (`llama-index>=0.8.64`, `langchain>=0.0.334`)
|
|
371
|
+
|
|
367
372
|
## Community
|
|
368
373
|
|
|
369
374
|
Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
|
|
@@ -43,12 +43,11 @@ dynamic = ["version"]
|
|
|
43
43
|
|
|
44
44
|
[project.optional-dependencies]
|
|
45
45
|
dev = [
|
|
46
|
-
"black[jupyter]",
|
|
47
46
|
"gcsfs",
|
|
48
47
|
"hatch",
|
|
49
48
|
"jupyter",
|
|
50
49
|
"nbqa",
|
|
51
|
-
"ruff==0.
|
|
50
|
+
"ruff==0.1.5",
|
|
52
51
|
"pandas-stubs<=2.0.2.230605", # version 2.0.3.230814 is causing a dependency conflict.
|
|
53
52
|
"pytest",
|
|
54
53
|
"pytest-cov",
|
|
@@ -56,12 +55,15 @@ dev = [
|
|
|
56
55
|
"strawberry-graphql[debug-server]==0.208.2",
|
|
57
56
|
"pre-commit",
|
|
58
57
|
"arize[AutoEmbeddings, LLM_Evaluation]",
|
|
59
|
-
"llama-index>=0.
|
|
60
|
-
"langchain>=0.0.
|
|
58
|
+
"llama-index>=0.9.0",
|
|
59
|
+
"langchain>=0.0.334",
|
|
61
60
|
]
|
|
62
61
|
experimental = [
|
|
63
62
|
"tenacity",
|
|
64
63
|
]
|
|
64
|
+
llama-index = [
|
|
65
|
+
"llama-index~=0.9.0",
|
|
66
|
+
]
|
|
65
67
|
|
|
66
68
|
[project.urls]
|
|
67
69
|
Documentation = "https://docs.arize.com/phoenix/"
|
|
@@ -82,16 +84,6 @@ artifacts = ["src/phoenix/server/static"]
|
|
|
82
84
|
[tool.hatch.build]
|
|
83
85
|
only-packages = true
|
|
84
86
|
|
|
85
|
-
[tool.hatch.build.hooks.jupyter-builder]
|
|
86
|
-
dependencies = ["hatch-jupyter-builder"]
|
|
87
|
-
build-function = "hatch_jupyter_builder.npm_builder"
|
|
88
|
-
ensured-targets = ["src/phoenix/server/static/index.js"]
|
|
89
|
-
skip-if-exists = ["src/phoenix/server/static/index.js"]
|
|
90
|
-
|
|
91
|
-
[tool.hatch.build.hooks.jupyter-builder.build-kwargs]
|
|
92
|
-
path = "app"
|
|
93
|
-
source_dir = "app"
|
|
94
|
-
|
|
95
87
|
[tool.hatch.build.targets.sdist]
|
|
96
88
|
artifacts = ["src/phoenix/server/static"]
|
|
97
89
|
|
|
@@ -102,9 +94,9 @@ dependencies = [
|
|
|
102
94
|
"pytest-cov",
|
|
103
95
|
"pytest-lazy-fixture",
|
|
104
96
|
"arize",
|
|
105
|
-
"langchain>=0.0.
|
|
106
|
-
"llama-index>=0.
|
|
107
|
-
"openai",
|
|
97
|
+
"langchain>=0.0.334",
|
|
98
|
+
"llama-index>=0.9.0",
|
|
99
|
+
"openai>=1.0.0",
|
|
108
100
|
"tenacity",
|
|
109
101
|
"nltk==3.8.1",
|
|
110
102
|
"sentence-transformers==2.2.2",
|
|
@@ -114,25 +106,26 @@ dependencies = [
|
|
|
114
106
|
"responses",
|
|
115
107
|
"tiktoken",
|
|
116
108
|
"typing-extensions<4.6.0", # for Colab
|
|
109
|
+
"httpx", # For OpenAI testing
|
|
110
|
+
"respx", # For OpenAI testing
|
|
117
111
|
]
|
|
118
112
|
|
|
119
113
|
[tool.hatch.envs.type]
|
|
120
114
|
dependencies = [
|
|
121
115
|
"mypy==1.5.1",
|
|
122
|
-
"llama-index>=0.
|
|
116
|
+
"llama-index>=0.9.0",
|
|
123
117
|
"pandas-stubs<=2.0.2.230605", # version 2.0.3.230814 is causing a dependency conflict.
|
|
124
118
|
"types-psutil",
|
|
125
119
|
"types-tqdm",
|
|
126
120
|
"types-requests",
|
|
127
121
|
"types-protobuf",
|
|
122
|
+
"openai>=1.0.0",
|
|
128
123
|
]
|
|
129
124
|
|
|
130
125
|
[tool.hatch.envs.style]
|
|
131
126
|
detached = true
|
|
132
127
|
dependencies = [
|
|
133
|
-
"
|
|
134
|
-
"black[jupyter]~=23.3.0",
|
|
135
|
-
"ruff~=0.0.290",
|
|
128
|
+
"ruff~=0.1.5",
|
|
136
129
|
]
|
|
137
130
|
|
|
138
131
|
[tool.hatch.envs.notebooks]
|
|
@@ -188,11 +181,11 @@ check = [
|
|
|
188
181
|
|
|
189
182
|
[tool.hatch.envs.style.scripts]
|
|
190
183
|
check = [
|
|
191
|
-
"black --check --diff --color .",
|
|
192
184
|
"ruff .",
|
|
185
|
+
"ruff format --check --diff .",
|
|
193
186
|
]
|
|
194
187
|
fix = [
|
|
195
|
-
"
|
|
188
|
+
"ruff format .",
|
|
196
189
|
"ruff --fix .",
|
|
197
190
|
]
|
|
198
191
|
|
|
@@ -217,10 +210,6 @@ pypi = [
|
|
|
217
210
|
"twine upload --verbose dist/*",
|
|
218
211
|
]
|
|
219
212
|
|
|
220
|
-
[tool.black]
|
|
221
|
-
line-length = 100
|
|
222
|
-
exclude = '_pb2\.pyi?$'
|
|
223
|
-
|
|
224
213
|
[tool.hatch.envs.docs.scripts]
|
|
225
214
|
check = [
|
|
226
215
|
"interrogate -vv src/",
|
|
@@ -288,11 +277,15 @@ module = [
|
|
|
288
277
|
ignore_missing_imports = true
|
|
289
278
|
|
|
290
279
|
[tool.ruff]
|
|
291
|
-
exclude = [".git", "__pycache__", "docs/source/conf.py", "*_pb2.py*"]
|
|
280
|
+
exclude = [".git", "__pycache__", "docs/source/conf.py", "*_pb2.py*", "*.pyi"]
|
|
281
|
+
extend-include = ["*.ipynb"]
|
|
292
282
|
ignore-init-module-imports = true
|
|
293
283
|
line-length = 100
|
|
294
284
|
select = ["E", "F", "W", "I"]
|
|
295
285
|
target-version = "py38"
|
|
296
286
|
|
|
287
|
+
[tool.ruff.lint.per-file-ignores]
|
|
288
|
+
"*.ipynb" = ["E402", "E501"]
|
|
289
|
+
|
|
297
290
|
[tool.ruff.isort]
|
|
298
291
|
force-single-line = false
|
|
@@ -5,7 +5,7 @@ from .session.session import Session, active_session, close_app, launch_app
|
|
|
5
5
|
from .trace.fixtures import load_example_traces
|
|
6
6
|
from .trace.trace_dataset import TraceDataset
|
|
7
7
|
|
|
8
|
-
__version__ = "
|
|
8
|
+
__version__ = "1.1.1"
|
|
9
9
|
|
|
10
10
|
# module level doc-string
|
|
11
11
|
__doc__ = """
|
|
@@ -1,16 +1,17 @@
|
|
|
1
|
-
from .functions import llm_classify,
|
|
1
|
+
from .functions import llm_classify, llm_generate, run_relevance_eval
|
|
2
2
|
from .models import OpenAIModel, VertexAIModel
|
|
3
3
|
from .retrievals import compute_precisions_at_k
|
|
4
4
|
from .templates import (
|
|
5
5
|
CODE_READABILITY_PROMPT_RAILS_MAP,
|
|
6
|
-
|
|
6
|
+
CODE_READABILITY_PROMPT_TEMPLATE,
|
|
7
7
|
HALLUCINATION_PROMPT_RAILS_MAP,
|
|
8
|
-
|
|
8
|
+
HALLUCINATION_PROMPT_TEMPLATE,
|
|
9
9
|
NOT_PARSABLE,
|
|
10
10
|
RAG_RELEVANCY_PROMPT_RAILS_MAP,
|
|
11
|
-
|
|
11
|
+
RAG_RELEVANCY_PROMPT_TEMPLATE,
|
|
12
12
|
TOXICITY_PROMPT_RAILS_MAP,
|
|
13
|
-
|
|
13
|
+
TOXICITY_PROMPT_TEMPLATE,
|
|
14
|
+
ClassificationTemplate,
|
|
14
15
|
PromptTemplate,
|
|
15
16
|
)
|
|
16
17
|
from .utils.downloads import download_benchmark_dataset
|
|
@@ -19,19 +20,19 @@ __all__ = [
|
|
|
19
20
|
"compute_precisions_at_k",
|
|
20
21
|
"download_benchmark_dataset",
|
|
21
22
|
"llm_classify",
|
|
22
|
-
"llm_eval_binary",
|
|
23
23
|
"llm_generate",
|
|
24
24
|
"OpenAIModel",
|
|
25
25
|
"VertexAIModel",
|
|
26
26
|
"PromptTemplate",
|
|
27
|
+
"ClassificationTemplate",
|
|
27
28
|
"CODE_READABILITY_PROMPT_RAILS_MAP",
|
|
28
|
-
"
|
|
29
|
+
"CODE_READABILITY_PROMPT_TEMPLATE",
|
|
29
30
|
"HALLUCINATION_PROMPT_RAILS_MAP",
|
|
30
|
-
"
|
|
31
|
+
"HALLUCINATION_PROMPT_TEMPLATE",
|
|
31
32
|
"RAG_RELEVANCY_PROMPT_RAILS_MAP",
|
|
32
|
-
"
|
|
33
|
-
"TOXICITY_PROMPT_TEMPLATE_STR",
|
|
33
|
+
"RAG_RELEVANCY_PROMPT_TEMPLATE",
|
|
34
34
|
"TOXICITY_PROMPT_RAILS_MAP",
|
|
35
|
+
"TOXICITY_PROMPT_TEMPLATE",
|
|
35
36
|
"NOT_PARSABLE",
|
|
36
37
|
"run_relevance_eval",
|
|
37
38
|
]
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from phoenix.experimental.evals import PromptTemplate
|
|
4
|
+
from phoenix.experimental.evals.models import BaseEvalModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MapReducer:
    """Long-context evaluator built on a map-reduce strategy.

    Data too large for a single context window must first be pre-split into
    "chunks" that each fit into the LLM's context window. Every chunk is
    evaluated independently with the map template (the "map" step), and the
    intermediate outputs are then combined into one result with the reduce
    template (the "reduce" step).

    This is the simplest strategy for evaluating long-context data.
    """

    def __init__(
        self,
        model: BaseEvalModel,
        map_prompt_template: PromptTemplate,
        reduce_prompt_template: PromptTemplate,
    ) -> None:
        """Initializes an instance.

        Args:
            model (BaseEvalModel): The LLM model to use for evaluation.

            map_prompt_template (PromptTemplate): The template that is mapped
                over each chunk to produce intermediate outputs. Must contain
                the {chunk} placeholder.

            reduce_prompt_template (PromptTemplate): The template that
                combines the intermediate outputs into a single result. Must
                contain the {mapped} placeholder, which will be formatted as
                a list of the intermediate outputs produced by the map step.
        """
        self._model = model
        self._map_prompt_template = map_prompt_template
        self._reduce_prompt_template = reduce_prompt_template

    def evaluate(self, chunks: List[str]) -> str:
        """Evaluates a list of two or more chunks.

        Args:
            chunks (List[str]): A list of chunks to be evaluated. Each chunk
                is inserted into the map_prompt_template and must therefore
                fit within the LLM's context window and still leave room for
                the rest of the prompt.

        Returns:
            str: The output of the map-reduce process.
        """
        # Fewer than two chunks means the data already fits in one window,
        # so this strategy adds no value.
        if len(chunks) < 2:
            raise ValueError(
                "The map-reduce strategy is not needed to evaluate data "
                "that fits within a single context window. "
                "Consider using llm_classify instead."
            )
        run = self._model
        # Map step: evaluate every chunk independently.
        intermediate_outputs = [
            run(self._map_prompt_template.format({"chunk": piece})) for piece in chunks
        ]
        # Reduce step: fold all intermediate outputs into a single result.
        # repr() renders the outputs as a Python-style list inside the prompt.
        final_prompt = self._reduce_prompt_template.format(
            {"mapped": repr(intermediate_outputs)}
        )
        return run(final_prompt)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class Refiner:
    """Long-context evaluator built on an iterative refine strategy.

    Data too large for a single context window must first be pre-split into
    "chunks" that each fit into the LLM's context window. An initial
    "accumulator" is generated from the first chunk; each subsequent chunk is
    then folded in by asking the model to update the accumulator with the new
    information. An optional synthesis step can reshape the final accumulator
    into a desired format.
    """

    def __init__(
        self,
        model: BaseEvalModel,
        initial_prompt_template: PromptTemplate,
        refine_prompt_template: PromptTemplate,
        synthesize_prompt_template: Optional[PromptTemplate] = None,
    ) -> None:
        """Initializes an instance.

        Args:
            model (BaseEvalModel): The LLM model to use for evaluation.

            initial_prompt_template (PromptTemplate): The template for the
                initial invocation of the model that will generate the
                initial accumulator. Should contain the {chunk} placeholder.

            refine_prompt_template (PromptTemplate): The template for
                refining the accumulator across all subsequent chunks. Must
                contain the {chunk} and {accumulator} placeholders.

            synthesize_prompt_template (Optional[PromptTemplate], optional):
                An optional template to synthesize the final version of the
                accumulator. Must contain the {accumulator} placeholder.
        """
        self._model = model
        self._initial_prompt_template = initial_prompt_template
        self._refine_prompt_template = refine_prompt_template
        self._synthesize_prompt_template = synthesize_prompt_template

    def evaluate(self, chunks: List[str]) -> str:
        """Evaluates a list of two or more chunks.

        Args:
            chunks (List[str]): A list of chunks to be evaluated. Each chunk
                is inserted into the initial_prompt_template and
                refine_prompt_template and must therefore fit within the
                LLM's context window and still leave room for the rest of the
                prompt.

        Returns:
            str: The output of the refine process.
        """
        # Fewer than two chunks means the data already fits in one window,
        # so this strategy adds no value.
        if len(chunks) < 2:
            raise ValueError(
                "The refine strategy is not needed to evaluate data "
                "that fits within a single context window. "
                "Consider using llm_classify instead."
            )
        run = self._model
        first, *remaining = chunks
        # Seed the accumulator from the first chunk.
        accumulator = run(self._initial_prompt_template.format({"chunk": first}))
        # Fold each subsequent chunk into the accumulator.
        for piece in remaining:
            accumulator = run(
                self._refine_prompt_template.format(
                    {"accumulator": accumulator, "chunk": piece}
                )
            )
        # Without a synthesis template the raw accumulator is the result.
        if not self._synthesize_prompt_template:
            return accumulator
        return run(self._synthesize_prompt_template.format({"accumulator": accumulator}))
|