arize-phoenix 0.0.50rc1__tar.gz → 1.1.1__tar.gz
This diff shows the changes between publicly released package versions as they appear in their respective registries; it is provided for informational purposes only.
Potentially problematic release. This version of arize-phoenix might be problematic.
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/PKG-INFO +13 -7
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/README.md +7 -2
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/pyproject.toml +21 -18
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/__init__.py +1 -1
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/__init__.py +11 -10
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/evaluators.py +139 -0
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/functions/__init__.py +4 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/classify.py +125 -76
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/generate.py +32 -9
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/models/__init__.py +6 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/base.py +10 -8
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/openai.py +144 -77
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/vertexai.py +1 -1
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/retrievals.py +6 -3
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/__init__.py +38 -0
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/default_templates.py +343 -0
- arize_phoenix-1.1.1/src/phoenix/experimental/evals/templates/template.py +177 -0
- arize_phoenix-1.1.1/src/phoenix/server/static/index.js +6845 -0
- arize_phoenix-1.1.1/src/phoenix/trace/evaluation_conventions.py +26 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/instrumentor.py +1 -1
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/tracer.py +39 -32
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/callback.py +160 -50
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/openai/instrumentor.py +49 -40
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/semantic_conventions.py +2 -35
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/utils.py +13 -1
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/__init__.py +14 -8
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/utilities/logging.py +3 -1
- arize_phoenix-0.0.50rc1/src/phoenix/experimental/evals/functions/__init__.py +0 -4
- arize_phoenix-0.0.50rc1/src/phoenix/experimental/evals/models/__init__.py +0 -5
- arize_phoenix-0.0.50rc1/src/phoenix/experimental/evals/templates/__init__.py +0 -26
- arize_phoenix-0.0.50rc1/src/phoenix/experimental/evals/templates/default_templates.py +0 -128
- arize_phoenix-0.0.50rc1/src/phoenix/experimental/evals/templates/template.py +0 -138
- arize_phoenix-0.0.50rc1/src/phoenix/server/static/index.js +0 -6829
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/.gitignore +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/IP_NOTICE +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/LICENSE +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/config.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/core/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/core/embedding_dimension.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/core/model.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/core/model_schema.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/core/model_schema_adapter.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/core/traces.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/datasets/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/datasets/dataset.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/datasets/errors.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/datasets/fixtures.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/datasets/schema.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/datasets/validation.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/datetime_utils.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/functions/processing.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/models/bedrock.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/downloads.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/threads.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils/types.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/utils.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/metrics/README.md +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/metrics/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/metrics/binning.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/metrics/metrics.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/metrics/mixins.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/metrics/timeseries.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/metrics/wrappers.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/clustering.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/pointcloud.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/projectors.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/pointcloud/umap_parameters.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/py.typed +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/context.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/helpers.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/Granularity.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/input_types/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/interceptor.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/schema.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Cluster.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Dataset.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetInfo.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetRole.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DatasetValues.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Dimension.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionShape.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionType.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Event.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/EventMetadata.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ExportedFile.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Functionality.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/MimeType.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Model.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/NumericRange.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/PromptResponse.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Retrieval.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Segments.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/SortDir.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/Span.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/TimeSeries.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/ValidationResult.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/node.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/api/types/pagination.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/app.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/main.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/span_handler.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/apple-touch-icon.png +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/favicon.ico +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/index.css +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/static/modernizr.js +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/templates/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/templates/index.html +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/server/thread_server.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/services.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/session/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/session/session.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/exporter.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/filter.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/fixtures.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/langchain/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/llama_index/debug_callback.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/openai/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/schemas.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/span_json_decoder.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/span_json_encoder.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/trace_dataset.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/tracer.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/trace_pb2.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/trace/v1/trace_pb2.pyi +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/utilities/__init__.py +0 -0
- {arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/utilities/error_handling.py +0 -0
{arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arize-phoenix
-Version:
+Version: 1.1.1
 Summary: ML Observability in your notebook
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -35,22 +35,23 @@ Requires-Dist: uvicorn
 Requires-Dist: wrapt
 Provides-Extra: dev
 Requires-Dist: arize[autoembeddings,llm-evaluation]; extra == 'dev'
-Requires-Dist: black[jupyter]; extra == 'dev'
 Requires-Dist: gcsfs; extra == 'dev'
 Requires-Dist: hatch; extra == 'dev'
 Requires-Dist: jupyter; extra == 'dev'
-Requires-Dist: langchain>=0.0.
-Requires-Dist: llama-index>=0.
+Requires-Dist: langchain>=0.0.334; extra == 'dev'
+Requires-Dist: llama-index>=0.9.0; extra == 'dev'
 Requires-Dist: nbqa; extra == 'dev'
 Requires-Dist: pandas-stubs<=2.0.2.230605; extra == 'dev'
 Requires-Dist: pre-commit; extra == 'dev'
 Requires-Dist: pytest; extra == 'dev'
 Requires-Dist: pytest-cov; extra == 'dev'
 Requires-Dist: pytest-lazy-fixture; extra == 'dev'
-Requires-Dist: ruff==0.
+Requires-Dist: ruff==0.1.5; extra == 'dev'
 Requires-Dist: strawberry-graphql[debug-server]==0.208.2; extra == 'dev'
 Provides-Extra: experimental
 Requires-Dist: tenacity; extra == 'experimental'
+Provides-Extra: llama-index
+Requires-Dist: llama-index~=0.9.0; extra == 'llama-index'
 Description-Content-Type: text/markdown
 
 <p align="center">
@@ -102,6 +103,7 @@ Phoenix provides MLOps and LLMOps insights at lightning speed with zero-config o
 - [Exportable Clusters](#exportable-clusters)
 - [Retrieval-Augmented Generation Analysis](#retrieval-augmented-generation-analysis)
 - [Structured Data Analysis](#structured-data-analysis)
+- [Breaking Changes](#breaking-changes)
 - [Community](#community)
 - [Thanks](#thanks)
 - [Copyright, Patent, and License](#copyright-patent-and-license)
@@ -267,7 +269,7 @@ pip install arize-phoenix[experimental] ipython matplotlib openai pycm scikit-le
 
 ```python
 from phoenix.experimental.evals import (
-
+    RAG_RELEVANCY_PROMPT_TEMPLATE,
     RAG_RELEVANCY_PROMPT_RAILS_MAP,
     OpenAIModel,
     download_benchmark_dataset,
@@ -292,7 +294,7 @@ model = OpenAIModel(
     temperature=0.0,
 )
 rails =list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
-df["eval_relevance"] = llm_classify(df, model,
+df[["eval_relevance"]] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE, rails)
 #Golden dataset has True/False map to -> "irrelevant" / "relevant"
 #we can then scikit compare to output of template - same format
 y_true = df["relevant"].map({True: "relevant", False: "irrelevant"})
@@ -419,6 +421,10 @@ train_ds = px.Dataset(dataframe=train_df, schema=schema, name="training")
 session = px.launch_app(primary=prod_ds, reference=train_ds)
 ```
 
+## Breaking Changes
+
+- **v1.0.0** - Phoenix now exclusively supports the `openai>=1.0.0` sdk. If you are using an older version of the OpenAI SDK, you can continue to use `arize-phoenix==0.1.1`. However, we recommend upgrading to the latest version of the OpenAI SDK as it contains many improvements. If you are using Phoenix with LlamaIndex and and LangChain, you will have to upgrade to the versions of these packages that support the OpenAI `1.0.0` SDK as well (`llama-index>=0.8.64`, `langchain>=0.0.334`)
+
 ## Community
 
 Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
{arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/README.md

@@ -47,6 +47,7 @@ Phoenix provides MLOps and LLMOps insights at lightning speed with zero-config o
 - [Exportable Clusters](#exportable-clusters)
 - [Retrieval-Augmented Generation Analysis](#retrieval-augmented-generation-analysis)
 - [Structured Data Analysis](#structured-data-analysis)
+- [Breaking Changes](#breaking-changes)
 - [Community](#community)
 - [Thanks](#thanks)
 - [Copyright, Patent, and License](#copyright-patent-and-license)
@@ -212,7 +213,7 @@ pip install arize-phoenix[experimental] ipython matplotlib openai pycm scikit-le
 
 ```python
 from phoenix.experimental.evals import (
-
+    RAG_RELEVANCY_PROMPT_TEMPLATE,
     RAG_RELEVANCY_PROMPT_RAILS_MAP,
     OpenAIModel,
     download_benchmark_dataset,
@@ -237,7 +238,7 @@ model = OpenAIModel(
     temperature=0.0,
 )
 rails =list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
-df["eval_relevance"] = llm_classify(df, model,
+df[["eval_relevance"]] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE, rails)
 #Golden dataset has True/False map to -> "irrelevant" / "relevant"
 #we can then scikit compare to output of template - same format
 y_true = df["relevant"].map({True: "relevant", False: "irrelevant"})
@@ -364,6 +365,10 @@ train_ds = px.Dataset(dataframe=train_df, schema=schema, name="training")
 session = px.launch_app(primary=prod_ds, reference=train_ds)
 ```
 
+## Breaking Changes
+
+- **v1.0.0** - Phoenix now exclusively supports the `openai>=1.0.0` sdk. If you are using an older version of the OpenAI SDK, you can continue to use `arize-phoenix==0.1.1`. However, we recommend upgrading to the latest version of the OpenAI SDK as it contains many improvements. If you are using Phoenix with LlamaIndex and and LangChain, you will have to upgrade to the versions of these packages that support the OpenAI `1.0.0` SDK as well (`llama-index>=0.8.64`, `langchain>=0.0.334`)
+
 ## Community
 
 Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
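The two README hunks above change both the import surface (`RAG_RELEVANCY_PROMPT_TEMPLATE` replaces the old string constant) and the assignment target (`llm_classify` now returns a DataFrame, hence `df[["eval_relevance"]]`). Below is a minimal sketch of the updated call; the sample DataFrame columns and the `model_name` argument are illustrative assumptions, not taken from the diff.

```python
import pandas as pd

from phoenix.experimental.evals import (
    RAG_RELEVANCY_PROMPT_TEMPLATE,
    RAG_RELEVANCY_PROMPT_RAILS_MAP,
    OpenAIModel,
    llm_classify,
)

# Hypothetical two-column frame; the real column names must match the
# variables used by RAG_RELEVANCY_PROMPT_TEMPLATE (not shown in the hunks).
# In the README the data comes from download_benchmark_dataset instead.
df = pd.DataFrame(
    {
        "input": ["What is Phoenix?"],
        "reference": ["Phoenix is an open-source ML observability library."],
    }
)

# Requires OPENAI_API_KEY; the model name is an illustrative choice.
model = OpenAIModel(
    model_name="gpt-4",
    temperature=0.0,
)
rails = list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())

# llm_classify returns a DataFrame, so assign through a column list.
df[["eval_relevance"]] = llm_classify(df, model, RAG_RELEVANCY_PROMPT_TEMPLATE, rails)
```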
{arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/pyproject.toml

@@ -43,12 +43,11 @@ dynamic = ["version"]
 
 [project.optional-dependencies]
 dev = [
-  "black[jupyter]",
   "gcsfs",
   "hatch",
   "jupyter",
   "nbqa",
-  "ruff==0.
+  "ruff==0.1.5",
   "pandas-stubs<=2.0.2.230605", # version 2.0.3.230814 is causing a dependency conflict.
   "pytest",
   "pytest-cov",
@@ -56,12 +55,15 @@ dev = [
   "strawberry-graphql[debug-server]==0.208.2",
   "pre-commit",
   "arize[AutoEmbeddings, LLM_Evaluation]",
-  "llama-index>=0.
-  "langchain>=0.0.
+  "llama-index>=0.9.0",
+  "langchain>=0.0.334",
 ]
 experimental = [
   "tenacity",
 ]
+llama-index = [
+  "llama-index~=0.9.0",
+]
 
 [project.urls]
 Documentation = "https://docs.arize.com/phoenix/"
@@ -92,9 +94,9 @@ dependencies = [
   "pytest-cov",
   "pytest-lazy-fixture",
   "arize",
-  "langchain>=0.0.
-  "llama-index>=0.
-  "openai",
+  "langchain>=0.0.334",
+  "llama-index>=0.9.0",
+  "openai>=1.0.0",
   "tenacity",
   "nltk==3.8.1",
   "sentence-transformers==2.2.2",
@@ -104,25 +106,26 @@ dependencies = [
   "responses",
   "tiktoken",
   "typing-extensions<4.6.0", # for Colab
+  "httpx", # For OpenAI testing
+  "respx", # For OpenAI testing
 ]
 
 [tool.hatch.envs.type]
 dependencies = [
   "mypy==1.5.1",
-  "llama-index>=0.
+  "llama-index>=0.9.0",
   "pandas-stubs<=2.0.2.230605", # version 2.0.3.230814 is causing a dependency conflict.
   "types-psutil",
   "types-tqdm",
   "types-requests",
   "types-protobuf",
+  "openai>=1.0.0",
 ]
 
 [tool.hatch.envs.style]
 detached = true
 dependencies = [
-  "
-  "black[jupyter]~=23.3.0",
-  "ruff~=0.0.290",
+  "ruff~=0.1.5",
 ]
 
 [tool.hatch.envs.notebooks]
@@ -178,11 +181,11 @@ check = [
 
 [tool.hatch.envs.style.scripts]
 check = [
-  "black --check --diff --color .",
   "ruff .",
+  "ruff format --check --diff .",
 ]
 fix = [
-  "
+  "ruff format .",
   "ruff --fix .",
 ]
 
@@ -207,10 +210,6 @@ pypi = [
   "twine upload --verbose dist/*",
 ]
 
-[tool.black]
-line-length = 100
-exclude = '_pb2\.pyi?$'
-
 [tool.hatch.envs.docs.scripts]
 check = [
   "interrogate -vv src/",
@@ -278,11 +277,15 @@ module = [
 ignore_missing_imports = true
 
 [tool.ruff]
-exclude = [".git", "__pycache__", "docs/source/conf.py", "*_pb2.py*"]
+exclude = [".git", "__pycache__", "docs/source/conf.py", "*_pb2.py*", "*.pyi"]
+extend-include = ["*.ipynb"]
 ignore-init-module-imports = true
 line-length = 100
 select = ["E", "F", "W", "I"]
 target-version = "py38"
 
+[tool.ruff.lint.per-file-ignores]
+"*.ipynb" = ["E402", "E501"]
+
 [tool.ruff.isort]
 force-single-line = false
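Beyond the version bumps, two packaging changes stand out in this file: a new `llama-index` optional dependency group, which can presumably be pulled in with `pip install 'arize-phoenix[llama-index]'`, and the replacement of Black with Ruff's formatter, since the `[tool.black]` table is deleted and the style scripts now run `ruff format` alongside `ruff`.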
{arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/__init__.py

@@ -5,7 +5,7 @@ from .session.session import Session, active_session, close_app, launch_app
 from .trace.fixtures import load_example_traces
 from .trace.trace_dataset import TraceDataset
 
-__version__ = "
+__version__ = "1.1.1"
 
 # module level doc-string
 __doc__ = """
{arize_phoenix-0.0.50rc1 → arize_phoenix-1.1.1}/src/phoenix/experimental/evals/__init__.py

@@ -1,16 +1,17 @@
-from .functions import llm_classify,
+from .functions import llm_classify, llm_generate, run_relevance_eval
 from .models import OpenAIModel, VertexAIModel
 from .retrievals import compute_precisions_at_k
 from .templates import (
     CODE_READABILITY_PROMPT_RAILS_MAP,
-
+    CODE_READABILITY_PROMPT_TEMPLATE,
     HALLUCINATION_PROMPT_RAILS_MAP,
-
+    HALLUCINATION_PROMPT_TEMPLATE,
     NOT_PARSABLE,
     RAG_RELEVANCY_PROMPT_RAILS_MAP,
-
+    RAG_RELEVANCY_PROMPT_TEMPLATE,
     TOXICITY_PROMPT_RAILS_MAP,
-
+    TOXICITY_PROMPT_TEMPLATE,
+    ClassificationTemplate,
     PromptTemplate,
 )
 from .utils.downloads import download_benchmark_dataset
@@ -19,19 +20,19 @@ __all__ = [
     "compute_precisions_at_k",
     "download_benchmark_dataset",
     "llm_classify",
-    "llm_eval_binary",
     "llm_generate",
     "OpenAIModel",
     "VertexAIModel",
     "PromptTemplate",
+    "ClassificationTemplate",
     "CODE_READABILITY_PROMPT_RAILS_MAP",
-    "
+    "CODE_READABILITY_PROMPT_TEMPLATE",
     "HALLUCINATION_PROMPT_RAILS_MAP",
-    "
+    "HALLUCINATION_PROMPT_TEMPLATE",
     "RAG_RELEVANCY_PROMPT_RAILS_MAP",
-    "
-    "TOXICITY_PROMPT_TEMPLATE_STR",
+    "RAG_RELEVANCY_PROMPT_TEMPLATE",
     "TOXICITY_PROMPT_RAILS_MAP",
+    "TOXICITY_PROMPT_TEMPLATE",
     "NOT_PARSABLE",
     "run_relevance_eval",
 ]
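This hunk is the import-level view of the evals API rename: `llm_eval_binary` disappears in favor of `llm_classify`, the `*_TEMPLATE_STR` string constants become `*_PROMPT_TEMPLATE` objects, and `ClassificationTemplate` is newly exported. A before/after import sketch using only names visible in this hunk (the commented "before" line is reconstructed from the removed `__all__` entries):

```python
# Before (0.0.50rc1):
# from phoenix.experimental.evals import llm_eval_binary, TOXICITY_PROMPT_TEMPLATE_STR

# After (1.1.1):
from phoenix.experimental.evals import (
    ClassificationTemplate,
    TOXICITY_PROMPT_RAILS_MAP,
    TOXICITY_PROMPT_TEMPLATE,
    llm_classify,
)
```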
arize_phoenix-1.1.1/src/phoenix/experimental/evals/evaluators.py (new file)

@@ -0,0 +1,139 @@
+from typing import List, Optional
+
+from phoenix.experimental.evals import PromptTemplate
+from phoenix.experimental.evals.models import BaseEvalModel
+
+
+class MapReducer:
+    """
+    Evaluates data that is too large to fit into a single context window using a
+    map-reduce strategy. The data must first be divided into "chunks" that
+    individually fit into an LLM's context window. Each chunk of data is
+    individually evaluated (the "map" step), producing intermediate outputs that
+    are combined into a single result (the "reduce" step).
+
+    This is the simplest strategy for evaluating long-context data.
+    """
+
+    def __init__(
+        self,
+        model: BaseEvalModel,
+        map_prompt_template: PromptTemplate,
+        reduce_prompt_template: PromptTemplate,
+    ) -> None:
+        """Initializes an instance.
+
+        Args:
+            model (BaseEvalModel): The LLM model to use for evaluation.
+
+            map_prompt_template (PromptTemplate): The template that is mapped
+            over each chunk to produce intermediate outputs. Must contain the
+            {chunk} placeholder.
+
+            reduce_prompt_template (PromptTemplate): The template that combines
+            the intermediate outputs into a single result. Must contain the
+            {mapped} placeholder, which will be formatted as a list of the
+            intermediate outputs produced by the map step.
+        """
+        self._model = model
+        self._map_prompt_template = map_prompt_template
+        self._reduce_prompt_template = reduce_prompt_template
+
+    def evaluate(self, chunks: List[str]) -> str:
+        """Evaluates a list of two or more chunks.
+
+        Args:
+            chunks (List[str]): A list of chunks to be evaluated. Each chunk is
+            inserted into the map_prompt_template and must therefore fit within
+            the LLM's context window and still leave room for the rest of the
+            prompt.
+
+        Returns:
+            str: The output of the map-reduce process.
+        """
+        if len(chunks) < 2:
+            raise ValueError(
+                "The map-reduce strategy is not needed to evaluate data "
+                "that fits within a single context window. "
+                "Consider using llm_classify instead."
+            )
+        model = self._model
+        mapped_records = []
+        for chunk in chunks:
+            map_prompt = self._map_prompt_template.format({"chunk": chunk})
+            intermediate_output = model(map_prompt)
+            mapped_records.append(intermediate_output)
+        reduce_prompt = self._reduce_prompt_template.format({"mapped": repr(mapped_records)})
+        return model(reduce_prompt)
+
+
+class Refiner:
+    """
+    Evaluates data that is too large to fit into a single context window using a
+    refine strategy. The data must first be divided into "chunks" that
+    individually fit into an LLM's context window. An initial "accumulator" is
+    generated from the first chunk of data. The accumulator is subsequently
+    refined by iteratively updating and incorporating new information from each
+    subsequent chunk. An optional synthesis step can be used to synthesize the
+    final accumulator into a desired format.
+    """
+
+    def __init__(
+        self,
+        model: BaseEvalModel,
+        initial_prompt_template: PromptTemplate,
+        refine_prompt_template: PromptTemplate,
+        synthesize_prompt_template: Optional[PromptTemplate] = None,
+    ) -> None:
+        """Initializes an instance.
+
+        Args:
+            model (BaseEvalModel): The LLM model to use for evaluation.
+
+            initial_prompt_template (PromptTemplate): The template for the
+            initial invocation of the model that will generate the initial
+            accumulator. Should contain the {chunk} placeholder.
+
+            refine_prompt_template (PromptTemplate): The template for refining
+            the accumulator across all subsequence chunks. Must contain the
+            {chunk} and {accumulator} placeholders.
+
+            synthesize_prompt_template (Optional[PromptTemplate], optional): An
+            optional template to synthesize the final version of the
+            accumulator. Must contain the {accumulator} placeholder.
+        """
+        self._model = model
+        self._initial_prompt_template = initial_prompt_template
+        self._refine_prompt_template = refine_prompt_template
+        self._synthesize_prompt_template = synthesize_prompt_template
+
+    def evaluate(self, chunks: List[str]) -> str:
+        """Evaluates a list of two or more chunks.
+
+        Args:
+            chunks (List[str]): A list of chunks to be evaluated. Each chunk is
+            inserted into the initial_prompt_template and refine_prompt_template
+            and must therefore fit within the LLM's context window and still
+            leave room for the rest of the prompt.
+
+        Returns:
+            str: The output of the refine process.
+        """
+        if len(chunks) < 2:
+            raise ValueError(
+                "The refine strategy is not needed to evaluate data "
+                "that fits within a single context window. "
+                "Consider using llm_classify instead."
+            )
+        model = self._model
+        initial_prompt = self._initial_prompt_template.format({"chunk": chunks[0]})
+        accumulator = model(initial_prompt)
+        for chunk in chunks[1:]:
+            refine_prompt = self._refine_prompt_template.format(
+                {"accumulator": accumulator, "chunk": chunk}
+            )
+            accumulator = model(refine_prompt)
+        if not self._synthesize_prompt_template:
+            return accumulator
+        reduce_prompt = self._synthesize_prompt_template.format({"accumulator": accumulator})
+        return model(reduce_prompt)