arize-phoenix 2.7.0__tar.gz → 2.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of arize-phoenix has been flagged as potentially problematic.
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/PKG-INFO +5 -2
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/README.md +4 -1
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/pyproject.toml +1 -0
- arize_phoenix-2.8.0/src/phoenix/exceptions.py +6 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/functions/classify.py +1 -1
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/anthropic.py +27 -22
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/base.py +1 -56
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/bedrock.py +23 -13
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/litellm.py +10 -17
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/openai.py +46 -53
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/vertex.py +19 -29
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/vertexai.py +1 -20
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/schema.py +2 -3
- arize_phoenix-2.8.0/src/phoenix/server/static/index.js +7195 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/session/session.py +2 -1
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/exporter.py +15 -11
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/fixtures.py +10 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/llama_index/callback.py +5 -5
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/llama_index/streaming.py +3 -4
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/otel.py +49 -21
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/schemas.py +2 -2
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/span_json_decoder.py +5 -4
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/tracer.py +6 -5
- arize_phoenix-2.8.0/src/phoenix/version.py +1 -0
- arize_phoenix-2.7.0/src/phoenix/exceptions.py +0 -2
- arize_phoenix-2.7.0/src/phoenix/server/static/index.js +0 -7155
- arize_phoenix-2.7.0/src/phoenix/version.py +0 -1
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/.gitignore +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/IP_NOTICE +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/LICENSE +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/config.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/core/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/core/embedding_dimension.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/core/evals.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/core/model.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/core/model_schema.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/core/model_schema_adapter.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/core/traces.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/datasets/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/datasets/dataset.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/datasets/errors.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/datasets/fixtures.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/datasets/schema.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/datasets/validation.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/datetime_utils.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/evaluators.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/functions/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/functions/executor.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/functions/generate.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/functions/processing.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/rate_limiters.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/retrievals.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/templates/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/templates/default_templates.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/templates/template.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/utils/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/utils/threads.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/metrics/README.md +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/metrics/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/metrics/binning.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/metrics/metrics.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/metrics/mixins.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/metrics/retrieval_metrics.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/metrics/timeseries.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/metrics/wrappers.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/pointcloud/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/pointcloud/clustering.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/pointcloud/pointcloud.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/pointcloud/projectors.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/pointcloud/umap_parameters.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/py.typed +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/context.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/helpers.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/Granularity.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/input_types/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/interceptor.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Cluster.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Dataset.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DatasetInfo.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DatasetRole.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DatasetValues.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Dimension.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DimensionShape.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DimensionType.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Evaluation.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Event.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/EventMetadata.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/ExportedFile.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Functionality.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/MimeType.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Model.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/NumericRange.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/PromptResponse.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Retrieval.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Segments.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/SortDir.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/Span.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/TimeSeries.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/ValidationResult.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/node.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/types/pagination.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/app.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/evaluation_handler.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/main.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/span_handler.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/apple-touch-icon.png +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/favicon.ico +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/index.css +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/static/modernizr.js +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/templates/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/templates/index.html +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/thread_server.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/trace_handler.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/services.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/session/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/session/evaluation.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/dsl/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/dsl/filter.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/dsl/helpers.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/dsl/missing.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/dsl/query.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/errors.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/evaluation_conventions.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/langchain/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/langchain/instrumentor.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/langchain/tracer.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/llama_index/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/llama_index/debug_callback.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/openai/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/openai/instrumentor.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/semantic_conventions.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/span_evaluations.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/span_json_encoder.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/trace_dataset.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/utils.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/v1/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/utilities/__init__.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/utilities/error_handling.py +0 -0
- {arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/utilities/logging.py +0 -0
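The headline change in this release is the error-handling rework visible in the list above: the tenacity-based `_retry` helper is deleted from `base.py` (−56 lines), each model wrapper now invokes its client directly under the existing rate limiter, and the two-line `exceptions.py` is replaced by a six-line version defining the `PhoenixContextLimitExceeded` error raised throughout the diffs below. The new module's body is not rendered in this diff; judging only from the import statements that follow, a minimal sketch could look like this (the `PhoenixException` base-class name is an assumption, not confirmed by the diff):

    # src/phoenix/exceptions.py: hypothetical reconstruction; only the name
    # PhoenixContextLimitExceeded is confirmed by the imports in this diff.
    class PhoenixException(Exception):
        """Assumed base class for Phoenix errors."""


    class PhoenixContextLimitExceeded(PhoenixException):
        """Raised when a prompt exceeds a model's context window."""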
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arize-phoenix
-Version: 2.7.0
+Version: 2.8.0
 Summary: ML Observability in your notebook
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -86,6 +86,9 @@ Description-Content-Type: text/markdown
   <a target="_blank" href="https://pypi.org/project/arize-phoenix/">
     <img src="https://img.shields.io/pypi/pyversions/arize-phoenix">
   </a>
+  <a target="_blank" href="https://hub.docker.com/repository/docker/arizephoenix/phoenix/general">
+    <img src="https://img.shields.io/docker/v/arizephoenix/phoenix?sort=semver&logo=docker&label=image&color=blue">
+  </a>
 </p>


@@ -134,7 +137,7 @@ pip install arize-phoenix[experimental]



-With the advent of powerful LLMs, it is now possible to build LLM Applications that can perform complex tasks like summarization, translation, question and answering, and more. However, these applications are often difficult to debug and troubleshoot as they have an extensive surface area: search and retrieval via vector stores, embedding generation, usage of external tools and so on. Phoenix provides a tracing framework that allows you to trace through the execution of your LLM Application hierarchically. This allows you to understand the internals of your LLM Application and to troubleshoot the complex components of your applicaition. Phoenix is built on top of the OpenInference tracing standard and uses it to trace, export, and collect critical information about your LLM Application in the form of `spans`. For more details on the OpenInference tracing standard, see the [OpenInference Specification](https://github.com/Arize-ai/
+With the advent of powerful LLMs, it is now possible to build LLM Applications that can perform complex tasks like summarization, translation, question and answering, and more. However, these applications are often difficult to debug and troubleshoot as they have an extensive surface area: search and retrieval via vector stores, embedding generation, usage of external tools and so on. Phoenix provides a tracing framework that allows you to trace through the execution of your LLM Application hierarchically. This allows you to understand the internals of your LLM Application and to troubleshoot the complex components of your applicaition. Phoenix is built on top of the OpenInference tracing standard and uses it to trace, export, and collect critical information about your LLM Application in the form of `spans`. For more details on the OpenInference tracing standard, see the [OpenInference Specification](https://github.com/Arize-ai/openinference)

 ### Tracing with LlamaIndex

{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/README.md
RENAMED
@@ -22,6 +22,9 @@
   <a target="_blank" href="https://pypi.org/project/arize-phoenix/">
     <img src="https://img.shields.io/pypi/pyversions/arize-phoenix">
   </a>
+  <a target="_blank" href="https://hub.docker.com/repository/docker/arizephoenix/phoenix/general">
+    <img src="https://img.shields.io/docker/v/arizephoenix/phoenix?sort=semver&logo=docker&label=image&color=blue">
+  </a>
 </p>


@@ -70,7 +73,7 @@ pip install arize-phoenix[experimental]



-With the advent of powerful LLMs, it is now possible to build LLM Applications that can perform complex tasks like summarization, translation, question and answering, and more. However, these applications are often difficult to debug and troubleshoot as they have an extensive surface area: search and retrieval via vector stores, embedding generation, usage of external tools and so on. Phoenix provides a tracing framework that allows you to trace through the execution of your LLM Application hierarchically. This allows you to understand the internals of your LLM Application and to troubleshoot the complex components of your applicaition. Phoenix is built on top of the OpenInference tracing standard and uses it to trace, export, and collect critical information about your LLM Application in the form of `spans`. For more details on the OpenInference tracing standard, see the [OpenInference Specification](https://github.com/Arize-ai/
+With the advent of powerful LLMs, it is now possible to build LLM Applications that can perform complex tasks like summarization, translation, question and answering, and more. However, these applications are often difficult to debug and troubleshoot as they have an extensive surface area: search and retrieval via vector stores, embedding generation, usage of external tools and so on. Phoenix provides a tracing framework that allows you to trace through the execution of your LLM Application hierarchically. This allows you to understand the internals of your LLM Application and to troubleshoot the complex components of your applicaition. Phoenix is built on top of the OpenInference tracing standard and uses it to trace, export, and collect critical information about your LLM Application in the form of `spans`. For more details on the OpenInference tracing standard, see the [OpenInference Specification](https://github.com/Arize-ai/openinference)

 ### Tracing with LlamaIndex

{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/pyproject.toml
RENAMED
@@ -124,6 +124,7 @@ dependencies = [
 [tool.hatch.envs.type]
 dependencies = [
     "mypy==1.5.1",
+    "pydantic==v1.10.14", # for mypy
     "llama-index>=0.9.14",
     "pandas-stubs<=2.0.2.230605", # version 2.0.3.230814 is causing a dependency conflict.
     "types-psutil",
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/functions/classify.py
RENAMED
@@ -249,7 +249,7 @@ def run_relevance_eval(

         This latter format is intended for running evaluations on exported OpenInference trace
         dataframes. For more information on the OpenInference tracing specification, see
-        https://github.com/Arize-ai/
+        https://github.com/Arize-ai/openinference/.

         model (BaseEvalModel): The model used for evaluation.

{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/anthropic.py
RENAMED
@@ -1,6 +1,7 @@
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Dict, List, Optional

+from phoenix.exceptions import PhoenixContextLimitExceeded
 from phoenix.experimental.evals.models.base import BaseEvalModel
 from phoenix.experimental.evals.models.rate_limiters import RateLimiter

@@ -44,12 +45,6 @@ class AnthropicModel(BaseEvalModel):
         self._init_client()
         self._init_tiktoken()
         self._init_rate_limiter()
-        self.retry = self._retry(
-            error_types=[],  # default to catching all errors
-            min_seconds=self.retry_min_seconds,
-            max_seconds=self.retry_max_seconds,
-            max_retries=self.max_retries,
-        )

     def _init_environment(self) -> None:
         try:
@@ -127,7 +122,7 @@ class AnthropicModel(BaseEvalModel):
         kwargs.pop("instruction", None)
         invocation_parameters = self.invocation_parameters()
         invocation_parameters.update(kwargs)
-        response = self.
+        response = self._rate_limited_completion(
             model=self.model,
             prompt=self._format_prompt_for_claude(prompt),
             **invocation_parameters,
@@ -135,14 +130,19 @@ class AnthropicModel(BaseEvalModel):

         return str(response)

-    def
-        @self.retry
+    def _rate_limited_completion(self, **kwargs: Any) -> Any:
         @self._rate_limiter.limit
-        def
-
-
-
-
+        def _completion(**kwargs: Any) -> Any:
+            try:
+                response = self.client.completions.create(**kwargs)
+                return response.completion
+            except self._anthropic.BadRequestError as e:
+                exception_message = e.args[0]
+                if exception_message and "prompt is too long" in exception_message:
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e
+
+        return _completion(**kwargs)

     async def _async_generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
         # instruction is an invalid input to Anthropic models, it is passed in by
@@ -150,20 +150,25 @@ class AnthropicModel(BaseEvalModel):
         kwargs.pop("instruction", None)
         invocation_parameters = self.invocation_parameters()
         invocation_parameters.update(kwargs)
-        response = await self.
+        response = await self._async_rate_limited_completion(
             model=self.model, prompt=self._format_prompt_for_claude(prompt), **invocation_parameters
         )

         return str(response)

-    async def
-        @self.retry
+    async def _async_rate_limited_completion(self, **kwargs: Any) -> Any:
         @self._rate_limiter.alimit
-        async def
-
-
-
-
+        async def _async_completion(**kwargs: Any) -> Any:
+            try:
+                response = await self.async_client.completions.create(**kwargs)
+                return response.completion
+            except self._anthropic.BadRequestError as e:
+                exception_message = e.args[0]
+                if exception_message and "prompt is too long" in exception_message:
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e
+
+        return await _async_completion(**kwargs)

     def _format_prompt_for_claude(self, prompt: str) -> str:
         # Claude requires prompt in the format of Human: ... Assistant:
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/base.py
RENAMED
@@ -2,22 +2,13 @@ import logging
 from abc import ABC, abstractmethod, abstractproperty
 from contextlib import contextmanager
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, Generator, List, Optional, Sequence

 from phoenix.experimental.evals.models.rate_limiters import RateLimiter

 if TYPE_CHECKING:
     from tiktoken import Encoding

-
-from tenacity import (
-    RetryCallState,
-    retry,
-    retry_base,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)
 from tqdm.asyncio import tqdm_asyncio
 from tqdm.auto import tqdm
 from typing_extensions import TypeVar
@@ -65,52 +56,6 @@ class BaseEvalModel(ABC):
     def reload_client(self) -> None:
         pass

-    def _retry(
-        self,
-        error_types: List[Type[BaseException]],
-        min_seconds: int,
-        max_seconds: int,
-        max_retries: int,
-    ) -> Callable[[Any], Any]:
-        """Create a retry decorator for a given LLM and provided list of error types."""
-
-        def log_retry(retry_state: RetryCallState) -> None:
-            if fut := retry_state.outcome:
-                exc = fut.exception()
-            else:
-                exc = None
-
-            if exc:
-                printif(
-                    self._verbose,
-                    (
-                        f"Failed attempt {retry_state.attempt_number}: "
-                        f"{type(exc).__module__}.{type(exc).__name__}"
-                    ),
-                )
-                printif(
-                    True,
-                    f"Failed attempt {retry_state.attempt_number}: raised {repr(exc)}",
-                )
-            else:
-                printif(True, f"Failed attempt {retry_state.attempt_number}")
-            return None
-
-        if not error_types:
-            # default to retrying on all exceptions
-            error_types = [Exception]
-
-        retry_instance: retry_base = retry_if_exception_type(error_types[0])
-        for error in error_types[1:]:
-            retry_instance = retry_instance | retry_if_exception_type(error)
-        return retry(
-            reraise=True,
-            stop=stop_after_attempt(max_retries),
-            wait=wait_random_exponential(multiplier=1, min=min_seconds, max=max_seconds),
-            retry=retry_instance,
-            before_sleep=log_retry,
-        )
-
     def __call__(self, prompt: str, instruction: Optional[str] = None, **kwargs: Any) -> str:
         """Run the LLM on the given prompt."""
         if not isinstance(prompt, str):
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/bedrock.py
RENAMED
@@ -3,6 +3,7 @@ import logging
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Dict, List, Optional

+from phoenix.exceptions import PhoenixContextLimitExceeded
 from phoenix.experimental.evals.models.base import BaseEvalModel
 from phoenix.experimental.evals.models.rate_limiters import RateLimiter

@@ -54,12 +55,6 @@ class BedrockModel(BaseEvalModel):
         self._init_client()
         self._init_tiktoken()
         self._init_rate_limiter()
-        self.retry = self._retry(
-            error_types=[],  # default to catching all errors
-            min_seconds=self.retry_min_seconds,
-            max_seconds=self.retry_max_seconds,
-            max_retries=self.max_retries,
-        )

     def _init_environment(self) -> None:
         try:
@@ -130,21 +125,36 @@ class BedrockModel(BaseEvalModel):
         accept = "application/json"
         contentType = "application/json"

-        response = self.
+        response = self._rate_limited_completion(
             body=body, modelId=self.model_id, accept=accept, contentType=contentType
         )

         return self._parse_output(response) or ""

-    def
+    def _rate_limited_completion(self, **kwargs: Any) -> Any:
         """Use tenacity to retry the completion call."""

-        @self.retry
         @self._rate_limiter.limit
-        def
-
-
-
+        def _completion(**kwargs: Any) -> Any:
+            try:
+                return self.client.invoke_model(**kwargs)
+            except Exception as e:
+                exception_message = e.args[0]
+                if not exception_message:
+                    raise e
+
+                if "Input is too long" in exception_message:
+                    # Error from Anthropic models
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                elif "expected maxLength" in exception_message:
+                    # Error from Titan models
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                elif "Prompt has too many tokens" in exception_message:
+                    # Error from AI21 models
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e
+
+        return _completion(**kwargs)

     def _format_prompt_for_claude(self, prompt: str) -> str:
         # Claude requires prompt in the format of Human: ... Assisatnt:
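Because Bedrock fronts several model families, the new `_completion` handler has to recognize each provider's wording for the same failure before mapping it to `PhoenixContextLimitExceeded`. The string matching, extracted as a standalone sketch (the three markers are exactly the ones the diff checks for; the helper name is illustrative):

    CONTEXT_LIMIT_MARKERS = (
        "Input is too long",           # Anthropic models on Bedrock
        "expected maxLength",          # Titan models
        "Prompt has too many tokens",  # AI21 models
    )

    def is_context_limit_error(message: str) -> bool:
        # True when a Bedrock error message indicates a context-window overflow
        return any(marker in message for marker in CONTEXT_LIMIT_MARKERS)

    assert is_context_limit_error("ValidationException: Input is too long for the model")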
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/litellm.py
RENAMED
@@ -95,24 +95,17 @@ class LiteLLMModel(BaseEvalModel):

     def _generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
         messages = self._get_messages_from_prompt(prompt)
-
-        self.
-
-
-
-
-
-
-
-            **self.model_kwargs,
-        )
+        response = self._litellm.completion(
+            model=self.model_name,
+            messages=messages,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens,
+            top_p=self.top_p,
+            num_retries=self.num_retries,
+            request_timeout=self.request_timeout,
+            **self.model_kwargs,
         )
-
-    def _generate_with_retry(self, **kwargs: Any) -> Any:
-        # Using default LiteLLM completion with retries = self.num_retries.
-
-        response = self._litellm.completion(**kwargs)
-        return response.choices[0].message.content
+        return str(response.choices[0].message.content)

     def _get_messages_from_prompt(self, prompt: str) -> List[Dict[str, str]]:
         # LiteLLM requires prompts in the format of messages
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/openai.py
RENAMED
@@ -14,6 +14,7 @@ from typing import (
     get_origin,
 )

+from phoenix.exceptions import PhoenixContextLimitExceeded
 from phoenix.experimental.evals.models.base import BaseEvalModel
 from phoenix.experimental.evals.models.rate_limiters import RateLimiter

@@ -114,25 +115,11 @@ class OpenAIModel(BaseEvalModel):

     def _init_environment(self) -> None:
         try:
-            import httpx
             import openai
             import openai._utils as openai_util

             self._openai = openai
             self._openai_util = openai_util
-            self._openai_retry_errors = [
-                self._openai.APITimeoutError,
-                self._openai.APIError,
-                self._openai.APIConnectionError,
-                self._openai.InternalServerError,
-                httpx.ReadTimeout,
-            ]
-            self.retry = self._retry(
-                error_types=self._openai_retry_errors,
-                min_seconds=self.retry_min_seconds,
-                max_seconds=self.retry_max_seconds,
-                max_retries=self.max_retries,
-            )
         except ImportError:
             self._raise_import_error(
                 package_display_name="OpenAI",
@@ -265,7 +252,7 @@ class OpenAIModel(BaseEvalModel):
             invoke_params["functions"] = functions
         if function_call := kwargs.get("function_call"):
             invoke_params["function_call"] = function_call
-        response = await self.
+        response = await self._async_rate_limited_completion(
             messages=messages,
             **invoke_params,
         )
@@ -284,7 +271,7 @@ class OpenAIModel(BaseEvalModel):
             invoke_params["functions"] = functions
         if function_call := kwargs.get("function_call"):
             invoke_params["function_call"] = function_call
-        response = self.
+        response = self._rate_limited_completion(
             messages=messages,
             **invoke_params,
         )
@@ -296,45 +283,51 @@ class OpenAIModel(BaseEvalModel):
             return str(function_call.get("arguments") or "")
         return str(message["content"])

-    async def
-        """Use tenacity to retry the completion call."""
-
-        @self.retry
+    async def _async_rate_limited_completion(self, **kwargs: Any) -> Any:
         @self._rate_limiter.alimit
-        async def
-
-        if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        async def _async_completion(**kwargs: Any) -> Any:
+            try:
+                if self._model_uses_legacy_completion_api:
+                    if "prompt" not in kwargs:
+                        kwargs["prompt"] = "\n\n".join(
+                            (message.get("content") or "")
+                            for message in (kwargs.pop("messages", None) or ())
+                        )
+                    # OpenAI 1.0.0 API responses are pydantic objects, not dicts
+                    # We must dump the model to get the dict
+                    res = await self._async_client.completions.create(**kwargs)
+                else:
+                    res = await self._async_client.chat.completions.create(**kwargs)
+                return res.model_dump()
+            except self._openai._exceptions.BadRequestError as e:
+                exception_message = e.args[0]
+                if exception_message and "maximum context length" in exception_message:
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e
+
+        return await _async_completion(**kwargs)
+
+    def _rate_limited_completion(self, **kwargs: Any) -> Any:
         @self._rate_limiter.limit
-        def
-
-        if
-
-
-
-
-
-
-
-
-
+        def _completion(**kwargs: Any) -> Any:
+            try:
+                if self._model_uses_legacy_completion_api:
+                    if "prompt" not in kwargs:
+                        kwargs["prompt"] = "\n\n".join(
+                            (message.get("content") or "")
+                            for message in (kwargs.pop("messages", None) or ())
+                        )
+                    # OpenAI 1.0.0 API responses are pydantic objects, not dicts
+                    # We must dump the model to get the dict
+                    return self._client.completions.create(**kwargs).model_dump()
+                return self._client.chat.completions.create(**kwargs).model_dump()
+            except self._openai._exceptions.BadRequestError as e:
+                exception_message = e.args[0]
+                if exception_message and "maximum context length" in exception_message:
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e
+
+        return _completion(**kwargs)

     @property
     def max_context_size(self) -> int:
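A subtlety in the rewritten OpenAI helpers: for models on the legacy completions API, chat-style `messages` are flattened into a single `prompt` by joining each message's content with blank lines, but only when the caller did not pass an explicit `prompt`. A self-contained sketch of that conversion (the function name is illustrative; the body mirrors the fallback added in `_completion`):

    def messages_to_prompt(kwargs: dict) -> dict:
        # Only applies when no explicit prompt was provided; tolerates a
        # missing or empty "messages" key, and None message contents.
        if "prompt" not in kwargs:
            kwargs["prompt"] = "\n\n".join(
                (message.get("content") or "")
                for message in (kwargs.pop("messages", None) or ())
            )
        return kwargs

    print(messages_to_prompt({"messages": [
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": "Say hi."},
    ]}))
    # {'prompt': 'You are terse.\n\nSay hi.'}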
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/vertex.py
RENAMED
@@ -46,12 +46,6 @@ class GeminiModel(BaseEvalModel):
     def __post_init__(self) -> None:
         self._init_client()
         self._init_rate_limiter()
-        self.retry = self._retry(
-            error_types=[],  # default to catching all errors
-            min_seconds=self.retry_min_seconds,
-            max_seconds=self.retry_max_seconds,
-            max_retries=self.max_retries,
-        )

     def reload_client(self) -> None:
         self._init_client()
@@ -115,30 +109,17 @@ class GeminiModel(BaseEvalModel):
         # instruction is an invalid input to Gemini models, it is passed in by
         # BaseEvalModel.__call__ and needs to be removed
         kwargs.pop("instruction", None)
-        response = self._generate_with_retry(
-            prompt=prompt,
-            generation_config=self.generation_config,
-            **kwargs,
-        )

-        return str(response)
-
-    def _generate_with_retry(
-        self, prompt: str, generation_config: Dict[str, Any], **kwargs: Any
-    ) -> Any:
-        @self.retry
         @self._rate_limiter.limit
-        def
+        def _rate_limited_completion(
+            prompt: str, generation_config: Dict[str, Any], **kwargs: Any
+        ) -> Any:
             response = self._model.generate_content(
                 contents=prompt, generation_config=generation_config, **kwargs
             )
             return self._parse_response_candidates(response)

-
-
-    async def _async_generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
-        kwargs.pop("instruction", None)
-        response = await self._async_generate_with_retry(
+        response = _rate_limited_completion(
             prompt=prompt,
             generation_config=self.generation_config,
             **kwargs,
@@ -146,18 +127,27 @@ class GeminiModel(BaseEvalModel):

         return str(response)

-    async def
-
-
-
+    async def _async_generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
+        # instruction is an invalid input to Gemini models, it is passed in by
+        # BaseEvalModel.__call__ and needs to be removed
+        kwargs.pop("instruction", None)
+
         @self._rate_limiter.alimit
-        async def
+        async def _rate_limited_completion(
+            prompt: str, generation_config: Dict[str, Any], **kwargs: Any
+        ) -> Any:
             response = await self._model.generate_content_async(
                 contents=prompt, generation_config=generation_config, **kwargs
             )
             return self._parse_response_candidates(response)

-
+        response = await _rate_limited_completion(
+            prompt=prompt,
+            generation_config=self.generation_config,
+            **kwargs,
+        )
+
+        return str(response)

     def _parse_response_candidates(self, response: Any) -> Any:
         if hasattr(response, "candidates"):
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/experimental/evals/models/vertexai.py
RENAMED
@@ -52,18 +52,6 @@ class VertexAIModel(BaseEvalModel):

             self._vertexai = vertexai
             self._google_exceptions = google_exceptions
-            self._google_api_retry_errors = [
-                self._google_exceptions.ResourceExhausted,
-                self._google_exceptions.ServiceUnavailable,
-                self._google_exceptions.Aborted,
-                self._google_exceptions.DeadlineExceeded,
-            ]
-            self.retry = self._retry(
-                error_types=self._google_api_retry_errors,
-                min_seconds=self.retry_min_seconds,
-                max_seconds=self.retry_max_seconds,
-                max_retries=self.max_retries,
-            )
         except ImportError:
             self._raise_import_error(
                 package_display_name="VertexAI",
@@ -97,19 +85,12 @@ class VertexAIModel(BaseEvalModel):

     def _generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
         invoke_params = self.invocation_params
-        response = self.
+        response = self._model.predict(
             prompt=prompt,
             **invoke_params,
         )
         return str(response.text)

-    def _generate_with_retry(self, **kwargs: Any) -> Any:
-        @self.retry
-        def _completion_with_retry(**kwargs: Any) -> Any:
-            return self._model.predict(**kwargs)
-
-        return _completion_with_retry(**kwargs)
-
     @property
     def is_codey_model(self) -> bool:
         return is_codey_model(self.tuned_model_name or self.model_name)
{arize_phoenix-2.7.0 → arize_phoenix-2.8.0}/src/phoenix/server/api/schema.py
RENAMED
@@ -2,7 +2,6 @@ from collections import defaultdict
 from datetime import datetime
 from itertools import chain
 from typing import Dict, List, Optional, Set, Tuple, Union, cast
-from uuid import UUID

 import numpy as np
 import numpy.typing as npt
@@ -22,7 +21,7 @@ from phoenix.server.api.input_types.Coordinates import (
 from phoenix.server.api.input_types.SpanSort import SpanSort
 from phoenix.server.api.types.Cluster import Cluster, to_gql_clusters
 from phoenix.trace.dsl import SpanFilter
-from phoenix.trace.schemas import SpanID
+from phoenix.trace.schemas import SpanID, TraceID

 from .context import Context
 from .input_types.TimeRange import TimeRange
@@ -264,7 +263,7 @@ class Query:
                 root_spans_only=root_spans_only,
             )
         else:
-            spans = chain.from_iterable(map(traces.get_trace, map(
+            spans = chain.from_iterable(map(traces.get_trace, map(TraceID, trace_ids)))
         if predicate:
             spans = filter(predicate, spans)
         if sort: