arize-phoenix 2.5.0__tar.gz → 2.7.0__tar.gz
This diff compares the contents of two publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/PKG-INFO +1 -1
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/config.py +32 -7
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/evals.py +53 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/fixtures.py +46 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/evaluators.py +4 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/classify.py +16 -6
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/generate.py +6 -3
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/anthropic.py +3 -4
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/base.py +1 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/bedrock.py +4 -2
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/openai.py +2 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/vertex.py +6 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/templates/default_templates.py +0 -7
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/index.js +1 -1
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/session/evaluation.py +16 -10
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/session/session.py +19 -0
- arize_phoenix-2.7.0/src/phoenix/trace/errors.py +5 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/span_evaluations.py +46 -61
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/trace_dataset.py +111 -4
- arize_phoenix-2.7.0/src/phoenix/version.py +1 -0
- arize_phoenix-2.5.0/src/phoenix/version.py +0 -1
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/.gitignore +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/IP_NOTICE +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/LICENSE +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/README.md +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/pyproject.toml +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/embedding_dimension.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/model.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/model_schema.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/model_schema_adapter.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/traces.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/dataset.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/errors.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/schema.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/validation.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datetime_utils.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/exceptions.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/executor.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/processing.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/litellm.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/rate_limiters.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/vertexai.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/retrievals.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/templates/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/templates/template.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/utils/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/utils/threads.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/README.md +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/binning.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/metrics.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/mixins.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/retrieval_metrics.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/timeseries.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/metrics/wrappers.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/clustering.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/pointcloud.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/projectors.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/pointcloud/umap_parameters.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/py.typed +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/context.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/helpers.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/Granularity.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/input_types/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/interceptor.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/schema.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Cluster.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Dataset.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DatasetInfo.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DatasetRole.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DatasetValues.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Dimension.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DimensionShape.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DimensionType.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Evaluation.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Event.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/EventMetadata.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/ExportEventsMutation.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/ExportedFile.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Functionality.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/MimeType.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Model.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/NumericRange.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/PromptResponse.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Retrieval.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Segments.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/SortDir.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/Span.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/TimeSeries.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/ValidationResult.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/node.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/api/types/pagination.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/app.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/evaluation_handler.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/main.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/span_handler.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/apple-touch-icon.png +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/favicon.ico +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/index.css +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/modernizr.js +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/templates/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/templates/index.html +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/thread_server.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/trace_handler.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/services.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/session/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/filter.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/helpers.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/missing.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/dsl/query.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/evaluation_conventions.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/exporter.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/fixtures.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/langchain/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/langchain/instrumentor.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/langchain/tracer.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/llama_index/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/llama_index/callback.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/llama_index/debug_callback.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/llama_index/streaming.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/openai/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/openai/instrumentor.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/otel.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/schemas.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/semantic_conventions.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/span_json_decoder.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/span_json_encoder.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/tracer.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/utils.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/v1/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/utilities/__init__.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/utilities/error_handling.py +0 -0
- {arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/utilities/logging.py +0 -0
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/config.py
RENAMED
@@ -12,6 +12,11 @@ ENV_PHOENIX_COLLECTOR_ENDPOINT = "PHOENIX_COLLECTOR_ENDPOINT"
 The endpoint traces and evals are sent to. This must be set if the Phoenix
 server is running on a remote instance.
 """
+ENV_WORKING_DIR = "PHOENIX_WORKING_DIR"
+"""
+The directory in which to save, load, and export datasets. This directory must
+be accessible by both the Phoenix server and the notebook environment.
+"""
 
 
 def _get_temp_path() -> Path:
@@ -36,13 +41,16 @@ def get_running_pid() -> Optional[int]:
     return None
 
 
-
-
-
-
-
-
-
+def get_working_dir() -> Path:
+    """
+    Get the working directory for saving, loading, and exporting datasets.
+    """
+    working_dir_str = os.getenv(ENV_WORKING_DIR)
+    if working_dir_str is not None:
+        return Path(working_dir_str)
+    # Fall back to ~/.phoenix if PHOENIX_WORKING_DIR is not set
+    return Path.home().resolve() / ".phoenix"
+
 
 PHOENIX_DIR = Path(__file__).resolve().parent
 # Server config
@@ -53,6 +61,23 @@ HOST = "0.0.0.0"
 PORT = 6006
 # The prefix of datasets that are auto-assigned a name
 GENERATED_DATASET_NAME_PREFIX = "phoenix_dataset_"
+# The work directory for saving, loading, and exporting datasets
+WORKING_DIR = get_working_dir()
+
+try:
+    for path in (
+        ROOT_DIR := WORKING_DIR,
+        EXPORT_DIR := ROOT_DIR / "exports",
+        DATASET_DIR := ROOT_DIR / "datasets",
+        TRACE_DATASET_DIR := ROOT_DIR / "trace_datasets",
+    ):
+        path.mkdir(parents=True, exist_ok=True)
+except Exception as e:
+    print(
+        f"⚠️ Failed to initialize the working directory at {WORKING_DIR} due to an error: {str(e)}"
+    )
+    print("⚠️ While phoenix will still run, you will not be able to save, load, or export data")
+    print("ℹ️ To change, set the `{ENV_WORKING_DIR}` environment variable before importing phoenix.")
 
 
 def get_exported_files(directory: Path) -> List[Path]:
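The config.py changes make the server's data directory configurable: PHOENIX_WORKING_DIR is read once, the directory tree (exports, datasets, trace_datasets) is created eagerly at import time, and a failure only disables persistence rather than breaking the import. A minimal usage sketch, assuming arize-phoenix 2.7.0 is installed; the path below is an arbitrary example, and the variable must be set before phoenix is imported, as the warning text above notes:

    import os
    from pathlib import Path

    # Must happen before importing phoenix: the directories are created at import time.
    os.environ["PHOENIX_WORKING_DIR"] = str(Path.home() / "phoenix_data")  # example path

    import phoenix.config as config

    print(config.WORKING_DIR)        # .../phoenix_data
    print(config.TRACE_DATASET_DIR)  # .../phoenix_data/trace_datasets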
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/core/evals.py
RENAMED
@@ -9,10 +9,12 @@ from typing import DefaultDict, Dict, List, Optional, Set, Tuple
 
 import numpy as np
 from google.protobuf.json_format import MessageToDict
+from pandas import DataFrame, Index, MultiIndex
 from typing_extensions import TypeAlias, assert_never
 
 import phoenix.trace.v1 as pb
 from phoenix.trace.schemas import SpanID, TraceID
+from phoenix.trace.span_evaluations import DocumentEvaluations, Evaluations, SpanEvaluations
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
@@ -171,3 +173,54 @@ class Evals:
         if result.HasField("score") and document_position < num_documents:
             scores[document_position] = result.score.value
         return scores
+
+    def export_evaluations(self) -> List[Evaluations]:
+        evaluations: List[Evaluations] = []
+        evaluations.extend(self._export_span_evaluations())
+        evaluations.extend(self._export_document_evaluations())
+        return evaluations
+
+    def _export_span_evaluations(self) -> List[SpanEvaluations]:
+        span_evaluations = []
+        with self._lock:
+            span_evaluations_by_name = tuple(self._span_evaluations_by_name.items())
+        for eval_name, _span_evaluations_by_id in span_evaluations_by_name:
+            span_ids = []
+            rows = []
+            with self._lock:
+                span_evaluations_by_id = tuple(_span_evaluations_by_id.items())
+            for span_id, pb_eval in span_evaluations_by_id:
+                span_ids.append(span_id)
+                rows.append(MessageToDict(pb_eval.result))
+            dataframe = DataFrame(rows, index=Index(span_ids, name="context.span_id"))
+            span_evaluations.append(SpanEvaluations(eval_name, dataframe))
+        return span_evaluations
+
+    def _export_document_evaluations(self) -> List[DocumentEvaluations]:
+        evaluations = []
+        with self._lock:
+            document_evaluations_by_name = tuple(self._document_evaluations_by_name.items())
+        for eval_name, _document_evaluations_by_id in document_evaluations_by_name:
+            span_ids = []
+            document_positions = []
+            rows = []
+            with self._lock:
+                document_evaluations_by_id = tuple(_document_evaluations_by_id.items())
+            for span_id, _document_evaluations_by_position in document_evaluations_by_id:
+                with self._lock:
+                    document_evaluations_by_position = sorted(
+                        _document_evaluations_by_position.items()
+                    )  # ensure the evals are sorted by document position
+                for document_position, pb_eval in document_evaluations_by_position:
+                    span_ids.append(span_id)
+                    document_positions.append(document_position)
+                    rows.append(MessageToDict(pb_eval.result))
+            dataframe = DataFrame(
+                rows,
+                index=MultiIndex.from_arrays(
+                    (span_ids, document_positions),
+                    names=("context.span_id", "document_position"),
+                ),
+            )
+            evaluations.append(DocumentEvaluations(eval_name, dataframe))
+        return evaluations
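export_evaluations inverts the ingest path: the protobuf results held in the Evals store are flattened back into pandas dataframes, keyed by span ID for SpanEvaluations and by (span ID, document position) for DocumentEvaluations. A sketch of the resulting index shapes, with made-up values:

    import pandas as pd

    # SpanEvaluations dataframe: one row per span.
    span_evals = pd.DataFrame(
        {"label": ["factual", "hallucinated"], "score": [1.0, 0.0]},
        index=pd.Index(["span-1", "span-2"], name="context.span_id"),
    )

    # DocumentEvaluations dataframe: one row per retrieved document.
    doc_evals = pd.DataFrame(
        {"score": [0.9, 0.2]},
        index=pd.MultiIndex.from_arrays(
            (["span-1", "span-1"], [0, 1]),
            names=("context.span_id", "document_position"),
        ),
    )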
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/datasets/fixtures.py
RENAMED
@@ -240,6 +240,51 @@ click_through_rate_fixture = Fixture(
     reference_file_name="click_through_rate_train.parquet",
 )
 
+chatbot_queries_schema = Schema(
+    prediction_id_column_name="id",
+    prompt_column_names=RetrievalEmbeddingColumnNames(
+        vector_column_name="prompt",
+        raw_data_column_name="prompt_text",
+        context_retrieval_ids_column_name="document_ids",
+        context_retrieval_scores_column_name="document_scores",
+    ),
+    response_column_names="response",
+    tag_column_names=[
+        "answer_relevancy",
+        "context_relevancy",
+        "faithfulness",
+        "document_similarity_0",
+        "document_similarity_1",
+        "openai_relevance_0",
+        "openai_relevance_1",
+        "user_feedback",
+    ],
+)
+
+chatbot_database_schema = Schema(
+    prediction_id_column_name="document_id",
+    prompt_column_names=EmbeddingColumnNames(
+        vector_column_name="text_vector",
+        raw_data_column_name="text",
+    ),
+)
+
+chatbot_fixture = Fixture(
+    name="chatbot",
+    description="""
+Investigate RAG performance for a chatbot built on top of Arize's documentation.
+This use-case highlights how embedding visualizations for a RAG application can
+highlight issues with the application's retrieval and performance.
+
+The data contains relevance metrics generated by LLM Evals as well as RAGAS.
+""",
+    primary_schema=chatbot_queries_schema,
+    corpus_schema=chatbot_database_schema,
+    prefix="unstructured/llm/chatbot",
+    primary_file_name="chatbot_queries_with_ragas.parquet",
+    corpus_file_name="chatbot_database_ds.parquet",
+)
+
 wide_data_primary_schema = Schema(
     actual_label_column_name="actual_label",
     prediction_label_column_name="predicted_label",
@@ -363,6 +408,7 @@ FIXTURES: Tuple[Fixture, ...] = (
     deep_data_fixture,
     llm_summarization_fixture,
     wikipedia_fixture,
+    chatbot_fixture,
 )
 NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
 
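The chatbot fixture is the first in this module to pair a primary (queries) dataset with a corpus (document database) dataset. A hedged sketch of loading it; `load_example` and the attribute names on its return value are assumed from phoenix.datasets.fixtures and may differ by version:

    import phoenix as px
    from phoenix.datasets.fixtures import load_example  # assumed entry point

    datasets = load_example("chatbot")  # downloads the parquet files listed above
    px.launch_app(primary=datasets.primary, corpus=datasets.corpus)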
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/classify.py
RENAMED
@@ -73,7 +73,7 @@ def llm_classify(
     include_prompt: bool = False,
     include_response: bool = False,
     run_sync: bool = False,
-    concurrency: int =
+    concurrency: Optional[int] = None,
 ) -> pd.DataFrame:
     """Classifies each input row of the dataframe using an LLM. Returns a pandas.DataFrame
     where the first column is named `label` and contains the classification labels. An optional
@@ -116,8 +116,9 @@ def llm_classify(
         run_sync (bool, default=False): If True, forces synchronous request submission. Otherwise
             evaluations will be run asynchronously if possible.
 
-        concurrency (int, default=
-            possible.
+        concurrency (Optional[int], default=None): The number of concurrent evals if async
+            submission is possible. If not provided, a recommended default concurrency is set on a
+            per-model basis.
 
     Returns:
         pandas.DataFrame: A dataframe where the `label` column (at column position 0) contains
@@ -127,6 +128,7 @@ def llm_classify(
         from the entries in the rails argument or "NOT_PARSABLE" if the model's output could
         not be parsed.
     """
+    concurrency = concurrency or model.default_concurrency
     # clients need to be reloaded to ensure that async evals work properly
     model.reload_client()
 
@@ -353,7 +355,7 @@ def run_evals(
     provide_explanation: bool = False,
     use_function_calling_if_available: bool = True,
     verbose: bool = False,
-    concurrency: int =
+    concurrency: Optional[int] = None,
 ) -> List[DataFrame]:
     """
     Applies a list of evaluators to a dataframe. Outputs a list of dataframes in
@@ -381,13 +383,21 @@ def run_evals(
         as model invocation parameters and details about retries and snapping to
         rails.
 
-        concurrency (int,
-            submission is possible.
+        concurrency (Optional[int], default=None): The number of concurrent evals if async
+            submission is possible. If not provided, a recommended default concurrency is set on a
+            per-model basis.
 
     Returns:
         List[DataFrame]: A list of dataframes, one for each evaluator, all of
         which have the same number of rows as the input dataframe.
     """
+    # use the minimum default concurrency of all the models
+    if concurrency is None:
+        if len(evaluators) == 0:
+            concurrency = 1
+        else:
+            concurrency = min(evaluator.default_concurrency for evaluator in evaluators)
+
     # clients need to be reloaded to ensure that async evals work properly
     for evaluator in evaluators:
         evaluator.reload_client()
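With concurrency defaulting to None, llm_classify now falls back to model.default_concurrency, and run_evals takes the minimum default across its evaluators' models, so the most conservative backend (for example the Gemini cap of 5 added in vertex.py below) wins. A sketch of a call that relies on the new fallback; the `model_name` keyword is assumed for the 2.x OpenAIModel constructor:

    import pandas as pd
    from phoenix.experimental.evals import (
        HALLUCINATION_PROMPT_RAILS_MAP,
        HALLUCINATION_PROMPT_TEMPLATE,
        OpenAIModel,
        llm_classify,
    )

    df = pd.DataFrame(
        {
            "input": ["What is Phoenix?"],
            "reference": ["Phoenix is an open-source ML observability library."],
            "output": ["Phoenix is an observability library."],
        }
    )

    labels = llm_classify(
        dataframe=df,
        model=OpenAIModel(model_name="gpt-4"),  # model_name assumed for the 2.x API
        template=HALLUCINATION_PROMPT_TEMPLATE,
        rails=list(HALLUCINATION_PROMPT_RAILS_MAP.values()),
        # concurrency omitted: resolves to model.default_concurrency as of 2.7.0
    )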
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/functions/generate.py
RENAMED
@@ -31,7 +31,7 @@ def llm_generate(
     include_prompt: bool = False,
     include_response: bool = False,
     run_sync: bool = False,
-    concurrency: int =
+    concurrency: Optional[int] = None,
 ) -> pd.DataFrame:
     """
     Generates a text using a template using an LLM. This function is useful
@@ -70,14 +70,17 @@ def llm_generate(
     run_sync (bool, default=False): If True, forces synchronous request submission. Otherwise
         evaluations will be run asynchronously if possible.
 
-    concurrency (int, default=
-        possible.
+    concurrency (Optional[int], default=None): The number of concurrent evals if async
+        submission is possible. If not provided, a recommended default concurrency is set on a
+        per-model basis.
 
     Returns:
         generations_dataframe (pandas.DataFrame): A dataframe where each row
         represents the generated output
 
     """
+    concurrency = concurrency or model.default_concurrency
+
     # clients need to be reloaded to ensure that async evals work properly
     model.reload_client()
 
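llm_generate gets the same treatment, resolving a missing concurrency to the model's default before reloading the client; note that the `or` fallback also treats an explicit concurrency=0 as unset. A minimal sketch under the same `model_name` assumption as above:

    import pandas as pd
    from phoenix.experimental.evals import OpenAIModel, llm_generate

    df = pd.DataFrame({"topic": ["tracing", "evals"]})
    generations = llm_generate(
        dataframe=df,
        template="Write one sentence about {topic}.",
        model=OpenAIModel(model_name="gpt-3.5-turbo"),  # model_name assumed
        # concurrency omitted: resolves to model.default_concurrency as of 2.7.0
    )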
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/anthropic.py
RENAMED
@@ -1,4 +1,3 @@
-import logging
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
@@ -8,8 +7,6 @@ from phoenix.experimental.evals.models.rate_limiters import RateLimiter
 if TYPE_CHECKING:
     from tiktoken import Encoding
 
-logger = logging.getLogger(__name__)
-
 MODEL_TOKEN_LIMIT_MAPPING = {
     "claude-2.1": 200000,
     "claude-2.0": 100000,
@@ -80,7 +77,6 @@ class AnthropicModel(BaseEvalModel):
         try:
             encoding = self._tiktoken.encoding_for_model(self.model)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
             encoding = self._tiktoken.get_encoding("cl100k_base")
         self._tiktoken_encoding = encoding
 
@@ -149,6 +145,9 @@ class AnthropicModel(BaseEvalModel):
         return _completion_with_retry(**kwargs)
 
     async def _async_generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
+        # instruction is an invalid input to Anthropic models, it is passed in by
+        # BaseEvalModel.__call__ and needs to be removed
+        kwargs.pop("instruction", None)
         invocation_parameters = self.invocation_parameters()
         invocation_parameters.update(kwargs)
         response = await self._async_generate_with_retry(
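The async path previously forwarded every kwarg to the Anthropic SDK, including the `instruction` key injected by BaseEvalModel.__call__, which the API rejects. The fix strips it before merging kwargs into the invocation parameters. A generic sketch of the pattern; the helper name is illustrative, not the package's:

    from typing import Any, Dict

    def drop_unsupported_kwargs(
        kwargs: Dict[str, Any], unsupported: tuple = ("instruction",)
    ) -> Dict[str, Any]:
        """Remove keys a backend SDK does not accept before forwarding kwargs."""
        for key in unsupported:
            kwargs.pop(key, None)  # no-op when the key is absent
        return kwargs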
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/bedrock.py
RENAMED
@@ -87,7 +87,6 @@ class BedrockModel(BaseEvalModel):
         try:
             encoding = self._tiktoken.encoding_for_model(self.model_id)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
             encoding = self._tiktoken.get_encoding("cl100k_base")
         self._tiktoken_encoding = encoding
 
@@ -165,7 +164,7 @@ class BedrockModel(BaseEvalModel):
                     "temperature": self.temperature,
                     "topP": self.top_p,
                     "maxTokens": self.max_tokens,
-                    "stopSequences":
+                    "stopSequences": self.stop_sequences,
                 },
                 **self.extra_parameters,
             }
@@ -204,6 +203,9 @@ class BedrockModel(BaseEvalModel):
         elif self.model_id.startswith("anthropic"):
             body = json.loads(response.get("body").read().decode())
             return body.get("completion")
+        elif self.model_id.startswith("amazon"):
+            body = json.loads(response.get("body").read())
+            return body.get("results")[0].get("outputText")
         else:
             body = json.loads(response.get("body").read())
             return body.get("results")[0].get("data").get("outputText")
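Bedrock multiplexes several providers behind one invoke API, so response parsing branches on the model-id prefix; the new `amazon` branch reads Titan-style output from results[0].outputText. A sketch of that parsing against a hand-built payload; the JSON shape is inferred from the diff above, not quoted from AWS documentation:

    import io
    import json

    # Simulate the streaming `body` a Bedrock invoke_model response carries.
    response = {"body": io.BytesIO(json.dumps({"results": [{"outputText": "hello"}]}).encode())}

    body = json.loads(response.get("body").read())
    print(body.get("results")[0].get("outputText"))  # hello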
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/openai.py
RENAMED
@@ -31,6 +31,8 @@ MODEL_TOKEN_LIMIT_MAPPING = {
     "gpt-4-0613": 8192,  # Current gpt-4 default
     "gpt-4-32k-0314": 32768,
     "gpt-4-32k-0613": 32768,
+    "gpt-4-1106-preview": 128000,
+    "gpt-4-vision-preview": 128000,
 }
 LEGACY_COMPLETION_API_MODELS = ("gpt-3.5-turbo-instruct",)
 logger = logging.getLogger(__name__)
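The two new entries register the 128K context windows of the GPT-4 Turbo preview models, so token accounting uses the correct limit for them. A hedged usage sketch; the `model_name` keyword is assumed for the 2.x constructor:

    from phoenix.experimental.evals import OpenAIModel

    model = OpenAIModel(model_name="gpt-4-1106-preview")  # model_name assumed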
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/models/vertex.py
RENAMED
@@ -21,6 +21,9 @@ MODEL_TOKEN_LIMIT_MAPPING = {
 
 @dataclass
 class GeminiModel(BaseEvalModel):
+    # The vertex SDK runs into connection pool limits at high concurrency
+    default_concurrency: int = 5
+
     model: str = "gemini-pro"
     """The model name to use."""
     temperature: float = 0.0
@@ -50,6 +53,9 @@ class GeminiModel(BaseEvalModel):
             max_retries=self.max_retries,
         )
 
+    def reload_client(self) -> None:
+        self._init_client()
+
     def _init_client(self) -> None:
         try:
             from google.api_core import exceptions  # type:ignore
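GeminiModel both opts into a lower default concurrency (the field the classify.py and generate.py changes consult) and gains a reload_client override so async eval runs can reinitialize the Vertex client like the other backends do. A sketch, assuming GeminiModel is exported from phoenix.experimental.evals:

    from phoenix.experimental.evals import GeminiModel  # export path assumed

    model = GeminiModel()  # defaults to gemini-pro
    print(model.default_concurrency)  # 5: eval runs will not submit more than this concurrently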
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/experimental/evals/templates/default_templates.py
RENAMED
@@ -73,13 +73,6 @@ your response.
 [END DATA]
 
 Is the answer above factual or hallucinated based on the query and reference text?
-
-Your response should be a single word: either "factual" or "hallucinated", and
-it should not include any other text or characters. "hallucinated" indicates that the answer
-provides factually inaccurate information to the query based on the reference text. "factual"
-indicates that the answer to the question is correct relative to the reference text, and does not
-contain made up information. Please read the query and reference text carefully before determining
-your response.
 """
 HALLUCINATION_PROMPT_TEMPLATE_WITH_EXPLANATION = """
 In this task, you will be presented with a query, a reference text and an answer. The answer is
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/server/static/index.js
RENAMED
@@ -6717,7 +6717,7 @@ fragment SpanEvaluationsTable_evals on Span {
     gap: var(--ac-global-dimension-static-size-200);
 `,children:i.map((o,l)=>x("li",{children:_(ft,{padding:"size-200",backgroundColor:"purple-100",borderColor:"purple-700",borderWidth:"thin",borderRadius:"medium",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"embedded text"}),x("pre",{css:ee`
     margin: var(--ac-global-dimension-static-size-100) 0;
-`,children:o[mtt]})]})},l))})}):null})}function Xxn(t){let{spanAttributes:e}=t,n=(0,br.useMemo)(()=>{let l=e[wr.tool];return typeof l=="object"?l:{}},[e]);if(!(Object.keys(n).length>0))return null;let r=n[vB.name],a=n[vB.description],o=n[vB.parameters];return x(Be,{direction:"column",gap:"size-200",children:x(uu,{title:"Tool"+(typeof r=="string"?`: ${r}`:""),...eg,children:_(Be,{direction:"column",children:[a!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",backgroundColor:"light",children:_(Be,{direction:"column",alignItems:"start",gap:"size-50",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Description"}),x(Me,{children:a})]})}):null,o!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",children:_(Be,{direction:"column",alignItems:"start",width:"100%",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Parameters"}),x(Tc,{value:JSON.stringify(o),mimeType:"json"})]})}):null]})})})}var Sxn=["irrelevant"];function Gse({document:t,documentEvaluations:e,backgroundColor:n,borderColor:i,labelColor:r}){let a=t[htt],o=e&&e.length;return x(ft,{borderRadius:"medium",backgroundColor:n,borderColor:i,borderWidth:"thin",children:_(Be,{direction:"column",children:[x(ft,{width:"100%",borderBottomWidth:"thin",borderBottomColor:i,children:_(Be,{direction:"row",justifyContent:"space-between",margin:"size-200",alignItems:"center",children:[_(Be,{direction:"row",gap:"size-50",alignItems:"center",children:[x(pt,{svg:x(Et.FileOutline,{})}),_(Nn,{level:4,children:["document ",t[Itt]]})]}),typeof t[Wse]=="number"&&x(Zs,{color:r,children:`score ${mh(t[Wse])}`})]})}),x("pre",{css:ee`
+`,children:o[mtt]})]})},l))})}):null})}function Xxn(t){let{spanAttributes:e}=t,n=(0,br.useMemo)(()=>{let l=e[wr.tool];return typeof l=="object"?l:{}},[e]);if(!(Object.keys(n).length>0))return null;let r=n[vB.name],a=n[vB.description],o=n[vB.parameters];return x(Be,{direction:"column",gap:"size-200",children:x(uu,{title:"Tool"+(typeof r=="string"?`: ${r}`:""),...eg,children:_(Be,{direction:"column",children:[a!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",backgroundColor:"light",children:_(Be,{direction:"column",alignItems:"start",gap:"size-50",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Description"}),x(Me,{children:a})]})}):null,o!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",children:_(Be,{direction:"column",alignItems:"start",width:"100%",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Parameters"}),x(Tc,{value:JSON.stringify(o),mimeType:"json"})]})}):null]})})})}var Sxn=["irrelevant","unrelated"];function Gse({document:t,documentEvaluations:e,backgroundColor:n,borderColor:i,labelColor:r}){let a=t[htt],o=e&&e.length;return x(ft,{borderRadius:"medium",backgroundColor:n,borderColor:i,borderWidth:"thin",children:_(Be,{direction:"column",children:[x(ft,{width:"100%",borderBottomWidth:"thin",borderBottomColor:i,children:_(Be,{direction:"row",justifyContent:"space-between",margin:"size-200",alignItems:"center",children:[_(Be,{direction:"row",gap:"size-50",alignItems:"center",children:[x(pt,{svg:x(Et.FileOutline,{})}),_(Nn,{level:4,children:["document ",t[Itt]]})]}),typeof t[Wse]=="number"&&x(Zs,{color:r,children:`score ${mh(t[Wse])}`})]})}),x("pre",{css:ee`
     padding: var(--ac-global-dimension-static-size-200);
     white-space: normal;
     margin: 0;
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/session/evaluation.py
RENAMED
@@ -9,6 +9,7 @@ import math
 from time import sleep
 from typing import (
     Any,
+    Iterator,
     Optional,
     Sequence,
     Tuple,
@@ -33,24 +34,29 @@ __all__ = [
 from phoenix.trace.span_evaluations import Evaluations
 
 
-def
-
-evaluations
-
-
-    index_names = evaluations.index.names
-    for index, row in evaluations.iterrows():
+def encode_evaluations(evaluations: Evaluations) -> Iterator[pb.Evaluation]:
+    dataframe = evaluations.dataframe
+    eval_name = evaluations.eval_name
+    index_names = dataframe.index.names
+    for index, row in dataframe.iterrows():
         subject_id = _extract_subject_id_from_index(
             index_names,
             cast(Union[str, Tuple[Any]], index),
         )
         if (result := _extract_result(row)) is None:
             continue
-
-            name=
+        yield pb.Evaluation(
+            name=eval_name,
             result=result,
             subject_id=subject_id,
         )
+
+
+def add_evaluations(
+    exporter: HttpExporter,
+    evaluations: Evaluations,
+) -> None:
+    for evaluation in encode_evaluations(evaluations):
         exporter.export(evaluation)
 
 
@@ -130,7 +136,7 @@ def log_evaluations(
         return
     exporter = HttpExporter(endpoint=endpoint, host=host, port=port)
     for eval in filter(bool, evals):
-        add_evaluations(exporter, eval
+        add_evaluations(exporter, eval)
     with tqdm(total=n, desc="Sending Evaluations") as pbar:
         while n:
             sleep(0.1)
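The refactor splits the old helper in two: encode_evaluations is now a reusable generator that turns an Evaluations object into pb.Evaluation messages, taking the eval name from the object itself rather than a separate argument, and add_evaluations simply streams those messages into an exporter. A sketch of the generator on its own, with a hand-built SpanEvaluations (positional constructor arguments as used in core/evals.py above):

    import pandas as pd
    from phoenix.session.evaluation import encode_evaluations
    from phoenix.trace.span_evaluations import SpanEvaluations

    evals = SpanEvaluations(
        "hallucination",
        pd.DataFrame(
            {"label": ["factual"], "score": [1.0]},
            index=pd.Index(["span-1"], name="context.span_id"),
        ),
    )
    for pb_evaluation in encode_evaluations(evals):
        print(pb_evaluation.name)  # hallucination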
{arize_phoenix-2.5.0 → arize_phoenix-2.7.0}/src/phoenix/session/session.py
RENAMED
@@ -30,6 +30,7 @@ from phoenix.pointcloud.umap_parameters import get_umap_parameters
 from phoenix.server.app import create_app
 from phoenix.server.thread_server import ThreadServer
 from phoenix.services import AppService
+from phoenix.session.evaluation import encode_evaluations
 from phoenix.trace.dsl import SpanFilter
 from phoenix.trace.dsl.query import SpanQuery
 from phoenix.trace.otel import encode
@@ -46,6 +47,8 @@ logger = logging.getLogger(__name__)
 # type workaround
 # https://github.com/python/mypy/issues/5264#issuecomment-399407428
 if TYPE_CHECKING:
+    from phoenix.trace import Evaluations
+
     _BaseList = UserList[pd.DataFrame]
 else:
     _BaseList = UserList
@@ -123,6 +126,10 @@ class Session(ABC):
                 self.traces.put(encode(span))
 
         self.evals: Evals = Evals()
        if trace_dataset:
            for evaluations in trace_dataset.evaluations:
                for pb_evaluation in encode_evaluations(evaluations):
                    self.evals.put(pb_evaluation)
 
         self.host = host or get_env_host()
         self.port = port or get_env_port()
@@ -213,6 +220,15 @@ class Session(ABC):
             return None
         return pd.json_normalize(data, max_level=1).set_index("context.span_id", drop=False)
 
+    def get_evaluations(self) -> List["Evaluations"]:
+        return self.evals.export_evaluations()
+
+    def get_trace_dataset(self) -> Optional[TraceDataset]:
+        if (dataframe := self.get_spans_dataframe()) is None:
+            return None
+        evaluations = self.get_evaluations()
+        return TraceDataset(dataframe=dataframe, evaluations=evaluations)
+
 
 _session: Optional[Session] = None
 
@@ -479,6 +495,9 @@ def _get_url(host: str, port: int, notebook_env: NotebookEnvironment) -> str:
     if notebook_env == NotebookEnvironment.DATABRICKS:
         context = _get_databricks_context()
         return f"{_get_databricks_notebook_base_url(context)}/{port}/"
+    if host == "0.0.0.0" or host == "127.0.0.1":
+        # The app is running locally, so use localhost
+        return f"http://localhost:{port}/"
     return f"http://{host}:{port}/"
 
 
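Together with Evals.export_evaluations above, these Session additions close the loop: evaluations attached to a TraceDataset are replayed into the eval store at launch, and get_trace_dataset pulls spans and evals back out as a single object, presumably so it can be persisted via the TraceDataset save/load support added in trace_dataset.py. A sketch of the round trip; the eval_name attribute is assumed from the span_evaluations refactor:

    import phoenix as px

    session = px.launch_app()
    # ... send traces and log evaluations to the session ...

    tds = session.get_trace_dataset()  # None until at least one span is received
    if tds is not None:
        print(len(tds.dataframe), "spans")
        print([evals.eval_name for evals in tds.evaluations])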