arize-phoenix 3.16.1__py3-none-any.whl → 7.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of arize-phoenix might be problematic.
- arize_phoenix-7.7.1.dist-info/METADATA +261 -0
- arize_phoenix-7.7.1.dist-info/RECORD +345 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/WHEEL +1 -1
- arize_phoenix-7.7.1.dist-info/entry_points.txt +3 -0
- phoenix/__init__.py +86 -14
- phoenix/auth.py +309 -0
- phoenix/config.py +675 -45
- phoenix/core/model.py +32 -30
- phoenix/core/model_schema.py +102 -109
- phoenix/core/model_schema_adapter.py +48 -45
- phoenix/datetime_utils.py +24 -3
- phoenix/db/README.md +54 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +85 -0
- phoenix/db/bulk_inserter.py +294 -0
- phoenix/db/engines.py +208 -0
- phoenix/db/enums.py +20 -0
- phoenix/db/facilitator.py +113 -0
- phoenix/db/helpers.py +159 -0
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/dataset.py +227 -0
- phoenix/db/insertion/document_annotation.py +171 -0
- phoenix/db/insertion/evaluation.py +191 -0
- phoenix/db/insertion/helpers.py +98 -0
- phoenix/db/insertion/span.py +193 -0
- phoenix/db/insertion/span_annotation.py +158 -0
- phoenix/db/insertion/trace_annotation.py +158 -0
- phoenix/db/insertion/types.py +256 -0
- phoenix/db/migrate.py +86 -0
- phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
- phoenix/db/migrations/env.py +114 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
- phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
- phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +807 -0
- phoenix/exceptions.py +5 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +158 -0
- phoenix/experiments/evaluators/code_evaluators.py +184 -0
- phoenix/experiments/evaluators/llm_evaluators.py +473 -0
- phoenix/experiments/evaluators/utils.py +236 -0
- phoenix/experiments/functions.py +772 -0
- phoenix/experiments/tracing.py +86 -0
- phoenix/experiments/types.py +726 -0
- phoenix/experiments/utils.py +25 -0
- phoenix/inferences/__init__.py +0 -0
- phoenix/{datasets → inferences}/errors.py +6 -5
- phoenix/{datasets → inferences}/fixtures.py +49 -42
- phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
- phoenix/{datasets → inferences}/schema.py +11 -11
- phoenix/{datasets → inferences}/validation.py +13 -14
- phoenix/logging/__init__.py +3 -0
- phoenix/logging/_config.py +90 -0
- phoenix/logging/_filter.py +6 -0
- phoenix/logging/_formatter.py +69 -0
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +4 -3
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +9 -3
- phoenix/pointcloud/clustering.py +5 -5
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/projectors.py +5 -6
- phoenix/pointcloud/umap_parameters.py +53 -52
- phoenix/server/api/README.md +28 -0
- phoenix/server/api/auth.py +44 -0
- phoenix/server/api/context.py +152 -9
- phoenix/server/api/dataloaders/__init__.py +91 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
- phoenix/server/api/dataloaders/document_evaluations.py +31 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
- phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/record_counts.py +116 -0
- phoenix/server/api/dataloaders/session_io.py +79 -0
- phoenix/server/api/dataloaders/session_num_traces.py +30 -0
- phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
- phoenix/server/api/dataloaders/session_token_usages.py +41 -0
- phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
- phoenix/server/api/dataloaders/span_annotations.py +26 -0
- phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +57 -0
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/token_counts.py +124 -0
- phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
- phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
- phoenix/server/api/dataloaders/user_roles.py +30 -0
- phoenix/server/api/dataloaders/users.py +33 -0
- phoenix/server/api/exceptions.py +48 -0
- phoenix/server/api/helpers/__init__.py +12 -0
- phoenix/server/api/helpers/dataset_helpers.py +217 -0
- phoenix/server/api/helpers/experiment_run_filters.py +763 -0
- phoenix/server/api/helpers/playground_clients.py +948 -0
- phoenix/server/api/helpers/playground_registry.py +70 -0
- phoenix/server/api/helpers/playground_spans.py +455 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
- phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
- phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +162 -0
- phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
- phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/SpanSort.py +134 -69
- phoenix/server/api/input_types/TemplateOptions.py +10 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/input_types/UserRoleInput.py +9 -0
- phoenix/server/api/mutations/__init__.py +28 -0
- phoenix/server/api/mutations/api_key_mutations.py +167 -0
- phoenix/server/api/mutations/chat_mutations.py +593 -0
- phoenix/server/api/mutations/dataset_mutations.py +591 -0
- phoenix/server/api/mutations/experiment_mutations.py +75 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
- phoenix/server/api/mutations/project_mutations.py +57 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
- phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
- phoenix/server/api/mutations/user_mutations.py +329 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +17 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +738 -0
- phoenix/server/api/routers/__init__.py +11 -0
- phoenix/server/api/routers/auth.py +284 -0
- phoenix/server/api/routers/embeddings.py +26 -0
- phoenix/server/api/routers/oauth2.py +488 -0
- phoenix/server/api/routers/v1/__init__.py +64 -0
- phoenix/server/api/routers/v1/datasets.py +1017 -0
- phoenix/server/api/routers/v1/evaluations.py +362 -0
- phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
- phoenix/server/api/routers/v1/experiment_runs.py +167 -0
- phoenix/server/api/routers/v1/experiments.py +308 -0
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +267 -0
- phoenix/server/api/routers/v1/traces.py +208 -0
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/schema.py +44 -241
- phoenix/server/api/subscriptions.py +597 -0
- phoenix/server/api/types/Annotation.py +21 -0
- phoenix/server/api/types/AnnotationSummary.py +55 -0
- phoenix/server/api/types/AnnotatorKind.py +16 -0
- phoenix/server/api/types/ApiKey.py +27 -0
- phoenix/server/api/types/AuthMethod.py +9 -0
- phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
- phoenix/server/api/types/Cluster.py +25 -24
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/DataQualityMetric.py +31 -13
- phoenix/server/api/types/Dataset.py +288 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +32 -31
- phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
- phoenix/server/api/types/EmbeddingDimension.py +56 -49
- phoenix/server/api/types/Evaluation.py +25 -31
- phoenix/server/api/types/EvaluationSummary.py +30 -50
- phoenix/server/api/types/Event.py +20 -20
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +152 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +17 -0
- phoenix/server/api/types/ExperimentRun.py +119 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
- phoenix/server/api/types/GenerativeModel.py +9 -0
- phoenix/server/api/types/GenerativeProvider.py +85 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/LabelFraction.py +7 -0
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +54 -54
- phoenix/server/api/types/PerformanceMetric.py +8 -5
- phoenix/server/api/types/Project.py +407 -142
- phoenix/server/api/types/ProjectSession.py +139 -0
- phoenix/server/api/types/Segments.py +4 -4
- phoenix/server/api/types/Span.py +221 -176
- phoenix/server/api/types/SpanAnnotation.py +43 -0
- phoenix/server/api/types/SpanIOValue.py +15 -0
- phoenix/server/api/types/SystemApiKey.py +9 -0
- phoenix/server/api/types/TemplateLanguage.py +10 -0
- phoenix/server/api/types/TimeSeries.py +19 -15
- phoenix/server/api/types/TokenUsage.py +11 -0
- phoenix/server/api/types/Trace.py +154 -0
- phoenix/server/api/types/TraceAnnotation.py +45 -0
- phoenix/server/api/types/UMAPPoints.py +7 -7
- phoenix/server/api/types/User.py +60 -0
- phoenix/server/api/types/UserApiKey.py +45 -0
- phoenix/server/api/types/UserRole.py +15 -0
- phoenix/server/api/types/node.py +4 -112
- phoenix/server/api/types/pagination.py +156 -57
- phoenix/server/api/utils.py +34 -0
- phoenix/server/app.py +864 -115
- phoenix/server/bearer_auth.py +163 -0
- phoenix/server/dml_event.py +136 -0
- phoenix/server/dml_event_handler.py +256 -0
- phoenix/server/email/__init__.py +0 -0
- phoenix/server/email/sender.py +97 -0
- phoenix/server/email/templates/__init__.py +0 -0
- phoenix/server/email/templates/password_reset.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/grpc_server.py +102 -0
- phoenix/server/jwt_store.py +505 -0
- phoenix/server/main.py +305 -116
- phoenix/server/oauth2.py +52 -0
- phoenix/server/openapi/__init__.py +0 -0
- phoenix/server/prometheus.py +111 -0
- phoenix/server/rate_limiters.py +188 -0
- phoenix/server/static/.vite/manifest.json +87 -0
- phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
- phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
- phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
- phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
- phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
- phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
- phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
- phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
- phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
- phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
- phoenix/server/telemetry.py +68 -0
- phoenix/server/templates/index.html +82 -23
- phoenix/server/thread_server.py +3 -3
- phoenix/server/types.py +275 -0
- phoenix/services.py +27 -18
- phoenix/session/client.py +743 -68
- phoenix/session/data_extractor.py +31 -7
- phoenix/session/evaluation.py +3 -9
- phoenix/session/session.py +263 -219
- phoenix/settings.py +22 -0
- phoenix/trace/__init__.py +2 -22
- phoenix/trace/attributes.py +338 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +663 -213
- phoenix/trace/dsl/helpers.py +73 -21
- phoenix/trace/dsl/query.py +574 -201
- phoenix/trace/exporter.py +24 -19
- phoenix/trace/fixtures.py +368 -32
- phoenix/trace/otel.py +71 -219
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +33 -11
- phoenix/trace/span_evaluations.py +21 -16
- phoenix/trace/span_json_decoder.py +6 -4
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +47 -32
- phoenix/trace/utils.py +21 -4
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/client.py +132 -0
- phoenix/utilities/deprecation.py +31 -0
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +109 -0
- phoenix/utilities/logging.py +8 -0
- phoenix/utilities/project.py +2 -2
- phoenix/utilities/re.py +49 -0
- phoenix/utilities/span_store.py +0 -23
- phoenix/utilities/template_formatters.py +99 -0
- phoenix/version.py +1 -1
- arize_phoenix-3.16.1.dist-info/METADATA +0 -495
- arize_phoenix-3.16.1.dist-info/RECORD +0 -178
- phoenix/core/project.py +0 -619
- phoenix/core/traces.py +0 -96
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -448
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/helpers.py +0 -11
- phoenix/server/api/routers/evaluation_handler.py +0 -109
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/server/api/types/DatasetRole.py +0 -23
- phoenix/server/static/index.css +0 -6
- phoenix/server/static/index.js +0 -7447
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- phoenix/trace/langchain/__init__.py +0 -3
- phoenix/trace/langchain/instrumentor.py +0 -35
- phoenix/trace/llama_index/__init__.py +0 -3
- phoenix/trace/llama_index/callback.py +0 -102
- phoenix/trace/openai/__init__.py +0 -3
- phoenix/trace/openai/instrumentor.py +0 -30
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/trace/exporter.py
CHANGED
@@ -7,16 +7,19 @@ from types import MethodType
 from typing import Any, Optional
 from urllib.parse import urljoin
 
-import
+import httpx
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-from requests import Session
 from typing_extensions import TypeAlias, assert_never
 
 import phoenix.trace.v1 as pb
-from phoenix.config import
+from phoenix.config import (
+    get_env_client_headers,
+    get_env_collector_endpoint,
+    get_env_host,
+    get_env_port,
+)
 
 logger = logging.getLogger(__name__)
-logger.addHandler(logging.NullHandler())
 
 END_OF_QUEUE = None  # sentinel value for queue termination
 
@@ -33,11 +36,11 @@ class _OpenInferenceExporter(OTLPSpanExporter):
         host = get_env_host()
         if host == "0.0.0.0":
             host = "127.0.0.1"
-
-
-
-
-
+        base_url = get_env_collector_endpoint() or f"http://{host}:{get_env_port()}"
+        base_url = base_url if base_url.endswith("/") else base_url + "/"
+        _warn_if_phoenix_is_not_running(base_url)
+
+        endpoint = urljoin(base_url, "v1/traces")
         super().__init__(endpoint)
 
 
@@ -68,15 +71,17 @@ class HttpExporter:
         """
         self._host = host or get_env_host()
         self._port = port or get_env_port()
-
+        base_url = (
             endpoint
             or get_env_collector_endpoint()
             or f"http://{'127.0.0.1' if self._host == '0.0.0.0' else self._host}:{self._port}"
         )
+        self._base_url = base_url if base_url.endswith("/") else base_url + "/"
         _warn_if_phoenix_is_not_running(self._base_url)
-
-
-        self.
+        headers = get_env_client_headers()
+        self._client = httpx.Client(headers=headers)
+        weakref.finalize(self, self._client.close)
+        self._client.headers.update(
             {
                 "content-type": "application/x-protobuf",
                 "content-encoding": "gzip",
@@ -109,24 +114,24 @@ class HttpExporter:
 
     def _send(self, message: Message) -> None:
         serialized = message.SerializeToString()
-
+        content = gzip.compress(serialized)
         try:
-            self.
+            self._client.post(self._url(message), content=content).raise_for_status()
         except Exception as e:
             logger.exception(e)
 
     def _url(self, message: Message) -> str:
         if isinstance(message, pb.Evaluation):
-            return urljoin(self._base_url, "
+            return urljoin(self._base_url, "v1/evaluations")
         logger.exception(f"unrecognized message type: {type(message)}")
         assert_never(message)
 
 
-def _warn_if_phoenix_is_not_running(
+def _warn_if_phoenix_is_not_running(base_url: str) -> None:
     try:
-
+        httpx.get(urljoin(base_url, "arize_phoenix_version")).raise_for_status()
     except Exception:
         logger.warning(
-            f"Arize Phoenix is not running on {
+            f"Arize Phoenix is not running on {base_url}. Launch Phoenix "
             f"with `import phoenix as px; px.launch_app()`"
         )
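The main behavioral change above is that the exporter now resolves its base URL from `get_env_collector_endpoint()` (falling back to host and port), normalizes the trailing slash, and joins the `v1/traces` / `v1/evaluations` paths with `urljoin`. The sketch below isolates that resolution logic; it is a minimal illustration rather than code from the package, and the helper name and default port are assumptions.

```python
# Minimal sketch of the endpoint resolution the updated exporter performs.
# resolve_traces_endpoint and the default port 6006 are illustrative
# assumptions; in the diff the values come from phoenix.config helpers
# (get_env_collector_endpoint, get_env_host, get_env_port).
from urllib.parse import urljoin


def resolve_traces_endpoint(collector_endpoint=None, host="127.0.0.1", port=6006):
    # Prefer an explicit collector endpoint, otherwise fall back to host/port.
    base_url = collector_endpoint or f"http://{host}:{port}"
    # Normalize the trailing slash so urljoin preserves the base path.
    base_url = base_url if base_url.endswith("/") else base_url + "/"
    return urljoin(base_url, "v1/traces")


assert resolve_traces_endpoint() == "http://127.0.0.1:6006/v1/traces"
assert resolve_traces_endpoint("http://phoenix.example.com:6006") == (
    "http://phoenix.example.com:6006/v1/traces"
)
```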
phoenix/trace/fixtures.py
CHANGED
@@ -1,13 +1,37 @@
-
-
-from
+import logging
+import shutil
+from binascii import hexlify
+from collections import defaultdict
+from collections.abc import Iterable, Iterator, Sequence
+from dataclasses import dataclass, field, replace
+from datetime import datetime, timezone
+from io import StringIO
+from random import getrandbits
+from tempfile import NamedTemporaryFile
+from time import sleep, time
+from typing import (
+    NamedTuple,
+    Optional,
+    cast,
+)
+from urllib.parse import urljoin
 
+import httpx
 import pandas as pd
 from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
+from httpx import ConnectError, HTTPStatusError
 
 import phoenix.trace.v1 as pb
+from phoenix.session.client import Client
+from phoenix.trace.schemas import Span
 from phoenix.trace.trace_dataset import TraceDataset
-from phoenix.trace.utils import
+from phoenix.trace.utils import (
+    download_json_traces_fixture,
+    json_lines_to_df,
+    parse_file_extension,
+)
+
+logger = logging.getLogger(__name__)
 
 
 class EvaluationResultSchema(NamedTuple):
@@ -28,18 +52,130 @@ class DocumentEvaluationFixture(EvaluationFixture):
     document_position: str = "document_position"
 
 
+@dataclass(frozen=True)
+class DatasetFixture:
+    file_name: str
+    name: str
+    input_keys: Sequence[str]
+    output_keys: Sequence[str]
+    metadata_keys: Sequence[str] = ()
+    description: Optional[str] = field(default=None)
+    _df: Optional[pd.DataFrame] = field(default=None, init=False, repr=False)
+    _csv: Optional[str] = field(default=None, init=False, repr=False)
+
+    def load(self) -> "DatasetFixture":
+        if self._df is None:
+            url = _url(self.file_name)
+
+            if parse_file_extension(self.file_name) == ".jsonl":
+                df = json_lines_to_df(download_json_traces_fixture(url))
+            elif parse_file_extension(self.file_name) == ".csv":
+                df = pd.read_csv(_url(self.file_name))
+            else:
+                try:
+                    df = pd.read_parquet(url)
+                except Exception:
+                    logger.warning(
+                        f"Failed to download example traces from {url=} "
+                        "due to exception {e=}. "
+                        "Returning empty dataframe for DatasetFixture"
+                    )
+                    df = pd.DataFrame()
+
+            object.__setattr__(self, "_df", df)
+        return self
+
+    @property
+    def dataframe(self) -> pd.DataFrame:
+        self.load()
+        return cast(pd.DataFrame, self._df).copy(deep=False)
+
+    @property
+    def csv(self) -> StringIO:
+        if self._csv is None:
+            with StringIO() as buffer:
+                self.dataframe.to_csv(buffer, index=False)
+                object.__setattr__(self, "_csv", buffer.getvalue())
+        return StringIO(self._csv)
+
+
 @dataclass(frozen=True)
 class TracesFixture:
     name: str
     description: str
     file_name: str
     evaluation_fixtures: Iterable[EvaluationFixture] = ()
+    dataset_fixtures: Iterable[DatasetFixture] = ()
+    project_name: Optional[str] = None
+
+
+demo_llama_index_rag_fixture = TracesFixture(
+    name="demo_llama_index_rag",
+    project_name="demo_llama_index",
+    description="Traces and evaluations of a RAG chatbot using LlamaIndex.",
+    file_name="demo_llama_index_rag_traces.parquet",
+    evaluation_fixtures=(
+        EvaluationFixture(
+            evaluation_name="Q&A Correctness",
+            file_name="demo_llama_index_rag_qa_correctness_eval.parquet",
+        ),
+        EvaluationFixture(
+            evaluation_name="Hallucination",
+            file_name="demo_llama_index_rag_hallucination_eval.parquet",
+        ),
+        DocumentEvaluationFixture(
+            evaluation_name="Relevance",
+            file_name="demo_llama_index_rag_doc_relevance_eval.parquet",
+        ),
+    ),
+    dataset_fixtures=(
+        DatasetFixture(
+            file_name="demo_llama_index_finetune_dataset.jsonl",
+            input_keys=("messages",),
+            output_keys=("messages",),
+            name="Demo LlamaIndex: RAG Q&A",
+            description="OpenAI GPT-3.5 LLM dataset for LlamaIndex demo",
+        ),
+    ),
+)
+
+demo_code_based_agent_fixture = TracesFixture(
+    name="demo_code_based_agent",
+    project_name="demo_agents",
+    description="LangGraph, LlamaIndex, and Code-based agent traces",
+    file_name="agent-demo-traces.parquet",
+)
+demo_langgraph_agent_fixture = TracesFixture(
+    name="demo_langgraph_agent",
+    project_name="demo_agents",
+    description="LangGraph, LlamaIndex, and Code-based agent traces",
+    file_name="langgraph-demo-traces-format-updated.parquet",
+)
+demo_llamaindex_workflows_agent_fixture = TracesFixture(
+    name="demo_llamaindex_workflows_agent",
+    project_name="demo_agents",
+    description="LangGraph, LlamaIndex, and Code-based agent traces",
+    file_name="llamaindex-workflow-demo-traces.parquet",
+)
+
+demo_o1_preview_timeseries_testing_fixture = TracesFixture(
+    name="demo_o1_preview_timeseries_evals",
+    project_name="demo_o1_preview_timeseries",
+    description="Shows the traces for a timeseries evaluation of o1-preview",
+    file_name="o1-traces-preview-testing.parquet",
+)
 
+demo_llama_index_rag_llm_fixture = TracesFixture(
+    name="demo_llama_index_rag_llm",
+    project_name="demo_llama_index_rag_llm",
+    description="LLM traces for RAG chatbot using LlamaIndex.",
+    file_name="demo_llama_index_llm_all_spans.parquet",
+)
 
 llama_index_rag_fixture = TracesFixture(
     name="llama_index_rag",
     description="Traces from running the llama_index on a RAG use case.",
-    file_name="llama_index_rag_v8.
+    file_name="llama_index_rag_v8.parquet",
     evaluation_fixtures=(
         EvaluationFixture(
             evaluation_name="Q&A Correctness",
@@ -54,45 +190,104 @@ llama_index_rag_fixture = TracesFixture(
             file_name="llama_index_rag_v8.retrieved_documents_eval.parquet",
         ),
     ),
+    dataset_fixtures=(
+        DatasetFixture(
+            file_name="hybridial_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: Hybrid Dialogue (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/hybridial",
+        ),
+        DatasetFixture(
+            file_name="sqa_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: SQA (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/sqa",
+        ),
+        DatasetFixture(
+            file_name="doqa_cooking_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: DoQA Cooking (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/doqa_cooking",
+        ),
+        DatasetFixture(
+            file_name="synthetic_convqa_samples.csv.gz",
+            input_keys=("messages", "document"),
+            output_keys=("answers",),
+            name="ChatQA-Train: Synthetic ConvQA (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatQA-Training-Data/viewer/synthetic_convqa",
+        ),
+    ),
+)
+
+project_sessions_llama_index_rag_arize_docs_fixture = TracesFixture(
+    name="project_sessions_llama_index_rag_arize_docs",
+    project_name="SESSIONS-DEMO",
+    file_name="project_sessions_demo_llama_index_query_engine_arize_docs.parquet",
+    description="RAG queries grouped by session.id and user.id.",
 )
 
 llama_index_calculator_agent_fixture = TracesFixture(
     name="llama_index_calculator_agent",
     description="Traces from running the llama_index with calculator tools.",
-    file_name="llama_index_calculator_agent_v3.
+    file_name="llama_index_calculator_agent_v3.parquet",
 )
 
 llama_index_rag_fixture_with_davinci = TracesFixture(
     name="llama_index_rag_with_davinci",
     description="Traces from running llama_index on a RAG use case with the completions API.",
-    file_name="llama_index_rag_with_davinci_v0.
+    file_name="llama_index_rag_with_davinci_v0.parquet",
 )
 
 langchain_rag_stuff_document_chain_fixture = TracesFixture(
     name="langchain_rag_stuff_document_chain",
+    project_name="demo_langchain_rag",
     description="LangChain RAG data",
-    file_name="langchain_rag.
+    file_name="langchain_rag.parquet",
 )
 
 langchain_titanic_csv_agent_evaluator_fixture = TracesFixture(
     name="lc_titanic",
     description="LangChain titanic.csv Agent Evaluator",
-    file_name="lc_titanic.
+    file_name="lc_titanic.parquet",
 )
 
 langchain_qa_with_sources_fixture = TracesFixture(
     name="langchain_qa_with_sources",
     description="LangChain QA with sources on financial data",
-    file_name="langchain_qa_with_sources_chain.
+    file_name="langchain_qa_with_sources_chain.parquet",
+)
+
+vision_fixture = TracesFixture(
+    name="vision",
+    project_name="demo_multimodal",
+    description="Vision LLM Requests",
+    file_name="vision_fixture_trace_datasets.parquet",
+)
+
+anthropic_tools_fixture = TracesFixture(
+    name="anthropic_tools",
+    project_name="anthropic_tools",
+    description="Anthropic tools traces",
+    file_name="anthropic_tools.parquet",
 )
 
 random_fixture = TracesFixture(
     name="random",
+    project_name="demo_random",
     description="Randomly generated traces",
     file_name="random.jsonl",
 )
 
-TRACES_FIXTURES:
+TRACES_FIXTURES: list[TracesFixture] = [
+    demo_llama_index_rag_fixture,
+    demo_llama_index_rag_llm_fixture,
+    demo_langgraph_agent_fixture,
+    demo_code_based_agent_fixture,
+    demo_llamaindex_workflows_agent_fixture,
+    demo_o1_preview_timeseries_testing_fixture,
     llama_index_rag_fixture,
     llama_index_rag_fixture_with_davinci,
     langchain_rag_stuff_document_chain_fixture,
@@ -100,14 +295,23 @@ TRACES_FIXTURES: List[TracesFixture] = [
     random_fixture,
     langchain_qa_with_sources_fixture,
     llama_index_calculator_agent_fixture,
+    vision_fixture,
+    anthropic_tools_fixture,
+    project_sessions_llama_index_rag_arize_docs_fixture,
 ]
 
-NAME_TO_TRACES_FIXTURE
+NAME_TO_TRACES_FIXTURE: dict[str, TracesFixture] = {
+    fixture.name: fixture for fixture in TRACES_FIXTURES
+}
+PROJ_NAME_TO_TRACES_FIXTURE: defaultdict[str, list[TracesFixture]] = defaultdict(list)
+for fixture in TRACES_FIXTURES:
+    if fixture.project_name:
+        PROJ_NAME_TO_TRACES_FIXTURE[fixture.project_name].append(fixture)
 
 
-def
+def get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
     """
-    Returns the fixture whose name matches the input name.
+    Returns the trace fixture whose name matches the input name.
 
     Raises
     ------
@@ -120,31 +324,105 @@ def _get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
     return NAME_TO_TRACES_FIXTURE[fixture_name]
 
 
-def
-    fixture: TracesFixture,
-    host: Optional[str] = "https://storage.googleapis.com/",
-    bucket: Optional[str] = "arize-assets",
-    prefix: Optional[str] = "phoenix/traces/",
-) -> List[str]:
+def get_trace_fixtures_by_project_name(proj_name: str) -> list[TracesFixture]:
     """
-
+    Returns a dictionary of project name (key) and set of TracesFixtures (value)
+    whose project name matches the input name.
+
+    Raises
+    ------
+    ValueError
+        if the input fixture name does not match any known project names.
     """
-
-
-
+    if proj_name not in PROJ_NAME_TO_TRACES_FIXTURE:
+        valid_fixture_proj_names = ", ".join(PROJ_NAME_TO_TRACES_FIXTURE.keys())
+        raise ValueError(
+            f'"{proj_name}" is invalid. Valid project names are: {valid_fixture_proj_names}'
+        )
+    return PROJ_NAME_TO_TRACES_FIXTURE[proj_name]
 
 
-def load_example_traces(
+def load_example_traces(fixture_name: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.
     """
-    fixture =
-
+    fixture = get_trace_fixture_by_name(fixture_name)
+    url = _url(fixture.file_name)
+
+    if parse_file_extension(fixture.file_name) == ".jsonl":
+        return TraceDataset(json_lines_to_df(download_json_traces_fixture(url)))
+
+    try:
+        df = pd.read_parquet(url)
+    except Exception as e:
+        logger.warning(
+            f"Failed to download example traces from {url=} due to exception {e=}. "
+            "Returning empty TraceDataset"
+        )
+        df = pd.DataFrame()
+
+    return TraceDataset(df)
+
+
+def get_dataset_fixtures(fixture_name: str) -> Iterable[DatasetFixture]:
+    return (fixture.load() for fixture in get_trace_fixture_by_name(fixture_name).dataset_fixtures)
+
+
+def send_dataset_fixtures(
+    endpoint: str,
+    fixtures: Iterable[DatasetFixture],
+) -> None:
+    expiration = time() + 5
+    while time() < expiration:
+        try:
+            url = urljoin(endpoint, "/healthz")
+            httpx.get(url=url).raise_for_status()
+        except ConnectError:
+            sleep(0.1)
+            continue
+        except Exception as e:
+            print(str(e))
+            raise
+        break
+    client = Client(endpoint=endpoint)
+    for i, fixture in enumerate(fixtures):
+        try:
+            if i % 2:
+                client.upload_dataset(
+                    dataset_name=fixture.name,
+                    dataframe=fixture.dataframe,
+                    input_keys=fixture.input_keys,
+                    output_keys=fixture.output_keys,
+                    metadata_keys=fixture.metadata_keys,
+                    dataset_description=fixture.description,
+                )
+            else:
+                with NamedTemporaryFile() as tf:
+                    with open(tf.name, "w") as f:
+                        shutil.copyfileobj(fixture.csv, f)
+                        f.flush()
+                    client.upload_dataset(
+                        dataset_name=fixture.name,
+                        csv_file_path=tf.name,
+                        input_keys=fixture.input_keys,
+                        output_keys=fixture.output_keys,
+                        metadata_keys=fixture.metadata_keys,
+                        dataset_description=fixture.description,
+                    )
        except HTTPStatusError as e:
+            print(e.response.content.decode())
+            pass
+        else:
+            name, df = fixture.name, fixture.dataframe
+            print(f"Dataset sent: {name=}, {len(df)=}")
 
 
-def get_evals_from_fixture(
-    fixture =
+def get_evals_from_fixture(fixture_name: str) -> Iterator[pb.Evaluation]:
+    fixture = get_trace_fixture_by_name(fixture_name)
     for eval_fixture in fixture.evaluation_fixtures:
+        logger.info(
+            f"Loading eval fixture '{eval_fixture.evaluation_name}' from '{eval_fixture.file_name}'"
+        )
         yield from _read_eval_fixture(eval_fixture)
 
 
@@ -161,7 +439,7 @@ def _read_eval_fixture(eval_fixture: EvaluationFixture) -> Iterator[pb.Evaluation]:
             explanation=StringValue(value=cast(str, explanation)) if explanation else None,
         )
         if isinstance(eval_fixture, DocumentEvaluationFixture):
-            span_id, document_position = cast(
+            span_id, document_position = cast(tuple[str, int], index)
             # Legacy fixture files contain UUID strings for span_ids. The hyphens in these
             # strings need to be removed because we are also removing the hyphens from the
             # span_ids of their corresponding traces. In general, hyphen is not an allowed
@@ -191,7 +469,65 @@ def _read_eval_fixture(eval_fixture: EvaluationFixture) -> Iterator[pb.Evaluation]:
 def _url(
     file_name: str,
     host: Optional[str] = "https://storage.googleapis.com/",
-    bucket: Optional[str] = "arize-assets",
-    prefix: Optional[str] = "
+    bucket: Optional[str] = "arize-phoenix-assets",
+    prefix: Optional[str] = "traces/",
 ) -> str:
     return f"{host}{bucket}/{prefix}{file_name}"
+
+
+def reset_fixture_span_ids_and_timestamps(
+    spans: Iterable[Span],
+    evals: Iterable[pb.Evaluation] = (),
+) -> tuple[list[Span], list[pb.Evaluation]]:
+    old_spans, old_evals = list(spans), list(evals)
+    new_trace_ids: dict[str, str] = {}
+    new_span_ids: dict[str, str] = {}
+    for old_span in old_spans:
+        new_trace_ids[old_span.context.trace_id] = _new_trace_id()
+        new_span_ids[old_span.context.span_id] = _new_span_id()
+        if old_span.parent_id:
+            new_span_ids[old_span.parent_id] = _new_span_id()
+    for old_eval in old_evals:
+        subject_id = old_eval.subject_id
+        if trace_id := subject_id.trace_id:
+            new_trace_ids[trace_id] = _new_trace_id()
+        elif span_id := subject_id.span_id:
+            new_span_ids[span_id] = _new_span_id()
+        elif span_id := subject_id.document_retrieval_id.span_id:
+            new_span_ids[span_id] = _new_span_id()
+    max_end_time = max(old_span.end_time for old_span in old_spans)
+    time_diff = datetime.now(timezone.utc) - max_end_time
+    new_spans: list[Span] = []
+    new_evals: list[pb.Evaluation] = []
+    for old_span in old_spans:
+        new_trace_id = new_trace_ids[old_span.context.trace_id]
+        new_span_id = new_span_ids[old_span.context.span_id]
+        new_parent_id = new_span_ids[old_span.parent_id] if old_span.parent_id else None
+        new_span = replace(
+            old_span,
+            context=replace(old_span.context, trace_id=new_trace_id, span_id=new_span_id),
+            parent_id=new_parent_id,
+            start_time=old_span.start_time + time_diff,
+            end_time=old_span.end_time + time_diff,
+        )
+        new_spans.append(new_span)
+    for old_eval in old_evals:
+        new_eval = pb.Evaluation()
+        new_eval.CopyFrom(old_eval)
+        subject_id = new_eval.subject_id
+        if trace_id := subject_id.trace_id:
+            subject_id.trace_id = new_trace_ids[trace_id]
+        elif span_id := subject_id.span_id:
+            subject_id.span_id = new_span_ids[span_id]
+        elif span_id := subject_id.document_retrieval_id.span_id:
+            subject_id.document_retrieval_id.span_id = new_span_ids[span_id]
+        new_evals.append(new_eval)
+    return new_spans, new_evals
+
+
+def _new_trace_id() -> str:
+    return hexlify(getrandbits(128).to_bytes(16, "big")).decode()
+
+
+def _new_span_id() -> str:
+    return hexlify(getrandbits(64).to_bytes(8, "big")).decode()