arize-phoenix 3.16.0__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- arize_phoenix-7.7.0.dist-info/METADATA +261 -0
- arize_phoenix-7.7.0.dist-info/RECORD +345 -0
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
- arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
- phoenix/__init__.py +86 -14
- phoenix/auth.py +309 -0
- phoenix/config.py +675 -45
- phoenix/core/model.py +32 -30
- phoenix/core/model_schema.py +102 -109
- phoenix/core/model_schema_adapter.py +48 -45
- phoenix/datetime_utils.py +24 -3
- phoenix/db/README.md +54 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +85 -0
- phoenix/db/bulk_inserter.py +294 -0
- phoenix/db/engines.py +208 -0
- phoenix/db/enums.py +20 -0
- phoenix/db/facilitator.py +113 -0
- phoenix/db/helpers.py +159 -0
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/dataset.py +227 -0
- phoenix/db/insertion/document_annotation.py +171 -0
- phoenix/db/insertion/evaluation.py +191 -0
- phoenix/db/insertion/helpers.py +98 -0
- phoenix/db/insertion/span.py +193 -0
- phoenix/db/insertion/span_annotation.py +158 -0
- phoenix/db/insertion/trace_annotation.py +158 -0
- phoenix/db/insertion/types.py +256 -0
- phoenix/db/migrate.py +86 -0
- phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
- phoenix/db/migrations/env.py +114 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
- phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
- phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +807 -0
- phoenix/exceptions.py +5 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +158 -0
- phoenix/experiments/evaluators/code_evaluators.py +184 -0
- phoenix/experiments/evaluators/llm_evaluators.py +473 -0
- phoenix/experiments/evaluators/utils.py +236 -0
- phoenix/experiments/functions.py +772 -0
- phoenix/experiments/tracing.py +86 -0
- phoenix/experiments/types.py +726 -0
- phoenix/experiments/utils.py +25 -0
- phoenix/inferences/__init__.py +0 -0
- phoenix/{datasets → inferences}/errors.py +6 -5
- phoenix/{datasets → inferences}/fixtures.py +49 -42
- phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
- phoenix/{datasets → inferences}/schema.py +11 -11
- phoenix/{datasets → inferences}/validation.py +13 -14
- phoenix/logging/__init__.py +3 -0
- phoenix/logging/_config.py +90 -0
- phoenix/logging/_filter.py +6 -0
- phoenix/logging/_formatter.py +69 -0
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +4 -3
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +9 -3
- phoenix/pointcloud/clustering.py +5 -5
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/projectors.py +5 -6
- phoenix/pointcloud/umap_parameters.py +53 -52
- phoenix/server/api/README.md +28 -0
- phoenix/server/api/auth.py +44 -0
- phoenix/server/api/context.py +152 -9
- phoenix/server/api/dataloaders/__init__.py +91 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
- phoenix/server/api/dataloaders/document_evaluations.py +31 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
- phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/record_counts.py +116 -0
- phoenix/server/api/dataloaders/session_io.py +79 -0
- phoenix/server/api/dataloaders/session_num_traces.py +30 -0
- phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
- phoenix/server/api/dataloaders/session_token_usages.py +41 -0
- phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
- phoenix/server/api/dataloaders/span_annotations.py +26 -0
- phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +57 -0
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/token_counts.py +124 -0
- phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
- phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
- phoenix/server/api/dataloaders/user_roles.py +30 -0
- phoenix/server/api/dataloaders/users.py +33 -0
- phoenix/server/api/exceptions.py +48 -0
- phoenix/server/api/helpers/__init__.py +12 -0
- phoenix/server/api/helpers/dataset_helpers.py +217 -0
- phoenix/server/api/helpers/experiment_run_filters.py +763 -0
- phoenix/server/api/helpers/playground_clients.py +948 -0
- phoenix/server/api/helpers/playground_registry.py +70 -0
- phoenix/server/api/helpers/playground_spans.py +455 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
- phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
- phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +162 -0
- phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
- phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/SpanSort.py +134 -69
- phoenix/server/api/input_types/TemplateOptions.py +10 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/input_types/UserRoleInput.py +9 -0
- phoenix/server/api/mutations/__init__.py +28 -0
- phoenix/server/api/mutations/api_key_mutations.py +167 -0
- phoenix/server/api/mutations/chat_mutations.py +593 -0
- phoenix/server/api/mutations/dataset_mutations.py +591 -0
- phoenix/server/api/mutations/experiment_mutations.py +75 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
- phoenix/server/api/mutations/project_mutations.py +57 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
- phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
- phoenix/server/api/mutations/user_mutations.py +329 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +17 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +738 -0
- phoenix/server/api/routers/__init__.py +11 -0
- phoenix/server/api/routers/auth.py +284 -0
- phoenix/server/api/routers/embeddings.py +26 -0
- phoenix/server/api/routers/oauth2.py +488 -0
- phoenix/server/api/routers/v1/__init__.py +64 -0
- phoenix/server/api/routers/v1/datasets.py +1017 -0
- phoenix/server/api/routers/v1/evaluations.py +362 -0
- phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
- phoenix/server/api/routers/v1/experiment_runs.py +167 -0
- phoenix/server/api/routers/v1/experiments.py +308 -0
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +267 -0
- phoenix/server/api/routers/v1/traces.py +208 -0
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/schema.py +44 -247
- phoenix/server/api/subscriptions.py +597 -0
- phoenix/server/api/types/Annotation.py +21 -0
- phoenix/server/api/types/AnnotationSummary.py +55 -0
- phoenix/server/api/types/AnnotatorKind.py +16 -0
- phoenix/server/api/types/ApiKey.py +27 -0
- phoenix/server/api/types/AuthMethod.py +9 -0
- phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
- phoenix/server/api/types/Cluster.py +25 -24
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/DataQualityMetric.py +31 -13
- phoenix/server/api/types/Dataset.py +288 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +32 -31
- phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
- phoenix/server/api/types/EmbeddingDimension.py +56 -49
- phoenix/server/api/types/Evaluation.py +25 -31
- phoenix/server/api/types/EvaluationSummary.py +30 -50
- phoenix/server/api/types/Event.py +20 -20
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +152 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +17 -0
- phoenix/server/api/types/ExperimentRun.py +119 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
- phoenix/server/api/types/GenerativeModel.py +9 -0
- phoenix/server/api/types/GenerativeProvider.py +85 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/LabelFraction.py +7 -0
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +54 -54
- phoenix/server/api/types/PerformanceMetric.py +8 -5
- phoenix/server/api/types/Project.py +407 -142
- phoenix/server/api/types/ProjectSession.py +139 -0
- phoenix/server/api/types/Segments.py +4 -4
- phoenix/server/api/types/Span.py +221 -176
- phoenix/server/api/types/SpanAnnotation.py +43 -0
- phoenix/server/api/types/SpanIOValue.py +15 -0
- phoenix/server/api/types/SystemApiKey.py +9 -0
- phoenix/server/api/types/TemplateLanguage.py +10 -0
- phoenix/server/api/types/TimeSeries.py +19 -15
- phoenix/server/api/types/TokenUsage.py +11 -0
- phoenix/server/api/types/Trace.py +154 -0
- phoenix/server/api/types/TraceAnnotation.py +45 -0
- phoenix/server/api/types/UMAPPoints.py +7 -7
- phoenix/server/api/types/User.py +60 -0
- phoenix/server/api/types/UserApiKey.py +45 -0
- phoenix/server/api/types/UserRole.py +15 -0
- phoenix/server/api/types/node.py +13 -107
- phoenix/server/api/types/pagination.py +156 -57
- phoenix/server/api/utils.py +34 -0
- phoenix/server/app.py +864 -115
- phoenix/server/bearer_auth.py +163 -0
- phoenix/server/dml_event.py +136 -0
- phoenix/server/dml_event_handler.py +256 -0
- phoenix/server/email/__init__.py +0 -0
- phoenix/server/email/sender.py +97 -0
- phoenix/server/email/templates/__init__.py +0 -0
- phoenix/server/email/templates/password_reset.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/grpc_server.py +102 -0
- phoenix/server/jwt_store.py +505 -0
- phoenix/server/main.py +305 -116
- phoenix/server/oauth2.py +52 -0
- phoenix/server/openapi/__init__.py +0 -0
- phoenix/server/prometheus.py +111 -0
- phoenix/server/rate_limiters.py +188 -0
- phoenix/server/static/.vite/manifest.json +87 -0
- phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
- phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
- phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
- phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
- phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
- phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
- phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
- phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
- phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
- phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
- phoenix/server/telemetry.py +68 -0
- phoenix/server/templates/index.html +82 -23
- phoenix/server/thread_server.py +3 -3
- phoenix/server/types.py +275 -0
- phoenix/services.py +27 -18
- phoenix/session/client.py +743 -68
- phoenix/session/data_extractor.py +31 -7
- phoenix/session/evaluation.py +3 -9
- phoenix/session/session.py +263 -219
- phoenix/settings.py +22 -0
- phoenix/trace/__init__.py +2 -22
- phoenix/trace/attributes.py +338 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +663 -213
- phoenix/trace/dsl/helpers.py +73 -21
- phoenix/trace/dsl/query.py +574 -201
- phoenix/trace/exporter.py +24 -19
- phoenix/trace/fixtures.py +368 -32
- phoenix/trace/otel.py +71 -219
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +33 -11
- phoenix/trace/span_evaluations.py +21 -16
- phoenix/trace/span_json_decoder.py +6 -4
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +47 -32
- phoenix/trace/utils.py +21 -4
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/client.py +132 -0
- phoenix/utilities/deprecation.py +31 -0
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +109 -0
- phoenix/utilities/logging.py +8 -0
- phoenix/utilities/project.py +2 -2
- phoenix/utilities/re.py +49 -0
- phoenix/utilities/span_store.py +0 -23
- phoenix/utilities/template_formatters.py +99 -0
- phoenix/version.py +1 -1
- arize_phoenix-3.16.0.dist-info/METADATA +0 -495
- arize_phoenix-3.16.0.dist-info/RECORD +0 -178
- phoenix/core/project.py +0 -617
- phoenix/core/traces.py +0 -100
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -448
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/helpers.py +0 -11
- phoenix/server/api/routers/evaluation_handler.py +0 -109
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/server/api/types/DatasetRole.py +0 -23
- phoenix/server/static/index.css +0 -6
- phoenix/server/static/index.js +0 -7447
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- phoenix/trace/langchain/__init__.py +0 -3
- phoenix/trace/langchain/instrumentor.py +0 -35
- phoenix/trace/llama_index/__init__.py +0 -3
- phoenix/trace/llama_index/callback.py +0 -102
- phoenix/trace/openai/__init__.py +0 -3
- phoenix/trace/openai/instrumentor.py +0 -30
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Awaitable, Iterable, Iterator, Mapping
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from itertools import chain
|
|
7
|
+
from typing import Any, Optional, Union, cast
|
|
8
|
+
|
|
9
|
+
from sqlalchemy import insert, select
|
|
10
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
11
|
+
from typing_extensions import TypeAlias
|
|
12
|
+
|
|
13
|
+
from phoenix.db import models
|
|
14
|
+
from phoenix.db.insertion.helpers import DataManipulationEvent
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)

# Integer primary keys of the corresponding database tables, aliased so the
# function signatures below read unambiguously.
DatasetId: TypeAlias = int
DatasetVersionId: TypeAlias = int
DatasetExampleId: TypeAlias = int
DatasetExampleRevisionId: TypeAlias = int
SpanRowId: TypeAlias = int
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class ExampleContent:
    """User-supplied payload of a single dataset example.

    Each part defaults to an empty dict so callers may omit any of them; the
    mappings are stored as-is on the example's revision row.
    """

    input: dict[str, Any] = field(default_factory=dict)
    output: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# A batch of example payloads to be inserted together.
Examples: TypeAlias = Iterable[ExampleContent]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class DatasetExampleAdditionEvent(DataManipulationEvent):
    """Event emitted after examples have been added to a dataset."""

    # Row id of the dataset the examples were added to.
    dataset_id: DatasetId
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
async def insert_dataset(
    session: AsyncSession,
    name: str,
    description: Optional[str] = None,
    metadata: Optional[Mapping[str, Any]] = None,
    created_at: Optional[datetime] = None,
) -> DatasetId:
    """Insert a new row into the datasets table and return its row id.

    Args:
        session: Open async database session; no commit is issued here.
        name: Dataset name.
        description: Optional free-text description.
        metadata: Optional user metadata mapping.
        created_at: Optional creation timestamp (database default otherwise).
    """
    values = dict(
        name=name,
        description=description,
        metadata_=metadata,  # column is metadata_ on the model
        created_at=created_at,
    )
    stmt = insert(models.Dataset).values(**values).returning(models.Dataset.id)
    dataset_rowid = await session.scalar(stmt)
    return cast(DatasetId, dataset_rowid)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def insert_dataset_version(
    session: AsyncSession,
    dataset_id: DatasetId,
    description: Optional[str] = None,
    metadata: Optional[Mapping[str, Any]] = None,
    created_at: Optional[datetime] = None,
) -> DatasetVersionId:
    """Insert a new version row for the given dataset and return its row id.

    Args:
        session: Open async database session; no commit is issued here.
        dataset_id: Row id of the parent dataset.
        description: Optional free-text description of the version.
        metadata: Optional user metadata mapping.
        created_at: Optional creation timestamp (database default otherwise).
    """
    stmt = (
        insert(models.DatasetVersion)
        .values(
            dataset_id=dataset_id,
            description=description,
            metadata_=metadata,  # column is metadata_ on the model
            created_at=created_at,
        )
        .returning(models.DatasetVersion.id)
    )
    version_rowid = await session.scalar(stmt)
    return cast(DatasetVersionId, version_rowid)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
async def insert_dataset_example(
    session: AsyncSession,
    dataset_id: DatasetId,
    span_rowid: Optional[SpanRowId] = None,
    created_at: Optional[datetime] = None,
) -> DatasetExampleId:
    """Insert a dataset example row and return its row id.

    Args:
        session: Open async database session; no commit is issued here.
        dataset_id: Row id of the dataset this example belongs to.
        span_rowid: Optional row id of the span the example was derived from.
        created_at: Optional creation timestamp (database default otherwise).
    """
    insertion = insert(models.DatasetExample).values(
        dataset_id=dataset_id,
        span_rowid=span_rowid,
        created_at=created_at,
    )
    example_rowid = await session.scalar(
        insertion.returning(models.DatasetExample.id)
    )
    return cast(DatasetExampleId, example_rowid)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class RevisionKind(Enum):
    """Kind of change recorded by a dataset example revision."""

    CREATE = "CREATE"
    PATCH = "PATCH"
    DELETE = "DELETE"

    @classmethod
    def _missing_(cls, v: Any) -> "RevisionKind":
        # Accept case-insensitive lookups such as "create" or "Patch" by
        # retrying the lookup with the upper-cased value. Anything that is
        # not a foldable ASCII string is rejected outright.
        is_foldable = isinstance(v, str) and bool(v) and v.isascii() and not v.isupper()
        if not is_foldable:
            raise ValueError(f"Invalid revision kind: {v}")
        return cls(v.upper())
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
async def insert_dataset_example_revision(
    session: AsyncSession,
    dataset_version_id: DatasetVersionId,
    dataset_example_id: DatasetExampleId,
    input: Mapping[str, Any],
    output: Mapping[str, Any],
    metadata: Optional[Mapping[str, Any]] = None,
    revision_kind: RevisionKind = RevisionKind.CREATE,
    created_at: Optional[datetime] = None,
) -> DatasetExampleRevisionId:
    """Insert one revision row for a dataset example and return its row id.

    Args:
        session: Open async database session; no commit is issued here.
        dataset_version_id: Row id of the dataset version this revision is part of.
        dataset_example_id: Row id of the example being revised.
        input: Example input payload.
        output: Example output payload.
        metadata: Optional user metadata mapping.
        revision_kind: Kind of change this revision records (defaults to CREATE).
        created_at: Optional creation timestamp (database default otherwise).
    """
    stmt = (
        insert(models.DatasetExampleRevision)
        .values(
            dataset_version_id=dataset_version_id,
            dataset_example_id=dataset_example_id,
            input=input,
            output=output,
            metadata_=metadata,  # column is metadata_ on the model
            revision_kind=revision_kind.value,  # stored as its string value
            created_at=created_at,
        )
        .returning(models.DatasetExampleRevision.id)
    )
    revision_rowid = await session.scalar(stmt)
    return cast(DatasetExampleRevisionId, revision_rowid)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class DatasetAction(Enum):
    """How incoming examples relate to a dataset: make a new one, or append."""

    CREATE = "create"
    APPEND = "append"

    @classmethod
    def _missing_(cls, v: Any) -> "DatasetAction":
        # Accept case-insensitive lookups such as "CREATE" or "Append" by
        # retrying the lookup with the lower-cased value.
        if isinstance(v, str) and v and v.isascii() and not v.islower():
            return cls(v.lower())
        # Fix: message previously misspelled "dataset" as "dateset".
        raise ValueError(f"Invalid dataset action: {v}")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
async def add_dataset_examples(
    session: AsyncSession,
    name: str,
    examples: Union[Examples, Awaitable[Examples]],
    description: Optional[str] = None,
    metadata: Optional[Mapping[str, Any]] = None,
    action: DatasetAction = DatasetAction.CREATE,
) -> Optional[DatasetExampleAdditionEvent]:
    """Add a batch of examples to a dataset, creating the dataset if needed.

    For ``DatasetAction.APPEND`` the dataset is looked up by ``name``; if it
    does not exist (or the action is ``CREATE``) a new dataset is inserted.
    A new dataset version is then created and every example is inserted with
    a CREATE revision attached to that version. All rows share one timestamp.

    Args:
        session: Open async database session; no commit is issued here.
        name: Dataset name to create or append to.
        examples: The example payloads, or an awaitable resolving to them.
        description: Optional dataset description (used only on creation).
        metadata: Optional dataset metadata (used only on creation).
        action: Whether to create a new dataset or append to an existing one.

    Returns:
        An event carrying the affected dataset's row id.

    Raises:
        Exception: Any database error is logged and re-raised unchanged.
    """
    # One shared timestamp so the dataset, version, examples, and revisions
    # of this batch all carry the same creation time.
    created_at = datetime.now(timezone.utc)
    dataset_id: Optional[DatasetId] = None
    # For APPEND, try to resolve an existing dataset by name first.
    if action is DatasetAction.APPEND and name:
        dataset_id = await session.scalar(
            select(models.Dataset.id).where(models.Dataset.name == name)
        )
    # CREATE always inserts a new dataset; APPEND falls back to creating one
    # when no dataset with the given name was found.
    if action is DatasetAction.CREATE or dataset_id is None:
        try:
            dataset_id = await insert_dataset(
                session=session,
                name=name,
                description=description,
                metadata=metadata,
                created_at=created_at,
            )
        except Exception:
            logger.exception(f"Failed to insert dataset: {name=}")
            raise
    try:
        # Every batch gets its own version row, even when appending.
        dataset_version_id = await insert_dataset_version(
            session=session,
            dataset_id=dataset_id,
            created_at=created_at,
        )
    except Exception:
        logger.exception(f"Failed to insert dataset version for {dataset_id=}")
        raise
    # ``examples`` may be supplied lazily as an awaitable; resolve it here.
    for example in (await examples) if isinstance(examples, Awaitable) else examples:
        try:
            dataset_example_id = await insert_dataset_example(
                session=session,
                dataset_id=dataset_id,
                created_at=created_at,
            )
        except Exception:
            logger.exception(f"Failed to insert dataset example for {dataset_id=}")
            raise
        try:
            # Each example's content lives on a revision row tied to the
            # version created above (revision_kind defaults to CREATE).
            await insert_dataset_example_revision(
                session=session,
                dataset_version_id=dataset_version_id,
                dataset_example_id=dataset_example_id,
                input=example.input,
                output=example.output,
                metadata=example.metadata,
                created_at=created_at,
            )
        except Exception:
            logger.exception(
                f"Failed to insert dataset example revision for {dataset_version_id=}, "
                f"{dataset_example_id=}"
            )
            raise
    return DatasetExampleAdditionEvent(dataset_id=dataset_id)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@dataclass(frozen=True)
class DatasetKeys:
    """Column-key sets for the three parts of a dataset example.

    Iterating yields the union of all keys in sorted order.
    """

    input: frozenset[str]
    output: frozenset[str]
    metadata: frozenset[str]

    def __iter__(self) -> Iterator[str]:
        combined = self.input | self.output | self.metadata
        return iter(sorted(combined))

    def check_differences(self, column_headers_set: frozenset[str]) -> None:
        """Raise ValueError if any key is absent from the given column headers."""
        categories = {
            "input": self.input,
            "output": self.output,
            "metadata": self.metadata,
        }
        for category, keys in categories.items():
            missing = keys - column_headers_set
            if missing:
                raise ValueError(
                    f"{category} keys not found in table column headers: {missing}"
                )
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
from collections.abc import Mapping
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any, NamedTuple, Optional
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import Row, Select, and_, select, tuple_
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
from typing_extensions import TypeAlias
|
|
8
|
+
|
|
9
|
+
from phoenix.db import models
|
|
10
|
+
from phoenix.db.helpers import dedup, num_docs_col
|
|
11
|
+
from phoenix.db.insertion.helpers import as_kv
|
|
12
|
+
from phoenix.db.insertion.types import (
|
|
13
|
+
Insertables,
|
|
14
|
+
Postponed,
|
|
15
|
+
Precursors,
|
|
16
|
+
QueueInserter,
|
|
17
|
+
Received,
|
|
18
|
+
)
|
|
19
|
+
from phoenix.server.dml_event import DocumentAnnotationDmlEvent
|
|
20
|
+
|
|
21
|
+
# Private aliases naming the pieces of a document annotation's identity.
_Name: TypeAlias = str
_SpanId: TypeAlias = str
_SpanRowId: TypeAlias = int
_DocumentPosition: TypeAlias = int
_AnnoRowId: TypeAlias = int
_NumDocs: TypeAlias = int

# Lookup key as received over the wire: (annotation name, span id, position).
_Key: TypeAlias = tuple[_Name, _SpanId, _DocumentPosition]
# Deduplication key once the span's row id has been resolved.
_UniqueBy: TypeAlias = tuple[_Name, _SpanRowId, _DocumentPosition]
# Row shape returned by _select_existing: span columns first, then annotation
# columns, which are None when the outer join found no matching annotation.
_Existing: TypeAlias = tuple[
    _SpanRowId,
    _SpanId,
    _NumDocs,
    Optional[_AnnoRowId],
    Optional[_Name],
    Optional[_DocumentPosition],
    Optional[datetime],
]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DocumentAnnotationQueueInserter(
    QueueInserter[
        Precursors.DocumentAnnotation,
        Insertables.DocumentAnnotation,
        models.DocumentAnnotation,
        DocumentAnnotationDmlEvent,
    ],
    table=models.DocumentAnnotation,
    unique_by=("name", "span_rowid", "document_position"),
):
    """Queue-based inserter for document annotations.

    Incoming parcels are partitioned into inserts, retries, and discards
    based on whether the referenced span exists and whether a newer version
    of the same annotation is already stored.
    """

    async def _events(
        self,
        session: AsyncSession,
        *insertions: Insertables.DocumentAnnotation,
    ) -> list[DocumentAnnotationDmlEvent]:
        """Insert the given rows (upsert-on-conflict) and emit one DML event
        carrying all inserted row ids."""
        records = [dict(as_kv(ins.row)) for ins in insertions]
        stmt = self._insert_on_conflict(*records).returning(self.table.id)
        ids = tuple([_ async for _ in await session.stream_scalars(stmt)])
        return [DocumentAnnotationDmlEvent(ids)]

    async def _partition(
        self,
        session: AsyncSession,
        *parcels: Received[Precursors.DocumentAnnotation],
    ) -> tuple[
        list[Received[Insertables.DocumentAnnotation]],
        list[Postponed[Precursors.DocumentAnnotation]],
        list[Received[Precursors.DocumentAnnotation]],
    ]:
        """Split parcels into (to_insert, to_postpone, to_discard).

        A parcel is inserted when its span exists and its document position
        is valid; discarded when it is stale or out of range; postponed when
        its span has not arrived yet and retries remain.
        """
        to_insert: list[Received[Insertables.DocumentAnnotation]] = []
        to_postpone: list[Postponed[Precursors.DocumentAnnotation]] = []
        to_discard: list[Received[Precursors.DocumentAnnotation]] = []

        # One query fetches, per key, the span row (if any) outer-joined with
        # any annotation already stored under the same (name, span, position).
        stmt = self._select_existing(*map(_key, parcels))
        existing: list[Row[_Existing]] = [_ async for _ in await session.stream(stmt)]
        existing_spans: Mapping[str, _SpanAttr] = {
            e.span_id: _SpanAttr(e.span_rowid, e.num_docs) for e in existing
        }
        # Only rows where the outer join actually matched an annotation; the
        # None checks filter out span-only rows.
        existing_annos: Mapping[_Key, _AnnoAttr] = {
            (e.name, e.span_id, e.document_position): _AnnoAttr(e.span_rowid, e.id, e.updated_at)
            for e in existing
            if e.id is not None
            and e.name is not None
            and e.document_position is not None
            and e.updated_at is not None
        }

        for p in parcels:
            if (anno := existing_annos.get(_key(p))) is not None:
                # An annotation already exists: keep only strictly newer data.
                if p.received_at <= anno.updated_at:
                    to_discard.append(p)
                else:
                    to_insert.append(
                        Received(
                            received_at=p.received_at,
                            item=p.item.as_insertable(
                                span_rowid=anno.span_rowid,
                                id_=anno.id_,
                            ),
                        )
                    )
            elif (span := existing_spans.get(p.item.span_id)) is not None:
                # Span exists but no prior annotation: validate the position
                # against the span's document count.
                if 0 <= p.item.document_position < span.num_docs:
                    to_insert.append(
                        Received(
                            received_at=p.received_at,
                            item=p.item.as_insertable(
                                span_rowid=span.span_rowid,
                            ),
                        )
                    )
                else:
                    to_discard.append(p)
            elif isinstance(p, Postponed):
                # Span still missing: retry until the retry budget runs out.
                if p.retries_left > 1:
                    to_postpone.append(p.postpone(p.retries_left - 1))
                else:
                    to_discard.append(p)
            elif isinstance(p, Received):
                # First sighting with no span yet: postpone with a full budget.
                to_postpone.append(p.postpone(self._retry_allowance))
            else:
                to_discard.append(p)

        # Every parcel must land in exactly one bucket.
        assert len(to_insert) + len(to_postpone) + len(to_discard) == len(parcels)
        # Keep only the newest parcel per unique key; the [::-1] restores
        # ascending received-time order after the reverse-sorted dedup.
        to_insert = dedup(sorted(to_insert, key=_time, reverse=True), _unique_by)[::-1]
        return to_insert, to_postpone, to_discard

    def _select_existing(self, *keys: _Key) -> Select[_Existing]:
        """Build the outer-join query behind _partition's existence lookup."""
        anno = self.table
        # CTE of candidate spans limited to the span ids present in the keys.
        span = (
            select(models.Span.id, models.Span.span_id, num_docs_col(self._db.dialect))
            .where(models.Span.span_id.in_({span_id for _, span_id, *_ in keys}))
            .cte()
        )
        # The name.in_(...) pre-filter narrows the join before the exact
        # tuple membership test.
        onclause = and_(
            span.c.id == anno.span_rowid,
            anno.name.in_({name for name, *_ in keys}),
            tuple_(anno.name, span.c.span_id, anno.document_position).in_(keys),
        )
        return select(
            span.c.id.label("span_rowid"),
            span.c.span_id,
            span.c.num_docs,
            anno.id,
            anno.name,
            anno.document_position,
            anno.updated_at,
        ).outerjoin_from(span, anno, onclause)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class _SpanAttr(NamedTuple):
    """Span attributes looked up from the database for an incoming annotation."""

    # Primary key of the span row.
    span_rowid: _SpanRowId
    # Number of retrieval documents on the span; bounds the valid
    # document positions for a document annotation.
    num_docs: _NumDocs
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class _AnnoAttr(NamedTuple):
    """Attributes of an existing annotation row found in the database."""

    # Primary key of the span the annotation belongs to.
    span_rowid: _SpanRowId
    # Primary key of the existing annotation row.
    id_: _AnnoRowId
    # Last modification time of the existing row.
    updated_at: datetime
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _key(p: Received[Precursors.DocumentAnnotation]) -> _Key:
    """Lookup key for a received precursor: (name, span_id, document_position)."""
    item = p.item
    return item.obj.name, item.span_id, item.document_position
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _unique_by(p: Received[Insertables.DocumentAnnotation]) -> _UniqueBy:
    """Dedup key for an insertable: (name, span_rowid, document_position)."""
    item = p.item
    return item.obj.name, item.span_rowid, item.document_position
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _time(p: Received[Any]) -> datetime:
    """Sort key: the time at which the parcel was received."""
    return p.received_at
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from typing import NamedTuple, Optional
|
|
2
|
+
|
|
3
|
+
from sqlalchemy import select
|
|
4
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
5
|
+
from typing_extensions import assert_never
|
|
6
|
+
|
|
7
|
+
from phoenix.db import models
|
|
8
|
+
from phoenix.db.helpers import SupportedSQLDialect, num_docs_col
|
|
9
|
+
from phoenix.db.insertion.helpers import insert_on_conflict
|
|
10
|
+
from phoenix.exceptions import PhoenixException
|
|
11
|
+
from phoenix.trace import v1 as pb
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class InsertEvaluationError(PhoenixException):
    """Raised when an evaluation cannot be inserted into the database."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class EvaluationInsertionEvent(NamedTuple):
    """Outcome of a successful evaluation insertion."""

    # Primary key of the project the evaluated trace/span belongs to.
    project_rowid: int
    # Name of the evaluation that was inserted.
    evaluation_name: str
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SpanEvaluationInsertionEvent(EvaluationInsertionEvent):
    """Insertion event for a span-level evaluation."""


class TraceEvaluationInsertionEvent(EvaluationInsertionEvent):
    """Insertion event for a trace-level evaluation."""


class DocumentEvaluationInsertionEvent(EvaluationInsertionEvent):
    """Insertion event for a document-level evaluation."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
async def insert_evaluation(
    session: AsyncSession,
    evaluation: pb.Evaluation,
) -> Optional[EvaluationInsertionEvent]:
    """Route an evaluation protobuf to the matching insertion helper.

    Dispatches on the evaluation's subject kind (trace, span, or document
    retrieval) and returns the resulting insertion event.

    Raises:
        InsertEvaluationError: if the subject kind is unset, or (from the
            helpers) if the referenced trace/span does not exist.
    """
    name = evaluation.name
    result = evaluation.result
    # Proto3 optional fields: only read the value when the field is set.
    label = result.label.value if result.HasField("label") else None
    score = result.score.value if result.HasField("score") else None
    explanation = result.explanation.value if result.HasField("explanation") else None
    subject = evaluation.subject_id
    kind = subject.WhichOneof("kind")
    if kind is None:
        raise InsertEvaluationError("Cannot insert an evaluation that has no evaluation kind")
    if kind == "trace_id":
        return await _insert_trace_evaluation(
            session, subject.trace_id, name, label, score, explanation
        )
    if kind == "span_id":
        return await _insert_span_evaluation(
            session, subject.span_id, name, label, score, explanation
        )
    if kind == "document_retrieval_id":
        retrieval = subject.document_retrieval_id
        return await _insert_document_evaluation(
            session,
            retrieval.span_id,
            retrieval.document_position,
            name,
            label,
            score,
            explanation,
        )
    assert_never(kind)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
async def _insert_trace_evaluation(
    session: AsyncSession,
    trace_id: str,
    evaluation_name: str,
    label: Optional[str],
    score: Optional[float],
    explanation: Optional[str],
) -> TraceEvaluationInsertionEvent:
    """Upsert a trace-level evaluation as a TraceAnnotation row.

    Raises:
        InsertEvaluationError: if no trace with `trace_id` exists.
    """
    lookup = select(
        models.Trace.project_rowid,
        models.Trace.id,
    ).where(models.Trace.trace_id == trace_id)
    row = (await session.execute(lookup)).first()
    if not row:
        raise InsertEvaluationError(
            f"Cannot insert a trace evaluation for a missing trace: {evaluation_name=}, {trace_id=}"
        )
    project_rowid, trace_rowid = row
    dialect = SupportedSQLDialect(session.bind.dialect.name)
    annotation_values = {
        "trace_rowid": trace_rowid,
        "name": evaluation_name,
        "label": label,
        "score": score,
        "explanation": explanation,
        "metadata_": {},  # `metadata_` must match ORM
        "annotator_kind": "LLM",
    }
    await session.execute(
        insert_on_conflict(
            annotation_values,
            dialect=dialect,
            table=models.TraceAnnotation,
            unique_by=("name", "trace_rowid"),
        )
    )
    return TraceEvaluationInsertionEvent(project_rowid, evaluation_name)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
async def _insert_span_evaluation(
    session: AsyncSession,
    span_id: str,
    evaluation_name: str,
    label: Optional[str],
    score: Optional[float],
    explanation: Optional[str],
) -> SpanEvaluationInsertionEvent:
    """Upsert a span-level evaluation as a SpanAnnotation row.

    Raises:
        InsertEvaluationError: if no span with `span_id` exists.
    """
    lookup = (
        select(
            models.Trace.project_rowid,
            models.Span.id,
        )
        .join_from(models.Span, models.Trace)
        .where(models.Span.span_id == span_id)
    )
    row = (await session.execute(lookup)).first()
    if not row:
        raise InsertEvaluationError(
            f"Cannot insert a span evaluation for a missing span: {evaluation_name=}, {span_id=}"
        )
    project_rowid, span_rowid = row
    dialect = SupportedSQLDialect(session.bind.dialect.name)
    annotation_values = {
        "span_rowid": span_rowid,
        "name": evaluation_name,
        "label": label,
        "score": score,
        "explanation": explanation,
        "metadata_": {},  # `metadata_` must match ORM
        "annotator_kind": "LLM",
    }
    await session.execute(
        insert_on_conflict(
            annotation_values,
            dialect=dialect,
            table=models.SpanAnnotation,
            unique_by=("name", "span_rowid"),
        )
    )
    return SpanEvaluationInsertionEvent(project_rowid, evaluation_name)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
async def _insert_document_evaluation(
    session: AsyncSession,
    span_id: str,
    document_position: int,
    evaluation_name: str,
    label: Optional[str],
    score: Optional[float],
    explanation: Optional[str],
) -> EvaluationInsertionEvent:
    """Upsert a document-level evaluation as a DocumentAnnotation row.

    Raises:
        InsertEvaluationError: if the span does not exist, or if
            `document_position` is out of range for the span's documents.
    """
    # Fix: compute the dialect exactly once; it was previously recomputed
    # redundantly between the span lookup and the insert.
    dialect = SupportedSQLDialect(session.bind.dialect.name)
    stmt = (
        select(
            models.Trace.project_rowid,
            models.Span.id,
            num_docs_col(dialect),
        )
        .join_from(models.Span, models.Trace)
        .where(models.Span.span_id == span_id)
    )
    if not (row := (await session.execute(stmt)).first()):
        raise InsertEvaluationError(
            f"Cannot insert a document evaluation for a missing span: {span_id=}"
        )
    project_rowid, span_rowid, num_docs = row
    # The position must index into the span's recorded documents.
    if num_docs is None or num_docs <= document_position:
        raise InsertEvaluationError(
            f"Cannot insert a document evaluation for a non-existent "
            f"document position: {evaluation_name=}, {span_id=}, {document_position=}"
        )
    values = dict(
        span_rowid=span_rowid,
        document_position=document_position,
        name=evaluation_name,
        label=label,
        score=score,
        explanation=explanation,
        metadata_={},  # `metadata_` must match ORM
        annotator_kind="LLM",
    )
    await session.execute(
        insert_on_conflict(
            values,
            dialect=dialect,
            table=models.DocumentAnnotation,
            unique_by=("name", "span_rowid", "document_position"),
        )
    )
    return DocumentEvaluationInsertionEvent(project_rowid, evaluation_name)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from collections.abc import Awaitable, Callable, Iterable, Iterator, Mapping, Sequence
|
|
3
|
+
from enum import Enum, auto
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import Insert
|
|
7
|
+
from sqlalchemy.dialects.postgresql import insert as insert_postgresql
|
|
8
|
+
from sqlalchemy.dialects.sqlite import insert as insert_sqlite
|
|
9
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
10
|
+
from sqlalchemy.sql.elements import KeyedColumnElement
|
|
11
|
+
from typing_extensions import TypeAlias, assert_never
|
|
12
|
+
|
|
13
|
+
from phoenix.db import models
|
|
14
|
+
from phoenix.db.helpers import SupportedSQLDialect
|
|
15
|
+
from phoenix.db.models import Base
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DataManipulationEvent(ABC):
    """Marker base class for events produced by executing DML
    (Data Manipulation Language) statements.
    """
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# A callback that performs a DML operation within the given session and may
# report a DataManipulationEvent describing what it did (or None).
DataManipulation: TypeAlias = Callable[[AsyncSession], Awaitable[Optional[DataManipulationEvent]]]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class OnConflict(Enum):
    """Conflict-resolution strategy for `insert_on_conflict`."""

    DO_NOTHING = auto()
    DO_UPDATE = auto()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def insert_on_conflict(
    *records: Mapping[str, Any],
    table: type[Base],
    dialect: SupportedSQLDialect,
    unique_by: Sequence[str],
    on_conflict: OnConflict = OnConflict.DO_UPDATE,
    set_: Optional[Mapping[str, Any]] = None,
) -> Insert:
    """Build a dialect-specific INSERT using ON CONFLICT DO ... syntax.

    `unique_by` names the columns of the uniqueness constraint. For
    DO_UPDATE, `set_` overrides the columns written on conflict; otherwise
    the excluded (incoming) values are used, minus keys and `created_at`.
    """
    if on_conflict is OnConflict.DO_UPDATE:
        # PostgreSQL rejects a statement that updates the same row twice,
        # so keep only the last record supplied for each unique key
        # (scanning in reverse preserves the original relative order of
        # the survivors).
        seen_keys = set()
        survivors = []
        for record in reversed(records):
            key = tuple(record.get(column) for column in unique_by)
            if key not in seen_keys:
                seen_keys.add(key)
                survivors.append(record)
        records = tuple(reversed(survivors))
    constraint = "_".join(("uq", table.__tablename__, *unique_by))
    if dialect is SupportedSQLDialect.POSTGRESQL:
        pg_stmt = insert_postgresql(table).values(records)
        if on_conflict is OnConflict.DO_NOTHING:
            return pg_stmt.on_conflict_do_nothing(constraint=constraint)
        if on_conflict is OnConflict.DO_UPDATE:
            update_set = set_ if set_ else dict(_clean(pg_stmt.excluded.items()))
            return pg_stmt.on_conflict_do_update(
                constraint=constraint,
                set_=update_set,
            )
        assert_never(on_conflict)
    if dialect is SupportedSQLDialect.SQLITE:
        lite_stmt = insert_sqlite(table).values(records)
        if on_conflict is OnConflict.DO_NOTHING:
            return lite_stmt.on_conflict_do_nothing(unique_by)
        if on_conflict is OnConflict.DO_UPDATE:
            update_set = set_ if set_ else dict(_clean(lite_stmt.excluded.items()))
            return lite_stmt.on_conflict_do_update(
                unique_by,
                set_=update_set,
            )
        assert_never(on_conflict)
    assert_never(dialect)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _clean(
    kv: Iterable[tuple[str, KeyedColumnElement[Any]]],
) -> Iterator[tuple[str, KeyedColumnElement[Any]]]:
    """Filter out columns that must not be overwritten on conflict
    (primary keys, foreign keys, `created_at`), renaming the ORM's
    `metadata_` attribute back to the `metadata` column name.
    """
    for name, column in kv:
        if column.primary_key or column.foreign_keys or name == "created_at":
            continue
        yield ("metadata" if name == "metadata_" else name), column
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def as_kv(obj: models.Base) -> Iterator[tuple[str, Any]]:
    """Yield insertable (attribute, value) pairs for an ORM instance,
    skipping timestamp columns and unset primary keys.

    Column name `metadata` maps to the ORM attribute `metadata_`.
    """
    for column_key, column in obj.__table__.c.items():
        if column_key in ("created_at", "updated_at"):
            continue
        attr = "metadata_" if column_key == "metadata" else column_key
        value = getattr(obj, attr, None)
        if column.primary_key and value is None:
            # postgresql disallows None for primary key
            continue
        yield attr, value
|