arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
- arize_phoenix-12.28.1.dist-info/RECORD +499 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
- phoenix/__generated__/__init__.py +0 -0
- phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
- phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
- phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
- phoenix/__init__.py +5 -4
- phoenix/auth.py +39 -2
- phoenix/config.py +1763 -91
- phoenix/datetime_utils.py +120 -2
- phoenix/db/README.md +595 -25
- phoenix/db/bulk_inserter.py +145 -103
- phoenix/db/engines.py +140 -33
- phoenix/db/enums.py +3 -12
- phoenix/db/facilitator.py +302 -35
- phoenix/db/helpers.py +1000 -65
- phoenix/db/iam_auth.py +64 -0
- phoenix/db/insertion/dataset.py +135 -2
- phoenix/db/insertion/document_annotation.py +9 -6
- phoenix/db/insertion/evaluation.py +2 -3
- phoenix/db/insertion/helpers.py +17 -2
- phoenix/db/insertion/session_annotation.py +176 -0
- phoenix/db/insertion/span.py +15 -11
- phoenix/db/insertion/span_annotation.py +3 -4
- phoenix/db/insertion/trace_annotation.py +3 -4
- phoenix/db/insertion/types.py +50 -20
- phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
- phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
- phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
- phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
- phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
- phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
- phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
- phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
- phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
- phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
- phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
- phoenix/db/models.py +669 -56
- phoenix/db/pg_config.py +10 -0
- phoenix/db/types/model_provider.py +4 -0
- phoenix/db/types/token_price_customization.py +29 -0
- phoenix/db/types/trace_retention.py +23 -15
- phoenix/experiments/evaluators/utils.py +3 -3
- phoenix/experiments/functions.py +160 -52
- phoenix/experiments/tracing.py +2 -2
- phoenix/experiments/types.py +1 -1
- phoenix/inferences/inferences.py +1 -2
- phoenix/server/api/auth.py +38 -7
- phoenix/server/api/auth_messages.py +46 -0
- phoenix/server/api/context.py +100 -4
- phoenix/server/api/dataloaders/__init__.py +79 -5
- phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
- phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
- phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
- phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
- phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
- phoenix/server/api/dataloaders/dataset_labels.py +36 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
- phoenix/server/api/dataloaders/document_evaluations.py +6 -9
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
- phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
- phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
- phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
- phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
- phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
- phoenix/server/api/dataloaders/record_counts.py +37 -10
- phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
- phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
- phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
- phoenix/server/api/dataloaders/span_costs.py +29 -0
- phoenix/server/api/dataloaders/table_fields.py +2 -2
- phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
- phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
- phoenix/server/api/dataloaders/types.py +29 -0
- phoenix/server/api/exceptions.py +11 -1
- phoenix/server/api/helpers/dataset_helpers.py +5 -1
- phoenix/server/api/helpers/playground_clients.py +1243 -292
- phoenix/server/api/helpers/playground_registry.py +2 -2
- phoenix/server/api/helpers/playground_spans.py +8 -4
- phoenix/server/api/helpers/playground_users.py +26 -0
- phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
- phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
- phoenix/server/api/helpers/prompts/models.py +205 -22
- phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
- phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
- phoenix/server/api/input_types/CreateProjectInput.py +27 -0
- phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
- phoenix/server/api/input_types/DatasetFilter.py +17 -0
- phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
- phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
- phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
- phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
- phoenix/server/api/input_types/PromptFilter.py +14 -0
- phoenix/server/api/input_types/PromptVersionInput.py +52 -1
- phoenix/server/api/input_types/SpanSort.py +44 -7
- phoenix/server/api/input_types/TimeBinConfig.py +23 -0
- phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
- phoenix/server/api/input_types/UserRoleInput.py +1 -0
- phoenix/server/api/mutations/__init__.py +10 -0
- phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
- phoenix/server/api/mutations/api_key_mutations.py +19 -23
- phoenix/server/api/mutations/chat_mutations.py +154 -47
- phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
- phoenix/server/api/mutations/dataset_mutations.py +21 -16
- phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
- phoenix/server/api/mutations/experiment_mutations.py +2 -2
- phoenix/server/api/mutations/export_events_mutations.py +3 -3
- phoenix/server/api/mutations/model_mutations.py +210 -0
- phoenix/server/api/mutations/project_mutations.py +49 -10
- phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
- phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
- phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
- phoenix/server/api/mutations/prompt_mutations.py +65 -129
- phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
- phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
- phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
- phoenix/server/api/mutations/trace_mutations.py +47 -3
- phoenix/server/api/mutations/user_mutations.py +66 -41
- phoenix/server/api/queries.py +768 -293
- phoenix/server/api/routers/__init__.py +2 -2
- phoenix/server/api/routers/auth.py +154 -88
- phoenix/server/api/routers/ldap.py +229 -0
- phoenix/server/api/routers/oauth2.py +369 -106
- phoenix/server/api/routers/v1/__init__.py +24 -4
- phoenix/server/api/routers/v1/annotation_configs.py +23 -31
- phoenix/server/api/routers/v1/annotations.py +481 -17
- phoenix/server/api/routers/v1/datasets.py +395 -81
- phoenix/server/api/routers/v1/documents.py +142 -0
- phoenix/server/api/routers/v1/evaluations.py +24 -31
- phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
- phoenix/server/api/routers/v1/experiment_runs.py +337 -59
- phoenix/server/api/routers/v1/experiments.py +479 -48
- phoenix/server/api/routers/v1/models.py +7 -0
- phoenix/server/api/routers/v1/projects.py +18 -49
- phoenix/server/api/routers/v1/prompts.py +54 -40
- phoenix/server/api/routers/v1/sessions.py +108 -0
- phoenix/server/api/routers/v1/spans.py +1091 -81
- phoenix/server/api/routers/v1/traces.py +132 -78
- phoenix/server/api/routers/v1/users.py +389 -0
- phoenix/server/api/routers/v1/utils.py +3 -7
- phoenix/server/api/subscriptions.py +305 -88
- phoenix/server/api/types/Annotation.py +90 -23
- phoenix/server/api/types/ApiKey.py +13 -17
- phoenix/server/api/types/AuthMethod.py +1 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
- phoenix/server/api/types/CostBreakdown.py +12 -0
- phoenix/server/api/types/Dataset.py +226 -72
- phoenix/server/api/types/DatasetExample.py +88 -18
- phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
- phoenix/server/api/types/DatasetLabel.py +57 -0
- phoenix/server/api/types/DatasetSplit.py +98 -0
- phoenix/server/api/types/DatasetVersion.py +49 -4
- phoenix/server/api/types/DocumentAnnotation.py +212 -0
- phoenix/server/api/types/Experiment.py +264 -59
- phoenix/server/api/types/ExperimentComparison.py +5 -10
- phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
- phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
- phoenix/server/api/types/ExperimentRun.py +169 -65
- phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
- phoenix/server/api/types/GenerativeModel.py +245 -3
- phoenix/server/api/types/GenerativeProvider.py +70 -11
- phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
- phoenix/server/api/types/ModelInterface.py +16 -0
- phoenix/server/api/types/PlaygroundModel.py +20 -0
- phoenix/server/api/types/Project.py +1278 -216
- phoenix/server/api/types/ProjectSession.py +188 -28
- phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
- phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
- phoenix/server/api/types/Prompt.py +119 -39
- phoenix/server/api/types/PromptLabel.py +42 -25
- phoenix/server/api/types/PromptVersion.py +11 -8
- phoenix/server/api/types/PromptVersionTag.py +65 -25
- phoenix/server/api/types/ServerStatus.py +6 -0
- phoenix/server/api/types/Span.py +167 -123
- phoenix/server/api/types/SpanAnnotation.py +189 -42
- phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
- phoenix/server/api/types/SpanCostSummary.py +10 -0
- phoenix/server/api/types/SystemApiKey.py +65 -1
- phoenix/server/api/types/TokenPrice.py +16 -0
- phoenix/server/api/types/TokenUsage.py +3 -3
- phoenix/server/api/types/Trace.py +223 -51
- phoenix/server/api/types/TraceAnnotation.py +149 -50
- phoenix/server/api/types/User.py +137 -32
- phoenix/server/api/types/UserApiKey.py +73 -26
- phoenix/server/api/types/node.py +10 -0
- phoenix/server/api/types/pagination.py +11 -2
- phoenix/server/app.py +290 -45
- phoenix/server/authorization.py +38 -3
- phoenix/server/bearer_auth.py +34 -24
- phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
- phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
- phoenix/server/cost_tracking/helpers.py +68 -0
- phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
- phoenix/server/cost_tracking/regex_specificity.py +397 -0
- phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
- phoenix/server/daemons/__init__.py +0 -0
- phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
- phoenix/server/daemons/generative_model_store.py +103 -0
- phoenix/server/daemons/span_cost_calculator.py +99 -0
- phoenix/server/dml_event.py +17 -0
- phoenix/server/dml_event_handler.py +5 -0
- phoenix/server/email/sender.py +56 -3
- phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/experiments/__init__.py +0 -0
- phoenix/server/experiments/utils.py +14 -0
- phoenix/server/grpc_server.py +11 -11
- phoenix/server/jwt_store.py +17 -15
- phoenix/server/ldap.py +1449 -0
- phoenix/server/main.py +26 -10
- phoenix/server/oauth2.py +330 -12
- phoenix/server/prometheus.py +66 -6
- phoenix/server/rate_limiters.py +4 -9
- phoenix/server/retention.py +33 -20
- phoenix/server/session_filters.py +49 -0
- phoenix/server/static/.vite/manifest.json +55 -51
- phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
- phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
- phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
- phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
- phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
- phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
- phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
- phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
- phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
- phoenix/server/templates/index.html +40 -6
- phoenix/server/thread_server.py +1 -2
- phoenix/server/types.py +14 -4
- phoenix/server/utils.py +74 -0
- phoenix/session/client.py +56 -3
- phoenix/session/data_extractor.py +5 -0
- phoenix/session/evaluation.py +14 -5
- phoenix/session/session.py +45 -9
- phoenix/settings.py +5 -0
- phoenix/trace/attributes.py +80 -13
- phoenix/trace/dsl/helpers.py +90 -1
- phoenix/trace/dsl/query.py +8 -6
- phoenix/trace/projects.py +5 -0
- phoenix/utilities/template_formatters.py +1 -1
- phoenix/version.py +1 -1
- arize_phoenix-10.0.4.dist-info/RECORD +0 -405
- phoenix/server/api/types/Evaluation.py +0 -39
- phoenix/server/cost_tracking/cost_lookup.py +0 -255
- phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
- phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
- phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
- phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
- phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
- phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
- phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
- phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
- phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
- phoenix/utilities/deprecation.py +0 -31
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
phoenix/db/iam_auth.py
ADDED
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def generate_aws_rds_token(
+    host: str,
+    port: int,
+    user: str,
+) -> str:
+    """Generate an AWS RDS IAM authentication token.
+
+    This function creates a short-lived (15 minutes) authentication token for connecting
+    to AWS RDS/Aurora PostgreSQL instances using IAM database authentication.
+
+    The AWS region is automatically resolved using boto3.
+
+    Args:
+        host: The database hostname (e.g., 'mydb.abc123.us-west-2.rds.amazonaws.com')
+        port: The database port (typically 5432 for PostgreSQL)
+        user: The database username (must match an IAM-enabled database user)
+
+    Returns:
+        A temporary authentication token string to use as the database password
+
+    Raises:
+        ImportError: If boto3 is not installed
+        Exception: If AWS credentials/region are not configured or token generation fails
+
+    Example:
+        >>> token = generate_aws_rds_token(
+        ...     host='mydb.us-west-2.rds.amazonaws.com',
+        ...     port=5432,
+        ...     user='myuser'
+        ... )
+    """
+    try:
+        import boto3  # type: ignore
+    except ImportError as e:
+        raise ImportError(
+            "boto3 is required for AWS RDS IAM authentication. "
+            "Install it with: pip install 'arize-phoenix[aws]'"
+        ) from e
+
+    try:
+        client = boto3.client("rds")
+
+        logger.debug(f"Generating AWS RDS IAM auth token for user '{user}' at {host}:{port}")
+        token = client.generate_db_auth_token(  # pyright: ignore
+            DBHostname=host,
+            Port=port,
+            DBUsername=user,
+        )
+
+        return str(token)  # pyright: ignore
+
+    except Exception as e:
+        logger.error(
+            f"Failed to generate AWS RDS IAM authentication token: {e}. "
+            "Ensure AWS credentials are configured and have 'rds-db:connect' permission."
+        )
+        raise
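Because these tokens expire after roughly 15 minutes, they cannot be baked into a connection URL once at startup. A minimal sketch of how such a generator is typically wired into SQLAlchemy, assuming an asyncpg engine; this shows the standard "do_connect" event pattern, not necessarily the exact wiring in phoenix/db/engines.py, and the host/user values are placeholders:

from sqlalchemy import event
from sqlalchemy.ext.asyncio import create_async_engine

from phoenix.db.iam_auth import generate_aws_rds_token

# No password in the URL; it is supplied per-connection below.
engine = create_async_engine(
    "postgresql+asyncpg://myuser@mydb.us-west-2.rds.amazonaws.com:5432/postgres"
)

@event.listens_for(engine.sync_engine, "do_connect")
def _inject_iam_token(dialect, conn_rec, cargs, cparams):
    # Generate a fresh short-lived token immediately before each connect,
    # since IAM tokens expire roughly 15 minutes after issuance.
    cparams["password"] = generate_aws_rds_token(
        host="mydb.us-west-2.rds.amazonaws.com",
        port=5432,
        user="myuser",
    )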
phoenix/db/insertion/dataset.py
CHANGED
@@ -11,7 +11,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from typing_extensions import TypeAlias
 
 from phoenix.db import models
-from phoenix.db.
+from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.db.insertion.helpers import DataManipulationEvent, OnConflict, insert_on_conflict
 
 logger = logging.getLogger(__name__)
 
@@ -27,6 +28,7 @@ class ExampleContent:
     input: dict[str, Any] = field(default_factory=dict)
     output: dict[str, Any] = field(default_factory=dict)
     metadata: dict[str, Any] = field(default_factory=dict)
+    splits: frozenset[str] = field(default_factory=frozenset)  # Set of split names
 
 
 Examples: TypeAlias = Iterable[ExampleContent]
@@ -35,6 +37,7 @@ Examples: TypeAlias = Iterable[ExampleContent]
 @dataclass(frozen=True)
 class DatasetExampleAdditionEvent(DataManipulationEvent):
     dataset_id: DatasetId
+    dataset_version_id: DatasetVersionId
 
 
 async def insert_dataset(
@@ -43,6 +46,7 @@ async def insert_dataset(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     created_at: Optional[datetime] = None,
+    user_id: Optional[int] = None,
 ) -> DatasetId:
     id_ = await session.scalar(
         insert(models.Dataset)
@@ -51,6 +55,7 @@ async def insert_dataset(
             description=description,
             metadata_=metadata,
             created_at=created_at,
+            user_id=user_id,
         )
         .returning(models.Dataset.id)
     )
@@ -63,6 +68,7 @@ async def insert_dataset_version(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     created_at: Optional[datetime] = None,
+    user_id: Optional[int] = None,
 ) -> DatasetVersionId:
     id_ = await session.scalar(
         insert(models.DatasetVersion)
@@ -71,6 +77,7 @@ async def insert_dataset_version(
             description=description,
             metadata_=metadata,
             created_at=created_at,
+            user_id=user_id,
         )
         .returning(models.DatasetVersion.id)
     )
@@ -133,6 +140,92 @@ async def insert_dataset_example_revision(
     return cast(DatasetExampleRevisionId, id_)
 
 
+async def bulk_create_dataset_splits(
+    session: AsyncSession,
+    split_names: set[str],
+    user_id: Optional[int] = None,
+) -> dict[str, int]:
+    """
+    Bulk create dataset splits using upsert pattern.
+    Returns a mapping of split name to split ID.
+    """
+    if not split_names:
+        return {}
+
+    dialect = SupportedSQLDialect(session.bind.dialect.name)
+    records = [
+        {
+            "name": name,
+            "color": "#808080",  # Default gray color
+            "metadata_": {},
+            "user_id": user_id,
+        }
+        for name in split_names
+    ]
+
+    # Bulk upsert all splits - uses ON CONFLICT DO NOTHING to handle race conditions
+    stmt = insert_on_conflict(
+        *records,
+        table=models.DatasetSplit,
+        dialect=dialect,
+        unique_by=["name"],
+        on_conflict=OnConflict.DO_NOTHING,
+    )
+    await session.execute(stmt)
+
+    # Fetch all split IDs by name
+    result = await session.execute(
+        select(models.DatasetSplit.name, models.DatasetSplit.id).where(
+            models.DatasetSplit.name.in_(split_names)
+        )
+    )
+    return {name: split_id for name, split_id in result.all()}
+
+
+async def bulk_assign_examples_to_splits(
+    session: AsyncSession,
+    assignments: list[tuple[DatasetExampleId, int]],
+) -> None:
+    """
+    Bulk assign examples to splits.
+    assignments is a list of (dataset_example_id, dataset_split_id) tuples.
+    """
+    if not assignments:
+        return
+
+    from sqlalchemy.dialects.postgresql import insert as pg_insert
+    from sqlalchemy.dialects.sqlite import insert as sqlite_insert
+    from typing_extensions import assert_never
+
+    dialect = SupportedSQLDialect(session.bind.dialect.name)
+    records = [
+        {
+            "dataset_example_id": example_id,
+            "dataset_split_id": split_id,
+        }
+        for example_id, split_id in assignments
+    ]
+
+    # Use index_elements instead of constraint name because the table uses
+    # a PrimaryKeyConstraint, not a unique constraint
+    if dialect is SupportedSQLDialect.POSTGRESQL:
+        pg_stmt = pg_insert(models.DatasetSplitDatasetExample).values(records)
+        await session.execute(
+            pg_stmt.on_conflict_do_nothing(
+                index_elements=["dataset_split_id", "dataset_example_id"]
+            )
+        )
+    elif dialect is SupportedSQLDialect.SQLITE:
+        sqlite_stmt = sqlite_insert(models.DatasetSplitDatasetExample).values(records)
+        await session.execute(
+            sqlite_stmt.on_conflict_do_nothing(
+                index_elements=["dataset_split_id", "dataset_example_id"]
+            )
+        )
+    else:
+        assert_never(dialect)
+
+
 class DatasetAction(Enum):
     CREATE = "create"
     APPEND = "append"
@@ -151,6 +244,7 @@ async def add_dataset_examples(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     action: DatasetAction = DatasetAction.CREATE,
+    user_id: Optional[int] = None,
 ) -> Optional[DatasetExampleAdditionEvent]:
     created_at = datetime.now(timezone.utc)
     dataset_id: Optional[DatasetId] = None
@@ -166,6 +260,7 @@ async def add_dataset_examples(
                 description=description,
                 metadata=metadata,
                 created_at=created_at,
+                user_id=user_id,
             )
         except Exception:
             logger.exception(f"Failed to insert dataset: {name=}")
@@ -175,10 +270,14 @@ async def add_dataset_examples(
             session=session,
             dataset_id=dataset_id,
             created_at=created_at,
+            user_id=user_id,
         )
     except Exception:
         logger.exception(f"Failed to insert dataset version for {dataset_id=}")
         raise
+
+    # Process examples and collect split assignments (by name, resolved to IDs after iteration)
+    split_assignments: list[tuple[DatasetExampleId, str]] = []
     for example in (await examples) if isinstance(examples, Awaitable) else examples:
         try:
             dataset_example_id = await insert_dataset_example(
@@ -205,7 +304,41 @@ async def add_dataset_examples(
                 f"{dataset_example_id=}"
            )
            raise
-
+
+        # Collect split assignments by name for bulk insert later
+        for split_name in example.splits:
+            split_assignments.append((dataset_example_id, split_name))
+
+    # Bulk create splits and assign examples after iteration
+    if split_assignments:
+        # Collect all unique split names
+        all_split_names = {name for _, name in split_assignments}
+        try:
+            split_name_to_id = await bulk_create_dataset_splits(
+                session=session,
+                split_names=all_split_names,
+                user_id=user_id,
+            )
+        except Exception:
+            logger.exception(f"Failed to bulk create dataset splits: {all_split_names}")
+            raise
+
+        # Convert name-based assignments to ID-based assignments
+        id_assignments = [
+            (example_id, split_name_to_id[split_name])
+            for example_id, split_name in split_assignments
+        ]
+
+        try:
+            await bulk_assign_examples_to_splits(
+                session=session,
+                assignments=id_assignments,
+            )
+        except Exception:
+            logger.exception("Failed to bulk assign examples to splits")
+            raise
+
+    return DatasetExampleAdditionEvent(dataset_id=dataset_id, dataset_version_id=dataset_version_id)
 
 
 @dataclass(frozen=True)

phoenix/db/insertion/document_annotation.py
CHANGED
@@ -7,7 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from typing_extensions import TypeAlias
 
 from phoenix.db import models
-from phoenix.db.helpers import dedup
+from phoenix.db.helpers import dedup
 from phoenix.db.insertion.helpers import as_kv
 from phoenix.db.insertion.types import (
     Insertables,
@@ -63,7 +63,7 @@ class DocumentAnnotationQueueInserter(
         session: AsyncSession,
         *insertions: Insertables.DocumentAnnotation,
     ) -> list[DocumentAnnotationDmlEvent]:
-        records = [dict(as_kv(ins.row)) for ins in insertions]
+        records = [{**dict(as_kv(ins.row)), "updated_at": ins.row.updated_at} for ins in insertions]
         stmt = self._insert_on_conflict(*records).returning(self.table.id)
         ids = tuple([_ async for _ in await session.stream_scalars(stmt)])
         return [DocumentAnnotationDmlEvent(ids)]
@@ -99,7 +99,7 @@ class DocumentAnnotationQueueInserter(
 
         for p in parcels:
             if (anno := existing_annos.get(_key(p))) is not None:
-                if p.
+                if p.item.updated_at <= anno.updated_at:
                     to_discard.append(p)
                 else:
                     to_insert.append(
@@ -107,7 +107,6 @@ class DocumentAnnotationQueueInserter(
                             received_at=p.received_at,
                             item=p.item.as_insertable(
                                 span_rowid=anno.span_rowid,
-                                id_=anno.id_,
                             ),
                         )
                     )
@@ -140,7 +139,11 @@ class DocumentAnnotationQueueInserter(
     def _select_existing(self, *keys: _Key) -> Select[_Existing]:
         anno = self.table
         span = (
-            select(
+            select(
+                models.Span.id,
+                models.Span.span_id,
+                models.Span.num_documents.label("num_docs"),
+            )
             .where(models.Span.span_id.in_({k.span_id for k in keys}))
             .cte()
         )
@@ -182,7 +185,7 @@ def _key(p: Received[Precursors.DocumentAnnotation]) -> _Key:
 
 
 def _unique_by(p: Received[Insertables.DocumentAnnotation]) -> _UniqueBy:
-    return p.item.obj.name, p.item.span_rowid, p.item.document_position, p.item.identifier
+    return p.item.obj.name, p.item.span_rowid, p.item.document_position, p.item.obj.identifier
 
 
 def _time(p: Received[Any]) -> datetime:

phoenix/db/insertion/evaluation.py
CHANGED
@@ -5,7 +5,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from typing_extensions import assert_never
 
 from phoenix.db import models
-from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.db.helpers import SupportedSQLDialect
 from phoenix.db.insertion.helpers import insert_on_conflict
 from phoenix.exceptions import PhoenixException
 from phoenix.trace import v1 as pb
@@ -153,12 +153,11 @@ async def _insert_document_evaluation(
     score: Optional[float],
     explanation: Optional[str],
 ) -> EvaluationInsertionEvent:
-    dialect = SupportedSQLDialect(session.bind.dialect.name)
     stmt = (
         select(
             models.Trace.project_rowid,
             models.Span.id,
-
+            models.Span.num_documents,
         )
         .join_from(models.Span, models.Trace)
         .where(models.Span.span_id == span_id)
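The new splits field on ExampleContent (dataset.py above) is a frozenset of split names; names are only resolved to row IDs in bulk after all examples have been inserted. A purely illustrative sketch of how a caller would populate it; the example data here is invented:

from phoenix.db.insertion.dataset import ExampleContent

# Each example may belong to zero or more named splits; unknown split
# names are upserted by bulk_create_dataset_splits, then linked in bulk
# by bulk_assign_examples_to_splits.
examples = [
    ExampleContent(
        input={"question": "What is Phoenix?"},
        output={"answer": "An LLM observability platform."},
        metadata={"source": "docs"},
        splits=frozenset({"train"}),
    ),
    ExampleContent(
        input={"question": "2 + 2?"},
        output={"answer": "4"},
        splits=frozenset({"eval", "golden"}),
    ),
]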
phoenix/db/insertion/helpers.py
CHANGED
@@ -3,6 +3,7 @@ from collections.abc import Awaitable, Callable, Iterable, Iterator, Mapping, Se
 from enum import Enum, auto
 from typing import Any, Optional
 
+from openinference.semconv.trace import OpenInferenceSpanKindValues, SpanAttributes
 from sqlalchemy import Insert
 from sqlalchemy.dialects.postgresql import insert as insert_postgresql
 from sqlalchemy.dialects.sqlite import insert as insert_sqlite
@@ -11,8 +12,9 @@ from sqlalchemy.sql.elements import KeyedColumnElement
 from typing_extensions import TypeAlias, assert_never
 
 from phoenix.db import models
-from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.db.helpers import SupportedSQLDialect, truncate_name
 from phoenix.db.models import Base
+from phoenix.trace.attributes import get_attribute_value
 
 
 class DataManipulationEvent(ABC):
@@ -51,7 +53,7 @@ def insert_on_conflict(
         unique_records.append(v)
         seen.add(k)
     records = tuple(reversed(unique_records))
-    constraint = constraint_name or "_".join(("uq", table.__tablename__, *unique_by))
+    constraint = constraint_name or truncate_name("_".join(("uq", table.__tablename__, *unique_by)))
     if dialect is SupportedSQLDialect.POSTGRESQL:
         stmt_postgresql = insert_postgresql(table).values(records)
         if on_conflict is OnConflict.DO_NOTHING:
@@ -97,3 +99,16 @@ def as_kv(obj: models.Base) -> Iterator[tuple[str, Any]]:
             # postgresql disallows None for primary key
             continue
         yield k, v
+
+
+def should_calculate_span_cost(
+    attributes: Optional[Mapping[str, Any]],
+) -> bool:
+    return bool(
+        (span_kind := get_attribute_value(attributes, SpanAttributes.OPENINFERENCE_SPAN_KIND))
+        and isinstance(span_kind, str)
+        and span_kind == OpenInferenceSpanKindValues.LLM.value
+        and (llm_name := get_attribute_value(attributes, SpanAttributes.LLM_MODEL_NAME))
+        and isinstance(llm_name, str)
+        and llm_name.strip()
+    )
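should_calculate_span_cost gates cost tracking to LLM spans that carry a non-blank model name. A hedged usage sketch, assuming the nested attribute layout that get_attribute_value is understood to traverse (walking dot-separated paths through nested mappings); the attribute values are invented:

from phoenix.db.insertion.helpers import should_calculate_span_cost

# "openinference.span.kind" and "llm.model_name" expressed as nested
# mappings, the layout get_attribute_value is assumed to traverse here.
llm_span = {
    "openinference": {"span": {"kind": "LLM"}},
    "llm": {"model_name": "gpt-4o-mini"},
}
retriever_span = {"openinference": {"span": {"kind": "RETRIEVER"}}}

assert should_calculate_span_cost(llm_span)             # LLM span with model name
assert not should_calculate_span_cost(retriever_span)   # wrong span kind
assert not should_calculate_span_cost(None)             # no attributes at all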
phoenix/db/insertion/session_annotation.py
ADDED
@@ -0,0 +1,176 @@
+from collections.abc import Mapping
+from datetime import datetime
+from typing import Any, NamedTuple, Optional
+
+from sqlalchemy import Row, Select, and_, select, tuple_
+from sqlalchemy.ext.asyncio import AsyncSession
+from typing_extensions import TypeAlias
+
+from phoenix.db import models
+from phoenix.db.helpers import dedup
+from phoenix.db.insertion.helpers import as_kv
+from phoenix.db.insertion.types import (
+    Insertables,
+    Postponed,
+    Precursors,
+    QueueInserter,
+    Received,
+)
+from phoenix.server.dml_event import ProjectSessionAnnotationDmlEvent
+
+# Type alias for consistency with other annotation patterns
+SessionAnnotationDmlEvent = ProjectSessionAnnotationDmlEvent
+
+_Name: TypeAlias = str
+_SessionId: TypeAlias = str
+_SessionRowId: TypeAlias = int
+_AnnoRowId: TypeAlias = int
+_Identifier: TypeAlias = str
+
+
+class _Key(NamedTuple):
+    annotation_name: _Name
+    annotation_identifier: _Identifier
+    session_id: _SessionId
+
+
+_UniqueBy: TypeAlias = tuple[_Name, _SessionRowId, _Identifier]
+_Existing: TypeAlias = tuple[
+    _SessionRowId,
+    _SessionId,
+    Optional[_AnnoRowId],
+    Optional[_Name],
+    Optional[datetime],
+]
+
+
+class SessionAnnotationQueueInserter(
+    QueueInserter[
+        Precursors.SessionAnnotation,
+        Insertables.SessionAnnotation,
+        models.ProjectSessionAnnotation,
+        SessionAnnotationDmlEvent,
+    ],
+    table=models.ProjectSessionAnnotation,
+    unique_by=("name", "project_session_id", "identifier"),
+):
+    async def _events(
+        self,
+        session: AsyncSession,
+        *insertions: Insertables.SessionAnnotation,
+    ) -> list[SessionAnnotationDmlEvent]:
+        records = [{**dict(as_kv(ins.row)), "updated_at": ins.row.updated_at} for ins in insertions]
+        stmt = self._insert_on_conflict(*records).returning(self.table.id)
+        ids = tuple([_ async for _ in await session.stream_scalars(stmt)])
+        return [SessionAnnotationDmlEvent(ids)]
+
+    async def _partition(
+        self,
+        session: AsyncSession,
+        *parcels: Received[Precursors.SessionAnnotation],
+    ) -> tuple[
+        list[Received[Insertables.SessionAnnotation]],
+        list[Postponed[Precursors.SessionAnnotation]],
+        list[Received[Precursors.SessionAnnotation]],
+    ]:
+        to_insert: list[Received[Insertables.SessionAnnotation]] = []
+        to_postpone: list[Postponed[Precursors.SessionAnnotation]] = []
+        to_discard: list[Received[Precursors.SessionAnnotation]] = []
+
+        stmt = self._select_existing(*map(_key, parcels))
+        existing: list[Row[_Existing]] = [_ async for _ in await session.stream(stmt)]
+        existing_sessions: Mapping[str, _SessionAttr] = {
+            e.session_id: _SessionAttr(e.session_rowid) for e in existing
+        }
+        existing_annos: Mapping[_Key, _AnnoAttr] = {
+            _Key(
+                annotation_name=e.name,
+                annotation_identifier=e.identifier,
+                session_id=e.session_id,
+            ): _AnnoAttr(e.session_rowid, e.id, e.updated_at)
+            for e in existing
+            if e.id is not None and e.name is not None and e.updated_at is not None
+        }
+
+        for p in parcels:
+            if (anno := existing_annos.get(_key(p))) is not None:
+                if p.item.updated_at <= anno.updated_at:
+                    to_discard.append(p)
+                else:
+                    to_insert.append(
+                        Received(
+                            received_at=p.received_at,
+                            item=p.item.as_insertable(
+                                project_session_rowid=anno.session_rowid,
+                            ),
+                        )
+                    )
+            elif (existing_session := existing_sessions.get(p.item.session_id)) is not None:
+                to_insert.append(
+                    Received(
+                        received_at=p.received_at,
+                        item=p.item.as_insertable(
+                            project_session_rowid=existing_session.session_rowid,
+                        ),
+                    )
+                )
+            elif isinstance(p, Postponed):
+                if p.retries_left > 1:
+                    to_postpone.append(p.postpone(p.retries_left - 1))
+                else:
+                    to_discard.append(p)
+            elif isinstance(p, Received):
+                to_postpone.append(p.postpone(self._retry_allowance))
+            else:
+                to_discard.append(p)
+
+        assert len(to_insert) + len(to_postpone) + len(to_discard) == len(parcels)
+        to_insert = dedup(sorted(to_insert, key=_time, reverse=True), _unique_by)[::-1]
+        return to_insert, to_postpone, to_discard
+
+    def _select_existing(self, *keys: _Key) -> Select[_Existing]:
+        anno = self.table
+        session = (
+            select(models.ProjectSession.id, models.ProjectSession.session_id)
+            .where(models.ProjectSession.session_id.in_({k.session_id for k in keys}))
+            .cte()
+        )
+        onclause = and_(
+            session.c.id == anno.project_session_id,
+            anno.name.in_({k.annotation_name for k in keys}),
+            tuple_(anno.name, anno.identifier, session.c.session_id).in_(keys),
+        )
+        return select(
+            session.c.id.label("session_rowid"),
+            session.c.session_id,
+            anno.id,
+            anno.name,
+            anno.identifier,
+            anno.updated_at,
+        ).outerjoin_from(session, anno, onclause)
+
+
+class _SessionAttr(NamedTuple):
+    session_rowid: _SessionRowId
+
+
+class _AnnoAttr(NamedTuple):
+    session_rowid: _SessionRowId
+    id_: _AnnoRowId
+    updated_at: datetime
+
+
+def _key(p: Received[Precursors.SessionAnnotation]) -> _Key:
+    return _Key(
+        annotation_name=p.item.obj.name,
+        annotation_identifier=p.item.obj.identifier,
+        session_id=p.item.session_id,
+    )
+
+
+def _unique_by(p: Received[Insertables.SessionAnnotation]) -> _UniqueBy:
+    return p.item.obj.name, p.item.project_session_rowid, p.item.obj.identifier
+
+
+def _time(p: Received[Any]) -> datetime:
+    return p.received_at
phoenix/db/insertion/span.py
CHANGED
@@ -14,6 +14,8 @@ from phoenix.trace.schemas import Span, SpanStatusCode
 
 class SpanInsertionEvent(NamedTuple):
     project_rowid: int
+    span_rowid: int
+    trace_rowid: int
 
 
 class ClearProjectSpansEvent(NamedTuple):
@@ -26,15 +28,6 @@ async def insert_span(
     project_name: str,
 ) -> Optional[SpanInsertionEvent]:
     dialect = SupportedSQLDialect(session.bind.dialect.name)
-    if (
-        project_rowid := await session.scalar(
-            select(models.Project.id).filter_by(name=project_name)
-        )
-    ) is None:
-        project_rowid = await session.scalar(
-            insert(models.Project).values(name=project_name).returning(models.Project.id)
-        )
-    assert project_rowid is not None
 
     trace_id = span.context.trace_id
     trace: models.Trace = await session.scalar(
@@ -42,16 +35,27 @@ async def insert_span(
     ) or models.Trace(trace_id=trace_id)
 
     if trace.id is not None:
+        # We use the existing project_rowid on the trace because we allow users to transfer traces
+        # between projects, so the project_name parameter is ignored for existing traces.
+        project_rowid = trace.project_rowid
         # Trace record may need to be updated.
         if trace.end_time < span.end_time:
             trace.end_time = span.end_time
-            trace.project_rowid = project_rowid
         if span.start_time < trace.start_time:
             trace.start_time = span.start_time
     else:
         # Trace record needs to be persisted for the first time.
         trace.start_time = span.start_time
         trace.end_time = span.end_time
+        if (
+            project_rowid := await session.scalar(
+                select(models.Project.id).filter_by(name=project_name)
+            )
+        ) is None:
+            project_rowid = await session.scalar(
+                insert(models.Project).values(name=project_name).returning(models.Project.id)
+            )
+        assert project_rowid is not None
         trace.project_rowid = project_rowid
         session.add(trace)
 
@@ -190,4 +194,4 @@ async def insert_span(
             + cumulative_llm_token_count_completion,
         )
     )
-    return SpanInsertionEvent(project_rowid)
+    return SpanInsertionEvent(project_rowid, span_rowid, trace.id)

phoenix/db/insertion/span_annotation.py
CHANGED
@@ -57,7 +57,7 @@ class SpanAnnotationQueueInserter(
         session: AsyncSession,
         *insertions: Insertables.SpanAnnotation,
     ) -> list[SpanAnnotationDmlEvent]:
-        records = [dict(as_kv(ins.row)) for ins in insertions]
+        records = [{**dict(as_kv(ins.row)), "updated_at": ins.row.updated_at} for ins in insertions]
        stmt = self._insert_on_conflict(*records).returning(self.table.id)
        ids = tuple([_ async for _ in await session.stream_scalars(stmt)])
        return [SpanAnnotationDmlEvent(ids)]
@@ -92,7 +92,7 @@ class SpanAnnotationQueueInserter(
 
         for p in parcels:
             if (anno := existing_annos.get(_key(p))) is not None:
-                if p.
+                if p.item.updated_at <= anno.updated_at:
                     to_discard.append(p)
                 else:
                     to_insert.append(
@@ -100,7 +100,6 @@ class SpanAnnotationQueueInserter(
                             received_at=p.received_at,
                             item=p.item.as_insertable(
                                 span_rowid=anno.span_rowid,
-                                id_=anno.id_,
                             ),
                         )
                     )
@@ -168,7 +167,7 @@ def _key(p: Received[Precursors.SpanAnnotation]) -> _Key:
 
 
 def _unique_by(p: Received[Insertables.SpanAnnotation]) -> _UniqueBy:
-    return p.item.obj.name, p.item.span_rowid, p.item.identifier
+    return p.item.obj.name, p.item.span_rowid, p.item.obj.identifier
 
 
 def _time(p: Received[Any]) -> datetime: