arize-phoenix 3.16.1__py3-none-any.whl → 7.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize_phoenix-7.7.1.dist-info/METADATA +261 -0
- arize_phoenix-7.7.1.dist-info/RECORD +345 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/WHEEL +1 -1
- arize_phoenix-7.7.1.dist-info/entry_points.txt +3 -0
- phoenix/__init__.py +86 -14
- phoenix/auth.py +309 -0
- phoenix/config.py +675 -45
- phoenix/core/model.py +32 -30
- phoenix/core/model_schema.py +102 -109
- phoenix/core/model_schema_adapter.py +48 -45
- phoenix/datetime_utils.py +24 -3
- phoenix/db/README.md +54 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +85 -0
- phoenix/db/bulk_inserter.py +294 -0
- phoenix/db/engines.py +208 -0
- phoenix/db/enums.py +20 -0
- phoenix/db/facilitator.py +113 -0
- phoenix/db/helpers.py +159 -0
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/dataset.py +227 -0
- phoenix/db/insertion/document_annotation.py +171 -0
- phoenix/db/insertion/evaluation.py +191 -0
- phoenix/db/insertion/helpers.py +98 -0
- phoenix/db/insertion/span.py +193 -0
- phoenix/db/insertion/span_annotation.py +158 -0
- phoenix/db/insertion/trace_annotation.py +158 -0
- phoenix/db/insertion/types.py +256 -0
- phoenix/db/migrate.py +86 -0
- phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
- phoenix/db/migrations/env.py +114 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
- phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
- phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +807 -0
- phoenix/exceptions.py +5 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +158 -0
- phoenix/experiments/evaluators/code_evaluators.py +184 -0
- phoenix/experiments/evaluators/llm_evaluators.py +473 -0
- phoenix/experiments/evaluators/utils.py +236 -0
- phoenix/experiments/functions.py +772 -0
- phoenix/experiments/tracing.py +86 -0
- phoenix/experiments/types.py +726 -0
- phoenix/experiments/utils.py +25 -0
- phoenix/inferences/__init__.py +0 -0
- phoenix/{datasets → inferences}/errors.py +6 -5
- phoenix/{datasets → inferences}/fixtures.py +49 -42
- phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
- phoenix/{datasets → inferences}/schema.py +11 -11
- phoenix/{datasets → inferences}/validation.py +13 -14
- phoenix/logging/__init__.py +3 -0
- phoenix/logging/_config.py +90 -0
- phoenix/logging/_filter.py +6 -0
- phoenix/logging/_formatter.py +69 -0
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +4 -3
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +9 -3
- phoenix/pointcloud/clustering.py +5 -5
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/projectors.py +5 -6
- phoenix/pointcloud/umap_parameters.py +53 -52
- phoenix/server/api/README.md +28 -0
- phoenix/server/api/auth.py +44 -0
- phoenix/server/api/context.py +152 -9
- phoenix/server/api/dataloaders/__init__.py +91 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
- phoenix/server/api/dataloaders/document_evaluations.py +31 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
- phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/record_counts.py +116 -0
- phoenix/server/api/dataloaders/session_io.py +79 -0
- phoenix/server/api/dataloaders/session_num_traces.py +30 -0
- phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
- phoenix/server/api/dataloaders/session_token_usages.py +41 -0
- phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
- phoenix/server/api/dataloaders/span_annotations.py +26 -0
- phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +57 -0
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/token_counts.py +124 -0
- phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
- phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
- phoenix/server/api/dataloaders/user_roles.py +30 -0
- phoenix/server/api/dataloaders/users.py +33 -0
- phoenix/server/api/exceptions.py +48 -0
- phoenix/server/api/helpers/__init__.py +12 -0
- phoenix/server/api/helpers/dataset_helpers.py +217 -0
- phoenix/server/api/helpers/experiment_run_filters.py +763 -0
- phoenix/server/api/helpers/playground_clients.py +948 -0
- phoenix/server/api/helpers/playground_registry.py +70 -0
- phoenix/server/api/helpers/playground_spans.py +455 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
- phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
- phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +162 -0
- phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
- phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/SpanSort.py +134 -69
- phoenix/server/api/input_types/TemplateOptions.py +10 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/input_types/UserRoleInput.py +9 -0
- phoenix/server/api/mutations/__init__.py +28 -0
- phoenix/server/api/mutations/api_key_mutations.py +167 -0
- phoenix/server/api/mutations/chat_mutations.py +593 -0
- phoenix/server/api/mutations/dataset_mutations.py +591 -0
- phoenix/server/api/mutations/experiment_mutations.py +75 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
- phoenix/server/api/mutations/project_mutations.py +57 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
- phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
- phoenix/server/api/mutations/user_mutations.py +329 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +17 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +738 -0
- phoenix/server/api/routers/__init__.py +11 -0
- phoenix/server/api/routers/auth.py +284 -0
- phoenix/server/api/routers/embeddings.py +26 -0
- phoenix/server/api/routers/oauth2.py +488 -0
- phoenix/server/api/routers/v1/__init__.py +64 -0
- phoenix/server/api/routers/v1/datasets.py +1017 -0
- phoenix/server/api/routers/v1/evaluations.py +362 -0
- phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
- phoenix/server/api/routers/v1/experiment_runs.py +167 -0
- phoenix/server/api/routers/v1/experiments.py +308 -0
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +267 -0
- phoenix/server/api/routers/v1/traces.py +208 -0
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/schema.py +44 -241
- phoenix/server/api/subscriptions.py +597 -0
- phoenix/server/api/types/Annotation.py +21 -0
- phoenix/server/api/types/AnnotationSummary.py +55 -0
- phoenix/server/api/types/AnnotatorKind.py +16 -0
- phoenix/server/api/types/ApiKey.py +27 -0
- phoenix/server/api/types/AuthMethod.py +9 -0
- phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
- phoenix/server/api/types/Cluster.py +25 -24
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/DataQualityMetric.py +31 -13
- phoenix/server/api/types/Dataset.py +288 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +32 -31
- phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
- phoenix/server/api/types/EmbeddingDimension.py +56 -49
- phoenix/server/api/types/Evaluation.py +25 -31
- phoenix/server/api/types/EvaluationSummary.py +30 -50
- phoenix/server/api/types/Event.py +20 -20
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +152 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +17 -0
- phoenix/server/api/types/ExperimentRun.py +119 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
- phoenix/server/api/types/GenerativeModel.py +9 -0
- phoenix/server/api/types/GenerativeProvider.py +85 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/LabelFraction.py +7 -0
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +54 -54
- phoenix/server/api/types/PerformanceMetric.py +8 -5
- phoenix/server/api/types/Project.py +407 -142
- phoenix/server/api/types/ProjectSession.py +139 -0
- phoenix/server/api/types/Segments.py +4 -4
- phoenix/server/api/types/Span.py +221 -176
- phoenix/server/api/types/SpanAnnotation.py +43 -0
- phoenix/server/api/types/SpanIOValue.py +15 -0
- phoenix/server/api/types/SystemApiKey.py +9 -0
- phoenix/server/api/types/TemplateLanguage.py +10 -0
- phoenix/server/api/types/TimeSeries.py +19 -15
- phoenix/server/api/types/TokenUsage.py +11 -0
- phoenix/server/api/types/Trace.py +154 -0
- phoenix/server/api/types/TraceAnnotation.py +45 -0
- phoenix/server/api/types/UMAPPoints.py +7 -7
- phoenix/server/api/types/User.py +60 -0
- phoenix/server/api/types/UserApiKey.py +45 -0
- phoenix/server/api/types/UserRole.py +15 -0
- phoenix/server/api/types/node.py +4 -112
- phoenix/server/api/types/pagination.py +156 -57
- phoenix/server/api/utils.py +34 -0
- phoenix/server/app.py +864 -115
- phoenix/server/bearer_auth.py +163 -0
- phoenix/server/dml_event.py +136 -0
- phoenix/server/dml_event_handler.py +256 -0
- phoenix/server/email/__init__.py +0 -0
- phoenix/server/email/sender.py +97 -0
- phoenix/server/email/templates/__init__.py +0 -0
- phoenix/server/email/templates/password_reset.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/grpc_server.py +102 -0
- phoenix/server/jwt_store.py +505 -0
- phoenix/server/main.py +305 -116
- phoenix/server/oauth2.py +52 -0
- phoenix/server/openapi/__init__.py +0 -0
- phoenix/server/prometheus.py +111 -0
- phoenix/server/rate_limiters.py +188 -0
- phoenix/server/static/.vite/manifest.json +87 -0
- phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
- phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
- phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
- phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
- phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
- phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
- phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
- phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
- phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
- phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
- phoenix/server/telemetry.py +68 -0
- phoenix/server/templates/index.html +82 -23
- phoenix/server/thread_server.py +3 -3
- phoenix/server/types.py +275 -0
- phoenix/services.py +27 -18
- phoenix/session/client.py +743 -68
- phoenix/session/data_extractor.py +31 -7
- phoenix/session/evaluation.py +3 -9
- phoenix/session/session.py +263 -219
- phoenix/settings.py +22 -0
- phoenix/trace/__init__.py +2 -22
- phoenix/trace/attributes.py +338 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +663 -213
- phoenix/trace/dsl/helpers.py +73 -21
- phoenix/trace/dsl/query.py +574 -201
- phoenix/trace/exporter.py +24 -19
- phoenix/trace/fixtures.py +368 -32
- phoenix/trace/otel.py +71 -219
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +33 -11
- phoenix/trace/span_evaluations.py +21 -16
- phoenix/trace/span_json_decoder.py +6 -4
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +47 -32
- phoenix/trace/utils.py +21 -4
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/client.py +132 -0
- phoenix/utilities/deprecation.py +31 -0
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +109 -0
- phoenix/utilities/logging.py +8 -0
- phoenix/utilities/project.py +2 -2
- phoenix/utilities/re.py +49 -0
- phoenix/utilities/span_store.py +0 -23
- phoenix/utilities/template_formatters.py +99 -0
- phoenix/version.py +1 -1
- arize_phoenix-3.16.1.dist-info/METADATA +0 -495
- arize_phoenix-3.16.1.dist-info/RECORD +0 -178
- phoenix/core/project.py +0 -619
- phoenix/core/traces.py +0 -96
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -448
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/helpers.py +0 -11
- phoenix/server/api/routers/evaluation_handler.py +0 -109
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/server/api/types/DatasetRole.py +0 -23
- phoenix/server/static/index.css +0 -6
- phoenix/server/static/index.js +0 -7447
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- phoenix/trace/langchain/__init__.py +0 -3
- phoenix/trace/langchain/instrumentor.py +0 -35
- phoenix/trace/llama_index/__init__.py +0 -3
- phoenix/trace/llama_index/callback.py +0 -102
- phoenix/trace/openai/__init__.py +0 -3
- phoenix/trace/openai/instrumentor.py +0 -30
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
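Note on the `phoenix/{datasets → inferences}` entries above: the inference-dataset module moves wholesale, with `datasets/dataset.py` becoming `inferences/inferences.py`. A minimal migration sketch, assuming the container class follows the file rename (the class names below are inferred from the file names and are not shown in this diff):

    # arize-phoenix 3.16.1
    import phoenix as px
    ds = px.Dataset(df, schema)       # assumed 3.x name

    # arize-phoenix 7.7.1
    import phoenix as px
    inf = px.Inferences(df, schema)   # assumed 7.x name, per the inferences/ rename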
phoenix/trace/dsl/query.py
CHANGED
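This module was rewritten for the SQL-backed trace store: the old in-memory projection machinery (callables such as `Callable[[Span], Any]` evaluated against `Span` objects) is removed, and `Projection`, `Explosion`, `Concatenation`, and `SpanQuery` now compile to SQLAlchemy `Select` statements against `phoenix.db.models`, with dialect-specific handling (PostgreSQL uses `jsonb_array_elements` and `string_agg`; SQLite, lacking `WITH ORDINALITY`, falls back to post hoc pandas processing). A hedged usage sketch follows the diff. Removed lines whose content the diff viewer did not render appear below as bare `-` markers.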
@@ -1,30 +1,37 @@
-import
+import warnings
 from collections import defaultdict
-from
-from
+from collections.abc import Iterable, Mapping, Sequence
+from dataclasses import dataclass, field, replace
+from datetime import datetime
+from functools import cached_property
+from itertools import chain
+from random import randint, random
 from types import MappingProxyType
-from typing import (
-    Any,
-    Callable,
-    ClassVar,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Mapping,
-    Optional,
-    Sequence,
-    Tuple,
-    cast,
-)
+from typing import Any, Optional, cast
 
 import pandas as pd
 from openinference.semconv.trace import SpanAttributes
-
+from sqlalchemy import JSON, Column, Label, Select, SQLColumnExpression, and_, func, select
+from sqlalchemy.dialects.postgresql import aggregate_order_by
+from sqlalchemy.orm import Session, aliased
+from typing_extensions import assert_never
+
+from phoenix.config import DEFAULT_PROJECT_NAME
+from phoenix.db import models
+from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.trace.attributes import (
+    JSON_STRING_ATTRIBUTES,
+    SEMANTIC_CONVENTIONS,
+    flatten,
+    get_attribute_value,
+    load_json_strings,
+    unflatten,
+)
 from phoenix.trace.dsl import SpanFilter
-from phoenix.trace.dsl.filter import
-from phoenix.trace.schemas import ATTRIBUTE_PREFIX
-
+from phoenix.trace.dsl.filter import Projector
+from phoenix.trace.schemas import ATTRIBUTE_PREFIX
+
+DEFAULT_SPAN_LIMIT = 1000
 
 RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS
 
@@ -38,129 +45,214 @@ _ALIASES = {
     "trace_id": "context.trace_id",
 }
 
-# Because span_kind is an enum, it needs to be converted to string,
-# so it's serializable by pyarrow.
-_CONVERT_TO_STRING = ("span_kind",)
-
 
 def _unalias(key: str) -> str:
     return _ALIASES.get(key, key)
 
 
 @dataclass(frozen=True)
-class
-
-    value: Callable[[Span], Any] = field(init=False, repr=False)
-    span_fields: ClassVar[Tuple[str, ...]] = tuple(f.name for f in fields(Span))
-
-    def __bool__(self) -> bool:
-        return bool(self.key)
+class _Base:
+    """The sole purpose of this class is for `super().__post_init__()` to work"""
 
     def __post_init__(self) -> None:
-
-
-        if key.startswith(CONTEXT_PREFIX):
-            key = key[len(CONTEXT_PREFIX) :]
-            value = partial(self._from_context, key=key)
-        elif key.startswith(ATTRIBUTE_PREFIX):
-            key = self.key[len(ATTRIBUTE_PREFIX) :]
-            value = partial(self._from_attributes, key=key)
-        elif key in self.span_fields:
-            value = partial(self._from_span, key=key)
-        else:
-            value = partial(self._from_attributes, key=key)
-        if self.key in _CONVERT_TO_STRING:
-            object.__setattr__(
-                self,
-                "value",
-                lambda span: None if (v := value(span)) is None else str(v),
-            )
-        else:
-            object.__setattr__(self, "value", value)
+        pass
+
 
-
-
+@dataclass(frozen=True)
+class Projection(_Base):
+    key: str = ""
+    _projector: Projector = field(init=False, repr=False)
 
-
-
-
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "key", _unalias(self.key))
+        object.__setattr__(self, "_projector", Projector(self.key))
 
-
-
-        return getattr(span.context, key, None)
+    def __bool__(self) -> bool:
+        return bool(self.key)
 
-
-
-        return getattr(span, key, None)
+    def __call__(self) -> SQLColumnExpression[Any]:
+        return self._projector()
 
-    def to_dict(self) ->
+    def to_dict(self) -> dict[str, Any]:
        return {"key": self.key}
 
     @classmethod
     def from_dict(cls, obj: Mapping[str, Any]) -> "Projection":
-        return cls(
-
-
+        return cls(**({"key": cast(str, key)} if (key := obj.get("key")) else {}))
+
+
+@dataclass(frozen=True)
+class _HasTmpSuffix(_Base):
+    _tmp_suffix: str = field(init=False, repr=False)
+    """Ideally every column label should get a temporary random suffix that will
+    be removed at the end. This is necessary during query construction because
+    sqlalchemy is not always foolproof, e.g. we have seen `group_by` clauses that
+    were incorrect or ambiguous. We should actively avoid name collisions, which
+    is increasingly likely as queries get more complex.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_tmp_suffix", f"{randint(0, 10**6):06d}")
+
+    def _remove_tmp_suffix(self, name: str) -> str:
+        if name.endswith(self._tmp_suffix):
+            return name[: -len(self._tmp_suffix)]
+        return name
+
+    def _add_tmp_suffix(self, name: str) -> str:
+        if name.endswith(self._tmp_suffix):
+            return name
+        return name + self._tmp_suffix
 
 
 @dataclass(frozen=True)
-class Explosion(Projection):
+class Explosion(_HasTmpSuffix, Projection):
     kwargs: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     primary_index_key: str = "context.span_id"
 
-
-
+    _position_prefix: str = field(init=False, repr=False)
+    _primary_index: Projection = field(init=False, repr=False)
+    _array_tmp_col_label: str = field(init=False, repr=False)
+    """For sqlite we need to store the array in a temporary column to be able
+    to explode it later in pandas. `_array_tmp_col_label` is the name of this
+    temporary column. The temporary column will have a unique name
+    per instance.
+    """
 
     def __post_init__(self) -> None:
         super().__post_init__()
         position_prefix = _PRESCRIBED_POSITION_PREFIXES.get(self.key, "")
-        object.__setattr__(self, "
-        object.__setattr__(self, "
+        object.__setattr__(self, "_position_prefix", position_prefix)
+        object.__setattr__(self, "_primary_index", Projection(self.primary_index_key))
+        object.__setattr__(self, "_array_tmp_col_label", f"__array_tmp_col_{random()}")
 
     @cached_property
-    def index_keys(self) ->
-        return
-
-    def with_primary_index_key(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return
-
-
-
-
-
-
-
+    def index_keys(self) -> list[str]:
+        return [self._primary_index.key, f"{self._position_prefix}position"]
+
+    def with_primary_index_key(self, _: str) -> "Explosion":
+        print("`.with_primary_index_key(...)` is deprecated and will be removed in the future.")
+        return self
+
+    def update_sql(
+        self,
+        stmt: Select[Any],
+        dialect: SupportedSQLDialect,
+    ) -> Select[Any]:
+        array = self()
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because sqlite doesn't support `WITH ORDINALITY`, the order of
+            # the returned (table) values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            stmt = stmt.where(
+                func.json_type(array) == "array",
+            ).add_columns(
+                array.label(self._array_tmp_col_label),
+            )
+            return stmt
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            element = (
+                func.jsonb_array_elements(array)
+                .table_valued(
+                    Column("obj", JSON),
+                    with_ordinality="position",
+                    joins_implicitly=True,
+                )
+                .render_derived()
             )
-
-
-
+            obj, position = element.c.obj, element.c.position
+            # Use zero-based indexing for backward-compatibility.
+            position_label = (position - 1).label(f"{self._position_prefix}position")
+            if self.kwargs:
+                columns: Iterable[Label[Any]] = (
+                    obj[key.split(".")].label(self._add_tmp_suffix(name))
+                    for name, key in self.kwargs.items()
+                )
             else:
-
-
-
-
-
-
-
-
+                columns = (obj.label(self._array_tmp_col_label),)
+            stmt = (
+                stmt.where(func.jsonb_typeof(array) == "array")
+                .where(func.jsonb_typeof(obj) == "object")
+                .add_columns(position_label, *columns)
+            )
+            return stmt
+        else:
+            assert_never(dialect)
+
+    def update_df(
+        self,
+        df: pd.DataFrame,
+        dialect: SupportedSQLDialect,
+    ) -> pd.DataFrame:
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if df.empty:
+            columns = list(
+                set(
+                    chain(
+                        self.index_keys,
+                        df.drop(self._array_tmp_col_label, axis=1, errors="ignore").columns,
+                        self.kwargs.keys(),
+                    )
+                )
+            )
+            df = pd.DataFrame(columns=columns).set_index(self.index_keys)
+            return df
+        if dialect != SupportedSQLDialect.SQLITE and self.kwargs:
+            df = df.set_index(self.index_keys)
+            return df
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because sqlite doesn't support `WITH ORDINALITY`, the order of
+            # the returned (table) values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            def _extract_values(array: list[Any]) -> list[dict[str, Any]]:
+                if not isinstance(array, Iterable):
+                    return []
+                if not self.kwargs:
+                    return [
+                        {
+                            **dict(flatten(obj)),
+                            f"{self._position_prefix}position": i,
+                        }
+                        for i, obj in enumerate(array)
+                        if isinstance(obj, Mapping)
+                    ]
+                res: list[dict[str, Any]] = []
+                for i, obj in enumerate(array):
+                    if not isinstance(obj, Mapping):
+                        continue
+                    values: dict[str, Any] = {f"{self._position_prefix}position": i}
+                    for name, key in self.kwargs.items():
+                        if (value := get_attribute_value(obj, key)) is not None:
+                            values[name] = value
+                    res.append(values)
+                return res
+
+            records = df.loc[:, self._array_tmp_col_label].dropna().map(_extract_values).explode()
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            records = df.loc[:, self._array_tmp_col_label].dropna().map(flatten).map(dict)
+        else:
+            assert_never(dialect)
+        df = df.drop(self._array_tmp_col_label, axis=1)
+        if records.empty:
+            df = df.set_index(self.index_keys[0])
+            return df
+        not_na = records.notna()
+        df_explode = pd.DataFrame.from_records(
+            records.loc[not_na].to_list(),
+            index=records.index[not_na],
+        )
+        if dialect is SupportedSQLDialect.SQLITE:
+            df = _outer_join(df, df_explode)
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            df = pd.concat([df, df_explode], axis=1)
+        else:
+            assert_never(dialect)
+        df = df.set_index(self.index_keys)
+        return df
+
+    def to_dict(self) -> dict[str, Any]:
         return {
             **super().to_dict(),
             **({"kwargs": dict(self.kwargs)} if self.kwargs else {}),
@@ -185,29 +277,128 @@ class Explosion(Projection):
 
 
 @dataclass(frozen=True)
-class Concatenation(Projection):
+class Concatenation(_HasTmpSuffix, Projection):
     kwargs: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     separator: str = "\n\n"
 
+    _array_tmp_col_label: str = field(init=False, repr=False)
+    """For SQLite we need to store the array in a temporary column to be able
+    to concatenate it later in pandas. `_array_tmp_col_label` is the name of
+    this temporary column. The temporary column will have a unique name
+    per instance.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_array_tmp_col_label", f"__array_tmp_col_{random()}")
+
     def with_separator(self, separator: str = "\n\n") -> "Concatenation":
         return replace(self, separator=separator)
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def update_sql(
+        self,
+        stmt: Select[Any],
+        dialect: SupportedSQLDialect,
+    ) -> Select[Any]:
+        array = self()
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because SQLite doesn't support `WITH ORDINALITY`, the order of
+            # the returned table-values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            stmt = stmt.where(
+                func.json_type(array) == "array",
+            ).add_columns(
+                array.label(self._array_tmp_col_label),
+            )
+            return stmt
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            element = (
+                (
+                    func.jsonb_array_elements(array)
+                    if self.kwargs
+                    else func.jsonb_array_elements_text(array)
+                )
+                .table_valued(
+                    Column("obj", JSON),
+                    with_ordinality="position",
+                    joins_implicitly=True,
+                )
+                .render_derived()
+            )
+            obj, position = element.c.obj, element.c.position
+            if self.kwargs:
+                columns: Iterable[Label[Any]] = (
+                    func.string_agg(
+                        obj[key.split(".")].as_string(),
+                        aggregate_order_by(self.separator, position),  # type: ignore
+                    ).label(self._add_tmp_suffix(label))
+                    for label, key in self.kwargs.items()
+                )
            else:
+                columns = (
+                    func.string_agg(
+                        obj,
+                        aggregate_order_by(self.separator, position),  # type: ignore
+                    ).label(self.key),
+                )
+            stmt = (
+                stmt.where(
+                    and_(
+                        func.jsonb_typeof(array) == "array",
+                        *((func.jsonb_typeof(obj) == "object",) if self.kwargs else ()),
+                    )
+                )
+                .add_columns(*columns)
+                .group_by(*stmt.columns.keys())
+            )
+            return stmt
+        else:
+            assert_never(dialect)
+
+    def update_df(
+        self,
+        df: pd.DataFrame,
+        dialect: SupportedSQLDialect,
+    ) -> pd.DataFrame:
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if df.empty:
+            columns = list(
+                set(
+                    chain(
+                        df.drop(self._array_tmp_col_label, axis=1, errors="ignore").columns,
+                        self.kwargs.keys(),
+                    )
+                )
+            )
+            return pd.DataFrame(columns=columns, index=df.index)
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because SQLite doesn't support `WITH ORDINALITY`, the order of
+            # the returned table-values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            def _concat_values(array: list[Any]) -> dict[str, Any]:
+                if not isinstance(array, Iterable):
+                    return {}
+                if not self.kwargs:
+                    return {self.key: self.separator.join(str(obj) for obj in array)}
+                values: defaultdict[str, list[str]] = defaultdict(list)
+                for i, obj in enumerate(array):
+                    if not isinstance(obj, Mapping):
+                        continue
+                    for label, key in self.kwargs.items():
+                        if (value := get_attribute_value(obj, key)) is not None:
+                            values[label].append(str(value))
+                return {label: self.separator.join(vs) for label, vs in values.items()}
+
+            records = df.loc[:, self._array_tmp_col_label].map(_concat_values)
+            df_concat = pd.DataFrame.from_records(records.to_list(), index=records.index)
+            return df.drop(self._array_tmp_col_label, axis=1).join(df_concat, how="outer")
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            pass
+        else:
+            assert_never(dialect)
+        return df
+
+    def to_dict(self) -> dict[str, Any]:
         return {
             **super().to_dict(),
             **({"kwargs": dict(self.kwargs)} if self.kwargs else {}),
@@ -232,13 +423,24 @@ class Concatenation(Projection):
 
 
 @dataclass(frozen=True)
-class SpanQuery:
+class SpanQuery(_HasTmpSuffix):
     _select: Mapping[str, Projection] = field(default_factory=lambda: MappingProxyType({}))
-    _concat: Concatenation = field(
-    _explode: Explosion = field(
-    _filter: SpanFilter = field(
+    _concat: Optional[Concatenation] = field(default=None)
+    _explode: Optional[Explosion] = field(default=None)
+    _filter: Optional[SpanFilter] = field(default=None)
     _rename: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     _index: Projection = field(default_factory=lambda: Projection("context.span_id"))
+    _concat_separator: str = field(default="\n\n", repr=False)
+    _pk_tmp_col_label: str = field(init=False, repr=False)
+    """We use `_pk_tmp_col_label` as a temporary column for storing
+    the row id, i.e. the primary key, of the spans table. This will help
+    us with joins without the risk of naming conflicts. The temporary
+    column will have a unique name per instance.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_pk_tmp_col_label", f"__pk_tmp_col_{random()}")
 
     def __bool__(self) -> bool:
         return bool(self._select) or bool(self._filter) or bool(self._explode) or bool(self._concat)
@@ -254,11 +456,21 @@ class SpanQuery:
         return replace(self, _filter=_filter)
 
     def explode(self, key: str, **kwargs: str) -> "SpanQuery":
+        assert (
+            isinstance(key, str) and key
+        ), "The field name for explosion must be a non-empty string."
         _explode = Explosion(key=key, kwargs=kwargs, primary_index_key=self._index.key)
         return replace(self, _explode=_explode)
 
     def concat(self, key: str, **kwargs: str) -> "SpanQuery":
-
+        assert (
+            isinstance(key, str) and key
+        ), "The field name for concatenation must be a non-empty string."
+        _concat = (
+            Concatenation(key=key, kwargs=kwargs, separator=self._concat.separator)
+            if self._concat
+            else Concatenation(key=key, kwargs=kwargs, separator=self._concat_separator)
+        )
         return replace(self, _concat=_concat)
 
     def rename(self, **kwargs: str) -> "SpanQuery":
@@ -267,86 +479,147 @@ class SpanQuery:
 
     def with_index(self, key: str = "context.span_id") -> "SpanQuery":
         _index = Projection(key=key)
-        return
+        return (
+            replace(self, _index=_index, _explode=replace(self._explode, primary_index_key=key))
+            if self._explode
+            else replace(self, _index=_index)
+        )
 
     def with_concat_separator(self, separator: str = "\n\n") -> "SpanQuery":
+        if not self._concat:
+            return replace(self, _concat_separator=separator)
         _concat = self._concat.with_separator(separator)
         return replace(self, _concat=_concat)
 
-    def with_explode_primary_index_key(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def with_explode_primary_index_key(self, _: str) -> "SpanQuery":
+        print(
+            "`.with_explode_primary_index_key(...)` is deprecated and will be "
+            "removed in the future. Use `.with_index(...)` instead."
+        )
+        return self
+
+    def __call__(
+        self,
+        session: Session,
+        project_name: Optional[str] = None,
+        start_time: Optional[datetime] = None,
+        end_time: Optional[datetime] = None,
+        limit: Optional[int] = DEFAULT_SPAN_LIMIT,
+        root_spans_only: Optional[bool] = None,
+        # Deprecated
+        stop_time: Optional[datetime] = None,
+    ) -> pd.DataFrame:
+        if not project_name:
+            project_name = DEFAULT_PROJECT_NAME
+        if stop_time:
+            # Deprecated. Raise a warning
+            warnings.warn(
+                "stop_time is deprecated. Use end_time instead.",
+                DeprecationWarning,
             )
+            end_time = end_time or stop_time
         if not (self._select or self._explode or self._concat):
-
-
-
-
-
-
+            return _get_spans_dataframe(
+                session,
+                project_name,
+                span_filter=self._filter,
+                start_time=start_time,
+                end_time=end_time,
+                limit=limit,
+                root_spans_only=root_spans_only,
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        assert session.bind is not None
+        dialect = SupportedSQLDialect(session.bind.dialect.name)
+        row_id = models.Span.id.label(self._pk_tmp_col_label)
+        stmt: Select[Any] = (
+            # We do not allow `group_by` anything other than `row_id` because otherwise
+            # it's too complex for the post hoc processing step in pandas.
+            select(row_id)
+            .join(models.Trace)
+            .join(models.Project)
+            .where(models.Project.name == project_name)
+        )
+        if start_time:
+            stmt = stmt.where(start_time <= models.Span.start_time)
+        if end_time:
+            stmt = stmt.where(models.Span.start_time < end_time)
+        if limit is not None:
+            stmt = stmt.limit(limit)
+        if root_spans_only:
+            parent = aliased(models.Span)
+            stmt = stmt.outerjoin(
+                parent,
+                models.Span.parent_id == parent.span_id,
+            ).where(parent.span_id == None)  # noqa E711
+        stmt0_orig: Select[Any] = stmt
+        stmt1_filter: Optional[Select[Any]] = None
+        if self._filter:
+            stmt = stmt1_filter = self._filter(stmt)
+        stmt2_select: Optional[Select[Any]] = None
+        if self._select:
+            columns: Iterable[Label[Any]] = (
+                proj().label(self._add_tmp_suffix(label)) for label, proj in self._select.items()
+            )
+            stmt = stmt2_select = stmt.add_columns(*columns)
+        stmt3_explode: Optional[Select[Any]] = None
         if self._explode:
-
-
+            stmt = stmt3_explode = self._explode.update_sql(stmt, dialect)
+        index: Label[Any] = self._index().label(self._add_tmp_suffix(self._index.key))
+        df: Optional[pd.DataFrame] = None
+        # `concat` is done separately because it has `group_by` but we can't
+        # always join to it as a subquery because it may require post hoc
+        # processing in pandas. It's kept separate for simplicity.
+        df_concat: Optional[pd.DataFrame] = None
+        conn = session.connection()
+        if self._explode or not self._concat:
+            if index.name not in stmt.selected_columns.keys():
+                stmt = stmt.add_columns(index)
+            df = pd.read_sql_query(stmt, conn, self._pk_tmp_col_label)
+        if self._concat:
+            if df is not None:
+                assert stmt3_explode is not None
+                # We can't include stmt3_explode because it may be trying to
+                # explode the same column that we're trying to concatenate,
+                # resulting in duplicated joins.
+                stmt_no_explode = (
+                    stmt2_select
+                    if stmt2_select is not None
+                    else (stmt1_filter if stmt1_filter is not None else stmt0_orig)
+                )
+                stmt4_concat = stmt_no_explode.with_only_columns(row_id)
             else:
-
-
-            if not
-
-
-
-
-
+                assert stmt3_explode is None
+                stmt4_concat = stmt
+            if (df is None or df.empty) and index.name not in stmt4_concat.selected_columns.keys():
+                stmt4_concat = stmt4_concat.add_columns(index)
+            stmt4_concat = self._concat.update_sql(stmt4_concat, dialect)
+            df_concat = pd.read_sql_query(stmt4_concat, conn, self._pk_tmp_col_label)
+            df_concat = self._concat.update_df(df_concat, dialect)
+        assert df is not None or df_concat is not None
+        if df is None:
+            df = df_concat
+        elif df_concat is not None:
+            df = _outer_join(df, df_concat)
+        assert df is not None and self._pk_tmp_col_label not in df.columns
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if self._explode:
+            df = self._explode.update_df(df, dialect)
+        else:
+            df = df.set_index(self._index.key)
+        df = df.rename(_ALIASES, axis=1, errors="ignore")
+        df = df.rename(self._rename, axis=1, errors="ignore")
+        return df
+
+    def to_dict(self) -> dict[str, Any]:
         return {
             **(
                 {"select": {name: proj.to_dict() for name, proj in self._select.items()}}
                 if self._select
                 else {}
             ),
-            "filter": self._filter.to_dict(),
-            "explode": self._explode.to_dict(),
-            "concat": self._concat.to_dict(),
+            **({"filter": self._filter.to_dict()} if self._filter else {}),
+            **({"explode": self._explode.to_dict()} if self._explode else {}),
+            **({"concat": self._concat.to_dict()} if self._concat else {}),
             **({"rename": dict(self._rename)} if self._rename else {}),
             "index": self._index.to_dict(),
         }
@@ -355,7 +628,6 @@ class SpanQuery:
     def from_dict(
         cls,
        obj: Mapping[str, Any],
-        evals: Optional[SupportsGetSpanEvaluation] = None,
        valid_eval_names: Optional[Sequence[str]] = None,
     ) -> "SpanQuery":
         return cls(
@@ -375,7 +647,6 @@ class SpanQuery:
                 {
                     "_filter": SpanFilter.from_dict(
                         cast(Mapping[str, Any], filter),
-                        evals=evals,
                        valid_eval_names=valid_eval_names,
                    )
                }  # type: ignore
@@ -385,11 +656,13 @@ class SpanQuery:
            **(
                {"_explode": Explosion.from_dict(cast(Mapping[str, Any], explode))}  # type: ignore
                if (explode := obj.get("explode"))
+                and explode.get("key")  # check `key` for backward-compatible truthiness
                else {}
            ),
            **(
                {"_concat": Concatenation.from_dict(cast(Mapping[str, Any], concat))}  # type: ignore
                if (concat := obj.get("concat"))
+                and concat.get("key")  # check `key` for backward-compatible truthiness
                else {}
            ),
            **(
@@ -403,3 +676,103 @@ class SpanQuery:
                else {}
            ),
        )
+
+
+def _get_spans_dataframe(
+    session: Session,
+    project_name: str,
+    /,
+    *,
+    span_filter: Optional[SpanFilter] = None,
+    start_time: Optional[datetime] = None,
+    end_time: Optional[datetime] = None,
+    limit: Optional[int] = DEFAULT_SPAN_LIMIT,
+    root_spans_only: Optional[bool] = None,
+    # Deprecated
+    stop_time: Optional[datetime] = None,
+) -> pd.DataFrame:
+    # use legacy labels for backward-compatibility
+    span_id_label = "context.span_id"
+    trace_id_label = "context.trace_id"
+    if stop_time:
+        # Deprecated. Raise a warning
+        warnings.warn(
+            "stop_time is deprecated. Use end_time instead.",
+            DeprecationWarning,
+        )
+        end_time = end_time or stop_time
+    stmt: Select[Any] = (
+        select(
+            models.Span.name,
+            models.Span.span_kind,
+            models.Span.parent_id,
+            models.Span.start_time,
+            models.Span.end_time,
+            models.Span.status_code,
+            models.Span.status_message,
+            models.Span.events,
+            models.Span.span_id.label(span_id_label),
+            models.Trace.trace_id.label(trace_id_label),
+            models.Span.attributes,
+        )
+        .join(models.Trace)
+        .join(models.Project)
+        .where(models.Project.name == project_name)
+    )
+    if span_filter:
+        stmt = span_filter(stmt)
+    if start_time:
+        stmt = stmt.where(start_time <= models.Span.start_time)
+    if end_time:
+        stmt = stmt.where(models.Span.start_time < end_time)
+    if limit is not None:
+        stmt = stmt.limit(limit)
+    if root_spans_only:
+        parent = aliased(models.Span)
+        stmt = stmt.outerjoin(
+            parent,
+            models.Span.parent_id == parent.span_id,
+        ).where(parent.span_id == None)  # noqa E711
+    conn = session.connection()
+    # set `drop=False` for backward-compatibility
+    df = pd.read_sql_query(stmt, conn).set_index(span_id_label, drop=False)
+    if df.empty:
+        return df.drop("attributes", axis=1)
+    df_attributes = pd.DataFrame.from_records(
+        df.attributes.map(_flatten_semantic_conventions),
+    ).set_axis(df.index, axis=0)
+    df = pd.concat(
+        [
+            df.drop("attributes", axis=1),
+            df_attributes.add_prefix("attributes" + "."),
+        ],
+        axis=1,
+    )
+    return df
+
+
+def _outer_join(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:
+    if (columns_intersection := left.columns.intersection(right.columns)).empty:
+        df = left.join(right, how="outer")
+    else:
+        df = left.join(right, how="outer", lsuffix="_L", rsuffix="_R")
+        for col in columns_intersection:
+            df.loc[:, col] = df.loc[:, f"{col}_L"].fillna(df.loc[:, f"{col}_R"])
+            df = df.drop([f"{col}_L", f"{col}_R"], axis=1)
+    return df
+
+
+def _flatten_semantic_conventions(attributes: Mapping[str, Any]) -> dict[str, Any]:
+    # This may be inefficient, but is needed to preserve backward-compatibility.
+    # For example, custom attributes do not get flattened.
+    ans = unflatten(
+        load_json_strings(
+            flatten(
+                attributes,
+                recurse_on_sequence=True,
+                json_string_attributes=JSON_STRING_ATTRIBUTES,
+            ),
+        ),
+        prefix_exclusions=SEMANTIC_CONVENTIONS,
+    )
+    return ans