arize-phoenix 3.16.1__py3-none-any.whl → 7.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- arize_phoenix-7.7.1.dist-info/METADATA +261 -0
- arize_phoenix-7.7.1.dist-info/RECORD +345 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/WHEEL +1 -1
- arize_phoenix-7.7.1.dist-info/entry_points.txt +3 -0
- phoenix/__init__.py +86 -14
- phoenix/auth.py +309 -0
- phoenix/config.py +675 -45
- phoenix/core/model.py +32 -30
- phoenix/core/model_schema.py +102 -109
- phoenix/core/model_schema_adapter.py +48 -45
- phoenix/datetime_utils.py +24 -3
- phoenix/db/README.md +54 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +85 -0
- phoenix/db/bulk_inserter.py +294 -0
- phoenix/db/engines.py +208 -0
- phoenix/db/enums.py +20 -0
- phoenix/db/facilitator.py +113 -0
- phoenix/db/helpers.py +159 -0
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/dataset.py +227 -0
- phoenix/db/insertion/document_annotation.py +171 -0
- phoenix/db/insertion/evaluation.py +191 -0
- phoenix/db/insertion/helpers.py +98 -0
- phoenix/db/insertion/span.py +193 -0
- phoenix/db/insertion/span_annotation.py +158 -0
- phoenix/db/insertion/trace_annotation.py +158 -0
- phoenix/db/insertion/types.py +256 -0
- phoenix/db/migrate.py +86 -0
- phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
- phoenix/db/migrations/env.py +114 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
- phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
- phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +807 -0
- phoenix/exceptions.py +5 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +158 -0
- phoenix/experiments/evaluators/code_evaluators.py +184 -0
- phoenix/experiments/evaluators/llm_evaluators.py +473 -0
- phoenix/experiments/evaluators/utils.py +236 -0
- phoenix/experiments/functions.py +772 -0
- phoenix/experiments/tracing.py +86 -0
- phoenix/experiments/types.py +726 -0
- phoenix/experiments/utils.py +25 -0
- phoenix/inferences/__init__.py +0 -0
- phoenix/{datasets → inferences}/errors.py +6 -5
- phoenix/{datasets → inferences}/fixtures.py +49 -42
- phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
- phoenix/{datasets → inferences}/schema.py +11 -11
- phoenix/{datasets → inferences}/validation.py +13 -14
- phoenix/logging/__init__.py +3 -0
- phoenix/logging/_config.py +90 -0
- phoenix/logging/_filter.py +6 -0
- phoenix/logging/_formatter.py +69 -0
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +4 -3
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +9 -3
- phoenix/pointcloud/clustering.py +5 -5
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/projectors.py +5 -6
- phoenix/pointcloud/umap_parameters.py +53 -52
- phoenix/server/api/README.md +28 -0
- phoenix/server/api/auth.py +44 -0
- phoenix/server/api/context.py +152 -9
- phoenix/server/api/dataloaders/__init__.py +91 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
- phoenix/server/api/dataloaders/document_evaluations.py +31 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
- phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/record_counts.py +116 -0
- phoenix/server/api/dataloaders/session_io.py +79 -0
- phoenix/server/api/dataloaders/session_num_traces.py +30 -0
- phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
- phoenix/server/api/dataloaders/session_token_usages.py +41 -0
- phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
- phoenix/server/api/dataloaders/span_annotations.py +26 -0
- phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +57 -0
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/token_counts.py +124 -0
- phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
- phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
- phoenix/server/api/dataloaders/user_roles.py +30 -0
- phoenix/server/api/dataloaders/users.py +33 -0
- phoenix/server/api/exceptions.py +48 -0
- phoenix/server/api/helpers/__init__.py +12 -0
- phoenix/server/api/helpers/dataset_helpers.py +217 -0
- phoenix/server/api/helpers/experiment_run_filters.py +763 -0
- phoenix/server/api/helpers/playground_clients.py +948 -0
- phoenix/server/api/helpers/playground_registry.py +70 -0
- phoenix/server/api/helpers/playground_spans.py +455 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
- phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
- phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +162 -0
- phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
- phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/SpanSort.py +134 -69
- phoenix/server/api/input_types/TemplateOptions.py +10 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/input_types/UserRoleInput.py +9 -0
- phoenix/server/api/mutations/__init__.py +28 -0
- phoenix/server/api/mutations/api_key_mutations.py +167 -0
- phoenix/server/api/mutations/chat_mutations.py +593 -0
- phoenix/server/api/mutations/dataset_mutations.py +591 -0
- phoenix/server/api/mutations/experiment_mutations.py +75 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
- phoenix/server/api/mutations/project_mutations.py +57 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
- phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
- phoenix/server/api/mutations/user_mutations.py +329 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +17 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +738 -0
- phoenix/server/api/routers/__init__.py +11 -0
- phoenix/server/api/routers/auth.py +284 -0
- phoenix/server/api/routers/embeddings.py +26 -0
- phoenix/server/api/routers/oauth2.py +488 -0
- phoenix/server/api/routers/v1/__init__.py +64 -0
- phoenix/server/api/routers/v1/datasets.py +1017 -0
- phoenix/server/api/routers/v1/evaluations.py +362 -0
- phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
- phoenix/server/api/routers/v1/experiment_runs.py +167 -0
- phoenix/server/api/routers/v1/experiments.py +308 -0
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +267 -0
- phoenix/server/api/routers/v1/traces.py +208 -0
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/schema.py +44 -241
- phoenix/server/api/subscriptions.py +597 -0
- phoenix/server/api/types/Annotation.py +21 -0
- phoenix/server/api/types/AnnotationSummary.py +55 -0
- phoenix/server/api/types/AnnotatorKind.py +16 -0
- phoenix/server/api/types/ApiKey.py +27 -0
- phoenix/server/api/types/AuthMethod.py +9 -0
- phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
- phoenix/server/api/types/Cluster.py +25 -24
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/DataQualityMetric.py +31 -13
- phoenix/server/api/types/Dataset.py +288 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +32 -31
- phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
- phoenix/server/api/types/EmbeddingDimension.py +56 -49
- phoenix/server/api/types/Evaluation.py +25 -31
- phoenix/server/api/types/EvaluationSummary.py +30 -50
- phoenix/server/api/types/Event.py +20 -20
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +152 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +17 -0
- phoenix/server/api/types/ExperimentRun.py +119 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
- phoenix/server/api/types/GenerativeModel.py +9 -0
- phoenix/server/api/types/GenerativeProvider.py +85 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/LabelFraction.py +7 -0
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +54 -54
- phoenix/server/api/types/PerformanceMetric.py +8 -5
- phoenix/server/api/types/Project.py +407 -142
- phoenix/server/api/types/ProjectSession.py +139 -0
- phoenix/server/api/types/Segments.py +4 -4
- phoenix/server/api/types/Span.py +221 -176
- phoenix/server/api/types/SpanAnnotation.py +43 -0
- phoenix/server/api/types/SpanIOValue.py +15 -0
- phoenix/server/api/types/SystemApiKey.py +9 -0
- phoenix/server/api/types/TemplateLanguage.py +10 -0
- phoenix/server/api/types/TimeSeries.py +19 -15
- phoenix/server/api/types/TokenUsage.py +11 -0
- phoenix/server/api/types/Trace.py +154 -0
- phoenix/server/api/types/TraceAnnotation.py +45 -0
- phoenix/server/api/types/UMAPPoints.py +7 -7
- phoenix/server/api/types/User.py +60 -0
- phoenix/server/api/types/UserApiKey.py +45 -0
- phoenix/server/api/types/UserRole.py +15 -0
- phoenix/server/api/types/node.py +4 -112
- phoenix/server/api/types/pagination.py +156 -57
- phoenix/server/api/utils.py +34 -0
- phoenix/server/app.py +864 -115
- phoenix/server/bearer_auth.py +163 -0
- phoenix/server/dml_event.py +136 -0
- phoenix/server/dml_event_handler.py +256 -0
- phoenix/server/email/__init__.py +0 -0
- phoenix/server/email/sender.py +97 -0
- phoenix/server/email/templates/__init__.py +0 -0
- phoenix/server/email/templates/password_reset.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/grpc_server.py +102 -0
- phoenix/server/jwt_store.py +505 -0
- phoenix/server/main.py +305 -116
- phoenix/server/oauth2.py +52 -0
- phoenix/server/openapi/__init__.py +0 -0
- phoenix/server/prometheus.py +111 -0
- phoenix/server/rate_limiters.py +188 -0
- phoenix/server/static/.vite/manifest.json +87 -0
- phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
- phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
- phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
- phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
- phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
- phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
- phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
- phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
- phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
- phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
- phoenix/server/telemetry.py +68 -0
- phoenix/server/templates/index.html +82 -23
- phoenix/server/thread_server.py +3 -3
- phoenix/server/types.py +275 -0
- phoenix/services.py +27 -18
- phoenix/session/client.py +743 -68
- phoenix/session/data_extractor.py +31 -7
- phoenix/session/evaluation.py +3 -9
- phoenix/session/session.py +263 -219
- phoenix/settings.py +22 -0
- phoenix/trace/__init__.py +2 -22
- phoenix/trace/attributes.py +338 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +663 -213
- phoenix/trace/dsl/helpers.py +73 -21
- phoenix/trace/dsl/query.py +574 -201
- phoenix/trace/exporter.py +24 -19
- phoenix/trace/fixtures.py +368 -32
- phoenix/trace/otel.py +71 -219
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +33 -11
- phoenix/trace/span_evaluations.py +21 -16
- phoenix/trace/span_json_decoder.py +6 -4
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +47 -32
- phoenix/trace/utils.py +21 -4
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/client.py +132 -0
- phoenix/utilities/deprecation.py +31 -0
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +109 -0
- phoenix/utilities/logging.py +8 -0
- phoenix/utilities/project.py +2 -2
- phoenix/utilities/re.py +49 -0
- phoenix/utilities/span_store.py +0 -23
- phoenix/utilities/template_formatters.py +99 -0
- phoenix/version.py +1 -1
- arize_phoenix-3.16.1.dist-info/METADATA +0 -495
- arize_phoenix-3.16.1.dist-info/RECORD +0 -178
- phoenix/core/project.py +0 -619
- phoenix/core/traces.py +0 -96
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -448
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/helpers.py +0 -11
- phoenix/server/api/routers/evaluation_handler.py +0 -109
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/server/api/types/DatasetRole.py +0 -23
- phoenix/server/static/index.css +0 -6
- phoenix/server/static/index.js +0 -7447
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- phoenix/trace/langchain/__init__.py +0 -3
- phoenix/trace/langchain/instrumentor.py +0 -35
- phoenix/trace/llama_index/__init__.py +0 -3
- phoenix/trace/llama_index/callback.py +0 -102
- phoenix/trace/openai/__init__.py +0 -3
- phoenix/trace/openai/instrumentor.py +0 -30
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
|
@@ -1,69 +1,72 @@
|
|
|
1
|
+
from collections.abc import Iterable, Sized
|
|
1
2
|
from itertools import chain
|
|
2
3
|
from operator import itemgetter
|
|
3
|
-
from typing import
|
|
4
|
+
from typing import Optional, Union
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
6
7
|
from pandas.api.types import is_object_dtype
|
|
7
8
|
from typing_extensions import TypeAlias, TypeGuard
|
|
8
9
|
|
|
9
|
-
from phoenix import Dataset, EmbeddingColumnNames
|
|
10
10
|
from phoenix.core.model import _get_embedding_dimensions
|
|
11
11
|
from phoenix.core.model_schema import Embedding, Model, RetrievalEmbedding, Schema
|
|
12
|
-
from phoenix.
|
|
13
|
-
from phoenix.
|
|
12
|
+
from phoenix.inferences.inferences import Inferences
|
|
13
|
+
from phoenix.inferences.schema import EmbeddingColumnNames, RetrievalEmbeddingColumnNames
|
|
14
|
+
from phoenix.inferences.schema import Schema as InferencesSchema
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
InferencesName: TypeAlias = str
|
|
16
17
|
ColumnName: TypeAlias = str
|
|
17
18
|
DisplayName: TypeAlias = str
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
def
|
|
21
|
+
def create_model_from_inferences(*inference_sets: Optional[Inferences]) -> Model:
|
|
21
22
|
# TODO: move this validation into model_schema.Model.
|
|
22
|
-
if len(
|
|
23
|
+
if len(inference_sets) > 1 and inference_sets[0] is not None:
|
|
23
24
|
# Check that for each embedding dimension all vectors
|
|
24
|
-
# have the same length between
|
|
25
|
-
_ = _get_embedding_dimensions(
|
|
26
|
-
|
|
27
|
-
named_dataframes:
|
|
28
|
-
prediction_ids:
|
|
29
|
-
timestamps:
|
|
30
|
-
prediction_labels:
|
|
31
|
-
prediction_scores:
|
|
32
|
-
actual_labels:
|
|
33
|
-
actual_scores:
|
|
34
|
-
features:
|
|
35
|
-
tags:
|
|
36
|
-
embeddings:
|
|
37
|
-
prompts:
|
|
38
|
-
responses:
|
|
39
|
-
|
|
40
|
-
for
|
|
41
|
-
df =
|
|
25
|
+
# have the same length between inferences.
|
|
26
|
+
_ = _get_embedding_dimensions(inference_sets[0], inference_sets[1])
|
|
27
|
+
|
|
28
|
+
named_dataframes: list[tuple[InferencesName, pd.DataFrame]] = []
|
|
29
|
+
prediction_ids: list[ColumnName] = []
|
|
30
|
+
timestamps: list[ColumnName] = []
|
|
31
|
+
prediction_labels: list[ColumnName] = []
|
|
32
|
+
prediction_scores: list[ColumnName] = []
|
|
33
|
+
actual_labels: list[ColumnName] = []
|
|
34
|
+
actual_scores: list[ColumnName] = []
|
|
35
|
+
features: list[ColumnName] = []
|
|
36
|
+
tags: list[ColumnName] = []
|
|
37
|
+
embeddings: dict[DisplayName, EmbeddingColumnNames] = {}
|
|
38
|
+
prompts: list[EmbeddingColumnNames] = []
|
|
39
|
+
responses: list[Union[str, EmbeddingColumnNames]] = []
|
|
40
|
+
|
|
41
|
+
for inferences in filter(_is_inferences, inference_sets):
|
|
42
|
+
df = inferences.dataframe
|
|
42
43
|
# Coerce string column names at run time.
|
|
43
44
|
df = df.set_axis(
|
|
44
45
|
map(str, df.columns),
|
|
45
46
|
axis=1,
|
|
46
47
|
)
|
|
47
|
-
named_dataframes.append((
|
|
48
|
-
|
|
48
|
+
named_dataframes.append((inferences.name, df))
|
|
49
|
+
inferences_schema = (
|
|
50
|
+
inferences.schema if inferences.schema is not None else InferencesSchema()
|
|
51
|
+
)
|
|
49
52
|
for display_name, embedding in (
|
|
50
|
-
|
|
53
|
+
inferences_schema.embedding_feature_column_names or {}
|
|
51
54
|
).items():
|
|
52
55
|
if display_name not in embeddings:
|
|
53
56
|
embeddings[display_name] = embedding
|
|
54
|
-
if
|
|
55
|
-
prompts.append(
|
|
56
|
-
if
|
|
57
|
-
responses.append(
|
|
57
|
+
if inferences_schema.prompt_column_names is not None:
|
|
58
|
+
prompts.append(inferences_schema.prompt_column_names)
|
|
59
|
+
if inferences_schema.response_column_names is not None:
|
|
60
|
+
responses.append(inferences_schema.response_column_names)
|
|
58
61
|
for source, sink in (
|
|
59
|
-
([
|
|
60
|
-
([
|
|
61
|
-
([
|
|
62
|
-
([
|
|
63
|
-
([
|
|
64
|
-
([
|
|
65
|
-
(
|
|
66
|
-
(
|
|
62
|
+
([inferences_schema.prediction_id_column_name], prediction_ids),
|
|
63
|
+
([inferences_schema.timestamp_column_name], timestamps),
|
|
64
|
+
([inferences_schema.prediction_label_column_name], prediction_labels),
|
|
65
|
+
([inferences_schema.prediction_score_column_name], prediction_scores),
|
|
66
|
+
([inferences_schema.actual_label_column_name], actual_labels),
|
|
67
|
+
([inferences_schema.actual_score_column_name], actual_scores),
|
|
68
|
+
(inferences_schema.feature_column_names or (), features),
|
|
69
|
+
(inferences_schema.tag_column_names or (), tags),
|
|
67
70
|
):
|
|
68
71
|
# Coerce None to "" to simplify type checks.
|
|
69
72
|
sink.extend(map(lambda s: "" if s is None else str(s), source))
|
|
@@ -132,8 +135,8 @@ def create_model_from_datasets(*datasets: Optional[Dataset]) -> Model:
|
|
|
132
135
|
)
|
|
133
136
|
|
|
134
137
|
|
|
135
|
-
def
|
|
136
|
-
return type(obj) is
|
|
138
|
+
def _is_inferences(obj: Optional[Inferences]) -> TypeGuard[Inferences]:
|
|
139
|
+
return type(obj) is Inferences
|
|
137
140
|
|
|
138
141
|
|
|
139
142
|
def _take_first_str(iterator: Iterable[str]) -> str:
|
|
@@ -182,14 +185,14 @@ def _translate_prompt_embedding(
|
|
|
182
185
|
def _split_vectors_vs_scalars(
|
|
183
186
|
names: Iterable[str],
|
|
184
187
|
*dataframes: pd.DataFrame,
|
|
185
|
-
) ->
|
|
188
|
+
) -> tuple[list[str], list[Embedding]]:
|
|
186
189
|
"""A best-effort attempt at separating vector columns from scalar columns
|
|
187
190
|
by examining the first non-null item of the column from each dataframe. If
|
|
188
191
|
any item is `Iterable` and `Sized`, but not `str`, then the column is
|
|
189
192
|
returned as `Embedding`, else it's returned as scalar.
|
|
190
193
|
"""
|
|
191
|
-
scalars:
|
|
192
|
-
vectors:
|
|
194
|
+
scalars: list[str] = []
|
|
195
|
+
vectors: list[Embedding] = []
|
|
193
196
|
# convert to sets for a speedier lookup
|
|
194
197
|
column_names = [set(df.columns) for df in dataframes]
|
|
195
198
|
for name in names:
|
phoenix/datetime_utils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from datetime import datetime, timedelta, timezone
|
|
2
|
-
from typing import Any, Optional,
|
|
1
|
+
from datetime import datetime, timedelta, timezone, tzinfo
|
|
2
|
+
from typing import Any, Optional, cast
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import pytz
|
|
@@ -11,6 +11,27 @@ from pandas.core.dtypes.common import (
|
|
|
11
11
|
is_object_dtype,
|
|
12
12
|
)
|
|
13
13
|
|
|
14
|
+
_LOCAL_TIMEZONE = datetime.now(timezone.utc).astimezone().tzinfo
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def local_now() -> datetime:
|
|
18
|
+
return datetime.now(timezone.utc).astimezone(tz=_LOCAL_TIMEZONE)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def normalize_datetime(
|
|
22
|
+
dt: Optional[datetime],
|
|
23
|
+
tz: Optional[tzinfo] = None,
|
|
24
|
+
) -> Optional[datetime]:
|
|
25
|
+
"""
|
|
26
|
+
If the input datetime is timezone-naive, it is localized as local timezone
|
|
27
|
+
unless tzinfo is specified.
|
|
28
|
+
"""
|
|
29
|
+
if not isinstance(dt, datetime):
|
|
30
|
+
return None
|
|
31
|
+
if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:
|
|
32
|
+
dt = dt.replace(tzinfo=tz if tz else _LOCAL_TIMEZONE)
|
|
33
|
+
return dt.astimezone(timezone.utc)
|
|
34
|
+
|
|
14
35
|
|
|
15
36
|
def normalize_timestamps(
|
|
16
37
|
timestamps: "pd.Series[Any]",
|
|
@@ -75,7 +96,7 @@ MINUTE_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:00%z"
|
|
|
75
96
|
def right_open_time_range(
|
|
76
97
|
min_time: Optional[datetime],
|
|
77
98
|
max_time: Optional[datetime],
|
|
78
|
-
) ->
|
|
99
|
+
) -> tuple[Optional[datetime], Optional[datetime]]:
|
|
79
100
|
"""
|
|
80
101
|
First adds one minute to `max_time`, because time intervals are right
|
|
81
102
|
open and one minute is the smallest interval allowed, then rounds down
|
phoenix/db/README.md
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Database
|
|
2
|
+
|
|
3
|
+
This module is responsible for the database connection and the migrations.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
## Migrations
|
|
7
|
+
|
|
8
|
+
All migrations are managed by Alembic. Migrations are applied to the database automatically when the application starts.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Manually Applying Migrations
|
|
12
|
+
|
|
13
|
+
Sometimes, it's necessary to manually apply migrations, e.g., to recover from a failed migration. To manually apply migrations, you must first clone the Phoenix repository.
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
git clone https://github.com/Arize-ai/phoenix.git
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Then navigate to the `db` directory (the directory containing this README).
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
cd phoenix/src/phoenix/db
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
If you are using a non-default SQL database URL (e.g., for running PostgreSQL), ensure your `PHOENIX_SQL_DATABASE_URL` is set. This is not needed if you are running Phoenix with the default SQLite URL.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
export PHOENIX_SQL_DATABASE_URL=<sql-database-url>
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
To manually run up-migrations, run the following command:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
alembic upgrade head
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
If the above command fails, it may be necessary to undo partially applied changes from a failed migration by first running down-migrations. To do so, identify the ID of the migration revision you wish to return to and pass it to the downgrade command shown below. Revisions are defined [here](./migrations/versions/).
|
|
39
|
+
|
|
40
|
+
⚠️ Running down-migrations can result in lost data. Only run down-migrations if you know what you are doing and consider backing up your database first. If you have any questions or doubts, contact the Phoenix team in the `#phoenix-support` channel of the [Arize AI Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) or via GitHub.
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
alembic downgrade <revision-id>
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Creating a Migration
|
|
47
|
+
|
|
48
|
+
All migrations are stored in the `migrations` folder. To create a new migration, run the following command:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
alembic revision -m "your_revision_name"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Then fill in the generated migration file with the necessary changes.
|
phoenix/db/__init__.py
ADDED
phoenix/db/alembic.ini
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# A generic, single database configuration.
|
|
2
|
+
|
|
3
|
+
[alembic]
|
|
4
|
+
# path to migration scripts
|
|
5
|
+
# Note this is overridden in .migrate during programmatic migrations
|
|
6
|
+
script_location = migrations
|
|
7
|
+
|
|
8
|
+
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
|
9
|
+
# Uncomment the line below if you want the files to be prepended with date and time
|
|
10
|
+
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
|
11
|
+
# for all available tokens
|
|
12
|
+
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
|
13
|
+
|
|
14
|
+
# sys.path path, will be prepended to sys.path if present.
|
|
15
|
+
# defaults to the current working directory.
|
|
16
|
+
prepend_sys_path = .
|
|
17
|
+
|
|
18
|
+
# timezone to use when rendering the date within the migration file
|
|
19
|
+
# as well as the filename.
|
|
20
|
+
# If specified, requires python>=3.9 or the backports.zoneinfo library.
|
|
21
|
+
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
|
|
22
|
+
# string value is passed to ZoneInfo()
|
|
23
|
+
# leave blank for localtime
|
|
24
|
+
# timezone =
|
|
25
|
+
|
|
26
|
+
# max length of characters to apply to the
|
|
27
|
+
# "slug" field
|
|
28
|
+
# truncate_slug_length = 40
|
|
29
|
+
|
|
30
|
+
# set to 'true' to run the environment during
|
|
31
|
+
# the 'revision' command, regardless of autogenerate
|
|
32
|
+
# revision_environment = false
|
|
33
|
+
|
|
34
|
+
# set to 'true' to allow .pyc and .pyo files without
|
|
35
|
+
# a source .py file to be detected as revisions in the
|
|
36
|
+
# versions/ directory
|
|
37
|
+
# sourceless = false
|
|
38
|
+
|
|
39
|
+
# version location specification; This defaults
|
|
40
|
+
# to migrations/versions. When using multiple version
|
|
41
|
+
# directories, initial revisions must be specified with --version-path.
|
|
42
|
+
# The path separator used here should be the separator specified by "version_path_separator" below.
|
|
43
|
+
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
|
|
44
|
+
|
|
45
|
+
# version path separator; As mentioned above, this is the character used to split
|
|
46
|
+
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
|
|
47
|
+
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
|
|
48
|
+
# Valid values for version_path_separator are:
|
|
49
|
+
#
|
|
50
|
+
# version_path_separator = :
|
|
51
|
+
# version_path_separator = ;
|
|
52
|
+
# version_path_separator = space
|
|
53
|
+
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
|
54
|
+
|
|
55
|
+
# set to 'true' to search source files recursively
|
|
56
|
+
# in each "version_locations" directory
|
|
57
|
+
# new in Alembic version 1.10
|
|
58
|
+
# recursive_version_locations = false
|
|
59
|
+
|
|
60
|
+
# the output encoding used when revision files
|
|
61
|
+
# are written from script.py.mako
|
|
62
|
+
# output_encoding = utf-8
|
|
63
|
+
|
|
64
|
+
# NB: This is commented out intentionally as it is dynamic
|
|
65
|
+
# See migrations/env.py
|
|
66
|
+
# sqlalchemy.url = driver://user:pass@localhost/dbname
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
[post_write_hooks]
|
|
70
|
+
# post_write_hooks defines scripts or Python functions that are run
|
|
71
|
+
# on newly generated revision scripts. See the documentation for further
|
|
72
|
+
# detail and examples
|
|
73
|
+
|
|
74
|
+
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
|
75
|
+
# hooks = black
|
|
76
|
+
# black.type = console_scripts
|
|
77
|
+
# black.entrypoint = black
|
|
78
|
+
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
|
79
|
+
|
|
80
|
+
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
|
81
|
+
# hooks = ruff
|
|
82
|
+
# ruff.type = exec
|
|
83
|
+
# ruff.executable = %(here)s/.venv/bin/ruff
|
|
84
|
+
# ruff.options = --fix REVISION_SCRIPT_FILENAME
|
|
85
|
+
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from asyncio import Queue, as_completed
|
|
4
|
+
from collections.abc import AsyncIterator, Awaitable, Callable, Iterable
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from functools import singledispatchmethod
|
|
7
|
+
from itertools import islice
|
|
8
|
+
from time import perf_counter
|
|
9
|
+
from typing import Any, Optional, cast
|
|
10
|
+
|
|
11
|
+
from typing_extensions import TypeAlias
|
|
12
|
+
|
|
13
|
+
import phoenix.trace.v1 as pb
|
|
14
|
+
from phoenix.db.insertion.constants import DEFAULT_RETRY_ALLOWANCE, DEFAULT_RETRY_DELAY_SEC
|
|
15
|
+
from phoenix.db.insertion.document_annotation import DocumentAnnotationQueueInserter
|
|
16
|
+
from phoenix.db.insertion.evaluation import (
|
|
17
|
+
InsertEvaluationError,
|
|
18
|
+
insert_evaluation,
|
|
19
|
+
)
|
|
20
|
+
from phoenix.db.insertion.helpers import DataManipulation, DataManipulationEvent
|
|
21
|
+
from phoenix.db.insertion.span import SpanInsertionEvent, insert_span
|
|
22
|
+
from phoenix.db.insertion.span_annotation import SpanAnnotationQueueInserter
|
|
23
|
+
from phoenix.db.insertion.trace_annotation import TraceAnnotationQueueInserter
|
|
24
|
+
from phoenix.db.insertion.types import Insertables, Precursors
|
|
25
|
+
from phoenix.server.dml_event import DmlEvent, SpanInsertEvent
|
|
26
|
+
from phoenix.server.types import CanPutItem, DbSessionFactory
|
|
27
|
+
from phoenix.trace.schemas import Span
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
ProjectRowId: TypeAlias = int
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class TransactionResult:
    """Frozen dataclass holding a set of updated project row ids (empty by default)."""

    # `default_factory` gives every instance its own fresh set.
    updated_project_rowids: set[ProjectRowId] = field(default_factory=set)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class BulkInserter:
    """
    Background writer that batches spans, evaluations, annotation items and
    queued data-manipulation operations into database transactions.

    Used as an async context manager: entering starts the background insertion
    task and returns the callables used to submit work; exiting cancels that
    task.
    """

    def __init__(
        self,
        db: DbSessionFactory,
        *,
        event_queue: CanPutItem[DmlEvent],
        initial_batch_of_spans: Optional[Iterable[tuple[Span, str]]] = None,
        initial_batch_of_evaluations: Optional[Iterable[pb.Evaluation]] = None,
        sleep: float = 0.1,
        max_ops_per_transaction: int = 1000,
        max_queue_size: int = 1000,
        enable_prometheus: bool = False,
        retry_delay_sec: float = DEFAULT_RETRY_DELAY_SEC,
        retry_allowance: int = DEFAULT_RETRY_ALLOWANCE,
    ) -> None:
        """
        :param db: A function to initiate a new database session.
        :param event_queue: Receives a ``SpanInsertEvent`` after spans are inserted
            and every event yielded by the annotation queue inserters.
        :param initial_batch_of_spans: Initial batch of spans to insert, as
            ``(span, project_name)`` pairs.
        :param initial_batch_of_evaluations: Initial batch of evaluations to insert.
        :param sleep: The time to sleep between bulk insertions
        :param max_ops_per_transaction: The maximum number of operations to dequeue from
            the operations queue for each transaction. Also used as the chunk size
            when inserting spans and evaluations.
        :param max_queue_size: The maximum length of the operations queue.
        :param enable_prometheus: Whether Prometheus is enabled.
        :param retry_delay_sec: Forwarded to the annotation queue inserters.
        :param retry_allowance: Forwarded to the annotation queue inserters.
        """
        self._db = db
        self._running = False
        self._sleep = sleep
        self._max_ops_per_transaction = max_ops_per_transaction
        # The queue is created in __aenter__, not here.
        self._operations: Optional[Queue[DataManipulation]] = None
        self._max_queue_size = max_queue_size
        self._spans: list[tuple[Span, str]] = (
            [] if initial_batch_of_spans is None else list(initial_batch_of_spans)
        )
        self._evaluations: list[pb.Evaluation] = (
            [] if initial_batch_of_evaluations is None else list(initial_batch_of_evaluations)
        )
        self._task: Optional[asyncio.Task[None]] = None
        self._event_queue = event_queue
        self._enable_prometheus = enable_prometheus
        self._retry_delay_sec = retry_delay_sec
        self._retry_allowance = retry_allowance
        self._queue_inserters = _QueueInserters(db, self._retry_delay_sec, self._retry_allowance)

    async def __aenter__(
        self,
    ) -> tuple[
        Callable[[Any], Awaitable[None]],
        Callable[[Span, str], Awaitable[None]],
        Callable[[pb.Evaluation], Awaitable[None]],
        Callable[[DataManipulation], None],
    ]:
        """
        Create the operations queue, start the background insertion task and
        return the submission callables.

        :return: ``(enqueue, queue_span, queue_evaluation, enqueue_operation)``
        """
        self._running = True
        self._operations = Queue(maxsize=self._max_queue_size)
        self._task = asyncio.create_task(self._bulk_insert())
        return (
            self._enqueue,
            self._queue_span,
            self._queue_evaluation,
            self._enqueue_operation,
        )

    async def __aexit__(self, *args: Any) -> None:
        """
        Clear the running flag and cancel the background task.

        The task is cancelled, not awaited: this method does not wait for it
        to finish or flush anything that is still buffered.
        """
        self._running = False
        if self._task:
            self._task.cancel()
            self._task = None

    async def _enqueue(self, *items: Any) -> None:
        """Hand each item to the annotation queue inserters."""
        await self._queue_inserters.enqueue(*items)

    def _enqueue_operation(self, operation: DataManipulation) -> None:
        """
        Add an operation to the operations queue without waiting.

        ``put_nowait`` raises ``asyncio.QueueFull`` when the queue already holds
        ``max_queue_size`` items. The queue only exists after ``__aenter__``.
        """
        cast("Queue[DataManipulation]", self._operations).put_nowait(operation)

    async def _queue_span(self, span: Span, project_name: str) -> None:
        """Buffer a span, paired with its project name, for the next insertion cycle."""
        self._spans.append((span, project_name))

    async def _queue_evaluation(self, evaluation: pb.Evaluation) -> None:
        """Buffer an evaluation for the next insertion cycle."""
        self._evaluations.append(evaluation)

    async def _process_events(self, events: Iterable[Optional[DataManipulationEvent]]) -> None:
        """Placeholder with no behavior."""
        ...

    def _count_exception(self) -> None:
        """Increment the Prometheus exception counter when Prometheus is enabled."""
        if self._enable_prometheus:
            # Imported only when metrics are enabled, as elsewhere in this class.
            from phoenix.server.prometheus import BULK_LOADER_EXCEPTIONS

            BULK_LOADER_EXCEPTIONS.inc()

    async def _bulk_insert(self) -> None:
        """
        Background loop: run queued operations, then insert buffered spans,
        then buffered evaluations, then flush the annotation queue inserters,
        sleeping ``self._sleep`` seconds between cycles.
        """
        assert isinstance(self._operations, Queue)
        spans_buffer, evaluations_buffer = None, None
        # start first insert immediately if the inserter has not run recently
        while (
            self._running
            or not self._queue_inserters.empty
            or not self._operations.empty()
            or self._spans
            or self._evaluations
        ):
            if (
                self._queue_inserters.empty
                and self._operations.empty()
                and not self._spans
                and not self._evaluations
            ):
                # Nothing to do this cycle.
                await asyncio.sleep(self._sleep)
                continue
            ops_remaining = self._max_ops_per_transaction
            try:
                async with self._db() as session:
                    while ops_remaining and not self._operations.empty():
                        ops_remaining -= 1
                        op = await self._operations.get()
                        try:
                            # Each operation runs in its own nested transaction so a
                            # failure does not affect the other operations.
                            async with session.begin_nested():
                                await op(session)
                        except Exception as e:
                            self._count_exception()
                            logger.exception(str(e))
            except Exception:
                # A failure while opening or closing the session must not end the
                # background task; log it and carry on, as the span and evaluation
                # insertion methods do.
                self._count_exception()
                logger.exception("Failed to execute queued operations")
            # It's important to grab the buffers at the same time so there's
            # no race condition, since an eval insertion will fail if the span
            # it references doesn't exist. Grabbing the eval buffer later may
            # include an eval whose span is in the queue but missed being
            # included in the span buffer that was grabbed previously.
            if self._spans:
                spans_buffer = self._spans
                self._spans = []
            if self._evaluations:
                evaluations_buffer = self._evaluations
                self._evaluations = []
            # Spans should be inserted before the evaluations, since an evaluation
            # insertion will fail if the span it references doesn't exist.
            if spans_buffer:
                await self._insert_spans(spans_buffer)
                spans_buffer = None
            if evaluations_buffer:
                await self._insert_evaluations(evaluations_buffer)
                evaluations_buffer = None
            async for event in self._queue_inserters.insert():
                self._event_queue.put(event)
            await asyncio.sleep(self._sleep)

    async def _insert_spans(self, spans: list[tuple[Span, str]]) -> None:
        """
        Insert spans in chunks of ``max_ops_per_transaction``, one session per
        chunk, then put a ``SpanInsertEvent`` with the affected project row ids
        on the event queue.

        A span that fails to insert is logged and skipped.
        """
        project_ids = set()
        for i in range(0, len(spans), self._max_ops_per_transaction):
            try:
                start = perf_counter()
                async with self._db() as session:
                    for span, project_name in islice(spans, i, i + self._max_ops_per_transaction):
                        if self._enable_prometheus:
                            from phoenix.server.prometheus import BULK_LOADER_SPAN_INSERTIONS

                            BULK_LOADER_SPAN_INSERTIONS.inc()
                        result: Optional[SpanInsertionEvent] = None
                        try:
                            async with session.begin_nested():
                                result = await insert_span(session, span, project_name)
                        except Exception:
                            self._count_exception()
                            logger.exception(
                                f"Failed to insert span with span_id={span.context.span_id}"
                            )
                        if result is not None:
                            project_ids.add(result.project_rowid)
                if self._enable_prometheus:
                    from phoenix.server.prometheus import BULK_LOADER_INSERTION_TIME

                    BULK_LOADER_INSERTION_TIME.observe(perf_counter() - start)
            except Exception:
                self._count_exception()
                logger.exception("Failed to insert spans")
        self._event_queue.put(SpanInsertEvent(tuple(project_ids)))

    async def _insert_evaluations(self, evaluations: list[pb.Evaluation]) -> None:
        """
        Insert evaluations in chunks of ``max_ops_per_transaction``, one session
        per chunk.

        An evaluation that fails to insert is logged and skipped.
        """
        for i in range(0, len(evaluations), self._max_ops_per_transaction):
            try:
                start = perf_counter()
                async with self._db() as session:
                    for evaluation in islice(evaluations, i, i + self._max_ops_per_transaction):
                        if self._enable_prometheus:
                            from phoenix.server.prometheus import BULK_LOADER_EVALUATION_INSERTIONS

                            BULK_LOADER_EVALUATION_INSERTIONS.inc()
                        try:
                            async with session.begin_nested():
                                await insert_evaluation(session, evaluation)
                        except InsertEvaluationError as error:
                            self._count_exception()
                            logger.exception(f"Failed to insert evaluation: {str(error)}")
                        except Exception:
                            # Handle any other failure per evaluation too, as span
                            # insertion does, so one bad evaluation does not abort
                            # the rest of the chunk.
                            self._count_exception()
                            logger.exception("Failed to insert evaluation")
                if self._enable_prometheus:
                    from phoenix.server.prometheus import BULK_LOADER_INSERTION_TIME

                    BULK_LOADER_INSERTION_TIME.observe(perf_counter() - start)
            except Exception:
                self._count_exception()
                logger.exception("Failed to insert evaluations")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class _QueueInserters:
    """
    Groups the span, trace and document annotation queue inserters and routes
    enqueued items to the matching one by type.
    """

    def __init__(
        self,
        db: DbSessionFactory,
        retry_delay_sec: float = DEFAULT_RETRY_DELAY_SEC,
        retry_allowance: int = DEFAULT_RETRY_ALLOWANCE,
    ) -> None:
        """
        :param db: A function to initiate a new database session.
        :param retry_delay_sec: Passed to each queue inserter.
        :param retry_allowance: Passed to each queue inserter.
        """
        self._db = db
        self._span_annotations = SpanAnnotationQueueInserter(db, retry_delay_sec, retry_allowance)
        self._trace_annotations = TraceAnnotationQueueInserter(db, retry_delay_sec, retry_allowance)
        self._document_annotations = DocumentAnnotationQueueInserter(
            db, retry_delay_sec, retry_allowance
        )
        self._queues = (
            self._span_annotations,
            self._trace_annotations,
            self._document_annotations,
        )

    async def insert(self) -> AsyncIterator[DmlEvent]:
        """
        Run ``insert()`` on every non-empty queue inserter and yield the
        resulting events as each insertion completes.
        """
        if self.empty:
            return
        pending = [inserter.insert() for inserter in self._queues if not inserter.empty]
        for finished in as_completed(pending):
            events = cast(Optional[list[DmlEvent]], await finished)
            if not events:
                continue
            for event in events:
                yield event

    @property
    def empty(self) -> bool:
        """Whether every queue inserter reports itself as empty."""
        for inserter in self._queues:
            if not inserter.empty:
                return False
        return True

    async def enqueue(self, *items: Any) -> None:
        """Dispatch each item, in order, to the inserter registered for its type."""
        for entry in items:
            await self._enqueue(entry)

    # Fallback for item types with no registered handler: does nothing.
    @singledispatchmethod
    async def _enqueue(self, item: Any) -> None: ...

    @_enqueue.register(Precursors.SpanAnnotation)
    @_enqueue.register(Insertables.SpanAnnotation)
    async def _(self, item: Precursors.SpanAnnotation) -> None:
        await self._span_annotations.enqueue(item)

    @_enqueue.register(Precursors.TraceAnnotation)
    @_enqueue.register(Insertables.TraceAnnotation)
    async def _(self, item: Precursors.TraceAnnotation) -> None:
        await self._trace_annotations.enqueue(item)

    @_enqueue.register(Precursors.DocumentAnnotation)
    @_enqueue.register(Insertables.DocumentAnnotation)
    async def _(self, item: Precursors.DocumentAnnotation) -> None:
        await self._document_annotations.enqueue(item)
|