arize-phoenix 3.16.0__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- arize_phoenix-7.7.0.dist-info/METADATA +261 -0
- arize_phoenix-7.7.0.dist-info/RECORD +345 -0
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
- arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
- phoenix/__init__.py +86 -14
- phoenix/auth.py +309 -0
- phoenix/config.py +675 -45
- phoenix/core/model.py +32 -30
- phoenix/core/model_schema.py +102 -109
- phoenix/core/model_schema_adapter.py +48 -45
- phoenix/datetime_utils.py +24 -3
- phoenix/db/README.md +54 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +85 -0
- phoenix/db/bulk_inserter.py +294 -0
- phoenix/db/engines.py +208 -0
- phoenix/db/enums.py +20 -0
- phoenix/db/facilitator.py +113 -0
- phoenix/db/helpers.py +159 -0
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/dataset.py +227 -0
- phoenix/db/insertion/document_annotation.py +171 -0
- phoenix/db/insertion/evaluation.py +191 -0
- phoenix/db/insertion/helpers.py +98 -0
- phoenix/db/insertion/span.py +193 -0
- phoenix/db/insertion/span_annotation.py +158 -0
- phoenix/db/insertion/trace_annotation.py +158 -0
- phoenix/db/insertion/types.py +256 -0
- phoenix/db/migrate.py +86 -0
- phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
- phoenix/db/migrations/env.py +114 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
- phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
- phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +807 -0
- phoenix/exceptions.py +5 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +158 -0
- phoenix/experiments/evaluators/code_evaluators.py +184 -0
- phoenix/experiments/evaluators/llm_evaluators.py +473 -0
- phoenix/experiments/evaluators/utils.py +236 -0
- phoenix/experiments/functions.py +772 -0
- phoenix/experiments/tracing.py +86 -0
- phoenix/experiments/types.py +726 -0
- phoenix/experiments/utils.py +25 -0
- phoenix/inferences/__init__.py +0 -0
- phoenix/{datasets → inferences}/errors.py +6 -5
- phoenix/{datasets → inferences}/fixtures.py +49 -42
- phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
- phoenix/{datasets → inferences}/schema.py +11 -11
- phoenix/{datasets → inferences}/validation.py +13 -14
- phoenix/logging/__init__.py +3 -0
- phoenix/logging/_config.py +90 -0
- phoenix/logging/_filter.py +6 -0
- phoenix/logging/_formatter.py +69 -0
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +4 -3
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +9 -3
- phoenix/pointcloud/clustering.py +5 -5
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/projectors.py +5 -6
- phoenix/pointcloud/umap_parameters.py +53 -52
- phoenix/server/api/README.md +28 -0
- phoenix/server/api/auth.py +44 -0
- phoenix/server/api/context.py +152 -9
- phoenix/server/api/dataloaders/__init__.py +91 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
- phoenix/server/api/dataloaders/document_evaluations.py +31 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
- phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/record_counts.py +116 -0
- phoenix/server/api/dataloaders/session_io.py +79 -0
- phoenix/server/api/dataloaders/session_num_traces.py +30 -0
- phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
- phoenix/server/api/dataloaders/session_token_usages.py +41 -0
- phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
- phoenix/server/api/dataloaders/span_annotations.py +26 -0
- phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +57 -0
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/token_counts.py +124 -0
- phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
- phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
- phoenix/server/api/dataloaders/user_roles.py +30 -0
- phoenix/server/api/dataloaders/users.py +33 -0
- phoenix/server/api/exceptions.py +48 -0
- phoenix/server/api/helpers/__init__.py +12 -0
- phoenix/server/api/helpers/dataset_helpers.py +217 -0
- phoenix/server/api/helpers/experiment_run_filters.py +763 -0
- phoenix/server/api/helpers/playground_clients.py +948 -0
- phoenix/server/api/helpers/playground_registry.py +70 -0
- phoenix/server/api/helpers/playground_spans.py +455 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
- phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
- phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +162 -0
- phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
- phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/SpanSort.py +134 -69
- phoenix/server/api/input_types/TemplateOptions.py +10 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/input_types/UserRoleInput.py +9 -0
- phoenix/server/api/mutations/__init__.py +28 -0
- phoenix/server/api/mutations/api_key_mutations.py +167 -0
- phoenix/server/api/mutations/chat_mutations.py +593 -0
- phoenix/server/api/mutations/dataset_mutations.py +591 -0
- phoenix/server/api/mutations/experiment_mutations.py +75 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
- phoenix/server/api/mutations/project_mutations.py +57 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
- phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
- phoenix/server/api/mutations/user_mutations.py +329 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +17 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +738 -0
- phoenix/server/api/routers/__init__.py +11 -0
- phoenix/server/api/routers/auth.py +284 -0
- phoenix/server/api/routers/embeddings.py +26 -0
- phoenix/server/api/routers/oauth2.py +488 -0
- phoenix/server/api/routers/v1/__init__.py +64 -0
- phoenix/server/api/routers/v1/datasets.py +1017 -0
- phoenix/server/api/routers/v1/evaluations.py +362 -0
- phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
- phoenix/server/api/routers/v1/experiment_runs.py +167 -0
- phoenix/server/api/routers/v1/experiments.py +308 -0
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +267 -0
- phoenix/server/api/routers/v1/traces.py +208 -0
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/schema.py +44 -247
- phoenix/server/api/subscriptions.py +597 -0
- phoenix/server/api/types/Annotation.py +21 -0
- phoenix/server/api/types/AnnotationSummary.py +55 -0
- phoenix/server/api/types/AnnotatorKind.py +16 -0
- phoenix/server/api/types/ApiKey.py +27 -0
- phoenix/server/api/types/AuthMethod.py +9 -0
- phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
- phoenix/server/api/types/Cluster.py +25 -24
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/DataQualityMetric.py +31 -13
- phoenix/server/api/types/Dataset.py +288 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +32 -31
- phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
- phoenix/server/api/types/EmbeddingDimension.py +56 -49
- phoenix/server/api/types/Evaluation.py +25 -31
- phoenix/server/api/types/EvaluationSummary.py +30 -50
- phoenix/server/api/types/Event.py +20 -20
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +152 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +17 -0
- phoenix/server/api/types/ExperimentRun.py +119 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
- phoenix/server/api/types/GenerativeModel.py +9 -0
- phoenix/server/api/types/GenerativeProvider.py +85 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/LabelFraction.py +7 -0
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +54 -54
- phoenix/server/api/types/PerformanceMetric.py +8 -5
- phoenix/server/api/types/Project.py +407 -142
- phoenix/server/api/types/ProjectSession.py +139 -0
- phoenix/server/api/types/Segments.py +4 -4
- phoenix/server/api/types/Span.py +221 -176
- phoenix/server/api/types/SpanAnnotation.py +43 -0
- phoenix/server/api/types/SpanIOValue.py +15 -0
- phoenix/server/api/types/SystemApiKey.py +9 -0
- phoenix/server/api/types/TemplateLanguage.py +10 -0
- phoenix/server/api/types/TimeSeries.py +19 -15
- phoenix/server/api/types/TokenUsage.py +11 -0
- phoenix/server/api/types/Trace.py +154 -0
- phoenix/server/api/types/TraceAnnotation.py +45 -0
- phoenix/server/api/types/UMAPPoints.py +7 -7
- phoenix/server/api/types/User.py +60 -0
- phoenix/server/api/types/UserApiKey.py +45 -0
- phoenix/server/api/types/UserRole.py +15 -0
- phoenix/server/api/types/node.py +13 -107
- phoenix/server/api/types/pagination.py +156 -57
- phoenix/server/api/utils.py +34 -0
- phoenix/server/app.py +864 -115
- phoenix/server/bearer_auth.py +163 -0
- phoenix/server/dml_event.py +136 -0
- phoenix/server/dml_event_handler.py +256 -0
- phoenix/server/email/__init__.py +0 -0
- phoenix/server/email/sender.py +97 -0
- phoenix/server/email/templates/__init__.py +0 -0
- phoenix/server/email/templates/password_reset.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/grpc_server.py +102 -0
- phoenix/server/jwt_store.py +505 -0
- phoenix/server/main.py +305 -116
- phoenix/server/oauth2.py +52 -0
- phoenix/server/openapi/__init__.py +0 -0
- phoenix/server/prometheus.py +111 -0
- phoenix/server/rate_limiters.py +188 -0
- phoenix/server/static/.vite/manifest.json +87 -0
- phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
- phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
- phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
- phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
- phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
- phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
- phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
- phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
- phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
- phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
- phoenix/server/telemetry.py +68 -0
- phoenix/server/templates/index.html +82 -23
- phoenix/server/thread_server.py +3 -3
- phoenix/server/types.py +275 -0
- phoenix/services.py +27 -18
- phoenix/session/client.py +743 -68
- phoenix/session/data_extractor.py +31 -7
- phoenix/session/evaluation.py +3 -9
- phoenix/session/session.py +263 -219
- phoenix/settings.py +22 -0
- phoenix/trace/__init__.py +2 -22
- phoenix/trace/attributes.py +338 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +663 -213
- phoenix/trace/dsl/helpers.py +73 -21
- phoenix/trace/dsl/query.py +574 -201
- phoenix/trace/exporter.py +24 -19
- phoenix/trace/fixtures.py +368 -32
- phoenix/trace/otel.py +71 -219
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +33 -11
- phoenix/trace/span_evaluations.py +21 -16
- phoenix/trace/span_json_decoder.py +6 -4
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +47 -32
- phoenix/trace/utils.py +21 -4
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/client.py +132 -0
- phoenix/utilities/deprecation.py +31 -0
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +109 -0
- phoenix/utilities/logging.py +8 -0
- phoenix/utilities/project.py +2 -2
- phoenix/utilities/re.py +49 -0
- phoenix/utilities/span_store.py +0 -23
- phoenix/utilities/template_formatters.py +99 -0
- phoenix/version.py +1 -1
- arize_phoenix-3.16.0.dist-info/METADATA +0 -495
- arize_phoenix-3.16.0.dist-info/RECORD +0 -178
- phoenix/core/project.py +0 -617
- phoenix/core/traces.py +0 -100
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -448
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/helpers.py +0 -11
- phoenix/server/api/routers/evaluation_handler.py +0 -109
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/server/api/types/DatasetRole.py +0 -23
- phoenix/server/static/index.css +0 -6
- phoenix/server/static/index.js +0 -7447
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- phoenix/trace/langchain/__init__.py +0 -3
- phoenix/trace/langchain/instrumentor.py +0 -35
- phoenix/trace/llama_index/__init__.py +0 -3
- phoenix/trace/llama_index/callback.py +0 -102
- phoenix/trace/openai/__init__.py +0 -3
- phoenix/trace/openai/instrumentor.py +0 -30
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/core/project.py
DELETED
|
@@ -1,617 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from collections import defaultdict
|
|
3
|
-
from datetime import datetime, timezone
|
|
4
|
-
from threading import RLock
|
|
5
|
-
from types import MappingProxyType
|
|
6
|
-
from typing import (
|
|
7
|
-
Any,
|
|
8
|
-
DefaultDict,
|
|
9
|
-
Dict,
|
|
10
|
-
Iterable,
|
|
11
|
-
Iterator,
|
|
12
|
-
List,
|
|
13
|
-
Mapping,
|
|
14
|
-
Optional,
|
|
15
|
-
Set,
|
|
16
|
-
Tuple,
|
|
17
|
-
Union,
|
|
18
|
-
cast,
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
import numpy as np
|
|
22
|
-
from ddsketch import DDSketch
|
|
23
|
-
from google.protobuf.json_format import MessageToDict
|
|
24
|
-
from openinference.semconv.trace import SpanAttributes
|
|
25
|
-
from pandas import DataFrame, Index, MultiIndex
|
|
26
|
-
from sortedcontainers import SortedKeyList
|
|
27
|
-
from typing_extensions import TypeAlias, assert_never
|
|
28
|
-
from wrapt import ObjectProxy
|
|
29
|
-
|
|
30
|
-
import phoenix.trace.v1 as pb
|
|
31
|
-
from phoenix.datetime_utils import right_open_time_range
|
|
32
|
-
from phoenix.trace import DocumentEvaluations, Evaluations, SpanEvaluations
|
|
33
|
-
from phoenix.trace.schemas import (
|
|
34
|
-
ComputedAttributes,
|
|
35
|
-
Span,
|
|
36
|
-
SpanID,
|
|
37
|
-
SpanStatusCode,
|
|
38
|
-
TraceID,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
logger = logging.getLogger(__name__)
|
|
42
|
-
logger.addHandler(logging.NullHandler())
|
|
43
|
-
|
|
44
|
-
END_OF_QUEUE = None # sentinel value for queue termination
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
class WrappedSpan(ObjectProxy): # type: ignore
|
|
48
|
-
"""
|
|
49
|
-
A wrapped Span object with __getitem__ and __setitem__ methods for accessing
|
|
50
|
-
computed attributes.
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
def __init__(self, span: Span) -> None:
|
|
54
|
-
super().__init__(span)
|
|
55
|
-
self._self_computed_values: Dict[ComputedAttributes, Union[float, int]] = {}
|
|
56
|
-
|
|
57
|
-
def get_computed_value(self, key: str) -> Optional[Union[float, int]]:
|
|
58
|
-
try:
|
|
59
|
-
attr = ComputedAttributes(key)
|
|
60
|
-
except Exception:
|
|
61
|
-
return None
|
|
62
|
-
return self._self_computed_values.get(attr)
|
|
63
|
-
|
|
64
|
-
def __getitem__(self, key: Union[str, ComputedAttributes]) -> Any:
|
|
65
|
-
if isinstance(key, ComputedAttributes):
|
|
66
|
-
return self._self_computed_values.get(key)
|
|
67
|
-
return self.__wrapped__.attributes.get(key)
|
|
68
|
-
|
|
69
|
-
def __setitem__(self, key: ComputedAttributes, value: Any) -> None:
|
|
70
|
-
if not isinstance(key, ComputedAttributes):
|
|
71
|
-
raise KeyError(f"{key} is not a computed value")
|
|
72
|
-
self._self_computed_values[key] = value
|
|
73
|
-
|
|
74
|
-
def __eq__(self, other: Any) -> bool:
|
|
75
|
-
return self is other
|
|
76
|
-
|
|
77
|
-
def __hash__(self) -> int:
|
|
78
|
-
return id(self)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
_ParentSpanID: TypeAlias = SpanID
|
|
82
|
-
_ChildSpanID: TypeAlias = SpanID
|
|
83
|
-
_ProjectName: TypeAlias = str
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
EvaluationName: TypeAlias = str
|
|
87
|
-
DocumentPosition: TypeAlias = int
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
class Project:
|
|
91
|
-
def __init__(self) -> None:
|
|
92
|
-
self._spans = _Spans()
|
|
93
|
-
self._evals = _Evals()
|
|
94
|
-
self._is_archived = False
|
|
95
|
-
|
|
96
|
-
@property
|
|
97
|
-
def last_updated_at(self) -> Optional[datetime]:
|
|
98
|
-
spans_last_updated_at = self._spans.last_updated_at
|
|
99
|
-
evals_last_updated_at = self._evals.last_updated_at
|
|
100
|
-
if (
|
|
101
|
-
not spans_last_updated_at
|
|
102
|
-
or evals_last_updated_at
|
|
103
|
-
and evals_last_updated_at > spans_last_updated_at
|
|
104
|
-
):
|
|
105
|
-
return evals_last_updated_at
|
|
106
|
-
return spans_last_updated_at
|
|
107
|
-
|
|
108
|
-
def add_span(self, span: Span) -> None:
|
|
109
|
-
self._spans.add(WrappedSpan(span))
|
|
110
|
-
|
|
111
|
-
def add_eval(self, pb_eval: pb.Evaluation) -> None:
|
|
112
|
-
self._evals.add(pb_eval)
|
|
113
|
-
|
|
114
|
-
def get_trace(self, trace_id: TraceID) -> Iterator[WrappedSpan]:
|
|
115
|
-
yield from self._spans.get_trace(trace_id)
|
|
116
|
-
|
|
117
|
-
def get_spans(
|
|
118
|
-
self,
|
|
119
|
-
start_time: Optional[datetime] = None,
|
|
120
|
-
stop_time: Optional[datetime] = None,
|
|
121
|
-
root_spans_only: Optional[bool] = False,
|
|
122
|
-
span_ids: Optional[Iterable[SpanID]] = None,
|
|
123
|
-
) -> Iterator[WrappedSpan]:
|
|
124
|
-
yield from self._spans.get_spans(start_time, stop_time, root_spans_only, span_ids)
|
|
125
|
-
|
|
126
|
-
def get_num_documents(self, span_id: SpanID) -> int:
|
|
127
|
-
return self._spans.get_num_documents(span_id)
|
|
128
|
-
|
|
129
|
-
def root_span_latency_ms_quantiles(self, probability: float) -> Optional[float]:
|
|
130
|
-
"""Root span latency quantiles in milliseconds"""
|
|
131
|
-
return self._spans.root_span_latency_ms_quantiles(probability)
|
|
132
|
-
|
|
133
|
-
def get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
|
|
134
|
-
yield from self._spans.get_descendant_spans(span_id)
|
|
135
|
-
|
|
136
|
-
def span_count(
|
|
137
|
-
self,
|
|
138
|
-
start_time: Optional[datetime] = None,
|
|
139
|
-
stop_time: Optional[datetime] = None,
|
|
140
|
-
) -> int:
|
|
141
|
-
return self._spans.span_count(start_time, stop_time)
|
|
142
|
-
|
|
143
|
-
def trace_count(
|
|
144
|
-
self,
|
|
145
|
-
start_time: Optional[datetime] = None,
|
|
146
|
-
stop_time: Optional[datetime] = None,
|
|
147
|
-
) -> int:
|
|
148
|
-
return self._spans.trace_count(start_time, stop_time)
|
|
149
|
-
|
|
150
|
-
@property
|
|
151
|
-
def token_count_total(self) -> int:
|
|
152
|
-
return self._spans.token_count_total
|
|
153
|
-
|
|
154
|
-
@property
|
|
155
|
-
def right_open_time_range(self) -> Tuple[Optional[datetime], Optional[datetime]]:
|
|
156
|
-
return self._spans.right_open_time_range
|
|
157
|
-
|
|
158
|
-
def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]:
|
|
159
|
-
return self._evals.get_span_evaluation(span_id, name)
|
|
160
|
-
|
|
161
|
-
def get_span_evaluation_names(self) -> List[EvaluationName]:
|
|
162
|
-
return self._evals.get_span_evaluation_names()
|
|
163
|
-
|
|
164
|
-
def get_document_evaluation_names(
|
|
165
|
-
self,
|
|
166
|
-
span_id: Optional[SpanID] = None,
|
|
167
|
-
) -> List[EvaluationName]:
|
|
168
|
-
return self._evals.get_document_evaluation_names(span_id)
|
|
169
|
-
|
|
170
|
-
def get_span_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
|
|
171
|
-
return self._evals.get_span_evaluation_labels(name)
|
|
172
|
-
|
|
173
|
-
def get_span_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
|
|
174
|
-
return self._evals.get_span_evaluation_span_ids(name)
|
|
175
|
-
|
|
176
|
-
def get_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
|
|
177
|
-
return self._evals.get_evaluations_by_span_id(span_id)
|
|
178
|
-
|
|
179
|
-
def get_document_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
|
|
180
|
-
return self._evals.get_document_evaluation_span_ids(name)
|
|
181
|
-
|
|
182
|
-
def get_document_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
|
|
183
|
-
return self._evals.get_document_evaluations_by_span_id(span_id)
|
|
184
|
-
|
|
185
|
-
def get_document_evaluation_scores(
|
|
186
|
-
self,
|
|
187
|
-
span_id: SpanID,
|
|
188
|
-
evaluation_name: str,
|
|
189
|
-
num_documents: int,
|
|
190
|
-
) -> List[float]:
|
|
191
|
-
return self._evals.get_document_evaluation_scores(span_id, evaluation_name, num_documents)
|
|
192
|
-
|
|
193
|
-
def export_evaluations(self) -> List[Evaluations]:
|
|
194
|
-
return self._evals.export_evaluations()
|
|
195
|
-
|
|
196
|
-
def archive(self) -> None:
|
|
197
|
-
self._is_archived = True
|
|
198
|
-
|
|
199
|
-
@property
|
|
200
|
-
def is_archived(self) -> bool:
|
|
201
|
-
return self._is_archived
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
class _Spans:
|
|
205
|
-
def __init__(self) -> None:
|
|
206
|
-
self._lock = RLock()
|
|
207
|
-
self._spans: Dict[SpanID, WrappedSpan] = {}
|
|
208
|
-
self._parent_span_ids: Dict[SpanID, _ParentSpanID] = {}
|
|
209
|
-
self._traces: DefaultDict[TraceID, Set[WrappedSpan]] = defaultdict(set)
|
|
210
|
-
self._child_spans: DefaultDict[SpanID, Set[WrappedSpan]] = defaultdict(set)
|
|
211
|
-
self._num_documents: DefaultDict[SpanID, int] = defaultdict(int)
|
|
212
|
-
self._start_time_sorted_spans: SortedKeyList[WrappedSpan] = SortedKeyList(
|
|
213
|
-
key=lambda span: span.start_time,
|
|
214
|
-
)
|
|
215
|
-
self._start_time_sorted_root_spans: SortedKeyList[WrappedSpan] = SortedKeyList(
|
|
216
|
-
key=lambda span: span.start_time,
|
|
217
|
-
)
|
|
218
|
-
self._latency_sorted_root_spans: SortedKeyList[WrappedSpan] = SortedKeyList(
|
|
219
|
-
key=lambda span: span[ComputedAttributes.LATENCY_MS],
|
|
220
|
-
)
|
|
221
|
-
self._root_span_latency_ms_sketch = DDSketch()
|
|
222
|
-
self._token_count_total: int = 0
|
|
223
|
-
self._last_updated_at: Optional[datetime] = None
|
|
224
|
-
|
|
225
|
-
def get_trace(self, trace_id: TraceID) -> Iterator[WrappedSpan]:
|
|
226
|
-
with self._lock:
|
|
227
|
-
# make a copy because source data can mutate during iteration
|
|
228
|
-
if not (trace := self._traces.get(trace_id)):
|
|
229
|
-
return
|
|
230
|
-
spans = tuple(trace)
|
|
231
|
-
for span in spans:
|
|
232
|
-
yield span
|
|
233
|
-
|
|
234
|
-
def get_spans(
|
|
235
|
-
self,
|
|
236
|
-
start_time: Optional[datetime] = None,
|
|
237
|
-
stop_time: Optional[datetime] = None,
|
|
238
|
-
root_spans_only: Optional[bool] = False,
|
|
239
|
-
span_ids: Optional[Iterable[SpanID]] = None,
|
|
240
|
-
) -> Iterator[WrappedSpan]:
|
|
241
|
-
if not self._spans:
|
|
242
|
-
return
|
|
243
|
-
if start_time is None or stop_time is None:
|
|
244
|
-
min_start_time, max_stop_time = cast(
|
|
245
|
-
Tuple[datetime, datetime],
|
|
246
|
-
self.right_open_time_range,
|
|
247
|
-
)
|
|
248
|
-
start_time = start_time or min_start_time
|
|
249
|
-
stop_time = stop_time or max_stop_time
|
|
250
|
-
if span_ids is not None:
|
|
251
|
-
with self._lock:
|
|
252
|
-
spans = tuple(
|
|
253
|
-
span
|
|
254
|
-
for span_id in span_ids
|
|
255
|
-
if (
|
|
256
|
-
(span := self._spans.get(span_id))
|
|
257
|
-
and start_time <= span.start_time < stop_time
|
|
258
|
-
and (not root_spans_only or span.parent_id is None)
|
|
259
|
-
)
|
|
260
|
-
)
|
|
261
|
-
else:
|
|
262
|
-
sorted_spans = (
|
|
263
|
-
self._start_time_sorted_root_spans
|
|
264
|
-
if root_spans_only
|
|
265
|
-
else self._start_time_sorted_spans
|
|
266
|
-
)
|
|
267
|
-
# make a copy because source data can mutate during iteration
|
|
268
|
-
with self._lock:
|
|
269
|
-
spans = tuple(
|
|
270
|
-
sorted_spans.irange_key(
|
|
271
|
-
start_time.astimezone(timezone.utc),
|
|
272
|
-
stop_time.astimezone(timezone.utc),
|
|
273
|
-
inclusive=(True, False),
|
|
274
|
-
reverse=True, # most recent spans first
|
|
275
|
-
)
|
|
276
|
-
)
|
|
277
|
-
for span in spans:
|
|
278
|
-
yield span
|
|
279
|
-
|
|
280
|
-
def get_num_documents(self, span_id: SpanID) -> int:
|
|
281
|
-
with self._lock:
|
|
282
|
-
return self._num_documents.get(span_id) or 0
|
|
283
|
-
|
|
284
|
-
def root_span_latency_ms_quantiles(self, probability: float) -> Optional[float]:
|
|
285
|
-
"""Root span latency quantiles in milliseconds"""
|
|
286
|
-
with self._lock:
|
|
287
|
-
return self._root_span_latency_ms_sketch.get_quantile_value(probability)
|
|
288
|
-
|
|
289
|
-
def get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
|
|
290
|
-
for span in self._get_descendant_spans(span_id):
|
|
291
|
-
yield span
|
|
292
|
-
|
|
293
|
-
def _get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
|
|
294
|
-
with self._lock:
|
|
295
|
-
# make a copy because source data can mutate during iteration
|
|
296
|
-
if not (child_spans := self._child_spans.get(span_id)):
|
|
297
|
-
return
|
|
298
|
-
spans = tuple(child_spans)
|
|
299
|
-
for child_span in spans:
|
|
300
|
-
yield child_span
|
|
301
|
-
yield from self._get_descendant_spans(child_span.context.span_id)
|
|
302
|
-
|
|
303
|
-
@property
|
|
304
|
-
def last_updated_at(self) -> Optional[datetime]:
|
|
305
|
-
return self._last_updated_at
|
|
306
|
-
|
|
307
|
-
def span_count(
|
|
308
|
-
self,
|
|
309
|
-
start_time: Optional[datetime] = None,
|
|
310
|
-
stop_time: Optional[datetime] = None,
|
|
311
|
-
) -> int:
|
|
312
|
-
_index = self._start_time_sorted_spans.bisect_key_left
|
|
313
|
-
with self._lock:
|
|
314
|
-
start: int = _index(start_time) if start_time else 0
|
|
315
|
-
stop: int = _index(stop_time) if stop_time else len(self._spans)
|
|
316
|
-
return stop - start
|
|
317
|
-
|
|
318
|
-
def trace_count(
|
|
319
|
-
self,
|
|
320
|
-
start_time: Optional[datetime] = None,
|
|
321
|
-
stop_time: Optional[datetime] = None,
|
|
322
|
-
) -> int:
|
|
323
|
-
_index = self._start_time_sorted_root_spans.bisect_key_left
|
|
324
|
-
with self._lock:
|
|
325
|
-
start: int = _index(start_time) if start_time else 0
|
|
326
|
-
stop: int = _index(stop_time) if stop_time else len(self._traces)
|
|
327
|
-
return stop - start
|
|
328
|
-
|
|
329
|
-
@property
|
|
330
|
-
def token_count_total(self) -> int:
|
|
331
|
-
return self._token_count_total
|
|
332
|
-
|
|
333
|
-
@property
|
|
334
|
-
def right_open_time_range(self) -> Tuple[Optional[datetime], Optional[datetime]]:
|
|
335
|
-
with self._lock:
|
|
336
|
-
if not self._start_time_sorted_spans:
|
|
337
|
-
return None, None
|
|
338
|
-
first_span = self._start_time_sorted_spans[0]
|
|
339
|
-
last_span = self._start_time_sorted_spans[-1]
|
|
340
|
-
min_start_time = first_span.start_time
|
|
341
|
-
max_start_time = last_span.start_time
|
|
342
|
-
return right_open_time_range(min_start_time, max_start_time)
|
|
343
|
-
|
|
344
|
-
def add(self, span: WrappedSpan) -> None:
|
|
345
|
-
with self._lock:
|
|
346
|
-
self._add_span(span)
|
|
347
|
-
|
|
348
|
-
def _add_span(self, span: WrappedSpan) -> None:
    """Index a new span and refresh every derived structure.

    A span whose id is already stored is ignored. Otherwise the span is
    linked to its parent (if any), given latency and error-count computed
    attributes, inserted into the id, trace and sorted indexes, rolled into
    the cumulative values and cached statistics, and the last-updated
    timestamp is refreshed. The caller is expected to hold the lock (the
    public ``add`` does so).
    """
    new_id = span.context.span_id
    if new_id in self._spans:
        # Spans are write-once: a repeated id is silently dropped.
        return

    parent_id = span.parent_id
    has_parent = parent_id is not None
    if has_parent:
        self._child_spans[parent_id].add(span)
        self._parent_span_ids[new_id] = parent_id

    # Computed attributes are written before the span is stored so that
    # every index below sees the fully populated span.
    began = span.start_time
    ended = span.end_time
    latency_ms = (ended - began).total_seconds() * 1000
    span[ComputedAttributes.LATENCY_MS] = latency_ms
    if not has_parent:
        self._root_span_latency_ms_sketch.add(latency_ms)
    span[ComputedAttributes.ERROR_COUNT] = (
        1 if span.status_code is SpanStatusCode.ERROR else 0
    )

    # Register the span in each index.
    self._spans[new_id] = span
    self._traces[span.context.trace_id].add(span)
    self._start_time_sorted_spans.add(span)
    if not has_parent:
        self._start_time_sorted_root_spans.add(span)
        self._latency_sorted_root_spans.add(span)
    self._propagate_cumulative_values(span)
    self._update_cached_statistics(span)

    # The timestamp tells users when the page should be refreshed.
    self._last_updated_at = datetime.now(timezone.utc)
|
|
383
|
-
|
|
384
|
-
def _update_cached_statistics(self, span: WrappedSpan) -> None:
    """Fold one span's token and document counts into the cached totals.

    A truthy total-token attribute is added to ``_token_count_total``; a
    non-empty retrieval-documents attribute adds its length to the span's
    entry in ``_num_documents``.
    """
    span_id = span.context.span_id

    token_total = span.attributes.get(SpanAttributes.LLM_TOKEN_COUNT_TOTAL)
    if token_total:
        self._token_count_total += token_total

    # A missing or empty documents attribute counts as zero documents.
    retrieved = span.attributes.get(SpanAttributes.RETRIEVAL_DOCUMENTS) or ()
    document_count = len(retrieved)
    if document_count:
        self._num_documents[span_id] += document_count
|
|
393
|
-
|
|
394
|
-
def _propagate_cumulative_values(self, span: WrappedSpan) -> None:
    """Seed the span's cumulative attributes and push them to its ancestors.

    Each cumulative attribute starts from the span's own source attribute
    (falsy values count as 0), then absorbs the cumulative value of every
    child already recorded for this span. Finally ``_update_ancestors``
    forwards the results up the parent chain.
    """
    known_children: Iterable[WrappedSpan] = (
        self._child_spans.get(span.context.span_id) or ()
    )
    for rollup_key, source_key in _CUMULATIVE_ATTRIBUTES.items():
        span[rollup_key] = span[source_key] or 0
        for descendant in known_children:
            # Updated in place on the span, one child at a time.
            span[rollup_key] += descendant[rollup_key] or 0
    self._update_ancestors(span)
|
|
401
|
-
|
|
402
|
-
def _update_ancestors(self, span: WrappedSpan) -> None:
    """Add each of the span's cumulative values to all of its ancestors."""
    origin_id = span.context.span_id
    for rollup_key in _CUMULATIVE_ATTRIBUTES:
        # Falsy values are forwarded as 0.
        self._add_value_to_span_ancestors(origin_id, rollup_key, span[rollup_key] or 0)
|
|
408
|
-
|
|
409
|
-
def _add_value_to_span_ancestors(
|
|
410
|
-
self,
|
|
411
|
-
span_id: SpanID,
|
|
412
|
-
attribute: ComputedAttributes,
|
|
413
|
-
value: float,
|
|
414
|
-
) -> None:
|
|
415
|
-
while parent_span_id := self._parent_span_ids.get(span_id):
|
|
416
|
-
if not (parent_span := self._spans.get(parent_span_id)):
|
|
417
|
-
return
|
|
418
|
-
cumulative_value = parent_span[attribute] or 0
|
|
419
|
-
parent_span[attribute] = cumulative_value + value
|
|
420
|
-
span_id = parent_span_id
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
class _Evals:
|
|
424
|
-
def __init__(self) -> None:
|
|
425
|
-
self._lock = RLock()
|
|
426
|
-
self._trace_evaluations_by_name: DefaultDict[
|
|
427
|
-
EvaluationName, Dict[TraceID, pb.Evaluation]
|
|
428
|
-
] = defaultdict(dict)
|
|
429
|
-
self._evaluations_by_trace_id: DefaultDict[TraceID, Dict[EvaluationName, pb.Evaluation]] = (
|
|
430
|
-
defaultdict(dict)
|
|
431
|
-
)
|
|
432
|
-
self._span_evaluations_by_name: DefaultDict[EvaluationName, Dict[SpanID, pb.Evaluation]] = (
|
|
433
|
-
defaultdict(dict)
|
|
434
|
-
)
|
|
435
|
-
self._evaluations_by_span_id: DefaultDict[SpanID, Dict[EvaluationName, pb.Evaluation]] = (
|
|
436
|
-
defaultdict(dict)
|
|
437
|
-
)
|
|
438
|
-
self._span_evaluation_labels: DefaultDict[EvaluationName, Set[str]] = defaultdict(set)
|
|
439
|
-
self._document_evaluations_by_span_id: DefaultDict[
|
|
440
|
-
SpanID, DefaultDict[EvaluationName, Dict[DocumentPosition, pb.Evaluation]]
|
|
441
|
-
] = defaultdict(lambda: defaultdict(dict))
|
|
442
|
-
self._document_evaluations_by_name: DefaultDict[
|
|
443
|
-
EvaluationName, DefaultDict[SpanID, Dict[DocumentPosition, pb.Evaluation]]
|
|
444
|
-
] = defaultdict(lambda: defaultdict(dict))
|
|
445
|
-
self._last_updated_at: Optional[datetime] = None
|
|
446
|
-
|
|
447
|
-
def add(self, evaluation: pb.Evaluation) -> None:
|
|
448
|
-
with self._lock:
|
|
449
|
-
self._add(evaluation)
|
|
450
|
-
|
|
451
|
-
def _add(self, evaluation: pb.Evaluation) -> None:
|
|
452
|
-
subject_id = evaluation.subject_id
|
|
453
|
-
name = evaluation.name
|
|
454
|
-
subject_id_kind = subject_id.WhichOneof("kind")
|
|
455
|
-
if subject_id_kind == "document_retrieval_id":
|
|
456
|
-
document_retrieval_id = subject_id.document_retrieval_id
|
|
457
|
-
span_id = SpanID(document_retrieval_id.span_id)
|
|
458
|
-
document_position = document_retrieval_id.document_position
|
|
459
|
-
self._document_evaluations_by_span_id[span_id][name][document_position] = evaluation
|
|
460
|
-
self._document_evaluations_by_name[name][span_id][document_position] = evaluation
|
|
461
|
-
elif subject_id_kind == "span_id":
|
|
462
|
-
span_id = SpanID(subject_id.span_id)
|
|
463
|
-
self._evaluations_by_span_id[span_id][name] = evaluation
|
|
464
|
-
self._span_evaluations_by_name[name][span_id] = evaluation
|
|
465
|
-
if evaluation.result.HasField("label"):
|
|
466
|
-
label = evaluation.result.label.value
|
|
467
|
-
self._span_evaluation_labels[name].add(label)
|
|
468
|
-
elif subject_id_kind == "trace_id":
|
|
469
|
-
trace_id = TraceID(subject_id.trace_id)
|
|
470
|
-
self._evaluations_by_trace_id[trace_id][name] = evaluation
|
|
471
|
-
self._trace_evaluations_by_name[name][trace_id] = evaluation
|
|
472
|
-
elif subject_id_kind is None:
|
|
473
|
-
logger.warning(
|
|
474
|
-
f"discarding evaluation with missing subject_id: {MessageToDict(evaluation)}"
|
|
475
|
-
)
|
|
476
|
-
else:
|
|
477
|
-
assert_never(subject_id_kind)
|
|
478
|
-
self._last_updated_at = datetime.now(timezone.utc)
|
|
479
|
-
|
|
480
|
-
@property
|
|
481
|
-
def last_updated_at(self) -> Optional[datetime]:
|
|
482
|
-
return self._last_updated_at
|
|
483
|
-
|
|
484
|
-
def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]:
|
|
485
|
-
with self._lock:
|
|
486
|
-
span_evaluations = self._evaluations_by_span_id.get(span_id)
|
|
487
|
-
return span_evaluations.get(name) if span_evaluations else None
|
|
488
|
-
|
|
489
|
-
def get_span_evaluation_names(self) -> List[EvaluationName]:
|
|
490
|
-
with self._lock:
|
|
491
|
-
return list(self._span_evaluations_by_name)
|
|
492
|
-
|
|
493
|
-
def get_document_evaluation_names(
|
|
494
|
-
self,
|
|
495
|
-
span_id: Optional[SpanID] = None,
|
|
496
|
-
) -> List[EvaluationName]:
|
|
497
|
-
with self._lock:
|
|
498
|
-
if span_id is None:
|
|
499
|
-
return list(self._document_evaluations_by_name)
|
|
500
|
-
document_evaluations = self._document_evaluations_by_span_id.get(span_id)
|
|
501
|
-
return list(document_evaluations) if document_evaluations else []
|
|
502
|
-
|
|
503
|
-
def get_span_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
|
|
504
|
-
with self._lock:
|
|
505
|
-
labels = self._span_evaluation_labels.get(name)
|
|
506
|
-
return tuple(labels) if labels else ()
|
|
507
|
-
|
|
508
|
-
def get_span_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
|
|
509
|
-
with self._lock:
|
|
510
|
-
span_evaluations = self._span_evaluations_by_name.get(name)
|
|
511
|
-
return tuple(span_evaluations.keys()) if span_evaluations else ()
|
|
512
|
-
|
|
513
|
-
def get_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
|
|
514
|
-
with self._lock:
|
|
515
|
-
evaluations = self._evaluations_by_span_id.get(span_id)
|
|
516
|
-
return list(evaluations.values()) if evaluations else []
|
|
517
|
-
|
|
518
|
-
def get_document_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
|
|
519
|
-
with self._lock:
|
|
520
|
-
document_evaluations = self._document_evaluations_by_name.get(name)
|
|
521
|
-
return tuple(document_evaluations.keys()) if document_evaluations else ()
|
|
522
|
-
|
|
523
|
-
def get_document_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
|
|
524
|
-
all_evaluations: List[pb.Evaluation] = []
|
|
525
|
-
with self._lock:
|
|
526
|
-
document_evaluations = self._document_evaluations_by_span_id.get(span_id)
|
|
527
|
-
if not document_evaluations:
|
|
528
|
-
return all_evaluations
|
|
529
|
-
for evaluations in document_evaluations.values():
|
|
530
|
-
all_evaluations.extend(evaluations.values())
|
|
531
|
-
return all_evaluations
|
|
532
|
-
|
|
533
|
-
def get_document_evaluation_scores(
|
|
534
|
-
self,
|
|
535
|
-
span_id: SpanID,
|
|
536
|
-
evaluation_name: str,
|
|
537
|
-
num_documents: int,
|
|
538
|
-
) -> List[float]:
|
|
539
|
-
# num_documents is needed as argument because the document position values
|
|
540
|
-
# are not checked during ingestion: e.g. if there exists a position value
|
|
541
|
-
# of one trillion, we would not want to create a result that large.
|
|
542
|
-
scores: List[float] = [np.nan] * num_documents
|
|
543
|
-
with self._lock:
|
|
544
|
-
document_evaluations = self._document_evaluations_by_span_id.get(span_id)
|
|
545
|
-
if not document_evaluations:
|
|
546
|
-
return scores
|
|
547
|
-
evaluations = document_evaluations.get(evaluation_name)
|
|
548
|
-
if not evaluations:
|
|
549
|
-
return scores
|
|
550
|
-
for document_position, evaluation in evaluations.items():
|
|
551
|
-
result = evaluation.result
|
|
552
|
-
if result.HasField("score") and document_position < num_documents:
|
|
553
|
-
scores[document_position] = result.score.value
|
|
554
|
-
return scores
|
|
555
|
-
|
|
556
|
-
def export_evaluations(self) -> List[Evaluations]:
|
|
557
|
-
evaluations: List[Evaluations] = []
|
|
558
|
-
evaluations.extend(self._export_span_evaluations())
|
|
559
|
-
evaluations.extend(self._export_document_evaluations())
|
|
560
|
-
return evaluations
|
|
561
|
-
|
|
562
|
-
def _export_span_evaluations(self) -> List[SpanEvaluations]:
|
|
563
|
-
span_evaluations = []
|
|
564
|
-
with self._lock:
|
|
565
|
-
span_evaluations_by_name = tuple(self._span_evaluations_by_name.items())
|
|
566
|
-
for eval_name, _span_evaluations_by_id in span_evaluations_by_name:
|
|
567
|
-
span_ids = []
|
|
568
|
-
rows = []
|
|
569
|
-
with self._lock:
|
|
570
|
-
span_evaluations_by_id = tuple(_span_evaluations_by_id.items())
|
|
571
|
-
for span_id, pb_eval in span_evaluations_by_id:
|
|
572
|
-
span_ids.append(span_id)
|
|
573
|
-
rows.append(MessageToDict(pb_eval.result))
|
|
574
|
-
dataframe = DataFrame(rows, index=Index(span_ids, name="context.span_id"))
|
|
575
|
-
span_evaluations.append(SpanEvaluations(eval_name, dataframe))
|
|
576
|
-
return span_evaluations
|
|
577
|
-
|
|
578
|
-
def _export_document_evaluations(self) -> List[DocumentEvaluations]:
|
|
579
|
-
evaluations = []
|
|
580
|
-
with self._lock:
|
|
581
|
-
document_evaluations_by_name = tuple(self._document_evaluations_by_name.items())
|
|
582
|
-
for eval_name, _document_evaluations_by_id in document_evaluations_by_name:
|
|
583
|
-
span_ids = []
|
|
584
|
-
document_positions = []
|
|
585
|
-
rows = []
|
|
586
|
-
with self._lock:
|
|
587
|
-
document_evaluations_by_id = tuple(_document_evaluations_by_id.items())
|
|
588
|
-
for span_id, _document_evaluations_by_position in document_evaluations_by_id:
|
|
589
|
-
with self._lock:
|
|
590
|
-
document_evaluations_by_position = sorted(
|
|
591
|
-
_document_evaluations_by_position.items()
|
|
592
|
-
) # ensure the evals are sorted by document position
|
|
593
|
-
for document_position, pb_eval in document_evaluations_by_position:
|
|
594
|
-
span_ids.append(span_id)
|
|
595
|
-
document_positions.append(document_position)
|
|
596
|
-
rows.append(MessageToDict(pb_eval.result))
|
|
597
|
-
dataframe = DataFrame(
|
|
598
|
-
rows,
|
|
599
|
-
index=MultiIndex.from_arrays(
|
|
600
|
-
(span_ids, document_positions),
|
|
601
|
-
names=("context.span_id", "document_position"),
|
|
602
|
-
),
|
|
603
|
-
)
|
|
604
|
-
evaluations.append(DocumentEvaluations(eval_name, dataframe))
|
|
605
|
-
return evaluations
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
# Read-only mapping from each cumulative attribute key to the attribute key
# it is paired with (three LLM token counts and the error count).
_CUMULATIVE_ATTRIBUTES: Mapping[ComputedAttributes, Union[str, ComputedAttributes]] = (
    MappingProxyType(
        dict(
            (
                (
                    ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_TOTAL,
                    SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
                ),
                (
                    ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_PROMPT,
                    SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
                ),
                (
                    ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_COMPLETION,
                    SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
                ),
                (
                    ComputedAttributes.CUMULATIVE_ERROR_COUNT,
                    ComputedAttributes.ERROR_COUNT,
                ),
            )
        )
    )
)
|