arize-phoenix 5.5.2__py3-none-any.whl → 5.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.6.0.dist-info}/METADATA +3 -6
- {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.6.0.dist-info}/RECORD +171 -171
- phoenix/config.py +8 -8
- phoenix/core/model.py +3 -3
- phoenix/core/model_schema.py +41 -50
- phoenix/core/model_schema_adapter.py +17 -16
- phoenix/datetime_utils.py +2 -2
- phoenix/db/bulk_inserter.py +10 -20
- phoenix/db/engines.py +2 -1
- phoenix/db/enums.py +2 -2
- phoenix/db/helpers.py +8 -7
- phoenix/db/insertion/dataset.py +9 -19
- phoenix/db/insertion/document_annotation.py +14 -13
- phoenix/db/insertion/helpers.py +6 -16
- phoenix/db/insertion/span_annotation.py +14 -13
- phoenix/db/insertion/trace_annotation.py +14 -13
- phoenix/db/insertion/types.py +19 -30
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +8 -8
- phoenix/db/models.py +28 -28
- phoenix/experiments/evaluators/base.py +2 -1
- phoenix/experiments/evaluators/code_evaluators.py +4 -5
- phoenix/experiments/evaluators/llm_evaluators.py +157 -4
- phoenix/experiments/evaluators/utils.py +3 -2
- phoenix/experiments/functions.py +10 -21
- phoenix/experiments/tracing.py +2 -1
- phoenix/experiments/types.py +20 -29
- phoenix/experiments/utils.py +2 -1
- phoenix/inferences/errors.py +6 -5
- phoenix/inferences/fixtures.py +6 -5
- phoenix/inferences/inferences.py +37 -37
- phoenix/inferences/schema.py +11 -10
- phoenix/inferences/validation.py +13 -14
- phoenix/logging/_formatter.py +3 -3
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +2 -1
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +2 -2
- phoenix/pointcloud/clustering.py +3 -4
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/umap_parameters.py +2 -1
- phoenix/server/api/dataloaders/annotation_summaries.py +12 -19
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +2 -2
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +3 -2
- phoenix/server/api/dataloaders/dataset_example_revisions.py +3 -8
- phoenix/server/api/dataloaders/dataset_example_spans.py +2 -5
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +12 -18
- phoenix/server/api/dataloaders/document_evaluations.py +3 -7
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +6 -13
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +4 -8
- phoenix/server/api/dataloaders/experiment_error_rates.py +2 -5
- phoenix/server/api/dataloaders/experiment_run_annotations.py +3 -7
- phoenix/server/api/dataloaders/experiment_run_counts.py +1 -5
- phoenix/server/api/dataloaders/experiment_sequence_number.py +2 -5
- phoenix/server/api/dataloaders/latency_ms_quantile.py +21 -30
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +7 -13
- phoenix/server/api/dataloaders/project_by_name.py +3 -3
- phoenix/server/api/dataloaders/record_counts.py +11 -18
- phoenix/server/api/dataloaders/span_annotations.py +3 -7
- phoenix/server/api/dataloaders/span_dataset_examples.py +3 -8
- phoenix/server/api/dataloaders/span_descendants.py +3 -7
- phoenix/server/api/dataloaders/span_projects.py +2 -2
- phoenix/server/api/dataloaders/token_counts.py +12 -19
- phoenix/server/api/dataloaders/trace_row_ids.py +3 -7
- phoenix/server/api/dataloaders/user_roles.py +3 -3
- phoenix/server/api/dataloaders/users.py +3 -3
- phoenix/server/api/helpers/__init__.py +4 -3
- phoenix/server/api/helpers/dataset_helpers.py +10 -9
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +2 -2
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +2 -2
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +13 -1
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +1 -3
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +2 -2
- phoenix/server/api/input_types/DeleteExperimentsInput.py +1 -3
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +2 -2
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +2 -2
- phoenix/server/api/mutations/dataset_mutations.py +4 -4
- phoenix/server/api/mutations/experiment_mutations.py +1 -2
- phoenix/server/api/mutations/export_events_mutations.py +7 -7
- phoenix/server/api/mutations/span_annotations_mutations.py +4 -4
- phoenix/server/api/mutations/trace_annotations_mutations.py +4 -4
- phoenix/server/api/mutations/user_mutations.py +4 -4
- phoenix/server/api/openapi/schema.py +2 -2
- phoenix/server/api/queries.py +20 -20
- phoenix/server/api/routers/oauth2.py +4 -4
- phoenix/server/api/routers/v1/datasets.py +22 -36
- phoenix/server/api/routers/v1/evaluations.py +6 -5
- phoenix/server/api/routers/v1/experiment_evaluations.py +2 -2
- phoenix/server/api/routers/v1/experiment_runs.py +2 -2
- phoenix/server/api/routers/v1/experiments.py +4 -4
- phoenix/server/api/routers/v1/spans.py +13 -12
- phoenix/server/api/routers/v1/traces.py +5 -5
- phoenix/server/api/routers/v1/utils.py +5 -5
- phoenix/server/api/subscriptions.py +284 -162
- phoenix/server/api/types/AnnotationSummary.py +3 -3
- phoenix/server/api/types/Cluster.py +8 -7
- phoenix/server/api/types/Dataset.py +5 -4
- phoenix/server/api/types/Dimension.py +3 -3
- phoenix/server/api/types/DocumentEvaluationSummary.py +8 -7
- phoenix/server/api/types/EmbeddingDimension.py +6 -5
- phoenix/server/api/types/EvaluationSummary.py +3 -3
- phoenix/server/api/types/Event.py +7 -7
- phoenix/server/api/types/Experiment.py +3 -3
- phoenix/server/api/types/ExperimentComparison.py +2 -4
- phoenix/server/api/types/Inferences.py +9 -8
- phoenix/server/api/types/InferencesRole.py +2 -2
- phoenix/server/api/types/Model.py +2 -2
- phoenix/server/api/types/Project.py +11 -18
- phoenix/server/api/types/Segments.py +3 -3
- phoenix/server/api/types/Span.py +8 -7
- phoenix/server/api/types/TimeSeries.py +8 -7
- phoenix/server/api/types/Trace.py +2 -2
- phoenix/server/api/types/UMAPPoints.py +6 -6
- phoenix/server/api/types/User.py +3 -3
- phoenix/server/api/types/node.py +1 -3
- phoenix/server/api/types/pagination.py +4 -4
- phoenix/server/api/utils.py +2 -4
- phoenix/server/app.py +16 -25
- phoenix/server/bearer_auth.py +4 -10
- phoenix/server/dml_event.py +3 -3
- phoenix/server/dml_event_handler.py +10 -24
- phoenix/server/grpc_server.py +3 -2
- phoenix/server/jwt_store.py +22 -21
- phoenix/server/main.py +3 -3
- phoenix/server/oauth2.py +3 -2
- phoenix/server/rate_limiters.py +5 -8
- phoenix/server/static/.vite/manifest.json +31 -31
- phoenix/server/static/assets/components-C70HJiXz.js +1612 -0
- phoenix/server/static/assets/{index-DCzakdJq.js → index-DLe1Oo3l.js} +2 -2
- phoenix/server/static/assets/{pages-CAL1FDMt.js → pages-C8-Sl7JI.js} +269 -434
- phoenix/server/static/assets/{vendor-6IcPAw_j.js → vendor-CtqfhlbC.js} +6 -6
- phoenix/server/static/assets/{vendor-arizeai-DRZuoyuF.js → vendor-arizeai-C_3SBz56.js} +2 -2
- phoenix/server/static/assets/{vendor-codemirror-DVE2_WBr.js → vendor-codemirror-wfdk9cjp.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-DwrexFA4.js → vendor-recharts-BiVnSv90.js} +1 -1
- phoenix/server/thread_server.py +1 -1
- phoenix/server/types.py +17 -29
- phoenix/services.py +4 -3
- phoenix/session/client.py +12 -24
- phoenix/session/data_extractor.py +3 -3
- phoenix/session/evaluation.py +1 -2
- phoenix/session/session.py +11 -20
- phoenix/trace/attributes.py +16 -28
- phoenix/trace/dsl/filter.py +17 -21
- phoenix/trace/dsl/helpers.py +3 -3
- phoenix/trace/dsl/query.py +13 -22
- phoenix/trace/fixtures.py +11 -17
- phoenix/trace/otel.py +5 -15
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +2 -2
- phoenix/trace/span_evaluations.py +9 -8
- phoenix/trace/span_json_decoder.py +3 -3
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +6 -5
- phoenix/trace/utils.py +6 -6
- phoenix/utilities/deprecation.py +3 -2
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +2 -1
- phoenix/utilities/logging.py +2 -2
- phoenix/utilities/project.py +1 -1
- phoenix/utilities/re.py +3 -4
- phoenix/utilities/template_formatters.py +5 -4
- phoenix/version.py +1 -1
- phoenix/server/static/assets/components-hX0LgYz3.js +0 -1428
- {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.6.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.6.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.6.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional, Union, cast
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import strawberry
|
|
@@ -22,11 +22,11 @@ class AnnotationSummary:
|
|
|
22
22
|
return cast(int, self.df.record_count.sum())
|
|
23
23
|
|
|
24
24
|
@strawberry.field
|
|
25
|
-
def labels(self) ->
|
|
25
|
+
def labels(self) -> list[str]:
|
|
26
26
|
return self.df.label.dropna().tolist()
|
|
27
27
|
|
|
28
28
|
@strawberry.field
|
|
29
|
-
def label_fractions(self) ->
|
|
29
|
+
def label_fractions(self) -> list[LabelFraction]:
|
|
30
30
|
if not (n := self.df.label_count.sum()):
|
|
31
31
|
return []
|
|
32
32
|
return [
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from collections import Counter, defaultdict
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Mapping
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
import strawberry
|
|
5
6
|
from strawberry import ID
|
|
@@ -22,7 +23,7 @@ class Cluster:
|
|
|
22
23
|
description="The ID of the cluster",
|
|
23
24
|
)
|
|
24
25
|
|
|
25
|
-
event_ids:
|
|
26
|
+
event_ids: list[ID] = strawberry.field(
|
|
26
27
|
description="The event IDs of the points in the cluster",
|
|
27
28
|
)
|
|
28
29
|
|
|
@@ -94,7 +95,7 @@ class Cluster:
|
|
|
94
95
|
metric: DataQualityMetricInput,
|
|
95
96
|
) -> DatasetValues:
|
|
96
97
|
model = info.context.model
|
|
97
|
-
row_ids:
|
|
98
|
+
row_ids: dict[InferencesRole, list[int]] = defaultdict(list)
|
|
98
99
|
for row_id, inferences_role in map(unpack_event_id, self.event_ids):
|
|
99
100
|
if not isinstance(inferences_role, InferencesRole):
|
|
100
101
|
continue
|
|
@@ -120,7 +121,7 @@ class Cluster:
|
|
|
120
121
|
metric: PerformanceMetricInput,
|
|
121
122
|
) -> DatasetValues:
|
|
122
123
|
model = info.context.model
|
|
123
|
-
row_ids:
|
|
124
|
+
row_ids: dict[InferencesRole, list[int]] = defaultdict(list)
|
|
124
125
|
for row_id, inferences_role in map(unpack_event_id, self.event_ids):
|
|
125
126
|
if not isinstance(inferences_role, InferencesRole):
|
|
126
127
|
continue
|
|
@@ -139,15 +140,15 @@ class Cluster:
|
|
|
139
140
|
|
|
140
141
|
|
|
141
142
|
def to_gql_clusters(
|
|
142
|
-
clustered_events: Mapping[str,
|
|
143
|
-
) ->
|
|
143
|
+
clustered_events: Mapping[str, set[ID]],
|
|
144
|
+
) -> list[Cluster]:
|
|
144
145
|
"""
|
|
145
146
|
Converts a dictionary of event IDs to cluster IDs to a list of clusters
|
|
146
147
|
for the graphQL response
|
|
147
148
|
|
|
148
149
|
Parameters
|
|
149
150
|
----------
|
|
150
|
-
clustered_events: Mapping[str,
|
|
151
|
+
clustered_events: Mapping[str, set[ID]]
|
|
151
152
|
A mapping of cluster ID to its set of event IDs
|
|
152
153
|
"""
|
|
153
154
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
from collections.abc import AsyncIterable
|
|
1
2
|
from datetime import datetime
|
|
2
|
-
from typing import
|
|
3
|
+
from typing import ClassVar, Optional, cast
|
|
3
4
|
|
|
4
5
|
import strawberry
|
|
5
6
|
from sqlalchemy import and_, func, select
|
|
@@ -27,7 +28,7 @@ from phoenix.server.api.types.SortDir import SortDir
|
|
|
27
28
|
|
|
28
29
|
@strawberry.type
|
|
29
30
|
class Dataset(Node):
|
|
30
|
-
_table: ClassVar[
|
|
31
|
+
_table: ClassVar[type[models.Base]] = models.Experiment
|
|
31
32
|
id_attr: NodeID[int]
|
|
32
33
|
name: str
|
|
33
34
|
description: Optional[str]
|
|
@@ -233,7 +234,7 @@ class Dataset(Node):
|
|
|
233
234
|
experiments = [
|
|
234
235
|
to_gql_experiment(experiment, sequence_number)
|
|
235
236
|
async for experiment, sequence_number in cast(
|
|
236
|
-
AsyncIterable[
|
|
237
|
+
AsyncIterable[tuple[models.Experiment, int]],
|
|
237
238
|
await session.stream(query),
|
|
238
239
|
)
|
|
239
240
|
]
|
|
@@ -242,7 +243,7 @@ class Dataset(Node):
|
|
|
242
243
|
@strawberry.field
|
|
243
244
|
async def experiment_annotation_summaries(
|
|
244
245
|
self, info: Info[Context, None]
|
|
245
|
-
) ->
|
|
246
|
+
) -> list[ExperimentAnnotationSummary]:
|
|
246
247
|
dataset_id = self.id_attr
|
|
247
248
|
query = (
|
|
248
249
|
select(
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
|
-
from typing import Any,
|
|
2
|
+
from typing import Any, Optional
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import strawberry
|
|
@@ -123,7 +123,7 @@ class Dimension(Node):
|
|
|
123
123
|
" Missing values are excluded. Non-categorical dimensions return an empty list."
|
|
124
124
|
)
|
|
125
125
|
) # type: ignore # https://github.com/strawberry-graphql/strawberry/issues/1929
|
|
126
|
-
def categories(self) ->
|
|
126
|
+
def categories(self) -> list[str]:
|
|
127
127
|
return list(self.dimension.categories)
|
|
128
128
|
|
|
129
129
|
@strawberry.field(
|
|
@@ -250,7 +250,7 @@ class Dimension(Node):
|
|
|
250
250
|
if isinstance(binning_method, binning.IntervalBinning) and binning_method.bins is not None:
|
|
251
251
|
all_bins = all_bins.union(binning_method.bins)
|
|
252
252
|
for bin in all_bins:
|
|
253
|
-
values:
|
|
253
|
+
values: dict[ms.InferencesRole, Any] = defaultdict(lambda: None)
|
|
254
254
|
for role in ms.InferencesRole:
|
|
255
255
|
if model[role].empty:
|
|
256
256
|
continue
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import math
|
|
2
|
+
from collections.abc import Iterable
|
|
2
3
|
from functools import cached_property
|
|
3
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Optional
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
6
7
|
import strawberry
|
|
@@ -24,8 +25,8 @@ class DocumentEvaluationSummary:
|
|
|
24
25
|
) -> None:
|
|
25
26
|
self.evaluation_name = evaluation_name
|
|
26
27
|
self.metrics_collection = pd.Series(metrics_collection, dtype=object)
|
|
27
|
-
self._cached_average_ndcg_results:
|
|
28
|
-
self._cached_average_precision_results:
|
|
28
|
+
self._cached_average_ndcg_results: dict[Optional[int], tuple[float, int]] = {}
|
|
29
|
+
self._cached_average_precision_results: dict[Optional[int], tuple[float, int]] = {}
|
|
29
30
|
|
|
30
31
|
@strawberry.field
|
|
31
32
|
def average_ndcg(self, k: Optional[int] = UNSET) -> Optional[float]:
|
|
@@ -67,7 +68,7 @@ class DocumentEvaluationSummary:
|
|
|
67
68
|
_, count = self._average_hit
|
|
68
69
|
return count
|
|
69
70
|
|
|
70
|
-
def _average_ndcg(self, k: Optional[int] = None) ->
|
|
71
|
+
def _average_ndcg(self, k: Optional[int] = None) -> tuple[float, int]:
|
|
71
72
|
if (result := self._cached_average_ndcg_results.get(k)) is not None:
|
|
72
73
|
return result
|
|
73
74
|
values = self.metrics_collection.apply(lambda m: m.ndcg(k))
|
|
@@ -75,7 +76,7 @@ class DocumentEvaluationSummary:
|
|
|
75
76
|
self._cached_average_ndcg_results[k] = result
|
|
76
77
|
return result
|
|
77
78
|
|
|
78
|
-
def _average_precision(self, k: Optional[int] = None) ->
|
|
79
|
+
def _average_precision(self, k: Optional[int] = None) -> tuple[float, int]:
|
|
79
80
|
if (result := self._cached_average_precision_results.get(k)) is not None:
|
|
80
81
|
return result
|
|
81
82
|
values = self.metrics_collection.apply(lambda m: m.precision(k))
|
|
@@ -84,11 +85,11 @@ class DocumentEvaluationSummary:
|
|
|
84
85
|
return result
|
|
85
86
|
|
|
86
87
|
@cached_property
|
|
87
|
-
def _average_reciprocal_rank(self) ->
|
|
88
|
+
def _average_reciprocal_rank(self) -> tuple[float, int]:
|
|
88
89
|
values = self.metrics_collection.apply(lambda m: m.reciprocal_rank())
|
|
89
90
|
return values.mean(), values.count()
|
|
90
91
|
|
|
91
92
|
@cached_property
|
|
92
|
-
def _average_hit(self) ->
|
|
93
|
+
def _average_hit(self) -> tuple[float, int]:
|
|
93
94
|
values = self.metrics_collection.apply(lambda m: m.hit())
|
|
94
95
|
return values.mean(), values.count()
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
|
+
from collections.abc import Iterable, Iterator
|
|
2
3
|
from datetime import timedelta
|
|
3
4
|
from itertools import chain, repeat
|
|
4
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, Optional, Union, cast
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
7
8
|
import numpy.typing as npt
|
|
@@ -313,8 +314,8 @@ class EmbeddingDimension(Node):
|
|
|
313
314
|
] = DEFAULT_CLUSTER_SELECTION_EPSILON,
|
|
314
315
|
) -> UMAPPoints:
|
|
315
316
|
model = info.context.model
|
|
316
|
-
data:
|
|
317
|
-
retrievals:
|
|
317
|
+
data: dict[ID, npt.NDArray[np.float64]] = {}
|
|
318
|
+
retrievals: list[tuple[ID, Any, Any]] = []
|
|
318
319
|
for inferences in model[Inferences]:
|
|
319
320
|
inferences_id = inferences.role
|
|
320
321
|
row_id_start, row_id_stop = 0, len(inferences)
|
|
@@ -353,7 +354,7 @@ class EmbeddingDimension(Node):
|
|
|
353
354
|
)
|
|
354
355
|
)
|
|
355
356
|
|
|
356
|
-
context_retrievals:
|
|
357
|
+
context_retrievals: list[Retrieval] = []
|
|
357
358
|
if isinstance(
|
|
358
359
|
self.dimension,
|
|
359
360
|
ms.RetrievalEmbeddingDimension,
|
|
@@ -414,7 +415,7 @@ class EmbeddingDimension(Node):
|
|
|
414
415
|
),
|
|
415
416
|
).generate(data, n_components=n_components)
|
|
416
417
|
|
|
417
|
-
points:
|
|
418
|
+
points: dict[Union[InferencesRole, AncillaryInferencesRole], list[UMAPPoint]] = defaultdict(
|
|
418
419
|
list
|
|
419
420
|
)
|
|
420
421
|
for event_id, vector in vectors.items():
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional, Union, cast
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import strawberry
|
|
@@ -22,11 +22,11 @@ class EvaluationSummary:
|
|
|
22
22
|
return cast(int, self.df.record_count.sum())
|
|
23
23
|
|
|
24
24
|
@strawberry.field
|
|
25
|
-
def labels(self) ->
|
|
25
|
+
def labels(self) -> list[str]:
|
|
26
26
|
return self.df.label.dropna().tolist()
|
|
27
27
|
|
|
28
28
|
@strawberry.field
|
|
29
|
-
def label_fractions(self) ->
|
|
29
|
+
def label_fractions(self) -> list[LabelFraction]:
|
|
30
30
|
if not (n := self.df.label_count.sum()):
|
|
31
31
|
return []
|
|
32
32
|
return [
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import math
|
|
2
2
|
from collections import defaultdict
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Optional, Union, cast
|
|
4
4
|
|
|
5
5
|
import strawberry
|
|
6
6
|
from strawberry import ID
|
|
@@ -28,7 +28,7 @@ from .PromptResponse import PromptResponse
|
|
|
28
28
|
class Event:
|
|
29
29
|
id: strawberry.ID
|
|
30
30
|
eventMetadata: EventMetadata
|
|
31
|
-
dimensions:
|
|
31
|
+
dimensions: list[DimensionWithValue]
|
|
32
32
|
prompt_and_response: Optional[PromptResponse] = strawberry.field(
|
|
33
33
|
description="The prompt and response pair associated with the event",
|
|
34
34
|
default=GqlValueMediator(),
|
|
@@ -53,7 +53,7 @@ def create_event_id(
|
|
|
53
53
|
|
|
54
54
|
def unpack_event_id(
|
|
55
55
|
event_id: ID,
|
|
56
|
-
) ->
|
|
56
|
+
) -> tuple[int, Union[InferencesRole, AncillaryInferencesRole]]:
|
|
57
57
|
row_id_str, inferences_role_str = str(event_id).split(":")
|
|
58
58
|
row_id = int(row_id_str)
|
|
59
59
|
inferences_role = STR_TO_INFEREENCES_ROLE[inferences_role_str]
|
|
@@ -61,12 +61,12 @@ def unpack_event_id(
|
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
def parse_event_ids_by_inferences_role(
|
|
64
|
-
event_ids:
|
|
65
|
-
) ->
|
|
64
|
+
event_ids: list[ID],
|
|
65
|
+
) -> dict[Union[InferencesRole, AncillaryInferencesRole], list[int]]:
|
|
66
66
|
"""
|
|
67
67
|
Parses event IDs and returns the corresponding row indexes.
|
|
68
68
|
"""
|
|
69
|
-
row_indexes:
|
|
69
|
+
row_indexes: dict[Union[InferencesRole, AncillaryInferencesRole], list[int]] = defaultdict(list)
|
|
70
70
|
for event_id in event_ids:
|
|
71
71
|
row_id, inferences_role = unpack_event_id(event_id)
|
|
72
72
|
row_indexes[inferences_role].append(row_id)
|
|
@@ -76,7 +76,7 @@ def parse_event_ids_by_inferences_role(
|
|
|
76
76
|
def create_event(
|
|
77
77
|
event_id: ID,
|
|
78
78
|
event: ms.Event,
|
|
79
|
-
dimensions:
|
|
79
|
+
dimensions: list[Dimension],
|
|
80
80
|
is_document_record: bool = False,
|
|
81
81
|
) -> Event:
|
|
82
82
|
"""
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
|
-
from typing import ClassVar,
|
|
2
|
+
from typing import ClassVar, Optional
|
|
3
3
|
|
|
4
4
|
import strawberry
|
|
5
5
|
from sqlalchemy import select
|
|
@@ -23,7 +23,7 @@ from phoenix.server.api.types.Project import Project
|
|
|
23
23
|
|
|
24
24
|
@strawberry.type
|
|
25
25
|
class Experiment(Node):
|
|
26
|
-
_table: ClassVar[
|
|
26
|
+
_table: ClassVar[type[models.Base]] = models.Experiment
|
|
27
27
|
cached_sequence_number: Private[Optional[int]] = None
|
|
28
28
|
id_attr: NodeID[int]
|
|
29
29
|
name: str
|
|
@@ -84,7 +84,7 @@ class Experiment(Node):
|
|
|
84
84
|
@strawberry.field
|
|
85
85
|
async def annotation_summaries(
|
|
86
86
|
self, info: Info[Context, None]
|
|
87
|
-
) ->
|
|
87
|
+
) -> list[ExperimentAnnotationSummary]:
|
|
88
88
|
experiment_id = self.id_attr
|
|
89
89
|
return [
|
|
90
90
|
ExperimentAnnotationSummary(
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import List
|
|
2
|
-
|
|
3
1
|
import strawberry
|
|
4
2
|
from strawberry.relay import GlobalID
|
|
5
3
|
|
|
@@ -10,10 +8,10 @@ from phoenix.server.api.types.ExperimentRun import ExperimentRun
|
|
|
10
8
|
@strawberry.type
|
|
11
9
|
class RunComparisonItem:
|
|
12
10
|
experiment_id: GlobalID
|
|
13
|
-
runs:
|
|
11
|
+
runs: list[ExperimentRun]
|
|
14
12
|
|
|
15
13
|
|
|
16
14
|
@strawberry.type
|
|
17
15
|
class ExperimentComparison:
|
|
18
16
|
example: DatasetExample
|
|
19
|
-
run_comparison_items:
|
|
17
|
+
run_comparison_items: list[RunComparisonItem]
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
1
2
|
from datetime import datetime
|
|
2
|
-
from typing import
|
|
3
|
+
from typing import Optional, Union
|
|
3
4
|
|
|
4
5
|
import strawberry
|
|
5
6
|
from strawberry import ID, UNSET
|
|
@@ -30,9 +31,9 @@ class Inferences:
|
|
|
30
31
|
@strawberry.field
|
|
31
32
|
def events(
|
|
32
33
|
self,
|
|
33
|
-
event_ids:
|
|
34
|
-
dimensions: Optional[
|
|
35
|
-
) ->
|
|
34
|
+
event_ids: list[ID],
|
|
35
|
+
dimensions: Optional[list[DimensionInput]] = UNSET,
|
|
36
|
+
) -> list[Event]:
|
|
36
37
|
"""
|
|
37
38
|
Returns events for specific event IDs and dimensions. If no input
|
|
38
39
|
dimensions are provided, returns all features and tags.
|
|
@@ -62,16 +63,16 @@ class Inferences:
|
|
|
62
63
|
|
|
63
64
|
def _get_requested_features_and_tags(
|
|
64
65
|
core_dimensions: Iterable[ScalarDimension],
|
|
65
|
-
requested_dimension_names: Optional[
|
|
66
|
-
) ->
|
|
66
|
+
requested_dimension_names: Optional[set[str]] = UNSET,
|
|
67
|
+
) -> list[Dimension]:
|
|
67
68
|
"""
|
|
68
69
|
Returns requested features and tags as a list of strawberry Inferences. If no
|
|
69
70
|
dimensions are explicitly requested, returns all features and tags.
|
|
70
71
|
"""
|
|
71
|
-
requested_features_and_tags:
|
|
72
|
+
requested_features_and_tags: list[Dimension] = []
|
|
72
73
|
for id, dim in enumerate(core_dimensions):
|
|
73
74
|
is_requested = (
|
|
74
|
-
not isinstance(requested_dimension_names,
|
|
75
|
+
not isinstance(requested_dimension_names, set)
|
|
75
76
|
) or dim.name in requested_dimension_names
|
|
76
77
|
is_feature_or_tag = dim.role in (FEATURE, TAG)
|
|
77
78
|
if is_requested and is_feature_or_tag:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Union
|
|
3
3
|
|
|
4
4
|
import strawberry
|
|
5
5
|
|
|
@@ -16,7 +16,7 @@ class AncillaryInferencesRole(Enum):
|
|
|
16
16
|
corpus = "InferencesRole.CORPUS"
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
STR_TO_INFEREENCES_ROLE:
|
|
19
|
+
STR_TO_INFEREENCES_ROLE: dict[str, Union[InferencesRole, AncillaryInferencesRole]] = {
|
|
20
20
|
str(InferencesRole.primary.value): InferencesRole.primary,
|
|
21
21
|
str(InferencesRole.reference.value): InferencesRole.reference,
|
|
22
22
|
str(AncillaryInferencesRole.corpus.value): AncillaryInferencesRole.corpus,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional
|
|
3
3
|
|
|
4
4
|
import strawberry
|
|
5
5
|
from strawberry import UNSET, Info
|
|
@@ -135,7 +135,7 @@ class Model:
|
|
|
135
135
|
async def exported_files(
|
|
136
136
|
self,
|
|
137
137
|
info: Info[Context, None],
|
|
138
|
-
) ->
|
|
138
|
+
) -> list[ExportedFile]:
|
|
139
139
|
loop = asyncio.get_running_loop()
|
|
140
140
|
return [
|
|
141
141
|
ExportedFile(file_name=path.stem)
|
|
@@ -3,9 +3,7 @@ from datetime import datetime
|
|
|
3
3
|
from typing import (
|
|
4
4
|
Any,
|
|
5
5
|
ClassVar,
|
|
6
|
-
List,
|
|
7
6
|
Optional,
|
|
8
|
-
Type,
|
|
9
7
|
)
|
|
10
8
|
|
|
11
9
|
import strawberry
|
|
@@ -39,7 +37,7 @@ from phoenix.trace.dsl import SpanFilter
|
|
|
39
37
|
|
|
40
38
|
@strawberry.type
|
|
41
39
|
class Project(Node):
|
|
42
|
-
_table: ClassVar[
|
|
40
|
+
_table: ClassVar[type[models.Base]] = models.Project
|
|
43
41
|
id_attr: NodeID[int]
|
|
44
42
|
name: str
|
|
45
43
|
gradient_start_color: str
|
|
@@ -223,18 +221,13 @@ class Project(Node):
|
|
|
223
221
|
span_records = await session.execute(stmt)
|
|
224
222
|
async for span_record in islice(span_records, first):
|
|
225
223
|
span = span_record[0]
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
sort_column=(
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
)
|
|
234
|
-
if sort_config
|
|
235
|
-
else None
|
|
236
|
-
),
|
|
237
|
-
)
|
|
224
|
+
cursor = Cursor(rowid=span.id)
|
|
225
|
+
if sort_config:
|
|
226
|
+
assert len(span_record) > 1
|
|
227
|
+
cursor.sort_column = CursorSortColumn(
|
|
228
|
+
type=sort_config.column_data_type,
|
|
229
|
+
value=span_record[1],
|
|
230
|
+
)
|
|
238
231
|
cursors_and_nodes.append((cursor, to_gql_span(span)))
|
|
239
232
|
has_next_page = True
|
|
240
233
|
try:
|
|
@@ -255,7 +248,7 @@ class Project(Node):
|
|
|
255
248
|
async def trace_annotations_names(
|
|
256
249
|
self,
|
|
257
250
|
info: Info[Context, None],
|
|
258
|
-
) ->
|
|
251
|
+
) -> list[str]:
|
|
259
252
|
stmt = (
|
|
260
253
|
select(distinct(models.TraceAnnotation.name))
|
|
261
254
|
.join(models.Trace)
|
|
@@ -271,7 +264,7 @@ class Project(Node):
|
|
|
271
264
|
async def span_annotation_names(
|
|
272
265
|
self,
|
|
273
266
|
info: Info[Context, None],
|
|
274
|
-
) ->
|
|
267
|
+
) -> list[str]:
|
|
275
268
|
stmt = (
|
|
276
269
|
select(distinct(models.SpanAnnotation.name))
|
|
277
270
|
.join(models.Span)
|
|
@@ -288,7 +281,7 @@ class Project(Node):
|
|
|
288
281
|
self,
|
|
289
282
|
info: Info[Context, None],
|
|
290
283
|
span_id: Optional[ID] = UNSET,
|
|
291
|
-
) ->
|
|
284
|
+
) -> list[str]:
|
|
292
285
|
stmt = (
|
|
293
286
|
select(distinct(models.DocumentAnnotation.name))
|
|
294
287
|
.join(models.Span)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import math
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Optional, Union, overload
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
@@ -72,12 +72,12 @@ class Segment:
|
|
|
72
72
|
default_factory=DatasetValues,
|
|
73
73
|
)
|
|
74
74
|
# TODO add support for a "z" metric list
|
|
75
|
-
# values:
|
|
75
|
+
# values: list[Optional[float]]
|
|
76
76
|
|
|
77
77
|
|
|
78
78
|
@strawberry.type
|
|
79
79
|
class Segments:
|
|
80
|
-
segments:
|
|
80
|
+
segments: list[Segment] = strawberry.field(default_factory=list)
|
|
81
81
|
total_counts: DatasetValues = strawberry.field(
|
|
82
82
|
default_factory=DatasetValues,
|
|
83
83
|
)
|
phoenix/server/api/types/Span.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from collections.abc import Mapping, Sized
|
|
2
3
|
from dataclasses import dataclass
|
|
3
4
|
from datetime import datetime
|
|
4
5
|
from enum import Enum
|
|
5
|
-
from typing import TYPE_CHECKING, Any,
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Optional, cast
|
|
6
7
|
|
|
7
8
|
import numpy as np
|
|
8
9
|
import strawberry
|
|
@@ -152,7 +153,7 @@ class Span(Node):
|
|
|
152
153
|
token_count_completion: Optional[int]
|
|
153
154
|
input: Optional[SpanIOValue]
|
|
154
155
|
output: Optional[SpanIOValue]
|
|
155
|
-
events:
|
|
156
|
+
events: list[SpanEvent]
|
|
156
157
|
cumulative_token_count_total: Optional[int] = strawberry.field(
|
|
157
158
|
description="Cumulative (prompt plus completion) token count from "
|
|
158
159
|
"self and all descendant spans (children, grandchildren, etc.)",
|
|
@@ -180,7 +181,7 @@ class Span(Node):
|
|
|
180
181
|
self,
|
|
181
182
|
info: Info[Context, None],
|
|
182
183
|
sort: Optional[SpanAnnotationSort] = UNSET,
|
|
183
|
-
) ->
|
|
184
|
+
) -> list[SpanAnnotation]:
|
|
184
185
|
span_id = self.id_attr
|
|
185
186
|
annotations = await info.context.data_loaders.span_annotations.load(span_id)
|
|
186
187
|
sort_key = SpanAnnotationColumn.name.value
|
|
@@ -201,7 +202,7 @@ class Span(Node):
|
|
|
201
202
|
"a list, and each evaluation is identified by its document's (zero-based) "
|
|
202
203
|
"index in that list."
|
|
203
204
|
) # type: ignore
|
|
204
|
-
async def document_evaluations(self, info: Info[Context, None]) ->
|
|
205
|
+
async def document_evaluations(self, info: Info[Context, None]) -> list[DocumentEvaluation]:
|
|
205
206
|
return await info.context.data_loaders.document_evaluations.load(self.id_attr)
|
|
206
207
|
|
|
207
208
|
@strawberry.field(
|
|
@@ -211,7 +212,7 @@ class Span(Node):
|
|
|
211
212
|
self,
|
|
212
213
|
info: Info[Context, None],
|
|
213
214
|
evaluation_name: Optional[str] = UNSET,
|
|
214
|
-
) ->
|
|
215
|
+
) -> list[DocumentRetrievalMetrics]:
|
|
215
216
|
if not self.num_documents:
|
|
216
217
|
return []
|
|
217
218
|
return await info.context.data_loaders.document_retrieval_metrics.load(
|
|
@@ -224,7 +225,7 @@ class Span(Node):
|
|
|
224
225
|
async def descendants(
|
|
225
226
|
self,
|
|
226
227
|
info: Info[Context, None],
|
|
227
|
-
) ->
|
|
228
|
+
) -> list["Span"]:
|
|
228
229
|
span_id = str(self.context.span_id)
|
|
229
230
|
spans = await info.context.data_loaders.span_descendants.load(span_id)
|
|
230
231
|
return [to_gql_span(span) for span in spans]
|
|
@@ -292,7 +293,7 @@ class Span(Node):
|
|
|
292
293
|
|
|
293
294
|
|
|
294
295
|
def to_gql_span(span: models.Span) -> Span:
|
|
295
|
-
events:
|
|
296
|
+
events: list[SpanEvent] = list(map(SpanEvent.from_dict, span.events))
|
|
296
297
|
input_value = cast(Optional[str], get_attribute_value(span.attributes, INPUT_VALUE))
|
|
297
298
|
output_value = cast(Optional[str], get_attribute_value(span.attributes, OUTPUT_VALUE))
|
|
298
299
|
retrieval_documents = get_attribute_value(span.attributes, RETRIEVAL_DOCUMENTS)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
1
2
|
from dataclasses import replace
|
|
2
3
|
from datetime import datetime, timedelta
|
|
3
4
|
from functools import total_ordering
|
|
4
|
-
from typing import
|
|
5
|
+
from typing import Optional, Union, cast
|
|
5
6
|
|
|
6
7
|
import pandas as pd
|
|
7
8
|
import strawberry
|
|
@@ -39,7 +40,7 @@ def to_gql_datapoints(
|
|
|
39
40
|
df: pd.DataFrame,
|
|
40
41
|
metric: Metric,
|
|
41
42
|
timestamps: Iterable[datetime],
|
|
42
|
-
) ->
|
|
43
|
+
) -> list[TimeSeriesDataPoint]:
|
|
43
44
|
data = []
|
|
44
45
|
for timestamp in timestamps:
|
|
45
46
|
try:
|
|
@@ -59,7 +60,7 @@ def to_gql_datapoints(
|
|
|
59
60
|
class TimeSeries:
|
|
60
61
|
"""A collection of data points over time"""
|
|
61
62
|
|
|
62
|
-
data:
|
|
63
|
+
data: list[TimeSeriesDataPoint]
|
|
63
64
|
|
|
64
65
|
|
|
65
66
|
def get_timeseries_data(
|
|
@@ -67,7 +68,7 @@ def get_timeseries_data(
|
|
|
67
68
|
metric: Metric,
|
|
68
69
|
time_range: TimeRange,
|
|
69
70
|
granularity: Granularity,
|
|
70
|
-
) ->
|
|
71
|
+
) -> list[TimeSeriesDataPoint]:
|
|
71
72
|
return df.pipe(
|
|
72
73
|
timeseries(
|
|
73
74
|
start_time=time_range.start,
|
|
@@ -98,7 +99,7 @@ def get_data_quality_timeseries_data(
|
|
|
98
99
|
time_range: TimeRange,
|
|
99
100
|
granularity: Granularity,
|
|
100
101
|
inferences_role: InferencesRole,
|
|
101
|
-
) ->
|
|
102
|
+
) -> list[TimeSeriesDataPoint]:
|
|
102
103
|
metric_instance = metric.value()
|
|
103
104
|
if isinstance(metric_instance, UnaryOperator):
|
|
104
105
|
metric_instance = replace(
|
|
@@ -128,7 +129,7 @@ def get_drift_timeseries_data(
|
|
|
128
129
|
time_range: TimeRange,
|
|
129
130
|
granularity: Granularity,
|
|
130
131
|
reference_data: pd.DataFrame,
|
|
131
|
-
) ->
|
|
132
|
+
) -> list[TimeSeriesDataPoint]:
|
|
132
133
|
metric_instance = metric.value()
|
|
133
134
|
metric_instance = replace(
|
|
134
135
|
metric_instance,
|
|
@@ -163,7 +164,7 @@ def ensure_timeseries_parameters(
|
|
|
163
164
|
inferences: Inferences,
|
|
164
165
|
time_range: Optional[TimeRange] = UNSET,
|
|
165
166
|
granularity: Optional[Granularity] = UNSET,
|
|
166
|
-
) ->
|
|
167
|
+
) -> tuple[TimeRange, Granularity]:
|
|
167
168
|
if not isinstance(time_range, TimeRange):
|
|
168
169
|
start, stop = inferences.time_range
|
|
169
170
|
time_range = TimeRange(start=start, end=stop)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
5
|
import strawberry
|
|
6
6
|
from sqlalchemy import desc, select
|
|
@@ -69,7 +69,7 @@ class Trace(Node):
|
|
|
69
69
|
self,
|
|
70
70
|
info: Info[Context, None],
|
|
71
71
|
sort: Optional[TraceAnnotationSort] = None,
|
|
72
|
-
) ->
|
|
72
|
+
) -> list[TraceAnnotation]:
|
|
73
73
|
async with info.context.db() as session:
|
|
74
74
|
stmt = select(models.TraceAnnotation).filter_by(span_rowid=self.id_attr)
|
|
75
75
|
if sort:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Union
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import numpy.typing as npt
|
|
@@ -57,8 +57,8 @@ class UMAPPoint:
|
|
|
57
57
|
|
|
58
58
|
@strawberry.type
|
|
59
59
|
class UMAPPoints:
|
|
60
|
-
data:
|
|
61
|
-
reference_data:
|
|
62
|
-
clusters:
|
|
63
|
-
corpus_data:
|
|
64
|
-
context_retrievals:
|
|
60
|
+
data: list[UMAPPoint]
|
|
61
|
+
reference_data: list[UMAPPoint]
|
|
62
|
+
clusters: list[Cluster]
|
|
63
|
+
corpus_data: list[UMAPPoint] = strawberry.field(default_factory=list)
|
|
64
|
+
context_retrievals: list[Retrieval] = strawberry.field(default_factory=list)
|