arize-phoenix 4.4.4rc6__py3-none-any.whl → 4.5.0__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic.
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/METADATA +8 -14
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/RECORD +58 -122
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/WHEEL +1 -1
- phoenix/__init__.py +27 -0
- phoenix/config.py +7 -42
- phoenix/core/model.py +25 -25
- phoenix/core/model_schema.py +62 -64
- phoenix/core/model_schema_adapter.py +25 -27
- phoenix/datetime_utils.py +0 -4
- phoenix/db/bulk_inserter.py +14 -54
- phoenix/db/insertion/evaluation.py +10 -10
- phoenix/db/insertion/helpers.py +14 -17
- phoenix/db/insertion/span.py +3 -3
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +28 -2
- phoenix/db/models.py +4 -236
- phoenix/inferences/fixtures.py +23 -23
- phoenix/inferences/inferences.py +7 -7
- phoenix/inferences/validation.py +1 -1
- phoenix/server/api/context.py +0 -20
- phoenix/server/api/dataloaders/__init__.py +0 -20
- phoenix/server/api/dataloaders/span_descendants.py +3 -2
- phoenix/server/api/routers/v1/__init__.py +2 -77
- phoenix/server/api/routers/v1/evaluations.py +13 -8
- phoenix/server/api/routers/v1/spans.py +5 -9
- phoenix/server/api/routers/v1/traces.py +4 -1
- phoenix/server/api/schema.py +303 -2
- phoenix/server/api/types/Cluster.py +19 -19
- phoenix/server/api/types/Dataset.py +63 -282
- phoenix/server/api/types/DatasetRole.py +23 -0
- phoenix/server/api/types/Dimension.py +29 -30
- phoenix/server/api/types/EmbeddingDimension.py +34 -40
- phoenix/server/api/types/Event.py +16 -16
- phoenix/server/api/{mutations/export_events_mutations.py → types/ExportEventsMutation.py} +14 -17
- phoenix/server/api/types/Model.py +42 -43
- phoenix/server/api/types/Project.py +12 -26
- phoenix/server/api/types/Span.py +2 -79
- phoenix/server/api/types/TimeSeries.py +6 -6
- phoenix/server/api/types/Trace.py +4 -15
- phoenix/server/api/types/UMAPPoints.py +1 -1
- phoenix/server/api/types/node.py +111 -5
- phoenix/server/api/types/pagination.py +52 -10
- phoenix/server/app.py +49 -103
- phoenix/server/main.py +27 -49
- phoenix/server/openapi/docs.py +0 -3
- phoenix/server/static/index.js +1384 -2390
- phoenix/server/templates/index.html +0 -1
- phoenix/services.py +15 -15
- phoenix/session/client.py +23 -611
- phoenix/session/session.py +37 -47
- phoenix/trace/exporter.py +9 -14
- phoenix/trace/fixtures.py +7 -133
- phoenix/trace/schemas.py +2 -1
- phoenix/trace/span_evaluations.py +3 -3
- phoenix/trace/trace_dataset.py +6 -6
- phoenix/version.py +1 -1
- phoenix/db/insertion/dataset.py +0 -237
- phoenix/db/migrations/types.py +0 -29
- phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -291
- phoenix/experiments/__init__.py +0 -6
- phoenix/experiments/evaluators/__init__.py +0 -29
- phoenix/experiments/evaluators/base.py +0 -153
- phoenix/experiments/evaluators/code_evaluators.py +0 -99
- phoenix/experiments/evaluators/llm_evaluators.py +0 -244
- phoenix/experiments/evaluators/utils.py +0 -189
- phoenix/experiments/functions.py +0 -616
- phoenix/experiments/tracing.py +0 -85
- phoenix/experiments/types.py +0 -722
- phoenix/experiments/utils.py +0 -9
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +0 -54
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -100
- phoenix/server/api/dataloaders/dataset_example_spans.py +0 -43
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -85
- phoenix/server/api/dataloaders/experiment_error_rates.py +0 -43
- phoenix/server/api/dataloaders/experiment_run_counts.py +0 -42
- phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -49
- phoenix/server/api/dataloaders/project_by_name.py +0 -31
- phoenix/server/api/dataloaders/span_projects.py +0 -33
- phoenix/server/api/dataloaders/trace_row_ids.py +0 -39
- phoenix/server/api/helpers/dataset_helpers.py +0 -179
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -16
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -14
- phoenix/server/api/input_types/ClearProjectInput.py +0 -15
- phoenix/server/api/input_types/CreateDatasetInput.py +0 -12
- phoenix/server/api/input_types/DatasetExampleInput.py +0 -14
- phoenix/server/api/input_types/DatasetSort.py +0 -17
- phoenix/server/api/input_types/DatasetVersionSort.py +0 -16
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -13
- phoenix/server/api/input_types/DeleteDatasetInput.py +0 -7
- phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -9
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -35
- phoenix/server/api/input_types/PatchDatasetInput.py +0 -14
- phoenix/server/api/mutations/__init__.py +0 -13
- phoenix/server/api/mutations/auth.py +0 -11
- phoenix/server/api/mutations/dataset_mutations.py +0 -520
- phoenix/server/api/mutations/experiment_mutations.py +0 -65
- phoenix/server/api/mutations/project_mutations.py +0 -47
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +0 -6
- phoenix/server/api/openapi/schema.py +0 -16
- phoenix/server/api/queries.py +0 -503
- phoenix/server/api/routers/v1/dataset_examples.py +0 -178
- phoenix/server/api/routers/v1/datasets.py +0 -965
- phoenix/server/api/routers/v1/experiment_evaluations.py +0 -65
- phoenix/server/api/routers/v1/experiment_runs.py +0 -96
- phoenix/server/api/routers/v1/experiments.py +0 -174
- phoenix/server/api/types/AnnotatorKind.py +0 -10
- phoenix/server/api/types/CreateDatasetPayload.py +0 -8
- phoenix/server/api/types/DatasetExample.py +0 -85
- phoenix/server/api/types/DatasetExampleRevision.py +0 -34
- phoenix/server/api/types/DatasetVersion.py +0 -14
- phoenix/server/api/types/ExampleRevisionInterface.py +0 -14
- phoenix/server/api/types/Experiment.py +0 -147
- phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -13
- phoenix/server/api/types/ExperimentComparison.py +0 -19
- phoenix/server/api/types/ExperimentRun.py +0 -91
- phoenix/server/api/types/ExperimentRunAnnotation.py +0 -57
- phoenix/server/api/types/Inferences.py +0 -80
- phoenix/server/api/types/InferencesRole.py +0 -23
- phoenix/utilities/json.py +0 -61
- phoenix/utilities/re.py +0 -50
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/server/api/{helpers/__init__.py → helpers.py} +0 -0
phoenix/server/api/types/EmbeddingDimension.py

```diff
@@ -8,7 +8,6 @@ import numpy.typing as npt
 import pandas as pd
 import strawberry
 from strawberry import UNSET
-from strawberry.relay import GlobalID, Node, NodeID
 from strawberry.scalars import ID
 from strawberry.types import Info
 from typing_extensions import Annotated

@@ -23,7 +22,7 @@ from phoenix.core.model_schema import (
 PRIMARY,
 PROMPT,
 REFERENCE,
-
+Dataset,
 )
 from phoenix.metrics.timeseries import row_interval_from_sorted_time_index
 from phoenix.pointcloud.clustering import Hdbscan

@@ -32,7 +31,7 @@ from phoenix.pointcloud.projectors import Umap
 from phoenix.server.api.context import Context
 from phoenix.server.api.input_types.TimeRange import TimeRange
 from phoenix.server.api.types.Cluster import to_gql_clusters
-from phoenix.server.api.types.
+from phoenix.server.api.types.DatasetRole import AncillaryDatasetRole, DatasetRole
 from phoenix.server.api.types.VectorDriftMetricEnum import VectorDriftMetric

 from ..input_types.Granularity import Granularity

@@ -40,6 +39,7 @@ from .DataQualityMetric import DataQualityMetric
 from .EmbeddingMetadata import EmbeddingMetadata
 from .Event import create_event_id, unpack_event_id
 from .EventMetadata import EventMetadata
+from .node import GlobalID, Node
 from .Retrieval import Retrieval
 from .TimeSeries import (
 DataQualityTimeSeries,

@@ -70,7 +70,6 @@ CORPUS = "CORPUS"
 class EmbeddingDimension(Node):
 """A embedding dimension of a model. Represents unstructured data"""

-id_attr: NodeID[int]
 name: str
 dimension: strawberry.Private[ms.EmbeddingDimension]

@@ -156,16 +155,16 @@ class EmbeddingDimension(Node):
 metric: DataQualityMetric,
 time_range: TimeRange,
 granularity: Granularity,
-
-Optional[
+dataset_role: Annotated[
+Optional[DatasetRole],
 strawberry.argument(
 description="The dataset (primary or reference) to query",
 ),
-] =
+] = DatasetRole.primary,
 ) -> DataQualityTimeSeries:
-if not isinstance(
-
-dataset = info.context.model[
+if not isinstance(dataset_role, DatasetRole):
+dataset_role = DatasetRole.primary
+dataset = info.context.model[dataset_role.value]
 time_range, granularity = ensure_timeseries_parameters(
 dataset,
 time_range,

@@ -177,7 +176,7 @@ class EmbeddingDimension(Node):
 metric,
 time_range,
 granularity,
-
+dataset_role,
 )
 )

@@ -315,16 +314,16 @@ class EmbeddingDimension(Node):
 model = info.context.model
 data: Dict[ID, npt.NDArray[np.float64]] = {}
 retrievals: List[Tuple[ID, Any, Any]] = []
-for
-
-row_id_start, row_id_stop = 0, len(
-if
+for dataset in model[Dataset]:
+dataset_id = dataset.role
+row_id_start, row_id_stop = 0, len(dataset)
+if dataset_id is PRIMARY:
 row_id_start, row_id_stop = row_interval_from_sorted_time_index(
-time_index=cast(pd.DatetimeIndex,
+time_index=cast(pd.DatetimeIndex, dataset.index),
 time_start=time_range.start,
 time_stop=time_range.end,
 )
-vector_column = self.dimension[
+vector_column = self.dimension[dataset_id]
 samples_collected = 0
 for row_id in _row_indices(
 row_id_start,

@@ -338,7 +337,7 @@ class EmbeddingDimension(Node):
 # of dunder method __len__.
 if not hasattr(embedding_vector, "__len__"):
 continue
-event_id = create_event_id(row_id,
+event_id = create_event_id(row_id, dataset_id)
 data[event_id] = embedding_vector
 samples_collected += 1
 if isinstance(

@@ -348,8 +347,8 @@ class EmbeddingDimension(Node):
 retrievals.append(
 (
 event_id,
-self.dimension.context_retrieval_ids(
-self.dimension.context_retrieval_scores(
+self.dimension.context_retrieval_ids(dataset).iloc[row_id],
+self.dimension.context_retrieval_scores(dataset).iloc[row_id],
 )
 )

@@ -358,13 +357,13 @@ class EmbeddingDimension(Node):
 self.dimension,
 ms.RetrievalEmbeddingDimension,
 ) and (corpus := info.context.corpus):
-
-for row_id, document_embedding_vector in enumerate(
+corpus_dataset = corpus[PRIMARY]
+for row_id, document_embedding_vector in enumerate(corpus_dataset[PROMPT]):
 if not hasattr(document_embedding_vector, "__len__"):
 continue
-event_id = create_event_id(row_id,
+event_id = create_event_id(row_id, AncillaryDatasetRole.corpus)
 data[event_id] = document_embedding_vector
-corpus_primary_key =
+corpus_primary_key = corpus_dataset.primary_key
 for event_id, retrieval_ids, retrieval_scores in retrievals:
 if not isinstance(retrieval_ids, Iterable):
 continue

@@ -386,7 +385,7 @@ class EmbeddingDimension(Node):
 )
 except KeyError:
 continue
-document_embedding_vector =
+document_embedding_vector = corpus_dataset[PROMPT].iloc[document_row_id]
 if not hasattr(document_embedding_vector, "__len__"):
 continue
 context_retrievals.append(

@@ -394,7 +393,7 @@ class EmbeddingDimension(Node):
 query_id=event_id,
 document_id=create_event_id(
 document_row_id,
-
+AncillaryDatasetRole.corpus,
 ),
 relevance=document_score,
 )

@@ -414,13 +413,11 @@ class EmbeddingDimension(Node):
 ),
 ).generate(data, n_components=n_components)

-points: Dict[Union[
-list
-)
+points: Dict[Union[DatasetRole, AncillaryDatasetRole], List[UMAPPoint]] = defaultdict(list)
 for event_id, vector in vectors.items():
-row_id,
-if isinstance(
-dataset = model[
+row_id, dataset_role = unpack_event_id(event_id)
+if isinstance(dataset_role, DatasetRole):
+dataset = model[dataset_role.value]
 embedding_metadata = EmbeddingMetadata(
 prediction_id=dataset[PREDICTION_ID][row_id],
 link_to_data=dataset[self.dimension.link_to_data][row_id],

@@ -436,12 +433,9 @@ class EmbeddingDimension(Node):
 )
 else:
 continue
-points[
+points[dataset_role].append(
 UMAPPoint(
-id=GlobalID(
-type_name=f"{type(self).__name__}:{str(inferences_role)}",
-node_id=str(row_id),
-),
+id=GlobalID(f"{type(self).__name__}:{str(dataset_role)}", row_id),
 event_id=event_id,
 coordinates=to_gql_coordinates(vector),
 event_metadata=EventMetadata(

@@ -455,12 +449,12 @@ class EmbeddingDimension(Node):
 )

 return UMAPPoints(
-data=points[
-reference_data=points[
+data=points[DatasetRole.primary],
+reference_data=points[DatasetRole.reference],
 clusters=to_gql_clusters(
 clustered_events=clustered_events,
 ),
-corpus_data=points[
+corpus_data=points[AncillaryDatasetRole.corpus],
 context_retrievals=context_retrievals,
 )
```
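The UMAP resolver above keeps the same shape after the rename: projected points are accumulated in a defaultdict keyed by DatasetRole or AncillaryDatasetRole and then split into the primary, reference, and corpus buckets of the UMAPPoints payload. A minimal sketch of that grouping pattern, using simplified stand-in enums and a toy UMAPPoint rather than the actual Phoenix classes:

```python
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Union

# Simplified stand-ins for the types named in the diff; not the Phoenix classes.
class DatasetRole(Enum):
    primary = "primary"
    reference = "reference"

class AncillaryDatasetRole(Enum):
    corpus = "corpus"

@dataclass
class UMAPPoint:
    event_id: str
    coordinates: List[float]

def bucket_points(
    projected: Dict[str, List[float]],
    role_of: Dict[str, Union[DatasetRole, AncillaryDatasetRole]],
) -> Dict[Union[DatasetRole, AncillaryDatasetRole], List[UMAPPoint]]:
    # Group each projected point under the role its event came from,
    # mirroring `points[dataset_role].append(UMAPPoint(...))` in the diff.
    points: Dict[Union[DatasetRole, AncillaryDatasetRole], List[UMAPPoint]] = defaultdict(list)
    for event_id, vector in projected.items():
        points[role_of[event_id]].append(UMAPPoint(event_id=event_id, coordinates=vector))
    return points

buckets = bucket_points(
    {"0:primary": [0.1, 0.2], "0:reference": [0.3, 0.4], "0:corpus": [0.5, 0.6]},
    {
        "0:primary": DatasetRole.primary,
        "0:reference": DatasetRole.reference,
        "0:corpus": AncillaryDatasetRole.corpus,
    },
)
primary, reference, corpus = (
    buckets[DatasetRole.primary],
    buckets[DatasetRole.reference],
    buckets[AncillaryDatasetRole.corpus],
)
```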
phoenix/server/api/types/Event.py

```diff
@@ -17,10 +17,10 @@ from phoenix.core.model_schema import (
 )

 from ..interceptor import GqlValueMediator
+from .DatasetRole import STR_TO_DATASET_ROLE, AncillaryDatasetRole, DatasetRole
 from .Dimension import Dimension
 from .DimensionWithValue import DimensionWithValue
 from .EventMetadata import EventMetadata
-from .InferencesRole import STR_TO_INFEREENCES_ROLE, AncillaryInferencesRole, InferencesRole
 from .PromptResponse import PromptResponse


@@ -41,35 +41,35 @@ class Event:

 def create_event_id(
 row_id: int,
-
+dataset_role: Union[DatasetRole, AncillaryDatasetRole, ms.DatasetRole],
 ) -> ID:
-
-
-if isinstance(
-else
+dataset_role_str = (
+dataset_role.value
+if isinstance(dataset_role, (DatasetRole, AncillaryDatasetRole))
+else dataset_role
 )
-return ID(f"{row_id}:{
+return ID(f"{row_id}:{dataset_role_str}")


 def unpack_event_id(
 event_id: ID,
-) -> Tuple[int, Union[
-row_id_str,
+) -> Tuple[int, Union[DatasetRole, AncillaryDatasetRole]]:
+row_id_str, dataset_role_str = str(event_id).split(":")
 row_id = int(row_id_str)
-
-return row_id,
+dataset_role = STR_TO_DATASET_ROLE[dataset_role_str]
+return row_id, dataset_role


-def
+def parse_event_ids_by_dataset_role(
 event_ids: List[ID],
-) -> Dict[Union[
+) -> Dict[Union[DatasetRole, AncillaryDatasetRole], List[int]]:
 """
 Parses event IDs and returns the corresponding row indexes.
 """
-row_indexes: Dict[Union[
+row_indexes: Dict[Union[DatasetRole, AncillaryDatasetRole], List[int]] = defaultdict(list)
 for event_id in event_ids:
-row_id,
-row_indexes[
+row_id, dataset_role = unpack_event_id(event_id)
+row_indexes[dataset_role].append(row_id)
 return row_indexes
```
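The Event.py changes swap the role types but keep the same event-ID scheme visible in the added lines: a row index and a role packed into one string with a ":" separator, and split back apart on the way in. A rough, self-contained sketch of that round trip, with simplified stand-in enums and plain strings in place of Phoenix's actual classes and the strawberry ID scalar:

```python
from enum import Enum
from typing import Tuple, Union

# Stand-in role enums; the real module also accepts ms.DatasetRole values.
class DatasetRole(Enum):
    primary = "primary"
    reference = "reference"

class AncillaryDatasetRole(Enum):
    corpus = "corpus"

# Lookup table used to reverse the encoding, analogous to STR_TO_DATASET_ROLE.
STR_TO_DATASET_ROLE = {role.value: role for role in (*DatasetRole, *AncillaryDatasetRole)}

def create_event_id(row_id: int, dataset_role: Union[DatasetRole, AncillaryDatasetRole]) -> str:
    # An event ID is simply "<row index>:<role>", as in the diff's f-string.
    return f"{row_id}:{dataset_role.value}"

def unpack_event_id(event_id: str) -> Tuple[int, Union[DatasetRole, AncillaryDatasetRole]]:
    # Reverse the encoding: split on ":" and look the role back up by its string form.
    row_id_str, dataset_role_str = str(event_id).split(":")
    return int(row_id_str), STR_TO_DATASET_ROLE[dataset_role_str]

assert unpack_event_id(create_event_id(12, DatasetRole.reference)) == (12, DatasetRole.reference)
```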
phoenix/server/api/{mutations/export_events_mutations.py → types/ExportEventsMutation.py}

```diff
@@ -10,16 +10,14 @@ from strawberry.types import Info
 import phoenix.core.model_schema as ms
 from phoenix.server.api.context import Context
 from phoenix.server.api.input_types.ClusterInput import ClusterInput
-from phoenix.server.api.
-from phoenix.server.api.types.Event import
+from phoenix.server.api.types.DatasetRole import AncillaryDatasetRole, DatasetRole
+from phoenix.server.api.types.Event import parse_event_ids_by_dataset_role, unpack_event_id
 from phoenix.server.api.types.ExportedFile import ExportedFile
-from phoenix.server.api.types.InferencesRole import AncillaryInferencesRole, InferencesRole


 @strawberry.type
-class
+class ExportEventsMutation:
 @strawberry.mutation(
-permission_classes=[IsAuthenticated],
 description=(
 "Given a list of event ids, export the corresponding data subset in Parquet format."
 " File name is optional, but if specified, should be without file extension. By default"

@@ -34,11 +32,11 @@ class ExportEventsMutationMixin:
 ) -> ExportedFile:
 if not isinstance(file_name, str):
 file_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-row_ids =
+row_ids = parse_event_ids_by_dataset_role(event_ids)
 exclude_corpus_row_ids = {}
-for
-if isinstance(
-exclude_corpus_row_ids[
+for dataset_role in list(row_ids.keys()):
+if isinstance(dataset_role, DatasetRole):
+exclude_corpus_row_ids[dataset_role.value] = row_ids[dataset_role]
 path = info.context.export_path
 with open(path / (file_name + ".parquet"), "wb") as fd:
 loop = asyncio.get_running_loop()

@@ -51,7 +49,6 @@ class ExportEventsMutationMixin:
 return ExportedFile(file_name=file_name)

 @strawberry.mutation(
-permission_classes=[IsAuthenticated],
 description=(
 "Given a list of clusters, export the corresponding data subset in Parquet format."
 " File name is optional, but if specified, should be without file extension. By default"

@@ -82,13 +79,13 @@ class ExportEventsMutationMixin:

 def _unpack_clusters(
 clusters: List[ClusterInput],
-) -> Tuple[Dict[ms.
-row_numbers: Dict[ms.
-cluster_ids: Dict[ms.
+) -> Tuple[Dict[ms.DatasetRole, List[int]], Dict[ms.DatasetRole, Dict[int, str]]]:
+row_numbers: Dict[ms.DatasetRole, List[int]] = defaultdict(list)
+cluster_ids: Dict[ms.DatasetRole, Dict[int, str]] = defaultdict(dict)
 for i, cluster in enumerate(clusters):
-for row_number,
-if isinstance(
+for row_number, dataset_role in map(unpack_event_id, cluster.event_ids):
+if isinstance(dataset_role, AncillaryDatasetRole):
 continue
-row_numbers[
-cluster_ids[
+row_numbers[dataset_role.value].append(row_number)
+cluster_ids[dataset_role.value][row_number] = cluster.id or str(i)
 return row_numbers, cluster_ids
```
phoenix/server/api/types/Model.py

```diff
@@ -2,7 +2,6 @@ import asyncio
 from typing import List, Optional

 import strawberry
-from strawberry.relay import Connection
 from strawberry.types import Info
 from strawberry.unset import UNSET
 from typing_extensions import Annotated

@@ -15,12 +14,12 @@ from ..input_types.DimensionFilter import DimensionFilter
 from ..input_types.Granularity import Granularity
 from ..input_types.PerformanceMetricInput import PerformanceMetricInput
 from ..input_types.TimeRange import TimeRange
+from .Dataset import Dataset
+from .DatasetRole import AncillaryDatasetRole, DatasetRole
 from .Dimension import Dimension, to_gql_dimension
 from .EmbeddingDimension import EmbeddingDimension, to_gql_embedding_dimension
 from .ExportedFile import ExportedFile
-from .
-from .InferencesRole import AncillaryInferencesRole, InferencesRole
-from .pagination import ConnectionArgs, CursorString, connection_from_list
+from .pagination import Connection, ConnectionArgs, CursorString, connection_from_list
 from .TimeSeries import (
 PerformanceTimeSeries,
 ensure_timeseries_parameters,

@@ -58,45 +57,45 @@ class Model:
 )

 @strawberry.field
-def
-
-start, stop =
-return
+def primary_dataset(self, info: Info[Context, None]) -> Dataset:
+dataset = info.context.model[PRIMARY]
+start, stop = dataset.time_range
+return Dataset(
 start_time=start,
 end_time=stop,
-record_count=len(
-
-
+record_count=len(dataset),
+dataset=dataset,
+dataset_role=DatasetRole.primary,
 model=info.context.model,
 )

 @strawberry.field
-def
-if (
+def reference_dataset(self, info: Info[Context, None]) -> Optional[Dataset]:
+if (dataset := info.context.model[REFERENCE]).empty:
 return None
-start, stop =
-return
+start, stop = dataset.time_range
+return Dataset(
 start_time=start,
 end_time=stop,
-record_count=len(
-
-
+record_count=len(dataset),
+dataset=dataset,
+dataset_role=DatasetRole.reference,
 model=info.context.model,
 )

 @strawberry.field
-def
+def corpus_dataset(self, info: Info[Context, None]) -> Optional[Dataset]:
 if info.context.corpus is None:
 return None
-if (
+if (dataset := info.context.corpus[PRIMARY]).empty:
 return None
-start, stop =
-return
+start, stop = dataset.time_range
+return Dataset(
 start_time=start,
 end_time=stop,
-record_count=len(
-
-
+record_count=len(dataset),
+dataset=dataset,
+dataset_role=AncillaryDatasetRole.corpus,
 model=info.context.corpus,
 )

@@ -157,24 +156,24 @@ class Model:
 info: Info[Context, None],
 metric: PerformanceMetricInput,
 time_range: Optional[TimeRange] = UNSET,
-
-Optional[
+dataset_role: Annotated[
+Optional[DatasetRole],
 strawberry.argument(
-description="The
+description="The dataset (primary or reference) to query",
 ),
-] =
+] = DatasetRole.primary,
 ) -> Optional[float]:
-if not isinstance(
-
+if not isinstance(dataset_role, DatasetRole):
+dataset_role = DatasetRole.primary
 model = info.context.model
-
+dataset = model[dataset_role.value]
 time_range, granularity = ensure_timeseries_parameters(
-
+dataset,
 time_range,
 )
 metric_instance = metric.metric_instance(model)
 data = get_timeseries_data(
-
+dataset,
 metric_instance,
 time_range,
 granularity,

@@ -195,26 +194,26 @@ class Model:
 metric: PerformanceMetricInput,
 time_range: TimeRange,
 granularity: Granularity,
-
-Optional[
+dataset_role: Annotated[
+Optional[DatasetRole],
 strawberry.argument(
-description="The
+description="The dataset (primary or reference) to query",
 ),
-] =
+] = DatasetRole.primary,
 ) -> PerformanceTimeSeries:
-if not isinstance(
-
+if not isinstance(dataset_role, DatasetRole):
+dataset_role = DatasetRole.primary
 model = info.context.model
-
+dataset = model[dataset_role.value]
 time_range, granularity = ensure_timeseries_parameters(
-
+dataset,
 time_range,
 granularity,
 )
 metric_instance = metric.metric_instance(model)
 return PerformanceTimeSeries(
 data=get_timeseries_data(
-
+dataset,
 metric_instance,
 time_range,
 granularity,
```
phoenix/server/api/types/Project.py

```diff
@@ -1,10 +1,6 @@
 import operator
 from datetime import datetime
-from typing import
-Any,
-List,
-Optional,
-)
+from typing import Any, List, Optional

 import strawberry
 from aioitertools.itertools import islice

@@ -12,7 +8,6 @@ from sqlalchemy import and_, desc, distinct, select
 from sqlalchemy.orm import contains_eager
 from sqlalchemy.sql.expression import tuple_
 from strawberry import ID, UNSET
-from strawberry.relay import Connection, Node, NodeID
 from strawberry.types import Info

 from phoenix.datetime_utils import right_open_time_range

@@ -22,11 +17,13 @@ from phoenix.server.api.input_types.SpanSort import SpanSort, SpanSortConfig
 from phoenix.server.api.input_types.TimeRange import TimeRange
 from phoenix.server.api.types.DocumentEvaluationSummary import DocumentEvaluationSummary
 from phoenix.server.api.types.EvaluationSummary import EvaluationSummary
+from phoenix.server.api.types.node import Node
 from phoenix.server.api.types.pagination import (
+Connection,
 Cursor,
 CursorSortColumn,
 CursorString,
-
+connections,
 )
 from phoenix.server.api.types.SortDir import SortDir
 from phoenix.server.api.types.Span import Span, to_gql_span

@@ -34,10 +31,11 @@ from phoenix.server.api.types.Trace import Trace
 from phoenix.server.api.types.ValidationResult import ValidationResult
 from phoenix.trace.dsl import SpanFilter

+SPANS_LIMIT = 1000
+

 @strawberry.type
 class Project(Node):
-id_attr: NodeID[int]
 name: str
 gradient_start_color: str
 gradient_end_color: str

@@ -151,7 +149,7 @@ class Project(Node):
 async with info.context.db() as session:
 if (id_attr := await session.scalar(stmt)) is None:
 return None
-return Trace(id_attr=id_attr
+return Trace(id_attr=id_attr)

 @strawberry.field
 async def spans(

@@ -170,7 +168,7 @@ class Project(Node):
 select(models.Span)
 .join(models.Trace)
 .where(models.Trace.project_rowid == self.id_attr)
-.options(contains_eager(models.Span.trace)
+.options(contains_eager(models.Span.trace))
 )
 if time_range:
 stmt = stmt.where(

@@ -215,7 +213,7 @@ class Project(Node):
 first + 1  # overfetch by one to determine whether there's a next page
 )
 stmt = stmt.order_by(cursor_rowid_column)
-
+data = []
 async with info.context.db() as session:
 span_records = await session.execute(stmt)
 async for span_record in islice(span_records, first):

@@ -232,15 +230,15 @@ class Project(Node):
 else None
 ),
 )
-
+data.append((cursor, to_gql_span(span)))
 has_next_page = True
 try:
 next(span_records)
 except StopIteration:
 has_next_page = False

-return
-
+return connections(
+data,
 has_previous_page=False,
 has_next_page=has_next_page,
 )

@@ -357,15 +355,3 @@ class Project(Node):
 is_valid=False,
 error_message=e.msg,
 )
-
-
-def to_gql_project(project: models.Project) -> Project:
-"""
-Converts an ORM project to a GraphQL Project.
-"""
-return Project(
-id_attr=project.id,
-name=project.name,
-gradient_start_color=project.gradient_start_color,
-gradient_end_color=project.gradient_end_color,
-)
```