arize-phoenix 4.5.0__py3-none-any.whl → 4.6.2__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of arize-phoenix might be problematic.

Files changed (123)
  1. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/METADATA +16 -8
  2. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/RECORD +122 -58
  3. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/WHEEL +1 -1
  4. phoenix/__init__.py +0 -27
  5. phoenix/config.py +42 -7
  6. phoenix/core/model.py +25 -25
  7. phoenix/core/model_schema.py +64 -62
  8. phoenix/core/model_schema_adapter.py +27 -25
  9. phoenix/datetime_utils.py +4 -0
  10. phoenix/db/bulk_inserter.py +54 -14
  11. phoenix/db/insertion/dataset.py +237 -0
  12. phoenix/db/insertion/evaluation.py +10 -10
  13. phoenix/db/insertion/helpers.py +17 -14
  14. phoenix/db/insertion/span.py +3 -3
  15. phoenix/db/migrations/types.py +29 -0
  16. phoenix/db/migrations/versions/10460e46d750_datasets.py +291 -0
  17. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +2 -28
  18. phoenix/db/models.py +236 -4
  19. phoenix/experiments/__init__.py +6 -0
  20. phoenix/experiments/evaluators/__init__.py +29 -0
  21. phoenix/experiments/evaluators/base.py +153 -0
  22. phoenix/experiments/evaluators/code_evaluators.py +99 -0
  23. phoenix/experiments/evaluators/llm_evaluators.py +244 -0
  24. phoenix/experiments/evaluators/utils.py +186 -0
  25. phoenix/experiments/functions.py +757 -0
  26. phoenix/experiments/tracing.py +85 -0
  27. phoenix/experiments/types.py +753 -0
  28. phoenix/experiments/utils.py +24 -0
  29. phoenix/inferences/fixtures.py +23 -23
  30. phoenix/inferences/inferences.py +7 -7
  31. phoenix/inferences/validation.py +1 -1
  32. phoenix/server/api/context.py +20 -0
  33. phoenix/server/api/dataloaders/__init__.py +20 -0
  34. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  35. phoenix/server/api/dataloaders/dataset_example_revisions.py +100 -0
  36. phoenix/server/api/dataloaders/dataset_example_spans.py +43 -0
  37. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +85 -0
  38. phoenix/server/api/dataloaders/experiment_error_rates.py +43 -0
  39. phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
  40. phoenix/server/api/dataloaders/experiment_sequence_number.py +49 -0
  41. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  42. phoenix/server/api/dataloaders/span_descendants.py +2 -3
  43. phoenix/server/api/dataloaders/span_projects.py +33 -0
  44. phoenix/server/api/dataloaders/trace_row_ids.py +39 -0
  45. phoenix/server/api/helpers/dataset_helpers.py +179 -0
  46. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  47. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  48. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  49. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  50. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  51. phoenix/server/api/input_types/DatasetSort.py +17 -0
  52. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  53. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  54. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  55. phoenix/server/api/input_types/DeleteExperimentsInput.py +9 -0
  56. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  57. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  58. phoenix/server/api/mutations/__init__.py +13 -0
  59. phoenix/server/api/mutations/auth.py +11 -0
  60. phoenix/server/api/mutations/dataset_mutations.py +520 -0
  61. phoenix/server/api/mutations/experiment_mutations.py +65 -0
  62. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +17 -14
  63. phoenix/server/api/mutations/project_mutations.py +47 -0
  64. phoenix/server/api/openapi/__init__.py +0 -0
  65. phoenix/server/api/openapi/main.py +6 -0
  66. phoenix/server/api/openapi/schema.py +16 -0
  67. phoenix/server/api/queries.py +503 -0
  68. phoenix/server/api/routers/v1/__init__.py +77 -2
  69. phoenix/server/api/routers/v1/dataset_examples.py +178 -0
  70. phoenix/server/api/routers/v1/datasets.py +965 -0
  71. phoenix/server/api/routers/v1/evaluations.py +8 -13
  72. phoenix/server/api/routers/v1/experiment_evaluations.py +143 -0
  73. phoenix/server/api/routers/v1/experiment_runs.py +220 -0
  74. phoenix/server/api/routers/v1/experiments.py +302 -0
  75. phoenix/server/api/routers/v1/spans.py +9 -5
  76. phoenix/server/api/routers/v1/traces.py +1 -4
  77. phoenix/server/api/schema.py +2 -303
  78. phoenix/server/api/types/AnnotatorKind.py +10 -0
  79. phoenix/server/api/types/Cluster.py +19 -19
  80. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  81. phoenix/server/api/types/Dataset.py +282 -63
  82. phoenix/server/api/types/DatasetExample.py +85 -0
  83. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  84. phoenix/server/api/types/DatasetVersion.py +14 -0
  85. phoenix/server/api/types/Dimension.py +30 -29
  86. phoenix/server/api/types/EmbeddingDimension.py +40 -34
  87. phoenix/server/api/types/Event.py +16 -16
  88. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  89. phoenix/server/api/types/Experiment.py +147 -0
  90. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  91. phoenix/server/api/types/ExperimentComparison.py +19 -0
  92. phoenix/server/api/types/ExperimentRun.py +91 -0
  93. phoenix/server/api/types/ExperimentRunAnnotation.py +57 -0
  94. phoenix/server/api/types/Inferences.py +80 -0
  95. phoenix/server/api/types/InferencesRole.py +23 -0
  96. phoenix/server/api/types/Model.py +43 -42
  97. phoenix/server/api/types/Project.py +26 -12
  98. phoenix/server/api/types/Span.py +79 -2
  99. phoenix/server/api/types/TimeSeries.py +6 -6
  100. phoenix/server/api/types/Trace.py +15 -4
  101. phoenix/server/api/types/UMAPPoints.py +1 -1
  102. phoenix/server/api/types/node.py +5 -111
  103. phoenix/server/api/types/pagination.py +10 -52
  104. phoenix/server/app.py +103 -49
  105. phoenix/server/main.py +49 -27
  106. phoenix/server/openapi/docs.py +3 -0
  107. phoenix/server/static/index.js +2300 -1294
  108. phoenix/server/templates/index.html +1 -0
  109. phoenix/services.py +15 -15
  110. phoenix/session/client.py +581 -22
  111. phoenix/session/session.py +47 -37
  112. phoenix/trace/exporter.py +14 -9
  113. phoenix/trace/fixtures.py +133 -7
  114. phoenix/trace/schemas.py +1 -2
  115. phoenix/trace/span_evaluations.py +3 -3
  116. phoenix/trace/trace_dataset.py +6 -6
  117. phoenix/utilities/json.py +61 -0
  118. phoenix/utilities/re.py +50 -0
  119. phoenix/version.py +1 -1
  120. phoenix/server/api/types/DatasetRole.py +0 -23
  121. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/IP_NOTICE +0 -0
  122. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/LICENSE +0 -0
  123. /phoenix/server/api/{helpers.py → helpers/__init__.py} +0 -0
phoenix/server/api/types/ExperimentRun.py (new file)
@@ -0,0 +1,91 @@
+ from datetime import datetime
+ from typing import Optional
+
+ import strawberry
+ from sqlalchemy import select
+ from strawberry import UNSET
+ from strawberry.relay import Connection, GlobalID, Node, NodeID
+ from strawberry.scalars import JSON
+ from strawberry.types import Info
+
+ from phoenix.db import models
+ from phoenix.server.api.context import Context
+ from phoenix.server.api.types.ExperimentRunAnnotation import (
+     ExperimentRunAnnotation,
+     to_gql_experiment_run_annotation,
+ )
+ from phoenix.server.api.types.pagination import (
+     ConnectionArgs,
+     CursorString,
+     connection_from_list,
+ )
+ from phoenix.server.api.types.Trace import Trace
+
+
+ @strawberry.type
+ class ExperimentRun(Node):
+     id_attr: NodeID[int]
+     experiment_id: GlobalID
+     trace_id: Optional[str]
+     output: Optional[JSON]
+     start_time: datetime
+     end_time: datetime
+     error: Optional[str]
+
+     @strawberry.field
+     async def annotations(
+         self,
+         info: Info[Context, None],
+         first: Optional[int] = 50,
+         last: Optional[int] = UNSET,
+         after: Optional[CursorString] = UNSET,
+         before: Optional[CursorString] = UNSET,
+     ) -> Connection[ExperimentRunAnnotation]:
+         args = ConnectionArgs(
+             first=first,
+             after=after if isinstance(after, CursorString) else None,
+             last=last,
+             before=before if isinstance(before, CursorString) else None,
+         )
+         run_id = self.id_attr
+         async with info.context.db() as session:
+             annotations = (
+                 await session.scalars(
+                     select(models.ExperimentRunAnnotation)
+                     .where(models.ExperimentRunAnnotation.experiment_run_id == run_id)
+                     .order_by(models.ExperimentRunAnnotation.name.desc())
+                 )
+             ).all()
+         return connection_from_list(
+             [to_gql_experiment_run_annotation(annotation) for annotation in annotations], args
+         )
+
+     @strawberry.field
+     async def trace(self, info: Info) -> Optional[Trace]:
+         if not self.trace_id:
+             return None
+         dataloader = info.context.data_loaders.trace_row_ids
+         if (trace := await dataloader.load(self.trace_id)) is None:
+             return None
+         trace_rowid, project_rowid = trace
+         return Trace(id_attr=trace_rowid, trace_id=self.trace_id, project_rowid=project_rowid)
+
+
+ def to_gql_experiment_run(run: models.ExperimentRun) -> ExperimentRun:
+     """
+     Converts an ORM experiment run to a GraphQL ExperimentRun.
+     """
+
+     from phoenix.server.api.types.Experiment import Experiment
+
+     return ExperimentRun(
+         id_attr=run.id,
+         experiment_id=GlobalID(Experiment.__name__, str(run.experiment_id)),
+         trace_id=trace_id
+         if (trace := run.trace) and (trace_id := trace.trace_id) is not None
+         else None,
+         output=run.output.get("task_output"),
+         start_time=run.start_time,
+         end_time=run.end_time,
+         error=run.error,
+     )
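The new ExperimentRun type implements the Relay node interface, so a client can page through a run's annotations with an ordinary connection query. A minimal sketch, assuming a Phoenix server on localhost:6006 serving GraphQL at /graphql and strawberry's default camelCase field naming; the GlobalID is a placeholder:

# Hedged sketch: page through an ExperimentRun's annotations over GraphQL.
# Endpoint and node ID are assumptions, not values from this diff.
import json
import urllib.request

QUERY = """
query RunAnnotations($id: GlobalID!, $first: Int) {
  node(id: $id) {
    ... on ExperimentRun {
      startTime
      endTime
      error
      annotations(first: $first) {
        edges {
          node { name label score explanation }
        }
      }
    }
  }
}
"""

payload = json.dumps(
    {"query": QUERY, "variables": {"id": "RXhwZXJpbWVudFJ1bjox", "first": 10}}
).encode()
request = urllib.request.Request(
    "http://localhost:6006/graphql",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    print(json.load(response))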
phoenix/server/api/types/ExperimentRunAnnotation.py (new file)
@@ -0,0 +1,57 @@
+ from datetime import datetime
+ from typing import Optional
+
+ import strawberry
+ from strawberry import Info
+ from strawberry.relay import Node, NodeID
+ from strawberry.scalars import JSON
+
+ from phoenix.db import models
+ from phoenix.server.api.types.AnnotatorKind import AnnotatorKind
+ from phoenix.server.api.types.Trace import Trace
+
+
+ @strawberry.type
+ class ExperimentRunAnnotation(Node):
+     id_attr: NodeID[int]
+     name: str
+     annotator_kind: AnnotatorKind
+     label: Optional[str]
+     score: Optional[float]
+     explanation: Optional[str]
+     error: Optional[str]
+     metadata: JSON
+     start_time: datetime
+     end_time: datetime
+     trace_id: Optional[str]
+
+     @strawberry.field
+     async def trace(self, info: Info) -> Optional[Trace]:
+         if not self.trace_id:
+             return None
+         dataloader = info.context.data_loaders.trace_row_ids
+         if (trace := await dataloader.load(self.trace_id)) is None:
+             return None
+         trace_row_id, project_row_id = trace
+         return Trace(id_attr=trace_row_id, trace_id=self.trace_id, project_rowid=project_row_id)
+
+
+ def to_gql_experiment_run_annotation(
+     annotation: models.ExperimentRunAnnotation,
+ ) -> ExperimentRunAnnotation:
+     """
+     Converts an ORM experiment run annotation to a GraphQL ExperimentRunAnnotation.
+     """
+     return ExperimentRunAnnotation(
+         id_attr=annotation.id,
+         name=annotation.name,
+         annotator_kind=AnnotatorKind(annotation.annotator_kind),
+         label=annotation.label,
+         score=annotation.score,
+         explanation=annotation.explanation,
+         error=annotation.error,
+         metadata=annotation.metadata_,
+         start_time=annotation.start_time,
+         end_time=annotation.end_time,
+         trace_id=annotation.trace_id,
+     )
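Note the converter reads annotation.metadata_ rather than annotation.metadata: SQLAlchemy's declarative base reserves the metadata attribute for the table registry, so the column needs a trailing underscore. A hedged round-trip sketch; the ORM keyword arguments mirror exactly the attributes the converter reads, and anything beyond that (including "LLM" being a valid AnnotatorKind value) is an assumption:

# Hedged sketch: ORM annotation -> GraphQL annotation round trip.
from datetime import datetime, timezone

from phoenix.db import models
from phoenix.server.api.types.ExperimentRunAnnotation import (
    to_gql_experiment_run_annotation,
)

now = datetime.now(timezone.utc)
orm_annotation = models.ExperimentRunAnnotation(
    id=1,
    name="correctness",
    annotator_kind="LLM",  # assumed to be a valid AnnotatorKind value
    label="correct",
    score=1.0,
    explanation="The output matches the reference answer.",
    error=None,
    metadata_={},  # "metadata" is reserved by SQLAlchemy's declarative base
    start_time=now,
    end_time=now,
    trace_id=None,
)
gql_annotation = to_gql_experiment_run_annotation(orm_annotation)
assert gql_annotation.metadata == {}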
phoenix/server/api/types/Inferences.py (new file)
@@ -0,0 +1,80 @@
+ from datetime import datetime
+ from typing import Iterable, List, Optional, Set, Union
+
+ import strawberry
+ from strawberry.scalars import ID
+ from strawberry.unset import UNSET
+
+ import phoenix.core.model_schema as ms
+ from phoenix.core.model_schema import FEATURE, TAG, ScalarDimension
+
+ from ..input_types.DimensionInput import DimensionInput
+ from .Dimension import Dimension, to_gql_dimension
+ from .Event import Event, create_event, create_event_id, parse_event_ids_by_inferences_role
+ from .InferencesRole import AncillaryInferencesRole, InferencesRole
+
+
+ @strawberry.type
+ class Inferences:
+     start_time: datetime = strawberry.field(description="The start bookend of the data")
+     end_time: datetime = strawberry.field(description="The end bookend of the data")
+     record_count: int = strawberry.field(description="The record count of the data")
+     inferences: strawberry.Private[ms.Inferences]
+     inferences_role: strawberry.Private[Union[InferencesRole, AncillaryInferencesRole]]
+     model: strawberry.Private[ms.Model]
+
+     # type ignored to work around https://github.com/strawberry-graphql/strawberry/issues/1929
+     @strawberry.field(description="Returns a human friendly name for the inferences.")  # type: ignore
+     def name(self) -> str:
+         return self.inferences.display_name
+
+     @strawberry.field
+     def events(
+         self,
+         event_ids: List[ID],
+         dimensions: Optional[List[DimensionInput]] = UNSET,
+     ) -> List[Event]:
+         """
+         Returns events for specific event IDs and dimensions. If no input
+         dimensions are provided, returns all features and tags.
+         """
+         if not event_ids:
+             return []
+         row_ids = parse_event_ids_by_inferences_role(event_ids)
+         if len(row_ids) > 1 or self.inferences_role not in row_ids:
+             raise ValueError("eventIds contains IDs from incorrect inferences.")
+         events = self.inferences[row_ids[self.inferences_role]]
+         requested_gql_dimensions = _get_requested_features_and_tags(
+             core_dimensions=self.model.scalar_dimensions,
+             requested_dimension_names=set(dim.name for dim in dimensions)
+             if isinstance(dimensions, list)
+             else None,
+         )
+         return [
+             create_event(
+                 event_id=create_event_id(event.id.row_id, self.inferences_role),
+                 event=event,
+                 dimensions=requested_gql_dimensions,
+                 is_document_record=self.inferences_role is AncillaryInferencesRole.corpus,
+             )
+             for event in events
+         ]
+
+
+ def _get_requested_features_and_tags(
+     core_dimensions: Iterable[ScalarDimension],
+     requested_dimension_names: Optional[Set[str]] = UNSET,
+ ) -> List[Dimension]:
+     """
+     Returns requested features and tags as a list of strawberry Dimensions. If no
+     dimensions are explicitly requested, returns all features and tags.
+     """
+     requested_features_and_tags: List[Dimension] = []
+     for id, dim in enumerate(core_dimensions):
+         is_requested = (
+             not isinstance(requested_dimension_names, Set)
+         ) or dim.name in requested_dimension_names
+         is_feature_or_tag = dim.role in (FEATURE, TAG)
+         if is_requested and is_feature_or_tag:
+             requested_features_and_tags.append(to_gql_dimension(id_attr=id, dimension=dim))
+     return requested_features_and_tags
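_get_requested_features_and_tags distinguishes "no filter supplied" (the UNSET/None sentinel, which means return every feature and tag) from "an empty set requested" (which matches nothing). The same sentinel pattern in isolation, with a stub standing in for Phoenix's ScalarDimension:

# Standalone illustration of the sentinel-based filtering used above.
# StubDimension stands in for phoenix.core.model_schema.ScalarDimension.
from dataclasses import dataclass
from typing import List, Optional, Set

@dataclass
class StubDimension:
    name: str
    role: str  # "feature", "tag", or something else

def requested_features_and_tags(
    dims: List[StubDimension],
    requested: Optional[Set[str]] = None,  # None means "no filter supplied"
) -> List[StubDimension]:
    out: List[StubDimension] = []
    for dim in dims:
        # With no set supplied, every feature/tag counts as requested.
        is_requested = requested is None or dim.name in requested
        if is_requested and dim.role in ("feature", "tag"):
            out.append(dim)
    return out

dims = [
    StubDimension("age", "feature"),
    StubDimension("cohort", "tag"),
    StubDimension("prediction", "prediction"),
]
assert [d.name for d in requested_features_and_tags(dims)] == ["age", "cohort"]
assert requested_features_and_tags(dims, set()) == []  # empty set matches nothing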
phoenix/server/api/types/InferencesRole.py (new file)
@@ -0,0 +1,23 @@
+ from enum import Enum
+ from typing import Dict, Union
+
+ import strawberry
+
+ from phoenix.core.model_schema import PRIMARY, REFERENCE
+
+
+ @strawberry.enum
+ class InferencesRole(Enum):
+     primary = PRIMARY
+     reference = REFERENCE
+
+
+ class AncillaryInferencesRole(Enum):
+     corpus = "InferencesRole.CORPUS"
+
+
+ STR_TO_INFERENCES_ROLE: Dict[str, Union[InferencesRole, AncillaryInferencesRole]] = {
+     str(InferencesRole.primary.value): InferencesRole.primary,
+     str(InferencesRole.reference.value): InferencesRole.reference,
+     str(AncillaryInferencesRole.corpus.value): AncillaryInferencesRole.corpus,
+ }
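The odd string literal "InferencesRole.CORPUS" mirrors what str() produces for the real enum members, so all three roles can share one string-keyed lookup table. A self-contained sketch of the same trick; the "row:role" identifier format is illustrative, not necessarily Phoenix's actual event-ID encoding:

# Self-contained illustration of the str()-keyed role lookup used above.
from enum import Enum
from typing import Dict, Union

class Role(Enum):
    PRIMARY = "primary"
    REFERENCE = "reference"

class AncillaryRole(Enum):
    # The value imitates str(<enum member>) so every role stringifies uniformly.
    corpus = "Role.CORPUS"

STR_TO_ROLE: Dict[str, Union[Role, AncillaryRole]] = {
    str(Role.PRIMARY): Role.PRIMARY,
    str(Role.REFERENCE): Role.REFERENCE,
    str(AncillaryRole.corpus.value): AncillaryRole.corpus,
}

# Hypothetical "row:role" identifier, used only to show the lookup.
row_id, role_str = "42:Role.PRIMARY".split(":")
assert STR_TO_ROLE[role_str] is Role.PRIMARY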
phoenix/server/api/types/Model.py
@@ -2,6 +2,7 @@ import asyncio
  from typing import List, Optional
 
  import strawberry
+ from strawberry.relay import Connection
  from strawberry.types import Info
  from strawberry.unset import UNSET
  from typing_extensions import Annotated
@@ -14,12 +15,12 @@ from ..input_types.DimensionFilter import DimensionFilter
  from ..input_types.Granularity import Granularity
  from ..input_types.PerformanceMetricInput import PerformanceMetricInput
  from ..input_types.TimeRange import TimeRange
- from .Dataset import Dataset
- from .DatasetRole import AncillaryDatasetRole, DatasetRole
  from .Dimension import Dimension, to_gql_dimension
  from .EmbeddingDimension import EmbeddingDimension, to_gql_embedding_dimension
  from .ExportedFile import ExportedFile
- from .pagination import Connection, ConnectionArgs, CursorString, connection_from_list
+ from .Inferences import Inferences
+ from .InferencesRole import AncillaryInferencesRole, InferencesRole
+ from .pagination import ConnectionArgs, CursorString, connection_from_list
  from .TimeSeries import (
      PerformanceTimeSeries,
      ensure_timeseries_parameters,
@@ -57,45 +58,45 @@ class Model:
      )
 
      @strawberry.field
-     def primary_dataset(self, info: Info[Context, None]) -> Dataset:
-         dataset = info.context.model[PRIMARY]
-         start, stop = dataset.time_range
-         return Dataset(
+     def primary_inferences(self, info: Info[Context, None]) -> Inferences:
+         inferences = info.context.model[PRIMARY]
+         start, stop = inferences.time_range
+         return Inferences(
              start_time=start,
             end_time=stop,
-             record_count=len(dataset),
-             dataset=dataset,
-             dataset_role=DatasetRole.primary,
+             record_count=len(inferences),
+             inferences=inferences,
+             inferences_role=InferencesRole.primary,
              model=info.context.model,
          )
 
      @strawberry.field
-     def reference_dataset(self, info: Info[Context, None]) -> Optional[Dataset]:
-         if (dataset := info.context.model[REFERENCE]).empty:
+     def reference_inferences(self, info: Info[Context, None]) -> Optional[Inferences]:
+         if (inferences := info.context.model[REFERENCE]).empty:
              return None
-         start, stop = dataset.time_range
-         return Dataset(
+         start, stop = inferences.time_range
+         return Inferences(
              start_time=start,
              end_time=stop,
-             record_count=len(dataset),
-             dataset=dataset,
-             dataset_role=DatasetRole.reference,
+             record_count=len(inferences),
+             inferences=inferences,
+             inferences_role=InferencesRole.reference,
              model=info.context.model,
          )
 
      @strawberry.field
-     def corpus_dataset(self, info: Info[Context, None]) -> Optional[Dataset]:
+     def corpus_inferences(self, info: Info[Context, None]) -> Optional[Inferences]:
          if info.context.corpus is None:
              return None
-         if (dataset := info.context.corpus[PRIMARY]).empty:
+         if (inferences := info.context.corpus[PRIMARY]).empty:
              return None
-         start, stop = dataset.time_range
-         return Dataset(
+         start, stop = inferences.time_range
+         return Inferences(
              start_time=start,
              end_time=stop,
-             record_count=len(dataset),
-             dataset=dataset,
-             dataset_role=AncillaryDatasetRole.corpus,
+             record_count=len(inferences),
+             inferences=inferences,
+             inferences_role=AncillaryInferencesRole.corpus,
              model=info.context.corpus,
          )
 
@@ -156,24 +157,24 @@
          info: Info[Context, None],
          metric: PerformanceMetricInput,
          time_range: Optional[TimeRange] = UNSET,
-         dataset_role: Annotated[
-             Optional[DatasetRole],
+         inferences_role: Annotated[
+             Optional[InferencesRole],
              strawberry.argument(
-                 description="The dataset (primary or reference) to query",
+                 description="The inferences (primary or reference) to query",
              ),
-         ] = DatasetRole.primary,
+         ] = InferencesRole.primary,
      ) -> Optional[float]:
-         if not isinstance(dataset_role, DatasetRole):
-             dataset_role = DatasetRole.primary
+         if not isinstance(inferences_role, InferencesRole):
+             inferences_role = InferencesRole.primary
          model = info.context.model
-         dataset = model[dataset_role.value]
+         inferences = model[inferences_role.value]
          time_range, granularity = ensure_timeseries_parameters(
-             dataset,
+             inferences,
              time_range,
          )
          metric_instance = metric.metric_instance(model)
          data = get_timeseries_data(
-             dataset,
+             inferences,
              metric_instance,
              time_range,
              granularity,
@@ -194,26 +195,26 @@
          metric: PerformanceMetricInput,
          time_range: TimeRange,
          granularity: Granularity,
-         dataset_role: Annotated[
-             Optional[DatasetRole],
+         inferences_role: Annotated[
+             Optional[InferencesRole],
              strawberry.argument(
-                 description="The dataset (primary or reference) to query",
+                 description="The inferences (primary or reference) to query",
              ),
-         ] = DatasetRole.primary,
+         ] = InferencesRole.primary,
      ) -> PerformanceTimeSeries:
-         if not isinstance(dataset_role, DatasetRole):
-             dataset_role = DatasetRole.primary
+         if not isinstance(inferences_role, InferencesRole):
+             inferences_role = InferencesRole.primary
          model = info.context.model
-         dataset = model[dataset_role.value]
+         inferences = model[inferences_role.value]
          time_range, granularity = ensure_timeseries_parameters(
-             dataset,
+             inferences,
              time_range,
              granularity,
          )
          metric_instance = metric.metric_instance(model)
          return PerformanceTimeSeries(
              data=get_timeseries_data(
-                 dataset,
+                 inferences,
                  metric_instance,
                  time_range,
                  granularity,
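The Model.py changes are a mechanical rename of the legacy "dataset" vocabulary to "inferences" (freeing the Dataset name for the new datasets/experiments feature), plus a switch from the hand-rolled pagination Connection to strawberry's built-in relay Connection. On the GraphQL side the old primaryDataset/referenceDataset/corpusDataset fields give way to the renamed ones; a sketch of the new query shape, assuming strawberry's default camelCase name conversion:

# Sketch of the renamed query surface. Previously:
# primaryDataset / referenceDataset / corpusDataset.
MODEL_QUERY = """
query {
  model {
    primaryInferences { name startTime endTime recordCount }
    referenceInferences { name startTime endTime recordCount }
    corpusInferences { name recordCount }
  }
}
"""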
phoenix/server/api/types/Project.py
@@ -1,6 +1,10 @@
  import operator
  from datetime import datetime
- from typing import Any, List, Optional
+ from typing import (
+     Any,
+     List,
+     Optional,
+ )
 
  import strawberry
  from aioitertools.itertools import islice
@@ -8,6 +12,7 @@ from sqlalchemy import and_, desc, distinct, select
  from sqlalchemy.orm import contains_eager
  from sqlalchemy.sql.expression import tuple_
  from strawberry import ID, UNSET
+ from strawberry.relay import Connection, Node, NodeID
  from strawberry.types import Info
 
  from phoenix.datetime_utils import right_open_time_range
@@ -17,13 +22,11 @@ from phoenix.server.api.input_types.SpanSort import SpanSort, SpanSortConfig
  from phoenix.server.api.input_types.TimeRange import TimeRange
  from phoenix.server.api.types.DocumentEvaluationSummary import DocumentEvaluationSummary
  from phoenix.server.api.types.EvaluationSummary import EvaluationSummary
- from phoenix.server.api.types.node import Node
  from phoenix.server.api.types.pagination import (
-     Connection,
      Cursor,
      CursorSortColumn,
      CursorString,
-     connections,
+     connection_from_cursors_and_nodes,
  )
  from phoenix.server.api.types.SortDir import SortDir
  from phoenix.server.api.types.Span import Span, to_gql_span
@@ -31,11 +34,10 @@ from phoenix.server.api.types.Trace import Trace
  from phoenix.server.api.types.ValidationResult import ValidationResult
  from phoenix.trace.dsl import SpanFilter
 
- SPANS_LIMIT = 1000
-
 
  @strawberry.type
  class Project(Node):
+     id_attr: NodeID[int]
      name: str
      gradient_start_color: str
      gradient_end_color: str
@@ -149,7 +151,7 @@
          async with info.context.db() as session:
              if (id_attr := await session.scalar(stmt)) is None:
                  return None
-         return Trace(id_attr=id_attr)
+         return Trace(id_attr=id_attr, trace_id=trace_id, project_rowid=self.id_attr)
 
      @strawberry.field
      async def spans(
@@ -168,7 +170,7 @@
              select(models.Span)
              .join(models.Trace)
              .where(models.Trace.project_rowid == self.id_attr)
-             .options(contains_eager(models.Span.trace))
+             .options(contains_eager(models.Span.trace).load_only(models.Trace.trace_id))
          )
          if time_range:
              stmt = stmt.where(
@@ -213,7 +215,7 @@
              first + 1  # overfetch by one to determine whether there's a next page
          )
          stmt = stmt.order_by(cursor_rowid_column)
-         data = []
+         cursors_and_nodes = []
          async with info.context.db() as session:
              span_records = await session.execute(stmt)
              async for span_record in islice(span_records, first):
@@ -230,15 +232,15 @@
                          else None
                      ),
                  )
-                 data.append((cursor, to_gql_span(span)))
+                 cursors_and_nodes.append((cursor, to_gql_span(span)))
              has_next_page = True
             try:
                  next(span_records)
              except StopIteration:
                  has_next_page = False
 
-         return connections(
-             data,
+         return connection_from_cursors_and_nodes(
+             cursors_and_nodes,
              has_previous_page=False,
              has_next_page=has_next_page,
          )
@@ -355,3 +357,15 @@
                  is_valid=False,
                  error_message=e.msg,
              )
+
+
+ def to_gql_project(project: models.Project) -> Project:
+     """
+     Converts an ORM project to a GraphQL Project.
+     """
+     return Project(
+         id_attr=project.id,
+         name=project.name,
+         gradient_start_color=project.gradient_start_color,
+         gradient_end_color=project.gradient_end_color,
+     )
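The spans resolver keeps its overfetch-by-one pattern: it limits the query to first + 1 rows, emits at most first of them, and uses the leftover row to set hasNextPage without a second COUNT query. The same idea in isolation:

# Standalone sketch of the overfetch-by-one pagination used in Project.spans.
from typing import Iterable, List, Tuple, TypeVar

T = TypeVar("T")

def paginate(rows: Iterable[T], first: int) -> Tuple[List[T], bool]:
    """Return up to `first` rows plus a has_next_page flag.

    The caller is expected to have limited the query to `first + 1` rows;
    the one extra row proves a next page exists and is discarded.
    """
    fetched = list(rows)  # in SQL this would be LIMIT :first + 1
    return fetched[:first], len(fetched) > first

page, has_next = paginate(range(11), first=10)
assert len(page) == 10 and has_next
page, has_next = paginate(range(7), first=10)
assert len(page) == 7 and not has_next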
phoenix/server/api/types/Span.py
@@ -1,23 +1,33 @@
  import json
+ from dataclasses import dataclass
  from datetime import datetime
  from enum import Enum
- from typing import Any, List, Mapping, Optional, Sized, cast
+ from typing import TYPE_CHECKING, Any, List, Mapping, Optional, Sized, cast
 
  import numpy as np
  import strawberry
  from openinference.semconv.trace import EmbeddingAttributes, SpanAttributes
  from strawberry import ID, UNSET
+ from strawberry.relay import Node, NodeID
  from strawberry.types import Info
+ from typing_extensions import Annotated
 
  import phoenix.trace.schemas as trace_schema
  from phoenix.db import models
  from phoenix.server.api.context import Context
+ from phoenix.server.api.helpers.dataset_helpers import (
+     get_dataset_example_input,
+     get_dataset_example_output,
+ )
  from phoenix.server.api.types.DocumentRetrievalMetrics import DocumentRetrievalMetrics
  from phoenix.server.api.types.Evaluation import DocumentEvaluation, SpanEvaluation
+ from phoenix.server.api.types.ExampleRevisionInterface import ExampleRevision
  from phoenix.server.api.types.MimeType import MimeType
- from phoenix.server.api.types.node import Node
  from phoenix.trace.attributes import get_attribute_value
 
+ if TYPE_CHECKING:
+     from phoenix.server.api.types.Project import Project
+
  EMBEDDING_EMBEDDINGS = SpanAttributes.EMBEDDING_EMBEDDINGS
  EMBEDDING_VECTOR = EmbeddingAttributes.EMBEDDING_VECTOR
  INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
@@ -25,6 +35,9 @@ INPUT_VALUE = SpanAttributes.INPUT_VALUE
  LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
  LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
  LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
+ LLM_PROMPT_TEMPLATE_VARIABLES = SpanAttributes.LLM_PROMPT_TEMPLATE_VARIABLES
+ LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES
+ LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES
  METADATA = SpanAttributes.METADATA
  OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
  OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
@@ -46,6 +59,7 @@ class SpanKind(Enum):
      embedding = "EMBEDDING"
      agent = "AGENT"
      reranker = "RERANKER"
+     evaluator = "EVALUATOR"
      unknown = "UNKNOWN"
 
      @classmethod
@@ -101,8 +115,14 @@ class SpanEvent:
      )
 
 
+ @strawberry.type
+ class SpanAsExampleRevision(ExampleRevision): ...
+
+
  @strawberry.type
  class Span(Node):
+     id_attr: NodeID[int]
+     db_span: strawberry.Private[models.Span]
      name: str
      status_code: SpanStatusCode
      status_message: str
@@ -188,6 +208,44 @@ class Span(Node):
          spans = await info.context.data_loaders.span_descendants.load(span_id)
          return [to_gql_span(span) for span in spans]
 
+     @strawberry.field(
+         description="The span's attributes translated into an example revision for a dataset",
+     )  # type: ignore
+     def as_example_revision(self) -> SpanAsExampleRevision:
+         db_span = self.db_span
+         attributes = db_span.attributes
+         span_io = _SpanIO(
+             span_kind=db_span.span_kind,
+             input_value=get_attribute_value(attributes, INPUT_VALUE),
+             input_mime_type=get_attribute_value(attributes, INPUT_MIME_TYPE),
+             output_value=get_attribute_value(attributes, OUTPUT_VALUE),
+             output_mime_type=get_attribute_value(attributes, OUTPUT_MIME_TYPE),
+             llm_prompt_template_variables=get_attribute_value(
+                 attributes, LLM_PROMPT_TEMPLATE_VARIABLES
+             ),
+             llm_input_messages=get_attribute_value(attributes, LLM_INPUT_MESSAGES),
+             llm_output_messages=get_attribute_value(attributes, LLM_OUTPUT_MESSAGES),
+             retrieval_documents=get_attribute_value(attributes, RETRIEVAL_DOCUMENTS),
+         )
+         return SpanAsExampleRevision(
+             input=get_dataset_example_input(span_io),
+             output=get_dataset_example_output(span_io),
+             metadata=attributes,
+         )
+
+     @strawberry.field(description="The project that this span belongs to.")  # type: ignore
+     async def project(
+         self,
+         info: Info[Context, None],
+     ) -> Annotated[
+         "Project", strawberry.lazy("phoenix.server.api.types.Project")
+     ]:  # use lazy types to avoid circular import: https://strawberry.rocks/docs/types/lazy
+         from phoenix.server.api.types.Project import to_gql_project
+
+         span_id = self.id_attr
+         project = await info.context.data_loaders.span_projects.load(span_id)
+         return to_gql_project(project)
+
 
  def to_gql_span(span: models.Span) -> Span:
      events: List[SpanEvent] = list(map(SpanEvent.from_dict, span.events))
@@ -197,6 +255,7 @@ def to_gql_span(span: models.Span) -> Span:
      num_documents = len(retrieval_documents) if isinstance(retrieval_documents, Sized) else None
      return Span(
          id_attr=span.id,
+         db_span=span,
          name=span.name,
          status_code=SpanStatusCode(span.status_code),
          status_message=span.status_message,
@@ -302,3 +361,21 @@ def _convert_metadata_to_string(metadata: Any) -> Optional[str]:
          return json.dumps(metadata)
      except Exception:
          return str(metadata)
+
+
+ @dataclass
+ class _SpanIO:
+     """
+     A class that contains the information needed to extract dataset example
+     input and output values from a span.
+     """
+
+     span_kind: Optional[str]
+     input_value: Any
+     input_mime_type: Optional[str]
+     output_value: Any
+     output_mime_type: Optional[str]
+     llm_prompt_template_variables: Any
+     llm_input_messages: Any
+     llm_output_messages: Any
+     retrieval_documents: Any
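Together, the private db_span field, as_example_revision, and the lazily imported project resolver let a client turn any span into dataset-example material in a single round trip. A sketch of the corresponding query, assuming strawberry's default camelCase naming; the node ID is a placeholder:

# Hedged sketch: fetch a span's example revision and owning project together.
SPAN_QUERY = """
query SpanAsExample($id: GlobalID!) {
  node(id: $id) {
    ... on Span {
      asExampleRevision { input output metadata }
      project { name }
    }
  }
}
"""
variables = {"id": "U3BhbjoxMjM="}  # placeholder GlobalID for a Span node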