arize-phoenix 4.5.0__py3-none-any.whl → 4.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/METADATA +16 -8
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/RECORD +122 -58
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/WHEEL +1 -1
- phoenix/__init__.py +0 -27
- phoenix/config.py +42 -7
- phoenix/core/model.py +25 -25
- phoenix/core/model_schema.py +64 -62
- phoenix/core/model_schema_adapter.py +27 -25
- phoenix/datetime_utils.py +4 -0
- phoenix/db/bulk_inserter.py +54 -14
- phoenix/db/insertion/dataset.py +237 -0
- phoenix/db/insertion/evaluation.py +10 -10
- phoenix/db/insertion/helpers.py +17 -14
- phoenix/db/insertion/span.py +3 -3
- phoenix/db/migrations/types.py +29 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +291 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +2 -28
- phoenix/db/models.py +236 -4
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +153 -0
- phoenix/experiments/evaluators/code_evaluators.py +99 -0
- phoenix/experiments/evaluators/llm_evaluators.py +244 -0
- phoenix/experiments/evaluators/utils.py +186 -0
- phoenix/experiments/functions.py +757 -0
- phoenix/experiments/tracing.py +85 -0
- phoenix/experiments/types.py +753 -0
- phoenix/experiments/utils.py +24 -0
- phoenix/inferences/fixtures.py +23 -23
- phoenix/inferences/inferences.py +7 -7
- phoenix/inferences/validation.py +1 -1
- phoenix/server/api/context.py +20 -0
- phoenix/server/api/dataloaders/__init__.py +20 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +100 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +43 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +85 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +43 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +49 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +2 -3
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/trace_row_ids.py +39 -0
- phoenix/server/api/helpers/dataset_helpers.py +179 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +9 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/mutations/__init__.py +13 -0
- phoenix/server/api/mutations/auth.py +11 -0
- phoenix/server/api/mutations/dataset_mutations.py +520 -0
- phoenix/server/api/mutations/experiment_mutations.py +65 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +17 -14
- phoenix/server/api/mutations/project_mutations.py +47 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +6 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +503 -0
- phoenix/server/api/routers/v1/__init__.py +77 -2
- phoenix/server/api/routers/v1/dataset_examples.py +178 -0
- phoenix/server/api/routers/v1/datasets.py +965 -0
- phoenix/server/api/routers/v1/evaluations.py +8 -13
- phoenix/server/api/routers/v1/experiment_evaluations.py +143 -0
- phoenix/server/api/routers/v1/experiment_runs.py +220 -0
- phoenix/server/api/routers/v1/experiments.py +302 -0
- phoenix/server/api/routers/v1/spans.py +9 -5
- phoenix/server/api/routers/v1/traces.py +1 -4
- phoenix/server/api/schema.py +2 -303
- phoenix/server/api/types/AnnotatorKind.py +10 -0
- phoenix/server/api/types/Cluster.py +19 -19
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/Dataset.py +282 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +30 -29
- phoenix/server/api/types/EmbeddingDimension.py +40 -34
- phoenix/server/api/types/Event.py +16 -16
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +147 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +19 -0
- phoenix/server/api/types/ExperimentRun.py +91 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +57 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/Model.py +43 -42
- phoenix/server/api/types/Project.py +26 -12
- phoenix/server/api/types/Span.py +79 -2
- phoenix/server/api/types/TimeSeries.py +6 -6
- phoenix/server/api/types/Trace.py +15 -4
- phoenix/server/api/types/UMAPPoints.py +1 -1
- phoenix/server/api/types/node.py +5 -111
- phoenix/server/api/types/pagination.py +10 -52
- phoenix/server/app.py +103 -49
- phoenix/server/main.py +49 -27
- phoenix/server/openapi/docs.py +3 -0
- phoenix/server/static/index.js +2300 -1294
- phoenix/server/templates/index.html +1 -0
- phoenix/services.py +15 -15
- phoenix/session/client.py +581 -22
- phoenix/session/session.py +47 -37
- phoenix/trace/exporter.py +14 -9
- phoenix/trace/fixtures.py +133 -7
- phoenix/trace/schemas.py +1 -2
- phoenix/trace/span_evaluations.py +3 -3
- phoenix/trace/trace_dataset.py +6 -6
- phoenix/utilities/json.py +61 -0
- phoenix/utilities/re.py +50 -0
- phoenix/version.py +1 -1
- phoenix/server/api/types/DatasetRole.py +0 -23
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/LICENSE +0 -0
- /phoenix/server/api/{helpers.py → helpers/__init__.py} +0 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import strawberry
|
|
5
|
+
from sqlalchemy import select
|
|
6
|
+
from strawberry import UNSET
|
|
7
|
+
from strawberry.relay import Connection, GlobalID, Node, NodeID
|
|
8
|
+
from strawberry.scalars import JSON
|
|
9
|
+
from strawberry.types import Info
|
|
10
|
+
|
|
11
|
+
from phoenix.db import models
|
|
12
|
+
from phoenix.server.api.context import Context
|
|
13
|
+
from phoenix.server.api.types.ExperimentRunAnnotation import (
|
|
14
|
+
ExperimentRunAnnotation,
|
|
15
|
+
to_gql_experiment_run_annotation,
|
|
16
|
+
)
|
|
17
|
+
from phoenix.server.api.types.pagination import (
|
|
18
|
+
ConnectionArgs,
|
|
19
|
+
CursorString,
|
|
20
|
+
connection_from_list,
|
|
21
|
+
)
|
|
22
|
+
from phoenix.server.api.types.Trace import Trace
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@strawberry.type
class ExperimentRun(Node):
    """
    A GraphQL node representing one run of an experiment: the task output,
    timing information, and (optionally) the trace recorded during the run.
    """

    # Primary key of the ORM row backing this node.
    id_attr: NodeID[int]
    # Relay global ID of the parent experiment.
    experiment_id: GlobalID
    # ID of the trace associated with this run, if any.
    trace_id: Optional[str]
    # The task output payload, if any.
    output: Optional[JSON]
    start_time: datetime
    end_time: datetime
    # Error message if the run failed; None on success.
    error: Optional[str]

    @strawberry.field
    async def annotations(
        self,
        info: Info[Context, None],
        first: Optional[int] = 50,
        last: Optional[int] = UNSET,
        after: Optional[CursorString] = UNSET,
        before: Optional[CursorString] = UNSET,
    ) -> Connection[ExperimentRunAnnotation]:
        """
        Paginated annotations attached to this run, ordered by annotation
        name descending.
        """
        args = ConnectionArgs(
            first=first,
            # UNSET (and anything that isn't a real cursor string) is
            # normalized to None before reaching the pagination helper.
            after=after if isinstance(after, CursorString) else None,
            last=last,
            before=before if isinstance(before, CursorString) else None,
        )
        run_id = self.id_attr
        async with info.context.db() as session:
            annotations = (
                await session.scalars(
                    select(models.ExperimentRunAnnotation)
                    .where(models.ExperimentRunAnnotation.experiment_run_id == run_id)
                    .order_by(models.ExperimentRunAnnotation.name.desc())
                )
            ).all()
        # NOTE: pagination is applied in memory over the full result set.
        return connection_from_list(
            [to_gql_experiment_run_annotation(annotation) for annotation in annotations], args
        )

    @strawberry.field
    async def trace(self, info: Info) -> Optional[Trace]:
        """
        The trace recorded for this run, or None when the run has no trace
        ID or the trace cannot be found.
        """
        if not self.trace_id:
            return None
        dataloader = info.context.data_loaders.trace_row_ids
        # The dataloader maps a trace ID to (trace row ID, project row ID),
        # or None when the trace is absent from the database.
        if (trace := await dataloader.load(self.trace_id)) is None:
            return None
        trace_rowid, project_rowid = trace
        return Trace(id_attr=trace_rowid, trace_id=self.trace_id, project_rowid=project_rowid)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def to_gql_experiment_run(run: models.ExperimentRun) -> ExperimentRun:
    """
    Build the GraphQL ``ExperimentRun`` node for an ORM experiment run row.
    """
    # Imported locally to avoid a circular import with the Experiment
    # type module.
    from phoenix.server.api.types.Experiment import Experiment

    trace = run.trace
    trace_id = trace.trace_id if trace and trace.trace_id is not None else None
    return ExperimentRun(
        id_attr=run.id,
        experiment_id=GlobalID(Experiment.__name__, str(run.experiment_id)),
        trace_id=trace_id,
        output=run.output.get("task_output"),
        start_time=run.start_time,
        end_time=run.end_time,
        error=run.error,
    )
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import strawberry
|
|
5
|
+
from strawberry import Info
|
|
6
|
+
from strawberry.relay import Node, NodeID
|
|
7
|
+
from strawberry.scalars import JSON
|
|
8
|
+
|
|
9
|
+
from phoenix.db import models
|
|
10
|
+
from phoenix.server.api.types.AnnotatorKind import AnnotatorKind
|
|
11
|
+
from phoenix.server.api.types.Trace import Trace
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@strawberry.type
class ExperimentRunAnnotation(Node):
    """
    A GraphQL node for an annotation attached to an experiment run: a named
    label/score/explanation plus timing and an optional associated trace.
    """

    # Primary key of the ORM row backing this node.
    id_attr: NodeID[int]
    name: str
    # Kind of annotator that produced this annotation.
    annotator_kind: AnnotatorKind
    label: Optional[str]
    score: Optional[float]
    explanation: Optional[str]
    # Error message if producing the annotation failed; None on success.
    error: Optional[str]
    metadata: JSON
    start_time: datetime
    end_time: datetime
    # ID of the trace recorded while the annotation was produced, if any.
    trace_id: Optional[str]

    @strawberry.field
    async def trace(self, info: Info) -> Optional[Trace]:
        """
        The trace associated with this annotation, or None when no trace ID
        was recorded or the trace cannot be found.
        """
        if not self.trace_id:
            return None
        dataloader = info.context.data_loaders.trace_row_ids
        # The dataloader maps a trace ID to (trace row ID, project row ID),
        # or None when the trace is absent from the database.
        if (trace := await dataloader.load(self.trace_id)) is None:
            return None
        trace_row_id, project_row_id = trace
        return Trace(id_attr=trace_row_id, trace_id=self.trace_id, project_rowid=project_row_id)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def to_gql_experiment_run_annotation(
    annotation: models.ExperimentRunAnnotation,
) -> ExperimentRunAnnotation:
    """
    Translate an ORM experiment-run annotation row into its GraphQL node.
    """
    # NOTE(review): the ORM column appears to be named ``metadata_`` (with a
    # trailing underscore) to avoid clashing with SQLAlchemy's reserved
    # ``metadata`` attribute — confirm against phoenix.db.models.
    fields = {
        "id_attr": annotation.id,
        "name": annotation.name,
        "annotator_kind": AnnotatorKind(annotation.annotator_kind),
        "label": annotation.label,
        "score": annotation.score,
        "explanation": annotation.explanation,
        "error": annotation.error,
        "metadata": annotation.metadata_,
        "start_time": annotation.start_time,
        "end_time": annotation.end_time,
        "trace_id": annotation.trace_id,
    }
    return ExperimentRunAnnotation(**fields)
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Iterable, List, Optional, Set, Union
|
|
3
|
+
|
|
4
|
+
import strawberry
|
|
5
|
+
from strawberry.scalars import ID
|
|
6
|
+
from strawberry.unset import UNSET
|
|
7
|
+
|
|
8
|
+
import phoenix.core.model_schema as ms
|
|
9
|
+
from phoenix.core.model_schema import FEATURE, TAG, ScalarDimension
|
|
10
|
+
|
|
11
|
+
from ..input_types.DimensionInput import DimensionInput
|
|
12
|
+
from .Dimension import Dimension, to_gql_dimension
|
|
13
|
+
from .Event import Event, create_event, create_event_id, parse_event_ids_by_inferences_role
|
|
14
|
+
from .InferencesRole import AncillaryInferencesRole, InferencesRole
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@strawberry.type
class Inferences:
    """
    A set of inferences (primary, reference, or corpus) exposed over GraphQL
    together with its time range and record count.
    """

    start_time: datetime = strawberry.field(description="The start bookend of the data")
    end_time: datetime = strawberry.field(description="The end bookend of the data")
    record_count: int = strawberry.field(description="The record count of the data")
    # Non-schema state carried for resolvers; strawberry.Private fields are
    # hidden from the GraphQL schema.
    inferences: strawberry.Private[ms.Inferences]
    inferences_role: strawberry.Private[Union[InferencesRole, AncillaryInferencesRole]]
    model: strawberry.Private[ms.Model]

    # type ignored here to get around the following: https://github.com/strawberry-graphql/strawberry/issues/1929
    @strawberry.field(description="Returns a human friendly name for the inferences.")  # type: ignore
    def name(self) -> str:
        return self.inferences.display_name

    @strawberry.field
    def events(
        self,
        event_ids: List[ID],
        dimensions: Optional[List[DimensionInput]] = UNSET,
    ) -> List[Event]:
        """
        Returns events for specific event IDs and dimensions. If no input
        dimensions are provided, returns all features and tags.

        Raises:
            ValueError: if any requested event ID belongs to a different
                inference set than this one.
        """
        if not event_ids:
            return []
        # Group the requested row IDs by the inference-set role encoded in
        # each event ID; all IDs must belong to this set.
        row_ids = parse_event_ids_by_inferences_role(event_ids)
        if len(row_ids) > 1 or self.inferences_role not in row_ids:
            raise ValueError("eventIds contains IDs from incorrect inferences.")
        events = self.inferences[row_ids[self.inferences_role]]
        requested_gql_dimensions = _get_requested_features_and_tags(
            core_dimensions=self.model.scalar_dimensions,
            # A missing/UNSET dimensions argument means "all features and tags".
            requested_dimension_names=set(dim.name for dim in dimensions)
            if isinstance(dimensions, list)
            else None,
        )
        return [
            create_event(
                event_id=create_event_id(event.id.row_id, self.inferences_role),
                event=event,
                dimensions=requested_gql_dimensions,
                # Only corpus events are treated as document records.
                is_document_record=self.inferences_role is AncillaryInferencesRole.corpus,
            )
            for event in events
        ]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _get_requested_features_and_tags(
    core_dimensions: Iterable[ScalarDimension],
    requested_dimension_names: Optional[Set[str]] = UNSET,
) -> List[Dimension]:
    """
    Returns requested features and tags as a list of strawberry Dimensions.
    If no dimensions are explicitly requested (i.e. the argument is UNSET or
    None rather than a set), returns all features and tags.
    """
    requested_features_and_tags: List[Dimension] = []
    # ``dim_index`` (rather than ``id``) avoids shadowing the builtin; it is
    # the positional index of the dimension and is used as the GraphQL node ID.
    for dim_index, dim in enumerate(core_dimensions):
        # When no concrete set of names was supplied (UNSET or None), every
        # dimension counts as requested.
        # NOTE(review): isinstance against typing.Set relies on runtime
        # checks of a deprecated typing alias — confirm this still behaves
        # as an isinstance(..., set) check on the supported Python versions.
        is_requested = (
            not isinstance(requested_dimension_names, Set)
        ) or dim.name in requested_dimension_names
        is_feature_or_tag = dim.role in (FEATURE, TAG)
        if is_requested and is_feature_or_tag:
            requested_features_and_tags.append(
                to_gql_dimension(id_attr=dim_index, dimension=dim)
            )
    return requested_features_and_tags
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Dict, Union
|
|
3
|
+
|
|
4
|
+
import strawberry
|
|
5
|
+
|
|
6
|
+
from phoenix.core.model_schema import PRIMARY, REFERENCE
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@strawberry.enum
class InferencesRole(Enum):
    """
    The role of an inference set within the model: the primary set under
    analysis or the reference set. Members carry the corresponding core
    model-schema role constants as their values.
    """

    primary = PRIMARY
    reference = REFERENCE
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AncillaryInferencesRole(Enum):
    """
    Inference-set roles outside the primary/reference pair.
    """

    # NOTE(review): the value looks like the str() form of a core CORPUS
    # role constant, presumably so it can share a string-keyed lookup with
    # the InferencesRole values — confirm against phoenix.core.model_schema.
    corpus = "InferencesRole.CORPUS"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Maps the string form of a role's value back to its enum member, covering
# both regular and ancillary roles.
# NOTE: the original name is misspelled ("INFEREENCES"); it is kept for
# backward compatibility — prefer the correctly spelled alias below.
STR_TO_INFEREENCES_ROLE: Dict[str, Union[InferencesRole, AncillaryInferencesRole]] = {
    str(InferencesRole.primary.value): InferencesRole.primary,
    str(InferencesRole.reference.value): InferencesRole.reference,
    str(AncillaryInferencesRole.corpus.value): AncillaryInferencesRole.corpus,
}

# Correctly spelled alias; existing importers of the misspelled name keep working.
STR_TO_INFERENCES_ROLE: Dict[str, Union[InferencesRole, AncillaryInferencesRole]] = (
    STR_TO_INFEREENCES_ROLE
)
|
|
@@ -2,6 +2,7 @@ import asyncio
|
|
|
2
2
|
from typing import List, Optional
|
|
3
3
|
|
|
4
4
|
import strawberry
|
|
5
|
+
from strawberry.relay import Connection
|
|
5
6
|
from strawberry.types import Info
|
|
6
7
|
from strawberry.unset import UNSET
|
|
7
8
|
from typing_extensions import Annotated
|
|
@@ -14,12 +15,12 @@ from ..input_types.DimensionFilter import DimensionFilter
|
|
|
14
15
|
from ..input_types.Granularity import Granularity
|
|
15
16
|
from ..input_types.PerformanceMetricInput import PerformanceMetricInput
|
|
16
17
|
from ..input_types.TimeRange import TimeRange
|
|
17
|
-
from .Dataset import Dataset
|
|
18
|
-
from .DatasetRole import AncillaryDatasetRole, DatasetRole
|
|
19
18
|
from .Dimension import Dimension, to_gql_dimension
|
|
20
19
|
from .EmbeddingDimension import EmbeddingDimension, to_gql_embedding_dimension
|
|
21
20
|
from .ExportedFile import ExportedFile
|
|
22
|
-
from .
|
|
21
|
+
from .Inferences import Inferences
|
|
22
|
+
from .InferencesRole import AncillaryInferencesRole, InferencesRole
|
|
23
|
+
from .pagination import ConnectionArgs, CursorString, connection_from_list
|
|
23
24
|
from .TimeSeries import (
|
|
24
25
|
PerformanceTimeSeries,
|
|
25
26
|
ensure_timeseries_parameters,
|
|
@@ -57,45 +58,45 @@ class Model:
|
|
|
57
58
|
)
|
|
58
59
|
|
|
59
60
|
@strawberry.field
|
|
60
|
-
def
|
|
61
|
-
|
|
62
|
-
start, stop =
|
|
63
|
-
return
|
|
61
|
+
def primary_inferences(self, info: Info[Context, None]) -> Inferences:
|
|
62
|
+
inferences = info.context.model[PRIMARY]
|
|
63
|
+
start, stop = inferences.time_range
|
|
64
|
+
return Inferences(
|
|
64
65
|
start_time=start,
|
|
65
66
|
end_time=stop,
|
|
66
|
-
record_count=len(
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
record_count=len(inferences),
|
|
68
|
+
inferences=inferences,
|
|
69
|
+
inferences_role=InferencesRole.primary,
|
|
69
70
|
model=info.context.model,
|
|
70
71
|
)
|
|
71
72
|
|
|
72
73
|
@strawberry.field
|
|
73
|
-
def
|
|
74
|
-
if (
|
|
74
|
+
def reference_inferences(self, info: Info[Context, None]) -> Optional[Inferences]:
|
|
75
|
+
if (inferences := info.context.model[REFERENCE]).empty:
|
|
75
76
|
return None
|
|
76
|
-
start, stop =
|
|
77
|
-
return
|
|
77
|
+
start, stop = inferences.time_range
|
|
78
|
+
return Inferences(
|
|
78
79
|
start_time=start,
|
|
79
80
|
end_time=stop,
|
|
80
|
-
record_count=len(
|
|
81
|
-
|
|
82
|
-
|
|
81
|
+
record_count=len(inferences),
|
|
82
|
+
inferences=inferences,
|
|
83
|
+
inferences_role=InferencesRole.reference,
|
|
83
84
|
model=info.context.model,
|
|
84
85
|
)
|
|
85
86
|
|
|
86
87
|
@strawberry.field
|
|
87
|
-
def
|
|
88
|
+
def corpus_inferences(self, info: Info[Context, None]) -> Optional[Inferences]:
|
|
88
89
|
if info.context.corpus is None:
|
|
89
90
|
return None
|
|
90
|
-
if (
|
|
91
|
+
if (inferences := info.context.corpus[PRIMARY]).empty:
|
|
91
92
|
return None
|
|
92
|
-
start, stop =
|
|
93
|
-
return
|
|
93
|
+
start, stop = inferences.time_range
|
|
94
|
+
return Inferences(
|
|
94
95
|
start_time=start,
|
|
95
96
|
end_time=stop,
|
|
96
|
-
record_count=len(
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
record_count=len(inferences),
|
|
98
|
+
inferences=inferences,
|
|
99
|
+
inferences_role=AncillaryInferencesRole.corpus,
|
|
99
100
|
model=info.context.corpus,
|
|
100
101
|
)
|
|
101
102
|
|
|
@@ -156,24 +157,24 @@ class Model:
|
|
|
156
157
|
info: Info[Context, None],
|
|
157
158
|
metric: PerformanceMetricInput,
|
|
158
159
|
time_range: Optional[TimeRange] = UNSET,
|
|
159
|
-
|
|
160
|
-
Optional[
|
|
160
|
+
inferences_role: Annotated[
|
|
161
|
+
Optional[InferencesRole],
|
|
161
162
|
strawberry.argument(
|
|
162
|
-
description="The
|
|
163
|
+
description="The inferences (primary or reference) to query",
|
|
163
164
|
),
|
|
164
|
-
] =
|
|
165
|
+
] = InferencesRole.primary,
|
|
165
166
|
) -> Optional[float]:
|
|
166
|
-
if not isinstance(
|
|
167
|
-
|
|
167
|
+
if not isinstance(inferences_role, InferencesRole):
|
|
168
|
+
inferences_role = InferencesRole.primary
|
|
168
169
|
model = info.context.model
|
|
169
|
-
|
|
170
|
+
inferences = model[inferences_role.value]
|
|
170
171
|
time_range, granularity = ensure_timeseries_parameters(
|
|
171
|
-
|
|
172
|
+
inferences,
|
|
172
173
|
time_range,
|
|
173
174
|
)
|
|
174
175
|
metric_instance = metric.metric_instance(model)
|
|
175
176
|
data = get_timeseries_data(
|
|
176
|
-
|
|
177
|
+
inferences,
|
|
177
178
|
metric_instance,
|
|
178
179
|
time_range,
|
|
179
180
|
granularity,
|
|
@@ -194,26 +195,26 @@ class Model:
|
|
|
194
195
|
metric: PerformanceMetricInput,
|
|
195
196
|
time_range: TimeRange,
|
|
196
197
|
granularity: Granularity,
|
|
197
|
-
|
|
198
|
-
Optional[
|
|
198
|
+
inferences_role: Annotated[
|
|
199
|
+
Optional[InferencesRole],
|
|
199
200
|
strawberry.argument(
|
|
200
|
-
description="The
|
|
201
|
+
description="The inferences (primary or reference) to query",
|
|
201
202
|
),
|
|
202
|
-
] =
|
|
203
|
+
] = InferencesRole.primary,
|
|
203
204
|
) -> PerformanceTimeSeries:
|
|
204
|
-
if not isinstance(
|
|
205
|
-
|
|
205
|
+
if not isinstance(inferences_role, InferencesRole):
|
|
206
|
+
inferences_role = InferencesRole.primary
|
|
206
207
|
model = info.context.model
|
|
207
|
-
|
|
208
|
+
inferences = model[inferences_role.value]
|
|
208
209
|
time_range, granularity = ensure_timeseries_parameters(
|
|
209
|
-
|
|
210
|
+
inferences,
|
|
210
211
|
time_range,
|
|
211
212
|
granularity,
|
|
212
213
|
)
|
|
213
214
|
metric_instance = metric.metric_instance(model)
|
|
214
215
|
return PerformanceTimeSeries(
|
|
215
216
|
data=get_timeseries_data(
|
|
216
|
-
|
|
217
|
+
inferences,
|
|
217
218
|
metric_instance,
|
|
218
219
|
time_range,
|
|
219
220
|
granularity,
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import operator
|
|
2
2
|
from datetime import datetime
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import (
|
|
4
|
+
Any,
|
|
5
|
+
List,
|
|
6
|
+
Optional,
|
|
7
|
+
)
|
|
4
8
|
|
|
5
9
|
import strawberry
|
|
6
10
|
from aioitertools.itertools import islice
|
|
@@ -8,6 +12,7 @@ from sqlalchemy import and_, desc, distinct, select
|
|
|
8
12
|
from sqlalchemy.orm import contains_eager
|
|
9
13
|
from sqlalchemy.sql.expression import tuple_
|
|
10
14
|
from strawberry import ID, UNSET
|
|
15
|
+
from strawberry.relay import Connection, Node, NodeID
|
|
11
16
|
from strawberry.types import Info
|
|
12
17
|
|
|
13
18
|
from phoenix.datetime_utils import right_open_time_range
|
|
@@ -17,13 +22,11 @@ from phoenix.server.api.input_types.SpanSort import SpanSort, SpanSortConfig
|
|
|
17
22
|
from phoenix.server.api.input_types.TimeRange import TimeRange
|
|
18
23
|
from phoenix.server.api.types.DocumentEvaluationSummary import DocumentEvaluationSummary
|
|
19
24
|
from phoenix.server.api.types.EvaluationSummary import EvaluationSummary
|
|
20
|
-
from phoenix.server.api.types.node import Node
|
|
21
25
|
from phoenix.server.api.types.pagination import (
|
|
22
|
-
Connection,
|
|
23
26
|
Cursor,
|
|
24
27
|
CursorSortColumn,
|
|
25
28
|
CursorString,
|
|
26
|
-
|
|
29
|
+
connection_from_cursors_and_nodes,
|
|
27
30
|
)
|
|
28
31
|
from phoenix.server.api.types.SortDir import SortDir
|
|
29
32
|
from phoenix.server.api.types.Span import Span, to_gql_span
|
|
@@ -31,11 +34,10 @@ from phoenix.server.api.types.Trace import Trace
|
|
|
31
34
|
from phoenix.server.api.types.ValidationResult import ValidationResult
|
|
32
35
|
from phoenix.trace.dsl import SpanFilter
|
|
33
36
|
|
|
34
|
-
SPANS_LIMIT = 1000
|
|
35
|
-
|
|
36
37
|
|
|
37
38
|
@strawberry.type
|
|
38
39
|
class Project(Node):
|
|
40
|
+
id_attr: NodeID[int]
|
|
39
41
|
name: str
|
|
40
42
|
gradient_start_color: str
|
|
41
43
|
gradient_end_color: str
|
|
@@ -149,7 +151,7 @@ class Project(Node):
|
|
|
149
151
|
async with info.context.db() as session:
|
|
150
152
|
if (id_attr := await session.scalar(stmt)) is None:
|
|
151
153
|
return None
|
|
152
|
-
return Trace(id_attr=id_attr)
|
|
154
|
+
return Trace(id_attr=id_attr, trace_id=trace_id, project_rowid=self.id_attr)
|
|
153
155
|
|
|
154
156
|
@strawberry.field
|
|
155
157
|
async def spans(
|
|
@@ -168,7 +170,7 @@ class Project(Node):
|
|
|
168
170
|
select(models.Span)
|
|
169
171
|
.join(models.Trace)
|
|
170
172
|
.where(models.Trace.project_rowid == self.id_attr)
|
|
171
|
-
.options(contains_eager(models.Span.trace))
|
|
173
|
+
.options(contains_eager(models.Span.trace).load_only(models.Trace.trace_id))
|
|
172
174
|
)
|
|
173
175
|
if time_range:
|
|
174
176
|
stmt = stmt.where(
|
|
@@ -213,7 +215,7 @@ class Project(Node):
|
|
|
213
215
|
first + 1 # overfetch by one to determine whether there's a next page
|
|
214
216
|
)
|
|
215
217
|
stmt = stmt.order_by(cursor_rowid_column)
|
|
216
|
-
|
|
218
|
+
cursors_and_nodes = []
|
|
217
219
|
async with info.context.db() as session:
|
|
218
220
|
span_records = await session.execute(stmt)
|
|
219
221
|
async for span_record in islice(span_records, first):
|
|
@@ -230,15 +232,15 @@ class Project(Node):
|
|
|
230
232
|
else None
|
|
231
233
|
),
|
|
232
234
|
)
|
|
233
|
-
|
|
235
|
+
cursors_and_nodes.append((cursor, to_gql_span(span)))
|
|
234
236
|
has_next_page = True
|
|
235
237
|
try:
|
|
236
238
|
next(span_records)
|
|
237
239
|
except StopIteration:
|
|
238
240
|
has_next_page = False
|
|
239
241
|
|
|
240
|
-
return
|
|
241
|
-
|
|
242
|
+
return connection_from_cursors_and_nodes(
|
|
243
|
+
cursors_and_nodes,
|
|
242
244
|
has_previous_page=False,
|
|
243
245
|
has_next_page=has_next_page,
|
|
244
246
|
)
|
|
@@ -355,3 +357,15 @@ class Project(Node):
|
|
|
355
357
|
is_valid=False,
|
|
356
358
|
error_message=e.msg,
|
|
357
359
|
)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def to_gql_project(project: models.Project) -> Project:
    """
    Convert an ORM project row into its GraphQL ``Project`` node.
    """
    return Project(
        id_attr=project.id,
        gradient_start_color=project.gradient_start_color,
        gradient_end_color=project.gradient_end_color,
        name=project.name,
    )
|
phoenix/server/api/types/Span.py
CHANGED
|
@@ -1,23 +1,33 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from dataclasses import dataclass
|
|
2
3
|
from datetime import datetime
|
|
3
4
|
from enum import Enum
|
|
4
|
-
from typing import Any, List, Mapping, Optional, Sized, cast
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List, Mapping, Optional, Sized, cast
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
7
8
|
import strawberry
|
|
8
9
|
from openinference.semconv.trace import EmbeddingAttributes, SpanAttributes
|
|
9
10
|
from strawberry import ID, UNSET
|
|
11
|
+
from strawberry.relay import Node, NodeID
|
|
10
12
|
from strawberry.types import Info
|
|
13
|
+
from typing_extensions import Annotated
|
|
11
14
|
|
|
12
15
|
import phoenix.trace.schemas as trace_schema
|
|
13
16
|
from phoenix.db import models
|
|
14
17
|
from phoenix.server.api.context import Context
|
|
18
|
+
from phoenix.server.api.helpers.dataset_helpers import (
|
|
19
|
+
get_dataset_example_input,
|
|
20
|
+
get_dataset_example_output,
|
|
21
|
+
)
|
|
15
22
|
from phoenix.server.api.types.DocumentRetrievalMetrics import DocumentRetrievalMetrics
|
|
16
23
|
from phoenix.server.api.types.Evaluation import DocumentEvaluation, SpanEvaluation
|
|
24
|
+
from phoenix.server.api.types.ExampleRevisionInterface import ExampleRevision
|
|
17
25
|
from phoenix.server.api.types.MimeType import MimeType
|
|
18
|
-
from phoenix.server.api.types.node import Node
|
|
19
26
|
from phoenix.trace.attributes import get_attribute_value
|
|
20
27
|
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from phoenix.server.api.types.Project import Project
|
|
30
|
+
|
|
21
31
|
EMBEDDING_EMBEDDINGS = SpanAttributes.EMBEDDING_EMBEDDINGS
|
|
22
32
|
EMBEDDING_VECTOR = EmbeddingAttributes.EMBEDDING_VECTOR
|
|
23
33
|
INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
|
|
@@ -25,6 +35,9 @@ INPUT_VALUE = SpanAttributes.INPUT_VALUE
|
|
|
25
35
|
LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
|
|
26
36
|
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
|
|
27
37
|
LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
|
|
38
|
+
LLM_PROMPT_TEMPLATE_VARIABLES = SpanAttributes.LLM_PROMPT_TEMPLATE_VARIABLES
|
|
39
|
+
LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES
|
|
40
|
+
LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES
|
|
28
41
|
METADATA = SpanAttributes.METADATA
|
|
29
42
|
OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
|
|
30
43
|
OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
|
|
@@ -46,6 +59,7 @@ class SpanKind(Enum):
|
|
|
46
59
|
embedding = "EMBEDDING"
|
|
47
60
|
agent = "AGENT"
|
|
48
61
|
reranker = "RERANKER"
|
|
62
|
+
evaluator = "EVALUATOR"
|
|
49
63
|
unknown = "UNKNOWN"
|
|
50
64
|
|
|
51
65
|
@classmethod
|
|
@@ -101,8 +115,14 @@ class SpanEvent:
|
|
|
101
115
|
)
|
|
102
116
|
|
|
103
117
|
|
|
118
|
+
@strawberry.type
|
|
119
|
+
class SpanAsExampleRevision(ExampleRevision): ...
|
|
120
|
+
|
|
121
|
+
|
|
104
122
|
@strawberry.type
|
|
105
123
|
class Span(Node):
|
|
124
|
+
id_attr: NodeID[int]
|
|
125
|
+
db_span: strawberry.Private[models.Span]
|
|
106
126
|
name: str
|
|
107
127
|
status_code: SpanStatusCode
|
|
108
128
|
status_message: str
|
|
@@ -188,6 +208,44 @@ class Span(Node):
|
|
|
188
208
|
spans = await info.context.data_loaders.span_descendants.load(span_id)
|
|
189
209
|
return [to_gql_span(span) for span in spans]
|
|
190
210
|
|
|
211
|
+
    @strawberry.field(
        description="The span's attributes translated into an example revision for a dataset",
    )  # type: ignore
    def as_example_revision(self) -> SpanAsExampleRevision:
        """
        Translate this span's recorded attributes into a dataset example
        revision (input, output, metadata).
        """
        db_span = self.db_span
        attributes = db_span.attributes
        # Collect the I/O-related span attributes that the dataset helpers
        # use to derive the example's input and output values.
        span_io = _SpanIO(
            span_kind=db_span.span_kind,
            input_value=get_attribute_value(attributes, INPUT_VALUE),
            input_mime_type=get_attribute_value(attributes, INPUT_MIME_TYPE),
            output_value=get_attribute_value(attributes, OUTPUT_VALUE),
            output_mime_type=get_attribute_value(attributes, OUTPUT_MIME_TYPE),
            llm_prompt_template_variables=get_attribute_value(
                attributes, LLM_PROMPT_TEMPLATE_VARIABLES
            ),
            llm_input_messages=get_attribute_value(attributes, LLM_INPUT_MESSAGES),
            llm_output_messages=get_attribute_value(attributes, LLM_OUTPUT_MESSAGES),
            retrieval_documents=get_attribute_value(attributes, RETRIEVAL_DOCUMENTS),
        )
        return SpanAsExampleRevision(
            input=get_dataset_example_input(span_io),
            output=get_dataset_example_output(span_io),
            # The full span attribute mapping becomes the example metadata.
            metadata=attributes,
        )
|
|
235
|
+
|
|
236
|
+
    @strawberry.field(description="The project that this span belongs to.")  # type: ignore
    async def project(
        self,
        info: Info[Context, None],
    ) -> Annotated[
        "Project", strawberry.lazy("phoenix.server.api.types.Project")
    ]:  # use lazy types to avoid circular import: https://strawberry.rocks/docs/types/lazy
        # Imported locally for the same circular-import reason as the lazy
        # annotation above.
        from phoenix.server.api.types.Project import to_gql_project

        span_id = self.id_attr
        # Batched lookup of the owning project via the span_projects dataloader.
        project = await info.context.data_loaders.span_projects.load(span_id)
        return to_gql_project(project)
|
|
248
|
+
|
|
191
249
|
|
|
192
250
|
def to_gql_span(span: models.Span) -> Span:
|
|
193
251
|
events: List[SpanEvent] = list(map(SpanEvent.from_dict, span.events))
|
|
@@ -197,6 +255,7 @@ def to_gql_span(span: models.Span) -> Span:
|
|
|
197
255
|
num_documents = len(retrieval_documents) if isinstance(retrieval_documents, Sized) else None
|
|
198
256
|
return Span(
|
|
199
257
|
id_attr=span.id,
|
|
258
|
+
db_span=span,
|
|
200
259
|
name=span.name,
|
|
201
260
|
status_code=SpanStatusCode(span.status_code),
|
|
202
261
|
status_message=span.status_message,
|
|
@@ -302,3 +361,21 @@ def _convert_metadata_to_string(metadata: Any) -> Optional[str]:
|
|
|
302
361
|
return json.dumps(metadata)
|
|
303
362
|
except Exception:
|
|
304
363
|
return str(metadata)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
@dataclass
class _SpanIO:
    """
    A class that contains the information needed to extract dataset example
    input and output values from a span.
    """

    # The span kind attribute of the span; None when absent.
    span_kind: Optional[str]
    input_value: Any
    input_mime_type: Optional[str]
    output_value: Any
    output_mime_type: Optional[str]
    llm_prompt_template_variables: Any
    llm_input_messages: Any
    llm_output_messages: Any
    retrieval_documents: Any
|