arize-phoenix 3.16.1__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of arize-phoenix might be problematic.
- arize_phoenix-7.7.0.dist-info/METADATA +261 -0
- arize_phoenix-7.7.0.dist-info/RECORD +345 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
- arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
- phoenix/__init__.py +86 -14
- phoenix/auth.py +309 -0
- phoenix/config.py +675 -45
- phoenix/core/model.py +32 -30
- phoenix/core/model_schema.py +102 -109
- phoenix/core/model_schema_adapter.py +48 -45
- phoenix/datetime_utils.py +24 -3
- phoenix/db/README.md +54 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +85 -0
- phoenix/db/bulk_inserter.py +294 -0
- phoenix/db/engines.py +208 -0
- phoenix/db/enums.py +20 -0
- phoenix/db/facilitator.py +113 -0
- phoenix/db/helpers.py +159 -0
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/dataset.py +227 -0
- phoenix/db/insertion/document_annotation.py +171 -0
- phoenix/db/insertion/evaluation.py +191 -0
- phoenix/db/insertion/helpers.py +98 -0
- phoenix/db/insertion/span.py +193 -0
- phoenix/db/insertion/span_annotation.py +158 -0
- phoenix/db/insertion/trace_annotation.py +158 -0
- phoenix/db/insertion/types.py +256 -0
- phoenix/db/migrate.py +86 -0
- phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
- phoenix/db/migrations/env.py +114 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
- phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
- phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +807 -0
- phoenix/exceptions.py +5 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +158 -0
- phoenix/experiments/evaluators/code_evaluators.py +184 -0
- phoenix/experiments/evaluators/llm_evaluators.py +473 -0
- phoenix/experiments/evaluators/utils.py +236 -0
- phoenix/experiments/functions.py +772 -0
- phoenix/experiments/tracing.py +86 -0
- phoenix/experiments/types.py +726 -0
- phoenix/experiments/utils.py +25 -0
- phoenix/inferences/__init__.py +0 -0
- phoenix/{datasets → inferences}/errors.py +6 -5
- phoenix/{datasets → inferences}/fixtures.py +49 -42
- phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
- phoenix/{datasets → inferences}/schema.py +11 -11
- phoenix/{datasets → inferences}/validation.py +13 -14
- phoenix/logging/__init__.py +3 -0
- phoenix/logging/_config.py +90 -0
- phoenix/logging/_filter.py +6 -0
- phoenix/logging/_formatter.py +69 -0
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +4 -3
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +9 -3
- phoenix/pointcloud/clustering.py +5 -5
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/projectors.py +5 -6
- phoenix/pointcloud/umap_parameters.py +53 -52
- phoenix/server/api/README.md +28 -0
- phoenix/server/api/auth.py +44 -0
- phoenix/server/api/context.py +152 -9
- phoenix/server/api/dataloaders/__init__.py +91 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
- phoenix/server/api/dataloaders/document_evaluations.py +31 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
- phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/record_counts.py +116 -0
- phoenix/server/api/dataloaders/session_io.py +79 -0
- phoenix/server/api/dataloaders/session_num_traces.py +30 -0
- phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
- phoenix/server/api/dataloaders/session_token_usages.py +41 -0
- phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
- phoenix/server/api/dataloaders/span_annotations.py +26 -0
- phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +57 -0
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/token_counts.py +124 -0
- phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
- phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
- phoenix/server/api/dataloaders/user_roles.py +30 -0
- phoenix/server/api/dataloaders/users.py +33 -0
- phoenix/server/api/exceptions.py +48 -0
- phoenix/server/api/helpers/__init__.py +12 -0
- phoenix/server/api/helpers/dataset_helpers.py +217 -0
- phoenix/server/api/helpers/experiment_run_filters.py +763 -0
- phoenix/server/api/helpers/playground_clients.py +948 -0
- phoenix/server/api/helpers/playground_registry.py +70 -0
- phoenix/server/api/helpers/playground_spans.py +455 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
- phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
- phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +162 -0
- phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
- phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/SpanSort.py +134 -69
- phoenix/server/api/input_types/TemplateOptions.py +10 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/input_types/UserRoleInput.py +9 -0
- phoenix/server/api/mutations/__init__.py +28 -0
- phoenix/server/api/mutations/api_key_mutations.py +167 -0
- phoenix/server/api/mutations/chat_mutations.py +593 -0
- phoenix/server/api/mutations/dataset_mutations.py +591 -0
- phoenix/server/api/mutations/experiment_mutations.py +75 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
- phoenix/server/api/mutations/project_mutations.py +57 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
- phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
- phoenix/server/api/mutations/user_mutations.py +329 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +17 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +738 -0
- phoenix/server/api/routers/__init__.py +11 -0
- phoenix/server/api/routers/auth.py +284 -0
- phoenix/server/api/routers/embeddings.py +26 -0
- phoenix/server/api/routers/oauth2.py +488 -0
- phoenix/server/api/routers/v1/__init__.py +64 -0
- phoenix/server/api/routers/v1/datasets.py +1017 -0
- phoenix/server/api/routers/v1/evaluations.py +362 -0
- phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
- phoenix/server/api/routers/v1/experiment_runs.py +167 -0
- phoenix/server/api/routers/v1/experiments.py +308 -0
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +267 -0
- phoenix/server/api/routers/v1/traces.py +208 -0
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/schema.py +44 -241
- phoenix/server/api/subscriptions.py +597 -0
- phoenix/server/api/types/Annotation.py +21 -0
- phoenix/server/api/types/AnnotationSummary.py +55 -0
- phoenix/server/api/types/AnnotatorKind.py +16 -0
- phoenix/server/api/types/ApiKey.py +27 -0
- phoenix/server/api/types/AuthMethod.py +9 -0
- phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
- phoenix/server/api/types/Cluster.py +25 -24
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/DataQualityMetric.py +31 -13
- phoenix/server/api/types/Dataset.py +288 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +32 -31
- phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
- phoenix/server/api/types/EmbeddingDimension.py +56 -49
- phoenix/server/api/types/Evaluation.py +25 -31
- phoenix/server/api/types/EvaluationSummary.py +30 -50
- phoenix/server/api/types/Event.py +20 -20
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +152 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +17 -0
- phoenix/server/api/types/ExperimentRun.py +119 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
- phoenix/server/api/types/GenerativeModel.py +9 -0
- phoenix/server/api/types/GenerativeProvider.py +85 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/LabelFraction.py +7 -0
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +54 -54
- phoenix/server/api/types/PerformanceMetric.py +8 -5
- phoenix/server/api/types/Project.py +407 -142
- phoenix/server/api/types/ProjectSession.py +139 -0
- phoenix/server/api/types/Segments.py +4 -4
- phoenix/server/api/types/Span.py +221 -176
- phoenix/server/api/types/SpanAnnotation.py +43 -0
- phoenix/server/api/types/SpanIOValue.py +15 -0
- phoenix/server/api/types/SystemApiKey.py +9 -0
- phoenix/server/api/types/TemplateLanguage.py +10 -0
- phoenix/server/api/types/TimeSeries.py +19 -15
- phoenix/server/api/types/TokenUsage.py +11 -0
- phoenix/server/api/types/Trace.py +154 -0
- phoenix/server/api/types/TraceAnnotation.py +45 -0
- phoenix/server/api/types/UMAPPoints.py +7 -7
- phoenix/server/api/types/User.py +60 -0
- phoenix/server/api/types/UserApiKey.py +45 -0
- phoenix/server/api/types/UserRole.py +15 -0
- phoenix/server/api/types/node.py +4 -112
- phoenix/server/api/types/pagination.py +156 -57
- phoenix/server/api/utils.py +34 -0
- phoenix/server/app.py +864 -115
- phoenix/server/bearer_auth.py +163 -0
- phoenix/server/dml_event.py +136 -0
- phoenix/server/dml_event_handler.py +256 -0
- phoenix/server/email/__init__.py +0 -0
- phoenix/server/email/sender.py +97 -0
- phoenix/server/email/templates/__init__.py +0 -0
- phoenix/server/email/templates/password_reset.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/grpc_server.py +102 -0
- phoenix/server/jwt_store.py +505 -0
- phoenix/server/main.py +305 -116
- phoenix/server/oauth2.py +52 -0
- phoenix/server/openapi/__init__.py +0 -0
- phoenix/server/prometheus.py +111 -0
- phoenix/server/rate_limiters.py +188 -0
- phoenix/server/static/.vite/manifest.json +87 -0
- phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
- phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
- phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
- phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
- phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
- phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
- phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
- phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
- phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
- phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
- phoenix/server/telemetry.py +68 -0
- phoenix/server/templates/index.html +82 -23
- phoenix/server/thread_server.py +3 -3
- phoenix/server/types.py +275 -0
- phoenix/services.py +27 -18
- phoenix/session/client.py +743 -68
- phoenix/session/data_extractor.py +31 -7
- phoenix/session/evaluation.py +3 -9
- phoenix/session/session.py +263 -219
- phoenix/settings.py +22 -0
- phoenix/trace/__init__.py +2 -22
- phoenix/trace/attributes.py +338 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +663 -213
- phoenix/trace/dsl/helpers.py +73 -21
- phoenix/trace/dsl/query.py +574 -201
- phoenix/trace/exporter.py +24 -19
- phoenix/trace/fixtures.py +368 -32
- phoenix/trace/otel.py +71 -219
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +33 -11
- phoenix/trace/span_evaluations.py +21 -16
- phoenix/trace/span_json_decoder.py +6 -4
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +47 -32
- phoenix/trace/utils.py +21 -4
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/client.py +132 -0
- phoenix/utilities/deprecation.py +31 -0
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +109 -0
- phoenix/utilities/logging.py +8 -0
- phoenix/utilities/project.py +2 -2
- phoenix/utilities/re.py +49 -0
- phoenix/utilities/span_store.py +0 -23
- phoenix/utilities/template_formatters.py +99 -0
- phoenix/version.py +1 -1
- arize_phoenix-3.16.1.dist-info/METADATA +0 -495
- arize_phoenix-3.16.1.dist-info/RECORD +0 -178
- phoenix/core/project.py +0 -619
- phoenix/core/traces.py +0 -96
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -448
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/helpers.py +0 -11
- phoenix/server/api/routers/evaluation_handler.py +0 -109
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/server/api/types/DatasetRole.py +0 -23
- phoenix/server/static/index.css +0 -6
- phoenix/server/static/index.js +0 -7447
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- phoenix/trace/langchain/__init__.py +0 -3
- phoenix/trace/langchain/instrumentor.py +0 -35
- phoenix/trace/llama_index/__init__.py +0 -3
- phoenix/trace/llama_index/callback.py +0 -102
- phoenix/trace/openai/__init__.py +0 -3
- phoenix/trace/openai/instrumentor.py +0 -30
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/server/api/types/Evaluation.py

@@ -1,52 +1,38 @@
-from typing import Optional
-
 import strawberry
 
 import phoenix.trace.v1 as pb
-from phoenix.
+from phoenix.db.models import DocumentAnnotation, TraceAnnotation
 
-
-@strawberry.interface
-class Evaluation:
-    name: str = strawberry.field(
-        description="Name of the evaluation, e.g. 'helpfulness' or 'relevance'."
-    )
-    score: Optional[float] = strawberry.field(
-        description="Result of the evaluation in the form of a numeric score."
-    )
-    label: Optional[str] = strawberry.field(
-        description="Result of the evaluation in the form of a string, e.g. "
-        "'helpful' or 'not helpful'. Note that the label is not necessarily binary."
-    )
-    explanation: Optional[str] = strawberry.field(
-        description="The evaluator's explanation for the evaluation result (i.e. "
-        "score or label, or both) given to the subject."
-    )
+from .Annotation import Annotation
 
 
 @strawberry.type
-class SpanEvaluation(Evaluation):
-    span_id: strawberry.Private[SpanID]
-
+class TraceEvaluation(Annotation):
     @staticmethod
-    def from_pb_evaluation(evaluation: pb.Evaluation) -> "SpanEvaluation":
+    def from_pb_evaluation(evaluation: pb.Evaluation) -> "TraceEvaluation":
         result = evaluation.result
         score = result.score.value if result.HasField("score") else None
         label = result.label.value if result.HasField("label") else None
         explanation = result.explanation.value if result.HasField("explanation") else None
-
-        return SpanEvaluation(
+        return TraceEvaluation(
             name=evaluation.name,
             score=score,
             label=label,
             explanation=explanation,
-
+        )
+
+    @staticmethod
+    def from_sql_trace_annotation(annotation: TraceAnnotation) -> "TraceEvaluation":
+        return TraceEvaluation(
+            name=annotation.name,
+            score=annotation.score,
+            label=annotation.label,
+            explanation=annotation.explanation,
         )
 
 
 @strawberry.type
-class DocumentEvaluation(Evaluation):
-    span_id: strawberry.Private[SpanID]
+class DocumentEvaluation(Annotation):
     document_position: int = strawberry.field(
         description="The zero-based index among retrieved documents, which "
         "is collected as a list (even when ordering is not inherently meaningful)."

@@ -60,12 +46,20 @@ class DocumentEvaluation(Evaluation):
         explanation = result.explanation.value if result.HasField("explanation") else None
         document_retrieval_id = evaluation.subject_id.document_retrieval_id
         document_position = document_retrieval_id.document_position
-        span_id = SpanID(document_retrieval_id.span_id)
         return DocumentEvaluation(
             name=evaluation.name,
             score=score,
             label=label,
             explanation=explanation,
             document_position=document_position,
-
+        )
+
+    @staticmethod
+    def from_sql_document_annotation(annotation: DocumentAnnotation) -> "DocumentEvaluation":
+        return DocumentEvaluation(
+            name=annotation.name,
+            score=annotation.score,
+            label=annotation.label,
+            explanation=annotation.explanation,
+            document_position=annotation.document_position,
         )
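The new from_sql_* constructors mirror the existing protobuf constructors but read directly from ORM annotation rows, which is why span_id is no longer carried on the GraphQL types. A minimal usage sketch (the to_evaluations helper is illustrative, not part of the diff):

    from phoenix.db.models import TraceAnnotation
    from phoenix.server.api.types.Evaluation import TraceEvaluation


    def to_evaluations(rows: list[TraceAnnotation]) -> list[TraceEvaluation]:
        # Adapt already-fetched ORM rows to the GraphQL evaluation type.
        return [TraceEvaluation.from_sql_trace_annotation(row) for row in rows]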
phoenix/server/api/types/EvaluationSummary.py

@@ -1,75 +1,55 @@
-import 
-from functools import cached_property
-from typing import List, Optional, Tuple, cast
+from typing import Optional, Union, cast
 
 import pandas as pd
 import strawberry
-from pandas.api.types import CategoricalDtype
 from strawberry import Private
 
-
+from phoenix.db import models
+from phoenix.server.api.types.LabelFraction import LabelFraction
 
-
-@strawberry.type
-class LabelFraction:
-    label: str
-    fraction: float
+AnnotationType = Union[models.SpanAnnotation, models.TraceAnnotation]
 
 
 @strawberry.type
 class EvaluationSummary:
-
-
-
+    df: Private[pd.DataFrame]
+
+    def __init__(self, dataframe: pd.DataFrame) -> None:
+        self.df = dataframe
+
+    @strawberry.field
+    def count(self) -> int:
+        return cast(int, self.df.record_count.sum())
 
-
-
-
-        labels: Tuple[str, ...],
-    ) -> None:
-        self.evaluations = evaluations
-        self.labels = labels
-        self.count = len(evaluations)
+    @strawberry.field
+    def labels(self) -> list[str]:
+        return self.df.label.dropna().tolist()
 
     @strawberry.field
-    def label_fractions(self) -> 
-        if not 
+    def label_fractions(self) -> list[LabelFraction]:
+        if not (n := self.df.label_count.sum()):
             return []
-        counts = self._eval_labels.value_counts(dropna=True)
         return [
-            LabelFraction(
-
+            LabelFraction(
+                label=cast(str, row.label),
+                fraction=row.label_count / n,
+            )
+            for row in self.df.loc[
+                self.df.label.notna(),
+                ["label", "label_count"],
+            ].itertuples()
         ]
 
     @strawberry.field
     def mean_score(self) -> Optional[float]:
-
-
+        if not (n := self.df.score_count.sum()):
+            return None
+        return cast(float, self.df.score_sum.sum() / n)
 
     @strawberry.field
     def score_count(self) -> int:
-        return self.
+        return cast(int, self.df.score_count.sum())
 
     @strawberry.field
     def label_count(self) -> int:
-        return self.
-
-    @cached_property
-    def _eval_scores(self) -> "pd.Series[float]":
-        return pd.Series(
-            (
-                evaluation.result.score.value if evaluation.result.HasField("score") else None
-                for evaluation in self.evaluations
-            ),
-            dtype=float,
-        )
-
-    @cached_property
-    def _eval_labels(self) -> "pd.Series[CategoricalDtype]":
-        return pd.Series(
-            (
-                evaluation.result.label.value if evaluation.result.HasField("label") else None
-                for evaluation in self.evaluations
-            ),
-            dtype=CategoricalDtype(categories=self.labels), # type: ignore
-        )
+        return cast(int, self.df.label_count.sum())
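The rewrite replaces the in-memory list of protobuf evaluations with a pre-aggregated DataFrame: one row per label, with columns record_count, label, label_count, score_count, and score_sum, as read by the resolvers above. A small sketch with made-up numbers showing how those resolvers reduce such a frame:

    import pandas as pd

    # Hypothetical pre-aggregated frame, one row per label.
    df = pd.DataFrame(
        {
            "label": ["helpful", "not helpful"],
            "record_count": [8, 2],
            "label_count": [8, 2],
            "score_count": [8, 2],
            "score_sum": [7.0, 0.5],
        }
    )

    count = int(df.record_count.sum())                       # 10
    mean_score = df.score_sum.sum() / df.score_count.sum()   # 0.75
    label_fractions = {
        row.label: row.label_count / df.label_count.sum()
        for row in df.loc[df.label.notna(), ["label", "label_count"]].itertuples()
    }                                                         # {'helpful': 0.8, 'not helpful': 0.2}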
phoenix/server/api/types/Event.py

@@ -1,6 +1,6 @@
 import math
 from collections import defaultdict
-from typing import 
+from typing import Optional, Union, cast
 
 import strawberry
 from strawberry import ID

@@ -17,10 +17,10 @@ from phoenix.core.model_schema import (
 )
 
 from ..interceptor import GqlValueMediator
-from .DatasetRole import STR_TO_DATASET_ROLE, AncillaryDatasetRole, DatasetRole
 from .Dimension import Dimension
 from .DimensionWithValue import DimensionWithValue
 from .EventMetadata import EventMetadata
+from .InferencesRole import STR_TO_INFEREENCES_ROLE, AncillaryInferencesRole, InferencesRole
 from .PromptResponse import PromptResponse
 
 

@@ -28,7 +28,7 @@ from .PromptResponse import PromptResponse
 class Event:
     id: strawberry.ID
     eventMetadata: EventMetadata
-    dimensions: 
+    dimensions: list[DimensionWithValue]
     prompt_and_response: Optional[PromptResponse] = strawberry.field(
         description="The prompt and response pair associated with the event",
         default=GqlValueMediator(),

@@ -41,42 +41,42 @@ class Event:
 
 def create_event_id(
     row_id: int,
-
+    inferences_role: Union[InferencesRole, AncillaryInferencesRole, ms.InferencesRole],
 ) -> ID:
-
-
-    if isinstance(
-    else 
+    inferences_role_str = (
+        inferences_role.value
+        if isinstance(inferences_role, (InferencesRole, AncillaryInferencesRole))
+        else inferences_role
     )
-    return ID(f"{row_id}:{
+    return ID(f"{row_id}:{inferences_role_str}")
 
 
 def unpack_event_id(
     event_id: ID,
-) -> 
-    row_id_str, 
+) -> tuple[int, Union[InferencesRole, AncillaryInferencesRole]]:
+    row_id_str, inferences_role_str = str(event_id).split(":")
     row_id = int(row_id_str)
-
-    return row_id, 
+    inferences_role = STR_TO_INFEREENCES_ROLE[inferences_role_str]
+    return row_id, inferences_role
 
 
-def 
-    event_ids: 
-) -> 
+def parse_event_ids_by_inferences_role(
+    event_ids: list[ID],
+) -> dict[Union[InferencesRole, AncillaryInferencesRole], list[int]]:
     """
     Parses event IDs and returns the corresponding row indexes.
     """
-    row_indexes: 
+    row_indexes: dict[Union[InferencesRole, AncillaryInferencesRole], list[int]] = defaultdict(list)
     for event_id in event_ids:
-        row_id, 
-        row_indexes[
+        row_id, inferences_role = unpack_event_id(event_id)
+        row_indexes[inferences_role].append(row_id)
     return row_indexes
 
 
 def create_event(
     event_id: ID,
     event: ms.Event,
-    dimensions: 
+    dimensions: list[Dimension],
     is_document_record: bool = False,
 ) -> Event:
     """
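Event IDs keep the same "{row_id}:{role}" string format; only the role type changed from DatasetRole to InferencesRole. An illustrative round trip, assuming a primary enum member whose serialized value is "primary" (neither detail is shown in this hunk):

    from phoenix.server.api.types.Event import create_event_id, unpack_event_id
    from phoenix.server.api.types.InferencesRole import InferencesRole

    event_id = create_event_id(42, InferencesRole.primary)  # ID("42:primary"), assuming that value
    row_id, role = unpack_event_id(event_id)                 # (42, InferencesRole.primary)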
phoenix/server/api/types/ExampleRevisionInterface.py (new file)

@@ -0,0 +1,14 @@
+import strawberry
+from strawberry.scalars import JSON
+
+
+@strawberry.interface
+class ExampleRevision:
+    """
+    Represents an example revision for generative tasks.
+    For example, you might have text -> text, text -> labels, etc.
+    """
+
+    input: JSON
+    output: JSON
+    metadata: JSON
phoenix/server/api/types/Experiment.py (new file)

@@ -0,0 +1,152 @@
+from datetime import datetime
+from typing import ClassVar, Optional
+
+import strawberry
+from sqlalchemy import select
+from sqlalchemy.orm import joinedload
+from strawberry import UNSET, Private
+from strawberry.relay import Connection, Node, NodeID
+from strawberry.scalars import JSON
+from strawberry.types import Info
+
+from phoenix.db import models
+from phoenix.server.api.context import Context
+from phoenix.server.api.types.ExperimentAnnotationSummary import ExperimentAnnotationSummary
+from phoenix.server.api.types.ExperimentRun import ExperimentRun, to_gql_experiment_run
+from phoenix.server.api.types.pagination import (
+    ConnectionArgs,
+    CursorString,
+    connection_from_list,
+)
+from phoenix.server.api.types.Project import Project
+
+
+@strawberry.type
+class Experiment(Node):
+    _table: ClassVar[type[models.Base]] = models.Experiment
+    cached_sequence_number: Private[Optional[int]] = None
+    id_attr: NodeID[int]
+    name: str
+    project_name: Optional[str]
+    description: Optional[str]
+    metadata: JSON
+    created_at: datetime
+    updated_at: datetime
+
+    @strawberry.field(
+        description="Sequence number (1-based) of experiments belonging to the same dataset"
+    ) # type: ignore
+    async def sequence_number(
+        self,
+        info: Info[Context, None],
+    ) -> int:
+        if self.cached_sequence_number is None:
+            seq_num = await info.context.data_loaders.experiment_sequence_number.load(self.id_attr)
+            if seq_num is None:
+                raise ValueError(f"invalid experiment: id={self.id_attr}")
+            self.cached_sequence_number = seq_num
+        return self.cached_sequence_number
+
+    @strawberry.field
+    async def runs(
+        self,
+        info: Info[Context, None],
+        first: Optional[int] = 50,
+        last: Optional[int] = UNSET,
+        after: Optional[CursorString] = UNSET,
+        before: Optional[CursorString] = UNSET,
+    ) -> Connection[ExperimentRun]:
+        args = ConnectionArgs(
+            first=first,
+            after=after if isinstance(after, CursorString) else None,
+            last=last,
+            before=before if isinstance(before, CursorString) else None,
+        )
+        experiment_id = self.id_attr
+        async with info.context.db() as session:
+            runs = (
+                await session.scalars(
+                    select(models.ExperimentRun)
+                    .where(models.ExperimentRun.experiment_id == experiment_id)
+                    .order_by(models.ExperimentRun.id.desc())
+                    .options(
+                        joinedload(models.ExperimentRun.trace).load_only(models.Trace.trace_id)
+                    )
+                )
+            ).all()
+        return connection_from_list([to_gql_experiment_run(run) for run in runs], args)
+
+    @strawberry.field
+    async def run_count(self, info: Info[Context, None]) -> int:
+        experiment_id = self.id_attr
+        return await info.context.data_loaders.experiment_run_counts.load(experiment_id)
+
+    @strawberry.field
+    async def annotation_summaries(
+        self, info: Info[Context, None]
+    ) -> list[ExperimentAnnotationSummary]:
+        experiment_id = self.id_attr
+        return [
+            ExperimentAnnotationSummary(
+                annotation_name=summary.annotation_name,
+                min_score=summary.min_score,
+                max_score=summary.max_score,
+                mean_score=summary.mean_score,
+                count=summary.count,
+                error_count=summary.error_count,
+            )
+            for summary in await info.context.data_loaders.experiment_annotation_summaries.load(
+                experiment_id
+            )
+        ]
+
+    @strawberry.field
+    async def error_rate(self, info: Info[Context, None]) -> Optional[float]:
+        return await info.context.data_loaders.experiment_error_rates.load(self.id_attr)
+
+    @strawberry.field
+    async def average_run_latency_ms(self, info: Info[Context, None]) -> Optional[float]:
+        latency_seconds = await info.context.data_loaders.average_experiment_run_latency.load(
+            self.id_attr
+        )
+        return latency_seconds * 1000 if latency_seconds is not None else None
+
+    @strawberry.field
+    async def project(self, info: Info[Context, None]) -> Optional[Project]:
+        if self.project_name is None:
+            return None
+
+        db_project = await info.context.data_loaders.project_by_name.load(self.project_name)
+
+        if db_project is None:
+            return None
+
+        return Project(
+            id_attr=db_project.id,
+            name=db_project.name,
+            gradient_start_color=db_project.gradient_start_color,
+            gradient_end_color=db_project.gradient_end_color,
+        )
+
+    @strawberry.field
+    def last_updated_at(self, info: Info[Context, None]) -> Optional[datetime]:
+        return info.context.last_updated_at.get(self._table, self.id_attr)
+
+
+def to_gql_experiment(
+    experiment: models.Experiment,
+    sequence_number: Optional[int] = None,
+) -> Experiment:
+    """
+    Converts an ORM experiment to a GraphQL Experiment.
+    """
+    return Experiment(
+        cached_sequence_number=sequence_number,
+        id_attr=experiment.id,
+        name=experiment.name,
+        project_name=experiment.project_name,
+        description=experiment.description,
+        metadata=experiment.metadata_,
+        created_at=experiment.created_at,
+        updated_at=experiment.updated_at,
+    )
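Experiment copies only scalar columns from the ORM row; run counts, error rate, latency, and the parent project are resolved lazily through dataloaders on the request context. A hedged sketch of the conversion step (the query wiring is illustrative, not part of the diff):

    from sqlalchemy import select
    from sqlalchemy.ext.asyncio import AsyncSession

    from phoenix.db import models
    from phoenix.server.api.types.Experiment import Experiment, to_gql_experiment


    async def latest_experiment(session: AsyncSession) -> Experiment:
        # Fetch one ORM row and copy its scalar columns into the GraphQL type.
        row = await session.scalar(
            select(models.Experiment).order_by(models.Experiment.id.desc()).limit(1)
        )
        assert row is not None
        return to_gql_experiment(row)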
phoenix/server/api/types/ExperimentAnnotationSummary.py (new file)

@@ -0,0 +1,13 @@
+from typing import Optional
+
+import strawberry
+
+
+@strawberry.type
+class ExperimentAnnotationSummary:
+    annotation_name: str
+    min_score: Optional[float]
+    max_score: Optional[float]
+    mean_score: Optional[float]
+    count: int
+    error_count: int
phoenix/server/api/types/ExperimentComparison.py (new file)

@@ -0,0 +1,17 @@
+import strawberry
+from strawberry.relay import GlobalID
+
+from phoenix.server.api.types.DatasetExample import DatasetExample
+from phoenix.server.api.types.ExperimentRun import ExperimentRun
+
+
+@strawberry.type
+class RunComparisonItem:
+    experiment_id: GlobalID
+    runs: list[ExperimentRun]
+
+
+@strawberry.type
+class ExperimentComparison:
+    example: DatasetExample
+    run_comparison_items: list[RunComparisonItem]
phoenix/server/api/types/ExperimentRun.py (new file)

@@ -0,0 +1,119 @@
+from datetime import datetime
+from typing import TYPE_CHECKING, Annotated, Optional
+
+import strawberry
+from sqlalchemy import select
+from sqlalchemy.orm import load_only
+from strawberry import UNSET
+from strawberry.relay import Connection, GlobalID, Node, NodeID
+from strawberry.scalars import JSON
+from strawberry.types import Info
+
+from phoenix.db import models
+from phoenix.server.api.context import Context
+from phoenix.server.api.types.ExperimentRunAnnotation import (
+    ExperimentRunAnnotation,
+    to_gql_experiment_run_annotation,
+)
+from phoenix.server.api.types.pagination import (
+    ConnectionArgs,
+    CursorString,
+    connection_from_list,
+)
+from phoenix.server.api.types.Trace import Trace, to_gql_trace
+
+if TYPE_CHECKING:
+    from phoenix.server.api.types.DatasetExample import DatasetExample
+
+
+@strawberry.type
+class ExperimentRun(Node):
+    id_attr: NodeID[int]
+    experiment_id: GlobalID
+    trace_id: Optional[str]
+    output: Optional[JSON]
+    start_time: datetime
+    end_time: datetime
+    error: Optional[str]
+
+    @strawberry.field
+    async def annotations(
+        self,
+        info: Info[Context, None],
+        first: Optional[int] = 50,
+        last: Optional[int] = UNSET,
+        after: Optional[CursorString] = UNSET,
+        before: Optional[CursorString] = UNSET,
+    ) -> Connection[ExperimentRunAnnotation]:
+        args = ConnectionArgs(
+            first=first,
+            after=after if isinstance(after, CursorString) else None,
+            last=last,
+            before=before if isinstance(before, CursorString) else None,
+        )
+        run_id = self.id_attr
+        annotations = await info.context.data_loaders.experiment_run_annotations.load(run_id)
+        return connection_from_list(
+            [to_gql_experiment_run_annotation(annotation) for annotation in annotations], args
+        )
+
+    @strawberry.field
+    async def trace(self, info: Info) -> Optional[Trace]:
+        if not self.trace_id:
+            return None
+        dataloader = info.context.data_loaders.trace_by_trace_ids
+        if (trace := await dataloader.load(self.trace_id)) is None:
+            return None
+        return to_gql_trace(trace)
+
+    @strawberry.field
+    async def example(
+        self, info: Info
+    ) -> Annotated[
+        "DatasetExample", strawberry.lazy("phoenix.server.api.types.DatasetExample")
+    ]: # use lazy types to avoid circular import: https://strawberry.rocks/docs/types/lazy
+        from phoenix.server.api.types.DatasetExample import DatasetExample
+
+        async with info.context.db() as session:
+            assert (
+                result := await session.execute(
+                    select(models.DatasetExample, models.Experiment.dataset_version_id)
+                    .select_from(models.ExperimentRun)
+                    .join(
+                        models.DatasetExample,
+                        models.DatasetExample.id == models.ExperimentRun.dataset_example_id,
+                    )
+                    .join(
+                        models.Experiment,
+                        models.Experiment.id == models.ExperimentRun.experiment_id,
+                    )
+                    .where(models.ExperimentRun.id == self.id_attr)
+                    .options(load_only(models.DatasetExample.id, models.DatasetExample.created_at))
+                )
+            ) is not None
+            example, version_id = result.first()
+        return DatasetExample(
+            id_attr=example.id,
+            created_at=example.created_at,
+            version_id=version_id,
+        )
+
+
+def to_gql_experiment_run(run: models.ExperimentRun) -> ExperimentRun:
+    """
+    Converts an ORM experiment run to a GraphQL ExperimentRun.
+    """
+
+    from phoenix.server.api.types.Experiment import Experiment
+
+    return ExperimentRun(
+        id_attr=run.id,
+        experiment_id=GlobalID(Experiment.__name__, str(run.experiment_id)),
+        trace_id=trace_id
+        if (trace := run.trace) and (trace_id := trace.trace_id) is not None
+        else None,
+        output=run.output.get("task_output"),
+        start_time=run.start_time,
+        end_time=run.end_time,
+        error=run.error,
+    )
phoenix/server/api/types/ExperimentRunAnnotation.py (new file)

@@ -0,0 +1,56 @@
+from datetime import datetime
+from typing import Optional
+
+import strawberry
+from strawberry import Info
+from strawberry.relay import Node, NodeID
+from strawberry.scalars import JSON
+
+from phoenix.db import models
+from phoenix.server.api.types.AnnotatorKind import ExperimentRunAnnotatorKind
+from phoenix.server.api.types.Trace import Trace, to_gql_trace
+
+
+@strawberry.type
+class ExperimentRunAnnotation(Node):
+    id_attr: NodeID[int]
+    name: str
+    annotator_kind: ExperimentRunAnnotatorKind
+    label: Optional[str]
+    score: Optional[float]
+    explanation: Optional[str]
+    error: Optional[str]
+    metadata: JSON
+    start_time: datetime
+    end_time: datetime
+    trace_id: Optional[str]
+
+    @strawberry.field
+    async def trace(self, info: Info) -> Optional[Trace]:
+        if not self.trace_id:
+            return None
+        dataloader = info.context.data_loaders.trace_by_trace_ids
+        if (trace := await dataloader.load(self.trace_id)) is None:
+            return None
+        return to_gql_trace(trace)
+
+
+def to_gql_experiment_run_annotation(
+    annotation: models.ExperimentRunAnnotation,
+) -> ExperimentRunAnnotation:
+    """
+    Converts an ORM experiment run annotation to a GraphQL ExperimentRunAnnotation.
+    """
+    return ExperimentRunAnnotation(
+        id_attr=annotation.id,
+        name=annotation.name,
+        annotator_kind=ExperimentRunAnnotatorKind(annotation.annotator_kind),
+        label=annotation.label,
+        score=annotation.score,
+        explanation=annotation.explanation,
+        error=annotation.error,
+        metadata=annotation.metadata_,
+        start_time=annotation.start_time,
+        end_time=annotation.end_time,
+        trace_id=annotation.trace_id,
+    )