arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
- arize_phoenix-12.28.1.dist-info/RECORD +499 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
- phoenix/__generated__/__init__.py +0 -0
- phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
- phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
- phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
- phoenix/__init__.py +5 -4
- phoenix/auth.py +39 -2
- phoenix/config.py +1763 -91
- phoenix/datetime_utils.py +120 -2
- phoenix/db/README.md +595 -25
- phoenix/db/bulk_inserter.py +145 -103
- phoenix/db/engines.py +140 -33
- phoenix/db/enums.py +3 -12
- phoenix/db/facilitator.py +302 -35
- phoenix/db/helpers.py +1000 -65
- phoenix/db/iam_auth.py +64 -0
- phoenix/db/insertion/dataset.py +135 -2
- phoenix/db/insertion/document_annotation.py +9 -6
- phoenix/db/insertion/evaluation.py +2 -3
- phoenix/db/insertion/helpers.py +17 -2
- phoenix/db/insertion/session_annotation.py +176 -0
- phoenix/db/insertion/span.py +15 -11
- phoenix/db/insertion/span_annotation.py +3 -4
- phoenix/db/insertion/trace_annotation.py +3 -4
- phoenix/db/insertion/types.py +50 -20
- phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
- phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
- phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
- phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
- phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
- phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
- phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
- phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
- phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
- phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
- phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
- phoenix/db/models.py +669 -56
- phoenix/db/pg_config.py +10 -0
- phoenix/db/types/model_provider.py +4 -0
- phoenix/db/types/token_price_customization.py +29 -0
- phoenix/db/types/trace_retention.py +23 -15
- phoenix/experiments/evaluators/utils.py +3 -3
- phoenix/experiments/functions.py +160 -52
- phoenix/experiments/tracing.py +2 -2
- phoenix/experiments/types.py +1 -1
- phoenix/inferences/inferences.py +1 -2
- phoenix/server/api/auth.py +38 -7
- phoenix/server/api/auth_messages.py +46 -0
- phoenix/server/api/context.py +100 -4
- phoenix/server/api/dataloaders/__init__.py +79 -5
- phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
- phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
- phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
- phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
- phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
- phoenix/server/api/dataloaders/dataset_labels.py +36 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
- phoenix/server/api/dataloaders/document_evaluations.py +6 -9
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
- phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
- phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
- phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
- phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
- phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
- phoenix/server/api/dataloaders/record_counts.py +37 -10
- phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
- phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
- phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
- phoenix/server/api/dataloaders/span_costs.py +29 -0
- phoenix/server/api/dataloaders/table_fields.py +2 -2
- phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
- phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
- phoenix/server/api/dataloaders/types.py +29 -0
- phoenix/server/api/exceptions.py +11 -1
- phoenix/server/api/helpers/dataset_helpers.py +5 -1
- phoenix/server/api/helpers/playground_clients.py +1243 -292
- phoenix/server/api/helpers/playground_registry.py +2 -2
- phoenix/server/api/helpers/playground_spans.py +8 -4
- phoenix/server/api/helpers/playground_users.py +26 -0
- phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
- phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
- phoenix/server/api/helpers/prompts/models.py +205 -22
- phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
- phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
- phoenix/server/api/input_types/CreateProjectInput.py +27 -0
- phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
- phoenix/server/api/input_types/DatasetFilter.py +17 -0
- phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
- phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
- phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
- phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
- phoenix/server/api/input_types/PromptFilter.py +14 -0
- phoenix/server/api/input_types/PromptVersionInput.py +52 -1
- phoenix/server/api/input_types/SpanSort.py +44 -7
- phoenix/server/api/input_types/TimeBinConfig.py +23 -0
- phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
- phoenix/server/api/input_types/UserRoleInput.py +1 -0
- phoenix/server/api/mutations/__init__.py +10 -0
- phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
- phoenix/server/api/mutations/api_key_mutations.py +19 -23
- phoenix/server/api/mutations/chat_mutations.py +154 -47
- phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
- phoenix/server/api/mutations/dataset_mutations.py +21 -16
- phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
- phoenix/server/api/mutations/experiment_mutations.py +2 -2
- phoenix/server/api/mutations/export_events_mutations.py +3 -3
- phoenix/server/api/mutations/model_mutations.py +210 -0
- phoenix/server/api/mutations/project_mutations.py +49 -10
- phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
- phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
- phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
- phoenix/server/api/mutations/prompt_mutations.py +65 -129
- phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
- phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
- phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
- phoenix/server/api/mutations/trace_mutations.py +47 -3
- phoenix/server/api/mutations/user_mutations.py +66 -41
- phoenix/server/api/queries.py +768 -293
- phoenix/server/api/routers/__init__.py +2 -2
- phoenix/server/api/routers/auth.py +154 -88
- phoenix/server/api/routers/ldap.py +229 -0
- phoenix/server/api/routers/oauth2.py +369 -106
- phoenix/server/api/routers/v1/__init__.py +24 -4
- phoenix/server/api/routers/v1/annotation_configs.py +23 -31
- phoenix/server/api/routers/v1/annotations.py +481 -17
- phoenix/server/api/routers/v1/datasets.py +395 -81
- phoenix/server/api/routers/v1/documents.py +142 -0
- phoenix/server/api/routers/v1/evaluations.py +24 -31
- phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
- phoenix/server/api/routers/v1/experiment_runs.py +337 -59
- phoenix/server/api/routers/v1/experiments.py +479 -48
- phoenix/server/api/routers/v1/models.py +7 -0
- phoenix/server/api/routers/v1/projects.py +18 -49
- phoenix/server/api/routers/v1/prompts.py +54 -40
- phoenix/server/api/routers/v1/sessions.py +108 -0
- phoenix/server/api/routers/v1/spans.py +1091 -81
- phoenix/server/api/routers/v1/traces.py +132 -78
- phoenix/server/api/routers/v1/users.py +389 -0
- phoenix/server/api/routers/v1/utils.py +3 -7
- phoenix/server/api/subscriptions.py +305 -88
- phoenix/server/api/types/Annotation.py +90 -23
- phoenix/server/api/types/ApiKey.py +13 -17
- phoenix/server/api/types/AuthMethod.py +1 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
- phoenix/server/api/types/CostBreakdown.py +12 -0
- phoenix/server/api/types/Dataset.py +226 -72
- phoenix/server/api/types/DatasetExample.py +88 -18
- phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
- phoenix/server/api/types/DatasetLabel.py +57 -0
- phoenix/server/api/types/DatasetSplit.py +98 -0
- phoenix/server/api/types/DatasetVersion.py +49 -4
- phoenix/server/api/types/DocumentAnnotation.py +212 -0
- phoenix/server/api/types/Experiment.py +264 -59
- phoenix/server/api/types/ExperimentComparison.py +5 -10
- phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
- phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
- phoenix/server/api/types/ExperimentRun.py +169 -65
- phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
- phoenix/server/api/types/GenerativeModel.py +245 -3
- phoenix/server/api/types/GenerativeProvider.py +70 -11
- phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
- phoenix/server/api/types/ModelInterface.py +16 -0
- phoenix/server/api/types/PlaygroundModel.py +20 -0
- phoenix/server/api/types/Project.py +1278 -216
- phoenix/server/api/types/ProjectSession.py +188 -28
- phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
- phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
- phoenix/server/api/types/Prompt.py +119 -39
- phoenix/server/api/types/PromptLabel.py +42 -25
- phoenix/server/api/types/PromptVersion.py +11 -8
- phoenix/server/api/types/PromptVersionTag.py +65 -25
- phoenix/server/api/types/ServerStatus.py +6 -0
- phoenix/server/api/types/Span.py +167 -123
- phoenix/server/api/types/SpanAnnotation.py +189 -42
- phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
- phoenix/server/api/types/SpanCostSummary.py +10 -0
- phoenix/server/api/types/SystemApiKey.py +65 -1
- phoenix/server/api/types/TokenPrice.py +16 -0
- phoenix/server/api/types/TokenUsage.py +3 -3
- phoenix/server/api/types/Trace.py +223 -51
- phoenix/server/api/types/TraceAnnotation.py +149 -50
- phoenix/server/api/types/User.py +137 -32
- phoenix/server/api/types/UserApiKey.py +73 -26
- phoenix/server/api/types/node.py +10 -0
- phoenix/server/api/types/pagination.py +11 -2
- phoenix/server/app.py +290 -45
- phoenix/server/authorization.py +38 -3
- phoenix/server/bearer_auth.py +34 -24
- phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
- phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
- phoenix/server/cost_tracking/helpers.py +68 -0
- phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
- phoenix/server/cost_tracking/regex_specificity.py +397 -0
- phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
- phoenix/server/daemons/__init__.py +0 -0
- phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
- phoenix/server/daemons/generative_model_store.py +103 -0
- phoenix/server/daemons/span_cost_calculator.py +99 -0
- phoenix/server/dml_event.py +17 -0
- phoenix/server/dml_event_handler.py +5 -0
- phoenix/server/email/sender.py +56 -3
- phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/experiments/__init__.py +0 -0
- phoenix/server/experiments/utils.py +14 -0
- phoenix/server/grpc_server.py +11 -11
- phoenix/server/jwt_store.py +17 -15
- phoenix/server/ldap.py +1449 -0
- phoenix/server/main.py +26 -10
- phoenix/server/oauth2.py +330 -12
- phoenix/server/prometheus.py +66 -6
- phoenix/server/rate_limiters.py +4 -9
- phoenix/server/retention.py +33 -20
- phoenix/server/session_filters.py +49 -0
- phoenix/server/static/.vite/manifest.json +55 -51
- phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
- phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
- phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
- phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
- phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
- phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
- phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
- phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
- phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
- phoenix/server/templates/index.html +40 -6
- phoenix/server/thread_server.py +1 -2
- phoenix/server/types.py +14 -4
- phoenix/server/utils.py +74 -0
- phoenix/session/client.py +56 -3
- phoenix/session/data_extractor.py +5 -0
- phoenix/session/evaluation.py +14 -5
- phoenix/session/session.py +45 -9
- phoenix/settings.py +5 -0
- phoenix/trace/attributes.py +80 -13
- phoenix/trace/dsl/helpers.py +90 -1
- phoenix/trace/dsl/query.py +8 -6
- phoenix/trace/projects.py +5 -0
- phoenix/utilities/template_formatters.py +1 -1
- phoenix/version.py +1 -1
- arize_phoenix-10.0.4.dist-info/RECORD +0 -405
- phoenix/server/api/types/Evaluation.py +0 -39
- phoenix/server/cost_tracking/cost_lookup.py +0 -255
- phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
- phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
- phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
- phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
- phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
- phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
- phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
- phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
- phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
- phoenix/utilities/deprecation.py +0 -31
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
phoenix/db/pg_config.py
CHANGED
|
@@ -10,12 +10,14 @@ from typing_extensions import assert_never
|
|
|
10
10
|
def get_pg_config(
|
|
11
11
|
url: URL,
|
|
12
12
|
driver: Literal["psycopg", "asyncpg"],
|
|
13
|
+
enforce_ssl: bool = False,
|
|
13
14
|
) -> tuple[URL, dict[str, Any]]:
|
|
14
15
|
"""Convert SQLAlchemy URL to driver-specific configuration.
|
|
15
16
|
|
|
16
17
|
Args:
|
|
17
18
|
url: SQLAlchemy URL
|
|
18
19
|
driver: "psycopg" or "asyncpg"
|
|
20
|
+
enforce_ssl: If True, ensure SSL is enabled (required for AWS RDS IAM auth)
|
|
19
21
|
|
|
20
22
|
Returns:
|
|
21
23
|
Tuple of (base_url, connect_args):
|
|
@@ -26,6 +28,14 @@ def get_pg_config(
|
|
|
26
28
|
query = url.query
|
|
27
29
|
ssl_args = _get_ssl_args(query)
|
|
28
30
|
|
|
31
|
+
if enforce_ssl and not ssl_args:
|
|
32
|
+
ssl_args = {"sslmode": "require"}
|
|
33
|
+
elif enforce_ssl and ssl_args.get("sslmode") == "disable":
|
|
34
|
+
raise ValueError(
|
|
35
|
+
"SSL cannot be disabled when using AWS RDS IAM authentication. "
|
|
36
|
+
"Remove 'sslmode=disable' from the connection string."
|
|
37
|
+
)
|
|
38
|
+
|
|
29
39
|
# Create base URL without SSL parameters
|
|
30
40
|
base_url = url.set(
|
|
31
41
|
drivername=f"postgresql+{driver}",
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from typing import Any, Literal, Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, ValidationError
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TokenPriceCustomization(BaseModel, ABC):
|
|
8
|
+
model_config = {"extra": "allow"}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ThresholdBasedTokenPriceCustomization(TokenPriceCustomization):
|
|
12
|
+
type: Literal["threshold_based"] = "threshold_based"
|
|
13
|
+
key: str
|
|
14
|
+
threshold: float
|
|
15
|
+
new_rate: float
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TokenPriceCustomizationParser:
|
|
19
|
+
"""Intended to be forward-compatible while maintaining the ability to round-trip."""
|
|
20
|
+
|
|
21
|
+
@staticmethod
|
|
22
|
+
def parse(data: Optional[dict[str, Any]]) -> Optional[TokenPriceCustomization]:
|
|
23
|
+
if not data:
|
|
24
|
+
return None
|
|
25
|
+
try:
|
|
26
|
+
return ThresholdBasedTokenPriceCustomization.model_validate(data)
|
|
27
|
+
except ValidationError:
|
|
28
|
+
pass
|
|
29
|
+
return TokenPriceCustomization.model_validate(data)
|
|
@@ -5,7 +5,9 @@ from typing import Annotated, Iterable, Literal, Optional, Union
|
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sa
|
|
7
7
|
from pydantic import AfterValidator, BaseModel, Field, RootModel
|
|
8
|
+
from sqlalchemy import func
|
|
8
9
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
10
|
+
from sqlalchemy.sql.roles import InElementRole
|
|
9
11
|
|
|
10
12
|
from phoenix.utilities import hour_of_week
|
|
11
13
|
|
|
@@ -25,19 +27,25 @@ class _MaxDays(BaseModel):
|
|
|
25
27
|
class _MaxCount(BaseModel):
|
|
26
28
|
max_count: Annotated[int, Field(ge=0)]
|
|
27
29
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
+
def max_count_filter(
|
|
31
|
+
self,
|
|
32
|
+
project_rowids: Union[Iterable[int], InElementRole],
|
|
33
|
+
) -> sa.ColumnElement[bool]:
|
|
30
34
|
if self.max_count <= 0:
|
|
31
35
|
return sa.literal(False)
|
|
32
36
|
from phoenix.db.models import Trace
|
|
33
37
|
|
|
34
|
-
|
|
35
|
-
sa.select(
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
ranked = (
|
|
39
|
+
sa.select(
|
|
40
|
+
Trace.id,
|
|
41
|
+
func.row_number()
|
|
42
|
+
.over(partition_by=Trace.project_rowid, order_by=Trace.start_time.desc())
|
|
43
|
+
.label("rn"),
|
|
44
|
+
)
|
|
45
|
+
.where(Trace.project_rowid.in_(project_rowids))
|
|
46
|
+
.cte("ranked")
|
|
40
47
|
)
|
|
48
|
+
return Trace.id.in_(sa.select(ranked.c.id).where(ranked.c.rn > self.max_count))
|
|
41
49
|
|
|
42
50
|
|
|
43
51
|
class MaxDaysRule(_MaxDays, BaseModel):
|
|
@@ -49,7 +57,7 @@ class MaxDaysRule(_MaxDays, BaseModel):
|
|
|
49
57
|
async def delete_traces(
|
|
50
58
|
self,
|
|
51
59
|
session: AsyncSession,
|
|
52
|
-
project_rowids: Union[Iterable[int],
|
|
60
|
+
project_rowids: Union[Iterable[int], InElementRole],
|
|
53
61
|
) -> set[int]:
|
|
54
62
|
if self.max_days <= 0:
|
|
55
63
|
return set()
|
|
@@ -73,7 +81,7 @@ class MaxCountRule(_MaxCount, BaseModel):
|
|
|
73
81
|
async def delete_traces(
|
|
74
82
|
self,
|
|
75
83
|
session: AsyncSession,
|
|
76
|
-
project_rowids: Union[Iterable[int],
|
|
84
|
+
project_rowids: Union[Iterable[int], InElementRole],
|
|
77
85
|
) -> set[int]:
|
|
78
86
|
if self.max_count <= 0:
|
|
79
87
|
return set()
|
|
@@ -82,7 +90,7 @@ class MaxCountRule(_MaxCount, BaseModel):
|
|
|
82
90
|
stmt = (
|
|
83
91
|
sa.delete(Trace)
|
|
84
92
|
.where(Trace.project_rowid.in_(project_rowids))
|
|
85
|
-
.where(self.max_count_filter)
|
|
93
|
+
.where(self.max_count_filter(project_rowids))
|
|
86
94
|
.returning(Trace.project_rowid)
|
|
87
95
|
)
|
|
88
96
|
return set(await session.scalars(stmt))
|
|
@@ -97,7 +105,7 @@ class MaxDaysOrCountRule(_MaxDays, _MaxCount, BaseModel):
|
|
|
97
105
|
async def delete_traces(
|
|
98
106
|
self,
|
|
99
107
|
session: AsyncSession,
|
|
100
|
-
project_rowids: Union[Iterable[int],
|
|
108
|
+
project_rowids: Union[Iterable[int], InElementRole],
|
|
101
109
|
) -> set[int]:
|
|
102
110
|
if self.max_days <= 0 and self.max_count <= 0:
|
|
103
111
|
return set()
|
|
@@ -106,7 +114,7 @@ class MaxDaysOrCountRule(_MaxDays, _MaxCount, BaseModel):
|
|
|
106
114
|
stmt = (
|
|
107
115
|
sa.delete(Trace)
|
|
108
116
|
.where(Trace.project_rowid.in_(project_rowids))
|
|
109
|
-
.where(sa.or_(self.max_days_filter, self.max_count_filter))
|
|
117
|
+
.where(sa.or_(self.max_days_filter, self.max_count_filter(project_rowids)))
|
|
110
118
|
.returning(Trace.project_rowid)
|
|
111
119
|
)
|
|
112
120
|
return set(await session.scalars(stmt))
|
|
@@ -123,7 +131,7 @@ class TraceRetentionRule(RootModel[Union[MaxDaysRule, MaxCountRule, MaxDaysOrCou
|
|
|
123
131
|
async def delete_traces(
|
|
124
132
|
self,
|
|
125
133
|
session: AsyncSession,
|
|
126
|
-
project_rowids: Union[Iterable[int],
|
|
134
|
+
project_rowids: Union[Iterable[int], InElementRole],
|
|
127
135
|
) -> set[int]:
|
|
128
136
|
return await self.root.delete_traces(session, project_rowids)
|
|
129
137
|
|
|
@@ -192,7 +200,7 @@ class TraceRetentionCronExpression(RootModel[str]):
|
|
|
192
200
|
|
|
193
201
|
def _parse_field(field: str, min_val: int, max_val: int) -> set[int]:
|
|
194
202
|
"""
|
|
195
|
-
|
|
203
|
+
Parses a cron field and returns the set of matching values.
|
|
196
204
|
|
|
197
205
|
Args:
|
|
198
206
|
field (str): The cron field to parse
|
|
@@ -19,9 +19,9 @@ def unwrap_json(obj: JSONSerializable) -> JSONSerializable:
|
|
|
19
19
|
if len(obj) == 1:
|
|
20
20
|
key = next(iter(obj.keys()))
|
|
21
21
|
output = obj[key]
|
|
22
|
-
assert isinstance(
|
|
23
|
-
|
|
24
|
-
)
|
|
22
|
+
assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
|
|
23
|
+
"Output must be JSON serializable"
|
|
24
|
+
)
|
|
25
25
|
return output
|
|
26
26
|
return obj
|
|
27
27
|
|
phoenix/experiments/functions.py
CHANGED
|
@@ -10,7 +10,7 @@ from copy import deepcopy
|
|
|
10
10
|
from dataclasses import replace
|
|
11
11
|
from datetime import datetime, timezone
|
|
12
12
|
from itertools import product
|
|
13
|
-
from typing import Any, Literal, Optional, Union, cast
|
|
13
|
+
from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
|
|
14
14
|
from urllib.parse import urljoin
|
|
15
15
|
|
|
16
16
|
import httpx
|
|
@@ -65,6 +65,41 @@ from phoenix.trace.attributes import flatten
|
|
|
65
65
|
from phoenix.utilities.client import VersionedAsyncClient, VersionedClient
|
|
66
66
|
from phoenix.utilities.json import jsonify
|
|
67
67
|
|
|
68
|
+
if TYPE_CHECKING:
|
|
69
|
+
from phoenix.client.resources.datasets import Dataset as ClientDataset
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _convert_client_dataset(new_dataset: "ClientDataset") -> Dataset:
|
|
73
|
+
"""
|
|
74
|
+
Converts Dataset objects from `phoenix.client` to Dataset objects compatible with experiments.
|
|
75
|
+
"""
|
|
76
|
+
examples_dict: dict[str, Example] = {}
|
|
77
|
+
for example_data in new_dataset.examples:
|
|
78
|
+
legacy_example = Example(
|
|
79
|
+
id=example_data["id"],
|
|
80
|
+
input=example_data["input"],
|
|
81
|
+
output=example_data["output"],
|
|
82
|
+
metadata=example_data["metadata"],
|
|
83
|
+
updated_at=datetime.fromisoformat(example_data["updated_at"]),
|
|
84
|
+
)
|
|
85
|
+
examples_dict[legacy_example.id] = legacy_example
|
|
86
|
+
|
|
87
|
+
return Dataset(
|
|
88
|
+
id=new_dataset.id,
|
|
89
|
+
version_id=new_dataset.version_id,
|
|
90
|
+
examples=examples_dict,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _is_new_client_dataset(dataset: Any) -> bool:
|
|
95
|
+
"""Check if dataset is from new client (has list examples)."""
|
|
96
|
+
try:
|
|
97
|
+
from phoenix.client.resources.datasets import Dataset as _ClientDataset
|
|
98
|
+
|
|
99
|
+
return isinstance(dataset, _ClientDataset)
|
|
100
|
+
except ImportError:
|
|
101
|
+
return False
|
|
102
|
+
|
|
68
103
|
|
|
69
104
|
def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
|
|
70
105
|
return VersionedClient(
|
|
@@ -74,6 +109,64 @@ def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
|
|
|
74
109
|
)
|
|
75
110
|
|
|
76
111
|
|
|
112
|
+
def _get_all_experiment_runs(
|
|
113
|
+
client: httpx.Client,
|
|
114
|
+
experiment_id: str,
|
|
115
|
+
page_size: int = 50,
|
|
116
|
+
) -> list[ExperimentRun]:
|
|
117
|
+
"""
|
|
118
|
+
Fetch all experiment runs using pagination to handle large datasets.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
client: The HTTP client to use for requests.
|
|
122
|
+
experiment_id: The ID of the experiment.
|
|
123
|
+
page_size: Number of runs to fetch per page. Defaults to 50.
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
List of all experiment runs as ExperimentRun objects.
|
|
127
|
+
"""
|
|
128
|
+
all_runs: list[dict[str, Any]] = []
|
|
129
|
+
cursor = None
|
|
130
|
+
|
|
131
|
+
while True:
|
|
132
|
+
params: dict[str, Any] = {"limit": page_size}
|
|
133
|
+
if cursor:
|
|
134
|
+
params["cursor"] = cursor
|
|
135
|
+
|
|
136
|
+
try:
|
|
137
|
+
response = client.get(
|
|
138
|
+
f"v1/experiments/{experiment_id}/runs",
|
|
139
|
+
params=params,
|
|
140
|
+
)
|
|
141
|
+
response.raise_for_status()
|
|
142
|
+
data = response.json()
|
|
143
|
+
|
|
144
|
+
runs = data["data"]
|
|
145
|
+
all_runs.extend(runs)
|
|
146
|
+
|
|
147
|
+
# Check if there are more pages
|
|
148
|
+
cursor = data.get("next_cursor")
|
|
149
|
+
if not cursor:
|
|
150
|
+
break
|
|
151
|
+
|
|
152
|
+
except HTTPStatusError as e:
|
|
153
|
+
if e.response.status_code == 404:
|
|
154
|
+
# Experiment doesn't exist - treat as empty result
|
|
155
|
+
break
|
|
156
|
+
else:
|
|
157
|
+
raise
|
|
158
|
+
|
|
159
|
+
# Convert dicts to ExperimentRun objects
|
|
160
|
+
experiment_runs: list[ExperimentRun] = []
|
|
161
|
+
for run in all_runs:
|
|
162
|
+
# Parse datetime strings
|
|
163
|
+
run["start_time"] = datetime.fromisoformat(run["start_time"])
|
|
164
|
+
run["end_time"] = datetime.fromisoformat(run["end_time"])
|
|
165
|
+
experiment_runs.append(ExperimentRun.from_dict(run))
|
|
166
|
+
|
|
167
|
+
return experiment_runs
|
|
168
|
+
|
|
169
|
+
|
|
77
170
|
Evaluators: TypeAlias = Union[
|
|
78
171
|
ExperimentEvaluator,
|
|
79
172
|
Sequence[ExperimentEvaluator],
|
|
@@ -85,7 +178,7 @@ RateLimitErrors: TypeAlias = Union[type[BaseException], Sequence[type[BaseExcept
|
|
|
85
178
|
|
|
86
179
|
|
|
87
180
|
def run_experiment(
|
|
88
|
-
dataset: Dataset,
|
|
181
|
+
dataset: Union[Dataset, Any], # Accept both legacy and new client datasets
|
|
89
182
|
task: ExperimentTask,
|
|
90
183
|
evaluators: Optional[Evaluators] = None,
|
|
91
184
|
*,
|
|
@@ -166,11 +259,20 @@ def run_experiment(
|
|
|
166
259
|
RanExperiment: The results of the experiment and evaluation. Additional evaluations can be
|
|
167
260
|
added to the experiment using the `evaluate_experiment` function.
|
|
168
261
|
"""
|
|
262
|
+
# Auto-convert client Dataset objects to legacy format
|
|
263
|
+
normalized_dataset: Dataset
|
|
264
|
+
if _is_new_client_dataset(dataset):
|
|
265
|
+
normalized_dataset = _convert_client_dataset(cast("ClientDataset", dataset))
|
|
266
|
+
else:
|
|
267
|
+
normalized_dataset = dataset
|
|
268
|
+
|
|
169
269
|
task_signature = inspect.signature(task)
|
|
170
270
|
_validate_task_signature(task_signature)
|
|
171
271
|
|
|
172
|
-
if not
|
|
173
|
-
raise ValueError(
|
|
272
|
+
if not normalized_dataset.examples:
|
|
273
|
+
raise ValueError(
|
|
274
|
+
f"Dataset has no examples: {normalized_dataset.id=}, {normalized_dataset.version_id=}"
|
|
275
|
+
)
|
|
174
276
|
# Add this to the params once supported in the UI
|
|
175
277
|
repetitions = 1
|
|
176
278
|
assert repetitions > 0, "Must run the experiment at least once."
|
|
@@ -179,7 +281,7 @@ def run_experiment(
|
|
|
179
281
|
sync_client, async_client = _phoenix_clients()
|
|
180
282
|
|
|
181
283
|
payload = {
|
|
182
|
-
"version_id":
|
|
284
|
+
"version_id": normalized_dataset.version_id,
|
|
183
285
|
"name": experiment_name,
|
|
184
286
|
"description": experiment_description,
|
|
185
287
|
"metadata": experiment_metadata,
|
|
@@ -187,23 +289,23 @@ def run_experiment(
|
|
|
187
289
|
}
|
|
188
290
|
if not dry_run:
|
|
189
291
|
experiment_response = sync_client.post(
|
|
190
|
-
f"
|
|
292
|
+
f"v1/datasets/{normalized_dataset.id}/experiments",
|
|
191
293
|
json=payload,
|
|
192
294
|
)
|
|
193
295
|
experiment_response.raise_for_status()
|
|
194
296
|
exp_json = experiment_response.json()["data"]
|
|
195
297
|
project_name = exp_json["project_name"]
|
|
196
298
|
experiment = Experiment(
|
|
197
|
-
dataset_id=
|
|
198
|
-
dataset_version_id=
|
|
299
|
+
dataset_id=normalized_dataset.id,
|
|
300
|
+
dataset_version_id=normalized_dataset.version_id,
|
|
199
301
|
repetitions=repetitions,
|
|
200
302
|
id=exp_json["id"],
|
|
201
303
|
project_name=project_name,
|
|
202
304
|
)
|
|
203
305
|
else:
|
|
204
306
|
experiment = Experiment(
|
|
205
|
-
dataset_id=
|
|
206
|
-
dataset_version_id=
|
|
307
|
+
dataset_id=normalized_dataset.id,
|
|
308
|
+
dataset_version_id=normalized_dataset.version_id,
|
|
207
309
|
repetitions=repetitions,
|
|
208
310
|
id=DRY_RUN,
|
|
209
311
|
project_name="",
|
|
@@ -216,18 +318,18 @@ def run_experiment(
|
|
|
216
318
|
print("🧪 Experiment started.")
|
|
217
319
|
if dry_run:
|
|
218
320
|
examples = {
|
|
219
|
-
(ex :=
|
|
220
|
-
for i in pd.Series(range(len(
|
|
221
|
-
.sample(min(len(
|
|
321
|
+
(ex := normalized_dataset[i]).id: ex
|
|
322
|
+
for i in pd.Series(range(len(normalized_dataset)))
|
|
323
|
+
.sample(min(len(normalized_dataset), int(dry_run)), random_state=42)
|
|
222
324
|
.sort_values()
|
|
223
325
|
}
|
|
224
326
|
id_selection = "\n".join(examples)
|
|
225
327
|
print(f"🌵️ This is a dry-run for these example IDs:\n{id_selection}")
|
|
226
|
-
|
|
328
|
+
normalized_dataset = replace(normalized_dataset, examples=examples)
|
|
227
329
|
else:
|
|
228
|
-
dataset_experiments_url = get_dataset_experiments_url(dataset_id=
|
|
330
|
+
dataset_experiments_url = get_dataset_experiments_url(dataset_id=normalized_dataset.id)
|
|
229
331
|
experiment_compare_url = get_experiment_url(
|
|
230
|
-
dataset_id=
|
|
332
|
+
dataset_id=normalized_dataset.id,
|
|
231
333
|
experiment_id=experiment.id,
|
|
232
334
|
)
|
|
233
335
|
print(f"📺 View dataset experiments: {dataset_experiments_url}")
|
|
@@ -259,7 +361,7 @@ def run_experiment(
|
|
|
259
361
|
try:
|
|
260
362
|
# Try to create the run directly
|
|
261
363
|
resp = sync_client.post(
|
|
262
|
-
f"
|
|
364
|
+
f"v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
|
|
263
365
|
)
|
|
264
366
|
resp.raise_for_status()
|
|
265
367
|
exp_run = replace(exp_run, id=resp.json()["data"]["id"])
|
|
@@ -274,8 +376,11 @@ def run_experiment(
|
|
|
274
376
|
error: Optional[BaseException] = None
|
|
275
377
|
status = Status(StatusCode.OK)
|
|
276
378
|
with ExitStack() as stack:
|
|
277
|
-
span
|
|
278
|
-
|
|
379
|
+
span = cast(
|
|
380
|
+
Span,
|
|
381
|
+
stack.enter_context(
|
|
382
|
+
tracer.start_as_current_span(root_span_name, context=Context())
|
|
383
|
+
),
|
|
279
384
|
)
|
|
280
385
|
stack.enter_context(capture_spans(resource))
|
|
281
386
|
try:
|
|
@@ -316,9 +421,9 @@ def run_experiment(
|
|
|
316
421
|
span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND, root_span_kind)
|
|
317
422
|
span.set_status(status)
|
|
318
423
|
|
|
319
|
-
assert isinstance(
|
|
320
|
-
|
|
321
|
-
)
|
|
424
|
+
assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
|
|
425
|
+
"Output must be JSON serializable"
|
|
426
|
+
)
|
|
322
427
|
|
|
323
428
|
exp_run = ExperimentRun(
|
|
324
429
|
start_time=_decode_unix_nano(cast(int, span.start_time)),
|
|
@@ -334,7 +439,7 @@ def run_experiment(
|
|
|
334
439
|
try:
|
|
335
440
|
# Try to create the run directly
|
|
336
441
|
resp = sync_client.post(
|
|
337
|
-
f"
|
|
442
|
+
f"v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
|
|
338
443
|
)
|
|
339
444
|
resp.raise_for_status()
|
|
340
445
|
exp_run = replace(exp_run, id=resp.json()["data"]["id"])
|
|
@@ -373,7 +478,7 @@ def run_experiment(
|
|
|
373
478
|
None,
|
|
374
479
|
functools.partial(
|
|
375
480
|
sync_client.post,
|
|
376
|
-
url=f"
|
|
481
|
+
url=f"v1/experiments/{experiment.id}/runs",
|
|
377
482
|
json=jsonify(exp_run),
|
|
378
483
|
),
|
|
379
484
|
)
|
|
@@ -391,8 +496,11 @@ def run_experiment(
|
|
|
391
496
|
error: Optional[BaseException] = None
|
|
392
497
|
status = Status(StatusCode.OK)
|
|
393
498
|
with ExitStack() as stack:
|
|
394
|
-
span
|
|
395
|
-
|
|
499
|
+
span = cast(
|
|
500
|
+
Span,
|
|
501
|
+
stack.enter_context(
|
|
502
|
+
tracer.start_as_current_span(root_span_name, context=Context())
|
|
503
|
+
),
|
|
396
504
|
)
|
|
397
505
|
stack.enter_context(capture_spans(resource))
|
|
398
506
|
try:
|
|
@@ -427,9 +535,9 @@ def run_experiment(
|
|
|
427
535
|
span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
|
|
428
536
|
span.set_status(status)
|
|
429
537
|
|
|
430
|
-
assert isinstance(
|
|
431
|
-
|
|
432
|
-
)
|
|
538
|
+
assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
|
|
539
|
+
"Output must be JSON serializable"
|
|
540
|
+
)
|
|
433
541
|
|
|
434
542
|
exp_run = ExperimentRun(
|
|
435
543
|
start_time=_decode_unix_nano(cast(int, span.start_time)),
|
|
@@ -448,7 +556,7 @@ def run_experiment(
|
|
|
448
556
|
None,
|
|
449
557
|
functools.partial(
|
|
450
558
|
sync_client.post,
|
|
451
|
-
url=f"
|
|
559
|
+
url=f"v1/experiments/{experiment.id}/runs",
|
|
452
560
|
json=jsonify(exp_run),
|
|
453
561
|
),
|
|
454
562
|
)
|
|
@@ -491,23 +599,17 @@ def run_experiment(
|
|
|
491
599
|
|
|
492
600
|
test_cases = [
|
|
493
601
|
TestCase(example=deepcopy(ex), repetition_number=rep)
|
|
494
|
-
for ex, rep in product(
|
|
602
|
+
for ex, rep in product(normalized_dataset.examples.values(), range(1, repetitions + 1))
|
|
495
603
|
]
|
|
496
604
|
task_runs, _execution_details = executor.run(test_cases)
|
|
497
605
|
print("✅ Task runs completed.")
|
|
498
606
|
|
|
499
607
|
# Get the final state of runs from the database
|
|
500
608
|
if not dry_run:
|
|
501
|
-
|
|
502
|
-
task_runs = []
|
|
503
|
-
for run in all_runs:
|
|
504
|
-
# Parse datetime strings
|
|
505
|
-
run["start_time"] = datetime.fromisoformat(run["start_time"])
|
|
506
|
-
run["end_time"] = datetime.fromisoformat(run["end_time"])
|
|
507
|
-
task_runs.append(ExperimentRun.from_dict(run))
|
|
609
|
+
task_runs = _get_all_experiment_runs(sync_client, experiment.id)
|
|
508
610
|
|
|
509
611
|
# Check if we got all expected runs
|
|
510
|
-
expected_runs = len(
|
|
612
|
+
expected_runs = len(normalized_dataset.examples) * repetitions
|
|
511
613
|
actual_runs = len(task_runs)
|
|
512
614
|
if actual_runs < expected_runs:
|
|
513
615
|
print(
|
|
@@ -515,12 +617,14 @@ def run_experiment(
|
|
|
515
617
|
"completed successfully."
|
|
516
618
|
)
|
|
517
619
|
|
|
518
|
-
params = ExperimentParameters(
|
|
620
|
+
params = ExperimentParameters(
|
|
621
|
+
n_examples=len(normalized_dataset.examples), n_repetitions=repetitions
|
|
622
|
+
)
|
|
519
623
|
task_summary = TaskSummary.from_task_runs(params, task_runs)
|
|
520
624
|
ran_experiment: RanExperiment = object.__new__(RanExperiment)
|
|
521
625
|
ran_experiment.__init__( # type: ignore[misc]
|
|
522
626
|
params=params,
|
|
523
|
-
dataset=
|
|
627
|
+
dataset=normalized_dataset,
|
|
524
628
|
runs={r.id: r for r in task_runs if r is not None},
|
|
525
629
|
task_summary=task_summary,
|
|
526
630
|
**_asdict(experiment),
|
|
@@ -561,16 +665,14 @@ def evaluate_experiment(
|
|
|
561
665
|
else:
|
|
562
666
|
dataset = Dataset.from_dict(
|
|
563
667
|
sync_client.get(
|
|
564
|
-
f"
|
|
668
|
+
f"v1/datasets/{dataset_id}/examples",
|
|
565
669
|
params={"version_id": str(dataset_version_id)},
|
|
566
670
|
).json()["data"]
|
|
567
671
|
)
|
|
568
672
|
if not dataset.examples:
|
|
569
673
|
raise ValueError(f"Dataset has no examples: {dataset_id=}, {dataset_version_id=}")
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
for exp_run in sync_client.get(f"/v1/experiments/{experiment.id}/runs").json()["data"]
|
|
573
|
-
}
|
|
674
|
+
all_runs = _get_all_experiment_runs(sync_client, experiment.id)
|
|
675
|
+
experiment_runs = {exp_run.id: exp_run for exp_run in all_runs}
|
|
574
676
|
if not experiment_runs:
|
|
575
677
|
raise ValueError("Experiment has not been run")
|
|
576
678
|
params = ExperimentParameters(n_examples=len(dataset.examples))
|
|
@@ -622,8 +724,11 @@ def evaluate_experiment(
|
|
|
622
724
|
status = Status(StatusCode.OK)
|
|
623
725
|
root_span_name = f"Evaluation: {evaluator.name}"
|
|
624
726
|
with ExitStack() as stack:
|
|
625
|
-
span
|
|
626
|
-
|
|
727
|
+
span = cast(
|
|
728
|
+
Span,
|
|
729
|
+
stack.enter_context(
|
|
730
|
+
tracer.start_as_current_span(root_span_name, context=Context())
|
|
731
|
+
),
|
|
627
732
|
)
|
|
628
733
|
stack.enter_context(capture_spans(resource))
|
|
629
734
|
try:
|
|
@@ -660,7 +765,7 @@ def evaluate_experiment(
|
|
|
660
765
|
trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
|
|
661
766
|
)
|
|
662
767
|
if not dry_run:
|
|
663
|
-
resp = sync_client.post("
|
|
768
|
+
resp = sync_client.post("v1/experiment_evaluations", json=jsonify(eval_run))
|
|
664
769
|
resp.raise_for_status()
|
|
665
770
|
eval_run = replace(eval_run, id=resp.json()["data"]["id"])
|
|
666
771
|
return eval_run
|
|
@@ -674,8 +779,11 @@ def evaluate_experiment(
|
|
|
674
779
|
status = Status(StatusCode.OK)
|
|
675
780
|
root_span_name = f"Evaluation: {evaluator.name}"
|
|
676
781
|
with ExitStack() as stack:
|
|
677
|
-
span
|
|
678
|
-
|
|
782
|
+
span = cast(
|
|
783
|
+
Span,
|
|
784
|
+
stack.enter_context(
|
|
785
|
+
tracer.start_as_current_span(root_span_name, context=Context())
|
|
786
|
+
),
|
|
679
787
|
)
|
|
680
788
|
stack.enter_context(capture_spans(resource))
|
|
681
789
|
try:
|
|
@@ -719,7 +827,7 @@ def evaluate_experiment(
|
|
|
719
827
|
None,
|
|
720
828
|
functools.partial(
|
|
721
829
|
sync_client.post,
|
|
722
|
-
url="
|
|
830
|
+
url="v1/experiment_evaluations",
|
|
723
831
|
json=jsonify(eval_run),
|
|
724
832
|
),
|
|
725
833
|
)
|
phoenix/experiments/tracing.py
CHANGED
|
@@ -8,7 +8,7 @@ from typing import Any, Optional
|
|
|
8
8
|
|
|
9
9
|
from opentelemetry.sdk.resources import Resource
|
|
10
10
|
from opentelemetry.sdk.trace import ReadableSpan
|
|
11
|
-
from opentelemetry.trace import
|
|
11
|
+
from opentelemetry.trace import INVALID_SPAN_ID
|
|
12
12
|
from wrapt import apply_patch, resolve_path, wrap_function_wrapper
|
|
13
13
|
|
|
14
14
|
|
|
@@ -29,7 +29,7 @@ class SpanModifier:
|
|
|
29
29
|
Args:
|
|
30
30
|
span: ReadableSpan: the span to modify
|
|
31
31
|
"""
|
|
32
|
-
if (ctx := span._context) is None or ctx.span_id ==
|
|
32
|
+
if (ctx := span._context) is None or ctx.span_id == INVALID_SPAN_ID:
|
|
33
33
|
return
|
|
34
34
|
span._resource = span._resource.merge(self._resource)
|
|
35
35
|
|
phoenix/experiments/types.py
CHANGED
|
@@ -322,7 +322,7 @@ class _HasStats:
|
|
|
322
322
|
text = self.stats.__str__()
|
|
323
323
|
else:
|
|
324
324
|
text = self.stats.to_markdown(index=False)
|
|
325
|
-
return f"{self.title}\n{'-'*len(self.title)}\n" + text
|
|
325
|
+
return f"{self.title}\n{'-' * len(self.title)}\n" + text
|
|
326
326
|
|
|
327
327
|
|
|
328
328
|
@dataclass(frozen=True)
|
phoenix/inferences/inferences.py
CHANGED
|
@@ -13,11 +13,10 @@ from pandas import DataFrame, Series, Timestamp, read_parquet
|
|
|
13
13
|
from pandas.api.types import (
|
|
14
14
|
is_numeric_dtype,
|
|
15
15
|
)
|
|
16
|
-
from typing_extensions import TypeAlias
|
|
16
|
+
from typing_extensions import TypeAlias, deprecated
|
|
17
17
|
|
|
18
18
|
from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR
|
|
19
19
|
from phoenix.datetime_utils import normalize_timestamps
|
|
20
|
-
from phoenix.utilities.deprecation import deprecated
|
|
21
20
|
|
|
22
21
|
from . import errors as err
|
|
23
22
|
from .schema import (
|