arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic.
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/METADATA +5 -5
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/RECORD +56 -117
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/WHEEL +1 -1
- phoenix/__init__.py +27 -0
- phoenix/config.py +7 -21
- phoenix/core/model.py +25 -25
- phoenix/core/model_schema.py +62 -64
- phoenix/core/model_schema_adapter.py +25 -27
- phoenix/db/bulk_inserter.py +14 -54
- phoenix/db/insertion/evaluation.py +6 -6
- phoenix/db/insertion/helpers.py +2 -13
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +28 -2
- phoenix/db/models.py +4 -236
- phoenix/inferences/fixtures.py +23 -23
- phoenix/inferences/inferences.py +7 -7
- phoenix/inferences/validation.py +1 -1
- phoenix/server/api/context.py +0 -18
- phoenix/server/api/dataloaders/__init__.py +0 -18
- phoenix/server/api/dataloaders/span_descendants.py +3 -2
- phoenix/server/api/routers/v1/__init__.py +2 -77
- phoenix/server/api/routers/v1/evaluations.py +2 -4
- phoenix/server/api/routers/v1/spans.py +1 -3
- phoenix/server/api/routers/v1/traces.py +4 -1
- phoenix/server/api/schema.py +303 -2
- phoenix/server/api/types/Cluster.py +19 -19
- phoenix/server/api/types/Dataset.py +63 -282
- phoenix/server/api/types/DatasetRole.py +23 -0
- phoenix/server/api/types/Dimension.py +29 -30
- phoenix/server/api/types/EmbeddingDimension.py +34 -40
- phoenix/server/api/types/Event.py +16 -16
- phoenix/server/api/{mutations/export_events_mutations.py → types/ExportEventsMutation.py} +14 -17
- phoenix/server/api/types/Model.py +42 -43
- phoenix/server/api/types/Project.py +12 -26
- phoenix/server/api/types/Span.py +2 -79
- phoenix/server/api/types/TimeSeries.py +6 -6
- phoenix/server/api/types/Trace.py +4 -15
- phoenix/server/api/types/UMAPPoints.py +1 -1
- phoenix/server/api/types/node.py +111 -5
- phoenix/server/api/types/pagination.py +52 -10
- phoenix/server/app.py +49 -101
- phoenix/server/main.py +27 -49
- phoenix/server/openapi/docs.py +0 -3
- phoenix/server/static/index.js +2595 -3523
- phoenix/server/templates/index.html +0 -1
- phoenix/services.py +15 -15
- phoenix/session/client.py +21 -438
- phoenix/session/session.py +37 -47
- phoenix/trace/exporter.py +9 -14
- phoenix/trace/fixtures.py +7 -133
- phoenix/trace/schemas.py +2 -1
- phoenix/trace/span_evaluations.py +3 -3
- phoenix/trace/trace_dataset.py +6 -6
- phoenix/version.py +1 -1
- phoenix/datasets/__init__.py +0 -0
- phoenix/datasets/evaluators/__init__.py +0 -18
- phoenix/datasets/evaluators/code_evaluators.py +0 -99
- phoenix/datasets/evaluators/llm_evaluators.py +0 -244
- phoenix/datasets/evaluators/utils.py +0 -292
- phoenix/datasets/experiments.py +0 -550
- phoenix/datasets/tracing.py +0 -85
- phoenix/datasets/types.py +0 -178
- phoenix/db/insertion/dataset.py +0 -237
- phoenix/db/migrations/types.py +0 -29
- phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -291
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -100
- phoenix/server/api/dataloaders/dataset_example_spans.py +0 -43
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -85
- phoenix/server/api/dataloaders/experiment_error_rates.py +0 -43
- phoenix/server/api/dataloaders/experiment_run_counts.py +0 -42
- phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -49
- phoenix/server/api/dataloaders/project_by_name.py +0 -31
- phoenix/server/api/dataloaders/span_projects.py +0 -33
- phoenix/server/api/dataloaders/trace_row_ids.py +0 -39
- phoenix/server/api/helpers/dataset_helpers.py +0 -179
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -16
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -14
- phoenix/server/api/input_types/ClearProjectInput.py +0 -15
- phoenix/server/api/input_types/CreateDatasetInput.py +0 -12
- phoenix/server/api/input_types/DatasetExampleInput.py +0 -14
- phoenix/server/api/input_types/DatasetSort.py +0 -17
- phoenix/server/api/input_types/DatasetVersionSort.py +0 -16
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -13
- phoenix/server/api/input_types/DeleteDatasetInput.py +0 -7
- phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -9
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -35
- phoenix/server/api/input_types/PatchDatasetInput.py +0 -14
- phoenix/server/api/mutations/__init__.py +0 -13
- phoenix/server/api/mutations/auth.py +0 -11
- phoenix/server/api/mutations/dataset_mutations.py +0 -520
- phoenix/server/api/mutations/experiment_mutations.py +0 -65
- phoenix/server/api/mutations/project_mutations.py +0 -47
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +0 -6
- phoenix/server/api/openapi/schema.py +0 -16
- phoenix/server/api/queries.py +0 -503
- phoenix/server/api/routers/v1/dataset_examples.py +0 -178
- phoenix/server/api/routers/v1/datasets.py +0 -965
- phoenix/server/api/routers/v1/experiment_evaluations.py +0 -66
- phoenix/server/api/routers/v1/experiment_runs.py +0 -108
- phoenix/server/api/routers/v1/experiments.py +0 -174
- phoenix/server/api/types/AnnotatorKind.py +0 -10
- phoenix/server/api/types/CreateDatasetPayload.py +0 -8
- phoenix/server/api/types/DatasetExample.py +0 -85
- phoenix/server/api/types/DatasetExampleRevision.py +0 -34
- phoenix/server/api/types/DatasetVersion.py +0 -14
- phoenix/server/api/types/ExampleRevisionInterface.py +0 -14
- phoenix/server/api/types/Experiment.py +0 -140
- phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -13
- phoenix/server/api/types/ExperimentComparison.py +0 -19
- phoenix/server/api/types/ExperimentRun.py +0 -91
- phoenix/server/api/types/ExperimentRunAnnotation.py +0 -57
- phoenix/server/api/types/Inferences.py +0 -80
- phoenix/server/api/types/InferencesRole.py +0 -23
- phoenix/utilities/json.py +0 -61
- phoenix/utilities/re.py +0 -50
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/server/api/{helpers/__init__.py → helpers.py} +0 -0
phoenix/db/migrations/versions/cf03bd6bae1d_init.py CHANGED

@@ -6,11 +6,13 @@ Create Date: 2024-04-03 19:41:48.871555

 """

-from typing import Sequence, Union
+from typing import Any, Sequence, Union

 import sqlalchemy as sa
 from alembic import op
-from
+from sqlalchemy import JSON
+from sqlalchemy.dialects import postgresql
+from sqlalchemy.ext.compiler import compiles

 # revision identifiers, used by Alembic.
 revision: str = "cf03bd6bae1d"

@@ -19,6 +21,30 @@ branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None


+class JSONB(JSON):
+    # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
+    __visit_name__ = "JSONB"
+
+
+@compiles(JSONB, "sqlite")  # type: ignore
+def _(*args: Any, **kwargs: Any) -> str:
+    # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
+    return "JSONB"
+
+
+JSON_ = (
+    JSON()
+    .with_variant(
+        postgresql.JSONB(),  # type: ignore
+        "postgresql",
+    )
+    .with_variant(
+        JSONB(),
+        "sqlite",
+    )
+)
+
+
 def upgrade() -> None:
     projects_table = op.create_table(
         "projects",
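The block added above gives the migration a single JSON_ column type that renders as JSONB on PostgreSQL and, through the @compiles hook, emits the literal type name JSONB on SQLite (which accepts it and stores JSON text). A minimal sketch of the same variant idea outside the package, showing the DDL each dialect would emit; the table and column names here are hypothetical:

# Sketch only (not part of arize-phoenix): compile a JSON-with-variant column
# against two dialects to see the emitted DDL. Without a custom @compiles hook,
# the SQLite side falls back to plain JSON.
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql, sqlite

JSON_ = sa.JSON().with_variant(postgresql.JSONB(), "postgresql")

metadata = sa.MetaData()
example = sa.Table("example", metadata, sa.Column("attributes", JSON_))  # hypothetical table

for dialect in (postgresql.dialect(), sqlite.dialect()):
    print(sa.schema.CreateTable(example).compile(dialect=dialect))
    # PostgreSQL renders "attributes JSONB"; SQLite renders "attributes JSON".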
phoenix/db/models.py CHANGED

@@ -1,5 +1,5 @@
 from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional

 from sqlalchemy import (
     JSON,

@@ -15,14 +15,12 @@ from sqlalchemy import (
     String,
     TypeDecorator,
     UniqueConstraint,
-    case,
     func,
     insert,
-    select,
     text,
 )
 from sqlalchemy.dialects import postgresql
-from sqlalchemy.ext.asyncio import AsyncEngine
+from sqlalchemy.ext.asyncio import AsyncEngine
 from sqlalchemy.ext.compiler import compiles
 from sqlalchemy.ext.hybrid import hybrid_property
 from sqlalchemy.orm import (

@@ -61,24 +59,6 @@ JSON_ = (
 )


-class JsonDict(TypeDecorator[Dict[str, Any]]):
-    # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
-    cache_ok = True
-    impl = JSON_
-
-    def process_bind_param(self, value: Optional[Dict[str, Any]], _: Dialect) -> Dict[str, Any]:
-        return value if isinstance(value, dict) else {}
-
-
-class JsonList(TypeDecorator[List[Any]]):
-    # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
-    cache_ok = True
-    impl = JSON_
-
-    def process_bind_param(self, value: Optional[List[Any]], _: Dialect) -> List[Any]:
-        return value if isinstance(value, list) else []
-
-
 class UtcTimeStamp(TypeDecorator[datetime]):
     # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
     cache_ok = True

@@ -91,10 +71,6 @@ class UtcTimeStamp(TypeDecorator[datetime]):
         return normalize_datetime(value, timezone.utc)


-class ExperimentResult(TypedDict, total=False):
-    result: Dict[str, Any]
-
-
 class Base(DeclarativeBase):
     # Enforce best practices for naming constraints
     # https://alembic.sqlalchemy.org/en/latest/naming.html#integration-of-naming-conventions-into-operations-autogenerate

@@ -108,9 +84,8 @@ class Base(DeclarativeBase):
         }
     )
     type_annotation_map = {
-        Dict[str, Any]:
-        List[Dict[str, Any]]:
-        ExperimentResult: JsonDict,
+        Dict[str, Any]: JSON_,
+        List[Dict[str, Any]]: JSON_,
     }

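The type_annotation_map change above is what lets model columns keep plain Mapped[Dict[str, Any]] annotations while being stored through the JSON_ variant type. A rough sketch of that mechanism, using an illustrative model name rather than one of Phoenix's:

# Sketch: DeclarativeBase.type_annotation_map routes Python annotations to a
# column type. "Record"/"records" are illustrative names, not Phoenix models.
from typing import Any, Dict
from sqlalchemy import JSON
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

JSON_ = JSON().with_variant(postgresql.JSONB(), "postgresql")


class Base(DeclarativeBase):
    type_annotation_map = {
        Dict[str, Any]: JSON_,  # every Mapped[Dict[str, Any]] column uses JSON_/JSONB
    }


class Record(Base):
    __tablename__ = "records"
    id: Mapped[int] = mapped_column(primary_key=True)
    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")  # column type comes from the map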
@@ -179,10 +154,6 @@ class Trace(Base):
         cascade="all, delete-orphan",
         uselist=True,
     )
-    experiment_runs: Mapped[List["ExperimentRun"]] = relationship(
-        primaryjoin="foreign(ExperimentRun.trace_id) == Trace.trace_id",
-        back_populates="trace",
-    )
     __table_args__ = (
         UniqueConstraint(
             "trace_id",

@@ -232,7 +203,6 @@ class Span(Base):

     trace: Mapped["Trace"] = relationship("Trace", back_populates="spans")
     document_annotations: Mapped[List["DocumentAnnotation"]] = relationship(back_populates="span")
-    dataset_examples: Mapped[List["DatasetExample"]] = relationship(back_populates="span")

     __table_args__ = (
         UniqueConstraint(
@@ -406,205 +376,3 @@ class DocumentAnnotation(Base):
             "document_position",
         ),
     )
-
-
-class Dataset(Base):
-    __tablename__ = "datasets"
-    id: Mapped[int] = mapped_column(primary_key=True)
-    name: Mapped[str] = mapped_column(unique=True)
-    description: Mapped[Optional[str]]
-    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-    updated_at: Mapped[datetime] = mapped_column(
-        UtcTimeStamp, server_default=func.now(), onupdate=func.now()
-    )
-
-    @hybrid_property
-    def example_count(self) -> Optional[int]:
-        if hasattr(self, "_example_count_value"):
-            assert isinstance(self._example_count_value, int)
-            return self._example_count_value
-        return None
-
-    @example_count.inplace.expression
-    def _example_count(cls) -> ColumnElement[int]:
-        return (
-            select(
-                func.sum(
-                    case(
-                        (DatasetExampleRevision.revision_kind == "CREATE", 1),
-                        (DatasetExampleRevision.revision_kind == "DELETE", -1),
-                        else_=0,
-                    )
-                )
-            )
-            .select_from(DatasetExampleRevision)
-            .join(
-                DatasetExample,
-                onclause=DatasetExample.id == DatasetExampleRevision.dataset_example_id,
-            )
-            .filter(DatasetExample.dataset_id == cls.id)
-            .label("example_count")
-        )
-
-    async def load_example_count(self, session: AsyncSession) -> None:
-        if not hasattr(self, "_example_count_value"):
-            self._example_count_value = await session.scalar(
-                select(
-                    func.sum(
-                        case(
-                            (DatasetExampleRevision.revision_kind == "CREATE", 1),
-                            (DatasetExampleRevision.revision_kind == "DELETE", -1),
-                            else_=0,
-                        )
-                    )
-                )
-                .select_from(DatasetExampleRevision)
-                .join(
-                    DatasetExample,
-                    onclause=DatasetExample.id == DatasetExampleRevision.dataset_example_id,
-                )
-                .filter(DatasetExample.dataset_id == self.id)
-            )
-
-
-class DatasetVersion(Base):
-    __tablename__ = "dataset_versions"
-    id: Mapped[int] = mapped_column(primary_key=True)
-    dataset_id: Mapped[int] = mapped_column(
-        ForeignKey("datasets.id", ondelete="CASCADE"),
-        index=True,
-    )
-    description: Mapped[Optional[str]]
-    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-
-
-class DatasetExample(Base):
-    __tablename__ = "dataset_examples"
-    id: Mapped[int] = mapped_column(primary_key=True)
-    dataset_id: Mapped[int] = mapped_column(
-        ForeignKey("datasets.id", ondelete="CASCADE"),
-        index=True,
-    )
-    span_rowid: Mapped[Optional[int]] = mapped_column(
-        ForeignKey("spans.id", ondelete="SET NULL"),
-        index=True,
-        nullable=True,
-    )
-    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-
-    span: Mapped[Optional[Span]] = relationship(back_populates="dataset_examples")
-
-
-class DatasetExampleRevision(Base):
-    __tablename__ = "dataset_example_revisions"
-    id: Mapped[int] = mapped_column(primary_key=True)
-    dataset_example_id: Mapped[int] = mapped_column(
-        ForeignKey("dataset_examples.id", ondelete="CASCADE"),
-        index=True,
-    )
-    dataset_version_id: Mapped[int] = mapped_column(
-        ForeignKey("dataset_versions.id", ondelete="CASCADE"),
-        index=True,
-    )
-    input: Mapped[Dict[str, Any]]
-    output: Mapped[Dict[str, Any]]
-    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-    revision_kind: Mapped[str] = mapped_column(
-        CheckConstraint(
-            "revision_kind IN ('CREATE', 'PATCH', 'DELETE')", name="valid_revision_kind"
-        ),
-    )
-    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-
-    __table_args__ = (
-        UniqueConstraint(
-            "dataset_example_id",
-            "dataset_version_id",
-        ),
-    )
-
-
-class Experiment(Base):
-    __tablename__ = "experiments"
-    id: Mapped[int] = mapped_column(primary_key=True)
-    dataset_id: Mapped[int] = mapped_column(
-        ForeignKey("datasets.id", ondelete="CASCADE"),
-        index=True,
-    )
-    dataset_version_id: Mapped[int] = mapped_column(
-        ForeignKey("dataset_versions.id", ondelete="CASCADE"),
-        index=True,
-    )
-    name: Mapped[str]
-    description: Mapped[Optional[str]]
-    repetitions: Mapped[int]
-    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-    project_name: Mapped[Optional[str]] = mapped_column(index=True)
-    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-    updated_at: Mapped[datetime] = mapped_column(
-        UtcTimeStamp, server_default=func.now(), onupdate=func.now()
-    )
-
-
-class ExperimentRun(Base):
-    __tablename__ = "experiment_runs"
-    id: Mapped[int] = mapped_column(primary_key=True)
-    experiment_id: Mapped[int] = mapped_column(
-        ForeignKey("experiments.id", ondelete="CASCADE"),
-        index=True,
-    )
-    dataset_example_id: Mapped[int] = mapped_column(
-        ForeignKey("dataset_examples.id", ondelete="CASCADE"),
-        index=True,
-    )
-    repetition_number: Mapped[int]
-    trace_id: Mapped[Optional[str]]
-    output: Mapped[ExperimentResult]
-    start_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
-    end_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
-    prompt_token_count: Mapped[Optional[int]]
-    completion_token_count: Mapped[Optional[int]]
-    error: Mapped[Optional[str]]
-
-    trace: Mapped["Trace"] = relationship(
-        primaryjoin="foreign(ExperimentRun.trace_id) == Trace.trace_id",
-        back_populates="experiment_runs",
-    )
-
-    __table_args__ = (
-        UniqueConstraint(
-            "experiment_id",
-            "dataset_example_id",
-            "repetition_number",
-        ),
-    )
-
-
-class ExperimentRunAnnotation(Base):
-    __tablename__ = "experiment_run_annotations"
-    id: Mapped[int] = mapped_column(primary_key=True)
-    experiment_run_id: Mapped[int] = mapped_column(
-        ForeignKey("experiment_runs.id", ondelete="CASCADE"),
-        index=True,
-    )
-    name: Mapped[str]
-    annotator_kind: Mapped[str] = mapped_column(
-        CheckConstraint("annotator_kind IN ('LLM', 'CODE', 'HUMAN')", name="valid_annotator_kind"),
-    )
-    label: Mapped[Optional[str]]
-    score: Mapped[Optional[float]]
-    explanation: Mapped[Optional[str]]
-    trace_id: Mapped[Optional[str]]
-    error: Mapped[Optional[str]]
-    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-    start_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
-    end_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
-
-    __table_args__ = (
-        UniqueConstraint(
-            "experiment_run_id",
-            "name",
-        ),
-    )
phoenix/inferences/fixtures.py CHANGED

@@ -9,7 +9,7 @@ from urllib.parse import quote, urljoin

 from pandas import read_parquet

-from phoenix.config import
+from phoenix.config import DATASET_DIR
 from phoenix.inferences.inferences import Inferences
 from phoenix.inferences.schema import (
     EmbeddingColumnNames,

@@ -20,7 +20,7 @@ from phoenix.inferences.schema import (
 logger = logging.getLogger(__name__)


-class
+class DatasetRole(Enum):
     PRIMARY = auto()
     REFERENCE = auto()
     CORPUS = auto()

@@ -39,11 +39,11 @@ class Fixture:
     corpus_file_name: Optional[str] = None
     corpus_schema: Optional[Schema] = None

-    def paths(self) -> Iterator[Tuple[
+    def paths(self) -> Iterator[Tuple[DatasetRole, Path]]:
         return (
             (role, Path(self.prefix) / name)
             for role, name in zip(
-
+                DatasetRole,
                 (
                     self.primary_file_name,
                     self.reference_file_name,

@@ -413,41 +413,41 @@ FIXTURES: Tuple[Fixture, ...] = (
 NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}


-def
+def get_datasets(
     fixture_name: str,
     no_internet: bool = False,
 ) -> Tuple[Inferences, Optional[Inferences], Optional[Inferences]]:
     """
-    Downloads primary and reference
+    Downloads primary and reference datasets for a fixture if they are not found
     locally.
     """
     fixture = _get_fixture_by_name(fixture_name=fixture_name)
     if no_internet:
-        paths = {role:
+        paths = {role: DATASET_DIR / path for role, path in fixture.paths()}
     else:
-        paths = dict(_download(fixture,
-
-        read_parquet(paths[
+        paths = dict(_download(fixture, DATASET_DIR))
+    primary_dataset = Inferences(
+        read_parquet(paths[DatasetRole.PRIMARY]),
         fixture.primary_schema,
         "production",
     )
-
+    reference_dataset = None
     if fixture.reference_file_name is not None:
-
-        read_parquet(paths[
+        reference_dataset = Inferences(
+            read_parquet(paths[DatasetRole.REFERENCE]),
             fixture.reference_schema
             if fixture.reference_schema is not None
             else fixture.primary_schema,
             "training",
         )
-
+    corpus_dataset = None
     if fixture.corpus_file_name is not None:
-
-        read_parquet(paths[
+        corpus_dataset = Inferences(
+            read_parquet(paths[DatasetRole.CORPUS]),
             fixture.corpus_schema,
             "knowledge_base",
         )
-    return
+    return primary_dataset, reference_dataset, corpus_dataset


 def _get_fixture_by_name(fixture_name: str) -> Fixture:

@@ -496,14 +496,14 @@ def load_example(use_case: str) -> ExampleInferences:

     """
     fixture = _get_fixture_by_name(use_case)
-
+    primary_dataset, reference_dataset, corpus_dataset = get_datasets(use_case)
     print(f"📥 Loaded {use_case} example datasets.")
     print("ℹ️ About this use-case:")
     print(fixture.description)
     return ExampleInferences(
-        primary=
-        reference=
-        corpus=
+        primary=primary_dataset,
+        reference=reference_dataset,
+        corpus=corpus_dataset,
     )


@@ -544,7 +544,7 @@ class GCSAssets(NamedTuple):
     )


-def _download(fixture: Fixture, location: Path) -> Iterator[Tuple[
+def _download(fixture: Fixture, location: Path) -> Iterator[Tuple[DatasetRole, Path]]:
     for role, path in fixture.paths():
         yield role, GCSAssets().metadata(path).save_artifact(location)


@@ -556,5 +556,5 @@ if __name__ == "__main__":
     for fixture in FIXTURES:
         start_time = time.time()
         print(f"getting {fixture.name}", end="...")
-        dict(_download(fixture,
+        dict(_download(fixture, DATASET_DIR))
         print(f"done ({time.time() - start_time:.2f}s)")
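Since get_datasets now returns the primary, reference, and corpus Inferences objects as a tuple (the latter two may be None), callers unpack it the way load_example does above. A hedged usage sketch; the fixture name below is a placeholder, real names come from the FIXTURES tuple:

# Sketch: unpacking get_datasets. "my_fixture" is a placeholder fixture name.
from phoenix.inferences.fixtures import get_datasets

primary, reference, corpus = get_datasets("my_fixture")
print(primary.name)        # the primary set is constructed with the name "production"
if reference is not None:  # present only when the fixture ships a reference file
    print(reference.name)  # "training"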
phoenix/inferences/inferences.py CHANGED

@@ -15,7 +15,7 @@ from pandas.api.types import (
 )
 from typing_extensions import TypeAlias

-from phoenix.config import
+from phoenix.config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX
 from phoenix.datetime_utils import normalize_timestamps
 from phoenix.utilities.deprecation import deprecated

@@ -31,7 +31,7 @@ from .schema import (
     SchemaFieldName,
     SchemaFieldValue,
 )
-from .validation import
+from .validation import validate_dataset_inputs

 logger = logging.getLogger(__name__)

@@ -62,7 +62,7 @@ class Inferences:

     Examples
     --------
-    >>>
+    >>> primary_dataset = px.Inferences(
     >>> dataframe=production_dataframe, schema=schema, name="primary"
     >>> )
     """

@@ -81,7 +81,7 @@ class Inferences:
         # allow for schema like objects
         if not isinstance(schema, Schema):
             schema = _get_schema_from_unknown_schema_param(schema)
-        errors =
+        errors = validate_dataset_inputs(
             dataframe=dataframe,
             schema=schema,
         )

@@ -95,7 +95,7 @@ class Inferences:
         self.__dataframe: DataFrame = dataframe
         self.__schema: Schema = schema
         self.__name: str = (
-            name if name is not None else f"{
+            name if name is not None else f"{GENERATED_DATASET_NAME_PREFIX}{str(uuid.uuid4())}"
         )
         self._is_empty = self.dataframe.empty
         logger.info(f"""Dataset: {self.__name} initialized""")

@@ -118,7 +118,7 @@ class Inferences:
     @classmethod
     def from_name(cls, name: str) -> "Inferences":
         """Retrieves a dataset by name from the file system"""
-        directory =
+        directory = DATASET_DIR / name
         df = read_parquet(directory / cls._data_file_name)
         with open(directory / cls._schema_file_name) as schema_file:
             schema_json = schema_file.read()

@@ -127,7 +127,7 @@ class Inferences:

     def to_disc(self) -> None:
         """writes the data and schema to disc"""
-        directory =
+        directory = DATASET_DIR / self.name
         directory.mkdir(parents=True, exist_ok=True)
         self.dataframe.to_parquet(
             directory / self._data_file_name,
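Tying the from_name and to_disc changes together: an Inferences object persists its dataframe and schema under DATASET_DIR and can be reloaded by name. A small sketch, assuming a prepared pandas DataFrame df and a px.Schema named schema already exist:

# Sketch: persisting an Inferences object and reading it back by name.
# `df` and `schema` are assumed to exist; they are not defined in this diff.
import phoenix as px

inferences = px.Inferences(dataframe=df, schema=schema, name="production")
inferences.to_disc()                              # writes parquet + schema under DATASET_DIR/production
reloaded = px.Inferences.from_name("production")  # reads the same files back from disk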
phoenix/inferences/validation.py CHANGED

@@ -34,7 +34,7 @@ def _check_valid_schema(schema: Schema) -> List[err.ValidationError]:
     return []


-def
+def validate_dataset_inputs(dataframe: DataFrame, schema: Schema) -> List[err.ValidationError]:
     errors = _check_missing_columns(dataframe, schema)
     if errors:
         return errors
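validate_dataset_inputs collects validation errors into a list instead of raising, which is how the Inferences constructor consumes it. A brief sketch of calling it directly; the error handling shown is illustrative rather than Phoenix's own:

# Sketch: running the validator by hand. `dataframe` and `schema` are assumed
# to be a pandas DataFrame and a phoenix Schema prepared elsewhere.
from phoenix.inferences.validation import validate_dataset_inputs

errors = validate_dataset_inputs(dataframe=dataframe, schema=schema)
if errors:
    for error in errors:
        print(error)
    raise ValueError("dataframe does not match the provided schema")  # illustrative handling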
phoenix/server/api/context.py CHANGED

@@ -12,51 +12,33 @@ from typing_extensions import TypeAlias
 from phoenix.core.model_schema import Model
 from phoenix.server.api.dataloaders import (
     CacheForDataLoaders,
-    DatasetExampleRevisionsDataLoader,
-    DatasetExampleSpansDataLoader,
     DocumentEvaluationsDataLoader,
     DocumentEvaluationSummaryDataLoader,
     DocumentRetrievalMetricsDataLoader,
     EvaluationSummaryDataLoader,
-    ExperimentAnnotationSummaryDataLoader,
-    ExperimentErrorRatesDataLoader,
-    ExperimentRunCountsDataLoader,
-    ExperimentSequenceNumberDataLoader,
     LatencyMsQuantileDataLoader,
     MinStartOrMaxEndTimeDataLoader,
-    ProjectByNameDataLoader,
     RecordCountDataLoader,
     SpanDescendantsDataLoader,
     SpanEvaluationsDataLoader,
-    SpanProjectsDataLoader,
     TokenCountDataLoader,
     TraceEvaluationsDataLoader,
-    TraceRowIdsDataLoader,
 )


 @dataclass
 class DataLoaders:
-    dataset_example_revisions: DatasetExampleRevisionsDataLoader
-    dataset_example_spans: DatasetExampleSpansDataLoader
     document_evaluation_summaries: DocumentEvaluationSummaryDataLoader
     document_evaluations: DocumentEvaluationsDataLoader
     document_retrieval_metrics: DocumentRetrievalMetricsDataLoader
     evaluation_summaries: EvaluationSummaryDataLoader
-    experiment_annotation_summaries: ExperimentAnnotationSummaryDataLoader
-    experiment_error_rates: ExperimentErrorRatesDataLoader
-    experiment_run_counts: ExperimentRunCountsDataLoader
-    experiment_sequence_number: ExperimentSequenceNumberDataLoader
     latency_ms_quantile: LatencyMsQuantileDataLoader
     min_start_or_max_end_times: MinStartOrMaxEndTimeDataLoader
     record_counts: RecordCountDataLoader
     span_descendants: SpanDescendantsDataLoader
     span_evaluations: SpanEvaluationsDataLoader
-    span_projects: SpanProjectsDataLoader
     token_counts: TokenCountDataLoader
     trace_evaluations: TraceEvaluationsDataLoader
-    trace_row_ids: TraceRowIdsDataLoader
-    project_by_name: ProjectByNameDataLoader


 ProjectRowId: TypeAlias = int
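Each field on DataLoaders wraps a strawberry DataLoader, which coalesces all keys requested while a single GraphQL operation resolves into one batched load call. A generic sketch of that batching pattern, unrelated to any particular Phoenix loader:

# Sketch of the strawberry DataLoader batching pattern these loaders follow.
import asyncio
from typing import List

from strawberry.dataloader import DataLoader


async def load_name_lengths(keys: List[str]) -> List[int]:
    # Called once per batch with every key requested in the same tick.
    return [len(key) for key in keys]


async def main() -> None:
    loader = DataLoader(load_fn=load_name_lengths)
    # Both .load() calls below are served by a single load_name_lengths call.
    first, second = await asyncio.gather(loader.load("alpha"), loader.load("beta"))
    print(first, second)  # 5 4


asyncio.run(main())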
phoenix/server/api/dataloaders/__init__.py CHANGED

@@ -8,8 +8,6 @@ from phoenix.db.insertion.evaluation import (
 )
 from phoenix.db.insertion.span import ClearProjectSpansEvent, SpanInsertionEvent

-from .dataset_example_revisions import DatasetExampleRevisionsDataLoader
-from .dataset_example_spans import DatasetExampleSpansDataLoader
 from .document_evaluation_summaries import (
     DocumentEvaluationSummaryCache,
     DocumentEvaluationSummaryDataLoader,

@@ -17,43 +15,27 @@ from .document_evaluation_summaries import (
 from .document_evaluations import DocumentEvaluationsDataLoader
 from .document_retrieval_metrics import DocumentRetrievalMetricsDataLoader
 from .evaluation_summaries import EvaluationSummaryCache, EvaluationSummaryDataLoader
-from .experiment_annotation_summaries import ExperimentAnnotationSummaryDataLoader
-from .experiment_error_rates import ExperimentErrorRatesDataLoader
-from .experiment_run_counts import ExperimentRunCountsDataLoader
-from .experiment_sequence_number import ExperimentSequenceNumberDataLoader
 from .latency_ms_quantile import LatencyMsQuantileCache, LatencyMsQuantileDataLoader
 from .min_start_or_max_end_times import MinStartOrMaxEndTimeCache, MinStartOrMaxEndTimeDataLoader
-from .project_by_name import ProjectByNameDataLoader
 from .record_counts import RecordCountCache, RecordCountDataLoader
 from .span_descendants import SpanDescendantsDataLoader
 from .span_evaluations import SpanEvaluationsDataLoader
-from .span_projects import SpanProjectsDataLoader
 from .token_counts import TokenCountCache, TokenCountDataLoader
 from .trace_evaluations import TraceEvaluationsDataLoader
-from .trace_row_ids import TraceRowIdsDataLoader

 __all__ = [
     "CacheForDataLoaders",
-    "DatasetExampleRevisionsDataLoader",
-    "DatasetExampleSpansDataLoader",
     "DocumentEvaluationSummaryDataLoader",
     "DocumentEvaluationsDataLoader",
     "DocumentRetrievalMetricsDataLoader",
     "EvaluationSummaryDataLoader",
-    "ExperimentAnnotationSummaryDataLoader",
-    "ExperimentErrorRatesDataLoader",
-    "ExperimentRunCountsDataLoader",
-    "ExperimentSequenceNumberDataLoader",
     "LatencyMsQuantileDataLoader",
     "MinStartOrMaxEndTimeDataLoader",
     "RecordCountDataLoader",
     "SpanDescendantsDataLoader",
     "SpanEvaluationsDataLoader",
-    "SpanProjectsDataLoader",
     "TokenCountDataLoader",
     "TraceEvaluationsDataLoader",
-    "TraceRowIdsDataLoader",
-    "ProjectByNameDataLoader",
 ]

phoenix/server/api/dataloaders/span_descendants.py CHANGED

@@ -9,7 +9,7 @@ from typing import (
 from aioitertools.itertools import groupby
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import
+from sqlalchemy.orm import contains_eager
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias

@@ -52,7 +52,8 @@ class SpanDescendantsDataLoader(DataLoader[Key, Result]):
         stmt = (
             select(descendant_ids.c[root_id_label], models.Span)
             .join(descendant_ids, models.Span.id == descendant_ids.c.id)
-            .
+            .join(models.Trace)
+            .options(contains_eager(models.Span.trace))
             .order_by(descendant_ids.c[root_id_label])
         )
         results: Dict[SpanId, Result] = {key: [] for key in keys}