arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix has been flagged as potentially problematic.

Files changed (118)
  1. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/METADATA +5 -5
  2. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/RECORD +56 -117
  3. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/WHEEL +1 -1
  4. phoenix/__init__.py +27 -0
  5. phoenix/config.py +7 -21
  6. phoenix/core/model.py +25 -25
  7. phoenix/core/model_schema.py +62 -64
  8. phoenix/core/model_schema_adapter.py +25 -27
  9. phoenix/db/bulk_inserter.py +14 -54
  10. phoenix/db/insertion/evaluation.py +6 -6
  11. phoenix/db/insertion/helpers.py +2 -13
  12. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +28 -2
  13. phoenix/db/models.py +4 -236
  14. phoenix/inferences/fixtures.py +23 -23
  15. phoenix/inferences/inferences.py +7 -7
  16. phoenix/inferences/validation.py +1 -1
  17. phoenix/server/api/context.py +0 -18
  18. phoenix/server/api/dataloaders/__init__.py +0 -18
  19. phoenix/server/api/dataloaders/span_descendants.py +3 -2
  20. phoenix/server/api/routers/v1/__init__.py +2 -77
  21. phoenix/server/api/routers/v1/evaluations.py +2 -4
  22. phoenix/server/api/routers/v1/spans.py +1 -3
  23. phoenix/server/api/routers/v1/traces.py +4 -1
  24. phoenix/server/api/schema.py +303 -2
  25. phoenix/server/api/types/Cluster.py +19 -19
  26. phoenix/server/api/types/Dataset.py +63 -282
  27. phoenix/server/api/types/DatasetRole.py +23 -0
  28. phoenix/server/api/types/Dimension.py +29 -30
  29. phoenix/server/api/types/EmbeddingDimension.py +34 -40
  30. phoenix/server/api/types/Event.py +16 -16
  31. phoenix/server/api/{mutations/export_events_mutations.py → types/ExportEventsMutation.py} +14 -17
  32. phoenix/server/api/types/Model.py +42 -43
  33. phoenix/server/api/types/Project.py +12 -26
  34. phoenix/server/api/types/Span.py +2 -79
  35. phoenix/server/api/types/TimeSeries.py +6 -6
  36. phoenix/server/api/types/Trace.py +4 -15
  37. phoenix/server/api/types/UMAPPoints.py +1 -1
  38. phoenix/server/api/types/node.py +111 -5
  39. phoenix/server/api/types/pagination.py +52 -10
  40. phoenix/server/app.py +49 -101
  41. phoenix/server/main.py +27 -49
  42. phoenix/server/openapi/docs.py +0 -3
  43. phoenix/server/static/index.js +2595 -3523
  44. phoenix/server/templates/index.html +0 -1
  45. phoenix/services.py +15 -15
  46. phoenix/session/client.py +21 -438
  47. phoenix/session/session.py +37 -47
  48. phoenix/trace/exporter.py +9 -14
  49. phoenix/trace/fixtures.py +7 -133
  50. phoenix/trace/schemas.py +2 -1
  51. phoenix/trace/span_evaluations.py +3 -3
  52. phoenix/trace/trace_dataset.py +6 -6
  53. phoenix/version.py +1 -1
  54. phoenix/datasets/__init__.py +0 -0
  55. phoenix/datasets/evaluators/__init__.py +0 -18
  56. phoenix/datasets/evaluators/code_evaluators.py +0 -99
  57. phoenix/datasets/evaluators/llm_evaluators.py +0 -244
  58. phoenix/datasets/evaluators/utils.py +0 -292
  59. phoenix/datasets/experiments.py +0 -550
  60. phoenix/datasets/tracing.py +0 -85
  61. phoenix/datasets/types.py +0 -178
  62. phoenix/db/insertion/dataset.py +0 -237
  63. phoenix/db/migrations/types.py +0 -29
  64. phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -291
  65. phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -100
  66. phoenix/server/api/dataloaders/dataset_example_spans.py +0 -43
  67. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -85
  68. phoenix/server/api/dataloaders/experiment_error_rates.py +0 -43
  69. phoenix/server/api/dataloaders/experiment_run_counts.py +0 -42
  70. phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -49
  71. phoenix/server/api/dataloaders/project_by_name.py +0 -31
  72. phoenix/server/api/dataloaders/span_projects.py +0 -33
  73. phoenix/server/api/dataloaders/trace_row_ids.py +0 -39
  74. phoenix/server/api/helpers/dataset_helpers.py +0 -179
  75. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -16
  76. phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -14
  77. phoenix/server/api/input_types/ClearProjectInput.py +0 -15
  78. phoenix/server/api/input_types/CreateDatasetInput.py +0 -12
  79. phoenix/server/api/input_types/DatasetExampleInput.py +0 -14
  80. phoenix/server/api/input_types/DatasetSort.py +0 -17
  81. phoenix/server/api/input_types/DatasetVersionSort.py +0 -16
  82. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -13
  83. phoenix/server/api/input_types/DeleteDatasetInput.py +0 -7
  84. phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -9
  85. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -35
  86. phoenix/server/api/input_types/PatchDatasetInput.py +0 -14
  87. phoenix/server/api/mutations/__init__.py +0 -13
  88. phoenix/server/api/mutations/auth.py +0 -11
  89. phoenix/server/api/mutations/dataset_mutations.py +0 -520
  90. phoenix/server/api/mutations/experiment_mutations.py +0 -65
  91. phoenix/server/api/mutations/project_mutations.py +0 -47
  92. phoenix/server/api/openapi/__init__.py +0 -0
  93. phoenix/server/api/openapi/main.py +0 -6
  94. phoenix/server/api/openapi/schema.py +0 -16
  95. phoenix/server/api/queries.py +0 -503
  96. phoenix/server/api/routers/v1/dataset_examples.py +0 -178
  97. phoenix/server/api/routers/v1/datasets.py +0 -965
  98. phoenix/server/api/routers/v1/experiment_evaluations.py +0 -66
  99. phoenix/server/api/routers/v1/experiment_runs.py +0 -108
  100. phoenix/server/api/routers/v1/experiments.py +0 -174
  101. phoenix/server/api/types/AnnotatorKind.py +0 -10
  102. phoenix/server/api/types/CreateDatasetPayload.py +0 -8
  103. phoenix/server/api/types/DatasetExample.py +0 -85
  104. phoenix/server/api/types/DatasetExampleRevision.py +0 -34
  105. phoenix/server/api/types/DatasetVersion.py +0 -14
  106. phoenix/server/api/types/ExampleRevisionInterface.py +0 -14
  107. phoenix/server/api/types/Experiment.py +0 -140
  108. phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -13
  109. phoenix/server/api/types/ExperimentComparison.py +0 -19
  110. phoenix/server/api/types/ExperimentRun.py +0 -91
  111. phoenix/server/api/types/ExperimentRunAnnotation.py +0 -57
  112. phoenix/server/api/types/Inferences.py +0 -80
  113. phoenix/server/api/types/InferencesRole.py +0 -23
  114. phoenix/utilities/json.py +0 -61
  115. phoenix/utilities/re.py +0 -50
  116. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/IP_NOTICE +0 -0
  117. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/LICENSE +0 -0
  118. /phoenix/server/api/{helpers/__init__.py → helpers.py} +0 -0
phoenix/db/migrations/versions/cf03bd6bae1d_init.py CHANGED
@@ -6,11 +6,13 @@ Create Date: 2024-04-03 19:41:48.871555
 
  """
 
- from typing import Sequence, Union
+ from typing import Any, Sequence, Union
 
  import sqlalchemy as sa
  from alembic import op
- from phoenix.db.migrations.types import JSON_
+ from sqlalchemy import JSON
+ from sqlalchemy.dialects import postgresql
+ from sqlalchemy.ext.compiler import compiles
 
  # revision identifiers, used by Alembic.
  revision: str = "cf03bd6bae1d"
@@ -19,6 +21,30 @@ branch_labels: Union[str, Sequence[str], None] = None
  depends_on: Union[str, Sequence[str], None] = None
 
 
+ class JSONB(JSON):
+     # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
+     __visit_name__ = "JSONB"
+
+
+ @compiles(JSONB, "sqlite")  # type: ignore
+ def _(*args: Any, **kwargs: Any) -> str:
+     # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
+     return "JSONB"
+
+
+ JSON_ = (
+     JSON()
+     .with_variant(
+         postgresql.JSONB(),  # type: ignore
+         "postgresql",
+     )
+     .with_variant(
+         JSONB(),
+         "sqlite",
+     )
+ )
+
+
  def upgrade() -> None:
      projects_table = op.create_table(
          "projects",
phoenix/db/models.py CHANGED
@@ -1,5 +1,5 @@
  from datetime import datetime, timezone
- from typing import Any, Dict, List, Optional, TypedDict
+ from typing import Any, Dict, List, Optional
 
  from sqlalchemy import (
      JSON,
@@ -15,14 +15,12 @@ from sqlalchemy import (
      String,
      TypeDecorator,
      UniqueConstraint,
-     case,
      func,
      insert,
-     select,
      text,
  )
  from sqlalchemy.dialects import postgresql
- from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession
+ from sqlalchemy.ext.asyncio import AsyncEngine
  from sqlalchemy.ext.compiler import compiles
  from sqlalchemy.ext.hybrid import hybrid_property
  from sqlalchemy.orm import (
@@ -61,24 +59,6 @@ JSON_ = (
  )
 
 
- class JsonDict(TypeDecorator[Dict[str, Any]]):
-     # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
-     cache_ok = True
-     impl = JSON_
-
-     def process_bind_param(self, value: Optional[Dict[str, Any]], _: Dialect) -> Dict[str, Any]:
-         return value if isinstance(value, dict) else {}
-
-
- class JsonList(TypeDecorator[List[Any]]):
-     # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
-     cache_ok = True
-     impl = JSON_
-
-     def process_bind_param(self, value: Optional[List[Any]], _: Dialect) -> List[Any]:
-         return value if isinstance(value, list) else []
-
-
  class UtcTimeStamp(TypeDecorator[datetime]):
      # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
      cache_ok = True
@@ -91,10 +71,6 @@ class UtcTimeStamp(TypeDecorator[datetime]):
          return normalize_datetime(value, timezone.utc)
 
 
- class ExperimentResult(TypedDict, total=False):
-     result: Dict[str, Any]
-
-
  class Base(DeclarativeBase):
      # Enforce best practices for naming constraints
      # https://alembic.sqlalchemy.org/en/latest/naming.html#integration-of-naming-conventions-into-operations-autogenerate
@@ -108,9 +84,8 @@ class Base(DeclarativeBase):
          }
      )
      type_annotation_map = {
-         Dict[str, Any]: JsonDict,
-         List[Dict[str, Any]]: JsonList,
-         ExperimentResult: JsonDict,
+         Dict[str, Any]: JSON_,
+         List[Dict[str, Any]]: JSON_,
      }
 
 
@@ -179,10 +154,6 @@ class Trace(Base):
          cascade="all, delete-orphan",
          uselist=True,
      )
-     experiment_runs: Mapped[List["ExperimentRun"]] = relationship(
-         primaryjoin="foreign(ExperimentRun.trace_id) == Trace.trace_id",
-         back_populates="trace",
-     )
      __table_args__ = (
          UniqueConstraint(
              "trace_id",
@@ -232,7 +203,6 @@ class Span(Base):
 
      trace: Mapped["Trace"] = relationship("Trace", back_populates="spans")
      document_annotations: Mapped[List["DocumentAnnotation"]] = relationship(back_populates="span")
-     dataset_examples: Mapped[List["DatasetExample"]] = relationship(back_populates="span")
 
      __table_args__ = (
          UniqueConstraint(
@@ -406,205 +376,3 @@ class DocumentAnnotation(Base):
              "document_position",
          ),
      )
-
-
- class Dataset(Base):
-     __tablename__ = "datasets"
-     id: Mapped[int] = mapped_column(primary_key=True)
-     name: Mapped[str] = mapped_column(unique=True)
-     description: Mapped[Optional[str]]
-     metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-     created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-     updated_at: Mapped[datetime] = mapped_column(
-         UtcTimeStamp, server_default=func.now(), onupdate=func.now()
-     )
-
-     @hybrid_property
-     def example_count(self) -> Optional[int]:
-         if hasattr(self, "_example_count_value"):
-             assert isinstance(self._example_count_value, int)
-             return self._example_count_value
-         return None
-
-     @example_count.inplace.expression
-     def _example_count(cls) -> ColumnElement[int]:
-         return (
-             select(
-                 func.sum(
-                     case(
-                         (DatasetExampleRevision.revision_kind == "CREATE", 1),
-                         (DatasetExampleRevision.revision_kind == "DELETE", -1),
-                         else_=0,
-                     )
-                 )
-             )
-             .select_from(DatasetExampleRevision)
-             .join(
-                 DatasetExample,
-                 onclause=DatasetExample.id == DatasetExampleRevision.dataset_example_id,
-             )
-             .filter(DatasetExample.dataset_id == cls.id)
-             .label("example_count")
-         )
-
-     async def load_example_count(self, session: AsyncSession) -> None:
-         if not hasattr(self, "_example_count_value"):
-             self._example_count_value = await session.scalar(
-                 select(
-                     func.sum(
-                         case(
-                             (DatasetExampleRevision.revision_kind == "CREATE", 1),
-                             (DatasetExampleRevision.revision_kind == "DELETE", -1),
-                             else_=0,
-                         )
-                     )
-                 )
-                 .select_from(DatasetExampleRevision)
-                 .join(
-                     DatasetExample,
-                     onclause=DatasetExample.id == DatasetExampleRevision.dataset_example_id,
-                 )
-                 .filter(DatasetExample.dataset_id == self.id)
-             )
-
-
- class DatasetVersion(Base):
-     __tablename__ = "dataset_versions"
-     id: Mapped[int] = mapped_column(primary_key=True)
-     dataset_id: Mapped[int] = mapped_column(
-         ForeignKey("datasets.id", ondelete="CASCADE"),
-         index=True,
-     )
-     description: Mapped[Optional[str]]
-     metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-     created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-
-
- class DatasetExample(Base):
-     __tablename__ = "dataset_examples"
-     id: Mapped[int] = mapped_column(primary_key=True)
-     dataset_id: Mapped[int] = mapped_column(
-         ForeignKey("datasets.id", ondelete="CASCADE"),
-         index=True,
-     )
-     span_rowid: Mapped[Optional[int]] = mapped_column(
-         ForeignKey("spans.id", ondelete="SET NULL"),
-         index=True,
-         nullable=True,
-     )
-     created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-
-     span: Mapped[Optional[Span]] = relationship(back_populates="dataset_examples")
-
-
- class DatasetExampleRevision(Base):
-     __tablename__ = "dataset_example_revisions"
-     id: Mapped[int] = mapped_column(primary_key=True)
-     dataset_example_id: Mapped[int] = mapped_column(
-         ForeignKey("dataset_examples.id", ondelete="CASCADE"),
-         index=True,
-     )
-     dataset_version_id: Mapped[int] = mapped_column(
-         ForeignKey("dataset_versions.id", ondelete="CASCADE"),
-         index=True,
-     )
-     input: Mapped[Dict[str, Any]]
-     output: Mapped[Dict[str, Any]]
-     metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-     revision_kind: Mapped[str] = mapped_column(
-         CheckConstraint(
-             "revision_kind IN ('CREATE', 'PATCH', 'DELETE')", name="valid_revision_kind"
-         ),
-     )
-     created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-
-     __table_args__ = (
-         UniqueConstraint(
-             "dataset_example_id",
-             "dataset_version_id",
-         ),
-     )
-
-
- class Experiment(Base):
-     __tablename__ = "experiments"
-     id: Mapped[int] = mapped_column(primary_key=True)
-     dataset_id: Mapped[int] = mapped_column(
-         ForeignKey("datasets.id", ondelete="CASCADE"),
-         index=True,
-     )
-     dataset_version_id: Mapped[int] = mapped_column(
-         ForeignKey("dataset_versions.id", ondelete="CASCADE"),
-         index=True,
-     )
-     name: Mapped[str]
-     description: Mapped[Optional[str]]
-     repetitions: Mapped[int]
-     metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-     project_name: Mapped[Optional[str]] = mapped_column(index=True)
-     created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
-     updated_at: Mapped[datetime] = mapped_column(
-         UtcTimeStamp, server_default=func.now(), onupdate=func.now()
-     )
-
-
- class ExperimentRun(Base):
-     __tablename__ = "experiment_runs"
-     id: Mapped[int] = mapped_column(primary_key=True)
-     experiment_id: Mapped[int] = mapped_column(
-         ForeignKey("experiments.id", ondelete="CASCADE"),
-         index=True,
-     )
-     dataset_example_id: Mapped[int] = mapped_column(
-         ForeignKey("dataset_examples.id", ondelete="CASCADE"),
-         index=True,
-     )
-     repetition_number: Mapped[int]
-     trace_id: Mapped[Optional[str]]
-     output: Mapped[ExperimentResult]
-     start_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
-     end_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
-     prompt_token_count: Mapped[Optional[int]]
-     completion_token_count: Mapped[Optional[int]]
-     error: Mapped[Optional[str]]
-
-     trace: Mapped["Trace"] = relationship(
-         primaryjoin="foreign(ExperimentRun.trace_id) == Trace.trace_id",
-         back_populates="experiment_runs",
-     )
-
-     __table_args__ = (
-         UniqueConstraint(
-             "experiment_id",
-             "dataset_example_id",
-             "repetition_number",
-         ),
-     )
-
-
- class ExperimentRunAnnotation(Base):
-     __tablename__ = "experiment_run_annotations"
-     id: Mapped[int] = mapped_column(primary_key=True)
-     experiment_run_id: Mapped[int] = mapped_column(
-         ForeignKey("experiment_runs.id", ondelete="CASCADE"),
-         index=True,
-     )
-     name: Mapped[str]
-     annotator_kind: Mapped[str] = mapped_column(
-         CheckConstraint("annotator_kind IN ('LLM', 'CODE', 'HUMAN')", name="valid_annotator_kind"),
-     )
-     label: Mapped[Optional[str]]
-     score: Mapped[Optional[float]]
-     explanation: Mapped[Optional[str]]
-     trace_id: Mapped[Optional[str]]
-     error: Mapped[Optional[str]]
-     metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
-     start_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
-     end_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
-
-     __table_args__ = (
-         UniqueConstraint(
-             "experiment_run_id",
-             "name",
-         ),
-     )
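
Note: after this change, Mapped[Dict[str, Any]] and Mapped[List[Dict[str, Any]]] columns resolve directly to the JSON_ variant through Base.type_annotation_map instead of going through the removed JsonDict/JsonList TypeDecorators. The snippet below is a minimal sketch of that mechanism with an illustrative model (not one of Phoenix's), assuming SQLAlchemy 2.x.

from typing import Any, Dict

from sqlalchemy import JSON
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    # Python annotation -> column type resolution for all mapped classes.
    type_annotation_map = {
        Dict[str, Any]: JSON,  # the real code maps to the dialect-aware JSON_ variant
    }


class Record(Base):
    __tablename__ = "records"
    id: Mapped[int] = mapped_column(primary_key=True)
    # No explicit column type needed: the annotation resolves via type_annotation_map.
    attributes: Mapped[Dict[str, Any]] = mapped_column("attributes")
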
phoenix/inferences/fixtures.py CHANGED
@@ -9,7 +9,7 @@ from urllib.parse import quote, urljoin
 
  from pandas import read_parquet
 
- from phoenix.config import INFERENCES_DIR
+ from phoenix.config import DATASET_DIR
  from phoenix.inferences.inferences import Inferences
  from phoenix.inferences.schema import (
      EmbeddingColumnNames,
@@ -20,7 +20,7 @@ from phoenix.inferences.schema import (
  logger = logging.getLogger(__name__)
 
 
- class InferencesRole(Enum):
+ class DatasetRole(Enum):
      PRIMARY = auto()
      REFERENCE = auto()
      CORPUS = auto()
@@ -39,11 +39,11 @@ class Fixture:
      corpus_file_name: Optional[str] = None
      corpus_schema: Optional[Schema] = None
 
-     def paths(self) -> Iterator[Tuple[InferencesRole, Path]]:
+     def paths(self) -> Iterator[Tuple[DatasetRole, Path]]:
          return (
              (role, Path(self.prefix) / name)
              for role, name in zip(
-                 InferencesRole,
+                 DatasetRole,
                  (
                      self.primary_file_name,
                      self.reference_file_name,
@@ -413,41 +413,41 @@ FIXTURES: Tuple[Fixture, ...] = (
  NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
 
 
- def get_inferences(
+ def get_datasets(
      fixture_name: str,
      no_internet: bool = False,
  ) -> Tuple[Inferences, Optional[Inferences], Optional[Inferences]]:
      """
-     Downloads primary and reference inferences for a fixture if they are not found
+     Downloads primary and reference datasets for a fixture if they are not found
      locally.
      """
      fixture = _get_fixture_by_name(fixture_name=fixture_name)
      if no_internet:
-         paths = {role: INFERENCES_DIR / path for role, path in fixture.paths()}
+         paths = {role: DATASET_DIR / path for role, path in fixture.paths()}
      else:
-         paths = dict(_download(fixture, INFERENCES_DIR))
-     primary_inferences = Inferences(
-         read_parquet(paths[InferencesRole.PRIMARY]),
+         paths = dict(_download(fixture, DATASET_DIR))
+     primary_dataset = Inferences(
+         read_parquet(paths[DatasetRole.PRIMARY]),
          fixture.primary_schema,
          "production",
      )
-     reference_inferences = None
+     reference_dataset = None
      if fixture.reference_file_name is not None:
-         reference_inferences = Inferences(
-             read_parquet(paths[InferencesRole.REFERENCE]),
+         reference_dataset = Inferences(
+             read_parquet(paths[DatasetRole.REFERENCE]),
              fixture.reference_schema
              if fixture.reference_schema is not None
              else fixture.primary_schema,
              "training",
          )
-     corpus_inferences = None
+     corpus_dataset = None
      if fixture.corpus_file_name is not None:
-         corpus_inferences = Inferences(
-             read_parquet(paths[InferencesRole.CORPUS]),
+         corpus_dataset = Inferences(
+             read_parquet(paths[DatasetRole.CORPUS]),
              fixture.corpus_schema,
              "knowledge_base",
          )
-     return primary_inferences, reference_inferences, corpus_inferences
+     return primary_dataset, reference_dataset, corpus_dataset
 
 
  def _get_fixture_by_name(fixture_name: str) -> Fixture:
@@ -496,14 +496,14 @@ def load_example(use_case: str) -> ExampleInferences:
 
      """
      fixture = _get_fixture_by_name(use_case)
-     primary_inferences, reference_inferences, corpus_inferences = get_inferences(use_case)
+     primary_dataset, reference_dataset, corpus_dataset = get_datasets(use_case)
      print(f"📥 Loaded {use_case} example datasets.")
      print("ℹ️ About this use-case:")
      print(fixture.description)
      return ExampleInferences(
-         primary=primary_inferences,
-         reference=reference_inferences,
-         corpus=corpus_inferences,
+         primary=primary_dataset,
+         reference=reference_dataset,
+         corpus=corpus_dataset,
      )
 
 
@@ -544,7 +544,7 @@ class GCSAssets(NamedTuple):
      )
 
 
- def _download(fixture: Fixture, location: Path) -> Iterator[Tuple[InferencesRole, Path]]:
+ def _download(fixture: Fixture, location: Path) -> Iterator[Tuple[DatasetRole, Path]]:
      for role, path in fixture.paths():
          yield role, GCSAssets().metadata(path).save_artifact(location)
 
@@ -556,5 +556,5 @@ if __name__ == "__main__":
      for fixture in FIXTURES:
          start_time = time.time()
          print(f"getting {fixture.name}", end="...")
-         dict(_download(fixture, INFERENCES_DIR))
+         dict(_download(fixture, DATASET_DIR))
          print(f"done ({time.time() - start_time:.2f}s)")
phoenix/inferences/inferences.py CHANGED
@@ -15,7 +15,7 @@ from pandas.api.types import (
  )
  from typing_extensions import TypeAlias
 
- from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR
+ from phoenix.config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX
  from phoenix.datetime_utils import normalize_timestamps
  from phoenix.utilities.deprecation import deprecated
 
@@ -31,7 +31,7 @@ from .schema import (
      SchemaFieldName,
      SchemaFieldValue,
  )
- from .validation import validate_inferences_inputs
+ from .validation import validate_dataset_inputs
 
  logger = logging.getLogger(__name__)
 
@@ -62,7 +62,7 @@ class Inferences:
 
      Examples
      --------
-     >>> primary_inferences = px.Inferences(
+     >>> primary_dataset = px.Inferences(
      >>>     dataframe=production_dataframe, schema=schema, name="primary"
      >>> )
      """
@@ -81,7 +81,7 @@ class Inferences:
          # allow for schema like objects
          if not isinstance(schema, Schema):
              schema = _get_schema_from_unknown_schema_param(schema)
-         errors = validate_inferences_inputs(
+         errors = validate_dataset_inputs(
              dataframe=dataframe,
              schema=schema,
          )
@@ -95,7 +95,7 @@ class Inferences:
          self.__dataframe: DataFrame = dataframe
          self.__schema: Schema = schema
          self.__name: str = (
-             name if name is not None else f"{GENERATED_INFERENCES_NAME_PREFIX}{str(uuid.uuid4())}"
+             name if name is not None else f"{GENERATED_DATASET_NAME_PREFIX}{str(uuid.uuid4())}"
          )
          self._is_empty = self.dataframe.empty
          logger.info(f"""Dataset: {self.__name} initialized""")
@@ -118,7 +118,7 @@ class Inferences:
      @classmethod
      def from_name(cls, name: str) -> "Inferences":
          """Retrieves a dataset by name from the file system"""
-         directory = INFERENCES_DIR / name
+         directory = DATASET_DIR / name
          df = read_parquet(directory / cls._data_file_name)
          with open(directory / cls._schema_file_name) as schema_file:
              schema_json = schema_file.read()
@@ -127,7 +127,7 @@ class Inferences:
 
      def to_disc(self) -> None:
          """writes the data and schema to disc"""
-         directory = INFERENCES_DIR / self.name
+         directory = DATASET_DIR / self.name
          directory.mkdir(parents=True, exist_ok=True)
          self.dataframe.to_parquet(
              directory / self._data_file_name,
phoenix/inferences/validation.py CHANGED
@@ -34,7 +34,7 @@ def _check_valid_schema(schema: Schema) -> List[err.ValidationError]:
      return []
 
 
- def validate_inferences_inputs(dataframe: DataFrame, schema: Schema) -> List[err.ValidationError]:
+ def validate_dataset_inputs(dataframe: DataFrame, schema: Schema) -> List[err.ValidationError]:
      errors = _check_missing_columns(dataframe, schema)
      if errors:
          return errors
phoenix/server/api/context.py CHANGED
@@ -12,51 +12,33 @@ from typing_extensions import TypeAlias
  from phoenix.core.model_schema import Model
  from phoenix.server.api.dataloaders import (
      CacheForDataLoaders,
-     DatasetExampleRevisionsDataLoader,
-     DatasetExampleSpansDataLoader,
      DocumentEvaluationsDataLoader,
      DocumentEvaluationSummaryDataLoader,
      DocumentRetrievalMetricsDataLoader,
      EvaluationSummaryDataLoader,
-     ExperimentAnnotationSummaryDataLoader,
-     ExperimentErrorRatesDataLoader,
-     ExperimentRunCountsDataLoader,
-     ExperimentSequenceNumberDataLoader,
      LatencyMsQuantileDataLoader,
      MinStartOrMaxEndTimeDataLoader,
-     ProjectByNameDataLoader,
      RecordCountDataLoader,
      SpanDescendantsDataLoader,
      SpanEvaluationsDataLoader,
-     SpanProjectsDataLoader,
      TokenCountDataLoader,
      TraceEvaluationsDataLoader,
-     TraceRowIdsDataLoader,
  )
 
 
  @dataclass
  class DataLoaders:
-     dataset_example_revisions: DatasetExampleRevisionsDataLoader
-     dataset_example_spans: DatasetExampleSpansDataLoader
      document_evaluation_summaries: DocumentEvaluationSummaryDataLoader
      document_evaluations: DocumentEvaluationsDataLoader
      document_retrieval_metrics: DocumentRetrievalMetricsDataLoader
      evaluation_summaries: EvaluationSummaryDataLoader
-     experiment_annotation_summaries: ExperimentAnnotationSummaryDataLoader
-     experiment_error_rates: ExperimentErrorRatesDataLoader
-     experiment_run_counts: ExperimentRunCountsDataLoader
-     experiment_sequence_number: ExperimentSequenceNumberDataLoader
      latency_ms_quantile: LatencyMsQuantileDataLoader
      min_start_or_max_end_times: MinStartOrMaxEndTimeDataLoader
      record_counts: RecordCountDataLoader
      span_descendants: SpanDescendantsDataLoader
      span_evaluations: SpanEvaluationsDataLoader
-     span_projects: SpanProjectsDataLoader
      token_counts: TokenCountDataLoader
      trace_evaluations: TraceEvaluationsDataLoader
-     trace_row_ids: TraceRowIdsDataLoader
-     project_by_name: ProjectByNameDataLoader
 
 
  ProjectRowId: TypeAlias = int
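
Note: the DataLoaders dataclass above simply bundles one strawberry DataLoader per query pattern so that lookups made within a single GraphQL request are batched. The snippet below is a minimal, self-contained sketch of that pattern with an illustrative batch function, not one of Phoenix's real loaders.

import asyncio
from typing import List

from strawberry.dataloader import DataLoader


async def load_record_counts(keys: List[int]) -> List[int]:
    # Illustrative batch function: the real loaders issue one SQL query per batch
    # and must return results in the same order as the requested keys.
    print(f"batched keys: {keys}")
    return [0 for _ in keys]


async def main() -> None:
    record_counts = DataLoader(load_fn=load_record_counts)
    # The three .load() calls below are coalesced into a single load_record_counts call.
    results = await asyncio.gather(
        record_counts.load(1), record_counts.load(2), record_counts.load(3)
    )
    print(results)


asyncio.run(main())
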
phoenix/server/api/dataloaders/__init__.py CHANGED
@@ -8,8 +8,6 @@ from phoenix.db.insertion.evaluation import (
  )
  from phoenix.db.insertion.span import ClearProjectSpansEvent, SpanInsertionEvent
 
- from .dataset_example_revisions import DatasetExampleRevisionsDataLoader
- from .dataset_example_spans import DatasetExampleSpansDataLoader
  from .document_evaluation_summaries import (
      DocumentEvaluationSummaryCache,
      DocumentEvaluationSummaryDataLoader,
@@ -17,43 +15,27 @@ from .document_evaluation_summaries import (
  from .document_evaluations import DocumentEvaluationsDataLoader
  from .document_retrieval_metrics import DocumentRetrievalMetricsDataLoader
  from .evaluation_summaries import EvaluationSummaryCache, EvaluationSummaryDataLoader
- from .experiment_annotation_summaries import ExperimentAnnotationSummaryDataLoader
- from .experiment_error_rates import ExperimentErrorRatesDataLoader
- from .experiment_run_counts import ExperimentRunCountsDataLoader
- from .experiment_sequence_number import ExperimentSequenceNumberDataLoader
  from .latency_ms_quantile import LatencyMsQuantileCache, LatencyMsQuantileDataLoader
  from .min_start_or_max_end_times import MinStartOrMaxEndTimeCache, MinStartOrMaxEndTimeDataLoader
- from .project_by_name import ProjectByNameDataLoader
  from .record_counts import RecordCountCache, RecordCountDataLoader
  from .span_descendants import SpanDescendantsDataLoader
  from .span_evaluations import SpanEvaluationsDataLoader
- from .span_projects import SpanProjectsDataLoader
  from .token_counts import TokenCountCache, TokenCountDataLoader
  from .trace_evaluations import TraceEvaluationsDataLoader
- from .trace_row_ids import TraceRowIdsDataLoader
 
  __all__ = [
      "CacheForDataLoaders",
-     "DatasetExampleRevisionsDataLoader",
-     "DatasetExampleSpansDataLoader",
      "DocumentEvaluationSummaryDataLoader",
      "DocumentEvaluationsDataLoader",
      "DocumentRetrievalMetricsDataLoader",
      "EvaluationSummaryDataLoader",
-     "ExperimentAnnotationSummaryDataLoader",
-     "ExperimentErrorRatesDataLoader",
-     "ExperimentRunCountsDataLoader",
-     "ExperimentSequenceNumberDataLoader",
      "LatencyMsQuantileDataLoader",
      "MinStartOrMaxEndTimeDataLoader",
      "RecordCountDataLoader",
      "SpanDescendantsDataLoader",
      "SpanEvaluationsDataLoader",
-     "SpanProjectsDataLoader",
      "TokenCountDataLoader",
      "TraceEvaluationsDataLoader",
-     "TraceRowIdsDataLoader",
-     "ProjectByNameDataLoader",
  ]
 
 
phoenix/server/api/dataloaders/span_descendants.py CHANGED
@@ -9,7 +9,7 @@ from typing import (
  from aioitertools.itertools import groupby
  from sqlalchemy import select
  from sqlalchemy.ext.asyncio import AsyncSession
- from sqlalchemy.orm import joinedload
+ from sqlalchemy.orm import contains_eager
  from strawberry.dataloader import DataLoader
  from typing_extensions import TypeAlias
 
@@ -52,7 +52,8 @@ class SpanDescendantsDataLoader(DataLoader[Key, Result]):
          stmt = (
              select(descendant_ids.c[root_id_label], models.Span)
              .join(descendant_ids, models.Span.id == descendant_ids.c.id)
-             .options(joinedload(models.Span.trace, innerjoin=True).load_only(models.Trace.trace_id))
+             .join(models.Trace)
+             .options(contains_eager(models.Span.trace))
              .order_by(descendant_ids.c[root_id_label])
          )
          results: Dict[SpanId, Result] = {key: [] for key in keys}
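
Note: the replaced option swaps a joinedload for an explicit JOIN plus contains_eager, so the trace row fetched by the query itself populates Span.trace rather than the ORM adding a second, aliased join. The snippet below is a simplified sketch of the difference; the model names are borrowed from the diff, but the queries are otherwise illustrative rather than the actual dataloader code.

from sqlalchemy import select
from sqlalchemy.orm import contains_eager, joinedload

from phoenix.db import models

# Before: the ORM emits its own (aliased) JOIN to traces just to populate Span.trace,
# loading only the trace_id column.
before = select(models.Span).options(
    joinedload(models.Span.trace, innerjoin=True).load_only(models.Trace.trace_id)
)

# After: join traces explicitly and tell the ORM to reuse those joined columns
# for Span.trace instead of emitting a second join.
after = (
    select(models.Span)
    .join(models.Trace)
    .options(contains_eager(models.Span.trace))
)
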