arize-phoenix 4.5.0__py3-none-any.whl → 4.6.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Note: this release of arize-phoenix has been flagged as potentially problematic.

Files changed (123)
  1. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/METADATA +16 -8
  2. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/RECORD +122 -58
  3. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/WHEEL +1 -1
  4. phoenix/__init__.py +0 -27
  5. phoenix/config.py +42 -7
  6. phoenix/core/model.py +25 -25
  7. phoenix/core/model_schema.py +64 -62
  8. phoenix/core/model_schema_adapter.py +27 -25
  9. phoenix/datetime_utils.py +4 -0
  10. phoenix/db/bulk_inserter.py +54 -14
  11. phoenix/db/insertion/dataset.py +237 -0
  12. phoenix/db/insertion/evaluation.py +10 -10
  13. phoenix/db/insertion/helpers.py +17 -14
  14. phoenix/db/insertion/span.py +3 -3
  15. phoenix/db/migrations/types.py +29 -0
  16. phoenix/db/migrations/versions/10460e46d750_datasets.py +291 -0
  17. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +2 -28
  18. phoenix/db/models.py +236 -4
  19. phoenix/experiments/__init__.py +6 -0
  20. phoenix/experiments/evaluators/__init__.py +29 -0
  21. phoenix/experiments/evaluators/base.py +153 -0
  22. phoenix/experiments/evaluators/code_evaluators.py +99 -0
  23. phoenix/experiments/evaluators/llm_evaluators.py +244 -0
  24. phoenix/experiments/evaluators/utils.py +186 -0
  25. phoenix/experiments/functions.py +757 -0
  26. phoenix/experiments/tracing.py +85 -0
  27. phoenix/experiments/types.py +753 -0
  28. phoenix/experiments/utils.py +24 -0
  29. phoenix/inferences/fixtures.py +23 -23
  30. phoenix/inferences/inferences.py +7 -7
  31. phoenix/inferences/validation.py +1 -1
  32. phoenix/server/api/context.py +20 -0
  33. phoenix/server/api/dataloaders/__init__.py +20 -0
  34. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  35. phoenix/server/api/dataloaders/dataset_example_revisions.py +100 -0
  36. phoenix/server/api/dataloaders/dataset_example_spans.py +43 -0
  37. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +85 -0
  38. phoenix/server/api/dataloaders/experiment_error_rates.py +43 -0
  39. phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
  40. phoenix/server/api/dataloaders/experiment_sequence_number.py +49 -0
  41. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  42. phoenix/server/api/dataloaders/span_descendants.py +2 -3
  43. phoenix/server/api/dataloaders/span_projects.py +33 -0
  44. phoenix/server/api/dataloaders/trace_row_ids.py +39 -0
  45. phoenix/server/api/helpers/dataset_helpers.py +179 -0
  46. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  47. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  48. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  49. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  50. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  51. phoenix/server/api/input_types/DatasetSort.py +17 -0
  52. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  53. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  54. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  55. phoenix/server/api/input_types/DeleteExperimentsInput.py +9 -0
  56. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  57. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  58. phoenix/server/api/mutations/__init__.py +13 -0
  59. phoenix/server/api/mutations/auth.py +11 -0
  60. phoenix/server/api/mutations/dataset_mutations.py +520 -0
  61. phoenix/server/api/mutations/experiment_mutations.py +65 -0
  62. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +17 -14
  63. phoenix/server/api/mutations/project_mutations.py +47 -0
  64. phoenix/server/api/openapi/__init__.py +0 -0
  65. phoenix/server/api/openapi/main.py +6 -0
  66. phoenix/server/api/openapi/schema.py +16 -0
  67. phoenix/server/api/queries.py +503 -0
  68. phoenix/server/api/routers/v1/__init__.py +77 -2
  69. phoenix/server/api/routers/v1/dataset_examples.py +178 -0
  70. phoenix/server/api/routers/v1/datasets.py +965 -0
  71. phoenix/server/api/routers/v1/evaluations.py +8 -13
  72. phoenix/server/api/routers/v1/experiment_evaluations.py +143 -0
  73. phoenix/server/api/routers/v1/experiment_runs.py +220 -0
  74. phoenix/server/api/routers/v1/experiments.py +302 -0
  75. phoenix/server/api/routers/v1/spans.py +9 -5
  76. phoenix/server/api/routers/v1/traces.py +1 -4
  77. phoenix/server/api/schema.py +2 -303
  78. phoenix/server/api/types/AnnotatorKind.py +10 -0
  79. phoenix/server/api/types/Cluster.py +19 -19
  80. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  81. phoenix/server/api/types/Dataset.py +282 -63
  82. phoenix/server/api/types/DatasetExample.py +85 -0
  83. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  84. phoenix/server/api/types/DatasetVersion.py +14 -0
  85. phoenix/server/api/types/Dimension.py +30 -29
  86. phoenix/server/api/types/EmbeddingDimension.py +40 -34
  87. phoenix/server/api/types/Event.py +16 -16
  88. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  89. phoenix/server/api/types/Experiment.py +147 -0
  90. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  91. phoenix/server/api/types/ExperimentComparison.py +19 -0
  92. phoenix/server/api/types/ExperimentRun.py +91 -0
  93. phoenix/server/api/types/ExperimentRunAnnotation.py +57 -0
  94. phoenix/server/api/types/Inferences.py +80 -0
  95. phoenix/server/api/types/InferencesRole.py +23 -0
  96. phoenix/server/api/types/Model.py +43 -42
  97. phoenix/server/api/types/Project.py +26 -12
  98. phoenix/server/api/types/Span.py +79 -2
  99. phoenix/server/api/types/TimeSeries.py +6 -6
  100. phoenix/server/api/types/Trace.py +15 -4
  101. phoenix/server/api/types/UMAPPoints.py +1 -1
  102. phoenix/server/api/types/node.py +5 -111
  103. phoenix/server/api/types/pagination.py +10 -52
  104. phoenix/server/app.py +103 -49
  105. phoenix/server/main.py +49 -27
  106. phoenix/server/openapi/docs.py +3 -0
  107. phoenix/server/static/index.js +2300 -1294
  108. phoenix/server/templates/index.html +1 -0
  109. phoenix/services.py +15 -15
  110. phoenix/session/client.py +581 -22
  111. phoenix/session/session.py +47 -37
  112. phoenix/trace/exporter.py +14 -9
  113. phoenix/trace/fixtures.py +133 -7
  114. phoenix/trace/schemas.py +1 -2
  115. phoenix/trace/span_evaluations.py +3 -3
  116. phoenix/trace/trace_dataset.py +6 -6
  117. phoenix/utilities/json.py +61 -0
  118. phoenix/utilities/re.py +50 -0
  119. phoenix/version.py +1 -1
  120. phoenix/server/api/types/DatasetRole.py +0 -23
  121. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/IP_NOTICE +0 -0
  122. {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/LICENSE +0 -0
  123. /phoenix/server/api/{helpers.py → helpers/__init__.py} +0 -0
phoenix/db/migrations/versions/10460e46d750_datasets.py ADDED
@@ -0,0 +1,291 @@
+"""datasets
+
+Revision ID: 10460e46d750
+Revises: cf03bd6bae1d
+Create Date: 2024-05-10 11:24:23.985834
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from phoenix.db.migrations.types import JSON_
+
+# revision identifiers, used by Alembic.
+revision: str = "10460e46d750"
+down_revision: Union[str, None] = "cf03bd6bae1d"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "datasets",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column("name", sa.String, nullable=False, unique=True),
+        sa.Column("description", sa.String, nullable=True),
+        sa.Column("metadata", JSON_, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+        sa.Column(
+            "updated_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+            onupdate=sa.func.now(),
+        ),
+    )
+    op.create_table(
+        "dataset_versions",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column(
+            "dataset_id",
+            sa.Integer,
+            sa.ForeignKey("datasets.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column("description", sa.String, nullable=True),
+        sa.Column("metadata", JSON_, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+    )
+    op.create_table(
+        "dataset_examples",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column(
+            "dataset_id",
+            sa.Integer,
+            sa.ForeignKey("datasets.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column(
+            "span_rowid",
+            sa.Integer,
+            sa.ForeignKey("spans.id", ondelete="SET NULL"),
+            nullable=True,
+            index=True,
+        ),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+    )
+    op.create_table(
+        "dataset_example_revisions",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column(
+            "dataset_example_id",
+            sa.Integer,
+            sa.ForeignKey("dataset_examples.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column(
+            "dataset_version_id",
+            sa.Integer,
+            sa.ForeignKey("dataset_versions.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column("input", JSON_, nullable=False),
+        sa.Column("output", JSON_, nullable=False),
+        sa.Column("metadata", JSON_, nullable=False),
+        sa.Column(
+            "revision_kind",
+            sa.String,
+            sa.CheckConstraint(
+                "revision_kind IN ('CREATE', 'PATCH', 'DELETE')",
+                name="valid_revision_kind",
+            ),
+            nullable=False,
+        ),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+        sa.UniqueConstraint(
+            "dataset_example_id",
+            "dataset_version_id",
+        ),
+    )
+    op.create_table(
+        "experiments",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column(
+            "dataset_id",
+            sa.Integer,
+            sa.ForeignKey("datasets.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column(
+            "dataset_version_id",
+            sa.Integer,
+            sa.ForeignKey("dataset_versions.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column(
+            "name",
+            sa.String,
+            nullable=False,
+        ),
+        sa.Column(
+            "description",
+            sa.String,
+            nullable=True,
+        ),
+        sa.Column(
+            "repetitions",
+            sa.Integer,
+            nullable=False,
+        ),
+        sa.Column("metadata", JSON_, nullable=False),
+        sa.Column("project_name", sa.String, index=True),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+        sa.Column(
+            "updated_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+            onupdate=sa.func.now(),
+        ),
+    )
+    op.create_table(
+        "experiment_runs",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column(
+            "experiment_id",
+            sa.Integer,
+            sa.ForeignKey("experiments.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column(
+            "dataset_example_id",
+            sa.Integer,
+            sa.ForeignKey("dataset_examples.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column(
+            "repetition_number",
+            sa.Integer,
+            nullable=False,
+        ),
+        sa.Column(
+            "trace_id",
+            sa.String,
+            nullable=True,
+        ),
+        sa.Column("output", JSON_, nullable=False),
+        sa.Column("start_time", sa.TIMESTAMP(timezone=True), nullable=False),
+        sa.Column("end_time", sa.TIMESTAMP(timezone=True), nullable=False),
+        sa.Column(
+            "prompt_token_count",
+            sa.Integer,
+            nullable=True,
+        ),
+        sa.Column(
+            "completion_token_count",
+            sa.Integer,
+            nullable=True,
+        ),
+        sa.Column(
+            "error",
+            sa.String,
+            nullable=True,
+        ),
+        sa.UniqueConstraint(
+            "experiment_id",
+            "dataset_example_id",
+            "repetition_number",
+        ),
+    )
+    op.create_table(
+        "experiment_run_annotations",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column(
+            "experiment_run_id",
+            sa.Integer,
+            sa.ForeignKey("experiment_runs.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column(
+            "name",
+            sa.String,
+            nullable=False,
+        ),
+        sa.Column(
+            "annotator_kind",
+            sa.String,
+            sa.CheckConstraint(
+                "annotator_kind IN ('LLM', 'CODE', 'HUMAN')",
+                name="valid_annotator_kind",
+            ),
+            nullable=False,
+        ),
+        sa.Column(
+            "label",
+            sa.String,
+            nullable=True,
+        ),
+        sa.Column(
+            "score",
+            sa.Float,
+            nullable=True,
+        ),
+        sa.Column(
+            "explanation",
+            sa.String,
+            nullable=True,
+        ),
+        sa.Column(
+            "trace_id",
+            sa.String,
+            nullable=True,
+        ),
+        sa.Column(
+            "error",
+            sa.String,
+            nullable=True,
+        ),
+        sa.Column("metadata", JSON_, nullable=False),
+        sa.Column("start_time", sa.TIMESTAMP(timezone=True), nullable=False),
+        sa.Column("end_time", sa.TIMESTAMP(timezone=True), nullable=False),
+        sa.UniqueConstraint(
+            "experiment_run_id",
+            "name",
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("experiment_run_annotations")
+    op.drop_table("experiment_runs")
+    op.drop_table("experiments")
+    op.drop_table("dataset_example_revisions")
+    op.drop_table("dataset_examples")
+    op.drop_table("dataset_versions")
+    op.drop_table("datasets")
phoenix/db/migrations/versions/cf03bd6bae1d_init.py CHANGED
@@ -6,13 +6,11 @@ Create Date: 2024-04-03 19:41:48.871555
 
 """
 
-from typing import Any, Sequence, Union
+from typing import Sequence, Union
 
 import sqlalchemy as sa
 from alembic import op
-from sqlalchemy import JSON
-from sqlalchemy.dialects import postgresql
-from sqlalchemy.ext.compiler import compiles
+from phoenix.db.migrations.types import JSON_
 
 # revision identifiers, used by Alembic.
 revision: str = "cf03bd6bae1d"
@@ -21,30 +19,6 @@ branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
 
-class JSONB(JSON):
-    # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
-    __visit_name__ = "JSONB"
-
-
-@compiles(JSONB, "sqlite")  # type: ignore
-def _(*args: Any, **kwargs: Any) -> str:
-    # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
-    return "JSONB"
-
-
-JSON_ = (
-    JSON()
-    .with_variant(
-        postgresql.JSONB(),  # type: ignore
-        "postgresql",
-    )
-    .with_variant(
-        JSONB(),
-        "sqlite",
-    )
-)
-
-
 def upgrade() -> None:
     projects_table = op.create_table(
         "projects",
phoenix/db/models.py CHANGED
@@ -1,5 +1,5 @@
 from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, TypedDict
 
 from sqlalchemy import (
     JSON,
@@ -15,12 +15,14 @@ from sqlalchemy import (
     String,
     TypeDecorator,
     UniqueConstraint,
+    case,
     func,
     insert,
+    select,
     text,
 )
 from sqlalchemy.dialects import postgresql
-from sqlalchemy.ext.asyncio import AsyncEngine
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession
 from sqlalchemy.ext.compiler import compiles
 from sqlalchemy.ext.hybrid import hybrid_property
 from sqlalchemy.orm import (
@@ -59,6 +61,24 @@ JSON_ = (
 )
 
 
+class JsonDict(TypeDecorator[Dict[str, Any]]):
+    # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
+    cache_ok = True
+    impl = JSON_
+
+    def process_bind_param(self, value: Optional[Dict[str, Any]], _: Dialect) -> Dict[str, Any]:
+        return value if isinstance(value, dict) else {}
+
+
+class JsonList(TypeDecorator[List[Any]]):
+    # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
+    cache_ok = True
+    impl = JSON_
+
+    def process_bind_param(self, value: Optional[List[Any]], _: Dialect) -> List[Any]:
+        return value if isinstance(value, list) else []
+
+
 class UtcTimeStamp(TypeDecorator[datetime]):
     # See # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
     cache_ok = True
@@ -71,6 +91,10 @@ class UtcTimeStamp(TypeDecorator[datetime]):
         return normalize_datetime(value, timezone.utc)
 
 
+class ExperimentRunOutput(TypedDict, total=False):
+    task_output: Any
+
+
 class Base(DeclarativeBase):
     # Enforce best practices for naming constraints
     # https://alembic.sqlalchemy.org/en/latest/naming.html#integration-of-naming-conventions-into-operations-autogenerate
@@ -84,8 +108,9 @@ class Base(DeclarativeBase):
         }
     )
     type_annotation_map = {
-        Dict[str, Any]: JSON_,
-        List[Dict[str, Any]]: JSON_,
+        Dict[str, Any]: JsonDict,
+        List[Dict[str, Any]]: JsonList,
+        ExperimentRunOutput: JsonDict,
     }
 
 
@@ -154,6 +179,10 @@ class Trace(Base):
         cascade="all, delete-orphan",
         uselist=True,
     )
+    experiment_runs: Mapped[List["ExperimentRun"]] = relationship(
+        primaryjoin="foreign(ExperimentRun.trace_id) == Trace.trace_id",
+        back_populates="trace",
+    )
     __table_args__ = (
         UniqueConstraint(
             "trace_id",
@@ -203,6 +232,7 @@ class Span(Base):
 
     trace: Mapped["Trace"] = relationship("Trace", back_populates="spans")
     document_annotations: Mapped[List["DocumentAnnotation"]] = relationship(back_populates="span")
+    dataset_examples: Mapped[List["DatasetExample"]] = relationship(back_populates="span")
 
     __table_args__ = (
         UniqueConstraint(
@@ -376,3 +406,205 @@ class DocumentAnnotation(Base):
             "document_position",
         ),
     )
+
+
+class Dataset(Base):
+    __tablename__ = "datasets"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    name: Mapped[str] = mapped_column(unique=True)
+    description: Mapped[Optional[str]]
+    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
+    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
+    updated_at: Mapped[datetime] = mapped_column(
+        UtcTimeStamp, server_default=func.now(), onupdate=func.now()
+    )
+
+    @hybrid_property
+    def example_count(self) -> Optional[int]:
+        if hasattr(self, "_example_count_value"):
+            assert isinstance(self._example_count_value, int)
+            return self._example_count_value
+        return None
+
+    @example_count.inplace.expression
+    def _example_count(cls) -> ColumnElement[int]:
+        return (
+            select(
+                func.sum(
+                    case(
+                        (DatasetExampleRevision.revision_kind == "CREATE", 1),
+                        (DatasetExampleRevision.revision_kind == "DELETE", -1),
+                        else_=0,
+                    )
+                )
+            )
+            .select_from(DatasetExampleRevision)
+            .join(
+                DatasetExample,
+                onclause=DatasetExample.id == DatasetExampleRevision.dataset_example_id,
+            )
+            .filter(DatasetExample.dataset_id == cls.id)
+            .label("example_count")
+        )
+
+    async def load_example_count(self, session: AsyncSession) -> None:
+        if not hasattr(self, "_example_count_value"):
+            self._example_count_value = await session.scalar(
+                select(
+                    func.sum(
+                        case(
+                            (DatasetExampleRevision.revision_kind == "CREATE", 1),
+                            (DatasetExampleRevision.revision_kind == "DELETE", -1),
+                            else_=0,
+                        )
+                    )
+                )
+                .select_from(DatasetExampleRevision)
+                .join(
+                    DatasetExample,
+                    onclause=DatasetExample.id == DatasetExampleRevision.dataset_example_id,
+                )
+                .filter(DatasetExample.dataset_id == self.id)
+            )
+
+
+class DatasetVersion(Base):
+    __tablename__ = "dataset_versions"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    dataset_id: Mapped[int] = mapped_column(
+        ForeignKey("datasets.id", ondelete="CASCADE"),
+        index=True,
+    )
+    description: Mapped[Optional[str]]
+    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
+    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
+
+
+class DatasetExample(Base):
+    __tablename__ = "dataset_examples"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    dataset_id: Mapped[int] = mapped_column(
+        ForeignKey("datasets.id", ondelete="CASCADE"),
+        index=True,
+    )
+    span_rowid: Mapped[Optional[int]] = mapped_column(
+        ForeignKey("spans.id", ondelete="SET NULL"),
+        index=True,
+        nullable=True,
+    )
+    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
+
+    span: Mapped[Optional[Span]] = relationship(back_populates="dataset_examples")
+
+
+class DatasetExampleRevision(Base):
+    __tablename__ = "dataset_example_revisions"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    dataset_example_id: Mapped[int] = mapped_column(
+        ForeignKey("dataset_examples.id", ondelete="CASCADE"),
+        index=True,
+    )
+    dataset_version_id: Mapped[int] = mapped_column(
+        ForeignKey("dataset_versions.id", ondelete="CASCADE"),
+        index=True,
+    )
+    input: Mapped[Dict[str, Any]]
+    output: Mapped[Dict[str, Any]]
+    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
+    revision_kind: Mapped[str] = mapped_column(
+        CheckConstraint(
+            "revision_kind IN ('CREATE', 'PATCH', 'DELETE')", name="valid_revision_kind"
+        ),
+    )
+    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
+
+    __table_args__ = (
+        UniqueConstraint(
+            "dataset_example_id",
+            "dataset_version_id",
+        ),
+    )
+
+
+class Experiment(Base):
+    __tablename__ = "experiments"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    dataset_id: Mapped[int] = mapped_column(
+        ForeignKey("datasets.id", ondelete="CASCADE"),
+        index=True,
+    )
+    dataset_version_id: Mapped[int] = mapped_column(
+        ForeignKey("dataset_versions.id", ondelete="CASCADE"),
+        index=True,
+    )
+    name: Mapped[str]
+    description: Mapped[Optional[str]]
+    repetitions: Mapped[int]
+    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
+    project_name: Mapped[Optional[str]] = mapped_column(index=True)
+    created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
+    updated_at: Mapped[datetime] = mapped_column(
+        UtcTimeStamp, server_default=func.now(), onupdate=func.now()
+    )
+
+
+class ExperimentRun(Base):
+    __tablename__ = "experiment_runs"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    experiment_id: Mapped[int] = mapped_column(
+        ForeignKey("experiments.id", ondelete="CASCADE"),
+        index=True,
+    )
+    dataset_example_id: Mapped[int] = mapped_column(
+        ForeignKey("dataset_examples.id", ondelete="CASCADE"),
+        index=True,
+    )
+    repetition_number: Mapped[int]
+    trace_id: Mapped[Optional[str]]
+    output: Mapped[ExperimentRunOutput]
+    start_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
+    end_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
+    prompt_token_count: Mapped[Optional[int]]
+    completion_token_count: Mapped[Optional[int]]
+    error: Mapped[Optional[str]]
+
+    trace: Mapped["Trace"] = relationship(
+        primaryjoin="foreign(ExperimentRun.trace_id) == Trace.trace_id",
+        back_populates="experiment_runs",
+    )
+
+    __table_args__ = (
+        UniqueConstraint(
+            "experiment_id",
+            "dataset_example_id",
+            "repetition_number",
+        ),
+    )
+
+
+class ExperimentRunAnnotation(Base):
+    __tablename__ = "experiment_run_annotations"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    experiment_run_id: Mapped[int] = mapped_column(
+        ForeignKey("experiment_runs.id", ondelete="CASCADE"),
+        index=True,
+    )
+    name: Mapped[str]
+    annotator_kind: Mapped[str] = mapped_column(
+        CheckConstraint("annotator_kind IN ('LLM', 'CODE', 'HUMAN')", name="valid_annotator_kind"),
+    )
+    label: Mapped[Optional[str]]
+    score: Mapped[Optional[float]]
+    explanation: Mapped[Optional[str]]
+    trace_id: Mapped[Optional[str]]
+    error: Mapped[Optional[str]]
+    metadata_: Mapped[Dict[str, Any]] = mapped_column("metadata")
+    start_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
+    end_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
+
+    __table_args__ = (
+        UniqueConstraint(
+            "experiment_run_id",
+            "name",
+        ),
+    )
phoenix/experiments/__init__.py ADDED
@@ -0,0 +1,6 @@
+from .functions import evaluate_experiment, run_experiment
+
+__all__ = [
+    "evaluate_experiment",
+    "run_experiment",
+]
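The new top-level package exposes just two entry points. A hedged sketch of how they are presumably wired together; the dataset name, client method, task call convention, and keyword argument below are illustrative assumptions, not signatures confirmed by this diff:

import phoenix as px
from phoenix.experiments import run_experiment


def answer_question(question: str) -> str:
    return "4"  # stand-in for the application under test


def task(example):
    # Assumed convention: one dataset example in, task output back.
    return answer_question(example.input["question"])


dataset = px.Client().get_dataset(name="golden-set")  # assumed client method
experiment = run_experiment(dataset, task, experiment_name="baseline")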
phoenix/experiments/evaluators/__init__.py ADDED
@@ -0,0 +1,29 @@
+from phoenix.experiments.evaluators.code_evaluators import (
+    ContainsAllKeywords,
+    ContainsAnyKeyword,
+    ContainsKeyword,
+    JSONParsable,
+    MatchesRegex,
+)
+from phoenix.experiments.evaluators.llm_evaluators import (
+    CoherenceEvaluator,
+    ConcisenessEvaluator,
+    HelpfulnessEvaluator,
+    LLMCriteriaEvaluator,
+    RelevanceEvaluator,
+)
+from phoenix.experiments.evaluators.utils import create_evaluator
+
+__all__ = [
+    "create_evaluator",
+    "ContainsAllKeywords",
+    "ContainsAnyKeyword",
+    "ContainsKeyword",
+    "JSONParsable",
+    "MatchesRegex",
+    "CoherenceEvaluator",
+    "ConcisenessEvaluator",
+    "LLMCriteriaEvaluator",
+    "HelpfulnessEvaluator",
+    "RelevanceEvaluator",
+]
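The evaluators package pairs ready-made code and LLM evaluators with a create_evaluator helper for custom ones. A hedged sketch; the constructor and decorator arguments shown are assumptions based on the exported names, not signatures confirmed by this diff:

from phoenix.experiments.evaluators import ContainsKeyword, create_evaluator

# Ready-made code evaluator: passes when the keyword appears in the output.
contains_answer = ContainsKeyword("4")


# Custom evaluator promoted via the decorator; a boolean return is assumed
# to be recorded as a pass/fail score on the experiment run.
@create_evaluator(name="is-short", kind="CODE")
def is_short(output) -> bool:
    return len(str(output)) < 100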