arize-phoenix 4.4.4rc6__py3-none-any.whl → 4.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/METADATA +8 -14
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/RECORD +58 -122
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/WHEEL +1 -1
- phoenix/__init__.py +27 -0
- phoenix/config.py +7 -42
- phoenix/core/model.py +25 -25
- phoenix/core/model_schema.py +62 -64
- phoenix/core/model_schema_adapter.py +25 -27
- phoenix/datetime_utils.py +0 -4
- phoenix/db/bulk_inserter.py +14 -54
- phoenix/db/insertion/evaluation.py +10 -10
- phoenix/db/insertion/helpers.py +14 -17
- phoenix/db/insertion/span.py +3 -3
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +28 -2
- phoenix/db/models.py +4 -236
- phoenix/inferences/fixtures.py +23 -23
- phoenix/inferences/inferences.py +7 -7
- phoenix/inferences/validation.py +1 -1
- phoenix/server/api/context.py +0 -20
- phoenix/server/api/dataloaders/__init__.py +0 -20
- phoenix/server/api/dataloaders/span_descendants.py +3 -2
- phoenix/server/api/routers/v1/__init__.py +2 -77
- phoenix/server/api/routers/v1/evaluations.py +13 -8
- phoenix/server/api/routers/v1/spans.py +5 -9
- phoenix/server/api/routers/v1/traces.py +4 -1
- phoenix/server/api/schema.py +303 -2
- phoenix/server/api/types/Cluster.py +19 -19
- phoenix/server/api/types/Dataset.py +63 -282
- phoenix/server/api/types/DatasetRole.py +23 -0
- phoenix/server/api/types/Dimension.py +29 -30
- phoenix/server/api/types/EmbeddingDimension.py +34 -40
- phoenix/server/api/types/Event.py +16 -16
- phoenix/server/api/{mutations/export_events_mutations.py → types/ExportEventsMutation.py} +14 -17
- phoenix/server/api/types/Model.py +42 -43
- phoenix/server/api/types/Project.py +12 -26
- phoenix/server/api/types/Span.py +2 -79
- phoenix/server/api/types/TimeSeries.py +6 -6
- phoenix/server/api/types/Trace.py +4 -15
- phoenix/server/api/types/UMAPPoints.py +1 -1
- phoenix/server/api/types/node.py +111 -5
- phoenix/server/api/types/pagination.py +52 -10
- phoenix/server/app.py +49 -103
- phoenix/server/main.py +27 -49
- phoenix/server/openapi/docs.py +0 -3
- phoenix/server/static/index.js +1384 -2390
- phoenix/server/templates/index.html +0 -1
- phoenix/services.py +15 -15
- phoenix/session/client.py +23 -611
- phoenix/session/session.py +37 -47
- phoenix/trace/exporter.py +9 -14
- phoenix/trace/fixtures.py +7 -133
- phoenix/trace/schemas.py +2 -1
- phoenix/trace/span_evaluations.py +3 -3
- phoenix/trace/trace_dataset.py +6 -6
- phoenix/version.py +1 -1
- phoenix/db/insertion/dataset.py +0 -237
- phoenix/db/migrations/types.py +0 -29
- phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -291
- phoenix/experiments/__init__.py +0 -6
- phoenix/experiments/evaluators/__init__.py +0 -29
- phoenix/experiments/evaluators/base.py +0 -153
- phoenix/experiments/evaluators/code_evaluators.py +0 -99
- phoenix/experiments/evaluators/llm_evaluators.py +0 -244
- phoenix/experiments/evaluators/utils.py +0 -189
- phoenix/experiments/functions.py +0 -616
- phoenix/experiments/tracing.py +0 -85
- phoenix/experiments/types.py +0 -722
- phoenix/experiments/utils.py +0 -9
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +0 -54
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -100
- phoenix/server/api/dataloaders/dataset_example_spans.py +0 -43
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -85
- phoenix/server/api/dataloaders/experiment_error_rates.py +0 -43
- phoenix/server/api/dataloaders/experiment_run_counts.py +0 -42
- phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -49
- phoenix/server/api/dataloaders/project_by_name.py +0 -31
- phoenix/server/api/dataloaders/span_projects.py +0 -33
- phoenix/server/api/dataloaders/trace_row_ids.py +0 -39
- phoenix/server/api/helpers/dataset_helpers.py +0 -179
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -16
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -14
- phoenix/server/api/input_types/ClearProjectInput.py +0 -15
- phoenix/server/api/input_types/CreateDatasetInput.py +0 -12
- phoenix/server/api/input_types/DatasetExampleInput.py +0 -14
- phoenix/server/api/input_types/DatasetSort.py +0 -17
- phoenix/server/api/input_types/DatasetVersionSort.py +0 -16
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -13
- phoenix/server/api/input_types/DeleteDatasetInput.py +0 -7
- phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -9
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -35
- phoenix/server/api/input_types/PatchDatasetInput.py +0 -14
- phoenix/server/api/mutations/__init__.py +0 -13
- phoenix/server/api/mutations/auth.py +0 -11
- phoenix/server/api/mutations/dataset_mutations.py +0 -520
- phoenix/server/api/mutations/experiment_mutations.py +0 -65
- phoenix/server/api/mutations/project_mutations.py +0 -47
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +0 -6
- phoenix/server/api/openapi/schema.py +0 -16
- phoenix/server/api/queries.py +0 -503
- phoenix/server/api/routers/v1/dataset_examples.py +0 -178
- phoenix/server/api/routers/v1/datasets.py +0 -965
- phoenix/server/api/routers/v1/experiment_evaluations.py +0 -65
- phoenix/server/api/routers/v1/experiment_runs.py +0 -96
- phoenix/server/api/routers/v1/experiments.py +0 -174
- phoenix/server/api/types/AnnotatorKind.py +0 -10
- phoenix/server/api/types/CreateDatasetPayload.py +0 -8
- phoenix/server/api/types/DatasetExample.py +0 -85
- phoenix/server/api/types/DatasetExampleRevision.py +0 -34
- phoenix/server/api/types/DatasetVersion.py +0 -14
- phoenix/server/api/types/ExampleRevisionInterface.py +0 -14
- phoenix/server/api/types/Experiment.py +0 -147
- phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -13
- phoenix/server/api/types/ExperimentComparison.py +0 -19
- phoenix/server/api/types/ExperimentRun.py +0 -91
- phoenix/server/api/types/ExperimentRunAnnotation.py +0 -57
- phoenix/server/api/types/Inferences.py +0 -80
- phoenix/server/api/types/InferencesRole.py +0 -23
- phoenix/utilities/json.py +0 -61
- phoenix/utilities/re.py +0 -50
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.4.4rc6.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/server/api/{helpers/__init__.py → helpers.py} +0 -0
phoenix/db/insertion/dataset.py
DELETED
|
@@ -1,237 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from dataclasses import dataclass, field
|
|
3
|
-
from datetime import datetime, timezone
|
|
4
|
-
from enum import Enum
|
|
5
|
-
from itertools import chain
|
|
6
|
-
from typing import (
|
|
7
|
-
Any,
|
|
8
|
-
Awaitable,
|
|
9
|
-
Dict,
|
|
10
|
-
FrozenSet,
|
|
11
|
-
Iterable,
|
|
12
|
-
Iterator,
|
|
13
|
-
Mapping,
|
|
14
|
-
Optional,
|
|
15
|
-
Union,
|
|
16
|
-
cast,
|
|
17
|
-
)
|
|
18
|
-
|
|
19
|
-
from sqlalchemy import insert, select
|
|
20
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
21
|
-
from typing_extensions import TypeAlias
|
|
22
|
-
|
|
23
|
-
from phoenix.db import models
|
|
24
|
-
from phoenix.db.insertion.helpers import DataManipulationEvent
|
|
25
|
-
|
|
26
|
-
logger = logging.getLogger(__name__)
|
|
27
|
-
|
|
28
|
-
DatasetId: TypeAlias = int
|
|
29
|
-
DatasetVersionId: TypeAlias = int
|
|
30
|
-
DatasetExampleId: TypeAlias = int
|
|
31
|
-
DatasetExampleRevisionId: TypeAlias = int
|
|
32
|
-
SpanRowId: TypeAlias = int
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
@dataclass(frozen=True)
|
|
36
|
-
class ExampleContent:
|
|
37
|
-
input: Dict[str, Any] = field(default_factory=dict)
|
|
38
|
-
output: Dict[str, Any] = field(default_factory=dict)
|
|
39
|
-
metadata: Dict[str, Any] = field(default_factory=dict)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
Examples: TypeAlias = Iterable[ExampleContent]
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
@dataclass(frozen=True)
|
|
46
|
-
class DatasetExampleAdditionEvent(DataManipulationEvent):
|
|
47
|
-
dataset_id: DatasetId
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
async def insert_dataset(
|
|
51
|
-
session: AsyncSession,
|
|
52
|
-
name: str,
|
|
53
|
-
description: Optional[str] = None,
|
|
54
|
-
metadata: Optional[Mapping[str, Any]] = None,
|
|
55
|
-
created_at: Optional[datetime] = None,
|
|
56
|
-
) -> DatasetId:
|
|
57
|
-
id_ = await session.scalar(
|
|
58
|
-
insert(models.Dataset)
|
|
59
|
-
.values(
|
|
60
|
-
name=name,
|
|
61
|
-
description=description,
|
|
62
|
-
metadata_=metadata,
|
|
63
|
-
created_at=created_at,
|
|
64
|
-
)
|
|
65
|
-
.returning(models.Dataset.id)
|
|
66
|
-
)
|
|
67
|
-
return cast(DatasetId, id_)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
async def insert_dataset_version(
|
|
71
|
-
session: AsyncSession,
|
|
72
|
-
dataset_id: DatasetId,
|
|
73
|
-
description: Optional[str] = None,
|
|
74
|
-
metadata: Optional[Mapping[str, Any]] = None,
|
|
75
|
-
created_at: Optional[datetime] = None,
|
|
76
|
-
) -> DatasetVersionId:
|
|
77
|
-
id_ = await session.scalar(
|
|
78
|
-
insert(models.DatasetVersion)
|
|
79
|
-
.values(
|
|
80
|
-
dataset_id=dataset_id,
|
|
81
|
-
description=description,
|
|
82
|
-
metadata_=metadata,
|
|
83
|
-
created_at=created_at,
|
|
84
|
-
)
|
|
85
|
-
.returning(models.DatasetVersion.id)
|
|
86
|
-
)
|
|
87
|
-
return cast(DatasetVersionId, id_)
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
async def insert_dataset_example(
|
|
91
|
-
session: AsyncSession,
|
|
92
|
-
dataset_id: DatasetId,
|
|
93
|
-
span_rowid: Optional[SpanRowId] = None,
|
|
94
|
-
created_at: Optional[datetime] = None,
|
|
95
|
-
) -> DatasetExampleId:
|
|
96
|
-
id_ = await session.scalar(
|
|
97
|
-
insert(models.DatasetExample)
|
|
98
|
-
.values(
|
|
99
|
-
dataset_id=dataset_id,
|
|
100
|
-
span_rowid=span_rowid,
|
|
101
|
-
created_at=created_at,
|
|
102
|
-
)
|
|
103
|
-
.returning(models.DatasetExample.id)
|
|
104
|
-
)
|
|
105
|
-
return cast(DatasetExampleId, id_)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
class RevisionKind(Enum):
|
|
109
|
-
CREATE = "CREATE"
|
|
110
|
-
PATCH = "PATCH"
|
|
111
|
-
DELETE = "DELETE"
|
|
112
|
-
|
|
113
|
-
@classmethod
|
|
114
|
-
def _missing_(cls, v: Any) -> "RevisionKind":
|
|
115
|
-
if isinstance(v, str) and v and v.isascii() and not v.isupper():
|
|
116
|
-
return cls(v.upper())
|
|
117
|
-
raise ValueError(f"Invalid revision kind: {v}")
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
async def insert_dataset_example_revision(
|
|
121
|
-
session: AsyncSession,
|
|
122
|
-
dataset_version_id: DatasetVersionId,
|
|
123
|
-
dataset_example_id: DatasetExampleId,
|
|
124
|
-
input: Mapping[str, Any],
|
|
125
|
-
output: Mapping[str, Any],
|
|
126
|
-
metadata: Optional[Mapping[str, Any]] = None,
|
|
127
|
-
revision_kind: RevisionKind = RevisionKind.CREATE,
|
|
128
|
-
created_at: Optional[datetime] = None,
|
|
129
|
-
) -> DatasetExampleRevisionId:
|
|
130
|
-
id_ = await session.scalar(
|
|
131
|
-
insert(models.DatasetExampleRevision)
|
|
132
|
-
.values(
|
|
133
|
-
dataset_version_id=dataset_version_id,
|
|
134
|
-
dataset_example_id=dataset_example_id,
|
|
135
|
-
input=input,
|
|
136
|
-
output=output,
|
|
137
|
-
metadata_=metadata,
|
|
138
|
-
revision_kind=revision_kind.value,
|
|
139
|
-
created_at=created_at,
|
|
140
|
-
)
|
|
141
|
-
.returning(models.DatasetExampleRevision.id)
|
|
142
|
-
)
|
|
143
|
-
return cast(DatasetExampleRevisionId, id_)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
class DatasetAction(Enum):
|
|
147
|
-
CREATE = "create"
|
|
148
|
-
APPEND = "append"
|
|
149
|
-
|
|
150
|
-
@classmethod
|
|
151
|
-
def _missing_(cls, v: Any) -> "DatasetAction":
|
|
152
|
-
if isinstance(v, str) and v and v.isascii() and not v.islower():
|
|
153
|
-
return cls(v.lower())
|
|
154
|
-
raise ValueError(f"Invalid dateset action: {v}")
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
async def add_dataset_examples(
|
|
158
|
-
session: AsyncSession,
|
|
159
|
-
name: str,
|
|
160
|
-
examples: Union[Examples, Awaitable[Examples]],
|
|
161
|
-
description: Optional[str] = None,
|
|
162
|
-
metadata: Optional[Mapping[str, Any]] = None,
|
|
163
|
-
action: DatasetAction = DatasetAction.CREATE,
|
|
164
|
-
) -> Optional[DatasetExampleAdditionEvent]:
|
|
165
|
-
created_at = datetime.now(timezone.utc)
|
|
166
|
-
dataset_id: Optional[DatasetId] = None
|
|
167
|
-
if action is DatasetAction.APPEND and name:
|
|
168
|
-
dataset_id = await session.scalar(
|
|
169
|
-
select(models.Dataset.id).where(models.Dataset.name == name)
|
|
170
|
-
)
|
|
171
|
-
if action is DatasetAction.CREATE or dataset_id is None:
|
|
172
|
-
try:
|
|
173
|
-
dataset_id = await insert_dataset(
|
|
174
|
-
session=session,
|
|
175
|
-
name=name,
|
|
176
|
-
description=description,
|
|
177
|
-
metadata=metadata,
|
|
178
|
-
created_at=created_at,
|
|
179
|
-
)
|
|
180
|
-
except Exception:
|
|
181
|
-
logger.exception(f"Failed to insert dataset: {name=}")
|
|
182
|
-
raise
|
|
183
|
-
try:
|
|
184
|
-
dataset_version_id = await insert_dataset_version(
|
|
185
|
-
session=session,
|
|
186
|
-
dataset_id=dataset_id,
|
|
187
|
-
created_at=created_at,
|
|
188
|
-
)
|
|
189
|
-
except Exception:
|
|
190
|
-
logger.exception(f"Failed to insert dataset version for {dataset_id=}")
|
|
191
|
-
raise
|
|
192
|
-
for example in (await examples) if isinstance(examples, Awaitable) else examples:
|
|
193
|
-
try:
|
|
194
|
-
dataset_example_id = await insert_dataset_example(
|
|
195
|
-
session=session,
|
|
196
|
-
dataset_id=dataset_id,
|
|
197
|
-
created_at=created_at,
|
|
198
|
-
)
|
|
199
|
-
except Exception:
|
|
200
|
-
logger.exception(f"Failed to insert dataset example for {dataset_id=}")
|
|
201
|
-
raise
|
|
202
|
-
try:
|
|
203
|
-
await insert_dataset_example_revision(
|
|
204
|
-
session=session,
|
|
205
|
-
dataset_version_id=dataset_version_id,
|
|
206
|
-
dataset_example_id=dataset_example_id,
|
|
207
|
-
input=example.input,
|
|
208
|
-
output=example.output,
|
|
209
|
-
metadata=example.metadata,
|
|
210
|
-
created_at=created_at,
|
|
211
|
-
)
|
|
212
|
-
except Exception:
|
|
213
|
-
logger.exception(
|
|
214
|
-
f"Failed to insert dataset example revision for {dataset_version_id=}, "
|
|
215
|
-
f"{dataset_example_id=}"
|
|
216
|
-
)
|
|
217
|
-
raise
|
|
218
|
-
return DatasetExampleAdditionEvent(dataset_id=dataset_id)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
@dataclass(frozen=True)
|
|
222
|
-
class DatasetKeys:
|
|
223
|
-
input: FrozenSet[str]
|
|
224
|
-
output: FrozenSet[str]
|
|
225
|
-
metadata: FrozenSet[str]
|
|
226
|
-
|
|
227
|
-
def __iter__(self) -> Iterator[str]:
|
|
228
|
-
yield from sorted(set(chain(self.input, self.output, self.metadata)))
|
|
229
|
-
|
|
230
|
-
def check_differences(self, column_headers_set: FrozenSet[str]) -> None:
|
|
231
|
-
for category, keys in (
|
|
232
|
-
("input", self.input),
|
|
233
|
-
("output", self.output),
|
|
234
|
-
("metadata", self.metadata),
|
|
235
|
-
):
|
|
236
|
-
if diff := keys.difference(column_headers_set):
|
|
237
|
-
raise ValueError(f"{category} keys not found in table column headers: {diff}")
|
phoenix/db/migrations/types.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
from typing import Any
|
|
2
|
-
|
|
3
|
-
from sqlalchemy import JSON
|
|
4
|
-
from sqlalchemy.dialects import postgresql
|
|
5
|
-
from sqlalchemy.ext.compiler import compiles
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class JSONB(JSON):
|
|
9
|
-
# See https://docs.sqlalchemy.org/en/20/core/custom_types.html
|
|
10
|
-
__visit_name__ = "JSONB"
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@compiles(JSONB, "sqlite") # type: ignore
|
|
14
|
-
def _(*args: Any, **kwargs: Any) -> str:
|
|
15
|
-
# See https://docs.sqlalchemy.org/en/20/core/custom_types.html
|
|
16
|
-
return "JSONB"
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
JSON_ = (
|
|
20
|
-
JSON()
|
|
21
|
-
.with_variant(
|
|
22
|
-
postgresql.JSONB(), # type: ignore
|
|
23
|
-
"postgresql",
|
|
24
|
-
)
|
|
25
|
-
.with_variant(
|
|
26
|
-
JSONB(),
|
|
27
|
-
"sqlite",
|
|
28
|
-
)
|
|
29
|
-
)
|
phoenix/db/migrations/versions/10460e46d750_datasets.py
DELETED
|
@@ -1,291 +0,0 @@
|
|
|
1
|
-
"""datasets
|
|
2
|
-
|
|
3
|
-
Revision ID: 10460e46d750
|
|
4
|
-
Revises: cf03bd6bae1d
|
|
5
|
-
Create Date: 2024-05-10 11:24:23.985834
|
|
6
|
-
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from typing import Sequence, Union
|
|
10
|
-
|
|
11
|
-
import sqlalchemy as sa
|
|
12
|
-
from alembic import op
|
|
13
|
-
from phoenix.db.migrations.types import JSON_
|
|
14
|
-
|
|
15
|
-
# revision identifiers, used by Alembic.
|
|
16
|
-
revision: str = "10460e46d750"
|
|
17
|
-
down_revision: Union[str, None] = "cf03bd6bae1d"
|
|
18
|
-
branch_labels: Union[str, Sequence[str], None] = None
|
|
19
|
-
depends_on: Union[str, Sequence[str], None] = None
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def upgrade() -> None:
|
|
23
|
-
op.create_table(
|
|
24
|
-
"datasets",
|
|
25
|
-
sa.Column("id", sa.Integer, primary_key=True),
|
|
26
|
-
sa.Column("name", sa.String, nullable=False, unique=True),
|
|
27
|
-
sa.Column("description", sa.String, nullable=True),
|
|
28
|
-
sa.Column("metadata", JSON_, nullable=False),
|
|
29
|
-
sa.Column(
|
|
30
|
-
"created_at",
|
|
31
|
-
sa.TIMESTAMP(timezone=True),
|
|
32
|
-
nullable=False,
|
|
33
|
-
server_default=sa.func.now(),
|
|
34
|
-
),
|
|
35
|
-
sa.Column(
|
|
36
|
-
"updated_at",
|
|
37
|
-
sa.TIMESTAMP(timezone=True),
|
|
38
|
-
nullable=False,
|
|
39
|
-
server_default=sa.func.now(),
|
|
40
|
-
onupdate=sa.func.now(),
|
|
41
|
-
),
|
|
42
|
-
)
|
|
43
|
-
op.create_table(
|
|
44
|
-
"dataset_versions",
|
|
45
|
-
sa.Column("id", sa.Integer, primary_key=True),
|
|
46
|
-
sa.Column(
|
|
47
|
-
"dataset_id",
|
|
48
|
-
sa.Integer,
|
|
49
|
-
sa.ForeignKey("datasets.id", ondelete="CASCADE"),
|
|
50
|
-
nullable=False,
|
|
51
|
-
index=True,
|
|
52
|
-
),
|
|
53
|
-
sa.Column("description", sa.String, nullable=True),
|
|
54
|
-
sa.Column("metadata", JSON_, nullable=False),
|
|
55
|
-
sa.Column(
|
|
56
|
-
"created_at",
|
|
57
|
-
sa.TIMESTAMP(timezone=True),
|
|
58
|
-
nullable=False,
|
|
59
|
-
server_default=sa.func.now(),
|
|
60
|
-
),
|
|
61
|
-
)
|
|
62
|
-
op.create_table(
|
|
63
|
-
"dataset_examples",
|
|
64
|
-
sa.Column("id", sa.Integer, primary_key=True),
|
|
65
|
-
sa.Column(
|
|
66
|
-
"dataset_id",
|
|
67
|
-
sa.Integer,
|
|
68
|
-
sa.ForeignKey("datasets.id", ondelete="CASCADE"),
|
|
69
|
-
nullable=False,
|
|
70
|
-
index=True,
|
|
71
|
-
),
|
|
72
|
-
sa.Column(
|
|
73
|
-
"span_rowid",
|
|
74
|
-
sa.Integer,
|
|
75
|
-
sa.ForeignKey("spans.id", ondelete="SET NULL"),
|
|
76
|
-
nullable=True,
|
|
77
|
-
index=True,
|
|
78
|
-
),
|
|
79
|
-
sa.Column(
|
|
80
|
-
"created_at",
|
|
81
|
-
sa.TIMESTAMP(timezone=True),
|
|
82
|
-
nullable=False,
|
|
83
|
-
server_default=sa.func.now(),
|
|
84
|
-
),
|
|
85
|
-
)
|
|
86
|
-
op.create_table(
|
|
87
|
-
"dataset_example_revisions",
|
|
88
|
-
sa.Column("id", sa.Integer, primary_key=True),
|
|
89
|
-
sa.Column(
|
|
90
|
-
"dataset_example_id",
|
|
91
|
-
sa.Integer,
|
|
92
|
-
sa.ForeignKey("dataset_examples.id", ondelete="CASCADE"),
|
|
93
|
-
nullable=False,
|
|
94
|
-
index=True,
|
|
95
|
-
),
|
|
96
|
-
sa.Column(
|
|
97
|
-
"dataset_version_id",
|
|
98
|
-
sa.Integer,
|
|
99
|
-
sa.ForeignKey("dataset_versions.id", ondelete="CASCADE"),
|
|
100
|
-
nullable=False,
|
|
101
|
-
index=True,
|
|
102
|
-
),
|
|
103
|
-
sa.Column("input", JSON_, nullable=False),
|
|
104
|
-
sa.Column("output", JSON_, nullable=False),
|
|
105
|
-
sa.Column("metadata", JSON_, nullable=False),
|
|
106
|
-
sa.Column(
|
|
107
|
-
"revision_kind",
|
|
108
|
-
sa.String,
|
|
109
|
-
sa.CheckConstraint(
|
|
110
|
-
"revision_kind IN ('CREATE', 'PATCH', 'DELETE')",
|
|
111
|
-
name="valid_revision_kind",
|
|
112
|
-
),
|
|
113
|
-
nullable=False,
|
|
114
|
-
),
|
|
115
|
-
sa.Column(
|
|
116
|
-
"created_at",
|
|
117
|
-
sa.TIMESTAMP(timezone=True),
|
|
118
|
-
nullable=False,
|
|
119
|
-
server_default=sa.func.now(),
|
|
120
|
-
),
|
|
121
|
-
sa.UniqueConstraint(
|
|
122
|
-
"dataset_example_id",
|
|
123
|
-
"dataset_version_id",
|
|
124
|
-
),
|
|
125
|
-
)
|
|
126
|
-
op.create_table(
|
|
127
|
-
"experiments",
|
|
128
|
-
sa.Column("id", sa.Integer, primary_key=True),
|
|
129
|
-
sa.Column(
|
|
130
|
-
"dataset_id",
|
|
131
|
-
sa.Integer,
|
|
132
|
-
sa.ForeignKey("datasets.id", ondelete="CASCADE"),
|
|
133
|
-
nullable=False,
|
|
134
|
-
index=True,
|
|
135
|
-
),
|
|
136
|
-
sa.Column(
|
|
137
|
-
"dataset_version_id",
|
|
138
|
-
sa.Integer,
|
|
139
|
-
sa.ForeignKey("dataset_versions.id", ondelete="CASCADE"),
|
|
140
|
-
nullable=False,
|
|
141
|
-
index=True,
|
|
142
|
-
),
|
|
143
|
-
sa.Column(
|
|
144
|
-
"name",
|
|
145
|
-
sa.String,
|
|
146
|
-
nullable=False,
|
|
147
|
-
),
|
|
148
|
-
sa.Column(
|
|
149
|
-
"description",
|
|
150
|
-
sa.String,
|
|
151
|
-
nullable=True,
|
|
152
|
-
),
|
|
153
|
-
sa.Column(
|
|
154
|
-
"repetitions",
|
|
155
|
-
sa.Integer,
|
|
156
|
-
nullable=False,
|
|
157
|
-
),
|
|
158
|
-
sa.Column("metadata", JSON_, nullable=False),
|
|
159
|
-
sa.Column("project_name", sa.String, index=True),
|
|
160
|
-
sa.Column(
|
|
161
|
-
"created_at",
|
|
162
|
-
sa.TIMESTAMP(timezone=True),
|
|
163
|
-
nullable=False,
|
|
164
|
-
server_default=sa.func.now(),
|
|
165
|
-
),
|
|
166
|
-
sa.Column(
|
|
167
|
-
"updated_at",
|
|
168
|
-
sa.TIMESTAMP(timezone=True),
|
|
169
|
-
nullable=False,
|
|
170
|
-
server_default=sa.func.now(),
|
|
171
|
-
onupdate=sa.func.now(),
|
|
172
|
-
),
|
|
173
|
-
)
|
|
174
|
-
op.create_table(
|
|
175
|
-
"experiment_runs",
|
|
176
|
-
sa.Column("id", sa.Integer, primary_key=True),
|
|
177
|
-
sa.Column(
|
|
178
|
-
"experiment_id",
|
|
179
|
-
sa.Integer,
|
|
180
|
-
sa.ForeignKey("experiments.id", ondelete="CASCADE"),
|
|
181
|
-
nullable=False,
|
|
182
|
-
index=True,
|
|
183
|
-
),
|
|
184
|
-
sa.Column(
|
|
185
|
-
"dataset_example_id",
|
|
186
|
-
sa.Integer,
|
|
187
|
-
sa.ForeignKey("dataset_examples.id", ondelete="CASCADE"),
|
|
188
|
-
nullable=False,
|
|
189
|
-
index=True,
|
|
190
|
-
),
|
|
191
|
-
sa.Column(
|
|
192
|
-
"repetition_number",
|
|
193
|
-
sa.Integer,
|
|
194
|
-
nullable=False,
|
|
195
|
-
),
|
|
196
|
-
sa.Column(
|
|
197
|
-
"trace_id",
|
|
198
|
-
sa.String,
|
|
199
|
-
nullable=True,
|
|
200
|
-
),
|
|
201
|
-
sa.Column("output", JSON_, nullable=False),
|
|
202
|
-
sa.Column("start_time", sa.TIMESTAMP(timezone=True), nullable=False),
|
|
203
|
-
sa.Column("end_time", sa.TIMESTAMP(timezone=True), nullable=False),
|
|
204
|
-
sa.Column(
|
|
205
|
-
"prompt_token_count",
|
|
206
|
-
sa.Integer,
|
|
207
|
-
nullable=True,
|
|
208
|
-
),
|
|
209
|
-
sa.Column(
|
|
210
|
-
"completion_token_count",
|
|
211
|
-
sa.Integer,
|
|
212
|
-
nullable=True,
|
|
213
|
-
),
|
|
214
|
-
sa.Column(
|
|
215
|
-
"error",
|
|
216
|
-
sa.String,
|
|
217
|
-
nullable=True,
|
|
218
|
-
),
|
|
219
|
-
sa.UniqueConstraint(
|
|
220
|
-
"experiment_id",
|
|
221
|
-
"dataset_example_id",
|
|
222
|
-
"repetition_number",
|
|
223
|
-
),
|
|
224
|
-
)
|
|
225
|
-
op.create_table(
|
|
226
|
-
"experiment_run_annotations",
|
|
227
|
-
sa.Column("id", sa.Integer, primary_key=True),
|
|
228
|
-
sa.Column(
|
|
229
|
-
"experiment_run_id",
|
|
230
|
-
sa.Integer,
|
|
231
|
-
sa.ForeignKey("experiment_runs.id", ondelete="CASCADE"),
|
|
232
|
-
nullable=False,
|
|
233
|
-
index=True,
|
|
234
|
-
),
|
|
235
|
-
sa.Column(
|
|
236
|
-
"name",
|
|
237
|
-
sa.String,
|
|
238
|
-
nullable=False,
|
|
239
|
-
),
|
|
240
|
-
sa.Column(
|
|
241
|
-
"annotator_kind",
|
|
242
|
-
sa.String,
|
|
243
|
-
sa.CheckConstraint(
|
|
244
|
-
"annotator_kind IN ('LLM', 'CODE', 'HUMAN')",
|
|
245
|
-
name="valid_annotator_kind",
|
|
246
|
-
),
|
|
247
|
-
nullable=False,
|
|
248
|
-
),
|
|
249
|
-
sa.Column(
|
|
250
|
-
"label",
|
|
251
|
-
sa.String,
|
|
252
|
-
nullable=True,
|
|
253
|
-
),
|
|
254
|
-
sa.Column(
|
|
255
|
-
"score",
|
|
256
|
-
sa.Float,
|
|
257
|
-
nullable=True,
|
|
258
|
-
),
|
|
259
|
-
sa.Column(
|
|
260
|
-
"explanation",
|
|
261
|
-
sa.String,
|
|
262
|
-
nullable=True,
|
|
263
|
-
),
|
|
264
|
-
sa.Column(
|
|
265
|
-
"trace_id",
|
|
266
|
-
sa.String,
|
|
267
|
-
nullable=True,
|
|
268
|
-
),
|
|
269
|
-
sa.Column(
|
|
270
|
-
"error",
|
|
271
|
-
sa.String,
|
|
272
|
-
nullable=True,
|
|
273
|
-
),
|
|
274
|
-
sa.Column("metadata", JSON_, nullable=False),
|
|
275
|
-
sa.Column("start_time", sa.TIMESTAMP(timezone=True), nullable=False),
|
|
276
|
-
sa.Column("end_time", sa.TIMESTAMP(timezone=True), nullable=False),
|
|
277
|
-
sa.UniqueConstraint(
|
|
278
|
-
"experiment_run_id",
|
|
279
|
-
"name",
|
|
280
|
-
),
|
|
281
|
-
)
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
def downgrade() -> None:
|
|
285
|
-
op.drop_table("experiment_run_annotations")
|
|
286
|
-
op.drop_table("experiment_runs")
|
|
287
|
-
op.drop_table("experiments")
|
|
288
|
-
op.drop_table("dataset_example_revisions")
|
|
289
|
-
op.drop_table("dataset_examples")
|
|
290
|
-
op.drop_table("dataset_versions")
|
|
291
|
-
op.drop_table("datasets")
|
phoenix/experiments/__init__.py
DELETED
|
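[extraction gap: the 6 deleted lines of phoenix/experiments/__init__.py were not captured; the hunk below belongs to the next file in the manifest]
phoenix/experiments/evaluators/__init__.py
DELETED
|
@@ -1,29 +0,0 @@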
|
|
|
1
|
-
from phoenix.experiments.evaluators.code_evaluators import (
|
|
2
|
-
ContainsAllKeywords,
|
|
3
|
-
ContainsAnyKeyword,
|
|
4
|
-
ContainsKeyword,
|
|
5
|
-
JSONParsable,
|
|
6
|
-
MatchesRegex,
|
|
7
|
-
)
|
|
8
|
-
from phoenix.experiments.evaluators.llm_evaluators import (
|
|
9
|
-
CoherenceEvaluator,
|
|
10
|
-
ConcisenessEvaluator,
|
|
11
|
-
HelpfulnessEvaluator,
|
|
12
|
-
LLMCriteriaEvaluator,
|
|
13
|
-
RelevanceEvaluator,
|
|
14
|
-
)
|
|
15
|
-
from phoenix.experiments.evaluators.utils import create_evaluator
|
|
16
|
-
|
|
17
|
-
__all__ = [
|
|
18
|
-
"create_evaluator",
|
|
19
|
-
"ContainsAllKeywords",
|
|
20
|
-
"ContainsAnyKeyword",
|
|
21
|
-
"ContainsKeyword",
|
|
22
|
-
"JSONParsable",
|
|
23
|
-
"MatchesRegex",
|
|
24
|
-
"CoherenceEvaluator",
|
|
25
|
-
"ConcisenessEvaluator",
|
|
26
|
-
"LLMCriteriaEvaluator",
|
|
27
|
-
"HelpfulnessEvaluator",
|
|
28
|
-
"RelevanceEvaluator",
|
|
29
|
-
]
|