arize-phoenix 11.38.0__py3-none-any.whl → 12.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of arize-phoenix might be problematic.
- {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/METADATA +3 -3
- {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/RECORD +83 -58
- phoenix/config.py +1 -11
- phoenix/db/bulk_inserter.py +8 -0
- phoenix/db/facilitator.py +1 -1
- phoenix/db/helpers.py +202 -33
- phoenix/db/insertion/dataset.py +7 -0
- phoenix/db/insertion/document_annotation.py +1 -1
- phoenix/db/insertion/helpers.py +2 -2
- phoenix/db/insertion/session_annotation.py +176 -0
- phoenix/db/insertion/span_annotation.py +1 -1
- phoenix/db/insertion/trace_annotation.py +1 -1
- phoenix/db/insertion/types.py +29 -3
- phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
- phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
- phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
- phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
- phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
- phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
- phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
- phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
- phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
- phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
- phoenix/db/models.py +306 -46
- phoenix/server/api/context.py +15 -2
- phoenix/server/api/dataloaders/__init__.py +8 -2
- phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
- phoenix/server/api/dataloaders/dataset_labels.py +36 -0
- phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
- phoenix/server/api/dataloaders/table_fields.py +2 -2
- phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
- phoenix/server/api/helpers/playground_clients.py +66 -35
- phoenix/server/api/helpers/playground_users.py +26 -0
- phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
- phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
- phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
- phoenix/server/api/mutations/__init__.py +8 -0
- phoenix/server/api/mutations/chat_mutations.py +8 -3
- phoenix/server/api/mutations/dataset_label_mutations.py +291 -0
- phoenix/server/api/mutations/dataset_mutations.py +5 -0
- phoenix/server/api/mutations/dataset_split_mutations.py +423 -0
- phoenix/server/api/mutations/project_session_annotations_mutations.py +161 -0
- phoenix/server/api/queries.py +53 -0
- phoenix/server/api/routers/auth.py +5 -5
- phoenix/server/api/routers/oauth2.py +5 -23
- phoenix/server/api/routers/v1/__init__.py +2 -0
- phoenix/server/api/routers/v1/annotations.py +320 -0
- phoenix/server/api/routers/v1/datasets.py +5 -0
- phoenix/server/api/routers/v1/experiments.py +10 -3
- phoenix/server/api/routers/v1/sessions.py +111 -0
- phoenix/server/api/routers/v1/traces.py +1 -2
- phoenix/server/api/routers/v1/users.py +7 -0
- phoenix/server/api/subscriptions.py +5 -2
- phoenix/server/api/types/Dataset.py +8 -0
- phoenix/server/api/types/DatasetExample.py +18 -0
- phoenix/server/api/types/DatasetLabel.py +23 -0
- phoenix/server/api/types/DatasetSplit.py +32 -0
- phoenix/server/api/types/Experiment.py +0 -4
- phoenix/server/api/types/Project.py +16 -0
- phoenix/server/api/types/ProjectSession.py +88 -3
- phoenix/server/api/types/ProjectSessionAnnotation.py +68 -0
- phoenix/server/api/types/Prompt.py +18 -1
- phoenix/server/api/types/Span.py +5 -5
- phoenix/server/api/types/Trace.py +61 -0
- phoenix/server/app.py +13 -14
- phoenix/server/cost_tracking/model_cost_manifest.json +132 -2
- phoenix/server/dml_event.py +13 -0
- phoenix/server/static/.vite/manifest.json +39 -39
- phoenix/server/static/assets/{components-BQPHTBfv.js → components-BG6v0EM8.js} +705 -385
- phoenix/server/static/assets/{index-BL5BMgJU.js → index-CSVcULw1.js} +13 -13
- phoenix/server/static/assets/{pages-C0Y17J0T.js → pages-DgaM7kpM.js} +1356 -1155
- phoenix/server/static/assets/{vendor-BdjZxMii.js → vendor-BqTEkGQU.js} +183 -183
- phoenix/server/static/assets/{vendor-arizeai-CHYlS8jV.js → vendor-arizeai-DlOj0PQQ.js} +15 -24
- phoenix/server/static/assets/{vendor-codemirror-Di6t4HnH.js → vendor-codemirror-B2PHH5yZ.js} +3 -3
- phoenix/server/static/assets/{vendor-recharts-C9wCDYj3.js → vendor-recharts-CKsi4IjN.js} +1 -1
- phoenix/server/static/assets/{vendor-shiki-MNnmOotP.js → vendor-shiki-DN26BkKE.js} +1 -1
- phoenix/server/utils.py +74 -0
- phoenix/session/session.py +25 -5
- phoenix/version.py +1 -1
- phoenix/server/api/dataloaders/experiment_repetition_counts.py +0 -39
- {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/licenses/LICENSE +0 -0
phoenix/db/helpers.py
CHANGED
@@ -1,7 +1,7 @@
 from collections.abc import Callable, Hashable, Iterable
 from datetime import datetime
 from enum import Enum
-from typing import Any, Literal, Optional, TypeVar, Union
+from typing import Any, Literal, Optional, Sequence, TypeVar, Union

 import sqlalchemy as sa
 from openinference.semconv.trace import (
@@ -10,16 +10,24 @@ from openinference.semconv.trace import (
     SpanAttributes,
 )
 from sqlalchemy import (
+    Insert,
     Integer,
     Select,
     SQLColumnExpression,
     and_,
     case,
     distinct,
+    exists,
     func,
+    insert,
+    literal,
+    or_,
     select,
+    util,
 )
+from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import QueryableAttribute
+from sqlalchemy.sql.roles import InElementRole
 from typing_extensions import assert_never

 from phoenix.config import PLAYGROUND_PROJECT_NAME
@@ -118,51 +126,205 @@ def dedup(
     return ans


-def …
+def _build_ranked_revisions_query(
     dataset_version_id: int,
-…
+    /,
+    *,
+    dataset_id: Optional[int] = None,
+    example_ids: Optional[Union[Sequence[int], InElementRole]] = None,
+) -> Select[tuple[int]]:
+    """
+    Build a query that ranks revisions per example within a dataset version.
+
+    This performs the core ranking logic using ROW_NUMBER() to find the latest
+    revision for each example within the specified dataset version.
+
+    Args:
+        dataset_version_id: Maximum dataset version to consider
+        dataset_id: Optional dataset ID - if provided, avoids subquery lookup
+
+    Returns:
+        SQLAlchemy SELECT query with revision ranking and basic dataset filtering
+    """
+    stmt = (
         select(
-…
+            func.row_number()
+            .over(
+                partition_by=models.DatasetExampleRevision.dataset_example_id,
+                order_by=models.DatasetExampleRevision.dataset_version_id.desc(),
+            )
+            .label("rn"),
         )
-…
+        .join(models.DatasetExample)
+        .where(models.DatasetExampleRevision.dataset_version_id <= dataset_version_id)
     )
-…
-        .join_from(
-            table,
-            models.DatasetExample,
-            table.dataset_example_id == models.DatasetExample.id,
+
+    if dataset_id is None:
+        version_subquery = (
+            select(models.DatasetVersion.dataset_id)
+            .filter_by(id=dataset_version_id)
+            .scalar_subquery()
         )
-…
+        stmt = stmt.where(models.DatasetExample.dataset_id == version_subquery)
+    else:
+        stmt = stmt.where(models.DatasetExample.dataset_id == dataset_id)
+
+    if example_ids is not None:
+        stmt = stmt.where(models.DatasetExampleRevision.dataset_example_id.in_(example_ids))
+
+    return stmt
+
+
+def get_dataset_example_revisions(
+    dataset_version_id: int,
+    /,
+    *,
+    dataset_id: Optional[int] = None,
+    example_ids: Optional[Union[Sequence[int], InElementRole]] = None,
+    split_ids: Optional[Union[Sequence[int], InElementRole]] = None,
+    split_names: Optional[Union[Sequence[str], InElementRole]] = None,
+) -> Select[tuple[models.DatasetExampleRevision]]:
+    """
+    Get the latest revisions for all dataset examples within a specific dataset version.
+
+    Excludes examples where the latest revision is a DELETE.
+
+    Args:
+        dataset_version_id: The dataset version to get revisions for
+        dataset_id: Optional dataset ID - if provided, avoids extra subquery lookup
+        example_ids: Optional filter by specific example IDs (subquery or list of IDs).
+            - None = no filtering
+            - Empty sequences/subqueries = no matches (strict filtering)
+        split_ids: Optional filter by split IDs (subquery or list of split IDs).
+            - None = no filtering
+            - Empty sequences/subqueries = no matches (strict filtering)
+        split_names: Optional filter by split names (subquery or list of split names).
+            - None = no filtering
+            - Empty sequences/subqueries = no matches (strict filtering)

+    Note:
+        - split_ids and split_names are mutually exclusive
+        - Use split_ids for better performance when IDs are available (avoids JOIN)
+        - Empty filters use strict behavior: empty inputs return zero results
+    """
+    if split_ids is not None and split_names is not None:
+        raise ValueError(
+            "Cannot specify both split_ids and split_names - they are mutually exclusive"
         )
-…
+
+    stmt = _build_ranked_revisions_query(
+        dataset_version_id,
+        dataset_id=dataset_id,
+        example_ids=example_ids,
+    ).add_columns(
+        models.DatasetExampleRevision.id,
+        models.DatasetExampleRevision.revision_kind,
     )
+
+    if split_ids is not None or split_names is not None:
+        if split_names is not None:
+            split_example_ids_subquery = (
+                select(models.DatasetSplitDatasetExample.dataset_example_id)
+                .join(
+                    models.DatasetSplit,
+                    models.DatasetSplit.id == models.DatasetSplitDatasetExample.dataset_split_id,
+                )
+                .where(models.DatasetSplit.name.in_(split_names))
+            )
+            stmt = stmt.where(models.DatasetExample.id.in_(split_example_ids_subquery))
+        else:
+            assert split_ids is not None
+            split_example_ids_subquery = select(
+                models.DatasetSplitDatasetExample.dataset_example_id
+            ).where(models.DatasetSplitDatasetExample.dataset_split_id.in_(split_ids))
+            stmt = stmt.where(models.DatasetExample.id.in_(split_example_ids_subquery))
+
+    ranked_subquery = stmt.subquery()
     return (
-        select(…
-        .where(table.revision_kind != "DELETE")
+        select(models.DatasetExampleRevision)
         .join(
-…
+            ranked_subquery,
+            models.DatasetExampleRevision.id == ranked_subquery.c.id,
+        )
+        .where(
+            ranked_subquery.c.rn == 1,
+            ranked_subquery.c.revision_kind != "DELETE",
         )
     )


+def create_experiment_examples_snapshot_insert(
+    experiment: models.Experiment,
+) -> Insert:
+    """
+    Create an INSERT statement to snapshot dataset examples for an experiment.
+
+    This captures which examples belong to the experiment at the time of creation,
+    respecting any dataset splits assigned to the experiment.
+
+    Args:
+        experiment: The experiment to create the snapshot for
+
+    Returns:
+        SQLAlchemy INSERT statement ready for execution
+    """
+    stmt = _build_ranked_revisions_query(
+        experiment.dataset_version_id,
+        dataset_id=experiment.dataset_id,
+    ).add_columns(
+        models.DatasetExampleRevision.id,
+        models.DatasetExampleRevision.dataset_example_id,
+        models.DatasetExampleRevision.revision_kind,
+    )
+
+    experiment_splits_subquery = select(models.ExperimentDatasetSplit.dataset_split_id).where(
+        models.ExperimentDatasetSplit.experiment_id == experiment.id
+    )
+    has_splits_condition = exists(experiment_splits_subquery)
+    split_filtered_example_ids = select(models.DatasetSplitDatasetExample.dataset_example_id).where(
+        models.DatasetSplitDatasetExample.dataset_split_id.in_(experiment_splits_subquery)
+    )
+
+    stmt = stmt.where(
+        or_(
+            ~has_splits_condition,  # No splits = include all examples
+            models.DatasetExampleRevision.dataset_example_id.in_(
+                split_filtered_example_ids
+            ),  # Has splits = filter by splits
+        )
+    )
+
+    ranked_subquery = stmt.subquery()
+    return insert(models.ExperimentDatasetExample).from_select(
+        [
+            models.ExperimentDatasetExample.experiment_id,
+            models.ExperimentDatasetExample.dataset_example_id,
+            models.ExperimentDatasetExample.dataset_example_revision_id,
+        ],
+        select(
+            literal(experiment.id),
+            ranked_subquery.c.dataset_example_id,
+            ranked_subquery.c.id,
+        ).where(
+            ranked_subquery.c.rn == 1,
+            ranked_subquery.c.revision_kind != "DELETE",
+        ),
+    )
+
+
+async def insert_experiment_with_examples_snapshot(
+    session: AsyncSession,
+    experiment: models.Experiment,
+) -> None:
+    """
+    Insert an experiment with its snapshot of dataset examples.
+    """
+    session.add(experiment)
+    await session.flush()
+    insert_stmt = create_experiment_examples_snapshot_insert(experiment)
+    await session.execute(insert_stmt)


 _AnyTuple = TypeVar("_AnyTuple", bound=tuple[Any, ...])


@@ -355,3 +517,10 @@ def get_ancestor_span_rowids(parent_id: str) -> Select[tuple[int]]:
         )
     )
     return select(ancestors.c.id)
+
+
+def truncate_name(name: str, max_len: int = 63) -> str:
+    # https://github.com/sqlalchemy/sqlalchemy/blob/e263825e3c5060bf4f47eed0e833c6660a31658e/lib/sqlalchemy/sql/compiler.py#L7844-L7845
+    if len(name) > max_len:
+        return name[0 : max_len - 8] + "_" + util.md5_hex(name)[-4:]
+    return name
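The heart of this refactor is `_build_ranked_revisions_query`, which both `get_dataset_example_revisions` and the new experiment-snapshot INSERT reuse: revisions are numbered per example in descending version order, and callers keep only rank 1 where the latest revision is not a DELETE. A self-contained sketch of that "latest row per group" pattern, using an illustrative table rather than the Phoenix models:

import sqlalchemy as sa

metadata = sa.MetaData()
revisions = sa.Table(
    "revisions",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("example_id", sa.Integer),
    sa.Column("version_id", sa.Integer),
    sa.Column("kind", sa.String),
)

engine = sa.create_engine("sqlite://")
metadata.create_all(engine)
with engine.begin() as conn:
    conn.execute(
        revisions.insert(),
        [
            {"example_id": 1, "version_id": 1, "kind": "CREATE"},
            {"example_id": 1, "version_id": 3, "kind": "PATCH"},
            {"example_id": 2, "version_id": 1, "kind": "CREATE"},
            {"example_id": 2, "version_id": 2, "kind": "DELETE"},
        ],
    )
    # Rank revisions newest-first within each example, capped at version 3.
    rn = (
        sa.func.row_number()
        .over(
            partition_by=revisions.c.example_id,
            order_by=revisions.c.version_id.desc(),
        )
        .label("rn")
    )
    ranked = (
        sa.select(revisions.c.id, revisions.c.kind, rn)
        .where(revisions.c.version_id <= 3)
        .subquery()
    )
    # Keep only each example's latest revision, excluding deletions.
    latest = sa.select(ranked.c.id).where(ranked.c.rn == 1, ranked.c.kind != "DELETE")
    print(conn.execute(latest).scalars().all())  # [2]: example 1's PATCH; example 2 dropped

Filtering on rn == 1 in an outer query is required because window functions cannot appear directly in a WHERE clause.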
phoenix/db/insertion/dataset.py
CHANGED
@@ -44,6 +44,7 @@ async def insert_dataset(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     created_at: Optional[datetime] = None,
+    user_id: Optional[int] = None,
 ) -> DatasetId:
     id_ = await session.scalar(
         insert(models.Dataset)
@@ -52,6 +53,7 @@ async def insert_dataset(
             description=description,
             metadata_=metadata,
             created_at=created_at,
+            user_id=user_id,
         )
         .returning(models.Dataset.id)
     )
@@ -64,6 +66,7 @@ async def insert_dataset_version(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     created_at: Optional[datetime] = None,
+    user_id: Optional[int] = None,
 ) -> DatasetVersionId:
     id_ = await session.scalar(
         insert(models.DatasetVersion)
@@ -72,6 +75,7 @@ async def insert_dataset_version(
             description=description,
             metadata_=metadata,
             created_at=created_at,
+            user_id=user_id,
         )
         .returning(models.DatasetVersion.id)
     )
@@ -152,6 +156,7 @@ async def add_dataset_examples(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     action: DatasetAction = DatasetAction.CREATE,
+    user_id: Optional[int] = None,
 ) -> Optional[DatasetExampleAdditionEvent]:
     created_at = datetime.now(timezone.utc)
     dataset_id: Optional[DatasetId] = None
@@ -167,6 +172,7 @@ async def add_dataset_examples(
                 description=description,
                 metadata=metadata,
                 created_at=created_at,
+                user_id=user_id,
             )
     except Exception:
         logger.exception(f"Failed to insert dataset: {name=}")
@@ -176,6 +182,7 @@ async def add_dataset_examples(
                 session=session,
                 dataset_id=dataset_id,
                 created_at=created_at,
+                user_id=user_id,
             )
     except Exception:
         logger.exception(f"Failed to insert dataset version for {dataset_id=}")
phoenix/db/insertion/document_annotation.py
CHANGED
@@ -181,7 +181,7 @@ def _key(p: Received[Precursors.DocumentAnnotation]) -> _Key:


 def _unique_by(p: Received[Insertables.DocumentAnnotation]) -> _UniqueBy:
-    return p.item.obj.name, p.item.span_rowid, p.item.document_position, p.item.identifier
+    return p.item.obj.name, p.item.span_rowid, p.item.document_position, p.item.obj.identifier


 def _time(p: Received[Any]) -> datetime:
phoenix/db/insertion/helpers.py
CHANGED
@@ -12,7 +12,7 @@ from sqlalchemy.sql.elements import KeyedColumnElement
 from typing_extensions import TypeAlias, assert_never

 from phoenix.db import models
-from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.db.helpers import SupportedSQLDialect, truncate_name
 from phoenix.db.models import Base
 from phoenix.trace.attributes import get_attribute_value

@@ -53,7 +53,7 @@ def insert_on_conflict(
             unique_records.append(v)
             seen.add(k)
     records = tuple(reversed(unique_records))
-    constraint = constraint_name or "_".join(("uq", table.__tablename__, *unique_by))
+    constraint = constraint_name or truncate_name("_".join(("uq", table.__tablename__, *unique_by)))
     if dialect is SupportedSQLDialect.POSTGRESQL:
         stmt_postgresql = insert_postgresql(table).values(records)
         if on_conflict is OnConflict.DO_NOTHING:
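Why `insert_on_conflict` now routes the generated constraint name through `truncate_name`: PostgreSQL caps identifiers at 63 characters, and SQLAlchemy shortens longer generated names with an md5 suffix when emitting DDL (the compiler link cited in the helpers.py diff), so the ON CONFLICT constraint name must be shortened the same way to match what exists in the database. A sketch of the scheme; `hashlib` stands in for `sqlalchemy.util.md5_hex`, and the name below is only an example that plausibly overflows the limit:

import hashlib

def truncate_name(name: str, max_len: int = 63) -> str:
    # Same scheme as the diff: keep a prefix, append "_" + last 4 hex chars of the md5.
    if len(name) > max_len:
        return name[: max_len - 8] + "_" + hashlib.md5(name.encode()).hexdigest()[-4:]
    return name

long_name = "_".join(("uq", "project_session_annotations", "name", "project_session_id", "identifier"))
print(len(long_name))            # 65, over the 63-character limit
print(truncate_name(long_name))  # 60 characters, with a deterministic md5 tail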
phoenix/db/insertion/session_annotation.py
ADDED
@@ -0,0 +1,176 @@
+from collections.abc import Mapping
+from datetime import datetime
+from typing import Any, NamedTuple, Optional
+
+from sqlalchemy import Row, Select, and_, select, tuple_
+from sqlalchemy.ext.asyncio import AsyncSession
+from typing_extensions import TypeAlias
+
+from phoenix.db import models
+from phoenix.db.helpers import dedup
+from phoenix.db.insertion.helpers import as_kv
+from phoenix.db.insertion.types import (
+    Insertables,
+    Postponed,
+    Precursors,
+    QueueInserter,
+    Received,
+)
+from phoenix.server.dml_event import ProjectSessionAnnotationDmlEvent
+
+# Type alias for consistency with other annotation patterns
+SessionAnnotationDmlEvent = ProjectSessionAnnotationDmlEvent
+
+_Name: TypeAlias = str
+_SessionId: TypeAlias = str
+_SessionRowId: TypeAlias = int
+_AnnoRowId: TypeAlias = int
+_Identifier: TypeAlias = str
+
+
+class _Key(NamedTuple):
+    annotation_name: _Name
+    annotation_identifier: _Identifier
+    session_id: _SessionId
+
+
+_UniqueBy: TypeAlias = tuple[_Name, _SessionRowId, _Identifier]
+_Existing: TypeAlias = tuple[
+    _SessionRowId,
+    _SessionId,
+    Optional[_AnnoRowId],
+    Optional[_Name],
+    Optional[datetime],
+]
+
+
+class SessionAnnotationQueueInserter(
+    QueueInserter[
+        Precursors.SessionAnnotation,
+        Insertables.SessionAnnotation,
+        models.ProjectSessionAnnotation,
+        SessionAnnotationDmlEvent,
+    ],
+    table=models.ProjectSessionAnnotation,
+    unique_by=("name", "project_session_id", "identifier"),
+):
+    async def _events(
+        self,
+        session: AsyncSession,
+        *insertions: Insertables.SessionAnnotation,
+    ) -> list[SessionAnnotationDmlEvent]:
+        records = [{**dict(as_kv(ins.row)), "updated_at": ins.row.updated_at} for ins in insertions]
+        stmt = self._insert_on_conflict(*records).returning(self.table.id)
+        ids = tuple([_ async for _ in await session.stream_scalars(stmt)])
+        return [SessionAnnotationDmlEvent(ids)]
+
+    async def _partition(
+        self,
+        session: AsyncSession,
+        *parcels: Received[Precursors.SessionAnnotation],
+    ) -> tuple[
+        list[Received[Insertables.SessionAnnotation]],
+        list[Postponed[Precursors.SessionAnnotation]],
+        list[Received[Precursors.SessionAnnotation]],
+    ]:
+        to_insert: list[Received[Insertables.SessionAnnotation]] = []
+        to_postpone: list[Postponed[Precursors.SessionAnnotation]] = []
+        to_discard: list[Received[Precursors.SessionAnnotation]] = []
+
+        stmt = self._select_existing(*map(_key, parcels))
+        existing: list[Row[_Existing]] = [_ async for _ in await session.stream(stmt)]
+        existing_sessions: Mapping[str, _SessionAttr] = {
+            e.session_id: _SessionAttr(e.session_rowid) for e in existing
+        }
+        existing_annos: Mapping[_Key, _AnnoAttr] = {
+            _Key(
+                annotation_name=e.name,
+                annotation_identifier=e.identifier,
+                session_id=e.session_id,
+            ): _AnnoAttr(e.session_rowid, e.id, e.updated_at)
+            for e in existing
+            if e.id is not None and e.name is not None and e.updated_at is not None
+        }
+
+        for p in parcels:
+            if (anno := existing_annos.get(_key(p))) is not None:
+                if p.item.updated_at <= anno.updated_at:
+                    to_discard.append(p)
+                else:
+                    to_insert.append(
+                        Received(
+                            received_at=p.received_at,
+                            item=p.item.as_insertable(
+                                project_session_rowid=anno.session_rowid,
+                            ),
+                        )
+                    )
+            elif (existing_session := existing_sessions.get(p.item.session_id)) is not None:
+                to_insert.append(
+                    Received(
+                        received_at=p.received_at,
+                        item=p.item.as_insertable(
+                            project_session_rowid=existing_session.session_rowid,
+                        ),
+                    )
+                )
+            elif isinstance(p, Postponed):
+                if p.retries_left > 1:
+                    to_postpone.append(p.postpone(p.retries_left - 1))
+                else:
+                    to_discard.append(p)
+            elif isinstance(p, Received):
+                to_postpone.append(p.postpone(self._retry_allowance))
+            else:
+                to_discard.append(p)
+
+        assert len(to_insert) + len(to_postpone) + len(to_discard) == len(parcels)
+        to_insert = dedup(sorted(to_insert, key=_time, reverse=True), _unique_by)[::-1]
+        return to_insert, to_postpone, to_discard
+
+    def _select_existing(self, *keys: _Key) -> Select[_Existing]:
+        anno = self.table
+        session = (
+            select(models.ProjectSession.id, models.ProjectSession.session_id)
+            .where(models.ProjectSession.session_id.in_({k.session_id for k in keys}))
+            .cte()
+        )
+        onclause = and_(
+            session.c.id == anno.project_session_id,
+            anno.name.in_({k.annotation_name for k in keys}),
+            tuple_(anno.name, anno.identifier, session.c.session_id).in_(keys),
+        )
+        return select(
+            session.c.id.label("session_rowid"),
+            session.c.session_id,
+            anno.id,
+            anno.name,
+            anno.identifier,
+            anno.updated_at,
+        ).outerjoin_from(session, anno, onclause)
+
+
+class _SessionAttr(NamedTuple):
+    session_rowid: _SessionRowId
+
+
+class _AnnoAttr(NamedTuple):
+    session_rowid: _SessionRowId
+    id_: _AnnoRowId
+    updated_at: datetime
+
+
+def _key(p: Received[Precursors.SessionAnnotation]) -> _Key:
+    return _Key(
+        annotation_name=p.item.obj.name,
+        annotation_identifier=p.item.obj.identifier,
+        session_id=p.item.session_id,
+    )
+
+
+def _unique_by(p: Received[Insertables.SessionAnnotation]) -> _UniqueBy:
+    return p.item.obj.name, p.item.project_session_rowid, p.item.obj.identifier
+
+
+def _time(p: Received[Any]) -> datetime:
+    return p.received_at
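Note the tail of `_partition`: parcels are sorted newest-first, de-duplicated on the (name, session row, identifier) key, then reversed back into chronological order, so within a batch only the most recent write per annotation is inserted. A toy illustration of that step; this `dedup` is a first-wins reimplementation of the helper in `phoenix.db.helpers`, and the tuples stand in for parcels:

from datetime import datetime, timedelta
from typing import Callable, Hashable, Iterable, TypeVar

T = TypeVar("T")

def dedup(items: Iterable[T], key: Callable[[T], Hashable]) -> list[T]:
    # First occurrence wins, so feeding items newest-first keeps the latest write.
    seen: set[Hashable] = set()
    out: list[T] = []
    for item in items:
        if (k := key(item)) not in seen:
            seen.add(k)
            out.append(item)
    return out

t0 = datetime(2025, 1, 1)
parcels = [  # (name, session_rowid, identifier, received_at)
    ("note", 7, "", t0),
    ("note", 7, "", t0 + timedelta(seconds=5)),  # later write to the same key
    ("score", 7, "", t0 + timedelta(seconds=1)),
]
newest_first = sorted(parcels, key=lambda p: p[3], reverse=True)
print(dedup(newest_first, lambda p: p[:3])[::-1])  # "score", then the 5-second "note"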
phoenix/db/insertion/span_annotation.py
CHANGED
@@ -167,7 +167,7 @@ def _key(p: Received[Precursors.SpanAnnotation]) -> _Key:


 def _unique_by(p: Received[Insertables.SpanAnnotation]) -> _UniqueBy:
-    return p.item.obj.name, p.item.span_rowid, p.item.identifier
+    return p.item.obj.name, p.item.span_rowid, p.item.obj.identifier


 def _time(p: Received[Any]) -> datetime:
phoenix/db/insertion/trace_annotation.py
CHANGED
@@ -166,7 +166,7 @@ def _key(p: Received[Precursors.TraceAnnotation]) -> _Key:


 def _unique_by(p: Received[Insertables.TraceAnnotation]) -> _UniqueBy:
-    return p.item.obj.name, p.item.trace_rowid, p.item.identifier
+    return p.item.obj.name, p.item.trace_rowid, p.item.obj.identifier


 def _time(p: Received[Any]) -> datetime:
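The three one-line `_unique_by` fixes in this release (document, span, and trace annotations) all correct the same slip: the insertable dataclasses declared an `identifier: str = ""` field (removed in the types.py diff below) that shadowed the identifier carried on the ORM object, so `p.item.identifier` most likely yielded the default empty string and distinct annotations could collapse into one dedup bucket. A toy illustration with stand-in classes, not the Phoenix types:

from dataclasses import dataclass

@dataclass
class Obj:
    name: str
    identifier: str

@dataclass
class OldInsertable:
    obj: Obj
    span_rowid: int
    identifier: str = ""  # the shadowing field removed in this release

a = OldInsertable(Obj("note", "alpha"), span_rowid=1)
b = OldInsertable(Obj("note", "beta"), span_rowid=1)
print({(p.obj.name, p.span_rowid, p.identifier) for p in (a, b)})      # 1 bucket (bug)
print({(p.obj.name, p.span_rowid, p.obj.identifier) for p in (a, b)})  # 2 buckets (fixed)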
phoenix/db/insertion/types.py
CHANGED
@@ -225,13 +225,29 @@ class Precursors(ABC):
             span_rowid=span_rowid,
         )

+    @dataclass(frozen=True)
+    class SessionAnnotation:
+        updated_at: datetime
+        session_id: str
+        obj: models.ProjectSessionAnnotation
+
+        def as_insertable(
+            self,
+            project_session_rowid: int,
+        ) -> Insertables.SessionAnnotation:
+            return Insertables.SessionAnnotation(
+                updated_at=self.updated_at,
+                session_id=self.session_id,
+                obj=self.obj,
+                project_session_rowid=project_session_rowid,
+            )
+

 class Insertables(ABC):
     @dataclass(frozen=True)
     class SpanAnnotation(Precursors.SpanAnnotation):
         updated_at: datetime
         span_rowid: int
-        identifier: str = ""

         @property
         def row(self) -> models.SpanAnnotation:
@@ -244,7 +260,6 @@ class Insertables(ABC):
     class TraceAnnotation(Precursors.TraceAnnotation):
         updated_at: datetime
         trace_rowid: int
-        identifier: str = ""

         @property
         def row(self) -> models.TraceAnnotation:
@@ -257,7 +272,6 @@ class Insertables(ABC):
     class DocumentAnnotation(Precursors.DocumentAnnotation):
         updated_at: datetime
         span_rowid: int
-        identifier: str = ""

         @property
         def row(self) -> models.DocumentAnnotation:
@@ -265,3 +279,15 @@ class Insertables(ABC):
         obj.span_rowid = self.span_rowid
         obj.updated_at = self.updated_at
         return obj
+
+    @dataclass(frozen=True)
+    class SessionAnnotation(Precursors.SessionAnnotation):
+        updated_at: datetime
+        project_session_rowid: int
+
+        @property
+        def row(self) -> models.ProjectSessionAnnotation:
+            obj = copy(self.obj)
+            obj.project_session_id = self.project_session_rowid
+            obj.updated_at = self.updated_at
+            return obj
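For orientation, a compressed sketch of the precursor-to-insertable pattern the new `SessionAnnotation` classes follow: a precursor carries the ORM object plus the public session id; once the session's row id is resolved, `as_insertable` builds a frozen insertable whose `row` property stamps the foreign key onto a copy of the object. All names here are stand-ins, not the Phoenix models:

from copy import copy
from dataclasses import dataclass
from datetime import datetime, timezone

class AnnotationRow:  # stand-in for models.ProjectSessionAnnotation
    name = "note"
    identifier = ""
    project_session_id: int = 0
    updated_at: datetime = datetime.min

@dataclass(frozen=True)
class Precursor:
    updated_at: datetime
    session_id: str
    obj: AnnotationRow

    def as_insertable(self, project_session_rowid: int) -> "Insertable":
        return Insertable(self.updated_at, self.session_id, self.obj, project_session_rowid)

@dataclass(frozen=True)
class Insertable(Precursor):
    project_session_rowid: int

    @property
    def row(self) -> AnnotationRow:
        # Stamp the resolved foreign key onto a copy, leaving the precursor untouched.
        obj = copy(self.obj)
        obj.project_session_id = self.project_session_rowid
        obj.updated_at = self.updated_at
        return obj

p = Precursor(datetime.now(timezone.utc), "session-a", AnnotationRow())
print(p.as_insertable(project_session_rowid=42).row.project_session_id)  # 42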
phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py
ADDED
@@ -0,0 +1,40 @@
+"""add user_id on datasets
+
+Revision ID: 01a8342c9cdf
+Revises: 0df286449799
+Create Date: 2025-09-25 16:08:51.254947
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "01a8342c9cdf"
+down_revision: Union[str, None] = "0df286449799"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+_Integer = sa.Integer().with_variant(
+    sa.BigInteger(),
+    "postgresql",
+)
+
+
+def upgrade() -> None:
+    with op.batch_alter_table("datasets") as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "user_id",
+                _Integer,
+                sa.ForeignKey("users.id", ondelete="SET NULL"),
+                nullable=True,
+            ),
+        )
+
+
+def downgrade() -> None:
+    with op.batch_alter_table("datasets") as batch_op:
+        batch_op.drop_column("user_id")
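Two details of this migration worth noting: `ondelete="SET NULL"` means deleting a user keeps the datasets but clears their attribution, and the `with_variant` type gives the new column BIGINT on PostgreSQL while other backends such as SQLite keep plain INTEGER. A quick standalone check of how that variant renders per dialect:

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql, sqlite

_Integer = sa.Integer().with_variant(sa.BigInteger(), "postgresql")
print(_Integer.compile(dialect=sqlite.dialect()))      # INTEGER
print(_Integer.compile(dialect=postgresql.dialect()))  # BIGINT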