arize-phoenix 11.38.0__py3-none-any.whl → 12.2.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.

Files changed (84)
  1. {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/METADATA +3 -3
  2. {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/RECORD +83 -58
  3. phoenix/config.py +1 -11
  4. phoenix/db/bulk_inserter.py +8 -0
  5. phoenix/db/facilitator.py +1 -1
  6. phoenix/db/helpers.py +202 -33
  7. phoenix/db/insertion/dataset.py +7 -0
  8. phoenix/db/insertion/document_annotation.py +1 -1
  9. phoenix/db/insertion/helpers.py +2 -2
  10. phoenix/db/insertion/session_annotation.py +176 -0
  11. phoenix/db/insertion/span_annotation.py +1 -1
  12. phoenix/db/insertion/trace_annotation.py +1 -1
  13. phoenix/db/insertion/types.py +29 -3
  14. phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
  15. phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
  16. phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
  17. phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
  18. phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
  19. phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
  20. phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
  21. phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
  22. phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
  23. phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
  24. phoenix/db/models.py +306 -46
  25. phoenix/server/api/context.py +15 -2
  26. phoenix/server/api/dataloaders/__init__.py +8 -2
  27. phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
  28. phoenix/server/api/dataloaders/dataset_labels.py +36 -0
  29. phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
  30. phoenix/server/api/dataloaders/table_fields.py +2 -2
  31. phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
  32. phoenix/server/api/helpers/playground_clients.py +66 -35
  33. phoenix/server/api/helpers/playground_users.py +26 -0
  34. phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
  35. phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
  36. phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
  37. phoenix/server/api/mutations/__init__.py +8 -0
  38. phoenix/server/api/mutations/chat_mutations.py +8 -3
  39. phoenix/server/api/mutations/dataset_label_mutations.py +291 -0
  40. phoenix/server/api/mutations/dataset_mutations.py +5 -0
  41. phoenix/server/api/mutations/dataset_split_mutations.py +423 -0
  42. phoenix/server/api/mutations/project_session_annotations_mutations.py +161 -0
  43. phoenix/server/api/queries.py +53 -0
  44. phoenix/server/api/routers/auth.py +5 -5
  45. phoenix/server/api/routers/oauth2.py +5 -23
  46. phoenix/server/api/routers/v1/__init__.py +2 -0
  47. phoenix/server/api/routers/v1/annotations.py +320 -0
  48. phoenix/server/api/routers/v1/datasets.py +5 -0
  49. phoenix/server/api/routers/v1/experiments.py +10 -3
  50. phoenix/server/api/routers/v1/sessions.py +111 -0
  51. phoenix/server/api/routers/v1/traces.py +1 -2
  52. phoenix/server/api/routers/v1/users.py +7 -0
  53. phoenix/server/api/subscriptions.py +5 -2
  54. phoenix/server/api/types/Dataset.py +8 -0
  55. phoenix/server/api/types/DatasetExample.py +18 -0
  56. phoenix/server/api/types/DatasetLabel.py +23 -0
  57. phoenix/server/api/types/DatasetSplit.py +32 -0
  58. phoenix/server/api/types/Experiment.py +0 -4
  59. phoenix/server/api/types/Project.py +16 -0
  60. phoenix/server/api/types/ProjectSession.py +88 -3
  61. phoenix/server/api/types/ProjectSessionAnnotation.py +68 -0
  62. phoenix/server/api/types/Prompt.py +18 -1
  63. phoenix/server/api/types/Span.py +5 -5
  64. phoenix/server/api/types/Trace.py +61 -0
  65. phoenix/server/app.py +13 -14
  66. phoenix/server/cost_tracking/model_cost_manifest.json +132 -2
  67. phoenix/server/dml_event.py +13 -0
  68. phoenix/server/static/.vite/manifest.json +39 -39
  69. phoenix/server/static/assets/{components-BQPHTBfv.js → components-BG6v0EM8.js} +705 -385
  70. phoenix/server/static/assets/{index-BL5BMgJU.js → index-CSVcULw1.js} +13 -13
  71. phoenix/server/static/assets/{pages-C0Y17J0T.js → pages-DgaM7kpM.js} +1356 -1155
  72. phoenix/server/static/assets/{vendor-BdjZxMii.js → vendor-BqTEkGQU.js} +183 -183
  73. phoenix/server/static/assets/{vendor-arizeai-CHYlS8jV.js → vendor-arizeai-DlOj0PQQ.js} +15 -24
  74. phoenix/server/static/assets/{vendor-codemirror-Di6t4HnH.js → vendor-codemirror-B2PHH5yZ.js} +3 -3
  75. phoenix/server/static/assets/{vendor-recharts-C9wCDYj3.js → vendor-recharts-CKsi4IjN.js} +1 -1
  76. phoenix/server/static/assets/{vendor-shiki-MNnmOotP.js → vendor-shiki-DN26BkKE.js} +1 -1
  77. phoenix/server/utils.py +74 -0
  78. phoenix/session/session.py +25 -5
  79. phoenix/version.py +1 -1
  80. phoenix/server/api/dataloaders/experiment_repetition_counts.py +0 -39
  81. {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/WHEEL +0 -0
  82. {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/entry_points.txt +0 -0
  83. {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/licenses/IP_NOTICE +0 -0
  84. {arize_phoenix-11.38.0.dist-info → arize_phoenix-12.2.0.dist-info}/licenses/LICENSE +0 -0
phoenix/db/helpers.py CHANGED
@@ -1,7 +1,7 @@
 from collections.abc import Callable, Hashable, Iterable
 from datetime import datetime
 from enum import Enum
-from typing import Any, Literal, Optional, TypeVar, Union
+from typing import Any, Literal, Optional, Sequence, TypeVar, Union
 
 import sqlalchemy as sa
 from openinference.semconv.trace import (
@@ -10,16 +10,24 @@ from openinference.semconv.trace import (
     SpanAttributes,
 )
 from sqlalchemy import (
+    Insert,
     Integer,
     Select,
     SQLColumnExpression,
     and_,
     case,
     distinct,
+    exists,
     func,
+    insert,
+    literal,
+    or_,
     select,
+    util,
 )
+from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import QueryableAttribute
+from sqlalchemy.sql.roles import InElementRole
 from typing_extensions import assert_never
 
 from phoenix.config import PLAYGROUND_PROJECT_NAME
@@ -118,51 +126,205 @@ def dedup(
     return ans
 
 
-def get_dataset_example_revisions(
+def _build_ranked_revisions_query(
     dataset_version_id: int,
-) -> Select[tuple[models.DatasetExampleRevision]]:
-    version = (
+    /,
+    *,
+    dataset_id: Optional[int] = None,
+    example_ids: Optional[Union[Sequence[int], InElementRole]] = None,
+) -> Select[tuple[int]]:
+    """
+    Build a query that ranks revisions per example within a dataset version.
+
+    This performs the core ranking logic using ROW_NUMBER() to find the latest
+    revision for each example within the specified dataset version.
+
+    Args:
+        dataset_version_id: Maximum dataset version to consider
+        dataset_id: Optional dataset ID - if provided, avoids subquery lookup
+
+    Returns:
+        SQLAlchemy SELECT query with revision ranking and basic dataset filtering
+    """
+    stmt = (
         select(
-            models.DatasetVersion.id,
-            models.DatasetVersion.dataset_id,
+            func.row_number()
+            .over(
+                partition_by=models.DatasetExampleRevision.dataset_example_id,
+                order_by=models.DatasetExampleRevision.dataset_version_id.desc(),
+            )
+            .label("rn"),
         )
-        .filter_by(id=dataset_version_id)
-        .subquery()
+        .join(models.DatasetExample)
+        .where(models.DatasetExampleRevision.dataset_version_id <= dataset_version_id)
     )
-    table = models.DatasetExampleRevision
-    revision = (
-        select(
-            table.dataset_example_id,
-            func.max(table.dataset_version_id).label("dataset_version_id"),
-        )
-        .join_from(
-            table,
-            models.DatasetExample,
-            table.dataset_example_id == models.DatasetExample.id,
+
+    if dataset_id is None:
+        version_subquery = (
+            select(models.DatasetVersion.dataset_id)
+            .filter_by(id=dataset_version_id)
+            .scalar_subquery()
         )
-        .join_from(
-            models.DatasetExample,
-            version,
-            models.DatasetExample.dataset_id == version.c.dataset_id,
+        stmt = stmt.where(models.DatasetExample.dataset_id == version_subquery)
+    else:
+        stmt = stmt.where(models.DatasetExample.dataset_id == dataset_id)
+
+    if example_ids is not None:
+        stmt = stmt.where(models.DatasetExampleRevision.dataset_example_id.in_(example_ids))
+
+    return stmt
+
+
+def get_dataset_example_revisions(
+    dataset_version_id: int,
+    /,
+    *,
+    dataset_id: Optional[int] = None,
+    example_ids: Optional[Union[Sequence[int], InElementRole]] = None,
+    split_ids: Optional[Union[Sequence[int], InElementRole]] = None,
+    split_names: Optional[Union[Sequence[str], InElementRole]] = None,
+) -> Select[tuple[models.DatasetExampleRevision]]:
+    """
+    Get the latest revisions for all dataset examples within a specific dataset version.
+
+    Excludes examples where the latest revision is a DELETE.
+
+    Args:
+        dataset_version_id: The dataset version to get revisions for
+        dataset_id: Optional dataset ID - if provided, avoids extra subquery lookup
+        example_ids: Optional filter by specific example IDs (subquery or list of IDs).
+            - None = no filtering
+            - Empty sequences/subqueries = no matches (strict filtering)
+        split_ids: Optional filter by split IDs (subquery or list of split IDs).
+            - None = no filtering
+            - Empty sequences/subqueries = no matches (strict filtering)
+        split_names: Optional filter by split names (subquery or list of split names).
+            - None = no filtering
+            - Empty sequences/subqueries = no matches (strict filtering)
+
+    Note:
+        - split_ids and split_names are mutually exclusive
+        - Use split_ids for better performance when IDs are available (avoids JOIN)
+        - Empty filters use strict behavior: empty inputs return zero results
+    """
+    if split_ids is not None and split_names is not None:
+        raise ValueError(
+            "Cannot specify both split_ids and split_names - they are mutually exclusive"
         )
-        .where(models.DatasetExample.dataset_id == version.c.dataset_id)
-        .where(table.dataset_version_id <= version.c.id)
-        .group_by(table.dataset_example_id)
-        .subquery()
+
+    stmt = _build_ranked_revisions_query(
+        dataset_version_id,
+        dataset_id=dataset_id,
+        example_ids=example_ids,
+    ).add_columns(
+        models.DatasetExampleRevision.id,
+        models.DatasetExampleRevision.revision_kind,
     )
+
+    if split_ids is not None or split_names is not None:
+        if split_names is not None:
+            split_example_ids_subquery = (
+                select(models.DatasetSplitDatasetExample.dataset_example_id)
+                .join(
+                    models.DatasetSplit,
+                    models.DatasetSplit.id == models.DatasetSplitDatasetExample.dataset_split_id,
+                )
+                .where(models.DatasetSplit.name.in_(split_names))
+            )
+            stmt = stmt.where(models.DatasetExample.id.in_(split_example_ids_subquery))
+        else:
+            assert split_ids is not None
+            split_example_ids_subquery = select(
+                models.DatasetSplitDatasetExample.dataset_example_id
+            ).where(models.DatasetSplitDatasetExample.dataset_split_id.in_(split_ids))
+            stmt = stmt.where(models.DatasetExample.id.in_(split_example_ids_subquery))
+
+    ranked_subquery = stmt.subquery()
     return (
-        select(table)
-        .where(table.revision_kind != "DELETE")
+        select(models.DatasetExampleRevision)
         .join(
-            revision,
-            onclause=and_(
-                revision.c.dataset_example_id == table.dataset_example_id,
-                revision.c.dataset_version_id == table.dataset_version_id,
-            ),
+            ranked_subquery,
+            models.DatasetExampleRevision.id == ranked_subquery.c.id,
+        )
+        .where(
+            ranked_subquery.c.rn == 1,
+            ranked_subquery.c.revision_kind != "DELETE",
         )
     )
 
 
+def create_experiment_examples_snapshot_insert(
+    experiment: models.Experiment,
+) -> Insert:
+    """
+    Create an INSERT statement to snapshot dataset examples for an experiment.
+
+    This captures which examples belong to the experiment at the time of creation,
+    respecting any dataset splits assigned to the experiment.
+
+    Args:
+        experiment: The experiment to create the snapshot for
+
+    Returns:
+        SQLAlchemy INSERT statement ready for execution
+    """
+    stmt = _build_ranked_revisions_query(
+        experiment.dataset_version_id,
+        dataset_id=experiment.dataset_id,
+    ).add_columns(
+        models.DatasetExampleRevision.id,
+        models.DatasetExampleRevision.dataset_example_id,
+        models.DatasetExampleRevision.revision_kind,
+    )
+
+    experiment_splits_subquery = select(models.ExperimentDatasetSplit.dataset_split_id).where(
+        models.ExperimentDatasetSplit.experiment_id == experiment.id
+    )
+    has_splits_condition = exists(experiment_splits_subquery)
+    split_filtered_example_ids = select(models.DatasetSplitDatasetExample.dataset_example_id).where(
+        models.DatasetSplitDatasetExample.dataset_split_id.in_(experiment_splits_subquery)
+    )
+
+    stmt = stmt.where(
+        or_(
+            ~has_splits_condition,  # No splits = include all examples
+            models.DatasetExampleRevision.dataset_example_id.in_(
+                split_filtered_example_ids
+            ),  # Has splits = filter by splits
+        )
+    )
+
+    ranked_subquery = stmt.subquery()
+    return insert(models.ExperimentDatasetExample).from_select(
+        [
+            models.ExperimentDatasetExample.experiment_id,
+            models.ExperimentDatasetExample.dataset_example_id,
+            models.ExperimentDatasetExample.dataset_example_revision_id,
+        ],
+        select(
+            literal(experiment.id),
+            ranked_subquery.c.dataset_example_id,
+            ranked_subquery.c.id,
+        ).where(
+            ranked_subquery.c.rn == 1,
+            ranked_subquery.c.revision_kind != "DELETE",
+        ),
+    )
+
+
+async def insert_experiment_with_examples_snapshot(
+    session: AsyncSession,
+    experiment: models.Experiment,
+) -> None:
+    """
+    Insert an experiment with its snapshot of dataset examples.
+    """
+    session.add(experiment)
+    await session.flush()
+    insert_stmt = create_experiment_examples_snapshot_insert(experiment)
+    await session.execute(insert_stmt)
+
+
 _AnyTuple = TypeVar("_AnyTuple", bound=tuple[Any, ...])
 
 
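The rewrite above replaces the old GROUP BY / MAX(dataset_version_id) join with a ROW_NUMBER() window ranking: rank each example's revisions newest-first within the version cutoff, keep rank 1, and drop examples whose latest revision is a DELETE. A minimal, self-contained sketch of that pattern on a toy table (table and column names are illustrative, not Phoenix's schema):

import sqlalchemy as sa

metadata = sa.MetaData()
revisions = sa.Table(
    "revisions",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("example_id", sa.Integer, nullable=False),
    sa.Column("version_id", sa.Integer, nullable=False),
    sa.Column("kind", sa.String, nullable=False),  # e.g. CREATE / PATCH / DELETE
)

def latest_revisions(max_version_id: int) -> sa.Select:
    # Rank each example's revisions newest-first, up to the version cutoff.
    ranked = (
        sa.select(
            revisions.c.id,
            revisions.c.kind,
            sa.func.row_number()
            .over(
                partition_by=revisions.c.example_id,
                order_by=revisions.c.version_id.desc(),
            )
            .label("rn"),
        )
        .where(revisions.c.version_id <= max_version_id)
        .subquery()
    )
    # rn == 1 is the newest revision per example; DELETEs are excluded,
    # mirroring get_dataset_example_revisions above.
    return (
        sa.select(revisions)
        .join(ranked, revisions.c.id == ranked.c.id)
        .where(ranked.c.rn == 1, ranked.c.kind != "DELETE")
    )

engine = sa.create_engine("sqlite://")
metadata.create_all(engine)
with engine.begin() as conn:
    conn.execute(
        revisions.insert(),
        [
            {"example_id": 1, "version_id": 1, "kind": "CREATE"},
            {"example_id": 1, "version_id": 3, "kind": "PATCH"},
            {"example_id": 2, "version_id": 2, "kind": "CREATE"},
            {"example_id": 2, "version_id": 4, "kind": "DELETE"},
        ],
    )
    # Example 1 resolves to its PATCH at version 3; example 2 is dropped (DELETE).
    print(conn.execute(latest_revisions(max_version_id=4)).fetchall())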
@@ -355,3 +517,10 @@ def get_ancestor_span_rowids(parent_id: str) -> Select[tuple[int]]:
         )
     )
     return select(ancestors.c.id)
+
+
+def truncate_name(name: str, max_len: int = 63) -> str:
+    # https://github.com/sqlalchemy/sqlalchemy/blob/e263825e3c5060bf4f47eed0e833c6660a31658e/lib/sqlalchemy/sql/compiler.py#L7844-L7845
+    if len(name) > max_len:
+        return name[0 : max_len - 8] + "_" + util.md5_hex(name)[-4:]
+    return name
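One detail worth noting in create_experiment_examples_snapshot_insert above: the snapshot is written with a single INSERT ... FROM SELECT, so experiment membership is captured in one statement without round-tripping rows through Python. A hedged sketch of that pattern on toy tables (names are illustrative):

import sqlalchemy as sa

md = sa.MetaData()
examples = sa.Table("examples", md, sa.Column("id", sa.Integer, primary_key=True))
snapshot = sa.Table(
    "snapshot",
    md,
    sa.Column("experiment_id", sa.Integer, primary_key=True),
    sa.Column("example_id", sa.Integer, primary_key=True),
)

def snapshot_insert(experiment_id: int) -> sa.Insert:
    # SELECT the member rows and INSERT them in the same statement.
    src = sa.select(sa.literal(experiment_id), examples.c.id)
    return sa.insert(snapshot).from_select(["experiment_id", "example_id"], src)

engine = sa.create_engine("sqlite://")
md.create_all(engine)
with engine.begin() as conn:
    conn.execute(examples.insert(), [{"id": 1}, {"id": 2}])
    conn.execute(snapshot_insert(7))
    print(conn.execute(sa.select(snapshot)).fetchall())  # [(7, 1), (7, 2)]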
phoenix/db/insertion/dataset.py CHANGED
@@ -44,6 +44,7 @@ async def insert_dataset(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     created_at: Optional[datetime] = None,
+    user_id: Optional[int] = None,
 ) -> DatasetId:
     id_ = await session.scalar(
         insert(models.Dataset)
@@ -52,6 +53,7 @@ async def insert_dataset(
             description=description,
             metadata_=metadata,
             created_at=created_at,
+            user_id=user_id,
         )
         .returning(models.Dataset.id)
     )
@@ -64,6 +66,7 @@ async def insert_dataset_version(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     created_at: Optional[datetime] = None,
+    user_id: Optional[int] = None,
 ) -> DatasetVersionId:
     id_ = await session.scalar(
         insert(models.DatasetVersion)
@@ -72,6 +75,7 @@ async def insert_dataset_version(
             description=description,
             metadata_=metadata,
             created_at=created_at,
+            user_id=user_id,
         )
         .returning(models.DatasetVersion.id)
     )
@@ -152,6 +156,7 @@ async def add_dataset_examples(
     description: Optional[str] = None,
     metadata: Optional[Mapping[str, Any]] = None,
     action: DatasetAction = DatasetAction.CREATE,
+    user_id: Optional[int] = None,
 ) -> Optional[DatasetExampleAdditionEvent]:
     created_at = datetime.now(timezone.utc)
     dataset_id: Optional[DatasetId] = None
@@ -167,6 +172,7 @@ async def add_dataset_examples(
             description=description,
             metadata=metadata,
             created_at=created_at,
+            user_id=user_id,
         )
     except Exception:
         logger.exception(f"Failed to insert dataset: {name=}")
@@ -176,6 +182,7 @@ async def add_dataset_examples(
             session=session,
             dataset_id=dataset_id,
             created_at=created_at,
+            user_id=user_id,
         )
     except Exception:
         logger.exception(f"Failed to insert dataset version for {dataset_id=}")
phoenix/db/insertion/document_annotation.py CHANGED
@@ -181,7 +181,7 @@ def _key(p: Received[Precursors.DocumentAnnotation]) -> _Key:
 
 
 def _unique_by(p: Received[Insertables.DocumentAnnotation]) -> _UniqueBy:
-    return p.item.obj.name, p.item.span_rowid, p.item.document_position, p.item.identifier
+    return p.item.obj.name, p.item.span_rowid, p.item.document_position, p.item.obj.identifier
 
 
 def _time(p: Received[Any]) -> datetime:
phoenix/db/insertion/helpers.py CHANGED
@@ -12,7 +12,7 @@ from sqlalchemy.sql.elements import KeyedColumnElement
 from typing_extensions import TypeAlias, assert_never
 
 from phoenix.db import models
-from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.db.helpers import SupportedSQLDialect, truncate_name
 from phoenix.db.models import Base
 from phoenix.trace.attributes import get_attribute_value
 
@@ -53,7 +53,7 @@ def insert_on_conflict(
             unique_records.append(v)
             seen.add(k)
     records = tuple(reversed(unique_records))
-    constraint = constraint_name or "_".join(("uq", table.__tablename__, *unique_by))
+    constraint = constraint_name or truncate_name("_".join(("uq", table.__tablename__, *unique_by)))
     if dialect is SupportedSQLDialect.POSTGRESQL:
         stmt_postgresql = insert_postgresql(table).values(records)
         if on_conflict is OnConflict.DO_NOTHING:
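Why truncate_name matters here: PostgreSQL silently truncates identifiers longer than 63 bytes, so a constraint name computed in Python has to be shortened the same way SQLAlchemy shortens it at DDL time, or the conflict target won't match what the database actually stored. A sketch of the failure case — the table name for models.ProjectSessionAnnotation is assumed here for illustration:

from sqlalchemy import util

def truncate_name(name: str, max_len: int = 63) -> str:
    # Same logic as the new phoenix.db.helpers.truncate_name above.
    if len(name) > max_len:
        return name[0 : max_len - 8] + "_" + util.md5_hex(name)[-4:]
    return name

# Assumed table name plus the unique_by columns used by the session
# annotation inserter ("name", "project_session_id", "identifier").
raw = "_".join(("uq", "project_session_annotations", "name", "project_session_id", "identifier"))
print(len(raw))            # 65 -- over PostgreSQL's 63-byte identifier limit
print(truncate_name(raw))  # 55 leading chars + "_" + 4-char md5 suffix = 60 chars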
phoenix/db/insertion/session_annotation.py ADDED
@@ -0,0 +1,176 @@
+from collections.abc import Mapping
+from datetime import datetime
+from typing import Any, NamedTuple, Optional
+
+from sqlalchemy import Row, Select, and_, select, tuple_
+from sqlalchemy.ext.asyncio import AsyncSession
+from typing_extensions import TypeAlias
+
+from phoenix.db import models
+from phoenix.db.helpers import dedup
+from phoenix.db.insertion.helpers import as_kv
+from phoenix.db.insertion.types import (
+    Insertables,
+    Postponed,
+    Precursors,
+    QueueInserter,
+    Received,
+)
+from phoenix.server.dml_event import ProjectSessionAnnotationDmlEvent
+
+# Type alias for consistency with other annotation patterns
+SessionAnnotationDmlEvent = ProjectSessionAnnotationDmlEvent
+
+_Name: TypeAlias = str
+_SessionId: TypeAlias = str
+_SessionRowId: TypeAlias = int
+_AnnoRowId: TypeAlias = int
+_Identifier: TypeAlias = str
+
+
+class _Key(NamedTuple):
+    annotation_name: _Name
+    annotation_identifier: _Identifier
+    session_id: _SessionId
+
+
+_UniqueBy: TypeAlias = tuple[_Name, _SessionRowId, _Identifier]
+_Existing: TypeAlias = tuple[
+    _SessionRowId,
+    _SessionId,
+    Optional[_AnnoRowId],
+    Optional[_Name],
+    Optional[datetime],
+]
+
+
+class SessionAnnotationQueueInserter(
+    QueueInserter[
+        Precursors.SessionAnnotation,
+        Insertables.SessionAnnotation,
+        models.ProjectSessionAnnotation,
+        SessionAnnotationDmlEvent,
+    ],
+    table=models.ProjectSessionAnnotation,
+    unique_by=("name", "project_session_id", "identifier"),
+):
+    async def _events(
+        self,
+        session: AsyncSession,
+        *insertions: Insertables.SessionAnnotation,
+    ) -> list[SessionAnnotationDmlEvent]:
+        records = [{**dict(as_kv(ins.row)), "updated_at": ins.row.updated_at} for ins in insertions]
+        stmt = self._insert_on_conflict(*records).returning(self.table.id)
+        ids = tuple([_ async for _ in await session.stream_scalars(stmt)])
+        return [SessionAnnotationDmlEvent(ids)]
+
+    async def _partition(
+        self,
+        session: AsyncSession,
+        *parcels: Received[Precursors.SessionAnnotation],
+    ) -> tuple[
+        list[Received[Insertables.SessionAnnotation]],
+        list[Postponed[Precursors.SessionAnnotation]],
+        list[Received[Precursors.SessionAnnotation]],
+    ]:
+        to_insert: list[Received[Insertables.SessionAnnotation]] = []
+        to_postpone: list[Postponed[Precursors.SessionAnnotation]] = []
+        to_discard: list[Received[Precursors.SessionAnnotation]] = []
+
+        stmt = self._select_existing(*map(_key, parcels))
+        existing: list[Row[_Existing]] = [_ async for _ in await session.stream(stmt)]
+        existing_sessions: Mapping[str, _SessionAttr] = {
+            e.session_id: _SessionAttr(e.session_rowid) for e in existing
+        }
+        existing_annos: Mapping[_Key, _AnnoAttr] = {
+            _Key(
+                annotation_name=e.name,
+                annotation_identifier=e.identifier,
+                session_id=e.session_id,
+            ): _AnnoAttr(e.session_rowid, e.id, e.updated_at)
+            for e in existing
+            if e.id is not None and e.name is not None and e.updated_at is not None
+        }
+
+        for p in parcels:
+            if (anno := existing_annos.get(_key(p))) is not None:
+                if p.item.updated_at <= anno.updated_at:
+                    to_discard.append(p)
+                else:
+                    to_insert.append(
+                        Received(
+                            received_at=p.received_at,
+                            item=p.item.as_insertable(
+                                project_session_rowid=anno.session_rowid,
+                            ),
+                        )
+                    )
+            elif (existing_session := existing_sessions.get(p.item.session_id)) is not None:
+                to_insert.append(
+                    Received(
+                        received_at=p.received_at,
+                        item=p.item.as_insertable(
+                            project_session_rowid=existing_session.session_rowid,
+                        ),
+                    )
+                )
+            elif isinstance(p, Postponed):
+                if p.retries_left > 1:
+                    to_postpone.append(p.postpone(p.retries_left - 1))
+                else:
+                    to_discard.append(p)
+            elif isinstance(p, Received):
+                to_postpone.append(p.postpone(self._retry_allowance))
+            else:
+                to_discard.append(p)
+
+        assert len(to_insert) + len(to_postpone) + len(to_discard) == len(parcels)
+        to_insert = dedup(sorted(to_insert, key=_time, reverse=True), _unique_by)[::-1]
+        return to_insert, to_postpone, to_discard
+
+    def _select_existing(self, *keys: _Key) -> Select[_Existing]:
+        anno = self.table
+        session = (
+            select(models.ProjectSession.id, models.ProjectSession.session_id)
+            .where(models.ProjectSession.session_id.in_({k.session_id for k in keys}))
+            .cte()
+        )
+        onclause = and_(
+            session.c.id == anno.project_session_id,
+            anno.name.in_({k.annotation_name for k in keys}),
+            tuple_(anno.name, anno.identifier, session.c.session_id).in_(keys),
+        )
+        return select(
+            session.c.id.label("session_rowid"),
+            session.c.session_id,
+            anno.id,
+            anno.name,
+            anno.identifier,
+            anno.updated_at,
+        ).outerjoin_from(session, anno, onclause)
+
+
+class _SessionAttr(NamedTuple):
+    session_rowid: _SessionRowId
+
+
+class _AnnoAttr(NamedTuple):
+    session_rowid: _SessionRowId
+    id_: _AnnoRowId
+    updated_at: datetime
+
+
+def _key(p: Received[Precursors.SessionAnnotation]) -> _Key:
+    return _Key(
+        annotation_name=p.item.obj.name,
+        annotation_identifier=p.item.obj.identifier,
+        session_id=p.item.session_id,
+    )
+
+
+def _unique_by(p: Received[Insertables.SessionAnnotation]) -> _UniqueBy:
+    return p.item.obj.name, p.item.project_session_rowid, p.item.obj.identifier
+
+
+def _time(p: Received[Any]) -> datetime:
+    return p.received_at
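The _partition method above reduces to a small per-parcel decision: last-writer-wins on updated_at when a matching annotation row already exists, direct insert when only the session row exists, and postpone-with-retries when the session hasn't arrived yet. A toy restatement of that decision (not the Phoenix API; retry accounting is simplified):

from datetime import datetime, timezone
from typing import Optional

def decide(
    incoming_updated_at: datetime,
    existing_updated_at: Optional[datetime],  # None if no matching annotation row
    session_exists: bool,
    retries_left: int,
) -> str:
    if existing_updated_at is not None:
        # Last-writer-wins on updated_at: stale parcels are dropped.
        return "discard" if incoming_updated_at <= existing_updated_at else "insert"
    if session_exists:
        return "insert"  # first annotation for this (name, identifier, session)
    # Session row hasn't arrived yet: retry later, up to the retry allowance.
    return "postpone" if retries_left > 0 else "discard"

now = datetime.now(timezone.utc)
print(decide(now, None, session_exists=False, retries_left=3))  # postpone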
phoenix/db/insertion/span_annotation.py CHANGED
@@ -167,7 +167,7 @@ def _key(p: Received[Precursors.SpanAnnotation]) -> _Key:
 
 
 def _unique_by(p: Received[Insertables.SpanAnnotation]) -> _UniqueBy:
-    return p.item.obj.name, p.item.span_rowid, p.item.identifier
+    return p.item.obj.name, p.item.span_rowid, p.item.obj.identifier
 
 
 def _time(p: Received[Any]) -> datetime:
phoenix/db/insertion/trace_annotation.py CHANGED
@@ -166,7 +166,7 @@ def _key(p: Received[Precursors.TraceAnnotation]) -> _Key:
 
 
 def _unique_by(p: Received[Insertables.TraceAnnotation]) -> _UniqueBy:
-    return p.item.obj.name, p.item.trace_rowid, p.item.identifier
+    return p.item.obj.name, p.item.trace_rowid, p.item.obj.identifier
 
 
 def _time(p: Received[Any]) -> datetime:
phoenix/db/insertion/types.py CHANGED
@@ -225,13 +225,29 @@ class Precursors(ABC):
                 span_rowid=span_rowid,
             )
 
+    @dataclass(frozen=True)
+    class SessionAnnotation:
+        updated_at: datetime
+        session_id: str
+        obj: models.ProjectSessionAnnotation
+
+        def as_insertable(
+            self,
+            project_session_rowid: int,
+        ) -> Insertables.SessionAnnotation:
+            return Insertables.SessionAnnotation(
+                updated_at=self.updated_at,
+                session_id=self.session_id,
+                obj=self.obj,
+                project_session_rowid=project_session_rowid,
+            )
+
 
 class Insertables(ABC):
     @dataclass(frozen=True)
     class SpanAnnotation(Precursors.SpanAnnotation):
         updated_at: datetime
         span_rowid: int
-        identifier: str = ""
 
         @property
         def row(self) -> models.SpanAnnotation:
@@ -244,7 +260,6 @@ class Insertables(ABC):
     class TraceAnnotation(Precursors.TraceAnnotation):
         updated_at: datetime
         trace_rowid: int
-        identifier: str = ""
 
         @property
         def row(self) -> models.TraceAnnotation:
@@ -257,7 +272,6 @@ class Insertables(ABC):
     class DocumentAnnotation(Precursors.DocumentAnnotation):
         updated_at: datetime
         span_rowid: int
-        identifier: str = ""
 
         @property
         def row(self) -> models.DocumentAnnotation:
@@ -265,3 +279,15 @@ class Insertables(ABC):
             obj.span_rowid = self.span_rowid
             obj.updated_at = self.updated_at
             return obj
+
+    @dataclass(frozen=True)
+    class SessionAnnotation(Precursors.SessionAnnotation):
+        updated_at: datetime
+        project_session_rowid: int
+
+        @property
+        def row(self) -> models.ProjectSessionAnnotation:
+            obj = copy(self.obj)
+            obj.project_session_id = self.project_session_rowid
+            obj.updated_at = self.updated_at
+            return obj
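The Precursors/Insertables split shown above is a two-phase construction: a precursor carries the client-facing session_id string and only becomes insertable once the ProjectSession rowid has been resolved. A stripped-down sketch of the pattern (class and field names are illustrative):

from dataclasses import dataclass
from datetime import datetime, timezone

@dataclass(frozen=True)
class AnnotationPrecursor:
    updated_at: datetime
    session_id: str  # public identifier supplied by the client
    name: str

    def as_insertable(self, project_session_rowid: int) -> "InsertableAnnotation":
        # The rowid is only known after the session row is found in the database.
        return InsertableAnnotation(
            updated_at=self.updated_at,
            session_id=self.session_id,
            name=self.name,
            project_session_rowid=project_session_rowid,
        )

@dataclass(frozen=True)
class InsertableAnnotation(AnnotationPrecursor):
    project_session_rowid: int  # resolved database rowid

p = AnnotationPrecursor(datetime.now(timezone.utc), "abc123", "quality")
print(p.as_insertable(project_session_rowid=42))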
phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py ADDED
@@ -0,0 +1,40 @@
+"""add user_id on datasets
+
+Revision ID: 01a8342c9cdf
+Revises: 0df286449799
+Create Date: 2025-09-25 16:08:51.254947
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "01a8342c9cdf"
+down_revision: Union[str, None] = "0df286449799"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+_Integer = sa.Integer().with_variant(
+    sa.BigInteger(),
+    "postgresql",
+)
+
+
+def upgrade() -> None:
+    with op.batch_alter_table("datasets") as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "user_id",
+                _Integer,
+                sa.ForeignKey("users.id", ondelete="SET NULL"),
+                nullable=True,
+            ),
+        )
+
+
+def downgrade() -> None:
+    with op.batch_alter_table("datasets") as batch_op:
+        batch_op.drop_column("user_id")