pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (58) hide show
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +11 -2
  4. pixeltable/catalog/catalog.py +407 -119
  5. pixeltable/catalog/column.py +38 -26
  6. pixeltable/catalog/globals.py +130 -15
  7. pixeltable/catalog/insertable_table.py +10 -9
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +245 -119
  10. pixeltable/catalog/table_version.py +142 -116
  11. pixeltable/catalog/table_version_handle.py +30 -2
  12. pixeltable/catalog/table_version_path.py +28 -4
  13. pixeltable/catalog/view.py +14 -20
  14. pixeltable/config.py +4 -0
  15. pixeltable/dataframe.py +10 -9
  16. pixeltable/env.py +5 -11
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/exec_node.py +2 -0
  19. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  20. pixeltable/exec/sql_node.py +47 -30
  21. pixeltable/exprs/column_property_ref.py +2 -10
  22. pixeltable/exprs/column_ref.py +24 -21
  23. pixeltable/exprs/data_row.py +9 -0
  24. pixeltable/exprs/expr.py +4 -4
  25. pixeltable/exprs/row_builder.py +44 -13
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/mcp.py +74 -0
  28. pixeltable/func/query_template_function.py +4 -2
  29. pixeltable/func/tools.py +12 -2
  30. pixeltable/func/udf.py +2 -2
  31. pixeltable/functions/__init__.py +1 -0
  32. pixeltable/functions/groq.py +108 -0
  33. pixeltable/functions/huggingface.py +8 -6
  34. pixeltable/functions/mistralai.py +2 -13
  35. pixeltable/functions/openai.py +1 -6
  36. pixeltable/functions/replicate.py +2 -2
  37. pixeltable/functions/util.py +6 -1
  38. pixeltable/globals.py +0 -2
  39. pixeltable/io/external_store.py +81 -54
  40. pixeltable/io/globals.py +1 -1
  41. pixeltable/io/label_studio.py +49 -45
  42. pixeltable/io/table_data_conduit.py +1 -1
  43. pixeltable/metadata/__init__.py +1 -1
  44. pixeltable/metadata/converters/convert_37.py +15 -0
  45. pixeltable/metadata/converters/convert_38.py +39 -0
  46. pixeltable/metadata/notes.py +2 -0
  47. pixeltable/metadata/schema.py +5 -0
  48. pixeltable/metadata/utils.py +78 -0
  49. pixeltable/plan.py +59 -139
  50. pixeltable/share/packager.py +2 -2
  51. pixeltable/store.py +114 -103
  52. pixeltable/type_system.py +30 -0
  53. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
  54. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
  55. pixeltable/utils/sample.py +0 -25
  56. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
@@ -3,14 +3,14 @@ from __future__ import annotations
3
3
  import abc
4
4
  import itertools
5
5
  import logging
6
- from dataclasses import dataclass
6
+ from dataclasses import dataclass, field
7
7
  from typing import Any, Optional
8
- from uuid import UUID
9
8
 
10
9
  import pixeltable.exceptions as excs
11
10
  import pixeltable.type_system as ts
12
11
  from pixeltable import Column, Table
13
- from pixeltable.catalog import TableVersion
12
+ from pixeltable.catalog import ColumnHandle, TableVersion
13
+ from pixeltable.catalog.globals import RowCountStats, UpdateStatus
14
14
 
15
15
  _logger = logging.getLogger('pixeltable')
16
16
 
@@ -22,6 +22,8 @@ class ExternalStore(abc.ABC):
22
22
  and stateful external stores.
23
23
  """
24
24
 
25
+ __name: str
26
+
25
27
  def __init__(self, name: str) -> None:
26
28
  self.__name = name
27
29
 
@@ -38,7 +40,7 @@ class ExternalStore(abc.ABC):
38
40
  """Removes store-specific metadata created in link()."""
39
41
 
40
42
  @abc.abstractmethod
41
- def get_local_columns(self) -> list[Column]:
43
+ def get_local_columns(self) -> list[ColumnHandle]:
42
44
  """
43
45
  Gets a list of all local (Pixeltable) columns that are associated with this external store.
44
46
  """
@@ -63,9 +65,15 @@ class Project(ExternalStore, abc.ABC):
63
65
  additional capabilities specific to such projects.
64
66
  """
65
67
 
66
- stored_proxies: dict[Column, Column]
68
+ _col_mapping: dict[ColumnHandle, str] # col -> external col name
69
+ stored_proxies: dict[ColumnHandle, ColumnHandle] # original col -> proxy col
67
70
 
68
- def __init__(self, name: str, col_mapping: dict[Column, str], stored_proxies: Optional[dict[Column, Column]]):
71
+ def __init__(
72
+ self,
73
+ name: str,
74
+ col_mapping: dict[ColumnHandle, str],
75
+ stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]],
76
+ ):
69
77
  super().__init__(name)
70
78
  self._col_mapping = col_mapping
71
79
 
@@ -80,11 +88,11 @@ class Project(ExternalStore, abc.ABC):
80
88
  # Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
81
89
  # proportion of overlapping rows, all proxying the same base column.
82
90
  if stored_proxies is None:
83
- self.stored_proxies: dict[Column, Column] = {}
91
+ self.stored_proxies: dict[ColumnHandle, ColumnHandle] = {}
84
92
  else:
85
93
  self.stored_proxies = stored_proxies
86
94
 
87
- def get_local_columns(self) -> list[Column]:
95
+ def get_local_columns(self) -> list[ColumnHandle]:
88
96
  return list(self.col_mapping.keys())
89
97
 
90
98
  def link(self, tbl_version: TableVersion) -> None:
@@ -92,15 +100,16 @@ class Project(ExternalStore, abc.ABC):
92
100
  # This ensures that the media in those columns resides in the media store.
93
101
  # First determine which columns (if any) need stored proxies, but don't have one yet.
94
102
  stored_proxies_needed: list[Column] = []
95
- for col in self.col_mapping:
103
+ for col_handle in self.col_mapping:
104
+ col = col_handle.get()
96
105
  if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
97
106
  # If this column is already proxied in some other Project, use the existing proxy to avoid
98
107
  # duplication. Otherwise, we'll create a new one.
99
108
  for store in tbl_version.external_stores.values():
100
- if isinstance(store, Project) and col in store.stored_proxies:
101
- self.stored_proxies[col] = store.stored_proxies[col]
109
+ if isinstance(store, Project) and col_handle in store.stored_proxies:
110
+ self.stored_proxies[col_handle] = store.stored_proxies[col_handle]
102
111
  break
103
- if col not in self.stored_proxies:
112
+ if col_handle not in self.stored_proxies:
104
113
  # We didn't find it in an existing Project
105
114
  stored_proxies_needed.append(col)
106
115
 
@@ -110,17 +119,20 @@ class Project(ExternalStore, abc.ABC):
110
119
  proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
111
120
  # Add the columns; this will also update table metadata.
112
121
  tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
122
+ self.stored_proxies.update(
123
+ {col.handle: proxy_col.handle for col, proxy_col in zip(stored_proxies_needed, proxy_cols)}
124
+ )
113
125
 
114
126
  def unlink(self, tbl_version: TableVersion) -> None:
115
127
  # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
116
128
  # any *other* external store for this table.)
117
- deletions_needed: set[Column] = set(self.stored_proxies.values())
129
+ deletions_needed: set[ColumnHandle] = set(self.stored_proxies.values())
118
130
  for name, store in tbl_version.external_stores.items():
119
131
  if isinstance(store, Project) and name != self.name:
120
132
  deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
121
133
  if len(deletions_needed) > 0:
122
- _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
123
- tbl_version._drop_columns(deletions_needed)
134
+ _logger.info(f'Removing stored proxies for columns: {[col.get().name for col in deletions_needed]}')
135
+ tbl_version._drop_columns(col.get() for col in deletions_needed)
124
136
  self.stored_proxies.clear()
125
137
 
126
138
  def create_stored_proxy(self, col: Column) -> Column:
@@ -142,11 +154,10 @@ class Project(ExternalStore, abc.ABC):
142
154
  computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
143
155
  stored=True,
144
156
  )
145
- self.stored_proxies[col] = proxy_col
146
157
  return proxy_col
147
158
 
148
159
  @property
149
- def col_mapping(self) -> dict[Column, str]:
160
+ def col_mapping(self) -> dict[ColumnHandle, str]:
150
161
  return self._col_mapping
151
162
 
152
163
  @abc.abstractmethod
@@ -181,7 +192,7 @@ class Project(ExternalStore, abc.ABC):
181
192
  export_cols: dict[str, ts.ColumnType],
182
193
  import_cols: dict[str, ts.ColumnType],
183
194
  col_mapping: Optional[dict[str, str]],
184
- ) -> dict[Column, str]:
195
+ ) -> dict[ColumnHandle, str]:
185
196
  """
186
197
  Verifies that the specified `col_mapping` is valid. In particular, checks that:
187
198
  (i) the keys of `col_mapping` are valid columns of the specified `Table`;
@@ -199,10 +210,10 @@ class Project(ExternalStore, abc.ABC):
199
210
  if col_mapping is None:
200
211
  col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
201
212
 
202
- resolved_col_mapping: dict[Column, str] = {}
213
+ resolved_col_mapping: dict[ColumnHandle, str] = {}
203
214
 
204
215
  # Validate names
205
- t_cols = set(table._schema.keys())
216
+ t_cols = set(table._get_schema().keys())
206
217
  for t_col, ext_col in col_mapping.items():
207
218
  if t_col not in t_cols:
208
219
  if is_user_specified_col_mapping:
@@ -223,9 +234,10 @@ class Project(ExternalStore, abc.ABC):
223
234
  )
224
235
  col_ref = table[t_col]
225
236
  assert isinstance(col_ref, exprs.ColumnRef)
226
- resolved_col_mapping[col_ref.col] = ext_col
237
+ resolved_col_mapping[col_ref.col.handle] = ext_col
238
+
227
239
  # Validate column specs
228
- t_col_types = table._schema
240
+ t_col_types = table._get_schema()
229
241
  for t_col, ext_col in col_mapping.items():
230
242
  t_col_type = t_col_types[t_col]
231
243
  if ext_col in export_cols:
@@ -250,39 +262,56 @@ class Project(ExternalStore, abc.ABC):
250
262
  )
251
263
  return resolved_col_mapping
252
264
 
253
- @classmethod
254
- def _column_as_dict(cls, col: Column) -> dict[str, Any]:
255
- return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
256
265
 
257
- @classmethod
258
- def _column_from_dict(cls, d: dict[str, Any]) -> Column:
259
- from pixeltable.catalog import Catalog
266
+ @dataclass(frozen=True)
267
+ class SyncStatus:
268
+ # stats for the rows affected by the operation in the external store
269
+ ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
260
270
 
261
- tbl_id = UUID(d['tbl_id'])
262
- col_id = d['col_id']
263
- return Catalog.get().get_tbl_version(tbl_id, None).cols_by_id[col_id]
271
+ # stats for the rows affected by the operation
272
+ row_count_stats: RowCountStats = field(default_factory=RowCountStats)
264
273
 
274
+ @property
275
+ def num_excs(self) -> int:
276
+ """
277
+ Returns the total number of Pixeltable exceptions that occurred during the operation.
278
+ """
279
+ return self.row_count_stats.num_excs
265
280
 
266
- @dataclass(frozen=True)
267
- class SyncStatus:
268
- external_rows_created: int = 0
269
- external_rows_deleted: int = 0
270
- external_rows_updated: int = 0
271
- pxt_rows_updated: int = 0
272
- num_excs: int = 0
281
+ @property
282
+ def pxt_rows_updated(self) -> int:
283
+ """
284
+ Returns the number of Pixeltable rows that were updated as a result of the operation.
285
+ """
286
+ return self.row_count_stats.upd_rows
287
+
288
+ @property
289
+ def external_rows_updated(self) -> int:
290
+ return self.ext_row_count_stats.upd_rows
291
+
292
+ @property
293
+ def external_rows_created(self) -> int:
294
+ return self.ext_row_count_stats.ins_rows
295
+
296
+ @property
297
+ def external_rows_deleted(self) -> int:
298
+ return self.ext_row_count_stats.del_rows
273
299
 
274
- def combine(self, other: 'SyncStatus') -> 'SyncStatus':
300
+ def __add__(self, other: 'SyncStatus') -> 'SyncStatus':
301
+ """
302
+ Add the sync status from two SyncStatus objects together.
303
+ """
275
304
  return SyncStatus(
276
- external_rows_created=self.external_rows_created + other.external_rows_created,
277
- external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
278
- external_rows_updated=self.external_rows_updated + other.external_rows_updated,
279
- pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
280
- num_excs=self.num_excs + other.num_excs,
305
+ ext_row_count_stats=self.ext_row_count_stats + other.ext_row_count_stats,
306
+ row_count_stats=self.row_count_stats + other.row_count_stats,
281
307
  )
282
308
 
283
309
  @classmethod
284
- def empty(cls) -> 'SyncStatus':
285
- return SyncStatus(0, 0, 0, 0, 0)
310
+ def from_update_status(cls, us: UpdateStatus) -> 'SyncStatus':
311
+ """
312
+ Copy information from an UpdateStatus to a SyncStatus.
313
+ """
314
+ return SyncStatus(row_count_stats=us.row_count_stats + us.cascade_row_count_stats)
286
315
 
287
316
 
288
317
  class MockProject(Project):
@@ -293,8 +322,8 @@ class MockProject(Project):
293
322
  name: str,
294
323
  export_cols: dict[str, ts.ColumnType],
295
324
  import_cols: dict[str, ts.ColumnType],
296
- col_mapping: dict[Column, str],
297
- stored_proxies: Optional[dict[Column, Column]] = None,
325
+ col_mapping: dict[ColumnHandle, str],
326
+ stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]] = None,
298
327
  ):
299
328
  super().__init__(name, col_mapping, stored_proxies)
300
329
  self.export_cols = export_cols
@@ -334,10 +363,8 @@ class MockProject(Project):
334
363
  'name': self.name,
335
364
  'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
336
365
  'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
337
- 'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
338
- 'stored_proxies': [
339
- [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
340
- ],
366
+ 'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
367
+ 'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
341
368
  }
342
369
 
343
370
  @classmethod
@@ -346,8 +373,8 @@ class MockProject(Project):
346
373
  md['name'],
347
374
  {k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
348
375
  {k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
349
- {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
350
- {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']},
376
+ {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
377
+ {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
351
378
  )
352
379
 
353
380
  def __eq__(self, other: object) -> bool:
pixeltable/io/globals.py CHANGED
@@ -136,7 +136,7 @@ def create_label_studio_project(
136
136
  if sync_immediately:
137
137
  return t.sync()
138
138
  else:
139
- return SyncStatus.empty()
139
+ return SyncStatus()
140
140
 
141
141
 
142
142
  def export_images_as_fo_dataset(
@@ -4,15 +4,17 @@ import logging
4
4
  import os
5
5
  from dataclasses import dataclass
6
6
  from pathlib import Path
7
- from typing import Any, Iterator, Literal, Optional, cast
7
+ from typing import Any, Iterator, Literal, Optional
8
8
  from xml.etree import ElementTree as ET
9
9
 
10
- import label_studio_sdk # type: ignore[import-untyped]
10
+ import label_studio_sdk
11
11
  import PIL.Image
12
12
  from requests.exceptions import HTTPError
13
13
 
14
14
  import pixeltable.type_system as ts
15
15
  from pixeltable import Column, Table, env, exceptions as excs
16
+ from pixeltable.catalog import ColumnHandle
17
+ from pixeltable.catalog.globals import RowCountStats
16
18
  from pixeltable.config import Config
17
19
  from pixeltable.exprs import ColumnRef, DataRow, Expr
18
20
  from pixeltable.io.external_store import Project, SyncStatus
@@ -25,7 +27,7 @@ try:
25
27
  import label_studio_sdk.project as ls_project # type: ignore
26
28
  except ImportError:
27
29
  # label_studio_sdk>=1 compatibility
28
- import label_studio_sdk._legacy.project as ls_project # type: ignore
30
+ import label_studio_sdk._legacy.project as ls_project
29
31
 
30
32
  _logger = logging.getLogger('pixeltable')
31
33
 
@@ -45,13 +47,17 @@ class LabelStudioProject(Project):
45
47
  for synchronizing between a Pixeltable table and a Label Studio project.
46
48
  """
47
49
 
50
+ project_id: int # Label Studio project ID
51
+ media_import_method: Literal['post', 'file', 'url']
52
+ _project: Optional[ls_project.Project]
53
+
48
54
  def __init__(
49
55
  self,
50
56
  name: str,
51
57
  project_id: int,
52
58
  media_import_method: Literal['post', 'file', 'url'],
53
- col_mapping: dict[Column, str],
54
- stored_proxies: Optional[dict[Column, Column]] = None,
59
+ col_mapping: dict[ColumnHandle, str],
60
+ stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]] = None,
55
61
  ):
56
62
  """
57
63
  The constructor will NOT create a new Label Studio project; it is also used when loading
@@ -59,7 +65,7 @@ class LabelStudioProject(Project):
59
65
  """
60
66
  self.project_id = project_id
61
67
  self.media_import_method = media_import_method
62
- self._project: Optional[ls_project.Project] = None
68
+ self._project = None
63
69
  super().__init__(name, col_mapping, stored_proxies)
64
70
 
65
71
  @property
@@ -112,13 +118,13 @@ class LabelStudioProject(Project):
112
118
  )
113
119
  # Collect all existing tasks into a dict with entries `rowid: task`
114
120
  tasks = {tuple(task['meta']['rowid']): task for task in self.__fetch_all_tasks()}
115
- sync_status = SyncStatus.empty()
121
+ sync_status = SyncStatus()
116
122
  if export_data:
117
123
  export_sync_status = self.__update_tasks(t, tasks)
118
- sync_status = sync_status.combine(export_sync_status)
124
+ sync_status += export_sync_status
119
125
  if import_data:
120
126
  import_sync_status = self.__update_table_from_tasks(t, tasks)
121
- sync_status = sync_status.combine(import_sync_status)
127
+ sync_status += import_sync_status
122
128
  return sync_status
123
129
 
124
130
  def __fetch_all_tasks(self) -> Iterator[dict[str, Any]]:
@@ -155,7 +161,7 @@ class LabelStudioProject(Project):
155
161
  t_data_cols = [t_col for t_col, ext_col_name in self.col_mapping.items() if ext_col_name in config.data_keys]
156
162
 
157
163
  if len(t_data_cols) == 0:
158
- return SyncStatus.empty()
164
+ return SyncStatus()
159
165
 
160
166
  # Columns in `t` that map to `rectanglelabels` preannotations
161
167
  t_rl_cols = [
@@ -183,15 +189,15 @@ class LabelStudioProject(Project):
183
189
  self,
184
190
  t: Table,
185
191
  existing_tasks: dict[tuple, dict],
186
- media_col: Column,
187
- t_rl_cols: list[Column],
192
+ media_col: ColumnHandle,
193
+ t_rl_cols: list[ColumnHandle],
188
194
  rl_info: list['_RectangleLabel'],
189
195
  ) -> SyncStatus:
190
- is_stored = media_col.is_stored
196
+ is_stored = media_col.get().is_stored
191
197
  # If it's a stored column, we can use `localpath`
192
- localpath_col_opt = [t[media_col.name].localpath] if is_stored else []
198
+ localpath_col_opt = [t[media_col.get().name].localpath] if is_stored else []
193
199
  # Select the media column, rectanglelabels columns, and localpath (if appropriate)
194
- rows = t.select(t[media_col.name], *[t[col.name] for col in t_rl_cols], *localpath_col_opt)
200
+ rows = t.select(t[media_col.get().name], *[t[col.get().name] for col in t_rl_cols], *localpath_col_opt)
195
201
  tasks_created = 0
196
202
  row_ids_in_pxt: set[tuple] = set()
197
203
 
@@ -232,42 +238,42 @@ class LabelStudioProject(Project):
232
238
 
233
239
  env.Env.get().console_logger.info(f'Created {tasks_created} new task(s) in {self}.')
234
240
 
235
- sync_status = SyncStatus(external_rows_created=tasks_created)
241
+ sync_status = SyncStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created))
236
242
 
237
243
  deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
238
-
239
- return sync_status.combine(deletion_sync_status)
244
+ sync_status += deletion_sync_status
245
+ return sync_status
240
246
 
241
247
  def __update_tasks_by_files(
242
248
  self,
243
249
  t: Table,
244
250
  existing_tasks: dict[tuple, dict],
245
- t_data_cols: list[Column],
246
- t_rl_cols: list[Column],
251
+ t_data_cols: list[ColumnHandle],
252
+ t_rl_cols: list[ColumnHandle],
247
253
  rl_info: list['_RectangleLabel'],
248
254
  ) -> SyncStatus:
249
255
  ext_data_cols = [self.col_mapping[col] for col in t_data_cols]
250
256
  expr_refs: dict[str, Expr] = {} # kwargs for the select statement
251
257
  for col in t_data_cols:
252
- col_name = col.name
258
+ col_name = col.get().name
253
259
  if self.media_import_method == 'url':
254
260
  expr_refs[col_name] = t[col_name].fileurl
255
261
  else:
256
262
  assert self.media_import_method == 'file'
257
- if not col.col_type.is_media_type():
263
+ if not col.get().col_type.is_media_type():
258
264
  # Not a media column; query the data directly
259
- expr_refs[col_name] = cast(ColumnRef, t[col_name])
265
+ expr_refs[col_name] = t[col_name]
260
266
  elif col in self.stored_proxies:
261
267
  # Media column that has a stored proxy; use it. We have to give it a name,
262
268
  # since it's an anonymous column
263
- stored_proxy_col = self.stored_proxies[col]
269
+ stored_proxy_col = self.stored_proxies[col].get()
264
270
  expr_refs[f'{col_name}_proxy'] = ColumnRef(stored_proxy_col).localpath
265
271
  else:
266
272
  # Media column without a stored proxy; this means it's a stored computed column,
267
273
  # and we can just use the localpath
268
274
  expr_refs[col_name] = t[col_name].localpath
269
275
 
270
- df = t.select(*[t[col.name] for col in t_rl_cols], **expr_refs)
276
+ df = t.select(*[t[col.get().name] for col in t_rl_cols], **expr_refs)
271
277
  # The following buffers will hold `DataRow` indices that correspond to each of the selected
272
278
  # columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
273
279
  # preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
@@ -286,11 +292,11 @@ class LabelStudioProject(Project):
286
292
  data_vals = [row[idx] for idx in data_col_idxs]
287
293
  coco_annotations = [row[idx] for idx in rl_col_idxs]
288
294
  for i in range(len(t_data_cols)):
289
- if t_data_cols[i].col_type.is_media_type():
295
+ if t_data_cols[i].get().col_type.is_media_type():
290
296
  # Special handling for media columns
291
297
  assert isinstance(data_vals[i], str)
292
298
  if self.media_import_method == 'url':
293
- data_vals[i] = self.__validate_fileurl(t_data_cols[i], data_vals[i])
299
+ data_vals[i] = self.__validate_fileurl(t_data_cols[i].get(), data_vals[i])
294
300
  else:
295
301
  assert self.media_import_method == 'file'
296
302
  data_vals[i] = self.__localpath_to_lspath(data_vals[i])
@@ -336,11 +342,11 @@ class LabelStudioProject(Project):
336
342
  f'Created {tasks_created} new task(s) and updated {tasks_updated} existing task(s) in {self}.'
337
343
  )
338
344
 
339
- sync_status = SyncStatus(external_rows_created=tasks_created, external_rows_updated=tasks_updated)
345
+ sync_status = SyncStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created, upd_rows=tasks_updated))
340
346
 
341
347
  deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
342
-
343
- return sync_status.combine(deletion_sync_status)
348
+ sync_status += deletion_sync_status
349
+ return sync_status
344
350
 
345
351
  @classmethod
346
352
  def __validate_fileurl(cls, col: Column, url: str) -> Optional[str]:
@@ -377,11 +383,11 @@ class LabelStudioProject(Project):
377
383
  for rowid in deleted_rowids:
378
384
  del existing_tasks[rowid]
379
385
 
380
- return SyncStatus(external_rows_deleted=len(deleted_rowids))
386
+ return SyncStatus(ext_row_count_stats=RowCountStats(del_rows=len(deleted_rowids)))
381
387
 
382
388
  def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) -> SyncStatus:
383
389
  if ANNOTATIONS_COLUMN not in self.col_mapping.values():
384
- return SyncStatus.empty()
390
+ return SyncStatus()
385
391
 
386
392
  annotations = {
387
393
  # Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
@@ -391,7 +397,7 @@ class LabelStudioProject(Project):
391
397
  for task in tasks.values()
392
398
  }
393
399
 
394
- local_annotations_col = next(k for k, v in self.col_mapping.items() if v == ANNOTATIONS_COLUMN)
400
+ local_annotations_col = next(k for k, v in self.col_mapping.items() if v == ANNOTATIONS_COLUMN).get()
395
401
 
396
402
  # Prune the annotations down to just the ones that have actually changed.
397
403
  rows = t.select(t[local_annotations_col.name])
@@ -412,23 +418,21 @@ class LabelStudioProject(Project):
412
418
  # TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
413
419
  ancestor = t
414
420
  while local_annotations_col not in ancestor._tbl_version.get().cols:
415
- assert ancestor._base_table is not None
416
- ancestor = ancestor._base_table
421
+ assert ancestor._get_base_table is not None
422
+ ancestor = ancestor._get_base_table()
417
423
  update_status = ancestor.batch_update(updates)
418
424
  env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
419
- return SyncStatus(pxt_rows_updated=update_status.num_rows, num_excs=update_status.num_excs)
425
+ return SyncStatus.from_update_status(update_status)
420
426
  else:
421
- return SyncStatus.empty()
427
+ return SyncStatus()
422
428
 
423
429
  def as_dict(self) -> dict[str, Any]:
424
430
  return {
425
431
  'name': self.name,
426
432
  'project_id': self.project_id,
427
433
  'media_import_method': self.media_import_method,
428
- 'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
429
- 'stored_proxies': [
430
- [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
431
- ],
434
+ 'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
435
+ 'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
432
436
  }
433
437
 
434
438
  @classmethod
@@ -437,8 +441,8 @@ class LabelStudioProject(Project):
437
441
  md['name'],
438
442
  md['project_id'],
439
443
  md['media_import_method'],
440
- {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
441
- {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']},
444
+ {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
445
+ {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
442
446
  )
443
447
 
444
448
  def __repr__(self) -> str:
@@ -560,7 +564,7 @@ class LabelStudioProject(Project):
560
564
 
561
565
  if name is None:
562
566
  # Create a default name that's unique to the table
563
- all_stores = t.external_stores
567
+ all_stores = t.external_stores()
564
568
  n = 0
565
569
  while f'ls_project_{n}' in all_stores:
566
570
  n += 1
@@ -576,7 +580,7 @@ class LabelStudioProject(Project):
576
580
  local_annotations_column = ANNOTATIONS_COLUMN
577
581
  else:
578
582
  local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
579
- if local_annotations_column not in t._schema:
583
+ if local_annotations_column not in t._get_schema():
580
584
  t.add_columns({local_annotations_column: ts.Json})
581
585
 
582
586
  resolved_col_mapping = cls.validate_columns(
@@ -101,7 +101,7 @@ class TableDataConduit:
101
101
  def add_table_info(self, table: pxt.Table) -> None:
102
102
  """Add information about the table into which we are inserting data"""
103
103
  assert isinstance(table, pxt.Table)
104
- self.pxt_schema = table._schema
104
+ self.pxt_schema = table._get_schema()
105
105
  self.pxt_pk = table._tbl_version.get().primary_key
106
106
  for col in table._tbl_version_path.columns():
107
107
  if col.is_required_for_insert:
@@ -18,7 +18,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
18
18
  _logger = logging.getLogger('pixeltable')
19
19
 
20
20
  # current version of the metadata; this is incremented whenever the metadata schema changes
21
- VERSION = 37
21
+ VERSION = 39
22
22
 
23
23
 
24
24
  def create_system_info(engine: sql.engine.Engine) -> None:
@@ -0,0 +1,15 @@
1
+ from uuid import UUID
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=37)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, table_md_updater=__update_table_md)
12
+
13
+
14
+ def __update_table_md(table_md: dict, _: UUID) -> None:
15
+ table_md['view_sn'] = 0
@@ -0,0 +1,39 @@
1
+ from typing import Any, Optional
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=38)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
12
+
13
+
14
+ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
15
+ if k == 'col_mapping':
16
+ assert isinstance(v, list)
17
+ return k, [__col_mapping_entry(e) for e in v]
18
+ if k == 'stored_proxies':
19
+ assert isinstance(v, list)
20
+ return k, [__stored_proxies_entry(e) for e in v]
21
+ return None
22
+
23
+
24
+ def __col_mapping_entry(e: list) -> list:
25
+ assert isinstance(e, list)
26
+ assert isinstance(e[0], dict)
27
+ assert isinstance(e[1], str)
28
+ return [__col_handle(e[0]), e[1]]
29
+
30
+
31
+ def __stored_proxies_entry(e: list) -> list:
32
+ assert isinstance(e, list)
33
+ assert isinstance(e[0], dict)
34
+ assert isinstance(e[1], dict)
35
+ return [__col_handle(e[0]), __col_handle(e[1])]
36
+
37
+
38
+ def __col_handle(e: dict) -> dict:
39
+ return {'tbl_version': {'id': e['tbl_id'], 'effective_version': None}, 'col_id': e['col_id']}
@@ -2,6 +2,8 @@
2
2
  # rather than as a comment, so that the existence of a description can be enforced by
3
3
  # the unit tests when new versions are added.
4
4
  VERSION_NOTES = {
5
+ 39: 'ColumnHandles in external stores',
6
+ 38: 'Added TableMd.view_sn',
5
7
  37: 'Add support for the sample() method on DataFrames',
6
8
  36: 'Added Table.lock_dummy',
7
9
  35: 'Track reference_tbl in ColumnRef',
@@ -177,6 +177,11 @@ class TableMd:
177
177
  # - every row is assigned a unique and immutable rowid on insertion
178
178
  next_row_id: int
179
179
 
180
+ # sequence number to track changes in the set of mutable views of this table (ie, this table = the view base)
181
+ # - incremented for each add/drop of a mutable view
182
+ # - only maintained for mutable tables
183
+ view_sn: int
184
+
180
185
  # Metadata format for external stores:
181
186
  # {'class': 'pixeltable.io.label_studio.LabelStudioProject', 'md': {'project_id': 3}}
182
187
  external_stores: list[dict[str, Any]]