pixeltable-0.4.1-py3-none-any.whl → pixeltable-0.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (51)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +3 -10
  4. pixeltable/catalog/catalog.py +139 -59
  5. pixeltable/catalog/column.py +32 -23
  6. pixeltable/catalog/globals.py +2 -45
  7. pixeltable/catalog/insertable_table.py +5 -2
  8. pixeltable/catalog/path.py +6 -0
  9. pixeltable/catalog/table.py +173 -23
  10. pixeltable/catalog/table_version.py +156 -92
  11. pixeltable/catalog/table_version_handle.py +26 -1
  12. pixeltable/catalog/update_status.py +179 -0
  13. pixeltable/catalog/view.py +12 -3
  14. pixeltable/config.py +76 -12
  15. pixeltable/dataframe.py +1 -1
  16. pixeltable/env.py +29 -0
  17. pixeltable/exec/exec_node.py +7 -24
  18. pixeltable/exec/expr_eval/schedulers.py +134 -7
  19. pixeltable/exprs/column_property_ref.py +23 -20
  20. pixeltable/exprs/column_ref.py +24 -18
  21. pixeltable/exprs/data_row.py +9 -0
  22. pixeltable/exprs/function_call.py +2 -2
  23. pixeltable/exprs/row_builder.py +46 -14
  24. pixeltable/exprs/rowid_ref.py +0 -4
  25. pixeltable/func/function.py +3 -3
  26. pixeltable/functions/audio.py +36 -9
  27. pixeltable/functions/video.py +57 -10
  28. pixeltable/globals.py +61 -1
  29. pixeltable/io/__init__.py +1 -1
  30. pixeltable/io/external_store.py +39 -64
  31. pixeltable/io/globals.py +4 -4
  32. pixeltable/io/hf_datasets.py +10 -2
  33. pixeltable/io/label_studio.py +52 -48
  34. pixeltable/metadata/__init__.py +1 -1
  35. pixeltable/metadata/converters/convert_38.py +39 -0
  36. pixeltable/metadata/converters/convert_39.py +125 -0
  37. pixeltable/metadata/converters/util.py +3 -0
  38. pixeltable/metadata/notes.py +2 -0
  39. pixeltable/metadata/schema.py +14 -2
  40. pixeltable/metadata/utils.py +78 -0
  41. pixeltable/plan.py +26 -18
  42. pixeltable/share/packager.py +20 -38
  43. pixeltable/store.py +121 -142
  44. pixeltable/type_system.py +2 -2
  45. pixeltable/utils/coroutine.py +6 -23
  46. pixeltable/utils/media_store.py +39 -0
  47. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
  48. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/RECORD +51 -47
  49. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
  50. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
  51. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
pixeltable/io/external_store.py CHANGED
@@ -3,14 +3,13 @@ from __future__ import annotations
3
3
  import abc
4
4
  import itertools
5
5
  import logging
6
- from dataclasses import dataclass
7
6
  from typing import Any, Optional
8
- from uuid import UUID
9
7
 
10
8
  import pixeltable.exceptions as excs
11
9
  import pixeltable.type_system as ts
12
10
  from pixeltable import Column, Table
13
- from pixeltable.catalog import TableVersion
11
+ from pixeltable.catalog import ColumnHandle, TableVersion
12
+ from pixeltable.catalog.update_status import UpdateStatus
14
13
 
15
14
  _logger = logging.getLogger('pixeltable')
16
15
 
@@ -22,6 +21,8 @@ class ExternalStore(abc.ABC):
22
21
  and stateful external stores.
23
22
  """
24
23
 
24
+ __name: str
25
+
25
26
  def __init__(self, name: str) -> None:
26
27
  self.__name = name
27
28
 
@@ -38,13 +39,13 @@ class ExternalStore(abc.ABC):
38
39
  """Removes store-specific metadata created in link()."""
39
40
 
40
41
  @abc.abstractmethod
41
- def get_local_columns(self) -> list[Column]:
42
+ def get_local_columns(self) -> list[ColumnHandle]:
42
43
  """
43
44
  Gets a list of all local (Pixeltable) columns that are associated with this external store.
44
45
  """
45
46
 
46
47
  @abc.abstractmethod
47
- def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
48
+ def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
48
49
  """
49
50
  Called by `Table.sync()` to implement store-specific synchronization logic.
50
51
  """
@@ -63,9 +64,15 @@ class Project(ExternalStore, abc.ABC):
63
64
  additional capabilities specific to such projects.
64
65
  """
65
66
 
66
- stored_proxies: dict[Column, Column]
67
+ _col_mapping: dict[ColumnHandle, str] # col -> external col name
68
+ stored_proxies: dict[ColumnHandle, ColumnHandle] # original col -> proxy col
67
69
 
68
- def __init__(self, name: str, col_mapping: dict[Column, str], stored_proxies: Optional[dict[Column, Column]]):
70
+ def __init__(
71
+ self,
72
+ name: str,
73
+ col_mapping: dict[ColumnHandle, str],
74
+ stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]],
75
+ ):
69
76
  super().__init__(name)
70
77
  self._col_mapping = col_mapping
71
78
 
@@ -80,11 +87,11 @@ class Project(ExternalStore, abc.ABC):
80
87
  # Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
81
88
  # proportion of overlapping rows, all proxying the same base column.
82
89
  if stored_proxies is None:
83
- self.stored_proxies: dict[Column, Column] = {}
90
+ self.stored_proxies: dict[ColumnHandle, ColumnHandle] = {}
84
91
  else:
85
92
  self.stored_proxies = stored_proxies
86
93
 
87
- def get_local_columns(self) -> list[Column]:
94
+ def get_local_columns(self) -> list[ColumnHandle]:
88
95
  return list(self.col_mapping.keys())
89
96
 
90
97
  def link(self, tbl_version: TableVersion) -> None:
@@ -92,15 +99,16 @@ class Project(ExternalStore, abc.ABC):
92
99
  # This ensures that the media in those columns resides in the media store.
93
100
  # First determine which columns (if any) need stored proxies, but don't have one yet.
94
101
  stored_proxies_needed: list[Column] = []
95
- for col in self.col_mapping:
102
+ for col_handle in self.col_mapping:
103
+ col = col_handle.get()
96
104
  if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
97
105
  # If this column is already proxied in some other Project, use the existing proxy to avoid
98
106
  # duplication. Otherwise, we'll create a new one.
99
107
  for store in tbl_version.external_stores.values():
100
- if isinstance(store, Project) and col in store.stored_proxies:
101
- self.stored_proxies[col] = store.stored_proxies[col]
108
+ if isinstance(store, Project) and col_handle in store.stored_proxies:
109
+ self.stored_proxies[col_handle] = store.stored_proxies[col_handle]
102
110
  break
103
- if col not in self.stored_proxies:
111
+ if col_handle not in self.stored_proxies:
104
112
  # We didn't find it in an existing Project
105
113
  stored_proxies_needed.append(col)
106
114
 
@@ -110,17 +118,20 @@ class Project(ExternalStore, abc.ABC):
110
118
  proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
111
119
  # Add the columns; this will also update table metadata.
112
120
  tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
121
+ self.stored_proxies.update(
122
+ {col.handle: proxy_col.handle for col, proxy_col in zip(stored_proxies_needed, proxy_cols)}
123
+ )
113
124
 
114
125
  def unlink(self, tbl_version: TableVersion) -> None:
115
126
  # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
116
127
  # any *other* external store for this table.)
117
- deletions_needed: set[Column] = set(self.stored_proxies.values())
128
+ deletions_needed: set[ColumnHandle] = set(self.stored_proxies.values())
118
129
  for name, store in tbl_version.external_stores.items():
119
130
  if isinstance(store, Project) and name != self.name:
120
131
  deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
121
132
  if len(deletions_needed) > 0:
122
- _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
123
- tbl_version._drop_columns(deletions_needed)
133
+ _logger.info(f'Removing stored proxies for columns: {[col.get().name for col in deletions_needed]}')
134
+ tbl_version._drop_columns(col.get() for col in deletions_needed)
124
135
  self.stored_proxies.clear()
125
136
 
126
137
  def create_stored_proxy(self, col: Column) -> Column:
@@ -142,11 +153,10 @@ class Project(ExternalStore, abc.ABC):
142
153
  computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
143
154
  stored=True,
144
155
  )
145
- self.stored_proxies[col] = proxy_col
146
156
  return proxy_col
147
157
 
148
158
  @property
149
- def col_mapping(self) -> dict[Column, str]:
159
+ def col_mapping(self) -> dict[ColumnHandle, str]:
150
160
  return self._col_mapping
151
161
 
152
162
  @abc.abstractmethod
@@ -181,7 +191,7 @@ class Project(ExternalStore, abc.ABC):
181
191
  export_cols: dict[str, ts.ColumnType],
182
192
  import_cols: dict[str, ts.ColumnType],
183
193
  col_mapping: Optional[dict[str, str]],
184
- ) -> dict[Column, str]:
194
+ ) -> dict[ColumnHandle, str]:
185
195
  """
186
196
  Verifies that the specified `col_mapping` is valid. In particular, checks that:
187
197
  (i) the keys of `col_mapping` are valid columns of the specified `Table`;
@@ -199,7 +209,7 @@ class Project(ExternalStore, abc.ABC):
199
209
  if col_mapping is None:
200
210
  col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
201
211
 
202
- resolved_col_mapping: dict[Column, str] = {}
212
+ resolved_col_mapping: dict[ColumnHandle, str] = {}
203
213
 
204
214
  # Validate names
205
215
  t_cols = set(table._get_schema().keys())
@@ -223,7 +233,8 @@ class Project(ExternalStore, abc.ABC):
223
233
  )
224
234
  col_ref = table[t_col]
225
235
  assert isinstance(col_ref, exprs.ColumnRef)
226
- resolved_col_mapping[col_ref.col] = ext_col
236
+ resolved_col_mapping[col_ref.col.handle] = ext_col
237
+
227
238
  # Validate column specs
228
239
  t_col_types = table._get_schema()
229
240
  for t_col, ext_col in col_mapping.items():
@@ -250,40 +261,6 @@ class Project(ExternalStore, abc.ABC):
250
261
  )
251
262
  return resolved_col_mapping
252
263
 
253
- @classmethod
254
- def _column_as_dict(cls, col: Column) -> dict[str, Any]:
255
- return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
256
-
257
- @classmethod
258
- def _column_from_dict(cls, d: dict[str, Any]) -> Column:
259
- from pixeltable.catalog import Catalog
260
-
261
- tbl_id = UUID(d['tbl_id'])
262
- col_id = d['col_id']
263
- return Catalog.get().get_tbl_version(tbl_id, None).cols_by_id[col_id]
264
-
265
-
266
- @dataclass(frozen=True)
267
- class SyncStatus:
268
- external_rows_created: int = 0
269
- external_rows_deleted: int = 0
270
- external_rows_updated: int = 0
271
- pxt_rows_updated: int = 0
272
- num_excs: int = 0
273
-
274
- def combine(self, other: 'SyncStatus') -> 'SyncStatus':
275
- return SyncStatus(
276
- external_rows_created=self.external_rows_created + other.external_rows_created,
277
- external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
278
- external_rows_updated=self.external_rows_updated + other.external_rows_updated,
279
- pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
280
- num_excs=self.num_excs + other.num_excs,
281
- )
282
-
283
- @classmethod
284
- def empty(cls) -> 'SyncStatus':
285
- return SyncStatus(0, 0, 0, 0, 0)
286
-
287
264
 
288
265
  class MockProject(Project):
289
266
  """A project that cannot be synced, used mainly for testing."""
@@ -293,8 +270,8 @@ class MockProject(Project):
293
270
  name: str,
294
271
  export_cols: dict[str, ts.ColumnType],
295
272
  import_cols: dict[str, ts.ColumnType],
296
- col_mapping: dict[Column, str],
297
- stored_proxies: Optional[dict[Column, Column]] = None,
273
+ col_mapping: dict[ColumnHandle, str],
274
+ stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]] = None,
298
275
  ):
299
276
  super().__init__(name, col_mapping, stored_proxies)
300
277
  self.export_cols = export_cols
@@ -319,7 +296,7 @@ class MockProject(Project):
319
296
  def get_import_columns(self) -> dict[str, ts.ColumnType]:
320
297
  return self.import_cols
321
298
 
322
- def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
299
+ def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
323
300
  raise NotImplementedError()
324
301
 
325
302
  def delete(self) -> None:
@@ -334,10 +311,8 @@ class MockProject(Project):
334
311
  'name': self.name,
335
312
  'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
336
313
  'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
337
- 'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
338
- 'stored_proxies': [
339
- [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
340
- ],
314
+ 'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
315
+ 'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
341
316
  }
342
317
 
343
318
  @classmethod
@@ -346,8 +321,8 @@ class MockProject(Project):
346
321
  md['name'],
347
322
  {k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
348
323
  {k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
349
- {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
350
- {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']},
324
+ {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
325
+ {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
351
326
  )
352
327
 
353
328
  def __eq__(self, other: object) -> bool:
pixeltable/io/globals.py CHANGED
@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
5
5
  import pixeltable as pxt
6
6
  import pixeltable.exceptions as excs
7
7
  from pixeltable import Table, exprs
8
+ from pixeltable.catalog.update_status import UpdateStatus
8
9
  from pixeltable.env import Env
9
- from pixeltable.io.external_store import SyncStatus
10
10
 
11
11
  if TYPE_CHECKING:
12
12
  import fiftyone as fo # type: ignore[import-untyped]
@@ -22,7 +22,7 @@ def create_label_studio_project(
22
22
  sync_immediately: bool = True,
23
23
  s3_configuration: Optional[dict[str, Any]] = None,
24
24
  **kwargs: Any,
25
- ) -> SyncStatus:
25
+ ) -> UpdateStatus:
26
26
  """
27
27
  Create a new Label Studio project and link it to the specified [`Table`][pixeltable.Table].
28
28
 
@@ -96,7 +96,7 @@ def create_label_studio_project(
96
96
  [Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
97
97
 
98
98
  Returns:
99
- A `SyncStatus` representing the status of any synchronization operations that occurred.
99
+ An `UpdateStatus` representing the status of any synchronization operations that occurred.
100
100
 
101
101
  Examples:
102
102
  Create a Label Studio project whose tasks correspond to videos stored in the `video_col`
@@ -136,7 +136,7 @@ def create_label_studio_project(
136
136
  if sync_immediately:
137
137
  return t.sync()
138
138
  else:
139
- return SyncStatus.empty()
139
+ return UpdateStatus()
140
140
 
141
141
 
142
142
  def export_images_as_fo_dataset(
pixeltable/io/hf_datasets.py CHANGED
@@ -50,10 +50,18 @@ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.Column
50
50
  elif isinstance(feature_type, datasets.Sequence):
51
51
  # example: cohere wiki. Sequence(feature=Value(dtype='float32', id=None), length=-1, id=None)
52
52
  dtype = _to_pixeltable_type(feature_type.feature, nullable)
53
- length = feature_type.length if feature_type.length != -1 else None
54
- return ts.ArrayType(shape=(length,), dtype=dtype)
53
+ if dtype is None:
54
+ return None
55
+ if dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type():
56
+ length = feature_type.length if feature_type.length != -1 else None
57
+ return ts.ArrayType(shape=(length,), dtype=dtype, nullable=nullable)
58
+ else:
59
+ # Sequence of dicts must be cast as Json
60
+ return ts.JsonType(nullable=nullable)
55
61
  elif isinstance(feature_type, datasets.Image):
56
62
  return ts.ImageType(nullable=nullable)
63
+ elif isinstance(feature_type, dict):
64
+ return ts.JsonType(nullable=nullable)
57
65
  else:
58
66
  return None
59
67
 
pixeltable/io/label_studio.py CHANGED
@@ -4,18 +4,20 @@ import logging
4
4
  import os
5
5
  from dataclasses import dataclass
6
6
  from pathlib import Path
7
- from typing import Any, Iterator, Literal, Optional, cast
7
+ from typing import Any, Iterator, Literal, Optional
8
8
  from xml.etree import ElementTree as ET
9
9
 
10
- import label_studio_sdk # type: ignore[import-untyped]
10
+ import label_studio_sdk
11
11
  import PIL.Image
12
12
  from requests.exceptions import HTTPError
13
13
 
14
14
  import pixeltable.type_system as ts
15
15
  from pixeltable import Column, Table, env, exceptions as excs
16
+ from pixeltable.catalog import ColumnHandle
17
+ from pixeltable.catalog.update_status import RowCountStats, UpdateStatus
16
18
  from pixeltable.config import Config
17
19
  from pixeltable.exprs import ColumnRef, DataRow, Expr
18
- from pixeltable.io.external_store import Project, SyncStatus
20
+ from pixeltable.io.external_store import Project
19
21
  from pixeltable.utils import coco
20
22
 
21
23
  # label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
@@ -25,7 +27,7 @@ try:
25
27
  import label_studio_sdk.project as ls_project # type: ignore
26
28
  except ImportError:
27
29
  # label_studio_sdk>=1 compatibility
28
- import label_studio_sdk._legacy.project as ls_project # type: ignore
30
+ import label_studio_sdk._legacy.project as ls_project
29
31
 
30
32
  _logger = logging.getLogger('pixeltable')
31
33
 
@@ -45,13 +47,17 @@ class LabelStudioProject(Project):
45
47
  for synchronizing between a Pixeltable table and a Label Studio project.
46
48
  """
47
49
 
50
+ project_id: int # Label Studio project ID
51
+ media_import_method: Literal['post', 'file', 'url']
52
+ _project: Optional[ls_project.Project]
53
+
48
54
  def __init__(
49
55
  self,
50
56
  name: str,
51
57
  project_id: int,
52
58
  media_import_method: Literal['post', 'file', 'url'],
53
- col_mapping: dict[Column, str],
54
- stored_proxies: Optional[dict[Column, Column]] = None,
59
+ col_mapping: dict[ColumnHandle, str],
60
+ stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]] = None,
55
61
  ):
56
62
  """
57
63
  The constructor will NOT create a new Label Studio project; it is also used when loading
@@ -59,7 +65,7 @@ class LabelStudioProject(Project):
59
65
  """
60
66
  self.project_id = project_id
61
67
  self.media_import_method = media_import_method
62
- self._project: Optional[ls_project.Project] = None
68
+ self._project = None
63
69
  super().__init__(name, col_mapping, stored_proxies)
64
70
 
65
71
  @property
@@ -105,20 +111,20 @@ class LabelStudioProject(Project):
105
111
  """
106
112
  return {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}
107
113
 
108
- def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
114
+ def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
109
115
  _logger.info(
110
116
  f'Syncing Label Studio project "{self.project_title}" with table `{t._name}`'
111
117
  f' (export: {export_data}, import: {import_data}).'
112
118
  )
113
119
  # Collect all existing tasks into a dict with entries `rowid: task`
114
120
  tasks = {tuple(task['meta']['rowid']): task for task in self.__fetch_all_tasks()}
115
- sync_status = SyncStatus.empty()
121
+ sync_status = UpdateStatus()
116
122
  if export_data:
117
123
  export_sync_status = self.__update_tasks(t, tasks)
118
- sync_status = sync_status.combine(export_sync_status)
124
+ sync_status += export_sync_status
119
125
  if import_data:
120
126
  import_sync_status = self.__update_table_from_tasks(t, tasks)
121
- sync_status = sync_status.combine(import_sync_status)
127
+ sync_status += import_sync_status
122
128
  return sync_status
123
129
 
124
130
  def __fetch_all_tasks(self) -> Iterator[dict[str, Any]]:
@@ -142,7 +148,7 @@ class LabelStudioProject(Project):
142
148
  f'Label Studio project {self.project_title!r}.'
143
149
  )
144
150
 
145
- def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) -> SyncStatus:
151
+ def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) -> UpdateStatus:
146
152
  """
147
153
  Updates all tasks in this Label Studio project based on the Pixeltable data:
148
154
  - Creates new tasks for rows that don't map to any existing task;
@@ -155,7 +161,7 @@ class LabelStudioProject(Project):
155
161
  t_data_cols = [t_col for t_col, ext_col_name in self.col_mapping.items() if ext_col_name in config.data_keys]
156
162
 
157
163
  if len(t_data_cols) == 0:
158
- return SyncStatus.empty()
164
+ return UpdateStatus()
159
165
 
160
166
  # Columns in `t` that map to `rectanglelabels` preannotations
161
167
  t_rl_cols = [
@@ -183,15 +189,15 @@ class LabelStudioProject(Project):
183
189
  self,
184
190
  t: Table,
185
191
  existing_tasks: dict[tuple, dict],
186
- media_col: Column,
187
- t_rl_cols: list[Column],
192
+ media_col: ColumnHandle,
193
+ t_rl_cols: list[ColumnHandle],
188
194
  rl_info: list['_RectangleLabel'],
189
- ) -> SyncStatus:
190
- is_stored = media_col.is_stored
195
+ ) -> UpdateStatus:
196
+ is_stored = media_col.get().is_stored
191
197
  # If it's a stored column, we can use `localpath`
192
- localpath_col_opt = [t[media_col.name].localpath] if is_stored else []
198
+ localpath_col_opt = [t[media_col.get().name].localpath] if is_stored else []
193
199
  # Select the media column, rectanglelabels columns, and localpath (if appropriate)
194
- rows = t.select(t[media_col.name], *[t[col.name] for col in t_rl_cols], *localpath_col_opt)
200
+ rows = t.select(t[media_col.get().name], *[t[col.get().name] for col in t_rl_cols], *localpath_col_opt)
195
201
  tasks_created = 0
196
202
  row_ids_in_pxt: set[tuple] = set()
197
203
 
@@ -232,42 +238,42 @@ class LabelStudioProject(Project):
232
238
 
233
239
  env.Env.get().console_logger.info(f'Created {tasks_created} new task(s) in {self}.')
234
240
 
235
- sync_status = SyncStatus(external_rows_created=tasks_created)
241
+ sync_status = UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created))
236
242
 
237
243
  deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
238
-
239
- return sync_status.combine(deletion_sync_status)
244
+ sync_status += deletion_sync_status
245
+ return sync_status
240
246
 
241
247
  def __update_tasks_by_files(
242
248
  self,
243
249
  t: Table,
244
250
  existing_tasks: dict[tuple, dict],
245
- t_data_cols: list[Column],
246
- t_rl_cols: list[Column],
251
+ t_data_cols: list[ColumnHandle],
252
+ t_rl_cols: list[ColumnHandle],
247
253
  rl_info: list['_RectangleLabel'],
248
- ) -> SyncStatus:
254
+ ) -> UpdateStatus:
249
255
  ext_data_cols = [self.col_mapping[col] for col in t_data_cols]
250
256
  expr_refs: dict[str, Expr] = {} # kwargs for the select statement
251
257
  for col in t_data_cols:
252
- col_name = col.name
258
+ col_name = col.get().name
253
259
  if self.media_import_method == 'url':
254
260
  expr_refs[col_name] = t[col_name].fileurl
255
261
  else:
256
262
  assert self.media_import_method == 'file'
257
- if not col.col_type.is_media_type():
263
+ if not col.get().col_type.is_media_type():
258
264
  # Not a media column; query the data directly
259
- expr_refs[col_name] = cast(ColumnRef, t[col_name])
265
+ expr_refs[col_name] = t[col_name]
260
266
  elif col in self.stored_proxies:
261
267
  # Media column that has a stored proxy; use it. We have to give it a name,
262
268
  # since it's an anonymous column
263
- stored_proxy_col = self.stored_proxies[col]
269
+ stored_proxy_col = self.stored_proxies[col].get()
264
270
  expr_refs[f'{col_name}_proxy'] = ColumnRef(stored_proxy_col).localpath
265
271
  else:
266
272
  # Media column without a stored proxy; this means it's a stored computed column,
267
273
  # and we can just use the localpath
268
274
  expr_refs[col_name] = t[col_name].localpath
269
275
 
270
- df = t.select(*[t[col.name] for col in t_rl_cols], **expr_refs)
276
+ df = t.select(*[t[col.get().name] for col in t_rl_cols], **expr_refs)
271
277
  # The following buffers will hold `DataRow` indices that correspond to each of the selected
272
278
  # columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
273
279
  # preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
@@ -286,11 +292,11 @@ class LabelStudioProject(Project):
286
292
  data_vals = [row[idx] for idx in data_col_idxs]
287
293
  coco_annotations = [row[idx] for idx in rl_col_idxs]
288
294
  for i in range(len(t_data_cols)):
289
- if t_data_cols[i].col_type.is_media_type():
295
+ if t_data_cols[i].get().col_type.is_media_type():
290
296
  # Special handling for media columns
291
297
  assert isinstance(data_vals[i], str)
292
298
  if self.media_import_method == 'url':
293
- data_vals[i] = self.__validate_fileurl(t_data_cols[i], data_vals[i])
299
+ data_vals[i] = self.__validate_fileurl(t_data_cols[i].get(), data_vals[i])
294
300
  else:
295
301
  assert self.media_import_method == 'file'
296
302
  data_vals[i] = self.__localpath_to_lspath(data_vals[i])
@@ -336,11 +342,11 @@ class LabelStudioProject(Project):
336
342
  f'Created {tasks_created} new task(s) and updated {tasks_updated} existing task(s) in {self}.'
337
343
  )
338
344
 
339
- sync_status = SyncStatus(external_rows_created=tasks_created, external_rows_updated=tasks_updated)
345
+ sync_status = UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created, upd_rows=tasks_updated))
340
346
 
341
347
  deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
342
-
343
- return sync_status.combine(deletion_sync_status)
348
+ sync_status += deletion_sync_status
349
+ return sync_status
344
350
 
345
351
  @classmethod
346
352
  def __validate_fileurl(cls, col: Column, url: str) -> Optional[str]:
@@ -361,7 +367,7 @@ class LabelStudioProject(Project):
361
367
 
362
368
  def __delete_stale_tasks(
363
369
  self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int
364
- ) -> SyncStatus:
370
+ ) -> UpdateStatus:
365
371
  deleted_rowids = set(existing_tasks.keys()) - row_ids_in_pxt
366
372
  # Sanity check the math
367
373
  assert len(deleted_rowids) == len(existing_tasks) + tasks_created - len(row_ids_in_pxt)
@@ -377,11 +383,11 @@ class LabelStudioProject(Project):
377
383
  for rowid in deleted_rowids:
378
384
  del existing_tasks[rowid]
379
385
 
380
- return SyncStatus(external_rows_deleted=len(deleted_rowids))
386
+ return UpdateStatus(ext_row_count_stats=RowCountStats(del_rows=len(deleted_rowids)))
381
387
 
382
- def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) -> SyncStatus:
388
+ def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) -> UpdateStatus:
383
389
  if ANNOTATIONS_COLUMN not in self.col_mapping.values():
384
- return SyncStatus.empty()
390
+ return UpdateStatus()
385
391
 
386
392
  annotations = {
387
393
  # Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
@@ -391,7 +397,7 @@ class LabelStudioProject(Project):
391
397
  for task in tasks.values()
392
398
  }
393
399
 
394
- local_annotations_col = next(k for k, v in self.col_mapping.items() if v == ANNOTATIONS_COLUMN)
400
+ local_annotations_col = next(k for k, v in self.col_mapping.items() if v == ANNOTATIONS_COLUMN).get()
395
401
 
396
402
  # Prune the annotations down to just the ones that have actually changed.
397
403
  rows = t.select(t[local_annotations_col.name])
@@ -416,19 +422,17 @@ class LabelStudioProject(Project):
416
422
  ancestor = ancestor._get_base_table()
417
423
  update_status = ancestor.batch_update(updates)
418
424
  env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
419
- return SyncStatus(pxt_rows_updated=update_status.num_rows, num_excs=update_status.num_excs)
425
+ return update_status
420
426
  else:
421
- return SyncStatus.empty()
427
+ return UpdateStatus()
422
428
 
423
429
  def as_dict(self) -> dict[str, Any]:
424
430
  return {
425
431
  'name': self.name,
426
432
  'project_id': self.project_id,
427
433
  'media_import_method': self.media_import_method,
428
- 'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
429
- 'stored_proxies': [
430
- [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
431
- ],
434
+ 'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
435
+ 'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
432
436
  }
433
437
 
434
438
  @classmethod
@@ -437,8 +441,8 @@ class LabelStudioProject(Project):
437
441
  md['name'],
438
442
  md['project_id'],
439
443
  md['media_import_method'],
440
- {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
441
- {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']},
444
+ {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
445
+ {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
442
446
  )
443
447
 
444
448
  def __repr__(self) -> str:
pixeltable/metadata/__init__.py CHANGED
@@ -18,7 +18,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
18
18
  _logger = logging.getLogger('pixeltable')
19
19
 
20
20
  # current version of the metadata; this is incremented whenever the metadata schema changes
21
- VERSION = 38
21
+ VERSION = 40
22
22
 
23
23
 
24
24
  def create_system_info(engine: sql.engine.Engine) -> None:
pixeltable/metadata/converters/convert_38.py ADDED
@@ -0,0 +1,39 @@
1
+ from typing import Any, Optional
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=38)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
12
+
13
+
14
+ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
15
+ if k == 'col_mapping':
16
+ assert isinstance(v, list)
17
+ return k, [__col_mapping_entry(e) for e in v]
18
+ if k == 'stored_proxies':
19
+ assert isinstance(v, list)
20
+ return k, [__stored_proxies_entry(e) for e in v]
21
+ return None
22
+
23
+
24
+ def __col_mapping_entry(e: list) -> list:
25
+ assert isinstance(e, list)
26
+ assert isinstance(e[0], dict)
27
+ assert isinstance(e[1], str)
28
+ return [__col_handle(e[0]), e[1]]
29
+
30
+
31
+ def __stored_proxies_entry(e: list) -> list:
32
+ assert isinstance(e, list)
33
+ assert isinstance(e[0], dict)
34
+ assert isinstance(e[1], dict)
35
+ return [__col_handle(e[0]), __col_handle(e[1])]
36
+
37
+
38
+ def __col_handle(e: dict) -> dict:
39
+ return {'tbl_version': {'id': e['tbl_id'], 'effective_version': None}, 'col_id': e['col_id']}