pixeltable 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (53) hide show
  1. pixeltable/__init__.py +3 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +8 -2
  4. pixeltable/catalog/insertable_table.py +32 -17
  5. pixeltable/catalog/table.py +167 -12
  6. pixeltable/catalog/table_version.py +185 -106
  7. pixeltable/datatransfer/__init__.py +1 -0
  8. pixeltable/datatransfer/label_studio.py +452 -0
  9. pixeltable/datatransfer/remote.py +85 -0
  10. pixeltable/env.py +148 -69
  11. pixeltable/exprs/column_ref.py +2 -2
  12. pixeltable/exprs/comparison.py +39 -1
  13. pixeltable/exprs/data_row.py +7 -0
  14. pixeltable/exprs/expr.py +11 -12
  15. pixeltable/exprs/function_call.py +0 -3
  16. pixeltable/exprs/globals.py +14 -2
  17. pixeltable/exprs/similarity_expr.py +5 -3
  18. pixeltable/ext/functions/whisperx.py +30 -0
  19. pixeltable/ext/functions/yolox.py +16 -0
  20. pixeltable/func/aggregate_function.py +2 -2
  21. pixeltable/func/expr_template_function.py +3 -1
  22. pixeltable/func/udf.py +2 -2
  23. pixeltable/functions/fireworks.py +9 -4
  24. pixeltable/functions/huggingface.py +25 -1
  25. pixeltable/functions/openai.py +15 -10
  26. pixeltable/functions/together.py +11 -6
  27. pixeltable/functions/util.py +0 -43
  28. pixeltable/functions/video.py +46 -8
  29. pixeltable/globals.py +20 -2
  30. pixeltable/index/__init__.py +1 -0
  31. pixeltable/index/base.py +6 -1
  32. pixeltable/index/btree.py +54 -0
  33. pixeltable/index/embedding_index.py +4 -1
  34. pixeltable/io/__init__.py +1 -0
  35. pixeltable/io/globals.py +58 -0
  36. pixeltable/iterators/base.py +4 -4
  37. pixeltable/iterators/document.py +26 -15
  38. pixeltable/iterators/video.py +9 -1
  39. pixeltable/metadata/__init__.py +2 -2
  40. pixeltable/metadata/converters/convert_14.py +13 -0
  41. pixeltable/metadata/schema.py +9 -6
  42. pixeltable/plan.py +9 -5
  43. pixeltable/store.py +14 -21
  44. pixeltable/tool/create_test_db_dump.py +14 -0
  45. pixeltable/type_system.py +14 -4
  46. pixeltable/utils/coco.py +94 -0
  47. pixeltable-0.2.8.dist-info/METADATA +137 -0
  48. {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/RECORD +50 -45
  49. pixeltable/func/nos_function.py +0 -202
  50. pixeltable/utils/clip.py +0 -18
  51. pixeltable-0.2.6.dist-info/METADATA +0 -131
  52. {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/LICENSE +0 -0
  53. {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/WHEEL +0 -0
@@ -0,0 +1,452 @@
1
+ import logging
2
+ import os
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Any, Iterator, Optional
6
+ from xml.etree import ElementTree
7
+
8
+ import PIL.Image
9
+ import label_studio_sdk
10
+ import more_itertools
11
+ from requests.exceptions import HTTPError
12
+
13
+ import pixeltable as pxt
14
+ import pixeltable.env as env
15
+ import pixeltable.exceptions as excs
16
+ from pixeltable import Table
17
+ from pixeltable.datatransfer.remote import Remote
18
+ from pixeltable.utils import coco
19
+
20
+ _logger = logging.getLogger('pixeltable')
21
+
22
+
23
@env.register_client('label_studio')
def _(api_key: str, url: str) -> label_studio_sdk.Client:
    """
    Client factory registered with Pixeltable under the name `label_studio`.

    Args:
        api_key: The API key passed to the Label Studio SDK client.
        url: The server URL passed to the Label Studio SDK client.

    Returns:
        A new `label_studio_sdk.Client` built from the given credentials.
    """
    client = label_studio_sdk.Client(api_key=api_key, url=url)
    return client
26
+
27
+
28
def _label_studio_client() -> label_studio_sdk.Client:
    """Return the client that the Pixeltable environment holds under the name `label_studio`."""
    pxt_env = env.Env.get()
    return pxt_env.get_client('label_studio')
30
+
31
+
32
+ class LabelStudioProject(Remote):
33
+ """
34
+ A [`Remote`][pixeltable.datatransfer.Remote] that represents a Label Studio project, providing functionality
35
+ for synchronizing between a Pixeltable table and a Label Studio project.
36
+
37
+ The API key and URL for a valid Label Studio server must be specified in Pixeltable config. Either:
38
+
39
+ * Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
40
+ * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.yaml`.
41
+ """
42
+ # TODO(aaron-siegel): Add link in docstring to a Label Studio howto
43
+
44
+ def __init__(self, project_id: int):
45
+ self.project_id = project_id
46
+ self._project: Optional[label_studio_sdk.project.Project] = None
47
+
48
@classmethod
def create(cls, title: str, label_config: str, **kwargs: Any) -> 'LabelStudioProject':
    """
    Creates a new Label Studio project, using the Label Studio client configured in Pixeltable.

    Args:
        title: The title of the project.
        label_config: The Label Studio project configuration, in XML format.
        **kwargs: Additional keyword arguments for the new project; these will be passed to `start_project`
            in the Label Studio SDK.

    Returns:
        An instance of `cls` that refers to the newly created project by its id.
    """
    # Check that the config is valid before creating the project, so that an invalid
    # config does not leave a half-configured project behind on the server.
    cls._parse_project_config(label_config)
    project = _label_studio_client().start_project(title=title, label_config=label_config, **kwargs)
    project_id = project.get_params()['id']
    # Construct through `cls` rather than the hard-coded class name, so that a subclass
    # calling `create` gets an instance of its own type.
    return cls(project_id)
64
+
65
@property
def project(self) -> label_studio_sdk.project.Project:
    """
    The `Project` object corresponding to this Label Studio project.

    The object is fetched through the Label Studio client on first access and then reused.

    Raises:
        excs.Error: If fetching the project fails with an `HTTPError`.
    """
    if self._project is not None:
        return self._project
    try:
        self._project = _label_studio_client().get_project(self.project_id)
    except HTTPError as exc:
        message = (f'Could not locate Label Studio project: {self.project_id} '
                   '(cannot connect to server or project no longer exists)')
        raise excs.Error(message) from exc
    return self._project
75
+
76
@property
def project_params(self) -> dict[str, Any]:
    """The parameters of this Label Studio project, as returned by the SDK's `get_params()`."""
    ls_project = self.project
    return ls_project.get_params()
80
+
81
@property
def project_title(self) -> str:
    """The title of this Label Studio project (the `title` entry of its parameters)."""
    params = self.project_params
    return params['title']
85
+
86
+ @property
87
+ def _project_config(self) -> '_LabelStudioConfig':
88
+ return self._parse_project_config(self.project_params['label_config'])
89
+
90
def get_export_columns(self) -> dict[str, pxt.ColumnType]:
    """
    The data keys and preannotation fields specified in this Label Studio project,
    mapped to their Pixeltable types.
    """
    config = self._project_config
    return config.export_columns
95
+
96
def get_import_columns(self) -> dict[str, pxt.ColumnType]:
    """
    Always contains a single entry:

    ```
    {"annotations": pxt.JsonType(nullable=True)}
    ```
    """
    import_columns: dict[str, pxt.ColumnType] = {}
    import_columns[ANNOTATIONS_COLUMN] = pxt.JsonType(nullable=True)
    return import_columns
105
+
106
def sync(self, t: Table, col_mapping: dict[str, str], export_data: bool, import_data: bool) -> None:
    """
    Synchronizes table `t` with this Label Studio project.

    Args:
        t: The table to synchronize.
        col_mapping: Maps column names in `t` to column names of this project.
        export_data: If `True`, tasks are created from the rows of `t`.
        import_data: If `True`, annotations of existing tasks are written back to `t`.
    """
    _logger.info(f'Syncing Label Studio project "{self.project_title}" with table `{t.get_name()}`'
                 f' (export: {export_data}, import: {import_data}).')
    # Index every existing task by its rowid. The rowid is converted to a tuple so it can
    # serve as a dict key and be compared against `row.rowid` during export.
    tasks_by_rowid: dict[tuple, dict] = {}
    for task in self._fetch_all_tasks():
        tasks_by_rowid[tuple(task['meta']['rowid'])] = task
    if export_data:
        self._create_tasks_from_table(t, col_mapping, tasks_by_rowid)
    if import_data:
        self._update_table_from_tasks(t, col_mapping, tasks_by_rowid)
115
+
116
def _fetch_all_tasks(self) -> Iterator[dict]:
    """
    Yields every task of this project that carries a `rowid` in its `meta` dict,
    fetching the tasks page by page.

    Tasks without a `rowid` are skipped; if any were skipped, a single warning with
    their count is logged once all pages have been read.
    """
    skipped = 0
    page_no = 1
    result = self.project.get_paginated_tasks(page=page_no, page_size=_PAGE_SIZE)
    # A truthy `end_pagination` entry in the response marks the end of the task list.
    while not result.get('end_pagination'):
        for task in result['tasks']:
            if task['meta'].get('rowid') is None:
                skipped += 1
                continue
            yield task
        page_no += 1
        result = self.project.get_paginated_tasks(page=page_no, page_size=_PAGE_SIZE)
    if skipped > 0:
        _logger.warning(
            f'Skipped {skipped} unrecognized task(s) when syncing Label Studio project "{self.project_title}".'
        )
134
+
135
def _update_table_from_tasks(self, t: Table, col_mapping: dict[str, str], tasks: dict[tuple, dict]) -> None:
    """
    Writes the annotations of the given tasks into the column of `t` that is mapped
    to the annotations column.

    Args:
        t: The table to update.
        col_mapping: Maps column names in `t` to column names of this project.
        tasks: The existing tasks, keyed by rowid.
    """
    # `col_mapping` is guaranteed to be a one-to-one dict whose values are a superset
    # of `get_import_columns`
    assert ANNOTATIONS_COLUMN in col_mapping.values()
    annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
    updates = []
    for task in tasks.values():
        task_annotations = task[ANNOTATIONS_COLUMN]
        updates.append({
            '_rowid': task['meta']['rowid'],
            # Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
            # in order to properly handle the scenario where existing annotations have been deleted in
            # Label Studio.
            annotations_column: task_annotations if len(task_annotations) > 0 else None
        })
    if len(updates) == 0:
        return
    _logger.info(
        f'Updating table `{t.get_name()}`, column `{annotations_column}` with {len(updates)} total annotations.'
    )
    t.batch_update(updates)
    annotations_count = sum(len(task[ANNOTATIONS_COLUMN]) for task in tasks.values())
    print(f'Synced {annotations_count} annotation(s) from {len(updates)} existing task(s) in {self}.')
157
+
158
def _create_tasks_from_table(self, t: Table, col_mapping: dict[str, str], existing_tasks: dict[tuple, dict]) -> None:
    """
    Creates Label Studio tasks for the rows of `t` that do not have one yet, choosing
    between the file-upload path and the URL path based on the mapped data columns.

    Args:
        t: The table to export from.
        col_mapping: Maps column names in `t` to column names of this project.
        existing_tasks: The tasks that already exist in the project, keyed by rowid.
    """
    t_col_types = t.column_types()
    config = self._project_config

    # Columns in `t` that map to Label Studio data keys
    t_data_cols = [
        t_col_name for t_col_name, r_col_name in col_mapping.items()
        if r_col_name in config.data_keys
    ]

    # Columns in `t` that map to `rectanglelabels` preannotations
    t_rl_cols = [
        t_col_name for t_col_name, r_col_name in col_mapping.items()
        if r_col_name in config.rectangle_labels
    ]

    # Destinations for `rectanglelabels` preannotations. The helpers below pair
    # `t_rl_cols[i]` with `rl_info[i]`, so the list is built by looking up each mapped
    # column by name. Taking the config's values in config order would mis-pair them
    # whenever `col_mapping` lists the columns in a different order or leaves one unmapped.
    rl_info = [config.rectangle_labels[col_mapping[t_col_name]] for t_col_name in t_rl_cols]

    _logger.debug('`t_data_cols`: %s', t_data_cols)
    _logger.debug('`t_rl_cols`: %s', t_rl_cols)
    _logger.debug('`rl_info`: %s', rl_info)

    if len(t_data_cols) == 1 and t_col_types[t_data_cols[0]].is_media_type():
        # With a single media column, we can post local files to Label Studio using
        # the file transfer API.
        self._create_tasks_by_post(t, col_mapping, existing_tasks, t_rl_cols, rl_info, t_data_cols[0])
    else:
        # Either a single non-media column or multiple columns. Either way, we can't
        # use the file upload API and need to rely on externally accessible URLs for
        # media columns.
        self._create_tasks_by_urls(t, col_mapping, existing_tasks, t_data_cols, t_col_types, t_rl_cols, rl_info)
190
+
191
def _create_tasks_by_post(
    self,
    t: Table,
    col_mapping: dict[str, str],
    existing_tasks: dict[tuple, dict],
    t_rl_cols: list[str],
    rl_info: list['_RectangleLabel'],
    media_col_name: str
) -> None:
    """
    Creates one Label Studio task per row of `t` that has no task yet, by uploading
    the row's media file, and then deletes tasks whose rows are no longer in `t`.

    Args:
        t: The table to export from.
        col_mapping: Maps column names in `t` to column names of this project.
        existing_tasks: The tasks that already exist in the project, keyed by rowid.
        t_rl_cols: Columns of `t` holding COCO annotations to be sent as predictions.
        rl_info: The `rectanglelabels` destination for each entry of `t_rl_cols` (same order).
        media_col_name: Name of the single media column of `t` that is uploaded.
    """
    is_stored = t[media_col_name].col.is_stored
    # If it's a stored column, we can use `localpath`
    localpath_col_opt = [t[media_col_name].localpath] if is_stored else []
    # Select the media column, rectanglelabels columns, and localpath (if appropriate)
    rows = t.select(t[media_col_name], *[t[col] for col in t_rl_cols], *localpath_col_opt)
    tasks_created = 0
    row_ids_in_pxt: set[tuple] = set()

    for row in rows._exec():
        # NOTE(review): the slot indices are deliberately read inside the loop, as before;
        # presumably they are only assigned once the query is executing -- confirm before hoisting.
        media_col_idx = rows._select_list_exprs[0].slot_idx
        rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[1: 1 + len(t_rl_cols)]]
        row_ids_in_pxt.add(row.rowid)
        if row.rowid in existing_tasks:
            continue

        # Upload the media file to Label Studio
        task_id: int
        if is_stored:
            # There is an existing localpath; use it!
            localpath_col_idx = rows._select_list_exprs[-1].slot_idx
            file = Path(row.vals[localpath_col_idx])
            task_id = self.project.import_tasks(file)[0]
        else:
            # No localpath; create a temp file and upload it
            assert isinstance(row.vals[media_col_idx], PIL.Image.Image)
            file = env.Env.get().create_tmp_path(extension='.png')
            try:
                row.vals[media_col_idx].save(file, format='png')
                task_id = self.project.import_tasks(file)[0]
            finally:
                # Always clean up the temp file, even if saving or uploading failed.
                # `missing_ok` keeps a failed save from masking the original exception.
                Path(file).unlink(missing_ok=True)

        # Update the task with `rowid` metadata
        self.project.update_task(task_id, meta={'rowid': row.rowid})

        # Convert coco annotations to predictions
        coco_annotations = [row.vals[i] for i in rl_col_idxs]
        _logger.debug('`coco_annotations`: %s', coco_annotations)
        predictions = [
            self._coco_to_predictions(annotations, col_mapping[t_rl_cols[i]], rl_info[i], task_id=task_id)
            for i, annotations in enumerate(coco_annotations)
        ]
        _logger.debug('`predictions`: %s', predictions)
        self.project.create_predictions(predictions)
        tasks_created += 1

    print(f'Created {tasks_created} new task(s) in {self}.')

    self._delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
246
+
247
def _create_tasks_by_urls(
    self,
    t: Table,
    col_mapping: dict[str, str],
    existing_tasks: dict[tuple, dict],
    t_data_cols: list[str],
    t_col_types: dict[str, pxt.ColumnType],
    t_rl_cols: list[str],
    rl_info: list['_RectangleLabel']
) -> None:
    """
    Creates Label Studio tasks, in pages of `_PAGE_SIZE`, for the rows of `t` that have
    no task yet, referencing media by URL. Afterwards, deletes tasks whose rows are no
    longer in `t`.

    Args:
        t: The table to export from.
        col_mapping: Maps column names in `t` to column names of this project.
        existing_tasks: The tasks that already exist in the project, keyed by rowid.
        t_data_cols: Columns of `t` that map to Label Studio data keys.
        t_col_types: The column types of `t`, keyed by column name.
        t_rl_cols: Columns of `t` holding COCO annotations to be sent as predictions.
        rl_info: The `rectanglelabels` destination for each entry of `t_rl_cols` (same order).

    Raises:
        excs.Error: If a media column value is a `file://` URL.
    """
    # TODO(aaron-siegel): This is just a placeholder (implementation is not complete or tested!)
    selection = [
        t[col_name].fileurl if t_col_types[col_name].is_media_type() else t[col_name]
        for col_name in t_data_cols
    ]
    r_data_cols = [col_mapping[col_name] for col_name in t_data_cols]
    rows = t.select(*selection, *[t[col] for col in t_rl_cols])
    tasks_created = 0
    row_ids_in_pxt: set[tuple] = set()

    def new_rows():
        # Record the rowid of EVERY row, including rows that already have a task, before
        # filtering. If only the new rows were recorded, `_delete_stale_tasks` would
        # treat every pre-existing task as stale and delete it.
        for row in rows._exec():
            row_ids_in_pxt.add(row.rowid)
            if row.rowid not in existing_tasks:
                yield row

    for page in more_itertools.batched(new_rows(), n=_PAGE_SIZE):
        data_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[:len(t_data_cols)]]
        rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[len(t_data_cols):]]
        tasks = []

        for row in page:
            data_vals = [row.vals[i] for i in data_col_idxs]
            coco_annotations = [row.vals[i] for i in rl_col_idxs]
            predictions = [
                self._coco_to_predictions(annotations, col_mapping[t_rl_cols[i]], rl_info[i])
                for i, annotations in enumerate(coco_annotations)
            ]

            # Validate media columns
            # TODO Support this if label studio is running on localhost?
            for col_name, val in zip(t_data_cols, data_vals):
                # The `isinstance` guard keeps a null media value from raising AttributeError.
                if t_col_types[col_name].is_media_type() and isinstance(val, str) and val.startswith('file://'):
                    raise excs.Error(
                        'Cannot use locally stored media files in a `LabelStudioProject` with more than one '
                        'data key. (This is a limitation of Label Studio; see warning here: '
                        'https://labelstud.io/guide/tasks.html)'
                    )

            tasks.append({
                # Materialize the pairs as a dict: a bare `zip` object is a one-shot
                # iterator, not a mapping of data keys to values.
                'data': dict(zip(r_data_cols, data_vals)),
                'meta': {'rowid': row.rowid},
                'predictions': predictions
            })

        self.project.import_tasks(tasks)
        tasks_created += len(tasks)

    print(f'Created {tasks_created} new task(s) in {self}.')

    self._delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
304
+
305
+ def _delete_stale_tasks(self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int):
306
+ tasks_to_delete = [
307
+ task['id'] for rowid, task in existing_tasks.items()
308
+ if rowid not in row_ids_in_pxt
309
+ ]
310
+ # Sanity check the math
311
+ assert len(tasks_to_delete) == len(existing_tasks) + tasks_created - len(row_ids_in_pxt)
312
+
313
+ if len(tasks_to_delete) > 0:
314
+ self.project.delete_tasks(tasks_to_delete)
315
+ print(f'Deleted {len(tasks_to_delete)} tasks(s) in {self} that are no longer present in Pixeltable.')
316
+
317
def to_dict(self) -> dict[str, Any]:
    """Return the metadata needed to recreate this object: its `project_id`."""
    md: dict[str, Any] = {}
    md['project_id'] = self.project_id
    return md
319
+
320
@classmethod
def from_dict(cls, md: dict[str, Any]) -> 'LabelStudioProject':
    """
    Recreate an instance from metadata produced by `to_dict`.

    Args:
        md: A dict containing a `project_id` entry.

    Returns:
        An instance of `cls` referring to that project id.
    """
    # Construct through `cls` rather than the hard-coded class name, so that a subclass
    # is deserialized as its own type.
    return cls(md['project_id'])
323
+
324
+ def __repr__(self) -> str:
325
+ name = self.project.get_params()['title']
326
+ return f'LabelStudioProject `{name}`'
327
+
328
@classmethod
def _parse_project_config(cls, xml_config: str) -> '_LabelStudioConfig':
    """
    Parses a Label Studio XML config, extracting its data keys and `rectanglelabels`
    definitions, and validates the result.

    Args:
        xml_config: The Label Studio project configuration, in XML format.

    Raises:
        excs.Error: If the root element is not a `View` (compared case-insensitively).
    """
    root: ElementTree.Element = ElementTree.fromstring(xml_config)
    if root.tag.lower() != 'view':
        raise excs.Error('Root of Label Studio config must be a `View`')
    data_keys = dict(cls._parse_data_keys_config(root))
    rectangle_labels = dict(cls._parse_rectangle_labels_config(root))
    parsed = _LabelStudioConfig(data_keys=data_keys, rectangle_labels=rectangle_labels)
    parsed.validate()
    return parsed
343
+
344
+ @classmethod
345
+ def _parse_data_keys_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_DataKey']]:
346
+ for element in root:
347
+ if 'value' in element.attrib and element.attrib['value'][0] == '$':
348
+ remote_col_name = element.attrib['value'][1:]
349
+ if 'name' not in element.attrib:
350
+ raise excs.Error(f'Data key is missing `name` attribute: `{remote_col_name}`')
351
+ element_type = _LS_TAG_MAP.get(element.tag.lower())
352
+ if element_type is None:
353
+ raise excs.Error(
354
+ f'Unsupported Label Studio data type: `{element.tag}` (in data key `{remote_col_name}`)'
355
+ )
356
+ yield remote_col_name, _DataKey(element.attrib['name'], element_type)
357
+
358
+ @classmethod
359
+ def _parse_rectangle_labels_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_RectangleLabel']]:
360
+ for element in root:
361
+ if element.tag.lower() == 'rectanglelabels':
362
+ name = element.attrib['name']
363
+ to_name = element.attrib['toName']
364
+ labels = [
365
+ child.attrib['value']
366
+ for child in element if child.tag.lower() == 'label'
367
+ ]
368
+ for label in labels:
369
+ if label not in coco.COCO_2017_CATEGORIES.values():
370
+ raise excs.Error(f'Label in `rectanglelabels` config is not a valid COCO object name: {label}')
371
+ yield name, _RectangleLabel(to_name=to_name, labels=labels)
372
+
373
@classmethod
def _coco_to_predictions(
    cls,
    coco_annotations: dict[str, Any],
    from_name: str,
    rl_info: '_RectangleLabel',
    task_id: Optional[int] = None
) -> dict[str, Any]:
    """
    Converts a dict of COCO annotations into a Label Studio prediction dict.

    Args:
        coco_annotations: A dict with an `image` entry (holding `width` and `height`) and
            an `annotations` entry (a sequence of entries with `bbox` and `category`).
        from_name: Stored as `from_name` in each result.
        rl_info: The `rectanglelabels` destination; supplies `to_name` and the accepted labels.
        task_id: If given, it is included in the returned dict under `task`.

    Returns:
        `{'result': [...]}`, preceded by a `task` entry when `task_id` is not `None`.
    """
    image_info = coco_annotations['image']
    width = image_info['width']
    height = image_info['height']
    result = []
    # `i` counts all annotations, including skipped ones, so result ids may have gaps.
    for i, entry in enumerate(coco_annotations['annotations']):
        label = coco.COCO_2017_CATEGORIES[entry['category']]
        # include only the COCO labels that match a rectanglelabel name
        if label not in rl_info.labels:
            continue
        bbox = entry['bbox']
        result.append({
            'id': f'result_{i}',
            'type': 'rectanglelabels',
            'from_name': from_name,
            'to_name': rl_info.to_name,
            'image_rotation': 0,
            'original_width': width,
            'original_height': height,
            'value': {
                'rotation': 0,
                # Label Studio expects image coordinates as % of image dimensions
                'x': bbox[0] * 100.0 / width,
                'y': bbox[1] * 100.0 / height,
                'width': bbox[2] * 100.0 / width,
                'height': bbox[3] * 100.0 / height,
                'rectanglelabels': [label]
            }
        })
    prediction: dict[str, Any] = {}
    if task_id is not None:
        prediction['task'] = task_id
    prediction['result'] = result
    return prediction
410
+
411
+
412
@dataclass(frozen=True)
class _DataKey:
    """A data key found in a Label Studio project configuration."""

    # The 'name' attribute of the data key; may differ from the field name
    name: str
    # The Pixeltable type that the element's tag maps to
    column_type: pxt.ColumnType
416
+
417
+
418
+ @dataclass(frozen=True)
419
+ class _RectangleLabel:
420
+ to_name: str
421
+ labels: list[str]
422
+
423
+
424
@dataclass(frozen=True)
class _LabelStudioConfig:
    """The parts of a Label Studio project configuration that Pixeltable works with."""

    # Data keys, keyed by the key referenced in the element's `value` attribute
    data_keys: dict[str, _DataKey]
    # `rectanglelabels` definitions, keyed by the element's `name` attribute
    rectangle_labels: dict[str, _RectangleLabel]

    def validate(self) -> None:
        """
        Checks that every `rectanglelabels` entry refers to the `name` of a known data key.

        Raises:
            excs.Error: If a `to_name` does not match the `name` of any data key.
        """
        known_names = {key.name for key in self.data_keys.values()}
        for name, rl in self.rectangle_labels.items():
            if rl.to_name in known_names:
                continue
            raise excs.Error(
                f'Invalid Label Studio configuration: `toName` attribute of RectangleLabels `{name}` '
                f'references an unknown data key: `{rl.to_name}`'
            )

    @property
    def export_columns(self) -> dict[str, pxt.ColumnType]:
        """
        The export columns of this config: each data key with its column type, followed
        by each `rectanglelabels` name with a JSON type.
        """
        columns: dict[str, pxt.ColumnType] = {}
        for key_name, key_info in self.data_keys.items():
            columns[key_name] = key_info.column_type
        # A `rectanglelabels` name that equals a data key overrides that key's type.
        for name in self.rectangle_labels:
            columns[name] = pxt.JsonType()
        return columns
443
+
444
+
445
# Name of the import column, and the task key under which annotations are read.
ANNOTATIONS_COLUMN = 'annotations'
# Number of tasks requested per page when fetching tasks, and the batch size used
# when creating tasks by URL.
_PAGE_SIZE = 100 # This is the default used in the LS SDK
# Maps lower-cased Label Studio tag names to the Pixeltable type of the corresponding
# data key. Tags that are absent from this map are rejected as unsupported.
_LS_TAG_MAP = {
    'text': pxt.StringType(),
    'image': pxt.ImageType(),
    'video': pxt.VideoType(),
    'audio': pxt.AudioType()
}
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ from typing import Any
5
+
6
+ import pixeltable.type_system as ts
7
+ from pixeltable import Table
8
+
9
+
10
class Remote(abc.ABC):
    """
    Abstract base class for a remote data store. Concrete subclasses implement the
    synchronization between Pixeltable tables and a stateful remote store.
    """

    @abc.abstractmethod
    def get_export_columns(self) -> dict[str, ts.ColumnType]:
        """
        The columns that this `Remote` expects to receive in a data export.

        Returns:
            A `dict` mapping the name of each expected column to its Pixeltable type.
        """

    @abc.abstractmethod
    def get_import_columns(self) -> dict[str, ts.ColumnType]:
        """
        The columns that this `Remote` supplies in a data import.

        Returns:
            A `dict` mapping the name of each provided column to its Pixeltable type.
        """

    @abc.abstractmethod
    def sync(self, t: Table, col_mapping: dict[str, str], export_data: bool, import_data: bool) -> None:
        """
        Synchronizes the given [`Table`][pixeltable.Table] with this `Remote`. This method
        should generally not be called directly; instead, call
        [`t.sync()`][pixeltable.Table.sync].

        Args:
            t: The table to synchronize with this remote.
            col_mapping: A `dict` mapping columns in the Pixeltable table to columns in the remote store.
            export_data: If `True`, data from the table will be exported to the remote during synchronization.
            import_data: If `True`, data from the remote will be imported into the table during synchronization.
        """

    @abc.abstractmethod
    def to_dict(self) -> dict[str, Any]:
        """Serialize this `Remote` to a `dict`; the counterpart of `from_dict`."""

    @classmethod
    @abc.abstractmethod
    def from_dict(cls, md: dict[str, Any]) -> Remote:
        """Recreate a `Remote` from a `dict`; the counterpart of `to_dict`."""
54
+
55
+
56
class MockRemote(Remote):
    """
    A remote that cannot be synced, used mainly for testing. It simply reports the
    export and import columns it was constructed with.
    """

    def __init__(self, export_cols: dict[str, ts.ColumnType], import_cols: dict[str, ts.ColumnType]):
        """
        Args:
            export_cols: The columns to report from `get_export_columns`.
            import_cols: The columns to report from `get_import_columns`.
        """
        self.export_cols = export_cols
        self.import_cols = import_cols

    def get_export_columns(self) -> dict[str, ts.ColumnType]:
        return self.export_cols

    def get_import_columns(self) -> dict[str, ts.ColumnType]:
        return self.import_cols

    # The return annotation is `None`, matching `Remote.sync`. `NotImplemented` is a
    # constant for binary-operator fallbacks, not a type.
    def sync(self, t: Table, col_mapping: dict[str, str], export_data: bool, import_data: bool) -> None:
        """Always raises `NotImplementedError`: a `MockRemote` cannot be synced."""
        raise NotImplementedError()

    def to_dict(self) -> dict[str, Any]:
        """Serialize the export and import columns, each column type via its `as_dict()`."""
        return {
            # TODO Change in next schema version
            'push_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
            'pull_cols': {k: v.as_dict() for k, v in self.import_cols.items()}
        }

    @classmethod
    def from_dict(cls, md: dict[str, Any]) -> Remote:
        """Recreate a `MockRemote` from a `dict` produced by `to_dict`."""
        return cls(
            # TODO Change in next schema version
            {k: ts.ColumnType.from_dict(v) for k, v in md['push_cols'].items()},
            {k: ts.ColumnType.from_dict(v) for k, v in md['pull_cols'].items()}
        )