pixeltable 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (77)
  1. pixeltable/__init__.py +15 -33
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +1 -1
  4. pixeltable/catalog/column.py +29 -11
  5. pixeltable/catalog/dir.py +2 -2
  6. pixeltable/catalog/insertable_table.py +5 -55
  7. pixeltable/catalog/named_function.py +2 -2
  8. pixeltable/catalog/schema_object.py +2 -7
  9. pixeltable/catalog/table.py +307 -186
  10. pixeltable/catalog/table_version.py +109 -63
  11. pixeltable/catalog/table_version_path.py +28 -5
  12. pixeltable/catalog/view.py +20 -10
  13. pixeltable/dataframe.py +129 -26
  14. pixeltable/env.py +29 -18
  15. pixeltable/exec/exec_context.py +5 -0
  16. pixeltable/exec/exec_node.py +1 -0
  17. pixeltable/exec/in_memory_data_node.py +29 -24
  18. pixeltable/exec/sql_scan_node.py +1 -1
  19. pixeltable/exprs/column_ref.py +13 -8
  20. pixeltable/exprs/data_row.py +4 -0
  21. pixeltable/exprs/expr.py +16 -1
  22. pixeltable/exprs/function_call.py +4 -4
  23. pixeltable/exprs/row_builder.py +29 -20
  24. pixeltable/exprs/similarity_expr.py +4 -3
  25. pixeltable/ext/functions/yolox.py +2 -1
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/aggregate_function.py +14 -12
  28. pixeltable/func/callable_function.py +8 -6
  29. pixeltable/func/expr_template_function.py +13 -19
  30. pixeltable/func/function.py +3 -6
  31. pixeltable/func/query_template_function.py +84 -0
  32. pixeltable/func/signature.py +68 -23
  33. pixeltable/func/udf.py +13 -10
  34. pixeltable/functions/__init__.py +6 -91
  35. pixeltable/functions/eval.py +26 -14
  36. pixeltable/functions/fireworks.py +25 -23
  37. pixeltable/functions/globals.py +62 -0
  38. pixeltable/functions/huggingface.py +20 -16
  39. pixeltable/functions/image.py +170 -1
  40. pixeltable/functions/openai.py +95 -128
  41. pixeltable/functions/string.py +10 -2
  42. pixeltable/functions/together.py +95 -84
  43. pixeltable/functions/util.py +16 -0
  44. pixeltable/functions/video.py +94 -16
  45. pixeltable/functions/whisper.py +74 -0
  46. pixeltable/globals.py +1 -1
  47. pixeltable/io/__init__.py +10 -0
  48. pixeltable/io/external_store.py +370 -0
  49. pixeltable/io/globals.py +51 -22
  50. pixeltable/io/label_studio.py +639 -0
  51. pixeltable/io/parquet.py +1 -1
  52. pixeltable/iterators/__init__.py +9 -0
  53. pixeltable/iterators/string.py +40 -0
  54. pixeltable/metadata/__init__.py +6 -8
  55. pixeltable/metadata/converters/convert_10.py +2 -4
  56. pixeltable/metadata/converters/convert_12.py +7 -2
  57. pixeltable/metadata/converters/convert_13.py +6 -8
  58. pixeltable/metadata/converters/convert_14.py +2 -4
  59. pixeltable/metadata/converters/convert_15.py +44 -0
  60. pixeltable/metadata/converters/convert_16.py +18 -0
  61. pixeltable/metadata/converters/util.py +66 -0
  62. pixeltable/metadata/schema.py +3 -3
  63. pixeltable/plan.py +8 -7
  64. pixeltable/store.py +1 -1
  65. pixeltable/tool/create_test_db_dump.py +147 -54
  66. pixeltable/tool/embed_udf.py +9 -0
  67. pixeltable/type_system.py +1 -2
  68. pixeltable/utils/code.py +34 -0
  69. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/METADATA +1 -1
  70. pixeltable-0.2.10.dist-info/RECORD +131 -0
  71. pixeltable/datatransfer/__init__.py +0 -1
  72. pixeltable/datatransfer/label_studio.py +0 -452
  73. pixeltable/datatransfer/remote.py +0 -85
  74. pixeltable/functions/pil/image.py +0 -147
  75. pixeltable-0.2.8.dist-info/RECORD +0 -124
  76. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/LICENSE +0 -0
  77. {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/WHEEL +0 -0
pixeltable/functions/video.py CHANGED
@@ -1,11 +1,16 @@
+ import tempfile
  import uuid
+ from pathlib import Path
  from typing import Optional

+ import PIL.Image
  import av
+ import numpy as np

  import pixeltable.env as env
  import pixeltable.func as func
  import pixeltable.type_system as ts
+ from pixeltable.utils.code import local_public_names

  _format_defaults = { # format -> (codec, ext)
      'wav': ('pcm_s16le', 'wav'),
@@ -30,6 +35,43 @@ _format_defaults = { # format -> (codec, ext)
  # output_container.mux(packet)


+ @func.uda(
+     init_types=[ts.IntType()],
+     update_types=[ts.ImageType()],
+     value_type=ts.VideoType(),
+     requires_order_by=True,
+     allows_window=False,
+ )
+ class make_video(func.Aggregator):
+     def __init__(self, fps: int = 25):
+         """follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
+         self.container: Optional[av.container.OutputContainer] = None
+         self.stream: Optional[av.stream.Stream] = None
+         self.fps = fps
+
+     def update(self, frame: PIL.Image.Image) -> None:
+         if frame is None:
+             return
+         if self.container is None:
+             (_, output_filename) = tempfile.mkstemp(suffix='.mp4', dir=str(env.Env.get().tmp_dir))
+             self.out_file = Path(output_filename)
+             self.container = av.open(str(self.out_file), mode='w')
+             self.stream = self.container.add_stream('h264', rate=self.fps)
+             self.stream.pix_fmt = 'yuv420p'
+             self.stream.width = frame.width
+             self.stream.height = frame.height
+
+         av_frame = av.VideoFrame.from_ndarray(np.array(frame.convert('RGB')), format='rgb24')
+         for packet in self.stream.encode(av_frame):
+             self.container.mux(packet)
+
+     def value(self) -> str:
+         for packet in self.stream.encode():
+             self.container.mux(packet)
+         self.container.close()
+         return str(self.out_file)
+
+
  _extract_audio_param_types = [
      ts.VideoType(nullable=False),
      ts.IntType(nullable=False),
@@ -75,26 +117,62 @@ def get_metadata(video: str) -> dict:
      """
      with av.open(video) as container:
          assert isinstance(container, av.container.InputContainer)
-         video_streams_info = [
-             {
-                 'duration': stream.duration,
-                 'frames': stream.frames,
-                 'language': stream.language,
-                 'average_rate': float(stream.average_rate) if stream.average_rate is not None else None,
-                 'base_rate': float(stream.base_rate) if stream.base_rate is not None else None,
-                 'guessed_rate': float(stream.guessed_rate) if stream.guessed_rate is not None else None,
-                 'pix_fmt': getattr(stream.codec_context, 'pix_fmt', None),
-                 'width': stream.width,
-                 'height': stream.height,
-             }
-             for stream in container.streams
-             if isinstance(stream, av.video.stream.VideoStream)
-         ]
+         streams_info = [__get_stream_metadata(stream) for stream in container.streams]
      result = {
          'bit_exact': container.bit_exact,
          'bit_rate': container.bit_rate,
          'size': container.size,
          'metadata': container.metadata,
-         'streams': video_streams_info, # TODO: Audio streams?
+         'streams': streams_info,
      }
      return result
+
+
+ def __get_stream_metadata(stream: av.stream.Stream) -> dict:
+     if stream.type != 'audio' and stream.type != 'video':
+         return {'type': stream.type} # Currently unsupported
+
+     codec_context = stream.codec_context
+     codec_context_md = {
+         'name': codec_context.name,
+         'codec_tag': codec_context.codec_tag.encode('unicode-escape').decode('utf-8'),
+         'profile': codec_context.profile,
+     }
+     metadata = {
+         'type': stream.type,
+         'duration': stream.duration,
+         'time_base': float(stream.time_base) if stream.time_base is not None else None,
+         'duration_seconds': float(stream.duration * stream.time_base)
+         if stream.duration is not None and stream.time_base is not None
+         else None,
+         'frames': stream.frames,
+         'metadata': stream.metadata,
+         'codec_context': codec_context_md,
+     }
+
+     if stream.type == 'audio':
+         # Additional metadata for audio
+         codec_context_md['channels'] = int(codec_context.channels) if codec_context.channels is not None else None
+     else:
+         assert stream.type == 'video'
+         # Additional metadata for video
+         codec_context_md['pix_fmt'] = getattr(stream.codec_context, 'pix_fmt', None)
+         metadata.update(
+             **{
+                 'width': stream.width,
+                 'height': stream.height,
+                 'frames': stream.frames,
+                 'average_rate': float(stream.average_rate) if stream.average_rate is not None else None,
+                 'base_rate': float(stream.base_rate) if stream.base_rate is not None else None,
+                 'guessed_rate': float(stream.guessed_rate) if stream.guessed_rate is not None else None,
+             }
+         )
+
+     return metadata
+
+
+ __all__ = local_public_names(__name__)
+
+
+ def __dir__():
+     return __all__
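Note: the new make_video aggregator and the reworked get_metadata would be exercised roughly as follows. This is a hedged sketch based on the general Pixeltable API; the table and view names, and the exact create_table/add_column/group_by signatures, are assumptions rather than anything shown in this diff.

import pixeltable as pxt
from pixeltable.functions.video import get_metadata, make_video

# Hypothetical setup: a table of videos plus an existing frame view `frames`
# (e.g. built with FrameIterator) that exposes `pos` and `frame` columns.
videos = pxt.create_table('videos', {'video': pxt.VideoType()})

# Store per-video container/stream metadata as a computed column.
videos.add_column(metadata=get_metadata(videos.video))

# Re-encode extracted frames into a clip; because requires_order_by=True,
# the first argument (`frames.pos`) supplies the frame ordering.
frames.group_by(videos).select(clip=make_video(frames.pos, frames.frame)).collect()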
pixeltable/functions/whisper.py ADDED
@@ -0,0 +1,74 @@
+ from typing import TYPE_CHECKING, Optional
+
+ import pixeltable as pxt
+
+ if TYPE_CHECKING:
+     from whisper import Whisper
+
+
+ @pxt.udf(
+     param_types=[
+         pxt.AudioType(),
+         pxt.StringType(),
+         pxt.JsonType(nullable=True),
+         pxt.FloatType(nullable=True),
+         pxt.FloatType(nullable=True),
+         pxt.FloatType(nullable=True),
+         pxt.BoolType(),
+         pxt.StringType(nullable=True),
+         pxt.BoolType(),
+         pxt.StringType(),
+         pxt.StringType(),
+         pxt.StringType(),
+         pxt.FloatType(nullable=True),
+         pxt.JsonType(nullable=True),
+     ]
+ )
+ def transcribe(
+     audio: str,
+     *,
+     model: str,
+     temperature: Optional[list[float]] = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
+     compression_ratio_threshold: Optional[float] = 2.4,
+     logprob_threshold: Optional[float] = -1.0,
+     no_speech_threshold: Optional[float] = 0.6,
+     condition_on_previous_text: bool = True,
+     initial_prompt: Optional[str] = None,
+     word_timestamps: bool = False,
+     prepend_punctuations: str = '"\'“¿([{-',
+     append_punctuations: str = '"\'.。,,!!??::”)]}、',
+     decode_options: Optional[dict] = None,
+ ) -> dict:
+     import torch
+
+     if decode_options is None:
+         decode_options = {}
+     device = 'cuda' if torch.cuda.is_available() else 'cpu'
+     model = _lookup_model(model, device)
+     result = model.transcribe(
+         audio,
+         temperature=tuple(temperature),
+         compression_ratio_threshold=compression_ratio_threshold,
+         logprob_threshold=logprob_threshold,
+         no_speech_threshold=no_speech_threshold,
+         condition_on_previous_text=condition_on_previous_text,
+         initial_prompt=initial_prompt,
+         word_timestamps=word_timestamps,
+         prepend_punctuations=prepend_punctuations,
+         append_punctuations=append_punctuations,
+         **decode_options,
+     )
+     return result
+
+
+ def _lookup_model(model_id: str, device: str) -> 'Whisper':
+     import whisper
+
+     key = (model_id, device)
+     if key not in _model_cache:
+         model = whisper.load_model(model_id, device)
+         _model_cache[key] = model
+     return _model_cache[key]
+
+
+ _model_cache: dict[tuple[str, str], 'Whisper'] = {}
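Note: a hedged usage sketch for the new UDF. The table and column names are hypothetical, and the add_column call assumes the standard Pixeltable computed-column API rather than anything shown in this diff.

import pixeltable as pxt
from pixeltable.functions import whisper

# Hypothetical table holding audio clips.
t = pxt.create_table('audio_demo', {'clip': pxt.AudioType()})

# Transcribe each clip with a locally cached Whisper model; the result is the dict
# returned by whisper's transcribe(), so the text would land in t.transcription['text'].
t.add_column(transcription=whisper.transcribe(t.clip, model='base.en'))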
pixeltable/globals.py CHANGED
@@ -213,7 +213,7 @@ def move(path: str, new_path: str) -> None:
      obj = Catalog.get().paths[p]
      Catalog.get().paths.move(p, new_p)
      new_dir = Catalog.get().paths[new_p.parent]
-     obj.move(new_p.name, new_dir._id)
+     obj._move(new_p.name, new_dir._id)


  def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> None:
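Note: the rename to the internal _move does not change the public entry point. A minimal sketch of the user-facing call, with hypothetical catalog paths:

import pixeltable as pxt

pxt.create_dir('archive')                            # hypothetical destination directory
pxt.move('experiments.results', 'archive.results')   # relocate the table within the catalog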
pixeltable/io/__init__.py CHANGED
@@ -1,4 +1,14 @@
+ from .external_store import ExternalStore, SyncStatus
  from .globals import create_label_studio_project
  from .hf_datasets import import_huggingface_dataset
  from .pandas import import_csv, import_excel, import_pandas
  from .parquet import import_parquet
+
+
+ __default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
+ __removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet'}
+ __all__ = sorted(list(__default_dir - __removed_symbols))
+
+
+ def __dir__():
+     return __all__
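Note: the boilerplate above trims submodule names (globals, hf_datasets, pandas, parquet) out of dir() and tab completion while keeping the re-exported symbols. A minimal, self-contained sketch of the same pattern for a hypothetical package:

# mypackage/__init__.py  (hypothetical package, same export-trimming pattern)
from .core import load_thing   # `from .core import ...` also binds the name `core` in this namespace

__default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
__removed_symbols = {'core'}   # hide the submodule itself; keep its re-exported names
__all__ = sorted(list(__default_dir - __removed_symbols))


def __dir__():
    return __all__   # dir(mypackage) now lists only ['load_thing']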
pixeltable/io/external_store.py ADDED
@@ -0,0 +1,370 @@
+ from __future__ import annotations
+
+ import abc
+ import itertools
+ import logging
+ import time
+ from dataclasses import dataclass
+ from typing import Any, Optional
+ from uuid import UUID
+
+ import pixeltable.exceptions as excs
+ import pixeltable.type_system as ts
+ from pixeltable import Table, Column
+ import sqlalchemy as sql
+
+ from pixeltable.catalog import TableVersion
+
+ _logger = logging.getLogger('pixeltable')
+
+
+ class ExternalStore(abc.ABC):
+     """
+     Abstract base class that represents an external data store that is linked to a Pixeltable
+     table. Subclasses of `ExternalStore` provide functionality for synchronizing between Pixeltable
+     and stateful external stores.
+     """
+
+     def __init__(self, name: str) -> None:
+         self.__name = name
+
+     @property
+     def name(self) -> str:
+         return self.__name
+
+     @abc.abstractmethod
+     def link(self, tbl_version: TableVersion, conn: sql.Connection) -> None:
+         """
+         Called by `TableVersion.link()` to implement store-specific logic.
+         """
+
+     @abc.abstractmethod
+     def unlink(self, tbl_version: TableVersion, conn: sql.Connection) -> None:
+         """
+         Called by `TableVersion.unlink()` to implement store-specific logic.
+         """
+
+     @abc.abstractmethod
+     def get_local_columns(self) -> list[Column]:
+         """
+         Gets a list of all local (Pixeltable) columns that are associated with this external store.
+         """
+
+     @abc.abstractmethod
+     def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
+         """
+         Called by `Table.sync()` to implement store-specific synchronization logic.
+         """
+
+     @abc.abstractmethod
+     def as_dict(self) -> dict[str, Any]: ...
+
+     @classmethod
+     @abc.abstractmethod
+     def from_dict(cls, md: dict[str, Any]) -> ExternalStore: ...
+
+
+ class Project(ExternalStore, abc.ABC):
+     """
+     An `ExternalStore` that represents a labeling project. Extends `ExternalStore` with a few
+     additional capabilities specific to such projects.
+     """
+     def __init__(self, name: str, col_mapping: dict[Column, str], stored_proxies: Optional[dict[Column, Column]]):
+         super().__init__(name)
+         self._col_mapping = col_mapping
+
+         # A mapping from original columns to proxy columns. A proxy column is an identical copy of a column that is
+         # guaranteed to be stored; the Project will dynamically create and tear down proxy columns as needed. There
+         # are two reasons this might happen:
+         # (i) to force computed media data to be persisted; or
+         # (ii) to force media data to be materialized in a particular location.
+         # For each entry (k, v) in the dict, `v` is the stored proxy column for `k`. The proxy column `v` will
+         # necessarily be a column of the table to which this project is linked, but `k` need not be; it might be a
+         # column of a base table.
+         # Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
+         # proportion of overlapping rows, all proxying the same base column.
+         if stored_proxies is None:
+             self.stored_proxies: dict[Column, Column] = {}
+         else:
+             self.stored_proxies = stored_proxies
+
+     def get_local_columns(self) -> list[Column]:
+         return list(self.col_mapping.keys())
+
+     def link(self, tbl_version: TableVersion, conn: sql.Connection) -> None:
+         # All of the media columns being linked need to either be stored computed columns, or else have stored proxies.
+         # This ensures that the media in those columns resides in the media store.
+         # First determine which columns (if any) need stored proxies, but don't have one yet.
+         stored_proxies_needed: list[Column] = []
+         for col in self.col_mapping.keys():
+             if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
+                 # If this column is already proxied in some other Project, use the existing proxy to avoid
+                 # duplication. Otherwise, we'll create a new one.
+                 for store in tbl_version.external_stores.values():
+                     if isinstance(store, Project) and col in store.stored_proxies:
+                         self.stored_proxies[col] = store.stored_proxies[col]
+                         break
+                 if col not in self.stored_proxies:
+                     # We didn't find it in an existing Project
+                     stored_proxies_needed.append(col)
+         if len(stored_proxies_needed) > 0:
+             _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
+             # Create stored proxies for columns that need one. Increment the schema version
+             # accordingly.
+             tbl_version.version += 1
+             preceding_schema_version = tbl_version.schema_version
+             tbl_version.schema_version = tbl_version.version
+             proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
+             # Add the columns; this will also update table metadata.
+             tbl_version._add_columns(proxy_cols, conn)
+             # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
+             # invisible to the user.
+             tbl_version._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
+
+     def unlink(self, tbl_version: TableVersion, conn: sql.Connection) -> None:
+         # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
+         # any *other* external store for this table.)
+         deletions_needed: set[Column] = set(self.stored_proxies.values())
+         for name, store in tbl_version.external_stores.items():
+             if name != self.name:
+                 deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
+         if len(deletions_needed) > 0:
+             _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
+             # Delete stored proxies that are no longer needed.
+             tbl_version.version += 1
+             preceding_schema_version = tbl_version.schema_version
+             tbl_version.schema_version = tbl_version.version
+             tbl_version._drop_columns(deletions_needed)
+             self.stored_proxies.clear()
+             tbl_version._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
+
+     def create_stored_proxy(self, tbl_version: TableVersion, col: Column) -> Column:
+         """
+         Creates a proxy column for the specified column. The proxy column will be created in the specified
+         `TableVersion`.
+         """
+         from pixeltable import exprs
+
+         assert col.col_type.is_media_type() and not (col.is_stored and col.is_computed) and col not in self.stored_proxies
+         proxy_col = Column(
+             name=None,
+             # Force images in the proxy column to be materialized inside the media store, in a normalized format.
+             # TODO(aaron-siegel): This is a temporary solution and it will be replaced by a proper `destination`
+             # parameter for computed columns. Among other things, this solution does not work for video or audio.
+             # Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
+             computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
+             stored=True,
+             col_id=tbl_version.next_col_id,
+             sa_col_type=col.col_type.to_sa_type(),
+             schema_version_add=tbl_version.schema_version
+         )
+         proxy_col.tbl = tbl_version
+         tbl_version.next_col_id += 1
+         self.stored_proxies[col] = proxy_col
+         return proxy_col
+
+     @property
+     def col_mapping(self) -> dict[Column, str]:
+         return self._col_mapping
+
+     @abc.abstractmethod
+     def get_export_columns(self) -> dict[str, ts.ColumnType]:
+         """
+         Returns the names and Pixeltable types that this `Project` expects to see in a data export. The keys
+         of the `dict` are the names of data fields in the external store, not Pixeltable columns.
+
+         Returns:
+             A `dict` mapping names of external data fields to their expected Pixeltable types.
+         """
+
+     @abc.abstractmethod
+     def get_import_columns(self) -> dict[str, ts.ColumnType]:
+         """
+         Returns the names and Pixeltable types that this `Project` provides in a data import.
+
+         Returns:
+             A `dict` mapping names of provided columns to their Pixeltable types.
+         """
+
+     @abc.abstractmethod
+     def delete(self) -> None:
+         """
+         Deletes this `Project` and all associated (externally stored) data.
+         """
+
+     @classmethod
+     def validate_columns(
+         cls,
+         table: Table,
+         export_cols: dict[str, ts.ColumnType],
+         import_cols: dict[str, ts.ColumnType],
+         col_mapping: Optional[dict[str, str]]
+     ) -> dict[Column, str]:
+         """
+         Verifies that the specified `col_mapping` is valid. In particular, checks that:
+         (i) the keys of `col_mapping` are valid columns of the specified `Table`;
+         (ii) the values of `col_mapping` are valid external columns (i.e., they appear in either `export_cols` or
+         `import_cols`); and
+         (iii) the Pixeltable types of the `col_mapping` keys are consistent with the expected types of the corresponding
+         external (import or export) columns.
+         If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
+         in which the Pixeltable column names are resolved to the corresponding `Column` objects.
+         """
+         is_user_specified_col_mapping = col_mapping is not None
+         if col_mapping is None:
+             col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
+
+         resolved_col_mapping: dict[Column, str] = {}
+
+         # Validate names
+         t_cols = table.column_names()
+         for t_col, ext_col in col_mapping.items():
+             if t_col not in t_cols:
+                 if is_user_specified_col_mapping:
+                     raise excs.Error(
+                         f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{table.get_name()}` '
+                         'contains no such column.'
+                     )
+                 else:
+                     raise excs.Error(
+                         f'Column `{t_col}` does not exist in Table `{table.get_name()}`. Either add a column `{t_col}`, '
+                         f'or specify a `col_mapping` to associate a different column with the external field `{ext_col}`.'
+                     )
+             if ext_col not in export_cols and ext_col not in import_cols:
+                 raise excs.Error(
+                     f'Column name `{ext_col}` appears as a value in `col_mapping`, but the external store '
+                     f'configuration has no column `{ext_col}`.'
+                 )
+             col = table[t_col].col
+             resolved_col_mapping[col] = ext_col
+         # Validate column specs
+         t_col_types = table.column_types()
+         for t_col, ext_col in col_mapping.items():
+             t_col_type = t_col_types[t_col]
+             if ext_col in export_cols:
+                 # Validate that the table column can be assigned to the external column
+                 ext_col_type = export_cols[ext_col]
+                 if not ext_col_type.is_supertype_of(t_col_type):
+                     raise excs.Error(
+                         f'Column `{t_col}` cannot be exported to external column `{ext_col}` (incompatible types; expecting `{ext_col_type}`)'
+                     )
+             if ext_col in import_cols:
+                 # Validate that the external column can be assigned to the table column
+                 if table._tbl_version_path.get_column(t_col).is_computed:
+                     raise excs.Error(
+                         f'Column `{t_col}` is a computed column, which cannot be populated from an external column'
+                     )
+                 ext_col_type = import_cols[ext_col]
+                 if not t_col_type.is_supertype_of(ext_col_type):
+                     raise excs.Error(
+                         f'Column `{t_col}` cannot be imported from external column `{ext_col}` (incompatible types; expecting `{ext_col_type}`)'
+                     )
+         return resolved_col_mapping
+
+     @classmethod
+     def _column_as_dict(cls, col: Column) -> dict[str, Any]:
+         return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
+
+     @classmethod
+     def _column_from_dict(cls, d: dict[str, Any]) -> Column:
+         from pixeltable.catalog import Catalog
+
+         tbl_id = UUID(d['tbl_id'])
+         col_id = d['col_id']
+         return Catalog.get().tbl_versions[(tbl_id, None)].cols_by_id[col_id]
+
+
+ @dataclass(frozen=True)
+ class SyncStatus:
+     external_rows_created: int = 0
+     external_rows_deleted: int = 0
+     external_rows_updated: int = 0
+     pxt_rows_updated: int = 0
+     num_excs: int = 0
+
+     def combine(self, other: 'SyncStatus') -> 'SyncStatus':
+         return SyncStatus(
+             external_rows_created=self.external_rows_created + other.external_rows_created,
+             external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
+             external_rows_updated=self.external_rows_updated + other.external_rows_updated,
+             pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
+             num_excs=self.num_excs + other.num_excs
+         )
+
+     @classmethod
+     def empty(cls) -> 'SyncStatus':
+         return SyncStatus(0, 0, 0, 0, 0)
+
+
+ class MockProject(Project):
+     """A project that cannot be synced, used mainly for testing."""
+     def __init__(
+         self,
+         name: str,
+         export_cols: dict[str, ts.ColumnType],
+         import_cols: dict[str, ts.ColumnType],
+         col_mapping: dict[Column, str],
+         stored_proxies: Optional[dict[Column, Column]] = None
+     ):
+         super().__init__(name, col_mapping, stored_proxies)
+         self.export_cols = export_cols
+         self.import_cols = import_cols
+         self.__is_deleted = False
+
+     @classmethod
+     def create(
+         cls,
+         t: Table,
+         name: str,
+         export_cols: dict[str, ts.ColumnType],
+         import_cols: dict[str, ts.ColumnType],
+         col_mapping: Optional[dict[str, str]] = None
+     ) -> 'MockProject':
+         col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
+         return cls(name, export_cols, import_cols, col_mapping)
+
+     def get_export_columns(self) -> dict[str, ts.ColumnType]:
+         return self.export_cols
+
+     def get_import_columns(self) -> dict[str, ts.ColumnType]:
+         return self.import_cols
+
+     def sync(self, t: Table, export_data: bool, import_data: bool) -> NotImplemented:
+         raise NotImplementedError()
+
+     def delete(self) -> None:
+         self.__is_deleted = True
+
+     @property
+     def is_deleted(self) -> bool:
+         return self.__is_deleted
+
+     def as_dict(self) -> dict[str, Any]:
+         return {
+             'name': self.name,
+             'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
+             'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
+             'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
+             'stored_proxies': [[self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()]
+         }
+
+     @classmethod
+     def from_dict(cls, md: dict[str, Any]) -> MockProject:
+         return cls(
+             md['name'],
+             {k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
+             {k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
+             {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
+             {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']}
+         )
+
+     def __eq__(self, other: Any) -> bool:
+         if not isinstance(other, MockProject):
+             return False
+         return self.name == other.name
+
+     def __hash__(self) -> int:
+         return hash(self.name)
+
+     def __repr__(self) -> str:
+         return f'MockProject `{self.name}`'
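Note: a hedged sketch of how these pieces fit together, exercising SyncStatus.combine() and the MockProject.create() validation path. The table `t` and its `frame` column are hypothetical, and the example assumes a plain (non-computed) image column so the type checks in validate_columns() pass.

import pixeltable.type_system as ts
from pixeltable.io import SyncStatus
from pixeltable.io.external_store import MockProject

# Aggregate per-store sync results into a single status.
status = SyncStatus(external_rows_created=5, num_excs=1).combine(SyncStatus(pxt_rows_updated=3))
assert status.external_rows_created == 5 and status.pxt_rows_updated == 3 and status.num_excs == 1

# Validate a column mapping against a (hypothetical) table `t` with an image column 'frame';
# create() resolves the string mapping to Column objects via Project.validate_columns().
project = MockProject.create(
    t,
    'demo_store',
    export_cols={'image_field': ts.ImageType()},
    import_cols={'label_field': ts.StringType(nullable=True)},
    col_mapping={'frame': 'image_field'},
)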