pixeltable 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140) hide show
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
@@ -0,0 +1,581 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import json
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple
8
+ from uuid import UUID
9
+
10
+ import pandas as pd
11
+ import sqlalchemy as sql
12
+
13
+ import pixeltable
14
+ import pixeltable.catalog as catalog
15
+ import pixeltable.env as env
16
+ import pixeltable.exceptions as excs
17
+ import pixeltable.exprs as exprs
18
+ import pixeltable.metadata.schema as schema
19
+ import pixeltable.type_system as ts
20
+ from .column import Column
21
+ from .globals import is_valid_identifier, is_system_column_name
22
+ from .schema_object import SchemaObject
23
+ from .table_version import TableVersion
24
+ from .table_version_path import TableVersionPath
25
+
26
+ _logger = logging.getLogger('pixeltable')
27
+
28
class Table(SchemaObject):
    """Base class for all tabular SchemaObjects.

    Attribute and item lookups that are not defined on this class are forwarded to the underlying
    ``TableVersionPath`` (see ``__getattr__`` and ``__getitem__``), which is how ``tbl.col_name`` and
    ``tbl['col_name']`` are resolved.
    """

    @dataclasses.dataclass
    class UpdateStatus:
        """Execution status reported by ``add_column()`` and ``update()``."""
        num_rows: int = 0
        # TODO: change to num_computed_columns (the number of computed slots isn't really meaningful to the user)
        num_computed_values: int = 0
        num_excs: int = 0
        updated_cols: List[str] = dataclasses.field(default_factory=list)
        cols_with_excs: List[str] = dataclasses.field(default_factory=list)

    def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
        """Create a Table backed by ``tbl_version_path``; the table starts out as not dropped."""
        super().__init__(id, name, dir_id)
        self.is_dropped = False
        self.tbl_version_path = tbl_version_path

    def move(self, new_name: str, new_dir_id: UUID) -> None:
        """Rename/move this table and persist the new name and directory id in the metadata store."""
        super().move(new_name, new_dir_id)
        with env.Env.get().engine.begin() as conn:
            # table/column identifiers come from the schema definition; all values are bound parameters
            stmt = sql.text((
                f"UPDATE {schema.Table.__table__} "
                f"SET {schema.Table.dir_id.name} = :new_dir_id, "
                f"    {schema.Table.md.name}['name'] = :new_name "
                f"WHERE {schema.Table.id.name} = :id"))
            # the name is assigned into a subscript of the md column, hence the JSON encoding
            conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})

    def version(self) -> int:
        """Return the version of this table. Used by tests to ascertain version changes."""
        return self.tbl_version_path.tbl_version.version

    def _tbl_version(self) -> TableVersion:
        """Return TableVersion for just this table."""
        return self.tbl_version_path.tbl_version

    def __hash__(self) -> int:
        """Hash on the id of this table's TableVersion."""
        return hash(self._tbl_version().id)

    def _check_is_dropped(self) -> None:
        """Raise an Error if this table has been dropped."""
        if self.is_dropped:
            # use _name (as in __repr__): a plain 'name' attribute lookup would be forwarded by __getattr__
            raise excs.Error(f'{self.display_name()} {self._name} has been dropped')

    def __getattr__(self, col_name: str) -> 'pixeltable.exprs.ColumnRef':
        """Return a ColumnRef for the given column name.

        Only invoked when normal attribute lookup fails; the lookup is forwarded to ``tbl_version_path``.
        """
        if col_name == 'tbl_version_path':
            # tbl_version_path itself isn't set (e.g., while copying/unpickling): forwarding would re-enter
            # __getattr__ and recurse without bound
            raise AttributeError(col_name)
        return getattr(self.tbl_version_path, col_name)

    def __getitem__(self, index: object) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.dataframe.DataFrame']:
        """Return a ColumnRef for the given column name, or a DataFrame for the given slice.
        """
        return self.tbl_version_path.__getitem__(index)

    def df(self) -> 'pixeltable.dataframe.DataFrame':
        """Return a DataFrame for this table.
        """
        # local import: avoid circular imports
        from pixeltable.dataframe import DataFrame
        return DataFrame(self.tbl_version_path)

    def select(self, *items: Any, **named_items: Any) -> 'pixeltable.dataframe.DataFrame':
        """Return a DataFrame for this table with ``select()`` applied to the given items.
        """
        # local import: avoid circular imports
        from pixeltable.dataframe import DataFrame
        return DataFrame(self.tbl_version_path).select(*items, **named_items)

    def where(self, pred: 'exprs.Predicate') -> 'pixeltable.dataframe.DataFrame':
        """Return a DataFrame for this table with ``where(pred)`` applied.
        """
        # local import: avoid circular imports
        from pixeltable.dataframe import DataFrame
        return DataFrame(self.tbl_version_path).where(pred)

    def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.dataframe.DataFrame':
        """Return a DataFrame for this table with ``order_by(*items, asc=asc)`` applied.
        """
        # local import: avoid circular imports
        from pixeltable.dataframe import DataFrame
        return DataFrame(self.tbl_version_path).order_by(*items, asc=asc)

    def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
        """Return rows from this table.
        """
        return self.df().collect()

    def show(
            self, *args, **kwargs
    ) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
        """Return rows from this table. Arguments are passed through to ``DataFrame.show()``.
        """
        return self.df().show(*args, **kwargs)

    def head(
            self, *args, **kwargs
    ) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
        """Return the first n rows inserted into this table."""
        return self.df().head(*args, **kwargs)

    def tail(
            self, *args, **kwargs
    ) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
        """Return the last n rows inserted into this table."""
        return self.df().tail(*args, **kwargs)

    def count(self) -> int:
        """Return the number of rows in this table."""
        return self.df().count()

    def column_names(self) -> List[str]:
        """Return the names of the columns in this table."""
        return [c.name for c in self.tbl_version_path.columns()]

    def column_types(self) -> Dict[str, ts.ColumnType]:
        """Return a dict mapping the name of each column in this table to its type."""
        return {c.name: c.col_type for c in self.tbl_version_path.columns()}

    @property
    def comment(self) -> Optional[str]:
        """The comment stored with this table's TableVersion (``__repr__`` treats ``None`` as no comment)."""
        return self._tbl_version().comment

    @comment.setter
    def comment(self, new_comment: Optional[str]):
        self._tbl_version().set_comment(new_comment)

    @property
    def num_retained_versions(self):
        """The ``num_retained_versions`` setting of this table's TableVersion."""
        return self._tbl_version().num_retained_versions

    @num_retained_versions.setter
    def num_retained_versions(self, new_num_retained_versions: int):
        self._tbl_version().set_num_retained_versions(new_num_retained_versions)

    def _description(self) -> pd.DataFrame:
        """Return a pandas DataFrame with one row per column: name, type and (for computed columns) expression."""
        cols = self.tbl_version_path.columns()
        df = pd.DataFrame({
            'Column Name': [c.name for c in cols],
            'Type': [str(c.col_type) for c in cols],
            'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
        })
        return df

    def _description_html(self) -> 'pd.io.formats.style.Styler':
        """Return the table description as a pandas Styler, set up for HTML display."""
        pd_df = self._description()
        # white-space: pre-wrap: print \n as newline
        # th: center-align headings
        return pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'}) \
            .set_table_styles([dict(selector='th', props=[('text-align', 'center')])]) \
            .hide(axis='index')

    def describe(self) -> None:
        """Display the table description: styled output when running under IPython, plain text otherwise."""
        try:
            # __IPYTHON__ is only defined when running under IPython; otherwise this raises NameError
            __IPYTHON__
            from IPython.display import display
            display(self._description_html())
        except NameError:
            print(repr(self))

    # TODO: Display comments in _repr_html()
    def __repr__(self) -> str:
        """Return display name and table name, followed by the comment (if any) and the column description."""
        description_str = self._description().to_string(index=False)
        if self.comment is None:
            comment = ''
        else:
            comment = f'{self.comment}\n'
        return f'{self.display_name()} \'{self._name}\'\n{comment}{description_str}'

    def _repr_html_(self) -> str:
        """Return the HTML rendering of the table description."""
        return self._description_html()._repr_html_()

    def _drop(self) -> None:
        """Drop the underlying TableVersion, mark this table as dropped and remove it from the catalog.

        Raises:
            Error: If the table has already been dropped.
        """
        self._check_is_dropped()
        self.tbl_version_path.tbl_version.drop()
        self.is_dropped = True
        # update catalog
        cat = catalog.Catalog.get()
        del cat.tbls[self._id]

    # TODO Factor this out into a separate module.
    # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
    def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
        """Return a PyTorch Dataset for this table.
            See DataFrame.to_pytorch_dataset()
        """
        # local import: avoid circular imports
        from pixeltable.dataframe import DataFrame
        return DataFrame(self.tbl_version_path).to_pytorch_dataset(image_format=image_format)

    def to_coco_dataset(self) -> Path:
        """Return the path to a COCO json file for this table.
            See DataFrame.to_coco_dataset()
        """
        # local import: avoid circular imports
        from pixeltable.dataframe import DataFrame
        return DataFrame(self.tbl_version_path).to_coco_dataset()

    def __setitem__(self, column_name: str, value: Union[ts.ColumnType, exprs.Expr, Callable, dict]) -> None:
        """Adds a column to the table

        Args:
            column_name: the name of the new column
            value: column type or value expression or column specification dictionary:
                column type: a Pixeltable column type (if the table already contains rows, it must be nullable)
                value expression: a Pixeltable expression that computes the column values
                column specification: a dictionary with possible keys 'type', 'value', 'stored', 'indexed'

        Raises:
            Error: If the table has been dropped, or if the column name or specification is invalid.

        Examples:
            Add an int column with ``None`` values:

            >>> tbl['new_col'] = IntType(nullable=True)

            For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
            the parameters of the Callable must correspond to existing column names (the column values are then passed
            as arguments to the Callable). In this case, the return type cannot be inferred and needs to be specified
            explicitly:

            >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}

            For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
            90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
            (by default, computed image columns are not stored but recomputed on demand):

            >>> tbl['rotated'] = tbl.frame.rotate(90)

            Do the same, but now the column is stored:

            >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}

            Add a resized version of the ``frame`` column and index it. The column does not need to be stored in order
            to be indexed:

            >>> tbl['small_frame'] = {'value': tbl.frame.resize([224, 224]), 'indexed': True}
        """
        # same precondition as add_column(): a dropped table cannot be modified
        self._check_is_dropped()
        if not isinstance(column_name, str):
            raise excs.Error(f'Column name must be a string, got {type(column_name)}')
        if not is_valid_identifier(column_name):
            raise excs.Error(f'Invalid column name: {column_name!r}')

        new_col = self._create_columns({column_name: value})[0]
        self._verify_column(new_col, set(self.column_names()))
        # the result of an item assignment is discarded, so the status isn't returned
        self.tbl_version_path.tbl_version.add_column(new_col)

    def add_column(
            self, *,
            type: Optional[ts.ColumnType] = None, stored: Optional[bool] = None, indexed: Optional[bool] = None,
            print_stats: bool = False, **kwargs: Any
    ) -> UpdateStatus:
        """Adds a column to the table.

        Args:
            kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
            type: The type of the column. Only valid and required if ``value-expression`` is a Callable.
            stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
            indexed: Whether the column is indexed.
            print_stats: If ``True``, print execution metrics.

        Returns:
            execution status

        Raises:
            Error: If the column name is invalid or already exists.

        Examples:
            Add an int column with ``None`` values:

            >>> tbl.add_column(new_col=IntType())

            Alternatively, this can also be expressed as:

            >>> tbl['new_col'] = IntType()

            For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
            the parameters of the Callable must correspond to existing column names (the column values are then passed
            as arguments to the Callable). In this case, the column type needs to be specified explicitly:

            >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=IntType())

            Alternatively, this can also be expressed as:

            >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}

            For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
            90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
            (by default, computed image columns are not stored but recomputed on demand):

            >>> tbl.add_column(rotated=tbl.frame.rotate(90))

            Alternatively, this can also be expressed as:

            >>> tbl['rotated'] = tbl.frame.rotate(90)

            Do the same, but now the column is stored:

            >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=True)

            Alternatively, this can also be expressed as:

            >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}

            Add a resized version of the ``frame`` column and index it. The column does not need to be stored in order
            to be indexed:

            >>> tbl.add_column(small_frame=tbl.frame.resize([224, 224]), indexed=True)

            Alternatively, this can also be expressed as:

            >>> tbl['small_frame'] = {'value': tbl.frame.resize([224, 224]), 'indexed': True}
        """
        self._check_is_dropped()
        # verify kwargs and construct column schema dict
        if len(kwargs) != 1:
            raise excs.Error((
                f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression", '
                f'got {len(kwargs)} instead ({", ".join(kwargs.keys())})'
            ))
        col_name, spec = next(iter(kwargs.items()))
        col_schema: Dict[str, Any] = {}
        if isinstance(spec, ts.ColumnType):
            # the keyword argument's value already is the column type
            if type is not None:
                raise excs.Error('add_column(): keyword argument "type" is redundant')
            col_schema['type'] = spec
        else:
            # an expression carries its own type; only a Callable needs an explicit one
            if isinstance(spec, exprs.Expr) and type is not None:
                raise excs.Error('add_column(): keyword argument "type" is redundant')
            col_schema['value'] = spec
        if type is not None:
            col_schema['type'] = type
        if stored is not None:
            col_schema['stored'] = stored
        if indexed is not None:
            col_schema['indexed'] = indexed

        new_col = self._create_columns({col_name: col_schema})[0]
        self._verify_column(new_col, set(self.column_names()))
        return self.tbl_version_path.tbl_version.add_column(new_col, print_stats=print_stats)

    @classmethod
    def _validate_column_spec(cls, name: str, spec: Dict[str, Any]) -> None:
        """Check integrity of user-supplied Column spec

        We unfortunately can't use something like jsonschema for validation, because this isn't strictly a JSON schema
        (on account of containing Python Callables or Exprs).

        Raises:
            Error: If the spec contains an unknown key, a value of the wrong kind, or doesn't determine a column type.
        """
        assert isinstance(spec, dict)
        valid_keys = {'type', 'value', 'stored', 'indexed'}
        # set once the column type is known, either from 'type' or from a value expression
        has_type = False
        for k in spec:
            if k not in valid_keys:
                raise excs.Error(f'Column {name}: invalid key {k!r}')

        if 'type' in spec:
            has_type = True
            if not isinstance(spec['type'], ts.ColumnType):
                raise excs.Error(f'Column {name}: "type" must be a ColumnType, got {spec["type"]}')

        if 'value' in spec:
            value_spec = spec['value']
            value_expr = exprs.Expr.from_object(value_spec)
            if value_expr is None:
                # needs to be a Callable
                if not isinstance(value_spec, Callable):
                    raise excs.Error(
                        f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
                        f'but it is a {type(value_spec)}')
                if 'type' not in spec:
                    raise excs.Error(f'Column {name}: "type" is required if value is a Callable')
            else:
                has_type = True
                if 'type' in spec:
                    raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')

        if 'stored' in spec and not isinstance(spec['stored'], bool):
            raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
        if 'indexed' in spec and not isinstance(spec['indexed'], bool):
            raise excs.Error(f'Column {name}: "indexed" must be a bool, got {spec["indexed"]}')
        if not has_type:
            raise excs.Error(f'Column {name}: "type" is required')

    @classmethod
    def _create_columns(cls, schema: Dict[str, Any]) -> List[Column]:
        """Construct list of Columns, given schema

        Each schema value is a ColumnType, an Expr, or a column specification dict (validated with
        ``_validate_column_spec()``).

        Raises:
            Error: If a value is a bare Callable (it needs to be wrapped in a dict that also supplies the type)
                or if a column specification dict is invalid.
        """
        columns: List[Column] = []
        for name, spec in schema.items():
            col_type: Optional[ts.ColumnType] = None
            value_expr: Optional[exprs.Expr] = None
            stored: Optional[bool] = None
            indexed: Optional[bool] = None
            primary_key: Optional[bool] = None

            if isinstance(spec, ts.ColumnType):
                # TODO: create copy
                col_type = spec
            elif isinstance(spec, exprs.Expr):
                # create copy so we can modify it
                value_expr = spec.copy()
            elif isinstance(spec, Callable):
                raise excs.Error((
                    f'Column {name} computed with a Callable: specify using a dictionary with '
                    f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
                ))
            elif isinstance(spec, dict):
                cls._validate_column_spec(name, spec)
                col_type = spec.get('type')
                value_expr = spec.get('value')
                if value_expr is not None and isinstance(value_expr, exprs.Expr):
                    # create copy so we can modify it
                    value_expr = value_expr.copy()
                stored = spec.get('stored')
                indexed = spec.get('indexed')
                # NOTE(review): _validate_column_spec() rejects a 'primary_key' key, so this looks like it is
                # always None here - confirm whether 'primary_key' is meant to be a valid spec key
                primary_key = spec.get('primary_key')

            column = Column(
                name, col_type=col_type, computed_with=value_expr, stored=stored, indexed=indexed,
                primary_key=primary_key)
            columns.append(column)
        return columns

    @classmethod
    def _verify_column(cls, col: Column, existing_column_names: Set[str]) -> None:
        """Check integrity of user-supplied Column and supply defaults

        If ``col.stored`` is None, it is set here: computed image columns without a window function call default
        to not stored, everything else to stored.

        Raises:
            Error: If the name is reserved, invalid or already taken, or if ``stored=False`` isn't applicable.
        """
        if is_system_column_name(col.name):
            raise excs.Error(f'Column name {col.name} is reserved')
        if not is_valid_identifier(col.name):
            raise excs.Error(f"Invalid column name: '{col.name}'")
        if col.name in existing_column_names:
            raise excs.Error(f'Duplicate column name: {col.name}')
        if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
            raise excs.Error(f'Column {col.name}: stored={col.stored} only applies to computed image columns')
        if col.stored is False and not (col.col_type.is_image_type() and not col.has_window_fn_call()):
            raise excs.Error((
                f'Column {col.name}: stored={col.stored} is not valid for image columns computed with a streaming '
                f'function'))
        if col.stored is None:
            col.stored = not (col.is_computed and col.col_type.is_image_type() and not col.has_window_fn_call())

    @classmethod
    def _verify_schema(cls, schema: List[Column]) -> None:
        """Check integrity of user-supplied schema and set defaults"""
        column_names: Set[str] = set()
        for col in schema:
            # each column is checked against the names that precede it, which catches duplicates
            cls._verify_column(col, column_names)
            column_names.add(col.name)

    def drop_column(self, name: str) -> None:
        """Drop a column from the table.

        Args:
            name: The name of the column to drop.

        Raises:
            Error: If the column does not exist or if it is referenced by a computed column.

        Examples:
            Drop column ``factorial``:

            >>> tbl.drop_column('factorial')
        """
        self._check_is_dropped()
        self.tbl_version_path.tbl_version.drop_column(name)

    def rename_column(self, old_name: str, new_name: str) -> None:
        """Rename a column.

        Args:
            old_name: The current name of the column.
            new_name: The new name of the column.

        Raises:
            Error: If the column does not exist or if the new name is invalid or already exists.

        Examples:
            Rename column ``factorial`` to ``fac``:

            >>> tbl.rename_column('factorial', 'fac')
        """
        self._check_is_dropped()
        self.tbl_version_path.tbl_version.rename_column(old_name, new_name)

    def update(
            self, value_spec: Dict[str, Union['pixeltable.exprs.Expr', Any]],
            where: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
    ) -> UpdateStatus:
        """Update rows in this table.

        Args:
            value_spec: a dictionary mapping column names to literal values or Pixeltable expressions.
            where: a Predicate to filter rows to update.
            cascade: if True, also update all computed columns that transitively depend on the updated columns.

        Returns:
            execution status

        Raises:
            Error: If the table has been dropped, if a column is unknown, computed, a primary key or a media column,
                if a value doesn't match the column type, or if ``where`` can't be expressed in SQL.

        Examples:
            Set newly-added column `int_col` to 1 for all rows:

            >>> tbl.update({'int_col': 1})

            Set newly-added column `int_col` to 1 for all rows where `int_col` is 0:

            >>> tbl.update({'int_col': 1}, where=tbl.int_col == 0)

            Set `int_col` to the value of `other_int_col` + 1:

            >>> tbl.update({'int_col': tbl.other_int_col + 1})

            Increment `int_col` by 1 for all rows where `int_col` is 0:

            >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
        """
        # same precondition as the other mutating operations: a dropped table cannot be modified
        self._check_is_dropped()
        from pixeltable import exprs
        update_targets: List[Tuple[Column, exprs.Expr]] = []
        for col_name, val in value_spec.items():
            if not isinstance(col_name, str):
                raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
            col = self.tbl_version_path.get_column(col_name, include_bases=False)
            if col is None:
                # TODO: return more informative error if this is trying to update a base column
                raise excs.Error(f'Column {col_name} unknown')
            if col.is_computed:
                raise excs.Error(f'Column {col_name} is computed and cannot be updated')
            if col.primary_key:
                raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
            if col.col_type.is_media_type():
                raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')

            # make sure that the value is compatible with the column type
            # check if this is a literal
            try:
                value_expr = exprs.Literal(val, col_type=col.col_type)
            except TypeError:
                # it's not a literal, let's try to create an expr from it
                value_expr = exprs.Expr.from_object(val)
                if value_expr is None:
                    raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
                if not col.col_type.matches(value_expr.col_type):
                    raise excs.Error((
                        f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
                        f'{col_name} ({col.col_type})'
                    ))
            update_targets.append((col, value_expr))

        # local import, like the DataFrame imports elsewhere in this class
        from pixeltable.plan import Planner
        if where is not None:
            if not isinstance(where, exprs.Predicate):
                raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
            analysis_info = Planner.analyze(self.tbl_version_path, where)
            if analysis_info.similarity_clause is not None:
                raise excs.Error('nearest() cannot be used with update()')
            # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
            if analysis_info.filter is not None:
                raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')

        return self.tbl_version_path.tbl_version.update(update_targets, where, cascade)

    def revert(self) -> None:
        """Reverts the table to the previous version.

        .. warning::
            This operation is irreversible.
        """
        self._check_is_dropped()
        self.tbl_version_path.tbl_version.revert()