pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,695 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable
7
+ from uuid import UUID
8
+
9
+ import pandas as pd
10
+ import sqlalchemy as sql
11
+
12
+ import pixeltable
13
+ import pixeltable.catalog as catalog
14
+ import pixeltable.env as env
15
+ import pixeltable.exceptions as excs
16
+ import pixeltable.exprs as exprs
17
+ import pixeltable.metadata.schema as schema
18
+ import pixeltable.type_system as ts
19
+ from .column import Column
20
+ from .globals import is_valid_identifier, is_system_column_name, UpdateStatus
21
+ from .schema_object import SchemaObject
22
+ from .table_version import TableVersion
23
+ from .table_version_path import TableVersionPath
24
+
25
+ _logger = logging.getLogger('pixeltable')
26
+
27
+ class Table(SchemaObject):
28
+ """Base class for all tabular SchemaObjects."""
29
+
30
+ ROWID_COLUMN_NAME = '_rowid'
31
+
32
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
    """Create a handle for a table.

    Args:
        id: forwarded to ``SchemaObject.__init__()``.
        dir_id: forwarded to ``SchemaObject.__init__()``.
        name: forwarded to ``SchemaObject.__init__()``.
        tbl_version_path: the TableVersionPath that queries and schema changes are delegated to.
    """
    super().__init__(id, name, dir_id)
    # a new handle starts out live; _drop() flips is_dropped
    self.tbl_version_path, self.is_dropped = tbl_version_path, False
36
+
37
def move(self, new_name: str, new_dir_id: UUID) -> None:
    """Move this table to a new name/directory and write the change to the table's metadata row.

    Args:
        new_name: the new table name; stored JSON-encoded under the ``'name'`` key of the metadata column.
        new_dir_id: id of the new parent directory.
    """
    super().move(new_name, new_dir_id)
    tbl_md = schema.Table
    # identifiers come from the ORM schema, values are passed as bind parameters
    update_sql = (
        f"UPDATE {tbl_md.__table__} "
        f"SET {tbl_md.dir_id.name} = :new_dir_id, "
        f" {tbl_md.md.name}['name'] = :new_name "
        f"WHERE {tbl_md.id.name} = :id"
    )
    with env.Env.get().engine.begin() as conn:
        bind_params = {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id}
        conn.execute(sql.text(update_sql), bind_params)
46
+
47
def version(self) -> int:
    """Return the version number of this table's TableVersion (tests use it to detect version changes)."""
    tbl_version = self.tbl_version_path.tbl_version
    return tbl_version.version
50
+
51
def _tbl_version(self) -> TableVersion:
    """Return the TableVersion of this table only (``tbl_version_path.tbl_version``)."""
    path = self.tbl_version_path
    return path.tbl_version
54
+
55
def __hash__(self) -> int:
    """Hash the table by the id of its TableVersion."""
    tbl_id = self._tbl_version().id
    return hash(tbl_id)
57
+
58
def _check_is_dropped(self) -> None:
    """Guard for operations that need a live table.

    Raises:
        Error: if ``is_dropped`` is set.
    """
    if not self.is_dropped:
        return
    raise excs.Error(f'{self.display_name()} {self.name} has been dropped')
61
+
62
def __getattr__(self, col_name: str) -> 'pixeltable.exprs.ColumnRef':
    """Return a ColumnRef for the given column name.

    Python calls this only after normal attribute lookup has failed; the lookup is then delegated to
    ``tbl_version_path``.

    Raises:
        AttributeError: if ``tbl_version_path`` itself is not set yet, or whatever the delegated lookup raises.
    """
    # If 'tbl_version_path' is missing (instance created without __init__, e.g. by copy or pickle), the
    # delegation below would look it up through this same method and recurse until RecursionError.
    if col_name == 'tbl_version_path':
        raise AttributeError(col_name)
    return getattr(self.tbl_version_path, col_name)
66
+
67
def __getitem__(self, index: object) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.dataframe.DataFrame']:
    """Return a ColumnRef for the given column name, or a DataFrame for the given slice.

    The lookup is forwarded unchanged to ``tbl_version_path.__getitem__()``.
    """
    path = self.tbl_version_path
    return path.__getitem__(index)
71
+
72
def df(self) -> 'pixeltable.dataframe.DataFrame':
    """Return a new DataFrame built on this table's ``tbl_version_path``."""
    # imported here rather than at module level to avoid a circular import
    from pixeltable.dataframe import DataFrame
    path = self.tbl_version_path
    return DataFrame(path)
78
+
79
def select(self, *items: Any, **named_items: Any) -> 'pixeltable.dataframe.DataFrame':
    """Return the result of ``DataFrame.select(*items, **named_items)`` on a DataFrame over this table."""
    # imported here rather than at module level to avoid a circular import
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.select(*items, **named_items)
85
+
86
def where(self, pred: 'exprs.Predicate') -> 'pixeltable.dataframe.DataFrame':
    """Return the result of ``DataFrame.where(pred)`` on a DataFrame over this table."""
    # imported here rather than at module level to avoid a circular import
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.where(pred)
92
+
93
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.dataframe.DataFrame':
    """Return the result of ``DataFrame.order_by(*items, asc=asc)`` on a DataFrame over this table."""
    # imported here rather than at module level to avoid a circular import
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.order_by(*items, asc=asc)
99
+
100
def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
    """Return the result of ``DataFrame.group_by(*items)`` on a DataFrame over this table."""
    # imported here rather than at module level (same local-import pattern as df()/select())
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.group_by(*items)
104
+
105
def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
    """Return rows from this table; shorthand for ``self.df().collect()``."""
    frame = self.df()
    return frame.collect()
109
+
110
def show(
        self, *args, **kwargs
) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
    """Return rows from this table; all arguments are passed through to ``DataFrame.show()``."""
    frame = self.df()
    return frame.show(*args, **kwargs)
116
+
117
def head(
        self, *args, **kwargs
) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
    """Return the first n rows inserted into this table; arguments are passed through to ``DataFrame.head()``."""
    frame = self.df()
    return frame.head(*args, **kwargs)
122
+
123
def tail(
        self, *args, **kwargs
) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
    """Return the last n rows inserted into this table; arguments are passed through to ``DataFrame.tail()``."""
    frame = self.df()
    return frame.tail(*args, **kwargs)
128
+
129
def count(self) -> int:
    """Return the number of rows in this table; shorthand for ``self.df().count()``."""
    frame = self.df()
    return frame.count()
132
+
133
def column_names(self) -> List[str]:
    """Return the names of the columns in this table, in the order of ``tbl_version_path.columns()``."""
    names: List[str] = []
    for col in self.tbl_version_path.columns():
        names.append(col.name)
    return names
136
+
137
def column_types(self) -> Dict[str, ts.ColumnType]:
    """Return a dict mapping each column name of this table to its column type."""
    types_by_name: Dict[str, ts.ColumnType] = {}
    for col in self.tbl_version_path.columns():
        types_by_name[col.name] = col.col_type
    return types_by_name
140
+
141
@property
def comment(self) -> str:
    """The table comment, read from ``self.tbl_version.comment``."""
    # NOTE(review): this class never assigns 'tbl_version'; presumably it resolves through __getattr__
    # to tbl_version_path.tbl_version -- confirm.
    tbl_version = self.tbl_version
    return tbl_version.comment

@comment.setter
def comment(self, new_comment: Optional[str]):
    """Store a new comment via ``tbl_version.set_comment()``."""
    tbl_version = self.tbl_version
    tbl_version.set_comment(new_comment)
148
+
149
@property
def num_retained_versions(self):
    """The ``num_retained_versions`` setting, read from ``self.tbl_version``."""
    tbl_version = self.tbl_version
    return tbl_version.num_retained_versions

@num_retained_versions.setter
def num_retained_versions(self, new_num_retained_versions: int):
    """Store a new value via ``tbl_version.set_num_retained_versions()``."""
    tbl_version = self.tbl_version
    tbl_version.set_num_retained_versions(new_num_retained_versions)
156
+
157
def _description(self) -> pd.DataFrame:
    """Build a pandas DataFrame describing the schema, one row per column.

    The frame has the columns 'Column Name', 'Type' (``str()`` of the column type) and 'Computed With'
    (the value expression's ``display_str(inline=False)``, or '' if the column has no value expression).
    """
    names, type_strs, computed_with = [], [], []
    for col in self.tbl_version_path.columns():
        names.append(col.name)
        type_strs.append(str(col.col_type))
        value_expr = col.value_expr
        computed_with.append('' if value_expr is None else value_expr.display_str(inline=False))
    return pd.DataFrame({'Column Name': names, 'Type': type_strs, 'Computed With': computed_with})
165
+
166
def _description_html(self) -> 'pd.io.formats.style.Styler':
    """Return the schema description as a styled object for HTML rendering.

    The value is whatever the ``.style`` chain on the frame from ``_description()`` returns (a pandas Styler),
    not a DataFrame; ``_repr_html_()`` and ``describe()`` consume it.
    """
    pd_df = self._description()
    styler = pd_df.style.set_properties(**{
        'white-space': 'pre-wrap',  # print \n as newline
        'text-align': 'left',
    })
    # th: center-align headings
    styler = styler.set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
    return styler.hide(axis='index')
173
+
174
def describe(self) -> None:
    """Display the schema of this table.

    Shows the styled description via ``IPython.display.display()`` when the name ``__IPYTHON__`` is defined;
    otherwise prints the plain-text ``__repr__()``.
    """
    try:
        # referencing the bare name raises NameError when it is not defined, which selects the print() fallback
        __IPYTHON__
        from IPython.display import display
        display(self._description_html())
    except NameError:
        print(self.__repr__())
181
+
182
+ # TODO: Display comments in _repr_html()
183
def __repr__(self) -> str:
    """Return a plain-text description: a header line, the comment (if any), then the schema listing."""
    description_str = self._description().to_string(index=False)
    # the comment gets its own line; a missing comment contributes nothing
    comment = '' if self.comment is None else f'{self.comment}\n'
    return f'{self.display_name()} \'{self._name}\'\n{comment}{description_str}'
190
+
191
def _repr_html_(self) -> str:
    """Return the HTML produced by the styled description's own ``_repr_html_()``."""
    styled = self._description_html()
    return styled._repr_html_()
193
+
194
def _drop(self) -> None:
    """Drop this table: drop its TableVersion, mark the handle as dropped and remove it from the catalog.

    Raises:
        Error: if the table has already been dropped.
    """
    self._check_is_dropped()
    tbl_version = self.tbl_version_path.tbl_version
    tbl_version.drop()
    self.is_dropped = True
    # update catalog
    del catalog.Catalog.get().tbls[self._id]
201
+
202
+ # TODO Factor this out into a separate module.
203
+ # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
204
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
    """Return a PyTorch Dataset for this table.

    Args:
        image_format: passed through to ``DataFrame.to_pytorch_dataset()``; see there for valid values.
    """
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.to_pytorch_dataset(image_format=image_format)
210
+
211
def to_coco_dataset(self) -> Path:
    """Return the path to a COCO json file for this table; see ``DataFrame.to_coco_dataset()``."""
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.to_coco_dataset()
217
+
218
def __setitem__(self, column_name: str, value: Union[ts.ColumnType, exprs.Expr, Callable, dict]) -> None:
    """Adds a column to the table.

    Args:
        column_name: the name of the new column
        value: column type or value expression or column specification dictionary:
            column type: a Pixeltable column type (if the table already contains rows, it must be nullable)
            value expression: a Pixeltable expression that computes the column values
            column specification: a dictionary with possible keys 'type', 'value', 'stored'

    Raises:
        Error: if the table has been dropped, or the column name is not a string, is not a valid identifier,
            is reserved or already exists.

    Examples:
        Add an int column with ``None`` values:

        >>> tbl['new_col'] = IntType(nullable=True)

        For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
        the parameters of the Callable must correspond to existing column names (the column values are then passed
        as arguments to the Callable). In this case, the return type cannot be inferred and needs to be specified
        explicitly:

        >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}

        For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
        90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
        (by default, computed image columns are not stored but recomputed on demand):

        >>> tbl['rotated'] = tbl.frame.rotate(90)

        Do the same, but now the column is stored:

        >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}
    """
    # same guard as add_column(): a dropped table must not accept schema changes
    self._check_is_dropped()
    if not isinstance(column_name, str):
        raise excs.Error(f'Column name must be a string, got {type(column_name)}')
    if not is_valid_identifier(column_name):
        raise excs.Error(f'Invalid column name: {column_name!r}')

    new_col = self._create_columns({column_name: value})[0]
    self._verify_column(new_col, self.column_names())
    # item assignment discards any return value, so nothing is returned here (matches the -> None annotation)
    self.tbl_version_path.tbl_version.add_column(new_col)
256
+
257
def add_column(
        self, *,
        type: Optional[ts.ColumnType] = None, stored: Optional[bool] = None, print_stats: bool = False,
        **kwargs: Any
) -> UpdateStatus:
    """Adds a column to the table.

    Args:
        kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
        type: The type of the column. Only valid and required if ``value-expression`` is a Callable.
        stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
        print_stats: If ``True``, print execution metrics.

    Returns:
        execution status

    Raises:
        Error: If the column name is invalid or already exists.

    Examples:
        Add an int column with ``None`` values:

        >>> tbl.add_column(new_col=IntType())

        Alternatively, this can also be expressed as:

        >>> tbl['new_col'] = IntType()

        For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
        the parameters of the Callable must correspond to existing column names (the column values are then passed
        as arguments to the Callable). In this case, the column type needs to be specified explicitly:

        >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=IntType())

        Alternatively, this can also be expressed as:

        >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}

        For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
        90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
        (by default, computed image columns are not stored but recomputed on demand):

        >>> tbl.add_column(rotated=tbl.frame.rotate(90))

        Alternatively, this can also be expressed as:

        >>> tbl['rotated'] = tbl.frame.rotate(90)

        Do the same, but now the column is stored:

        >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=True)

        Alternatively, this can also be expressed as:

        >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}
    """
    self._check_is_dropped()
    # the single entry in kwargs names the new column
    if len(kwargs) != 1:
        raise excs.Error((
            f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression", '
            f'got {len(kwargs)} instead ({", ".join(kwargs)})'
        ))
    ((col_name, spec),) = kwargs.items()

    # an explicit type only makes sense for a Callable: a ColumnType is the type, an Expr carries its own
    if type is not None and isinstance(spec, (ts.ColumnType, exprs.Expr)):
        raise excs.Error('add_column(): keyword argument "type" is redundant')

    # translate the arguments into the dict form understood by _create_columns()
    col_schema: Dict[str, Any]
    if isinstance(spec, ts.ColumnType):
        col_schema = {'type': spec}
    else:
        col_schema = {'value': spec}
        if type is not None:
            col_schema['type'] = type
    if stored is not None:
        col_schema['stored'] = stored

    (new_col,) = self._create_columns({col_name: col_schema})[:1]
    self._verify_column(new_col, self.column_names())
    return self.tbl_version_path.tbl_version.add_column(new_col, print_stats=print_stats)
338
+
339
@classmethod
def _validate_column_spec(cls, name: str, spec: Dict[str, Any]) -> None:
    """Check integrity of a user-supplied column specification dict.

    A schema library such as jsonschema cannot be used here because the spec is not plain JSON: it may
    contain Python Callables or Pixeltable expressions.

    Args:
        name: column name, used in error messages only.
        spec: dict with the optional keys 'type', 'value' and 'stored'.

    Raises:
        Error: on an unknown key, a 'type' that is not a ColumnType, a 'value' that is neither an expression
            nor a Callable, a missing or redundant 'type', or a non-bool 'stored'.
    """
    assert isinstance(spec, dict)
    allowed_keys = {'type', 'value', 'stored'}
    for key in spec:
        if key not in allowed_keys:
            raise excs.Error(f'Column {name}: invalid key {key!r}')

    # the column type is known if it is given explicitly or if the value is a Pixeltable expression
    type_known = 'type' in spec
    if type_known and not isinstance(spec['type'], ts.ColumnType):
        raise excs.Error(f'Column {name}: "type" must be a ColumnType, got {spec["type"]}')

    if 'value' in spec:
        value_spec = spec['value']
        if exprs.Expr.from_object(value_spec) is not None:
            # an expression carries its own type
            type_known = True
            if 'type' in spec:
                raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
        else:
            # needs to be a Callable
            if not isinstance(value_spec, Callable):
                raise excs.Error(
                    f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
                    f'but it is a {type(value_spec)}')
            if 'type' not in spec:
                raise excs.Error(f'Column {name}: "type" is required if value is a Callable')

    if 'stored' in spec and not isinstance(spec['stored'], bool):
        raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
    if not type_known:
        raise excs.Error(f'Column {name}: "type" is required')
378
+
379
@classmethod
def _create_columns(cls, schema: Dict[str, Any]) -> List[Column]:
    """Construct a list of Columns from a schema dict.

    Args:
        schema: maps column name to a ColumnType, a Pixeltable expression or a specification dict
            (which is checked with ``_validate_column_spec()``).

    Returns:
        One Column per schema entry, in the iteration order of ``schema``.

    Raises:
        Error: if a spec is a bare Callable, or a specification dict fails validation.
    """
    result: List[Column] = []
    for name, spec in schema.items():
        col_type: Optional[ts.ColumnType] = None
        value_expr: Optional[exprs.Expr] = None
        stored: Optional[bool] = None
        primary_key: Optional[bool] = None

        if isinstance(spec, ts.ColumnType):
            # TODO: create copy
            col_type = spec
        elif isinstance(spec, exprs.Expr):
            # work on a copy so the caller's expression is not modified
            value_expr = spec.copy()
        elif isinstance(spec, Callable):
            raise excs.Error((
                f'Column {name} computed with a Callable: specify using a dictionary with '
                f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
            ))
        elif isinstance(spec, dict):
            cls._validate_column_spec(name, spec)
            col_type, value_expr = spec.get('type'), spec.get('value')
            if isinstance(value_expr, exprs.Expr):
                # work on a copy so the caller's expression is not modified
                value_expr = value_expr.copy()
            stored = spec.get('stored')
            # NOTE(review): _validate_column_spec() rejects any key other than 'type'/'value'/'stored',
            # so 'primary_key' looks unreachable here -- confirm.
            primary_key = spec.get('primary_key')

        result.append(
            Column(name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key))
    return result
414
+
415
@classmethod
def _verify_column(cls, col: Column, existing_column_names: Set[str]) -> None:
    """Check integrity of a user-supplied Column and fill in the default for ``stored``.

    Args:
        col: the column to check; ``col.stored`` is assigned if it is None.
        existing_column_names: names already in use; only membership is tested.

    Raises:
        Error: if the name is reserved, invalid or a duplicate, or if ``stored=False`` is requested for a
            column that is not a computed image column or that has a window function call.
    """
    if is_system_column_name(col.name):
        raise excs.Error(f'Column name {col.name} is reserved')
    if not is_valid_identifier(col.name):
        raise excs.Error(f"Invalid column name: '{col.name}'")
    if col.name in existing_column_names:
        raise excs.Error(f'Duplicate column name: {col.name}')

    if col.stored is False:
        # stored=False is only accepted for computed image columns without a window function call
        if not (col.is_computed and col.col_type.is_image_type()):
            raise excs.Error(f'Column {col.name}: stored={col.stored} only applies to computed image columns')
        if not col.col_type.is_image_type() or col.has_window_fn_call():
            raise excs.Error((
                f'Column {col.name}: stored={col.stored} is not valid for image columns computed with a streaming '
                f'function'))
    elif col.stored is None:
        # default: store everything except computed image columns without a window function call
        col.stored = not (col.is_computed and col.col_type.is_image_type() and not col.has_window_fn_call())
432
+
433
@classmethod
def _verify_schema(cls, schema: List[Column]) -> None:
    """Check integrity of a user-supplied schema and set defaults.

    Each column goes through ``_verify_column()`` together with the names of the columns before it, so a
    repeated name is reported as a duplicate.
    """
    seen_names: Set[str] = set()
    for column in schema:
        cls._verify_column(column, seen_names)
        seen_names.add(column.name)
440
+
441
def drop_column(self, name: str) -> None:
    """Drop a column from the table.

    Args:
        name: The name of the column to drop.

    Raises:
        Error: If the column does not exist or if it is referenced by a computed column.

    Examples:
        Drop column ``factorial``:

        >>> tbl.drop_column('factorial')
    """
    self._check_is_dropped()
    tbl_version = self.tbl_version_path.tbl_version
    tbl_version.drop_column(name)
457
+
458
def rename_column(self, old_name: str, new_name: str) -> None:
    """Rename a column.

    Args:
        old_name: The current name of the column.
        new_name: The new name of the column.

    Raises:
        Error: If the column does not exist or if the new name is invalid or already exists.

    Examples:
        Rename column ``factorial`` to ``fac``:

        >>> tbl.rename_column('factorial', 'fac')
    """
    self._check_is_dropped()
    tbl_version = self.tbl_version_path.tbl_version
    tbl_version.rename_column(old_name, new_name)
475
+
476
def add_embedding_index(
        self, col_name: str, *, idx_name: Optional[str] = None,
        text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None,
        metric: str = 'cosine'
) -> None:
    """Add an index to the table.

    Args:
        col_name: name of column to index
        idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
        text_embed: function to embed text; required if the column is a text column
        img_embed: function to embed images; required if the column is an image column
        metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'

    Raises:
        Error: If the table is a snapshot or has been dropped, if an index with that name already exists for
            the table, or if the column does not exist.

    Examples:
        Add an index to the ``img`` column:

        >>> tbl.add_embedding_index('img', img_embed=...)

        Add another index to the ``img`` column, using the inner product as the distance metric,
        and with a specific name; ``text_embed`` is also specified in order to search with text:

        >>> tbl.add_embedding_index(
            'img', idx_name='clip_idx', img_embed=..., text_embed=...text_embed..., metric='ip')
    """
    if self.tbl_version_path.is_snapshot():
        raise excs.Error('Cannot add an index to a snapshot')
    self._check_is_dropped()
    col = self.tbl_version_path.get_column(col_name, include_bases=True)
    if col is None:
        raise excs.Error(f'Column {col_name} unknown')
    if idx_name is not None and idx_name in self.tbl_version_path.tbl_version.idxs_by_name:
        raise excs.Error(f'Duplicate index name: {idx_name}')
    from pixeltable.index import EmbeddingIndex
    # create the EmbeddingIndex instance to verify args
    idx = EmbeddingIndex(col, metric=metric, text_embed=text_embed, img_embed=img_embed)
    # the returned status is not used: this method returns None
    self.tbl_version_path.tbl_version.add_index(col, idx_name=idx_name, idx=idx)
    # TODO: how to deal with exceptions here? drop the index and raise?
516
+
517
def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
    """Drop an index from the table.

    Args:
        column_name: The name of the column whose index to drop. Invalid if the column has multiple indices.
        idx_name: The name of the index to drop.

    Raises:
        Error: If the table is a snapshot or has been dropped, if not exactly one of ``column_name`` and
            ``idx_name`` is given, or if the index cannot be identified (unknown index or column, column
            belonging to a base, column with no index or with several).

    Examples:
        Drop index on the ``img`` column:

        >>> tbl.drop_index(column_name='img')
    """
    path = self.tbl_version_path
    if path.is_snapshot():
        raise excs.Error('Cannot drop an index from a snapshot')
    self._check_is_dropped()
    if (column_name is None) == (idx_name is None):
        raise excs.Error('Exactly one of column_name or idx_name must be provided')
    tbl_version = path.tbl_version

    if idx_name is None:
        # resolve the index through its column; this only works if the column has exactly one index
        col = path.get_column(column_name, include_bases=True)
        if col is None:
            raise excs.Error(f'Column {column_name} unknown')
        if col.tbl.id != tbl_version.id:
            raise excs.Error(
                f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
        matching_ids = [info.id for info in tbl_version.idxs_by_name.values() if info.col.id == col.id]
        if len(matching_ids) == 0:
            raise excs.Error(f'Column {column_name} does not have an index')
        if len(matching_ids) > 1:
            raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
        idx_id = matching_ids[0]
    else:
        if idx_name not in tbl_version.idxs_by_name:
            raise excs.Error(f'Index {idx_name} does not exist')
        idx_id = tbl_version.idxs_by_name[idx_name].id
    tbl_version.drop_index(idx_id)
557
+
558
def update(
        self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
) -> UpdateStatus:
    """Update rows in this table.

    Args:
        value_spec: a dictionary mapping column names to literal values or Pixeltable expressions.
        where: a Predicate to filter rows to update.
        cascade: if True, also update all computed columns that transitively depend on the updated columns.

    Raises:
        Error: if the table is a snapshot or has been dropped, if ``value_spec`` is invalid, or if ``where``
            is not a Predicate or cannot be expressed in SQL.

    Examples:
        Set column `int_col` to 1 for all rows:

        >>> tbl.update({'int_col': 1})

        Set column `int_col` to 1 for all rows where `int_col` is 0:

        >>> tbl.update({'int_col': 1}, where=tbl.int_col == 0)

        Set `int_col` to the value of `other_int_col` + 1:

        >>> tbl.update({'int_col': tbl.other_int_col + 1})

        Increment `int_col` by 1 for all rows where `int_col` is 0:

        >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
    """
    path = self.tbl_version_path
    if path.is_snapshot():
        raise excs.Error('Cannot update a snapshot')
    self._check_is_dropped()

    update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
    from pixeltable.plan import Planner
    if where is not None:
        if not isinstance(where, exprs.Predicate):
            raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
        # for now the updated rows must be identifiable via SQL, rather than via a Python filter
        residual_filter = Planner.analyze(path, where).filter
        if residual_filter is not None:
            raise excs.Error(f'Filter {residual_filter} not expressible in SQL')

    return path.tbl_version.update(update_spec, where, cascade)
600
+
601
def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
    """Update rows in this table.

    Args:
        rows: an Iterable of dictionaries containing values for the updated columns plus values for the primary key
            columns. Instead of the primary key, every row may carry the pseudo-column ``_rowid``; whether
            ``_rowid`` is used is decided by looking at the first row.
        cascade: if True, also update all computed columns that transitively depend on the updated columns.

    Raises:
        Error: if the table is a snapshot or has been dropped, if ``rows`` is empty, if the table has no
            primary key and ``_rowid`` is not used, or if a row lacks its ``_rowid`` / primary key values.

    Examples:
        Update the 'name' and 'age' columns for the rows with ids 1 and 2 (assuming 'id' is the primary key):

        >>> tbl.batch_update([{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}])
    """
    if self.tbl_version_path.is_snapshot():
        raise excs.Error('Cannot update a snapshot')
    self._check_is_dropped()

    # rows may be any Iterable (e.g. a generator): materialize it, because the first row is inspected
    # before the full pass below
    rows = list(rows)
    if len(rows) == 0:
        raise excs.Error('batch_update(): at least one row is required')

    row_updates: List[Dict[Column, exprs.Expr]] = []
    pk_col_names = {c.name for c in self.tbl_version_path.tbl_version.primary_key_columns()}

    # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
    has_rowid = self.ROWID_COLUMN_NAME in rows[0]
    rowids: list[Tuple[int, ...]] = []
    if len(pk_col_names) == 0 and not has_rowid:
        raise excs.Error('Table must have primary key for batch update')

    for row_spec in rows:
        col_vals = self._validate_update_spec(row_spec, allow_pk=not has_rowid, allow_exprs=False)
        if has_rowid:
            # the _rowid column must be present in every row; this is user input, so raise instead of assert
            if self.ROWID_COLUMN_NAME not in row_spec:
                raise excs.Error(f'{self.ROWID_COLUMN_NAME} missing in {row_spec}')
            rowids.append(row_spec[self.ROWID_COLUMN_NAME])
        else:
            missing_cols = pk_col_names - {col.name for col in col_vals}
            if missing_cols:
                # sorted: keep the message independent of set iteration order
                raise excs.Error(
                    f'Primary key columns ({", ".join(sorted(missing_cols))}) missing in {row_spec}')
        row_updates.append(col_vals)
    return self.tbl_version_path.tbl_version.batch_update(row_updates, rowids, cascade)
640
+
641
def _validate_update_spec(
        self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
) -> dict[Column, 'pixeltable.exprs.Expr']:
    """Resolve an update specification into a dict mapping Column to value expression.

    Args:
        value_spec: maps column names to new values; the pseudo-column ``_rowid`` is skipped.
        allow_pk: if False, primary key columns are rejected.
        allow_exprs: if False, every value must be a literal for its column's type.

    Returns:
        A dict from the resolved Column to a Literal or other expression for the new value.

    Raises:
        Error: if a key is not a string, the column is unknown, computed, a disallowed primary key or of a
            media type, or the value is not acceptable for the column.
    """
    from pixeltable import exprs
    targets: dict[Column, exprs.Expr] = {}
    for col_name, new_val in value_spec.items():
        if not isinstance(col_name, str):
            raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
        if col_name == self.ROWID_COLUMN_NAME:
            # ignore pseudo-column _rowid
            continue
        col = self.tbl_version_path.get_column(col_name, include_bases=False)
        if col is None:
            # TODO: return more informative error if this is trying to update a base column
            raise excs.Error(f'Column {col_name} unknown')
        if col.is_computed:
            raise excs.Error(f'Column {col_name} is computed and cannot be updated')
        if col.is_pk and not allow_pk:
            raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
        if col.col_type.is_media_type():
            raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')

        # make sure that the value is compatible with the column type
        try:
            # first attempt: treat the value as a literal of the column's type
            value_expr = exprs.Literal(new_val, col_type=col.col_type)
        except TypeError:
            if not allow_exprs:
                raise excs.Error(
                    f'Column {col_name}: value {new_val!r} is not a valid literal for this column '
                    f'(expected {col.col_type})')
            # not a literal: fall back to interpreting it as an expression
            value_expr = exprs.Expr.from_object(new_val)
            if value_expr is None:
                raise excs.Error(f'Column {col_name}: value {new_val!r} is not a recognized literal or expression')
            if not col.col_type.matches(value_expr.col_type):
                raise excs.Error((
                    f'Type of value {new_val!r} ({value_expr.col_type}) is not compatible with the type of column '
                    f'{col_name} ({col.col_type})'
                ))
        targets[col] = value_expr

    return targets
684
+
685
+
686
def revert(self) -> None:
    """Reverts the table to the previous version.

    .. warning::
        This operation is irreversible.

    Raises:
        Error: if the table is a snapshot or has been dropped.
    """
    path = self.tbl_version_path
    if path.is_snapshot():
        raise excs.Error('Cannot revert a snapshot')
    self._check_is_dropped()
    path.tbl_version.revert()