pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (56)
  1. pixeltable/catalog/column.py +25 -48
  2. pixeltable/catalog/insertable_table.py +7 -4
  3. pixeltable/catalog/table.py +163 -57
  4. pixeltable/catalog/table_version.py +416 -140
  5. pixeltable/catalog/table_version_path.py +2 -2
  6. pixeltable/client.py +0 -4
  7. pixeltable/dataframe.py +65 -21
  8. pixeltable/env.py +16 -1
  9. pixeltable/exec/cache_prefetch_node.py +1 -1
  10. pixeltable/exec/in_memory_data_node.py +11 -7
  11. pixeltable/exprs/comparison.py +3 -3
  12. pixeltable/exprs/data_row.py +5 -1
  13. pixeltable/exprs/literal.py +16 -4
  14. pixeltable/exprs/row_builder.py +8 -40
  15. pixeltable/ext/__init__.py +5 -0
  16. pixeltable/ext/functions/yolox.py +92 -0
  17. pixeltable/func/aggregate_function.py +15 -15
  18. pixeltable/func/expr_template_function.py +9 -1
  19. pixeltable/func/globals.py +24 -14
  20. pixeltable/func/signature.py +18 -12
  21. pixeltable/func/udf.py +7 -2
  22. pixeltable/functions/__init__.py +8 -8
  23. pixeltable/functions/eval.py +7 -8
  24. pixeltable/functions/huggingface.py +47 -19
  25. pixeltable/functions/openai.py +2 -2
  26. pixeltable/functions/util.py +11 -0
  27. pixeltable/index/__init__.py +2 -0
  28. pixeltable/index/base.py +49 -0
  29. pixeltable/index/embedding_index.py +95 -0
  30. pixeltable/metadata/schema.py +45 -22
  31. pixeltable/plan.py +15 -34
  32. pixeltable/store.py +38 -41
  33. pixeltable/tests/conftest.py +5 -11
  34. pixeltable/tests/ext/test_yolox.py +21 -0
  35. pixeltable/tests/functions/test_fireworks.py +1 -0
  36. pixeltable/tests/functions/test_huggingface.py +2 -2
  37. pixeltable/tests/functions/test_openai.py +15 -5
  38. pixeltable/tests/functions/test_together.py +1 -0
  39. pixeltable/tests/test_component_view.py +14 -5
  40. pixeltable/tests/test_dataframe.py +19 -18
  41. pixeltable/tests/test_exprs.py +99 -102
  42. pixeltable/tests/test_function.py +51 -43
  43. pixeltable/tests/test_index.py +138 -0
  44. pixeltable/tests/test_migration.py +2 -1
  45. pixeltable/tests/test_snapshot.py +24 -1
  46. pixeltable/tests/test_table.py +101 -25
  47. pixeltable/tests/test_types.py +30 -0
  48. pixeltable/tests/test_video.py +16 -16
  49. pixeltable/tests/test_view.py +5 -0
  50. pixeltable/tests/utils.py +43 -9
  51. pixeltable/tool/create_test_db_dump.py +16 -0
  52. pixeltable/type_system.py +37 -45
  53. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/METADATA +5 -4
  54. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/RECORD +56 -49
  55. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
  56. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0
@@ -4,10 +4,8 @@ import logging
4
4
  from typing import Optional, Union, Callable, Set
5
5
 
6
6
  import sqlalchemy as sql
7
- from pgvector.sqlalchemy import Vector
8
7
 
9
8
  from pixeltable import exceptions as excs
10
- from pixeltable.metadata import schema
11
9
  from pixeltable.type_system import ColumnType, StringType
12
10
  from .globals import is_valid_identifier
13
11
 
@@ -20,44 +18,38 @@ class Column:
20
18
  table/view.
21
19
  """
22
20
  def __init__(
23
- self, name: str, col_type: Optional[ColumnType] = None,
21
+ self, name: Optional[str], col_type: Optional[ColumnType] = None,
24
22
  computed_with: Optional[Union['Expr', Callable]] = None,
25
- primary_key: bool = False, stored: Optional[bool] = None,
26
- indexed: bool = False,
27
- # these parameters aren't set by users
28
- col_id: Optional[int] = None):
23
+ is_pk: bool = False, stored: Optional[bool] = None,
24
+ col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
25
+ schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None
26
+ ):
29
27
  """Column constructor.
30
28
 
31
29
  Args:
32
- name: column name
30
+ name: column name; None for system columns (eg, index columns)
33
31
  col_type: column type; can be None if the type can be derived from ``computed_with``
34
32
  computed_with: a callable or an Expr object that computes the column value
35
- primary_key: if True, this column is part of the primary key
33
+ is_pk: if True, this column is part of the primary key
36
34
  stored: determines whether a computed column is present in the stored table or recomputed on demand
37
- indexed: if True, this column has a nearest neighbor index (only valid for image columns)
38
35
  col_id: column ID (only used internally)
39
36
 
40
37
  Computed columns: those have a non-None ``computed_with`` argument
41
-
42
38
  - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
43
39
  col_type is None
44
40
  - when loaded from md store: ``computed_with`` is set and col_type is set
45
41
 
46
42
  ``computed_with`` is a Callable:
47
-
48
43
  - the callable's parameter names must correspond to existing columns in the table for which this Column
49
44
  is being used
50
45
  - ``col_type`` needs to be set to the callable's return type
51
46
 
52
47
  ``stored`` (only valid for computed image columns):
53
-
54
48
  - if True: the column is present in the stored table
55
49
  - if False: the column is not present in the stored table and recomputed during a query
56
50
  - if None: the system chooses for you (at present, this is always False, but this may change in the future)
57
-
58
- indexed: only valid for image columns; if true, maintains an NN index for this column
59
51
  """
60
- if not is_valid_identifier(name):
52
+ if name is not None and not is_valid_identifier(name):
61
53
  raise excs.Error(f"Invalid column name: '{name}'")
62
54
  self.name = name
63
55
  if col_type is None and computed_with is None:
@@ -90,35 +82,20 @@ class Column:
90
82
  self.stored = stored
91
83
  self.dependent_cols: Set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
92
84
  self.id = col_id
93
- self.primary_key = primary_key
85
+ self.is_pk = is_pk
86
+ self.schema_version_add = schema_version_add
87
+ self.schema_version_drop = schema_version_drop
94
88
 
95
89
  # column in the stored table for the values of this Column
96
90
  self.sa_col: Optional[sql.schema.Column] = None
91
+ self.sa_col_type = sa_col_type
97
92
 
98
93
  # computed cols also have storage columns for the exception string and type
99
94
  self.sa_errormsg_col: Optional[sql.schema.Column] = None
100
95
  self.sa_errortype_col: Optional[sql.schema.Column] = None
101
- # indexed columns also have a column for the embeddings
102
- self.sa_idx_col: Optional[sql.schema.Column] = None
103
96
  from .table_version import TableVersion
104
97
  self.tbl: Optional[TableVersion] = None # set by owning TableVersion
105
98
 
106
- if indexed and not self.col_type.is_image_type():
107
- raise excs.Error(f'Column {name}: indexed=True requires ImageType')
108
- self.is_indexed = indexed
109
-
110
- @classmethod
111
- def from_md(cls, col_id: int, md: schema.SchemaColumn, tbl: 'TableVersion') -> Column:
112
- """Construct a Column from metadata.
113
-
114
- Leaves out value_expr, because that requires TableVersion.cols to be complete.
115
- """
116
- col = cls(
117
- md.name, col_type=ColumnType.from_dict(md.col_type), primary_key=md.is_pk,
118
- stored=md.stored, indexed=md.is_indexed, col_id=col_id)
119
- col.tbl = tbl
120
- return col
121
-
122
99
  def __hash__(self) -> int:
123
100
  assert self.tbl is not None
124
101
  return hash((self.tbl.id, self.id))
@@ -167,26 +144,26 @@ class Column:
167
144
  """
168
145
  assert self.is_stored
169
146
  # all storage columns are nullable (we deal with null errors in Pixeltable directly)
170
- self.sa_col = sql.Column(self.storage_name(), self.col_type.to_sa_type(), nullable=True)
147
+ self.sa_col = sql.Column(
148
+ self.store_name(), self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
149
+ nullable=True)
171
150
  if self.is_computed or self.col_type.is_media_type():
172
- self.sa_errormsg_col = sql.Column(self.errormsg_storage_name(), StringType().to_sa_type(), nullable=True)
173
- self.sa_errortype_col = sql.Column(self.errortype_storage_name(), StringType().to_sa_type(), nullable=True)
174
- if self.is_indexed:
175
- self.sa_idx_col = sql.Column(self.index_storage_name(), Vector(512), nullable=True)
151
+ self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), StringType().to_sa_type(), nullable=True)
152
+ self.sa_errortype_col = sql.Column(self.errortype_store_name(), StringType().to_sa_type(), nullable=True)
176
153
 
177
- def storage_name(self) -> str:
154
+ def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
155
+ return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
156
+
157
+ def store_name(self) -> str:
178
158
  assert self.id is not None
179
159
  assert self.is_stored
180
160
  return f'col_{self.id}'
181
161
 
182
- def errormsg_storage_name(self) -> str:
183
- return f'{self.storage_name()}_errormsg'
184
-
185
- def errortype_storage_name(self) -> str:
186
- return f'{self.storage_name()}_errortype'
162
+ def errormsg_store_name(self) -> str:
163
+ return f'{self.store_name()}_errormsg'
187
164
 
188
- def index_storage_name(self) -> str:
189
- return f'{self.storage_name()}_idx_0'
165
+ def errortype_store_name(self) -> str:
166
+ return f'{self.store_name()}_errortype'
190
167
 
191
168
  def __str__(self) -> str:
192
169
  return f'{self.name}: {self.col_type}'
@@ -11,14 +11,17 @@ import pixeltable.type_system as ts
11
11
  from pixeltable import exceptions as excs
12
12
  from pixeltable.env import Env
13
13
  from .catalog import Catalog
14
+ from .globals import UpdateStatus
14
15
  from .table import Table
15
16
  from .table_version import TableVersion
16
17
  from .table_version_path import TableVersionPath
17
18
 
18
19
  _logger = logging.getLogger('pixeltable')
19
20
 
21
+
20
22
  class InsertableTable(Table):
21
23
  """A `Table` that allows inserting and deleting rows."""
24
+
22
25
  def __init__(self, dir_id: UUID, tbl_version: TableVersion):
23
26
  tbl_version_path = TableVersionPath(tbl_version)
24
27
  super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)
@@ -42,7 +45,7 @@ class InsertableTable(Table):
42
45
  col = columns[column_names.index(pk_col)]
43
46
  if col.col_type.nullable:
44
47
  raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
45
- col.primary_key = True
48
+ col.is_pk = True
46
49
 
47
50
  with orm.Session(Env.get().engine, future=True) as session:
48
51
  _, tbl_version = TableVersion.create(session, dir_id, name, columns, num_retained_versions, comment)
@@ -62,7 +65,7 @@ class InsertableTable(Table):
62
65
  @overload
63
66
  def insert(self, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any): ...
64
67
 
65
- def insert(self, *args, **kwargs) -> Table.UpdateStatus:
68
+ def insert(self, *args, **kwargs) -> UpdateStatus:
66
69
  """Insert rows into table.
67
70
 
68
71
  To insert multiple rows at a time:
@@ -161,7 +164,7 @@ class InsertableTable(Table):
161
164
  msg = str(e)
162
165
  raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')
163
166
 
164
- def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> Table.UpdateStatus:
167
+ def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
165
168
  """Delete rows in this table.
166
169
 
167
170
  Args:
@@ -181,7 +184,7 @@ class InsertableTable(Table):
181
184
  if where is not None:
182
185
  if not isinstance(where, Predicate):
183
186
  raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
184
- analysis_info = Planner.analyze(self.tbl_version, where)
187
+ analysis_info = Planner.analyze(self.tbl_version_path, where)
185
188
  if analysis_info.similarity_clause is not None:
186
189
  raise excs.Error('nearest() cannot be used with delete()')
187
190
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
@@ -1,10 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
- import dataclasses
4
3
  import json
5
4
  import logging
6
5
  from pathlib import Path
7
- from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple
6
+ from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable
8
7
  from uuid import UUID
9
8
 
10
9
  import pandas as pd
@@ -18,7 +17,7 @@ import pixeltable.exprs as exprs
18
17
  import pixeltable.metadata.schema as schema
19
18
  import pixeltable.type_system as ts
20
19
  from .column import Column
21
- from .globals import is_valid_identifier, is_system_column_name
20
+ from .globals import is_valid_identifier, is_system_column_name, UpdateStatus
22
21
  from .schema_object import SchemaObject
23
22
  from .table_version import TableVersion
24
23
  from .table_version_path import TableVersionPath
@@ -28,14 +27,7 @@ _logger = logging.getLogger('pixeltable')
28
27
  class Table(SchemaObject):
29
28
  """Base class for all tabular SchemaObjects."""
30
29
 
31
- @dataclasses.dataclass
32
- class UpdateStatus:
33
- num_rows: int = 0
34
- # TODO: change to num_computed_columns (the number of computed slots isn't really meaningful to the user)
35
- num_computed_values: int = 0
36
- num_excs: int = 0
37
- updated_cols: List[str] = dataclasses.field(default_factory=list)
38
- cols_with_excs: List[str] = dataclasses.field(default_factory=list)
30
+ ROWID_COLUMN_NAME = '_rowid'
39
31
 
40
32
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
41
33
  super().__init__(id, name, dir_id)
@@ -225,7 +217,7 @@ class Table(SchemaObject):
225
217
  value: column type or value expression or column specification dictionary:
226
218
  column type: a Pixeltable column type (if the table already contains rows, it must be nullable)
227
219
  value expression: a Pixeltable expression that computes the column values
228
- column specification: a dictionary with possible keys 'type', 'value', 'stored', 'indexed'
220
+ column specification: a dictionary with possible keys 'type', 'value', 'stored'
229
221
  Examples:
230
222
  Add an int column with ``None`` values:
231
223
 
@@ -247,11 +239,6 @@ class Table(SchemaObject):
247
239
  Do the same, but now the column is stored:
248
240
 
249
241
  >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}
250
-
251
- Add a resized version of the ``frame`` column and index it. The column does not need to be stored in order
252
- to be indexed:
253
-
254
- >>> tbl['small_frame'] = {'value': tbl.frame.resize([224, 224]), 'indexed': True}
255
242
  """
256
243
  if not isinstance(column_name, str):
257
244
  raise excs.Error(f'Column name must be a string, got {type(column_name)}')
@@ -264,8 +251,8 @@ class Table(SchemaObject):
264
251
 
265
252
  def add_column(
266
253
  self, *,
267
- type: Optional[ts.ColumnType] = None, stored: Optional[bool] = None, indexed: Optional[bool] = None,
268
- print_stats: bool = False, **kwargs: Any
254
+ type: Optional[ts.ColumnType] = None, stored: Optional[bool] = None, print_stats: bool = False,
255
+ **kwargs: Any
269
256
  ) -> UpdateStatus:
270
257
  """Adds a column to the table.
271
258
 
@@ -273,7 +260,6 @@ class Table(SchemaObject):
273
260
  kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
274
261
  type: The type of the column. Only valid and required if ``value-expression`` is a Callable.
275
262
  stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
276
- indexed: Whether the column is indexed.
277
263
  print_stats: If ``True``, print execution metrics.
278
264
 
279
265
  Returns:
@@ -318,15 +304,6 @@ class Table(SchemaObject):
318
304
  Alternatively, this can also be expressed as:
319
305
 
320
306
  >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}
321
-
322
- Add a resized version of the ``frame`` column and index it. The column does not need to be stored in order
323
- to be indexed:
324
-
325
- >>> tbl.add_column(small_frame=tbl.frame.resize([224, 224]), indexed=True)
326
-
327
- Alternatively, this can also be expressed as:
328
-
329
- >>> tbl['small_frame'] = {'value': tbl.frame.resize([224, 224]), 'indexed': True}
330
307
  """
331
308
  self._check_is_dropped()
332
309
  # verify kwargs and construct column schema dict
@@ -349,8 +326,6 @@ class Table(SchemaObject):
349
326
  col_schema['type'] = type
350
327
  if stored is not None:
351
328
  col_schema['stored'] = stored
352
- if indexed is not None:
353
- col_schema['indexed'] = indexed
354
329
 
355
330
  new_col = self._create_columns({col_name: col_schema})[0]
356
331
  self._verify_column(new_col, self.column_names())
@@ -364,7 +339,7 @@ class Table(SchemaObject):
364
339
  (on account of containing Python Callables or Exprs).
365
340
  """
366
341
  assert isinstance(spec, dict)
367
- valid_keys = {'type', 'value', 'stored', 'indexed'}
342
+ valid_keys = {'type', 'value', 'stored'}
368
343
  has_type = False
369
344
  for k in spec.keys():
370
345
  if k not in valid_keys:
@@ -393,8 +368,6 @@ class Table(SchemaObject):
393
368
 
394
369
  if 'stored' in spec and not isinstance(spec['stored'], bool):
395
370
  raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
396
- if 'indexed' in spec and not isinstance(spec['indexed'], bool):
397
- raise excs.Error(f'Column {name}: "indexed" must be a bool, got {spec["indexed"]}')
398
371
  if not has_type:
399
372
  raise excs.Error(f'Column {name}: "type" is required')
400
373
 
@@ -406,7 +379,6 @@ class Table(SchemaObject):
406
379
  col_type: Optional[ts.ColumnType] = None
407
380
  value_expr: Optional[exprs.Expr] = None
408
381
  stored: Optional[bool] = None
409
- indexed: Optional[bool] = None
410
382
  primary_key: Optional[bool] = None
411
383
 
412
384
  if isinstance(spec, ts.ColumnType):
@@ -428,12 +400,10 @@ class Table(SchemaObject):
428
400
  # create copy so we can modify it
429
401
  value_expr = value_expr.copy()
430
402
  stored = spec.get('stored')
431
- indexed = spec.get('indexed')
432
403
  primary_key = spec.get('primary_key')
433
404
 
434
405
  column = Column(
435
- name, col_type=col_type, computed_with=value_expr, stored=stored, indexed=indexed,
436
- primary_key=primary_key)
406
+ name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key)
437
407
  columns.append(column)
438
408
  return columns
439
409
 
@@ -498,9 +468,85 @@ class Table(SchemaObject):
498
468
  self._check_is_dropped()
499
469
  self.tbl_version_path.tbl_version.rename_column(old_name, new_name)
500
470
 
471
+ def add_embedding_index(
472
+ self, col_name: str, *, idx_name: Optional[str] = None,
473
+ text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None
474
+ ) -> None:
475
+ """Add an index to the table.
476
+ Args:
477
+ col_name: name of column to index
478
+ idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
479
+ idx_type: type of index (one of 'embedding')
480
+
481
+ Raises:
482
+ Error: If an index with that name already exists for the table or if the column does not exist.
483
+
484
+ Examples:
485
+ Add an index to the ``img`` column:
486
+
487
+ >>> tbl.add_embedding_index('img', text_embed=...)
488
+
489
+ Add another index to the ``img`` column, with a specific name:
490
+
491
+ >>> tbl.add_embedding_index('img', idx_name='clip_idx', text_embed=...)
492
+ """
493
+ if self.tbl_version_path.is_snapshot():
494
+ raise excs.Error('Cannot add an index to a snapshot')
495
+ self._check_is_dropped()
496
+ col = self.tbl_version_path.get_column(col_name, include_bases=True)
497
+ if col is None:
498
+ raise excs.Error(f'Column {col_name} unknown')
499
+ if idx_name is not None and idx_name in self.tbl_version_path.tbl_version.idxs_by_name:
500
+ raise excs.Error(f'Duplicate index name: {idx_name}')
501
+ from pixeltable.index import EmbeddingIndex
502
+ # create the EmbeddingIndex instance to verify args
503
+ idx = EmbeddingIndex(col, text_embed=text_embed, img_embed=img_embed)
504
+ status = self.tbl_version_path.tbl_version.add_index(col, idx_name=idx_name, idx=idx)
505
+ # TODO: how to deal with exceptions here? drop the index and raise?
506
+
507
+ def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
508
+ """Drop an index from the table.
509
+
510
+ Args:
511
+ column_name: The name of the column whose index to drop. Invalid if the column has multiple indices.
512
+ idx_name: The name of the index to drop.
513
+
514
+ Raises:
515
+ Error: If the index does not exist.
516
+
517
+ Examples:
518
+ Drop index on the ``img`` column:
519
+
520
+ >>> tbl.drop_index(column_name='img')
521
+ """
522
+ if self.tbl_version_path.is_snapshot():
523
+ raise excs.Error('Cannot drop an index from a snapshot')
524
+ self._check_is_dropped()
525
+ if (column_name is None) == (idx_name is None):
526
+ raise excs.Error('Exactly one of column_name or idx_name must be provided')
527
+ tbl_version = self.tbl_version_path.tbl_version
528
+
529
+ if idx_name is not None:
530
+ if idx_name not in tbl_version.idxs_by_name:
531
+ raise excs.Error(f'Index {idx_name} does not exist')
532
+ idx_id = tbl_version.idxs_by_name[idx_name].id
533
+ else:
534
+ col = self.tbl_version_path.get_column(column_name, include_bases=True)
535
+ if col is None:
536
+ raise excs.Error(f'Column {column_name} unknown')
537
+ if col.tbl.id != tbl_version.id:
538
+ raise excs.Error(
539
+ f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
540
+ idx_ids = [info.id for info in tbl_version.idxs_by_name.values() if info.col.id == col.id]
541
+ if len(idx_ids) == 0:
542
+ raise excs.Error(f'Column {column_name} does not have an index')
543
+ if len(idx_ids) > 1:
544
+ raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
545
+ idx_id = idx_ids[0]
546
+ self.tbl_version_path.tbl_version.drop_index(idx_id)
547
+
501
548
  def update(
502
- self, value_spec: Dict[str, Union['pixeltable.exprs.Expr', Any]],
503
- where: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
549
+ self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
504
550
  ) -> UpdateStatus:
505
551
  """Update rows in this table.
506
552
 
@@ -510,11 +556,11 @@ class Table(SchemaObject):
510
556
  cascade: if True, also update all computed columns that transitively depend on the updated columns.
511
557
 
512
558
  Examples:
513
- Set newly-added column `int_col` to 1 for all rows:
559
+ Set column `int_col` to 1 for all rows:
514
560
 
515
561
  >>> tbl.update({'int_col': 1})
516
562
 
517
- Set newly-added column `int_col` to 1 for all rows where `int_col` is 0:
563
+ Set column `int_col` to 1 for all rows where `int_col` is 0:
518
564
 
519
565
  >>> tbl.update({'int_col': 1}, where=tbl.int_col == 0)
520
566
 
@@ -526,27 +572,95 @@ class Table(SchemaObject):
526
572
 
527
573
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
528
574
  """
575
+ if self.tbl_version_path.is_snapshot():
576
+ raise excs.Error('Cannot update a snapshot')
577
+ self._check_is_dropped()
578
+
579
+ update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
580
+ from pixeltable.plan import Planner
581
+ if where is not None:
582
+ if not isinstance(where, exprs.Predicate):
583
+ raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
584
+ analysis_info = Planner.analyze(self.tbl_version_path, where)
585
+ if analysis_info.similarity_clause is not None:
586
+ raise excs.Error('nearest() cannot be used with update()')
587
+ # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
588
+ if analysis_info.filter is not None:
589
+ raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
590
+
591
+ return self.tbl_version_path.tbl_version.update(update_spec, where, cascade)
592
+
593
+ def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
594
+ """Update rows in this table.
595
+
596
+ Args:
597
+ rows: an Iterable of dictionaries containing values for the updated columns plus values for the primary key
598
+ columns.
599
+ cascade: if True, also update all computed columns that transitively depend on the updated columns.
600
+
601
+ Examples:
602
+ Update the 'name' and 'age' columns for the rows with ids 1 and 2 (assuming 'id' is the primary key):
603
+
604
+ >>> tbl.batch_update([{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}])
605
+ """
606
+ if self.tbl_version_path.is_snapshot():
607
+ raise excs.Error('Cannot update a snapshot')
608
+ self._check_is_dropped()
609
+
610
+ row_updates: List[Dict[Column, exprs.Expr]] = []
611
+ pk_col_names = set(c.name for c in self.tbl_version_path.tbl_version.primary_key_columns())
612
+
613
+ # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
614
+ has_rowid = self.ROWID_COLUMN_NAME in rows[0]
615
+ rowids: list[Tuple[int, ...]] = []
616
+ if len(pk_col_names) == 0 and not has_rowid:
617
+ raise excs.Error('Table must have primary key for batch update')
618
+
619
+ for row_spec in rows:
620
+ col_vals = self._validate_update_spec(row_spec, allow_pk=not has_rowid, allow_exprs=False)
621
+ if has_rowid:
622
+ # we expect the _rowid column to be present for each row
623
+ assert self.ROWID_COLUMN_NAME in row_spec
624
+ rowids.append(row_spec[self.ROWID_COLUMN_NAME])
625
+ else:
626
+ col_names = set(col.name for col in col_vals.keys())
627
+ if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
628
+ missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
629
+ raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
630
+ row_updates.append(col_vals)
631
+ return self.tbl_version_path.tbl_version.batch_update(row_updates, rowids, cascade)
632
+
633
+ def _validate_update_spec(
634
+ self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
635
+ ) -> dict[Column, 'pixeltable.exprs.Expr']:
529
636
  from pixeltable import exprs
530
- update_targets: List[Tuple[Column, exprs.Expr]] = []
637
+ update_targets: dict[Column, exprs.Expr] = {}
531
638
  for col_name, val in value_spec.items():
532
639
  if not isinstance(col_name, str):
533
640
  raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
641
+ if col_name == self.ROWID_COLUMN_NAME:
642
+ # ignore pseudo-column _rowid
643
+ continue
534
644
  col = self.tbl_version_path.get_column(col_name, include_bases=False)
535
645
  if col is None:
536
646
  # TODO: return more informative error if this is trying to update a base column
537
647
  raise excs.Error(f'Column {col_name} unknown')
538
648
  if col.is_computed:
539
649
  raise excs.Error(f'Column {col_name} is computed and cannot be updated')
540
- if col.primary_key:
650
+ if col.is_pk and not allow_pk:
541
651
  raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
542
652
  if col.col_type.is_media_type():
543
653
  raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
544
654
 
545
655
  # make sure that the value is compatible with the column type
546
- # check if this is a literal
547
656
  try:
657
+ # check if this is a literal
548
658
  value_expr = exprs.Literal(val, col_type=col.col_type)
549
659
  except TypeError:
660
+ if not allow_exprs:
661
+ raise excs.Error(
662
+ f'Column {col_name}: value {val!r} is not a valid literal for this column '
663
+ f'(expected {col.col_type})')
550
664
  # it's not a literal, let's try to create an expr from it
551
665
  value_expr = exprs.Expr.from_object(val)
552
666
  if value_expr is None:
@@ -556,20 +670,10 @@ class Table(SchemaObject):
556
670
  f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
557
671
  f'{col_name} ({col.col_type})'
558
672
  ))
559
- update_targets.append((col, value_expr))
673
+ update_targets[col] = value_expr
560
674
 
561
- from pixeltable.plan import Planner
562
- if where is not None:
563
- if not isinstance(where, exprs.Predicate):
564
- raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
565
- analysis_info = Planner.analyze(self.tbl_version_path, where)
566
- if analysis_info.similarity_clause is not None:
567
- raise excs.Error('nearest() cannot be used with update()')
568
- # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
569
- if analysis_info.filter is not None:
570
- raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
675
+ return update_targets
571
676
 
572
- return self.tbl_version_path.tbl_version.update(update_targets, where, cascade)
573
677
 
574
678
  def revert(self) -> None:
575
679
  """Reverts the table to the previous version.
@@ -577,5 +681,7 @@ class Table(SchemaObject):
577
681
  .. warning::
578
682
  This operation is irreversible.
579
683
  """
684
+ if self.tbl_version_path.is_snapshot():
685
+ raise excs.Error('Cannot revert a snapshot')
580
686
  self._check_is_dropped()
581
687
  self.tbl_version_path.tbl_version.revert()