pixeltable 0.2.15__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (52)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/column.py +3 -0
  3. pixeltable/catalog/dir.py +1 -1
  4. pixeltable/catalog/globals.py +15 -6
  5. pixeltable/catalog/insertable_table.py +23 -8
  6. pixeltable/catalog/named_function.py +1 -1
  7. pixeltable/catalog/path_dict.py +4 -4
  8. pixeltable/catalog/schema_object.py +30 -18
  9. pixeltable/catalog/table.py +84 -99
  10. pixeltable/catalog/table_version.py +35 -24
  11. pixeltable/catalog/table_version_path.py +2 -2
  12. pixeltable/catalog/view.py +15 -8
  13. pixeltable/dataframe.py +56 -56
  14. pixeltable/env.py +6 -5
  15. pixeltable/exec/__init__.py +3 -3
  16. pixeltable/exec/aggregation_node.py +3 -3
  17. pixeltable/exec/expr_eval_node.py +3 -3
  18. pixeltable/exec/in_memory_data_node.py +4 -4
  19. pixeltable/exec/sql_node.py +4 -1
  20. pixeltable/exprs/array_slice.py +3 -4
  21. pixeltable/exprs/column_ref.py +20 -4
  22. pixeltable/exprs/comparison.py +11 -6
  23. pixeltable/exprs/data_row.py +3 -0
  24. pixeltable/exprs/expr.py +51 -23
  25. pixeltable/exprs/function_call.py +8 -1
  26. pixeltable/exprs/inline_array.py +2 -2
  27. pixeltable/exprs/json_path.py +36 -20
  28. pixeltable/exprs/row_builder.py +4 -4
  29. pixeltable/exprs/rowid_ref.py +1 -1
  30. pixeltable/functions/__init__.py +1 -2
  31. pixeltable/functions/audio.py +32 -0
  32. pixeltable/functions/huggingface.py +4 -4
  33. pixeltable/functions/image.py +1 -1
  34. pixeltable/functions/video.py +5 -1
  35. pixeltable/functions/vision.py +2 -6
  36. pixeltable/globals.py +57 -28
  37. pixeltable/io/external_store.py +4 -4
  38. pixeltable/io/globals.py +12 -13
  39. pixeltable/io/label_studio.py +6 -6
  40. pixeltable/io/pandas.py +27 -12
  41. pixeltable/io/parquet.py +14 -14
  42. pixeltable/iterators/document.py +7 -7
  43. pixeltable/plan.py +58 -29
  44. pixeltable/store.py +32 -31
  45. pixeltable/tool/create_test_db_dump.py +12 -6
  46. pixeltable/type_system.py +89 -97
  47. pixeltable/utils/pytorch.py +12 -10
  48. {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/METADATA +10 -10
  49. {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/RECORD +52 -51
  50. {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/LICENSE +0 -0
  51. {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/WHEEL +0 -0
  52. {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/entry_points.txt +0 -0
pixeltable/plan.py CHANGED
@@ -1,8 +1,9 @@
-from typing import Tuple, Optional, List, Set, Any, Dict
+from typing import Any, Iterable, List, Optional, Sequence, Set, Tuple
 from uuid import UUID
 
 import sqlalchemy as sql
 
+import pixeltable as pxt
 import pixeltable.exec as exec
 import pixeltable.func as func
 from pixeltable import catalog
@@ -39,7 +40,7 @@ class Analyzer:
     """Class to perform semantic analysis of a query and to store the analysis state"""
 
     def __init__(
-            self, tbl: catalog.TableVersionPath, select_list: List[exprs.Expr],
+            self, tbl: catalog.TableVersionPath, select_list: Sequence[exprs.Expr],
             where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
             order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None):
         if group_by_clause is None:
@@ -68,7 +69,7 @@ class Analyzer:
         # all exprs that are evaluated in Python; not executable
         self.all_exprs = self.select_list.copy()
         self.all_exprs.extend(self.group_by_clause)
-        self.all_exprs.extend([e for e, _ in self.order_by_clause])
+        self.all_exprs.extend(e for e, _ in self.order_by_clause)
         if self.filter is not None:
             self.all_exprs.append(self.filter)
         self.sql_exprs = list(exprs.Expr.list_subexprs(
@@ -84,7 +85,7 @@ class Analyzer:
 
     def _analyze_agg(self) -> None:
         """Check semantic correctness of aggregation and fill in agg-specific fields of Analyzer"""
-        self.agg_fn_calls = [e for e in self.all_exprs if _is_agg_fn_call(e)]
+        self.agg_fn_calls = [e for e in self.all_exprs if isinstance(e, exprs.FunctionCall) and _is_agg_fn_call(e)]
         if len(self.agg_fn_calls) == 0:
             # nothing to do
             return
@@ -98,7 +99,7 @@ class Analyzer:
 
         # check that filter doesn't contain aggregates
         if self.filter is not None:
-            agg_fn_calls = [e for e in self.filter.subexprs(filter=lambda e: _is_agg_fn_call(e))]
+            agg_fn_calls = [e for e in self.filter.subexprs(expr_class=exprs.FunctionCall, filter=lambda e: _is_agg_fn_call(e))]
             if len(agg_fn_calls) > 0:
                 raise excs.Error(f'Filter cannot contain aggregate functions: {self.filter}')
 
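Note: the two hunks above share one pattern — `subexprs()` is now called with `expr_class=exprs.FunctionCall`, which restricts traversal to `FunctionCall` nodes, so the `filter` callback no longer needs its own `isinstance` check and type checkers see the narrowed element type. A minimal sketch of the pattern, assuming the 0.2.16 `subexprs` signature:

    # subexprs() yields only FunctionCall nodes here, so the callback can
    # safely assume its argument is a FunctionCall
    agg_fn_calls = list(self.filter.subexprs(expr_class=exprs.FunctionCall, filter=_is_agg_fn_call))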
@@ -111,7 +112,7 @@ class Analyzer:
                 raise excs.Error(f'Grouping expression contains aggregate function: {e}')
 
         # check that agg fn calls don't have contradicting ordering requirements
-        order_by: List[exprs.Exprs] = []
+        order_by: list[exprs.Expr] = []
         order_by_origin: Optional[exprs.Expr] = None  # the expr that determines the ordering
         for agg_fn_call in self.agg_fn_calls:
             fn_call_order_by = agg_fn_call.get_agg_order_by()
@@ -185,7 +186,7 @@ class Planner:
     def create_count_stmt(
         cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None
     ) -> sql.Select:
-        stmt = sql.select(sql.func.count('*'))
+        stmt = sql.select(sql.func.count())
         refd_tbl_ids: Set[UUID] = set()
         if where_clause is not None:
             analyzer = cls.analyze(tbl, where_clause)
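The switch from `sql.func.count('*')` to `sql.func.count()` is more than cosmetic: in SQLAlchemy, `count('*')` binds the string `'*'` as a parameter (counting a constant string per row), whereas the zero-argument form renders the idiomatic `COUNT(*)`. A runnable illustration:

    import sqlalchemy as sql

    # count('*') binds '*' as a parameter: SELECT count(:param) ...
    print(sql.select(sql.func.count('*')))
    # count() renders the idiomatic form:  SELECT count(*) ...
    print(sql.select(sql.func.count()))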
@@ -200,7 +201,7 @@ class Planner:
 
     @classmethod
     def create_insert_plan(
-        cls, tbl: catalog.TableVersion, rows: List[Dict[str, Any]], ignore_errors: bool
+        cls, tbl: catalog.TableVersion, rows: list[dict[str, Any]], ignore_errors: bool
     ) -> exec.ExecNode:
         """Creates a plan for TableVersion.insert()"""
         assert not tbl.is_view()
@@ -214,12 +215,12 @@ class Planner:
         stored_col_info = row_builder.output_slot_idxs()
         stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
         input_col_info = [info for info in stored_col_info if not info.col.is_computed]
-        plan = exec.InMemoryDataNode(tbl, rows, row_builder, tbl.next_rowid)
+        plan: exec.ExecNode = exec.InMemoryDataNode(tbl, rows, row_builder, tbl.next_rowid)
 
         media_input_cols = [info for info in input_col_info if info.col.col_type.is_media_type()]
         if len(media_input_cols) > 0:
             # prefetch external files for all input column refs for validation
-            plan = exec.CachePrefetchNode(tbl.id, media_input_cols, plan)
+            plan = exec.CachePrefetchNode(tbl.id, media_input_cols, input=plan)
             plan = exec.MediaValidationNode(row_builder, media_input_cols, input=plan)
 
         computed_exprs = [e for e in row_builder.default_eval_ctx.target_exprs if not isinstance(e, exprs.ColumnRef)]
@@ -234,6 +235,34 @@ class Planner:
                 ignore_errors=ignore_errors))
         return plan
 
+    @classmethod
+    def create_df_insert_plan(
+        cls,
+        tbl: catalog.TableVersion,
+        df: 'pxt.DataFrame',
+        ignore_errors: bool
+    ) -> exec.ExecNode:
+        assert not tbl.is_view()
+        plan = df._create_query_plan()  # ExecNode constructed by the DataFrame
+
+        # Modify the plan RowBuilder to register the output columns
+        for col_name, expr in zip(df.schema.keys(), df._select_list_exprs):
+            assert col_name in tbl.cols_by_name
+            col = tbl.cols_by_name[col_name]
+            plan.row_builder.add_table_column(col, expr.slot_idx)
+
+        stored_col_info = plan.row_builder.output_slot_idxs()
+        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
+        plan.set_stored_img_cols(stored_img_col_info)
+
+        plan.set_ctx(
+            exec.ExecContext(
+                plan.row_builder, batch_size=0, show_pbar=True, num_computed_exprs=0,
+                ignore_errors=ignore_errors))
+        plan.ctx.num_rows = 0  # Unknown
+
+        return plan
+
     @classmethod
     def create_update_plan(
         cls, tbl: catalog.TableVersionPath,
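The new `create_df_insert_plan` reuses the ExecNode pipeline that the DataFrame query already built and rewires the RowBuilder's outputs onto the destination table's columns, so inserting query results avoids a round trip through in-memory row dicts. A hypothetical usage sketch (table, column, and variable names are illustrative, not from this diff):

    # insert the result of a DataFrame query into a table whose columns
    # match the query's select list (src_tbl and dest_tbl are placeholders)
    df = src_tbl.where(src_tbl.score > 0.5).select(id=src_tbl.id, score=src_tbl.score)
    plan = Planner.create_df_insert_plan(dest_tbl.tbl_version, df, ignore_errors=False)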
@@ -258,7 +287,7 @@ class Planner:
         target = tbl.tbl_version  # the one we need to update
         updated_cols = list(update_targets.keys())
         if len(recompute_targets) > 0:
-            recomputed_cols = recompute_targets.copy()
+            recomputed_cols = set(recompute_targets)
         else:
             recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
         # regardless of cascade, we need to update all indices on any updated column
@@ -270,13 +299,13 @@ class Planner:
         copied_cols = [
             col for col in target.cols if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
         ]
-        select_list = [exprs.ColumnRef(col) for col in copied_cols]
+        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
         select_list.extend(update_targets.values())
 
         recomputed_exprs = \
             [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
         # recomputed cols reference the new values of the updated cols
-        spec = {exprs.ColumnRef(col): e for col, e in update_targets.items()}
+        spec: dict[exprs.Expr, exprs.Expr] = {exprs.ColumnRef(col): e for col, e in update_targets.items()}
         exprs.Expr.list_substitute(recomputed_exprs, spec)
         select_list.extend(recomputed_exprs)
 
@@ -284,16 +313,17 @@ class Planner:
         plan = cls.create_query_plan(tbl, select_list, where_clause=where_clause, with_pk=True, ignore_errors=True)
         all_base_cols = copied_cols + updated_cols + list(recomputed_base_cols)  # same order as select_list
         # update row builder with column information
-        [plan.row_builder.add_table_column(col, select_list[i].slot_idx) for i, col in enumerate(all_base_cols)]
+        for i, col in enumerate(all_base_cols):
+            plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
 
     @classmethod
     def create_batch_update_plan(
-        cls, tbl: catalog.TableVersionPath,
-        batch: list[dict[catalog.Column, exprs.Expr]], rowids: list[tuple[int, ...]],
-        cascade: bool
-    ) -> Tuple[exec.ExecNode, exec.RowUpdateNode, sql.ClauseElement, List[catalog.Column], List[catalog.Column]]:
+        cls, tbl: catalog.TableVersionPath,
+        batch: list[dict[catalog.Column, exprs.Expr]], rowids: list[tuple[int, ...]],
+        cascade: bool
+    ) -> tuple[exec.ExecNode, exec.RowUpdateNode, sql.ColumnElement[bool], list[catalog.Column], list[catalog.Column]]:
         """
         Returns:
             - root node of the plan to produce the updated rows
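This hunk (and the matching one at @@ -398,10 +429,8 below) replaces a list comprehension executed purely for its side effects with a plain loop; the loop form allocates no throwaway list and makes the mutation explicit:

    # before: builds and discards a list of None return values
    [plan.row_builder.add_table_column(col, select_list[i].slot_idx) for i, col in enumerate(all_base_cols)]

    # after: same effect, clearer intent
    for i, col in enumerate(all_base_cols):
        plan.row_builder.add_table_column(col, select_list[i].slot_idx)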
@@ -327,7 +357,7 @@ class Planner:
             col for col in target.cols if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
         ]
         select_list = [exprs.ColumnRef(col) for col in copied_cols]
-        select_list.extend([exprs.ColumnRef(col) for col in updated_cols])
+        select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
 
         recomputed_exprs = \
             [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
@@ -341,10 +371,11 @@ class Planner:
         analyzer = Analyzer(tbl, select_list)
         row_builder = exprs.RowBuilder(analyzer.all_exprs, [], analyzer.sql_exprs)
         analyzer.finalize(row_builder)
-        plan = exec.SqlLookupNode(tbl, row_builder, analyzer.sql_exprs, sa_key_cols, key_vals)
-        delete_where_clause = plan.where_clause
+        sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, analyzer.sql_exprs, sa_key_cols, key_vals)
+        delete_where_clause = sql_lookup_node.where_clause
         col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
-        plan = row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, plan)
+        row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, sql_lookup_node)
+        plan: exec.ExecNode = row_update_node
         if not cls._is_contained_in(analyzer.select_list, analyzer.sql_exprs):
             # we need an ExprEvalNode to evaluate the remaining output exprs
             plan = exec.ExprEvalNode(row_builder, analyzer.select_list, analyzer.sql_exprs, input=plan)
@@ -388,7 +419,7 @@ class Planner:
         # retrieve all stored cols and all target exprs
         recomputed_cols = set(recompute_targets.copy())
         copied_cols = [col for col in target.cols if col.is_stored and not col in recomputed_cols]
-        select_list = [exprs.ColumnRef(col) for col in copied_cols]
+        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
         # resolve recomputed exprs to stored columns in the base
         recomputed_exprs = \
             [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_cols) for c in recomputed_cols]
@@ -398,10 +429,8 @@ class Planner:
         plan = cls.create_query_plan(
             view, select_list, where_clause=target.predicate, with_pk=True, ignore_errors=True,
             exact_version_only=view.get_bases())
-        [
+        for i, col in enumerate(copied_cols + list(recomputed_cols)):  # same order as select_list
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-            for i, col in enumerate(copied_cols + list(recomputed_cols))  # same order as select_list
-        ]
         # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
         stored_img_col_info = \
             [info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
@@ -532,7 +561,7 @@ class Planner:
                 return False
             tbl = e.col.tbl
             return tbl.is_component_view() and tbl.is_iterator_column(e.col) and not e.col.is_stored
-        unstored_iter_col_refs = list(exprs.Expr.list_subexprs(analyzer.all_exprs, filter=refs_unstored_iter_col))
+        unstored_iter_col_refs = list(exprs.Expr.list_subexprs(analyzer.all_exprs, expr_class=exprs.ColumnRef, filter=refs_unstored_iter_col))
         if len(unstored_iter_col_refs) > 0 and len(order_by_items) == 0:
             # we don't already have a user-requested ordering and we access unstored iterator columns:
             # order by the primary key of the component view, which minimizes the number of iterator instantiations
@@ -554,9 +583,9 @@ class Planner:
         return order_by_items
 
     @classmethod
-    def _is_contained_in(cls, l1: List[exprs.Expr], l2: List[exprs.Expr]) -> bool:
+    def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
         """Returns True if l1 is contained in l2"""
-        s1, s2 = set([e.id for e in l1]), set([e.id for e in l2])
+        s1, s2 = set(e.id for e in l1), set(e.id for e in l2)
         return s1 <= s2
 
     @classmethod
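`_is_contained_in` now accepts any `Iterable` and builds its sets from generator expressions instead of intermediate lists; the containment test itself is Python's subset operator:

    # set.__le__ is the subset test: s1 <= s2 iff every element of s1 is in s2
    s1, s2 = {1, 2}, {1, 2, 3}
    assert s1 <= s2
    assert not (s2 <= s1)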
pixeltable/store.py CHANGED
@@ -7,7 +7,7 @@ import sys
 import urllib.parse
 import urllib.request
 import warnings
-from typing import Optional, Dict, Any, List, Tuple, Set
+from typing import Optional, Dict, Any, List, Tuple, Set, Union
 
 import sqlalchemy as sql
 from tqdm import tqdm, TqdmWarning
@@ -15,10 +15,8 @@ from tqdm import tqdm, TqdmWarning
 import pixeltable.catalog as catalog
 import pixeltable.env as env
 from pixeltable import exprs
-import pixeltable.exceptions as excs
 from pixeltable.exec import ExecNode
 from pixeltable.metadata import schema
-from pixeltable.type_system import StringType
 from pixeltable.utils.media_store import MediaStore
 from pixeltable.utils.sql import log_stmt, log_explain
 
@@ -34,10 +32,15 @@ class StoreBase:
     - v_max: version at which the row was deleted (or MAX_VERSION if it's still live)
     """
 
+    __INSERT_BATCH_SIZE = 1000
+
     def __init__(self, tbl_version: catalog.TableVersion):
         self.tbl_version = tbl_version
         self.sa_md = sql.MetaData()
         self.sa_tbl: Optional[sql.Table] = None
+        # We need to declare a `base` variable here, even though it's only defined for instances of `StoreView`,
+        # since it's referenced by various methods of `StoreBase`
+        self.base = None if tbl_version.base is None else tbl_version.base.store_tbl
         self.create_sa_tbl()
 
     def pk_columns(self) -> List[sql.Column]:
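`__INSERT_BATCH_SIZE` replaces the hard-coded `batch_size = 16` removed from the insert loop further down. The double-underscore prefix means Python name-mangles the attribute to `_StoreBase__INSERT_BATCH_SIZE`, so subclasses like `StoreView` cannot accidentally shadow it. A quick illustration of the mangling rule (class names here are generic, not from this diff):

    class Base:
        __BATCH = 1000  # stored on the class as _Base__BATCH

    class Sub(Base):
        pass

    assert Sub._Base__BATCH == 1000  # resolved via the mangled name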
@@ -49,7 +52,6 @@ class StoreBase:
     @abc.abstractmethod
     def _create_rowid_columns(self) -> List[sql.Column]:
         """Create and return rowid columns"""
-        pass
 
     @abc.abstractmethod
     def _create_system_columns(self) -> List[sql.Column]:
@@ -61,7 +63,6 @@ class StoreBase:
         self._pk_columns = [*rowid_cols, self.v_min_col]
         return [*rowid_cols, self.v_min_col, self.v_max_col]
 
-
     def create_sa_tbl(self) -> None:
         """Create self.sa_tbl from self.tbl_version."""
         system_cols = self._create_system_columns()
@@ -96,14 +97,12 @@ class StoreBase:
         self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
 
     @abc.abstractmethod
-    def _rowid_join_predicate(self) -> sql.ClauseElement:
+    def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
         """Return predicate for rowid joins to all bases"""
-        pass
 
     @abc.abstractmethod
     def _storage_name(self) -> str:
         """Return the name of the data store table"""
-        pass
 
     def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column, v_min: int) -> str:
         """Move tmp media file with given url to Env.media_dir and return new url, or given url if not a tmp_dir file"""
@@ -158,10 +157,12 @@ class StoreBase:
 
     def count(self, conn: Optional[sql.engine.Connection] = None) -> int:
         """Return the number of rows visible in self.tbl_version"""
-        stmt = sql.select(sql.func.count('*'))\
-            .select_from(self.sa_tbl)\
-            .where(self.v_min_col <= self.tbl_version.version)\
+        stmt = (
+            sql.select(sql.func.count('*'))  # type: ignore
+            .select_from(self.sa_tbl)
+            .where(self.v_min_col <= self.tbl_version.version)
             .where(self.v_max_col > self.tbl_version.version)
+        )
         if conn is None:
             with env.Env.get().engine.connect() as conn:
                 result = conn.execute(stmt).scalar_one()
@@ -191,12 +192,12 @@ class StoreBase:
         added_storage_cols = [col.store_name()]
         if col.records_errors:
             # we also need to create the errormsg and errortype storage cols
-            stmt = (f'ALTER TABLE {self._storage_name()} '
-                f'ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL')
-            conn.execute(sql.text(stmt))
-            stmt = (f'ALTER TABLE {self._storage_name()} '
-                f'ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL')
-            conn.execute(sql.text(stmt))
+            stmt = sql.text(f'ALTER TABLE {self._storage_name()} '
+                f'ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL')
+            conn.execute(stmt)
+            stmt = sql.text(f'ALTER TABLE {self._storage_name()} '
+                f'ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL')
+            conn.execute(stmt)
         added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
         self.create_sa_tbl()
         _logger.info(f'Added columns {added_storage_cols} to storage table {self._storage_name()}')
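Constructing the statement with `sql.text()` up front (instead of wrapping the raw string at execute time) types `stmt` as an executable object from the start, which matches SQLAlchemy 2.0, where passing a plain string to `Connection.execute()` is no longer supported. Minimal sketch (table and column names illustrative):

    import sqlalchemy as sql

    stmt = sql.text('ALTER TABLE tbl_123 ADD COLUMN col_errormsg VARCHAR DEFAULT NULL')
    # conn.execute(stmt)  # a bare string here raises ObjectNotExecutableError in 2.0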
@@ -299,7 +300,6 @@ class StoreBase:
         """
         assert v_min is not None
         exec_plan.ctx.set_conn(conn)
-        batch_size = 16  # TODO: is this a good batch size?
         # TODO: total?
         num_excs = 0
         num_rows = 0
@@ -311,10 +311,10 @@ class StoreBase:
             exec_plan.open()
             for row_batch in exec_plan:
                 num_rows += len(row_batch)
-                for batch_start_idx in range(0, len(row_batch), batch_size):
+                for batch_start_idx in range(0, len(row_batch), self.__INSERT_BATCH_SIZE):
                     # compute batch of rows and convert them into table rows
                     table_rows: List[Dict[str, Any]] = []
-                    for row_idx in range(batch_start_idx, min(batch_start_idx + batch_size, len(row_batch))):
+                    for row_idx in range(batch_start_idx, min(batch_start_idx + self.__INSERT_BATCH_SIZE, len(row_batch))):
                         row = row_batch[row_idx]
                         table_row, num_row_exc = \
                             self._create_table_row(row, row_builder, media_cols, cols_with_excs, v_min=v_min)
@@ -340,7 +340,7 @@ class StoreBase:
         finally:
             exec_plan.close()
 
-    def _versions_clause(self, versions: List[Optional[int]], match_on_vmin: bool) -> sql.ClauseElement:
+    def _versions_clause(self, versions: list[Optional[int]], match_on_vmin: bool) -> sql.ColumnElement[bool]:
         """Return filter for base versions"""
         v = versions[0]
         if v is None:
@@ -355,7 +355,7 @@ class StoreBase:
 
     def delete_rows(
         self, current_version: int, base_versions: List[Optional[int]], match_on_vmin: bool,
-        where_clause: Optional[sql.ClauseElement], conn: sql.engine.Connection) -> int:
+        where_clause: Optional[sql.ColumnElement[bool]], conn: sql.engine.Connection) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
         Also: populate the undo columns
         Args:
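Several signatures in this file (and in plan.py) narrow `sql.ClauseElement` to `sql.ColumnElement[bool]`. `ClauseElement` is the root of SQLAlchemy's expression hierarchy and includes whole statements; `ColumnElement[bool]` is specifically a boolean-valued expression, i.e. exactly what `.where()` expects, so type checkers can now flag a non-predicate argument. A sketch of a correctly typed predicate helper (column objects illustrative):

    import sqlalchemy as sql

    def live_rows(v_min: sql.Column, v_max: sql.Column, version: int) -> sql.ColumnElement[bool]:
        # and_() of two comparisons is itself a boolean column expression
        return sql.and_(v_min <= version, v_max > version)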
@@ -375,17 +375,19 @@ class StoreBase:
         rowid_join_clause = self._rowid_join_predicate()
         base_versions_clause = sql.true() if len(base_versions) == 0 \
             else self.base._versions_clause(base_versions, match_on_vmin)
-        set_clause = {self.v_max_col: current_version}
+        set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
         for index_info in self.tbl_version.idxs_by_name.values():
             # copy value column to undo column
             set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
             # set value column to NULL
             set_clause[index_info.val_col.sa_col] = None
-        stmt = sql.update(self.sa_tbl) \
-            .values(set_clause) \
-            .where(where_clause) \
-            .where(rowid_join_clause) \
+        stmt = (
+            sql.update(self.sa_tbl)
+            .values(set_clause)
+            .where(where_clause)
+            .where(rowid_join_clause)
             .where(base_versions_clause)
+        )
         log_explain(_logger, stmt, conn)
         status = conn.execute(stmt)
         return status.rowcount
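The parenthesized chain replaces backslash continuations, which break silently if a line is reordered or a space follows the backslash; behavior is unchanged, since consecutive `.where()` calls are ANDed together. Self-contained sketch (illustrative table):

    import sqlalchemy as sql

    md = sql.MetaData()
    t = sql.Table('t', md, sql.Column('v_min', sql.Integer), sql.Column('v_max', sql.Integer))

    # the two .where() calls combine as AND, as if written .where(sql.and_(...))
    stmt = (
        sql.update(t)
        .values({t.c.v_max: 5})
        .where(t.c.v_min <= 5)
        .where(t.c.v_max > 5)
    )
    print(stmt)  # UPDATE t SET v_max=... WHERE t.v_min <= ... AND t.v_max > ...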
@@ -403,14 +405,13 @@ class StoreTable(StoreBase):
     def _storage_name(self) -> str:
         return f'tbl_{self.tbl_version.id.hex}'
 
-    def _rowid_join_predicate(self) -> sql.ClauseElement:
+    def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
         return sql.true()
 
 
 class StoreView(StoreBase):
     def __init__(self, catalog_view: catalog.TableVersion):
         assert catalog_view.is_view()
-        self.base = catalog_view.base.store_tbl
         super().__init__(catalog_view)
 
     def _create_rowid_columns(self) -> List[sql.Column]:
@@ -421,7 +422,7 @@ class StoreView(StoreBase):
     def _storage_name(self) -> str:
         return f'view_{self.tbl_version.id.hex}'
 
-    def _rowid_join_predicate(self) -> sql.ClauseElement:
+    def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
         return sql.and_(
             self.base._rowid_join_predicate(),
             *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())])
@@ -448,7 +449,7 @@ class StoreComponentView(StoreView):
         # we need to fix up the 'pos' column in TableVersion
         self.tbl_version.cols_by_name['pos'].sa_col = self.pos_col
 
-    def _rowid_join_predicate(self) -> sql.ClauseElement:
+    def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
         return sql.and_(
             self.base._rowid_join_predicate(),
             *[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())])
pixeltable/tool/create_test_db_dump.py CHANGED
@@ -44,19 +44,24 @@ class Dumper:
         pg_package_dir = os.path.dirname(pixeltable_pgserver.__file__)
         pg_dump_binary = f'{pg_package_dir}/pginstall/bin/pg_dump'
         _logger.info(f'Using pg_dump binary at: {pg_dump_binary}')
+        # We need the raw DB URL, without a driver qualifier. (The driver qualifier is needed by
+        # SQLAlchemy, but command-line Postgres won't know how to interpret it.)
+        db_url = Env.get()._db_server.get_uri(Env.get()._db_name)
         with open(dump_file, 'wb') as dump:
             pg_dump_process = subprocess.Popen(
-                [pg_dump_binary, Env.get().db_url, '-U', 'postgres', '-Fc'],
+                (pg_dump_binary, db_url, '-U', 'postgres', '-Fc'),
                 stdout=subprocess.PIPE
             )
             subprocess.run(
-                ["gzip", "-9"],
+                ('gzip', '-9'),
                 stdin=pg_dump_process.stdout,
                 stdout=dump,
                 check=True
             )
+        if pg_dump_process.poll() != 0:
+            raise RuntimeError(f'pg_dump failed with return code {pg_dump_process.returncode}')
         info_file = self.output_dir / f'pixeltable-v{md_version:03d}-test-info.toml'
-        git_sha = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('ascii').strip()
+        git_sha = subprocess.check_output(('git', 'rev-parse', 'HEAD')).decode('ascii').strip()
         user = os.environ.get('USER', os.environ.get('USERNAME'))
         info_dict = {'pixeltable-dump': {
             'metadata-version': md_version,
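One subtlety in the new error check: `Popen.poll()` returns `None` while the child is still running, and `None != 0` is true, so in principle the check could fire even though pg_dump ultimately succeeds. By the time gzip has read the pipe to EOF the dump process has almost certainly exited, but `Popen.wait()` expresses the intent without that window; a sketch of the more defensive form:

    # wait() blocks until the child exits and returns its code; there is no
    # None-while-running case as with poll()
    ret = pg_dump_process.wait()
    if ret != 0:
        raise RuntimeError(f'pg_dump failed with return code {ret}')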
@@ -187,9 +192,6 @@ class Dumper:
         add_column('div', t.c3 / 1.7)
         add_column('mod', t.c2 % 11)
 
-        # array_slice
-        add_column('array_slice_1', t.c6[5])
-
         # column_property_ref
         add_column('fileurl', t.c8.fileurl)
         add_column('localpath', t.c8.localpath)
@@ -237,6 +239,10 @@ class Dumper:
         # json_mapper and json_path
         add_column('json_mapper', t.c6[3])
         add_column('json_path', t.c6.f1)
+        add_column('json_path_nested', t.c6.f6.f7)
+        add_column('json_path_star', t.c6.f5['*'])
+        add_column('json_path_idx', t.c6.f5[3])
+        add_column('json_path_slice', t.c6.f5[1:3:2])
 
         # literal
         add_column('str_const', 'str')
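The new dump columns exercise the JSON path forms touched by this release's changes to pixeltable/exprs/json_path.py (+36/−20 in the list above). Grounded in the added lines, the four access styles on a JSON column are:

    t.c6.f6.f7      # nested member access
    t.c6.f5['*']    # wildcard over the elements of a list
    t.c6.f5[3]      # single element by index
    t.c6.f5[1:3:2]  # slice with start:stop:step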