pixeltable 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (150)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +22 -12
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +121 -101
  14. pixeltable/catalog/table_version.py +291 -142
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +67 -26
  17. pixeltable/dataframe.py +106 -81
  18. pixeltable/env.py +28 -24
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -9
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +13 -7
  27. pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
  28. pixeltable/exec/expr_eval/globals.py +30 -7
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +151 -31
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +108 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +32 -17
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +16 -12
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +231 -113
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +60 -26
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +2 -1
  101. pixeltable/io/label_studio.py +77 -68
  102. pixeltable/io/pandas.py +36 -23
  103. pixeltable/io/parquet.py +9 -12
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +7 -1
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/share/__init__.py +0 -0
  128. pixeltable/share/packager.py +218 -0
  129. pixeltable/store.py +42 -26
  130. pixeltable/type_system.py +102 -75
  131. pixeltable/utils/arrow.py +7 -8
  132. pixeltable/utils/coco.py +16 -17
  133. pixeltable/utils/code.py +1 -1
  134. pixeltable/utils/console_output.py +6 -3
  135. pixeltable/utils/description_helper.py +7 -7
  136. pixeltable/utils/documents.py +3 -1
  137. pixeltable/utils/filecache.py +12 -7
  138. pixeltable/utils/http_server.py +9 -8
  139. pixeltable/utils/iceberg.py +14 -0
  140. pixeltable/utils/media_store.py +3 -2
  141. pixeltable/utils/pytorch.py +11 -14
  142. pixeltable/utils/s3.py +1 -0
  143. pixeltable/utils/sql.py +1 -0
  144. pixeltable/utils/transactional_directory.py +2 -2
  145. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/METADATA +9 -9
  146. pixeltable-0.3.4.dist-info/RECORD +166 -0
  147. pixeltable-0.3.2.dist-info/RECORD +0 -161
  148. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/LICENSE +0 -0
  149. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/WHEEL +0 -0
  150. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/entry_points.txt +0 -0
pixeltable/share/packager.py ADDED
@@ -0,0 +1,218 @@
+ import io
+ import json
+ import logging
+ import tarfile
+ import urllib.parse
+ import urllib.request
+ import uuid
+ from pathlib import Path
+ from typing import Any, Iterator
+
+ import more_itertools
+ import numpy as np
+ import pyarrow as pa
+ import pyiceberg.catalog
+
+ import pixeltable as pxt
+ import pixeltable.type_system as ts
+ from pixeltable import exprs
+ from pixeltable.env import Env
+ from pixeltable.utils.arrow import PXT_TO_PA_TYPES
+ from pixeltable.utils.iceberg import sqlite_catalog
+
+ _logger = logging.getLogger('pixeltable')
+
+
+ class TablePackager:
+     """
+     Packages a pixeltable Table into a tarball containing Iceberg tables and media files. The structure of the tarball
+     is as follows:
+
+     warehouse/catalog.db  # sqlite Iceberg catalog
+     warehouse/pxt.db/**   # Iceberg metadata and data files (parquet/avro/json)
+     media/**              # Local media files
+
+     If the table being archived is a view, then the Iceberg catalog will contain separate tables for the view and each
+     of its ancestors. All rows will be exported with additional _rowid and _v_min columns. Currently, only the most
+     recent version of the table can be exported, and only the full table contents.
+
+     If the table contains media columns, they are handled as follows:
+     - If a media file has an external URL (any URL scheme other than file://), then the URL will be preserved as-is and
+       stored in the Iceberg table.
+     - If a media file is a local file, then it will be copied into the tarball as a file of the form
+       'media/{uuid}{extension}', and the Iceberg table will contain the ephemeral URI 'pxtmedia://{uuid}{extension}'.
+     """
+
+     table: pxt.Table  # The table to be packaged
+     tmp_dir: Path  # Temporary directory where the package will reside
+     iceberg_catalog: pyiceberg.catalog.Catalog
+     media_files: dict[Path, str]  # Mapping from local media file paths to their tarball names
+
+     def __init__(self, table: pxt.Table) -> None:
+         self.table = table
+         self.tmp_dir = Path(Env.get().create_tmp_path())
+         self.media_files = {}
+
+     def package(self) -> Path:
+         """
+         Export the table to a tarball containing Iceberg tables and media files.
+         """
+         assert not self.tmp_dir.exists()  # Packaging can only be done once per TablePackager instance
+         _logger.info(f"Packaging table '{self.table._path}' and its ancestors in: {self.tmp_dir}")
+         self.tmp_dir.mkdir()
+         self.iceberg_catalog = sqlite_catalog(self.tmp_dir / 'warehouse')
+         ancestors = [self.table] + self.table._bases
+         for t in ancestors:
+             _logger.info(f"Exporting table '{t._path}'.")
+             self.__export_table(t)
+         _logger.info(f'Building archive.')
+         bundle_path = self.__build_tarball()
+         _logger.info(f'Packaging complete: {bundle_path}')
+         return bundle_path
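
A hedged usage sketch (the table name is hypothetical; `TablePackager` comes from the new pixeltable/share/packager.py module, and `pxt.get_table` is the existing public API):

    import tarfile

    import pixeltable as pxt
    from pixeltable.share.packager import TablePackager

    t = pxt.get_table('my_table')  # some existing table
    bundle_path = TablePackager(t).package()  # -> <tmp_dir>/bundle.tar.bz2
    with tarfile.open(bundle_path, 'r:bz2') as tf:
        print(tf.getnames())  # warehouse/catalog.db, warehouse/pxt.db/..., media/...
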
+
+     def __export_table(self, t: pxt.Table) -> None:
+         """
+         Exports the data from `t` into an Iceberg table.
+         """
+         # First generate a select list for the data we want to extract from `t`. This includes:
+         # - all stored columns, including computed columns;
+         # - errortype and errormsg fields whenever they're defined.
+         # We select only those columns that are defined in this table (columns inherited from ancestor tables will be
+         # handled separately).
+         # For media columns, we substitute `col.fileurl` so that we always get the URL (which may be a file:// URL;
+         # these will be specially handled later)
+         select_exprs: dict[str, exprs.Expr] = {}
+
+         # As we generate the select list, we construct a separate list of column types. We can't rely on df._schema
+         # to get the column types, since we'll be substituting `fileurl`s for media columns.
+         actual_col_types: list[ts.ColumnType] = []
+
+         for col_name, col in t._tbl_version.cols_by_name.items():
+             if not col.is_stored:
+                 continue
+             if col.col_type.is_media_type():
+                 select_exprs[col_name] = t[col_name].fileurl
+             else:
+                 select_exprs[col_name] = t[col_name]
+             actual_col_types.append(col.col_type)
+             if col.records_errors:
+                 select_exprs[f'{col_name}_errortype'] = t[col_name].errortype
+                 actual_col_types.append(ts.StringType())
+                 select_exprs[f'{col_name}_errormsg'] = t[col_name].errormsg
+                 actual_col_types.append(ts.StringType())
+
+         # Run the select() on `self.table`, not `t`, so that we export only those rows that are actually present in
+         # `self.table`.
+         df = self.table.select(**select_exprs)
+         namespace = self.__iceberg_namespace(t)
+         self.iceberg_catalog.create_namespace_if_not_exists(namespace)
+         iceberg_schema = self.__to_iceberg_schema(df._schema)
+         iceberg_tbl = self.iceberg_catalog.create_table(f'{namespace}.{t._name}', schema=iceberg_schema)
+
+         # Populate the Iceberg table with data.
+         # The data is first loaded from the DataFrame into a sequence of pyarrow tables, batched in order to avoid
+         # excessive memory usage. The pyarrow tables are then amalgamated into the (single) Iceberg table on disk.
+         for pa_table in self.__to_pa_tables(df, actual_col_types, iceberg_schema):
+             iceberg_tbl.append(pa_table)
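
A hedged read-back sketch: assuming `sqlite_catalog()` (added in pixeltable/utils/iceberg.py in this release) opens an existing warehouse directory the same way it creates a new one, the exported data can be loaded with standard pyiceberg calls:

    from pathlib import Path

    from pixeltable.utils.iceberg import sqlite_catalog

    catalog = sqlite_catalog(Path('warehouse'))  # warehouse/ dir extracted from the tarball
    tbl = catalog.load_table('pxt.my_table')  # namespace 'pxt' + table name (name hypothetical)
    arrow_tbl = tbl.scan().to_arrow()  # includes the extra _rowid and _v_min columns
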
+
+     @classmethod
+     def __iceberg_namespace(cls, table: pxt.Table) -> str:
+         """
+         Iceberg tables must have a namespace, which cannot be the empty string, so we prepend `pxt` to the table path.
+         """
+         parent_path = table._parent._path
+         if len(parent_path) == 0:
+             return 'pxt'
+         else:
+             return f'pxt.{parent_path}'
+
+     # The following methods are responsible for schema and data conversion from Pixeltable to Iceberg. Some of this
+     # logic might be consolidated into arrow.py and unified with general Parquet export, but there are several
+     # major differences:
+     # - Iceberg has no array type; we export all arrays as binary blobs
+     # - We include _rowid and _v_min columns in the Iceberg table
+     # - Media columns are handled specially as indicated above
+
+     @classmethod
+     def __to_iceberg_schema(cls, pxt_schema: dict[str, ts.ColumnType]) -> pa.Schema:
+         entries = [(name, cls.__to_iceberg_type(col_type)) for name, col_type in pxt_schema.items()]
+         entries.append(('_rowid', pa.list_(pa.int64())))
+         entries.append(('_v_min', pa.int64()))
+         return pa.schema(entries)  # type: ignore[arg-type]
+
+     @classmethod
+     def __to_iceberg_type(cls, col_type: ts.ColumnType) -> pa.DataType:
+         if col_type.is_array_type():
+             return pa.binary()
+         if col_type.is_media_type():
+             return pa.string()
+         return PXT_TO_PA_TYPES.get(col_type.__class__)
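
For illustration, a hypothetical table with a string column, an image column, and an array column would map to the following arrow schema (a sketch; the string entry assumes the PXT_TO_PA_TYPES mapping):

    import pyarrow as pa

    expected_schema = pa.schema([
        ('caption', pa.string()),  # StringType, via PXT_TO_PA_TYPES
        ('img', pa.string()),  # media column: exported as a URL / pxtmedia:// string
        ('embedding', pa.binary()),  # array column: serialized with np.save
        ('_rowid', pa.list_(pa.int64())),  # appended system column
        ('_v_min', pa.int64()),  # appended system column
    ])
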
+
+     def __to_pa_tables(
+         self,
+         df: pxt.DataFrame,
+         actual_col_types: list[pxt.ColumnType],
+         arrow_schema: pa.Schema,
+         batch_size: int = 1_000,
+     ) -> Iterator[pa.Table]:
+         """
+         Load a DataFrame as a sequence of pyarrow tables. The pyarrow tables are batched into smaller chunks
+         to avoid excessive memory usage.
+         """
+         for rows in more_itertools.batched(self.__to_pa_rows(df, actual_col_types), batch_size):
+             cols = {col_name: [row[idx] for row in rows] for idx, col_name in enumerate(df._schema.keys())}
+             cols['_rowid'] = [row[-2] for row in rows]
+             cols['_v_min'] = [row[-1] for row in rows]
+             yield pa.Table.from_pydict(cols, schema=arrow_schema)
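
`more_itertools.batched` simply chunks the row iterator into fixed-size tuples, so at most `batch_size` rows are materialized per pyarrow table:

    import more_itertools

    list(more_itertools.batched(range(5), 2))  # [(0, 1), (2, 3), (4,)]
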
+
+     def __to_pa_rows(self, df: pxt.DataFrame, actual_col_types: list[pxt.ColumnType]) -> Iterator[list]:
+         for row in df._exec():
+             vals = [row[e.slot_idx] for e in df._select_list_exprs]
+             result = [self.__to_pa_value(val, col_type) for val, col_type in zip(vals, actual_col_types)]
+             result.append(row.rowid)
+             result.append(row.v_min)
+             yield result
+
+     def __to_pa_value(self, val: Any, col_type: ts.ColumnType) -> Any:
+         if val is None:
+             return None
+         if col_type.is_array_type():
+             # Export arrays as binary
+             assert isinstance(val, np.ndarray)
+             arr = io.BytesIO()
+             np.save(arr, val)
+             return arr.getvalue()
+         if col_type.is_json_type():
+             # Export JSON as strings
+             return json.dumps(val)
+         if col_type.is_media_type():
+             # Handle media files as described above
+             assert isinstance(val, str)  # Media columns are always referenced by `fileurl`
+             return self.__process_media_url(val)
+         return val
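
Consumers of the bundle need to invert these conversions. A minimal decoding sketch (the function names are hypothetical, not part of this release):

    import io
    import json

    import numpy as np

    def decode_array(blob: bytes) -> np.ndarray:
        # inverse of np.save into a BytesIO buffer
        return np.load(io.BytesIO(blob))

    def decode_json(cell: str):
        # inverse of json.dumps
        return json.loads(cell)
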
+
+     def __process_media_url(self, url: str) -> str:
+         parsed_url = urllib.parse.urlparse(url)
+         if parsed_url.scheme == 'file':
+             # It's the URL of a local file. Replace it with a pxtmedia:// URI.
+             # (We can't use an actual pxt:// URI, because the eventual pxt:// table name might not be known at this
+             # time. The pxtmedia:// URI serves as a relative reference into the tarball that can be replaced with an
+             # actual URL when the table is reconstituted.)
+             path = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_url.path)))
+             if path not in self.media_files:
+                 # Create a new entry in the `media_files` dict so that we can copy the file into the tarball later.
+                 dest_name = f'{uuid.uuid4().hex}{path.suffix}'
+                 self.media_files[path] = dest_name
+             return f'pxtmedia://{self.media_files[path]}'
+         # For any type of URL other than a local file, just return the URL as-is.
+         return url
+
+     def __build_tarball(self) -> Path:
+         bundle_path = self.tmp_dir / 'bundle.tar.bz2'
+         with tarfile.open(bundle_path, 'w:bz2') as tf:
+             # Add the Iceberg warehouse dir (including the catalog)
+             tf.add(self.tmp_dir / 'warehouse', arcname='warehouse', recursive=True)
+             # Add the media files
+             for src_file, dest_name in self.media_files.items():
+                 tf.add(src_file, arcname=f'media/{dest_name}')
+         return bundle_path
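
A hedged sketch of the reconstitution step mentioned in the comments above (not part of this release): after extracting the tarball, each pxtmedia:// URI can be rewritten to a file:// URL pointing into the extracted media/ directory:

    import urllib.parse
    from pathlib import Path

    def resolve_pxtmedia(uri: str, extract_dir: Path) -> str:
        parsed = urllib.parse.urlparse(uri)
        if parsed.scheme != 'pxtmedia':
            return uri  # external URLs were preserved as-is
        # 'pxtmedia://{uuid}{extension}' stores the tarball file name in the netloc
        return (extract_dir / 'media' / parsed.netloc).resolve().as_uri()
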
pixeltable/store.py CHANGED
@@ -32,6 +32,7 @@ class StoreBase:
      - v_min: version at which the row was created
      - v_max: version at which the row was deleted (or MAX_VERSION if it's still live)
      """
+
      tbl_version: catalog.TableVersion
      sa_md: sql.MetaData
      sa_tbl: Optional[sql.Table]
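
The v_min/v_max pair implements snapshot visibility: a row is visible at version V iff it was created at or before V and deleted after V. A minimal sketch of the rule (MAX_VERSION is a stand-in for schema.Table.MAX_VERSION, whose value is not shown in this diff):

    MAX_VERSION = 2**63 - 1  # stand-in for schema.Table.MAX_VERSION

    def visible_at(v_min: int, v_max: int, v: int) -> bool:
        # live rows keep v_max == MAX_VERSION until they are deleted
        return v_min <= v < v_max
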
@@ -65,8 +66,9 @@
          """Create and return system columns"""
          rowid_cols = self._create_rowid_columns()
          self.v_min_col = sql.Column('v_min', sql.BigInteger, nullable=False)
-         self.v_max_col = \
-             sql.Column('v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION))
+         self.v_max_col = sql.Column(
+             'v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION)
+         )
          self._pk_cols = [*rowid_cols, self.v_min_col]
          return [*rowid_cols, self.v_min_col, self.v_max_col]
 
@@ -134,7 +136,7 @@
          return new_file_url
 
      def _move_tmp_media_files(
-             self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
+         self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
      ) -> None:
          """Move tmp media files that we generated to a permanent location"""
          for c in media_cols:
@@ -143,7 +145,7 @@
                  table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)
 
      def _create_table_row(
-             self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
+         self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
      ) -> tuple[dict[str, Any], int]:
          """Return Tuple[complete table row, # of exceptions] for insert()
          Creates a row that includes the PK columns, with the values from input_row.pk.
@@ -193,11 +195,13 @@
              added_storage_cols = [col.store_name()]
              if col.records_errors:
                  # we also need to create the errormsg and errortype storage cols
-                 stmt = sql.text(f'ALTER TABLE {self._storage_name()} '
-                                 f'ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL')
+                 stmt = sql.text(
+                     f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL'
+                 )
                  conn.execute(stmt)
-                 stmt = sql.text(f'ALTER TABLE {self._storage_name()} '
-                                 f'ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL')
+                 stmt = sql.text(
+                     f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL'
+                 )
                  conn.execute(stmt)
                  added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
          self.create_sa_tbl()
@@ -219,7 +223,7 @@
          exec_plan: ExecNode,
          value_expr_slot_idx: int,
          conn: sql.engine.Connection,
-         on_error: Literal['abort', 'ignore']
+         on_error: Literal['abort', 'ignore'],
      ) -> int:
          """Update store column of a computed column with values produced by an execution plan
 
@@ -295,10 +299,9 @@
              update_stmt = update_stmt.where(pk_col == tmp_pk_col)
          update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
          if col.records_errors:
-             update_stmt = update_stmt.values({
-                 col.sa_errortype_col: tmp_errortype_col,
-                 col.sa_errormsg_col: tmp_errormsg_col
-             })
+             update_stmt = update_stmt.values(
+                 {col.sa_errortype_col: tmp_errortype_col, col.sa_errormsg_col: tmp_errormsg_col}
+             )
          log_explain(_logger, update_stmt, conn)
          conn.execute(update_stmt)
 
@@ -308,8 +311,13 @@
          return num_excs
 
      def insert_rows(
-         self, exec_plan: ExecNode, conn: sql.engine.Connection, v_min: Optional[int] = None,
-         show_progress: bool = True, rowids: Optional[Iterator[int]] = None, abort_on_exc: bool = False
+         self,
+         exec_plan: ExecNode,
+         conn: sql.engine.Connection,
+         v_min: Optional[int] = None,
+         show_progress: bool = True,
+         rowids: Optional[Iterator[int]] = None,
+         abort_on_exc: bool = False,
      ) -> tuple[int, int, set[int]]:
          """Insert rows into the store table and update the catalog table's md
          Returns:
@@ -347,12 +355,12 @@
 
                  if show_progress:
                      if progress_bar is None:
-                         warnings.simplefilter("ignore", category=TqdmWarning)
+                         warnings.simplefilter('ignore', category=TqdmWarning)
                          progress_bar = tqdm(
                              desc=f'Inserting rows into `{self.tbl_version.name}`',
                              unit=' rows',
                              ncols=100,
-                             file=sys.stdout
+                             file=sys.stdout,
                          )
                      progress_bar.update(1)
 
@@ -379,8 +387,13 @@
              return sql.and_(clause, self.base._versions_clause(versions[1:], match_on_vmin))
 
      def delete_rows(
-         self, current_version: int, base_versions: list[Optional[int]], match_on_vmin: bool,
-         where_clause: Optional[sql.ColumnElement[bool]], conn: sql.engine.Connection) -> int:
+         self,
+         current_version: int,
+         base_versions: list[Optional[int]],
+         match_on_vmin: bool,
+         where_clause: Optional[sql.ColumnElement[bool]],
+         conn: sql.engine.Connection,
+     ) -> int:
          """Mark rows as deleted that are live and were created prior to current_version.
          Also: populate the undo columns
          Args:
@@ -394,12 +407,12 @@
          """
          where_clause = sql.true() if where_clause is None else where_clause
          where_clause = sql.and_(
-             self.v_min_col < current_version,
-             self.v_max_col == schema.Table.MAX_VERSION,
-             where_clause)
+             self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION, where_clause
+         )
          rowid_join_clause = self._rowid_join_predicate()
-         base_versions_clause = sql.true() if len(base_versions) == 0 \
-             else self.base._versions_clause(base_versions, match_on_vmin)
+         base_versions_clause = (
+             sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
+         )
          set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
          for index_info in self.tbl_version.idxs_by_name.values():
              # copy value column to undo column
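
Putting the delete_rows pieces together: for a table with no bases and no indexes, the effective statement is roughly the following (a self-contained SQLAlchemy sketch; the table and version are stand-ins):

    import sqlalchemy as sql

    MAX_VERSION = 2**63 - 1  # stand-in for schema.Table.MAX_VERSION
    md = sql.MetaData()
    store_tbl = sql.Table(  # minimal stand-in for the real store table
        'tbl_123',
        md,
        sql.Column('v_min', sql.BigInteger, nullable=False),
        sql.Column('v_max', sql.BigInteger, nullable=False),
    )
    current_version = 5  # hypothetical

    update_stmt = (
        sql.update(store_tbl)
        .where(store_tbl.c.v_min < current_version)  # created before this version
        .where(store_tbl.c.v_max == MAX_VERSION)  # still live
        .values(v_max=current_version)  # mark as deleted at current_version
    )
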
@@ -450,7 +463,9 @@ class StoreView(StoreBase):
      def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
          return sql.and_(
              self.base._rowid_join_predicate(),
-             *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())])
+             *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())],
+         )
+
 
  class StoreComponentView(StoreView):
      """A view that stores components of its base, as produced by a ComponentIterator
@@ -482,4 +497,5 @@ class StoreComponentView(StoreView):
      def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
          return sql.and_(
              self.base._rowid_join_predicate(),
-             *[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())])
+             *[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())],
+         )