pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/store.py CHANGED
@@ -2,23 +2,23 @@ from __future__ import annotations
 
 import abc
 import logging
-import os
 import sys
-import urllib.parse
-import urllib.request
+import time
 import warnings
-from typing import Any, Iterable, Iterator, Literal, Optional, Union
+from typing import Any, Iterable, Iterator
+from uuid import UUID
 
 import more_itertools
+import psycopg
 import sqlalchemy as sql
 from tqdm import TqdmWarning, tqdm
 
-from pixeltable import catalog, exceptions as excs, exprs
+from pixeltable import catalog, exceptions as excs
+from pixeltable.catalog.update_status import RowCountStats
 from pixeltable.env import Env
 from pixeltable.exec import ExecNode
 from pixeltable.metadata import schema
 from pixeltable.utils.exception_handler import run_cleanup
-from pixeltable.utils.media_store import MediaStore
 from pixeltable.utils.sql import log_explain, log_stmt
 
 _logger = logging.getLogger('pixeltable')
@@ -35,24 +35,46 @@ class StoreBase:
 
     tbl_version: catalog.TableVersionHandle
     sa_md: sql.MetaData
-    sa_tbl: Optional[sql.Table]
+    sa_tbl: sql.Table | None
     _pk_cols: list[sql.Column]
     v_min_col: sql.Column
     v_max_col: sql.Column
-    base: Optional[StoreBase]
 
-    __INSERT_BATCH_SIZE = 1000
+    # We need to declare a `base` variable here, even though it's only defined for instances of `StoreView`,
+    # since it's referenced by various methods of `StoreBase`
+    _base: StoreBase | None
+
+    # In my cursory experiments this was the optimal batch size: it was an improvement over 5_000 and there was no real
+    # benefit to going higher.
+    # TODO: Perform more rigorous experiments with different table structures and OS environments to refine this.
+    __INSERT_BATCH_SIZE = 10_000
 
     def __init__(self, tbl_version: catalog.TableVersion):
-        self.tbl_version = catalog.TableVersionHandle(
-            tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version
-        )
+        self.tbl_version = tbl_version.handle
         self.sa_md = sql.MetaData()
         self.sa_tbl = None
-        # We need to declare a `base` variable here, even though it's only defined for instances of `StoreView`,
-        # since it's referenced by various methods of `StoreBase`
-        self.base = tbl_version.base.get().store_tbl if tbl_version.base is not None else None
-        self.create_sa_tbl()
+        self._pk_cols = []
+
+        # we initialize _base lazily, because the base may not exist anymore at this point
+        # (but we might still need sa_table to access our store table); do this before create_sa_tbl()
+        self._base = None
+
+        # we're passing in tbl_version to avoid a circular call to TableVersionHandle.get()
+        self.create_sa_tbl(tbl_version)
+
+    @property
+    def base(self) -> StoreBase | None:
+        if self._base is None:
+            tv = self.tbl_version.get()
+            self._base = tv.base.get().store_tbl if tv.base is not None else None
+        return self._base
+
+    @classmethod
+    def storage_name(cls, tbl_id: UUID, is_view: bool) -> str:
+        return f'{"view" if is_view else "tbl"}_{tbl_id.hex}'
+
+    def system_columns(self) -> list[sql.Column]:
+        return [*self._pk_cols, self.v_max_col]
 
     def pk_columns(self) -> list[sql.Column]:
         return self._pk_cols
@@ -66,7 +88,23 @@ class StoreBase:
 
     def _create_system_columns(self) -> list[sql.Column]:
         """Create and return system columns"""
-        rowid_cols = self._create_rowid_columns()
+        rowid_cols: list[sql.Column]
+        if self._store_tbl_exists():
+            # derive our rowid Columns from the existing table, without having to access self.base.store_tbl:
+            # self.base may not exist anymore (both this table and our base got dropped in the same transaction, and
+            # the base was finalized before this table)
+            with Env.get().begin_xact(for_write=False) as conn:
+                q = (
+                    f'SELECT column_name FROM information_schema.columns WHERE table_name = {self._storage_name()!r} '
+                    'ORDER BY ordinal_position'
+                )
+                col_names = [row[0] for row in conn.execute(sql.text(q)).fetchall()]
+            num_rowid_cols = col_names.index('v_min')
+            rowid_cols = [
+                sql.Column(col_name, sql.BigInteger, nullable=False) for col_name in col_names[:num_rowid_cols]
+            ]
+        else:
+            rowid_cols = self._create_rowid_columns()
         self.v_min_col = sql.Column('v_min', sql.BigInteger, nullable=False)
         self.v_max_col = sql.Column(
             'v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION)
@@ -74,18 +112,20 @@ class StoreBase:
         self._pk_cols = [*rowid_cols, self.v_min_col]
         return [*rowid_cols, self.v_min_col, self.v_max_col]
 
-    def create_sa_tbl(self) -> None:
+    def create_sa_tbl(self, tbl_version: catalog.TableVersion | None = None) -> None:
         """Create self.sa_tbl from self.tbl_version."""
+        if tbl_version is None:
+            tbl_version = self.tbl_version.get()
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
-        for col in [c for c in self.tbl_version.get().cols if c.is_stored]:
+        # we captured all columns, including dropped ones: they're still part of the physical table
+        for col in [c for c in tbl_version.cols if c.is_stored]:
             # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
             # to the last sql.Table version we created and cannot be reused
             col.create_sa_cols()
             all_cols.append(col.sa_col)
-            if col.records_errors:
-                all_cols.append(col.sa_errormsg_col)
-                all_cols.append(col.sa_errortype_col)
+            if col.stores_cellmd:
+                all_cols.append(col.sa_cellmd_col)
 
         if self.sa_tbl is not None:
             # if we're called in response to a schema change, we need to remove the old table first
@@ -96,16 +136,17 @@ class StoreBase:
         # - base x view joins can be executed as merge joins
         # - speeds up ORDER BY rowid DESC
         # - allows filtering for a particular table version in index scan
-        idx_name = f'sys_cols_idx_{self.tbl_version.id.hex}'
+        idx_name = f'sys_cols_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, *system_cols))
 
         # v_min/v_max indices: speeds up base table scans needed to propagate a base table insert or delete
-        idx_name = f'vmin_idx_{self.tbl_version.id.hex}'
+        idx_name = f'vmin_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using=Env.get().dbms.version_index_type))
-        idx_name = f'vmax_idx_{self.tbl_version.id.hex}'
+        idx_name = f'vmax_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=Env.get().dbms.version_index_type))
 
         self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
+        # _logger.debug(f'created sa tbl for {tbl_version.id!s} (sa_tbl={id(self.sa_tbl):x}, tv={id(tbl_version):x})')
 
     @abc.abstractmethod
     def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
@@ -115,51 +156,6 @@ class StoreBase:
     def _storage_name(self) -> str:
         """Return the name of the data store table"""
 
-    def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column, v_min: int) -> str:
-        """Move tmp media file with given url to Env.media_dir and return new url, or given url if not a tmp_dir file"""
-        pxt_tmp_dir = str(Env.get().tmp_dir)
-        if file_url is None:
-            return None
-        parsed = urllib.parse.urlparse(file_url)
-        # We should never be passed a local file path here. The "len > 1" ensures that Windows
-        # file paths aren't mistaken for URLs with a single-character scheme.
-        assert len(parsed.scheme) > 1
-        if parsed.scheme != 'file':
-            # remote url
-            return file_url
-        file_path = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
-        if not file_path.startswith(pxt_tmp_dir):
-            # not a tmp file
-            return file_url
-        _, ext = os.path.splitext(file_path)
-        new_path = str(MediaStore.prepare_media_path(self.tbl_version.id, col.id, v_min, ext=ext))
-        os.rename(file_path, new_path)
-        new_file_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(new_path))
-        return new_file_url
-
-    def _move_tmp_media_files(
-        self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
-    ) -> None:
-        """Move tmp media files that we generated to a permanent location"""
-        for c in media_cols:
-            for table_row in table_rows:
-                file_url = table_row[c.store_name()]
-                table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)
-
-    def _create_table_row(
-        self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
-    ) -> tuple[dict[str, Any], int]:
-        """Return Tuple[complete table row, # of exceptions] for insert()
-        Creates a row that includes the PK columns, with the values from input_row.pk.
-        Returns:
-            Tuple[complete table row, # of exceptions]
-        """
-        table_row, num_excs = row_builder.create_table_row(input_row, exc_col_ids)
-        assert len(pk) == len(self._pk_cols)
-        for pk_col, pk_val in zip(self._pk_cols, pk):
-            table_row[pk_col.name] = pk_val
-        return table_row, num_excs
-
     def count(self) -> int:
         """Return the number of rows visible in self.tbl_version"""
         stmt = (
@@ -173,14 +169,123 @@ class StoreBase:
         assert isinstance(result, int)
         return result
 
+    def _exec_if_not_exists(self, stmt: str, wait_for_table: bool) -> None:
+        """
+        Execute a statement containing 'IF NOT EXISTS' and ignore any duplicate object-related errors.
+
+        The statement needs to run in a separate transaction, because the expected error conditions will abort the
+        enclosing transaction (and the ability to run additional statements in that same transaction).
+        """
+        while True:
+            with Env.get().begin_xact(for_write=True) as conn:
+                try:
+                    if wait_for_table and not Env.get().is_using_cockroachdb:
+                        # Try to lock the table to make sure that it exists. This needs to run in the same transaction
+                        # as 'stmt' to avoid a race condition.
+                        # TODO: adapt this for CockroachDB
+                        lock_stmt = f'LOCK TABLE {self._storage_name()} IN ACCESS EXCLUSIVE MODE'
+                        conn.execute(sql.text(lock_stmt))
+                    conn.execute(sql.text(stmt))
+                    return
+                except (sql.exc.IntegrityError, sql.exc.ProgrammingError) as e:
+                    Env.get().console_logger.info(f'{stmt} failed with: {e}')
+                    if (
+                        isinstance(e.orig, psycopg.errors.UniqueViolation)
+                        and 'duplicate key value violates unique constraint' in str(e.orig)
+                    ) or (
+                        isinstance(e.orig, (psycopg.errors.DuplicateObject, psycopg.errors.DuplicateTable))
+                        and 'already exists' in str(e.orig)
+                    ):
+                        # table already exists
+                        return
+                    elif isinstance(e.orig, psycopg.errors.UndefinedTable):
+                        # the Lock Table failed because the table doesn't exist yet; try again
+                        time.sleep(1)
+                        continue
+                    else:
+                        raise
+
+    def _store_tbl_exists(self) -> bool:
+        """Returns True if the store table exists, False otherwise."""
+        with Env.get().begin_xact(for_write=False) as conn:
+            q = (
+                'SELECT COUNT(*) FROM pg_catalog.pg_tables '
+                f"WHERE schemaname = 'public' AND tablename = {self._storage_name()!r}"
+            )
+            res = conn.execute(sql.text(q)).scalar_one()
+            return res == 1
+
     def create(self) -> None:
-        conn = Env.get().conn
-        self.sa_md.create_all(bind=conn)
+        """
+        Create or update store table to bring it in sync with self.sa_tbl. Idempotent.
+
+        This runs a sequence of DDL statements (Create Table, Alter Table Add Column, Create Index), each of which
+        is run in its own transaction.
+
+        The exception to that are local replicas, for which TableRestorer creates an enclosing transaction. In theory,
+        this should avoid the potential for race conditions that motivate the error handling present in
+        _exec_if_not_exists() (meaning: we shouldn't see those errors when creating local replicas).
+        TODO: remove the special case for local replicas in order to make the logic easier to reason about.
+        """
+        postgres_dialect = sql.dialects.postgresql.dialect()
+
+        if not self._store_tbl_exists():
+            # run Create Table If Not Exists; we always need If Not Exists to avoid race conditions between concurrent
+            # Pixeltable processes
+            create_stmt = sql.schema.CreateTable(self.sa_tbl, if_not_exists=True).compile(dialect=postgres_dialect)
+            self._exec_if_not_exists(str(create_stmt), wait_for_table=False)
+        else:
+            # ensure that all columns exist by running Alter Table Add Column If Not Exists for all columns
+            for col in self.sa_tbl.columns:
+                stmt = self._add_column_stmt(col)
+                self._exec_if_not_exists(stmt, wait_for_table=True)
+            # TODO: do we also need to ensure that these columns are now visible (ie, is there another potential race
+            # condition here?)
+
+        # ensure that all system indices exist by running Create Index If Not Exists
+        for idx in self.sa_tbl.indexes:
+            create_idx_stmt = sql.schema.CreateIndex(idx, if_not_exists=True).compile(dialect=postgres_dialect)
+            self._exec_if_not_exists(str(create_idx_stmt), wait_for_table=True)
+
+        # ensure that all visible non-system indices exist by running appropriate create statements
+        for id in self.tbl_version.get().idxs:
+            self.create_index(id)
+
+    def create_index(self, idx_id: int) -> None:
+        """Create If Not Exists for this index"""
+        idx_info = self.tbl_version.get().idxs[idx_id]
+        stmt = idx_info.idx.sa_create_stmt(self.tbl_version.get()._store_idx_name(idx_id), idx_info.val_col.sa_col)
+        self._exec_if_not_exists(str(stmt), wait_for_table=True)
+
+    def validate(self) -> None:
+        """Validate store table against self.table_version"""
+        with Env.get().begin_xact() as conn:
+            # check that all columns are present
+            q = f'SELECT column_name FROM information_schema.columns WHERE table_name = {self._storage_name()!r}'
+            store_col_info = {row[0] for row in conn.execute(sql.text(q)).fetchall()}
+            tbl_col_info = {col.store_name() for col in self.tbl_version.get().cols if col.is_stored}
+            assert tbl_col_info.issubset(store_col_info)
+
+            # check that all visible indices are present
+            q = f'SELECT indexname FROM pg_indexes WHERE tablename = {self._storage_name()!r}'
+            store_idx_names = {row[0] for row in conn.execute(sql.text(q)).fetchall()}
+            tbl_index_names = {
+                self.tbl_version.get()._store_idx_name(info.id) for info in self.tbl_version.get().idxs.values()
+            }
+            assert tbl_index_names.issubset(store_idx_names)
 
     def drop(self) -> None:
         """Drop store table"""
         conn = Env.get().conn
-        self.sa_md.drop_all(bind=conn)
+        drop_stmt = f'DROP TABLE IF EXISTS {self._storage_name()}'
+        conn.execute(sql.text(drop_stmt))
+
+    def _add_column_stmt(self, sa_col: sql.Column) -> str:
+        col_type_str = sa_col.type.compile(dialect=sql.dialects.postgresql.dialect())
+        return (
+            f'ALTER TABLE {self._storage_name()} ADD COLUMN IF NOT EXISTS '
+            f'{sa_col.name} {col_type_str} {"NOT " if not sa_col.nullable else ""} NULL'
        )
 
     def add_column(self, col: catalog.Column) -> None:
         """Add column(s) to the store-resident table based on a catalog column
@@ -190,14 +295,13 @@ class StoreBase:
         """
         assert col.is_stored
         conn = Env.get().conn
-        col_type_str = col.get_sa_col_type().compile(dialect=conn.dialect)
+        col_type_str = col.sa_col_type.compile(dialect=conn.dialect)
         s_txt = f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL'
         added_storage_cols = [col.store_name()]
-        if col.records_errors:
-            # we also need to create the errormsg and errortype storage cols
-            s_txt += f' , ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL'
-            s_txt += f' , ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL'
-            added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
+        if col.stores_cellmd:
+            cellmd_type_str = col.sa_cellmd_type().compile(dialect=conn.dialect)
+            s_txt += f' , ADD COLUMN {col.cellmd_store_name()} {cellmd_type_str} DEFAULT NULL'
+            added_storage_cols.append(col.cellmd_store_name())
 
         stmt = sql.text(s_txt)
         log_stmt(_logger, stmt)
@@ -208,16 +312,13 @@ class StoreBase:
     def drop_column(self, col: catalog.Column) -> None:
         """Execute Alter Table Drop Column statement"""
         s_txt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.store_name()}'
-        if col.records_errors:
-            s_txt += f' , DROP COLUMN {col.errormsg_store_name()}'
-            s_txt += f' , DROP COLUMN {col.errortype_store_name()}'
+        if col.stores_cellmd:
+            s_txt += f' , DROP COLUMN {col.cellmd_store_name()}'
         stmt = sql.text(s_txt)
         log_stmt(_logger, stmt)
         Env.get().conn.execute(stmt)
 
-    def load_column(
-        self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, on_error: Literal['abort', 'ignore']
-    ) -> int:
+    def load_column(self, col: catalog.Column, exec_plan: ExecNode, abort_on_exc: bool) -> int:
         """Update store column of a computed column with values produced by an execution plan
 
         Returns:
@@ -226,84 +327,69 @@ class StoreBase:
             sql.exc.DBAPIError if there was a SQL error during execution
            excs.Error if on_error='abort' and there was an exception during row evaluation
         """
-        assert col.tbl.id == self.tbl_version.id
+        assert col.get_tbl().id == self.tbl_version.id
         num_excs = 0
         num_rows = 0
         # create temp table to store output of exec_plan, with the same primary key as the store table
         tmp_name = f'temp_{self._storage_name()}'
-        tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
-        tmp_cols = tmp_pk_cols.copy()
+        tmp_pk_cols = tuple(sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns())
         tmp_val_col = sql.Column(col.sa_col.name, col.sa_col.type)
-        tmp_cols.append(tmp_val_col)
+        tmp_cols = [*tmp_pk_cols, tmp_val_col]
         # add error columns if the store column records errors
-        if col.records_errors:
-            tmp_errortype_col = sql.Column(col.sa_errortype_col.name, col.sa_errortype_col.type)
-            tmp_cols.append(tmp_errortype_col)
-            tmp_errormsg_col = sql.Column(col.sa_errormsg_col.name, col.sa_errormsg_col.type)
-            tmp_cols.append(tmp_errormsg_col)
+        if col.stores_cellmd:
+            tmp_cellmd_col = sql.Column(col.sa_cellmd_col.name, col.sa_cellmd_col.type)
+            tmp_cols.append(tmp_cellmd_col)
+        tmp_col_names = [col.name for col in tmp_cols]
+
         tmp_tbl = sql.Table(tmp_name, self.sa_md, *tmp_cols, prefixes=['TEMPORARY'])
         conn = Env.get().conn
         tmp_tbl.create(bind=conn)
 
+        row_builder = exec_plan.row_builder
+
         try:
+            table_rows: list[tuple[Any]] = []
+
             # insert rows from exec_plan into temp table
-            # TODO: unify the table row construction logic with RowBuilder.create_table_row()
             for row_batch in exec_plan:
                 num_rows += len(row_batch)
-                tbl_rows: list[dict[str, Any]] = []
-                for result_row in row_batch:
-                    tbl_row: dict[str, Any] = {}
-                    for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
-                        tbl_row[pk_col.name] = pk_val
-
-                    if col.is_computed:
-                        if result_row.has_exc(value_expr_slot_idx):
-                            num_excs += 1
-                            value_exc = result_row.get_exc(value_expr_slot_idx)
-                            if on_error == 'abort':
-                                raise excs.Error(
-                                    f'Error while evaluating computed column `{col.name}`:\n{value_exc}'
-                                ) from value_exc
-                            # we store a NULL value and record the exception/exc type
-                            error_type = type(value_exc).__name__
-                            error_msg = str(value_exc)
-                            tbl_row[col.sa_col.name] = None
-                            tbl_row[col.sa_errortype_col.name] = error_type
-                            tbl_row[col.sa_errormsg_col.name] = error_msg
-                        else:
-                            if col.col_type.is_image_type() and result_row.file_urls[value_expr_slot_idx] is None:
-                                # we have yet to store this image
-                                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.get().version))
-                                result_row.flush_img(value_expr_slot_idx, filepath)
-                            val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
-                            if col.col_type.is_media_type():
-                                val = self._move_tmp_media_file(val, col, result_row.pk[-1])
-                            tbl_row[col.sa_col.name] = val
-                            if col.records_errors:
-                                tbl_row[col.sa_errortype_col.name] = None
-                                tbl_row[col.sa_errormsg_col.name] = None
-
-                    tbl_rows.append(tbl_row)
-                conn.execute(sql.insert(tmp_tbl), tbl_rows)
+                batch_table_rows: list[tuple[Any]] = []
+
+                for row in row_batch:
+                    if abort_on_exc and row.has_exc():
+                        exc = row.get_first_exc()
+                        raise excs.Error(f'Error while evaluating computed column {col.name!r}:\n{exc}') from exc
+                    table_row, num_row_exc = row_builder.create_store_table_row(row, None, row.pk)
+                    num_excs += num_row_exc
+                    batch_table_rows.append(tuple(table_row))
+
+                table_rows.extend(batch_table_rows)
+
+                if len(table_rows) >= self.__INSERT_BATCH_SIZE:
+                    self.sql_insert(tmp_tbl, tmp_col_names, table_rows)
+                    table_rows.clear()
+
+            if len(table_rows) > 0:
+                self.sql_insert(tmp_tbl, tmp_col_names, table_rows)
 
             # update store table with values from temp table
             update_stmt = sql.update(self.sa_tbl)
             for pk_col, tmp_pk_col in zip(self.pk_columns(), tmp_pk_cols):
                 update_stmt = update_stmt.where(pk_col == tmp_pk_col)
             update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
-            if col.records_errors:
-                update_stmt = update_stmt.values(
-                    {col.sa_errortype_col: tmp_errortype_col, col.sa_errormsg_col: tmp_errormsg_col}
-                )
+            if col.stores_cellmd:
+                update_stmt = update_stmt.values({col.sa_cellmd_col: tmp_cellmd_col})
             log_explain(_logger, update_stmt, conn)
             conn.execute(update_stmt)
+
         finally:
 
             def remove_tmp_tbl() -> None:
                 self.sa_md.remove(tmp_tbl)
                 tmp_tbl.drop(bind=conn)
 
-            run_cleanup(remove_tmp_tbl, raise_error=True)
+            run_cleanup(remove_tmp_tbl, raise_error=False)
+
         return num_excs
 
     def insert_rows(
@@ -311,9 +397,9 @@ class StoreBase:
         exec_plan: ExecNode,
         v_min: int,
         show_progress: bool = True,
-        rowids: Optional[Iterator[int]] = None,
+        rowids: Iterator[int] | None = None,
         abort_on_exc: bool = False,
-    ) -> tuple[int, int, set[int]]:
+    ) -> tuple[set[int], RowCountStats]:
         """Insert rows into the store table and update the catalog table's md
         Returns:
             number of inserted rows, number of exceptions, set of column ids that have exceptions
@@ -323,53 +409,80 @@ class StoreBase:
         num_excs = 0
         num_rows = 0
         cols_with_excs: set[int] = set()
-        progress_bar: Optional[tqdm] = None  # create this only after we started executing
+        progress_bar: tqdm | None = None  # create this only after we started executing
         row_builder = exec_plan.row_builder
-        media_cols = [info.col for info in row_builder.table_columns if info.col.col_type.is_media_type()]
-        conn = Env.get().conn
+
+        store_col_names = row_builder.store_column_names()
 
         try:
+            table_rows: list[tuple[Any]] = []
             exec_plan.open()
+
             for row_batch in exec_plan:
                 num_rows += len(row_batch)
-                for batch_start_idx in range(0, len(row_batch), self.__INSERT_BATCH_SIZE):
-                    # compute batch of rows and convert them into table rows
-                    table_rows: list[dict[str, Any]] = []
-                    batch_stop_idx = min(batch_start_idx + self.__INSERT_BATCH_SIZE, len(row_batch))
-                    for row_idx in range(batch_start_idx, batch_stop_idx):
-                        row = row_batch[row_idx]
-                        # if abort_on_exc == True, we need to check for media validation exceptions
-                        if abort_on_exc and row.has_exc():
-                            exc = row.get_first_exc()
-                            raise exc
-
-                        rowid = (next(rowids),) if rowids is not None else row.pk[:-1]
-                        pk = (*rowid, v_min)
-                        table_row, num_row_exc = self._create_table_row(row, row_builder, cols_with_excs, pk=pk)
-                        num_excs += num_row_exc
-                        table_rows.append(table_row)
-
-                        if show_progress:
-                            if progress_bar is None:
-                                warnings.simplefilter('ignore', category=TqdmWarning)
-                                progress_bar = tqdm(
-                                    desc=f'Inserting rows into `{self.tbl_version.get().name}`',
-                                    unit=' rows',
-                                    ncols=100,
-                                    file=sys.stdout,
-                                )
-                            progress_bar.update(1)
-
-                    # insert batch of rows
-                    self._move_tmp_media_files(table_rows, media_cols, v_min)
-                    conn.execute(sql.insert(self.sa_tbl), table_rows)
+                batch_table_rows: list[tuple[Any]] = []
+
+                # compute batch of rows and convert them into table rows
+                for row in row_batch:
+                    # if abort_on_exc == True, we need to check for media validation exceptions
+                    if abort_on_exc and row.has_exc():
+                        exc = row.get_first_exc()
+                        raise exc
+
+                    rowid = (next(rowids),) if rowids is not None else row.pk[:-1]
+                    pk = (*rowid, v_min)
+                    assert len(pk) == len(self._pk_cols)
+                    table_row, num_row_exc = row_builder.create_store_table_row(row, cols_with_excs, pk)
+                    num_excs += num_row_exc
+
+                    if show_progress and Env.get().verbosity >= 1:
+                        if progress_bar is None:
+                            warnings.simplefilter('ignore', category=TqdmWarning)
+                            progress_bar = tqdm(
+                                desc=f'Inserting rows into `{self.tbl_version.get().name}`',
+                                unit=' rows',
+                                ncols=100,
+                                file=sys.stdout,
+                            )
+                        progress_bar.update(1)
+
+                    batch_table_rows.append(tuple(table_row))
+
+                table_rows.extend(batch_table_rows)
+
+                # if a batch is ready for insertion into the database, insert it
+                if len(table_rows) >= self.__INSERT_BATCH_SIZE:
+                    self.sql_insert(self.sa_tbl, store_col_names, table_rows)
+                    table_rows.clear()
+
+            # insert any remaining rows
+            if len(table_rows) > 0:
+                self.sql_insert(self.sa_tbl, store_col_names, table_rows)
+
             if progress_bar is not None:
                 progress_bar.close()
-            return num_rows, num_excs, cols_with_excs
+            computed_values = exec_plan.ctx.num_computed_exprs * num_rows
+            row_counts = RowCountStats(ins_rows=num_rows, num_excs=num_excs, computed_values=computed_values)
+
+            return cols_with_excs, row_counts
         finally:
             exec_plan.close()
 
-    def _versions_clause(self, versions: list[Optional[int]], match_on_vmin: bool) -> sql.ColumnElement[bool]:
+    @classmethod
+    def sql_insert(cls, sa_tbl: sql.Table, store_col_names: list[str], table_rows: list[tuple[Any]]) -> None:
+        assert len(table_rows) > 0
+        conn = Env.get().conn
+        conn.execute(sql.insert(sa_tbl), [dict(zip(store_col_names, table_row)) for table_row in table_rows])
+
+        # TODO: Inserting directly via psycopg delivers a small performance benefit, but is somewhat fraught due to
+        # differences in the data representation that SQLAlchemy/psycopg expect. The below code will do the
+        # insertion in psycopg and can be used if/when we decide to pursue that optimization.
+        # col_names_str = ", ".join(store_col_names)
+        # placeholders_str = ", ".join('%s' for _ in store_col_names)
+        # stmt_text = f'INSERT INTO {self.sa_tbl.name} ({col_names_str}) VALUES ({placeholders_str})'
+        # conn.exec_driver_sql(stmt_text, table_rows)
+
+    def _versions_clause(self, versions: list[int | None], match_on_vmin: bool) -> sql.ColumnElement[bool]:
         """Return filter for base versions"""
         v = versions[0]
         if v is None:
@@ -387,9 +500,9 @@ class StoreBase:
     def delete_rows(
         self,
         current_version: int,
-        base_versions: list[Optional[int]],
+        base_versions: list[int | None],
         match_on_vmin: bool,
-        where_clause: Optional[sql.ColumnElement[bool]],
+        where_clause: sql.ColumnElement[bool] | None,
     ) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
         Also: populate the undo columns
@@ -403,23 +516,23 @@ class StoreBase:
             number of deleted rows
         """
         where_clause = sql.true() if where_clause is None else where_clause
-        where_clause = sql.and_(
-            self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION, where_clause
-        )
+        version_clause = sql.and_(self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION)
         rowid_join_clause = self._rowid_join_predicate()
         base_versions_clause = (
            sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
         )
-        set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
+        set_clause: dict[sql.Column, int | sql.Column] = {self.v_max_col: current_version}
         for index_info in self.tbl_version.get().idxs_by_name.values():
            # copy value column to undo column
            set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
            # set value column to NULL
            set_clause[index_info.val_col.sa_col] = None
+
         stmt = (
            sql.update(self.sa_tbl)
            .values(set_clause)
            .where(where_clause)
+            .where(version_clause)
            .where(rowid_join_clause)
            .where(base_versions_clause)
        )
@@ -435,8 +548,7 @@ class StoreBase:
            *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), filter_view.rowid_columns())],
        )
        stmt = (
-            sql.select('*')  # TODO: Use a more specific list of columns?
-            .select_from(self.sa_tbl)
+            sql.select(self.sa_tbl)
            .where(self.v_min_col <= version)
            .where(self.v_max_col > version)
            .where(sql.exists().where(filter_predicate))
@@ -500,26 +612,31 @@ class StoreComponentView(StoreView):
    PK: now also includes pos, the position returned by the ComponentIterator for the base row identified by base_rowid
    """
 
-    rowid_cols: list[sql.Column]
-    pos_col: sql.Column
-    pos_col_idx: int
-
    def __init__(self, catalog_view: catalog.TableVersion):
        super().__init__(catalog_view)
 
    def _create_rowid_columns(self) -> list[sql.Column]:
        # each base row is expanded into n view rows
-        self.rowid_cols = [sql.Column(c.name, c.type) for c in self.base.rowid_columns()]
+        rowid_cols = [sql.Column(c.name, c.type) for c in self.base.rowid_columns()]
        # name of pos column: avoid collisions with bases' pos columns
-        self.pos_col = sql.Column(f'pos_{len(self.rowid_cols) - 1}', sql.BigInteger, nullable=False)
-        self.pos_col_idx = len(self.rowid_cols)
-        self.rowid_cols.append(self.pos_col)
-        return self.rowid_cols
-
-    def create_sa_tbl(self) -> None:
-        super().create_sa_tbl()
+        pos_col = sql.Column(f'pos_{len(rowid_cols) - 1}', sql.BigInteger, nullable=False)
+        rowid_cols.append(pos_col)
+        return rowid_cols
+
+    @property
+    def pos_col(self) -> sql.Column:
+        return self.rowid_columns()[-1]
+
+    @property
+    def pos_col_idx(self) -> int:
+        return len(self.rowid_columns()) - 1
+
+    def create_sa_tbl(self, tbl_version: catalog.TableVersion | None = None) -> None:
+        if tbl_version is None:
+            tbl_version = self.tbl_version.get()
+        super().create_sa_tbl(tbl_version)
        # we need to fix up the 'pos' column in TableVersion
-        self.tbl_version.get().cols_by_name['pos'].sa_col = self.pos_col
+        tbl_version.cols_by_name['pos'].sa_col = self.pos_col
 
    def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
        return sql.and_(
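
The most consequential mechanical change in this file is the new accumulate-and-flush insert path (insert_rows() / sql_insert() above): rows are converted to tuples, buffered across exec-plan batches, and written as a multi-row INSERT whenever the buffer reaches __INSERT_BATCH_SIZE (now 10_000). A minimal sketch of that pattern in plain SQLAlchemy; batched_insert, col_names, and the flush helper are illustrative stand-ins, not Pixeltable API:

    from typing import Any, Iterable

    import sqlalchemy as sql

    BATCH_SIZE = 10_000  # mirrors StoreBase.__INSERT_BATCH_SIZE

    def batched_insert(
        conn: sql.Connection, sa_tbl: sql.Table, col_names: list[str], rows: Iterable[tuple[Any, ...]]
    ) -> int:
        """Buffer row tuples and flush them as multi-row INSERTs of at most BATCH_SIZE rows."""
        buf: list[tuple[Any, ...]] = []
        total = 0

        def flush() -> None:
            # executemany-style insert: one dict per row, keyed by column name,
            # analogous to what sql_insert() does with store_col_names
            conn.execute(sql.insert(sa_tbl), [dict(zip(col_names, r)) for r in buf])
            buf.clear()

        for row in rows:
            buf.append(row)
            total += 1
            if len(buf) >= BATCH_SIZE:
                flush()
        if buf:  # flush any remaining rows
            flush()
        return total

Flushing on a fixed row budget keeps memory bounded regardless of how large an individual exec-plan batch is; load_column() stages rows into its temp table with the same pattern before applying a single UPDATE against the store table.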