pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,109 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from typing import TYPE_CHECKING
6
+ from uuid import UUID
7
+
8
+ from pixeltable import exceptions as excs
9
+
10
+ from .table_version import TableVersion, TableVersionKey
11
+
12
+ if TYPE_CHECKING:
13
+ from pixeltable.catalog import Column
14
+
15
+ _logger = logging.getLogger('pixeltable')
16
+
17
+
18
class TableVersionHandle:
    """
    Indirection mechanism for TableVersion instances, which get resolved against the catalog at runtime.

    See the TableVersion docstring for details on the semantics of `effective_version` and `anchor_tbl_id`.

    Equality and hashing are based on `(id, effective_version)` only; `anchor_tbl_id` does not participate,
    so two handles that differ only in their anchor compare equal and hash identically.
    """

    key: TableVersionKey
    # cached TableVersion; None until get() resolves it, unless one was supplied to __init__
    _tbl_version: TableVersion | None

    def __init__(self, key: TableVersionKey, *, tbl_version: TableVersion | None = None):
        self.key = key
        self._tbl_version = tbl_version

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, TableVersionHandle):
            return False
        return self.id == other.id and self.effective_version == other.effective_version

    def __hash__(self) -> int:
        # must stay in sync with __eq__: same two components
        return hash((self.id, self.effective_version))

    def __repr__(self) -> str:
        return (
            f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version}, '
            f'anchor_tbl_id={self.anchor_tbl_id})'
        )

    @property
    def id(self) -> UUID:
        """The table id stored in the key."""
        return self.key.tbl_id

    @property
    def effective_version(self) -> int | None:
        """The effective version stored in the key (None for a non-snapshot handle, see is_snapshot)."""
        return self.key.effective_version

    @property
    def anchor_tbl_id(self) -> UUID | None:
        """The anchor table id stored in the key, if any."""
        return self.key.anchor_tbl_id

    @property
    def is_snapshot(self) -> bool:
        """True iff the key carries an effective version."""
        return self.effective_version is not None

    def get(self) -> TableVersion:
        """
        Resolve this handle to a TableVersion, (re)loading it through the Catalog when there is no cached
        instance or the cached instance is not validated.

        Returns:
            the TableVersion for `self.key`; it is also cached on the handle.
        """
        from .catalog import Catalog

        # NOTE(review): a single Catalog reference is used for the whole call; this assumes Catalog.get()
        # returns the same instance for the duration of the call - confirm.
        cat = Catalog.get()
        if self._tbl_version is None or not self._tbl_version.is_validated:
            if self.effective_version is not None and self._tbl_version is not None:
                # this is a snapshot version; we need to make sure we refer to the instance cached
                # in Catalog, in order to avoid mixing sa_tbl instances in the same transaction
                # (which will lead to duplicates in the From clause generated in SqlNode.create_from_clause())
                assert self.key in cat._tbl_versions
                self._tbl_version = cat._tbl_versions[self.key]
                self._tbl_version.is_validated = True
            else:
                self._tbl_version = cat.get_tbl_version(self.key)
                assert self._tbl_version.key == self.key
        if self.effective_version is None:
            # sanity check only: test membership on the dict view instead of materializing a list,
            # so nothing is built when assertions are disabled
            assert self._tbl_version in cat._tbl_versions.values(), self._tbl_version
        return self._tbl_version

    def as_dict(self) -> dict:
        """Serialize the handle; only the key is serialized, not the cached TableVersion."""
        return self.key.as_dict()

    @classmethod
    def from_dict(cls, d: dict) -> TableVersionHandle:
        """Rebuild a handle from the output of as_dict(); the result has no cached TableVersion."""
        return cls(TableVersionKey.from_dict(d))
88
+
89
+
90
@dataclass(frozen=True)
class ColumnHandle:
    """Reference to a column by (table version handle, column id), resolved to a Column on demand."""

    tbl_version: TableVersionHandle
    col_id: int

    def get(self) -> 'Column':
        """
        Resolve this handle to the Column registered under `col_id` in the referenced table version.

        Raises:
            excs.Error: if `col_id` is not present in the table version's `cols_by_id`.
        """
        # resolve the table version once, so the membership check, the error message and the lookup
        # all use the same instance (each TableVersionHandle.get() call goes through the catalog)
        tv = self.tbl_version.get()
        if self.col_id not in tv.cols_by_id:
            schema_version_drop = tv._tbl_md.column_md[self.col_id].schema_version_drop
            raise excs.Error(
                f'Column was dropped (no record for column ID {self.col_id} in table '
                f'{tv.versioned_name!r}; it was dropped in table version {schema_version_drop})'
            )
        return tv.cols_by_id[self.col_id]

    def as_dict(self) -> dict:
        """Serialize as {'tbl_version': <handle dict>, 'col_id': <int>}."""
        return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}

    @classmethod
    def from_dict(cls, d: dict) -> ColumnHandle:
        """Inverse of as_dict()."""
        return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])
@@ -1,14 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Optional, Union
5
4
  from uuid import UUID
6
5
 
7
- import pixeltable as pxt
8
- from pixeltable import exprs
6
+ from pixeltable.env import Env
7
+ from pixeltable.metadata import schema
9
8
 
10
9
  from .column import Column
11
- from .table_version import TableVersion
10
+ from .globals import MediaValidation
11
+ from .table_version import TableVersion, TableVersionKey
12
+ from .table_version_handle import TableVersionHandle
12
13
 
13
14
  _logger = logging.getLogger('pixeltable')
14
15
 
@@ -23,24 +24,101 @@ class TableVersionPath:
23
24
 
24
25
  TableVersionPath contains all metadata needed to execute queries and updates against a particular version of a
25
26
  table/view.
27
+
28
+ TableVersionPath supplies metadata needed for query construction (eg, column names), for which it uses a
29
+ cached TableVersion instance.
30
+ - when running inside a transaction, this instance is guaranteed to be validated
31
+ - when running outside a transaction, we use an unvalidated instance in order to avoid repeated validation
32
+ on every metadata-related method call (the instance won't stay validated, because TableVersionHandle.get()
33
+ runs a local transaction, at the end of which the instance is again invalidated)
34
+ - supplying metadata from an unvalidated instance is okay, because it needs to get revalidated anyway when a
35
+ query actually runs (at which point there is a transaction context) - there is no guarantee that in between
36
+ constructing a Query and executing it, the underlying table schema hasn't changed (eg, a concurrent process
37
+ could have dropped a column referenced in the query).
26
38
  """
27
39
 
28
- def __init__(self, tbl_version: TableVersion, base: Optional[TableVersionPath] = None):
40
+ tbl_version: TableVersionHandle
41
+ base: TableVersionPath | None
42
+ _cached_tbl_version: TableVersion | None
43
+
44
+ def __init__(self, tbl_version: TableVersionHandle, base: TableVersionPath | None = None):
29
45
  assert tbl_version is not None
30
46
  self.tbl_version = tbl_version
31
47
  self.base = base
48
+ self._cached_tbl_version = None
49
+
50
+ if self.base is not None and tbl_version.anchor_tbl_id is not None:
51
+ self.base = self.base.anchor_to(tbl_version.anchor_tbl_id)
52
+
53
+ @classmethod
54
+ def from_md(cls, path: schema.TableVersionPath) -> TableVersionPath:
55
+ assert len(path) > 0
56
+ result: TableVersionPath | None = None
57
+ for tbl_id_str, effective_version in path[::-1]:
58
+ tbl_id = UUID(tbl_id_str)
59
+ key = TableVersionKey(tbl_id, effective_version, None)
60
+ result = TableVersionPath(TableVersionHandle(key), base=result)
61
+ return result
62
+
63
+ def as_md(self) -> schema.TableVersionPath:
64
+ result = [(self.tbl_version.id.hex, self.tbl_version.effective_version)]
65
+ if self.base is not None:
66
+ result.extend(self.base.as_md())
67
+ return result
68
+
69
+ def refresh_cached_md(self) -> None:
70
+ from pixeltable.catalog import Catalog
71
+
72
+ if Env.get().in_xact:
73
+ # when we're running inside a transaction, we need to make sure to supply current metadata;
74
+ # mixing stale metadata with current metadata leads to query construction failures
75
+ # (multiple sqlalchemy Table instances for the same underlying table create corrupted From clauses)
76
+ if self._cached_tbl_version is not None and self._cached_tbl_version.is_validated:
77
+ # nothing to refresh
78
+ return
79
+ elif self._cached_tbl_version is not None:
80
+ return
81
+
82
+ with Catalog.get().begin_xact(tbl_id=self.tbl_version.id, for_write=False):
83
+ self._cached_tbl_version = self.tbl_version.get()
84
+
85
+ def anchor_to(self, anchor_tbl_id: UUID | None) -> TableVersionPath:
86
+ """
87
+ Return a new TableVersionPath with all of its non-snapshot TableVersions pointing to the given anchor_tbl_id.
88
+ (This will clear the existing anchor_tbl_id in the case anchor_tbl_id=None.)
89
+ """
90
+ if self.tbl_version.effective_version is not None:
91
+ return self
32
92
 
93
+ return TableVersionPath(
94
+ TableVersionHandle(TableVersionKey(self.tbl_version.id, None, anchor_tbl_id)),
95
+ base=self.base.anchor_to(anchor_tbl_id) if self.base is not None else None,
96
+ )
97
+
98
+ def clear_cached_md(self) -> None:
99
+ self._cached_tbl_version = None
100
+ if self.base is not None:
101
+ self.base.clear_cached_md()
102
+
103
+ @property
33
104
  def tbl_id(self) -> UUID:
34
105
  """Return the id of the table/view that this path represents"""
35
106
  return self.tbl_version.id
36
107
 
37
108
  def version(self) -> int:
38
109
  """Return the version of the table/view that this path represents"""
39
- return self.tbl_version.version
110
+ self.refresh_cached_md()
111
+ return self._cached_tbl_version.version
112
+
113
+ def schema_version(self) -> int:
114
+ """Return the schema version of the table/view that this path represents"""
115
+ self.refresh_cached_md()
116
+ return self._cached_tbl_version.schema_version
40
117
 
41
118
  def tbl_name(self) -> str:
42
119
  """Return the name of the table/view that this path represents"""
43
- return self.tbl_version.name
120
+ self.refresh_cached_md()
121
+ return self._cached_tbl_version.name
44
122
 
45
123
  def path_len(self) -> int:
46
124
  """Return the length of the path"""
@@ -48,32 +126,53 @@ class TableVersionPath:
48
126
 
49
127
  def is_snapshot(self) -> bool:
50
128
  """Return True if this is a path of snapshot versions"""
51
- if not self.tbl_version.is_snapshot:
52
- return False
53
- return self.base.is_snapshot() if self.base is not None else True
129
+ return self.tbl_version.is_snapshot
54
130
 
55
131
  def is_view(self) -> bool:
56
- return self.tbl_version.is_view()
132
+ self.refresh_cached_md()
133
+ return self._cached_tbl_version.is_view
57
134
 
58
135
  def is_component_view(self) -> bool:
59
- return self.tbl_version.is_component_view()
136
+ self.refresh_cached_md()
137
+ return self._cached_tbl_version.is_component_view
138
+
139
+ def is_replica(self) -> bool:
140
+ self.refresh_cached_md()
141
+ return self._cached_tbl_version.is_replica
142
+
143
+ def is_mutable(self) -> bool:
144
+ self.refresh_cached_md()
145
+ return self._cached_tbl_version.is_mutable
60
146
 
61
147
  def is_insertable(self) -> bool:
62
- return self.tbl_version.is_insertable()
148
+ self.refresh_cached_md()
149
+ return self._cached_tbl_version.is_insertable
63
150
 
64
- def get_tbl_versions(self) -> list[TableVersion]:
151
+ def comment(self) -> str:
152
+ self.refresh_cached_md()
153
+ return self._cached_tbl_version.comment
154
+
155
+ def num_retained_versions(self) -> int:
156
+ self.refresh_cached_md()
157
+ return self._cached_tbl_version.num_retained_versions
158
+
159
+ def media_validation(self) -> MediaValidation:
160
+ self.refresh_cached_md()
161
+ return self._cached_tbl_version.media_validation
162
+
163
+ def get_tbl_versions(self) -> list[TableVersionHandle]:
65
164
  """Return all tbl versions"""
66
165
  if self.base is None:
67
166
  return [self.tbl_version]
68
- return [self.tbl_version] + self.base.get_tbl_versions()
167
+ return [self.tbl_version, *self.base.get_tbl_versions()]
69
168
 
70
- def get_bases(self) -> list[TableVersion]:
169
+ def get_bases(self) -> list[TableVersionHandle]:
71
170
  """Return the tbl versions of all bases (excluding this path's own tbl version)"""
72
171
  if self.base is None:
73
172
  return []
74
173
  return self.base.get_tbl_versions()
75
174
 
76
- def find_tbl_version(self, id: UUID) -> Optional[TableVersion]:
175
+ def find_tbl_version(self, id: UUID) -> TableVersionHandle | None:
77
176
  """Return the matching TableVersion in the chain of TableVersions, starting with this one"""
78
177
  if self.tbl_version.id == id:
79
178
  return self.tbl_version
@@ -81,23 +180,14 @@ class TableVersionPath:
81
180
  return None
82
181
  return self.base.find_tbl_version(id)
83
182
 
84
- def get_column_ref(self, col_name: str) -> exprs.ColumnRef:
85
- """Return a ColumnRef for the given column name."""
86
- from pixeltable.exprs import ColumnRef
87
- if col_name not in self.tbl_version.cols_by_name:
88
- if self.base is None:
89
- raise AttributeError(f'Column {col_name} unknown')
90
- return self.base.get_column_ref(col_name)
91
- col = self.tbl_version.cols_by_name[col_name]
92
- return ColumnRef(col)
93
-
94
183
  def columns(self) -> list[Column]:
95
184
  """Return all user columns visible in this tbl version path, including columns from bases"""
96
- result = list(self.tbl_version.cols_by_name.values())
97
- if self.base is not None:
185
+ self.refresh_cached_md()
186
+ result = list(self._cached_tbl_version.cols_by_name.values())
187
+ if self.base is not None and self._cached_tbl_version.include_base_columns:
98
188
  base_cols = self.base.columns()
99
189
  # we only include base columns that don't conflict with one of our column names
100
- result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
190
+ result.extend(c for c in base_cols if c.name not in self._cached_tbl_version.cols_by_name)
101
191
  return result
102
192
 
103
193
  def cols_by_name(self) -> dict[str, Column]:
@@ -110,25 +200,30 @@ class TableVersionPath:
110
200
  cols = self.columns()
111
201
  return {col.id: col for col in cols}
112
202
 
113
- def get_column(self, name: str, include_bases: bool = True) -> Optional[Column]:
203
+ def get_column(self, name: str) -> Column | None:
114
204
  """Return the column with the given name, or None if not found"""
115
- col = self.tbl_version.cols_by_name.get(name)
205
+ self.refresh_cached_md()
206
+ col = self._cached_tbl_version.cols_by_name.get(name)
116
207
  if col is not None:
117
208
  return col
118
- elif self.base is not None and include_bases:
209
+ elif self.base is not None and self._cached_tbl_version.include_base_columns:
119
210
  return self.base.get_column(name)
120
211
  else:
121
212
  return None
122
213
 
123
- def has_column(self, col: Column, include_bases: bool = True) -> bool:
124
- """Return True if this table has the given column.
125
- """
126
- assert col.tbl is not None
127
- if col.tbl.id == self.tbl_version.id and col.tbl.effective_version == self.tbl_version.effective_version \
128
- and col.id in self.tbl_version.cols_by_id:
214
+ def has_column(self, col: Column) -> bool:
215
+ """Return True if this table has the given column."""
216
+ assert col.get_tbl() is not None
217
+ self.refresh_cached_md()
218
+
219
+ if (
220
+ col.get_tbl().id == self.tbl_version.id
221
+ and col.get_tbl().effective_version == self.tbl_version.effective_version
222
+ and col.id in self._cached_tbl_version.cols_by_id
223
+ ):
129
224
  # the column is visible in this table version
130
225
  return True
131
- elif self.base is not None and include_bases:
226
+ elif self.base is not None:
132
227
  return self.base.has_column(col)
133
228
  else:
134
229
  return False
@@ -136,11 +231,11 @@ class TableVersionPath:
136
231
  def as_dict(self) -> dict:
137
232
  return {
138
233
  'tbl_version': self.tbl_version.as_dict(),
139
- 'base': self.base.as_dict() if self.base is not None else None
234
+ 'base': self.base.as_dict() if self.base is not None else None,
140
235
  }
141
236
 
142
237
  @classmethod
143
238
  def from_dict(cls, d: dict) -> TableVersionPath:
144
- tbl_version = TableVersion.from_dict(d['tbl_version'])
239
+ tbl_version = TableVersionHandle.from_dict(d['tbl_version'])
145
240
  base = TableVersionPath.from_dict(d['base']) if d['base'] is not None else None
146
241
  return cls(tbl_version, base)
@@ -0,0 +1,53 @@
1
+ # This file contains all dataclasses related to schema.PendingTableOp:
2
+ # - TableOp: the container for each log entry
3
+ # - <>Op: the actual operation, which is performed by TableVersion.exec_op(); each <>Op class contains
4
+ # enough information for exec_op() to perform the operation without having to reference data outside of
5
+ # TableVersion
6
+
7
+ import dataclasses
8
+ from typing import Any
9
+
10
+
11
@dataclasses.dataclass
class CreateStoreTableOp:
    """
    Op record without payload fields.

    NOTE(review): going by the class name, this requests creation of the store table; the handling is
    not in this file (the file header points to TableVersion.exec_op()) - confirm there.
    """
14
+
15
+
16
@dataclasses.dataclass
class CreateIndexOp:
    """
    Op record carrying a single index id.

    NOTE(review): going by the class name, this requests creation of the index identified by `idx_id`;
    the handling is not in this file - confirm against TableVersion.exec_op().
    """

    # id of the index this op refers to
    idx_id: int
19
+
20
+
21
@dataclasses.dataclass
class LoadViewOp:
    """
    Op record carrying a serialized view path.

    NOTE(review): going by the class name, this requests loading of a view's data; the handling is not
    in this file - confirm against TableVersion.exec_op().
    """

    # needed to create the view load plan
    view_path: dict[str, Any]
24
+
25
+
26
@dataclasses.dataclass
class DeleteTableMdOp:
    """
    Op record without payload fields.

    NOTE(review): going by the class name, this requests deletion of the table's metadata; the handling
    is not in this file - confirm against TableVersion.exec_op().
    """
29
+
30
+
31
@dataclasses.dataclass
class DeleteTableMediaFilesOp:
    """
    Op record without payload fields.

    NOTE(review): going by the class name, this requests deletion of the table's media files; the
    handling is not in this file - confirm against TableVersion.exec_op().
    """
34
+
35
+
36
@dataclasses.dataclass
class DropStoreTableOp:
    """
    Op record without payload fields.

    NOTE(review): going by the class name, this requests dropping of the store table; the handling is
    not in this file - confirm against TableVersion.exec_op().
    """
39
+
40
+
41
@dataclasses.dataclass
class TableOp:
    """
    Container for one log entry of a table update operation (see schema.PendingTableOp).

    The four leading fields identify the entry and its position; the optional `*_op` fields hold the
    actual operation, all defaulting to None.
    NOTE(review): presumably exactly one `*_op` field is set per entry - nothing in this class enforces it.
    """

    # uuid.UUID
    tbl_id: str
    # sequence number within the update operation; [0, num_ops)
    op_sn: int
    # total number of ops forming the update operation
    num_ops: int
    # if True, op must be run as part of a transaction
    needs_xact: bool

    # operation payloads
    create_store_table_op: CreateStoreTableOp | None = None
    create_index_op: CreateIndexOp | None = None
    load_view_op: LoadViewOp | None = None
    delete_table_md_op: DeleteTableMdOp | None = None
    delete_table_media_files_op: DeleteTableMediaFilesOp | None = None
    drop_store_table_op: DropStoreTableOp | None = None
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ from IPython.lib.pretty import RepresentationPrinter
8
+
9
+
10
@dataclass(frozen=True)
class RowCountStats:
    """
    Statistics about the counts of rows affected by a table operation.

    Instances are frozen; the combining methods below return new objects.
    """

    ins_rows: int = 0  # rows inserted
    del_rows: int = 0  # rows deleted
    upd_rows: int = 0  # rows updated
    num_excs: int = 0  # total number of exceptions
    # TODO: disambiguate what this means: # of slots computed or # of columns computed?
    computed_values: int = 0  # number of computed values (e.g., computed columns) affected by the operation

    @property
    def num_rows(self) -> int:
        """Sum of inserted, deleted and updated rows."""
        return self.ins_rows + self.del_rows + self.upd_rows

    def insert_to_update(self) -> 'RowCountStats':
        """
        Convert insert row count stats to update row count stats.
        This is used when an insert operation is treated as an update.

        Returns:
            A new RowCountStats in which the inserted rows are counted as updated rows
            (ins_rows becomes 0); all other counters are carried over unchanged.
        """
        return RowCountStats(
            ins_rows=0,
            del_rows=self.del_rows,
            upd_rows=self.upd_rows + self.ins_rows,
            num_excs=self.num_excs,
            computed_values=self.computed_values,
        )

    def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
        """
        Add the stats from two RowCountStats objects together.

        Returns:
            A new RowCountStats whose counters are the field-wise sums, or NotImplemented
            when `other` is not a RowCountStats.
        """
        # Follow the binary-operator protocol so that unsupported operands produce the
        # standard TypeError rather than an AttributeError from the field accesses below.
        if not isinstance(other, RowCountStats):
            return NotImplemented
        return RowCountStats(
            ins_rows=self.ins_rows + other.ins_rows,
            del_rows=self.del_rows + other.del_rows,
            upd_rows=self.upd_rows + other.upd_rows,
            num_excs=self.num_excs + other.num_excs,
            computed_values=self.computed_values + other.computed_values,
        )
51
+
52
+
53
@dataclass(frozen=True)
class UpdateStatus:
    """
    Information about changes to table data or table schema
    """

    updated_cols: list[str] = field(default_factory=list)
    """Columns that were updated."""
    cols_with_excs: list[str] = field(default_factory=list)
    """Columns that encountered exceptions."""

    # stats for the rows affected by the operation
    row_count_stats: RowCountStats = field(default_factory=RowCountStats)
    """Row count statistics for rows affected by this operation."""

    # stats for changes cascaded to other tables
    cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
    """Row count statistics for changes cascaded to other tables."""

    # stats for the rows affected by the operation in an external store
    ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
    """Row count statistics for rows affected in an external store."""

    @property
    def num_rows(self) -> int:
        """Total number of rows affected (including cascaded changes)."""
        return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows

    @property
    def num_excs(self) -> int:
        """Total number of exceptions encountered (including cascaded changes)."""
        return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs

    @property
    def num_computed_values(self) -> int:
        """Total number of computed values affected (including cascaded changes)."""
        return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values

    def insert_to_update(self) -> 'UpdateStatus':
        """
        Convert the update status from an insert operation to an update operation.
        This is used when an insert operation is treated as an update.

        The direct and cascaded row count stats are converted; the external-store stats
        and the column lists are carried over as they are.
        """
        return UpdateStatus(
            updated_cols=self.updated_cols,
            cols_with_excs=self.cols_with_excs,
            row_count_stats=self.row_count_stats.insert_to_update(),
            cascade_row_count_stats=self.cascade_row_count_stats.insert_to_update(),
            ext_row_count_stats=self.ext_row_count_stats,
        )

    def to_cascade(self) -> 'UpdateStatus':
        """
        Convert the update status to a cascade update status.
        This is used when an operation cascades changes to other tables.

        The direct row count stats are folded into the cascaded stats and then reset.
        """
        return UpdateStatus(
            updated_cols=self.updated_cols,
            cols_with_excs=self.cols_with_excs,
            row_count_stats=RowCountStats(),
            cascade_row_count_stats=self.cascade_row_count_stats + self.row_count_stats,
            ext_row_count_stats=self.ext_row_count_stats,
        )

    def __add__(self, other: 'UpdateStatus') -> 'UpdateStatus':
        """
        Add the update status from two UpdateStatus objects together.

        Column lists are concatenated and de-duplicated, keeping first-occurrence order;
        the three row count stats are summed. Returns NotImplemented when `other` is not
        an UpdateStatus.
        """
        # Follow the binary-operator protocol so that unsupported operands produce the
        # standard TypeError rather than an AttributeError from the field accesses below.
        if not isinstance(other, UpdateStatus):
            return NotImplemented
        return UpdateStatus(
            # dict.fromkeys() removes duplicates while preserving insertion order
            updated_cols=list(dict.fromkeys(self.updated_cols + other.updated_cols)),
            cols_with_excs=list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs)),
            row_count_stats=self.row_count_stats + other.row_count_stats,
            cascade_row_count_stats=self.cascade_row_count_stats + other.cascade_row_count_stats,
            ext_row_count_stats=self.ext_row_count_stats + other.ext_row_count_stats,
        )

    @property
    def insert_msg(self) -> str:
        """A message describing the results of an insert operation."""
        cols_with_excs_str = ''
        # Only name columns when some were recorded; otherwise the message would end in
        # ' across 0 columns ()' for exceptions that were counted without a column name.
        if self.num_excs != 0 and len(self.cols_with_excs) > 0:
            cols_with_excs_str = (
                f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
            )
            cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
        msg = (
            f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
            f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
        )
        return msg

    @classmethod
    def __cnt_str(cls, cnt: int, item: str) -> str:
        """Format `cnt` followed by `item`, pluralized with a trailing 's' unless cnt is 1."""
        assert cnt > 0
        return f'{cnt} {item}{"" if cnt == 1 else "s"}'

    def _repr_pretty_(self, p: 'RepresentationPrinter', cycle: bool) -> None:
        """
        Write a one-line summary of the non-zero counters to the printer `p`.

        `cycle` is not used by this implementation.
        """
        messages = []
        # Combine row count stats and cascade row count stats
        stats = self.row_count_stats + self.cascade_row_count_stats
        if stats.ins_rows > 0:
            messages.append(f'{self.__cnt_str(stats.ins_rows, "row")} inserted')
        if stats.del_rows > 0:
            messages.append(f'{self.__cnt_str(stats.del_rows, "row")} deleted')
        if stats.upd_rows > 0:
            messages.append(f'{self.__cnt_str(stats.upd_rows, "row")} updated')
        if stats.computed_values > 0:
            messages.append(f'{self.__cnt_str(stats.computed_values, "value")} computed')
        if stats.num_excs > 0:
            messages.append(self.__cnt_str(stats.num_excs, 'exception'))
        p.text(', '.join(messages) + '.' if len(messages) > 0 else 'No rows affected.')

    @property
    def pxt_rows_updated(self) -> int:
        """
        Returns the number of Pixeltable rows that were updated as a result of the operation.
        """
        return (self.row_count_stats + self.cascade_row_count_stats).upd_rows

    @property
    def external_rows_updated(self) -> int:
        """Number of rows updated in an external store."""
        return self.ext_row_count_stats.upd_rows

    @property
    def external_rows_created(self) -> int:
        """Number of rows created in an external store."""
        return self.ext_row_count_stats.ins_rows

    @property
    def external_rows_deleted(self) -> int:
        """Number of rows deleted from an external store."""
        return self.ext_row_count_stats.del_rows

    @property
    def ext_num_rows(self) -> int:
        """Total number of rows affected in an external store."""
        return self.ext_row_count_stats.num_rows