pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,13 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import TYPE_CHECKING, Optional
4
+ from dataclasses import dataclass
5
+ from typing import TYPE_CHECKING
5
6
  from uuid import UUID
6
7
 
7
- from .table_version import TableVersion
8
+ from pixeltable import exceptions as excs
9
+
10
+ from .table_version import TableVersion, TableVersionKey
8
11
 
9
12
  if TYPE_CHECKING:
10
- pass
13
+ from pixeltable.catalog import Column
11
14
 
12
15
  _logger = logging.getLogger('pixeltable')
13
16
 
@@ -15,15 +18,15 @@ _logger = logging.getLogger('pixeltable')
15
18
  class TableVersionHandle:
16
19
  """
17
20
  Indirection mechanism for TableVersion instances, which get resolved against the catalog at runtime.
21
+
22
+ See the TableVersion docstring for details on the semantics of `effective_version` and `anchor_tbl_id`.
18
23
  """
19
24
 
20
- id: UUID
21
- effective_version: Optional[int]
22
- _tbl_version: Optional[TableVersion]
25
+ key: TableVersionKey
26
+ _tbl_version: TableVersion | None
23
27
 
24
- def __init__(self, tbl_id: UUID, effective_version: Optional[int], tbl_version: Optional[TableVersion] = None):
25
- self.id = tbl_id
26
- self.effective_version = effective_version
28
+ def __init__(self, key: TableVersionKey, *, tbl_version: TableVersion | None = None):
29
+ self.key = key
27
30
  self._tbl_version = tbl_version
28
31
 
29
32
  def __eq__(self, other: object) -> bool:
@@ -34,20 +37,73 @@ class TableVersionHandle:
34
37
  def __hash__(self) -> int:
35
38
  return hash((self.id, self.effective_version))
36
39
 
37
- @classmethod
38
- def create(cls, tbl_version: TableVersion) -> TableVersionHandle:
39
- return cls(tbl_version.id, tbl_version.effective_version, tbl_version)
40
+ def __repr__(self) -> str:
41
+ return (
42
+ f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version}, '
43
+ f'anchor_tbl_id={self.anchor_tbl_id})'
44
+ )
45
+
46
+ @property
47
+ def id(self) -> UUID:
48
+ return self.key.tbl_id
49
+
50
+ @property
51
+ def effective_version(self) -> int | None:
52
+ return self.key.effective_version
53
+
54
+ @property
55
+ def anchor_tbl_id(self) -> UUID | None:
56
+ return self.key.anchor_tbl_id
57
+
58
+ @property
59
+ def is_snapshot(self) -> bool:
60
+ return self.effective_version is not None
40
61
 
41
62
  def get(self) -> TableVersion:
42
63
  from .catalog import Catalog
43
64
 
44
- if self._tbl_version is None:
45
- self._tbl_version = Catalog.get().get_tbl_version(self.id, self.effective_version)
65
+ cat = Catalog.get()
66
+ if self._tbl_version is None or not self._tbl_version.is_validated:
67
+ if self.effective_version is not None and self._tbl_version is not None:
68
+ # this is a snapshot version; we need to make sure we refer to the instance cached
69
+ # in Catalog, in order to avoid mixing sa_tbl instances in the same transaction
70
+ # (which will lead to duplicates in the From clause generated in SqlNode.create_from_clause())
71
+ assert self.key in cat._tbl_versions
72
+ self._tbl_version = cat._tbl_versions[self.key]
73
+ self._tbl_version.is_validated = True
74
+ else:
75
+ self._tbl_version = Catalog.get().get_tbl_version(self.key)
76
+ assert self._tbl_version.key == self.key
77
+ if self.effective_version is None:
78
+ tvs = list(Catalog.get()._tbl_versions.values())
79
+ assert self._tbl_version in tvs, self._tbl_version
46
80
  return self._tbl_version
47
81
 
48
82
  def as_dict(self) -> dict:
49
- return {'id': str(self.id), 'effective_version': self.effective_version}
83
+ return self.key.as_dict()
50
84
 
51
85
  @classmethod
52
86
  def from_dict(cls, d: dict) -> TableVersionHandle:
53
- return cls(UUID(d['id']), d['effective_version'])
87
+ return cls(TableVersionKey.from_dict(d))
88
+
89
+
90
+ @dataclass(frozen=True)
91
+ class ColumnHandle:
92
+ tbl_version: TableVersionHandle
93
+ col_id: int
94
+
95
+ def get(self) -> 'Column':
96
+ if self.col_id not in self.tbl_version.get().cols_by_id:
97
+ schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
98
+ raise excs.Error(
99
+ f'Column was dropped (no record for column ID {self.col_id} in table '
100
+ f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
101
+ )
102
+ return self.tbl_version.get().cols_by_id[self.col_id]
103
+
104
+ def as_dict(self) -> dict:
105
+ return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}
106
+
107
+ @classmethod
108
+ def from_dict(cls, d: dict) -> ColumnHandle:
109
+ return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])
@@ -1,12 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Optional
5
4
  from uuid import UUID
6
5
 
6
+ from pixeltable.env import Env
7
7
  from pixeltable.metadata import schema
8
8
 
9
9
  from .column import Column
10
+ from .globals import MediaValidation
11
+ from .table_version import TableVersion, TableVersionKey
10
12
  from .table_version_handle import TableVersionHandle
11
13
 
12
14
  _logger = logging.getLogger('pixeltable')
@@ -22,23 +24,40 @@ class TableVersionPath:
22
24
 
23
25
  TableVersionPath contains all metadata needed to execute queries and updates against a particular version of a
24
26
  table/view.
27
+
28
+ TableVersionPath supplies metadata needed for query construction (eg, column names), for which it uses a
29
+ cached TableVersion instance.
30
+ - when running inside a transaction, this instance is guaranteed to be validated
31
+ - when running outside a transaction, we use an unvalidated instance in order to avoid repeated validation
32
+ on every metadata-related method call (the instance won't stay validated, because TableVersionHandle.get()
33
+ runs a local transaction, at the end of which the instance is again invalidated)
34
+ - supplying metadata from an unvalidated instance is okay, because it needs to get revalidated anyway when a
35
+ query actually runs (at which point there is a transaction context) - there is no guarantee that in between
36
+ constructing a Query and executing it, the underlying table schema hasn't changed (eg, a concurrent process
37
+ could have dropped a column referenced in the query).
25
38
  """
26
39
 
27
40
  tbl_version: TableVersionHandle
28
- base: Optional[TableVersionPath]
41
+ base: TableVersionPath | None
42
+ _cached_tbl_version: TableVersion | None
29
43
 
30
- def __init__(self, tbl_version: TableVersionHandle, base: Optional[TableVersionPath] = None):
44
+ def __init__(self, tbl_version: TableVersionHandle, base: TableVersionPath | None = None):
31
45
  assert tbl_version is not None
32
46
  self.tbl_version = tbl_version
33
47
  self.base = base
48
+ self._cached_tbl_version = None
49
+
50
+ if self.base is not None and tbl_version.anchor_tbl_id is not None:
51
+ self.base = self.base.anchor_to(tbl_version.anchor_tbl_id)
34
52
 
35
53
  @classmethod
36
54
  def from_md(cls, path: schema.TableVersionPath) -> TableVersionPath:
37
55
  assert len(path) > 0
38
- result: Optional[TableVersionPath] = None
56
+ result: TableVersionPath | None = None
39
57
  for tbl_id_str, effective_version in path[::-1]:
40
58
  tbl_id = UUID(tbl_id_str)
41
- result = TableVersionPath(TableVersionHandle(tbl_id, effective_version), base=result)
59
+ key = TableVersionKey(tbl_id, effective_version, None)
60
+ result = TableVersionPath(TableVersionHandle(key), base=result)
42
61
  return result
43
62
 
44
63
  def as_md(self) -> schema.TableVersionPath:
@@ -47,17 +66,59 @@ class TableVersionPath:
47
66
  result.extend(self.base.as_md())
48
67
  return result
49
68
 
69
+ def refresh_cached_md(self) -> None:
70
+ from pixeltable.catalog import Catalog
71
+
72
+ if Env.get().in_xact:
73
+ # when we're running inside a transaction, we need to make sure to supply current metadata;
74
+ # mixing stale metadata with current metadata leads to query construction failures
75
+ # (multiple sqlalchemy Table instances for the same underlying table create corrupted From clauses)
76
+ if self._cached_tbl_version is not None and self._cached_tbl_version.is_validated:
77
+ # nothing to refresh
78
+ return
79
+ elif self._cached_tbl_version is not None:
80
+ return
81
+
82
+ with Catalog.get().begin_xact(tbl_id=self.tbl_version.id, for_write=False):
83
+ self._cached_tbl_version = self.tbl_version.get()
84
+
85
+ def anchor_to(self, anchor_tbl_id: UUID | None) -> TableVersionPath:
86
+ """
87
+ Return a new TableVersionPath with all of its non-snapshot TableVersions pointing to the given anchor_tbl_id.
88
+ (This will clear the existing anchor_tbl_id in the case anchor_tbl_id=None.)
89
+ """
90
+ if self.tbl_version.effective_version is not None:
91
+ return self
92
+
93
+ return TableVersionPath(
94
+ TableVersionHandle(TableVersionKey(self.tbl_version.id, None, anchor_tbl_id)),
95
+ base=self.base.anchor_to(anchor_tbl_id) if self.base is not None else None,
96
+ )
97
+
98
+ def clear_cached_md(self) -> None:
99
+ self._cached_tbl_version = None
100
+ if self.base is not None:
101
+ self.base.clear_cached_md()
102
+
103
+ @property
50
104
  def tbl_id(self) -> UUID:
51
105
  """Return the id of the table/view that this path represents"""
52
106
  return self.tbl_version.id
53
107
 
54
108
  def version(self) -> int:
55
109
  """Return the version of the table/view that this path represents"""
56
- return self.tbl_version.get().version
110
+ self.refresh_cached_md()
111
+ return self._cached_tbl_version.version
112
+
113
+ def schema_version(self) -> int:
114
+ """Return the version of the table/view that this path represents"""
115
+ self.refresh_cached_md()
116
+ return self._cached_tbl_version.schema_version
57
117
 
58
118
  def tbl_name(self) -> str:
59
119
  """Return the name of the table/view that this path represents"""
60
- return self.tbl_version.get().name
120
+ self.refresh_cached_md()
121
+ return self._cached_tbl_version.name
61
122
 
62
123
  def path_len(self) -> int:
63
124
  """Return the length of the path"""
@@ -65,18 +126,39 @@ class TableVersionPath:
65
126
 
66
127
  def is_snapshot(self) -> bool:
67
128
  """Return True if this is a path of snapshot versions"""
68
- if not self.tbl_version.get().is_snapshot:
69
- return False
70
- return self.base.is_snapshot() if self.base is not None else True
129
+ return self.tbl_version.is_snapshot
71
130
 
72
131
  def is_view(self) -> bool:
73
- return self.tbl_version.get().is_view
132
+ self.refresh_cached_md()
133
+ return self._cached_tbl_version.is_view
74
134
 
75
135
  def is_component_view(self) -> bool:
76
- return self.tbl_version.get().is_component_view
136
+ self.refresh_cached_md()
137
+ return self._cached_tbl_version.is_component_view
138
+
139
+ def is_replica(self) -> bool:
140
+ self.refresh_cached_md()
141
+ return self._cached_tbl_version.is_replica
142
+
143
+ def is_mutable(self) -> bool:
144
+ self.refresh_cached_md()
145
+ return self._cached_tbl_version.is_mutable
77
146
 
78
147
  def is_insertable(self) -> bool:
79
- return self.tbl_version.get().is_insertable()
148
+ self.refresh_cached_md()
149
+ return self._cached_tbl_version.is_insertable
150
+
151
+ def comment(self) -> str:
152
+ self.refresh_cached_md()
153
+ return self._cached_tbl_version.comment
154
+
155
+ def num_retained_versions(self) -> int:
156
+ self.refresh_cached_md()
157
+ return self._cached_tbl_version.num_retained_versions
158
+
159
+ def media_validation(self) -> MediaValidation:
160
+ self.refresh_cached_md()
161
+ return self._cached_tbl_version.media_validation
80
162
 
81
163
  def get_tbl_versions(self) -> list[TableVersionHandle]:
82
164
  """Return all tbl versions"""
@@ -90,7 +172,7 @@ class TableVersionPath:
90
172
  return []
91
173
  return self.base.get_tbl_versions()
92
174
 
93
- def find_tbl_version(self, id: UUID) -> Optional[TableVersionHandle]:
175
+ def find_tbl_version(self, id: UUID) -> TableVersionHandle | None:
94
176
  """Return the matching TableVersion in the chain of TableVersions, starting with this one"""
95
177
  if self.tbl_version.id == id:
96
178
  return self.tbl_version
@@ -98,20 +180,14 @@ class TableVersionPath:
98
180
  return None
99
181
  return self.base.find_tbl_version(id)
100
182
 
101
- @property
102
- def ancestor_paths(self) -> list[TableVersionPath]:
103
- if self.base is None:
104
- return [self]
105
- else:
106
- return [self, *self.base.ancestor_paths]
107
-
108
183
  def columns(self) -> list[Column]:
109
184
  """Return all user columns visible in this tbl version path, including columns from bases"""
110
- result = list(self.tbl_version.get().cols_by_name.values())
111
- if self.base is not None and self.tbl_version.get().include_base_columns:
185
+ self.refresh_cached_md()
186
+ result = list(self._cached_tbl_version.cols_by_name.values())
187
+ if self.base is not None and self._cached_tbl_version.include_base_columns:
112
188
  base_cols = self.base.columns()
113
189
  # we only include base columns that don't conflict with one of our column names
114
- result.extend(c for c in base_cols if c.name not in self.tbl_version.get().cols_by_name)
190
+ result.extend(c for c in base_cols if c.name not in self._cached_tbl_version.cols_by_name)
115
191
  return result
116
192
 
117
193
  def cols_by_name(self) -> dict[str, Column]:
@@ -124,37 +200,30 @@ class TableVersionPath:
124
200
  cols = self.columns()
125
201
  return {col.id: col for col in cols}
126
202
 
127
- def get_column(self, name: str, include_bases: Optional[bool] = None) -> Optional[Column]:
203
+ def get_column(self, name: str) -> Column | None:
128
204
  """Return the column with the given name, or None if not found"""
129
- col = self.tbl_version.get().cols_by_name.get(name)
205
+ self.refresh_cached_md()
206
+ col = self._cached_tbl_version.cols_by_name.get(name)
130
207
  if col is not None:
131
208
  return col
132
- elif self.base is not None and (include_bases or self.tbl_version.get().include_base_columns):
209
+ elif self.base is not None and self._cached_tbl_version.include_base_columns:
133
210
  return self.base.get_column(name)
134
211
  else:
135
212
  return None
136
213
 
137
- def get_column_by_id(self, tbl_id: UUID, col_id: int) -> Optional[Column]:
138
- """Return the column for the given tbl/col id"""
139
- if self.tbl_version.id == tbl_id:
140
- assert col_id in self.tbl_version.get().cols_by_id
141
- return self.tbl_version.get().cols_by_id[col_id]
142
- elif self.base is not None:
143
- return self.base.get_column_by_id(tbl_id, col_id)
144
- else:
145
- return None
146
-
147
- def has_column(self, col: Column, include_bases: bool = True) -> bool:
214
+ def has_column(self, col: Column) -> bool:
148
215
  """Return True if this table has the given column."""
149
- assert col.tbl is not None
216
+ assert col.get_tbl() is not None
217
+ self.refresh_cached_md()
218
+
150
219
  if (
151
- col.tbl.id == self.tbl_version.id
152
- and col.tbl.effective_version == self.tbl_version.effective_version
153
- and col.id in self.tbl_version.get().cols_by_id
220
+ col.get_tbl().id == self.tbl_version.id
221
+ and col.get_tbl().effective_version == self.tbl_version.effective_version
222
+ and col.id in self._cached_tbl_version.cols_by_id
154
223
  ):
155
224
  # the column is visible in this table version
156
225
  return True
157
- elif self.base is not None and include_bases:
226
+ elif self.base is not None:
158
227
  return self.base.has_column(col)
159
228
  else:
160
229
  return False
@@ -0,0 +1,53 @@
1
+ # This file contains all dataclasses related to schema.PendingTableOp:
2
+ # - TableOp: the container for each log entry
3
+ # - <>Op: the actual operation, which is performed by TableVersion.exec_op(); each <>Op class contains
4
+ # enough information for exec_op() to perform the operation without having to reference data outside of
5
+ # TableVersion
6
+
7
+ import dataclasses
8
+ from typing import Any
9
+
10
+
11
+ @dataclasses.dataclass
12
+ class CreateStoreTableOp:
13
+ pass
14
+
15
+
16
+ @dataclasses.dataclass
17
+ class CreateIndexOp:
18
+ idx_id: int
19
+
20
+
21
+ @dataclasses.dataclass
22
+ class LoadViewOp:
23
+ view_path: dict[str, Any] # needed to create the view load plan
24
+
25
+
26
+ @dataclasses.dataclass
27
+ class DeleteTableMdOp:
28
+ pass
29
+
30
+
31
+ @dataclasses.dataclass
32
+ class DeleteTableMediaFilesOp:
33
+ pass
34
+
35
+
36
+ @dataclasses.dataclass
37
+ class DropStoreTableOp:
38
+ pass
39
+
40
+
41
+ @dataclasses.dataclass
42
+ class TableOp:
43
+ tbl_id: str # uuid.UUID
44
+ op_sn: int # sequence number within the update operation; [0, num_ops)
45
+ num_ops: int # total number of ops forming the update operation
46
+ needs_xact: bool # if True, op must be run as part of a transaction
47
+
48
+ create_store_table_op: CreateStoreTableOp | None = None
49
+ create_index_op: CreateIndexOp | None = None
50
+ load_view_op: LoadViewOp | None = None
51
+ delete_table_md_op: DeleteTableMdOp | None = None
52
+ delete_table_media_files_op: DeleteTableMediaFilesOp | None = None
53
+ drop_store_table_op: DropStoreTableOp | None = None
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ from IPython.lib.pretty import RepresentationPrinter
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class RowCountStats:
12
+ """
13
+ Statistics about the counts of rows affected by a table operation.
14
+ """
15
+
16
+ ins_rows: int = 0 # rows inserted
17
+ del_rows: int = 0 # rows deleted
18
+ upd_rows: int = 0 # rows updated
19
+ num_excs: int = 0 # total number of exceptions
20
+ # TODO: disambiguate what this means: # of slots computed or # of columns computed?
21
+ computed_values: int = 0 # number of computed values (e.g., computed columns) affected by the operation
22
+
23
+ @property
24
+ def num_rows(self) -> int:
25
+ return self.ins_rows + self.del_rows + self.upd_rows
26
+
27
+ def insert_to_update(self) -> 'RowCountStats':
28
+ """
29
+ Convert insert row count stats to update row count stats.
30
+ This is used when an insert operation is treated as an update.
31
+ """
32
+ return RowCountStats(
33
+ ins_rows=0,
34
+ del_rows=self.del_rows,
35
+ upd_rows=self.upd_rows + self.ins_rows,
36
+ num_excs=self.num_excs,
37
+ computed_values=self.computed_values,
38
+ )
39
+
40
+ def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
41
+ """
42
+ Add the stats from two RowCountStats objects together.
43
+ """
44
+ return RowCountStats(
45
+ ins_rows=self.ins_rows + other.ins_rows,
46
+ del_rows=self.del_rows + other.del_rows,
47
+ upd_rows=self.upd_rows + other.upd_rows,
48
+ num_excs=self.num_excs + other.num_excs,
49
+ computed_values=self.computed_values + other.computed_values,
50
+ )
51
+
52
+
53
+ @dataclass(frozen=True)
54
+ class UpdateStatus:
55
+ """
56
+ Information about changes to table data or table schema
57
+ """
58
+
59
+ updated_cols: list[str] = field(default_factory=list)
60
+ """Columns that were updated."""
61
+ cols_with_excs: list[str] = field(default_factory=list)
62
+ """Columns that encountered exceptions."""
63
+
64
+ # stats for the rows affected by the operation
65
+ row_count_stats: RowCountStats = field(default_factory=RowCountStats)
66
+ """Row count statistics for rows affected by this operation."""
67
+
68
+ # stats for changes cascaded to other tables
69
+ cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
70
+ """Row count statistics for changes cascaded to other tables."""
71
+
72
+ # stats for the rows affected by the operation in an external store
73
+ ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
74
+ """Row count statistics for rows affected in an external store."""
75
+
76
+ @property
77
+ def num_rows(self) -> int:
78
+ """Total number of rows affected (including cascaded changes)."""
79
+ return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
80
+
81
+ @property
82
+ def num_excs(self) -> int:
83
+ """Total number of exceptions encountered (including cascaded changes)."""
84
+ return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
85
+
86
+ @property
87
+ def num_computed_values(self) -> int:
88
+ """Total number of computed values affected (including cascaded changes)."""
89
+ return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
90
+
91
+ def insert_to_update(self) -> 'UpdateStatus':
92
+ """
93
+ Convert the update status from an insert operation to an update operation.
94
+ This is used when an insert operation is treated as an update.
95
+ """
96
+ return UpdateStatus(
97
+ updated_cols=self.updated_cols,
98
+ cols_with_excs=self.cols_with_excs,
99
+ row_count_stats=self.row_count_stats.insert_to_update(),
100
+ cascade_row_count_stats=self.cascade_row_count_stats.insert_to_update(),
101
+ ext_row_count_stats=self.ext_row_count_stats,
102
+ )
103
+
104
+ def to_cascade(self) -> 'UpdateStatus':
105
+ """
106
+ Convert the update status to a cascade update status.
107
+ This is used when an operation cascades changes to other tables.
108
+ """
109
+ return UpdateStatus(
110
+ updated_cols=self.updated_cols,
111
+ cols_with_excs=self.cols_with_excs,
112
+ row_count_stats=RowCountStats(),
113
+ cascade_row_count_stats=self.cascade_row_count_stats + self.row_count_stats,
114
+ ext_row_count_stats=self.ext_row_count_stats,
115
+ )
116
+
117
+ def __add__(self, other: 'UpdateStatus') -> UpdateStatus:
118
+ """
119
+ Add the update status from two UpdateStatus objects together.
120
+ """
121
+ return UpdateStatus(
122
+ updated_cols=list(dict.fromkeys(self.updated_cols + other.updated_cols)),
123
+ cols_with_excs=list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs)),
124
+ row_count_stats=self.row_count_stats + other.row_count_stats,
125
+ cascade_row_count_stats=self.cascade_row_count_stats + other.cascade_row_count_stats,
126
+ ext_row_count_stats=self.ext_row_count_stats + other.ext_row_count_stats,
127
+ )
128
+
129
+ @property
130
+ def insert_msg(self) -> str:
131
+ """A message describing the results of an insert operation."""
132
+ if self.num_excs == 0:
133
+ cols_with_excs_str = ''
134
+ else:
135
+ cols_with_excs_str = (
136
+ f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
137
+ )
138
+ cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
139
+ msg = (
140
+ f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
141
+ f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
142
+ )
143
+ return msg
144
+
145
+ @classmethod
146
+ def __cnt_str(cls, cnt: int, item: str) -> str:
147
+ assert cnt > 0
148
+ return f'{cnt} {item}{"" if cnt == 1 else "s"}'
149
+
150
+ def _repr_pretty_(self, p: 'RepresentationPrinter', cycle: bool) -> None:
151
+ messages = []
152
+ # Combine row count stats and cascade row count stats
153
+ stats = self.row_count_stats + self.cascade_row_count_stats
154
+ if stats.ins_rows > 0:
155
+ messages.append(f'{self.__cnt_str(stats.ins_rows, "row")} inserted')
156
+ if stats.del_rows > 0:
157
+ messages.append(f'{self.__cnt_str(stats.del_rows, "row")} deleted')
158
+ if stats.upd_rows > 0:
159
+ messages.append(f'{self.__cnt_str(stats.upd_rows, "row")} updated')
160
+ if stats.computed_values > 0:
161
+ messages.append(f'{self.__cnt_str(stats.computed_values, "value")} computed')
162
+ if stats.num_excs > 0:
163
+ messages.append(self.__cnt_str(stats.num_excs, 'exception'))
164
+ p.text(', '.join(messages) + '.' if len(messages) > 0 else 'No rows affected.')
165
+
166
+ @property
167
+ def pxt_rows_updated(self) -> int:
168
+ """
169
+ Returns the number of Pixeltable rows that were updated as a result of the operation.
170
+ """
171
+ return (self.row_count_stats + self.cascade_row_count_stats).upd_rows
172
+
173
+ @property
174
+ def external_rows_updated(self) -> int:
175
+ """Number of rows updated in an external store."""
176
+ return self.ext_row_count_stats.upd_rows
177
+
178
+ @property
179
+ def external_rows_created(self) -> int:
180
+ """Number of rows created in an external store."""
181
+ return self.ext_row_count_stats.ins_rows
182
+
183
+ @property
184
+ def external_rows_deleted(self) -> int:
185
+ """Number of rows deleted from an external store."""
186
+ return self.ext_row_count_stats.del_rows
187
+
188
+ @property
189
+ def ext_num_rows(self) -> int:
190
+ """Total number of rows affected in an external store."""
191
+ return self.ext_row_count_stats.num_rows