pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,21 +1,25 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import dataclasses
3
4
  import sys
4
5
  import time
5
- from dataclasses import dataclass
6
- from typing import Any, Iterable, Optional, Sequence
6
+ from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, Sequence, TypeVar
7
7
  from uuid import UUID
8
8
 
9
+ import numpy as np
9
10
  import sqlalchemy as sql
10
11
 
11
- import pixeltable.catalog as catalog
12
- import pixeltable.exceptions as excs
13
- import pixeltable.func as func
14
- import pixeltable.utils as utils
12
+ from pixeltable import catalog, exceptions as excs, exprs, utils
13
+ from pixeltable.env import Env
14
+ from pixeltable.utils.misc import non_none_dict_factory
15
+
15
16
  from .data_row import DataRow
16
- from .expr import Expr
17
+ from .expr import Expr, ExprScope
17
18
  from .expr_set import ExprSet
18
19
 
20
+ if TYPE_CHECKING:
21
+ from .column_ref import ColumnRef
22
+
19
23
 
20
24
  class ExecProfile:
21
25
  def __init__(self, row_builder: RowBuilder):
@@ -30,14 +34,16 @@ class ExecProfile:
30
34
  per_call_time = self.eval_time[i] / self.eval_count[i]
31
35
  calls_per_row = self.eval_count[i] / num_rows
32
36
  multiple_str = f'({calls_per_row}x)' if calls_per_row > 1 else ''
33
- print(f'{self.row_builder.unique_exprs[i]}: {utils.print_perf_counter_delta(per_call_time)} {multiple_str}')
37
+ Env.get().console_logger.info(
38
+ f'{self.row_builder.unique_exprs[i]}: {utils.print_perf_counter_delta(per_call_time)} {multiple_str}'
39
+ )
34
40
 
35
41
 
36
- @dataclass
37
- class ColumnSlotIdx:
42
+ class ColumnSlotIdx(NamedTuple):
38
43
  """Info for how to locate materialized column in DataRow
39
44
  TODO: can this be integrated into RowBuilder directly?
40
45
  """
46
+
41
47
  col: catalog.Column
42
48
  slot_idx: int
43
49
 
@@ -47,7 +53,14 @@ class RowBuilder:
47
53
 
48
54
  For ColumnRefs to unstored iterator columns:
49
55
  - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
56
+
57
+ Args:
58
+ output_exprs: list of Exprs to be evaluated
59
+ columns: list of columns to be materialized
60
+ input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
61
+ TODO: enforce that output_exprs doesn't overlap with input_exprs?
50
62
  """
63
+
51
64
  unique_exprs: ExprSet
52
65
  next_slot_idx: int
53
66
  input_expr_slot_idxs: set[int]
@@ -59,61 +72,84 @@ class RowBuilder:
59
72
 
60
73
  input_exprs: ExprSet
61
74
 
62
- table_columns: list[ColumnSlotIdx]
75
+ tbl: catalog.TableVersion | None # reference table of the RowBuilder; used to identify pk columns for writes
76
+ for_view_load: bool # True if this RowBuilder represents a view load
77
+
78
+ table_columns: dict[catalog.Column, int | None] # value: slot idx, if the result of an expr
63
79
  default_eval_ctx: EvalCtx
64
80
  unstored_iter_args: dict[UUID, Expr]
81
+ unstored_iter_outputs: dict[UUID, list['ColumnRef']]
65
82
 
66
83
  # transitive dependents for the purpose of exception propagation: an exception for slot i is propagated to
67
84
  # _exc_dependents[i]
68
85
  # (list of set of slot_idxs, indexed by slot_idx)
69
86
  _exc_dependents: list[set[int]]
70
87
 
88
+ # dependents[i] = direct dependents of expr with slot idx i; dependents[i, j] == True: expr j depends on expr i
89
+ dependents: np.ndarray # of bool
90
+ transitive_dependents: np.ndarray # of bool
91
+ # dependencies[i] = direct dependencies of expr with slot idx i; transpose of dependents
92
+ dependencies: np.ndarray # of bool
93
+ # num_dependencies[i] = number of direct dependencies of expr with slot idx i
94
+ num_dependencies: np.ndarray # of int
95
+
71
96
  # records the output_expr that a subexpr belongs to
72
97
  # (a subexpr can be shared across multiple output exprs)
73
98
  output_expr_ids: list[set[int]]
74
99
 
75
- @dataclass
100
+ img_slot_idxs: list[int] # Indices of image slots
101
+ media_slot_idxs: list[int] # Indices of non-image media slots
102
+ array_slot_idxs: list[int] # Indices of array slots
103
+ json_slot_idxs: list[int] # Indices of json slots
104
+
105
+ @dataclasses.dataclass
76
106
  class EvalCtx:
77
107
  """Context for evaluating a set of target exprs"""
108
+
78
109
  slot_idxs: list[int] # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
79
110
  exprs: list[Expr] # exprs corresponding to slot_idxs
80
111
  target_slot_idxs: list[int] # slot idxs of target exprs; might contain duplicates
81
112
  target_exprs: list[Expr] # exprs corresponding to target_slot_idxs
82
113
 
83
114
  def __init__(
84
- self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Iterable[Expr]
115
+ self,
116
+ output_exprs: Sequence[Expr],
117
+ columns: Sequence[catalog.Column],
118
+ input_exprs: Iterable[Expr],
119
+ tbl: catalog.TableVersion | None = None,
120
+ for_view_load: bool = False,
85
121
  ):
86
- """
87
- Args:
88
- output_exprs: list of Exprs to be evaluated
89
- columns: list of columns to be materialized
90
- input_exprs: list of Exprs that are excluded from evaluation (because they're already materialized)
91
- TODO: enforce that output_exprs doesn't overlap with input_exprs?
92
- """
122
+ from .column_property_ref import ColumnPropertyRef
123
+ from .column_ref import ColumnRef
124
+
93
125
  self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
94
126
  self.next_slot_idx = 0
95
127
 
96
- # record input and output exprs; make copies to avoid reusing execution state
128
+ # record input exprs; make copies to avoid reusing execution state
97
129
  unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
130
+
98
131
  self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}
99
132
 
100
133
  resolve_cols = set(columns)
101
- self.output_exprs = ExprSet([
134
+ self.output_exprs = ExprSet(
102
135
  self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
103
136
  for e in output_exprs
104
- ])
137
+ )
105
138
 
106
139
  # if init(columns):
107
- # - we are creating table rows and need to record columns for create_table_row()
140
+ # - we are creating table rows and need to record columns for create_store_table_row()
108
141
  # - output_exprs materialize those columns
109
142
  # - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
110
143
  # - media validation:
111
144
  # * for write-validated columns, we need to create validating ColumnRefs
112
145
  # * further references to that column (eg, computed cols) need to resolve to the validating ColumnRef
113
- from .column_ref import ColumnRef
114
- self.table_columns: list[ColumnSlotIdx] = []
146
+
147
+ self.for_view_load = for_view_load
148
+ self.tbl = tbl
149
+ self.table_columns = {}
115
150
  self.input_exprs = ExprSet()
116
151
  validating_colrefs: dict[Expr, Expr] = {} # key: non-validating colref, value: corresp. validating colref
152
+
117
153
  for col in columns:
118
154
  expr: Expr
119
155
  if col.is_computed:
@@ -125,7 +161,8 @@ class RowBuilder:
125
161
  else:
126
162
  # record a ColumnRef so that references to this column resolve to the same slot idx
127
163
  perform_validation = (
128
- None if not col.col_type.is_media_type()
164
+ None
165
+ if not col.col_type.is_media_type()
129
166
  else col.media_validation == catalog.MediaValidation.ON_WRITE
130
167
  )
131
168
  expr = ColumnRef(col, perform_validation=perform_validation)
@@ -153,22 +190,39 @@ class RowBuilder:
153
190
  # because that would cause them to be evaluated for every single row
154
191
  # - the separate eval ctx allows the ColumnRef to materialize the iterator args only when the underlying
155
192
  # iterated object changes
193
+
156
194
  col_refs = [e for e in self.unique_exprs if isinstance(e, ColumnRef)]
157
195
 
158
196
  def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
159
- tbl = col_ref.col.tbl
160
- return tbl.is_component_view() and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored
197
+ tbl = col_ref.col.get_tbl()
198
+ return tbl.is_component_view and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored
161
199
 
162
200
  unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
163
- component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
201
+ component_views = [col_ref.col.get_tbl() for col_ref in unstored_iter_col_refs]
164
202
  unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
203
+
204
+ # the *stored* output columns of the unstored iterators
205
+ self.unstored_iter_outputs = {
206
+ view.id: [
207
+ self._record_unique_expr(ColumnRef(col), recursive=True)
208
+ for col in view.iterator_columns()
209
+ if col.is_stored
210
+ ]
211
+ for view in component_views
212
+ }
213
+
165
214
  self.unstored_iter_args = {
166
- id: self._record_unique_expr(arg, recursive=True) for id, arg in unstored_iter_args.items()
215
+ id: self._record_unique_expr(args, recursive=True) for id, args in unstored_iter_args.items()
167
216
  }
168
217
 
218
+ unstored_iter_col_refs = [
219
+ self._record_unique_expr(col_ref, recursive=True) for col_ref in unstored_iter_col_refs
220
+ ]
221
+
169
222
  for col_ref in unstored_iter_col_refs:
170
- iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.tbl.id]])
171
- col_ref.set_iter_arg_ctx(iter_arg_ctx)
223
+ iter_arg_ctx = self.create_eval_ctx([self.unstored_iter_args[col_ref.col.get_tbl().id]])
224
+ iter_outputs = self.unstored_iter_outputs[col_ref.col.get_tbl().id]
225
+ col_ref.set_iter_arg_ctx(iter_arg_ctx, iter_outputs)
172
226
 
173
227
  # we guarantee that we can compute the expr DAG in a single front-to-back pass
174
228
  for i, expr in enumerate(self.unique_exprs):
@@ -176,19 +230,31 @@ class RowBuilder:
176
230
 
177
231
  # determine transitive dependencies for the purpose of exception propagation
178
232
  # (list of set of slot_idxs, indexed by slot_idx)
233
+ # self.dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
234
+ self.dependencies = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
179
235
  exc_dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)]
180
- from .column_property_ref import ColumnPropertyRef
236
+
181
237
  for expr in self.unique_exprs:
182
238
  if expr.slot_idx in self.input_expr_slot_idxs:
183
239
  # this is input and therefore doesn't depend on other exprs
184
240
  continue
185
241
  # error properties don't have exceptions themselves
186
- if isinstance(expr, ColumnPropertyRef) and expr.is_error_prop():
242
+ if isinstance(expr, ColumnPropertyRef) and expr.is_cellmd_prop():
187
243
  continue
244
+ dependency_idxs = [d.slot_idx for d in expr.dependencies()]
245
+ self.dependencies[expr.slot_idx, dependency_idxs] = True
188
246
  for d in expr.dependencies():
189
247
  exc_dependencies[expr.slot_idx].add(d.slot_idx)
190
248
  exc_dependencies[expr.slot_idx].update(exc_dependencies[d.slot_idx])
191
249
 
250
+ self.num_dependencies = np.sum(self.dependencies, axis=1)
251
+ self.dependents = self.dependencies.T
252
+ self.transitive_dependents = np.zeros((self.num_materialized, self.num_materialized), dtype=bool)
253
+ for i in reversed(range(self.num_materialized)):
254
+ self.transitive_dependents[i] = self.dependents[i] | np.any(
255
+ self.transitive_dependents[self.dependents[i]], axis=0
256
+ )
257
+
192
258
  self._exc_dependents = [set() for _ in range(self.num_materialized)]
193
259
  for expr in self.unique_exprs:
194
260
  assert expr.slot_idx is not None
@@ -199,19 +265,32 @@ class RowBuilder:
199
265
  for e in self.output_exprs:
200
266
  self._record_output_expr_id(e, e.slot_idx)
201
267
 
268
+ self.img_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_image_type()]
269
+ self.media_slot_idxs = [
270
+ e.slot_idx for e in self.unique_exprs if e.col_type.is_media_type() and not e.col_type.is_image_type()
271
+ ]
272
+ self.array_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_array_type()]
273
+ self.json_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_json_type()]
274
+
202
275
  def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
203
- """Record a column that is part of the table row"""
204
- self.table_columns.append(ColumnSlotIdx(col, slot_idx))
276
+ """Record an output column for which the value is produced via expr evaluation"""
277
+ assert self.tbl is not None
278
+ assert col.is_stored
279
+ self.table_columns[col] = slot_idx
205
280
 
206
- def output_slot_idxs(self) -> list[ColumnSlotIdx]:
207
- """Return ColumnSlotIdx for output columns"""
208
- return self.table_columns
281
+ def add_table_columns(self, cols: list[catalog.Column]) -> None:
282
+ """Record output columns whose values are materialized into DataRow.cell_vals"""
283
+ for col in cols:
284
+ self.table_columns[col] = None
209
285
 
210
- def set_conn(self, conn: sql.engine.Connection) -> None:
211
- from .function_call import FunctionCall
212
- for expr in self.unique_exprs:
213
- if isinstance(expr, FunctionCall) and isinstance(expr.fn, func.QueryTemplateFunction):
214
- expr.fn.set_conn(conn)
286
+ @property
287
+ def media_output_col_info(self) -> list[ColumnSlotIdx]:
288
+ """Return slot idxs for media output columns whose values are produced by expr evaluation"""
289
+ return [
290
+ ColumnSlotIdx(col, slot_idx)
291
+ for col, slot_idx in self.table_columns.items()
292
+ if col.col_type.is_media_type() and slot_idx is not None
293
+ ]
215
294
 
216
295
  @property
217
296
  def num_materialized(self) -> int:
@@ -226,7 +305,9 @@ class RowBuilder:
226
305
  self.next_slot_idx += 1
227
306
  return result
228
307
 
229
- def _record_unique_expr(self, expr: Expr, recursive: bool) -> Expr:
308
+ T = TypeVar('T', bound=Expr)
309
+
310
+ def _record_unique_expr(self, expr: T, recursive: bool) -> T:
230
311
  """Records the expr if it's not a duplicate and assigns a slot idx to expr and its components"
231
312
  Returns:
232
313
  the unique expr
@@ -254,8 +335,14 @@ class RowBuilder:
254
335
  for d in e.dependencies():
255
336
  self._record_output_expr_id(d, output_expr_id)
256
337
 
257
- def _compute_dependencies(self, target_slot_idxs: list[int], excluded_slot_idxs: list[int]) -> list[int]:
258
- """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'"""
338
+ def _compute_dependencies(
339
+ self, target_slot_idxs: list[int], excluded_slot_idxs: list[int], target_scope: ExprScope | None = None
340
+ ) -> list[int]:
341
+ """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'
342
+
343
+ If target_scope != None, stops transitive dependency resolution when leaving target_scope (ie, includes
344
+ immediate dependents that aren't in target_scope, but doesn't resolve those).
345
+ """
259
346
  dependencies: list[set[int]] = [set() for _ in range(self.num_materialized)] # indexed by slot_idx
260
347
  # doing this front-to-back ensures that we capture transitive dependencies
261
348
  max_target_slot_idx = max(target_slot_idxs)
@@ -268,7 +355,11 @@ class RowBuilder:
268
355
  if expr.slot_idx in self.input_expr_slot_idxs:
269
356
  # this is input and therefore doesn't depend on other exprs
270
357
  continue
358
+ if target_scope is not None and expr.scope() != target_scope:
359
+ # don't resolve dependencies outside of target_scope
360
+ continue
271
361
  for d in expr.dependencies():
362
+ assert d.slot_idx is not None, f'{expr}, {d}'
272
363
  if d.slot_idx in excluded_slot_idxs:
273
364
  continue
274
365
  dependencies[expr.slot_idx].add(d.slot_idx)
@@ -298,10 +389,15 @@ class RowBuilder:
298
389
  for c in e.components:
299
390
  self.__set_slot_idxs_aux(c)
300
391
 
301
- def get_dependencies(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> list[Expr]:
392
+ def get_dependencies(
393
+ self, targets: Iterable[Expr], exclude: Iterable[Expr] | None = None, limit_scope: bool = True
394
+ ) -> list[Expr]:
302
395
  """
303
396
  Return list of dependencies needed to evaluate the given target exprs (expressed as slot idxs).
304
397
  The exprs given in 'exclude' are excluded.
398
+ If limit_scope == True, only returns dependencies in the same scope and immediate (ie, not transitive)
399
+ dependencies from enclosing scopes.
400
+
305
401
  Returns:
306
402
  list of Exprs from unique_exprs (= with slot_idx set)
307
403
  """
@@ -312,38 +408,56 @@ class RowBuilder:
312
408
  return []
313
409
  # make sure we only refer to recorded exprs
314
410
  targets = [self.unique_exprs[e] for e in targets]
411
+ target_scope: ExprScope | None = None
412
+ if limit_scope:
413
+ # make sure all targets are from the same scope
414
+ target_scopes = {e.scope() for e in targets}
415
+ assert len(target_scopes) == 1
416
+ target_scope = target_scopes.pop()
315
417
  exclude = [self.unique_exprs[e] for e in exclude]
316
418
  target_slot_idxs = [e.slot_idx for e in targets]
317
419
  excluded_slot_idxs = [e.slot_idx for e in exclude]
318
- all_dependencies = set(self._compute_dependencies(target_slot_idxs, excluded_slot_idxs))
420
+ all_dependencies = set(
421
+ self._compute_dependencies(target_slot_idxs, excluded_slot_idxs, target_scope=target_scope)
422
+ )
319
423
  all_dependencies.update(target_slot_idxs)
320
424
  result_ids = list(all_dependencies)
321
425
  result_ids.sort()
322
426
  return [self.unique_exprs[id] for id in result_ids]
323
427
 
324
- def create_eval_ctx(self, targets: Iterable[Expr], exclude: Optional[Iterable[Expr]] = None) -> EvalCtx:
428
+ def create_eval_ctx(
429
+ self, targets: Iterable[Expr], exclude: Iterable[Expr] | None = None, limit_scope: bool = True
430
+ ) -> EvalCtx:
325
431
  """Return EvalCtx for targets"""
326
432
  targets = list(targets)
327
433
  if exclude is None:
328
434
  exclude = []
329
435
  if len(targets) == 0:
330
436
  return self.EvalCtx([], [], [], [])
331
- dependencies = self.get_dependencies(targets, exclude)
437
+ dependencies = self.get_dependencies(targets, exclude, limit_scope=limit_scope)
332
438
  targets = [self.unique_exprs[e] for e in targets]
333
439
  target_slot_idxs = [e.slot_idx for e in targets]
334
440
  ctx_slot_idxs = [e.slot_idx for e in dependencies]
335
441
  return self.EvalCtx(
336
- slot_idxs=ctx_slot_idxs, exprs=[self.unique_exprs[slot_idx] for slot_idx in ctx_slot_idxs],
337
- target_slot_idxs=target_slot_idxs, target_exprs=targets)
442
+ slot_idxs=ctx_slot_idxs,
443
+ exprs=[self.unique_exprs[slot_idx] for slot_idx in ctx_slot_idxs],
444
+ target_slot_idxs=target_slot_idxs,
445
+ target_exprs=targets,
446
+ )
338
447
 
339
448
  def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
340
449
  """Record an exception in data_row and propagate it to dependents"""
341
450
  data_row.set_exc(slot_idx, exc)
342
- for slot_idx in self._exc_dependents[slot_idx]:
343
- data_row.set_exc(slot_idx, exc)
451
+ for idx in self._exc_dependents[slot_idx]:
452
+ data_row.set_exc(idx, exc)
344
453
 
345
454
  def eval(
346
- self, data_row: DataRow, ctx: EvalCtx, profile: Optional[ExecProfile] = None, ignore_errors: bool = False
455
+ self,
456
+ data_row: DataRow,
457
+ ctx: EvalCtx,
458
+ profile: ExecProfile | None = None,
459
+ ignore_errors: bool = False,
460
+ force_eval: ExprScope | None = None,
347
461
  ) -> None:
348
462
  """
349
463
  Populates the slots in data_row given in ctx.
@@ -351,10 +465,11 @@ class RowBuilder:
351
465
  and omits any of that expr's dependents' eval()
352
466
  profile: if present, populated with execution time of each expr.eval() call; indexed by expr.slot_idx
353
467
  ignore_errors: if False, raises ExprEvalError if any expr.eval() raises an exception
468
+ force_eval: forces exprs in the specified scope to be reevaluated, even if they already have a value
354
469
  """
355
470
  for expr in ctx.exprs:
356
471
  assert expr.slot_idx >= 0
357
- if data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx):
472
+ if expr.scope() != force_eval and (data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx)):
358
473
  continue
359
474
  try:
360
475
  start_time = time.perf_counter()
@@ -368,31 +483,79 @@ class RowBuilder:
368
483
  if not ignore_errors:
369
484
  input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
370
485
  raise excs.ExprEvalError(
371
- expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)
486
+ expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0
487
+ ) from exc
372
488
 
373
- def create_table_row(self, data_row: DataRow, exc_col_ids: set[int]) -> tuple[dict[str, Any], int]:
374
- """Create a table row from the slots that have an output column assigned
489
+ def create_store_table_row(
490
+ self, data_row: DataRow, cols_with_excs: set[int] | None, pk: tuple[int, ...]
491
+ ) -> tuple[list[Any], int]:
492
+ """Create a store table row from the slots that have an output column assigned
375
493
 
376
- Return tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
494
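+ Return tuple[list of row values (pk values, then for each column in `self.table_columns` order its value followed by its cellmd value if the column stores cellmd), # of exceptions]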
377
495
  This excludes system columns.
496
+ Row values are converted to their store type.
378
497
  """
498
+ from pixeltable.exprs.column_property_ref import ColumnPropertyRef
499
+
379
500
  num_excs = 0
380
- table_row: dict[str, Any] = {}
381
- for info in self.table_columns:
382
- col, slot_idx = info.col, info.slot_idx
501
+ table_row: list[Any] = list(pk)
502
+ # Nulls in JSONB columns need to be stored as sql.sql.null(), otherwise it stores a json 'null'
503
+ for col, slot_idx in self.table_columns.items():
504
+ if col.id in data_row.cell_vals:
505
+ table_row.append(data_row.cell_vals[col.id])
506
+ if col.stores_cellmd:
507
+ if data_row.cell_md[col.id] is None:
508
+ table_row.append(sql.sql.null())
509
+ else:
510
+ # we want to minimize the size of the stored dict and use dict_factory to remove Nones
511
+ md = dataclasses.asdict(data_row.cell_md[col.id], dict_factory=non_none_dict_factory)
512
+ assert len(md) > 0
513
+ table_row.append(md)
514
+ if slot_idx is not None and data_row.has_exc(slot_idx):
515
+ num_excs += 1
516
+ if cols_with_excs is not None:
517
+ cols_with_excs.add(col.id)
518
+ continue
519
+
383
520
  if data_row.has_exc(slot_idx):
384
- # exceptions get stored in the errortype/-msg columns
385
521
  exc = data_row.get_exc(slot_idx)
386
522
  num_excs += 1
387
- exc_col_ids.add(col.id)
388
- table_row[col.store_name()] = None
389
- table_row[col.errortype_store_name()] = type(exc).__name__
390
- table_row[col.errormsg_store_name()] = str(exc)
523
+ if cols_with_excs is not None:
524
+ cols_with_excs.add(col.id)
525
+ table_row.append(sql.sql.null() if col.col_type.is_json_type() else None)
526
+ if col.stores_cellmd:
527
+ # exceptions get stored in the errortype/-msg properties of the cellmd column
528
+ table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
391
529
  else:
392
- val = data_row.get_stored_val(slot_idx, col.sa_col.type)
393
- table_row[col.store_name()] = val
394
- # we unfortunately need to set these, even if there are no errors
395
- table_row[col.errortype_store_name()] = None
396
- table_row[col.errormsg_store_name()] = None
530
+ val = data_row.get_stored_val(slot_idx, col.sa_col_type)
531
+ table_row.append(val)
532
+ if col.stores_cellmd:
533
+ table_row.append(sql.sql.null()) # placeholder for cellmd column
397
534
 
398
535
  return table_row, num_excs
536
+
537
+ def store_column_names(self) -> list[str]:
538
+ """
539
+ Returns the list of store column names corresponding to the table_columns of this RowBuilder.
540
+ Names are listed in this order: the store table's pk columns first, then, for each table column, its store
541
+ name followed by its cellmd store name (only if the column stores cellmd).
542
+ """
543
+ assert self.tbl is not None, self.table_columns
544
+ store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
545
+
546
+ for col in self.table_columns:
547
+ store_col_names.append(col.store_name())
548
+ if col.stores_cellmd:
549
+ store_col_names.append(col.cellmd_store_name())
550
+
551
+ return store_col_names
552
+
553
+ def make_row(self) -> exprs.DataRow:
554
+ """Creates a new DataRow with the current row_builder's configuration."""
555
+ return exprs.DataRow(
556
+ size=self.num_materialized,
557
+ img_slot_idxs=self.img_slot_idxs,
558
+ media_slot_idxs=self.media_slot_idxs,
559
+ array_slot_idxs=self.array_slot_idxs,
560
+ json_slot_idxs=self.json_slot_idxs,
561
+ )
@@ -1,18 +1,21 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ import logging
4
+ from typing import Any, cast
4
5
  from uuid import UUID
5
6
 
6
7
  import sqlalchemy as sql
7
8
 
8
- import pixeltable.catalog as catalog
9
- import pixeltable.type_system as ts
9
+ from pixeltable import catalog, type_system as ts
10
+ from pixeltable.catalog.table_version import TableVersionKey
10
11
 
11
12
  from .data_row import DataRow
12
13
  from .expr import Expr
13
14
  from .row_builder import RowBuilder
14
15
  from .sql_element_cache import SqlElementCache
15
16
 
17
+ _logger = logging.getLogger('pixeltable')
18
+
16
19
 
17
20
  class RowidRef(Expr):
18
21
  """A reference to a part of a table rowid
@@ -22,9 +25,20 @@ class RowidRef(Expr):
22
25
  _from_dict()/init() is called, which is why this class effectively has two separate paths for construction
23
26
  (with and without a TableVersion).
24
27
  """
28
+
29
+ tbl: catalog.TableVersionHandle | None
30
+ normalized_base: catalog.TableVersionHandle | None
31
+ tbl_id: UUID
32
+ normalized_base_id: UUID
33
+ rowid_component_idx: int
34
+
25
35
  def __init__(
26
- self, tbl: catalog.TableVersion, idx: int,
27
- tbl_id: Optional[UUID] = None, normalized_base_id: Optional[UUID] = None):
36
+ self,
37
+ tbl: catalog.TableVersionHandle | None,
38
+ idx: int,
39
+ tbl_id: UUID | None = None,
40
+ normalized_base_id: UUID | None = None,
41
+ ):
28
42
  super().__init__(ts.IntType(nullable=False))
29
43
  self.tbl = tbl
30
44
  if tbl is not None:
@@ -32,8 +46,8 @@ class RowidRef(Expr):
32
46
  # (which has the same values as all its descendent views)
33
47
  normalized_base = tbl
34
48
  # don't try to reference tbl.store_tbl here
35
- while normalized_base.base is not None and normalized_base.base.num_rowid_columns() > idx:
36
- normalized_base = normalized_base.base
49
+ while normalized_base.get().base is not None and normalized_base.get().base.get().num_rowid_columns() > idx:
50
+ normalized_base = normalized_base.get().base
37
51
  self.normalized_base = normalized_base
38
52
  else:
39
53
  self.normalized_base = None
@@ -44,21 +58,35 @@ class RowidRef(Expr):
44
58
  self.rowid_component_idx = idx
45
59
  self.id = self._create_id()
46
60
 
47
- def default_column_name(self) -> Optional[str]:
61
+ def default_column_name(self) -> str | None:
48
62
  return str(self)
49
63
 
50
64
  def _equals(self, other: RowidRef) -> bool:
51
- return self.normalized_base_id == other.normalized_base_id \
65
+ return (
66
+ self.normalized_base_id == other.normalized_base_id
52
67
  and self.rowid_component_idx == other.rowid_component_idx
68
+ )
53
69
 
54
70
  def _id_attrs(self) -> list[tuple[str, Any]]:
55
- return super()._id_attrs() +\
56
- [('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
71
+ return [
72
+ *super()._id_attrs(),
73
+ ('normalized_base_id', self.normalized_base_id),
74
+ ('idx', self.rowid_component_idx),
75
+ ]
57
76
 
58
77
  def __repr__(self) -> str:
59
78
  # check if this is the pos column of a component view
60
- tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
61
- if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx: # type: ignore[attr-defined]
79
+ from pixeltable import store
80
+
81
+ tbl = (
82
+ self.tbl.get()
83
+ if self.tbl is not None
84
+ else catalog.Catalog.get().get_tbl_version(TableVersionKey(self.tbl_id, None, None))
85
+ )
86
+ if (
87
+ tbl.is_component_view
88
+ and self.rowid_component_idx == cast(store.StoreComponentView, tbl.store_tbl).pos_col_idx
89
+ ):
62
90
  return catalog.globals._POS_COLUMN_NAME
63
91
  return ''
64
92
 
@@ -75,15 +103,25 @@ class RowidRef(Expr):
75
103
  self.tbl = tbl.tbl_version
76
104
  self.tbl_id = self.tbl.id
77
105
 
78
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
79
- tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
106
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
107
+ tbl = (
108
+ self.tbl.get()
109
+ if self.tbl is not None
110
+ else catalog.Catalog.get().get_tbl_version(TableVersionKey(self.tbl_id, None, None))
111
+ )
112
+ assert tbl.is_validated
80
113
  rowid_cols = tbl.store_tbl.rowid_columns()
114
+ assert self.rowid_component_idx <= len(rowid_cols), (
115
+ f'{self.rowid_component_idx} not consistent with {rowid_cols}'
116
+ )
81
117
  return rowid_cols[self.rowid_component_idx]
82
118
 
83
119
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
84
120
  data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]
85
121
 
86
122
  def _as_dict(self) -> dict:
123
+ # TODO: Serialize the full TableVersionHandle, not just the UUID
124
+ assert self.tbl is None or self.tbl.anchor_tbl_id is None # TODO: support anchor_tbl_id for view-over-replica
87
125
  return {
88
126
  'tbl_id': str(self.tbl_id),
89
127
  'normalized_base_id': str(self.normalized_base_id),