pixeltable 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140) hide show
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.1.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.1.dist-info/METADATA +119 -0
  124. pixeltable-0.2.1.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.1.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
@@ -0,0 +1,355 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple, Set, Iterable
3
+ from dataclasses import dataclass
4
+ import time
5
+ import sys
6
+
7
+ from .expr import Expr
8
+ from .expr_set import ExprSet
9
+ from .data_row import DataRow
10
+ import pixeltable.utils as utils
11
+ import pixeltable.func as func
12
+ import pixeltable.exceptions as excs
13
+ import pixeltable.catalog as catalog
14
+
15
+
16
class ExecProfile:
    """Accumulates per-expression evaluation statistics for a RowBuilder.

    eval_time and eval_count are both indexed by slot idx and hold one entry for each
    materialized expr of the row builder.
    """

    def __init__(self, row_builder: RowBuilder):
        self.eval_time = [0.0] * row_builder.num_materialized
        self.eval_count = [0] * row_builder.num_materialized
        self.row_builder = row_builder

    def print(self, num_rows: int) -> None:
        """Print one line per expr that was evaluated at least once.

        Args:
            num_rows: number of rows that were processed; used to report how often an expr
                was evaluated per row
        """
        for i in range(self.row_builder.num_materialized):
            if self.eval_count[i] == 0:
                # never evaluated: nothing to report
                continue
            per_call_time = self.eval_time[i] / self.eval_count[i]
            # guard against a zero row count, which would otherwise raise ZeroDivisionError
            calls_per_row = self.eval_count[i] / num_rows if num_rows > 0 else 0
            # only show the multiplier when the expr ran more than once per row
            multiple_str = f'({calls_per_row}x)' if calls_per_row > 1 else ''
            print(f'{self.row_builder.unique_exprs[i]}: {utils.print_perf_counter_delta(per_call_time)} {multiple_str}')
30
+
31
+
32
@dataclass
class ColumnSlotIdx:
    """Info for how to locate materialized column in DataRow
    TODO: can this be integrated into RowBuilder directly?
    """
    col: catalog.Column  # the materialized column
    slot_idx: int  # slot of the DataRow that holds the column's value


class RowBuilder:
    """Create and populate DataRows and table rows from exprs and computed columns

    For ColumnRefs to unstored iterator columns:
    - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
    """

    @dataclass
    class EvalCtx:
        """Context for evaluating a set of target exprs"""
        slot_idxs: List[int]  # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
        exprs: List[Expr]  # exprs corresponding to slot_idxs
        target_slot_idxs: List[int]  # slot idxs of target exprs; might contain duplicates
        target_exprs: List[Expr]  # exprs corresponding to target_slot_idxs

    def __init__(
            self, output_exprs: List[Expr], columns: List[catalog.Column],
            indices: List[Tuple[catalog.Column, func.Function]], input_exprs: List[Expr]
    ):
        """
        Args:
            output_exprs: list of Exprs to be evaluated
            columns: list of columns to be materialized
            indices: list of embeddings to be materialized (Tuple[indexed column, embedding function])
            input_exprs: list of Exprs that are treated as input: they are recorded without their
                components and are not assigned any dependencies
        """
        self.unique_exprs = ExprSet()  # dependencies precede their dependents
        self.next_slot_idx = 0

        # record input and output exprs; make copies to avoid reusing execution state
        unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
        self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}

        # output exprs: all exprs the caller wants to materialize
        # - explicitly requested output_exprs
        # - values for computed columns
        # - embedding values for indices
        resolve_cols = set(columns)
        self.output_exprs = [
            self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
            for e in output_exprs
        ]

        # record columns for create_table_row()
        from .column_ref import ColumnRef
        self.table_columns: List[ColumnSlotIdx] = []
        for col in columns:
            if col.is_computed:
                assert col.value_expr is not None
                # create a copy here so we don't reuse execution state and resolve references to computed columns
                expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
                expr = self._record_unique_expr(expr, recursive=True)
                self.add_table_column(col, expr.slot_idx)
                self.output_exprs.append(expr)
            else:
                # record a ColumnRef so that references to this column resolve to the same slot idx
                ref = ColumnRef(col)
                ref = self._record_unique_expr(ref, recursive=False)
                self.add_table_column(col, ref.slot_idx)

        # record indices; indexed by slot_idx (slots without an index column hold None)
        self.index_columns: List[Optional[catalog.Column]] = []
        for col, embedding_fn in indices:
            # we assume that the parameter of the embedding function is a ref to an image column
            assert col.col_type.is_image_type()
            # construct expr to compute embedding; explicitly resize images to the required size
            target_img_type = next(iter(embedding_fn.signature.parameters.values())).col_type
            expr = embedding_fn(ColumnRef(col).resize(target_img_type.size))
            expr = self._record_unique_expr(expr, recursive=True)
            self.output_exprs.append(expr)
            if len(self.index_columns) <= expr.slot_idx:
                # pad to slot_idx
                self.index_columns.extend([None] * (expr.slot_idx - len(self.index_columns) + 1))
            self.index_columns[expr.slot_idx] = col

        # default eval ctx: all output exprs
        self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)

        # references to unstored iterator columns:
        # - those ColumnRefs need to instantiate iterators
        # - we create and record the iterator args here and pass them to their respective ColumnRefs
        # - we do this instead of simply recording the iterator args as a component of those ColumnRefs,
        #   because that would cause them to be evaluated for every single row
        # - the separate eval ctx allows the ColumnRef to materialize the iterator args only when the underlying
        #   iterated object changes
        col_refs = [e for e in self.unique_exprs if isinstance(e, ColumnRef)]

        def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
            tbl = col_ref.col.tbl
            return tbl.is_component_view() and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored

        unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
        component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
        unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
        self.unstored_iter_args = \
            {tbl_id: self._record_unique_expr(arg, recursive=True) for tbl_id, arg in unstored_iter_args.items()}

        for col_ref in unstored_iter_col_refs:
            iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.tbl.id]])
            col_ref.set_iter_arg_ctx(iter_arg_ctx)

        # we guarantee that we can compute the expr DAG in a single front-to-back pass
        for i, expr in enumerate(self.unique_exprs):
            assert expr.slot_idx == i

        # record transitive dependencies (list of set of slot_idxs, indexed by slot_idx)
        self.dependencies: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for expr in self.unique_exprs:
            if expr.slot_idx in self.input_expr_slot_idxs:
                # this is input and therefore doesn't depend on other exprs
                continue
            for d in expr.dependencies():
                self.dependencies[expr.slot_idx].add(d.slot_idx)
                self.dependencies[expr.slot_idx].update(self.dependencies[d.slot_idx])

        # derive transitive dependents
        self.dependents: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for expr in self.unique_exprs:
            for d in self.dependencies[expr.slot_idx]:
                self.dependents[d].add(expr.slot_idx)

        # records the output_expr that a subexpr belongs to
        # (a subexpr can be shared across multiple output exprs)
        self.output_expr_ids: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for e in self.output_exprs:
            self._record_output_expr_id(e, e.slot_idx)

    def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
        """Record a column that is part of the table row"""
        self.table_columns.append(ColumnSlotIdx(col, slot_idx))

    def output_slot_idxs(self) -> List[ColumnSlotIdx]:
        """Return ColumnSlotIdx for output columns"""
        return self.table_columns

    def index_slot_idxs(self) -> List[ColumnSlotIdx]:
        """Return ColumnSlotIdx for index columns"""
        # index_columns is indexed by slot idx and padded with None for slots without an index column
        return [
            ColumnSlotIdx(col, slot_idx) for slot_idx, col in enumerate(self.index_columns)
            if col is not None
        ]

    @property
    def num_materialized(self) -> int:
        """Number of slot idxs that have been assigned so far"""
        return self.next_slot_idx

    def get_output_exprs(self) -> List[Expr]:
        """Returns exprs that were requested in the c'tor and require evaluation"""
        return self.output_exprs

    def _next_slot_idx(self) -> int:
        """Return the next unassigned slot idx and advance the counter"""
        result = self.next_slot_idx
        self.next_slot_idx += 1
        return result

    def _record_unique_expr(self, expr: Expr, recursive: bool) -> Expr:
        """Records the expr if it's not a duplicate and assigns a slot idx to expr and its components
        Args:
            expr: the expr to record
            recursive: if True, also records the components of expr (before expr itself)
        Returns:
            the unique expr
        """
        if expr in self.unique_exprs:
            # expr is a duplicate: we use the original instead
            return self.unique_exprs[expr]

        # expr value needs to be computed via Expr.eval()
        if recursive:
            for i, c in enumerate(expr.components):
                # make sure we only refer to components that have themselves been recorded
                expr.components[i] = self._record_unique_expr(c, True)
        assert expr.slot_idx < 0
        expr.slot_idx = self._next_slot_idx()
        self.unique_exprs.append(expr)
        return expr

    def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
        """Tag e and, recursively, its dependencies with the id of the output expr they belong to"""
        self.output_expr_ids[e.slot_idx].add(output_expr_id)
        for d in e.dependencies():
            self._record_output_expr_id(d, output_expr_id)

    def _compute_dependencies(self, target_slot_idxs: List[int], excluded_slot_idxs: List[int]) -> List[int]:
        """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'
        Returns:
            sorted list of slot idxs of the (transitive) dependencies of the targets; the targets
            themselves are only included if they are a dependency of another target
        """
        if not target_slot_idxs:
            # nothing to materialize; also avoids max() on an empty sequence
            return []
        excluded = set(excluded_slot_idxs)  # constant-time membership tests in the loop below
        dependencies = [set() for _ in range(self.num_materialized)]  # indexed by slot_idx
        # doing this front-to-back ensures that we capture transitive dependencies
        max_target_slot_idx = max(target_slot_idxs)
        for expr in self.unique_exprs:
            if expr.slot_idx > max_target_slot_idx:
                # we're done
                break
            if expr.slot_idx in excluded:
                continue
            if expr.slot_idx in self.input_expr_slot_idxs:
                # this is input and therefore doesn't depend on other exprs
                continue
            for d in expr.dependencies():
                if d.slot_idx in excluded:
                    continue
                dependencies[expr.slot_idx].add(d.slot_idx)
                dependencies[expr.slot_idx].update(dependencies[d.slot_idx])
        # merge dependencies and convert to list
        return sorted(set().union(*[dependencies[i] for i in target_slot_idxs]))

    def substitute_exprs(self, expr_list: List[Expr], remove_duplicates: bool = True) -> None:
        """Substitutes exprs with their executable counterparts from unique_exprs and optionally removes duplicates

        expr_list is modified in place.
        """
        i = 0
        unique_ids: Set[int] = set()  # slot idxs within expr_list
        while i < len(expr_list):
            unique_expr = self.unique_exprs[expr_list[i]]
            if unique_expr.slot_idx in unique_ids and remove_duplicates:
                # don't advance i: the next element has moved into position i
                del expr_list[i]
            else:
                expr_list[i] = unique_expr
                unique_ids.add(unique_expr.slot_idx)
                i += 1

    def get_dependencies(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> List[Expr]:
        """
        Return list of dependencies needed to evaluate the given target exprs; the result includes the
        targets themselves and is ordered by slot idx.
        The exprs given in 'exclude' are excluded.
        Returns:
            list of Exprs from unique_exprs (= with slot_idx set)
        """
        if exclude is None:
            exclude = []
        if len(targets) == 0:
            return []
        # make sure we only refer to recorded exprs
        targets = [self.unique_exprs[e] for e in targets]
        exclude = [self.unique_exprs[e] for e in exclude]
        target_slot_idxs = [e.slot_idx for e in targets]
        excluded_slot_idxs = [e.slot_idx for e in exclude]
        all_dependencies = set(self._compute_dependencies(target_slot_idxs, excluded_slot_idxs))
        all_dependencies.update(target_slot_idxs)
        result_ids = list(all_dependencies)
        result_ids.sort()
        return [self.unique_exprs[slot_idx] for slot_idx in result_ids]

    def create_eval_ctx(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> EvalCtx:
        """Return EvalCtx for targets"""
        if exclude is None:
            exclude = []
        if len(targets) == 0:
            return self.EvalCtx([], [], [], [])
        dependencies = self.get_dependencies(targets, exclude)
        targets = [self.unique_exprs[e] for e in targets]
        target_slot_idxs = [e.slot_idx for e in targets]
        ctx_slot_idxs = [e.slot_idx for e in dependencies]
        return self.EvalCtx(
            slot_idxs=ctx_slot_idxs, exprs=[self.unique_exprs[slot_idx] for slot_idx in ctx_slot_idxs],
            target_slot_idxs=target_slot_idxs, target_exprs=targets)

    def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
        """Record an exception in data_row and propagate it to dependents"""
        data_row.set_exc(slot_idx, exc)
        for dependent_idx in self.dependents[slot_idx]:
            data_row.set_exc(dependent_idx, exc)

    def eval(
            self, data_row: DataRow, ctx: EvalCtx, profile: Optional[ExecProfile] = None, ignore_errors: bool = False
    ) -> None:
        """
        Populates the slots in data_row given in ctx.
        If an expr.eval() raises an exception, records the exception in the corresponding slot of data_row
        and omits the eval() of any of that expr's dependents.
        profile: if present, populated with execution time of each expr.eval() call; indexed by expr.slot_idx
        ignore_errors: if False, raises ExprEvalError if any expr.eval() raises an exception
        """
        for expr in ctx.exprs:
            assert expr.slot_idx >= 0
            if data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx):
                # slot already holds a value or an exception (possibly propagated from a dependency)
                continue
            try:
                start_time = time.perf_counter()
                expr.eval(data_row, self)
                if profile is not None:
                    profile.eval_time[expr.slot_idx] += time.perf_counter() - start_time
                    profile.eval_count[expr.slot_idx] += 1
            except Exception as exc:
                _, _, exc_tb = sys.exc_info()
                self.set_exc(data_row, expr.slot_idx, exc)
                if not ignore_errors:
                    input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
                    raise excs.ExprEvalError(
                        expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)

    def create_table_row(self, data_row: DataRow, exc_col_ids: Set[int]) -> Tuple[Dict[str, Any], int]:
        """Create a table row from the slots that have an output column assigned

        The ids of columns whose slot holds an exception are added to exc_col_ids.

        Return Tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
        This excludes system columns.
        """
        num_excs = 0
        table_row: Dict[str, Any] = {}
        for info in self.table_columns:
            col, slot_idx = info.col, info.slot_idx
            if data_row.has_exc(slot_idx):
                # exceptions get stored in the errortype/-msg columns
                exc = data_row.get_exc(slot_idx)
                num_excs += 1
                exc_col_ids.add(col.id)
                table_row[col.storage_name()] = None
                table_row[col.errortype_storage_name()] = type(exc).__name__
                table_row[col.errormsg_storage_name()] = str(exc)
            else:
                val = data_row.get_stored_val(slot_idx)
                table_row[col.storage_name()] = val
                # we unfortunately need to set these, even if there are no errors
                table_row[col.errortype_storage_name()] = None
                table_row[col.errormsg_storage_name()] = None

        for slot_idx, col in enumerate(self.index_columns):
            if col is None:
                continue
            # don't use get_stored_val() here, we need to pass in the ndarray
            val = data_row[slot_idx]
            table_row[col.index_storage_name()] = val

        return table_row, num_excs
355
+
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ from uuid import UUID
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr
8
+ from .data_row import DataRow
9
+ from .row_builder import RowBuilder
10
+ import pixeltable.type_system as ts
11
+ import pixeltable.catalog as catalog
12
+
13
+
14
class RowidRef(Expr):
    """Expr that refers to a single component of a table's rowid

    Used internally for grouping by a base table and for references to the 'pos' column.
    A RowidRef that is part of a computed column in a view gets constructed via _from_dict() before
    the view's TableVersion is available; for that reason the c'tor accepts either a TableVersion
    or just the table ids.
    """
    def __init__(
            self, tbl: catalog.TableVersion, idx: int,
            tbl_id: Optional[UUID] = None, normalized_base_id: Optional[UUID] = None):
        super().__init__(ts.IntType(nullable=False))
        self.tbl = tbl
        if tbl is None:
            self.normalized_base = None
        else:
            # normalize to simplify comparisons: walk up to the lowest base table that still has the
            # requested rowid idx (which has the same values as all its descendent views)
            # don't try to reference tbl.store_tbl here
            candidate = tbl
            while candidate.base is not None and candidate.base.num_rowid_columns() > idx:
                candidate = candidate.base
            self.normalized_base = candidate

        # when constructed by _from_dict(), only the ids are known, not the TableVersion itself
        if tbl is None:
            self.tbl_id = tbl_id
        else:
            self.tbl_id = tbl.id
        if self.normalized_base is None:
            self.normalized_base_id = normalized_base_id
        else:
            self.normalized_base_id = self.normalized_base.id
        self.rowid_component_idx = idx
        self.id = self._create_id()

    def _resolve_tbl(self) -> catalog.TableVersion:
        """Return self.tbl if set, otherwise look the table up in the catalog by self.tbl_id"""
        if self.tbl is not None:
            return self.tbl
        return catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]

    def default_column_name(self) -> Optional[str]:
        return str(self)

    def _equals(self, other: RowidRef) -> bool:
        same_base = self.normalized_base_id == other.normalized_base_id
        return same_base and self.rowid_component_idx == other.rowid_component_idx

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        own_attrs = [('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
        return super()._id_attrs() + own_attrs

    def __str__(self) -> str:
        # the only rowid component with a name is the pos column of a component view
        tbl = self._resolve_tbl()
        if not tbl.is_component_view():
            return ''
        if self.rowid_component_idx != tbl.store_tbl.pos_col_idx:
            return ''
        return catalog.globals.POS_COLUMN_NAME

    def set_tbl(self, tbl: catalog.TableVersionPath) -> None:
        """Switch the referenced table to the one at the head of 'tbl'.

        Query planning may call this in order to reduce the total number of tables that are
        referenced/need to be joined.
        The new table has to be a view of the original table (which shares the base's rowid columns).
        """
        if self.tbl_id == tbl.tbl_version.id:
            # already referencing that table
            return
        ids_in_path = [version.id for version in tbl.get_tbl_versions()]
        assert self.tbl_id in ids_in_path
        self.tbl = tbl.tbl_version
        self.tbl_id = self.tbl.id

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        store_rowid_cols = self._resolve_tbl().store_tbl.rowid_columns()
        return store_rowid_cols[self.rowid_component_idx]

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # the value is the requested component of the row's primary key
        data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]

    def _as_dict(self) -> Dict:
        # ids are serialized as strings; _from_dict() turns them back into UUIDs
        result = {
            'tbl_id': str(self.tbl_id),
            'normalized_base_id': str(self.normalized_base_id),
            'idx': self.rowid_component_idx,
        }
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        tbl_id = UUID(d['tbl_id'])
        normalized_base_id = UUID(d['normalized_base_id'])
        idx = d['idx']
        # no TableVersion available at this point, only the ids
        return cls(tbl=None, idx=idx, tbl_id=tbl_id, normalized_base_id=normalized_base_id)
94
+
@@ -0,0 +1,53 @@
1
+ import json
2
+ from typing import Optional, Dict, List, Tuple, Any
3
+
4
+ import sqlalchemy as sql
5
+
6
+ import pixeltable.type_system as ts
7
+ from .expr import DataRow, Expr
8
+ from .row_builder import RowBuilder
9
+
10
+
11
class TypeCast(Expr):
    """
    An `Expr` that converts the value of an underlying `Expr` to a specified `ColumnType`.

    The underlying `Expr` is the sole component; the target type is the `col_type` of this `Expr`.
    """
    def __init__(self, underlying: Expr, new_type: ts.ColumnType):
        super().__init__(new_type)
        self.components: List[Expr] = [underlying]
        self.id: Optional[int] = self._create_id()

    @property
    def _underlying(self):
        # the expr being cast is always the first (and only) component
        return self.components[0]

    def __str__(self) -> str:
        return f'{self._underlying}.astype({self.col_type})'

    def _equals(self, other: 'TypeCast') -> bool:
        # nothing to compare here: `TypeCast` adds no properties beyond those captured by `Expr`
        return True

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        attrs = super()._id_attrs()
        return attrs + [('new_type', self.col_type)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        Always returns None: `sql_expr` is unimplemented for now, in order to sidestep potentially
        thorny questions about consistency of doing type conversions in both Python and Postgres.
        """
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        # convert the already-evaluated value of the underlying expr via the target type
        data_row[self.slot_idx] = self.col_type.create_literal(data_row[self._underlying.slot_idx])

    def _as_dict(self) -> Dict:
        result = {'new_type': self.col_type.as_dict()}
        result.update(super()._as_dict())
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'new_type' in d
        assert len(components) == 1
        new_type = ts.ColumnType.from_dict(d['new_type'])
        return cls(components[0], new_type)
@@ -0,0 +1,45 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Tuple, Any, Dict, NoReturn
4
+
5
+ import pixeltable.type_system as ts
6
+ from .data_row import DataRow
7
+ from .expr import Expr
8
+ from .row_builder import RowBuilder
9
+
10
+
11
class Variable(Expr):
    """A named, typed placeholder expr, needed for ExprTemplateFunctions

    A Variable cannot be evaluated or translated to SQL: it needs to have been replaced by an
    actual expression before evaluation.
    """

    def __init__(self, name: str, col_type: ts.ColumnType):
        super().__init__(col_type)
        self.name = name
        self.id = self._create_id()

    def __str__(self) -> str:
        return self.name

    def _equals(self, other: Variable) -> bool:
        return self.name == other.name

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        attrs = super()._id_attrs()
        return attrs + [('name', self.name)]

    # the following operations are not supported for a placeholder

    def default_column_name(self) -> NoReturn:
        raise NotImplementedError()

    def sql_expr(self) -> NoReturn:
        raise NotImplementedError()

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
        raise NotImplementedError()

    def _as_dict(self) -> Dict:
        result = {'name': self.name, 'type': self.col_type.as_dict()}
        result.update(super()._as_dict())
        return result

    @classmethod
    def _from_dict(cls, d: Dict, _: List[Expr]) -> Expr:
        # a Variable has no components, so the second argument is ignored
        col_type = ts.ColumnType.from_dict(d['type'])
        return cls(d['name'], col_type)
@@ -0,0 +1,9 @@
1
+ from .aggregate_function import Aggregator, AggregateFunction, uda
2
+ from .batched_function import BatchedFunction, ExplicitBatchedFunction
3
+ from .callable_function import CallableFunction
4
+ from .expr_template_function import ExprTemplateFunction
5
+ from .function import Function
6
+ from .function_registry import FunctionRegistry
7
+ from .nos_function import NOSFunction
8
+ from .signature import Signature, Parameter, Batch
9
+ from .udf import udf, make_function, expr_udf