pixeltable 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +21 -4
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +520 -31
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +373 -48
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +187 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +61 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +88 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +27 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +413 -182
- pixeltable/tests/conftest.py +143 -86
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +372 -0
- pixeltable/tests/test_dataframe.py +433 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +117 -0
- pixeltable/tests/test_exprs.py +591 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_functions.py +283 -1
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1086 -258
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +149 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +186 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/type_system.py +490 -133
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +126 -0
- pixeltable/utils/pytorch.py +172 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.0.dist-info/LICENSE +18 -0
- pixeltable-0.2.0.dist-info/METADATA +117 -0
- pixeltable-0.2.0.dist-info/RECORD +125 -0
- {pixeltable-0.1.2.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.2.dist-info/LICENSE +0 -201
- pixeltable-0.1.2.dist-info/METADATA +0 -89
- pixeltable-0.1.2.dist-info/RECORD +0 -37
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple, Set, Iterable
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import time
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .expr_set import ExprSet
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
import pixeltable.utils as utils
|
|
11
|
+
import pixeltable.func as func
|
|
12
|
+
import pixeltable.exceptions as excs
|
|
13
|
+
import pixeltable.catalog as catalog
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ExecProfile:
    """Accumulates per-expr evaluation timings for the exprs of a RowBuilder.

    Both lists are indexed by expr slot idx and have one entry per materialized slot of the RowBuilder.
    """

    def __init__(self, row_builder: RowBuilder):
        """
        Args:
            row_builder: the RowBuilder whose exprs are being profiled; its num_materialized determines the
                size of the timing/count lists
        """
        self.eval_time = [0.0] * row_builder.num_materialized  # total time spent in eval(), per slot
        self.eval_count = [0] * row_builder.num_materialized  # number of eval() calls, per slot
        self.row_builder = row_builder

    def print(self, num_rows: int) -> None:
        """Print the average per-call evaluation time of every expr that was evaluated at least once.

        Args:
            num_rows: number of rows that were processed; used to report how many times per row an expr
                was evaluated. If it isn't positive, the calls-per-row multiplier is omitted.
        """
        for i in range(self.row_builder.num_materialized):
            if self.eval_count[i] == 0:
                # never evaluated: nothing to report
                continue
            per_call_time = self.eval_time[i] / self.eval_count[i]
            # guard against a division by zero when no rows were processed
            calls_per_row = self.eval_count[i] / num_rows if num_rows > 0 else 0
            multiple_str = f'({calls_per_row}x)' if calls_per_row > 1 else ''
            print(f'{self.row_builder.unique_exprs[i]}: {utils.print_perf_counter_delta(per_call_time)} {multiple_str}')
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ColumnSlotIdx:
    """Associates a column with the DataRow slot in which its materialized value can be found

    TODO: can this be integrated into RowBuilder directly?
    """
    col: catalog.Column  # the column
    slot_idx: int  # idx of the DataRow slot for that column
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RowBuilder:
    """Create and populate DataRows and table rows from exprs and computed columns

    Every recorded expr is assigned a slot idx; exprs are recorded in an order that lets the whole expr DAG be
    evaluated in a single front-to-back pass (dependencies precede their dependents).

    For ColumnRefs to unstored iterator columns:
    - in order for them to be executable, we also record the iterator args and pass them to the ColumnRef
    """

    @dataclass
    class EvalCtx:
        """Context for evaluating a set of target exprs"""
        slot_idxs: List[int]  # slot idxs of exprs needed to evaluate target exprs; does not contain duplicates
        exprs: List[Expr]  # exprs corresponding to slot_idxs
        target_slot_idxs: List[int]  # slot idxs of target exprs; might contain duplicates
        target_exprs: List[Expr]  # exprs corresponding to target_slot_idxs

    def __init__(
            self, output_exprs: List[Expr], columns: List[catalog.Column],
            indices: List[Tuple[catalog.Column, func.Function]], input_exprs: List[Expr]
    ):
        """
        Args:
            output_exprs: list of Exprs to be evaluated
            columns: list of columns to be materialized
            indices: list of embeddings to be materialized (Tuple[indexed column, embedding function])
            input_exprs: list of Exprs that are treated as input: they are recorded without their components,
                are excluded from the default eval ctx and are considered to have no dependencies
        """
        self.unique_exprs = ExprSet()  # dependencies precede their dependents
        self.next_slot_idx = 0

        # record input and output exprs; make copies to avoid reusing execution state
        unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
        self.input_expr_slot_idxs = {e.slot_idx for e in unique_input_exprs}

        # output exprs: all exprs the caller wants to materialize
        # - explicitly requested output_exprs
        # - values for computed columns
        # - embedding values for indices
        resolve_cols = set(columns)
        self.output_exprs = [
            self._record_unique_expr(e.copy().resolve_computed_cols(resolve_cols=resolve_cols), recursive=True)
            for e in output_exprs
        ]

        # record columns for create_table_row()
        from .column_ref import ColumnRef
        self.table_columns: List[ColumnSlotIdx] = []
        for col in columns:
            if col.is_computed:
                assert col.value_expr is not None
                # create a copy here so we don't reuse execution state and resolve references to computed columns
                expr = col.value_expr.copy().resolve_computed_cols(resolve_cols=resolve_cols)
                expr = self._record_unique_expr(expr, recursive=True)
                self.add_table_column(col, expr.slot_idx)
                self.output_exprs.append(expr)
            else:
                # record a ColumnRef so that references to this column resolve to the same slot idx
                ref = ColumnRef(col)
                ref = self._record_unique_expr(ref, recursive=False)
                self.add_table_column(col, ref.slot_idx)

        # record indices; indexed by slot_idx (entries for slots without an index are None)
        self.index_columns: List[Optional[catalog.Column]] = []
        for col, embedding_fn in indices:
            # we assume that the parameter of the embedding function is a ref to an image column
            assert col.col_type.is_image_type()
            # construct expr to compute embedding; explicitly resize images to the required size
            target_img_type = next(iter(embedding_fn.signature.parameters.values())).col_type
            expr = embedding_fn(ColumnRef(col).resize(target_img_type.size))
            expr = self._record_unique_expr(expr, recursive=True)
            self.output_exprs.append(expr)
            if len(self.index_columns) <= expr.slot_idx:
                # pad to slot_idx
                self.index_columns.extend([None] * (expr.slot_idx - len(self.index_columns) + 1))
            self.index_columns[expr.slot_idx] = col

        # default eval ctx: all output exprs
        self.default_eval_ctx = self.create_eval_ctx(self.output_exprs, exclude=unique_input_exprs)

        # references to unstored iterator columns:
        # - those ColumnRefs need to instantiate iterators
        # - we create and record the iterator args here and pass them to their respective ColumnRefs
        # - we do this instead of simply recording the iterator args as a component of those ColumnRefs,
        #   because that would cause them to be evaluated for every single row
        # - the separate eval ctx allows the ColumnRef to materialize the iterator args only when the underlying
        #   iterated object changes
        col_refs = [e for e in self.unique_exprs if isinstance(e, ColumnRef)]

        def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
            tbl = col_ref.col.tbl
            return tbl.is_component_view() and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored

        unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
        component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
        unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
        self.unstored_iter_args = {
            view_id: self._record_unique_expr(arg, recursive=True) for view_id, arg in unstored_iter_args.items()
        }

        for col_ref in unstored_iter_col_refs:
            iter_arg_ctx = self.create_eval_ctx([unstored_iter_args[col_ref.col.tbl.id]])
            col_ref.set_iter_arg_ctx(iter_arg_ctx)

        # we guarantee that we can compute the expr DAG in a single front-to-back pass
        for i, expr in enumerate(self.unique_exprs):
            assert expr.slot_idx == i

        # record transitive dependencies (list of set of slot_idxs, indexed by slot_idx)
        self.dependencies: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for expr in self.unique_exprs:
            if expr.slot_idx in self.input_expr_slot_idxs:
                # this is input and therefore doesn't depend on other exprs
                continue
            for d in expr.dependencies():
                self.dependencies[expr.slot_idx].add(d.slot_idx)
                self.dependencies[expr.slot_idx].update(self.dependencies[d.slot_idx])

        # derive transitive dependents
        self.dependents: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for expr in self.unique_exprs:
            for d in self.dependencies[expr.slot_idx]:
                self.dependents[d].add(expr.slot_idx)

        # records the output_expr that a subexpr belongs to
        # (a subexpr can be shared across multiple output exprs)
        self.output_expr_ids: List[Set[int]] = [set() for _ in range(self.num_materialized)]
        for e in self.output_exprs:
            self._record_output_expr_id(e, e.slot_idx)

    def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
        """Record a column that is part of the table row"""
        self.table_columns.append(ColumnSlotIdx(col, slot_idx))

    def output_slot_idxs(self) -> List[ColumnSlotIdx]:
        """Return ColumnSlotIdx for output columns"""
        return self.table_columns

    def index_slot_idxs(self) -> List[ColumnSlotIdx]:
        """Return ColumnSlotIdx for index columns

        index_columns is indexed by slot idx and padded with None, so only the non-None entries are returned.
        """
        return [
            ColumnSlotIdx(col, slot_idx) for slot_idx, col in enumerate(self.index_columns)
            if col is not None
        ]

    @property
    def num_materialized(self) -> int:
        """Number of slot idxs that have been assigned so far"""
        return self.next_slot_idx

    def get_output_exprs(self) -> List[Expr]:
        """Returns exprs that were requested in the c'tor and require evaluation"""
        return self.output_exprs

    def _next_slot_idx(self) -> int:
        """Return the next unassigned slot idx and advance the counter"""
        result = self.next_slot_idx
        self.next_slot_idx += 1
        return result

    def _record_unique_expr(self, expr: Expr, recursive: bool) -> Expr:
        """Records the expr if it's not a duplicate and assigns a slot idx to expr and its components

        Args:
            expr: the expr to record
            recursive: if True, also records the expr's components (before the expr itself) and replaces
                each component with its recorded counterpart
        Returns:
            the unique expr
        """
        if expr in self.unique_exprs:
            # expr is a duplicate: we use the original instead
            return self.unique_exprs[expr]

        # expr value needs to be computed via Expr.eval()
        if recursive:
            for i, c in enumerate(expr.components):
                # make sure we only refer to components that have themselves been recorded
                expr.components[i] = self._record_unique_expr(c, True)
        assert expr.slot_idx < 0
        expr.slot_idx = self._next_slot_idx()
        self.unique_exprs.append(expr)
        return expr

    def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
        """Tag e and, recursively, all of its dependencies with output_expr_id"""
        self.output_expr_ids[e.slot_idx].add(output_expr_id)
        for d in e.dependencies():
            self._record_output_expr_id(d, output_expr_id)

    def _compute_dependencies(self, target_slot_idxs: List[int], excluded_slot_idxs: List[int]) -> List[int]:
        """Compute exprs needed to materialize the given target slots, excluding 'excluded_slot_idxs'

        Returns:
            sorted list of the slot idxs of the (transitive) dependencies; empty if there are no targets
        """
        if len(target_slot_idxs) == 0:
            return []
        excluded = set(excluded_slot_idxs)  # set: membership is tested for every expr and dependency
        dependencies = [set() for _ in range(self.num_materialized)]  # indexed by slot_idx
        # doing this front-to-back ensures that we capture transitive dependencies
        max_target_slot_idx = max(target_slot_idxs)
        for expr in self.unique_exprs:
            if expr.slot_idx > max_target_slot_idx:
                # we're done
                break
            if expr.slot_idx in excluded:
                continue
            if expr.slot_idx in self.input_expr_slot_idxs:
                # this is input and therefore doesn't depend on other exprs
                continue
            for d in expr.dependencies():
                if d.slot_idx in excluded:
                    continue
                dependencies[expr.slot_idx].add(d.slot_idx)
                dependencies[expr.slot_idx].update(dependencies[d.slot_idx])
        # merge dependencies and convert to list
        return sorted(set().union(*[dependencies[i] for i in target_slot_idxs]))

    def substitute_exprs(self, expr_list: List[Expr], remove_duplicates: bool = True) -> None:
        """Substitutes exprs with their executable counterparts from unique_exprs and optionally removes duplicates

        expr_list is modified in place.
        """
        i = 0
        unique_ids: Set[int] = set()  # slot idxs within expr_list
        while i < len(expr_list):
            unique_expr = self.unique_exprs[expr_list[i]]
            if unique_expr.slot_idx in unique_ids and remove_duplicates:
                # don't advance i: the next element has moved into position i
                del expr_list[i]
            else:
                expr_list[i] = unique_expr
                unique_ids.add(unique_expr.slot_idx)
                i += 1

    def get_dependencies(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> List[Expr]:
        """
        Return list of dependencies needed to evaluate the given target exprs; the targets themselves are included.
        The exprs given in 'exclude' are excluded.
        Returns:
            list of Exprs from unique_exprs (= with slot_idx set), ordered by slot idx
        """
        if exclude is None:
            exclude = []
        if len(targets) == 0:
            return []
        # make sure we only refer to recorded exprs
        targets = [self.unique_exprs[e] for e in targets]
        exclude = [self.unique_exprs[e] for e in exclude]
        target_slot_idxs = [e.slot_idx for e in targets]
        excluded_slot_idxs = [e.slot_idx for e in exclude]
        all_dependencies = set(self._compute_dependencies(target_slot_idxs, excluded_slot_idxs))
        all_dependencies.update(target_slot_idxs)
        result_ids = list(all_dependencies)
        result_ids.sort()
        return [self.unique_exprs[slot_idx] for slot_idx in result_ids]

    def create_eval_ctx(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> EvalCtx:
        """Return EvalCtx for targets

        Args:
            targets: exprs to be evaluated
            exclude: exprs to leave out of the ctx
        """
        if exclude is None:
            exclude = []
        if len(targets) == 0:
            return self.EvalCtx([], [], [], [])
        dependencies = self.get_dependencies(targets, exclude)
        targets = [self.unique_exprs[e] for e in targets]
        target_slot_idxs = [e.slot_idx for e in targets]
        ctx_slot_idxs = [e.slot_idx for e in dependencies]
        return self.EvalCtx(
            slot_idxs=ctx_slot_idxs, exprs=[self.unique_exprs[slot_idx] for slot_idx in ctx_slot_idxs],
            target_slot_idxs=target_slot_idxs, target_exprs=targets)

    def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
        """Record an exception in data_row and propagate it to dependents"""
        data_row.set_exc(slot_idx, exc)
        for dependent_slot_idx in self.dependents[slot_idx]:
            data_row.set_exc(dependent_slot_idx, exc)

    def eval(
            self, data_row: DataRow, ctx: EvalCtx, profile: Optional[ExecProfile] = None, ignore_errors: bool = False
    ) -> None:
        """
        Populates the slots in data_row given in ctx.
        If an expr.eval() raises an exception, records the exception in the corresponding slot of data_row
        and omits any of that expr's dependents' eval().
        profile: if present, populated with execution time of each expr.eval() call; indexed by expr.slot_idx
        ignore_errors: if False, raises ExprEvalError if any expr.eval() raises an exception
        """
        for expr in ctx.exprs:
            assert expr.slot_idx >= 0
            if data_row.has_val[expr.slot_idx] or data_row.has_exc(expr.slot_idx):
                # already materialized, or failed (possibly because a dependency failed)
                continue
            try:
                start_time = time.perf_counter()
                expr.eval(data_row, self)
                if profile is not None:
                    profile.eval_time[expr.slot_idx] += time.perf_counter() - start_time
                    profile.eval_count[expr.slot_idx] += 1
            except Exception as exc:
                _, _, exc_tb = sys.exc_info()
                self.set_exc(data_row, expr.slot_idx, exc)
                if not ignore_errors:
                    input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
                    raise excs.ExprEvalError(
                        expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)

    def create_table_row(self, data_row: DataRow, exc_col_ids: Set[int]) -> Tuple[Dict[str, Any], int]:
        """Create a table row from the slots that have an output column assigned

        Args:
            data_row: the row to convert
            exc_col_ids: updated in place with the ids of the columns that have an exception in data_row
        Return Tuple[dict that represents a stored row (can be passed to sql.insert()), # of exceptions]
        This excludes system columns.
        """
        num_excs = 0
        table_row: Dict[str, Any] = {}
        for info in self.table_columns:
            col, slot_idx = info.col, info.slot_idx
            if data_row.has_exc(slot_idx):
                # exceptions get stored in the errortype/-msg columns
                exc = data_row.get_exc(slot_idx)
                num_excs += 1
                exc_col_ids.add(col.id)
                table_row[col.storage_name()] = None
                table_row[col.errortype_storage_name()] = type(exc).__name__
                table_row[col.errormsg_storage_name()] = str(exc)
            else:
                val = data_row.get_stored_val(slot_idx)
                table_row[col.storage_name()] = val
                # we unfortunately need to set these, even if there are no errors
                table_row[col.errortype_storage_name()] = None
                table_row[col.errormsg_storage_name()] = None

        for slot_idx, col in enumerate(self.index_columns):
            if col is None:
                continue
            # don't use get_stored_val() here, we need to pass in the ndarray
            val = data_row[slot_idx]
            table_row[col.index_storage_name()] = val

        return table_row, num_excs
|
|
355
|
+
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .data_row import DataRow
|
|
9
|
+
from .row_builder import RowBuilder
|
|
10
|
+
import pixeltable.type_system as ts
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RowidRef(Expr):
    """An Expr that refers to one component of a table's rowid

    Used internally for grouping by a base table and for references to the 'pos' column.
    There are two construction paths: with a TableVersion (tbl is set), or with ids only (tbl is None and
    tbl_id/normalized_base_id are given). The latter is needed because the view's TableVersion isn't
    available when _from_dict()/init() is called for a RowidRef that is part of a computed column in a view.
    """
    def __init__(
            self, tbl: catalog.TableVersion, idx: int,
            tbl_id: Optional[UUID] = None, normalized_base_id: Optional[UUID] = None):
        """
        Args:
            tbl: the referenced TableVersion; None if only the ids are known
            idx: index of the referenced rowid component
            tbl_id: id of the referenced table; only used if tbl is None
            normalized_base_id: id of the normalized base table; only used if tbl is None
        """
        super().__init__(ts.IntType(nullable=False))
        self.tbl = tbl
        if tbl is None:
            # we're initialized by _from_dict(): we only have the ids, not the TableVersion itself
            self.normalized_base = None
            self.tbl_id = tbl_id
            self.normalized_base_id = normalized_base_id
        else:
            # normalize to simplify comparisons: we refer to the lowest base table that has the requested
            # rowid idx (which has the same values as all its descendent views)
            # don't try to reference tbl.store_tbl here
            base = tbl
            while base.base is not None and base.base.num_rowid_columns() > idx:
                base = base.base
            self.normalized_base = base
            self.tbl_id = tbl.id
            self.normalized_base_id = base.id
        self.rowid_component_idx = idx
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        """Return the string representation of this ref as its column name"""
        return str(self)

    def _equals(self, other: RowidRef) -> bool:
        """Two RowidRefs are equal if they have the same normalized base and rowid component idx"""
        mine = (self.normalized_base_id, self.rowid_component_idx)
        theirs = (other.normalized_base_id, other.rowid_component_idx)
        return mine == theirs

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Extend the base id attrs with the normalized base id and the rowid component idx"""
        own_attrs = [('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
        return super()._id_attrs() + own_attrs

    def __str__(self) -> str:
        """Return the pos column name if this refers to the pos column of a component view, otherwise ''"""
        tbl = self.tbl
        if tbl is None:
            # look up the TableVersion in the catalog
            tbl = catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
        # check if this is the pos column of a component view
        is_pos_col = tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx
        return catalog.globals.POS_COLUMN_NAME if is_pos_col else ''

    def set_tbl(self, tbl: catalog.TableVersionPath) -> None:
        """Change the table that is being referenced.

        This can be necessary during query planning, because at that stage we try to minimize the total number
        of tables that are referenced/need to be joined.
        We can only change to a view of the original table (which shares the base's rowid columns).
        """
        new_tbl_version = tbl.tbl_version
        if self.tbl_id == new_tbl_version.id:
            # nothing to change
            return
        # the currently referenced table needs to be part of the new path
        assert self.tbl_id in [version.id for version in tbl.get_tbl_versions()]
        self.tbl = tbl.tbl_version
        self.tbl_id = self.tbl.id

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Return the store table's rowid column for the referenced rowid component"""
        tbl = self.tbl
        if tbl is None:
            # look up the TableVersion in the catalog
            tbl = catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
        return tbl.store_tbl.rowid_columns()[self.rowid_component_idx]

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Copy the referenced rowid component from data_row.pk into this expr's slot"""
        data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]

    def _as_dict(self) -> Dict:
        """Serialize the ids (as strings) and the rowid component idx"""
        return {
            'tbl_id': str(self.tbl_id),
            'normalized_base_id': str(self.normalized_base_id),
            'idx': self.rowid_component_idx,
        }

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Reconstruct a RowidRef from its ids; the TableVersion itself isn't available here"""
        return cls(
            tbl=None, idx=d['idx'], tbl_id=UUID(d['tbl_id']), normalized_base_id=UUID(d['normalized_base_id']))
|
|
94
|
+
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Optional, Dict, List, Tuple, Any
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
import pixeltable.type_system as ts
|
|
7
|
+
from .expr import DataRow, Expr
|
|
8
|
+
from .row_builder import RowBuilder
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TypeCast(Expr):
    """
    An `Expr` that converts the value of an underlying `Expr` to a specified `ColumnType`.

    The underlying `Expr` is the sole component; the target type is this expr's `col_type`.
    """
    def __init__(self, underlying: Expr, new_type: ts.ColumnType):
        """
        Args:
            underlying: the `Expr` whose value is converted
            new_type: the `ColumnType` to convert to
        """
        super().__init__(new_type)
        self.components: List[Expr] = [underlying]
        self.id: Optional[int] = self._create_id()

    @property
    def _underlying(self):
        """The `Expr` whose value is being converted"""
        return self.components[0]

    def _equals(self, other: 'TypeCast') -> bool:
        """Always True: `TypeCast` has no properties beyond those captured by `Expr`."""
        return True

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Extend the base id attrs with the target type"""
        own_attrs = [('new_type', self.col_type)]
        return super()._id_attrs() + own_attrs

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """
        Always returns None: `sql_expr` is unimplemented for now, in order to sidestep potentially thorny
        questions about consistency of doing type conversions in both Python and Postgres.
        """
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Convert the underlying expr's value via the target type's `create_literal()` and store the result"""
        data_row[self.slot_idx] = self.col_type.create_literal(data_row[self._underlying.slot_idx])

    def _as_dict(self) -> Dict:
        """Serialize the target type in addition to the base `Expr` state"""
        return {'new_type': self.col_type.as_dict(), **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Reconstruct a `TypeCast` from its serialized target type and its single component"""
        assert 'new_type' in d
        assert len(components) == 1
        underlying = components[0]
        new_type = ts.ColumnType.from_dict(d['new_type'])
        return cls(underlying, new_type)

    def __str__(self) -> str:
        return f'{self._underlying}.astype({self.col_type})'
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import List, Tuple, Any, Dict, NoReturn
|
|
4
|
+
|
|
5
|
+
import pixeltable.type_system as ts
|
|
6
|
+
from .data_row import DataRow
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .row_builder import RowBuilder
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Variable(Expr):
    """A named, typed placeholder expr, needed for ExprTemplateFunctions

    A Variable cannot be evaluated or translated to SQL: it needs to have been replaced by an actual
    expression before evaluation.
    """

    def __init__(self, name: str, col_type: ts.ColumnType):
        """
        Args:
            name: name of the variable
            col_type: type of the variable
        """
        super().__init__(col_type)
        self.name = name
        self.id = self._create_id()

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Extend the base id attrs with the variable name"""
        own_attrs = [('name', self.name)]
        return super()._id_attrs() + own_attrs

    def default_column_name(self) -> NoReturn:
        """Not supported for Variables"""
        raise NotImplementedError()

    def _equals(self, other: Variable) -> bool:
        """Variables are compared by name"""
        return self.name == other.name

    def __str__(self) -> str:
        return self.name

    def sql_expr(self) -> NoReturn:
        """Not supported for Variables"""
        raise NotImplementedError()

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
        """Not supported: a Variable needs to be replaced before evaluation"""
        raise NotImplementedError()

    def _as_dict(self) -> Dict:
        """Serialize name and type in addition to the base Expr state"""
        return {'name': self.name, 'type': self.col_type.as_dict(), **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, _: List[Expr]) -> Expr:
        """Reconstruct a Variable from its serialized name and type"""
        name = d['name']
        col_type = ts.ColumnType.from_dict(d['type'])
        return cls(name, col_type)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from .aggregate_function import Aggregator, AggregateFunction, uda
|
|
2
|
+
from .batched_function import BatchedFunction, ExplicitBatchedFunction
|
|
3
|
+
from .callable_function import CallableFunction
|
|
4
|
+
from .expr_template_function import ExprTemplateFunction
|
|
5
|
+
from .function import Function
|
|
6
|
+
from .function_registry import FunctionRegistry
|
|
7
|
+
from .nos_function import NOSFunction
|
|
8
|
+
from .signature import Signature, Parameter, Batch
|
|
9
|
+
from .udf import udf, make_function, expr_udf
|