pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +64 -11
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +50 -27
- pixeltable/catalog/column.py +27 -11
- pixeltable/catalog/dir.py +6 -4
- pixeltable/catalog/globals.py +8 -1
- pixeltable/catalog/insertable_table.py +22 -12
- pixeltable/catalog/named_function.py +10 -6
- pixeltable/catalog/path.py +3 -2
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +121 -101
- pixeltable/catalog/table_version.py +291 -142
- pixeltable/catalog/table_version_path.py +8 -5
- pixeltable/catalog/view.py +67 -26
- pixeltable/dataframe.py +102 -72
- pixeltable/env.py +20 -21
- pixeltable/exec/__init__.py +2 -2
- pixeltable/exec/aggregation_node.py +10 -4
- pixeltable/exec/cache_prefetch_node.py +5 -3
- pixeltable/exec/component_iteration_node.py +9 -8
- pixeltable/exec/data_row_batch.py +21 -10
- pixeltable/exec/exec_context.py +10 -3
- pixeltable/exec/exec_node.py +23 -12
- pixeltable/exec/expr_eval/evaluators.py +13 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
- pixeltable/exec/expr_eval/globals.py +30 -7
- pixeltable/exec/expr_eval/row_buffer.py +5 -6
- pixeltable/exec/expr_eval/schedulers.py +151 -31
- pixeltable/exec/in_memory_data_node.py +8 -7
- pixeltable/exec/row_update_node.py +15 -5
- pixeltable/exec/sql_node.py +56 -27
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +57 -26
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +2 -1
- pixeltable/exprs/column_ref.py +20 -15
- pixeltable/exprs/comparison.py +6 -2
- pixeltable/exprs/compound_predicate.py +1 -3
- pixeltable/exprs/data_row.py +2 -2
- pixeltable/exprs/expr.py +101 -72
- pixeltable/exprs/expr_dict.py +2 -1
- pixeltable/exprs/expr_set.py +3 -1
- pixeltable/exprs/function_call.py +39 -41
- pixeltable/exprs/globals.py +1 -0
- pixeltable/exprs/in_predicate.py +2 -2
- pixeltable/exprs/inline_expr.py +20 -17
- pixeltable/exprs/json_mapper.py +4 -2
- pixeltable/exprs/json_path.py +12 -18
- pixeltable/exprs/literal.py +5 -9
- pixeltable/exprs/method_ref.py +1 -0
- pixeltable/exprs/object_ref.py +1 -1
- pixeltable/exprs/row_builder.py +32 -17
- pixeltable/exprs/rowid_ref.py +14 -5
- pixeltable/exprs/similarity_expr.py +11 -6
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +24 -9
- pixeltable/ext/__init__.py +1 -0
- pixeltable/ext/functions/__init__.py +1 -0
- pixeltable/ext/functions/whisperx.py +2 -2
- pixeltable/ext/functions/yolox.py +11 -11
- pixeltable/func/aggregate_function.py +17 -13
- pixeltable/func/callable_function.py +6 -6
- pixeltable/func/expr_template_function.py +15 -14
- pixeltable/func/function.py +16 -16
- pixeltable/func/function_registry.py +11 -8
- pixeltable/func/globals.py +4 -2
- pixeltable/func/query_template_function.py +12 -13
- pixeltable/func/signature.py +18 -9
- pixeltable/func/tools.py +10 -17
- pixeltable/func/udf.py +106 -11
- pixeltable/functions/__init__.py +21 -2
- pixeltable/functions/anthropic.py +16 -12
- pixeltable/functions/fireworks.py +63 -5
- pixeltable/functions/gemini.py +13 -3
- pixeltable/functions/globals.py +18 -6
- pixeltable/functions/huggingface.py +20 -38
- pixeltable/functions/image.py +7 -3
- pixeltable/functions/json.py +1 -0
- pixeltable/functions/llama_cpp.py +1 -4
- pixeltable/functions/mistralai.py +31 -20
- pixeltable/functions/ollama.py +4 -18
- pixeltable/functions/openai.py +201 -108
- pixeltable/functions/replicate.py +11 -10
- pixeltable/functions/string.py +70 -7
- pixeltable/functions/timestamp.py +21 -8
- pixeltable/functions/together.py +66 -52
- pixeltable/functions/video.py +1 -0
- pixeltable/functions/vision.py +14 -11
- pixeltable/functions/whisper.py +2 -1
- pixeltable/globals.py +60 -26
- pixeltable/index/__init__.py +1 -1
- pixeltable/index/btree.py +5 -3
- pixeltable/index/embedding_index.py +15 -14
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +30 -25
- pixeltable/io/fiftyone.py +6 -14
- pixeltable/io/globals.py +33 -27
- pixeltable/io/hf_datasets.py +2 -1
- pixeltable/io/label_studio.py +77 -68
- pixeltable/io/pandas.py +33 -9
- pixeltable/io/parquet.py +9 -12
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +205 -0
- pixeltable/iterators/document.py +19 -8
- pixeltable/iterators/image.py +6 -24
- pixeltable/iterators/string.py +3 -6
- pixeltable/iterators/video.py +1 -7
- pixeltable/metadata/__init__.py +7 -1
- pixeltable/metadata/converters/convert_10.py +2 -2
- pixeltable/metadata/converters/convert_15.py +1 -5
- pixeltable/metadata/converters/convert_16.py +2 -4
- pixeltable/metadata/converters/convert_17.py +2 -4
- pixeltable/metadata/converters/convert_18.py +2 -4
- pixeltable/metadata/converters/convert_19.py +2 -5
- pixeltable/metadata/converters/convert_20.py +1 -4
- pixeltable/metadata/converters/convert_21.py +4 -6
- pixeltable/metadata/converters/convert_22.py +1 -0
- pixeltable/metadata/converters/convert_23.py +5 -5
- pixeltable/metadata/converters/convert_24.py +12 -13
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/util.py +3 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +13 -2
- pixeltable/plan.py +173 -98
- pixeltable/store.py +42 -26
- pixeltable/type_system.py +62 -54
- pixeltable/utils/arrow.py +1 -2
- pixeltable/utils/coco.py +16 -17
- pixeltable/utils/code.py +1 -1
- pixeltable/utils/console_output.py +6 -3
- pixeltable/utils/description_helper.py +7 -7
- pixeltable/utils/documents.py +3 -1
- pixeltable/utils/filecache.py +12 -7
- pixeltable/utils/http_server.py +9 -8
- pixeltable/utils/media_store.py +2 -1
- pixeltable/utils/pytorch.py +11 -14
- pixeltable/utils/s3.py +1 -0
- pixeltable/utils/sql.py +1 -0
- pixeltable/utils/transactional_directory.py +2 -2
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
- pixeltable-0.3.3.dist-info/RECORD +163 -0
- pixeltable-0.3.2.dist-info/RECORD +0 -161
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/plan.py
CHANGED
|
@@ -2,18 +2,15 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import dataclasses
|
|
4
4
|
import enum
|
|
5
|
-
from typing import Any, Iterable, Optional, Sequence
|
|
5
|
+
from typing import Any, Iterable, Literal, Optional, Sequence
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
import sqlalchemy as sql
|
|
10
9
|
|
|
11
10
|
import pixeltable as pxt
|
|
12
11
|
import pixeltable.exec as exec
|
|
13
|
-
from pixeltable import catalog
|
|
14
|
-
from pixeltable import
|
|
15
|
-
from pixeltable import exprs
|
|
16
|
-
from pixeltable.exec.sql_node import OrderByItem, OrderByClause, combine_order_by_clauses, print_order_by_clause
|
|
12
|
+
from pixeltable import catalog, exceptions as excs, exprs
|
|
13
|
+
from pixeltable.exec.sql_node import OrderByClause, OrderByItem, combine_order_by_clauses, print_order_by_clause
|
|
17
14
|
|
|
18
15
|
|
|
19
16
|
def _is_agg_fn_call(e: exprs.Expr) -> bool:
|
|
@@ -21,7 +18,7 @@ def _is_agg_fn_call(e: exprs.Expr) -> bool:
|
|
|
21
18
|
|
|
22
19
|
|
|
23
20
|
def _get_combined_ordering(
|
|
24
|
-
|
|
21
|
+
o1: list[tuple[exprs.Expr, bool]], o2: list[tuple[exprs.Expr, bool]]
|
|
25
22
|
) -> list[tuple[exprs.Expr, bool]]:
|
|
26
23
|
"""Returns an ordering that's compatible with both o1 and o2, or an empty list if no such ordering exists"""
|
|
27
24
|
result: list[tuple[exprs.Expr, bool]] = []
|
|
@@ -65,13 +62,15 @@ class JoinType(enum.Enum):
|
|
|
65
62
|
@dataclasses.dataclass
|
|
66
63
|
class JoinClause:
|
|
67
64
|
"""Corresponds to a single 'JOIN ... ON (...)' clause in a SELECT statement; excludes the joined table."""
|
|
65
|
+
|
|
68
66
|
join_type: JoinType
|
|
69
67
|
join_predicate: Optional[exprs.Expr] # None for join_type == CROSS
|
|
70
68
|
|
|
71
69
|
|
|
72
70
|
@dataclasses.dataclass
|
|
73
71
|
class FromClause:
|
|
74
|
-
"""Corresponds to the From-clause ('FROM <tbl> JOIN ... ON (...) JOIN ...') of a SELECT statement
|
|
72
|
+
"""Corresponds to the From-clause ('FROM <tbl> JOIN ... ON (...) JOIN ...') of a SELECT statement"""
|
|
73
|
+
|
|
75
74
|
tbls: list[catalog.TableVersionPath]
|
|
76
75
|
join_clauses: list[JoinClause] = dataclasses.field(default_factory=list)
|
|
77
76
|
|
|
@@ -101,9 +100,13 @@ class Analyzer:
|
|
|
101
100
|
agg_order_by: list[exprs.Expr]
|
|
102
101
|
|
|
103
102
|
def __init__(
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
103
|
+
self,
|
|
104
|
+
from_clause: FromClause,
|
|
105
|
+
select_list: Sequence[exprs.Expr],
|
|
106
|
+
where_clause: Optional[exprs.Expr] = None,
|
|
107
|
+
group_by_clause: Optional[list[exprs.Expr]] = None,
|
|
108
|
+
order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
|
|
109
|
+
):
|
|
107
110
|
if order_by_clause is None:
|
|
108
111
|
order_by_clause = []
|
|
109
112
|
self.from_clause = from_clause
|
|
@@ -146,12 +149,17 @@ class Analyzer:
|
|
|
146
149
|
candidates = self.select_list
|
|
147
150
|
agg_fn_calls = exprs.ExprSet(
|
|
148
151
|
exprs.Expr.list_subexprs(
|
|
149
|
-
candidates,
|
|
150
|
-
|
|
152
|
+
candidates,
|
|
153
|
+
expr_class=exprs.FunctionCall,
|
|
154
|
+
filter=lambda e: bool(e.is_agg_fn_call and not e.is_window_fn_call),
|
|
155
|
+
)
|
|
156
|
+
)
|
|
151
157
|
self.agg_fn_calls = list(agg_fn_calls)
|
|
152
158
|
window_fn_calls = exprs.ExprSet(
|
|
153
159
|
exprs.Expr.list_subexprs(
|
|
154
|
-
candidates, expr_class=exprs.FunctionCall, filter=lambda e: bool(e.is_window_fn_call)
|
|
160
|
+
candidates, expr_class=exprs.FunctionCall, filter=lambda e: bool(e.is_window_fn_call)
|
|
161
|
+
)
|
|
162
|
+
)
|
|
155
163
|
self.window_fn_calls = list(window_fn_calls)
|
|
156
164
|
if len(self.agg_fn_calls) == 0:
|
|
157
165
|
# nothing to do
|
|
@@ -165,7 +173,8 @@ class Analyzer:
|
|
|
165
173
|
is_agg_output = [self._determine_agg_status(e, grouping_expr_ids)[0] for e in self.select_list]
|
|
166
174
|
if is_agg_output.count(False) > 0:
|
|
167
175
|
raise excs.Error(
|
|
168
|
-
f'Invalid non-aggregate expression in aggregate query: {self.select_list[is_agg_output.index(False)]}'
|
|
176
|
+
f'Invalid non-aggregate expression in aggregate query: {self.select_list[is_agg_output.index(False)]}'
|
|
177
|
+
)
|
|
169
178
|
|
|
170
179
|
# check that Where clause and filter doesn't contain aggregates
|
|
171
180
|
if self.sql_where_clause is not None:
|
|
@@ -205,7 +214,8 @@ class Analyzer:
|
|
|
205
214
|
# an expression such as <grouping expr 1> + <grouping expr 2> can both be the output and input of agg
|
|
206
215
|
assert len(e.components) > 0
|
|
207
216
|
component_is_output, component_is_input = zip(
|
|
208
|
-
*[self._determine_agg_status(c, grouping_expr_ids) for c in e.components]
|
|
217
|
+
*[self._determine_agg_status(c, grouping_expr_ids) for c in e.components]
|
|
218
|
+
)
|
|
209
219
|
is_output = component_is_output.count(True) == len(e.components)
|
|
210
220
|
is_input = component_is_input.count(True) == len(e.components)
|
|
211
221
|
if not is_output and not is_input:
|
|
@@ -234,7 +244,8 @@ class Analyzer:
|
|
|
234
244
|
# window functions require ordering by the group_by/order_by clauses
|
|
235
245
|
group_by_exprs, order_by_exprs = fn_call.get_window_sort_exprs()
|
|
236
246
|
clause.append(
|
|
237
|
-
[OrderByItem(e, None) for e in group_by_exprs] + [OrderByItem(e, True) for e in order_by_exprs]
|
|
247
|
+
[OrderByItem(e, None) for e in group_by_exprs] + [OrderByItem(e, True) for e in order_by_exprs]
|
|
248
|
+
)
|
|
238
249
|
return combine_order_by_clauses(clause)
|
|
239
250
|
|
|
240
251
|
def has_agg(self) -> bool:
|
|
@@ -245,9 +256,7 @@ class Analyzer:
|
|
|
245
256
|
class Planner:
|
|
246
257
|
# TODO: create an exec.CountNode and change this to create_count_plan()
|
|
247
258
|
@classmethod
|
|
248
|
-
def create_count_stmt(
|
|
249
|
-
cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None
|
|
250
|
-
) -> sql.Select:
|
|
259
|
+
def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None) -> sql.Select:
|
|
251
260
|
stmt = sql.select(sql.func.count())
|
|
252
261
|
refd_tbl_ids: set[UUID] = set()
|
|
253
262
|
if where_clause is not None:
|
|
@@ -288,23 +297,26 @@ class Planner:
|
|
|
288
297
|
if len(computed_exprs) > 0:
|
|
289
298
|
# add an ExprEvalNode when there are exprs to compute
|
|
290
299
|
plan = exec.ExprEvalNode(
|
|
291
|
-
row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
|
|
300
|
+
row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
|
|
301
|
+
)
|
|
292
302
|
|
|
293
303
|
stored_col_info = row_builder.output_slot_idxs()
|
|
294
304
|
stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
|
|
295
305
|
plan.set_stored_img_cols(stored_img_col_info)
|
|
296
306
|
plan.set_ctx(
|
|
297
307
|
exec.ExecContext(
|
|
298
|
-
row_builder,
|
|
299
|
-
|
|
308
|
+
row_builder,
|
|
309
|
+
batch_size=0,
|
|
310
|
+
show_pbar=True,
|
|
311
|
+
num_computed_exprs=len(computed_exprs),
|
|
312
|
+
ignore_errors=ignore_errors,
|
|
313
|
+
)
|
|
314
|
+
)
|
|
300
315
|
return plan
|
|
301
316
|
|
|
302
317
|
@classmethod
|
|
303
318
|
def create_df_insert_plan(
|
|
304
|
-
cls,
|
|
305
|
-
tbl: catalog.TableVersion,
|
|
306
|
-
df: 'pxt.DataFrame',
|
|
307
|
-
ignore_errors: bool
|
|
319
|
+
cls, tbl: catalog.TableVersion, df: 'pxt.DataFrame', ignore_errors: bool
|
|
308
320
|
) -> exec.ExecNode:
|
|
309
321
|
assert not tbl.is_view()
|
|
310
322
|
plan = df._create_query_plan() # ExecNode constructed by the DataFrame
|
|
@@ -321,18 +333,21 @@ class Planner:
|
|
|
321
333
|
|
|
322
334
|
plan.set_ctx(
|
|
323
335
|
exec.ExecContext(
|
|
324
|
-
plan.row_builder, batch_size=0, show_pbar=True, num_computed_exprs=0,
|
|
325
|
-
|
|
336
|
+
plan.row_builder, batch_size=0, show_pbar=True, num_computed_exprs=0, ignore_errors=ignore_errors
|
|
337
|
+
)
|
|
338
|
+
)
|
|
326
339
|
plan.ctx.num_rows = 0 # Unknown
|
|
327
340
|
|
|
328
341
|
return plan
|
|
329
342
|
|
|
330
343
|
@classmethod
|
|
331
344
|
def create_update_plan(
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
345
|
+
cls,
|
|
346
|
+
tbl: catalog.TableVersionPath,
|
|
347
|
+
update_targets: dict[catalog.Column, exprs.Expr],
|
|
348
|
+
recompute_targets: list[catalog.Column],
|
|
349
|
+
where_clause: Optional[exprs.Expr],
|
|
350
|
+
cascade: bool,
|
|
336
351
|
) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
|
|
337
352
|
"""Creates a plan to materialize updated rows.
|
|
338
353
|
The plan:
|
|
@@ -361,14 +376,16 @@ class Planner:
|
|
|
361
376
|
recomputed_cols = {c for c in recomputed_cols if c.is_stored}
|
|
362
377
|
recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
|
|
363
378
|
copied_cols = [
|
|
364
|
-
col
|
|
379
|
+
col
|
|
380
|
+
for col in target.cols_by_id.values()
|
|
365
381
|
if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
|
|
366
382
|
]
|
|
367
383
|
select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
|
|
368
384
|
select_list.extend(update_targets.values())
|
|
369
385
|
|
|
370
|
-
recomputed_exprs =
|
|
371
|
-
|
|
386
|
+
recomputed_exprs = [
|
|
387
|
+
c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
|
|
388
|
+
]
|
|
372
389
|
# recomputed cols reference the new values of the updated cols
|
|
373
390
|
spec: dict[exprs.Expr, exprs.Expr] = {exprs.ColumnRef(col): e for col, e in update_targets.items()}
|
|
374
391
|
exprs.Expr.list_substitute(recomputed_exprs, spec)
|
|
@@ -385,9 +402,11 @@ class Planner:
|
|
|
385
402
|
|
|
386
403
|
@classmethod
|
|
387
404
|
def create_batch_update_plan(
|
|
388
|
-
cls,
|
|
389
|
-
|
|
390
|
-
|
|
405
|
+
cls,
|
|
406
|
+
tbl: catalog.TableVersionPath,
|
|
407
|
+
batch: list[dict[catalog.Column, exprs.Expr]],
|
|
408
|
+
rowids: list[tuple[int, ...]],
|
|
409
|
+
cascade: bool,
|
|
391
410
|
) -> tuple[exec.ExecNode, exec.RowUpdateNode, sql.ColumnElement[bool], list[catalog.Column], list[catalog.Column]]:
|
|
392
411
|
"""
|
|
393
412
|
Returns:
|
|
@@ -419,15 +438,17 @@ class Planner:
|
|
|
419
438
|
recomputed_cols = {c for c in recomputed_cols if c.is_stored}
|
|
420
439
|
recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
|
|
421
440
|
copied_cols = [
|
|
422
|
-
col
|
|
441
|
+
col
|
|
442
|
+
for col in target.cols_by_id.values()
|
|
423
443
|
if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
|
|
424
444
|
]
|
|
425
445
|
select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
|
|
426
446
|
select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
|
|
427
447
|
|
|
428
|
-
recomputed_exprs =
|
|
429
|
-
|
|
430
|
-
|
|
448
|
+
recomputed_exprs = [
|
|
449
|
+
c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
|
|
450
|
+
]
|
|
451
|
+
# the RowUpdateNode updates columns in-place, ie, in the original ColumnRef; no further substitution is needed
|
|
431
452
|
select_list.extend(recomputed_exprs)
|
|
432
453
|
|
|
433
454
|
# ExecNode tree (from bottom to top):
|
|
@@ -435,8 +456,9 @@ class Planner:
|
|
|
435
456
|
# - RowUpdateNode to update the retrieved rows
|
|
436
457
|
# - ExprEvalNode to evaluate the remaining output exprs
|
|
437
458
|
analyzer = Analyzer(FromClause(tbls=[tbl]), select_list)
|
|
438
|
-
sql_exprs = list(
|
|
439
|
-
analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False)
|
|
459
|
+
sql_exprs = list(
|
|
460
|
+
exprs.Expr.list_subexprs(analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False)
|
|
461
|
+
)
|
|
440
462
|
row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs)
|
|
441
463
|
analyzer.finalize(row_builder)
|
|
442
464
|
sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
|
|
@@ -458,13 +480,16 @@ class Planner:
|
|
|
458
480
|
plan.set_ctx(ctx)
|
|
459
481
|
recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
|
|
460
482
|
return (
|
|
461
|
-
plan,
|
|
462
|
-
|
|
483
|
+
plan,
|
|
484
|
+
row_update_node,
|
|
485
|
+
sql_lookup_node.where_clause_element,
|
|
486
|
+
list(updated_cols) + recomputed_user_cols,
|
|
487
|
+
recomputed_user_cols,
|
|
463
488
|
)
|
|
464
489
|
|
|
465
490
|
@classmethod
|
|
466
491
|
def create_view_update_plan(
|
|
467
|
-
|
|
492
|
+
cls, view: catalog.TableVersionPath, recompute_targets: list[catalog.Column]
|
|
468
493
|
) -> exec.ExecNode:
|
|
469
494
|
"""Creates a plan to materialize updated rows for a view, given that the base table has been updated.
|
|
470
495
|
The plan:
|
|
@@ -489,19 +514,25 @@ class Planner:
|
|
|
489
514
|
copied_cols = [col for col in target.cols_by_id.values() if col.is_stored and not col in recomputed_cols]
|
|
490
515
|
select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in copied_cols]
|
|
491
516
|
# resolve recomputed exprs to stored columns in the base
|
|
492
|
-
recomputed_exprs =
|
|
493
|
-
|
|
517
|
+
recomputed_exprs = [
|
|
518
|
+
c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_cols) for c in recomputed_cols
|
|
519
|
+
]
|
|
494
520
|
select_list.extend(recomputed_exprs)
|
|
495
521
|
|
|
496
522
|
# we need to retrieve the PK columns of the existing rows
|
|
497
523
|
plan = cls.create_query_plan(
|
|
498
|
-
FromClause(tbls=[view]),
|
|
499
|
-
|
|
524
|
+
FromClause(tbls=[view]),
|
|
525
|
+
select_list,
|
|
526
|
+
where_clause=target.predicate,
|
|
527
|
+
ignore_errors=True,
|
|
528
|
+
exact_version_only=view.get_bases(),
|
|
529
|
+
)
|
|
500
530
|
for i, col in enumerate(copied_cols + list(recomputed_cols)): # same order as select_list
|
|
501
531
|
plan.row_builder.add_table_column(col, select_list[i].slot_idx)
|
|
502
532
|
# TODO: avoid duplication with view_load_plan() logic (where does this belong?)
|
|
503
|
-
stored_img_col_info =
|
|
504
|
-
|
|
533
|
+
stored_img_col_info = [
|
|
534
|
+
info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()
|
|
535
|
+
]
|
|
505
536
|
plan.set_stored_img_cols(stored_img_col_info)
|
|
506
537
|
return plan
|
|
507
538
|
|
|
@@ -539,21 +570,27 @@ class Planner:
|
|
|
539
570
|
# 3. materialize stored view columns that haven't been produced by step 1
|
|
540
571
|
base_output_exprs = [e for e in row_builder.default_eval_ctx.exprs if e.is_bound_by([view.base])]
|
|
541
572
|
view_output_exprs = [
|
|
542
|
-
e
|
|
573
|
+
e
|
|
574
|
+
for e in row_builder.default_eval_ctx.target_exprs
|
|
543
575
|
if e.is_bound_by([view]) and not e.is_bound_by([view.base])
|
|
544
576
|
]
|
|
545
577
|
# if we're propagating an insert, we only want to see those base rows that were created for the current version
|
|
546
578
|
base_analyzer = Analyzer(FromClause(tbls=[view.base]), base_output_exprs, where_clause=target.predicate)
|
|
547
579
|
base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
|
|
548
580
|
plan = cls._create_query_plan(
|
|
549
|
-
row_builder=row_builder,
|
|
550
|
-
|
|
581
|
+
row_builder=row_builder,
|
|
582
|
+
analyzer=base_analyzer,
|
|
583
|
+
eval_ctx=base_eval_ctx,
|
|
584
|
+
with_pk=True,
|
|
585
|
+
exact_version_only=view.get_bases() if propagates_insert else [],
|
|
586
|
+
)
|
|
551
587
|
exec_ctx = plan.ctx
|
|
552
588
|
if target.is_component_view():
|
|
553
589
|
plan = exec.ComponentIterationNode(target, plan)
|
|
554
590
|
if len(view_output_exprs) > 0:
|
|
555
591
|
plan = exec.ExprEvalNode(
|
|
556
|
-
row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
|
|
592
|
+
row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
|
|
593
|
+
)
|
|
557
594
|
|
|
558
595
|
stored_img_col_info = [info for info in row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
|
|
559
596
|
plan.set_stored_img_cols(stored_img_col_info)
|
|
@@ -582,10 +619,9 @@ class Planner:
|
|
|
582
619
|
ob_clauses.append(ordering)
|
|
583
620
|
for fn_call in analyzer.agg_fn_calls:
|
|
584
621
|
# agg functions with an ordering requirement are implicitly ascending
|
|
585
|
-
ordering = (
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
)
|
|
622
|
+
ordering = [OrderByItem(e, None) for e in analyzer.group_by_clause] + [
|
|
623
|
+
OrderByItem(e, True) for e in fn_call.get_agg_order_by()
|
|
624
|
+
]
|
|
589
625
|
ob_clauses.append(ordering)
|
|
590
626
|
if len(ob_clauses) <= 1:
|
|
591
627
|
return
|
|
@@ -596,7 +632,8 @@ class Planner:
|
|
|
596
632
|
if combined is None:
|
|
597
633
|
raise excs.Error(
|
|
598
634
|
f'Incompatible ordering requirements: '
|
|
599
|
-
f'{print_order_by_clause(combined_ordering)} vs {print_order_by_clause(ordering)}'
|
|
635
|
+
f'{print_order_by_clause(combined_ordering)} vs {print_order_by_clause(ordering)}'
|
|
636
|
+
)
|
|
600
637
|
combined_ordering = combined
|
|
601
638
|
|
|
602
639
|
@classmethod
|
|
@@ -623,10 +660,15 @@ class Planner:
|
|
|
623
660
|
|
|
624
661
|
@classmethod
|
|
625
662
|
def create_query_plan(
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
663
|
+
cls,
|
|
664
|
+
from_clause: FromClause,
|
|
665
|
+
select_list: Optional[list[exprs.Expr]] = None,
|
|
666
|
+
where_clause: Optional[exprs.Expr] = None,
|
|
667
|
+
group_by_clause: Optional[list[exprs.Expr]] = None,
|
|
668
|
+
order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
|
|
669
|
+
limit: Optional[exprs.Expr] = None,
|
|
670
|
+
ignore_errors: bool = False,
|
|
671
|
+
exact_version_only: Optional[list[catalog.TableVersion]] = None,
|
|
630
672
|
) -> exec.ExecNode:
|
|
631
673
|
"""Return plan for executing a query.
|
|
632
674
|
Updates 'select_list' in place to make it executable.
|
|
@@ -639,8 +681,12 @@ class Planner:
|
|
|
639
681
|
if exact_version_only is None:
|
|
640
682
|
exact_version_only = []
|
|
641
683
|
analyzer = Analyzer(
|
|
642
|
-
from_clause,
|
|
643
|
-
|
|
684
|
+
from_clause,
|
|
685
|
+
select_list,
|
|
686
|
+
where_clause=where_clause,
|
|
687
|
+
group_by_clause=group_by_clause,
|
|
688
|
+
order_by_clause=order_by_clause,
|
|
689
|
+
)
|
|
644
690
|
row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [])
|
|
645
691
|
|
|
646
692
|
analyzer.finalize(row_builder)
|
|
@@ -648,8 +694,13 @@ class Planner:
|
|
|
648
694
|
# with_pk: for now, we always retrieve the PK, because we need it for the file cache
|
|
649
695
|
eval_ctx = row_builder.create_eval_ctx(analyzer.select_list)
|
|
650
696
|
plan = cls._create_query_plan(
|
|
651
|
-
row_builder=row_builder,
|
|
652
|
-
|
|
697
|
+
row_builder=row_builder,
|
|
698
|
+
analyzer=analyzer,
|
|
699
|
+
eval_ctx=eval_ctx,
|
|
700
|
+
limit=limit,
|
|
701
|
+
with_pk=True,
|
|
702
|
+
exact_version_only=exact_version_only,
|
|
703
|
+
)
|
|
653
704
|
plan.ctx.ignore_errors = ignore_errors
|
|
654
705
|
select_list.clear()
|
|
655
706
|
select_list.extend(analyzer.select_list)
|
|
@@ -657,9 +708,13 @@ class Planner:
|
|
|
657
708
|
|
|
658
709
|
@classmethod
|
|
659
710
|
def _create_query_plan(
|
|
660
|
-
cls,
|
|
661
|
-
|
|
662
|
-
|
|
711
|
+
cls,
|
|
712
|
+
row_builder: exprs.RowBuilder,
|
|
713
|
+
analyzer: Analyzer,
|
|
714
|
+
eval_ctx: exprs.RowBuilder.EvalCtx,
|
|
715
|
+
limit: Optional[exprs.Expr] = None,
|
|
716
|
+
with_pk: bool = False,
|
|
717
|
+
exact_version_only: Optional[list[catalog.TableVersion]] = None,
|
|
663
718
|
) -> exec.ExecNode:
|
|
664
719
|
"""
|
|
665
720
|
Create plan to materialize eval_ctx.
|
|
@@ -672,9 +727,8 @@ class Planner:
|
|
|
672
727
|
if exact_version_only is None:
|
|
673
728
|
exact_version_only = []
|
|
674
729
|
sql_elements = analyzer.sql_elements
|
|
675
|
-
is_python_agg = (
|
|
676
|
-
|
|
677
|
-
or not sql_elements.contains_all(analyzer.window_fn_calls)
|
|
730
|
+
is_python_agg = not sql_elements.contains_all(analyzer.agg_fn_calls) or not sql_elements.contains_all(
|
|
731
|
+
analyzer.window_fn_calls
|
|
678
732
|
)
|
|
679
733
|
ctx = exec.ExecContext(row_builder)
|
|
680
734
|
cls._verify_ordering(analyzer, verify_agg=is_python_agg)
|
|
@@ -686,19 +740,26 @@ class Planner:
|
|
|
686
740
|
# - subexprs of Where clause conjuncts that can't be run in SQL
|
|
687
741
|
# - all grouping exprs, if any aggregate function call can't be run in SQL (in that case, they all have to be
|
|
688
742
|
# run in Python)
|
|
689
|
-
candidates = list(
|
|
690
|
-
|
|
691
|
-
|
|
743
|
+
candidates = list(
|
|
744
|
+
exprs.Expr.list_subexprs(
|
|
745
|
+
analyzer.select_list,
|
|
746
|
+
filter=lambda e: (
|
|
692
747
|
sql_elements.contains(e)
|
|
693
748
|
and not e._contains(cls=exprs.FunctionCall, filter=lambda e: bool(e.is_agg_fn_call))
|
|
694
|
-
|
|
695
|
-
|
|
749
|
+
),
|
|
750
|
+
traverse_matches=False,
|
|
751
|
+
)
|
|
752
|
+
)
|
|
696
753
|
if analyzer.filter is not None:
|
|
697
|
-
candidates.extend(
|
|
698
|
-
analyzer.filter, filter=lambda e: sql_elements.contains(e), traverse_matches=False)
|
|
754
|
+
candidates.extend(
|
|
755
|
+
exprs.Expr.subexprs(analyzer.filter, filter=lambda e: sql_elements.contains(e), traverse_matches=False)
|
|
756
|
+
)
|
|
699
757
|
if is_python_agg and analyzer.group_by_clause is not None:
|
|
700
|
-
candidates.extend(
|
|
701
|
-
|
|
758
|
+
candidates.extend(
|
|
759
|
+
exprs.Expr.list_subexprs(
|
|
760
|
+
analyzer.group_by_clause, filter=lambda e: sql_elements.contains(e), traverse_matches=False
|
|
761
|
+
)
|
|
762
|
+
)
|
|
702
763
|
# not isinstance(...): we don't want to materialize Literals via a Select
|
|
703
764
|
sql_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))
|
|
704
765
|
|
|
@@ -706,7 +767,8 @@ class Planner:
|
|
|
706
767
|
join_exprs = exprs.ExprSet(
|
|
707
768
|
join_clause.join_predicate
|
|
708
769
|
for join_clause in analyzer.from_clause.join_clauses
|
|
709
|
-
if join_clause.join_predicate is not None
|
|
770
|
+
if join_clause.join_predicate is not None
|
|
771
|
+
)
|
|
710
772
|
scan_target_exprs = sql_exprs | join_exprs
|
|
711
773
|
tbl_scan_plans: list[exec.SqlScanNode] = []
|
|
712
774
|
plan: exec.ExecNode
|
|
@@ -716,16 +778,21 @@ class Planner:
|
|
|
716
778
|
exprs.Expr.list_subexprs(
|
|
717
779
|
scan_target_exprs,
|
|
718
780
|
filter=lambda e: e.is_bound_by([tbl]) and not isinstance(e, exprs.Literal),
|
|
719
|
-
traverse_matches=False
|
|
781
|
+
traverse_matches=False,
|
|
782
|
+
)
|
|
783
|
+
)
|
|
720
784
|
plan = exec.SqlScanNode(
|
|
721
|
-
tbl, row_builder, select_list=tbl_scan_exprs,
|
|
722
|
-
|
|
785
|
+
tbl, row_builder, select_list=tbl_scan_exprs, set_pk=with_pk, exact_version_only=exact_version_only
|
|
786
|
+
)
|
|
723
787
|
tbl_scan_plans.append(plan)
|
|
724
788
|
|
|
725
789
|
if len(analyzer.from_clause.join_clauses) > 0:
|
|
726
790
|
plan = exec.SqlJoinNode(
|
|
727
|
-
row_builder,
|
|
728
|
-
|
|
791
|
+
row_builder,
|
|
792
|
+
inputs=tbl_scan_plans,
|
|
793
|
+
join_clauses=analyzer.from_clause.join_clauses,
|
|
794
|
+
select_list=sql_exprs,
|
|
795
|
+
)
|
|
729
796
|
else:
|
|
730
797
|
plan = tbl_scan_plans[0]
|
|
731
798
|
|
|
@@ -762,11 +829,17 @@ class Planner:
|
|
|
762
829
|
and plan.to_cte() is not None
|
|
763
830
|
):
|
|
764
831
|
plan = exec.SqlAggregationNode(
|
|
765
|
-
row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause
|
|
832
|
+
row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause
|
|
833
|
+
)
|
|
766
834
|
else:
|
|
767
835
|
plan = exec.AggregationNode(
|
|
768
|
-
tbl.tbl_version,
|
|
769
|
-
|
|
836
|
+
tbl.tbl_version,
|
|
837
|
+
row_builder,
|
|
838
|
+
analyzer.group_by_clause,
|
|
839
|
+
analyzer.agg_fn_calls + analyzer.window_fn_calls,
|
|
840
|
+
agg_input,
|
|
841
|
+
input=plan,
|
|
842
|
+
)
|
|
770
843
|
typecheck_dummy = analyzer.grouping_exprs + analyzer.agg_fn_calls + analyzer.window_fn_calls
|
|
771
844
|
agg_output = exprs.ExprSet(typecheck_dummy)
|
|
772
845
|
if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
|
|
@@ -794,7 +867,8 @@ class Planner:
|
|
|
794
867
|
expr_eval_node.set_input_order(False)
|
|
795
868
|
|
|
796
869
|
if limit is not None:
|
|
797
|
-
|
|
870
|
+
assert isinstance(limit, exprs.Literal)
|
|
871
|
+
plan.set_limit(limit.val)
|
|
798
872
|
|
|
799
873
|
plan.set_ctx(ctx)
|
|
800
874
|
return plan
|
|
@@ -805,7 +879,7 @@ class Planner:
|
|
|
805
879
|
|
|
806
880
|
@classmethod
|
|
807
881
|
def create_add_column_plan(
|
|
808
|
-
|
|
882
|
+
cls, tbl: catalog.TableVersionPath, col: catalog.Column
|
|
809
883
|
) -> tuple[exec.ExecNode, Optional[int]]:
|
|
810
884
|
"""Creates a plan for InsertableTable.add_column()
|
|
811
885
|
Returns:
|
|
@@ -816,7 +890,8 @@ class Planner:
|
|
|
816
890
|
row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
|
|
817
891
|
analyzer = Analyzer(FromClause(tbls=[tbl]), row_builder.default_eval_ctx.target_exprs)
|
|
818
892
|
plan = cls._create_query_plan(
|
|
819
|
-
row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True
|
|
893
|
+
row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True
|
|
894
|
+
)
|
|
820
895
|
plan.ctx.batch_size = 16
|
|
821
896
|
plan.ctx.show_pbar = True
|
|
822
897
|
plan.ctx.ignore_errors = True
|