pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/column.py +37 -11
- pixeltable/catalog/globals.py +21 -0
- pixeltable/catalog/insertable_table.py +6 -4
- pixeltable/catalog/table.py +227 -148
- pixeltable/catalog/table_version.py +66 -28
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +18 -19
- pixeltable/dataframe.py +16 -32
- pixeltable/env.py +6 -1
- pixeltable/exec/__init__.py +1 -2
- pixeltable/exec/aggregation_node.py +27 -17
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +9 -26
- pixeltable/exec/exec_node.py +36 -7
- pixeltable/exec/expr_eval_node.py +19 -11
- pixeltable/exec/in_memory_data_node.py +14 -11
- pixeltable/exec/sql_node.py +266 -138
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +3 -1
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +93 -14
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +56 -36
- pixeltable/exprs/expr.py +65 -63
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +26 -15
- pixeltable/exprs/function_call.py +53 -24
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +5 -10
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +14 -13
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +12 -6
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function.py +11 -10
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +84 -42
- pixeltable/functions/huggingface.py +31 -34
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/llama_cpp.py +106 -0
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +59 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +65 -74
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +20 -7
- pixeltable/index/embedding_index.py +12 -14
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +98 -2
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +10 -8
- pixeltable/iterators/video.py +126 -60
- pixeltable/metadata/__init__.py +4 -3
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +54 -12
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +40 -21
- pixeltable/plan.py +149 -165
- pixeltable/py.typed +0 -0
- pixeltable/store.py +57 -37
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +55 -0
- pixeltable/type_system.py +260 -61
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +16 -2
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +10 -11
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
- pixeltable-0.2.22.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
pixeltable/plan.py
CHANGED
@@ -1,5 +1,4 @@
-import
-from typing import Any, Iterable, Optional, Sequence
+from typing import Any, Iterable, Optional, Sequence, cast
 from uuid import UUID
 
 import sqlalchemy as sql
@@ -9,6 +8,7 @@ import pixeltable.exec as exec
 from pixeltable import catalog
 from pixeltable import exceptions as excs
 from pixeltable import exprs
+from pixeltable.exec.sql_node import OrderByItem, OrderByClause, combine_order_by_clauses, print_order_by_clause
 
 
 def _is_agg_fn_call(e: exprs.Expr) -> bool:
@@ -46,11 +46,9 @@ class Analyzer:
     tbl: catalog.TableVersionPath
     all_exprs: list[exprs.Expr]
     select_list: list[exprs.Expr]
-    group_by_clause: list[exprs.Expr]
-
-
-    # exprs that can be expressed in SQL and are retrieved directly from the store
-    #sql_exprs: list[exprs.Expr]
+    group_by_clause: Optional[list[exprs.Expr]]  # None for non-aggregate queries; [] for agg query w/o grouping
+    grouping_exprs: list[exprs.Expr]  # [] for non-aggregate queries or agg query w/o grouping
+    order_by_clause: OrderByClause
 
     sql_elements: exprs.SqlElementCache
 
@@ -60,15 +58,14 @@ class Analyzer:
     # filter predicate applied to output rows of the SQL scan
     filter: Optional[exprs.Expr]
 
-    agg_fn_calls: list[exprs.FunctionCall]
+    agg_fn_calls: list[exprs.FunctionCall]  # grouping aggregation (ie, not window functions)
+    window_fn_calls: list[exprs.FunctionCall]
    agg_order_by: list[exprs.Expr]
 
     def __init__(
             self, tbl: catalog.TableVersionPath, select_list: Sequence[exprs.Expr],
             where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[list[exprs.Expr]] = None,
             order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None):
-        if group_by_clause is None:
-            group_by_clause = []
         if order_by_clause is None:
             order_by_clause = []
         self.tbl = tbl
@@ -78,8 +75,10 @@ class Analyzer:
         self.select_list = [e.resolve_computed_cols() for e in select_list]
         if where_clause is not None:
             where_clause = where_clause.resolve_computed_cols()
-        self.group_by_clause =
-
+        self.group_by_clause = (
+            [e.resolve_computed_cols() for e in group_by_clause] if group_by_clause is not None else None
+        )
+        self.order_by_clause = [OrderByItem(e.resolve_computed_cols(), asc) for e, asc in order_by_clause]
 
         self.sql_where_clause = None
         self.filter = None
@@ -89,20 +88,36 @@ class Analyzer:
 
         # all exprs that are evaluated in Python; not executable
         self.all_exprs = self.select_list.copy()
-        self.
+        if self.group_by_clause is not None:
+            self.all_exprs.extend(self.group_by_clause)
         self.all_exprs.extend(e for e, _ in self.order_by_clause)
         if self.filter is not None:
             self.all_exprs.append(self.filter)
 
         self.agg_order_by = []
+        self.agg_fn_calls = []
+        self.window_fn_calls = []
         self._analyze_agg()
+        self.grouping_exprs = self.group_by_clause if self.group_by_clause is not None else []
 
     def _analyze_agg(self) -> None:
         """Check semantic correctness of aggregation and fill in agg-specific fields of Analyzer"""
-
+        candidates = self.select_list
+        agg_fn_calls = exprs.ExprSet(
+            exprs.Expr.list_subexprs(
+                candidates, expr_class=exprs.FunctionCall,
+                filter=lambda e: bool(e.is_agg_fn_call and not e.is_window_fn_call)))
+        self.agg_fn_calls = list(agg_fn_calls)
+        window_fn_calls = exprs.ExprSet(
+            exprs.Expr.list_subexprs(
+                candidates, expr_class=exprs.FunctionCall, filter=lambda e: bool(e.is_window_fn_call)))
+        self.window_fn_calls = list(window_fn_calls)
         if len(self.agg_fn_calls) == 0:
             # nothing to do
             return
+        # if we're doing grouping aggregation and don't have an explicit Group By clause, we're creating a single group
+        if self.group_by_clause is None:
+            self.group_by_clause = []
 
         # check that select list only contains aggregate output
         grouping_expr_ids = {e.id for e in self.group_by_clause}
@@ -113,8 +128,7 @@ class Analyzer:
 
         # check that filter doesn't contain aggregates
         if self.filter is not None:
-
-            if len(agg_fn_calls) > 0:
+            if any(_is_agg_fn_call(e) for e in self.filter.subexprs(expr_class=exprs.FunctionCall)):
                 raise excs.Error(f'Filter cannot contain aggregate functions: {self.filter}')
 
         # check that grouping exprs don't contain aggregates and can be expressed as SQL (we perform sort-based
@@ -125,27 +139,6 @@ class Analyzer:
             if e._contains(filter=lambda e: _is_agg_fn_call(e)):
                 raise excs.Error(f'Grouping expression contains aggregate function: {e}')
 
-        # check that agg fn calls don't have contradicting ordering requirements
-        order_by: list[exprs.Expr] = []
-        order_by_origin: Optional[exprs.Expr] = None  # the expr that determines the ordering
-        for agg_fn_call in self.agg_fn_calls:
-            fn_call_order_by = agg_fn_call.get_agg_order_by()
-            if len(fn_call_order_by) == 0:
-                continue
-            if len(order_by) == 0:
-                order_by = fn_call_order_by
-                order_by_origin = agg_fn_call
-            else:
-                combined = _get_combined_ordering(
-                    [(e, True) for e in order_by], [(e, True) for e in fn_call_order_by])
-                if len(combined) == 0:
-                    raise excs.Error((
-                        f"Incompatible ordering requirements between expressions '{order_by_origin}' and "
-                        f"'{agg_fn_call}':\n"
-                        f"{exprs.Expr.print_list(order_by)} vs {exprs.Expr.print_list(fn_call_order_by)}"
-                    ))
-        self.agg_order_by = order_by
-
     def _determine_agg_status(self, e: exprs.Expr, grouping_expr_ids: set[int]) -> tuple[bool, bool]:
         """Determine whether expr is the input to or output of an aggregate function.
         Returns:
@@ -175,14 +168,14 @@ class Analyzer:
             raise excs.Error(f'Invalid expression, mixes aggregate with non-aggregate: {e}')
         return is_output, is_input
 
-
     def finalize(self, row_builder: exprs.RowBuilder) -> None:
         """Make all exprs executable
         TODO: add EvalCtx for each expr list?
         """
         # maintain original composition of select list
         row_builder.set_slot_idxs(self.select_list, remove_duplicates=False)
-
+        if self.group_by_clause is not None:
+            row_builder.set_slot_idxs(self.group_by_clause)
         order_by_exprs = [e for e, _ in self.order_by_clause]
         row_builder.set_slot_idxs(order_by_exprs)
         row_builder.set_slot_idxs(self.all_exprs)
@@ -191,6 +184,19 @@ class Analyzer:
         row_builder.set_slot_idxs(self.agg_fn_calls)
         row_builder.set_slot_idxs(self.agg_order_by)
 
+    def get_window_fn_ob_clause(self) -> Optional[OrderByClause]:
+        clause: list[OrderByClause] = []
+        for fn_call in self.window_fn_calls:
+            # window functions require ordering by the group_by/order_by clauses
+            group_by_exprs, order_by_exprs = fn_call.get_window_sort_exprs()
+            clause.append(
+                [OrderByItem(e, None) for e in group_by_exprs] + [OrderByItem(e, True) for e in order_by_exprs])
+        return combine_order_by_clauses(clause)
+
+    def has_agg(self) -> bool:
+        """True if there is any kind of aggregation in the query"""
+        return self.group_by_clause is not None or len(self.agg_fn_calls) > 0 or len(self.window_fn_calls) > 0
+
 
 class Planner:
     # TODO: create an exec.CountNode and change this to create_count_plan()
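Note: the Analyzer changes above introduce a three-state convention for group_by_clause (None for a non-aggregate query, [] for an aggregate query without an explicit GROUP BY, a non-empty list for grouped aggregation). The following self-contained sketch is illustrative only, not pixeltable code; it mirrors how grouping_exprs and has_agg() are derived in the diff:

from typing import Optional

def grouping_exprs(group_by_clause: Optional[list]) -> list:
    # mirrors Analyzer.grouping_exprs: empty unless an explicit grouping list is present
    return group_by_clause if group_by_clause is not None else []

def has_agg(group_by_clause: Optional[list], agg_fn_calls: list, window_fn_calls: list) -> bool:
    # mirrors Analyzer.has_agg(): any kind of aggregation in the query
    return group_by_clause is not None or len(agg_fn_calls) > 0 or len(window_fn_calls) > 0

assert grouping_exprs(None) == [] and not has_agg(None, [], [])      # plain query, no aggregation
assert grouping_exprs([]) == [] and has_agg([], ['count'], [])       # aggregate query, single global group
assert grouping_exprs(['c1']) == ['c1'] and has_agg(['c1'], [], [])  # grouped aggregation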
@@ -219,27 +225,28 @@ class Planner:
         assert not tbl.is_view()
         # stored_cols: all cols we need to store, incl computed cols (and indices)
         stored_cols = [c for c in tbl.cols if c.is_stored]
-        assert len(stored_cols) > 0
-
+        assert len(stored_cols) > 0  # there needs to be something to store
         row_builder = exprs.RowBuilder([], stored_cols, [])
 
         # create InMemoryDataNode for 'rows'
-        stored_col_info = row_builder.output_slot_idxs()
-        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
-        input_col_info = [info for info in stored_col_info if not info.col.is_computed]
         plan: exec.ExecNode = exec.InMemoryDataNode(tbl, rows, row_builder, tbl.next_rowid)
 
-
-
-
-
-
+        media_input_col_info = [
+            exprs.ColumnSlotIdx(col_ref.col, col_ref.slot_idx)
+            for col_ref in row_builder.input_exprs
+            if isinstance(col_ref, exprs.ColumnRef) and col_ref.col_type.is_media_type()
+        ]
+        if len(media_input_col_info) > 0:
+            # prefetch external files for all input column refs
+            plan = exec.CachePrefetchNode(tbl.id, media_input_col_info, input=plan)
 
-        computed_exprs =
+        computed_exprs = row_builder.output_exprs - row_builder.input_exprs
         if len(computed_exprs) > 0:
             # add an ExprEvalNode when there are exprs to compute
             plan = exec.ExprEvalNode(row_builder, computed_exprs, plan.output_exprs, input=plan)
 
+        stored_col_info = row_builder.output_slot_idxs()
+        stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
         plan.set_stored_img_cols(stored_img_col_info)
         plan.set_ctx(
             exec.ExecContext(
@@ -507,93 +514,35 @@ class Planner:
         return plan, len(row_builder.default_eval_ctx.target_exprs)
 
     @classmethod
-    def
-        """
-
-
-
-
-
-
-            e for e in analyzer.all_exprs if isinstance(e, exprs.FunctionCall) and e.is_window_fn_call
-        ]
-        if len(window_fn_calls) > 0:
-            for fn_call in window_fn_calls:
+    def _verify_ordering(cls, analyzer: Analyzer, verify_agg: bool) -> None:
+        """Verify that the various ordering requirements don't conflict"""
+        ob_clauses: list[OrderByClause] = [analyzer.order_by_clause.copy()]
+
+        if verify_agg:
+            ordering: OrderByClause
+            for fn_call in analyzer.window_fn_calls:
+                # window functions require ordering by the group_by/order_by clauses
                 gb, ob = fn_call.get_window_sort_exprs()
-
-
-
-
-
-
-
-
-
-
-
-                        f"Incompatible ordering requirements between expressions '{order_by_origin}' and "
-                        f"'{fn_call}':\n"
-                        f"{exprs.Expr.print_list(order_by_items)} vs {exprs.Expr.print_list(other_order_by_clauses)}"
-                    ))
-                order_by_items = combined
-
-        if len(analyzer.group_by_clause) > 0:
-            agg_ordering = [(e, None) for e in analyzer.group_by_clause] + [(e, True) for e in analyzer.agg_order_by]
-            if len(order_by_items) > 0:
-                # check for compatibility
-                combined = _get_combined_ordering(order_by_items, agg_ordering)
-                if len(combined) == 0:
-                    raise excs.Error((
-                        f"Incompatible ordering requirements between expressions '{order_by_origin}' and "
-                        f"grouping expressions:\n"
-                        f"{exprs.Expr.print_list([e for e, _ in order_by_items])} vs "
-                        f"{exprs.Expr.print_list([e for e, _ in agg_ordering])}"
-                    ))
-                order_by_items = combined
-            else:
-                order_by_items = agg_ordering
+                ordering = [OrderByItem(e, None) for e in gb] + [OrderByItem(e, True) for e in ob]
+                ob_clauses.append(ordering)
+            for fn_call in analyzer.agg_fn_calls:
+                # agg functions with an ordering requirement are implicitly ascending
+                ordering = (
+                    [OrderByItem(e, None) for e in analyzer.group_by_clause]
+                    + [OrderByItem(e, True) for e in fn_call.get_agg_order_by()]
+                )
+                ob_clauses.append(ordering)
+        if len(ob_clauses) <= 1:
+            return
 
-
-
-
-
-
-
-
-
-            f"{exprs.Expr.print_list([e for e, _ in order_by_items])} vs "
-            f"{exprs.Expr.print_list([e for e, _ in analyzer.order_by_clause])}"
-            ))
-            order_by_items = combined
-        else:
-            order_by_items = analyzer.order_by_clause
-
-        # TODO: can this be unified with the same logic in RowBuilder
-        def refs_unstored_iter_col(e: exprs.Expr) -> bool:
-            if not isinstance(e, exprs.ColumnRef):
-                return False
-            tbl = e.col.tbl
-            return tbl.is_component_view() and tbl.is_iterator_column(e.col) and not e.col.is_stored
-        unstored_iter_col_refs = list(exprs.Expr.list_subexprs(analyzer.all_exprs, expr_class=exprs.ColumnRef, filter=refs_unstored_iter_col))
-        if len(unstored_iter_col_refs) > 0 and len(order_by_items) == 0:
-            # we don't already have a user-requested ordering and we access unstored iterator columns:
-            # order by the primary key of the component view, which minimizes the number of iterator instantiations
-            component_views = {e.col.tbl for e in unstored_iter_col_refs}
-            # TODO: generalize this to multi-level iteration
-            assert len(component_views) == 1
-            component_view = list(component_views)[0]
-            order_by_items = [
-                (exprs.RowidRef(component_view, idx), None)
-                for idx in range(len(component_view.store_tbl.rowid_columns()))
-            ]
-            order_by_origin = unstored_iter_col_refs[0]
-
-        for e in [e for e, _ in order_by_items]:
-            if not analyzer.sql_elements.contains(e):
-                raise excs.Error(f'order_by element cannot be expressed in SQL: {e}')
-        # we do ascending ordering by default, if not specified otherwise
-        order_by_items = [(e, True) if asc is None else (e, asc) for e, asc in order_by_items]
-        return order_by_items
+        combined_ordering = ob_clauses[0]
+        for ordering in ob_clauses[1:]:
+            combined = combine_order_by_clauses([combined_ordering, ordering])
+            if combined is None:
+                raise excs.Error(
+                    f'Incompatible ordering requirements: '
+                    f'{print_order_by_clause(combined_ordering)} vs {print_order_by_clause(ordering)}')
+            combined_ordering = combined
 
     @classmethod
     def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
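Note: _verify_ordering relies on OrderByItem, combine_order_by_clauses() and print_order_by_clause() from pixeltable/exec/sql_node.py, whose implementations are not included in this diff. The sketch below is a hypothetical illustration of the behaviour the call sites assume: a merge of ordering requirements that returns None on conflict. Expressions are plain strings here and asc is True/False/None, with None meaning "no direction requirement"; the function name combine_two_clauses is invented for the sketch:

from typing import Optional

OrderByClause = list[tuple[str, Optional[bool]]]

def combine_two_clauses(a: OrderByClause, b: OrderByClause) -> Optional[OrderByClause]:
    combined: OrderByClause = []
    for (e1, asc1), (e2, asc2) in zip(a, b):
        if e1 != e2:
            return None  # different expressions at the same position: incompatible orderings
        if asc1 is not None and asc2 is not None and asc1 != asc2:
            return None  # conflicting sort directions
        combined.append((e1, asc1 if asc1 is not None else asc2))
    # the longer clause contributes its remaining items unchanged
    combined.extend(a[len(combined):] or b[len(combined):])
    return combined

# e.g. a window function's grouping requirement merged with an explicit order_by():
assert combine_two_clauses([('c1', None)], [('c1', True), ('c2', True)]) == [('c1', True), ('c2', True)]
assert combine_two_clauses([('c1', True)], [('c2', True)]) is None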
@@ -632,8 +581,6 @@ class Planner:
         """
         if select_list is None:
             select_list = []
-        if group_by_clause is None:
-            group_by_clause = []
         if order_by_clause is None:
             order_by_clause = []
         if exact_version_only is None:
@@ -641,16 +588,12 @@ class Planner:
         analyzer = Analyzer(
             tbl, select_list, where_clause=where_clause, group_by_clause=group_by_clause,
             order_by_clause=order_by_clause)
-
-            analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False))
-        # remove Literals from sql_exprs, we don't want to materialize them via a Select
-        input_exprs = exprs.ExprSet(e for e in input_exprs if not isinstance(e, exprs.Literal))
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], input_exprs)
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [])
 
         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
         # with_pk: for now, we always retrieve the PK, because we need it for the file cache
-        eval_ctx = row_builder.create_eval_ctx(analyzer.
+        eval_ctx = row_builder.create_eval_ctx(analyzer.select_list)
         plan = cls._create_query_plan(
             tbl, row_builder, analyzer=analyzer, eval_ctx=eval_ctx, limit=limit, with_pk=True,
             exact_version_only=exact_version_only)
@@ -677,48 +620,89 @@ class Planner:
         if exact_version_only is None:
             exact_version_only = []
         assert isinstance(tbl, catalog.TableVersionPath)
-
+        sql_elements = analyzer.sql_elements
+        is_python_agg = (
+            not sql_elements.contains_all(analyzer.agg_fn_calls)
+            or not sql_elements.contains_all(analyzer.window_fn_calls)
+        )
         ctx = exec.ExecContext(row_builder)
+        cls._verify_ordering(analyzer, verify_agg=is_python_agg)
+
+        # materialized with SQL scan:
+        # - select list subexprs that aren't aggregates
+        # - Where clause conjuncts that can't be run in SQL
+        # - all grouping exprs, if any aggregate function call can't be run in SQL (in that case, they all have to be
+        # run in Python)
+        candidates = list(exprs.Expr.list_subexprs(
+            analyzer.select_list,
+            filter=lambda e: (
+                sql_elements.contains(e)
+                and not e._contains(cls=exprs.FunctionCall, filter=lambda e: bool(e.is_agg_fn_call))
+            ),
+            traverse_matches=False))
+        if analyzer.filter is not None:
+            candidates.extend(exprs.Expr.subexprs(
+                analyzer.filter, filter=lambda e: sql_elements.contains(e), traverse_matches=False))
+        if is_python_agg and analyzer.group_by_clause is not None:
+            candidates.extend(exprs.Expr.list_subexprs(
+                analyzer.group_by_clause, filter=lambda e: sql_elements.contains(e), traverse_matches=False))
+        # not isinstance(...): we don't want to materialize Literals via a Select
+        sql_scan_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))
 
-        order_by_items = cls._determine_ordering(analyzer)
-        sql_limit = 0 if is_agg_query else limit  # if we're aggregating, the limit applies to the agg output
-        sql_exprs = [
-            e for e in eval_ctx.exprs if analyzer.sql_elements.contains(e) and not isinstance(e, exprs.Literal)
-        ]
         plan = exec.SqlScanNode(
-            tbl, row_builder, select_list=
-            filter=analyzer.filter,
-
+            tbl, row_builder, select_list=sql_scan_exprs, where_clause=analyzer.sql_where_clause,
+            filter=analyzer.filter, set_pk=with_pk, exact_version_only=exact_version_only)
+        if len(analyzer.window_fn_calls) > 0:
+            # we need to order the input for window functions
+            plan.add_order_by(analyzer.get_window_fn_ob_clause())
         plan = cls._insert_prefetch_node(tbl.tbl_version.id, analyzer.select_list, row_builder, plan)
 
-        if
-            # we're doing aggregation; the input of the AggregateNode are the grouping exprs plus the
+        if analyzer.group_by_clause is not None:
+            # we're doing grouping aggregation; the input of the AggregateNode are the grouping exprs plus the
             # args of the agg fn calls
-            agg_input = exprs.ExprSet(analyzer.
+            agg_input = exprs.ExprSet(analyzer.grouping_exprs.copy())
             for fn_call in analyzer.agg_fn_calls:
                 agg_input.update(fn_call.components)
-            if not
+            if not sql_scan_exprs.issuperset(agg_input):
                 # we need an ExprEvalNode
-                plan = exec.ExprEvalNode(row_builder, agg_input,
+                plan = exec.ExprEvalNode(row_builder, agg_input, sql_scan_exprs, input=plan)
 
             # batch size for aggregation input: this could be the entire table, so we need to divide it into
             # smaller batches; at the same time, we need to make the batches large enough to amortize the
             # function call overhead
             ctx.batch_size = 16
 
-
-
-
-
-
-            plan = exec.
+            # do aggregation in SQL if all agg exprs can be translated
+            if (sql_elements.contains_all(analyzer.select_list)
+                    and sql_elements.contains_all(analyzer.grouping_exprs)
+                    and isinstance(plan, exec.SqlNode)
+                    and plan.to_cte() is not None):
+                plan = exec.SqlAggregationNode(
+                    row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause)
+            else:
+                plan = exec.AggregationNode(
+                    tbl.tbl_version, row_builder, analyzer.group_by_clause,
+                    analyzer.agg_fn_calls + analyzer.window_fn_calls, agg_input, input=plan)
+            typecheck_dummy = analyzer.grouping_exprs + analyzer.agg_fn_calls + analyzer.window_fn_calls
+            agg_output = exprs.ExprSet(typecheck_dummy)
+            if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
+                # we need an ExprEvalNode to evaluate the remaining output exprs
+                plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, agg_output, input=plan)
         else:
-            if not exprs.ExprSet(
+            if not exprs.ExprSet(sql_scan_exprs).issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                 # we need an ExprEvalNode to evaluate the remaining output exprs
-                plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs,
+                plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, sql_scan_exprs, input=plan)
             # we're returning everything to the user, so we might as well do it in a single batch
             ctx.batch_size = 0
 
+        sql_node = plan.get_sql_node()
+        assert sql_node is not None
+        if len(analyzer.order_by_clause) > 0:
+            sql_node.add_order_by(analyzer.order_by_clause)
+
+        if limit is not None:
+            plan.set_limit(limit)
+
         plan.set_ctx(ctx)
         return plan
 
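Note: the rewritten _create_query_plan above chooses between SQL-side and Python-side aggregation. The following is a minimal, simplified distillation of that decision, not pixeltable code; the real check inspects the concrete expressions and the executing node:

def use_sql_aggregation(select_list_in_sql: bool, grouping_in_sql: bool, plan_is_sql_node_with_cte: bool) -> bool:
    # mirrors the condition guarding exec.SqlAggregationNode in the diff above:
    # sql_elements.contains_all(select_list) and sql_elements.contains_all(grouping_exprs)
    # and isinstance(plan, exec.SqlNode) and plan.to_cte() is not None
    return select_list_in_sql and grouping_in_sql and plan_is_sql_node_with_cte

assert use_sql_aggregation(True, True, True)        # -> exec.SqlAggregationNode (aggregation stays in SQL)
assert not use_sql_aggregation(True, False, True)   # -> exec.AggregationNode (Python, incl. window functions)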
pixeltable/py.typed
ADDED
File without changes