pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +11 -2
- pixeltable/catalog/catalog.py +407 -119
- pixeltable/catalog/column.py +38 -26
- pixeltable/catalog/globals.py +130 -15
- pixeltable/catalog/insertable_table.py +10 -9
- pixeltable/catalog/schema_object.py +6 -0
- pixeltable/catalog/table.py +245 -119
- pixeltable/catalog/table_version.py +142 -116
- pixeltable/catalog/table_version_handle.py +30 -2
- pixeltable/catalog/table_version_path.py +28 -4
- pixeltable/catalog/view.py +14 -20
- pixeltable/config.py +4 -0
- pixeltable/dataframe.py +10 -9
- pixeltable/env.py +5 -11
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/sql_node.py +47 -30
- pixeltable/exprs/column_property_ref.py +2 -10
- pixeltable/exprs/column_ref.py +24 -21
- pixeltable/exprs/data_row.py +9 -0
- pixeltable/exprs/expr.py +4 -4
- pixeltable/exprs/row_builder.py +44 -13
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +4 -2
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +8 -6
- pixeltable/functions/mistralai.py +2 -13
- pixeltable/functions/openai.py +1 -6
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/util.py +6 -1
- pixeltable/globals.py +0 -2
- pixeltable/io/external_store.py +81 -54
- pixeltable/io/globals.py +1 -1
- pixeltable/io/label_studio.py +49 -45
- pixeltable/io/table_data_conduit.py +1 -1
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +5 -0
- pixeltable/metadata/utils.py +78 -0
- pixeltable/plan.py +59 -139
- pixeltable/share/packager.py +2 -2
- pixeltable/store.py +114 -103
- pixeltable/type_system.py +30 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
- pixeltable/utils/sample.py +0 -25
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
pixeltable/metadata/utils.py
ADDED
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from pixeltable.metadata import schema
+
+
+class MetadataUtils:
+    @classmethod
+    def _diff_md(
+        cls, old_md: Optional[dict[int, schema.SchemaColumn]], new_md: Optional[dict[int, schema.SchemaColumn]]
+    ) -> str:
+        """Return a string reporting the differences in a specific entry in two dictionaries
+
+        Results are formatted as follows:
+        - If `old_md` is `None`, returns 'Initial Version'.
+        - If `old_md` and `new_md` are the same, returns an empty string.
+        - If there are additions, changes, or deletions, returns a string summarizing the changes.
+        """
+        assert new_md is not None
+        if old_md is None:
+            return 'Initial Version'
+        if old_md == new_md:
+            return ''
+        added = {k: v.name for k, v in new_md.items() if k not in old_md}
+        changed = {
+            k: f'{old_md[k].name!r} to {v.name!r}'
+            for k, v in new_md.items()
+            if k in old_md and old_md[k].name != v.name
+        }
+        deleted = {k: v.name for k, v in old_md.items() if k not in new_md}
+        if len(added) == 0 and len(changed) == 0 and len(deleted) == 0:
+            return ''
+        # Format the result
+        t = []
+        if len(added) > 0:
+            t.append('Added: ' + ', '.join(added.values()))
+        if len(changed) > 0:
+            t.append('Renamed: ' + ', '.join(changed.values()))
+        if len(deleted) > 0:
+            t.append('Deleted: ' + ', '.join(deleted.values()))
+        r = ', '.join(t)
+        return r
+
+    @classmethod
+    def _create_md_change_dict(
+        cls, md_list: Optional[list[tuple[int, dict[int, schema.SchemaColumn]]]]
+    ) -> dict[int, str]:
+        """Return a dictionary of schema changes by version
+        Args:
+            md_list: a list of tuples, each containing a version number and a metadata dictionary.
+        """
+        r: dict[int, str] = {}
+        if md_list is None or len(md_list) == 0:
+            return r
+
+        # Sort the list in place by version number
+        md_list.sort()
+
+        first_retrieved_version = md_list[0][0]
+        if first_retrieved_version == 0:
+            prev_md = None
+            prev_ver = -1
+            start = 0
+        else:
+            prev_md = md_list[0][1]
+            prev_ver = first_retrieved_version
+            start = 1
+
+        for ver, curr_md in md_list[start:]:
+            if ver == prev_ver:
+                continue
+            assert ver > prev_ver
+            tf = cls._diff_md(prev_md, curr_md)
+            if tf != '':
+                r[ver] = tf
+            prev_md = curr_md
+        return r
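
The new `MetadataUtils` helpers operate on per-version column-metadata dictionaries and only read the `name` attribute of each entry. A minimal sketch of the intended usage, with a hypothetical stand-in for `schema.SchemaColumn` (only `name` matters to these internal helpers):

```python
from dataclasses import dataclass

from pixeltable.metadata.utils import MetadataUtils


@dataclass
class Col:  # hypothetical stand-in for schema.SchemaColumn; _diff_md only reads .name
    name: str


v0 = {0: Col('a'), 1: Col('b')}
v1 = {0: Col('a'), 1: Col('b_renamed'), 2: Col('c')}

print(MetadataUtils._diff_md(None, v0))  # Initial Version
print(MetadataUtils._diff_md(v0, v1))    # Added: c, Renamed: 'b' to 'b_renamed'
print(MetadataUtils._create_md_change_dict([(0, v0), (1, v1)]))
# {0: 'Initial Version', 1: "Added: c, Renamed: 'b' to 'b_renamed'"}
```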
pixeltable/plan.py
CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 import dataclasses
 import enum
 from textwrap import dedent
-from typing import Any, Iterable, Literal,
+from typing import Any, Iterable, Literal, Optional, Sequence
 from uuid import UUID
 
 import sqlalchemy as sql
@@ -12,7 +12,6 @@ import pixeltable as pxt
 from pixeltable import catalog, exceptions as excs, exec, exprs
 from pixeltable.catalog import Column, TableVersionHandle
 from pixeltable.exec.sql_node import OrderByClause, OrderByItem, combine_order_by_clauses, print_order_by_clause
-from pixeltable.utils.sample import sample_key
 
 
 def _is_agg_fn_call(e: exprs.Expr) -> bool:
@@ -159,16 +158,6 @@ class SampleClause:
         return format(threshold_int, '08x') + 'ffffffffffffffffffffffff'
 
 
-class SamplingClauses(NamedTuple):
-    """Clauses provided when rewriting a SampleClause"""
-
-    where: exprs.Expr
-    group_by_clause: Optional[list[exprs.Expr]]
-    order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
-    limit: Optional[exprs.Expr]
-    sample_clause: Optional[SampleClause]
-
-
 class Analyzer:
     """
     Performs semantic analysis of a query and stores the analysis state.
@@ -180,6 +169,8 @@ class Analyzer:
     group_by_clause: Optional[list[exprs.Expr]]  # None for non-aggregate queries; [] for agg query w/o grouping
     grouping_exprs: list[exprs.Expr]  # [] for non-aggregate queries or agg query w/o grouping
     order_by_clause: OrderByClause
+    stratify_exprs: list[exprs.Expr]  # [] if no stratification is required
+    sample_clause: Optional[SampleClause]  # None if no sampling clause is present
 
     sql_elements: exprs.SqlElementCache
 
@@ -200,6 +191,7 @@ class Analyzer:
         where_clause: Optional[exprs.Expr] = None,
         group_by_clause: Optional[list[exprs.Expr]] = None,
         order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
+        sample_clause: Optional[SampleClause] = None,
     ):
         if order_by_clause is None:
             order_by_clause = []
@@ -213,6 +205,11 @@ class Analyzer:
         self.group_by_clause = (
             [e.resolve_computed_cols() for e in group_by_clause] if group_by_clause is not None else None
         )
+        self.sample_clause = sample_clause
+        if self.sample_clause is not None and self.sample_clause.is_stratified:
+            self.stratify_exprs = [e.resolve_computed_cols() for e in sample_clause.stratify_exprs]
+        else:
+            self.stratify_exprs = []
         self.order_by_clause = [OrderByItem(e.resolve_computed_cols(), asc) for e, asc in order_by_clause]
 
         self.sql_where_clause = None
@@ -228,8 +225,11 @@ class Analyzer:
             self.all_exprs.append(join_clause.join_predicate)
         if self.group_by_clause is not None:
             self.all_exprs.extend(self.group_by_clause)
+        self.all_exprs.extend(self.stratify_exprs)
         self.all_exprs.extend(e for e, _ in self.order_by_clause)
         if self.filter is not None:
+            if sample_clause is not None:
+                raise excs.Error(f'Filter {self.filter} not expressible in SQL')
             self.all_exprs.append(self.filter)
 
         self.agg_order_by = []
@@ -378,7 +378,7 @@ class Planner:
 
         cls.__check_valid_columns(tbl, stored_cols, 'inserted into')
 
-        row_builder = exprs.RowBuilder([], stored_cols, [])
+        row_builder = exprs.RowBuilder([], stored_cols, [], tbl)
 
         # create InMemoryDataNode for 'rows'
         plan: exec.ExecNode = exec.InMemoryDataNode(
@@ -473,15 +473,19 @@
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version.get()  # the one we need to update
         updated_cols = list(update_targets.keys())
+        recomputed_cols: set[Column]
         if len(recompute_targets) > 0:
-
+            assert len(update_targets) == 0
+            recomputed_cols = {*recompute_targets}
+            if cascade:
+                recomputed_cols |= target.get_dependent_columns(recomputed_cols)
         else:
             recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
-
-
-
-
-
+        # regardless of cascade, we need to update all indices on any updated/recomputed column
+        idx_val_cols = target.get_idx_val_columns(set(updated_cols) | recomputed_cols)
+        recomputed_cols.update(idx_val_cols)
+        # we only need to recompute stored columns (unstored ones are substituted away)
+        recomputed_cols = {c for c in recomputed_cols if c.is_stored}
 
         cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
 
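
The rewritten update-plan logic above first collects the recompute targets, takes the transitive closure of dependent columns when `cascade` is set, then folds in index value columns and drops unstored columns. A rough sketch of the closure step over a hypothetical name-based dependency map (the real code walks `Column` objects via `target.get_dependent_columns`; names and the `deps` map here are illustrative):

```python
# Hypothetical dependency map: column -> columns whose value_expr reads it.
deps: dict[str, set[str]] = {'a': {'b'}, 'b': {'c', 'd'}, 'c': set(), 'd': set()}


def dependent_columns(targets: set[str]) -> set[str]:
    """Transitive closure over deps, roughly what get_dependent_columns is assumed to provide."""
    result: set[str] = set()
    frontier = set(targets)
    while frontier:
        # expand one level of dependents, skipping columns already found
        frontier = set().union(*(deps[c] for c in frontier)) - result
        result |= frontier
    return result


recomputed = {'a'} | dependent_columns({'a'})  # cascade=True -> {'a', 'b', 'c', 'd'}
print(sorted(recomputed))
```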
@@ -588,7 +592,7 @@ class Planner:
         sql_exprs = list(
             exprs.Expr.list_subexprs(analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False)
         )
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs)
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
         analyzer.finalize(row_builder)
         sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
         col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
@@ -602,8 +606,7 @@ class Planner:
         row_builder.set_slot_idxs(select_list, remove_duplicates=False)
         for i, col in enumerate(all_base_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-
-        ctx = exec.ExecContext(row_builder)
+        ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
         # we're returning everything to the user, so we might as well do it in a single batch
         ctx.batch_size = 0
         plan.set_ctx(ctx)
@@ -691,25 +694,13 @@ class Planner:
         # 2. for component views: iterator args
         iterator_args = [target.iterator_args] if target.iterator_args is not None else []
 
-        # If this contains a sample specification, modify / create where, group_by, order_by, and limit clauses
         from_clause = FromClause(tbls=[view.base])
-        where, group_by_clause, order_by_clause, limit, sample_clause = cls.create_sample_clauses(
-            from_clause, target.sample_clause, target.predicate, None, [], None
-        )
-
-        # if we're propagating an insert, we only want to see those base rows that were created for the current version
         base_analyzer = Analyzer(
-            from_clause,
-            iterator_args,
-            where_clause=where,
-            group_by_clause=group_by_clause,
-            order_by_clause=order_by_clause,
+            from_clause, iterator_args, where_clause=target.predicate, sample_clause=target.sample_clause
         )
-        row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [])
-
-        if target.sample_clause is not None and base_analyzer.filter is not None:
-            raise excs.Error(f'Filter {base_analyzer.filter} not expressible in SQL')
+        row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [], target)
 
+        # if we're propagating an insert, we only want to see those base rows that were created for the current version
         # execution plan:
         # 1. materialize exprs computed from the base that are needed for stored view columns
         # 2. if it's an iterator view, expand the base rows into component rows
@@ -723,19 +714,13 @@ class Planner:
 
         # Create a new analyzer reflecting exactly what is required from the base table
         base_analyzer = Analyzer(
-            from_clause,
-            base_output_exprs,
-            where_clause=where,
-            group_by_clause=group_by_clause,
-            order_by_clause=order_by_clause,
+            from_clause, base_output_exprs, where_clause=target.predicate, sample_clause=target.sample_clause
         )
         base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
         plan = cls._create_query_plan(
             row_builder=row_builder,
             analyzer=base_analyzer,
             eval_ctx=base_eval_ctx,
-            limit=limit,
-            sample_clause=sample_clause,
             with_pk=True,
             exact_version_only=view.get_bases() if propagates_insert else [],
         )
@@ -818,62 +803,6 @@ class Planner:
         prefetch_node = exec.CachePrefetchNode(tbl_id, file_col_info, input_node)
         return prefetch_node
 
-    @classmethod
-    def create_sample_clauses(
-        cls,
-        from_clause: FromClause,
-        sample_clause: SampleClause,
-        where_clause: Optional[exprs.Expr],
-        group_by_clause: Optional[list[exprs.Expr]],
-        order_by_clause: Optional[list[tuple[exprs.Expr, bool]]],
-        limit: Optional[exprs.Expr],
-    ) -> SamplingClauses:
-        """tuple[
-        exprs.Expr,
-        Optional[list[exprs.Expr]],
-        Optional[list[tuple[exprs.Expr, bool]]],
-        Optional[exprs.Expr],
-        Optional[SampleClause],
-        ]:"""
-        """Construct clauses required for sampling under various conditions.
-        If there is no sampling, then return the original clauses.
-        If the sample is stratified, then return only the group by clause. The rest of the
-        mechanism for stratified sampling is provided by the SampleSqlNode.
-        If the sample is non-stratified, then rewrite the query to accommodate the supplied where clause,
-        and provide the other clauses required for sampling
-        """
-
-        # If no sample clause, return the original clauses
-        if sample_clause is None:
-            return SamplingClauses(where_clause, group_by_clause, order_by_clause, limit, None)
-
-        # If the sample clause is stratified, create a group by clause
-        if sample_clause.is_stratified:
-            group_by = sample_clause.stratify_exprs
-            # Note that limit is not possible here
-            return SamplingClauses(where_clause, group_by, order_by_clause, None, sample_clause)
-
-        else:
-            # If non-stratified sampling, construct a where clause, order_by, and limit clauses
-            # Construct an expression for sorting rows and limiting row counts
-            s_key = sample_key(
-                exprs.Literal(sample_clause.seed), *cls.rowid_columns(from_clause._first_tbl.tbl_version)
-            )
-
-            # Construct a suitable where clause
-            where = where_clause
-            if sample_clause.fraction is not None:
-                fraction_md5_hex = exprs.Expr.from_object(
-                    sample_clause.fraction_to_md5_hex(float(sample_clause.fraction))
-                )
-                f_where = s_key < fraction_md5_hex
-                where = where & f_where if where is not None else f_where
-
-            order_by: list[tuple[exprs.Expr, bool]] = [(s_key, True)]
-            limit = exprs.Literal(sample_clause.n)
-            # Note that group_by is not possible here
-            return SamplingClauses(where, None, order_by, limit, None)
-
     @classmethod
     def create_query_plan(
         cls,
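
The deleted `create_sample_clauses` (together with the removed `pixeltable.utils.sample.sample_key` import) shows the technique behind non-stratified sampling: a deterministic per-row key derived from an md5 over the seed and the rowid, a `key < threshold` predicate for fractional sampling (see `fraction_to_md5_hex` above), and ordering by the key plus a limit for fixed-size samples. A self-contained sketch of the same idea; the key layout and the threshold derivation are assumptions, not pixeltable's exact byte layout:

```python
import hashlib


def sample_key(seed: int, rowid: int) -> str:
    # assumed layout: the real sample_key hashes the seed and rowid columns in SQL
    return hashlib.md5(f'{seed},{rowid}'.encode()).hexdigest()


def fraction_to_threshold(fraction: float) -> str:
    # mirrors the shape of SampleClause.fraction_to_md5_hex seen above: an
    # 8-hex-digit prefix padded with 24 'f's covers `fraction` of the key space
    threshold_int = int(fraction * 0xFFFFFFFF)  # derivation of threshold_int is assumed
    return format(threshold_int, '08x') + 'f' * 24


seed, fraction = 42, 0.1
kept = [rowid for rowid in range(10_000) if sample_key(seed, rowid) < fraction_to_threshold(fraction)]
print(len(kept))  # ~1000; identical on every run with the same seed
```

Ordering by the same key and applying a LIMIT yields a deterministic fixed-size sample; after this refactor that work is delegated to `exec.SqlSampleNode` rather than rewritten into where/order-by/limit clauses.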
@@ -898,21 +827,19 @@ class Planner:
         if exact_version_only is None:
             exact_version_only = []
 
-        # Modify clauses to include sample clause
-        where, group_by_clause, order_by_clause, limit, sample = cls.create_sample_clauses(
-            from_clause, sample_clause, where_clause, group_by_clause, order_by_clause, limit
-        )
-
         analyzer = Analyzer(
             from_clause,
             select_list,
-            where_clause=
+            where_clause=where_clause,
             group_by_clause=group_by_clause,
             order_by_clause=order_by_clause,
+            sample_clause=sample_clause,
         )
-
-
-
+        # If the from_clause has a single table, we can use it as the context table for the RowBuilder.
+        # Otherwise there is no context table, but that's ok, because the context table is only needed for
+        # table mutations, which can't happen during a join.
+        context_tbl = from_clause.tbls[0].tbl_version.get() if len(from_clause.tbls) == 1 else None
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [], context_tbl)
 
         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
@@ -923,7 +850,6 @@ class Planner:
             analyzer=analyzer,
             eval_ctx=eval_ctx,
             limit=limit,
-            sample_clause=sample,
             with_pk=True,
             exact_version_only=exact_version_only,
         )
@@ -939,7 +865,6 @@ class Planner:
         analyzer: Analyzer,
         eval_ctx: exprs.RowBuilder.EvalCtx,
         limit: Optional[exprs.Expr] = None,
-        sample_clause: Optional[SampleClause] = None,
         with_pk: bool = False,
         exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ) -> exec.ExecNode:
@@ -966,6 +891,7 @@ class Planner:
         # - join clause subexprs
         # - subexprs of Where clause conjuncts that can't be run in SQL
         # - all grouping exprs
+        # - all stratify exprs
         candidates = list(
             exprs.Expr.list_subexprs(
                 analyzer.select_list,
@@ -980,10 +906,12 @@ class Planner:
             candidates.extend(
                 exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
             )
-
-
-
-
+        candidates.extend(
+            exprs.Expr.list_subexprs(analyzer.grouping_exprs, filter=sql_elements.contains, traverse_matches=False)
+        )
+        candidates.extend(
+            exprs.Expr.list_subexprs(analyzer.stratify_exprs, filter=sql_elements.contains, traverse_matches=False)
+        )
         # not isinstance(...): we don't want to materialize Literals via a Select
         sql_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))
 
@@ -1028,6 +956,15 @@ class Planner:
             # we need to order the input for window functions
             plan.set_order_by(analyzer.get_window_fn_ob_clause())
 
+        if analyzer.sample_clause is not None:
+            plan = exec.SqlSampleNode(
+                row_builder,
+                input=plan,
+                select_list=tbl_scan_exprs,
+                sample_clause=analyzer.sample_clause,
+                stratify_exprs=analyzer.stratify_exprs,
+            )
+
         plan = cls._insert_prefetch_node(tbl.tbl_version.id, row_builder, plan)
 
         if analyzer.group_by_clause is not None:
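
`SqlSampleNode` now receives the stratify exprs directly instead of having them smuggled in as a group-by clause. Stratified sampling of this kind is conventionally expressed with a window function; a hedged sketch of the pattern (the SQL that `SqlSampleNode` actually generates is not shown in this diff, and a reproducible implementation would order by a deterministic key like the md5 scheme above rather than `random()`):

```python
import sqlite3

conn = sqlite3.connect(':memory:')
conn.executescript("""
    CREATE TABLE t (id INTEGER, category TEXT);
    INSERT INTO t VALUES (1,'a'),(2,'a'),(3,'a'),(4,'b'),(5,'b'),(6,'c');
""")
n = 2  # keep at most n rows per stratum
rows = conn.execute(
    '''
    SELECT id, category FROM (
        SELECT id, category,
               ROW_NUMBER() OVER (PARTITION BY category ORDER BY random()) AS rn
        FROM t
    ) WHERE rn <= ?
    ''',
    (n,),
).fetchall()
print(rows)  # at most 2 rows from each of the strata 'a', 'b', 'c'
```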
@@ -1050,26 +987,12 @@ class Planner:
             sql_elements.contains_all(analyzer.select_list)
             and sql_elements.contains_all(analyzer.grouping_exprs)
             and isinstance(plan, exec.SqlNode)
-            and plan.to_cte(
+            and plan.to_cte() is not None
         ):
-
-            plan =
-
-                input=plan,
-                select_list=analyzer.select_list,
-                stratify_exprs=analyzer.group_by_clause,
-                sample_clause=sample_clause,
-            )
-        else:
-            plan = exec.SqlAggregationNode(
-                row_builder,
-                input=plan,
-                select_list=analyzer.select_list,
-                group_by_items=analyzer.group_by_clause,
-            )
+            plan = exec.SqlAggregationNode(
+                row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause
+            )
         else:
-            if sample_clause is not None:
-                raise excs.Error('Sample clause not supported with Python aggregation')
             input_sql_node = plan.get_node(exec.SqlNode)
             assert combined_ordering is not None
             input_sql_node.set_order_by(combined_ordering)
@@ -1119,16 +1042,14 @@ class Planner:
         return Analyzer(FromClause(tbls=[tbl]), [], where_clause=where_clause)
 
     @classmethod
-    def create_add_column_plan(
-        cls, tbl: catalog.TableVersionPath, col: catalog.Column
-    ) -> tuple[exec.ExecNode, Optional[int]]:
+    def create_add_column_plan(cls, tbl: catalog.TableVersionPath, col: catalog.Column) -> exec.ExecNode:
         """Creates a plan for InsertableTable.add_column()
         Returns:
             plan: the plan to execute
             value_expr slot idx for the plan output (for computed cols)
         """
         assert isinstance(tbl, catalog.TableVersionPath)
-        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
+        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[], tbl=tbl.tbl_version.get())
         analyzer = Analyzer(FromClause(tbls=[tbl]), row_builder.default_eval_ctx.target_exprs)
         plan = cls._create_query_plan(
             row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True
@@ -1140,5 +1061,4 @@ class Planner:
         # we want to flush images
         if col.is_computed and col.is_stored and col.col_type.is_image_type():
             plan.set_stored_img_cols(row_builder.output_slot_idxs())
-
-        return plan, value_expr_slot_idx
+        return plan
pixeltable/share/packager.py
CHANGED
@@ -127,7 +127,7 @@ class TablePackager:
         # We use snappy compression for the Parquet tables; the entire bundle will be bzip2-compressed later, so
         # faster compression should provide good performance while still reducing temporary storage utilization.
         parquet_writer = pq.ParquetWriter(parquet_file, parquet_schema, compression='SNAPPY')
-        filter_tv = self.table.
+        filter_tv = self.table._tbl_version_path.tbl_version.get()
         row_iter = tv.store_tbl.dump_rows(tv.version, filter_tv.store_tbl, filter_tv.version)
        for pa_table in self.__to_pa_tables(row_iter, sql_types, media_cols, parquet_schema):
             parquet_writer.write_table(pa_table)
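
For context, the streaming write pattern used here: a single `pq.ParquetWriter` opened with snappy compression, with each incoming table appended via `write_table`. A minimal sketch; the file path and schema are illustrative, not pixeltable's:

```python
import pyarrow as pa
import pyarrow.parquet as pq

schema = pa.schema([('id', pa.int64()), ('val', pa.string())])
with pq.ParquetWriter('example.parquet', schema, compression='SNAPPY') as writer:
    for start in range(0, 100, 25):
        ids = list(range(start, start + 25))
        batch = pa.table({'id': ids, 'val': [str(i) for i in ids]}, schema=schema)
        writer.write_table(batch)  # each call appends the table as new row group(s)
```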
@@ -238,7 +238,7 @@ class TablePackager:
         - Documents are replaced by a thumbnail as a base64-encoded webp
         """
         # First 8 columns
-        preview_cols = dict(itertools.islice(self.table.
+        preview_cols = dict(itertools.islice(self.table._get_schema().items(), 0, 8))
         select_list = [self.table[col_name] for col_name in preview_cols]
         # First 5 rows
         rows = list(self.table.select(*select_list).head(n=5))