pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (58)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +11 -2
  4. pixeltable/catalog/catalog.py +407 -119
  5. pixeltable/catalog/column.py +38 -26
  6. pixeltable/catalog/globals.py +130 -15
  7. pixeltable/catalog/insertable_table.py +10 -9
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +245 -119
  10. pixeltable/catalog/table_version.py +142 -116
  11. pixeltable/catalog/table_version_handle.py +30 -2
  12. pixeltable/catalog/table_version_path.py +28 -4
  13. pixeltable/catalog/view.py +14 -20
  14. pixeltable/config.py +4 -0
  15. pixeltable/dataframe.py +10 -9
  16. pixeltable/env.py +5 -11
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/exec_node.py +2 -0
  19. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  20. pixeltable/exec/sql_node.py +47 -30
  21. pixeltable/exprs/column_property_ref.py +2 -10
  22. pixeltable/exprs/column_ref.py +24 -21
  23. pixeltable/exprs/data_row.py +9 -0
  24. pixeltable/exprs/expr.py +4 -4
  25. pixeltable/exprs/row_builder.py +44 -13
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/mcp.py +74 -0
  28. pixeltable/func/query_template_function.py +4 -2
  29. pixeltable/func/tools.py +12 -2
  30. pixeltable/func/udf.py +2 -2
  31. pixeltable/functions/__init__.py +1 -0
  32. pixeltable/functions/groq.py +108 -0
  33. pixeltable/functions/huggingface.py +8 -6
  34. pixeltable/functions/mistralai.py +2 -13
  35. pixeltable/functions/openai.py +1 -6
  36. pixeltable/functions/replicate.py +2 -2
  37. pixeltable/functions/util.py +6 -1
  38. pixeltable/globals.py +0 -2
  39. pixeltable/io/external_store.py +81 -54
  40. pixeltable/io/globals.py +1 -1
  41. pixeltable/io/label_studio.py +49 -45
  42. pixeltable/io/table_data_conduit.py +1 -1
  43. pixeltable/metadata/__init__.py +1 -1
  44. pixeltable/metadata/converters/convert_37.py +15 -0
  45. pixeltable/metadata/converters/convert_38.py +39 -0
  46. pixeltable/metadata/notes.py +2 -0
  47. pixeltable/metadata/schema.py +5 -0
  48. pixeltable/metadata/utils.py +78 -0
  49. pixeltable/plan.py +59 -139
  50. pixeltable/share/packager.py +2 -2
  51. pixeltable/store.py +114 -103
  52. pixeltable/type_system.py +30 -0
  53. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/METADATA +1 -1
  54. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/RECORD +57 -53
  55. pixeltable/utils/sample.py +0 -25
  56. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.2.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from pixeltable.metadata import schema
6
+
7
+
8
class MetadataUtils:
    """Helpers that summarize schema-column changes between versions as short strings."""

    @classmethod
    def _diff_md(
        cls, old_md: Optional[dict[int, schema.SchemaColumn]], new_md: Optional[dict[int, schema.SchemaColumn]]
    ) -> str:
        """Return a string describing how the columns in `new_md` differ from those in `old_md`.

        Entries are matched by dictionary key; only the `name` attribute of each entry is reported.

        Results are formatted as follows:
        - If `old_md` is `None`, returns 'Initial Version'.
        - If `old_md` and `new_md` are the same, returns an empty string.
        - If there are additions, renames, or deletions, returns a comma-separated summary, e.g.
          "Added: c, Renamed: 'a' to 'x', Deleted: b".
        - If entries differ only in attributes other than `name`, returns an empty string.

        Args:
            old_md: the earlier metadata dictionary, or None if there is no earlier version.
            new_md: the later metadata dictionary; must not be None.
        """
        assert new_md is not None
        if old_md is None:
            return 'Initial Version'
        if old_md == new_md:
            return ''

        added = [col.name for key, col in new_md.items() if key not in old_md]
        renamed = [
            f'{old_md[key].name!r} to {col.name!r}'
            for key, col in new_md.items()
            if key in old_md and old_md[key].name != col.name
        ]
        deleted = [col.name for key, col in old_md.items() if key not in new_md]

        # Sections with no entries are omitted; if nothing changed by name the result is ''.
        sections = []
        if added:
            sections.append('Added: ' + ', '.join(added))
        if renamed:
            sections.append('Renamed: ' + ', '.join(renamed))
        if deleted:
            sections.append('Deleted: ' + ', '.join(deleted))
        return ', '.join(sections)

    @classmethod
    def _create_md_change_dict(
        cls, md_list: Optional[list[tuple[int, dict[int, schema.SchemaColumn]]]]
    ) -> dict[int, str]:
        """Return a dictionary of schema changes by version.

        `md_list` is sorted in place by version number. If the lowest version present is 0, it is reported
        as 'Initial Version'; otherwise the lowest version only serves as the baseline for the next one and
        is not reported itself. Versions whose diff against their predecessor is empty are omitted.
        When a version number occurs more than once, only its first occurrence is used.

        Args:
            md_list: a list of tuples, each containing a version number and a metadata dictionary.

        Returns:
            A mapping from version number to the change summary produced by `_diff_md()`.
        """
        changes: dict[int, str] = {}
        if not md_list:
            return changes

        # Sort by version number only: comparing whole tuples would fall back to comparing the metadata
        # dicts when two entries share a version, and dicts do not support ordering (TypeError).
        md_list.sort(key=lambda entry: entry[0])

        first_retrieved_version = md_list[0][0]
        if first_retrieved_version == 0:
            # version 0 has no predecessor, so it is diffed against None
            prev_md = None
            prev_ver = -1
            start = 0
        else:
            prev_md = md_list[0][1]
            prev_ver = first_retrieved_version
            start = 1

        for ver, curr_md in md_list[start:]:
            if ver == prev_ver:
                # duplicate entry for a version that was already processed
                continue
            summary = cls._diff_md(prev_md, curr_md)
            if summary != '':
                changes[ver] = summary
            prev_md = curr_md
            # advance the version marker together with the metadata so the duplicate check above
            # always compares against the most recently processed version
            prev_ver = ver
        return changes
pixeltable/plan.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import dataclasses
4
4
  import enum
5
5
  from textwrap import dedent
6
- from typing import Any, Iterable, Literal, NamedTuple, Optional, Sequence
6
+ from typing import Any, Iterable, Literal, Optional, Sequence
7
7
  from uuid import UUID
8
8
 
9
9
  import sqlalchemy as sql
@@ -12,7 +12,6 @@ import pixeltable as pxt
12
12
  from pixeltable import catalog, exceptions as excs, exec, exprs
13
13
  from pixeltable.catalog import Column, TableVersionHandle
14
14
  from pixeltable.exec.sql_node import OrderByClause, OrderByItem, combine_order_by_clauses, print_order_by_clause
15
- from pixeltable.utils.sample import sample_key
16
15
 
17
16
 
18
17
  def _is_agg_fn_call(e: exprs.Expr) -> bool:
@@ -159,16 +158,6 @@ class SampleClause:
159
158
  return format(threshold_int, '08x') + 'ffffffffffffffffffffffff'
160
159
 
161
160
 
162
- class SamplingClauses(NamedTuple):
163
- """Clauses provided when rewriting a SampleClause"""
164
-
165
- where: exprs.Expr
166
- group_by_clause: Optional[list[exprs.Expr]]
167
- order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
168
- limit: Optional[exprs.Expr]
169
- sample_clause: Optional[SampleClause]
170
-
171
-
172
161
  class Analyzer:
173
162
  """
174
163
  Performs semantic analysis of a query and stores the analysis state.
@@ -180,6 +169,8 @@ class Analyzer:
180
169
  group_by_clause: Optional[list[exprs.Expr]] # None for non-aggregate queries; [] for agg query w/o grouping
181
170
  grouping_exprs: list[exprs.Expr] # [] for non-aggregate queries or agg query w/o grouping
182
171
  order_by_clause: OrderByClause
172
+ stratify_exprs: list[exprs.Expr] # [] if no stratification is required
173
+ sample_clause: Optional[SampleClause] # None if no sampling clause is present
183
174
 
184
175
  sql_elements: exprs.SqlElementCache
185
176
 
@@ -200,6 +191,7 @@ class Analyzer:
200
191
  where_clause: Optional[exprs.Expr] = None,
201
192
  group_by_clause: Optional[list[exprs.Expr]] = None,
202
193
  order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
194
+ sample_clause: Optional[SampleClause] = None,
203
195
  ):
204
196
  if order_by_clause is None:
205
197
  order_by_clause = []
@@ -213,6 +205,11 @@ class Analyzer:
213
205
  self.group_by_clause = (
214
206
  [e.resolve_computed_cols() for e in group_by_clause] if group_by_clause is not None else None
215
207
  )
208
+ self.sample_clause = sample_clause
209
+ if self.sample_clause is not None and self.sample_clause.is_stratified:
210
+ self.stratify_exprs = [e.resolve_computed_cols() for e in sample_clause.stratify_exprs]
211
+ else:
212
+ self.stratify_exprs = []
216
213
  self.order_by_clause = [OrderByItem(e.resolve_computed_cols(), asc) for e, asc in order_by_clause]
217
214
 
218
215
  self.sql_where_clause = None
@@ -228,8 +225,11 @@ class Analyzer:
228
225
  self.all_exprs.append(join_clause.join_predicate)
229
226
  if self.group_by_clause is not None:
230
227
  self.all_exprs.extend(self.group_by_clause)
228
+ self.all_exprs.extend(self.stratify_exprs)
231
229
  self.all_exprs.extend(e for e, _ in self.order_by_clause)
232
230
  if self.filter is not None:
231
+ if sample_clause is not None:
232
+ raise excs.Error(f'Filter {self.filter} not expressible in SQL')
233
233
  self.all_exprs.append(self.filter)
234
234
 
235
235
  self.agg_order_by = []
@@ -378,7 +378,7 @@ class Planner:
378
378
 
379
379
  cls.__check_valid_columns(tbl, stored_cols, 'inserted into')
380
380
 
381
- row_builder = exprs.RowBuilder([], stored_cols, [])
381
+ row_builder = exprs.RowBuilder([], stored_cols, [], tbl)
382
382
 
383
383
  # create InMemoryDataNode for 'rows'
384
384
  plan: exec.ExecNode = exec.InMemoryDataNode(
@@ -473,15 +473,19 @@ class Planner:
473
473
  assert isinstance(tbl, catalog.TableVersionPath)
474
474
  target = tbl.tbl_version.get() # the one we need to update
475
475
  updated_cols = list(update_targets.keys())
476
+ recomputed_cols: set[Column]
476
477
  if len(recompute_targets) > 0:
477
- recomputed_cols = set(recompute_targets)
478
+ assert len(update_targets) == 0
479
+ recomputed_cols = {*recompute_targets}
480
+ if cascade:
481
+ recomputed_cols |= target.get_dependent_columns(recomputed_cols)
478
482
  else:
479
483
  recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
480
- # regardless of cascade, we need to update all indices on any updated column
481
- idx_val_cols = target.get_idx_val_columns(updated_cols)
482
- recomputed_cols.update(idx_val_cols)
483
- # we only need to recompute stored columns (unstored ones are substituted away)
484
- recomputed_cols = {c for c in recomputed_cols if c.is_stored}
484
+ # regardless of cascade, we need to update all indices on any updated/recomputed column
485
+ idx_val_cols = target.get_idx_val_columns(set(updated_cols) | recomputed_cols)
486
+ recomputed_cols.update(idx_val_cols)
487
+ # we only need to recompute stored columns (unstored ones are substituted away)
488
+ recomputed_cols = {c for c in recomputed_cols if c.is_stored}
485
489
 
486
490
  cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
487
491
 
@@ -588,7 +592,7 @@ class Planner:
588
592
  sql_exprs = list(
589
593
  exprs.Expr.list_subexprs(analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False)
590
594
  )
591
- row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs)
595
+ row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
592
596
  analyzer.finalize(row_builder)
593
597
  sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
594
598
  col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
@@ -602,8 +606,7 @@ class Planner:
602
606
  row_builder.set_slot_idxs(select_list, remove_duplicates=False)
603
607
  for i, col in enumerate(all_base_cols):
604
608
  plan.row_builder.add_table_column(col, select_list[i].slot_idx)
605
-
606
- ctx = exec.ExecContext(row_builder)
609
+ ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
607
610
  # we're returning everything to the user, so we might as well do it in a single batch
608
611
  ctx.batch_size = 0
609
612
  plan.set_ctx(ctx)
@@ -691,25 +694,13 @@ class Planner:
691
694
  # 2. for component views: iterator args
692
695
  iterator_args = [target.iterator_args] if target.iterator_args is not None else []
693
696
 
694
- # If this contains a sample specification, modify / create where, group_by, order_by, and limit clauses
695
697
  from_clause = FromClause(tbls=[view.base])
696
- where, group_by_clause, order_by_clause, limit, sample_clause = cls.create_sample_clauses(
697
- from_clause, target.sample_clause, target.predicate, None, [], None
698
- )
699
-
700
- # if we're propagating an insert, we only want to see those base rows that were created for the current version
701
698
  base_analyzer = Analyzer(
702
- from_clause,
703
- iterator_args,
704
- where_clause=where,
705
- group_by_clause=group_by_clause,
706
- order_by_clause=order_by_clause,
699
+ from_clause, iterator_args, where_clause=target.predicate, sample_clause=target.sample_clause
707
700
  )
708
- row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [])
709
-
710
- if target.sample_clause is not None and base_analyzer.filter is not None:
711
- raise excs.Error(f'Filter {base_analyzer.filter} not expressible in SQL')
701
+ row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [], target)
712
702
 
703
+ # if we're propagating an insert, we only want to see those base rows that were created for the current version
713
704
  # execution plan:
714
705
  # 1. materialize exprs computed from the base that are needed for stored view columns
715
706
  # 2. if it's an iterator view, expand the base rows into component rows
@@ -723,19 +714,13 @@ class Planner:
723
714
 
724
715
  # Create a new analyzer reflecting exactly what is required from the base table
725
716
  base_analyzer = Analyzer(
726
- from_clause,
727
- base_output_exprs,
728
- where_clause=where,
729
- group_by_clause=group_by_clause,
730
- order_by_clause=order_by_clause,
717
+ from_clause, base_output_exprs, where_clause=target.predicate, sample_clause=target.sample_clause
731
718
  )
732
719
  base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
733
720
  plan = cls._create_query_plan(
734
721
  row_builder=row_builder,
735
722
  analyzer=base_analyzer,
736
723
  eval_ctx=base_eval_ctx,
737
- limit=limit,
738
- sample_clause=sample_clause,
739
724
  with_pk=True,
740
725
  exact_version_only=view.get_bases() if propagates_insert else [],
741
726
  )
@@ -818,62 +803,6 @@ class Planner:
818
803
  prefetch_node = exec.CachePrefetchNode(tbl_id, file_col_info, input_node)
819
804
  return prefetch_node
820
805
 
821
- @classmethod
822
- def create_sample_clauses(
823
- cls,
824
- from_clause: FromClause,
825
- sample_clause: SampleClause,
826
- where_clause: Optional[exprs.Expr],
827
- group_by_clause: Optional[list[exprs.Expr]],
828
- order_by_clause: Optional[list[tuple[exprs.Expr, bool]]],
829
- limit: Optional[exprs.Expr],
830
- ) -> SamplingClauses:
831
- """tuple[
832
- exprs.Expr,
833
- Optional[list[exprs.Expr]],
834
- Optional[list[tuple[exprs.Expr, bool]]],
835
- Optional[exprs.Expr],
836
- Optional[SampleClause],
837
- ]:"""
838
- """Construct clauses required for sampling under various conditions.
839
- If there is no sampling, then return the original clauses.
840
- If the sample is stratified, then return only the group by clause. The rest of the
841
- mechanism for stratified sampling is provided by the SampleSqlNode.
842
- If the sample is non-stratified, then rewrite the query to accommodate the supplied where clause,
843
- and provide the other clauses required for sampling
844
- """
845
-
846
- # If no sample clause, return the original clauses
847
- if sample_clause is None:
848
- return SamplingClauses(where_clause, group_by_clause, order_by_clause, limit, None)
849
-
850
- # If the sample clause is stratified, create a group by clause
851
- if sample_clause.is_stratified:
852
- group_by = sample_clause.stratify_exprs
853
- # Note that limit is not possible here
854
- return SamplingClauses(where_clause, group_by, order_by_clause, None, sample_clause)
855
-
856
- else:
857
- # If non-stratified sampling, construct a where clause, order_by, and limit clauses
858
- # Construct an expression for sorting rows and limiting row counts
859
- s_key = sample_key(
860
- exprs.Literal(sample_clause.seed), *cls.rowid_columns(from_clause._first_tbl.tbl_version)
861
- )
862
-
863
- # Construct a suitable where clause
864
- where = where_clause
865
- if sample_clause.fraction is not None:
866
- fraction_md5_hex = exprs.Expr.from_object(
867
- sample_clause.fraction_to_md5_hex(float(sample_clause.fraction))
868
- )
869
- f_where = s_key < fraction_md5_hex
870
- where = where & f_where if where is not None else f_where
871
-
872
- order_by: list[tuple[exprs.Expr, bool]] = [(s_key, True)]
873
- limit = exprs.Literal(sample_clause.n)
874
- # Note that group_by is not possible here
875
- return SamplingClauses(where, None, order_by, limit, None)
876
-
877
806
  @classmethod
878
807
  def create_query_plan(
879
808
  cls,
@@ -898,21 +827,19 @@ class Planner:
898
827
  if exact_version_only is None:
899
828
  exact_version_only = []
900
829
 
901
- # Modify clauses to include sample clause
902
- where, group_by_clause, order_by_clause, limit, sample = cls.create_sample_clauses(
903
- from_clause, sample_clause, where_clause, group_by_clause, order_by_clause, limit
904
- )
905
-
906
830
  analyzer = Analyzer(
907
831
  from_clause,
908
832
  select_list,
909
- where_clause=where,
833
+ where_clause=where_clause,
910
834
  group_by_clause=group_by_clause,
911
835
  order_by_clause=order_by_clause,
836
+ sample_clause=sample_clause,
912
837
  )
913
- row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [])
914
- if sample_clause is not None and analyzer.filter is not None:
915
- raise excs.Error(f'Filter {analyzer.filter} not expressible in SQL')
838
+ # If the from_clause has a single table, we can use it as the context table for the RowBuilder.
839
+ # Otherwise there is no context table, but that's ok, because the context table is only needed for
840
+ # table mutations, which can't happen during a join.
841
+ context_tbl = from_clause.tbls[0].tbl_version.get() if len(from_clause.tbls) == 1 else None
842
+ row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [], context_tbl)
916
843
 
917
844
  analyzer.finalize(row_builder)
918
845
  # select_list: we need to materialize everything that's been collected
@@ -923,7 +850,6 @@ class Planner:
923
850
  analyzer=analyzer,
924
851
  eval_ctx=eval_ctx,
925
852
  limit=limit,
926
- sample_clause=sample,
927
853
  with_pk=True,
928
854
  exact_version_only=exact_version_only,
929
855
  )
@@ -939,7 +865,6 @@ class Planner:
939
865
  analyzer: Analyzer,
940
866
  eval_ctx: exprs.RowBuilder.EvalCtx,
941
867
  limit: Optional[exprs.Expr] = None,
942
- sample_clause: Optional[SampleClause] = None,
943
868
  with_pk: bool = False,
944
869
  exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
945
870
  ) -> exec.ExecNode:
@@ -966,6 +891,7 @@ class Planner:
966
891
  # - join clause subexprs
967
892
  # - subexprs of Where clause conjuncts that can't be run in SQL
968
893
  # - all grouping exprs
894
+ # - all stratify exprs
969
895
  candidates = list(
970
896
  exprs.Expr.list_subexprs(
971
897
  analyzer.select_list,
@@ -980,10 +906,12 @@ class Planner:
980
906
  candidates.extend(
981
907
  exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
982
908
  )
983
- if analyzer.group_by_clause is not None:
984
- candidates.extend(
985
- exprs.Expr.list_subexprs(analyzer.group_by_clause, filter=sql_elements.contains, traverse_matches=False)
986
- )
909
+ candidates.extend(
910
+ exprs.Expr.list_subexprs(analyzer.grouping_exprs, filter=sql_elements.contains, traverse_matches=False)
911
+ )
912
+ candidates.extend(
913
+ exprs.Expr.list_subexprs(analyzer.stratify_exprs, filter=sql_elements.contains, traverse_matches=False)
914
+ )
987
915
  # not isinstance(...): we don't want to materialize Literals via a Select
988
916
  sql_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))
989
917
 
@@ -1028,6 +956,15 @@ class Planner:
1028
956
  # we need to order the input for window functions
1029
957
  plan.set_order_by(analyzer.get_window_fn_ob_clause())
1030
958
 
959
+ if analyzer.sample_clause is not None:
960
+ plan = exec.SqlSampleNode(
961
+ row_builder,
962
+ input=plan,
963
+ select_list=tbl_scan_exprs,
964
+ sample_clause=analyzer.sample_clause,
965
+ stratify_exprs=analyzer.stratify_exprs,
966
+ )
967
+
1031
968
  plan = cls._insert_prefetch_node(tbl.tbl_version.id, row_builder, plan)
1032
969
 
1033
970
  if analyzer.group_by_clause is not None:
@@ -1050,26 +987,12 @@ class Planner:
1050
987
  sql_elements.contains_all(analyzer.select_list)
1051
988
  and sql_elements.contains_all(analyzer.grouping_exprs)
1052
989
  and isinstance(plan, exec.SqlNode)
1053
- and plan.to_cte(keep_pk=(sample_clause is not None)) is not None
990
+ and plan.to_cte() is not None
1054
991
  ):
1055
- if sample_clause is not None:
1056
- plan = exec.SqlSampleNode(
1057
- row_builder,
1058
- input=plan,
1059
- select_list=analyzer.select_list,
1060
- stratify_exprs=analyzer.group_by_clause,
1061
- sample_clause=sample_clause,
1062
- )
1063
- else:
1064
- plan = exec.SqlAggregationNode(
1065
- row_builder,
1066
- input=plan,
1067
- select_list=analyzer.select_list,
1068
- group_by_items=analyzer.group_by_clause,
1069
- )
992
+ plan = exec.SqlAggregationNode(
993
+ row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause
994
+ )
1070
995
  else:
1071
- if sample_clause is not None:
1072
- raise excs.Error('Sample clause not supported with Python aggregation')
1073
996
  input_sql_node = plan.get_node(exec.SqlNode)
1074
997
  assert combined_ordering is not None
1075
998
  input_sql_node.set_order_by(combined_ordering)
@@ -1119,16 +1042,14 @@ class Planner:
1119
1042
  return Analyzer(FromClause(tbls=[tbl]), [], where_clause=where_clause)
1120
1043
 
1121
1044
  @classmethod
1122
- def create_add_column_plan(
1123
- cls, tbl: catalog.TableVersionPath, col: catalog.Column
1124
- ) -> tuple[exec.ExecNode, Optional[int]]:
1045
+ def create_add_column_plan(cls, tbl: catalog.TableVersionPath, col: catalog.Column) -> exec.ExecNode:
1125
1046
  """Creates a plan for InsertableTable.add_column()
1126
1047
  Returns:
1127
1048
  plan: the plan to execute
1128
1049
  value_expr slot idx for the plan output (for computed cols)
1129
1050
  """
1130
1051
  assert isinstance(tbl, catalog.TableVersionPath)
1131
- row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
1052
+ row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[], tbl=tbl.tbl_version.get())
1132
1053
  analyzer = Analyzer(FromClause(tbls=[tbl]), row_builder.default_eval_ctx.target_exprs)
1133
1054
  plan = cls._create_query_plan(
1134
1055
  row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True
@@ -1140,5 +1061,4 @@ class Planner:
1140
1061
  # we want to flush images
1141
1062
  if col.is_computed and col.is_stored and col.col_type.is_image_type():
1142
1063
  plan.set_stored_img_cols(row_builder.output_slot_idxs())
1143
- value_expr_slot_idx = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
1144
- return plan, value_expr_slot_idx
1064
+ return plan
@@ -127,7 +127,7 @@ class TablePackager:
127
127
  # We use snappy compression for the Parquet tables; the entire bundle will be bzip2-compressed later, so
128
128
  # faster compression should provide good performance while still reducing temporary storage utilization.
129
129
  parquet_writer = pq.ParquetWriter(parquet_file, parquet_schema, compression='SNAPPY')
130
- filter_tv = self.table._tbl_version.get()
130
+ filter_tv = self.table._tbl_version_path.tbl_version.get()
131
131
  row_iter = tv.store_tbl.dump_rows(tv.version, filter_tv.store_tbl, filter_tv.version)
132
132
  for pa_table in self.__to_pa_tables(row_iter, sql_types, media_cols, parquet_schema):
133
133
  parquet_writer.write_table(pa_table)
@@ -238,7 +238,7 @@ class TablePackager:
238
238
  - Documents are replaced by a thumbnail as a base64-encoded webp
239
239
  """
240
240
  # First 8 columns
241
- preview_cols = dict(itertools.islice(self.table._schema.items(), 0, 8))
241
+ preview_cols = dict(itertools.islice(self.table._get_schema().items(), 0, 8))
242
242
  select_list = [self.table[col_name] for col_name in preview_cols]
243
243
  # First 5 rows
244
244
  rows = list(self.table.select(*select_list).head(n=5))