pixeltable 0.4.0rc2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (59)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +9 -1
  4. pixeltable/catalog/catalog.py +333 -99
  5. pixeltable/catalog/column.py +28 -26
  6. pixeltable/catalog/globals.py +12 -0
  7. pixeltable/catalog/insertable_table.py +8 -8
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +111 -116
  10. pixeltable/catalog/table_version.py +36 -50
  11. pixeltable/catalog/table_version_handle.py +4 -1
  12. pixeltable/catalog/table_version_path.py +28 -4
  13. pixeltable/catalog/view.py +10 -18
  14. pixeltable/config.py +4 -0
  15. pixeltable/dataframe.py +10 -9
  16. pixeltable/env.py +5 -11
  17. pixeltable/exceptions.py +6 -0
  18. pixeltable/exec/exec_node.py +2 -0
  19. pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
  20. pixeltable/exec/sql_node.py +47 -30
  21. pixeltable/exprs/column_property_ref.py +2 -1
  22. pixeltable/exprs/column_ref.py +7 -6
  23. pixeltable/exprs/expr.py +4 -4
  24. pixeltable/func/__init__.py +1 -0
  25. pixeltable/func/mcp.py +74 -0
  26. pixeltable/func/query_template_function.py +4 -2
  27. pixeltable/func/tools.py +12 -2
  28. pixeltable/func/udf.py +2 -2
  29. pixeltable/functions/__init__.py +1 -0
  30. pixeltable/functions/anthropic.py +19 -45
  31. pixeltable/functions/deepseek.py +19 -38
  32. pixeltable/functions/fireworks.py +9 -18
  33. pixeltable/functions/gemini.py +2 -2
  34. pixeltable/functions/groq.py +108 -0
  35. pixeltable/functions/huggingface.py +8 -6
  36. pixeltable/functions/llama_cpp.py +6 -6
  37. pixeltable/functions/mistralai.py +16 -53
  38. pixeltable/functions/ollama.py +1 -1
  39. pixeltable/functions/openai.py +82 -170
  40. pixeltable/functions/replicate.py +2 -2
  41. pixeltable/functions/together.py +22 -80
  42. pixeltable/functions/util.py +6 -1
  43. pixeltable/globals.py +0 -2
  44. pixeltable/io/external_store.py +2 -2
  45. pixeltable/io/label_studio.py +4 -4
  46. pixeltable/io/table_data_conduit.py +1 -1
  47. pixeltable/metadata/__init__.py +1 -1
  48. pixeltable/metadata/converters/convert_37.py +15 -0
  49. pixeltable/metadata/notes.py +1 -0
  50. pixeltable/metadata/schema.py +5 -0
  51. pixeltable/plan.py +37 -121
  52. pixeltable/share/packager.py +2 -2
  53. pixeltable/type_system.py +30 -0
  54. {pixeltable-0.4.0rc2.dist-info → pixeltable-0.4.1.dist-info}/METADATA +1 -1
  55. {pixeltable-0.4.0rc2.dist-info → pixeltable-0.4.1.dist-info}/RECORD +58 -56
  56. pixeltable/utils/sample.py +0 -25
  57. {pixeltable-0.4.0rc2.dist-info → pixeltable-0.4.1.dist-info}/LICENSE +0 -0
  58. {pixeltable-0.4.0rc2.dist-info → pixeltable-0.4.1.dist-info}/WHEEL +0 -0
  59. {pixeltable-0.4.0rc2.dist-info → pixeltable-0.4.1.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py CHANGED
@@ -428,8 +428,6 @@ def get_table(path: str) -> catalog.Table:
      """
      path_obj = catalog.Path(path)
      tbl = Catalog.get().get_table(path_obj)
-     tv = tbl._tbl_version.get()
-     _logger.debug(f'get_table(): tbl={tv.id}:{tv.effective_version} sa_tbl={id(tv.store_tbl.sa_tbl):x} tv={id(tv):x}')
      return tbl


pixeltable/io/external_store.py CHANGED
@@ -202,7 +202,7 @@ class Project(ExternalStore, abc.ABC):
          resolved_col_mapping: dict[Column, str] = {}

          # Validate names
-         t_cols = set(table._schema.keys())
+         t_cols = set(table._get_schema().keys())
          for t_col, ext_col in col_mapping.items():
              if t_col not in t_cols:
                  if is_user_specified_col_mapping:
@@ -225,7 +225,7 @@ class Project(ExternalStore, abc.ABC):
              assert isinstance(col_ref, exprs.ColumnRef)
              resolved_col_mapping[col_ref.col] = ext_col
          # Validate column specs
-         t_col_types = table._schema
+         t_col_types = table._get_schema()
          for t_col, ext_col in col_mapping.items():
              t_col_type = t_col_types[t_col]
              if ext_col in export_cols:
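
The change that recurs throughout this release is mechanical: the private schema accessor Table._schema (a property) became Table._get_schema() (a method), still returning a mapping of column name to ColumnType. A minimal migration sketch, assuming pixeltable is installed and a table named 'films' exists (the table name is hypothetical):

    import pixeltable as pxt

    t = pxt.get_table('films')   # 'films' is a hypothetical example table
    schema = t._get_schema()     # was: t._schema; returns {column_name: ColumnType}
    print(sorted(schema.keys()))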
pixeltable/io/label_studio.py CHANGED
@@ -412,8 +412,8 @@ class LabelStudioProject(Project):
          # TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
          ancestor = t
          while local_annotations_col not in ancestor._tbl_version.get().cols:
-             assert ancestor._base_table is not None
-             ancestor = ancestor._base_table
+             assert ancestor._get_base_table is not None
+             ancestor = ancestor._get_base_table()
          update_status = ancestor.batch_update(updates)
          env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
          return SyncStatus(pxt_rows_updated=update_status.num_rows, num_excs=update_status.num_excs)
@@ -560,7 +560,7 @@ class LabelStudioProject(Project):

          if name is None:
              # Create a default name that's unique to the table
-             all_stores = t.external_stores
+             all_stores = t.external_stores()
              n = 0
              while f'ls_project_{n}' in all_stores:
                  n += 1
@@ -576,7 +576,7 @@ class LabelStudioProject(Project):
              local_annotations_column = ANNOTATIONS_COLUMN
          else:
              local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
-         if local_annotations_column not in t._schema:
+         if local_annotations_column not in t._get_schema():
              t.add_columns({local_annotations_column: ts.Json})

          resolved_col_mapping = cls.validate_columns(
pixeltable/io/table_data_conduit.py CHANGED
@@ -101,7 +101,7 @@ class TableDataConduit:
      def add_table_info(self, table: pxt.Table) -> None:
          """Add information about the table into which we are inserting data"""
          assert isinstance(table, pxt.Table)
-         self.pxt_schema = table._schema
+         self.pxt_schema = table._get_schema()
          self.pxt_pk = table._tbl_version.get().primary_key
          for col in table._tbl_version_path.columns():
              if col.is_required_for_insert:
pixeltable/metadata/__init__.py CHANGED
@@ -18,7 +18,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
  _logger = logging.getLogger('pixeltable')

  # current version of the metadata; this is incremented whenever the metadata schema changes
- VERSION = 37
+ VERSION = 38


  def create_system_info(engine: sql.engine.Engine) -> None:
pixeltable/metadata/converters/convert_37.py ADDED
@@ -0,0 +1,15 @@
+ from uuid import UUID
+
+ import sqlalchemy as sql
+
+ from pixeltable.metadata import register_converter
+ from pixeltable.metadata.converters.util import convert_table_md
+
+
+ @register_converter(version=37)
+ def _(engine: sql.engine.Engine) -> None:
+     convert_table_md(engine, table_md_updater=__update_table_md)
+
+
+ def __update_table_md(table_md: dict, _: UUID) -> None:
+     table_md['view_sn'] = 0
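
The new converter above is the complete recipe for a metadata version bump: register an updater for the previous version and patch each table's metadata dict in place (here, backfilling view_sn with 0). As a hedged sketch of the same pattern, a hypothetical converter for some future version-38 change would take the same shape; the field and updater names below are illustrative, while register_converter and convert_table_md are the real entry points shown in the diff:

    from uuid import UUID

    import sqlalchemy as sql

    from pixeltable.metadata import register_converter
    from pixeltable.metadata.converters.util import convert_table_md


    @register_converter(version=38)  # illustrative only; 38 is the current version
    def _(engine: sql.engine.Engine) -> None:
        convert_table_md(engine, table_md_updater=_add_hypothetical_field)


    def _add_hypothetical_field(table_md: dict, _: UUID) -> None:
        # backfill a default for metadata written before the (hypothetical) bump
        table_md['hypothetical_field'] = 0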
pixeltable/metadata/notes.py CHANGED
@@ -2,6 +2,7 @@
  # rather than as a comment, so that the existence of a description can be enforced by
  # the unit tests when new versions are added.
  VERSION_NOTES = {
+     38: 'Added TableMd.view_sn',
      37: 'Add support for the sample() method on DataFrames',
      36: 'Added Table.lock_dummy',
      35: 'Track reference_tbl in ColumnRef',
pixeltable/metadata/schema.py CHANGED
@@ -177,6 +177,11 @@ class TableMd:
      # - every row is assigned a unique and immutable rowid on insertion
      next_row_id: int

+     # sequence number to track changes in the set of mutable views of this table (ie, this table = the view base)
+     # - incremented for each add/drop of a mutable view
+     # - only maintained for mutable tables
+     view_sn: int
+
      # Metadata format for external stores:
      # {'class': 'pixeltable.io.label_studio.LabelStudioProject', 'md': {'project_id': 3}}
      external_stores: list[dict[str, Any]]
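
The comments on view_sn describe a simple invalidation protocol: the base table bumps a sequence number on every add or drop of a mutable view, so cached handles can compare one integer instead of re-reading the view list. A minimal illustration of that idea in plain Python (not pixeltable internals):

    from dataclasses import dataclass, field


    @dataclass
    class BaseTableMdSketch:
        view_sn: int = 0  # matches the convert_37 backfill default
        mutable_views: list[str] = field(default_factory=list)

        def add_mutable_view(self, name: str) -> None:
            self.mutable_views.append(name)
            self.view_sn += 1  # one increment per add/drop, per the comments above


    md = BaseTableMdSketch()
    cached_sn = md.view_sn
    md.add_mutable_view('v1')
    assert md.view_sn != cached_sn  # a cached handle detects the change cheaply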
pixeltable/plan.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
  import dataclasses
  import enum
  from textwrap import dedent
- from typing import Any, Iterable, Literal, NamedTuple, Optional, Sequence
+ from typing import Any, Iterable, Literal, Optional, Sequence
  from uuid import UUID

  import sqlalchemy as sql
@@ -12,7 +12,6 @@ import pixeltable as pxt
  from pixeltable import catalog, exceptions as excs, exec, exprs
  from pixeltable.catalog import Column, TableVersionHandle
  from pixeltable.exec.sql_node import OrderByClause, OrderByItem, combine_order_by_clauses, print_order_by_clause
- from pixeltable.utils.sample import sample_key


  def _is_agg_fn_call(e: exprs.Expr) -> bool:
@@ -159,16 +158,6 @@ class SampleClause:
          return format(threshold_int, '08x') + 'ffffffffffffffffffffffff'


- class SamplingClauses(NamedTuple):
-     """Clauses provided when rewriting a SampleClause"""
-
-     where: exprs.Expr
-     group_by_clause: Optional[list[exprs.Expr]]
-     order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
-     limit: Optional[exprs.Expr]
-     sample_clause: Optional[SampleClause]
-
-
  class Analyzer:
      """
      Performs semantic analysis of a query and stores the analysis state.
@@ -180,6 +169,8 @@ class Analyzer:
      group_by_clause: Optional[list[exprs.Expr]]  # None for non-aggregate queries; [] for agg query w/o grouping
      grouping_exprs: list[exprs.Expr]  # [] for non-aggregate queries or agg query w/o grouping
      order_by_clause: OrderByClause
+     stratify_exprs: list[exprs.Expr]  # [] if no stratiifcation is required
+     sample_clause: Optional[SampleClause]  # None if no sampling clause is present

      sql_elements: exprs.SqlElementCache
@@ -200,6 +191,7 @@
          where_clause: Optional[exprs.Expr] = None,
          group_by_clause: Optional[list[exprs.Expr]] = None,
          order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
+         sample_clause: Optional[SampleClause] = None,
      ):
          if order_by_clause is None:
              order_by_clause = []
@@ -213,6 +205,11 @@
          self.group_by_clause = (
              [e.resolve_computed_cols() for e in group_by_clause] if group_by_clause is not None else None
          )
+         self.sample_clause = sample_clause
+         if self.sample_clause is not None and self.sample_clause.is_stratified:
+             self.stratify_exprs = [e.resolve_computed_cols() for e in sample_clause.stratify_exprs]
+         else:
+             self.stratify_exprs = []
          self.order_by_clause = [OrderByItem(e.resolve_computed_cols(), asc) for e, asc in order_by_clause]

          self.sql_where_clause = None
@@ -228,8 +225,11 @@
              self.all_exprs.append(join_clause.join_predicate)
          if self.group_by_clause is not None:
              self.all_exprs.extend(self.group_by_clause)
+         self.all_exprs.extend(self.stratify_exprs)
          self.all_exprs.extend(e for e, _ in self.order_by_clause)
          if self.filter is not None:
+             if sample_clause is not None:
+                 raise excs.Error(f'Filter {self.filter} not expressible in SQL')
              self.all_exprs.append(self.filter)

          self.agg_order_by = []
@@ -691,25 +691,13 @@
          # 2. for component views: iterator args
          iterator_args = [target.iterator_args] if target.iterator_args is not None else []

-         # If this contains a sample specification, modify / create where, group_by, order_by, and limit clauses
          from_clause = FromClause(tbls=[view.base])
-         where, group_by_clause, order_by_clause, limit, sample_clause = cls.create_sample_clauses(
-             from_clause, target.sample_clause, target.predicate, None, [], None
-         )
-
-         # if we're propagating an insert, we only want to see those base rows that were created for the current version
          base_analyzer = Analyzer(
-             from_clause,
-             iterator_args,
-             where_clause=where,
-             group_by_clause=group_by_clause,
-             order_by_clause=order_by_clause,
+             from_clause, iterator_args, where_clause=target.predicate, sample_clause=target.sample_clause
          )
          row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [])

-         if target.sample_clause is not None and base_analyzer.filter is not None:
-             raise excs.Error(f'Filter {base_analyzer.filter} not expressible in SQL')
-
+         # if we're propagating an insert, we only want to see those base rows that were created for the current version
          # execution plan:
          # 1. materialize exprs computed from the base that are needed for stored view columns
          # 2. if it's an iterator view, expand the base rows into component rows
@@ -723,19 +711,13 @@

          # Create a new analyzer reflecting exactly what is required from the base table
          base_analyzer = Analyzer(
-             from_clause,
-             base_output_exprs,
-             where_clause=where,
-             group_by_clause=group_by_clause,
-             order_by_clause=order_by_clause,
+             from_clause, base_output_exprs, where_clause=target.predicate, sample_clause=target.sample_clause
          )
          base_eval_ctx = row_builder.create_eval_ctx(base_analyzer.all_exprs)
          plan = cls._create_query_plan(
              row_builder=row_builder,
              analyzer=base_analyzer,
              eval_ctx=base_eval_ctx,
-             limit=limit,
-             sample_clause=sample_clause,
              with_pk=True,
              exact_version_only=view.get_bases() if propagates_insert else [],
          )
@@ -818,62 +800,6 @@
          prefetch_node = exec.CachePrefetchNode(tbl_id, file_col_info, input_node)
          return prefetch_node

-     @classmethod
-     def create_sample_clauses(
-         cls,
-         from_clause: FromClause,
-         sample_clause: SampleClause,
-         where_clause: Optional[exprs.Expr],
-         group_by_clause: Optional[list[exprs.Expr]],
-         order_by_clause: Optional[list[tuple[exprs.Expr, bool]]],
-         limit: Optional[exprs.Expr],
-     ) -> SamplingClauses:
-         """tuple[
-         exprs.Expr,
-         Optional[list[exprs.Expr]],
-         Optional[list[tuple[exprs.Expr, bool]]],
-         Optional[exprs.Expr],
-         Optional[SampleClause],
-         ]:"""
-         """Construct clauses required for sampling under various conditions.
-         If there is no sampling, then return the original clauses.
-         If the sample is stratified, then return only the group by clause. The rest of the
-         mechanism for stratified sampling is provided by the SampleSqlNode.
-         If the sample is non-stratified, then rewrite the query to accommodate the supplied where clause,
-         and provide the other clauses required for sampling
-         """
-
-         # If no sample clause, return the original clauses
-         if sample_clause is None:
-             return SamplingClauses(where_clause, group_by_clause, order_by_clause, limit, None)
-
-         # If the sample clause is stratified, create a group by clause
-         if sample_clause.is_stratified:
-             group_by = sample_clause.stratify_exprs
-             # Note that limit is not possible here
-             return SamplingClauses(where_clause, group_by, order_by_clause, None, sample_clause)
-
-         else:
-             # If non-stratified sampling, construct a where clause, order_by, and limit clauses
-             # Construct an expression for sorting rows and limiting row counts
-             s_key = sample_key(
-                 exprs.Literal(sample_clause.seed), *cls.rowid_columns(from_clause._first_tbl.tbl_version)
-             )
-
-             # Construct a suitable where clause
-             where = where_clause
-             if sample_clause.fraction is not None:
-                 fraction_md5_hex = exprs.Expr.from_object(
-                     sample_clause.fraction_to_md5_hex(float(sample_clause.fraction))
-                 )
-                 f_where = s_key < fraction_md5_hex
-                 where = where & f_where if where is not None else f_where
-
-             order_by: list[tuple[exprs.Expr, bool]] = [(s_key, True)]
-             limit = exprs.Literal(sample_clause.n)
-             # Note that group_by is not possible here
-             return SamplingClauses(where, None, order_by, limit, None)
-
      @classmethod
      def create_query_plan(
          cls,
@@ -898,21 +824,15 @@
          if exact_version_only is None:
              exact_version_only = []

-         # Modify clauses to include sample clause
-         where, group_by_clause, order_by_clause, limit, sample = cls.create_sample_clauses(
-             from_clause, sample_clause, where_clause, group_by_clause, order_by_clause, limit
-         )
-
          analyzer = Analyzer(
              from_clause,
              select_list,
-             where_clause=where,
+             where_clause=where_clause,
              group_by_clause=group_by_clause,
              order_by_clause=order_by_clause,
+             sample_clause=sample_clause,
          )
          row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [])
-         if sample_clause is not None and analyzer.filter is not None:
-             raise excs.Error(f'Filter {analyzer.filter} not expressible in SQL')

          analyzer.finalize(row_builder)
          # select_list: we need to materialize everything that's been collected
@@ -923,7 +843,6 @@
              analyzer=analyzer,
              eval_ctx=eval_ctx,
              limit=limit,
-             sample_clause=sample,
              with_pk=True,
              exact_version_only=exact_version_only,
          )
@@ -939,7 +858,6 @@
          analyzer: Analyzer,
          eval_ctx: exprs.RowBuilder.EvalCtx,
          limit: Optional[exprs.Expr] = None,
-         sample_clause: Optional[SampleClause] = None,
          with_pk: bool = False,
          exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
      ) -> exec.ExecNode:
@@ -966,6 +884,7 @@
          # - join clause subexprs
          # - subexprs of Where clause conjuncts that can't be run in SQL
          # - all grouping exprs
+         # - all stratify exprs
          candidates = list(
              exprs.Expr.list_subexprs(
                  analyzer.select_list,
@@ -980,10 +899,12 @@
          candidates.extend(
              exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
          )
-         if analyzer.group_by_clause is not None:
-             candidates.extend(
-                 exprs.Expr.list_subexprs(analyzer.group_by_clause, filter=sql_elements.contains, traverse_matches=False)
-             )
+         candidates.extend(
+             exprs.Expr.list_subexprs(analyzer.grouping_exprs, filter=sql_elements.contains, traverse_matches=False)
+         )
+         candidates.extend(
+             exprs.Expr.list_subexprs(analyzer.stratify_exprs, filter=sql_elements.contains, traverse_matches=False)
+         )
          # not isinstance(...): we don't want to materialize Literals via a Select
          sql_exprs = exprs.ExprSet(e for e in candidates if not isinstance(e, exprs.Literal))

@@ -1028,6 +949,15 @@
              # we need to order the input for window functions
              plan.set_order_by(analyzer.get_window_fn_ob_clause())

+         if analyzer.sample_clause is not None:
+             plan = exec.SqlSampleNode(
+                 row_builder,
+                 input=plan,
+                 select_list=tbl_scan_exprs,
+                 sample_clause=analyzer.sample_clause,
+                 stratify_exprs=analyzer.stratify_exprs,
+             )
+
          plan = cls._insert_prefetch_node(tbl.tbl_version.id, row_builder, plan)

          if analyzer.group_by_clause is not None:
@@ -1050,26 +980,12 @@
              sql_elements.contains_all(analyzer.select_list)
              and sql_elements.contains_all(analyzer.grouping_exprs)
              and isinstance(plan, exec.SqlNode)
-             and plan.to_cte(keep_pk=(sample_clause is not None)) is not None
+             and plan.to_cte() is not None
          ):
-             if sample_clause is not None:
-                 plan = exec.SqlSampleNode(
-                     row_builder,
-                     input=plan,
-                     select_list=analyzer.select_list,
-                     stratify_exprs=analyzer.group_by_clause,
-                     sample_clause=sample_clause,
-                 )
-             else:
-                 plan = exec.SqlAggregationNode(
-                     row_builder,
-                     input=plan,
-                     select_list=analyzer.select_list,
-                     group_by_items=analyzer.group_by_clause,
-                 )
+             plan = exec.SqlAggregationNode(
+                 row_builder, input=plan, select_list=analyzer.select_list, group_by_items=analyzer.group_by_clause
+             )
          else:
-             if sample_clause is not None:
-                 raise excs.Error('Sample clause not supported with Python aggregation')
              input_sql_node = plan.get_node(exec.SqlNode)
              assert combined_ordering is not None
              input_sql_node.set_order_by(combined_ordering)
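
The deleted create_sample_clauses method documents the non-stratified sampling scheme: each row gets an md5-based sample key (sample_key over the seed and rowid columns), rows are ordered by that key, and a sampling fraction becomes an upper bound on it via fraction_to_md5_hex, whose return expression survives in SampleClause above. A worked example of the threshold construction; the scaling of threshold_int to 32 bits is an assumption here, inferred from the '08x' formatting:

    def fraction_to_md5_hex(fraction: float) -> str:
        # assumed scaling: map the fraction onto an 8-hex-digit (32-bit) prefix
        threshold_int = int(fraction * (1 << 32))
        # pad with 'f's to the 32-hex-digit width of an md5 digest, as in SampleClause
        return format(threshold_int, '08x') + 'ffffffffffffffffffffffff'


    print(fraction_to_md5_hex(0.25))  # 40000000ffffffffffffffffffffffff
    # rows whose hex sample key sorts below this bound are kept, i.e. ~25% of them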
pixeltable/share/packager.py CHANGED
@@ -127,7 +127,7 @@ class TablePackager:
          # We use snappy compression for the Parquet tables; the entire bundle will be bzip2-compressed later, so
          # faster compression should provide good performance while still reducing temporary storage utilization.
          parquet_writer = pq.ParquetWriter(parquet_file, parquet_schema, compression='SNAPPY')
-         filter_tv = self.table._tbl_version.get()
+         filter_tv = self.table._tbl_version_path.tbl_version.get()
          row_iter = tv.store_tbl.dump_rows(tv.version, filter_tv.store_tbl, filter_tv.version)
          for pa_table in self.__to_pa_tables(row_iter, sql_types, media_cols, parquet_schema):
              parquet_writer.write_table(pa_table)
@@ -238,7 +238,7 @@ class TablePackager:
          - Documents are replaced by a thumbnail as a base64-encoded webp
          """
          # First 8 columns
-         preview_cols = dict(itertools.islice(self.table._schema.items(), 0, 8))
+         preview_cols = dict(itertools.islice(self.table._get_schema().items(), 0, 8))
          select_list = [self.table[col_name] for col_name in preview_cols]
          # First 5 rows
          rows = list(self.table.select(*select_list).head(n=5))
pixeltable/type_system.py CHANGED
@@ -395,6 +395,36 @@ class ColumnType:
              raise excs.Error(f'Standard Python type `{name}` cannot be used here; use `{suggestion}` instead')
          raise excs.Error(f'Unknown type: {t}')

+     @classmethod
+     def from_json_schema(cls, schema: dict[str, Any]) -> Optional[ColumnType]:
+         # We first express the JSON schema as a Python type, and then convert it to a Pixeltable type.
+         # TODO: Is there a meaningful fallback if one of these operations fails? (Maybe another use case for a pxt Any
+         # type?)
+         py_type = cls.__json_schema_to_py_type(schema)
+         return cls.from_python_type(py_type) if py_type is not None else None
+
+     @classmethod
+     def __json_schema_to_py_type(cls, schema: dict[str, Any]) -> Union[type, _GenericAlias, None]:
+         if 'type' in schema:
+             if schema['type'] == 'null':
+                 return type(None)
+             if schema['type'] == 'string':
+                 return str
+             if schema['type'] == 'integer':
+                 return int
+             if schema['type'] == 'number':
+                 return float
+             if schema['type'] == 'boolean':
+                 return bool
+             if schema['type'] in ('array', 'object'):
+                 return list
+         elif 'anyOf' in schema:
+             subscripts = tuple(cls.__json_schema_to_py_type(subschema) for subschema in schema['anyOf'])
+             if all(subscript is not None for subscript in subscripts):
+                 return Union[subscripts]
+
+         return None
+
      def validate_literal(self, val: Any) -> None:
          """Raise TypeError if val is not a valid literal for this type"""
          if val is None:
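
A usage sketch for the new ColumnType.from_json_schema helper, following the mapping above; the concrete ColumnType instances returned are pixeltable internals, so the expected results are stated loosely:

    from pixeltable.type_system import ColumnType

    t1 = ColumnType.from_json_schema({'type': 'string'})  # a pixeltable String type
    t2 = ColumnType.from_json_schema({'anyOf': [{'type': 'integer'}, {'type': 'null'}]})
    # 'anyOf' with 'null' goes through Union[int, None], i.e. an optional Int type
    t3 = ColumnType.from_json_schema({'type': 'no-such-kind'})  # unmapped -> None
    print(t1, t2, t3)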
{pixeltable-0.4.0rc2.dist-info → pixeltable-0.4.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: pixeltable
- Version: 0.4.0rc2
+ Version: 0.4.1
  Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
  License: Apache-2.0
  Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai