pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (99)
  1. pixeltable/__init__.py +18 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +31 -50
  4. pixeltable/catalog/insertable_table.py +7 -6
  5. pixeltable/catalog/table.py +171 -57
  6. pixeltable/catalog/table_version.py +417 -140
  7. pixeltable/catalog/table_version_path.py +2 -2
  8. pixeltable/dataframe.py +239 -121
  9. pixeltable/env.py +82 -16
  10. pixeltable/exec/__init__.py +2 -1
  11. pixeltable/exec/cache_prefetch_node.py +1 -1
  12. pixeltable/exec/data_row_batch.py +6 -7
  13. pixeltable/exec/expr_eval_node.py +28 -28
  14. pixeltable/exec/in_memory_data_node.py +11 -7
  15. pixeltable/exec/sql_scan_node.py +7 -6
  16. pixeltable/exprs/__init__.py +4 -3
  17. pixeltable/exprs/column_ref.py +9 -0
  18. pixeltable/exprs/comparison.py +3 -3
  19. pixeltable/exprs/data_row.py +5 -1
  20. pixeltable/exprs/expr.py +15 -7
  21. pixeltable/exprs/function_call.py +17 -15
  22. pixeltable/exprs/image_member_access.py +9 -28
  23. pixeltable/exprs/in_predicate.py +96 -0
  24. pixeltable/exprs/inline_array.py +13 -11
  25. pixeltable/exprs/inline_dict.py +15 -13
  26. pixeltable/exprs/literal.py +16 -4
  27. pixeltable/exprs/row_builder.py +15 -41
  28. pixeltable/exprs/similarity_expr.py +65 -0
  29. pixeltable/ext/__init__.py +5 -0
  30. pixeltable/ext/functions/yolox.py +92 -0
  31. pixeltable/func/__init__.py +0 -2
  32. pixeltable/func/aggregate_function.py +18 -15
  33. pixeltable/func/callable_function.py +57 -13
  34. pixeltable/func/expr_template_function.py +20 -3
  35. pixeltable/func/function.py +35 -4
  36. pixeltable/func/globals.py +24 -14
  37. pixeltable/func/signature.py +23 -27
  38. pixeltable/func/udf.py +13 -12
  39. pixeltable/functions/__init__.py +8 -8
  40. pixeltable/functions/eval.py +7 -8
  41. pixeltable/functions/huggingface.py +64 -17
  42. pixeltable/functions/openai.py +36 -3
  43. pixeltable/functions/pil/image.py +61 -64
  44. pixeltable/functions/together.py +21 -0
  45. pixeltable/functions/util.py +11 -0
  46. pixeltable/globals.py +425 -0
  47. pixeltable/index/__init__.py +2 -0
  48. pixeltable/index/base.py +51 -0
  49. pixeltable/index/embedding_index.py +168 -0
  50. pixeltable/io/__init__.py +3 -0
  51. pixeltable/{utils → io}/hf_datasets.py +48 -17
  52. pixeltable/io/pandas.py +148 -0
  53. pixeltable/{utils → io}/parquet.py +58 -33
  54. pixeltable/iterators/__init__.py +1 -1
  55. pixeltable/iterators/base.py +4 -0
  56. pixeltable/iterators/document.py +218 -97
  57. pixeltable/iterators/video.py +8 -9
  58. pixeltable/metadata/__init__.py +7 -3
  59. pixeltable/metadata/converters/convert_12.py +3 -0
  60. pixeltable/metadata/converters/convert_13.py +41 -0
  61. pixeltable/metadata/schema.py +45 -22
  62. pixeltable/plan.py +15 -51
  63. pixeltable/store.py +38 -41
  64. pixeltable/tool/create_test_db_dump.py +39 -4
  65. pixeltable/type_system.py +47 -96
  66. pixeltable/utils/documents.py +42 -12
  67. pixeltable/utils/http_server.py +70 -0
  68. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
  69. pixeltable-0.2.6.dist-info/RECORD +119 -0
  70. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
  71. pixeltable/client.py +0 -604
  72. pixeltable/exprs/image_similarity_predicate.py +0 -58
  73. pixeltable/func/batched_function.py +0 -53
  74. pixeltable/tests/conftest.py +0 -177
  75. pixeltable/tests/functions/test_fireworks.py +0 -42
  76. pixeltable/tests/functions/test_functions.py +0 -60
  77. pixeltable/tests/functions/test_huggingface.py +0 -158
  78. pixeltable/tests/functions/test_openai.py +0 -152
  79. pixeltable/tests/functions/test_together.py +0 -111
  80. pixeltable/tests/test_audio.py +0 -65
  81. pixeltable/tests/test_catalog.py +0 -27
  82. pixeltable/tests/test_client.py +0 -21
  83. pixeltable/tests/test_component_view.py +0 -370
  84. pixeltable/tests/test_dataframe.py +0 -439
  85. pixeltable/tests/test_dirs.py +0 -107
  86. pixeltable/tests/test_document.py +0 -120
  87. pixeltable/tests/test_exprs.py +0 -805
  88. pixeltable/tests/test_function.py +0 -324
  89. pixeltable/tests/test_migration.py +0 -43
  90. pixeltable/tests/test_nos.py +0 -54
  91. pixeltable/tests/test_snapshot.py +0 -208
  92. pixeltable/tests/test_table.py +0 -1267
  93. pixeltable/tests/test_transactional_directory.py +0 -42
  94. pixeltable/tests/test_types.py +0 -22
  95. pixeltable/tests/test_video.py +0 -159
  96. pixeltable/tests/test_view.py +0 -530
  97. pixeltable/tests/utils.py +0 -408
  98. pixeltable-0.2.4.dist-info/RECORD +0 -132
  99. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
pixeltable/metadata/schema.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Optional, List, Dict, get_type_hints, Type, Any, TypeVar, Tuple, Union
+ from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
  import platform
  import uuid
  import dataclasses
@@ -71,16 +71,43 @@ class Dir(Base):


  @dataclasses.dataclass
- class ColumnHistory:
+ class ColumnMd:
      """
-     Records when a column was added/dropped, which is needed to GC unreachable storage columns
-     (a column that was added after table snapshot n and dropped before table snapshot n+1 can be removed
-     from the stored table).
-     One record per column (across all schema versions).
+     Records the non-versioned metadata of a column.
+     - immutable attributes: type, primary key, etc.
+     - when a column was added/dropped, which is needed to GC unreachable storage columns
+       (a column that was added after table snapshot n and dropped before table snapshot n+1 can be removed
+       from the stored table).
      """
-     col_id: int
+     id: int
      schema_version_add: int
      schema_version_drop: Optional[int]
+     col_type: dict
+
+     # if True, is part of the primary key
+     is_pk: bool
+
+     # if set, this is a computed column
+     value_expr: Optional[dict]
+
+     # if True, the column is present in the stored table
+     stored: Optional[bool]
+
+
+ @dataclasses.dataclass
+ class IndexMd:
+     """
+     Metadata needed to instantiate an EmbeddingIndex
+     """
+     id: int
+     name: str
+     indexed_col_id: int # column being indexed
+     index_val_col_id: int # column holding the values to be indexed
+     index_val_undo_col_id: int # column holding index values for deleted rows
+     schema_version_add: int
+     schema_version_drop: Optional[int]
+     class_fqn: str
+     init_args: dict[str, Any]


  @dataclasses.dataclass
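The docstring's GC rule reduces to a lifetime check: a dropped storage column becomes collectible once no table snapshot falls between its add and drop versions. A minimal sketch of that check (helper name and snapshot representation are hypothetical, not pixeltable's implementation):

```python
import dataclasses
from typing import Optional

@dataclasses.dataclass
class ColumnMd:  # trimmed to the fields the check needs
    id: int
    schema_version_add: int
    schema_version_drop: Optional[int]

def is_gc_candidate(col: ColumnMd, snapshot_versions: list[int]) -> bool:
    """True if no snapshot can still see this column's stored data."""
    if col.schema_version_drop is None:
        return False  # column is still live
    # still reachable only if some snapshot falls within [add, drop)
    return not any(
        col.schema_version_add <= v < col.schema_version_drop
        for v in snapshot_versions
    )

# added after the snapshot at version 3, dropped before the snapshot at version 7
assert is_gc_candidate(ColumnMd(0, 4, 6), snapshot_versions=[3, 7])
```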
@@ -91,13 +118,13 @@ class ViewMd:
      base_versions: List[Tuple[str, Optional[int]]]

      # filter predicate applied to the base table; view-only
-     predicate: Optional[Dict[str, Any]]
+     predicate: Optional[dict[str, Any]]

      # ComponentIterator subclass; only for component views
      iterator_class_fqn: Optional[str]

      # args to pass to the iterator class constructor; only for component views
-     iterator_args: Optional[Dict[str, Any]]
+     iterator_args: Optional[dict[str, Any]]


  @dataclasses.dataclass
@@ -109,15 +136,15 @@ class TableMd:
      # each version has a corresponding schema version (current_version >= current_schema_version)
      current_schema_version: int

-     # used to assign Column.id
-     next_col_id: int
+     next_col_id: int # used to assign Column.id
+     next_idx_id: int # used to assign IndexMd.id

      # - used to assign the rowid column in the storage table
      # - every row is assigned a unique and immutable rowid on insertion
      next_row_id: int

-     column_history: Dict[int, ColumnHistory] # col_id -> ColumnHistory
-
+     column_md: dict[int, ColumnMd] # col_id -> ColumnMd
+     index_md: dict[int, IndexMd] # index_id -> IndexMd
      view_md: Optional[ViewMd]


@@ -155,24 +182,20 @@ class TableVersion(Base):
  @dataclasses.dataclass
  class SchemaColumn:
      """
-     Records the logical (user-visible) schema of a table.
-     Contains the full set of columns for each new schema version: one record per (column x schema version).
+     Records the versioned metadata of a column.
      """
      pos: int
      name: str
-     col_type: dict
-     is_pk: bool
-     value_expr: Optional[dict]
-     stored: Optional[bool]
-     # if True, creates vector index for this column
-     is_indexed: bool


  @dataclasses.dataclass
  class TableSchemaVersionMd:
+     """
+     Records all versioned table metadata.
+     """
      schema_version: int
      preceding_schema_version: Optional[int]
-     columns: Dict[int, SchemaColumn] # col_id -> SchemaColumn
+     columns: dict[int, SchemaColumn] # col_id -> SchemaColumn
      num_retained_versions: int
      comment: str

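Net effect of these schema.py hunks: per-column metadata is split between TableMd.column_md (non-versioned: type, primary key, value expr) and TableSchemaVersionMd.columns (versioned: name and position per schema version), so resolving a column at a schema version means joining the two maps on the column id. A sketch with trimmed stand-ins for the dataclasses above (describe_column is illustrative only):

```python
import dataclasses

@dataclasses.dataclass
class ColumnMd:          # non-versioned half (trimmed)
    id: int
    col_type: dict
    is_pk: bool

@dataclasses.dataclass
class SchemaColumn:      # versioned half: one per (column, schema version)
    pos: int
    name: str

def describe_column(column_md: dict[int, ColumnMd],
                    columns: dict[int, SchemaColumn], col_id: int) -> dict:
    nv, v = column_md[col_id], columns[col_id]  # join on col_id
    return {'name': v.name, 'pos': v.pos, 'type': nv.col_type, 'is_pk': nv.is_pk}

print(describe_column(
    {0: ColumnMd(id=0, col_type={'_classname': 'StringType'}, is_pk=False)},
    {0: SchemaColumn(pos=0, name='c1')},
    col_id=0,
))
```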
pixeltable/plan.py CHANGED
@@ -60,24 +60,10 @@ class Analyzer:
          # filter predicate applied to output rows of the SQL scan
          self.filter: Optional[exprs.Predicate] = None
          # not executable
-         self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
+         #self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
          if where_clause is not None:
              where_clause_conjuncts, self.filter = where_clause.split_conjuncts(lambda e: e.sql_expr() is not None)
              self.sql_where_clause = exprs.CompoundPredicate.make_conjunction(where_clause_conjuncts)
-             if self.filter is not None:
-                 similarity_clauses, self.filter = self.filter.split_conjuncts(
-                     lambda e: isinstance(e, exprs.ImageSimilarityPredicate))
-                 if len(similarity_clauses) > 1:
-                     raise excs.Error(f'More than one nearest() not supported')
-                 if len(similarity_clauses) == 1:
-                     if len(self.order_by_clause) > 0:
-                         raise excs.Error((
-                             f'nearest() returns results in order of proximity and cannot be used in conjunction with '
-                             f'order_by()'))
-                     self.similarity_clause = similarity_clauses[0]
-                     img_col = self.similarity_clause.img_col_ref.col
-                     if not img_col.is_indexed:
-                         raise excs.Error(f'nearest() not available for unindexed column {img_col.name}')

          # all exprs that are evaluated in Python; not executable
          self.all_exprs = self.select_list.copy()
@@ -203,8 +189,6 @@ class Planner:
          refd_tbl_ids: Set[UUID] = set()
          if where_clause is not None:
              analyzer = cls.analyze(tbl, where_clause)
-             if analyzer.similarity_clause is not None:
-                 raise excs.Error('nearest() cannot be used with count()')
              if analyzer.filter is not None:
                  raise excs.Error(f'Filter {analyzer.filter} not expressible in SQL')
              clause_element = analyzer.sql_where_clause.sql_expr()
@@ -220,18 +204,11 @@ class Planner:
      ) -> exec.ExecNode:
          """Creates a plan for TableVersion.insert()"""
          assert not tbl.is_view()
-         # things we need to materialize:
-         # 1. stored_cols: all cols we need to store, incl computed cols (and indices)
+         # stored_cols: all cols we need to store, incl computed cols (and indices)
          stored_cols = [c for c in tbl.cols if c.is_stored]
          assert len(stored_cols) > 0
-         # 2. values to insert into indices
-         indexed_cols = [c for c in tbl.cols if c.is_indexed]
-         index_info: List[Tuple[catalog.Column, func.Function]] = []
-         if len(indexed_cols) > 0:
-             from pixeltable.functions.nos.image_embedding import openai_clip
-             index_info = [(c, openai_clip) for c in tbl.cols if c.is_indexed]

-         row_builder = exprs.RowBuilder([], stored_cols, index_info, [])
+         row_builder = exprs.RowBuilder([], stored_cols, [])

          # create InMemoryDataNode for 'rows'
          stored_col_info = row_builder.output_slot_idxs()
@@ -260,7 +237,7 @@ class Planner:
      @classmethod
      def create_update_plan(
          cls, tbl: catalog.TableVersionPath,
-         update_targets: List[Tuple[catalog.Column, exprs.Expr]],
+         update_targets: dict[catalog.Column, exprs.Expr],
          recompute_targets: List[catalog.Column],
          where_clause: Optional[exprs.Predicate], cascade: bool
      ) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
@@ -279,7 +256,7 @@ class Planner:
          # retrieve all stored cols and all target exprs
          assert isinstance(tbl, catalog.TableVersionPath)
          target = tbl.tbl_version # the one we need to update
-         updated_cols = [col for col, _ in update_targets]
+         updated_cols = list(update_targets.keys())
          if len(recompute_targets) > 0:
              recomputed_cols = recompute_targets.copy()
          else:
@@ -291,12 +268,12 @@ class Planner:
              col for col in target.cols if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
          ]
          select_list = [exprs.ColumnRef(col) for col in copied_cols]
-         select_list.extend([expr for _, expr in update_targets])
+         select_list.extend(update_targets.values())

          recomputed_exprs = \
              [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
          # recomputed cols reference the new values of the updated cols
-         for col, e in update_targets:
+         for col, e in update_targets.items():
              exprs.Expr.list_substitute(recomputed_exprs, exprs.ColumnRef(col), e)
          select_list.extend(recomputed_exprs)

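The signature change in create_update_plan (list of (Column, Expr) pairs → dict) is what enables the .keys()/.values()/.items() rewrites in the two hunks above, and it rules out duplicate targets for the same column by construction. A toy illustration of the new shape, with plain strings standing in for catalog.Column and exprs.Expr:

```python
# stand-ins: column name -> new value expression
update_targets: dict[str, str] = {'price': 'price * 1.1', 'name': 'upper(name)'}

updated_cols = list(update_targets.keys())    # columns being updated
select_list = list(update_targets.values())   # their new value expressions
for col, expr in update_targets.items():      # one substitution pass per target
    print(f'{col} <- {expr}')
```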
@@ -375,16 +352,10 @@ class Planner:
          # the store
          target = view.tbl_version # the one we need to populate
          stored_cols = [c for c in target.cols if c.is_stored and (c.is_computed or target.is_iterator_column(c))]
-         # 2. index values
-         indexed_cols = [c for c in target.cols if c.is_indexed]
-         index_info: List[Tuple[catalog.Column, func.Function]] = []
-         if len(indexed_cols) > 0:
-             from pixeltable.functions.nos.image_embedding import openai_clip
-             index_info = [(c, openai_clip) for c in target.cols if c.is_indexed]
-         # 3. for component views: iterator args
+         # 2. for component views: iterator args
          iterator_args = [target.iterator_args] if target.iterator_args is not None else []

-         row_builder = exprs.RowBuilder(iterator_args, stored_cols, index_info, [])
+         row_builder = exprs.RowBuilder(iterator_args, stored_cols, [])

          # execution plan:
          # 1. materialize exprs computed from the base that are needed for stored view columns
@@ -548,7 +519,7 @@ class Planner:
          analyzer = Analyzer(
              tbl, select_list, where_clause=where_clause, group_by_clause=group_by_clause,
              order_by_clause=order_by_clause)
-         row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [], analyzer.sql_exprs)
+         row_builder = exprs.RowBuilder(analyzer.all_exprs, [], analyzer.sql_exprs)

          analyzer.finalize(row_builder)
          # select_list: we need to materialize everything that's been collected
@@ -582,7 +553,7 @@ class Planner:
          sql_select_list = analyzer.sql_exprs.copy()
          plan = exec.SqlScanNode(
              tbl, row_builder, select_list=sql_select_list, where_clause=analyzer.sql_where_clause,
-             filter=analyzer.filter, similarity_clause=analyzer.similarity_clause, order_by_items=order_by_items,
+             filter=analyzer.filter, order_by_items=order_by_items,
              limit=sql_limit, set_pk=with_pk, exact_version_only=exact_version_only)
          plan = cls._insert_prefetch_node(tbl.tbl_version.id, analyzer.select_list, row_builder, plan)

@@ -627,21 +598,15 @@ class Planner:
      @classmethod
      def create_add_column_plan(
          cls, tbl: catalog.TableVersionPath, col: catalog.Column
-     ) -> Tuple[exec.ExecNode, Optional[int], Optional[int]]:
+     ) -> Tuple[exec.ExecNode, Optional[int]]:
          """Creates a plan for InsertableTable.add_column()
          Returns:
              plan: the plan to execute
-             ctx: the context to use for the plan
              value_expr slot idx for the plan output (for computed cols)
-             embedding slot idx for the plan output (for indexed image cols)
          """
          assert isinstance(tbl, catalog.TableVersionPath)
          index_info: List[Tuple[catalog.Column, func.Function]] = []
-         if col.is_indexed:
-             from pixeltable.functions.nos.image_embedding import openai_clip
-             index_info = [(col, openai_clip)]
-         row_builder = exprs.RowBuilder(
-             output_exprs=[], columns=[col], indices=index_info, input_exprs=[])
+         row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
          analyzer = Analyzer(tbl, row_builder.default_eval_ctx.target_exprs)
          plan = cls._create_query_plan(tbl, row_builder=row_builder, analyzer=analyzer, with_pk=True)
          plan.ctx.batch_size = 16
@@ -651,6 +616,5 @@ class Planner:
          # we want to flush images
          if col.is_computed and col.is_stored and col.col_type.is_image_type():
              plan.set_stored_img_cols(row_builder.output_slot_idxs())
-         value_expr_slot_idx: Optional[int] = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
-         embedding_slot_idx: Optional[int] = row_builder.index_slot_idxs()[0].slot_idx if col.is_indexed else None
-         return plan, value_expr_slot_idx, embedding_slot_idx
+         value_expr_slot_idx = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
+         return plan, value_expr_slot_idx
pixeltable/store.py CHANGED
@@ -38,7 +38,7 @@ class StoreBase:
          self.tbl_version = tbl_version
          self.sa_md = sql.MetaData()
          self.sa_tbl: Optional[sql.Table] = None
-         self._create_sa_tbl()
+         self.create_sa_tbl()

      def pk_columns(self) -> List[sql.Column]:
          return self._pk_columns
@@ -62,7 +62,7 @@ class StoreBase:
          return [*rowid_cols, self.v_min_col, self.v_max_col]


-     def _create_sa_tbl(self) -> None:
+     def create_sa_tbl(self) -> None:
          """Create self.sa_tbl from self.tbl_version."""
          system_cols = self._create_system_columns()
          all_cols = system_cols.copy()
@@ -76,9 +76,6 @@ class StoreBase:
              all_cols.append(col.sa_errormsg_col)
              all_cols.append(col.sa_errortype_col)

-             if col.is_indexed:
-                 all_cols.append(col.sa_idx_col)
-
          # we create an index for:
          # - scalar columns (except for strings, because long strings can't be used for B-tree indices)
          # - non-computed video and image columns (they will contain external paths/urls that users might want to
@@ -145,8 +142,8 @@ class StoreBase:
          """Move tmp media files that we generated to a permanent location"""
          for c in media_cols:
              for table_row in table_rows:
-                 file_url = table_row[c.storage_name()]
-                 table_row[c.storage_name()] = self._move_tmp_media_file(file_url, c, v_min)
+                 file_url = table_row[c.store_name()]
+                 table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)

      def _create_table_row(
          self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, media_cols: List[catalog.Column],
@@ -168,16 +165,19 @@ class StoreBase:

          return table_row, num_excs

-     def count(self) -> None:
+     def count(self, conn: Optional[sql.engine.Connection] = None) -> int:
          """Return the number of rows visible in self.tbl_version"""
          stmt = sql.select(sql.func.count('*'))\
              .select_from(self.sa_tbl)\
              .where(self.v_min_col <= self.tbl_version.version)\
              .where(self.v_max_col > self.tbl_version.version)
-         with env.Env.get().engine.begin() as conn:
+         if conn is None:
+             with env.Env.get().engine.connect() as conn:
+                 result = conn.execute(stmt).scalar_one()
+         else:
              result = conn.execute(stmt).scalar_one()
-         assert isinstance(result, int)
-         return result
+         assert isinstance(result, int)
+         return result

      def create(self, conn: sql.engine.Connection) -> None:
          self.sa_md.create_all(bind=conn)
193
193
  message).
194
194
  """
195
195
  assert col.is_stored
196
- stmt = sql.text(f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.storage_name()} {col.col_type.to_sql()}')
196
+ col_type_str = col.get_sa_col_type().compile(dialect=conn.dialect)
197
+ stmt = sql.text(f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL')
197
198
  log_stmt(_logger, stmt)
198
199
  conn.execute(stmt)
199
- added_storage_cols = [col.storage_name()]
200
+ added_storage_cols = [col.store_name()]
200
201
  if col.records_errors:
201
202
  # we also need to create the errormsg and errortype storage cols
202
203
  stmt = (f'ALTER TABLE {self._storage_name()} '
203
- f'ADD COLUMN {col.errormsg_storage_name()} {StringType().to_sql()} DEFAULT NULL')
204
+ f'ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL')
204
205
  conn.execute(sql.text(stmt))
205
206
  stmt = (f'ALTER TABLE {self._storage_name()} '
206
- f'ADD COLUMN {col.errortype_storage_name()} {StringType().to_sql()} DEFAULT NULL')
207
+ f'ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL')
207
208
  conn.execute(sql.text(stmt))
208
- added_storage_cols.extend([col.errormsg_storage_name(), col.errortype_storage_name()])
209
- self._create_sa_tbl()
209
+ added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
210
+ self.create_sa_tbl()
210
211
  _logger.info(f'Added columns {added_storage_cols} to storage table {self._storage_name()}')
211
212
 
212
- def drop_column(self, col: Optional[catalog.Column] = None, conn: Optional[sql.engine.Connection] = None) -> None:
213
- """Re-create self.sa_tbl and drop column, if one is given"""
214
- if col is not None:
215
- assert conn is not None
216
- stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.storage_name()}'
213
+ def drop_column(self, col: catalog.Column, conn: sql.engine.Connection) -> None:
214
+ """Execute Alter Table Drop Column statement"""
215
+ stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.store_name()}'
216
+ conn.execute(sql.text(stmt))
217
+ if col.records_errors:
218
+ stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errormsg_store_name()}'
219
+ conn.execute(sql.text(stmt))
220
+ stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errortype_store_name()}'
217
221
  conn.execute(sql.text(stmt))
218
- if col.records_errors:
219
- stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errormsg_storage_name()}'
220
- conn.execute(sql.text(stmt))
221
- stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errortype_storage_name()}'
222
- conn.execute(sql.text(stmt))
223
- self._create_sa_tbl()
224
222
 
225
223
  def load_column(
226
- self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, embedding_slot_idx: int,
227
- conn: sql.engine.Connection
224
+ self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, conn: sql.engine.Connection
228
225
  ) -> int:
229
226
  """Update store column of a computed column with values produced by an execution plan
230
227
 
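add_column now obtains the DDL type string by compiling the column's SQLAlchemy type against the live dialect, replacing the old hand-rolled col_type.to_sql(). The underlying SQLAlchemy mechanism in isolation (table and column names are made up):

```python
import sqlalchemy as sql
from sqlalchemy.dialects import postgresql

# TypeEngine.compile() renders an abstract type as dialect-specific DDL text
col_type = sql.String()
col_type_str = col_type.compile(dialect=postgresql.dialect())
print(f'ALTER TABLE my_tbl ADD COLUMN my_col {col_type_str} NULL')
# -> ALTER TABLE my_tbl ADD COLUMN my_col VARCHAR NULL
```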
@@ -253,18 +250,11 @@ class StoreBase:
                  col.sa_errormsg_col: error_msg
              }
          else:
-             val = result_row.get_stored_val(value_expr_slot_idx)
+             val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
              if col.col_type.is_media_type():
                  val = self._move_tmp_media_file(val, col, result_row.pk[-1])
              values_dict = {col.sa_col: val}

-         if col.is_indexed:
-             # TODO: deal with exceptions
-             assert not result_row.has_exc(embedding_slot_idx)
-             # don't use get_stored_val() here, we need to pass the ndarray
-             embedding = result_row[embedding_slot_idx]
-             values_dict[col.sa_index_col] = embedding
-
          update_stmt = sql.update(self.sa_tbl).values(values_dict)
          for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
              update_stmt = update_stmt.where(pk_col == pk_val)
@@ -337,6 +327,7 @@ class StoreBase:
          self, current_version: int, base_versions: List[Optional[int]], match_on_vmin: bool,
          where_clause: Optional[sql.ClauseElement], conn: sql.engine.Connection) -> int:
          """Mark rows as deleted that are live and were created prior to current_version.
+         Also: populate the undo columns
          Args:
              base_versions: if non-None, join only to base rows that were created at that version,
                  otherwise join to rows that are live in the base's current version (which is distinct from the
@@ -354,8 +345,14 @@ class StoreBase:
          rowid_join_clause = self._rowid_join_predicate()
          base_versions_clause = sql.true() if len(base_versions) == 0 \
              else self.base._versions_clause(base_versions, match_on_vmin)
+         set_clause = {self.v_max_col: current_version}
+         for index_info in self.tbl_version.idxs_by_name.values():
+             # copy value column to undo column
+             set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
+             # set value column to NULL
+             set_clause[index_info.val_col.sa_col] = None
          stmt = sql.update(self.sa_tbl) \
-             .values({self.v_max_col: current_version}) \
+             .values(set_clause) \
              .where(where_clause) \
              .where(rowid_join_clause) \
              .where(base_versions_clause)
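The set_clause built above turns row deletion into a single soft-delete UPDATE: v_max is stamped with the deleting version, and each embedding index's live value column is moved into its undo column (SQL UPDATE evaluates right-hand sides against the pre-update row, at least on PostgreSQL, so the copy and the NULL-out can share one statement). A rough standalone equivalent (table and column names are illustrative):

```python
import sqlalchemy as sql

md = sql.MetaData()
tbl = sql.Table(
    't', md,
    sql.Column('v_min', sql.BigInteger),
    sql.Column('v_max', sql.BigInteger),
    sql.Column('idx_val', sql.LargeBinary),       # live embedding-index value
    sql.Column('idx_val_undo', sql.LargeBinary),  # preserved for version revert
)

current_version = 7
stmt = (
    sql.update(tbl)
    .values({
        tbl.c.v_max: current_version,       # mark the row deleted as of this version
        tbl.c.idx_val_undo: tbl.c.idx_val,  # save the old index value
        tbl.c.idx_val: None,                # remove it from the live index
    })
    .where(tbl.c.v_min <= current_version)
    .where(tbl.c.v_max > current_version)   # only rows that are currently live
)
```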
@@ -416,8 +413,8 @@ class StoreComponentView(StoreView):
          self.rowid_cols.append(self.pos_col)
          return self.rowid_cols

-     def _create_sa_tbl(self) -> None:
-         super()._create_sa_tbl()
+     def create_sa_tbl(self) -> None:
+         super().create_sa_tbl()
          # we need to fix up the 'pos' column in TableVersion
          self.tbl_version.cols_by_name['pos'].sa_col = self.pos_col

pixeltable/tool/create_test_db_dump.py CHANGED
@@ -11,6 +11,7 @@ import toml
  import pixeltable as pxt
  import pixeltable.metadata as metadata
  from pixeltable.env import Env
+ from pixeltable.func import Batch
  from pixeltable.type_system import \
      StringType, IntType, FloatType, BoolType, TimestampType, JsonType

@@ -29,9 +30,7 @@ class Dumper:
          os.environ['PIXELTABLE_DB'] = db_name
          os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')

-         Env.get().set_up(reinit_db=True)
-         self.cl = pxt.Client()
-         self.cl.logging(level=logging.DEBUG, to_stdout=True)
+         Env.get().configure_logging(level=logging.DEBUG, to_stdout=True)

      def dump_db(self) -> None:
          md_version = metadata.VERSION
@@ -76,8 +75,18 @@ class Dumper:
              'c6': JsonType(nullable=False),
              'c7': JsonType(nullable=False),
          }
-         t = self.cl.create_table('sample_table', schema, primary_key='c2')
+         t = pxt.create_table('sample_table', schema, primary_key='c2')
+
+         # Add columns for InlineArray and InlineDict
          t.add_column(c8=[[1, 2, 3], [4, 5, 6]])
+         t.add_column(c9=[['a', 'b', 'c'], ['d', 'e', 'f']])
+         t.add_column(c10=[t.c1, [t.c1n, t.c2]])
+         t.add_column(c11={'int': 22, 'dict': {'key': 'val'}, 'expr': t.c1})
+
+         # InPredicate
+         t.add_column(isin_1=t.c1.isin(['test string 1', 'test string 2', 'test string 3']))
+         t.add_column(isin_2=t.c2.isin([1, 2, 3, 4, 5]))
+         t.add_column(isin_3=t.c2.isin(t.c6.f5))

          # Add columns for .astype converters to ensure they're persisted properly
          t.add_column(c2_as_float=t.c2.astype(FloatType()))
@@ -136,6 +145,32 @@ class Dumper:
              for i in range(num_rows)
          ]
          t.insert(rows)
+         pxt.create_dir('views')
+         v = pxt.create_view('views.sample_view', t, filter=(t.c2 < 50))
+         _ = pxt.create_view('views.sample_snapshot', t, filter=(t.c2 >= 75), is_snapshot=True)
+         e = pxt.create_view('views.empty_view', t, filter=t.c2 == 4171780)
+         assert e.count() == 0
+         # Computed column using a library function
+         v['str_format'] = pxt.functions.string.str_format('{0} {key}', t.c1, key=t.c1)
+         # Computed column using a bespoke stored udf
+         v['test_udf'] = test_udf_stored(t.c2)
+         # Computed column using a batched function
+         # (apply this to the empty view, since it's a "heavyweight" function)
+         e['batched'] = pxt.functions.huggingface.clip_text(t.c1, model_id='openai/clip-vit-base-patch32')
+         # computed column using a stored batched function
+         v['test_udf_batched'] = test_udf_stored_batched(t.c1, upper=False)
+         # astype
+         v['astype'] = t.c1.astype(pxt.FloatType())
+
+
+ @pxt.udf(_force_stored=True)
+ def test_udf_stored(n: int) -> int:
+     return n + 1
+
+
+ @pxt.udf(batch_size=4, _force_stored=True)
+ def test_udf_stored_batched(strings: Batch[str], *, upper: bool = True) -> Batch[str]:
+     return [string.upper() if upper else string.lower() for string in strings]


  def main() -> None:
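The Dumper changes track the headline API shift in this release: pixeltable/client.py is removed and pixeltable/globals.py added (see the file list), moving operations from a Client instance to module-level functions. A hedged before/after sketch; the call names follow the diff above, while the package-level type constructors are assumed to be re-exported, as pxt.FloatType is:

```python
import pixeltable as pxt

# 0.2.4 style: operations hang off a Client instance
# cl = pxt.Client()
# t = cl.create_table('sample_table', schema, primary_key='c2')

# 0.2.6 style: module-level entry points
schema = {'c1': pxt.StringType(nullable=False), 'c2': pxt.IntType(nullable=False)}
t = pxt.create_table('sample_table', schema, primary_key='c2')
pxt.create_dir('views')
v = pxt.create_view('views.sample_view', t, filter=(t.c2 < 50))
```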