pixeltable 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (56)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/column.py +6 -3
  3. pixeltable/catalog/dir.py +1 -1
  4. pixeltable/catalog/globals.py +15 -6
  5. pixeltable/catalog/insertable_table.py +23 -8
  6. pixeltable/catalog/named_function.py +1 -1
  7. pixeltable/catalog/path_dict.py +4 -4
  8. pixeltable/catalog/schema_object.py +30 -18
  9. pixeltable/catalog/table.py +87 -104
  10. pixeltable/catalog/table_version.py +35 -24
  11. pixeltable/catalog/table_version_path.py +2 -2
  12. pixeltable/catalog/view.py +15 -8
  13. pixeltable/dataframe.py +56 -56
  14. pixeltable/env.py +10 -9
  15. pixeltable/exec/__init__.py +3 -3
  16. pixeltable/exec/aggregation_node.py +3 -3
  17. pixeltable/exec/expr_eval_node.py +3 -3
  18. pixeltable/exec/in_memory_data_node.py +4 -4
  19. pixeltable/exec/sql_node.py +4 -1
  20. pixeltable/exprs/arithmetic_expr.py +41 -16
  21. pixeltable/exprs/array_slice.py +3 -4
  22. pixeltable/exprs/column_ref.py +20 -4
  23. pixeltable/exprs/comparison.py +11 -6
  24. pixeltable/exprs/data_row.py +3 -0
  25. pixeltable/exprs/expr.py +88 -23
  26. pixeltable/exprs/function_call.py +12 -1
  27. pixeltable/exprs/globals.py +3 -1
  28. pixeltable/exprs/inline_array.py +4 -4
  29. pixeltable/exprs/json_path.py +36 -20
  30. pixeltable/exprs/row_builder.py +4 -4
  31. pixeltable/exprs/rowid_ref.py +1 -1
  32. pixeltable/functions/__init__.py +1 -2
  33. pixeltable/functions/audio.py +32 -0
  34. pixeltable/functions/huggingface.py +4 -4
  35. pixeltable/functions/image.py +1 -1
  36. pixeltable/functions/json.py +46 -0
  37. pixeltable/functions/video.py +5 -1
  38. pixeltable/functions/{eval.py → vision.py} +166 -27
  39. pixeltable/globals.py +57 -28
  40. pixeltable/io/external_store.py +6 -6
  41. pixeltable/io/globals.py +13 -14
  42. pixeltable/io/label_studio.py +6 -6
  43. pixeltable/io/pandas.py +60 -19
  44. pixeltable/io/parquet.py +14 -14
  45. pixeltable/iterators/document.py +7 -7
  46. pixeltable/iterators/video.py +55 -23
  47. pixeltable/plan.py +58 -29
  48. pixeltable/store.py +97 -59
  49. pixeltable/tool/create_test_db_dump.py +17 -11
  50. pixeltable/type_system.py +155 -143
  51. pixeltable/utils/pytorch.py +12 -10
  52. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/METADATA +10 -10
  53. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/RECORD +56 -54
  54. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/LICENSE +0 -0
  55. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/WHEEL +0 -0
  56. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/entry_points.txt +0 -0
@@ -1,11 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import abc
4
+ import itertools
3
5
  import json
4
6
  import logging
5
7
  from pathlib import Path
6
- from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type, Literal
8
+ from typing import Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
7
9
  from uuid import UUID
8
- import abc
9
10
 
10
11
  import pandas as pd
11
12
  import sqlalchemy as sql
@@ -18,8 +19,9 @@ import pixeltable.exprs as exprs
18
19
  import pixeltable.index as index
19
20
  import pixeltable.metadata.schema as schema
20
21
  import pixeltable.type_system as ts
22
+
21
23
  from .column import Column
22
- from .globals import _ROWID_COLUMN_NAME, is_valid_identifier, is_system_column_name, UpdateStatus
24
+ from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
23
25
  from .schema_object import SchemaObject
24
26
  from .table_version import TableVersion
25
27
  from .table_version_path import TableVersionPath
@@ -27,7 +29,7 @@ from .table_version_path import TableVersionPath
27
29
  _logger = logging.getLogger('pixeltable')
28
30
 
29
31
  class Table(SchemaObject):
30
- """Base class for all tabular SchemaObjects."""
32
+ """Base class for table objects (base tables, views, snapshots)."""
31
33
 
32
34
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
33
35
  super().__init__(id, name, dir_id)
@@ -46,7 +48,18 @@ class Table(SchemaObject):
46
48
  f"WHERE {schema.Table.id.name} = :id"))
47
49
  conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
48
50
 
49
- def version(self) -> int:
51
+ def get_metadata(self) -> dict[str, Any]:
52
+ md = super().get_metadata()
53
+ md['base'] = self._base._path if self._base is not None else None
54
+ md['schema'] = self._schema
55
+ md['version'] = self._version
56
+ md['schema_version'] = self._tbl_version.schema_version
57
+ md['comment'] = self._comment
58
+ md['num_retained_versions'] = self._num_retained_versions
59
+ return md
60
+
61
+ @property
62
+ def _version(self) -> int:
50
63
  """Return the version of this table. Used by tests to ascertain version changes."""
51
64
  return self._tbl_version.version
52
65
 
@@ -60,7 +73,7 @@ class Table(SchemaObject):
60
73
 
61
74
  def _check_is_dropped(self) -> None:
62
75
  if self._is_dropped:
63
- raise excs.Error(f'{self.display_name()} {self.name} has been dropped')
76
+ raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
64
77
 
65
78
  def __getattr__(
66
79
  self, name: str
@@ -74,7 +87,7 @@ class Table(SchemaObject):
74
87
  def __getitem__(
75
88
  self, index: object
76
89
  ) -> Union[
77
- 'pixeltable.func.QueryTemplateFunction', 'pixeltable.exprs.ColumnRef', 'pixeltable.dataframe.DataFrame'
90
+ 'pixeltable.func.QueryTemplateFunction', 'pixeltable.exprs.ColumnRef', 'pixeltable.DataFrame'
78
91
  ]:
79
92
  """Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
80
93
  """
@@ -90,10 +103,10 @@ class Table(SchemaObject):
90
103
  recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
91
104
  all sub-views (including views of views, etc.)
92
105
  """
93
- return [t.path for t in self._get_views(recursive=recursive)]
106
+ return [t._path for t in self._get_views(recursive=recursive)]
94
107
 
95
108
  def _get_views(self, *, recursive: bool = True) -> list['Table']:
96
- dependents = catalog.Catalog.get().tbl_dependents[self._get_id()]
109
+ dependents = catalog.Catalog.get().tbl_dependents[self._id]
97
110
  if recursive:
98
111
  return dependents + [t for view in dependents for t in view._get_views(recursive=True)]
99
112
  else:
@@ -106,33 +119,30 @@ class Table(SchemaObject):
106
119
  from pixeltable.dataframe import DataFrame
107
120
  return DataFrame(self._tbl_version_path)
108
121
 
109
- def select(self, *items: Any, **named_items: Any) -> 'pixeltable.dataframe.DataFrame':
110
- """Return a DataFrame for this table.
111
- """
122
+ def select(self, *items: Any, **named_items: Any) -> 'pixeltable.DataFrame':
123
+ """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
112
124
  # local import: avoid circular imports
113
125
  from pixeltable.dataframe import DataFrame
114
126
  return DataFrame(self._tbl_version_path).select(*items, **named_items)
115
127
 
116
- def where(self, pred: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
117
- """Return a DataFrame for this table.
118
- """
128
+ def where(self, pred: 'exprs.Expr') -> 'pixeltable.DataFrame':
129
+ """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
119
130
  # local import: avoid circular imports
120
131
  from pixeltable.dataframe import DataFrame
121
132
  return DataFrame(self._tbl_version_path).where(pred)
122
133
 
123
- def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.dataframe.DataFrame':
124
- """Return a DataFrame for this table.
125
- """
134
+ def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.DataFrame':
135
+ """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
126
136
  # local import: avoid circular imports
127
137
  from pixeltable.dataframe import DataFrame
128
138
  return DataFrame(self._tbl_version_path).order_by(*items, asc=asc)
129
139
 
130
- def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
131
- """Return a DataFrame for this table."""
140
+ def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.DataFrame':
141
+ """Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
132
142
  from pixeltable.dataframe import DataFrame
133
143
  return DataFrame(self._tbl_version_path).group_by(*items)
134
144
 
135
- def limit(self, n: int) -> 'pixeltable.dataframe.DataFrame':
145
+ def limit(self, n: int) -> 'pixeltable.DataFrame':
136
146
  from pixeltable.dataframe import DataFrame
137
147
  return DataFrame(self._tbl_version_path).limit(n)
138
148
 
@@ -163,20 +173,18 @@ class Table(SchemaObject):
163
173
  """Return the number of rows in this table."""
164
174
  return self._df().count()
165
175
 
166
- def column_names(self) -> list[str]:
167
- """Return the names of the columns in this table."""
168
- return [c.name for c in self._tbl_version_path.columns()]
169
-
170
- def column_types(self) -> dict[str, ts.ColumnType]:
171
- """Return the names of the columns in this table."""
176
+ @property
177
+ def _schema(self) -> dict[str, ts.ColumnType]:
178
+ """Return the schema (column names and column types) of this table."""
172
179
  return {c.name: c.col_type for c in self._tbl_version_path.columns()}
173
180
 
174
- def query_names(self) -> list[str]:
181
+ @property
182
+ def _query_names(self) -> list[str]:
175
183
  """Return the names of the registered queries for this table."""
176
184
  return list(self._queries.keys())
177
185
 
178
186
  @property
179
- def base(self) -> Optional['Table']:
187
+ def _base(self) -> Optional['Table']:
180
188
  """
181
189
  The base table of this `Table`. If this table is a view, returns the `Table`
182
190
  from which it was derived. Otherwise, returns `None`.
@@ -187,21 +195,13 @@ class Table(SchemaObject):
187
195
  return catalog.Catalog.get().tbls[base_id]
188
196
 
189
197
  @property
190
- def comment(self) -> str:
198
+ def _comment(self) -> str:
191
199
  return self._tbl_version.comment
192
200
 
193
- @comment.setter
194
- def comment(self, new_comment: Optional[str]):
195
- self._tbl_version.set_comment(new_comment)
196
-
197
201
  @property
198
- def num_retained_versions(self):
202
+ def _num_retained_versions(self):
199
203
  return self._tbl_version.num_retained_versions
200
204
 
201
- @num_retained_versions.setter
202
- def num_retained_versions(self, new_num_retained_versions: int):
203
- self._tbl_version.set_num_retained_versions(new_num_retained_versions)
204
-
205
205
  def _description(self) -> pd.DataFrame:
206
206
  cols = self._tbl_version_path.columns()
207
207
  df = pd.DataFrame({
@@ -233,11 +233,11 @@ class Table(SchemaObject):
233
233
  # TODO: Display comments in _repr_html()
234
234
  def __repr__(self) -> str:
235
235
  description_str = self._description().to_string(index=False)
236
- if self.comment is None:
236
+ if self._comment is None:
237
237
  comment = ''
238
238
  else:
239
- comment = f'{self.comment}\n'
240
- return f'{self.display_name()} \'{self._name}\'\n{comment}{description_str}'
239
+ comment = f'{self._comment}\n'
240
+ return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
241
241
 
242
242
  def _repr_html_(self) -> str:
243
243
  return self._description_html()._repr_html_()
@@ -266,51 +266,34 @@ class Table(SchemaObject):
266
266
  from pixeltable.dataframe import DataFrame
267
267
  return DataFrame(self._tbl_version_path).to_coco_dataset()
268
268
 
269
- def __setitem__(self, column_name: str, value: Union[ts.ColumnType, exprs.Expr, Callable, dict]) -> None:
270
- """Adds a column to the table
271
- Args:
272
- column_name: the name of the new column
273
- value: column type or value expression or column specification dictionary:
274
- column type: a Pixeltable column type (if the table already contains rows, it must be nullable)
275
- value expression: a Pixeltable expression that computes the column values
276
- column specification: a dictionary with possible keys 'type', 'value', 'stored'
277
- Examples:
278
- Add an int column with ``None`` values:
279
-
280
- >>> tbl['new_col'] = IntType(nullable=True)
281
-
282
- For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
283
- the parameters of the Callable must correspond to existing column names (the column values are then passed
284
- as arguments to the Callable). In this case, the return type cannot be inferred and needs to be specified
285
- explicitly:
286
-
287
- >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}
269
+ def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
270
+ """
271
+ Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
288
272
 
289
- For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
290
- 90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
291
- (by default, computed image columns are not stored but recomputed on demand):
273
+ >>> tbl['new_col'] = IntType()
292
274
 
293
- >>> tbl['rotated'] = tbl.frame.rotate(90)
275
+ is exactly equivalent to
294
276
 
295
- Do the same, but now the column is stored:
277
+ >>> tbl.add_column(new_col=IntType())
296
278
 
297
- >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}
279
+ For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
298
280
  """
299
- if not isinstance(column_name, str):
300
- raise excs.Error(f'Column name must be a string, got {type(column_name)}')
301
- if not is_valid_identifier(column_name):
302
- raise excs.Error(f'Invalid column name: {column_name!r}')
303
-
304
- new_col = self._create_columns({column_name: value})[0]
305
- self._verify_column(new_col, self.column_names(), self.query_names())
306
- return self._tbl_version.add_column(new_col)
281
+ if not isinstance(col_name, str):
282
+ raise excs.Error(f'Column name must be a string, got {type(col_name)}')
283
+ if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
284
+ raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
285
+ self.add_column(**{col_name: spec})
307
286
 
308
287
  def add_column(
309
- self, *,
310
- type: Optional[ts.ColumnType] = None, stored: Optional[bool] = None, print_stats: bool = False,
311
- **kwargs: Any
288
+ self,
289
+ *,
290
+ type: Optional[ts.ColumnType] = None,
291
+ stored: Optional[bool] = None,
292
+ print_stats: bool = False,
293
+ **kwargs: Union[ts.ColumnType, exprs.Expr, Callable]
312
294
  ) -> UpdateStatus:
313
- """Adds a column to the table.
295
+ """
296
+ Adds a column to the table.
314
297
 
315
298
  Args:
316
299
  kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
@@ -364,19 +347,20 @@ class Table(SchemaObject):
364
347
  self._check_is_dropped()
365
348
  # verify kwargs and construct column schema dict
366
349
  if len(kwargs) != 1:
367
- raise excs.Error((
368
- f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression", '
350
+ raise excs.Error(
351
+ f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
369
352
  f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
370
- ))
353
+ )
371
354
  col_name, spec = next(iter(kwargs.items()))
355
+ if not is_valid_identifier(col_name):
356
+ raise excs.Error(f'Invalid column name: {col_name!r}')
357
+ if isinstance(spec, (ts.ColumnType, exprs.Expr)) and type is not None:
358
+ raise excs.Error(f'add_column(): keyword argument "type" is redundant')
359
+
372
360
  col_schema: dict[str, Any] = {}
373
361
  if isinstance(spec, ts.ColumnType):
374
- if type is not None:
375
- raise excs.Error(f'add_column(): keyword argument "type" is redundant')
376
362
  col_schema['type'] = spec
377
363
  else:
378
- if isinstance(spec, exprs.Expr) and type is not None:
379
- raise excs.Error(f'add_column(): keyword argument "type" is redundant')
380
364
  col_schema['value'] = spec
381
365
  if type is not None:
382
366
  col_schema['type'] = type
@@ -384,7 +368,7 @@ class Table(SchemaObject):
384
368
  col_schema['stored'] = stored
385
369
 
386
370
  new_col = self._create_columns({col_name: col_schema})[0]
387
- self._verify_column(new_col, self.column_names(), self.query_names())
371
+ self._verify_column(new_col, set(self._schema.keys()), self._query_names)
388
372
  return self._tbl_version.add_column(new_col, print_stats=print_stats)
389
373
 
390
374
  @classmethod
@@ -434,8 +418,8 @@ class Table(SchemaObject):
434
418
  for name, spec in schema.items():
435
419
  col_type: Optional[ts.ColumnType] = None
436
420
  value_expr: Optional[exprs.Expr] = None
437
- stored: Optional[bool] = None
438
421
  primary_key: Optional[bool] = None
422
+ stored = True
439
423
 
440
424
  if isinstance(spec, ts.ColumnType):
441
425
  # TODO: create copy
@@ -455,7 +439,7 @@ class Table(SchemaObject):
455
439
  if value_expr is not None and isinstance(value_expr, exprs.Expr):
456
440
  # create copy so we can modify it
457
441
  value_expr = value_expr.copy()
458
- stored = spec.get('stored')
442
+ stored = spec.get('stored', True)
459
443
  primary_key = spec.get('primary_key')
460
444
 
461
445
  column = Column(
@@ -469,7 +453,7 @@ class Table(SchemaObject):
469
453
  ) -> None:
470
454
  """Check integrity of user-supplied Column and supply defaults"""
471
455
  if is_system_column_name(col.name):
472
- raise excs.Error(f'Column name {col.name!r} is reserved')
456
+ raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
473
457
  if not is_valid_identifier(col.name):
474
458
  raise excs.Error(f"Invalid column name: {col.name!r}")
475
459
  if col.name in existing_column_names:
@@ -478,12 +462,10 @@ class Table(SchemaObject):
478
462
  raise excs.Error(f'Column name conflicts with a registered query: {col.name!r}')
479
463
  if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
480
464
  raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed image columns')
481
- if col.stored is False and not (col.col_type.is_image_type() and not col.has_window_fn_call()):
465
+ if col.stored is False and col.has_window_fn_call():
482
466
  raise excs.Error((
483
467
  f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a streaming '
484
468
  f'function'))
485
- if col.stored is None:
486
- col.stored = not (col.is_computed and col.col_type.is_image_type() and not col.has_window_fn_call())
487
469
 
488
470
  @classmethod
489
471
  def _verify_schema(cls, schema: list[Column]) -> None:
@@ -530,7 +512,7 @@ class Table(SchemaObject):
530
512
  ]
531
513
  if len(dependent_stores) > 0:
532
514
  dependent_store_names = [
533
- store.name if view._get_id() == self._get_id() else f'{store.name} (in view `{view.name}`)'
515
+ store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
534
516
  for view, store in dependent_stores
535
517
  ]
536
518
  raise excs.Error(
@@ -594,6 +576,7 @@ class Table(SchemaObject):
594
576
  if idx_name is not None and idx_name in self._tbl_version.idxs_by_name:
595
577
  raise excs.Error(f'Duplicate index name: {idx_name}')
596
578
  from pixeltable.index import EmbeddingIndex
579
+
597
580
  # create the EmbeddingIndex instance to verify args
598
581
  idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
599
582
  status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
@@ -850,7 +833,7 @@ class Table(SchemaObject):
850
833
  else:
851
834
  function_path = None
852
835
  query_name = py_fn.__name__
853
- if query_name in self.column_names():
836
+ if query_name in self._schema.keys():
854
837
  raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
855
838
  if query_name in self._queries:
856
839
  raise excs.Error(f'Duplicate query name: {query_name!r}')
@@ -879,13 +862,13 @@ class Table(SchemaObject):
879
862
  Links the specified `ExternalStore` to this table.
880
863
  """
881
864
  if self._tbl_version.is_snapshot:
882
- raise excs.Error(f'Table `{self.name}` is a snapshot, so it cannot be linked to an external store.')
865
+ raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
883
866
  self._check_is_dropped()
884
867
  if store.name in self.external_stores:
885
- raise excs.Error(f'Table `{self.name}` already has an external store with that name: {store.name}')
886
- _logger.info(f'Linking external store `{store.name}` to table `{self.name}`')
868
+ raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
869
+ _logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
887
870
  self._tbl_version.link_external_store(store)
888
- print(f'Linked external store `{store.name}` to table `{self.name}`.')
871
+ print(f'Linked external store `{store.name}` to table `{self._name}`.')
889
872
 
890
873
  def unlink_external_stores(
891
874
  self,
@@ -917,11 +900,11 @@ class Table(SchemaObject):
917
900
  if not ignore_errors:
918
901
  for store in stores:
919
902
  if store not in all_stores:
920
- raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')
903
+ raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
921
904
 
922
905
  for store in stores:
923
906
  self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
924
- print(f'Unlinked external store from table `{self.name}`: {store}')
907
+ print(f'Unlinked external store from table `{self._name}`: {store}')
925
908
 
926
909
  def sync(
927
910
  self,
@@ -949,7 +932,7 @@ class Table(SchemaObject):
949
932
 
950
933
  for store in stores:
951
934
  if store not in all_stores:
952
- raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')
935
+ raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
953
936
 
954
937
  from pixeltable.io import SyncStatus
955
938
 
@@ -962,7 +945,7 @@ class Table(SchemaObject):
962
945
  return sync_status
963
946
 
964
947
  def __dir__(self) -> list[str]:
965
- return list(super().__dir__()) + self.column_names() + self.query_names()
948
+ return list(super().__dir__()) + list(self._schema.keys()) + self._query_names
966
949
 
967
950
  def _ipython_key_completions_(self) -> list[str]:
968
- return self.column_names() + self.query_names()
951
+ return list(self._schema.keys()) + self._query_names
@@ -12,7 +12,7 @@ from uuid import UUID
12
12
  import sqlalchemy as sql
13
13
  import sqlalchemy.orm as orm
14
14
 
15
- import pixeltable
15
+ import pixeltable as pxt
16
16
  import pixeltable.exceptions as excs
17
17
  import pixeltable.exprs as exprs
18
18
  import pixeltable.func as func
@@ -24,7 +24,7 @@ from pixeltable.metadata import schema
24
24
  from pixeltable.utils.filecache import FileCache
25
25
  from pixeltable.utils.media_store import MediaStore
26
26
  from .column import Column
27
- from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier, _ROWID_COLUMN_NAME
27
+ from .globals import UpdateStatus, _POS_COLUMN_NAME, is_valid_identifier, _ROWID_COLUMN_NAME
28
28
  from ..func.globals import resolve_symbol
29
29
 
30
30
  _logger = logging.getLogger('pixeltable')
@@ -56,7 +56,7 @@ class TableVersion:
56
56
 
57
57
  def __init__(
58
58
  self, id: UUID, tbl_md: schema.TableMd, version: int, schema_version_md: schema.TableSchemaVersionMd,
59
- base: Optional[TableVersion] = None, base_path: Optional['pixeltable.catalog.TableVersionPath'] = None,
59
+ base: Optional[TableVersion] = None, base_path: Optional['pxt.catalog.TableVersionPath'] = None,
60
60
  is_snapshot: Optional[bool] = None
61
61
  ):
62
62
  # only one of base and base_path can be non-None
@@ -124,7 +124,7 @@ class TableVersion:
124
124
  self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version, both system and user
125
125
  self.idx_md = tbl_md.index_md # needed for _create_tbl_md()
126
126
  self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {} # contains only actively maintained indices
127
- self.external_stores: dict[str, pixeltable.io.ExternalStore] = {}
127
+ self.external_stores: dict[str, pxt.io.ExternalStore] = {}
128
128
 
129
129
  self._init_schema(tbl_md, schema_version_md)
130
130
 
@@ -145,7 +145,7 @@ class TableVersion:
145
145
  @classmethod
146
146
  def create(
147
147
  cls, session: orm.Session, dir_id: UUID, name: str, cols: List[Column], num_retained_versions: int,
148
- comment: str, base_path: Optional['pixeltable.catalog.TableVersionPath'] = None,
148
+ comment: str, base_path: Optional['pxt.catalog.TableVersionPath'] = None,
149
149
  view_md: Optional[schema.ViewMd] = None
150
150
  ) -> Tuple[UUID, Optional[TableVersion]]:
151
151
  # assign ids
@@ -636,14 +636,28 @@ class TableVersion:
636
636
  _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
637
637
 
638
638
  def insert(
639
- self, rows: List[Dict[str, Any]], print_stats: bool = False, fail_on_exception : bool = True
639
+ self,
640
+ rows: Optional[list[dict[str, Any]]],
641
+ df: Optional[pxt.DataFrame],
642
+ conn: Optional[sql.engine.Connection] = None,
643
+ print_stats: bool = False,
644
+ fail_on_exception: bool = True
640
645
  ) -> UpdateStatus:
641
- """Insert rows into this table.
642
646
  """
643
- assert self.is_insertable()
647
+ Insert rows into this table, either from an explicit list of dicts or from a `DataFrame`.
648
+ """
644
649
  from pixeltable.plan import Planner
645
- plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
646
- with Env.get().engine.begin() as conn:
650
+
651
+ assert self.is_insertable()
652
+ assert (rows is None) != (df is None) # Exactly one must be specified
653
+ if rows is not None:
654
+ plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
655
+ else:
656
+ plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
657
+ if conn is None:
658
+ with Env.get().engine.begin() as conn:
659
+ return self._insert(plan, conn, time.time(), print_stats)
660
+ else:
647
661
  return self._insert(plan, conn, time.time(), print_stats)
648
662
 
649
663
  def _insert(
@@ -739,7 +753,7 @@ class TableVersion:
739
753
  if error_if_not_exists:
740
754
  raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
741
755
  if insert_if_not_exists:
742
- insert_status = self.insert(unmatched_rows, print_stats=False, fail_on_exception=False)
756
+ insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
743
757
  result += insert_status
744
758
  return result
745
759
 
@@ -994,11 +1008,11 @@ class TableVersion:
994
1008
  def _init_external_stores(self, tbl_md: schema.TableMd) -> None:
995
1009
  for store_md in tbl_md.external_stores:
996
1010
  store_cls = resolve_symbol(store_md['class'])
997
- assert isinstance(store_cls, type) and issubclass(store_cls, pixeltable.io.ExternalStore)
1011
+ assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
998
1012
  store = store_cls.from_dict(store_md['md'])
999
1013
  self.external_stores[store.name] = store
1000
1014
 
1001
- def link_external_store(self, store: pixeltable.io.ExternalStore) -> None:
1015
+ def link_external_store(self, store: pxt.io.ExternalStore) -> None:
1002
1016
  with Env.get().engine.begin() as conn:
1003
1017
  store.link(self, conn) # May result in additional metadata changes
1004
1018
  self.external_stores[store.name] = store
@@ -1012,7 +1026,7 @@ class TableVersion:
1012
1026
  del self.external_stores[store_name]
1013
1027
  self._update_md(time.time(), conn, update_tbl_version=False)
1014
1028
 
1015
- if delete_external_data and isinstance(store, pixeltable.io.external_store.Project):
1029
+ if delete_external_data and isinstance(store, pxt.io.external_store.Project):
1016
1030
  store.delete()
1017
1031
 
1018
1032
  def is_view(self) -> bool:
@@ -1032,7 +1046,7 @@ class TableVersion:
1032
1046
 
1033
1047
  def is_system_column(self, col: Column) -> bool:
1034
1048
  """Return True if column was created by Pixeltable"""
1035
- if col.name == POS_COLUMN_NAME and self.is_component_view():
1049
+ if col.name == _POS_COLUMN_NAME and self.is_component_view():
1036
1050
  return True
1037
1051
  return False
1038
1052
 
@@ -1056,7 +1070,7 @@ class TableVersion:
1056
1070
  return names
1057
1071
 
1058
1072
  @classmethod
1059
- def _create_value_expr(cls, col: Column, path: 'pixeltable.catalog.TableVersionPath') -> None:
1073
+ def _create_value_expr(cls, col: Column, path: 'pxt.catalog.TableVersionPath') -> None:
1060
1074
  """
1061
1075
  Create col.value_expr, given col.compute_func.
1062
1076
  Interprets compute_func's parameters to be references to columns and construct ColumnRefs as args.
@@ -1093,16 +1107,13 @@ class TableVersion:
1093
1107
  result = {info.val_col for col in cols for info in col.get_idx_info().values()}
1094
1108
  return result
1095
1109
 
1096
- def get_dependent_columns(self, cols: list[Column]) -> set[Column]:
1110
+ def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
1097
1111
  """
1098
1112
  Return the set of columns that transitively depend on any of the given ones.
1099
1113
  """
1100
- if len(cols) == 0:
1101
- return set()
1102
- result: set[Column] = set()
1103
- for col in cols:
1104
- result.update(col.dependent_cols)
1105
- result.update(self.get_dependent_columns(result))
1114
+ result = {dependent_col for col in cols for dependent_col in col.dependent_cols}
1115
+ if len(result) > 0:
1116
+ result.update(self.get_dependent_columns(result))
1106
1117
  return result
1107
1118
 
1108
1119
  def num_rowid_columns(self) -> int:
@@ -1123,7 +1134,7 @@ class TableVersion:
1123
1134
  return column_md
1124
1135
 
1125
1136
  @classmethod
1126
- def _create_stores_md(cls, stores: Iterable['pixeltable.io.ExternalStore']) -> list[dict[str, Any]]:
1137
+ def _create_stores_md(cls, stores: Iterable['pxt.io.ExternalStore']) -> list[dict[str, Any]]:
1127
1138
  return [
1128
1139
  {
1129
1140
  'class': f'{type(store).__module__}.{type(store).__qualname__}',
@@ -6,7 +6,7 @@ from uuid import UUID
6
6
 
7
7
  import pixeltable
8
8
  from .column import Column
9
- from .globals import POS_COLUMN_NAME
9
+ from .globals import _POS_COLUMN_NAME
10
10
  from .table_version import TableVersion
11
11
 
12
12
  _logger = logging.getLogger('pixeltable')
@@ -83,7 +83,7 @@ class TableVersionPath:
83
83
  def __getattr__(self, col_name: str) -> 'pixeltable.exprs.ColumnRef':
84
84
  """Return a ColumnRef for the given column name."""
85
85
  from pixeltable.exprs import ColumnRef, RowidRef
86
- if col_name == POS_COLUMN_NAME and self.is_component_view():
86
+ if col_name == _POS_COLUMN_NAME and self.is_component_view():
87
87
  return RowidRef(self.tbl_version, self.tbl_version.store_tbl.pos_col_idx)
88
88
  if col_name not in self.tbl_version.cols_by_name:
89
89
  if self.base is None:
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import inspect
4
4
  import logging
5
- from typing import Optional, Type, Dict, Set, Any, Iterable, TYPE_CHECKING
5
+ from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Set, Type
6
6
  from uuid import UUID
7
7
 
8
8
  import sqlalchemy.orm as orm
@@ -14,10 +14,11 @@ import pixeltable.metadata.schema as md_schema
14
14
  from pixeltable.env import Env
15
15
  from pixeltable.exceptions import Error
16
16
  from pixeltable.iterators import ComponentIterator
17
- from pixeltable.type_system import InvalidType, IntType
17
+ from pixeltable.type_system import IntType, InvalidType
18
+
18
19
  from .catalog import Catalog
19
20
  from .column import Column
20
- from .globals import POS_COLUMN_NAME, UpdateStatus
21
+ from .globals import _POS_COLUMN_NAME, UpdateStatus
21
22
  from .table import Table
22
23
  from .table_version import TableVersion
23
24
  from .table_version_path import TableVersionPath
@@ -45,11 +46,11 @@ class View(Table):
45
46
  self._snapshot_only = snapshot_only
46
47
 
47
48
  @classmethod
48
- def display_name(cls) -> str:
49
+ def _display_name(cls) -> str:
49
50
  return 'view'
50
51
 
51
52
  @classmethod
52
- def create(
53
+ def _create(
53
54
  cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
54
55
  predicate: 'pxt.exprs.Expr', is_snapshot: bool, num_retained_versions: int, comment: str,
55
56
  iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
@@ -100,7 +101,7 @@ class View(Table):
100
101
  # a component view exposes the pos column of its rowid;
101
102
  # we create that column here, so it gets assigned a column id;
102
103
  # stored=False: it is not stored separately (it's already stored as part of the rowid)
103
- iterator_cols = [Column(POS_COLUMN_NAME, IntType(), stored=False)]
104
+ iterator_cols = [Column(_POS_COLUMN_NAME, IntType(), stored=False)]
104
105
  output_dict, unstored_cols = iterator_cls.output_schema(**bound_args)
105
106
  iterator_cols.extend([
106
107
  Column(col_name, col_type, stored=col_name not in unstored_cols)
@@ -207,11 +208,17 @@ class View(Table):
207
208
  cat.tbl_dependents[self._base_id].remove(self)
208
209
  del cat.tbl_dependents[self._id]
209
210
 
211
+ def get_metadata(self) -> dict[str, Any]:
212
+ md = super().get_metadata()
213
+ md['is_view'] = True
214
+ md['is_snapshot'] = self._tbl_version_path.is_snapshot()
215
+ return md
216
+
210
217
  def insert(
211
218
  self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
212
219
  fail_on_exception: bool = True, **kwargs: Any
213
220
  ) -> UpdateStatus:
214
- raise excs.Error(f'{self.display_name()} {self._name!r}: cannot insert into view')
221
+ raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
215
222
 
216
223
  def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
217
- raise excs.Error(f'{self.display_name()} {self._name!r}: cannot delete from view')
224
+ raise excs.Error(f'{self._display_name()} {self._name!r}: cannot delete from view')