pixeltable 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (76) hide show
  1. pixeltable/__init__.py +15 -33
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +1 -1
  4. pixeltable/catalog/column.py +28 -16
  5. pixeltable/catalog/dir.py +2 -2
  6. pixeltable/catalog/insertable_table.py +5 -55
  7. pixeltable/catalog/named_function.py +2 -2
  8. pixeltable/catalog/schema_object.py +2 -7
  9. pixeltable/catalog/table.py +298 -204
  10. pixeltable/catalog/table_version.py +104 -139
  11. pixeltable/catalog/table_version_path.py +22 -4
  12. pixeltable/catalog/view.py +20 -10
  13. pixeltable/dataframe.py +128 -25
  14. pixeltable/env.py +21 -14
  15. pixeltable/exec/exec_context.py +5 -0
  16. pixeltable/exec/exec_node.py +1 -0
  17. pixeltable/exec/in_memory_data_node.py +29 -24
  18. pixeltable/exec/sql_scan_node.py +1 -1
  19. pixeltable/exprs/column_ref.py +13 -8
  20. pixeltable/exprs/data_row.py +4 -0
  21. pixeltable/exprs/expr.py +16 -1
  22. pixeltable/exprs/function_call.py +4 -4
  23. pixeltable/exprs/row_builder.py +29 -20
  24. pixeltable/exprs/similarity_expr.py +4 -3
  25. pixeltable/ext/functions/yolox.py +2 -1
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/aggregate_function.py +14 -12
  28. pixeltable/func/callable_function.py +8 -6
  29. pixeltable/func/expr_template_function.py +13 -19
  30. pixeltable/func/function.py +3 -6
  31. pixeltable/func/query_template_function.py +84 -0
  32. pixeltable/func/signature.py +68 -23
  33. pixeltable/func/udf.py +13 -10
  34. pixeltable/functions/__init__.py +6 -91
  35. pixeltable/functions/eval.py +26 -14
  36. pixeltable/functions/fireworks.py +25 -23
  37. pixeltable/functions/globals.py +62 -0
  38. pixeltable/functions/huggingface.py +20 -16
  39. pixeltable/functions/image.py +170 -1
  40. pixeltable/functions/openai.py +95 -128
  41. pixeltable/functions/string.py +10 -2
  42. pixeltable/functions/together.py +95 -84
  43. pixeltable/functions/util.py +16 -0
  44. pixeltable/functions/video.py +94 -16
  45. pixeltable/functions/whisper.py +78 -0
  46. pixeltable/globals.py +1 -1
  47. pixeltable/io/__init__.py +10 -0
  48. pixeltable/io/external_store.py +370 -0
  49. pixeltable/io/globals.py +50 -22
  50. pixeltable/{datatransfer → io}/label_studio.py +279 -166
  51. pixeltable/io/parquet.py +1 -1
  52. pixeltable/iterators/__init__.py +9 -0
  53. pixeltable/iterators/string.py +40 -0
  54. pixeltable/metadata/__init__.py +6 -8
  55. pixeltable/metadata/converters/convert_10.py +2 -4
  56. pixeltable/metadata/converters/convert_12.py +7 -2
  57. pixeltable/metadata/converters/convert_13.py +6 -8
  58. pixeltable/metadata/converters/convert_14.py +2 -4
  59. pixeltable/metadata/converters/convert_15.py +40 -25
  60. pixeltable/metadata/converters/convert_16.py +18 -0
  61. pixeltable/metadata/converters/util.py +11 -8
  62. pixeltable/metadata/schema.py +3 -6
  63. pixeltable/plan.py +8 -7
  64. pixeltable/store.py +1 -1
  65. pixeltable/tool/create_test_db_dump.py +145 -54
  66. pixeltable/tool/embed_udf.py +9 -0
  67. pixeltable/type_system.py +1 -2
  68. pixeltable/utils/code.py +34 -0
  69. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/METADATA +2 -2
  70. pixeltable-0.2.9.dist-info/RECORD +131 -0
  71. pixeltable/datatransfer/__init__.py +0 -1
  72. pixeltable/datatransfer/remote.py +0 -113
  73. pixeltable/functions/pil/image.py +0 -147
  74. pixeltable-0.2.7.dist-info/RECORD +0 -126
  75. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/LICENSE +0 -0
  76. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/WHEEL +0 -0
@@ -1,11 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- import itertools
4
3
  import json
5
4
  import logging
6
5
  from pathlib import Path
7
- from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable, Type
6
+ from typing import Union, Any, Optional, Callable, Set, Tuple, Iterable, overload, Type
8
7
  from uuid import UUID
8
+ import abc
9
9
 
10
10
  import pandas as pd
11
11
  import sqlalchemy as sql
@@ -15,9 +15,9 @@ import pixeltable.catalog as catalog
15
15
  import pixeltable.env as env
16
16
  import pixeltable.exceptions as excs
17
17
  import pixeltable.exprs as exprs
18
+ import pixeltable.index as index
18
19
  import pixeltable.metadata.schema as schema
19
20
  import pixeltable.type_system as ts
20
- import pixeltable.index as index
21
21
  from .column import Column
22
22
  from .globals import is_valid_identifier, is_system_column_name, UpdateStatus
23
23
  from .schema_object import SchemaObject
@@ -29,15 +29,17 @@ _logger = logging.getLogger('pixeltable')
29
29
  class Table(SchemaObject):
30
30
  """Base class for all tabular SchemaObjects."""
31
31
 
32
- ROWID_COLUMN_NAME = '_rowid'
32
+ __ROWID_COLUMN_NAME = '_rowid'
33
33
 
34
34
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
35
35
  super().__init__(id, name, dir_id)
36
- self.is_dropped = False
37
- self.tbl_version_path = tbl_version_path
36
+ self._is_dropped = False
37
+ self._tbl_version_path = tbl_version_path
38
+ from pixeltable.func import QueryTemplateFunction
39
+ self._queries: dict[str, QueryTemplateFunction] = {}
38
40
 
39
- def move(self, new_name: str, new_dir_id: UUID) -> None:
40
- super().move(new_name, new_dir_id)
41
+ def _move(self, new_name: str, new_dir_id: UUID) -> None:
42
+ super()._move(new_name, new_dir_id)
41
43
  with env.Env.get().engine.begin() as conn:
42
44
  stmt = sql.text((
43
45
  f"UPDATE {schema.Table.__table__} "
@@ -48,61 +50,85 @@ class Table(SchemaObject):
48
50
 
49
51
  def version(self) -> int:
50
52
  """Return the version of this table. Used by tests to ascertain version changes."""
51
- return self.tbl_version_path.tbl_version.version
53
+ return self._tbl_version.version
52
54
 
55
+ @property
53
56
  def _tbl_version(self) -> TableVersion:
54
57
  """Return TableVersion for just this table."""
55
- return self.tbl_version_path.tbl_version
58
+ return self._tbl_version_path.tbl_version
56
59
 
57
60
  def __hash__(self) -> int:
58
- return hash(self._tbl_version().id)
61
+ return hash(self._tbl_version.id)
59
62
 
60
63
  def _check_is_dropped(self) -> None:
61
- if self.is_dropped:
64
+ if self._is_dropped:
62
65
  raise excs.Error(f'{self.display_name()} {self.name} has been dropped')
63
66
 
64
- def __getattr__(self, col_name: str) -> 'pixeltable.exprs.ColumnRef':
65
- """Return a ColumnRef for the given column name.
67
+ def __getattr__(
68
+ self, name: str
69
+ ) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.func.QueryTemplateFunction']:
70
+ """Return a ColumnRef or QueryTemplateFunction for the given name.
66
71
  """
67
- return getattr(self.tbl_version_path, col_name)
72
+ if name in self._queries:
73
+ return self._queries[name]
74
+ return getattr(self._tbl_version_path, name)
75
+
76
+ def __getitem__(
77
+ self, index: object
78
+ ) -> Union[
79
+ 'pixeltable.func.QueryTemplateFunction', 'pixeltable.exprs.ColumnRef', 'pixeltable.dataframe.DataFrame'
80
+ ]:
81
+ """Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
82
+ """
83
+ if isinstance(index, str) and index in self._queries:
84
+ return self._queries[index]
85
+ return self._tbl_version_path.__getitem__(index)
68
86
 
69
- def __getitem__(self, index: object) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.dataframe.DataFrame']:
70
- """Return a ColumnRef for the given column name, or a DataFrame for the given slice.
87
+ def get_views(self, *, recursive: bool = False) -> list['Table']:
88
+ """
89
+ All views and snapshots of this `Table`.
71
90
  """
72
- return self.tbl_version_path.__getitem__(index)
91
+ if recursive:
92
+ return [self] + [t for view in self.get_views(recursive=False) for t in view.get_views(recursive=True)]
93
+ else:
94
+ return catalog.Catalog.get().tbl_dependents[self._get_id()]
73
95
 
74
96
  def df(self) -> 'pixeltable.dataframe.DataFrame':
75
97
  """Return a DataFrame for this table.
76
98
  """
77
99
  # local import: avoid circular imports
78
100
  from pixeltable.dataframe import DataFrame
79
- return DataFrame(self.tbl_version_path)
101
+ return DataFrame(self._tbl_version_path)
80
102
 
81
103
  def select(self, *items: Any, **named_items: Any) -> 'pixeltable.dataframe.DataFrame':
82
104
  """Return a DataFrame for this table.
83
105
  """
84
106
  # local import: avoid circular imports
85
107
  from pixeltable.dataframe import DataFrame
86
- return DataFrame(self.tbl_version_path).select(*items, **named_items)
108
+ return DataFrame(self._tbl_version_path).select(*items, **named_items)
87
109
 
88
110
  def where(self, pred: 'exprs.Predicate') -> 'pixeltable.dataframe.DataFrame':
89
111
  """Return a DataFrame for this table.
90
112
  """
91
113
  # local import: avoid circular imports
92
114
  from pixeltable.dataframe import DataFrame
93
- return DataFrame(self.tbl_version_path).where(pred)
115
+ return DataFrame(self._tbl_version_path).where(pred)
94
116
 
95
117
  def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.dataframe.DataFrame':
96
118
  """Return a DataFrame for this table.
97
119
  """
98
120
  # local import: avoid circular imports
99
121
  from pixeltable.dataframe import DataFrame
100
- return DataFrame(self.tbl_version_path).order_by(*items, asc=asc)
122
+ return DataFrame(self._tbl_version_path).order_by(*items, asc=asc)
101
123
 
102
124
  def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
103
125
  """Return a DataFrame for this table."""
104
126
  from pixeltable.dataframe import DataFrame
105
- return DataFrame(self.tbl_version_path).group_by(*items)
127
+ return DataFrame(self._tbl_version_path).group_by(*items)
128
+
129
+ def limit(self, n: int) -> 'pixeltable.dataframe.DataFrame':
130
+ from pixeltable.dataframe import DataFrame
131
+ return DataFrame(self._tbl_version_path).limit(n)
106
132
 
107
133
  def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
108
134
  """Return rows from this table."""
@@ -131,32 +157,47 @@ class Table(SchemaObject):
131
157
  """Return the number of rows in this table."""
132
158
  return self.df().count()
133
159
 
134
- def column_names(self) -> List[str]:
160
+ def column_names(self) -> list[str]:
135
161
  """Return the names of the columns in this table."""
136
- return [c.name for c in self.tbl_version_path.columns()]
162
+ return [c.name for c in self._tbl_version_path.columns()]
137
163
 
138
- def column_types(self) -> Dict[str, ts.ColumnType]:
164
+ def column_types(self) -> dict[str, ts.ColumnType]:
139
165
  """Return the names of the columns in this table."""
140
- return {c.name: c.col_type for c in self.tbl_version_path.columns()}
166
+ return {c.name: c.col_type for c in self._tbl_version_path.columns()}
167
+
168
+ def query_names(self) -> list[str]:
169
+ """Return the names of the registered queries for this table."""
170
+ return list(self._queries.keys())
171
+
172
+ @property
173
+ def base(self) -> Optional['Table']:
174
+ """
175
+ The base table of this `Table`. If this table is a view, returns the `Table`
176
+ from which it was derived. Otherwise, returns `None`.
177
+ """
178
+ if self._tbl_version_path.base is None:
179
+ return None
180
+ base_id = self._tbl_version_path.base.tbl_version.id
181
+ return catalog.Catalog.get().tbls[base_id]
141
182
 
142
183
  @property
143
184
  def comment(self) -> str:
144
- return self.tbl_version.comment
185
+ return self._tbl_version.comment
145
186
 
146
187
  @comment.setter
147
188
  def comment(self, new_comment: Optional[str]):
148
- self.tbl_version.set_comment(new_comment)
189
+ self._tbl_version.set_comment(new_comment)
149
190
 
150
191
  @property
151
192
  def num_retained_versions(self):
152
- return self.tbl_version.num_retained_versions
193
+ return self._tbl_version.num_retained_versions
153
194
 
154
195
  @num_retained_versions.setter
155
196
  def num_retained_versions(self, new_num_retained_versions: int):
156
- self.tbl_version.set_num_retained_versions(new_num_retained_versions)
197
+ self._tbl_version.set_num_retained_versions(new_num_retained_versions)
157
198
 
158
199
  def _description(self) -> pd.DataFrame:
159
- cols = self.tbl_version_path.columns()
200
+ cols = self._tbl_version_path.columns()
160
201
  df = pd.DataFrame({
161
202
  'Column Name': [c.name for c in cols],
162
203
  'Type': [str(c.col_type) for c in cols],
@@ -173,6 +214,9 @@ class Table(SchemaObject):
173
214
  .hide(axis='index')
174
215
 
175
216
  def describe(self) -> None:
217
+ """
218
+ Print the table schema.
219
+ """
176
220
  try:
177
221
  __IPYTHON__
178
222
  from IPython.display import display
@@ -194,8 +238,8 @@ class Table(SchemaObject):
194
238
 
195
239
  def _drop(self) -> None:
196
240
  self._check_is_dropped()
197
- self.tbl_version_path.tbl_version.drop()
198
- self.is_dropped = True
241
+ self._tbl_version.drop()
242
+ self._is_dropped = True
199
243
  # update catalog
200
244
  cat = catalog.Catalog.get()
201
245
  del cat.tbls[self._id]
@@ -207,14 +251,14 @@ class Table(SchemaObject):
207
251
  See DataFrame.to_pytorch_dataset()
208
252
  """
209
253
  from pixeltable.dataframe import DataFrame
210
- return DataFrame(self.tbl_version_path).to_pytorch_dataset(image_format=image_format)
254
+ return DataFrame(self._tbl_version_path).to_pytorch_dataset(image_format=image_format)
211
255
 
212
256
  def to_coco_dataset(self) -> Path:
213
257
  """Return the path to a COCO json file for this table.
214
258
  See DataFrame.to_coco_dataset()
215
259
  """
216
260
  from pixeltable.dataframe import DataFrame
217
- return DataFrame(self.tbl_version_path).to_coco_dataset()
261
+ return DataFrame(self._tbl_version_path).to_coco_dataset()
218
262
 
219
263
  def __setitem__(self, column_name: str, value: Union[ts.ColumnType, exprs.Expr, Callable, dict]) -> None:
220
264
  """Adds a column to the table
@@ -252,8 +296,8 @@ class Table(SchemaObject):
252
296
  raise excs.Error(f'Invalid column name: {column_name!r}')
253
297
 
254
298
  new_col = self._create_columns({column_name: value})[0]
255
- self._verify_column(new_col, self.column_names())
256
- return self.tbl_version_path.tbl_version.add_column(new_col)
299
+ self._verify_column(new_col, self.column_names(), self.query_names())
300
+ return self._tbl_version.add_column(new_col)
257
301
 
258
302
  def add_column(
259
303
  self, *,
@@ -319,7 +363,7 @@ class Table(SchemaObject):
319
363
  f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
320
364
  ))
321
365
  col_name, spec = next(iter(kwargs.items()))
322
- col_schema: Dict[str, Any] = {}
366
+ col_schema: dict[str, Any] = {}
323
367
  if isinstance(spec, ts.ColumnType):
324
368
  if type is not None:
325
369
  raise excs.Error(f'add_column(): keyword argument "type" is redundant')
@@ -334,11 +378,11 @@ class Table(SchemaObject):
334
378
  col_schema['stored'] = stored
335
379
 
336
380
  new_col = self._create_columns({col_name: col_schema})[0]
337
- self._verify_column(new_col, self.column_names())
338
- return self.tbl_version_path.tbl_version.add_column(new_col, print_stats=print_stats)
381
+ self._verify_column(new_col, self.column_names(), self.query_names())
382
+ return self._tbl_version.add_column(new_col, print_stats=print_stats)
339
383
 
340
384
  @classmethod
341
- def _validate_column_spec(cls, name: str, spec: Dict[str, Any]) -> None:
385
+ def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
342
386
  """Check integrity of user-supplied Column spec
343
387
 
344
388
  We unfortunately can't use something like jsonschema for validation, because this isn't strictly a JSON schema
@@ -378,9 +422,9 @@ class Table(SchemaObject):
378
422
  raise excs.Error(f'Column {name}: "type" is required')
379
423
 
380
424
  @classmethod
381
- def _create_columns(cls, schema: Dict[str, Any]) -> List[Column]:
425
+ def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
382
426
  """Construct list of Columns, given schema"""
383
- columns: List[Column] = []
427
+ columns: list[Column] = []
384
428
  for name, spec in schema.items():
385
429
  col_type: Optional[ts.ColumnType] = None
386
430
  value_expr: Optional[exprs.Expr] = None
@@ -414,25 +458,29 @@ class Table(SchemaObject):
414
458
  return columns
415
459
 
416
460
  @classmethod
417
- def _verify_column(cls, col: Column, existing_column_names: Set[str]) -> None:
461
+ def _verify_column(
462
+ cls, col: Column, existing_column_names: Set[str], existing_query_names: Optional[Set[str]] = None
463
+ ) -> None:
418
464
  """Check integrity of user-supplied Column and supply defaults"""
419
465
  if is_system_column_name(col.name):
420
- raise excs.Error(f'Column name {col.name} is reserved')
466
+ raise excs.Error(f'Column name {col.name!r} is reserved')
421
467
  if not is_valid_identifier(col.name):
422
- raise excs.Error(f"Invalid column name: '{col.name}'")
468
+ raise excs.Error(f"Invalid column name: {col.name!r}")
423
469
  if col.name in existing_column_names:
424
- raise excs.Error(f'Duplicate column name: {col.name}')
470
+ raise excs.Error(f'Duplicate column name: {col.name!r}')
471
+ if existing_query_names is not None and col.name in existing_query_names:
472
+ raise excs.Error(f'Column name conflicts with a registered query: {col.name!r}')
425
473
  if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
426
- raise excs.Error(f'Column {col.name}: stored={col.stored} only applies to computed image columns')
474
+ raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed image columns')
427
475
  if col.stored is False and not (col.col_type.is_image_type() and not col.has_window_fn_call()):
428
476
  raise excs.Error((
429
- f'Column {col.name}: stored={col.stored} is not valid for image columns computed with a streaming '
477
+ f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a streaming '
430
478
  f'function'))
431
479
  if col.stored is None:
432
480
  col.stored = not (col.is_computed and col.col_type.is_image_type() and not col.has_window_fn_call())
433
481
 
434
482
  @classmethod
435
- def _verify_schema(cls, schema: List[Column]) -> None:
483
+ def _verify_schema(cls, schema: list[Column]) -> None:
436
484
  """Check integrity of user-supplied schema and set defaults"""
437
485
  column_names: Set[str] = set()
438
486
  for col in schema:
@@ -454,7 +502,7 @@ class Table(SchemaObject):
454
502
  >>> tbl.drop_column('factorial')
455
503
  """
456
504
  self._check_is_dropped()
457
- self.tbl_version_path.tbl_version.drop_column(name)
505
+ self._tbl_version.drop_column(name)
458
506
 
459
507
  def rename_column(self, old_name: str, new_name: str) -> None:
460
508
  """Rename a column.
@@ -472,7 +520,7 @@ class Table(SchemaObject):
472
520
  >>> tbl.rename_column('factorial', 'fac')
473
521
  """
474
522
  self._check_is_dropped()
475
- self.tbl_version_path.tbl_version.rename_column(old_name, new_name)
523
+ self._tbl_version.rename_column(old_name, new_name)
476
524
 
477
525
  def add_embedding_index(
478
526
  self, col_name: str, *, idx_name: Optional[str] = None,
@@ -501,18 +549,18 @@ class Table(SchemaObject):
501
549
  >>> tbl.add_embedding_index(
502
550
  'img', idx_name='clip_idx', img_embed=..., text_embed=...text_embed..., metric='ip')
503
551
  """
504
- if self.tbl_version_path.is_snapshot():
552
+ if self._tbl_version_path.is_snapshot():
505
553
  raise excs.Error('Cannot add an index to a snapshot')
506
554
  self._check_is_dropped()
507
- col = self.tbl_version_path.get_column(col_name, include_bases=True)
555
+ col = self._tbl_version_path.get_column(col_name, include_bases=True)
508
556
  if col is None:
509
557
  raise excs.Error(f'Column {col_name} unknown')
510
- if idx_name is not None and idx_name in self.tbl_version_path.tbl_version.idxs_by_name:
558
+ if idx_name is not None and idx_name in self._tbl_version.idxs_by_name:
511
559
  raise excs.Error(f'Duplicate index name: {idx_name}')
512
560
  from pixeltable.index import EmbeddingIndex
513
561
  # create the EmbeddingIndex instance to verify args
514
562
  idx = EmbeddingIndex(col, metric=metric, text_embed=text_embed, img_embed=img_embed)
515
- status = self.tbl_version_path.tbl_version.add_index(col, idx_name=idx_name, idx=idx)
563
+ status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
516
564
  # TODO: how to deal with exceptions here? drop the index and raise?
517
565
 
518
566
  def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
@@ -520,7 +568,7 @@ class Table(SchemaObject):
520
568
 
521
569
  Args:
522
570
  column_name: The name of the column whose embedding index to drop. Invalid if the column has multiple
523
- embedding indices.
571
+ embedding indices.
524
572
  idx_name: The name of the index to drop.
525
573
 
526
574
  Raises:
@@ -554,25 +602,24 @@ class Table(SchemaObject):
554
602
  self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
555
603
  _idx_class: Optional[Type[index.IndexBase]] = None
556
604
  ) -> None:
557
- if self.tbl_version_path.is_snapshot():
605
+ if self._tbl_version_path.is_snapshot():
558
606
  raise excs.Error('Cannot drop an index from a snapshot')
559
607
  self._check_is_dropped()
560
608
  if (column_name is None) == (idx_name is None):
561
609
  raise excs.Error('Exactly one of column_name or idx_name must be provided')
562
- tbl_version = self.tbl_version_path.tbl_version
563
610
 
564
611
  if idx_name is not None:
565
- if idx_name not in tbl_version.idxs_by_name:
612
+ if idx_name not in self._tbl_version.idxs_by_name:
566
613
  raise excs.Error(f'Index {idx_name} does not exist')
567
- idx_id = tbl_version.idxs_by_name[idx_name].id
614
+ idx_id = self._tbl_version.idxs_by_name[idx_name].id
568
615
  else:
569
- col = self.tbl_version_path.get_column(column_name, include_bases=True)
616
+ col = self._tbl_version_path.get_column(column_name, include_bases=True)
570
617
  if col is None:
571
618
  raise excs.Error(f'Column {column_name} unknown')
572
- if col.tbl.id != tbl_version.id:
619
+ if col.tbl.id != self._tbl_version.id:
573
620
  raise excs.Error(
574
621
  f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
575
- idx_info = [info for info in tbl_version.idxs_by_name.values() if info.col.id == col.id]
622
+ idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
576
623
  if _idx_class is not None:
577
624
  idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
578
625
  if len(idx_info) == 0:
@@ -580,7 +627,57 @@ class Table(SchemaObject):
580
627
  if len(idx_info) > 1:
581
628
  raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
582
629
  idx_id = idx_info[0].id
583
- self.tbl_version_path.tbl_version.drop_index(idx_id)
630
+ self._tbl_version.drop_index(idx_id)
631
+
632
+ @overload
633
+ def insert(
634
+ self, rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
635
+ ) -> UpdateStatus: ...
636
+
637
+ @overload
638
+ def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
639
+
640
+ @abc.abstractmethod
641
+ def insert(
642
+ self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
643
+ fail_on_exception: bool = True, **kwargs: Any
644
+ ) -> UpdateStatus:
645
+ """Inserts rows into this table. There are two mutually exclusive call patterns:
646
+
647
+ To insert multiple rows at a time:
648
+ ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
649
+
650
+ To insert just a single row, you can use the more convenient syntax:
651
+ ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
652
+
653
+ Args:
654
+ rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
655
+ names to values.
656
+ kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
657
+ print_stats: If ``True``, print statistics about the cost of computed columns.
658
+ fail_on_exception:
659
+ Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
660
+ are handled.
661
+ If ``False``, store error information (accessible as column properties 'errortype' and 'errormsg')
662
+ for those cases, but continue inserting rows.
663
+ If ``True``, raise an exception that aborts the insert.
664
+
665
+ Returns:
666
+ execution status
667
+
668
+ Raises:
669
+ Error: if a row does not match the table schema or contains values for computed columns
670
+
671
+ Examples:
672
+ Insert two rows into a table with three int columns ``a``, ``b``, and ``c``. Column ``c`` is nullable.
673
+
674
+ >>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
675
+
676
+ Insert a single row into a table with three int columns ``a``, ``b``, and ``c``.
677
+
678
+ >>> tbl.insert(a=1, b=1, c=1)
679
+ """
680
+ raise NotImplementedError
584
681
 
585
682
  def update(
586
683
  self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
@@ -609,7 +706,7 @@ class Table(SchemaObject):
609
706
 
610
707
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
611
708
  """
612
- if self.tbl_version_path.is_snapshot():
709
+ if self._tbl_version_path.is_snapshot():
613
710
  raise excs.Error('Cannot update a snapshot')
614
711
  self._check_is_dropped()
615
712
 
@@ -618,12 +715,12 @@ class Table(SchemaObject):
618
715
  if where is not None:
619
716
  if not isinstance(where, exprs.Predicate):
620
717
  raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
621
- analysis_info = Planner.analyze(self.tbl_version_path, where)
718
+ analysis_info = Planner.analyze(self._tbl_version_path, where)
622
719
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
623
720
  if analysis_info.filter is not None:
624
721
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
625
722
 
626
- return self.tbl_version_path.tbl_version.update(update_spec, where, cascade)
723
+ return self._tbl_version.update(update_spec, where, cascade)
627
724
 
628
725
  def batch_update(self, rows: Iterable[dict[str, Any]], cascade: bool = True) -> UpdateStatus:
629
726
  """Update rows in this table.
@@ -638,15 +735,15 @@ class Table(SchemaObject):
638
735
 
639
736
  >>> tbl.update([{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 2, 'name': 'Bob', 'age': 40}])
640
737
  """
641
- if self.tbl_version_path.is_snapshot():
738
+ if self._tbl_version_path.is_snapshot():
642
739
  raise excs.Error('Cannot update a snapshot')
643
740
  self._check_is_dropped()
644
741
 
645
- row_updates: List[Dict[Column, exprs.Expr]] = []
646
- pk_col_names = set(c.name for c in self.tbl_version_path.tbl_version.primary_key_columns())
742
+ row_updates: list[dict[Column, exprs.Expr]] = []
743
+ pk_col_names = set(c.name for c in self._tbl_version.primary_key_columns())
647
744
 
648
745
  # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
649
- has_rowid = self.ROWID_COLUMN_NAME in rows[0]
746
+ has_rowid = self.__ROWID_COLUMN_NAME in rows[0]
650
747
  rowids: list[Tuple[int, ...]] = []
651
748
  if len(pk_col_names) == 0 and not has_rowid:
652
749
  raise excs.Error('Table must have primary key for batch update')
@@ -655,15 +752,15 @@ class Table(SchemaObject):
655
752
  col_vals = self._validate_update_spec(row_spec, allow_pk=not has_rowid, allow_exprs=False)
656
753
  if has_rowid:
657
754
  # we expect the _rowid column to be present for each row
658
- assert self.ROWID_COLUMN_NAME in row_spec
659
- rowids.append(row_spec[self.ROWID_COLUMN_NAME])
755
+ assert self.__ROWID_COLUMN_NAME in row_spec
756
+ rowids.append(row_spec[self.__ROWID_COLUMN_NAME])
660
757
  else:
661
758
  col_names = set(col.name for col in col_vals.keys())
662
759
  if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
663
760
  missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
664
761
  raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
665
762
  row_updates.append(col_vals)
666
- return self.tbl_version_path.tbl_version.batch_update(row_updates, rowids, cascade)
763
+ return self._tbl_version.batch_update(row_updates, rowids, cascade)
667
764
 
668
765
  def _validate_update_spec(
669
766
  self, value_spec: dict[str, Any], allow_pk: bool, allow_exprs: bool
@@ -673,10 +770,10 @@ class Table(SchemaObject):
673
770
  for col_name, val in value_spec.items():
674
771
  if not isinstance(col_name, str):
675
772
  raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
676
- if col_name == self.ROWID_COLUMN_NAME:
773
+ if col_name == self.__ROWID_COLUMN_NAME:
677
774
  # ignore pseudo-column _rowid
678
775
  continue
679
- col = self.tbl_version_path.get_column(col_name, include_bases=False)
776
+ col = self._tbl_version_path.get_column(col_name, include_bases=False)
680
777
  if col is None:
681
778
  # TODO: return more informative error if this is trying to update a base column
682
779
  raise excs.Error(f'Column {col_name} unknown')
@@ -709,169 +806,166 @@ class Table(SchemaObject):
709
806
 
710
807
  return update_targets
711
808
 
809
+ @abc.abstractmethod
810
+ def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
811
+ """Delete rows in this table.
812
+
813
+ Args:
814
+ where: a Predicate to filter rows to delete.
815
+
816
+ Examples:
817
+ Delete all rows in a table:
818
+
819
+ >>> tbl.delete()
820
+
821
+ Delete all rows in a table where column `a` is greater than 5:
822
+
823
+ >>> tbl.delete(tbl.a > 5)
824
+ """
825
+ raise NotImplementedError
826
+
712
827
  def revert(self) -> None:
713
828
  """Reverts the table to the previous version.
714
829
 
715
830
  .. warning::
716
831
  This operation is irreversible.
717
832
  """
718
- if self.tbl_version_path.is_snapshot():
833
+ if self._tbl_version_path.is_snapshot():
719
834
  raise excs.Error('Cannot revert a snapshot')
720
835
  self._check_is_dropped()
721
- self.tbl_version_path.tbl_version.revert()
836
+ self._tbl_version.revert()
837
+
838
+ @overload
839
+ def query(self, py_fn: Callable) -> 'pixeltable.func.QueryTemplateFunction': ...
840
+
841
+ @overload
842
+ def query(
843
+ self, *, param_types: Optional[list[ts.ColumnType]] = None
844
+ ) -> Callable[[Callable], 'pixeltable.func.QueryTemplateFunction']: ...
845
+
846
+ def query(self, *args: Any, **kwargs: Any) -> Any:
847
+ def make_query_template(
848
+ py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
849
+ ) -> 'pixeltable.func.QueryTemplateFunction':
850
+ if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
851
+ # this is a named function in a module
852
+ function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
853
+ else:
854
+ function_path = None
855
+ query_name = py_fn.__name__
856
+ if query_name in self.column_names():
857
+ raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
858
+ if query_name in self._queries:
859
+ raise excs.Error(f'Duplicate query name: {query_name!r}')
860
+ import pixeltable.func as func
861
+ query_fn = func.QueryTemplateFunction.create(
862
+ py_fn, param_types=param_types, path=function_path, name=query_name)
863
+ self._queries[query_name] = query_fn
864
+ return query_fn
865
+
866
+ # TODO: verify that the inferred return type matches that of the template
867
+ # TODO: verify that the signature doesn't contain batched parameters
868
+
869
+ if len(args) == 1:
870
+ assert len(kwargs) == 0 and callable(args[0])
871
+ return make_query_template(args[0], None)
872
+ else:
873
+ assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
874
+ return lambda py_fn: make_query_template(py_fn, kwargs['param_types'])
722
875
 
723
- def _link(
724
- self,
725
- remote: 'pixeltable.datatransfer.Remote',
726
- col_mapping: Optional[dict[str, str]] = None
727
- ) -> None:
728
- """
729
- Links the specified `Remote` to this table. Once a remote is linked, it can be synchronized with
730
- this `Table` by calling [`Table.sync()`]. A record of the link
731
- is stored in table metadata and will persist across sessions.
876
+ @property
877
+ def external_stores(self) -> list[str]:
878
+ return list(self._tbl_version.external_stores.keys())
732
879
 
733
- Args:
734
- remote (pixeltable.datatransfer.Remote): The `Remote` to link to this table.
735
- col_mapping: An optional mapping of columns from this `Table` to columns in the `Remote`.
880
+ def _link_external_store(self, store: 'pixeltable.io.ExternalStore') -> None:
881
+ """
882
+ Links the specified `ExternalStore` to this table.
736
883
  """
737
- # TODO(aaron-siegel): Refactor `col_mapping`
884
+ if self._tbl_version.is_snapshot:
885
+ raise excs.Error(f'Table `{self.get_name()}` is a snapshot, so it cannot be linked to an external store.')
738
886
  self._check_is_dropped()
739
- if remote in self._get_remotes():
740
- raise excs.Error(f'That remote is already linked to table `{self.get_name()}`: {remote}')
741
- push_cols = remote.get_export_columns()
742
- pull_cols = remote.get_import_columns()
743
- is_col_mapping_user_specified = col_mapping is not None
744
- if col_mapping is None:
745
- # Use the identity mapping by default if `col_mapping` is not specified
746
- col_mapping = {col: col for col in itertools.chain(push_cols.keys(), pull_cols.keys())}
747
- self._validate_remote(push_cols, pull_cols, col_mapping, is_col_mapping_user_specified)
748
- _logger.info(f'Linking remote {remote} to table `{self.get_name()}`.')
749
- self.tbl_version_path.tbl_version.link(remote, col_mapping)
750
- print(f'Linked remote {remote} to table `{self.get_name()}`.')
751
-
752
- def unlink(
887
+ if store.name in self.external_stores:
888
+ raise excs.Error(f'Table `{self.get_name()}` already has an external store with that name: {store.name}')
889
+ _logger.info(f'Linking external store `{store.name}` to table `{self.get_name()}`')
890
+ self._tbl_version.link_external_store(store)
891
+ print(f'Linked external store `{store.name}` to table `{self.get_name()}`.')
892
+
893
+ def unlink_external_stores(
753
894
  self,
754
- remotes: Optional['pixeltable.datatransfer.Remote' | list['pixeltable.datatransfer.Remote']] = None,
895
+ stores: Optional[str | list[str]] = None,
755
896
  *,
756
- delete_remote_data: bool = False,
897
+ delete_external_data: bool = False,
757
898
  ignore_errors: bool = False
758
899
  ) -> None:
759
900
  """
760
- Unlinks this table's `Remote`s.
901
+ Unlinks this table's external stores.
761
902
 
762
903
  Args:
763
- remotes: If specified, will unlink only the specified `Remote` or list of `Remote`s. If not specified,
764
- will unlink all of this table's `Remote`s.
765
- ignore_errors (bool): If `True`, no exception will be thrown if the specified `Remote` is not linked
904
+ stores: If specified, will unlink only the specified named store or list of stores. If not specified,
905
+ will unlink all of this table's external stores.
906
+ ignore_errors (bool): If `True`, no exception will be thrown if a specified store is not linked
766
907
  to this table.
767
- delete_remote_data (bool): If `True`, then the remote data source will also be deleted. WARNING: This
908
+ delete_external_data (bool): If `True`, then the external data store will also be deleted. WARNING: This
768
909
  is a destructive operation that will delete data outside Pixeltable, and cannot be undone.
769
-
770
910
  """
771
911
  self._check_is_dropped()
772
- all_remotes = self._get_remotes()
912
+ all_stores = self.external_stores
773
913
 
774
- if remotes is None:
775
- remotes = list(all_remotes.keys())
776
- elif isinstance(remotes, pixeltable.datatransfer.Remote):
777
- remotes = [remotes]
914
+ if stores is None:
915
+ stores = all_stores
916
+ elif isinstance(stores, str):
917
+ stores = [stores]
778
918
 
779
919
  # Validation
780
920
  if not ignore_errors:
781
- for remote in remotes:
782
- if remote not in all_remotes:
783
- raise excs.Error(f'Remote {remote} is not linked to table `{self.get_name()}`')
921
+ for store in stores:
922
+ if store not in all_stores:
923
+ raise excs.Error(f'Table `{self.get_name()}` has no external store with that name: {store}')
784
924
 
785
- for remote in remotes:
786
- self.tbl_version_path.tbl_version.unlink(remote)
787
- print(f'Unlinked remote {remote} from table `{self.get_name()}`.')
788
- if delete_remote_data:
789
- remote.delete()
790
-
791
- def _validate_remote(
792
- self,
793
- export_cols: dict[str, ts.ColumnType],
794
- import_cols: dict[str, ts.ColumnType],
795
- col_mapping: Optional[dict[str, str]],
796
- is_col_mapping_user_specified: bool
797
- ):
798
- # Validate names
799
- t_cols = self.column_names()
800
- for t_col, r_col in col_mapping.items():
801
- if t_col not in t_cols:
802
- if is_col_mapping_user_specified:
803
- raise excs.Error(
804
- f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{self.get_name()}` '
805
- 'contains no such column.'
806
- )
807
- else:
808
- raise excs.Error(
809
- f'Column `{t_col}` does not exist in Table `{self.get_name()}`. Either add a column `{t_col}`, '
810
- f'or specify a `col_mapping` to associate a different column with the remote field `{r_col}`.'
811
- )
812
- if r_col not in export_cols and r_col not in import_cols:
813
- raise excs.Error(
814
- f'Column name `{r_col}` appears as a value in `col_mapping`, but the remote '
815
- f'configuration has no column `{r_col}`.'
816
- )
817
- # Validate column specs
818
- t_col_types = self.column_types()
819
- for t_col, r_col in col_mapping.items():
820
- t_col_type = t_col_types[t_col]
821
- if r_col in export_cols:
822
- # Validate that the table column can be assigned to the remote column
823
- r_col_type = export_cols[r_col]
824
- if not r_col_type.is_supertype_of(t_col_type):
825
- raise excs.Error(
826
- f'Column `{t_col}` cannot be exported to remote column `{r_col}` (incompatible types; expecting `{r_col_type}`)'
827
- )
828
- if r_col in import_cols:
829
- # Validate that the remote column can be assigned to the table column
830
- if self.tbl_version_path.get_column(t_col).is_computed:
831
- raise excs.Error(
832
- f'Column `{t_col}` is a computed column, which cannot be populated from a remote column'
833
- )
834
- r_col_type = import_cols[r_col]
835
- if not t_col_type.is_supertype_of(r_col_type):
836
- raise excs.Error(
837
- f'Column `{t_col}` cannot be imported from remote column `{r_col}` (incompatible types; expecting `{r_col_type}`)'
838
- )
839
-
840
- def _get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
841
- """
842
- Gets a `dict` of all `Remote`s linked to this table.
843
- """
844
- return self.tbl_version_path.tbl_version.get_remotes()
925
+ for store in stores:
926
+ self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
927
+ print(f'Unlinked external store from table `{self.get_name()}`: {store}')
845
928
 
846
929
  def sync(
847
930
  self,
931
+ stores: Optional[str | list[str]] = None,
848
932
  *,
849
933
  export_data: bool = True,
850
934
  import_data: bool = True
851
- ):
935
+ ) -> 'pixeltable.io.SyncStatus':
852
936
  """
853
- Synchronizes this table with its linked `Remote`s.
937
+ Synchronizes this table with its linked external stores.
854
938
 
855
939
  Args:
856
- export_data: If `True`, data from this table will be exported to the external store during synchronization.
857
- import_data: If `True`, data from the external store will be imported to this table during synchronization.
940
+ stores: If specified, will synchronize only the specified named store or list of stores. If not specified,
941
+ will synchronize all of this table's external stores.
942
+ export_data: If `True`, data from this table will be exported to the external stores during synchronization.
943
+ import_data: If `True`, data from the external stores will be imported to this table during synchronization.
858
944
  """
859
- remotes = self._get_remotes()
860
- assert len(remotes) <= 1
945
+ self._check_is_dropped()
946
+ all_stores = self.external_stores
861
947
 
862
- # Validation
863
- for remote in remotes:
864
- col_mapping = remotes[remote]
865
- r_cols = set(col_mapping.values())
866
- # Validate export/import
867
- if export_data and not any(col in r_cols for col in remote.get_export_columns()):
868
- raise excs.Error(
869
- f'Attempted to sync with export_data=True, but there are no columns to export: {remote}'
870
- )
871
- if import_data and not any(col in r_cols for col in remote.get_import_columns()):
872
- raise excs.Error(
873
- f'Attempted to sync with import_data=True, but there are no columns to import: {remote}'
874
- )
948
+ if stores is None:
949
+ stores = all_stores
950
+ elif isinstance(stores, str):
951
+ stores = [stores]
952
+
953
+ for store in stores:
954
+ if store not in all_stores:
955
+ raise excs.Error(f'Table `{self.get_name()}` has no external store with that name: {store}')
956
+
957
+ from pixeltable.io import SyncStatus
958
+
959
+ sync_status = SyncStatus.empty()
960
+ for store in stores:
961
+ store_obj = self._tbl_version.external_stores[store]
962
+ store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
963
+ sync_status = sync_status.combine(store_sync_status)
964
+
965
+ return sync_status
966
+
967
+ def __dir__(self) -> list[str]:
968
+ return list(super().__dir__()) + self.column_names() + self.query_names()
875
969
 
876
- for remote in remotes:
877
- remote.sync(self, remotes[remote], export_data=export_data, import_data=import_data)
970
+ def _ipython_key_completions_(self) -> list[str]:
971
+ return self.column_names() + self.query_names()