pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (110)
  1. pixeltable/__init__.py +20 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +23 -7
  4. pixeltable/catalog/insertable_table.py +32 -19
  5. pixeltable/catalog/table.py +210 -20
  6. pixeltable/catalog/table_version.py +272 -111
  7. pixeltable/catalog/table_version_path.py +6 -1
  8. pixeltable/dataframe.py +184 -110
  9. pixeltable/datatransfer/__init__.py +1 -0
  10. pixeltable/datatransfer/label_studio.py +526 -0
  11. pixeltable/datatransfer/remote.py +113 -0
  12. pixeltable/env.py +213 -79
  13. pixeltable/exec/__init__.py +2 -1
  14. pixeltable/exec/data_row_batch.py +6 -7
  15. pixeltable/exec/expr_eval_node.py +28 -28
  16. pixeltable/exec/sql_scan_node.py +7 -6
  17. pixeltable/exprs/__init__.py +4 -3
  18. pixeltable/exprs/column_ref.py +11 -2
  19. pixeltable/exprs/comparison.py +39 -1
  20. pixeltable/exprs/data_row.py +7 -0
  21. pixeltable/exprs/expr.py +26 -19
  22. pixeltable/exprs/function_call.py +17 -18
  23. pixeltable/exprs/globals.py +14 -2
  24. pixeltable/exprs/image_member_access.py +9 -28
  25. pixeltable/exprs/in_predicate.py +96 -0
  26. pixeltable/exprs/inline_array.py +13 -11
  27. pixeltable/exprs/inline_dict.py +15 -13
  28. pixeltable/exprs/row_builder.py +7 -1
  29. pixeltable/exprs/similarity_expr.py +67 -0
  30. pixeltable/ext/functions/whisperx.py +30 -0
  31. pixeltable/ext/functions/yolox.py +16 -0
  32. pixeltable/func/__init__.py +0 -2
  33. pixeltable/func/aggregate_function.py +5 -2
  34. pixeltable/func/callable_function.py +57 -13
  35. pixeltable/func/expr_template_function.py +14 -3
  36. pixeltable/func/function.py +35 -4
  37. pixeltable/func/signature.py +5 -15
  38. pixeltable/func/udf.py +8 -12
  39. pixeltable/functions/fireworks.py +9 -4
  40. pixeltable/functions/huggingface.py +48 -5
  41. pixeltable/functions/openai.py +49 -11
  42. pixeltable/functions/pil/image.py +61 -64
  43. pixeltable/functions/together.py +32 -6
  44. pixeltable/functions/util.py +0 -43
  45. pixeltable/functions/video.py +46 -8
  46. pixeltable/globals.py +443 -0
  47. pixeltable/index/__init__.py +1 -0
  48. pixeltable/index/base.py +9 -2
  49. pixeltable/index/btree.py +54 -0
  50. pixeltable/index/embedding_index.py +91 -15
  51. pixeltable/io/__init__.py +4 -0
  52. pixeltable/io/globals.py +59 -0
  53. pixeltable/{utils → io}/hf_datasets.py +48 -17
  54. pixeltable/io/pandas.py +148 -0
  55. pixeltable/{utils → io}/parquet.py +58 -33
  56. pixeltable/iterators/__init__.py +1 -1
  57. pixeltable/iterators/base.py +8 -4
  58. pixeltable/iterators/document.py +225 -93
  59. pixeltable/iterators/video.py +16 -9
  60. pixeltable/metadata/__init__.py +8 -4
  61. pixeltable/metadata/converters/convert_12.py +3 -0
  62. pixeltable/metadata/converters/convert_13.py +41 -0
  63. pixeltable/metadata/converters/convert_14.py +13 -0
  64. pixeltable/metadata/converters/convert_15.py +29 -0
  65. pixeltable/metadata/converters/util.py +63 -0
  66. pixeltable/metadata/schema.py +12 -6
  67. pixeltable/plan.py +11 -24
  68. pixeltable/store.py +16 -23
  69. pixeltable/tool/create_test_db_dump.py +49 -14
  70. pixeltable/type_system.py +27 -58
  71. pixeltable/utils/coco.py +94 -0
  72. pixeltable/utils/documents.py +42 -12
  73. pixeltable/utils/http_server.py +70 -0
  74. pixeltable-0.2.7.dist-info/METADATA +137 -0
  75. pixeltable-0.2.7.dist-info/RECORD +126 -0
  76. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
  77. pixeltable/client.py +0 -600
  78. pixeltable/exprs/image_similarity_predicate.py +0 -58
  79. pixeltable/func/batched_function.py +0 -53
  80. pixeltable/func/nos_function.py +0 -202
  81. pixeltable/tests/conftest.py +0 -171
  82. pixeltable/tests/ext/test_yolox.py +0 -21
  83. pixeltable/tests/functions/test_fireworks.py +0 -43
  84. pixeltable/tests/functions/test_functions.py +0 -60
  85. pixeltable/tests/functions/test_huggingface.py +0 -158
  86. pixeltable/tests/functions/test_openai.py +0 -162
  87. pixeltable/tests/functions/test_together.py +0 -112
  88. pixeltable/tests/test_audio.py +0 -65
  89. pixeltable/tests/test_catalog.py +0 -27
  90. pixeltable/tests/test_client.py +0 -21
  91. pixeltable/tests/test_component_view.py +0 -379
  92. pixeltable/tests/test_dataframe.py +0 -440
  93. pixeltable/tests/test_dirs.py +0 -107
  94. pixeltable/tests/test_document.py +0 -120
  95. pixeltable/tests/test_exprs.py +0 -802
  96. pixeltable/tests/test_function.py +0 -332
  97. pixeltable/tests/test_index.py +0 -138
  98. pixeltable/tests/test_migration.py +0 -44
  99. pixeltable/tests/test_nos.py +0 -54
  100. pixeltable/tests/test_snapshot.py +0 -231
  101. pixeltable/tests/test_table.py +0 -1343
  102. pixeltable/tests/test_transactional_directory.py +0 -42
  103. pixeltable/tests/test_types.py +0 -52
  104. pixeltable/tests/test_video.py +0 -159
  105. pixeltable/tests/test_view.py +0 -535
  106. pixeltable/tests/utils.py +0 -442
  107. pixeltable/utils/clip.py +0 -18
  108. pixeltable-0.2.5.dist-info/METADATA +0 -128
  109. pixeltable-0.2.5.dist-info/RECORD +0 -139
  110. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
pixeltable/__init__.py CHANGED
@@ -1,18 +1,32 @@
 from .catalog import Column, Table, InsertableTable, View
-from .client import Client
 from .dataframe import DataFrame
+from .datatransfer import Remote
+from .catalog import Column, Table, InsertableTable, View
 from .exceptions import Error, Error
 from .exprs import RELATIVE_PATH_ROOT
 from .func import Function, udf, uda, Aggregator, expr_udf
-from .type_system import \
-    ColumnType, StringType, IntType, FloatType, BoolType, TimestampType, JsonType, ArrayType, ImageType, VideoType, \
-    AudioType, DocumentType
+from .globals import *
+from .type_system import (
+    ColumnType,
+    StringType,
+    IntType,
+    FloatType,
+    BoolType,
+    TimestampType,
+    JsonType,
+    ArrayType,
+    ImageType,
+    VideoType,
+    AudioType,
+    DocumentType,
+)
 from .utils.help import help
+
 # noinspection PyUnresolvedReferences
-from . import functions
+from . import functions, io, iterators
+from .__version__ import __version__, __version_tuple__
 
 __all__ = [
-    'Client',
     'DataFrame',
     'Column',
     'Table',
@@ -39,6 +53,3 @@ __all__ = [
     'uda',
     'expr_udf',
 ]
-
-
-
pixeltable/__version__.py ADDED
@@ -0,0 +1,3 @@
+# These version placeholders will be replaced during build.
+__version__ = "0.2.7"
+__version_tuple__ = (0, 2, 7)
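
Taken together with the deletion of pixeltable/client.py (+0 -600) and the new pixeltable/globals.py (+443 -0) in the file list above, the __init__.py and __version__.py changes shift the top-level API from a Client object to module-level functions and add version metadata. A minimal sketch of the new import surface; the create_table call is an assumption based on the new globals module, not something shown in this diff:

>>> import pixeltable as pxt
>>> pxt.__version__
'0.2.7'
>>> pxt.__version_tuple__
(0, 2, 7)
>>> # hypothetical replacement for the removed pxt.Client() entry point:
>>> # tbl = pxt.create_table('demo', {'img': pxt.ImageType()})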
pixeltable/catalog/column.py CHANGED
@@ -5,8 +5,8 @@ from typing import Optional, Union, Callable, Set
 
 import sqlalchemy as sql
 
-from pixeltable import exceptions as excs
-from pixeltable.type_system import ColumnType, StringType
+import pixeltable.exceptions as excs
+import pixeltable.type_system as ts
 from .globals import is_valid_identifier
 
 _logger = logging.getLogger('pixeltable')
@@ -18,11 +18,12 @@ class Column:
     table/view.
     """
     def __init__(
-        self, name: Optional[str], col_type: Optional[ColumnType] = None,
+        self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
         computed_with: Optional[Union['Expr', Callable]] = None,
         is_pk: bool = False, stored: Optional[bool] = None,
         col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
-        schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None
+        schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
+        records_errors: Optional[bool] = None
     ):
         """Column constructor.
 
@@ -80,12 +81,19 @@
         assert self.col_type is not None
 
         self.stored = stored
-        self.dependent_cols: Set[Column] = set()  # cols with value_exprs that reference us; set by TableVersion
+        self.dependent_cols: set[Column] = set()  # cols with value_exprs that reference us; set by TableVersion
         self.id = col_id
         self.is_pk = is_pk
         self.schema_version_add = schema_version_add
         self.schema_version_drop = schema_version_drop
 
+        # stored_proxy may be set later if this is a non-stored column.
+        # if col1.stored_proxy == col2, then also col1 == col2.proxy_base.
+        self.stored_proxy: Optional[Column] = None
+        self.proxy_base: Optional[Column] = None
+
+        self._records_errors = records_errors
+
         # column in the stored table for the values of this Column
         self.sa_col: Optional[sql.schema.Column] = None
         self.sa_col_type = sa_col_type
@@ -93,6 +101,7 @@
         # computed cols also have storage columns for the exception string and type
         self.sa_errormsg_col: Optional[sql.schema.Column] = None
         self.sa_errortype_col: Optional[sql.schema.Column] = None
+
         from .table_version import TableVersion
         self.tbl: Optional[TableVersion] = None  # set by owning TableVersion
 
@@ -114,6 +123,10 @@
         l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
         return len(l) > 0
 
+    def get_idx_info(self) -> dict[str, 'pixeltable.catalog.TableVersion.IndexInfo']:
+        assert self.tbl is not None
+        return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
+
     @property
     def is_computed(self) -> bool:
         return self.compute_func is not None or self.value_expr is not None
@@ -127,6 +140,9 @@
     @property
     def records_errors(self) -> bool:
         """True if this column also stores error information."""
+        # default: record errors for computed and media columns
+        if self._records_errors is not None:
+            return self._records_errors
         return self.is_stored and (self.is_computed or self.col_type.is_media_type())
 
     def source(self) -> None:
@@ -148,8 +164,8 @@
             self.store_name(), self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
             nullable=True)
         if self.is_computed or self.col_type.is_media_type():
-            self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), StringType().to_sa_type(), nullable=True)
-            self.sa_errortype_col = sql.Column(self.errortype_store_name(), StringType().to_sa_type(), nullable=True)
+            self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
+            self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
 
     def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
         return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
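
The new records_errors constructor argument gives callers an explicit override of the error-recording default. A standalone restatement of the resolution order implemented by the records_errors property above (for reference only, not code from the package):

>>> def records_errors(col) -> bool:
...     # explicit override (new in 0.2.7) wins over the default
...     if col._records_errors is not None:
...         return col._records_errors
...     # default: stored computed columns and stored media columns record errors
...     return col.is_stored and (col.is_computed or col.col_type.is_media_type())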
pixeltable/catalog/insertable_table.py CHANGED
@@ -60,25 +60,29 @@ class InsertableTable(Table):
         return tbl
 
     @overload
-    def insert(self, rows: Iterable[Dict[str, Any]], /, print_stats: bool = False, fail_on_exception: bool = True): ...
+    def insert(
+        self, rows: Iterable[Dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
+    ) -> UpdateStatus: ...
 
     @overload
-    def insert(self, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any): ...
+    def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
 
-    def insert(self, *args, **kwargs) -> UpdateStatus:
-        """Insert rows into table.
+    def insert(
+        self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
+        fail_on_exception: bool = True, **kwargs: Any
+    ) -> UpdateStatus:
+        """Inserts rows into this table. There are two mutually exclusive call patterns:
 
         To insert multiple rows at a time:
-
-        ``insert(rows: List[Dict[str, Any]], print_stats: bool = False, fail_on_exception: bool = True)``
+        ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
 
         To insert just a single row, you can use the more convenient syntax:
-        ``insert(print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
+        ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
 
         Args:
            rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
               names to values.
-           kwargs: (if inserting a single row) keyword-argument pairs representing column names and values.
+           kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
            print_stats: If ``True``, print statistics about the cost of computed columns.
            fail_on_exception:
               Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
@@ -102,16 +106,27 @@ class InsertableTable(Table):
 
        >>> tbl.insert(a=1, b=1, c=1)
        """
-        print_stats = kwargs.pop('print_stats', False)
-        fail_on_exception = kwargs.pop('fail_on_exception', True)
-        if len(args) > 0:
-            # There's a positional argument; this means `rows` is expressed as a
-            # list of dicts (multi-insert)
-            rows = list(args[0])
-        else:
-            # No positional argument; this means we're inserting a single row
-            # using kwargs syntax
+        # The commented code is the intended implementation, with signature (*args, **kwargs).
+        # That signature cannot be used currently, due to a present limitation in mkdocs.
+        # See: https://github.com/mkdocstrings/mkdocstrings/issues/669
+
+        # print_stats = kwargs.pop('print_stats', False)
+        # fail_on_exception = kwargs.pop('fail_on_exception', True)
+        # if len(args) > 0:
+        #     # There's a positional argument; this means `rows` is expressed as a
+        #     # list of dicts (multi-insert)
+        #     rows = list(args[0])
+        # else:
+        #     # No positional argument; this means we're inserting a single row
+        #     # using kwargs syntax
+        #     rows = [kwargs]
+
+        if rows is None:
             rows = [kwargs]
+        else:
+            rows = list(rows)
+            if len(kwargs) > 0:
+                raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
 
         if not isinstance(rows, list):
             raise excs.Error('rows must be a list of dictionaries')
@@ -185,8 +200,6 @@ class InsertableTable(Table):
             if not isinstance(where, Predicate):
                 raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
             analysis_info = Planner.analyze(self.tbl_version_path, where)
-            if analysis_info.similarity_clause is not None:
-                raise excs.Error('nearest() cannot be used with delete()')
             # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
             if analysis_info.filter is not None:
                 raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
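
The reworked insert() keeps both call patterns but now spells them out in the signature instead of using (*args, **kwargs), and it rejects mixing the two. A usage sketch assuming a table tbl with int columns a, b, c (the column names come from the docstring example above):

>>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2, 'c': 2}], fail_on_exception=False)
>>> tbl.insert(a=3, b=3, c=3, print_stats=True)
>>> tbl.insert([{'a': 4}], a=4)  # raises excs.Error: `kwargs` cannot be specified unless `rows is None`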
pixeltable/catalog/table.py CHANGED
@@ -1,9 +1,10 @@
 from __future__ import annotations
 
+import itertools
 import json
 import logging
 from pathlib import Path
-from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable
+from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable, Type
 from uuid import UUID
 
 import pandas as pd
@@ -16,6 +17,7 @@ import pixeltable.exceptions as excs
 import pixeltable.exprs as exprs
 import pixeltable.metadata.schema as schema
 import pixeltable.type_system as ts
+import pixeltable.index as index
 from .column import Column
 from .globals import is_valid_identifier, is_system_column_name, UpdateStatus
 from .schema_object import SchemaObject
@@ -97,27 +99,31 @@ class Table(SchemaObject):
         from pixeltable.dataframe import DataFrame
         return DataFrame(self.tbl_version_path).order_by(*items, asc=asc)
 
-    def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
-        """Return rows from this table.
-        """
+    def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
+        """Return a DataFrame for this table."""
+        from pixeltable.dataframe import DataFrame
+        return DataFrame(self.tbl_version_path).group_by(*items)
+
+    def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
+        """Return rows from this table."""
         return self.df().collect()
 
     def show(
         self, *args, **kwargs
-    ) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
+    ) -> 'pixeltable.dataframe.DataFrameResultSet':
         """Return rows from this table.
         """
         return self.df().show(*args, **kwargs)
 
     def head(
         self, *args, **kwargs
-    ) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
+    ) -> 'pixeltable.dataframe.DataFrameResultSet':
         """Return the first n rows inserted into this table."""
         return self.df().head(*args, **kwargs)
 
     def tail(
         self, *args, **kwargs
-    ) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
+    ) -> 'pixeltable.dataframe.DataFrameResultSet':
         """Return the last n rows inserted into this table."""
         return self.df().tail(*args, **kwargs)
 
@@ -470,13 +476,16 @@
 
     def add_embedding_index(
         self, col_name: str, *, idx_name: Optional[str] = None,
-        text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None
+        text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None,
+        metric: str = 'cosine'
    ) -> None:
         """Add an index to the table.
         Args:
            col_name: name of column to index
            idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
-            idx_type: type of index (one of 'embedding')
+            text_embed: function to embed text; required if the column is a text column
+            img_embed: function to embed images; required if the column is an image column
+            metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
 
        Raises:
            Error: If an index with that name already exists for the table or if the column does not exist.
@@ -484,11 +493,13 @@
        Examples:
            Add an index to the ``img`` column:
 
-            >>> tbl.add_embedding_index('img', text_embed=...)
+            >>> tbl.add_embedding_index('img', img_embed=...)
 
-            Add another index to the ``img`` column, with a specific name:
+            Add another index to the ``img`` column, using the inner product as the distance metric,
+            and with a specific name; ``text_embed`` is also specified in order to search with text:
 
-            >>> tbl.add_embedding_index('img', idx_name='clip_idx', text_embed=...)
+            >>> tbl.add_embedding_index(
+                'img', idx_name='clip_idx', img_embed=..., text_embed=..., metric='ip')
        """
        if self.tbl_version_path.is_snapshot():
            raise excs.Error('Cannot add an index to a snapshot')
@@ -500,10 +511,28 @@
            raise excs.Error(f'Duplicate index name: {idx_name}')
        from pixeltable.index import EmbeddingIndex
        # create the EmbeddingIndex instance to verify args
-        idx = EmbeddingIndex(col, text_embed=text_embed, img_embed=img_embed)
+        idx = EmbeddingIndex(col, metric=metric, text_embed=text_embed, img_embed=img_embed)
        status = self.tbl_version_path.tbl_version.add_index(col, idx_name=idx_name, idx=idx)
        # TODO: how to deal with exceptions here? drop the index and raise?
 
+    def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
+        """Drop an embedding index from the table.
+
+        Args:
+            column_name: The name of the column whose embedding index to drop. Invalid if the column has multiple
+                embedding indices.
+            idx_name: The name of the index to drop.
+
+        Raises:
+            Error: If the index does not exist.
+
+        Examples:
+            Drop embedding index on the ``img`` column:
+
+            >>> tbl.drop_embedding_index(column_name='img')
+        """
+        self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
+
     def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
         """Drop an index from the table.
 
@@ -519,6 +548,12 @@
 
            >>> tbl.drop_index(column_name='img')
        """
+        self._drop_index(column_name=column_name, idx_name=idx_name)
+
+    def _drop_index(
+        self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
+        _idx_class: Optional[Type[index.IndexBase]] = None
+    ) -> None:
        if self.tbl_version_path.is_snapshot():
            raise excs.Error('Cannot drop an index from a snapshot')
        self._check_is_dropped()
@@ -537,12 +572,14 @@
            if col.tbl.id != tbl_version.id:
                raise excs.Error(
                    f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
-            idx_ids = [info.id for info in tbl_version.idxs_by_name.values() if info.col.id == col.id]
-            if len(idx_ids) == 0:
+            idx_info = [info for info in tbl_version.idxs_by_name.values() if info.col.id == col.id]
+            if _idx_class is not None:
+                idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
+            if len(idx_info) == 0:
                raise excs.Error(f'Column {column_name} does not have an index')
-            if len(idx_ids) > 1:
+            if len(idx_info) > 1:
                raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
-            idx_id = idx_ids[0]
+            idx_id = idx_info[0].id
        self.tbl_version_path.tbl_version.drop_index(idx_id)
 
     def update(
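
Putting the index changes together: add_embedding_index() now accepts a metric, drop_embedding_index() is new, and both drop methods delegate to the generalized _drop_index(). A usage sketch assuming a table tbl with an image column img and embedding UDFs clip_img_embed / clip_text_embed (the embedding function names are placeholders, not part of this diff):

>>> tbl.add_embedding_index(
...     'img', idx_name='clip_idx', img_embed=clip_img_embed, text_embed=clip_text_embed, metric='ip')
>>> # either call drops it: by column (unambiguous only if 'img' has a single embedding index) or by name
>>> tbl.drop_embedding_index(column_name='img')  # or: tbl.drop_embedding_index(idx_name='clip_idx')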
@@ -582,8 +619,6 @@
             if not isinstance(where, exprs.Predicate):
                 raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
             analysis_info = Planner.analyze(self.tbl_version_path, where)
-            if analysis_info.similarity_clause is not None:
-                raise excs.Error('nearest() cannot be used with update()')
             # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
             if analysis_info.filter is not None:
                 raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
@@ -674,7 +709,6 @@
 
         return update_targets
 
-
     def revert(self) -> None:
         """Reverts the table to the previous version.
 
@@ -685,3 +719,159 @@
             raise excs.Error('Cannot revert a snapshot')
         self._check_is_dropped()
         self.tbl_version_path.tbl_version.revert()
+
+    def _link(
+        self,
+        remote: 'pixeltable.datatransfer.Remote',
+        col_mapping: Optional[dict[str, str]] = None
+    ) -> None:
+        """
+        Links the specified `Remote` to this table. Once a remote is linked, it can be synchronized with
+        this `Table` by calling [`Table.sync()`]. A record of the link
+        is stored in table metadata and will persist across sessions.
+
+        Args:
+            remote (pixeltable.datatransfer.Remote): The `Remote` to link to this table.
+            col_mapping: An optional mapping of columns from this `Table` to columns in the `Remote`.
+        """
+        # TODO(aaron-siegel): Refactor `col_mapping`
+        self._check_is_dropped()
+        if remote in self._get_remotes():
+            raise excs.Error(f'That remote is already linked to table `{self.get_name()}`: {remote}')
+        push_cols = remote.get_export_columns()
+        pull_cols = remote.get_import_columns()
+        is_col_mapping_user_specified = col_mapping is not None
+        if col_mapping is None:
+            # Use the identity mapping by default if `col_mapping` is not specified
+            col_mapping = {col: col for col in itertools.chain(push_cols.keys(), pull_cols.keys())}
+        self._validate_remote(push_cols, pull_cols, col_mapping, is_col_mapping_user_specified)
+        _logger.info(f'Linking remote {remote} to table `{self.get_name()}`.')
+        self.tbl_version_path.tbl_version.link(remote, col_mapping)
+        print(f'Linked remote {remote} to table `{self.get_name()}`.')
+
+    def unlink(
+        self,
+        remotes: Optional['pixeltable.datatransfer.Remote' | list['pixeltable.datatransfer.Remote']] = None,
+        *,
+        delete_remote_data: bool = False,
+        ignore_errors: bool = False
+    ) -> None:
+        """
+        Unlinks this table's `Remote`s.
+
+        Args:
+            remotes: If specified, will unlink only the specified `Remote` or list of `Remote`s. If not specified,
+                will unlink all of this table's `Remote`s.
+            ignore_errors (bool): If `True`, no exception will be thrown if the specified `Remote` is not linked
+                to this table.
+            delete_remote_data (bool): If `True`, then the remote data source will also be deleted. WARNING: This
+                is a destructive operation that will delete data outside Pixeltable, and cannot be undone.
+
+        """
+        self._check_is_dropped()
+        all_remotes = self._get_remotes()
+
+        if remotes is None:
+            remotes = list(all_remotes.keys())
+        elif isinstance(remotes, pixeltable.datatransfer.Remote):
+            remotes = [remotes]
+
+        # Validation
+        if not ignore_errors:
+            for remote in remotes:
+                if remote not in all_remotes:
+                    raise excs.Error(f'Remote {remote} is not linked to table `{self.get_name()}`')
+
+        for remote in remotes:
+            self.tbl_version_path.tbl_version.unlink(remote)
+            print(f'Unlinked remote {remote} from table `{self.get_name()}`.')
+            if delete_remote_data:
+                remote.delete()
+
+    def _validate_remote(
+        self,
+        export_cols: dict[str, ts.ColumnType],
+        import_cols: dict[str, ts.ColumnType],
+        col_mapping: Optional[dict[str, str]],
+        is_col_mapping_user_specified: bool
+    ):
+        # Validate names
+        t_cols = self.column_names()
+        for t_col, r_col in col_mapping.items():
+            if t_col not in t_cols:
+                if is_col_mapping_user_specified:
+                    raise excs.Error(
+                        f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{self.get_name()}` '
+                        'contains no such column.'
+                    )
+                else:
+                    raise excs.Error(
+                        f'Column `{t_col}` does not exist in Table `{self.get_name()}`. Either add a column `{t_col}`, '
+                        f'or specify a `col_mapping` to associate a different column with the remote field `{r_col}`.'
+                    )
+            if r_col not in export_cols and r_col not in import_cols:
+                raise excs.Error(
+                    f'Column name `{r_col}` appears as a value in `col_mapping`, but the remote '
+                    f'configuration has no column `{r_col}`.'
+                )
+        # Validate column specs
+        t_col_types = self.column_types()
+        for t_col, r_col in col_mapping.items():
+            t_col_type = t_col_types[t_col]
+            if r_col in export_cols:
+                # Validate that the table column can be assigned to the remote column
+                r_col_type = export_cols[r_col]
+                if not r_col_type.is_supertype_of(t_col_type):
+                    raise excs.Error(
+                        f'Column `{t_col}` cannot be exported to remote column `{r_col}` (incompatible types; expecting `{r_col_type}`)'
+                    )
+            if r_col in import_cols:
+                # Validate that the remote column can be assigned to the table column
+                if self.tbl_version_path.get_column(t_col).is_computed:
+                    raise excs.Error(
+                        f'Column `{t_col}` is a computed column, which cannot be populated from a remote column'
+                    )
+                r_col_type = import_cols[r_col]
+                if not t_col_type.is_supertype_of(r_col_type):
+                    raise excs.Error(
+                        f'Column `{t_col}` cannot be imported from remote column `{r_col}` (incompatible types; expecting `{r_col_type}`)'
+                    )
+
+    def _get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
+        """
+        Gets a `dict` of all `Remote`s linked to this table.
+        """
+        return self.tbl_version_path.tbl_version.get_remotes()
+
+    def sync(
+        self,
+        *,
+        export_data: bool = True,
+        import_data: bool = True
+    ):
+        """
+        Synchronizes this table with its linked `Remote`s.
+
+        Args:
+            export_data: If `True`, data from this table will be exported to the external store during synchronization.
+            import_data: If `True`, data from the external store will be imported to this table during synchronization.
+        """
+        remotes = self._get_remotes()
+        assert len(remotes) <= 1
+
+        # Validation
+        for remote in remotes:
+            col_mapping = remotes[remote]
+            r_cols = set(col_mapping.values())
+            # Validate export/import
+            if export_data and not any(col in r_cols for col in remote.get_export_columns()):
+                raise excs.Error(
+                    f'Attempted to sync with export_data=True, but there are no columns to export: {remote}'
+                )
+            if import_data and not any(col in r_cols for col in remote.get_import_columns()):
+                raise excs.Error(
+                    f'Attempted to sync with import_data=True, but there are no columns to import: {remote}'
                )
+
+        for remote in remotes:
+            remote.sync(self, remotes[remote], export_data=export_data, import_data=import_data)
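
The _link()/unlink()/sync() additions are the Table-side half of the new pixeltable/datatransfer package (remote.py and label_studio.py in the file list). A sketch of the intended flow, assuming a hypothetical Remote instance my_remote whose export/import columns line up with the table's columns; only the method names and keyword arguments are taken from this diff, and _link() is still underscore-prefixed (i.e., not yet a public entry point) in this version:

>>> tbl._link(my_remote, col_mapping={'img': 'image', 'label': 'annotation'})
>>> tbl.sync(export_data=True, import_data=True)
>>> tbl.unlink(my_remote, delete_remote_data=False)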