pixeltable 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (84):
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/globals.py +3 -0
  5. pixeltable/catalog/table.py +208 -145
  6. pixeltable/catalog/table_version.py +36 -18
  7. pixeltable/catalog/table_version_path.py +0 -8
  8. pixeltable/catalog/view.py +3 -3
  9. pixeltable/dataframe.py +9 -24
  10. pixeltable/env.py +1 -1
  11. pixeltable/exec/__init__.py +1 -1
  12. pixeltable/exec/aggregation_node.py +22 -15
  13. pixeltable/exec/data_row_batch.py +7 -7
  14. pixeltable/exec/exec_node.py +35 -7
  15. pixeltable/exec/expr_eval_node.py +2 -1
  16. pixeltable/exec/in_memory_data_node.py +9 -9
  17. pixeltable/exec/sql_node.py +265 -136
  18. pixeltable/exprs/__init__.py +1 -0
  19. pixeltable/exprs/data_row.py +30 -19
  20. pixeltable/exprs/expr.py +15 -14
  21. pixeltable/exprs/expr_dict.py +55 -0
  22. pixeltable/exprs/expr_set.py +21 -15
  23. pixeltable/exprs/function_call.py +21 -8
  24. pixeltable/exprs/rowid_ref.py +2 -2
  25. pixeltable/exprs/sql_element_cache.py +5 -1
  26. pixeltable/ext/functions/whisperx.py +7 -2
  27. pixeltable/func/callable_function.py +2 -2
  28. pixeltable/func/function_registry.py +6 -7
  29. pixeltable/func/query_template_function.py +11 -12
  30. pixeltable/func/signature.py +17 -15
  31. pixeltable/func/udf.py +0 -4
  32. pixeltable/functions/__init__.py +1 -1
  33. pixeltable/functions/audio.py +4 -6
  34. pixeltable/functions/globals.py +86 -42
  35. pixeltable/functions/huggingface.py +12 -14
  36. pixeltable/functions/image.py +59 -45
  37. pixeltable/functions/json.py +0 -1
  38. pixeltable/functions/mistralai.py +2 -2
  39. pixeltable/functions/openai.py +22 -25
  40. pixeltable/functions/string.py +50 -50
  41. pixeltable/functions/timestamp.py +20 -20
  42. pixeltable/functions/together.py +2 -2
  43. pixeltable/functions/video.py +11 -20
  44. pixeltable/functions/whisper.py +2 -20
  45. pixeltable/globals.py +55 -56
  46. pixeltable/index/base.py +2 -2
  47. pixeltable/index/btree.py +7 -7
  48. pixeltable/index/embedding_index.py +8 -10
  49. pixeltable/io/external_store.py +11 -5
  50. pixeltable/io/globals.py +2 -0
  51. pixeltable/io/hf_datasets.py +1 -1
  52. pixeltable/io/label_studio.py +6 -6
  53. pixeltable/io/parquet.py +14 -13
  54. pixeltable/iterators/document.py +9 -7
  55. pixeltable/iterators/video.py +10 -1
  56. pixeltable/metadata/__init__.py +3 -2
  57. pixeltable/metadata/converters/convert_14.py +4 -2
  58. pixeltable/metadata/converters/convert_15.py +1 -1
  59. pixeltable/metadata/converters/convert_19.py +1 -0
  60. pixeltable/metadata/converters/convert_20.py +1 -1
  61. pixeltable/metadata/converters/util.py +9 -8
  62. pixeltable/metadata/schema.py +32 -21
  63. pixeltable/plan.py +136 -154
  64. pixeltable/store.py +51 -36
  65. pixeltable/tool/create_test_db_dump.py +6 -6
  66. pixeltable/tool/doc_plugins/griffe.py +3 -34
  67. pixeltable/tool/mypy_plugin.py +32 -0
  68. pixeltable/type_system.py +243 -60
  69. pixeltable/utils/arrow.py +10 -9
  70. pixeltable/utils/coco.py +4 -4
  71. pixeltable/utils/documents.py +1 -1
  72. pixeltable/utils/filecache.py +9 -9
  73. pixeltable/utils/formatter.py +1 -1
  74. pixeltable/utils/http_server.py +2 -5
  75. pixeltable/utils/media_store.py +6 -6
  76. pixeltable/utils/pytorch.py +10 -11
  77. pixeltable/utils/sql.py +2 -1
  78. {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/METADATA +6 -5
  79. pixeltable-0.2.21.dist-info/RECORD +148 -0
  80. pixeltable/utils/help.py +0 -11
  81. pixeltable-0.2.20.dist-info/RECORD +0 -147
  82. {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
  83. {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
  84. {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
@@ -15,12 +15,12 @@ from typing import Optional
15
15
 
16
16
  import sqlalchemy as sql
17
17
 
18
+ import pixeltable as pxt
18
19
  from pixeltable.env import Env
19
- import pixeltable.func as func
20
20
  from pixeltable.utils.code import local_public_names
21
21
 
22
22
 
23
- @func.udf(is_property=True)
23
+ @pxt.udf(is_property=True)
24
24
  def year(self: datetime) -> int:
25
25
  """
26
26
  Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
@@ -36,7 +36,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
36
36
  return sql.extract('year', self)
37
37
 
38
38
 
39
- @func.udf(is_property=True)
39
+ @pxt.udf(is_property=True)
40
40
  def month(self: datetime) -> int:
41
41
  """
42
42
  Between 1 and 12 inclusive.
@@ -51,7 +51,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
51
51
  return sql.extract('month', self)
52
52
 
53
53
 
54
- @func.udf(is_property=True)
54
+ @pxt.udf(is_property=True)
55
55
  def day(self: datetime) -> int:
56
56
  """
57
57
  Between 1 and the number of days in the given month of the given year.
@@ -66,7 +66,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
66
66
  return sql.extract('day', self)
67
67
 
68
68
 
69
- @func.udf(is_property=True)
69
+ @pxt.udf(is_property=True)
70
70
  def hour(self: datetime) -> int:
71
71
  """
72
72
  Between 0 and 23 inclusive.
@@ -81,7 +81,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
81
81
  return sql.extract('hour', self)
82
82
 
83
83
 
84
- @func.udf(is_property=True)
84
+ @pxt.udf(is_property=True)
85
85
  def minute(self: datetime) -> int:
86
86
  """
87
87
  Between 0 and 59 inclusive.
@@ -96,7 +96,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
96
96
  return sql.extract('minute', self)
97
97
 
98
98
 
99
- @func.udf(is_property=True)
99
+ @pxt.udf(is_property=True)
100
100
  def second(self: datetime) -> int:
101
101
  """
102
102
  Between 0 and 59 inclusive.
@@ -111,7 +111,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
111
111
  return sql.extract('second', self)
112
112
 
113
113
 
114
- @func.udf(is_property=True)
114
+ @pxt.udf(is_property=True)
115
115
  def microsecond(self: datetime) -> int:
116
116
  """
117
117
  Between 0 and 999999 inclusive.
@@ -126,7 +126,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
126
126
  return sql.extract('microseconds', self) - sql.extract('second', self) * 1000000
127
127
 
128
128
 
129
- @func.udf(is_method=True)
129
+ @pxt.udf(is_method=True)
130
130
  def astimezone(self: datetime, tz: str) -> datetime:
131
131
  """
132
132
  Convert the datetime to the given time zone.
@@ -139,7 +139,7 @@ def astimezone(self: datetime, tz: str) -> datetime:
139
139
  return self.astimezone(tzinfo)
140
140
 
141
141
 
142
- @func.udf(is_method=True)
142
+ @pxt.udf(is_method=True)
143
143
  def weekday(self: datetime) -> int:
144
144
  """
145
145
  Between 0 (Monday) and 6 (Sunday) inclusive.
@@ -154,7 +154,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
154
154
  return sql.extract('isodow', self) - 1
155
155
 
156
156
 
157
- @func.udf(is_method=True)
157
+ @pxt.udf(is_method=True)
158
158
  def isoweekday(self: datetime) -> int:
159
159
  """
160
160
  Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
@@ -169,7 +169,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
169
169
  return sql.extract('isodow', self)
170
170
 
171
171
 
172
- @func.udf(is_method=True)
172
+ @pxt.udf(is_method=True)
173
173
  def isocalendar(self: datetime) -> dict:
174
174
  """
175
175
  Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
@@ -181,7 +181,7 @@ def isocalendar(self: datetime) -> dict:
181
181
  return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
182
182
 
183
183
 
184
- @func.udf(is_method=True)
184
+ @pxt.udf(is_method=True)
185
185
  def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
186
186
  """
187
187
  Return a string representing the date and time in ISO 8601 format.
@@ -195,7 +195,7 @@ def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
195
195
  return self.isoformat(sep=sep, timespec=timespec)
196
196
 
197
197
 
198
- @func.udf(is_method=True)
198
+ @pxt.udf(is_method=True)
199
199
  def strftime(self: datetime, format: str) -> str:
200
200
  """
201
201
  Return a string representing the date and time, controlled by an explicit format string.
@@ -208,7 +208,7 @@ def strftime(self: datetime, format: str) -> str:
208
208
  return self.strftime(format)
209
209
 
210
210
 
211
- @func.udf(is_method=True)
211
+ @pxt.udf(is_method=True)
212
212
  def make_timestamp(
213
213
  year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0, microsecond: int = 0
214
214
  ) -> datetime:
@@ -234,7 +234,7 @@ def _(
234
234
  sql.cast(minute, sql.Integer),
235
235
  sql.cast(second + microsecond / 1000000.0, sql.Double))
236
236
 
237
- # @func.udf
237
+ # @pxt.udf
238
238
  # def date(self: datetime) -> datetime:
239
239
  # """
240
240
  # Return the date part of the datetime.
@@ -245,7 +245,7 @@ def _(
245
245
  # return datetime(d.year, d.month, d.day)
246
246
  #
247
247
  #
248
- # @func.udf
248
+ # @pxt.udf
249
249
  # def time(self: datetime) -> datetime:
250
250
  # """
251
251
  # Return the time part of the datetime, with microseconds set to 0.
@@ -256,7 +256,7 @@ def _(
256
256
  # return datetime(1, 1, 1, t.hour, t.minute, t.second, t.microsecond)
257
257
 
258
258
 
259
- @func.udf(is_method=True)
259
+ @pxt.udf(is_method=True)
260
260
  def replace(
261
261
  self: datetime, year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None,
262
262
  hour: Optional[int] = None, minute: Optional[int] = None, second: Optional[int] = None,
@@ -271,7 +271,7 @@ def replace(
271
271
  return self.replace(**kwargs)
272
272
 
273
273
 
274
- @func.udf(is_method=True)
274
+ @pxt.udf(is_method=True)
275
275
  def toordinal(self: datetime) -> int:
276
276
  """
277
277
  Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
@@ -281,7 +281,7 @@ def toordinal(self: datetime) -> int:
281
281
  return self.toordinal()
282
282
 
283
283
 
284
- @func.udf(is_method=True)
284
+ @pxt.udf(is_method=True)
285
285
  def posix_timestamp(self: datetime) -> float:
286
286
  """
287
287
  Return POSIX timestamp corresponding to the datetime instance.
@@ -185,8 +185,8 @@ _embedding_dimensions_cache = {
185
185
  }
186
186
 
187
187
 
188
- @pxt.udf(batch_size=32, return_type=pxt.ArrayType((None,), dtype=pxt.FloatType()))
189
- def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
188
+ @pxt.udf(batch_size=32)
189
+ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), float]]:
190
190
  """
191
191
  Query an embedding model for a given string of text.
192
192
 
@@ -20,9 +20,8 @@ import av # type: ignore[import-untyped]
20
20
  import numpy as np
21
21
  import PIL.Image
22
22
 
23
+ import pixeltable as pxt
23
24
  import pixeltable.env as env
24
- import pixeltable.func as func
25
- import pixeltable.type_system as ts
26
25
  from pixeltable.utils.code import local_public_names
27
26
 
28
27
  _format_defaults = { # format -> (codec, ext)
@@ -48,14 +47,14 @@ _format_defaults = { # format -> (codec, ext)
48
47
  # output_container.mux(packet)
49
48
 
50
49
 
51
- @func.uda(
52
- init_types=[ts.IntType()],
53
- update_types=[ts.ImageType()],
54
- value_type=ts.VideoType(),
50
+ @pxt.uda(
51
+ init_types=[pxt.IntType()],
52
+ update_types=[pxt.ImageType()],
53
+ value_type=pxt.VideoType(),
55
54
  requires_order_by=True,
56
55
  allows_window=False,
57
56
  )
58
- class make_video(func.Aggregator):
57
+ class make_video(pxt.Aggregator):
59
58
  """
60
59
  Aggregator that creates a video from a sequence of images.
61
60
  """
@@ -88,18 +87,10 @@ class make_video(func.Aggregator):
88
87
  return str(self.out_file)
89
88
 
90
89
 
91
- _extract_audio_param_types = [
92
- ts.VideoType(nullable=False),
93
- ts.IntType(nullable=False),
94
- ts.StringType(nullable=False),
95
- ts.StringType(nullable=True),
96
- ]
97
-
98
-
99
- @func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types, is_method=True)
90
+ @pxt.udf(is_method=True)
100
91
  def extract_audio(
101
- video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
102
- ) -> Optional[str]:
92
+ video_path: pxt.Video, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
93
+ ) -> pxt.Audio:
103
94
  """
104
95
  Extract an audio stream from a video file, save it as a media file and return its path.
105
96
 
@@ -128,8 +119,8 @@ def extract_audio(
128
119
  return output_filename
129
120
 
130
121
 
131
- @func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)], is_method=True)
132
- def get_metadata(video: str) -> dict:
122
+ @pxt.udf(is_method=True)
123
+ def get_metadata(video: pxt.Video) -> dict:
133
124
  """
134
125
  Gets various metadata associated with a video file and returns it as a dictionary.
135
126
  """
@@ -14,27 +14,9 @@ from pixeltable.env import Env
14
14
  if TYPE_CHECKING:
15
15
  from whisper import Whisper # type: ignore[import-untyped]
16
16
 
17
-
18
- @pxt.udf(
19
- param_types=[
20
- pxt.AudioType(),
21
- pxt.StringType(),
22
- pxt.JsonType(nullable=True),
23
- pxt.FloatType(nullable=True),
24
- pxt.FloatType(nullable=True),
25
- pxt.FloatType(nullable=True),
26
- pxt.BoolType(),
27
- pxt.StringType(nullable=True),
28
- pxt.BoolType(),
29
- pxt.StringType(),
30
- pxt.StringType(),
31
- pxt.StringType(),
32
- pxt.FloatType(nullable=True),
33
- pxt.JsonType(nullable=True),
34
- ]
35
- )
17
+ @pxt.udf
36
18
  def transcribe(
37
- audio: str,
19
+ audio: pxt.Audio,
38
20
  *,
39
21
  model: str,
40
22
  temperature: Optional[list[float]] = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
pixeltable/globals.py CHANGED
@@ -54,11 +54,13 @@ def create_table(
54
54
  Examples:
55
55
  Create a table with an int and a string column:
56
56
 
57
- >>> table = pxt.create_table('my_table', schema={'col1': IntType(), 'col2': StringType()})
57
+ >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String})
58
58
 
59
- Create a table from a select statement over an existing table `tbl`:
59
+ Create a table from a select statement over an existing table `orig_table` (this will create a new table
60
+ containing the exact contents of the query):
60
61
 
61
- >>> table = pxt.create_table('my_table', tbl.where(tbl.col1 < 10).select(tbl.col2))
62
+ >>> tbl1 = pxt.get_table('orig_table')
63
+ ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
62
64
  """
63
65
  path = catalog.Path(path_str)
64
66
  Catalog.get().paths.check_is_valid(path, expected=None)
@@ -105,8 +107,7 @@ def create_view(
105
107
  path_str: str,
106
108
  base: Union[catalog.Table, DataFrame],
107
109
  *,
108
- schema: Optional[dict[str, Any]] = None,
109
- filter: Optional[exprs.Expr] = None,
110
+ additional_columns: Optional[dict[str, Any]] = None,
110
111
  is_snapshot: bool = False,
111
112
  iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
112
113
  num_retained_versions: int = 10,
@@ -116,11 +117,13 @@ def create_view(
116
117
  """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
117
118
 
118
119
  Args:
119
- path_str: Path to the view.
120
+ path_str: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
121
+ `dir1.my_view`.
120
122
  base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
121
123
  base the view on.
122
- schema: dictionary mapping column names to column types, value expressions, or to column specifications.
123
- filter: predicate to filter rows of the base table.
124
+ additional_columns: If specified, will add these columns to the view once it is created. The format
125
+ of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
126
+ [`create_table`][pixeltable.create_table].
124
127
  is_snapshot: Whether the view is a snapshot.
125
128
  iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
126
129
  the base table.
@@ -130,36 +133,29 @@ def create_view(
130
133
 
131
134
  Returns:
132
135
  A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
133
- exists or is invalid and `ignore_errors=True`, returns `None`.
136
+ exists or is invalid and `ignore_errors=True`, returns `None`.
134
137
 
135
138
  Raises:
136
139
  Error: if the path already exists or is invalid and `ignore_errors=False`.
137
140
 
138
141
  Examples:
139
- Create a view with an additional int and a string column and a filter:
142
+ Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
140
143
 
141
- >>> view = cl.create_view(
142
- 'my_view', base, schema={'col3': IntType(), 'col4': StringType()}, filter=base.col1 > 10)
144
+ >>> tbl = pxt.get_table('my_table')
145
+ ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
143
146
 
144
- Create a table snapshot:
147
+ Create a snapshot of `my_table`:
145
148
 
146
- >>> snapshot_view = cl.create_view('my_snapshot_view', base, is_snapshot=True)
147
-
148
- Create an immutable view with additional computed columns and a filter:
149
-
150
- >>> snapshot_view = cl.create_view(
151
- 'my_snapshot', base, schema={'col3': base.col2 + 1}, filter=base.col1 > 10, is_snapshot=True)
149
+ >>> tbl = pxt.get_table('my_table')
150
+ ... snapshot_view = pxt.create_view('my_snapshot_view', tbl, is_snapshot=True)
152
151
  """
152
+ where: Optional[exprs.Expr] = None
153
153
  if isinstance(base, catalog.Table):
154
154
  tbl_version_path = base._tbl_version_path
155
155
  elif isinstance(base, DataFrame):
156
156
  base._validate_mutable('create_view')
157
157
  tbl_version_path = base.tbl
158
- if base.where_clause is not None and filter is not None:
159
- raise excs.Error(
160
- 'Cannot specify a `filter` directly if one is already declared in a `DataFrame.where` clause'
161
- )
162
- filter = base.where_clause
158
+ where = base.where_clause
163
159
  else:
164
160
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
165
161
  assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
@@ -173,8 +169,8 @@ def create_view(
173
169
  raise e
174
170
  dir = Catalog.get().paths[path.parent]
175
171
 
176
- if schema is None:
177
- schema = {}
172
+ if additional_columns is None:
173
+ additional_columns = {}
178
174
  if iterator is None:
179
175
  iterator_class, iterator_args = None, None
180
176
  else:
@@ -184,8 +180,8 @@ def create_view(
184
180
  dir._id,
185
181
  path.name,
186
182
  base=tbl_version_path,
187
- schema=schema,
188
- predicate=filter,
183
+ additional_columns=additional_columns,
184
+ predicate=where,
189
185
  is_snapshot=is_snapshot,
190
186
  iterator_cls=iterator_class,
191
187
  iterator_args=iterator_args,
@@ -199,7 +195,7 @@ def create_view(
199
195
 
200
196
 
201
197
  def get_table(path: str) -> catalog.Table:
202
- """Get a handle to an existing table or view or snapshot.
198
+ """Get a handle to an existing table, view, or snapshot.
203
199
 
204
200
  Args:
205
201
  path: Path to the table.
@@ -213,15 +209,15 @@ def get_table(path: str) -> catalog.Table:
213
209
  Examples:
214
210
  Get handle for a table in the top-level directory:
215
211
 
216
- >>> table = cl.get_table('my_table')
212
+ >>> tbl = pxt.get_table('my_table')
217
213
 
218
214
  For a table in a subdirectory:
219
215
 
220
- >>> table = cl.get_table('subdir.my_table')
216
+ >>> tbl = pxt.get_table('subdir.my_table')
221
217
 
222
- For a snapshot in the top-level directory:
218
+ Handles to views and snapshots are retrieved in the same way:
223
219
 
224
- >>> table = cl.get_table('my_snapshot')
220
+ >>> tbl = pxt.get_table('my_snapshot')
225
221
  """
226
222
  p = catalog.Path(path)
227
223
  Catalog.get().paths.check_is_valid(p, expected=catalog.Table)
@@ -243,11 +239,11 @@ def move(path: str, new_path: str) -> None:
243
239
  Examples:
244
240
  Move a table to a different directory:
245
241
 
246
- >>>> cl.move('dir1.my_table', 'dir2.my_table')
242
+ >>>> pxt.move('dir1.my_table', 'dir2.my_table')
247
243
 
248
244
  Rename a table:
249
245
 
250
- >>>> cl.move('dir1.my_table', 'dir1.new_name')
246
+ >>>> pxt.move('dir1.my_table', 'dir1.new_name')
251
247
  """
252
248
  p = catalog.Path(path)
253
249
  Catalog.get().paths.check_is_valid(p, expected=catalog.SchemaObject)
@@ -260,18 +256,18 @@ def move(path: str, new_path: str) -> None:
260
256
 
261
257
 
262
258
  def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> None:
263
- """Drop a table or view or snapshot.
259
+ """Drop a table, view, or snapshot.
264
260
 
265
261
  Args:
266
262
  path: Path to the [`Table`][pixeltable.Table].
267
- force: If `True`, will also drop all views or sub-views of this table.
268
- ignore_errors: Whether to ignore errors if the table does not exist.
263
+ force: If `True`, will also drop all views and sub-views of this table.
264
+ ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
269
265
 
270
266
  Raises:
271
- Error: If the path does not exist or does not designate a table object and ignore_errors is False.
267
+ Error: If the path does not exist or does not designate a table object, and `ignore_errors=False`.
272
268
 
273
269
  Examples:
274
- >>> cl.drop_table('my_table')
270
+ >>> pxt.drop_table('my_table')
275
271
  """
276
272
  cat = Catalog.get()
277
273
  path_obj = catalog.Path(path)
@@ -302,7 +298,8 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
302
298
 
303
299
  Args:
304
300
  dir_path: Path to the directory. Defaults to the root directory.
305
- recursive: Whether to list tables in subdirectories as well.
301
+ recursive: If `False`, returns only those tables that are directly contained in specified directory; if
302
+ `True`, returns all tables that are descendants of the specified directory, recursively.
306
303
 
307
304
  Returns:
308
305
  A list of [`Table`][pixeltable.Table] paths.
@@ -313,13 +310,11 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
313
310
  Examples:
314
311
  List tables in top-level directory:
315
312
 
316
- >>> cl.list_tables()
317
- ['my_table', ...]
313
+ >>> pxt.list_tables()
318
314
 
319
315
  List tables in 'dir1':
320
316
 
321
- >>> cl.list_tables('dir1')
322
- [...]
317
+ >>> pxt.list_tables('dir1')
323
318
  """
324
319
  assert dir_path is not None
325
320
  path = catalog.Path(dir_path, empty_is_valid=True)
@@ -332,17 +327,17 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
332
327
 
333
328
  Args:
334
329
  path_str: Path to the directory.
335
- ignore_errors: if True, silently returns on error
330
+ ignore_errors: if `True`, will return silently instead of throwing an exception if an error occurs.
336
331
 
337
332
  Raises:
338
- Error: If the path already exists or the parent is not a directory.
333
+ Error: If the path already exists or the parent is not a directory, and `ignore_errors=False`.
339
334
 
340
335
  Examples:
341
- >>> cl.create_dir('my_dir')
336
+ >>> pxt.create_dir('my_dir')
342
337
 
343
338
  Create a subdirectory:
344
339
 
345
- >>> cl.create_dir('my_dir.sub_dir')
340
+ >>> pxt.create_dir('my_dir.sub_dir')
346
341
  """
347
342
  try:
348
343
  path = catalog.Path(path_str)
@@ -373,17 +368,21 @@ def drop_dir(path_str: str, force: bool = False, ignore_errors: bool = False) ->
373
368
  """Remove a directory.
374
369
 
375
370
  Args:
376
- path_str: Path to the directory.
371
+ path_str: Name or path of the directory.
372
+ force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
373
+ with any views or snapshots that depend on any of the dropped tables.
374
+ ignore_errors: if `True`, will return silently instead of throwing an exception if the directory
375
+ does not exist.
377
376
 
378
377
  Raises:
379
- Error: If the path does not exist or does not designate a directory or if the directory is not empty.
378
+ Error: If the path does not exist or does not designate a directory, or if the directory is not empty.
380
379
 
381
380
  Examples:
382
- >>> cl.drop_dir('my_dir')
381
+ >>> pxt.drop_dir('my_dir')
383
382
 
384
383
  Remove a subdirectory:
385
384
 
386
- >>> cl.drop_dir('my_dir.sub_dir')
385
+ >>> pxt.drop_dir('my_dir.sub_dir')
387
386
  """
388
387
  cat = Catalog.get()
389
388
  path = catalog.Path(path_str)
@@ -428,14 +427,14 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
428
427
  """List the directories in a directory.
429
428
 
430
429
  Args:
431
- path_str: Path to the directory.
432
- recursive: Whether to list subdirectories recursively.
430
+ path_str: Name or path of the directory.
431
+ recursive: If `True`, lists all descendants of this directory recursively.
433
432
 
434
433
  Returns:
435
434
  List of directory paths.
436
435
 
437
436
  Raises:
438
- Error: If the path does not exist or does not designate a directory.
437
+ Error: If `path_str` does not exist or does not designate a directory.
439
438
 
440
439
  Examples:
441
440
  >>> cl.list_dirs('my_dir', recursive=True)
pixeltable/index/base.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
- import pixeltable.catalog as catalog
8
+ from pixeltable import catalog, exprs
9
9
 
10
10
 
11
11
  class IndexBase(abc.ABC):
@@ -22,7 +22,7 @@ class IndexBase(abc.ABC):
22
22
  pass
23
23
 
24
24
  @abc.abstractmethod
25
- def index_value_expr(self) -> 'pixeltable.exprs.Expr':
25
+ def index_value_expr(self) -> exprs.Expr:
26
26
  """Return expression that computes the value that goes into the index"""
27
27
  pass
28
28
 
pixeltable/index/btree.py CHANGED
@@ -2,10 +2,10 @@ from typing import Optional
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
5
- # TODO: why does this import result in a circular import, but the one im embedding_index.py doesn't?
6
- #import pixeltable.catalog as catalog
7
5
  import pixeltable.exceptions as excs
8
- import pixeltable.func as func
6
+ from pixeltable import catalog, exprs
7
+ from pixeltable.func.udf import udf
8
+
9
9
  from .base import IndexBase
10
10
 
11
11
 
@@ -15,7 +15,8 @@ class BtreeIndex(IndexBase):
15
15
  """
16
16
  MAX_STRING_LEN = 256
17
17
 
18
- @func.udf
18
+ @staticmethod
19
+ @udf
19
20
  def str_filter(s: Optional[str]) -> Optional[str]:
20
21
  if s is None:
21
22
  return None
@@ -24,10 +25,9 @@ class BtreeIndex(IndexBase):
24
25
  def __init__(self, c: 'catalog.Column'):
25
26
  if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
26
27
  raise excs.Error(f'Index on column {c.name}: B-tree index requires scalar or media type, got {c.col_type}')
27
- from pixeltable.exprs import ColumnRef
28
- self.value_expr = self.str_filter(ColumnRef(c)) if c.col_type.is_string_type() else ColumnRef(c)
28
+ self.value_expr = BtreeIndex.str_filter(exprs.ColumnRef(c)) if c.col_type.is_string_type() else exprs.ColumnRef(c)
29
29
 
30
- def index_value_expr(self) -> 'pixeltable.exprs.Expr':
30
+ def index_value_expr(self) -> 'exprs.Expr':
31
31
  return self.value_expr
32
32
 
33
33
  def records_value_errors(self) -> bool:
@@ -1,18 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, Any
4
3
  import enum
4
+ from typing import Any, Optional
5
5
 
6
- import PIL.Image
7
6
  import numpy as np
8
- import pgvector.sqlalchemy
7
+ import pgvector.sqlalchemy # type: ignore[import-untyped]
9
8
  import PIL.Image
10
9
  import sqlalchemy as sql
11
10
 
12
- import pixeltable.catalog as catalog
13
11
  import pixeltable.exceptions as excs
14
- import pixeltable.func as func
15
12
  import pixeltable.type_system as ts
13
+ from pixeltable import catalog, exprs, func
14
+
16
15
  from .base import IndexBase
17
16
 
18
17
 
@@ -58,16 +57,15 @@ class EmbeddingIndex(IndexBase):
58
57
  self._validate_embedding_fn(image_embed, 'image_embed', ts.ColumnType.Type.IMAGE)
59
58
 
60
59
  self.metric = self.Metric[metric.upper()]
61
- from pixeltable.exprs import ColumnRef
62
- self.value_expr = string_embed(ColumnRef(c)) if c.col_type.is_string_type() else image_embed(ColumnRef(c))
63
- assert self.value_expr.col_type.is_array_type()
60
+ self.value_expr = string_embed(exprs.ColumnRef(c)) if c.col_type.is_string_type() else image_embed(exprs.ColumnRef(c))
61
+ assert isinstance(self.value_expr.col_type, ts.ArrayType)
64
62
  self.string_embed = string_embed
65
63
  self.image_embed = image_embed
66
64
  vector_size = self.value_expr.col_type.shape[0]
67
65
  assert vector_size is not None
68
66
  self.index_col_type = pgvector.sqlalchemy.Vector(vector_size)
69
67
 
70
- def index_value_expr(self) -> 'pixeltable.exprs.Expr':
68
+ def index_value_expr(self) -> exprs.Expr:
71
69
  """Return expression that computes the value that goes into the index"""
72
70
  return self.value_expr
73
71
 
@@ -151,7 +149,7 @@ class EmbeddingIndex(IndexBase):
151
149
  img = PIL.Image.new('RGB', (512, 512))
152
150
  return_type = embed_fn.call_return_type({param_name: img})
153
151
  assert return_type is not None
154
- if not return_type.is_array_type():
152
+ if not isinstance(return_type, ts.ArrayType):
155
153
  raise excs.Error(f'{name} must return an array, but returns {return_type}')
156
154
  else:
157
155
  shape = return_type.shape