pixeltable 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (88) hide show
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/globals.py +3 -0
  5. pixeltable/catalog/insertable_table.py +9 -7
  6. pixeltable/catalog/table.py +220 -143
  7. pixeltable/catalog/table_version.py +36 -18
  8. pixeltable/catalog/table_version_path.py +0 -8
  9. pixeltable/catalog/view.py +3 -3
  10. pixeltable/dataframe.py +9 -24
  11. pixeltable/env.py +107 -36
  12. pixeltable/exceptions.py +7 -4
  13. pixeltable/exec/__init__.py +1 -1
  14. pixeltable/exec/aggregation_node.py +22 -15
  15. pixeltable/exec/component_iteration_node.py +62 -41
  16. pixeltable/exec/data_row_batch.py +7 -7
  17. pixeltable/exec/exec_node.py +35 -7
  18. pixeltable/exec/expr_eval_node.py +2 -1
  19. pixeltable/exec/in_memory_data_node.py +9 -9
  20. pixeltable/exec/sql_node.py +265 -136
  21. pixeltable/exprs/__init__.py +1 -0
  22. pixeltable/exprs/data_row.py +30 -19
  23. pixeltable/exprs/expr.py +15 -14
  24. pixeltable/exprs/expr_dict.py +55 -0
  25. pixeltable/exprs/expr_set.py +21 -15
  26. pixeltable/exprs/function_call.py +21 -8
  27. pixeltable/exprs/json_path.py +3 -6
  28. pixeltable/exprs/rowid_ref.py +2 -2
  29. pixeltable/exprs/sql_element_cache.py +5 -1
  30. pixeltable/ext/functions/whisperx.py +7 -2
  31. pixeltable/func/callable_function.py +2 -2
  32. pixeltable/func/function_registry.py +6 -7
  33. pixeltable/func/query_template_function.py +11 -12
  34. pixeltable/func/signature.py +17 -15
  35. pixeltable/func/udf.py +0 -4
  36. pixeltable/functions/__init__.py +1 -1
  37. pixeltable/functions/audio.py +4 -6
  38. pixeltable/functions/globals.py +86 -42
  39. pixeltable/functions/huggingface.py +12 -14
  40. pixeltable/functions/image.py +59 -45
  41. pixeltable/functions/json.py +0 -1
  42. pixeltable/functions/mistralai.py +2 -2
  43. pixeltable/functions/openai.py +22 -25
  44. pixeltable/functions/string.py +50 -50
  45. pixeltable/functions/timestamp.py +20 -20
  46. pixeltable/functions/together.py +26 -12
  47. pixeltable/functions/video.py +11 -20
  48. pixeltable/functions/whisper.py +2 -20
  49. pixeltable/globals.py +57 -56
  50. pixeltable/index/base.py +2 -2
  51. pixeltable/index/btree.py +7 -7
  52. pixeltable/index/embedding_index.py +8 -10
  53. pixeltable/io/external_store.py +11 -5
  54. pixeltable/io/globals.py +3 -1
  55. pixeltable/io/hf_datasets.py +4 -4
  56. pixeltable/io/label_studio.py +6 -6
  57. pixeltable/io/parquet.py +14 -13
  58. pixeltable/iterators/document.py +10 -8
  59. pixeltable/iterators/video.py +10 -1
  60. pixeltable/metadata/__init__.py +3 -2
  61. pixeltable/metadata/converters/convert_14.py +4 -2
  62. pixeltable/metadata/converters/convert_15.py +1 -1
  63. pixeltable/metadata/converters/convert_19.py +1 -0
  64. pixeltable/metadata/converters/convert_20.py +1 -1
  65. pixeltable/metadata/converters/util.py +9 -8
  66. pixeltable/metadata/schema.py +32 -21
  67. pixeltable/plan.py +136 -154
  68. pixeltable/store.py +51 -36
  69. pixeltable/tool/create_test_db_dump.py +7 -7
  70. pixeltable/tool/doc_plugins/griffe.py +3 -34
  71. pixeltable/tool/mypy_plugin.py +32 -0
  72. pixeltable/type_system.py +243 -60
  73. pixeltable/utils/arrow.py +10 -9
  74. pixeltable/utils/coco.py +4 -4
  75. pixeltable/utils/documents.py +1 -1
  76. pixeltable/utils/filecache.py +131 -84
  77. pixeltable/utils/formatter.py +1 -1
  78. pixeltable/utils/http_server.py +2 -5
  79. pixeltable/utils/media_store.py +6 -6
  80. pixeltable/utils/pytorch.py +10 -11
  81. pixeltable/utils/sql.py +2 -1
  82. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/METADATA +16 -7
  83. pixeltable-0.2.21.dist-info/RECORD +148 -0
  84. pixeltable/utils/help.py +0 -11
  85. pixeltable-0.2.19.dist-info/RECORD +0 -147
  86. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
  87. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
  88. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
@@ -15,12 +15,12 @@ from typing import Optional
15
15
 
16
16
  import sqlalchemy as sql
17
17
 
18
+ import pixeltable as pxt
18
19
  from pixeltable.env import Env
19
- import pixeltable.func as func
20
20
  from pixeltable.utils.code import local_public_names
21
21
 
22
22
 
23
- @func.udf(is_property=True)
23
+ @pxt.udf(is_property=True)
24
24
  def year(self: datetime) -> int:
25
25
  """
26
26
  Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
@@ -36,7 +36,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
36
36
  return sql.extract('year', self)
37
37
 
38
38
 
39
- @func.udf(is_property=True)
39
+ @pxt.udf(is_property=True)
40
40
  def month(self: datetime) -> int:
41
41
  """
42
42
  Between 1 and 12 inclusive.
@@ -51,7 +51,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
51
51
  return sql.extract('month', self)
52
52
 
53
53
 
54
- @func.udf(is_property=True)
54
+ @pxt.udf(is_property=True)
55
55
  def day(self: datetime) -> int:
56
56
  """
57
57
  Between 1 and the number of days in the given month of the given year.
@@ -66,7 +66,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
66
66
  return sql.extract('day', self)
67
67
 
68
68
 
69
- @func.udf(is_property=True)
69
+ @pxt.udf(is_property=True)
70
70
  def hour(self: datetime) -> int:
71
71
  """
72
72
  Between 0 and 23 inclusive.
@@ -81,7 +81,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
81
81
  return sql.extract('hour', self)
82
82
 
83
83
 
84
- @func.udf(is_property=True)
84
+ @pxt.udf(is_property=True)
85
85
  def minute(self: datetime) -> int:
86
86
  """
87
87
  Between 0 and 59 inclusive.
@@ -96,7 +96,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
96
96
  return sql.extract('minute', self)
97
97
 
98
98
 
99
- @func.udf(is_property=True)
99
+ @pxt.udf(is_property=True)
100
100
  def second(self: datetime) -> int:
101
101
  """
102
102
  Between 0 and 59 inclusive.
@@ -111,7 +111,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
111
111
  return sql.extract('second', self)
112
112
 
113
113
 
114
- @func.udf(is_property=True)
114
+ @pxt.udf(is_property=True)
115
115
  def microsecond(self: datetime) -> int:
116
116
  """
117
117
  Between 0 and 999999 inclusive.
@@ -126,7 +126,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
126
126
  return sql.extract('microseconds', self) - sql.extract('second', self) * 1000000
127
127
 
128
128
 
129
- @func.udf(is_method=True)
129
+ @pxt.udf(is_method=True)
130
130
  def astimezone(self: datetime, tz: str) -> datetime:
131
131
  """
132
132
  Convert the datetime to the given time zone.
@@ -139,7 +139,7 @@ def astimezone(self: datetime, tz: str) -> datetime:
139
139
  return self.astimezone(tzinfo)
140
140
 
141
141
 
142
- @func.udf(is_method=True)
142
+ @pxt.udf(is_method=True)
143
143
  def weekday(self: datetime) -> int:
144
144
  """
145
145
  Between 0 (Monday) and 6 (Sunday) inclusive.
@@ -154,7 +154,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
154
154
  return sql.extract('isodow', self) - 1
155
155
 
156
156
 
157
- @func.udf(is_method=True)
157
+ @pxt.udf(is_method=True)
158
158
  def isoweekday(self: datetime) -> int:
159
159
  """
160
160
  Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
@@ -169,7 +169,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
169
169
  return sql.extract('isodow', self)
170
170
 
171
171
 
172
- @func.udf(is_method=True)
172
+ @pxt.udf(is_method=True)
173
173
  def isocalendar(self: datetime) -> dict:
174
174
  """
175
175
  Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
@@ -181,7 +181,7 @@ def isocalendar(self: datetime) -> dict:
181
181
  return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
182
182
 
183
183
 
184
- @func.udf(is_method=True)
184
+ @pxt.udf(is_method=True)
185
185
  def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
186
186
  """
187
187
  Return a string representing the date and time in ISO 8601 format.
@@ -195,7 +195,7 @@ def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
195
195
  return self.isoformat(sep=sep, timespec=timespec)
196
196
 
197
197
 
198
- @func.udf(is_method=True)
198
+ @pxt.udf(is_method=True)
199
199
  def strftime(self: datetime, format: str) -> str:
200
200
  """
201
201
  Return a string representing the date and time, controlled by an explicit format string.
@@ -208,7 +208,7 @@ def strftime(self: datetime, format: str) -> str:
208
208
  return self.strftime(format)
209
209
 
210
210
 
211
- @func.udf(is_method=True)
211
+ @pxt.udf(is_method=True)
212
212
  def make_timestamp(
213
213
  year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0, microsecond: int = 0
214
214
  ) -> datetime:
@@ -234,7 +234,7 @@ def _(
234
234
  sql.cast(minute, sql.Integer),
235
235
  sql.cast(second + microsecond / 1000000.0, sql.Double))
236
236
 
237
- # @func.udf
237
+ # @pxt.udf
238
238
  # def date(self: datetime) -> datetime:
239
239
  # """
240
240
  # Return the date part of the datetime.
@@ -245,7 +245,7 @@ def _(
245
245
  # return datetime(d.year, d.month, d.day)
246
246
  #
247
247
  #
248
- # @func.udf
248
+ # @pxt.udf
249
249
  # def time(self: datetime) -> datetime:
250
250
  # """
251
251
  # Return the time part of the datetime, with microseconds set to 0.
@@ -256,7 +256,7 @@ def _(
256
256
  # return datetime(1, 1, 1, t.hour, t.minute, t.second, t.microsecond)
257
257
 
258
258
 
259
- @func.udf(is_method=True)
259
+ @pxt.udf(is_method=True)
260
260
  def replace(
261
261
  self: datetime, year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None,
262
262
  hour: Optional[int] = None, minute: Optional[int] = None, second: Optional[int] = None,
@@ -271,7 +271,7 @@ def replace(
271
271
  return self.replace(**kwargs)
272
272
 
273
273
 
274
- @func.udf(is_method=True)
274
+ @pxt.udf(is_method=True)
275
275
  def toordinal(self: datetime) -> int:
276
276
  """
277
277
  Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
@@ -281,7 +281,7 @@ def toordinal(self: datetime) -> int:
281
281
  return self.toordinal()
282
282
 
283
283
 
284
- @func.udf(is_method=True)
284
+ @pxt.udf(is_method=True)
285
285
  def posix_timestamp(self: datetime) -> float:
286
286
  """
287
287
  Return POSIX timestamp corresponding to the datetime instance.
@@ -7,13 +7,15 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
7
7
 
8
8
  import base64
9
9
  import io
10
- from typing import TYPE_CHECKING, Callable, Optional
10
+ from typing import TYPE_CHECKING, Callable, Optional, TypeVar
11
11
 
12
12
  import numpy as np
13
13
  import PIL.Image
14
+ import requests
14
15
  import tenacity
15
16
 
16
17
  import pixeltable as pxt
18
+ import pixeltable.exceptions as excs
17
19
  from pixeltable import env
18
20
  from pixeltable.func import Batch
19
21
  from pixeltable.utils.code import local_public_names
@@ -32,7 +34,10 @@ def _together_client() -> 'together.Together':
32
34
  return env.Env.get().get_client('together')
33
35
 
34
36
 
35
- def _retry(fn: Callable) -> Callable:
37
+ T = TypeVar('T')
38
+
39
+
40
+ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
36
41
  import together
37
42
  return tenacity.retry(
38
43
  retry=tenacity.retry_if_exception_type(together.error.RateLimitError),
@@ -180,8 +185,8 @@ _embedding_dimensions_cache = {
180
185
  }
181
186
 
182
187
 
183
- @pxt.udf(batch_size=32, return_type=pxt.ArrayType((None,), dtype=pxt.FloatType()))
184
- def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
188
+ @pxt.udf(batch_size=32)
189
+ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), float]]:
185
190
  """
186
191
  Query an embedding model for a given string of text.
187
192
 
@@ -249,20 +254,29 @@ def image_generations(
249
254
  The generated image.
250
255
 
251
256
  Examples:
252
- Add a computed column that applies the model `runwayml/stable-diffusion-v1-5`
257
+ Add a computed column that applies the model `stabilityai/stable-diffusion-xl-base-1.0`
253
258
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
254
259
 
255
- >>> tbl['response'] = image_generations(tbl.prompt, model='runwayml/stable-diffusion-v1-5')
260
+ >>> tbl['response'] = image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
256
261
  """
257
- # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
258
262
  result = _retry(_together_client().images.generate)(
259
263
  prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
260
264
  )
261
- b64_str = result.data[0].b64_json
262
- b64_bytes = base64.b64decode(b64_str)
263
- img = PIL.Image.open(io.BytesIO(b64_bytes))
264
- img.load()
265
- return img
265
+ if result.data[0].b64_json is not None:
266
+ b64_bytes = base64.b64decode(result.data[0].b64_json)
267
+ img = PIL.Image.open(io.BytesIO(b64_bytes))
268
+ img.load()
269
+ return img
270
+ if result.data[0].url is not None:
271
+ try:
272
+ resp = requests.get(result.data[0].url)
273
+ with io.BytesIO(resp.content) as fp:
274
+ image = PIL.Image.open(fp)
275
+ image.load()
276
+ return image
277
+ except Exception as exc:
278
+ raise excs.Error('Failed to download generated image from together.ai.') from exc
279
+ raise excs.Error('Response does not contain a generated image.')
266
280
 
267
281
 
268
282
  __all__ = local_public_names(__name__)
@@ -20,9 +20,8 @@ import av # type: ignore[import-untyped]
20
20
  import numpy as np
21
21
  import PIL.Image
22
22
 
23
+ import pixeltable as pxt
23
24
  import pixeltable.env as env
24
- import pixeltable.func as func
25
- import pixeltable.type_system as ts
26
25
  from pixeltable.utils.code import local_public_names
27
26
 
28
27
  _format_defaults = { # format -> (codec, ext)
@@ -48,14 +47,14 @@ _format_defaults = { # format -> (codec, ext)
48
47
  # output_container.mux(packet)
49
48
 
50
49
 
51
- @func.uda(
52
- init_types=[ts.IntType()],
53
- update_types=[ts.ImageType()],
54
- value_type=ts.VideoType(),
50
+ @pxt.uda(
51
+ init_types=[pxt.IntType()],
52
+ update_types=[pxt.ImageType()],
53
+ value_type=pxt.VideoType(),
55
54
  requires_order_by=True,
56
55
  allows_window=False,
57
56
  )
58
- class make_video(func.Aggregator):
57
+ class make_video(pxt.Aggregator):
59
58
  """
60
59
  Aggregator that creates a video from a sequence of images.
61
60
  """
@@ -88,18 +87,10 @@ class make_video(func.Aggregator):
88
87
  return str(self.out_file)
89
88
 
90
89
 
91
- _extract_audio_param_types = [
92
- ts.VideoType(nullable=False),
93
- ts.IntType(nullable=False),
94
- ts.StringType(nullable=False),
95
- ts.StringType(nullable=True),
96
- ]
97
-
98
-
99
- @func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types, is_method=True)
90
+ @pxt.udf(is_method=True)
100
91
  def extract_audio(
101
- video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
102
- ) -> Optional[str]:
92
+ video_path: pxt.Video, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
93
+ ) -> pxt.Audio:
103
94
  """
104
95
  Extract an audio stream from a video file, save it as a media file and return its path.
105
96
 
@@ -128,8 +119,8 @@ def extract_audio(
128
119
  return output_filename
129
120
 
130
121
 
131
- @func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)], is_method=True)
132
- def get_metadata(video: str) -> dict:
122
+ @pxt.udf(is_method=True)
123
+ def get_metadata(video: pxt.Video) -> dict:
133
124
  """
134
125
  Gets various metadata associated with a video file and returns it as a dictionary.
135
126
  """
@@ -14,27 +14,9 @@ from pixeltable.env import Env
14
14
  if TYPE_CHECKING:
15
15
  from whisper import Whisper # type: ignore[import-untyped]
16
16
 
17
-
18
- @pxt.udf(
19
- param_types=[
20
- pxt.AudioType(),
21
- pxt.StringType(),
22
- pxt.JsonType(nullable=True),
23
- pxt.FloatType(nullable=True),
24
- pxt.FloatType(nullable=True),
25
- pxt.FloatType(nullable=True),
26
- pxt.BoolType(),
27
- pxt.StringType(nullable=True),
28
- pxt.BoolType(),
29
- pxt.StringType(),
30
- pxt.StringType(),
31
- pxt.StringType(),
32
- pxt.FloatType(nullable=True),
33
- pxt.JsonType(nullable=True),
34
- ]
35
- )
17
+ @pxt.udf
36
18
  def transcribe(
37
- audio: str,
19
+ audio: pxt.Audio,
38
20
  *,
39
21
  model: str,
40
22
  temperature: Optional[list[float]] = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
pixeltable/globals.py CHANGED
@@ -16,6 +16,7 @@ from pixeltable.dataframe import DataFrameResultSet
16
16
  from pixeltable.env import Env
17
17
  from pixeltable.iterators import ComponentIterator
18
18
  from pixeltable.metadata import schema
19
+ from pixeltable.utils.filecache import FileCache
19
20
 
20
21
  _logger = logging.getLogger('pixeltable')
21
22
 
@@ -53,11 +54,13 @@ def create_table(
53
54
  Examples:
54
55
  Create a table with an int and a string column:
55
56
 
56
- >>> table = pxt.create_table('my_table', schema={'col1': IntType(), 'col2': StringType()})
57
+ >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String})
57
58
 
58
- Create a table from a select statement over an existing table `tbl`:
59
+ Create a table from a select statement over an existing table `orig_table` (this will create a new table
60
+ containing the exact contents of the query):
59
61
 
60
- >>> table = pxt.create_table('my_table', tbl.where(tbl.col1 < 10).select(tbl.col2))
62
+ >>> tbl1 = pxt.get_table('orig_table')
63
+ ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
61
64
  """
62
65
  path = catalog.Path(path_str)
63
66
  Catalog.get().paths.check_is_valid(path, expected=None)
@@ -104,8 +107,7 @@ def create_view(
104
107
  path_str: str,
105
108
  base: Union[catalog.Table, DataFrame],
106
109
  *,
107
- schema: Optional[dict[str, Any]] = None,
108
- filter: Optional[exprs.Expr] = None,
110
+ additional_columns: Optional[dict[str, Any]] = None,
109
111
  is_snapshot: bool = False,
110
112
  iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
111
113
  num_retained_versions: int = 10,
@@ -115,11 +117,13 @@ def create_view(
115
117
  """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
116
118
 
117
119
  Args:
118
- path_str: Path to the view.
120
+ path_str: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
121
+ `dir1.my_view`.
119
122
  base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
120
123
  base the view on.
121
- schema: dictionary mapping column names to column types, value expressions, or to column specifications.
122
- filter: predicate to filter rows of the base table.
124
+ additional_columns: If specified, will add these columns to the view once it is created. The format
125
+ of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
126
+ [`create_table`][pixeltable.create_table].
123
127
  is_snapshot: Whether the view is a snapshot.
124
128
  iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
125
129
  the base table.
@@ -129,36 +133,29 @@ def create_view(
129
133
 
130
134
  Returns:
131
135
  A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
132
- exists or is invalid and `ignore_errors=True`, returns `None`.
136
+ exists or is invalid and `ignore_errors=True`, returns `None`.
133
137
 
134
138
  Raises:
135
139
  Error: if the path already exists or is invalid and `ignore_errors=False`.
136
140
 
137
141
  Examples:
138
- Create a view with an additional int and a string column and a filter:
142
+ Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
139
143
 
140
- >>> view = cl.create_view(
141
- 'my_view', base, schema={'col3': IntType(), 'col4': StringType()}, filter=base.col1 > 10)
144
+ >>> tbl = pxt.get_table('my_table')
145
+ ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
142
146
 
143
- Create a table snapshot:
147
+ Create a snapshot of `my_table`:
144
148
 
145
- >>> snapshot_view = cl.create_view('my_snapshot_view', base, is_snapshot=True)
146
-
147
- Create an immutable view with additional computed columns and a filter:
148
-
149
- >>> snapshot_view = cl.create_view(
150
- 'my_snapshot', base, schema={'col3': base.col2 + 1}, filter=base.col1 > 10, is_snapshot=True)
149
+ >>> tbl = pxt.get_table('my_table')
150
+ ... snapshot_view = pxt.create_view('my_snapshot_view', tbl, is_snapshot=True)
151
151
  """
152
+ where: Optional[exprs.Expr] = None
152
153
  if isinstance(base, catalog.Table):
153
154
  tbl_version_path = base._tbl_version_path
154
155
  elif isinstance(base, DataFrame):
155
156
  base._validate_mutable('create_view')
156
157
  tbl_version_path = base.tbl
157
- if base.where_clause is not None and filter is not None:
158
- raise excs.Error(
159
- 'Cannot specify a `filter` directly if one is already declared in a `DataFrame.where` clause'
160
- )
161
- filter = base.where_clause
158
+ where = base.where_clause
162
159
  else:
163
160
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
164
161
  assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
@@ -172,8 +169,8 @@ def create_view(
172
169
  raise e
173
170
  dir = Catalog.get().paths[path.parent]
174
171
 
175
- if schema is None:
176
- schema = {}
172
+ if additional_columns is None:
173
+ additional_columns = {}
177
174
  if iterator is None:
178
175
  iterator_class, iterator_args = None, None
179
176
  else:
@@ -183,8 +180,8 @@ def create_view(
183
180
  dir._id,
184
181
  path.name,
185
182
  base=tbl_version_path,
186
- schema=schema,
187
- predicate=filter,
183
+ additional_columns=additional_columns,
184
+ predicate=where,
188
185
  is_snapshot=is_snapshot,
189
186
  iterator_cls=iterator_class,
190
187
  iterator_args=iterator_args,
@@ -193,11 +190,12 @@ def create_view(
193
190
  )
194
191
  Catalog.get().paths[path] = view
195
192
  _logger.info(f'Created view `{path_str}`.')
193
+ FileCache.get().emit_eviction_warnings()
196
194
  return view
197
195
 
198
196
 
199
197
  def get_table(path: str) -> catalog.Table:
200
- """Get a handle to an existing table or view or snapshot.
198
+ """Get a handle to an existing table, view, or snapshot.
201
199
 
202
200
  Args:
203
201
  path: Path to the table.
@@ -211,15 +209,15 @@ def get_table(path: str) -> catalog.Table:
211
209
  Examples:
212
210
  Get handle for a table in the top-level directory:
213
211
 
214
- >>> table = cl.get_table('my_table')
212
+ >>> tbl = pxt.get_table('my_table')
215
213
 
216
214
  For a table in a subdirectory:
217
215
 
218
- >>> table = cl.get_table('subdir.my_table')
216
+ >>> tbl = pxt.get_table('subdir.my_table')
219
217
 
220
- For a snapshot in the top-level directory:
218
+ Handles to views and snapshots are retrieved in the same way:
221
219
 
222
- >>> table = cl.get_table('my_snapshot')
220
+ >>> tbl = pxt.get_table('my_snapshot')
223
221
  """
224
222
  p = catalog.Path(path)
225
223
  Catalog.get().paths.check_is_valid(p, expected=catalog.Table)
@@ -241,11 +239,11 @@ def move(path: str, new_path: str) -> None:
241
239
  Examples:
242
240
  Move a table to a different directory:
243
241
 
244
- >>>> cl.move('dir1.my_table', 'dir2.my_table')
242
+ >>>> pxt.move('dir1.my_table', 'dir2.my_table')
245
243
 
246
244
  Rename a table:
247
245
 
248
- >>>> cl.move('dir1.my_table', 'dir1.new_name')
246
+ >>>> pxt.move('dir1.my_table', 'dir1.new_name')
249
247
  """
250
248
  p = catalog.Path(path)
251
249
  Catalog.get().paths.check_is_valid(p, expected=catalog.SchemaObject)
@@ -258,18 +256,18 @@ def move(path: str, new_path: str) -> None:
258
256
 
259
257
 
260
258
  def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> None:
261
- """Drop a table or view or snapshot.
259
+ """Drop a table, view, or snapshot.
262
260
 
263
261
  Args:
264
262
  path: Path to the [`Table`][pixeltable.Table].
265
- force: If `True`, will also drop all views or sub-views of this table.
266
- ignore_errors: Whether to ignore errors if the table does not exist.
263
+ force: If `True`, will also drop all views and sub-views of this table.
264
+ ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
267
265
 
268
266
  Raises:
269
- Error: If the path does not exist or does not designate a table object and ignore_errors is False.
267
+ Error: If the path does not exist or does not designate a table object, and `ignore_errors=False`.
270
268
 
271
269
  Examples:
272
- >>> cl.drop_table('my_table')
270
+ >>> pxt.drop_table('my_table')
273
271
  """
274
272
  cat = Catalog.get()
275
273
  path_obj = catalog.Path(path)
@@ -300,7 +298,8 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
300
298
 
301
299
  Args:
302
300
  dir_path: Path to the directory. Defaults to the root directory.
303
- recursive: Whether to list tables in subdirectories as well.
301
+ recursive: If `False`, returns only those tables that are directly contained in specified directory; if
302
+ `True`, returns all tables that are descendants of the specified directory, recursively.
304
303
 
305
304
  Returns:
306
305
  A list of [`Table`][pixeltable.Table] paths.
@@ -311,13 +310,11 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
311
310
  Examples:
312
311
  List tables in top-level directory:
313
312
 
314
- >>> cl.list_tables()
315
- ['my_table', ...]
313
+ >>> pxt.list_tables()
316
314
 
317
315
  List tables in 'dir1':
318
316
 
319
- >>> cl.list_tables('dir1')
320
- [...]
317
+ >>> pxt.list_tables('dir1')
321
318
  """
322
319
  assert dir_path is not None
323
320
  path = catalog.Path(dir_path, empty_is_valid=True)
@@ -330,17 +327,17 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
330
327
 
331
328
  Args:
332
329
  path_str: Path to the directory.
333
- ignore_errors: if True, silently returns on error
330
+ ignore_errors: if `True`, will return silently instead of throwing an exception if an error occurs.
334
331
 
335
332
  Raises:
336
- Error: If the path already exists or the parent is not a directory.
333
+ Error: If the path already exists or the parent is not a directory, and `ignore_errors=False`.
337
334
 
338
335
  Examples:
339
- >>> cl.create_dir('my_dir')
336
+ >>> pxt.create_dir('my_dir')
340
337
 
341
338
  Create a subdirectory:
342
339
 
343
- >>> cl.create_dir('my_dir.sub_dir')
340
+ >>> pxt.create_dir('my_dir.sub_dir')
344
341
  """
345
342
  try:
346
343
  path = catalog.Path(path_str)
@@ -371,17 +368,21 @@ def drop_dir(path_str: str, force: bool = False, ignore_errors: bool = False) ->
371
368
  """Remove a directory.
372
369
 
373
370
  Args:
374
- path_str: Path to the directory.
371
+ path_str: Name or path of the directory.
372
+ force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
373
+ with any views or snapshots that depend on any of the dropped tables.
374
+ ignore_errors: if `True`, will return silently instead of throwing an exception if the directory
375
+ does not exist.
375
376
 
376
377
  Raises:
377
- Error: If the path does not exist or does not designate a directory or if the directory is not empty.
378
+ Error: If the path does not exist or does not designate a directory, or if the directory is not empty.
378
379
 
379
380
  Examples:
380
- >>> cl.drop_dir('my_dir')
381
+ >>> pxt.drop_dir('my_dir')
381
382
 
382
383
  Remove a subdirectory:
383
384
 
384
- >>> cl.drop_dir('my_dir.sub_dir')
385
+ >>> pxt.drop_dir('my_dir.sub_dir')
385
386
  """
386
387
  cat = Catalog.get()
387
388
  path = catalog.Path(path_str)
@@ -426,14 +427,14 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
426
427
  """List the directories in a directory.
427
428
 
428
429
  Args:
429
- path_str: Path to the directory.
430
- recursive: Whether to list subdirectories recursively.
430
+ path_str: Name or path of the directory.
431
+ recursive: If `True`, lists all descendants of this directory recursively.
431
432
 
432
433
  Returns:
433
434
  List of directory paths.
434
435
 
435
436
  Raises:
436
- Error: If the path does not exist or does not designate a directory.
437
+ Error: If `path_str` does not exist or does not designate a directory.
437
438
 
438
439
  Examples:
439
440
  >>> cl.list_dirs('my_dir', recursive=True)
pixeltable/index/base.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
- import pixeltable.catalog as catalog
8
+ from pixeltable import catalog, exprs
9
9
 
10
10
 
11
11
  class IndexBase(abc.ABC):
@@ -22,7 +22,7 @@ class IndexBase(abc.ABC):
22
22
  pass
23
23
 
24
24
  @abc.abstractmethod
25
- def index_value_expr(self) -> 'pixeltable.exprs.Expr':
25
+ def index_value_expr(self) -> exprs.Expr:
26
26
  """Return expression that computes the value that goes into the index"""
27
27
  pass
28
28
 
pixeltable/index/btree.py CHANGED
@@ -2,10 +2,10 @@ from typing import Optional
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
5
- # TODO: why does this import result in a circular import, but the one im embedding_index.py doesn't?
6
- #import pixeltable.catalog as catalog
7
5
  import pixeltable.exceptions as excs
8
- import pixeltable.func as func
6
+ from pixeltable import catalog, exprs
7
+ from pixeltable.func.udf import udf
8
+
9
9
  from .base import IndexBase
10
10
 
11
11
 
@@ -15,7 +15,8 @@ class BtreeIndex(IndexBase):
15
15
  """
16
16
  MAX_STRING_LEN = 256
17
17
 
18
- @func.udf
18
+ @staticmethod
19
+ @udf
19
20
  def str_filter(s: Optional[str]) -> Optional[str]:
20
21
  if s is None:
21
22
  return None
@@ -24,10 +25,9 @@ class BtreeIndex(IndexBase):
24
25
  def __init__(self, c: 'catalog.Column'):
25
26
  if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
26
27
  raise excs.Error(f'Index on column {c.name}: B-tree index requires scalar or media type, got {c.col_type}')
27
- from pixeltable.exprs import ColumnRef
28
- self.value_expr = self.str_filter(ColumnRef(c)) if c.col_type.is_string_type() else ColumnRef(c)
28
+ self.value_expr = BtreeIndex.str_filter(exprs.ColumnRef(c)) if c.col_type.is_string_type() else exprs.ColumnRef(c)
29
29
 
30
- def index_value_expr(self) -> 'pixeltable.exprs.Expr':
30
+ def index_value_expr(self) -> 'exprs.Expr':
31
31
  return self.value_expr
32
32
 
33
33
  def records_value_errors(self) -> bool: