pixeltable 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/globals.py +3 -0
- pixeltable/catalog/insertable_table.py +9 -7
- pixeltable/catalog/table.py +220 -143
- pixeltable/catalog/table_version.py +36 -18
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +3 -3
- pixeltable/dataframe.py +9 -24
- pixeltable/env.py +107 -36
- pixeltable/exceptions.py +7 -4
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/aggregation_node.py +22 -15
- pixeltable/exec/component_iteration_node.py +62 -41
- pixeltable/exec/data_row_batch.py +7 -7
- pixeltable/exec/exec_node.py +35 -7
- pixeltable/exec/expr_eval_node.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -9
- pixeltable/exec/sql_node.py +265 -136
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/data_row.py +30 -19
- pixeltable/exprs/expr.py +15 -14
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +21 -15
- pixeltable/exprs/function_call.py +21 -8
- pixeltable/exprs/json_path.py +3 -6
- pixeltable/exprs/rowid_ref.py +2 -2
- pixeltable/exprs/sql_element_cache.py +5 -1
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +86 -42
- pixeltable/functions/huggingface.py +12 -14
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/string.py +50 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +26 -12
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +57 -56
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +7 -7
- pixeltable/index/embedding_index.py +8 -10
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/globals.py +3 -1
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/document.py +10 -8
- pixeltable/iterators/video.py +10 -1
- pixeltable/metadata/__init__.py +3 -2
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/util.py +9 -8
- pixeltable/metadata/schema.py +32 -21
- pixeltable/plan.py +136 -154
- pixeltable/store.py +51 -36
- pixeltable/tool/create_test_db_dump.py +7 -7
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/mypy_plugin.py +32 -0
- pixeltable/type_system.py +243 -60
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +1 -1
- pixeltable/utils/filecache.py +131 -84
- pixeltable/utils/formatter.py +1 -1
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/METADATA +16 -7
- pixeltable-0.2.21.dist-info/RECORD +148 -0
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.19.dist-info/RECORD +0 -147
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
@@ -15,12 +15,12 @@ from typing import Optional
 
 import sqlalchemy as sql
 
+import pixeltable as pxt
 from pixeltable.env import Env
-import pixeltable.func as func
 from pixeltable.utils.code import local_public_names
 
 
-@
+@pxt.udf(is_property=True)
 def year(self: datetime) -> int:
     """
     Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
@@ -36,7 +36,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('year', self)
 
 
-@
+@pxt.udf(is_property=True)
 def month(self: datetime) -> int:
     """
     Between 1 and 12 inclusive.
@@ -51,7 +51,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('month', self)
 
 
-@
+@pxt.udf(is_property=True)
 def day(self: datetime) -> int:
     """
     Between 1 and the number of days in the given month of the given year.
@@ -66,7 +66,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('day', self)
 
 
-@
+@pxt.udf(is_property=True)
 def hour(self: datetime) -> int:
     """
     Between 0 and 23 inclusive.
@@ -81,7 +81,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('hour', self)
 
 
-@
+@pxt.udf(is_property=True)
 def minute(self: datetime) -> int:
     """
     Between 0 and 59 inclusive.
@@ -96,7 +96,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('minute', self)
 
 
-@
+@pxt.udf(is_property=True)
 def second(self: datetime) -> int:
     """
     Between 0 and 59 inclusive.
@@ -111,7 +111,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('second', self)
 
 
-@
+@pxt.udf(is_property=True)
 def microsecond(self: datetime) -> int:
     """
     Between 0 and 999999 inclusive.
@@ -126,7 +126,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('microseconds', self) - sql.extract('second', self) * 1000000
 
 
-@
+@pxt.udf(is_method=True)
 def astimezone(self: datetime, tz: str) -> datetime:
     """
     Convert the datetime to the given time zone.
@@ -139,7 +139,7 @@ def astimezone(self: datetime, tz: str) -> datetime:
     return self.astimezone(tzinfo)
 
 
-@
+@pxt.udf(is_method=True)
 def weekday(self: datetime) -> int:
     """
     Between 0 (Monday) and 6 (Sunday) inclusive.
@@ -154,7 +154,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('isodow', self) - 1
 
 
-@
+@pxt.udf(is_method=True)
 def isoweekday(self: datetime) -> int:
     """
     Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
@@ -169,7 +169,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
     return sql.extract('isodow', self)
 
 
-@
+@pxt.udf(is_method=True)
 def isocalendar(self: datetime) -> dict:
     """
     Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
@@ -181,7 +181,7 @@ def isocalendar(self: datetime) -> dict:
     return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
 
 
-@
+@pxt.udf(is_method=True)
 def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
     """
     Return a string representing the date and time in ISO 8601 format.
@@ -195,7 +195,7 @@ def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
     return self.isoformat(sep=sep, timespec=timespec)
 
 
-@
+@pxt.udf(is_method=True)
 def strftime(self: datetime, format: str) -> str:
     """
     Return a string representing the date and time, controlled by an explicit format string.
@@ -208,7 +208,7 @@ def strftime(self: datetime, format: str) -> str:
     return self.strftime(format)
 
 
-@
+@pxt.udf(is_method=True)
 def make_timestamp(
     year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0, microsecond: int = 0
 ) -> datetime:
@@ -234,7 +234,7 @@ def _(
         sql.cast(minute, sql.Integer),
         sql.cast(second + microsecond / 1000000.0, sql.Double))
 
-# @
+# @pxt.udf
 # def date(self: datetime) -> datetime:
 # """
 # Return the date part of the datetime.
@@ -245,7 +245,7 @@ def _(
 # return datetime(d.year, d.month, d.day)
 #
 #
-# @
+# @pxt.udf
 # def time(self: datetime) -> datetime:
 # """
 # Return the time part of the datetime, with microseconds set to 0.
@@ -256,7 +256,7 @@ def _(
 # return datetime(1, 1, 1, t.hour, t.minute, t.second, t.microsecond)
 
 
-@
+@pxt.udf(is_method=True)
 def replace(
     self: datetime, year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None,
     hour: Optional[int] = None, minute: Optional[int] = None, second: Optional[int] = None,
@@ -271,7 +271,7 @@ def replace(
     return self.replace(**kwargs)
 
 
-@
+@pxt.udf(is_method=True)
 def toordinal(self: datetime) -> int:
     """
     Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
@@ -281,7 +281,7 @@ def toordinal(self: datetime) -> int:
     return self.toordinal()
 
 
-@
+@pxt.udf(is_method=True)
 def posix_timestamp(self: datetime) -> float:
     """
     Return POSIX timestamp corresponding to the datetime instance.
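The switch from bare `func.udf` registrations to `@pxt.udf(is_property=True)` / `@pxt.udf(is_method=True)` exposes these timestamp functions as properties and methods on timestamp expressions. A minimal usage sketch, assuming a hypothetical table `events` with a timestamp column `created_at`:

```python
import pixeltable as pxt

tbl = pxt.get_table('events')  # hypothetical table with a timestamp column `created_at`

# Property-style UDFs (is_property=True) are accessed without call parentheses.
tbl.select(tbl.created_at.year, tbl.created_at.month, tbl.created_at.day).collect()

# Method-style UDFs (is_method=True) are called like datetime methods.
tbl.select(tbl.created_at.strftime('%Y-%m-%d'), tbl.created_at.weekday()).collect()
```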
pixeltable/functions/together.py
CHANGED

@@ -7,13 +7,15 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
 
 import base64
 import io
-from typing import TYPE_CHECKING, Callable, Optional
+from typing import TYPE_CHECKING, Callable, Optional, TypeVar
 
 import numpy as np
 import PIL.Image
+import requests
 import tenacity
 
 import pixeltable as pxt
+import pixeltable.exceptions as excs
 from pixeltable import env
 from pixeltable.func import Batch
 from pixeltable.utils.code import local_public_names
@@ -32,7 +34,10 @@ def _together_client() -> 'together.Together':
     return env.Env.get().get_client('together')
 
 
-
+T = TypeVar('T')
+
+
+def _retry(fn: Callable[..., T]) -> Callable[..., T]:
     import together
     return tenacity.retry(
         retry=tenacity.retry_if_exception_type(together.error.RateLimitError),
@@ -180,8 +185,8 @@ _embedding_dimensions_cache = {
 }
 
 
-@pxt.udf(batch_size=32
-def embeddings(input: Batch[str], *, model: str) -> Batch[
+@pxt.udf(batch_size=32)
+def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), float]]:
     """
     Query an embedding model for a given string of text.
 
@@ -249,20 +254,29 @@ def image_generations(
         The generated image.
 
     Examples:
-        Add a computed column that applies the model `
+        Add a computed column that applies the model `stabilityai/stable-diffusion-xl-base-1.0`
        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
 
-        >>> tbl['response'] = image_generations(tbl.prompt, model='
+        >>> tbl['response'] = image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
     """
-    # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
     result = _retry(_together_client().images.generate)(
         prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
     )
-
-
-
-
-
+    if result.data[0].b64_json is not None:
+        b64_bytes = base64.b64decode(result.data[0].b64_json)
+        img = PIL.Image.open(io.BytesIO(b64_bytes))
+        img.load()
+        return img
+    if result.data[0].url is not None:
+        try:
+            resp = requests.get(result.data[0].url)
+            with io.BytesIO(resp.content) as fp:
+                image = PIL.Image.open(fp)
+                image.load()
+                return image
+        except Exception as exc:
+            raise excs.Error('Failed to download generated image from together.ai.') from exc
+    raise excs.Error('Response does not contain a generated image.')
 
 
 __all__ = local_public_names(__name__)
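For reference, a usage sketch of the updated `embeddings` and `image_generations` UDFs as computed columns. The table and column names and the embedding model id are illustrative assumptions; the image model id is taken from the docstring above:

```python
import pixeltable as pxt
from pixeltable.functions.together import embeddings, image_generations

tbl = pxt.get_table('prompts')  # hypothetical table with a string column `prompt`

# Batched embedding UDF; now typed to return a float array of model-dependent length.
tbl['prompt_embedding'] = embeddings(tbl.prompt, model='togethercomputer/m2-bert-80M-8k-retrieval')

# Image generation; the UDF now decodes a base64 payload or downloads the returned image URL.
tbl['response'] = image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
```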
pixeltable/functions/video.py
CHANGED

@@ -20,9 +20,8 @@ import av  # type: ignore[import-untyped]
 import numpy as np
 import PIL.Image
 
+import pixeltable as pxt
 import pixeltable.env as env
-import pixeltable.func as func
-import pixeltable.type_system as ts
 from pixeltable.utils.code import local_public_names
 
 _format_defaults = {  # format -> (codec, ext)
@@ -48,14 +47,14 @@ _format_defaults = {  # format -> (codec, ext)
 #     output_container.mux(packet)
 
 
-@
-    init_types=[
-    update_types=[
-    value_type=
+@pxt.uda(
+    init_types=[pxt.IntType()],
+    update_types=[pxt.ImageType()],
+    value_type=pxt.VideoType(),
     requires_order_by=True,
     allows_window=False,
 )
-class make_video(
+class make_video(pxt.Aggregator):
     """
     Aggregator that creates a video from a sequence of images.
     """
@@ -88,18 +87,10 @@ class make_video(func.Aggregator):
         return str(self.out_file)
 
 
-
-    ts.VideoType(nullable=False),
-    ts.IntType(nullable=False),
-    ts.StringType(nullable=False),
-    ts.StringType(nullable=True),
-]
-
-
-@func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types, is_method=True)
+@pxt.udf(is_method=True)
 def extract_audio(
-    video_path:
-) ->
+    video_path: pxt.Video, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
+) -> pxt.Audio:
     """
     Extract an audio stream from a video file, save it as a media file and return its path.
 
@@ -128,8 +119,8 @@ def extract_audio(
     return output_filename
 
 
-@
-def get_metadata(video:
+@pxt.udf(is_method=True)
+def get_metadata(video: pxt.Video) -> dict:
     """
     Gets various metadata associated with a video file and returns it as a dictionary.
     """
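The rewritten decorators register `make_video` as an ordered aggregator and `extract_audio`/`get_metadata` as methods on video expressions. A hedged usage sketch; the table, view, and column names are hypothetical:

```python
import pixeltable as pxt
from pixeltable.functions.video import make_video

# Hypothetical frame view (e.g. produced by a FrameIterator) with `pos` and `frame` columns.
frames = pxt.get_table('my_frames_view')

# `make_video` requires an order-by argument: frames are assembled in `pos` order.
frames.select(make_video(frames.pos, frames.frame)).collect()

# `extract_audio` and `get_metadata` are now callable as methods on a video column.
videos = pxt.get_table('my_videos')  # hypothetical table with a `video` column
videos.select(videos.video.get_metadata()).collect()
videos['audio_track'] = videos.video.extract_audio(format='wav')
```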
pixeltable/functions/whisper.py
CHANGED

@@ -14,27 +14,9 @@ from pixeltable.env import Env
 if TYPE_CHECKING:
     from whisper import Whisper  # type: ignore[import-untyped]
 
-
-@pxt.udf(
-    param_types=[
-        pxt.AudioType(),
-        pxt.StringType(),
-        pxt.JsonType(nullable=True),
-        pxt.FloatType(nullable=True),
-        pxt.FloatType(nullable=True),
-        pxt.FloatType(nullable=True),
-        pxt.BoolType(),
-        pxt.StringType(nullable=True),
-        pxt.BoolType(),
-        pxt.StringType(),
-        pxt.StringType(),
-        pxt.StringType(),
-        pxt.FloatType(nullable=True),
-        pxt.JsonType(nullable=True),
-    ]
-)
+@pxt.udf
 def transcribe(
-    audio:
+    audio: pxt.Audio,
     *,
     model: str,
     temperature: Optional[list[float]] = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
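With the explicit `param_types` list gone, the UDF signature is now inferred from the type hints (`pxt.Audio`, etc.). A usage sketch under assumed table and column names:

```python
import pixeltable as pxt
from pixeltable.functions import whisper

tbl = pxt.get_table('recordings')  # hypothetical table with an audio column `audio`

# Transcribe each audio file with a local Whisper model; the result is stored as JSON.
tbl['transcription'] = whisper.transcribe(tbl.audio, model='base.en')
```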
pixeltable/globals.py
CHANGED

@@ -16,6 +16,7 @@ from pixeltable.dataframe import DataFrameResultSet
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 from pixeltable.metadata import schema
+from pixeltable.utils.filecache import FileCache
 
 _logger = logging.getLogger('pixeltable')
 
@@ -53,11 +54,13 @@ def create_table(
     Examples:
         Create a table with an int and a string column:
 
-        >>>
+        >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String})
 
-        Create a table from a select statement over an existing table `
+        Create a table from a select statement over an existing table `orig_table` (this will create a new table
+        containing the exact contents of the query):
 
-        >>>
+        >>> tbl1 = pxt.get_table('orig_table')
+        ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
     """
     path = catalog.Path(path_str)
     Catalog.get().paths.check_is_valid(path, expected=None)
@@ -104,8 +107,7 @@ def create_view(
     path_str: str,
     base: Union[catalog.Table, DataFrame],
     *,
-
-    filter: Optional[exprs.Expr] = None,
+    additional_columns: Optional[dict[str, Any]] = None,
     is_snapshot: bool = False,
     iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
     num_retained_versions: int = 10,
@@ -115,11 +117,13 @@ def create_view(
     """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
 
     Args:
-        path_str:
+        path_str: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
+            `dir1.my_view`.
         base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
             base the view on.
-
-
+        additional_columns: If specified, will add these columns to the view once it is created. The format
+            of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
+            [`create_table`][pixeltable.create_table].
         is_snapshot: Whether the view is a snapshot.
         iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
             the base table.
@@ -129,36 +133,29 @@ def create_view(
 
     Returns:
         A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
-
+        exists or is invalid and `ignore_errors=True`, returns `None`.
 
     Raises:
         Error: if the path already exists or is invalid and `ignore_errors=False`.
 
     Examples:
-        Create a view
+        Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
 
-        >>>
-
+        >>> tbl = pxt.get_table('my_table')
+        ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
 
-        Create a
+        Create a snapshot of `my_table`:
 
-        >>>
-
-        Create an immutable view with additional computed columns and a filter:
-
-        >>> snapshot_view = cl.create_view(
-            'my_snapshot', base, schema={'col3': base.col2 + 1}, filter=base.col1 > 10, is_snapshot=True)
+        >>> tbl = pxt.get_table('my_table')
+        ... snapshot_view = pxt.create_view('my_snapshot_view', tbl, is_snapshot=True)
     """
+    where: Optional[exprs.Expr] = None
     if isinstance(base, catalog.Table):
         tbl_version_path = base._tbl_version_path
     elif isinstance(base, DataFrame):
         base._validate_mutable('create_view')
         tbl_version_path = base.tbl
-
-        raise excs.Error(
-            'Cannot specify a `filter` directly if one is already declared in a `DataFrame.where` clause'
-        )
-        filter = base.where_clause
+        where = base.where_clause
     else:
         raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
     assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
@@ -172,8 +169,8 @@ def create_view(
         raise e
     dir = Catalog.get().paths[path.parent]
 
-    if
-
+    if additional_columns is None:
+        additional_columns = {}
     if iterator is None:
         iterator_class, iterator_args = None, None
     else:
@@ -183,8 +180,8 @@ def create_view(
         dir._id,
         path.name,
         base=tbl_version_path,
-
-        predicate=
+        additional_columns=additional_columns,
+        predicate=where,
         is_snapshot=is_snapshot,
         iterator_cls=iterator_class,
         iterator_args=iterator_args,
@@ -193,11 +190,12 @@ def create_view(
     )
     Catalog.get().paths[path] = view
     _logger.info(f'Created view `{path_str}`.')
+    FileCache.get().emit_eviction_warnings()
     return view
 
 
 def get_table(path: str) -> catalog.Table:
-    """Get a handle to an existing table
+    """Get a handle to an existing table, view, or snapshot.
 
     Args:
         path: Path to the table.
@@ -211,15 +209,15 @@ def get_table(path: str) -> catalog.Table:
     Examples:
         Get handle for a table in the top-level directory:
 
-        >>>
+        >>> tbl = pxt.get_table('my_table')
 
         For a table in a subdirectory:
 
-        >>>
+        >>> tbl = pxt.get_table('subdir.my_table')
 
-
+        Handles to views and snapshots are retrieved in the same way:
 
-        >>>
+        >>> tbl = pxt.get_table('my_snapshot')
     """
     p = catalog.Path(path)
     Catalog.get().paths.check_is_valid(p, expected=catalog.Table)
@@ -241,11 +239,11 @@ def move(path: str, new_path: str) -> None:
     Examples:
         Move a table to a different directory:
 
-        >>>>
+        >>>> pxt.move('dir1.my_table', 'dir2.my_table')
 
         Rename a table:
 
-        >>>>
+        >>>> pxt.move('dir1.my_table', 'dir1.new_name')
     """
     p = catalog.Path(path)
     Catalog.get().paths.check_is_valid(p, expected=catalog.SchemaObject)
@@ -258,18 +256,18 @@ def move(path: str, new_path: str) -> None:
 
 
 def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> None:
-    """Drop a table
+    """Drop a table, view, or snapshot.
 
     Args:
         path: Path to the [`Table`][pixeltable.Table].
-        force: If `True`, will also drop all views
-        ignore_errors:
+        force: If `True`, will also drop all views and sub-views of this table.
+        ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
 
     Raises:
-        Error: If the path does not exist or does not designate a table object and ignore_errors
+        Error: If the path does not exist or does not designate a table object, and `ignore_errors=False`.
 
     Examples:
-        >>>
+        >>> pxt.drop_table('my_table')
     """
     cat = Catalog.get()
     path_obj = catalog.Path(path)
@@ -300,7 +298,8 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
 
     Args:
         dir_path: Path to the directory. Defaults to the root directory.
-        recursive:
+        recursive: If `False`, returns only those tables that are directly contained in specified directory; if
+            `True`, returns all tables that are descendants of the specified directory, recursively.
 
     Returns:
         A list of [`Table`][pixeltable.Table] paths.
@@ -311,13 +310,11 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
     Examples:
         List tables in top-level directory:
 
-        >>>
-        ['my_table', ...]
+        >>> pxt.list_tables()
 
         List tables in 'dir1':
 
-        >>>
-        [...]
+        >>> pxt.list_tables('dir1')
     """
     assert dir_path is not None
     path = catalog.Path(dir_path, empty_is_valid=True)
@@ -330,17 +327,17 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
 
     Args:
         path_str: Path to the directory.
-        ignore_errors: if True
+        ignore_errors: if `True`, will return silently instead of throwing an exception if an error occurs.
 
     Raises:
-        Error: If the path already exists or the parent is not a directory
+        Error: If the path already exists or the parent is not a directory, and `ignore_errors=False`.
 
     Examples:
-        >>>
+        >>> pxt.create_dir('my_dir')
 
         Create a subdirectory:
 
-        >>>
+        >>> pxt.create_dir('my_dir.sub_dir')
     """
     try:
         path = catalog.Path(path_str)
@@ -371,17 +368,21 @@ def drop_dir(path_str: str, force: bool = False, ignore_errors: bool = False) ->
     """Remove a directory.
 
     Args:
-        path_str:
+        path_str: Name or path of the directory.
+        force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
+            with any views or snapshots that depend on any of the dropped tables.
+        ignore_errors: if `True`, will return silently instead of throwing an exception if the directory
+            does not exist.
 
     Raises:
-        Error: If the path does not exist or does not designate a directory or if the directory is not empty.
+        Error: If the path does not exist or does not designate a directory, or if the directory is not empty.
 
     Examples:
-        >>>
+        >>> pxt.drop_dir('my_dir')
 
         Remove a subdirectory:
 
-        >>>
+        >>> pxt.drop_dir('my_dir.sub_dir')
     """
     cat = Catalog.get()
     path = catalog.Path(path_str)
@@ -426,14 +427,14 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
     """List the directories in a directory.
 
     Args:
-        path_str:
-        recursive:
+        path_str: Name or path of the directory.
+        recursive: If `True`, lists all descendants of this directory recursively.
 
     Returns:
         List of directory paths.
 
     Raises:
-        Error: If
+        Error: If `path_str` does not exist or does not designate a directory.
 
     Examples:
         >>> cl.list_dirs('my_dir', recursive=True)
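The `create_view` signature change replaces the old `schema`/`filter` keyword arguments: filtering now comes from the `DataFrame` passed as `base`, and extra columns go in `additional_columns`. A sketch of the new calling convention, with illustrative table and column names:

```python
import pixeltable as pxt

tbl = pxt.get_table('my_table')  # hypothetical table with columns col1, col2

# Filter via the DataFrame, add a computed column via additional_columns.
view = pxt.create_view(
    'my_view',
    tbl.where(tbl.col1 > 10),
    additional_columns={'col3': tbl.col2 + 1},
)

# Snapshots use the same entry point.
snapshot = pxt.create_view('my_snapshot', tbl, is_snapshot=True)
```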
pixeltable/index/base.py
CHANGED

@@ -5,7 +5,7 @@ from typing import Any
 
 import sqlalchemy as sql
 
-
+from pixeltable import catalog, exprs
 
 
 class IndexBase(abc.ABC):
@@ -22,7 +22,7 @@ class IndexBase(abc.ABC):
         pass
 
     @abc.abstractmethod
-    def index_value_expr(self) ->
+    def index_value_expr(self) -> exprs.Expr:
         """Return expression that computes the value that goes into the index"""
         pass
 
pixeltable/index/btree.py
CHANGED

@@ -2,10 +2,10 @@ from typing import Optional
 
 import sqlalchemy as sql
 
-# TODO: why does this import result in a circular import, but the one im embedding_index.py doesn't?
-#import pixeltable.catalog as catalog
 import pixeltable.exceptions as excs
-
+from pixeltable import catalog, exprs
+from pixeltable.func.udf import udf
+
 from .base import IndexBase
 
 
@@ -15,7 +15,8 @@ class BtreeIndex(IndexBase):
     """
     MAX_STRING_LEN = 256
 
-    @
+    @staticmethod
+    @udf
     def str_filter(s: Optional[str]) -> Optional[str]:
         if s is None:
             return None
@@ -24,10 +25,9 @@ class BtreeIndex(IndexBase):
     def __init__(self, c: 'catalog.Column'):
         if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
             raise excs.Error(f'Index on column {c.name}: B-tree index requires scalar or media type, got {c.col_type}')
-
-        self.value_expr = self.str_filter(ColumnRef(c)) if c.col_type.is_string_type() else ColumnRef(c)
+        self.value_expr = BtreeIndex.str_filter(exprs.ColumnRef(c)) if c.col_type.is_string_type() else exprs.ColumnRef(c)
 
-    def index_value_expr(self) -> '
+    def index_value_expr(self) -> 'exprs.Expr':
         return self.value_expr
 
     def records_value_errors(self) -> bool:
|