pixeltable 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/globals.py +3 -0
- pixeltable/catalog/table.py +208 -145
- pixeltable/catalog/table_version.py +36 -18
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +3 -3
- pixeltable/dataframe.py +9 -24
- pixeltable/env.py +1 -1
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/aggregation_node.py +22 -15
- pixeltable/exec/data_row_batch.py +7 -7
- pixeltable/exec/exec_node.py +35 -7
- pixeltable/exec/expr_eval_node.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -9
- pixeltable/exec/sql_node.py +265 -136
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/data_row.py +30 -19
- pixeltable/exprs/expr.py +15 -14
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +21 -15
- pixeltable/exprs/function_call.py +21 -8
- pixeltable/exprs/rowid_ref.py +2 -2
- pixeltable/exprs/sql_element_cache.py +5 -1
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +86 -42
- pixeltable/functions/huggingface.py +12 -14
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/string.py +50 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +55 -56
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +7 -7
- pixeltable/index/embedding_index.py +8 -10
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/globals.py +2 -0
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/document.py +9 -7
- pixeltable/iterators/video.py +10 -1
- pixeltable/metadata/__init__.py +3 -2
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/util.py +9 -8
- pixeltable/metadata/schema.py +32 -21
- pixeltable/plan.py +136 -154
- pixeltable/store.py +51 -36
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/mypy_plugin.py +32 -0
- pixeltable/type_system.py +243 -60
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +1 -1
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +1 -1
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/METADATA +6 -5
- pixeltable-0.2.21.dist-info/RECORD +148 -0
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
|
@@ -15,12 +15,12 @@ from typing import Optional
|
|
|
15
15
|
|
|
16
16
|
import sqlalchemy as sql
|
|
17
17
|
|
|
18
|
+
import pixeltable as pxt
|
|
18
19
|
from pixeltable.env import Env
|
|
19
|
-
import pixeltable.func as func
|
|
20
20
|
from pixeltable.utils.code import local_public_names
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
@
|
|
23
|
+
@pxt.udf(is_property=True)
|
|
24
24
|
def year(self: datetime) -> int:
|
|
25
25
|
"""
|
|
26
26
|
Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
|
|
@@ -36,7 +36,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
36
36
|
return sql.extract('year', self)
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
@
|
|
39
|
+
@pxt.udf(is_property=True)
|
|
40
40
|
def month(self: datetime) -> int:
|
|
41
41
|
"""
|
|
42
42
|
Between 1 and 12 inclusive.
|
|
@@ -51,7 +51,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
51
51
|
return sql.extract('month', self)
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
@
|
|
54
|
+
@pxt.udf(is_property=True)
|
|
55
55
|
def day(self: datetime) -> int:
|
|
56
56
|
"""
|
|
57
57
|
Between 1 and the number of days in the given month of the given year.
|
|
@@ -66,7 +66,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
66
66
|
return sql.extract('day', self)
|
|
67
67
|
|
|
68
68
|
|
|
69
|
-
@
|
|
69
|
+
@pxt.udf(is_property=True)
|
|
70
70
|
def hour(self: datetime) -> int:
|
|
71
71
|
"""
|
|
72
72
|
Between 0 and 23 inclusive.
|
|
@@ -81,7 +81,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
81
81
|
return sql.extract('hour', self)
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
@
|
|
84
|
+
@pxt.udf(is_property=True)
|
|
85
85
|
def minute(self: datetime) -> int:
|
|
86
86
|
"""
|
|
87
87
|
Between 0 and 59 inclusive.
|
|
@@ -96,7 +96,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
96
96
|
return sql.extract('minute', self)
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
@
|
|
99
|
+
@pxt.udf(is_property=True)
|
|
100
100
|
def second(self: datetime) -> int:
|
|
101
101
|
"""
|
|
102
102
|
Between 0 and 59 inclusive.
|
|
@@ -111,7 +111,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
111
111
|
return sql.extract('second', self)
|
|
112
112
|
|
|
113
113
|
|
|
114
|
-
@
|
|
114
|
+
@pxt.udf(is_property=True)
|
|
115
115
|
def microsecond(self: datetime) -> int:
|
|
116
116
|
"""
|
|
117
117
|
Between 0 and 999999 inclusive.
|
|
@@ -126,7 +126,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
126
126
|
return sql.extract('microseconds', self) - sql.extract('second', self) * 1000000
|
|
127
127
|
|
|
128
128
|
|
|
129
|
-
@
|
|
129
|
+
@pxt.udf(is_method=True)
|
|
130
130
|
def astimezone(self: datetime, tz: str) -> datetime:
|
|
131
131
|
"""
|
|
132
132
|
Convert the datetime to the given time zone.
|
|
@@ -139,7 +139,7 @@ def astimezone(self: datetime, tz: str) -> datetime:
|
|
|
139
139
|
return self.astimezone(tzinfo)
|
|
140
140
|
|
|
141
141
|
|
|
142
|
-
@
|
|
142
|
+
@pxt.udf(is_method=True)
|
|
143
143
|
def weekday(self: datetime) -> int:
|
|
144
144
|
"""
|
|
145
145
|
Between 0 (Monday) and 6 (Sunday) inclusive.
|
|
@@ -154,7 +154,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
154
154
|
return sql.extract('isodow', self) - 1
|
|
155
155
|
|
|
156
156
|
|
|
157
|
-
@
|
|
157
|
+
@pxt.udf(is_method=True)
|
|
158
158
|
def isoweekday(self: datetime) -> int:
|
|
159
159
|
"""
|
|
160
160
|
Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
|
|
@@ -169,7 +169,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
169
169
|
return sql.extract('isodow', self)
|
|
170
170
|
|
|
171
171
|
|
|
172
|
-
@
|
|
172
|
+
@pxt.udf(is_method=True)
|
|
173
173
|
def isocalendar(self: datetime) -> dict:
|
|
174
174
|
"""
|
|
175
175
|
Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
|
|
@@ -181,7 +181,7 @@ def isocalendar(self: datetime) -> dict:
|
|
|
181
181
|
return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
|
|
182
182
|
|
|
183
183
|
|
|
184
|
-
@
|
|
184
|
+
@pxt.udf(is_method=True)
|
|
185
185
|
def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
|
|
186
186
|
"""
|
|
187
187
|
Return a string representing the date and time in ISO 8601 format.
|
|
@@ -195,7 +195,7 @@ def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
|
|
|
195
195
|
return self.isoformat(sep=sep, timespec=timespec)
|
|
196
196
|
|
|
197
197
|
|
|
198
|
-
@
|
|
198
|
+
@pxt.udf(is_method=True)
|
|
199
199
|
def strftime(self: datetime, format: str) -> str:
|
|
200
200
|
"""
|
|
201
201
|
Return a string representing the date and time, controlled by an explicit format string.
|
|
@@ -208,7 +208,7 @@ def strftime(self: datetime, format: str) -> str:
|
|
|
208
208
|
return self.strftime(format)
|
|
209
209
|
|
|
210
210
|
|
|
211
|
-
@
|
|
211
|
+
@pxt.udf(is_method=True)
|
|
212
212
|
def make_timestamp(
|
|
213
213
|
year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0, microsecond: int = 0
|
|
214
214
|
) -> datetime:
|
|
@@ -234,7 +234,7 @@ def _(
|
|
|
234
234
|
sql.cast(minute, sql.Integer),
|
|
235
235
|
sql.cast(second + microsecond / 1000000.0, sql.Double))
|
|
236
236
|
|
|
237
|
-
# @
|
|
237
|
+
# @pxt.udf
|
|
238
238
|
# def date(self: datetime) -> datetime:
|
|
239
239
|
# """
|
|
240
240
|
# Return the date part of the datetime.
|
|
@@ -245,7 +245,7 @@ def _(
|
|
|
245
245
|
# return datetime(d.year, d.month, d.day)
|
|
246
246
|
#
|
|
247
247
|
#
|
|
248
|
-
# @
|
|
248
|
+
# @pxt.udf
|
|
249
249
|
# def time(self: datetime) -> datetime:
|
|
250
250
|
# """
|
|
251
251
|
# Return the time part of the datetime, with microseconds set to 0.
|
|
@@ -256,7 +256,7 @@ def _(
|
|
|
256
256
|
# return datetime(1, 1, 1, t.hour, t.minute, t.second, t.microsecond)
|
|
257
257
|
|
|
258
258
|
|
|
259
|
-
@
|
|
259
|
+
@pxt.udf(is_method=True)
|
|
260
260
|
def replace(
|
|
261
261
|
self: datetime, year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None,
|
|
262
262
|
hour: Optional[int] = None, minute: Optional[int] = None, second: Optional[int] = None,
|
|
@@ -271,7 +271,7 @@ def replace(
|
|
|
271
271
|
return self.replace(**kwargs)
|
|
272
272
|
|
|
273
273
|
|
|
274
|
-
@
|
|
274
|
+
@pxt.udf(is_method=True)
|
|
275
275
|
def toordinal(self: datetime) -> int:
|
|
276
276
|
"""
|
|
277
277
|
Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
|
|
@@ -281,7 +281,7 @@ def toordinal(self: datetime) -> int:
|
|
|
281
281
|
return self.toordinal()
|
|
282
282
|
|
|
283
283
|
|
|
284
|
-
@
|
|
284
|
+
@pxt.udf(is_method=True)
|
|
285
285
|
def posix_timestamp(self: datetime) -> float:
|
|
286
286
|
"""
|
|
287
287
|
Return POSIX timestamp corresponding to the datetime instance.
|
pixeltable/functions/together.py
CHANGED
|
@@ -185,8 +185,8 @@ _embedding_dimensions_cache = {
|
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
|
|
188
|
-
@pxt.udf(batch_size=32
|
|
189
|
-
def embeddings(input: Batch[str], *, model: str) -> Batch[
|
|
188
|
+
@pxt.udf(batch_size=32)
|
|
189
|
+
def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), float]]:
|
|
190
190
|
"""
|
|
191
191
|
Query an embedding model for a given string of text.
|
|
192
192
|
|
pixeltable/functions/video.py
CHANGED
|
@@ -20,9 +20,8 @@ import av # type: ignore[import-untyped]
|
|
|
20
20
|
import numpy as np
|
|
21
21
|
import PIL.Image
|
|
22
22
|
|
|
23
|
+
import pixeltable as pxt
|
|
23
24
|
import pixeltable.env as env
|
|
24
|
-
import pixeltable.func as func
|
|
25
|
-
import pixeltable.type_system as ts
|
|
26
25
|
from pixeltable.utils.code import local_public_names
|
|
27
26
|
|
|
28
27
|
_format_defaults = { # format -> (codec, ext)
|
|
@@ -48,14 +47,14 @@ _format_defaults = { # format -> (codec, ext)
|
|
|
48
47
|
# output_container.mux(packet)
|
|
49
48
|
|
|
50
49
|
|
|
51
|
-
@
|
|
52
|
-
init_types=[
|
|
53
|
-
update_types=[
|
|
54
|
-
value_type=
|
|
50
|
+
@pxt.uda(
|
|
51
|
+
init_types=[pxt.IntType()],
|
|
52
|
+
update_types=[pxt.ImageType()],
|
|
53
|
+
value_type=pxt.VideoType(),
|
|
55
54
|
requires_order_by=True,
|
|
56
55
|
allows_window=False,
|
|
57
56
|
)
|
|
58
|
-
class make_video(
|
|
57
|
+
class make_video(pxt.Aggregator):
|
|
59
58
|
"""
|
|
60
59
|
Aggregator that creates a video from a sequence of images.
|
|
61
60
|
"""
|
|
@@ -88,18 +87,10 @@ class make_video(func.Aggregator):
|
|
|
88
87
|
return str(self.out_file)
|
|
89
88
|
|
|
90
89
|
|
|
91
|
-
|
|
92
|
-
ts.VideoType(nullable=False),
|
|
93
|
-
ts.IntType(nullable=False),
|
|
94
|
-
ts.StringType(nullable=False),
|
|
95
|
-
ts.StringType(nullable=True),
|
|
96
|
-
]
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
@func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types, is_method=True)
|
|
90
|
+
@pxt.udf(is_method=True)
|
|
100
91
|
def extract_audio(
|
|
101
|
-
video_path:
|
|
102
|
-
) ->
|
|
92
|
+
video_path: pxt.Video, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
|
|
93
|
+
) -> pxt.Audio:
|
|
103
94
|
"""
|
|
104
95
|
Extract an audio stream from a video file, save it as a media file and return its path.
|
|
105
96
|
|
|
@@ -128,8 +119,8 @@ def extract_audio(
|
|
|
128
119
|
return output_filename
|
|
129
120
|
|
|
130
121
|
|
|
131
|
-
@
|
|
132
|
-
def get_metadata(video:
|
|
122
|
+
@pxt.udf(is_method=True)
|
|
123
|
+
def get_metadata(video: pxt.Video) -> dict:
|
|
133
124
|
"""
|
|
134
125
|
Gets various metadata associated with a video file and returns it as a dictionary.
|
|
135
126
|
"""
|
pixeltable/functions/whisper.py
CHANGED
|
@@ -14,27 +14,9 @@ from pixeltable.env import Env
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
15
|
from whisper import Whisper # type: ignore[import-untyped]
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
@pxt.udf(
|
|
19
|
-
param_types=[
|
|
20
|
-
pxt.AudioType(),
|
|
21
|
-
pxt.StringType(),
|
|
22
|
-
pxt.JsonType(nullable=True),
|
|
23
|
-
pxt.FloatType(nullable=True),
|
|
24
|
-
pxt.FloatType(nullable=True),
|
|
25
|
-
pxt.FloatType(nullable=True),
|
|
26
|
-
pxt.BoolType(),
|
|
27
|
-
pxt.StringType(nullable=True),
|
|
28
|
-
pxt.BoolType(),
|
|
29
|
-
pxt.StringType(),
|
|
30
|
-
pxt.StringType(),
|
|
31
|
-
pxt.StringType(),
|
|
32
|
-
pxt.FloatType(nullable=True),
|
|
33
|
-
pxt.JsonType(nullable=True),
|
|
34
|
-
]
|
|
35
|
-
)
|
|
17
|
+
@pxt.udf
|
|
36
18
|
def transcribe(
|
|
37
|
-
audio:
|
|
19
|
+
audio: pxt.Audio,
|
|
38
20
|
*,
|
|
39
21
|
model: str,
|
|
40
22
|
temperature: Optional[list[float]] = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
|
pixeltable/globals.py
CHANGED
|
@@ -54,11 +54,13 @@ def create_table(
|
|
|
54
54
|
Examples:
|
|
55
55
|
Create a table with an int and a string column:
|
|
56
56
|
|
|
57
|
-
>>>
|
|
57
|
+
>>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String})
|
|
58
58
|
|
|
59
|
-
Create a table from a select statement over an existing table `
|
|
59
|
+
Create a table from a select statement over an existing table `orig_table` (this will create a new table
|
|
60
|
+
containing the exact contents of the query):
|
|
60
61
|
|
|
61
|
-
>>>
|
|
62
|
+
>>> tbl1 = pxt.get_table('orig_table')
|
|
63
|
+
... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
|
|
62
64
|
"""
|
|
63
65
|
path = catalog.Path(path_str)
|
|
64
66
|
Catalog.get().paths.check_is_valid(path, expected=None)
|
|
@@ -105,8 +107,7 @@ def create_view(
|
|
|
105
107
|
path_str: str,
|
|
106
108
|
base: Union[catalog.Table, DataFrame],
|
|
107
109
|
*,
|
|
108
|
-
|
|
109
|
-
filter: Optional[exprs.Expr] = None,
|
|
110
|
+
additional_columns: Optional[dict[str, Any]] = None,
|
|
110
111
|
is_snapshot: bool = False,
|
|
111
112
|
iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
|
|
112
113
|
num_retained_versions: int = 10,
|
|
@@ -116,11 +117,13 @@ def create_view(
|
|
|
116
117
|
"""Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
117
118
|
|
|
118
119
|
Args:
|
|
119
|
-
path_str:
|
|
120
|
+
path_str: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
|
|
121
|
+
`dir1.my_view`.
|
|
120
122
|
base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
|
|
121
123
|
base the view on.
|
|
122
|
-
|
|
123
|
-
|
|
124
|
+
additional_columns: If specified, will add these columns to the view once it is created. The format
|
|
125
|
+
of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
|
|
126
|
+
[`create_table`][pixeltable.create_table].
|
|
124
127
|
is_snapshot: Whether the view is a snapshot.
|
|
125
128
|
iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
|
|
126
129
|
the base table.
|
|
@@ -130,36 +133,29 @@ def create_view(
|
|
|
130
133
|
|
|
131
134
|
Returns:
|
|
132
135
|
A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
|
|
133
|
-
|
|
136
|
+
exists or is invalid and `ignore_errors=True`, returns `None`.
|
|
134
137
|
|
|
135
138
|
Raises:
|
|
136
139
|
Error: if the path already exists or is invalid and `ignore_errors=False`.
|
|
137
140
|
|
|
138
141
|
Examples:
|
|
139
|
-
Create a view
|
|
142
|
+
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
|
|
140
143
|
|
|
141
|
-
>>>
|
|
142
|
-
|
|
144
|
+
>>> tbl = pxt.get_table('my_table')
|
|
145
|
+
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
|
|
143
146
|
|
|
144
|
-
Create a
|
|
147
|
+
Create a snapshot of `my_table`:
|
|
145
148
|
|
|
146
|
-
>>>
|
|
147
|
-
|
|
148
|
-
Create an immutable view with additional computed columns and a filter:
|
|
149
|
-
|
|
150
|
-
>>> snapshot_view = cl.create_view(
|
|
151
|
-
'my_snapshot', base, schema={'col3': base.col2 + 1}, filter=base.col1 > 10, is_snapshot=True)
|
|
149
|
+
>>> tbl = pxt.get_table('my_table')
|
|
150
|
+
... snapshot_view = pxt.create_view('my_snapshot_view', tbl, is_snapshot=True)
|
|
152
151
|
"""
|
|
152
|
+
where: Optional[exprs.Expr] = None
|
|
153
153
|
if isinstance(base, catalog.Table):
|
|
154
154
|
tbl_version_path = base._tbl_version_path
|
|
155
155
|
elif isinstance(base, DataFrame):
|
|
156
156
|
base._validate_mutable('create_view')
|
|
157
157
|
tbl_version_path = base.tbl
|
|
158
|
-
|
|
159
|
-
raise excs.Error(
|
|
160
|
-
'Cannot specify a `filter` directly if one is already declared in a `DataFrame.where` clause'
|
|
161
|
-
)
|
|
162
|
-
filter = base.where_clause
|
|
158
|
+
where = base.where_clause
|
|
163
159
|
else:
|
|
164
160
|
raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
|
|
165
161
|
assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
|
|
@@ -173,8 +169,8 @@ def create_view(
|
|
|
173
169
|
raise e
|
|
174
170
|
dir = Catalog.get().paths[path.parent]
|
|
175
171
|
|
|
176
|
-
if
|
|
177
|
-
|
|
172
|
+
if additional_columns is None:
|
|
173
|
+
additional_columns = {}
|
|
178
174
|
if iterator is None:
|
|
179
175
|
iterator_class, iterator_args = None, None
|
|
180
176
|
else:
|
|
@@ -184,8 +180,8 @@ def create_view(
|
|
|
184
180
|
dir._id,
|
|
185
181
|
path.name,
|
|
186
182
|
base=tbl_version_path,
|
|
187
|
-
|
|
188
|
-
predicate=
|
|
183
|
+
additional_columns=additional_columns,
|
|
184
|
+
predicate=where,
|
|
189
185
|
is_snapshot=is_snapshot,
|
|
190
186
|
iterator_cls=iterator_class,
|
|
191
187
|
iterator_args=iterator_args,
|
|
@@ -199,7 +195,7 @@ def create_view(
|
|
|
199
195
|
|
|
200
196
|
|
|
201
197
|
def get_table(path: str) -> catalog.Table:
|
|
202
|
-
"""Get a handle to an existing table
|
|
198
|
+
"""Get a handle to an existing table, view, or snapshot.
|
|
203
199
|
|
|
204
200
|
Args:
|
|
205
201
|
path: Path to the table.
|
|
@@ -213,15 +209,15 @@ def get_table(path: str) -> catalog.Table:
|
|
|
213
209
|
Examples:
|
|
214
210
|
Get handle for a table in the top-level directory:
|
|
215
211
|
|
|
216
|
-
>>>
|
|
212
|
+
>>> tbl = pxt.get_table('my_table')
|
|
217
213
|
|
|
218
214
|
For a table in a subdirectory:
|
|
219
215
|
|
|
220
|
-
>>>
|
|
216
|
+
>>> tbl = pxt.get_table('subdir.my_table')
|
|
221
217
|
|
|
222
|
-
|
|
218
|
+
Handles to views and snapshots are retrieved in the same way:
|
|
223
219
|
|
|
224
|
-
>>>
|
|
220
|
+
>>> tbl = pxt.get_table('my_snapshot')
|
|
225
221
|
"""
|
|
226
222
|
p = catalog.Path(path)
|
|
227
223
|
Catalog.get().paths.check_is_valid(p, expected=catalog.Table)
|
|
@@ -243,11 +239,11 @@ def move(path: str, new_path: str) -> None:
|
|
|
243
239
|
Examples:
|
|
244
240
|
Move a table to a different directory:
|
|
245
241
|
|
|
246
|
-
>>>>
|
|
242
|
+
>>>> pxt.move('dir1.my_table', 'dir2.my_table')
|
|
247
243
|
|
|
248
244
|
Rename a table:
|
|
249
245
|
|
|
250
|
-
>>>>
|
|
246
|
+
>>>> pxt.move('dir1.my_table', 'dir1.new_name')
|
|
251
247
|
"""
|
|
252
248
|
p = catalog.Path(path)
|
|
253
249
|
Catalog.get().paths.check_is_valid(p, expected=catalog.SchemaObject)
|
|
@@ -260,18 +256,18 @@ def move(path: str, new_path: str) -> None:
|
|
|
260
256
|
|
|
261
257
|
|
|
262
258
|
def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> None:
|
|
263
|
-
"""Drop a table
|
|
259
|
+
"""Drop a table, view, or snapshot.
|
|
264
260
|
|
|
265
261
|
Args:
|
|
266
262
|
path: Path to the [`Table`][pixeltable.Table].
|
|
267
|
-
force: If `True`, will also drop all views
|
|
268
|
-
ignore_errors:
|
|
263
|
+
force: If `True`, will also drop all views and sub-views of this table.
|
|
264
|
+
ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
|
|
269
265
|
|
|
270
266
|
Raises:
|
|
271
|
-
Error: If the path does not exist or does not designate a table object and ignore_errors
|
|
267
|
+
Error: If the path does not exist or does not designate a table object, and `ignore_errors=False`.
|
|
272
268
|
|
|
273
269
|
Examples:
|
|
274
|
-
>>>
|
|
270
|
+
>>> pxt.drop_table('my_table')
|
|
275
271
|
"""
|
|
276
272
|
cat = Catalog.get()
|
|
277
273
|
path_obj = catalog.Path(path)
|
|
@@ -302,7 +298,8 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
|
302
298
|
|
|
303
299
|
Args:
|
|
304
300
|
dir_path: Path to the directory. Defaults to the root directory.
|
|
305
|
-
recursive:
|
|
301
|
+
recursive: If `False`, returns only those tables that are directly contained in specified directory; if
|
|
302
|
+
`True`, returns all tables that are descendants of the specified directory, recursively.
|
|
306
303
|
|
|
307
304
|
Returns:
|
|
308
305
|
A list of [`Table`][pixeltable.Table] paths.
|
|
@@ -313,13 +310,11 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
|
313
310
|
Examples:
|
|
314
311
|
List tables in top-level directory:
|
|
315
312
|
|
|
316
|
-
>>>
|
|
317
|
-
['my_table', ...]
|
|
313
|
+
>>> pxt.list_tables()
|
|
318
314
|
|
|
319
315
|
List tables in 'dir1':
|
|
320
316
|
|
|
321
|
-
>>>
|
|
322
|
-
[...]
|
|
317
|
+
>>> pxt.list_tables('dir1')
|
|
323
318
|
"""
|
|
324
319
|
assert dir_path is not None
|
|
325
320
|
path = catalog.Path(dir_path, empty_is_valid=True)
|
|
@@ -332,17 +327,17 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
|
|
|
332
327
|
|
|
333
328
|
Args:
|
|
334
329
|
path_str: Path to the directory.
|
|
335
|
-
ignore_errors: if True
|
|
330
|
+
ignore_errors: if `True`, will return silently instead of throwing an exception if an error occurs.
|
|
336
331
|
|
|
337
332
|
Raises:
|
|
338
|
-
Error: If the path already exists or the parent is not a directory
|
|
333
|
+
Error: If the path already exists or the parent is not a directory, and `ignore_errors=False`.
|
|
339
334
|
|
|
340
335
|
Examples:
|
|
341
|
-
>>>
|
|
336
|
+
>>> pxt.create_dir('my_dir')
|
|
342
337
|
|
|
343
338
|
Create a subdirectory:
|
|
344
339
|
|
|
345
|
-
>>>
|
|
340
|
+
>>> pxt.create_dir('my_dir.sub_dir')
|
|
346
341
|
"""
|
|
347
342
|
try:
|
|
348
343
|
path = catalog.Path(path_str)
|
|
@@ -373,17 +368,21 @@ def drop_dir(path_str: str, force: bool = False, ignore_errors: bool = False) ->
|
|
|
373
368
|
"""Remove a directory.
|
|
374
369
|
|
|
375
370
|
Args:
|
|
376
|
-
path_str:
|
|
371
|
+
path_str: Name or path of the directory.
|
|
372
|
+
force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
|
|
373
|
+
with any views or snapshots that depend on any of the dropped tables.
|
|
374
|
+
ignore_errors: if `True`, will return silently instead of throwing an exception if the directory
|
|
375
|
+
does not exist.
|
|
377
376
|
|
|
378
377
|
Raises:
|
|
379
|
-
Error: If the path does not exist or does not designate a directory or if the directory is not empty.
|
|
378
|
+
Error: If the path does not exist or does not designate a directory, or if the directory is not empty.
|
|
380
379
|
|
|
381
380
|
Examples:
|
|
382
|
-
>>>
|
|
381
|
+
>>> pxt.drop_dir('my_dir')
|
|
383
382
|
|
|
384
383
|
Remove a subdirectory:
|
|
385
384
|
|
|
386
|
-
>>>
|
|
385
|
+
>>> pxt.drop_dir('my_dir.sub_dir')
|
|
387
386
|
"""
|
|
388
387
|
cat = Catalog.get()
|
|
389
388
|
path = catalog.Path(path_str)
|
|
@@ -428,14 +427,14 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
|
|
|
428
427
|
"""List the directories in a directory.
|
|
429
428
|
|
|
430
429
|
Args:
|
|
431
|
-
path_str:
|
|
432
|
-
recursive:
|
|
430
|
+
path_str: Name or path of the directory.
|
|
431
|
+
recursive: If `True`, lists all descendants of this directory recursively.
|
|
433
432
|
|
|
434
433
|
Returns:
|
|
435
434
|
List of directory paths.
|
|
436
435
|
|
|
437
436
|
Raises:
|
|
438
|
-
Error: If
|
|
437
|
+
Error: If `path_str` does not exist or does not designate a directory.
|
|
439
438
|
|
|
440
439
|
Examples:
|
|
441
440
|
>>> cl.list_dirs('my_dir', recursive=True)
|
pixeltable/index/base.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import Any
|
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
from pixeltable import catalog, exprs
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class IndexBase(abc.ABC):
|
|
@@ -22,7 +22,7 @@ class IndexBase(abc.ABC):
|
|
|
22
22
|
pass
|
|
23
23
|
|
|
24
24
|
@abc.abstractmethod
|
|
25
|
-
def index_value_expr(self) ->
|
|
25
|
+
def index_value_expr(self) -> exprs.Expr:
|
|
26
26
|
"""Return expression that computes the value that goes into the index"""
|
|
27
27
|
pass
|
|
28
28
|
|
pixeltable/index/btree.py
CHANGED
|
@@ -2,10 +2,10 @@ from typing import Optional
|
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
5
|
-
# TODO: why does this import result in a circular import, but the one im embedding_index.py doesn't?
|
|
6
|
-
#import pixeltable.catalog as catalog
|
|
7
5
|
import pixeltable.exceptions as excs
|
|
8
|
-
|
|
6
|
+
from pixeltable import catalog, exprs
|
|
7
|
+
from pixeltable.func.udf import udf
|
|
8
|
+
|
|
9
9
|
from .base import IndexBase
|
|
10
10
|
|
|
11
11
|
|
|
@@ -15,7 +15,8 @@ class BtreeIndex(IndexBase):
|
|
|
15
15
|
"""
|
|
16
16
|
MAX_STRING_LEN = 256
|
|
17
17
|
|
|
18
|
-
@
|
|
18
|
+
@staticmethod
|
|
19
|
+
@udf
|
|
19
20
|
def str_filter(s: Optional[str]) -> Optional[str]:
|
|
20
21
|
if s is None:
|
|
21
22
|
return None
|
|
@@ -24,10 +25,9 @@ class BtreeIndex(IndexBase):
|
|
|
24
25
|
def __init__(self, c: 'catalog.Column'):
|
|
25
26
|
if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
|
|
26
27
|
raise excs.Error(f'Index on column {c.name}: B-tree index requires scalar or media type, got {c.col_type}')
|
|
27
|
-
|
|
28
|
-
self.value_expr = self.str_filter(ColumnRef(c)) if c.col_type.is_string_type() else ColumnRef(c)
|
|
28
|
+
self.value_expr = BtreeIndex.str_filter(exprs.ColumnRef(c)) if c.col_type.is_string_type() else exprs.ColumnRef(c)
|
|
29
29
|
|
|
30
|
-
def index_value_expr(self) -> '
|
|
30
|
+
def index_value_expr(self) -> 'exprs.Expr':
|
|
31
31
|
return self.value_expr
|
|
32
32
|
|
|
33
33
|
def records_value_errors(self) -> bool:
|
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional, Any
|
|
4
3
|
import enum
|
|
4
|
+
from typing import Any, Optional
|
|
5
5
|
|
|
6
|
-
import PIL.Image
|
|
7
6
|
import numpy as np
|
|
8
|
-
import pgvector.sqlalchemy
|
|
7
|
+
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
9
8
|
import PIL.Image
|
|
10
9
|
import sqlalchemy as sql
|
|
11
10
|
|
|
12
|
-
import pixeltable.catalog as catalog
|
|
13
11
|
import pixeltable.exceptions as excs
|
|
14
|
-
import pixeltable.func as func
|
|
15
12
|
import pixeltable.type_system as ts
|
|
13
|
+
from pixeltable import catalog, exprs, func
|
|
14
|
+
|
|
16
15
|
from .base import IndexBase
|
|
17
16
|
|
|
18
17
|
|
|
@@ -58,16 +57,15 @@ class EmbeddingIndex(IndexBase):
|
|
|
58
57
|
self._validate_embedding_fn(image_embed, 'image_embed', ts.ColumnType.Type.IMAGE)
|
|
59
58
|
|
|
60
59
|
self.metric = self.Metric[metric.upper()]
|
|
61
|
-
|
|
62
|
-
self.value_expr
|
|
63
|
-
assert self.value_expr.col_type.is_array_type()
|
|
60
|
+
self.value_expr = string_embed(exprs.ColumnRef(c)) if c.col_type.is_string_type() else image_embed(exprs.ColumnRef(c))
|
|
61
|
+
assert isinstance(self.value_expr.col_type, ts.ArrayType)
|
|
64
62
|
self.string_embed = string_embed
|
|
65
63
|
self.image_embed = image_embed
|
|
66
64
|
vector_size = self.value_expr.col_type.shape[0]
|
|
67
65
|
assert vector_size is not None
|
|
68
66
|
self.index_col_type = pgvector.sqlalchemy.Vector(vector_size)
|
|
69
67
|
|
|
70
|
-
def index_value_expr(self) ->
|
|
68
|
+
def index_value_expr(self) -> exprs.Expr:
|
|
71
69
|
"""Return expression that computes the value that goes into the index"""
|
|
72
70
|
return self.value_expr
|
|
73
71
|
|
|
@@ -151,7 +149,7 @@ class EmbeddingIndex(IndexBase):
|
|
|
151
149
|
img = PIL.Image.new('RGB', (512, 512))
|
|
152
150
|
return_type = embed_fn.call_return_type({param_name: img})
|
|
153
151
|
assert return_type is not None
|
|
154
|
-
if not return_type.
|
|
152
|
+
if not isinstance(return_type, ts.ArrayType):
|
|
155
153
|
raise excs.Error(f'{name} must return an array, but returns {return_type}')
|
|
156
154
|
else:
|
|
157
155
|
shape = return_type.shape
|