pixeltable: pixeltable-0.2.17-py3-none-any.whl → pixeltable-0.2.19-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +8 -7
- pixeltable/catalog/column.py +11 -8
- pixeltable/catalog/insertable_table.py +1 -1
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/table.py +20 -14
- pixeltable/catalog/table_version.py +92 -55
- pixeltable/catalog/table_version_path.py +7 -9
- pixeltable/catalog/view.py +3 -2
- pixeltable/dataframe.py +2 -2
- pixeltable/env.py +205 -86
- pixeltable/exceptions.py +5 -1
- pixeltable/exec/aggregation_node.py +2 -1
- pixeltable/exec/component_iteration_node.py +2 -2
- pixeltable/exec/sql_node.py +11 -8
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -1
- pixeltable/exprs/column_property_ref.py +9 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/exprs/comparison.py +10 -7
- pixeltable/exprs/compound_predicate.py +3 -2
- pixeltable/exprs/data_row.py +19 -4
- pixeltable/exprs/expr.py +51 -41
- pixeltable/exprs/expr_set.py +32 -9
- pixeltable/exprs/function_call.py +62 -40
- pixeltable/exprs/in_predicate.py +3 -2
- pixeltable/exprs/inline_expr.py +200 -0
- pixeltable/exprs/is_null.py +3 -2
- pixeltable/exprs/json_mapper.py +5 -4
- pixeltable/exprs/json_path.py +7 -1
- pixeltable/exprs/literal.py +34 -7
- pixeltable/exprs/method_ref.py +3 -3
- pixeltable/exprs/object_ref.py +6 -5
- pixeltable/exprs/row_builder.py +25 -17
- pixeltable/exprs/rowid_ref.py +2 -1
- pixeltable/exprs/similarity_expr.py +2 -1
- pixeltable/exprs/sql_element_cache.py +30 -0
- pixeltable/exprs/type_cast.py +3 -3
- pixeltable/exprs/variable.py +2 -1
- pixeltable/ext/functions/whisperx.py +6 -4
- pixeltable/ext/functions/yolox.py +11 -9
- pixeltable/func/aggregate_function.py +1 -0
- pixeltable/func/function.py +28 -4
- pixeltable/functions/__init__.py +4 -2
- pixeltable/functions/anthropic.py +15 -5
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -1
- pixeltable/functions/huggingface.py +91 -14
- pixeltable/functions/image.py +20 -5
- pixeltable/functions/json.py +5 -5
- pixeltable/functions/mistralai.py +188 -0
- pixeltable/functions/openai.py +6 -10
- pixeltable/functions/string.py +3 -2
- pixeltable/functions/timestamp.py +95 -7
- pixeltable/functions/together.py +18 -11
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +69 -37
- pixeltable/functions/whisper.py +4 -1
- pixeltable/globals.py +5 -1
- pixeltable/io/hf_datasets.py +17 -15
- pixeltable/io/pandas.py +0 -2
- pixeltable/io/parquet.py +15 -14
- pixeltable/iterators/document.py +16 -15
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_18.py +1 -1
- pixeltable/metadata/converters/convert_19.py +46 -0
- pixeltable/metadata/converters/convert_20.py +56 -0
- pixeltable/metadata/converters/util.py +29 -4
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +5 -4
- pixeltable/plan.py +100 -78
- pixeltable/store.py +5 -1
- pixeltable/tool/create_test_db_dump.py +18 -6
- pixeltable/type_system.py +15 -15
- pixeltable/utils/documents.py +45 -42
- pixeltable/utils/formatter.py +2 -2
- pixeltable-0.2.19.dist-info/LICENSE +201 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/METADATA +84 -24
- pixeltable-0.2.19.dist-info/RECORD +147 -0
- pixeltable/exprs/inline_array.py +0 -116
- pixeltable/exprs/inline_dict.py +0 -103
- pixeltable-0.2.17.dist-info/LICENSE +0 -18
- pixeltable-0.2.17.dist-info/RECORD +0 -144
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/compound_predicate.py
CHANGED
|
@@ -9,6 +9,7 @@ from .data_row import DataRow
|
|
|
9
9
|
from .expr import Expr
|
|
10
10
|
from .globals import LogicalOperator
|
|
11
11
|
from .row_builder import RowBuilder
|
|
12
|
+
from .sql_element_cache import SqlElementCache
|
|
12
13
|
import pixeltable.type_system as ts
|
|
13
14
|
|
|
14
15
|
|
|
@@ -66,8 +67,8 @@ class CompoundPredicate(Expr):
|
|
|
66
67
|
non_matches = [op for op in self.components if not condition(op)]
|
|
67
68
|
return (matches, self.make_conjunction(non_matches))
|
|
68
69
|
|
|
69
|
-
def sql_expr(self) -> Optional[sql.
|
|
70
|
-
sql_exprs = [
|
|
70
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
71
|
+
sql_exprs = [sql_elements.get(op) for op in self.components]
|
|
71
72
|
if any(e is None for e in sql_exprs):
|
|
72
73
|
return None
|
|
73
74
|
if self.operator == LogicalOperator.NOT:
|
pixeltable/exprs/data_row.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import datetime
|
|
3
4
|
import io
|
|
4
5
|
import urllib.parse
|
|
5
6
|
import urllib.request
|
|
@@ -8,8 +9,11 @@ from typing import Optional, List, Any, Tuple
|
|
|
8
9
|
import sqlalchemy as sql
|
|
9
10
|
import pgvector.sqlalchemy
|
|
10
11
|
import PIL
|
|
12
|
+
import PIL.Image
|
|
11
13
|
import numpy as np
|
|
12
14
|
|
|
15
|
+
from pixeltable import env
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
class DataRow:
|
|
15
19
|
"""
|
|
@@ -101,6 +105,7 @@ class DataRow:
|
|
|
101
105
|
|
|
102
106
|
def __getitem__(self, index: object) -> Any:
|
|
103
107
|
"""Returns in-memory value, ie, what is needed for expr evaluation"""
|
|
108
|
+
assert isinstance(index, int)
|
|
104
109
|
if not self.has_val[index]:
|
|
105
110
|
# for debugging purposes
|
|
106
111
|
pass
|
|
@@ -115,7 +120,7 @@ class DataRow:
|
|
|
115
120
|
|
|
116
121
|
return self.vals[index]
|
|
117
122
|
|
|
118
|
-
def get_stored_val(self, index:
|
|
123
|
+
def get_stored_val(self, index: int, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
|
|
119
124
|
"""Return the value that gets stored in the db"""
|
|
120
125
|
assert self.excs[index] is None
|
|
121
126
|
if not self.has_val[index]:
|
|
@@ -140,12 +145,17 @@ class DataRow:
|
|
|
140
145
|
if self.vals[index] is None and sa_col_type is not None and isinstance(sa_col_type, sql.JSON):
|
|
141
146
|
return sql.sql.null()
|
|
142
147
|
|
|
148
|
+
if isinstance(self.vals[index], datetime.datetime) and self.vals[index].tzinfo is None:
|
|
149
|
+
# if the datetime is naive, cast it to the default time zone
|
|
150
|
+
return self.vals[index].replace(tzinfo=env.Env.get().default_time_zone)
|
|
151
|
+
|
|
143
152
|
return self.vals[index]
|
|
144
153
|
|
|
145
154
|
def __setitem__(self, idx: object, val: Any) -> None:
|
|
146
155
|
"""Assign in-memory cell value
|
|
147
156
|
This allows overwriting
|
|
148
157
|
"""
|
|
158
|
+
assert isinstance(idx, int)
|
|
149
159
|
assert self.excs[idx] is None
|
|
150
160
|
|
|
151
161
|
if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
|
|
@@ -177,7 +187,7 @@ class DataRow:
|
|
|
177
187
|
self.vals[idx] = val
|
|
178
188
|
self.has_val[idx] = True
|
|
179
189
|
|
|
180
|
-
def set_file_path(self, idx:
|
|
190
|
+
def set_file_path(self, idx: int, path: str) -> None:
|
|
181
191
|
"""Augment an existing url with a local file path"""
|
|
182
192
|
assert self.has_val[idx]
|
|
183
193
|
assert idx in self.img_slot_idxs or idx in self.media_slot_idxs
|
|
@@ -185,7 +195,7 @@ class DataRow:
|
|
|
185
195
|
if idx in self.media_slot_idxs:
|
|
186
196
|
self.vals[idx] = path
|
|
187
197
|
|
|
188
|
-
def flush_img(self, index:
|
|
198
|
+
def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
|
|
189
199
|
"""Discard the in-memory value and save it to a local file, if filepath is not None"""
|
|
190
200
|
if self.vals[index] is None:
|
|
191
201
|
return
|
|
@@ -195,7 +205,12 @@ class DataRow:
|
|
|
195
205
|
# we want to save this to a file
|
|
196
206
|
self.file_paths[index] = filepath
|
|
197
207
|
self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
|
|
198
|
-
self.vals[index]
|
|
208
|
+
image = self.vals[index]
|
|
209
|
+
assert isinstance(image, PIL.Image.Image)
|
|
210
|
+
# Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
|
|
211
|
+
# In that case, use WebP instead.
|
|
212
|
+
format = 'webp' if image.has_transparency_data else 'jpeg'
|
|
213
|
+
image.save(filepath, format=format)
|
|
199
214
|
else:
|
|
200
215
|
# we discard the content of this cell
|
|
201
216
|
self.has_val[index] = False
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -7,10 +7,11 @@ import inspect
|
|
|
7
7
|
import json
|
|
8
8
|
import sys
|
|
9
9
|
import typing
|
|
10
|
-
from typing import Any, Callable,
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
13
|
import sqlalchemy as sql
|
|
14
|
+
from typing_extensions import Self
|
|
14
15
|
|
|
15
16
|
import pixeltable
|
|
16
17
|
import pixeltable.catalog as catalog
|
|
@@ -21,6 +22,8 @@ import pixeltable.type_system as ts
|
|
|
21
22
|
from .data_row import DataRow
|
|
22
23
|
from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
|
|
23
24
|
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from pixeltable import exprs
|
|
24
27
|
|
|
25
28
|
class ExprScope:
|
|
26
29
|
"""
|
|
@@ -49,23 +52,31 @@ class Expr(abc.ABC):
|
|
|
49
52
|
- during eval(), components can only be accessed via self.components; any Exprs outside of that won't
|
|
50
53
|
have slot_idx set
|
|
51
54
|
"""
|
|
52
|
-
def __init__(self, col_type: ts.ColumnType):
|
|
53
|
-
self.col_type = col_type
|
|
54
55
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
56
|
+
col_type: ts.ColumnType
|
|
57
|
+
|
|
58
|
+
# the subexprs are needed to construct this expr
|
|
59
|
+
components: list[Expr]
|
|
60
|
+
|
|
61
|
+
# each instance has an id that is used for equality comparisons
|
|
62
|
+
# - set by the subclass's __init__()
|
|
63
|
+
# - produced by _create_id()
|
|
64
|
+
# - not expected to survive a serialize()/deserialize() roundtrip
|
|
65
|
+
id: Optional[int]
|
|
60
66
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
+
# index of the expr's value in the data row:
|
|
68
|
+
# - set for all materialized exprs
|
|
69
|
+
# - None: not executable
|
|
70
|
+
# - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
|
|
71
|
+
slot_idx: Optional[int]
|
|
72
|
+
|
|
73
|
+
def __init__(self, col_type: ts.ColumnType):
|
|
74
|
+
self.col_type = col_type
|
|
75
|
+
self.components = []
|
|
76
|
+
self.id = None
|
|
77
|
+
self.slot_idx = None
|
|
67
78
|
|
|
68
|
-
def dependencies(self) ->
|
|
79
|
+
def dependencies(self) -> list[Expr]:
|
|
69
80
|
"""
|
|
70
81
|
Returns all exprs that need to have been evaluated before eval() can be called on this one.
|
|
71
82
|
"""
|
|
@@ -115,7 +126,7 @@ class Expr(abc.ABC):
|
|
|
115
126
|
# override this
|
|
116
127
|
return True
|
|
117
128
|
|
|
118
|
-
def _id_attrs(self) ->
|
|
129
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
119
130
|
"""Returns attribute name/value pairs that are used to construct the instance id.
|
|
120
131
|
|
|
121
132
|
Attribute values must be immutable and have str() defined.
|
|
@@ -137,7 +148,7 @@ class Expr(abc.ABC):
|
|
|
137
148
|
return self.id
|
|
138
149
|
|
|
139
150
|
@classmethod
|
|
140
|
-
def list_equals(cls, a:
|
|
151
|
+
def list_equals(cls, a: list[Expr], b: list[Expr]) -> bool:
|
|
141
152
|
if len(a) != len(b):
|
|
142
153
|
return False
|
|
143
154
|
for i in range(len(a)):
|
|
@@ -158,7 +169,7 @@ class Expr(abc.ABC):
|
|
|
158
169
|
return result
|
|
159
170
|
|
|
160
171
|
@classmethod
|
|
161
|
-
def copy_list(cls, expr_list: Optional[
|
|
172
|
+
def copy_list(cls, expr_list: Optional[list[Expr]]) -> Optional[list[Expr]]:
|
|
162
173
|
if expr_list is None:
|
|
163
174
|
return None
|
|
164
175
|
return [e.copy() for e in expr_list]
|
|
@@ -183,11 +194,11 @@ class Expr(abc.ABC):
|
|
|
183
194
|
return self
|
|
184
195
|
|
|
185
196
|
@classmethod
|
|
186
|
-
def list_substitute(cls, expr_list:
|
|
197
|
+
def list_substitute(cls, expr_list: list[Expr], spec: dict[Expr, Expr]) -> None:
|
|
187
198
|
for i in range(len(expr_list)):
|
|
188
199
|
expr_list[i] = expr_list[i].substitute(spec)
|
|
189
200
|
|
|
190
|
-
def resolve_computed_cols(self, resolve_cols: Optional[
|
|
201
|
+
def resolve_computed_cols(self, resolve_cols: Optional[set[catalog.Column]] = None) -> Expr:
|
|
191
202
|
"""
|
|
192
203
|
Recursively replace ColRefs to unstored computed columns with their value exprs.
|
|
193
204
|
Also replaces references to stored computed columns in resolve_cols.
|
|
@@ -215,12 +226,12 @@ class Expr(abc.ABC):
|
|
|
215
226
|
return False
|
|
216
227
|
return True
|
|
217
228
|
|
|
218
|
-
def retarget(self, tbl: catalog.TableVersionPath) ->
|
|
229
|
+
def retarget(self, tbl: catalog.TableVersionPath) -> Self:
|
|
219
230
|
"""Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
|
|
220
231
|
tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
|
|
221
232
|
return self._retarget(tbl_versions)
|
|
222
233
|
|
|
223
|
-
def _retarget(self, tbl_versions:
|
|
234
|
+
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
|
|
224
235
|
from .column_ref import ColumnRef
|
|
225
236
|
if isinstance(self, ColumnRef):
|
|
226
237
|
target = tbl_versions[self.col.tbl.id]
|
|
@@ -299,7 +310,7 @@ class Expr(abc.ABC):
|
|
|
299
310
|
for e in expr_list:
|
|
300
311
|
yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
|
|
301
312
|
|
|
302
|
-
def _contains(self, cls: Optional[
|
|
313
|
+
def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
|
|
303
314
|
"""
|
|
304
315
|
Returns True if any subexpr is an instance of cls.
|
|
305
316
|
"""
|
|
@@ -312,15 +323,15 @@ class Expr(abc.ABC):
|
|
|
312
323
|
except StopIteration:
|
|
313
324
|
return False
|
|
314
325
|
|
|
315
|
-
def tbl_ids(self) ->
|
|
326
|
+
def tbl_ids(self) -> set[UUID]:
|
|
316
327
|
"""Returns table ids referenced by this expr."""
|
|
317
328
|
from .column_ref import ColumnRef
|
|
318
329
|
from .rowid_ref import RowidRef
|
|
319
330
|
return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
|
|
320
331
|
|
|
321
332
|
@classmethod
|
|
322
|
-
def list_tbl_ids(cls, expr_list:
|
|
323
|
-
ids:
|
|
333
|
+
def list_tbl_ids(cls, expr_list: list[Expr]) -> set[UUID]:
|
|
334
|
+
ids: set[UUID] = set()
|
|
324
335
|
for e in expr_list:
|
|
325
336
|
ids.update(e.tbl_ids())
|
|
326
337
|
return ids
|
|
@@ -345,15 +356,14 @@ class Expr(abc.ABC):
|
|
|
345
356
|
"""
|
|
346
357
|
if isinstance(o, Expr):
|
|
347
358
|
return o
|
|
348
|
-
# Try to create a literal. We need to check for
|
|
349
|
-
# first, to prevent
|
|
359
|
+
# Try to create a literal. We need to check for InlineList/InlineDict
|
|
360
|
+
# first, to prevent them from inappropriately being interpreted as JsonType
|
|
350
361
|
# literals.
|
|
351
|
-
# TODO: general cleanup of InlineArray/InlineDict
|
|
352
362
|
if isinstance(o, list):
|
|
353
|
-
from .
|
|
354
|
-
return
|
|
363
|
+
from .inline_expr import InlineList
|
|
364
|
+
return InlineList(o)
|
|
355
365
|
if isinstance(o, dict):
|
|
356
|
-
from .
|
|
366
|
+
from .inline_expr import InlineDict
|
|
357
367
|
return InlineDict(o)
|
|
358
368
|
obj_type = ts.ColumnType.infer_literal_type(o)
|
|
359
369
|
if obj_type is not None:
|
|
@@ -362,7 +372,7 @@ class Expr(abc.ABC):
|
|
|
362
372
|
return None
|
|
363
373
|
|
|
364
374
|
@abc.abstractmethod
|
|
365
|
-
def sql_expr(self) -> Optional[sql.ColumnElement]:
|
|
375
|
+
def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
|
|
366
376
|
"""
|
|
367
377
|
If this expr can be materialized directly in SQL:
|
|
368
378
|
- returns a ColumnElement
|
|
@@ -389,14 +399,14 @@ class Expr(abc.ABC):
|
|
|
389
399
|
c.release()
|
|
390
400
|
|
|
391
401
|
@classmethod
|
|
392
|
-
def release_list(cls, expr_list:
|
|
402
|
+
def release_list(cls, expr_list: list[Expr]) -> None:
|
|
393
403
|
for e in expr_list:
|
|
394
404
|
e.release()
|
|
395
405
|
|
|
396
406
|
def serialize(self) -> str:
|
|
397
407
|
return json.dumps(self.as_dict())
|
|
398
408
|
|
|
399
|
-
def as_dict(self) ->
|
|
409
|
+
def as_dict(self) -> dict:
|
|
400
410
|
"""
|
|
401
411
|
Turn Expr object into a dict that can be passed to json.dumps().
|
|
402
412
|
Subclasses override _as_dict().
|
|
@@ -407,10 +417,10 @@ class Expr(abc.ABC):
|
|
|
407
417
|
}
|
|
408
418
|
|
|
409
419
|
@classmethod
|
|
410
|
-
def as_dict_list(self, expr_list:
|
|
420
|
+
def as_dict_list(self, expr_list: list[Expr]) -> list[dict]:
|
|
411
421
|
return [e.as_dict() for e in expr_list]
|
|
412
422
|
|
|
413
|
-
def _as_dict(self) ->
|
|
423
|
+
def _as_dict(self) -> dict:
|
|
414
424
|
if len(self.components) > 0:
|
|
415
425
|
return {'components': [c.as_dict() for c in self.components]}
|
|
416
426
|
return {}
|
|
@@ -420,24 +430,24 @@ class Expr(abc.ABC):
|
|
|
420
430
|
return cls.from_dict(json.loads(dict_str))
|
|
421
431
|
|
|
422
432
|
@classmethod
|
|
423
|
-
def from_dict(cls, d:
|
|
433
|
+
def from_dict(cls, d: dict) -> Self:
|
|
424
434
|
"""
|
|
425
435
|
Turn dict that was produced by calling Expr.as_dict() into an instance of the correct Expr subclass.
|
|
426
436
|
"""
|
|
427
437
|
assert '_classname' in d
|
|
428
438
|
exprs_module = importlib.import_module(cls.__module__.rsplit('.', 1)[0])
|
|
429
439
|
type_class = getattr(exprs_module, d['_classname'])
|
|
430
|
-
components:
|
|
440
|
+
components: list[Expr] = []
|
|
431
441
|
if 'components' in d:
|
|
432
442
|
components = [cls.from_dict(component_dict) for component_dict in d['components']]
|
|
433
443
|
return type_class._from_dict(d, components)
|
|
434
444
|
|
|
435
445
|
@classmethod
|
|
436
|
-
def from_dict_list(cls, dict_list:
|
|
446
|
+
def from_dict_list(cls, dict_list: list[dict]) -> list[Expr]:
|
|
437
447
|
return [cls.from_dict(d) for d in dict_list]
|
|
438
448
|
|
|
439
449
|
@classmethod
|
|
440
|
-
def _from_dict(cls, d:
|
|
450
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
|
|
441
451
|
assert False, 'not implemented'
|
|
442
452
|
|
|
443
453
|
def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
|
pixeltable/exprs/expr_set.py
CHANGED
|
@@ -1,25 +1,36 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Iterable, Iterator
|
|
3
4
|
|
|
4
5
|
from .expr import Expr
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class ExprSet:
|
|
8
|
-
"""
|
|
9
|
+
"""
|
|
10
|
+
A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
|
|
11
|
+
"""
|
|
12
|
+
exprs: dict[int, Expr] # key: Expr.id
|
|
13
|
+
exprs_by_idx: dict[int, Expr] # key: slot_idx
|
|
14
|
+
|
|
9
15
|
def __init__(self, elements: Optional[Iterable[Expr]] = None):
|
|
10
|
-
self.exprs
|
|
16
|
+
self.exprs = {}
|
|
17
|
+
self.exprs_by_idx = {}
|
|
11
18
|
if elements is not None:
|
|
12
19
|
for e in elements:
|
|
13
|
-
self.
|
|
20
|
+
self.add(e)
|
|
14
21
|
|
|
15
|
-
def
|
|
22
|
+
def add(self, expr: Expr) -> None:
|
|
16
23
|
if expr.id in self.exprs:
|
|
17
24
|
return
|
|
18
25
|
self.exprs[expr.id] = expr
|
|
26
|
+
if expr.slot_idx is None:
|
|
27
|
+
return
|
|
28
|
+
self.exprs_by_idx[expr.slot_idx] = expr
|
|
19
29
|
|
|
20
|
-
def
|
|
21
|
-
for
|
|
22
|
-
|
|
30
|
+
def update(self, *others: Iterable[Expr]) -> None:
|
|
31
|
+
for other in others:
|
|
32
|
+
for e in other:
|
|
33
|
+
self.add(e)
|
|
23
34
|
|
|
24
35
|
def __contains__(self, item: Expr) -> bool:
|
|
25
36
|
return item.id in self.exprs
|
|
@@ -31,9 +42,21 @@ class ExprSet:
|
|
|
31
42
|
return iter(self.exprs.values())
|
|
32
43
|
|
|
33
44
|
def __getitem__(self, index: object) -> Optional[Expr]:
|
|
45
|
+
"""Indexed lookup by slot_idx or Expr.id."""
|
|
46
|
+
if not isinstance(index, int) and not isinstance(index, Expr):
|
|
47
|
+
pass
|
|
34
48
|
assert isinstance(index, int) or isinstance(index, Expr)
|
|
35
49
|
if isinstance(index, int):
|
|
36
50
|
# return expr with matching slot_idx
|
|
37
|
-
return
|
|
51
|
+
return self.exprs_by_idx.get(index)
|
|
38
52
|
else:
|
|
39
53
|
return self.exprs.get(index.id)
|
|
54
|
+
|
|
55
|
+
def issuperset(self, other: ExprSet) -> bool:
|
|
56
|
+
return self.exprs.keys() >= other.exprs.keys()
|
|
57
|
+
|
|
58
|
+
def __ge__(self, other: ExprSet) -> bool:
|
|
59
|
+
return self.issuperset(other)
|
|
60
|
+
|
|
61
|
+
def __le__(self, other: ExprSet) -> bool:
|
|
62
|
+
return other.issuperset(self)
|
pixeltable/exprs/function_call.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import inspect
|
|
4
4
|
import json
|
|
5
5
|
import sys
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
|
|
@@ -11,34 +11,40 @@ import pixeltable.catalog as catalog
|
|
|
11
11
|
import pixeltable.exceptions as excs
|
|
12
12
|
import pixeltable.func as func
|
|
13
13
|
import pixeltable.type_system as ts
|
|
14
|
+
|
|
14
15
|
from .data_row import DataRow
|
|
15
16
|
from .expr import Expr
|
|
16
|
-
from .
|
|
17
|
-
from .inline_dict import InlineDict
|
|
17
|
+
from .inline_expr import InlineDict, InlineList
|
|
18
18
|
from .row_builder import RowBuilder
|
|
19
19
|
from .rowid_ref import RowidRef
|
|
20
|
+
from .sql_element_cache import SqlElementCache
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class FunctionCall(Expr):
|
|
23
24
|
|
|
24
25
|
fn: func.Function
|
|
25
26
|
is_method_call: bool
|
|
26
|
-
agg_init_args:
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
27
|
+
agg_init_args: dict[str, Any]
|
|
28
|
+
|
|
29
|
+
# tuple[Optional[int], Optional[Any]]:
|
|
30
|
+
# - for Exprs: (index into components, None)
|
|
31
|
+
# - otherwise: (None, val)
|
|
32
|
+
args: list[tuple[Optional[int], Optional[Any]]]
|
|
33
|
+
kwargs: dict[str, tuple[Optional[int], Optional[Any]]]
|
|
34
|
+
|
|
35
|
+
arg_types: list[ts.ColumnType]
|
|
36
|
+
kwarg_types: dict[str, ts.ColumnType]
|
|
31
37
|
group_by_start_idx: int
|
|
32
38
|
group_by_stop_idx: int
|
|
33
39
|
fn_expr_idx: int
|
|
34
40
|
order_by_start_idx: int
|
|
35
41
|
constant_args: set[str]
|
|
36
42
|
aggregator: Optional[Any]
|
|
37
|
-
current_partition_vals: Optional[
|
|
43
|
+
current_partition_vals: Optional[list[Any]]
|
|
38
44
|
|
|
39
45
|
def __init__(
|
|
40
|
-
self, fn: func.Function, bound_args:
|
|
41
|
-
group_by_clause: Optional[
|
|
46
|
+
self, fn: func.Function, bound_args: dict[str, Any], order_by_clause: Optional[list[Any]] = None,
|
|
47
|
+
group_by_clause: Optional[list[Any]] = None, is_method_call: bool = False):
|
|
42
48
|
if order_by_clause is None:
|
|
43
49
|
order_by_clause = []
|
|
44
50
|
if group_by_clause is None:
|
|
@@ -47,7 +53,7 @@ class FunctionCall(Expr):
|
|
|
47
53
|
super().__init__(fn.call_return_type(bound_args))
|
|
48
54
|
self.fn = fn
|
|
49
55
|
self.is_method_call = is_method_call
|
|
50
|
-
self.normalize_args(signature, bound_args)
|
|
56
|
+
self.normalize_args(fn.name, signature, bound_args)
|
|
51
57
|
|
|
52
58
|
self.agg_init_args = {}
|
|
53
59
|
if self.is_agg_fn_call:
|
|
@@ -58,10 +64,6 @@ class FunctionCall(Expr):
|
|
|
58
64
|
bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
|
|
59
65
|
|
|
60
66
|
# construct components, args, kwargs
|
|
61
|
-
|
|
62
|
-
# Tuple[int, Any]:
|
|
63
|
-
# - for Exprs: (index into components, None)
|
|
64
|
-
# - otherwise: (None, val)
|
|
65
67
|
self.args = []
|
|
66
68
|
self.kwargs = {}
|
|
67
69
|
|
|
@@ -131,7 +133,7 @@ class FunctionCall(Expr):
|
|
|
131
133
|
|
|
132
134
|
self.id = self._create_id()
|
|
133
135
|
|
|
134
|
-
def _create_rowid_refs(self, tbl: catalog.Table) ->
|
|
136
|
+
def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
|
|
135
137
|
target = tbl._tbl_version_path.tbl_version
|
|
136
138
|
return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
|
|
137
139
|
|
|
@@ -141,7 +143,7 @@ class FunctionCall(Expr):
|
|
|
141
143
|
return super().default_column_name()
|
|
142
144
|
|
|
143
145
|
@classmethod
|
|
144
|
-
def normalize_args(cls, signature: func.Signature, bound_args:
|
|
146
|
+
def normalize_args(cls, fn_name: str, signature: func.Signature, bound_args: dict[str, Any]) -> None:
|
|
145
147
|
"""Converts all args to Exprs and checks that they are compatible with signature.
|
|
146
148
|
|
|
147
149
|
Updates bound_args in place, where necessary.
|
|
@@ -161,9 +163,7 @@ class FunctionCall(Expr):
|
|
|
161
163
|
|
|
162
164
|
if isinstance(arg, list) or isinstance(arg, tuple):
|
|
163
165
|
try:
|
|
164
|
-
|
|
165
|
-
is_json = is_var_param or (param.col_type is not None and param.col_type.is_json_type())
|
|
166
|
-
arg = InlineArray(arg, force_json=is_json)
|
|
166
|
+
arg = InlineList(arg)
|
|
167
167
|
bound_args[param_name] = arg
|
|
168
168
|
continue
|
|
169
169
|
except excs.Error:
|
|
@@ -175,7 +175,7 @@ class FunctionCall(Expr):
|
|
|
175
175
|
try:
|
|
176
176
|
_ = json.dumps(arg)
|
|
177
177
|
except TypeError:
|
|
178
|
-
raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg}')
|
|
178
|
+
raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg} (of type {type(arg)})')
|
|
179
179
|
if arg is not None:
|
|
180
180
|
try:
|
|
181
181
|
param_type = param.col_type
|
|
@@ -213,7 +213,7 @@ class FunctionCall(Expr):
|
|
|
213
213
|
or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
|
|
214
214
|
):
|
|
215
215
|
raise excs.Error(
|
|
216
|
-
f'Parameter {param_name}: argument type {arg.col_type} does not match parameter type '
|
|
216
|
+
f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
|
|
217
217
|
f'{param.col_type}')
|
|
218
218
|
|
|
219
219
|
def _equals(self, other: FunctionCall) -> bool:
|
|
@@ -232,7 +232,7 @@ class FunctionCall(Expr):
|
|
|
232
232
|
return False
|
|
233
233
|
return True
|
|
234
234
|
|
|
235
|
-
def _id_attrs(self) ->
|
|
235
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
236
236
|
return super()._id_attrs() + [
|
|
237
237
|
('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
|
|
238
238
|
('args', self.args),
|
|
@@ -253,11 +253,11 @@ class FunctionCall(Expr):
|
|
|
253
253
|
return f'{fn_name}({self._print_args()})'
|
|
254
254
|
|
|
255
255
|
def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
|
|
256
|
+
def print_arg(arg: Any) -> str:
|
|
257
|
+
return repr(arg) if isinstance(arg, str) else str(arg)
|
|
256
258
|
arg_strs = [
|
|
257
|
-
|
|
259
|
+
print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
|
|
258
260
|
]
|
|
259
|
-
def print_arg(arg: Any) -> str:
|
|
260
|
-
return f"'{arg}'" if isinstance(arg, str) else str(arg)
|
|
261
261
|
arg_strs.extend([
|
|
262
262
|
f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
|
|
263
263
|
for param_name, (idx, arg) in self.kwargs.items()
|
|
@@ -273,15 +273,15 @@ class FunctionCall(Expr):
|
|
|
273
273
|
separator = ', ' if inline else ',\n '
|
|
274
274
|
return separator.join(arg_strs)
|
|
275
275
|
|
|
276
|
-
def has_group_by(self) ->
|
|
276
|
+
def has_group_by(self) -> list[Expr]:
|
|
277
277
|
return self.group_by_stop_idx != 0
|
|
278
278
|
|
|
279
279
|
@property
|
|
280
|
-
def group_by(self) ->
|
|
280
|
+
def group_by(self) -> list[Expr]:
|
|
281
281
|
return self.components[self.group_by_start_idx:self.group_by_stop_idx]
|
|
282
282
|
|
|
283
283
|
@property
|
|
284
|
-
def order_by(self) ->
|
|
284
|
+
def order_by(self) -> list[Expr]:
|
|
285
285
|
return self.components[self.order_by_start_idx:]
|
|
286
286
|
|
|
287
287
|
@property
|
|
@@ -291,20 +291,42 @@ class FunctionCall(Expr):
|
|
|
291
291
|
or self.has_group_by() \
|
|
292
292
|
or (len(self.order_by) > 0 and not self.fn.requires_order_by))
|
|
293
293
|
|
|
294
|
-
def get_window_sort_exprs(self) ->
|
|
294
|
+
def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
|
|
295
295
|
return self.group_by, self.order_by
|
|
296
296
|
|
|
297
297
|
@property
|
|
298
298
|
def is_agg_fn_call(self) -> bool:
|
|
299
299
|
return isinstance(self.fn, func.AggregateFunction)
|
|
300
300
|
|
|
301
|
-
def get_agg_order_by(self) ->
|
|
301
|
+
def get_agg_order_by(self) -> list[Expr]:
|
|
302
302
|
assert self.is_agg_fn_call
|
|
303
303
|
return self.order_by
|
|
304
304
|
|
|
305
|
-
def sql_expr(self) -> Optional[sql.
|
|
306
|
-
#
|
|
307
|
-
|
|
305
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
306
|
+
# try to construct args and kwargs to call self.fn._to_sql()
|
|
307
|
+
kwargs: dict[str, sql.ColumnElement] = {}
|
|
308
|
+
for param_name, (component_idx, arg) in self.kwargs.items():
|
|
309
|
+
param = self.fn.signature.parameters[param_name]
|
|
310
|
+
assert param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD
|
|
311
|
+
if component_idx is None:
|
|
312
|
+
kwargs[param_name] = sql.literal(arg)
|
|
313
|
+
else:
|
|
314
|
+
arg_element = sql_elements.get(self.components[component_idx])
|
|
315
|
+
if arg_element is None:
|
|
316
|
+
return None
|
|
317
|
+
kwargs[param_name] = arg_element
|
|
318
|
+
|
|
319
|
+
args: list[sql.ColumnElement] = []
|
|
320
|
+
for _, (component_idx, arg) in enumerate(self.args):
|
|
321
|
+
if component_idx is None:
|
|
322
|
+
args.append(sql.literal(arg))
|
|
323
|
+
else:
|
|
324
|
+
arg_element = sql_elements.get(self.components[component_idx])
|
|
325
|
+
if arg_element is None:
|
|
326
|
+
return None
|
|
327
|
+
args.append(arg_element)
|
|
328
|
+
result = self.fn._to_sql(*args, **kwargs)
|
|
329
|
+
return result
|
|
308
330
|
|
|
309
331
|
def reset_agg(self) -> None:
|
|
310
332
|
"""
|
|
@@ -322,9 +344,9 @@ class FunctionCall(Expr):
|
|
|
322
344
|
args, kwargs = self._make_args(data_row)
|
|
323
345
|
self.aggregator.update(*args, **kwargs)
|
|
324
346
|
|
|
325
|
-
def _make_args(self, data_row: DataRow) ->
|
|
347
|
+
def _make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]]:
|
|
326
348
|
"""Return args and kwargs, constructed for data_row"""
|
|
327
|
-
kwargs:
|
|
349
|
+
kwargs: dict[str, Any] = {}
|
|
328
350
|
for param_name, (component_idx, arg) in self.kwargs.items():
|
|
329
351
|
val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
|
|
330
352
|
param = self.fn.signature.parameters[param_name]
|
|
@@ -335,7 +357,7 @@ class FunctionCall(Expr):
|
|
|
335
357
|
assert param.kind != inspect.Parameter.VAR_POSITIONAL
|
|
336
358
|
kwargs[param_name] = val
|
|
337
359
|
|
|
338
|
-
args:
|
|
360
|
+
args: list[Any] = []
|
|
339
361
|
for param_idx, (component_idx, arg) in enumerate(self.args):
|
|
340
362
|
val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
|
|
341
363
|
param = self.fn.signature.parameters_by_pos[param_idx]
|
|
@@ -393,7 +415,7 @@ class FunctionCall(Expr):
|
|
|
393
415
|
else:
|
|
394
416
|
data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
|
|
395
417
|
|
|
396
|
-
def _as_dict(self) ->
|
|
418
|
+
def _as_dict(self) -> dict:
|
|
397
419
|
result = {
|
|
398
420
|
'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
|
|
399
421
|
'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
|
|
@@ -403,7 +425,7 @@ class FunctionCall(Expr):
|
|
|
403
425
|
return result
|
|
404
426
|
|
|
405
427
|
@classmethod
|
|
406
|
-
def _from_dict(cls, d:
|
|
428
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
|
|
407
429
|
assert 'fn' in d
|
|
408
430
|
assert 'args' in d
|
|
409
431
|
assert 'kwargs' in d
|