pixeltable 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +8 -7
- pixeltable/catalog/column.py +11 -8
- pixeltable/catalog/insertable_table.py +1 -1
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/table.py +20 -13
- pixeltable/catalog/table_version.py +91 -54
- pixeltable/catalog/table_version_path.py +7 -9
- pixeltable/catalog/view.py +2 -1
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +173 -83
- pixeltable/exec/aggregation_node.py +2 -1
- pixeltable/exec/component_iteration_node.py +1 -1
- pixeltable/exec/sql_node.py +11 -8
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -1
- pixeltable/exprs/column_property_ref.py +9 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/exprs/comparison.py +10 -7
- pixeltable/exprs/compound_predicate.py +3 -2
- pixeltable/exprs/data_row.py +19 -4
- pixeltable/exprs/expr.py +46 -35
- pixeltable/exprs/expr_set.py +32 -9
- pixeltable/exprs/function_call.py +56 -32
- pixeltable/exprs/in_predicate.py +3 -2
- pixeltable/exprs/inline_array.py +2 -1
- pixeltable/exprs/inline_dict.py +2 -1
- pixeltable/exprs/is_null.py +3 -2
- pixeltable/exprs/json_mapper.py +5 -4
- pixeltable/exprs/json_path.py +7 -1
- pixeltable/exprs/literal.py +34 -7
- pixeltable/exprs/method_ref.py +3 -3
- pixeltable/exprs/object_ref.py +6 -5
- pixeltable/exprs/row_builder.py +25 -17
- pixeltable/exprs/rowid_ref.py +2 -1
- pixeltable/exprs/similarity_expr.py +2 -1
- pixeltable/exprs/sql_element_cache.py +30 -0
- pixeltable/exprs/type_cast.py +3 -3
- pixeltable/exprs/variable.py +2 -1
- pixeltable/ext/functions/whisperx.py +4 -4
- pixeltable/ext/functions/yolox.py +6 -6
- pixeltable/func/aggregate_function.py +1 -0
- pixeltable/func/function.py +28 -4
- pixeltable/functions/__init__.py +4 -2
- pixeltable/functions/anthropic.py +15 -5
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -1
- pixeltable/functions/huggingface.py +2 -2
- pixeltable/functions/image.py +17 -2
- pixeltable/functions/json.py +5 -5
- pixeltable/functions/mistralai.py +188 -0
- pixeltable/functions/openai.py +6 -10
- pixeltable/functions/string.py +3 -2
- pixeltable/functions/timestamp.py +95 -7
- pixeltable/functions/together.py +4 -4
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +27 -17
- pixeltable/functions/whisper.py +1 -1
- pixeltable/io/hf_datasets.py +17 -15
- pixeltable/io/pandas.py +0 -2
- pixeltable/io/parquet.py +15 -14
- pixeltable/iterators/document.py +16 -15
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_19.py +46 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +5 -4
- pixeltable/plan.py +100 -78
- pixeltable/store.py +5 -1
- pixeltable/tool/create_test_db_dump.py +4 -3
- pixeltable/type_system.py +12 -14
- pixeltable/utils/documents.py +45 -42
- pixeltable/utils/formatter.py +2 -2
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/METADATA +79 -21
- pixeltable-0.2.18.dist-info/RECORD +147 -0
- pixeltable-0.2.17.dist-info/RECORD +0 -144
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/data_row.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import datetime
|
|
3
4
|
import io
|
|
4
5
|
import urllib.parse
|
|
5
6
|
import urllib.request
|
|
@@ -8,8 +9,11 @@ from typing import Optional, List, Any, Tuple
|
|
|
8
9
|
import sqlalchemy as sql
|
|
9
10
|
import pgvector.sqlalchemy
|
|
10
11
|
import PIL
|
|
12
|
+
import PIL.Image
|
|
11
13
|
import numpy as np
|
|
12
14
|
|
|
15
|
+
from pixeltable import env
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
class DataRow:
|
|
15
19
|
"""
|
|
@@ -101,6 +105,7 @@ class DataRow:
|
|
|
101
105
|
|
|
102
106
|
def __getitem__(self, index: object) -> Any:
|
|
103
107
|
"""Returns in-memory value, ie, what is needed for expr evaluation"""
|
|
108
|
+
assert isinstance(index, int)
|
|
104
109
|
if not self.has_val[index]:
|
|
105
110
|
# for debugging purposes
|
|
106
111
|
pass
|
|
@@ -115,7 +120,7 @@ class DataRow:
|
|
|
115
120
|
|
|
116
121
|
return self.vals[index]
|
|
117
122
|
|
|
118
|
-
def get_stored_val(self, index:
|
|
123
|
+
def get_stored_val(self, index: int, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
|
|
119
124
|
"""Return the value that gets stored in the db"""
|
|
120
125
|
assert self.excs[index] is None
|
|
121
126
|
if not self.has_val[index]:
|
|
@@ -140,12 +145,17 @@ class DataRow:
|
|
|
140
145
|
if self.vals[index] is None and sa_col_type is not None and isinstance(sa_col_type, sql.JSON):
|
|
141
146
|
return sql.sql.null()
|
|
142
147
|
|
|
148
|
+
if isinstance(self.vals[index], datetime.datetime) and self.vals[index].tzinfo is None:
|
|
149
|
+
# if the datetime is naive, cast it to the default time zone
|
|
150
|
+
return self.vals[index].replace(tzinfo=env.Env.get().default_time_zone)
|
|
151
|
+
|
|
143
152
|
return self.vals[index]
|
|
144
153
|
|
|
145
154
|
def __setitem__(self, idx: object, val: Any) -> None:
|
|
146
155
|
"""Assign in-memory cell value
|
|
147
156
|
This allows overwriting
|
|
148
157
|
"""
|
|
158
|
+
assert isinstance(idx, int)
|
|
149
159
|
assert self.excs[idx] is None
|
|
150
160
|
|
|
151
161
|
if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
|
|
@@ -177,7 +187,7 @@ class DataRow:
|
|
|
177
187
|
self.vals[idx] = val
|
|
178
188
|
self.has_val[idx] = True
|
|
179
189
|
|
|
180
|
-
def set_file_path(self, idx:
|
|
190
|
+
def set_file_path(self, idx: int, path: str) -> None:
|
|
181
191
|
"""Augment an existing url with a local file path"""
|
|
182
192
|
assert self.has_val[idx]
|
|
183
193
|
assert idx in self.img_slot_idxs or idx in self.media_slot_idxs
|
|
@@ -185,7 +195,7 @@ class DataRow:
|
|
|
185
195
|
if idx in self.media_slot_idxs:
|
|
186
196
|
self.vals[idx] = path
|
|
187
197
|
|
|
188
|
-
def flush_img(self, index:
|
|
198
|
+
def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
|
|
189
199
|
"""Discard the in-memory value and save it to a local file, if filepath is not None"""
|
|
190
200
|
if self.vals[index] is None:
|
|
191
201
|
return
|
|
@@ -195,7 +205,12 @@ class DataRow:
|
|
|
195
205
|
# we want to save this to a file
|
|
196
206
|
self.file_paths[index] = filepath
|
|
197
207
|
self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
|
|
198
|
-
self.vals[index]
|
|
208
|
+
image = self.vals[index]
|
|
209
|
+
assert isinstance(image, PIL.Image.Image)
|
|
210
|
+
# Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
|
|
211
|
+
# In that case, use WebP instead.
|
|
212
|
+
format = 'webp' if image.has_transparency_data else 'jpeg'
|
|
213
|
+
image.save(filepath, format=format)
|
|
199
214
|
else:
|
|
200
215
|
# we discard the content of this cell
|
|
201
216
|
self.has_val[index] = False
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -7,10 +7,11 @@ import inspect
|
|
|
7
7
|
import json
|
|
8
8
|
import sys
|
|
9
9
|
import typing
|
|
10
|
-
from typing import Any, Callable,
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
13
|
import sqlalchemy as sql
|
|
14
|
+
from typing_extensions import Self
|
|
14
15
|
|
|
15
16
|
import pixeltable
|
|
16
17
|
import pixeltable.catalog as catalog
|
|
@@ -21,6 +22,8 @@ import pixeltable.type_system as ts
|
|
|
21
22
|
from .data_row import DataRow
|
|
22
23
|
from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
|
|
23
24
|
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from pixeltable import exprs
|
|
24
27
|
|
|
25
28
|
class ExprScope:
|
|
26
29
|
"""
|
|
@@ -49,23 +52,31 @@ class Expr(abc.ABC):
|
|
|
49
52
|
- during eval(), components can only be accessed via self.components; any Exprs outside of that won't
|
|
50
53
|
have slot_idx set
|
|
51
54
|
"""
|
|
52
|
-
def __init__(self, col_type: ts.ColumnType):
|
|
53
|
-
self.col_type = col_type
|
|
54
55
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
56
|
+
col_type: ts.ColumnType
|
|
57
|
+
|
|
58
|
+
# the subexprs are needed to construct this expr
|
|
59
|
+
components: list[Expr]
|
|
60
|
+
|
|
61
|
+
# each instance has an id that is used for equality comparisons
|
|
62
|
+
# - set by the subclass's __init__()
|
|
63
|
+
# - produced by _create_id()
|
|
64
|
+
# - not expected to survive a serialize()/deserialize() roundtrip
|
|
65
|
+
id: Optional[int]
|
|
60
66
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
+
# index of the expr's value in the data row:
|
|
68
|
+
# - set for all materialized exprs
|
|
69
|
+
# - None: not executable
|
|
70
|
+
# - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
|
|
71
|
+
slot_idx: Optional[int]
|
|
72
|
+
|
|
73
|
+
def __init__(self, col_type: ts.ColumnType):
|
|
74
|
+
self.col_type = col_type
|
|
75
|
+
self.components = []
|
|
76
|
+
self.id = None
|
|
77
|
+
self.slot_idx = None
|
|
67
78
|
|
|
68
|
-
def dependencies(self) ->
|
|
79
|
+
def dependencies(self) -> list[Expr]:
|
|
69
80
|
"""
|
|
70
81
|
Returns all exprs that need to have been evaluated before eval() can be called on this one.
|
|
71
82
|
"""
|
|
@@ -115,7 +126,7 @@ class Expr(abc.ABC):
|
|
|
115
126
|
# override this
|
|
116
127
|
return True
|
|
117
128
|
|
|
118
|
-
def _id_attrs(self) ->
|
|
129
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
119
130
|
"""Returns attribute name/value pairs that are used to construct the instance id.
|
|
120
131
|
|
|
121
132
|
Attribute values must be immutable and have str() defined.
|
|
@@ -137,7 +148,7 @@ class Expr(abc.ABC):
|
|
|
137
148
|
return self.id
|
|
138
149
|
|
|
139
150
|
@classmethod
|
|
140
|
-
def list_equals(cls, a:
|
|
151
|
+
def list_equals(cls, a: list[Expr], b: list[Expr]) -> bool:
|
|
141
152
|
if len(a) != len(b):
|
|
142
153
|
return False
|
|
143
154
|
for i in range(len(a)):
|
|
@@ -158,7 +169,7 @@ class Expr(abc.ABC):
|
|
|
158
169
|
return result
|
|
159
170
|
|
|
160
171
|
@classmethod
|
|
161
|
-
def copy_list(cls, expr_list: Optional[
|
|
172
|
+
def copy_list(cls, expr_list: Optional[list[Expr]]) -> Optional[list[Expr]]:
|
|
162
173
|
if expr_list is None:
|
|
163
174
|
return None
|
|
164
175
|
return [e.copy() for e in expr_list]
|
|
@@ -183,11 +194,11 @@ class Expr(abc.ABC):
|
|
|
183
194
|
return self
|
|
184
195
|
|
|
185
196
|
@classmethod
|
|
186
|
-
def list_substitute(cls, expr_list:
|
|
197
|
+
def list_substitute(cls, expr_list: list[Expr], spec: dict[Expr, Expr]) -> None:
|
|
187
198
|
for i in range(len(expr_list)):
|
|
188
199
|
expr_list[i] = expr_list[i].substitute(spec)
|
|
189
200
|
|
|
190
|
-
def resolve_computed_cols(self, resolve_cols: Optional[
|
|
201
|
+
def resolve_computed_cols(self, resolve_cols: Optional[set[catalog.Column]] = None) -> Expr:
|
|
191
202
|
"""
|
|
192
203
|
Recursively replace ColRefs to unstored computed columns with their value exprs.
|
|
193
204
|
Also replaces references to stored computed columns in resolve_cols.
|
|
@@ -215,12 +226,12 @@ class Expr(abc.ABC):
|
|
|
215
226
|
return False
|
|
216
227
|
return True
|
|
217
228
|
|
|
218
|
-
def retarget(self, tbl: catalog.TableVersionPath) ->
|
|
229
|
+
def retarget(self, tbl: catalog.TableVersionPath) -> Self:
|
|
219
230
|
"""Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
|
|
220
231
|
tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
|
|
221
232
|
return self._retarget(tbl_versions)
|
|
222
233
|
|
|
223
|
-
def _retarget(self, tbl_versions:
|
|
234
|
+
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
|
|
224
235
|
from .column_ref import ColumnRef
|
|
225
236
|
if isinstance(self, ColumnRef):
|
|
226
237
|
target = tbl_versions[self.col.tbl.id]
|
|
@@ -299,7 +310,7 @@ class Expr(abc.ABC):
|
|
|
299
310
|
for e in expr_list:
|
|
300
311
|
yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
|
|
301
312
|
|
|
302
|
-
def _contains(self, cls: Optional[
|
|
313
|
+
def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
|
|
303
314
|
"""
|
|
304
315
|
Returns True if any subexpr is an instance of cls.
|
|
305
316
|
"""
|
|
@@ -312,15 +323,15 @@ class Expr(abc.ABC):
|
|
|
312
323
|
except StopIteration:
|
|
313
324
|
return False
|
|
314
325
|
|
|
315
|
-
def tbl_ids(self) ->
|
|
326
|
+
def tbl_ids(self) -> set[UUID]:
|
|
316
327
|
"""Returns table ids referenced by this expr."""
|
|
317
328
|
from .column_ref import ColumnRef
|
|
318
329
|
from .rowid_ref import RowidRef
|
|
319
330
|
return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
|
|
320
331
|
|
|
321
332
|
@classmethod
|
|
322
|
-
def list_tbl_ids(cls, expr_list:
|
|
323
|
-
ids:
|
|
333
|
+
def list_tbl_ids(cls, expr_list: list[Expr]) -> set[UUID]:
|
|
334
|
+
ids: set[UUID] = set()
|
|
324
335
|
for e in expr_list:
|
|
325
336
|
ids.update(e.tbl_ids())
|
|
326
337
|
return ids
|
|
@@ -362,7 +373,7 @@ class Expr(abc.ABC):
|
|
|
362
373
|
return None
|
|
363
374
|
|
|
364
375
|
@abc.abstractmethod
|
|
365
|
-
def sql_expr(self) -> Optional[sql.ColumnElement]:
|
|
376
|
+
def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
|
|
366
377
|
"""
|
|
367
378
|
If this expr can be materialized directly in SQL:
|
|
368
379
|
- returns a ColumnElement
|
|
@@ -389,14 +400,14 @@ class Expr(abc.ABC):
|
|
|
389
400
|
c.release()
|
|
390
401
|
|
|
391
402
|
@classmethod
|
|
392
|
-
def release_list(cls, expr_list:
|
|
403
|
+
def release_list(cls, expr_list: list[Expr]) -> None:
|
|
393
404
|
for e in expr_list:
|
|
394
405
|
e.release()
|
|
395
406
|
|
|
396
407
|
def serialize(self) -> str:
|
|
397
408
|
return json.dumps(self.as_dict())
|
|
398
409
|
|
|
399
|
-
def as_dict(self) ->
|
|
410
|
+
def as_dict(self) -> dict:
|
|
400
411
|
"""
|
|
401
412
|
Turn Expr object into a dict that can be passed to json.dumps().
|
|
402
413
|
Subclasses override _as_dict().
|
|
@@ -407,10 +418,10 @@ class Expr(abc.ABC):
|
|
|
407
418
|
}
|
|
408
419
|
|
|
409
420
|
@classmethod
|
|
410
|
-
def as_dict_list(self, expr_list:
|
|
421
|
+
def as_dict_list(self, expr_list: list[Expr]) -> list[dict]:
|
|
411
422
|
return [e.as_dict() for e in expr_list]
|
|
412
423
|
|
|
413
|
-
def _as_dict(self) ->
|
|
424
|
+
def _as_dict(self) -> dict:
|
|
414
425
|
if len(self.components) > 0:
|
|
415
426
|
return {'components': [c.as_dict() for c in self.components]}
|
|
416
427
|
return {}
|
|
@@ -420,24 +431,24 @@ class Expr(abc.ABC):
|
|
|
420
431
|
return cls.from_dict(json.loads(dict_str))
|
|
421
432
|
|
|
422
433
|
@classmethod
|
|
423
|
-
def from_dict(cls, d:
|
|
434
|
+
def from_dict(cls, d: dict) -> Self:
|
|
424
435
|
"""
|
|
425
436
|
Turn dict that was produced by calling Expr.as_dict() into an instance of the correct Expr subclass.
|
|
426
437
|
"""
|
|
427
438
|
assert '_classname' in d
|
|
428
439
|
exprs_module = importlib.import_module(cls.__module__.rsplit('.', 1)[0])
|
|
429
440
|
type_class = getattr(exprs_module, d['_classname'])
|
|
430
|
-
components:
|
|
441
|
+
components: list[Expr] = []
|
|
431
442
|
if 'components' in d:
|
|
432
443
|
components = [cls.from_dict(component_dict) for component_dict in d['components']]
|
|
433
444
|
return type_class._from_dict(d, components)
|
|
434
445
|
|
|
435
446
|
@classmethod
|
|
436
|
-
def from_dict_list(cls, dict_list:
|
|
447
|
+
def from_dict_list(cls, dict_list: list[dict]) -> list[Expr]:
|
|
437
448
|
return [cls.from_dict(d) for d in dict_list]
|
|
438
449
|
|
|
439
450
|
@classmethod
|
|
440
|
-
def _from_dict(cls, d:
|
|
451
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
|
|
441
452
|
assert False, 'not implemented'
|
|
442
453
|
|
|
443
454
|
def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
|
pixeltable/exprs/expr_set.py
CHANGED
|
@@ -1,25 +1,36 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Iterable, Iterator
|
|
3
4
|
|
|
4
5
|
from .expr import Expr
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class ExprSet:
|
|
8
|
-
"""
|
|
9
|
+
"""
|
|
10
|
+
A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
|
|
11
|
+
"""
|
|
12
|
+
exprs: dict[int, Expr] # key: Expr.id
|
|
13
|
+
exprs_by_idx: dict[int, Expr] # key: slot_idx
|
|
14
|
+
|
|
9
15
|
def __init__(self, elements: Optional[Iterable[Expr]] = None):
|
|
10
|
-
self.exprs
|
|
16
|
+
self.exprs = {}
|
|
17
|
+
self.exprs_by_idx = {}
|
|
11
18
|
if elements is not None:
|
|
12
19
|
for e in elements:
|
|
13
|
-
self.
|
|
20
|
+
self.add(e)
|
|
14
21
|
|
|
15
|
-
def
|
|
22
|
+
def add(self, expr: Expr) -> None:
|
|
16
23
|
if expr.id in self.exprs:
|
|
17
24
|
return
|
|
18
25
|
self.exprs[expr.id] = expr
|
|
26
|
+
if expr.slot_idx is None:
|
|
27
|
+
return
|
|
28
|
+
self.exprs_by_idx[expr.slot_idx] = expr
|
|
19
29
|
|
|
20
|
-
def
|
|
21
|
-
for
|
|
22
|
-
|
|
30
|
+
def update(self, *others: Iterable[Expr]) -> None:
|
|
31
|
+
for other in others:
|
|
32
|
+
for e in other:
|
|
33
|
+
self.add(e)
|
|
23
34
|
|
|
24
35
|
def __contains__(self, item: Expr) -> bool:
|
|
25
36
|
return item.id in self.exprs
|
|
@@ -31,9 +42,21 @@ class ExprSet:
|
|
|
31
42
|
return iter(self.exprs.values())
|
|
32
43
|
|
|
33
44
|
def __getitem__(self, index: object) -> Optional[Expr]:
|
|
45
|
+
"""Indexed lookup by slot_idx or Expr.id."""
|
|
46
|
+
if not isinstance(index, int) and not isinstance(index, Expr):
|
|
47
|
+
pass
|
|
34
48
|
assert isinstance(index, int) or isinstance(index, Expr)
|
|
35
49
|
if isinstance(index, int):
|
|
36
50
|
# return expr with matching slot_idx
|
|
37
|
-
return
|
|
51
|
+
return self.exprs_by_idx.get(index)
|
|
38
52
|
else:
|
|
39
53
|
return self.exprs.get(index.id)
|
|
54
|
+
|
|
55
|
+
def issuperset(self, other: ExprSet) -> bool:
|
|
56
|
+
return self.exprs.keys() >= other.exprs.keys()
|
|
57
|
+
|
|
58
|
+
def __ge__(self, other: ExprSet) -> bool:
|
|
59
|
+
return self.issuperset(other)
|
|
60
|
+
|
|
61
|
+
def __le__(self, other: ExprSet) -> bool:
|
|
62
|
+
return other.issuperset(self)
|
pixeltable/exprs/function_call.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import inspect
|
|
4
4
|
import json
|
|
5
5
|
import sys
|
|
6
|
-
from typing import Optional,
|
|
6
|
+
from typing import Optional, Any
|
|
7
7
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
|
|
@@ -17,28 +17,34 @@ from .inline_array import InlineArray
|
|
|
17
17
|
from .inline_dict import InlineDict
|
|
18
18
|
from .row_builder import RowBuilder
|
|
19
19
|
from .rowid_ref import RowidRef
|
|
20
|
+
from .sql_element_cache import SqlElementCache
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class FunctionCall(Expr):
|
|
23
24
|
|
|
24
25
|
fn: func.Function
|
|
25
26
|
is_method_call: bool
|
|
26
|
-
agg_init_args:
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
27
|
+
agg_init_args: dict[str, Any]
|
|
28
|
+
|
|
29
|
+
# tuple[Optional[int], Optional[Any]]:
|
|
30
|
+
# - for Exprs: (index into components, None)
|
|
31
|
+
# - otherwise: (None, val)
|
|
32
|
+
args: list[tuple[Optional[int], Optional[Any]]]
|
|
33
|
+
kwargs: dict[str, tuple[Optional[int], Optional[Any]]]
|
|
34
|
+
|
|
35
|
+
arg_types: list[ts.ColumnType]
|
|
36
|
+
kwarg_types: dict[str, ts.ColumnType]
|
|
31
37
|
group_by_start_idx: int
|
|
32
38
|
group_by_stop_idx: int
|
|
33
39
|
fn_expr_idx: int
|
|
34
40
|
order_by_start_idx: int
|
|
35
41
|
constant_args: set[str]
|
|
36
42
|
aggregator: Optional[Any]
|
|
37
|
-
current_partition_vals: Optional[
|
|
43
|
+
current_partition_vals: Optional[list[Any]]
|
|
38
44
|
|
|
39
45
|
def __init__(
|
|
40
|
-
self, fn: func.Function, bound_args:
|
|
41
|
-
group_by_clause: Optional[
|
|
46
|
+
self, fn: func.Function, bound_args: dict[str, Any], order_by_clause: Optional[list[Any]] = None,
|
|
47
|
+
group_by_clause: Optional[list[Any]] = None, is_method_call: bool = False):
|
|
42
48
|
if order_by_clause is None:
|
|
43
49
|
order_by_clause = []
|
|
44
50
|
if group_by_clause is None:
|
|
@@ -58,10 +64,6 @@ class FunctionCall(Expr):
|
|
|
58
64
|
bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
|
|
59
65
|
|
|
60
66
|
# construct components, args, kwargs
|
|
61
|
-
|
|
62
|
-
# Tuple[int, Any]:
|
|
63
|
-
# - for Exprs: (index into components, None)
|
|
64
|
-
# - otherwise: (None, val)
|
|
65
67
|
self.args = []
|
|
66
68
|
self.kwargs = {}
|
|
67
69
|
|
|
@@ -131,7 +133,7 @@ class FunctionCall(Expr):
|
|
|
131
133
|
|
|
132
134
|
self.id = self._create_id()
|
|
133
135
|
|
|
134
|
-
def _create_rowid_refs(self, tbl: catalog.Table) ->
|
|
136
|
+
def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
|
|
135
137
|
target = tbl._tbl_version_path.tbl_version
|
|
136
138
|
return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
|
|
137
139
|
|
|
@@ -141,7 +143,7 @@ class FunctionCall(Expr):
|
|
|
141
143
|
return super().default_column_name()
|
|
142
144
|
|
|
143
145
|
@classmethod
|
|
144
|
-
def normalize_args(cls, signature: func.Signature, bound_args:
|
|
146
|
+
def normalize_args(cls, signature: func.Signature, bound_args: dict[str, Any]) -> None:
|
|
145
147
|
"""Converts all args to Exprs and checks that they are compatible with signature.
|
|
146
148
|
|
|
147
149
|
Updates bound_args in place, where necessary.
|
|
@@ -232,7 +234,7 @@ class FunctionCall(Expr):
|
|
|
232
234
|
return False
|
|
233
235
|
return True
|
|
234
236
|
|
|
235
|
-
def _id_attrs(self) ->
|
|
237
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
236
238
|
return super()._id_attrs() + [
|
|
237
239
|
('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
|
|
238
240
|
('args', self.args),
|
|
@@ -253,11 +255,11 @@ class FunctionCall(Expr):
|
|
|
253
255
|
return f'{fn_name}({self._print_args()})'
|
|
254
256
|
|
|
255
257
|
def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
|
|
258
|
+
def print_arg(arg: Any) -> str:
|
|
259
|
+
return repr(arg) if isinstance(arg, str) else str(arg)
|
|
256
260
|
arg_strs = [
|
|
257
|
-
|
|
261
|
+
print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
|
|
258
262
|
]
|
|
259
|
-
def print_arg(arg: Any) -> str:
|
|
260
|
-
return f"'{arg}'" if isinstance(arg, str) else str(arg)
|
|
261
263
|
arg_strs.extend([
|
|
262
264
|
f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
|
|
263
265
|
for param_name, (idx, arg) in self.kwargs.items()
|
|
@@ -273,15 +275,15 @@ class FunctionCall(Expr):
|
|
|
273
275
|
separator = ', ' if inline else ',\n '
|
|
274
276
|
return separator.join(arg_strs)
|
|
275
277
|
|
|
276
|
-
def has_group_by(self) ->
|
|
278
|
+
def has_group_by(self) -> list[Expr]:
|
|
277
279
|
return self.group_by_stop_idx != 0
|
|
278
280
|
|
|
279
281
|
@property
|
|
280
|
-
def group_by(self) ->
|
|
282
|
+
def group_by(self) -> list[Expr]:
|
|
281
283
|
return self.components[self.group_by_start_idx:self.group_by_stop_idx]
|
|
282
284
|
|
|
283
285
|
@property
|
|
284
|
-
def order_by(self) ->
|
|
286
|
+
def order_by(self) -> list[Expr]:
|
|
285
287
|
return self.components[self.order_by_start_idx:]
|
|
286
288
|
|
|
287
289
|
@property
|
|
@@ -291,20 +293,42 @@ class FunctionCall(Expr):
|
|
|
291
293
|
or self.has_group_by() \
|
|
292
294
|
or (len(self.order_by) > 0 and not self.fn.requires_order_by))
|
|
293
295
|
|
|
294
|
-
def get_window_sort_exprs(self) ->
|
|
296
|
+
def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
|
|
295
297
|
return self.group_by, self.order_by
|
|
296
298
|
|
|
297
299
|
@property
|
|
298
300
|
def is_agg_fn_call(self) -> bool:
|
|
299
301
|
return isinstance(self.fn, func.AggregateFunction)
|
|
300
302
|
|
|
301
|
-
def get_agg_order_by(self) ->
|
|
303
|
+
def get_agg_order_by(self) -> list[Expr]:
|
|
302
304
|
assert self.is_agg_fn_call
|
|
303
305
|
return self.order_by
|
|
304
306
|
|
|
305
|
-
def sql_expr(self) -> Optional[sql.
|
|
306
|
-
#
|
|
307
|
-
|
|
307
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
308
|
+
# try to construct args and kwargs to call self.fn._to_sql()
|
|
309
|
+
kwargs: dict[str, sql.ColumnElement] = {}
|
|
310
|
+
for param_name, (component_idx, arg) in self.kwargs.items():
|
|
311
|
+
param = self.fn.signature.parameters[param_name]
|
|
312
|
+
assert param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD
|
|
313
|
+
if component_idx is None:
|
|
314
|
+
kwargs[param_name] = sql.literal(arg)
|
|
315
|
+
else:
|
|
316
|
+
arg_element = sql_elements.get(self.components[component_idx])
|
|
317
|
+
if arg_element is None:
|
|
318
|
+
return None
|
|
319
|
+
kwargs[param_name] = arg_element
|
|
320
|
+
|
|
321
|
+
args: list[sql.ColumnElement] = []
|
|
322
|
+
for _, (component_idx, arg) in enumerate(self.args):
|
|
323
|
+
if component_idx is None:
|
|
324
|
+
args.append(sql.literal(arg))
|
|
325
|
+
else:
|
|
326
|
+
arg_element = sql_elements.get(self.components[component_idx])
|
|
327
|
+
if arg_element is None:
|
|
328
|
+
return None
|
|
329
|
+
args.append(arg_element)
|
|
330
|
+
result = self.fn._to_sql(*args, **kwargs)
|
|
331
|
+
return result
|
|
308
332
|
|
|
309
333
|
def reset_agg(self) -> None:
|
|
310
334
|
"""
|
|
@@ -322,9 +346,9 @@ class FunctionCall(Expr):
|
|
|
322
346
|
args, kwargs = self._make_args(data_row)
|
|
323
347
|
self.aggregator.update(*args, **kwargs)
|
|
324
348
|
|
|
325
|
-
def _make_args(self, data_row: DataRow) ->
|
|
349
|
+
def _make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]]:
|
|
326
350
|
"""Return args and kwargs, constructed for data_row"""
|
|
327
|
-
kwargs:
|
|
351
|
+
kwargs: dict[str, Any] = {}
|
|
328
352
|
for param_name, (component_idx, arg) in self.kwargs.items():
|
|
329
353
|
val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
|
|
330
354
|
param = self.fn.signature.parameters[param_name]
|
|
@@ -335,7 +359,7 @@ class FunctionCall(Expr):
|
|
|
335
359
|
assert param.kind != inspect.Parameter.VAR_POSITIONAL
|
|
336
360
|
kwargs[param_name] = val
|
|
337
361
|
|
|
338
|
-
args:
|
|
362
|
+
args: list[Any] = []
|
|
339
363
|
for param_idx, (component_idx, arg) in enumerate(self.args):
|
|
340
364
|
val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
|
|
341
365
|
param = self.fn.signature.parameters_by_pos[param_idx]
|
|
@@ -393,7 +417,7 @@ class FunctionCall(Expr):
|
|
|
393
417
|
else:
|
|
394
418
|
data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
|
|
395
419
|
|
|
396
|
-
def _as_dict(self) ->
|
|
420
|
+
def _as_dict(self) -> dict:
|
|
397
421
|
result = {
|
|
398
422
|
'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
|
|
399
423
|
'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
|
|
@@ -403,7 +427,7 @@ class FunctionCall(Expr):
|
|
|
403
427
|
return result
|
|
404
428
|
|
|
405
429
|
@classmethod
|
|
406
|
-
def _from_dict(cls, d:
|
|
430
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
|
|
407
431
|
assert 'fn' in d
|
|
408
432
|
assert 'args' in d
|
|
409
433
|
assert 'kwargs' in d
|
pixeltable/exprs/in_predicate.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Optional, List, Any, Dict, Tuple, Iterable
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
|
+
from .sql_element_cache import SqlElementCache
|
|
8
9
|
import pixeltable.type_system as ts
|
|
9
10
|
from .data_row import DataRow
|
|
10
11
|
from .expr import Expr
|
|
@@ -70,8 +71,8 @@ class InPredicate(Expr):
|
|
|
70
71
|
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
71
72
|
return super()._id_attrs() + [('value_list', self.value_list)]
|
|
72
73
|
|
|
73
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
74
|
-
lhs_sql_exprs = self.components[0]
|
|
74
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
75
|
+
lhs_sql_exprs = sql_elements.get(self.components[0])
|
|
75
76
|
if lhs_sql_exprs is None or self.value_list is None:
|
|
76
77
|
return None
|
|
77
78
|
return lhs_sql_exprs.in_(self.value_list)
|
pixeltable/exprs/inline_array.py
CHANGED
|
@@ -11,6 +11,7 @@ from .data_row import DataRow
|
|
|
11
11
|
from .expr import Expr
|
|
12
12
|
from .inline_dict import InlineDict
|
|
13
13
|
from .row_builder import RowBuilder
|
|
14
|
+
from .sql_element_cache import SqlElementCache
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class InlineArray(Expr):
|
|
@@ -82,7 +83,7 @@ class InlineArray(Expr):
|
|
|
82
83
|
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
83
84
|
return super()._id_attrs() + [('elements', self.elements)]
|
|
84
85
|
|
|
85
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
86
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
86
87
|
return None
|
|
87
88
|
|
|
88
89
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/inline_dict.py
CHANGED
|
@@ -10,6 +10,7 @@ import pixeltable.type_system as ts
|
|
|
10
10
|
from .data_row import DataRow
|
|
11
11
|
from .expr import Expr
|
|
12
12
|
from .row_builder import RowBuilder
|
|
13
|
+
from .sql_element_cache import SqlElementCache
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
class InlineDict(Expr):
|
|
@@ -73,7 +74,7 @@ class InlineDict(Expr):
|
|
|
73
74
|
"""Return the original dict used to construct this"""
|
|
74
75
|
return {key: val if idx is None else self.components[idx] for key, idx, val in self.dict_items}
|
|
75
76
|
|
|
76
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
77
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
77
78
|
return None
|
|
78
79
|
|
|
79
80
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/is_null.py
CHANGED
|
@@ -8,6 +8,7 @@ import pixeltable.type_system as ts
|
|
|
8
8
|
from .data_row import DataRow
|
|
9
9
|
from .expr import Expr
|
|
10
10
|
from .row_builder import RowBuilder
|
|
11
|
+
from .sql_element_cache import SqlElementCache
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class IsNull(Expr):
|
|
@@ -22,8 +23,8 @@ class IsNull(Expr):
|
|
|
22
23
|
def _equals(self, other: IsNull) -> bool:
|
|
23
24
|
return True
|
|
24
25
|
|
|
25
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
26
|
-
e = self.components[0]
|
|
26
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
27
|
+
e = sql_elements.get(self.components[0])
|
|
27
28
|
if e is None:
|
|
28
29
|
return None
|
|
29
30
|
return e == None
|
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
from typing import Optional, List, Dict
|
|
3
4
|
|
|
4
5
|
import sqlalchemy as sql
|
|
5
6
|
|
|
6
|
-
|
|
7
|
+
import pixeltable.type_system as ts
|
|
7
8
|
from .data_row import DataRow
|
|
9
|
+
from .expr import Expr, ExprScope, _GLOBAL_SCOPE
|
|
8
10
|
from .row_builder import RowBuilder
|
|
9
|
-
|
|
10
|
-
import pixeltable.type_system as ts
|
|
11
|
+
from .sql_element_cache import SqlElementCache
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class JsonMapper(Expr):
|
|
@@ -86,7 +87,7 @@ class JsonMapper(Expr):
|
|
|
86
87
|
def _equals(self, other: JsonMapper) -> bool:
|
|
87
88
|
return True
|
|
88
89
|
|
|
89
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
90
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
90
91
|
return None
|
|
91
92
|
|
|
92
93
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|