pixeltable 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +64 -11
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +50 -27
- pixeltable/catalog/column.py +27 -11
- pixeltable/catalog/dir.py +6 -4
- pixeltable/catalog/globals.py +8 -1
- pixeltable/catalog/insertable_table.py +22 -12
- pixeltable/catalog/named_function.py +10 -6
- pixeltable/catalog/path.py +3 -2
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +121 -101
- pixeltable/catalog/table_version.py +291 -142
- pixeltable/catalog/table_version_path.py +8 -5
- pixeltable/catalog/view.py +67 -26
- pixeltable/dataframe.py +106 -81
- pixeltable/env.py +28 -24
- pixeltable/exec/__init__.py +2 -2
- pixeltable/exec/aggregation_node.py +10 -4
- pixeltable/exec/cache_prefetch_node.py +5 -3
- pixeltable/exec/component_iteration_node.py +9 -9
- pixeltable/exec/data_row_batch.py +21 -10
- pixeltable/exec/exec_context.py +10 -3
- pixeltable/exec/exec_node.py +23 -12
- pixeltable/exec/expr_eval/evaluators.py +13 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
- pixeltable/exec/expr_eval/globals.py +30 -7
- pixeltable/exec/expr_eval/row_buffer.py +5 -6
- pixeltable/exec/expr_eval/schedulers.py +151 -31
- pixeltable/exec/in_memory_data_node.py +8 -7
- pixeltable/exec/row_update_node.py +15 -5
- pixeltable/exec/sql_node.py +56 -27
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +57 -26
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +2 -1
- pixeltable/exprs/column_ref.py +20 -15
- pixeltable/exprs/comparison.py +6 -2
- pixeltable/exprs/compound_predicate.py +1 -3
- pixeltable/exprs/data_row.py +2 -2
- pixeltable/exprs/expr.py +108 -72
- pixeltable/exprs/expr_dict.py +2 -1
- pixeltable/exprs/expr_set.py +3 -1
- pixeltable/exprs/function_call.py +39 -41
- pixeltable/exprs/globals.py +1 -0
- pixeltable/exprs/in_predicate.py +2 -2
- pixeltable/exprs/inline_expr.py +20 -17
- pixeltable/exprs/json_mapper.py +4 -2
- pixeltable/exprs/json_path.py +12 -18
- pixeltable/exprs/literal.py +5 -9
- pixeltable/exprs/method_ref.py +1 -0
- pixeltable/exprs/object_ref.py +1 -1
- pixeltable/exprs/row_builder.py +32 -17
- pixeltable/exprs/rowid_ref.py +14 -5
- pixeltable/exprs/similarity_expr.py +11 -6
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +24 -9
- pixeltable/ext/__init__.py +1 -0
- pixeltable/ext/functions/__init__.py +1 -0
- pixeltable/ext/functions/whisperx.py +2 -2
- pixeltable/ext/functions/yolox.py +11 -11
- pixeltable/func/aggregate_function.py +17 -13
- pixeltable/func/callable_function.py +6 -6
- pixeltable/func/expr_template_function.py +15 -14
- pixeltable/func/function.py +16 -16
- pixeltable/func/function_registry.py +11 -8
- pixeltable/func/globals.py +4 -2
- pixeltable/func/query_template_function.py +12 -13
- pixeltable/func/signature.py +18 -9
- pixeltable/func/tools.py +10 -17
- pixeltable/func/udf.py +106 -11
- pixeltable/functions/__init__.py +21 -2
- pixeltable/functions/anthropic.py +16 -12
- pixeltable/functions/fireworks.py +63 -5
- pixeltable/functions/gemini.py +13 -3
- pixeltable/functions/globals.py +18 -6
- pixeltable/functions/huggingface.py +20 -38
- pixeltable/functions/image.py +7 -3
- pixeltable/functions/json.py +1 -0
- pixeltable/functions/llama_cpp.py +1 -4
- pixeltable/functions/mistralai.py +31 -20
- pixeltable/functions/ollama.py +4 -18
- pixeltable/functions/openai.py +231 -113
- pixeltable/functions/replicate.py +11 -10
- pixeltable/functions/string.py +70 -7
- pixeltable/functions/timestamp.py +21 -8
- pixeltable/functions/together.py +66 -52
- pixeltable/functions/video.py +1 -0
- pixeltable/functions/vision.py +14 -11
- pixeltable/functions/whisper.py +2 -1
- pixeltable/globals.py +60 -26
- pixeltable/index/__init__.py +1 -1
- pixeltable/index/btree.py +5 -3
- pixeltable/index/embedding_index.py +15 -14
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +30 -25
- pixeltable/io/fiftyone.py +6 -14
- pixeltable/io/globals.py +33 -27
- pixeltable/io/hf_datasets.py +2 -1
- pixeltable/io/label_studio.py +77 -68
- pixeltable/io/pandas.py +36 -23
- pixeltable/io/parquet.py +9 -12
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +205 -0
- pixeltable/iterators/document.py +19 -8
- pixeltable/iterators/image.py +6 -24
- pixeltable/iterators/string.py +3 -6
- pixeltable/iterators/video.py +1 -7
- pixeltable/metadata/__init__.py +7 -1
- pixeltable/metadata/converters/convert_10.py +2 -2
- pixeltable/metadata/converters/convert_15.py +1 -5
- pixeltable/metadata/converters/convert_16.py +2 -4
- pixeltable/metadata/converters/convert_17.py +2 -4
- pixeltable/metadata/converters/convert_18.py +2 -4
- pixeltable/metadata/converters/convert_19.py +2 -5
- pixeltable/metadata/converters/convert_20.py +1 -4
- pixeltable/metadata/converters/convert_21.py +4 -6
- pixeltable/metadata/converters/convert_22.py +1 -0
- pixeltable/metadata/converters/convert_23.py +5 -5
- pixeltable/metadata/converters/convert_24.py +12 -13
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/util.py +3 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +13 -2
- pixeltable/plan.py +173 -98
- pixeltable/share/__init__.py +0 -0
- pixeltable/share/packager.py +218 -0
- pixeltable/store.py +42 -26
- pixeltable/type_system.py +102 -75
- pixeltable/utils/arrow.py +7 -8
- pixeltable/utils/coco.py +16 -17
- pixeltable/utils/code.py +1 -1
- pixeltable/utils/console_output.py +6 -3
- pixeltable/utils/description_helper.py +7 -7
- pixeltable/utils/documents.py +3 -1
- pixeltable/utils/filecache.py +12 -7
- pixeltable/utils/http_server.py +9 -8
- pixeltable/utils/iceberg.py +14 -0
- pixeltable/utils/media_store.py +3 -2
- pixeltable/utils/pytorch.py +11 -14
- pixeltable/utils/s3.py +1 -0
- pixeltable/utils/sql.py +1 -0
- pixeltable/utils/transactional_directory.py +2 -2
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/METADATA +9 -9
- pixeltable-0.3.4.dist-info/RECORD +166 -0
- pixeltable-0.3.2.dist-info/RECORD +0 -161
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/expr.py
CHANGED
|
@@ -7,11 +7,12 @@ import inspect
|
|
|
7
7
|
import json
|
|
8
8
|
import sys
|
|
9
9
|
import typing
|
|
10
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, Union, overload
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
|
+
import numpy as np
|
|
13
14
|
import sqlalchemy as sql
|
|
14
|
-
from typing_extensions import
|
|
15
|
+
from typing_extensions import Self, _AnnotatedAlias
|
|
15
16
|
|
|
16
17
|
import pixeltable.catalog as catalog
|
|
17
18
|
import pixeltable.exceptions as excs
|
|
@@ -24,11 +25,13 @@ from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes,
|
|
|
24
25
|
if TYPE_CHECKING:
|
|
25
26
|
from pixeltable import exprs
|
|
26
27
|
|
|
28
|
+
|
|
27
29
|
class ExprScope:
|
|
28
30
|
"""
|
|
29
31
|
Representation of the scope in which an Expr needs to be evaluated. Used to determine nesting of scopes.
|
|
30
32
|
parent is None: outermost scope
|
|
31
33
|
"""
|
|
34
|
+
|
|
32
35
|
def __init__(self, parent: Optional[ExprScope]):
|
|
33
36
|
self.parent = parent
|
|
34
37
|
|
|
@@ -137,6 +140,9 @@ class Expr(abc.ABC):
|
|
|
137
140
|
for attr, value in self._id_attrs():
|
|
138
141
|
hasher.update(attr.encode('utf-8'))
|
|
139
142
|
hasher.update(str(value).encode('utf-8'))
|
|
143
|
+
# Include the col_type of the expression to avoid expressions with identical str() representations
|
|
144
|
+
# but different types being considered the same expression, e.g. str(int(4)) == "4"
|
|
145
|
+
hasher.update(repr(self.col_type).encode('utf-8'))
|
|
140
146
|
for expr in self.components:
|
|
141
147
|
hasher.update(str(expr.id).encode('utf-8'))
|
|
142
148
|
# truncate to machine's word size
|
|
@@ -183,13 +189,19 @@ class Expr(abc.ABC):
|
|
|
183
189
|
|
|
184
190
|
def substitute(self, spec: dict[Expr, Expr]) -> Expr:
|
|
185
191
|
"""
|
|
186
|
-
Replace 'old' with 'new' recursively
|
|
192
|
+
Replace 'old' with 'new' recursively, and return a new version of the expression
|
|
193
|
+
This method must be used in the form: expr = expr.substitute(spec)
|
|
187
194
|
"""
|
|
195
|
+
from .literal import Literal
|
|
196
|
+
|
|
197
|
+
if isinstance(self, Literal):
|
|
198
|
+
return self
|
|
188
199
|
for old, new in spec.items():
|
|
189
200
|
if self.equals(old):
|
|
190
201
|
return new.copy()
|
|
191
202
|
for i in range(len(self.components)):
|
|
192
203
|
self.components[i] = self.components[i].substitute(spec)
|
|
204
|
+
self = self.maybe_literal()
|
|
193
205
|
self.id = self._create_id()
|
|
194
206
|
return self
|
|
195
207
|
|
|
@@ -205,14 +217,18 @@ class Expr(abc.ABC):
|
|
|
205
217
|
"""
|
|
206
218
|
from .column_ref import ColumnRef
|
|
207
219
|
from .expr_set import ExprSet
|
|
220
|
+
|
|
208
221
|
if resolve_cols is None:
|
|
209
222
|
resolve_cols = set()
|
|
210
223
|
result = self
|
|
211
224
|
while True:
|
|
212
|
-
target_col_refs = ExprSet(
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
225
|
+
target_col_refs = ExprSet(
|
|
226
|
+
[
|
|
227
|
+
e
|
|
228
|
+
for e in result.subexprs()
|
|
229
|
+
if isinstance(e, ColumnRef) and e.col.is_computed and (not e.col.is_stored or e.col in resolve_cols)
|
|
230
|
+
]
|
|
231
|
+
)
|
|
216
232
|
if len(target_col_refs) == 0:
|
|
217
233
|
return result
|
|
218
234
|
result = result.substitute({ref: ref.col.value_expr for ref in target_col_refs})
|
|
@@ -220,6 +236,7 @@ class Expr(abc.ABC):
|
|
|
220
236
|
def is_bound_by(self, tbls: list[catalog.TableVersionPath]) -> bool:
|
|
221
237
|
"""Returns True if this expr can be evaluated in the context of tbls."""
|
|
222
238
|
from .column_ref import ColumnRef
|
|
239
|
+
|
|
223
240
|
col_refs = self.subexprs(ColumnRef)
|
|
224
241
|
for col_ref in col_refs:
|
|
225
242
|
if not any(tbl.has_column(col_ref.col) for tbl in tbls):
|
|
@@ -232,7 +249,7 @@ class Expr(abc.ABC):
|
|
|
232
249
|
return self._retarget(tbl_versions)
|
|
233
250
|
|
|
234
251
|
def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
|
|
235
|
-
for i in range
|
|
252
|
+
for i in range(len(self.components)):
|
|
236
253
|
self.components[i] = self.components[i]._retarget(tbl_versions)
|
|
237
254
|
return self
|
|
238
255
|
|
|
@@ -264,13 +281,14 @@ class Expr(abc.ABC):
|
|
|
264
281
|
|
|
265
282
|
@overload
|
|
266
283
|
def subexprs(
|
|
267
|
-
self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None,
|
|
268
|
-
traverse_matches: bool = True
|
|
284
|
+
self, expr_class: type[T], filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
|
|
269
285
|
) -> Iterator[T]: ...
|
|
270
286
|
|
|
271
287
|
def subexprs(
|
|
272
|
-
self,
|
|
273
|
-
|
|
288
|
+
self,
|
|
289
|
+
expr_class: Optional[type[T]] = None,
|
|
290
|
+
filter: Optional[Callable[[Expr], bool]] = None,
|
|
291
|
+
traverse_matches: bool = True,
|
|
274
292
|
) -> Iterator[T]:
|
|
275
293
|
"""
|
|
276
294
|
Iterate over all subexprs, including self.
|
|
@@ -288,20 +306,30 @@ class Expr(abc.ABC):
|
|
|
288
306
|
@overload
|
|
289
307
|
@classmethod
|
|
290
308
|
def list_subexprs(
|
|
291
|
-
cls,
|
|
309
|
+
cls,
|
|
310
|
+
expr_list: Iterable[Expr],
|
|
311
|
+
*,
|
|
312
|
+
filter: Optional[Callable[[Expr], bool]] = None,
|
|
313
|
+
traverse_matches: bool = True,
|
|
292
314
|
) -> Iterator[Expr]: ...
|
|
293
315
|
|
|
294
316
|
@overload
|
|
295
317
|
@classmethod
|
|
296
318
|
def list_subexprs(
|
|
297
|
-
cls,
|
|
298
|
-
|
|
319
|
+
cls,
|
|
320
|
+
expr_list: Iterable[Expr],
|
|
321
|
+
expr_class: type[T],
|
|
322
|
+
filter: Optional[Callable[[Expr], bool]] = None,
|
|
323
|
+
traverse_matches: bool = True,
|
|
299
324
|
) -> Iterator[T]: ...
|
|
300
325
|
|
|
301
326
|
@classmethod
|
|
302
327
|
def list_subexprs(
|
|
303
|
-
cls,
|
|
304
|
-
|
|
328
|
+
cls,
|
|
329
|
+
expr_list: Iterable[Expr],
|
|
330
|
+
expr_class: Optional[type[T]] = None,
|
|
331
|
+
filter: Optional[Callable[[Expr], bool]] = None,
|
|
332
|
+
traverse_matches: bool = True,
|
|
305
333
|
) -> Iterator[T]:
|
|
306
334
|
"""Produce subexprs for all exprs in list. Can contain duplicates."""
|
|
307
335
|
for e in expr_list:
|
|
@@ -322,6 +350,7 @@ class Expr(abc.ABC):
|
|
|
322
350
|
"""Returns table ids referenced by this expr."""
|
|
323
351
|
from .column_ref import ColumnRef
|
|
324
352
|
from .rowid_ref import RowidRef
|
|
353
|
+
|
|
325
354
|
return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
|
|
326
355
|
|
|
327
356
|
@classmethod
|
|
@@ -334,6 +363,7 @@ class Expr(abc.ABC):
|
|
|
334
363
|
result: list[catalog.Column] = []
|
|
335
364
|
assert '_classname' in expr_dict
|
|
336
365
|
from .column_ref import ColumnRef
|
|
366
|
+
|
|
337
367
|
if expr_dict['_classname'] == 'ColumnRef':
|
|
338
368
|
result.append(ColumnRef.get_column(expr_dict))
|
|
339
369
|
if 'components' in expr_dict:
|
|
@@ -341,66 +371,63 @@ class Expr(abc.ABC):
|
|
|
341
371
|
result.extend(cls.get_refd_columns(component_dict))
|
|
342
372
|
return result
|
|
343
373
|
|
|
344
|
-
def
|
|
345
|
-
"""Returns True if this expr is a constant."""
|
|
346
|
-
return all(comp.is_constant() for comp in self.components)
|
|
347
|
-
|
|
348
|
-
def _as_constant(self) -> Any:
|
|
349
|
-
return None
|
|
350
|
-
|
|
351
|
-
def as_constant(self) -> Any:
|
|
374
|
+
def as_literal(self) -> Optional[Expr]:
|
|
352
375
|
"""
|
|
353
|
-
|
|
376
|
+
Return a Literal expression if this expression can be evaluated to a constant value, otherwise return None.
|
|
354
377
|
"""
|
|
355
|
-
if self.is_constant():
|
|
356
|
-
return self._as_constant()
|
|
357
378
|
return None
|
|
358
379
|
|
|
359
380
|
@classmethod
|
|
360
381
|
def from_array(cls, elements: Iterable) -> Optional[Expr]:
|
|
361
382
|
from .inline_expr import InlineArray
|
|
383
|
+
from .literal import Literal
|
|
384
|
+
|
|
385
|
+
if isinstance(elements, np.ndarray):
|
|
386
|
+
pxttype = ts.ArrayType.from_literal(elements)
|
|
387
|
+
if pxttype is not None:
|
|
388
|
+
return Literal(elements, col_type=pxttype)
|
|
389
|
+
|
|
362
390
|
inline_array = InlineArray(elements)
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
391
|
+
return inline_array.maybe_literal()
|
|
392
|
+
|
|
393
|
+
def maybe_literal(self: Expr) -> Expr:
|
|
394
|
+
"""
|
|
395
|
+
Return a Literal if this expression can be evaluated to a constant value, otherwise return the expression.
|
|
396
|
+
"""
|
|
397
|
+
lit_expr = self.as_literal()
|
|
398
|
+
if lit_expr is not None:
|
|
399
|
+
return lit_expr
|
|
367
400
|
else:
|
|
368
|
-
return
|
|
401
|
+
return self
|
|
369
402
|
|
|
370
403
|
@classmethod
|
|
371
404
|
def from_object(cls, o: object) -> Optional[Expr]:
|
|
372
405
|
"""
|
|
373
406
|
Try to turn a literal object into an Expr.
|
|
374
407
|
"""
|
|
408
|
+
from .inline_expr import InlineDict, InlineList
|
|
409
|
+
from .literal import Literal
|
|
410
|
+
|
|
375
411
|
# Try to create a literal. We need to check for InlineList/InlineDict
|
|
376
412
|
# first, to prevent them from inappropriately being interpreted as JsonType
|
|
377
413
|
# literals.
|
|
414
|
+
if isinstance(o, Literal):
|
|
415
|
+
return o
|
|
416
|
+
|
|
378
417
|
if isinstance(o, (list, tuple, dict, Expr)):
|
|
379
|
-
expr:
|
|
418
|
+
expr: Expr
|
|
380
419
|
if isinstance(o, (list, tuple)):
|
|
381
|
-
from .inline_expr import InlineList
|
|
382
420
|
expr = InlineList(o)
|
|
383
421
|
elif isinstance(o, dict):
|
|
384
|
-
from .inline_expr import InlineDict
|
|
385
422
|
expr = InlineDict(o)
|
|
386
|
-
|
|
423
|
+
else:
|
|
387
424
|
expr = o
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
return expr
|
|
391
|
-
# Check if the expression is constant
|
|
392
|
-
if expr is not None:
|
|
393
|
-
expr_value = expr.as_constant()
|
|
394
|
-
if expr_value is not None:
|
|
395
|
-
from .literal import Literal
|
|
396
|
-
return Literal(expr_value)
|
|
397
|
-
else:
|
|
398
|
-
return expr
|
|
425
|
+
|
|
426
|
+
return expr.maybe_literal()
|
|
399
427
|
else:
|
|
400
428
|
# convert scalar to a literal
|
|
401
429
|
obj_type = ts.ColumnType.infer_literal_type(o)
|
|
402
430
|
if obj_type is not None:
|
|
403
|
-
from .literal import Literal
|
|
404
431
|
return Literal(o, col_type=obj_type)
|
|
405
432
|
return None
|
|
406
433
|
|
|
@@ -444,10 +471,7 @@ class Expr(abc.ABC):
|
|
|
444
471
|
Turn Expr object into a dict that can be passed to json.dumps().
|
|
445
472
|
Subclasses override _as_dict().
|
|
446
473
|
"""
|
|
447
|
-
return {
|
|
448
|
-
'_classname': self.__class__.__name__,
|
|
449
|
-
**self._as_dict(),
|
|
450
|
-
}
|
|
474
|
+
return {'_classname': self.__class__.__name__, **self._as_dict()}
|
|
451
475
|
|
|
452
476
|
@classmethod
|
|
453
477
|
def as_dict_list(self, expr_list: list[Expr]) -> list[dict]:
|
|
@@ -485,6 +509,7 @@ class Expr(abc.ABC):
|
|
|
485
509
|
|
|
486
510
|
def isin(self, value_set: Any) -> 'exprs.InPredicate':
|
|
487
511
|
from .in_predicate import InPredicate
|
|
512
|
+
|
|
488
513
|
if isinstance(value_set, Expr):
|
|
489
514
|
return InPredicate(self, value_set_expr=value_set)
|
|
490
515
|
else:
|
|
@@ -492,6 +517,7 @@ class Expr(abc.ABC):
|
|
|
492
517
|
|
|
493
518
|
def astype(self, new_type: Union[ts.ColumnType, type, _AnnotatedAlias]) -> 'exprs.TypeCast':
|
|
494
519
|
from pixeltable.exprs import TypeCast
|
|
520
|
+
|
|
495
521
|
# Interpret the type argument the same way we would if given in a schema
|
|
496
522
|
col_type = ts.ColumnType.normalize_type(new_type, nullable_default=True, allow_builtin_types=False)
|
|
497
523
|
if not self.col_type.nullable:
|
|
@@ -500,7 +526,9 @@ class Expr(abc.ABC):
|
|
|
500
526
|
col_type = col_type.copy(nullable=False)
|
|
501
527
|
return TypeCast(self, col_type)
|
|
502
528
|
|
|
503
|
-
def apply(
|
|
529
|
+
def apply(
|
|
530
|
+
self, fn: Callable, *, col_type: Union[ts.ColumnType, type, _AnnotatedAlias, None] = None
|
|
531
|
+
) -> 'exprs.FunctionCall':
|
|
504
532
|
if col_type is not None:
|
|
505
533
|
col_type = ts.ColumnType.normalize_type(col_type)
|
|
506
534
|
function = self._make_applicator_function(fn, col_type)
|
|
@@ -509,10 +537,7 @@ class Expr(abc.ABC):
|
|
|
509
537
|
|
|
510
538
|
def __dir__(self) -> list[str]:
|
|
511
539
|
attrs = ['isin', 'astype', 'apply']
|
|
512
|
-
attrs += [
|
|
513
|
-
f.name
|
|
514
|
-
for f in func.FunctionRegistry.get().get_type_methods(self.col_type.type_enum)
|
|
515
|
-
]
|
|
540
|
+
attrs += [f.name for f in func.FunctionRegistry.get().get_type_methods(self.col_type.type_enum)]
|
|
516
541
|
return attrs
|
|
517
542
|
|
|
518
543
|
def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
|
@@ -521,9 +546,11 @@ class Expr(abc.ABC):
|
|
|
521
546
|
def __getitem__(self, index: object) -> Expr:
|
|
522
547
|
if self.col_type.is_json_type():
|
|
523
548
|
from .json_path import JsonPath
|
|
549
|
+
|
|
524
550
|
return JsonPath(self)[index]
|
|
525
551
|
if self.col_type.is_array_type():
|
|
526
552
|
from .array_slice import ArraySlice
|
|
553
|
+
|
|
527
554
|
if not isinstance(index, tuple):
|
|
528
555
|
index = (index,)
|
|
529
556
|
if any(not isinstance(i, (int, slice)) for i in index):
|
|
@@ -537,6 +564,7 @@ class Expr(abc.ABC):
|
|
|
537
564
|
"""
|
|
538
565
|
from .json_path import JsonPath
|
|
539
566
|
from .method_ref import MethodRef
|
|
567
|
+
|
|
540
568
|
if self.col_type.is_json_type():
|
|
541
569
|
return JsonPath(self).__getattr__(name)
|
|
542
570
|
else:
|
|
@@ -555,7 +583,8 @@ class Expr(abc.ABC):
|
|
|
555
583
|
|
|
556
584
|
def __bool__(self) -> bool:
|
|
557
585
|
raise TypeError(
|
|
558
|
-
'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
|
|
586
|
+
'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
|
|
587
|
+
)
|
|
559
588
|
|
|
560
589
|
def __lt__(self, other: object) -> 'exprs.Comparison':
|
|
561
590
|
return self._make_comparison(ComparisonOperator.LT, other)
|
|
@@ -566,6 +595,7 @@ class Expr(abc.ABC):
|
|
|
566
595
|
def __eq__(self, other: object) -> 'exprs.Expr': # type: ignore[override]
|
|
567
596
|
if other is None:
|
|
568
597
|
from .is_null import IsNull
|
|
598
|
+
|
|
569
599
|
return IsNull(self)
|
|
570
600
|
return self._make_comparison(ComparisonOperator.EQ, other)
|
|
571
601
|
|
|
@@ -573,6 +603,7 @@ class Expr(abc.ABC):
|
|
|
573
603
|
if other is None:
|
|
574
604
|
from .compound_predicate import CompoundPredicate
|
|
575
605
|
from .is_null import IsNull
|
|
606
|
+
|
|
576
607
|
return CompoundPredicate(LogicalOperator.NOT, [IsNull(self)])
|
|
577
608
|
return self._make_comparison(ComparisonOperator.NE, other)
|
|
578
609
|
|
|
@@ -589,6 +620,7 @@ class Expr(abc.ABC):
|
|
|
589
620
|
# TODO: check for compatibility
|
|
590
621
|
from .comparison import Comparison
|
|
591
622
|
from .literal import Literal
|
|
623
|
+
|
|
592
624
|
if isinstance(other, Expr):
|
|
593
625
|
return Comparison(op, self, other)
|
|
594
626
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
@@ -641,6 +673,7 @@ class Expr(abc.ABC):
|
|
|
641
673
|
# TODO: check for compatibility
|
|
642
674
|
from .arithmetic_expr import ArithmeticExpr
|
|
643
675
|
from .literal import Literal
|
|
676
|
+
|
|
644
677
|
if isinstance(other, Expr):
|
|
645
678
|
return ArithmeticExpr(op, self, other)
|
|
646
679
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
@@ -655,6 +688,7 @@ class Expr(abc.ABC):
|
|
|
655
688
|
# TODO: check for compatibility
|
|
656
689
|
from .arithmetic_expr import ArithmeticExpr
|
|
657
690
|
from .literal import Literal
|
|
691
|
+
|
|
658
692
|
assert not isinstance(other, Expr) # Else the left-handed form would have evaluated first
|
|
659
693
|
if isinstance(other, typing.get_args(LiteralPythonTypes)):
|
|
660
694
|
return ArithmeticExpr(op, Literal(other), self)
|
|
@@ -666,6 +700,7 @@ class Expr(abc.ABC):
|
|
|
666
700
|
if not other.col_type.is_bool_type():
|
|
667
701
|
raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
|
|
668
702
|
from .compound_predicate import CompoundPredicate
|
|
703
|
+
|
|
669
704
|
return CompoundPredicate(LogicalOperator.AND, [self, other])
|
|
670
705
|
|
|
671
706
|
def __or__(self, other: object) -> Expr:
|
|
@@ -674,14 +709,15 @@ class Expr(abc.ABC):
|
|
|
674
709
|
if not other.col_type.is_bool_type():
|
|
675
710
|
raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
|
|
676
711
|
from .compound_predicate import CompoundPredicate
|
|
712
|
+
|
|
677
713
|
return CompoundPredicate(LogicalOperator.OR, [self, other])
|
|
678
714
|
|
|
679
715
|
def __invert__(self) -> Expr:
|
|
680
716
|
from .compound_predicate import CompoundPredicate
|
|
717
|
+
|
|
681
718
|
return CompoundPredicate(LogicalOperator.NOT, [self])
|
|
682
719
|
|
|
683
|
-
def split_conjuncts(
|
|
684
|
-
self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
|
|
720
|
+
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
|
|
685
721
|
"""
|
|
686
722
|
Returns clauses of a conjunction that meet condition in the first element.
|
|
687
723
|
The second element contains remaining clauses, rolled into a conjunction.
|
|
@@ -721,7 +757,8 @@ class Expr(abc.ABC):
|
|
|
721
757
|
if fn_type is None:
|
|
722
758
|
raise excs.Error(
|
|
723
759
|
f'Column type of `{fn.__name__}` cannot be inferred. '
|
|
724
|
-
f'Use `.apply({fn.__name__}, col_type=...)` to specify.'
|
|
760
|
+
f'Use `.apply({fn.__name__}, col_type=...)` to specify.'
|
|
761
|
+
)
|
|
725
762
|
|
|
726
763
|
# TODO(aaron-siegel) Currently we assume that `fn` has exactly one required parameter
|
|
727
764
|
# and all optional parameters take their default values. Should we provide a more
|
|
@@ -741,17 +778,15 @@ class Expr(abc.ABC):
|
|
|
741
778
|
second_param = next(params_iter) if len(params) >= 2 else None
|
|
742
779
|
# Check that fn has at least one positional parameter
|
|
743
780
|
if len(params) == 0 or first_param.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
|
|
744
|
-
raise excs.Error(
|
|
745
|
-
f'Function `{fn.__name__}` has no positional parameters.'
|
|
746
|
-
)
|
|
781
|
+
raise excs.Error(f'Function `{fn.__name__}` has no positional parameters.')
|
|
747
782
|
# Check that fn has at most one required parameter, i.e., its second parameter
|
|
748
783
|
# has no default and is not a varargs
|
|
749
|
-
if
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
)
|
|
784
|
+
if (
|
|
785
|
+
len(params) >= 2
|
|
786
|
+
and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
787
|
+
and second_param.default == inspect.Parameter.empty
|
|
788
|
+
):
|
|
789
|
+
raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
|
|
755
790
|
except ValueError:
|
|
756
791
|
# inspect.signature(fn) will raise a `ValueError` if `fn` is a builtin; I don't
|
|
757
792
|
# know of any way to get the signature of a builtin, nor to check for this in
|
|
@@ -765,7 +800,8 @@ class Expr(abc.ABC):
|
|
|
765
800
|
# We also set the display_name explicitly, so that the `FunctionCall` gets the
|
|
766
801
|
# name of `decorated_fn`, not the lambda.
|
|
767
802
|
return func.make_function(
|
|
768
|
-
decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn.__name__
|
|
803
|
+
decorated_fn=lambda x: fn(x), return_type=fn_type, param_types=[self.col_type], function_name=fn.__name__
|
|
804
|
+
)
|
|
769
805
|
|
|
770
806
|
|
|
771
807
|
# A dictionary of result types of various stdlib functions that are
|
pixeltable/exprs/expr_dict.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
from typing import Generic,
|
|
1
|
+
from typing import Generic, Iterable, Iterator, Optional, TypeVar
|
|
2
2
|
|
|
3
3
|
T = TypeVar('T')
|
|
4
4
|
|
|
5
5
|
from .expr import Expr
|
|
6
6
|
|
|
7
|
+
|
|
7
8
|
class ExprDict(Generic[T]):
|
|
8
9
|
"""
|
|
9
10
|
A dictionary that maps Expr instances to values of type T.
|
pixeltable/exprs/expr_set.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Generic, Iterable, Iterator, Optional, TypeVar
|
|
4
4
|
|
|
5
5
|
from .expr import Expr
|
|
6
6
|
|
|
7
7
|
T = TypeVar('T', bound='Expr')
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
class ExprSet(Generic[T]):
|
|
10
11
|
"""
|
|
11
12
|
A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
|
|
12
13
|
"""
|
|
14
|
+
|
|
13
15
|
exprs: dict[int, T] # key: Expr.id
|
|
14
16
|
exprs_by_idx: dict[int, T] # key: slot_idx
|
|
15
17
|
|
|
@@ -22,7 +22,6 @@ from .sql_element_cache import SqlElementCache
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class FunctionCall(Expr):
|
|
25
|
-
|
|
26
25
|
fn: func.Function
|
|
27
26
|
is_method_call: bool
|
|
28
27
|
agg_init_args: dict[str, Any]
|
|
@@ -58,7 +57,7 @@ class FunctionCall(Expr):
|
|
|
58
57
|
return_type: ts.ColumnType,
|
|
59
58
|
order_by_clause: Optional[list[Any]] = None,
|
|
60
59
|
group_by_clause: Optional[list[Any]] = None,
|
|
61
|
-
is_method_call: bool = False
|
|
60
|
+
is_method_call: bool = False,
|
|
62
61
|
):
|
|
63
62
|
if order_by_clause is None:
|
|
64
63
|
order_by_clause = []
|
|
@@ -69,7 +68,7 @@ class FunctionCall(Expr):
|
|
|
69
68
|
|
|
70
69
|
self.fn = fn
|
|
71
70
|
self.is_method_call = is_method_call
|
|
72
|
-
#self.normalize_args(fn.name, signature, bound_args)
|
|
71
|
+
# self.normalize_args(fn.name, signature, bound_args)
|
|
73
72
|
self.resource_pool = fn.call_resource_pool(bound_args)
|
|
74
73
|
signature = fn.signature
|
|
75
74
|
|
|
@@ -79,8 +78,10 @@ class FunctionCall(Expr):
|
|
|
79
78
|
for arg_name, arg in bound_args.items():
|
|
80
79
|
param = signature.parameters[arg_name]
|
|
81
80
|
if (
|
|
82
|
-
param.col_type is not None
|
|
83
|
-
and
|
|
81
|
+
param.col_type is not None
|
|
82
|
+
and not param.col_type.nullable
|
|
83
|
+
and isinstance(arg, Expr)
|
|
84
|
+
and arg.col_type.nullable
|
|
84
85
|
):
|
|
85
86
|
return_type = return_type.copy(nullable=True)
|
|
86
87
|
break
|
|
@@ -96,7 +97,9 @@ class FunctionCall(Expr):
|
|
|
96
97
|
self.agg_init_args = {
|
|
97
98
|
arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names[0]
|
|
98
99
|
}
|
|
99
|
-
bound_args = {
|
|
100
|
+
bound_args = {
|
|
101
|
+
arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]
|
|
102
|
+
}
|
|
100
103
|
|
|
101
104
|
# construct components, args, kwargs
|
|
102
105
|
self.args = []
|
|
@@ -171,7 +174,8 @@ class FunctionCall(Expr):
|
|
|
171
174
|
# (that's done in SQL)
|
|
172
175
|
if len(order_by_clause) > 0 and not isinstance(order_by_clause[0], Expr):
|
|
173
176
|
raise excs.Error(
|
|
174
|
-
f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
|
|
177
|
+
f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
|
|
178
|
+
)
|
|
175
179
|
# don't add components after this, everthing after order_by_start_idx is part of the order_by clause
|
|
176
180
|
self.order_by_start_idx = len(self.components)
|
|
177
181
|
self.components.extend(order_by_clause)
|
|
@@ -257,7 +261,8 @@ class FunctionCall(Expr):
|
|
|
257
261
|
):
|
|
258
262
|
raise excs.Error(
|
|
259
263
|
f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
|
|
260
|
-
f'{param.col_type}'
|
|
264
|
+
f'{param.col_type}'
|
|
265
|
+
)
|
|
261
266
|
|
|
262
267
|
def _equals(self, other: FunctionCall) -> bool:
|
|
263
268
|
if self.fn != other.fn:
|
|
@@ -282,7 +287,7 @@ class FunctionCall(Expr):
|
|
|
282
287
|
('kwargs', self.kwargs),
|
|
283
288
|
('group_by_start_idx', self.group_by_start_idx),
|
|
284
289
|
('group_by_stop_idx', self.group_by_stop_idx),
|
|
285
|
-
('order_by_start_idx', self.order_by_start_idx)
|
|
290
|
+
('order_by_start_idx', self.order_by_start_idx),
|
|
286
291
|
]
|
|
287
292
|
|
|
288
293
|
def __repr__(self) -> str:
|
|
@@ -298,13 +303,14 @@ class FunctionCall(Expr):
|
|
|
298
303
|
def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
|
|
299
304
|
def print_arg(arg: Any) -> str:
|
|
300
305
|
return repr(arg) if isinstance(arg, str) else str(arg)
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
306
|
+
|
|
307
|
+
arg_strs = [print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]]
|
|
308
|
+
arg_strs.extend(
|
|
309
|
+
[
|
|
310
|
+
f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
|
|
311
|
+
for param_name, (idx, arg) in self.kwargs.items()
|
|
312
|
+
]
|
|
313
|
+
)
|
|
308
314
|
if len(self.order_by) > 0:
|
|
309
315
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
310
316
|
if self.fn.requires_order_by:
|
|
@@ -322,18 +328,22 @@ class FunctionCall(Expr):
|
|
|
322
328
|
|
|
323
329
|
@property
|
|
324
330
|
def group_by(self) -> list[Expr]:
|
|
325
|
-
return self.components[self.group_by_start_idx:self.group_by_stop_idx]
|
|
331
|
+
return self.components[self.group_by_start_idx : self.group_by_stop_idx]
|
|
326
332
|
|
|
327
333
|
@property
|
|
328
334
|
def order_by(self) -> list[Expr]:
|
|
329
|
-
return self.components[self.order_by_start_idx:]
|
|
335
|
+
return self.components[self.order_by_start_idx :]
|
|
330
336
|
|
|
331
337
|
@property
|
|
332
338
|
def is_window_fn_call(self) -> bool:
|
|
333
|
-
return
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
339
|
+
return (
|
|
340
|
+
isinstance(self.fn, func.AggregateFunction)
|
|
341
|
+
and self.fn.allows_window
|
|
342
|
+
and (
|
|
343
|
+
not self.fn.allows_std_agg
|
|
344
|
+
or self.has_group_by()
|
|
345
|
+
or (len(self.order_by) > 0 and not self.fn.requires_order_by)
|
|
346
|
+
)
|
|
337
347
|
)
|
|
338
348
|
|
|
339
349
|
def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
|
|
@@ -435,7 +445,7 @@ class FunctionCall(Expr):
|
|
|
435
445
|
Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
|
|
436
446
|
data_rows
|
|
437
447
|
"""
|
|
438
|
-
assert all(name in self._param_values for name in param_names)
|
|
448
|
+
assert all(name in self._param_values for name in param_names), f'{param_names}, {self._param_values.keys()}'
|
|
439
449
|
result: list[dict[str, Any]] = []
|
|
440
450
|
for row in data_rows:
|
|
441
451
|
d: dict[str, Any] = {}
|
|
@@ -512,16 +522,12 @@ class FunctionCall(Expr):
|
|
|
512
522
|
fn = func.Function.from_dict(d['fn'])
|
|
513
523
|
assert not fn.is_polymorphic
|
|
514
524
|
return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
|
|
515
|
-
group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
|
|
516
|
-
order_by_exprs = components[d['order_by_start_idx']:]
|
|
525
|
+
group_by_exprs = components[d['group_by_start_idx'] : d['group_by_stop_idx']]
|
|
526
|
+
order_by_exprs = components[d['order_by_start_idx'] :]
|
|
517
527
|
|
|
518
|
-
args = [
|
|
519
|
-
expr if idx is None else components[idx]
|
|
520
|
-
for idx, expr in d['args']
|
|
521
|
-
]
|
|
528
|
+
args = [expr if idx is None else components[idx] for idx, expr in d['args']]
|
|
522
529
|
kwargs = {
|
|
523
|
-
param_name: (expr if idx is None else components[idx])
|
|
524
|
-
for param_name, (idx, expr) in d['kwargs'].items()
|
|
530
|
+
param_name: (expr if idx is None else components[idx]) for param_name, (idx, expr) in d['kwargs'].items()
|
|
525
531
|
}
|
|
526
532
|
|
|
527
533
|
# `Function.from_dict()` does signature matching, so it is safe to assume that `args` and `kwargs` are
|
|
@@ -538,9 +544,7 @@ class FunctionCall(Expr):
|
|
|
538
544
|
# for now, as a hack, we do the unpacking here for the specific case of an InlineList of Literals (the only
|
|
539
545
|
# case where this is necessary to support existing conditional_return_type implementations). Once the general
|
|
540
546
|
# pattern is implemented, we can remove this hack.
|
|
541
|
-
unpacked_bound_args = {
|
|
542
|
-
param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()
|
|
543
|
-
}
|
|
547
|
+
unpacked_bound_args = {param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()}
|
|
544
548
|
|
|
545
549
|
# Evaluate the call_return_type as defined in the current codebase.
|
|
546
550
|
call_return_type = fn.call_return_type([], unpacked_bound_args)
|
|
@@ -567,13 +571,7 @@ class FunctionCall(Expr):
|
|
|
567
571
|
f'Return type as currently defined: `{call_return_type}`'
|
|
568
572
|
)
|
|
569
573
|
|
|
570
|
-
fn_call = cls(
|
|
571
|
-
fn,
|
|
572
|
-
bound_args,
|
|
573
|
-
return_type,
|
|
574
|
-
group_by_clause=group_by_exprs,
|
|
575
|
-
order_by_clause=order_by_exprs
|
|
576
|
-
)
|
|
574
|
+
fn_call = cls(fn, bound_args, return_type, group_by_clause=group_by_exprs, order_by_clause=order_by_exprs)
|
|
577
575
|
return fn_call
|
|
578
576
|
|
|
579
577
|
@classmethod
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import Union
|
|
|
7
7
|
# Python types corresponding to our literal types
|
|
8
8
|
LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime]
|
|
9
9
|
|
|
10
|
+
|
|
10
11
|
def print_slice(s: slice) -> str:
|
|
11
12
|
start_str = f'{str(s.start) if s.start is not None else ""}'
|
|
12
13
|
stop_str = f'{str(s.stop) if s.stop is not None else ""}'
|