pixeltable 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (79)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +8 -7
  3. pixeltable/catalog/column.py +11 -8
  4. pixeltable/catalog/insertable_table.py +1 -1
  5. pixeltable/catalog/path_dict.py +8 -6
  6. pixeltable/catalog/table.py +20 -13
  7. pixeltable/catalog/table_version.py +91 -54
  8. pixeltable/catalog/table_version_path.py +7 -9
  9. pixeltable/catalog/view.py +2 -1
  10. pixeltable/dataframe.py +1 -1
  11. pixeltable/env.py +173 -83
  12. pixeltable/exec/aggregation_node.py +2 -1
  13. pixeltable/exec/component_iteration_node.py +1 -1
  14. pixeltable/exec/sql_node.py +11 -8
  15. pixeltable/exprs/__init__.py +1 -0
  16. pixeltable/exprs/arithmetic_expr.py +4 -4
  17. pixeltable/exprs/array_slice.py +2 -1
  18. pixeltable/exprs/column_property_ref.py +9 -7
  19. pixeltable/exprs/column_ref.py +2 -1
  20. pixeltable/exprs/comparison.py +10 -7
  21. pixeltable/exprs/compound_predicate.py +3 -2
  22. pixeltable/exprs/data_row.py +19 -4
  23. pixeltable/exprs/expr.py +46 -35
  24. pixeltable/exprs/expr_set.py +32 -9
  25. pixeltable/exprs/function_call.py +56 -32
  26. pixeltable/exprs/in_predicate.py +3 -2
  27. pixeltable/exprs/inline_array.py +2 -1
  28. pixeltable/exprs/inline_dict.py +2 -1
  29. pixeltable/exprs/is_null.py +3 -2
  30. pixeltable/exprs/json_mapper.py +5 -4
  31. pixeltable/exprs/json_path.py +7 -1
  32. pixeltable/exprs/literal.py +34 -7
  33. pixeltable/exprs/method_ref.py +3 -3
  34. pixeltable/exprs/object_ref.py +6 -5
  35. pixeltable/exprs/row_builder.py +25 -17
  36. pixeltable/exprs/rowid_ref.py +2 -1
  37. pixeltable/exprs/similarity_expr.py +2 -1
  38. pixeltable/exprs/sql_element_cache.py +30 -0
  39. pixeltable/exprs/type_cast.py +3 -3
  40. pixeltable/exprs/variable.py +2 -1
  41. pixeltable/ext/functions/whisperx.py +4 -4
  42. pixeltable/ext/functions/yolox.py +6 -6
  43. pixeltable/func/aggregate_function.py +1 -0
  44. pixeltable/func/function.py +28 -4
  45. pixeltable/functions/__init__.py +4 -2
  46. pixeltable/functions/anthropic.py +15 -5
  47. pixeltable/functions/fireworks.py +1 -1
  48. pixeltable/functions/globals.py +6 -1
  49. pixeltable/functions/huggingface.py +2 -2
  50. pixeltable/functions/image.py +17 -2
  51. pixeltable/functions/json.py +5 -5
  52. pixeltable/functions/mistralai.py +188 -0
  53. pixeltable/functions/openai.py +6 -10
  54. pixeltable/functions/string.py +3 -2
  55. pixeltable/functions/timestamp.py +95 -7
  56. pixeltable/functions/together.py +4 -4
  57. pixeltable/functions/video.py +2 -2
  58. pixeltable/functions/vision.py +27 -17
  59. pixeltable/functions/whisper.py +1 -1
  60. pixeltable/io/hf_datasets.py +17 -15
  61. pixeltable/io/pandas.py +0 -2
  62. pixeltable/io/parquet.py +15 -14
  63. pixeltable/iterators/document.py +16 -15
  64. pixeltable/metadata/__init__.py +1 -1
  65. pixeltable/metadata/converters/convert_19.py +46 -0
  66. pixeltable/metadata/notes.py +1 -0
  67. pixeltable/metadata/schema.py +5 -4
  68. pixeltable/plan.py +100 -78
  69. pixeltable/store.py +5 -1
  70. pixeltable/tool/create_test_db_dump.py +4 -3
  71. pixeltable/type_system.py +12 -14
  72. pixeltable/utils/documents.py +45 -42
  73. pixeltable/utils/formatter.py +2 -2
  74. {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/METADATA +79 -21
  75. pixeltable-0.2.18.dist-info/RECORD +147 -0
  76. pixeltable-0.2.17.dist-info/RECORD +0 -144
  77. {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/LICENSE +0 -0
  78. {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/WHEEL +0 -0
  79. {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import datetime
3
4
  import io
4
5
  import urllib.parse
5
6
  import urllib.request
@@ -8,8 +9,11 @@ from typing import Optional, List, Any, Tuple
8
9
  import sqlalchemy as sql
9
10
  import pgvector.sqlalchemy
10
11
  import PIL
12
+ import PIL.Image
11
13
  import numpy as np
12
14
 
15
+ from pixeltable import env
16
+
13
17
 
14
18
  class DataRow:
15
19
  """
@@ -101,6 +105,7 @@ class DataRow:
101
105
 
102
106
  def __getitem__(self, index: object) -> Any:
103
107
  """Returns in-memory value, ie, what is needed for expr evaluation"""
108
+ assert isinstance(index, int)
104
109
  if not self.has_val[index]:
105
110
  # for debugging purposes
106
111
  pass
@@ -115,7 +120,7 @@ class DataRow:
115
120
 
116
121
  return self.vals[index]
117
122
 
118
- def get_stored_val(self, index: object, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
123
+ def get_stored_val(self, index: int, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
119
124
  """Return the value that gets stored in the db"""
120
125
  assert self.excs[index] is None
121
126
  if not self.has_val[index]:
@@ -140,12 +145,17 @@ class DataRow:
140
145
  if self.vals[index] is None and sa_col_type is not None and isinstance(sa_col_type, sql.JSON):
141
146
  return sql.sql.null()
142
147
 
148
+ if isinstance(self.vals[index], datetime.datetime) and self.vals[index].tzinfo is None:
149
+ # if the datetime is naive, cast it to the default time zone
150
+ return self.vals[index].replace(tzinfo=env.Env.get().default_time_zone)
151
+
143
152
  return self.vals[index]
144
153
 
145
154
  def __setitem__(self, idx: object, val: Any) -> None:
146
155
  """Assign in-memory cell value
147
156
  This allows overwriting
148
157
  """
158
+ assert isinstance(idx, int)
149
159
  assert self.excs[idx] is None
150
160
 
151
161
  if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
@@ -177,7 +187,7 @@ class DataRow:
177
187
  self.vals[idx] = val
178
188
  self.has_val[idx] = True
179
189
 
180
- def set_file_path(self, idx: object, path: str) -> None:
190
+ def set_file_path(self, idx: int, path: str) -> None:
181
191
  """Augment an existing url with a local file path"""
182
192
  assert self.has_val[idx]
183
193
  assert idx in self.img_slot_idxs or idx in self.media_slot_idxs
@@ -185,7 +195,7 @@ class DataRow:
185
195
  if idx in self.media_slot_idxs:
186
196
  self.vals[idx] = path
187
197
 
188
- def flush_img(self, index: object, filepath: Optional[str] = None) -> None:
198
+ def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
189
199
  """Discard the in-memory value and save it to a local file, if filepath is not None"""
190
200
  if self.vals[index] is None:
191
201
  return
@@ -195,7 +205,12 @@ class DataRow:
195
205
  # we want to save this to a file
196
206
  self.file_paths[index] = filepath
197
207
  self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
198
- self.vals[index].save(filepath, format='JPEG')
208
+ image = self.vals[index]
209
+ assert isinstance(image, PIL.Image.Image)
210
+ # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
211
+ # In that case, use WebP instead.
212
+ format = 'webp' if image.has_transparency_data else 'jpeg'
213
+ image.save(filepath, format=format)
199
214
  else:
200
215
  # we discard the content of this cell
201
216
  self.has_val[index] = False
pixeltable/exprs/expr.py CHANGED
@@ -7,10 +7,11 @@ import inspect
7
7
  import json
8
8
  import sys
9
9
  import typing
10
- from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple, Type, TypeVar, Union, overload
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload
11
11
  from uuid import UUID
12
12
 
13
13
  import sqlalchemy as sql
14
+ from typing_extensions import Self
14
15
 
15
16
  import pixeltable
16
17
  import pixeltable.catalog as catalog
@@ -21,6 +22,8 @@ import pixeltable.type_system as ts
21
22
  from .data_row import DataRow
22
23
  from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
23
24
 
25
+ if TYPE_CHECKING:
26
+ from pixeltable import exprs
24
27
 
25
28
  class ExprScope:
26
29
  """
@@ -49,23 +52,31 @@ class Expr(abc.ABC):
49
52
  - during eval(), components can only be accessed via self.components; any Exprs outside of that won't
50
53
  have slot_idx set
51
54
  """
52
- def __init__(self, col_type: ts.ColumnType):
53
- self.col_type = col_type
54
55
 
55
- # each instance has an id that is used for equality comparisons
56
- # - set by the subclass's __init__()
57
- # - produced by _create_id()
58
- # - not expected to survive a serialize()/deserialize() roundtrip
59
- self.id: Optional[int] = None
56
+ col_type: ts.ColumnType
57
+
58
+ # the subexprs are needed to construct this expr
59
+ components: list[Expr]
60
+
61
+ # each instance has an id that is used for equality comparisons
62
+ # - set by the subclass's __init__()
63
+ # - produced by _create_id()
64
+ # - not expected to survive a serialize()/deserialize() roundtrip
65
+ id: Optional[int]
60
66
 
61
- # index of the expr's value in the data row:
62
- # - set for all materialized exprs
63
- # - None: not executable
64
- # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
65
- self.slot_idx: Optional[int] = None
66
- self.components: List[Expr] = [] # the subexprs that are needed to construct this expr
67
+ # index of the expr's value in the data row:
68
+ # - set for all materialized exprs
69
+ # - None: not executable
70
+ # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
71
+ slot_idx: Optional[int]
72
+
73
+ def __init__(self, col_type: ts.ColumnType):
74
+ self.col_type = col_type
75
+ self.components = []
76
+ self.id = None
77
+ self.slot_idx = None
67
78
 
68
- def dependencies(self) -> List[Expr]:
79
+ def dependencies(self) -> list[Expr]:
69
80
  """
70
81
  Returns all exprs that need to have been evaluated before eval() can be called on this one.
71
82
  """
@@ -115,7 +126,7 @@ class Expr(abc.ABC):
115
126
  # override this
116
127
  return True
117
128
 
118
- def _id_attrs(self) -> List[Tuple[str, Any]]:
129
+ def _id_attrs(self) -> list[tuple[str, Any]]:
119
130
  """Returns attribute name/value pairs that are used to construct the instance id.
120
131
 
121
132
  Attribute values must be immutable and have str() defined.
@@ -137,7 +148,7 @@ class Expr(abc.ABC):
137
148
  return self.id
138
149
 
139
150
  @classmethod
140
- def list_equals(cls, a: List[Expr], b: List[Expr]) -> bool:
151
+ def list_equals(cls, a: list[Expr], b: list[Expr]) -> bool:
141
152
  if len(a) != len(b):
142
153
  return False
143
154
  for i in range(len(a)):
@@ -158,7 +169,7 @@ class Expr(abc.ABC):
158
169
  return result
159
170
 
160
171
  @classmethod
161
- def copy_list(cls, expr_list: Optional[List[Expr]]) -> Optional[List[Expr]]:
172
+ def copy_list(cls, expr_list: Optional[list[Expr]]) -> Optional[list[Expr]]:
162
173
  if expr_list is None:
163
174
  return None
164
175
  return [e.copy() for e in expr_list]
@@ -183,11 +194,11 @@ class Expr(abc.ABC):
183
194
  return self
184
195
 
185
196
  @classmethod
186
- def list_substitute(cls, expr_list: List[Expr], spec: dict[Expr, Expr]) -> None:
197
+ def list_substitute(cls, expr_list: list[Expr], spec: dict[Expr, Expr]) -> None:
187
198
  for i in range(len(expr_list)):
188
199
  expr_list[i] = expr_list[i].substitute(spec)
189
200
 
190
- def resolve_computed_cols(self, resolve_cols: Optional[Set[catalog.Column]] = None) -> Expr:
201
+ def resolve_computed_cols(self, resolve_cols: Optional[set[catalog.Column]] = None) -> Expr:
191
202
  """
192
203
  Recursively replace ColRefs to unstored computed columns with their value exprs.
193
204
  Also replaces references to stored computed columns in resolve_cols.
@@ -215,12 +226,12 @@ class Expr(abc.ABC):
215
226
  return False
216
227
  return True
217
228
 
218
- def retarget(self, tbl: catalog.TableVersionPath) -> Expr:
229
+ def retarget(self, tbl: catalog.TableVersionPath) -> Self:
219
230
  """Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
220
231
  tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
221
232
  return self._retarget(tbl_versions)
222
233
 
223
- def _retarget(self, tbl_versions: Dict[UUID, catalog.TableVersion]) -> Expr:
234
+ def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
224
235
  from .column_ref import ColumnRef
225
236
  if isinstance(self, ColumnRef):
226
237
  target = tbl_versions[self.col.tbl.id]
@@ -299,7 +310,7 @@ class Expr(abc.ABC):
299
310
  for e in expr_list:
300
311
  yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
301
312
 
302
- def _contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
313
+ def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
303
314
  """
304
315
  Returns True if any subexpr is an instance of cls.
305
316
  """
@@ -312,15 +323,15 @@ class Expr(abc.ABC):
312
323
  except StopIteration:
313
324
  return False
314
325
 
315
- def tbl_ids(self) -> Set[UUID]:
326
+ def tbl_ids(self) -> set[UUID]:
316
327
  """Returns table ids referenced by this expr."""
317
328
  from .column_ref import ColumnRef
318
329
  from .rowid_ref import RowidRef
319
330
  return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
320
331
 
321
332
  @classmethod
322
- def list_tbl_ids(cls, expr_list: List[Expr]) -> Set[UUID]:
323
- ids: Set[UUID] = set()
333
+ def list_tbl_ids(cls, expr_list: list[Expr]) -> set[UUID]:
334
+ ids: set[UUID] = set()
324
335
  for e in expr_list:
325
336
  ids.update(e.tbl_ids())
326
337
  return ids
@@ -362,7 +373,7 @@ class Expr(abc.ABC):
362
373
  return None
363
374
 
364
375
  @abc.abstractmethod
365
- def sql_expr(self) -> Optional[sql.ColumnElement]:
376
+ def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
366
377
  """
367
378
  If this expr can be materialized directly in SQL:
368
379
  - returns a ColumnElement
@@ -389,14 +400,14 @@ class Expr(abc.ABC):
389
400
  c.release()
390
401
 
391
402
  @classmethod
392
- def release_list(cls, expr_list: List[Expr]) -> None:
403
+ def release_list(cls, expr_list: list[Expr]) -> None:
393
404
  for e in expr_list:
394
405
  e.release()
395
406
 
396
407
  def serialize(self) -> str:
397
408
  return json.dumps(self.as_dict())
398
409
 
399
- def as_dict(self) -> Dict:
410
+ def as_dict(self) -> dict:
400
411
  """
401
412
  Turn Expr object into a dict that can be passed to json.dumps().
402
413
  Subclasses override _as_dict().
@@ -407,10 +418,10 @@ class Expr(abc.ABC):
407
418
  }
408
419
 
409
420
  @classmethod
410
- def as_dict_list(self, expr_list: List[Expr]) -> List[Dict]:
421
+ def as_dict_list(self, expr_list: list[Expr]) -> list[dict]:
411
422
  return [e.as_dict() for e in expr_list]
412
423
 
413
- def _as_dict(self) -> Dict:
424
+ def _as_dict(self) -> dict:
414
425
  if len(self.components) > 0:
415
426
  return {'components': [c.as_dict() for c in self.components]}
416
427
  return {}
@@ -420,24 +431,24 @@ class Expr(abc.ABC):
420
431
  return cls.from_dict(json.loads(dict_str))
421
432
 
422
433
  @classmethod
423
- def from_dict(cls, d: Dict) -> Expr:
434
+ def from_dict(cls, d: dict) -> Self:
424
435
  """
425
436
  Turn dict that was produced by calling Expr.as_dict() into an instance of the correct Expr subclass.
426
437
  """
427
438
  assert '_classname' in d
428
439
  exprs_module = importlib.import_module(cls.__module__.rsplit('.', 1)[0])
429
440
  type_class = getattr(exprs_module, d['_classname'])
430
- components: List[Expr] = []
441
+ components: list[Expr] = []
431
442
  if 'components' in d:
432
443
  components = [cls.from_dict(component_dict) for component_dict in d['components']]
433
444
  return type_class._from_dict(d, components)
434
445
 
435
446
  @classmethod
436
- def from_dict_list(cls, dict_list: List[Dict]) -> List[Expr]:
447
+ def from_dict_list(cls, dict_list: list[dict]) -> list[Expr]:
437
448
  return [cls.from_dict(d) for d in dict_list]
438
449
 
439
450
  @classmethod
440
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
451
+ def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
441
452
  assert False, 'not implemented'
442
453
 
443
454
  def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
@@ -1,25 +1,36 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, Dict, Iterable, Iterator
2
+
3
+ from typing import Optional, Iterable, Iterator
3
4
 
4
5
  from .expr import Expr
5
6
 
6
7
 
7
8
  class ExprSet:
8
- """A set that also supports indexed lookup (by slot_idx and Expr.id)"""
9
+ """
10
+ A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
11
+ """
12
+ exprs: dict[int, Expr] # key: Expr.id
13
+ exprs_by_idx: dict[int, Expr] # key: slot_idx
14
+
9
15
  def __init__(self, elements: Optional[Iterable[Expr]] = None):
10
- self.exprs: Dict[int, Expr] = {} # Expr.id -> Expr
16
+ self.exprs = {}
17
+ self.exprs_by_idx = {}
11
18
  if elements is not None:
12
19
  for e in elements:
13
- self.append(e)
20
+ self.add(e)
14
21
 
15
- def append(self, expr: Expr) -> None:
22
+ def add(self, expr: Expr) -> None:
16
23
  if expr.id in self.exprs:
17
24
  return
18
25
  self.exprs[expr.id] = expr
26
+ if expr.slot_idx is None:
27
+ return
28
+ self.exprs_by_idx[expr.slot_idx] = expr
19
29
 
20
- def extend(self, elements: Iterable[Expr]) -> None:
21
- for e in elements:
22
- self.append(e)
30
+ def update(self, *others: Iterable[Expr]) -> None:
31
+ for other in others:
32
+ for e in other:
33
+ self.add(e)
23
34
 
24
35
  def __contains__(self, item: Expr) -> bool:
25
36
  return item.id in self.exprs
@@ -31,9 +42,21 @@ class ExprSet:
31
42
  return iter(self.exprs.values())
32
43
 
33
44
  def __getitem__(self, index: object) -> Optional[Expr]:
45
+ """Indexed lookup by slot_idx or Expr.id."""
46
+ if not isinstance(index, int) and not isinstance(index, Expr):
47
+ pass
34
48
  assert isinstance(index, int) or isinstance(index, Expr)
35
49
  if isinstance(index, int):
36
50
  # return expr with matching slot_idx
37
- return list(self.exprs.values())[index]
51
+ return self.exprs_by_idx.get(index)
38
52
  else:
39
53
  return self.exprs.get(index.id)
54
+
55
+ def issuperset(self, other: ExprSet) -> bool:
56
+ return self.exprs.keys() >= other.exprs.keys()
57
+
58
+ def __ge__(self, other: ExprSet) -> bool:
59
+ return self.issuperset(other)
60
+
61
+ def __le__(self, other: ExprSet) -> bool:
62
+ return other.issuperset(self)
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import inspect
4
4
  import json
5
5
  import sys
6
- from typing import Optional, List, Any, Dict, Tuple
6
+ from typing import Optional, Any
7
7
 
8
8
  import sqlalchemy as sql
9
9
 
@@ -17,28 +17,34 @@ from .inline_array import InlineArray
17
17
  from .inline_dict import InlineDict
18
18
  from .row_builder import RowBuilder
19
19
  from .rowid_ref import RowidRef
20
+ from .sql_element_cache import SqlElementCache
20
21
 
21
22
 
22
23
  class FunctionCall(Expr):
23
24
 
24
25
  fn: func.Function
25
26
  is_method_call: bool
26
- agg_init_args: Dict[str, Any]
27
- args: List[Tuple[Optional[int], Optional[Any]]]
28
- kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]]
29
- arg_types: List[ts.ColumnType]
30
- kwarg_types: Dict[str, ts.ColumnType]
27
+ agg_init_args: dict[str, Any]
28
+
29
+ # tuple[Optional[int], Optional[Any]]:
30
+ # - for Exprs: (index into components, None)
31
+ # - otherwise: (None, val)
32
+ args: list[tuple[Optional[int], Optional[Any]]]
33
+ kwargs: dict[str, tuple[Optional[int], Optional[Any]]]
34
+
35
+ arg_types: list[ts.ColumnType]
36
+ kwarg_types: dict[str, ts.ColumnType]
31
37
  group_by_start_idx: int
32
38
  group_by_stop_idx: int
33
39
  fn_expr_idx: int
34
40
  order_by_start_idx: int
35
41
  constant_args: set[str]
36
42
  aggregator: Optional[Any]
37
- current_partition_vals: Optional[List[Any]]
43
+ current_partition_vals: Optional[list[Any]]
38
44
 
39
45
  def __init__(
40
- self, fn: func.Function, bound_args: Dict[str, Any], order_by_clause: Optional[List[Any]] = None,
41
- group_by_clause: Optional[List[Any]] = None, is_method_call: bool = False):
46
+ self, fn: func.Function, bound_args: dict[str, Any], order_by_clause: Optional[list[Any]] = None,
47
+ group_by_clause: Optional[list[Any]] = None, is_method_call: bool = False):
42
48
  if order_by_clause is None:
43
49
  order_by_clause = []
44
50
  if group_by_clause is None:
@@ -58,10 +64,6 @@ class FunctionCall(Expr):
58
64
  bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
59
65
 
60
66
  # construct components, args, kwargs
61
-
62
- # Tuple[int, Any]:
63
- # - for Exprs: (index into components, None)
64
- # - otherwise: (None, val)
65
67
  self.args = []
66
68
  self.kwargs = {}
67
69
 
@@ -131,7 +133,7 @@ class FunctionCall(Expr):
131
133
 
132
134
  self.id = self._create_id()
133
135
 
134
- def _create_rowid_refs(self, tbl: catalog.Table) -> List[Expr]:
136
+ def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
135
137
  target = tbl._tbl_version_path.tbl_version
136
138
  return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
137
139
 
@@ -141,7 +143,7 @@ class FunctionCall(Expr):
141
143
  return super().default_column_name()
142
144
 
143
145
  @classmethod
144
- def normalize_args(cls, signature: func.Signature, bound_args: Dict[str, Any]) -> None:
146
+ def normalize_args(cls, signature: func.Signature, bound_args: dict[str, Any]) -> None:
145
147
  """Converts all args to Exprs and checks that they are compatible with signature.
146
148
 
147
149
  Updates bound_args in place, where necessary.
@@ -232,7 +234,7 @@ class FunctionCall(Expr):
232
234
  return False
233
235
  return True
234
236
 
235
- def _id_attrs(self) -> List[Tuple[str, Any]]:
237
+ def _id_attrs(self) -> list[tuple[str, Any]]:
236
238
  return super()._id_attrs() + [
237
239
  ('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
238
240
  ('args', self.args),
@@ -253,11 +255,11 @@ class FunctionCall(Expr):
253
255
  return f'{fn_name}({self._print_args()})'
254
256
 
255
257
  def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
258
+ def print_arg(arg: Any) -> str:
259
+ return repr(arg) if isinstance(arg, str) else str(arg)
256
260
  arg_strs = [
257
- str(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
261
+ print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
258
262
  ]
259
- def print_arg(arg: Any) -> str:
260
- return f"'{arg}'" if isinstance(arg, str) else str(arg)
261
263
  arg_strs.extend([
262
264
  f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
263
265
  for param_name, (idx, arg) in self.kwargs.items()
@@ -273,15 +275,15 @@ class FunctionCall(Expr):
273
275
  separator = ', ' if inline else ',\n '
274
276
  return separator.join(arg_strs)
275
277
 
276
- def has_group_by(self) -> List[Expr]:
278
+ def has_group_by(self) -> list[Expr]:
277
279
  return self.group_by_stop_idx != 0
278
280
 
279
281
  @property
280
- def group_by(self) -> List[Expr]:
282
+ def group_by(self) -> list[Expr]:
281
283
  return self.components[self.group_by_start_idx:self.group_by_stop_idx]
282
284
 
283
285
  @property
284
- def order_by(self) -> List[Expr]:
286
+ def order_by(self) -> list[Expr]:
285
287
  return self.components[self.order_by_start_idx:]
286
288
 
287
289
  @property
@@ -291,20 +293,42 @@ class FunctionCall(Expr):
291
293
  or self.has_group_by() \
292
294
  or (len(self.order_by) > 0 and not self.fn.requires_order_by))
293
295
 
294
- def get_window_sort_exprs(self) -> Tuple[List[Expr], List[Expr]]:
296
+ def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
295
297
  return self.group_by, self.order_by
296
298
 
297
299
  @property
298
300
  def is_agg_fn_call(self) -> bool:
299
301
  return isinstance(self.fn, func.AggregateFunction)
300
302
 
301
- def get_agg_order_by(self) -> List[Expr]:
303
+ def get_agg_order_by(self) -> list[Expr]:
302
304
  assert self.is_agg_fn_call
303
305
  return self.order_by
304
306
 
305
- def sql_expr(self) -> Optional[sql.ClauseElement]:
306
- # TODO: implement for standard aggregate functions
307
- return None
307
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
308
+ # try to construct args and kwargs to call self.fn._to_sql()
309
+ kwargs: dict[str, sql.ColumnElement] = {}
310
+ for param_name, (component_idx, arg) in self.kwargs.items():
311
+ param = self.fn.signature.parameters[param_name]
312
+ assert param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD
313
+ if component_idx is None:
314
+ kwargs[param_name] = sql.literal(arg)
315
+ else:
316
+ arg_element = sql_elements.get(self.components[component_idx])
317
+ if arg_element is None:
318
+ return None
319
+ kwargs[param_name] = arg_element
320
+
321
+ args: list[sql.ColumnElement] = []
322
+ for _, (component_idx, arg) in enumerate(self.args):
323
+ if component_idx is None:
324
+ args.append(sql.literal(arg))
325
+ else:
326
+ arg_element = sql_elements.get(self.components[component_idx])
327
+ if arg_element is None:
328
+ return None
329
+ args.append(arg_element)
330
+ result = self.fn._to_sql(*args, **kwargs)
331
+ return result
308
332
 
309
333
  def reset_agg(self) -> None:
310
334
  """
@@ -322,9 +346,9 @@ class FunctionCall(Expr):
322
346
  args, kwargs = self._make_args(data_row)
323
347
  self.aggregator.update(*args, **kwargs)
324
348
 
325
- def _make_args(self, data_row: DataRow) -> Tuple[List[Any], Dict[str, Any]]:
349
+ def _make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]]:
326
350
  """Return args and kwargs, constructed for data_row"""
327
- kwargs: Dict[str, Any] = {}
351
+ kwargs: dict[str, Any] = {}
328
352
  for param_name, (component_idx, arg) in self.kwargs.items():
329
353
  val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
330
354
  param = self.fn.signature.parameters[param_name]
@@ -335,7 +359,7 @@ class FunctionCall(Expr):
335
359
  assert param.kind != inspect.Parameter.VAR_POSITIONAL
336
360
  kwargs[param_name] = val
337
361
 
338
- args: List[Any] = []
362
+ args: list[Any] = []
339
363
  for param_idx, (component_idx, arg) in enumerate(self.args):
340
364
  val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
341
365
  param = self.fn.signature.parameters_by_pos[param_idx]
@@ -393,7 +417,7 @@ class FunctionCall(Expr):
393
417
  else:
394
418
  data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
395
419
 
396
- def _as_dict(self) -> Dict:
420
+ def _as_dict(self) -> dict:
397
421
  result = {
398
422
  'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
399
423
  'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
@@ -403,7 +427,7 @@ class FunctionCall(Expr):
403
427
  return result
404
428
 
405
429
  @classmethod
406
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
430
+ def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
407
431
  assert 'fn' in d
408
432
  assert 'args' in d
409
433
  assert 'kwargs' in d
@@ -5,6 +5,7 @@ from typing import Optional, List, Any, Dict, Tuple, Iterable
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.exceptions as excs
8
+ from .sql_element_cache import SqlElementCache
8
9
  import pixeltable.type_system as ts
9
10
  from .data_row import DataRow
10
11
  from .expr import Expr
@@ -70,8 +71,8 @@ class InPredicate(Expr):
70
71
  def _id_attrs(self) -> List[Tuple[str, Any]]:
71
72
  return super()._id_attrs() + [('value_list', self.value_list)]
72
73
 
73
- def sql_expr(self) -> Optional[sql.ClauseElement]:
74
- lhs_sql_exprs = self.components[0].sql_expr()
74
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
75
+ lhs_sql_exprs = sql_elements.get(self.components[0])
75
76
  if lhs_sql_exprs is None or self.value_list is None:
76
77
  return None
77
78
  return lhs_sql_exprs.in_(self.value_list)
@@ -11,6 +11,7 @@ from .data_row import DataRow
11
11
  from .expr import Expr
12
12
  from .inline_dict import InlineDict
13
13
  from .row_builder import RowBuilder
14
+ from .sql_element_cache import SqlElementCache
14
15
 
15
16
 
16
17
  class InlineArray(Expr):
@@ -82,7 +83,7 @@ class InlineArray(Expr):
82
83
  def _id_attrs(self) -> List[Tuple[str, Any]]:
83
84
  return super()._id_attrs() + [('elements', self.elements)]
84
85
 
85
- def sql_expr(self) -> Optional[sql.ClauseElement]:
86
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
86
87
  return None
87
88
 
88
89
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -10,6 +10,7 @@ import pixeltable.type_system as ts
10
10
  from .data_row import DataRow
11
11
  from .expr import Expr
12
12
  from .row_builder import RowBuilder
13
+ from .sql_element_cache import SqlElementCache
13
14
 
14
15
 
15
16
  class InlineDict(Expr):
@@ -73,7 +74,7 @@ class InlineDict(Expr):
73
74
  """Return the original dict used to construct this"""
74
75
  return {key: val if idx is None else self.components[idx] for key, idx, val in self.dict_items}
75
76
 
76
- def sql_expr(self) -> Optional[sql.ClauseElement]:
77
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
77
78
  return None
78
79
 
79
80
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -8,6 +8,7 @@ import pixeltable.type_system as ts
8
8
  from .data_row import DataRow
9
9
  from .expr import Expr
10
10
  from .row_builder import RowBuilder
11
+ from .sql_element_cache import SqlElementCache
11
12
 
12
13
 
13
14
  class IsNull(Expr):
@@ -22,8 +23,8 @@ class IsNull(Expr):
22
23
  def _equals(self, other: IsNull) -> bool:
23
24
  return True
24
25
 
25
- def sql_expr(self) -> Optional[sql.ClauseElement]:
26
- e = self.components[0].sql_expr()
26
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
27
+ e = sql_elements.get(self.components[0])
27
28
  if e is None:
28
29
  return None
29
30
  return e == None
@@ -1,13 +1,14 @@
1
1
  from __future__ import annotations
2
+
2
3
  from typing import Optional, List, Dict
3
4
 
4
5
  import sqlalchemy as sql
5
6
 
6
- from .expr import Expr, ExprScope, _GLOBAL_SCOPE
7
+ import pixeltable.type_system as ts
7
8
  from .data_row import DataRow
9
+ from .expr import Expr, ExprScope, _GLOBAL_SCOPE
8
10
  from .row_builder import RowBuilder
9
- import pixeltable.catalog as catalog
10
- import pixeltable.type_system as ts
11
+ from .sql_element_cache import SqlElementCache
11
12
 
12
13
 
13
14
  class JsonMapper(Expr):
@@ -86,7 +87,7 @@ class JsonMapper(Expr):
86
87
  def _equals(self, other: JsonMapper) -> bool:
87
88
  return True
88
89
 
89
- def sql_expr(self) -> Optional[sql.ClauseElement]:
90
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
90
91
  return None
91
92
 
92
93
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None: