pixeltable 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (87)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +8 -7
  4. pixeltable/catalog/column.py +11 -8
  5. pixeltable/catalog/insertable_table.py +1 -1
  6. pixeltable/catalog/path_dict.py +8 -6
  7. pixeltable/catalog/table.py +20 -14
  8. pixeltable/catalog/table_version.py +92 -55
  9. pixeltable/catalog/table_version_path.py +7 -9
  10. pixeltable/catalog/view.py +3 -2
  11. pixeltable/dataframe.py +2 -2
  12. pixeltable/env.py +205 -86
  13. pixeltable/exceptions.py +5 -1
  14. pixeltable/exec/aggregation_node.py +2 -1
  15. pixeltable/exec/component_iteration_node.py +2 -2
  16. pixeltable/exec/sql_node.py +11 -8
  17. pixeltable/exprs/__init__.py +2 -2
  18. pixeltable/exprs/arithmetic_expr.py +4 -4
  19. pixeltable/exprs/array_slice.py +2 -1
  20. pixeltable/exprs/column_property_ref.py +9 -7
  21. pixeltable/exprs/column_ref.py +2 -1
  22. pixeltable/exprs/comparison.py +10 -7
  23. pixeltable/exprs/compound_predicate.py +3 -2
  24. pixeltable/exprs/data_row.py +19 -4
  25. pixeltable/exprs/expr.py +51 -41
  26. pixeltable/exprs/expr_set.py +32 -9
  27. pixeltable/exprs/function_call.py +62 -40
  28. pixeltable/exprs/in_predicate.py +3 -2
  29. pixeltable/exprs/inline_expr.py +200 -0
  30. pixeltable/exprs/is_null.py +3 -2
  31. pixeltable/exprs/json_mapper.py +5 -4
  32. pixeltable/exprs/json_path.py +7 -1
  33. pixeltable/exprs/literal.py +34 -7
  34. pixeltable/exprs/method_ref.py +3 -3
  35. pixeltable/exprs/object_ref.py +6 -5
  36. pixeltable/exprs/row_builder.py +25 -17
  37. pixeltable/exprs/rowid_ref.py +2 -1
  38. pixeltable/exprs/similarity_expr.py +2 -1
  39. pixeltable/exprs/sql_element_cache.py +30 -0
  40. pixeltable/exprs/type_cast.py +3 -3
  41. pixeltable/exprs/variable.py +2 -1
  42. pixeltable/ext/functions/whisperx.py +6 -4
  43. pixeltable/ext/functions/yolox.py +11 -9
  44. pixeltable/func/aggregate_function.py +1 -0
  45. pixeltable/func/function.py +28 -4
  46. pixeltable/functions/__init__.py +4 -2
  47. pixeltable/functions/anthropic.py +15 -5
  48. pixeltable/functions/fireworks.py +1 -1
  49. pixeltable/functions/globals.py +6 -1
  50. pixeltable/functions/huggingface.py +91 -14
  51. pixeltable/functions/image.py +20 -5
  52. pixeltable/functions/json.py +5 -5
  53. pixeltable/functions/mistralai.py +188 -0
  54. pixeltable/functions/openai.py +6 -10
  55. pixeltable/functions/string.py +3 -2
  56. pixeltable/functions/timestamp.py +95 -7
  57. pixeltable/functions/together.py +18 -11
  58. pixeltable/functions/video.py +2 -2
  59. pixeltable/functions/vision.py +69 -37
  60. pixeltable/functions/whisper.py +4 -1
  61. pixeltable/globals.py +5 -1
  62. pixeltable/io/hf_datasets.py +17 -15
  63. pixeltable/io/pandas.py +0 -2
  64. pixeltable/io/parquet.py +15 -14
  65. pixeltable/iterators/document.py +16 -15
  66. pixeltable/metadata/__init__.py +1 -1
  67. pixeltable/metadata/converters/convert_18.py +1 -1
  68. pixeltable/metadata/converters/convert_19.py +46 -0
  69. pixeltable/metadata/converters/convert_20.py +56 -0
  70. pixeltable/metadata/converters/util.py +29 -4
  71. pixeltable/metadata/notes.py +2 -0
  72. pixeltable/metadata/schema.py +5 -4
  73. pixeltable/plan.py +100 -78
  74. pixeltable/store.py +5 -1
  75. pixeltable/tool/create_test_db_dump.py +18 -6
  76. pixeltable/type_system.py +15 -15
  77. pixeltable/utils/documents.py +45 -42
  78. pixeltable/utils/formatter.py +2 -2
  79. pixeltable-0.2.19.dist-info/LICENSE +201 -0
  80. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/METADATA +84 -24
  81. pixeltable-0.2.19.dist-info/RECORD +147 -0
  82. pixeltable/exprs/inline_array.py +0 -116
  83. pixeltable/exprs/inline_dict.py +0 -103
  84. pixeltable-0.2.17.dist-info/LICENSE +0 -18
  85. pixeltable-0.2.17.dist-info/RECORD +0 -144
  86. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/WHEEL +0 -0
  87. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/compound_predicate.py CHANGED
@@ -9,6 +9,7 @@ from .data_row import DataRow
9
9
  from .expr import Expr
10
10
  from .globals import LogicalOperator
11
11
  from .row_builder import RowBuilder
12
+ from .sql_element_cache import SqlElementCache
12
13
  import pixeltable.type_system as ts
13
14
 
14
15
 
@@ -66,8 +67,8 @@ class CompoundPredicate(Expr):
66
67
  non_matches = [op for op in self.components if not condition(op)]
67
68
  return (matches, self.make_conjunction(non_matches))
68
69
 
69
- def sql_expr(self) -> Optional[sql.ClauseElement]:
70
- sql_exprs = [op.sql_expr() for op in self.components]
70
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
71
+ sql_exprs = [sql_elements.get(op) for op in self.components]
71
72
  if any(e is None for e in sql_exprs):
72
73
  return None
73
74
  if self.operator == LogicalOperator.NOT:
pixeltable/exprs/data_row.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import datetime
3
4
  import io
4
5
  import urllib.parse
5
6
  import urllib.request
@@ -8,8 +9,11 @@ from typing import Optional, List, Any, Tuple
8
9
  import sqlalchemy as sql
9
10
  import pgvector.sqlalchemy
10
11
  import PIL
12
+ import PIL.Image
11
13
  import numpy as np
12
14
 
15
+ from pixeltable import env
16
+
13
17
 
14
18
  class DataRow:
15
19
  """
@@ -101,6 +105,7 @@ class DataRow:
101
105
 
102
106
  def __getitem__(self, index: object) -> Any:
103
107
  """Returns in-memory value, ie, what is needed for expr evaluation"""
108
+ assert isinstance(index, int)
104
109
  if not self.has_val[index]:
105
110
  # for debugging purposes
106
111
  pass
@@ -115,7 +120,7 @@ class DataRow:
115
120
 
116
121
  return self.vals[index]
117
122
 
118
- def get_stored_val(self, index: object, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
123
+ def get_stored_val(self, index: int, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
119
124
  """Return the value that gets stored in the db"""
120
125
  assert self.excs[index] is None
121
126
  if not self.has_val[index]:
@@ -140,12 +145,17 @@ class DataRow:
140
145
  if self.vals[index] is None and sa_col_type is not None and isinstance(sa_col_type, sql.JSON):
141
146
  return sql.sql.null()
142
147
 
148
+ if isinstance(self.vals[index], datetime.datetime) and self.vals[index].tzinfo is None:
149
+ # if the datetime is naive, cast it to the default time zone
150
+ return self.vals[index].replace(tzinfo=env.Env.get().default_time_zone)
151
+
143
152
  return self.vals[index]
144
153
 
145
154
  def __setitem__(self, idx: object, val: Any) -> None:
146
155
  """Assign in-memory cell value
147
156
  This allows overwriting
148
157
  """
158
+ assert isinstance(idx, int)
149
159
  assert self.excs[idx] is None
150
160
 
151
161
  if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
@@ -177,7 +187,7 @@ class DataRow:
177
187
  self.vals[idx] = val
178
188
  self.has_val[idx] = True
179
189
 
180
- def set_file_path(self, idx: object, path: str) -> None:
190
+ def set_file_path(self, idx: int, path: str) -> None:
181
191
  """Augment an existing url with a local file path"""
182
192
  assert self.has_val[idx]
183
193
  assert idx in self.img_slot_idxs or idx in self.media_slot_idxs
@@ -185,7 +195,7 @@ class DataRow:
185
195
  if idx in self.media_slot_idxs:
186
196
  self.vals[idx] = path
187
197
 
188
- def flush_img(self, index: object, filepath: Optional[str] = None) -> None:
198
+ def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
189
199
  """Discard the in-memory value and save it to a local file, if filepath is not None"""
190
200
  if self.vals[index] is None:
191
201
  return
@@ -195,7 +205,12 @@ class DataRow:
195
205
  # we want to save this to a file
196
206
  self.file_paths[index] = filepath
197
207
  self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
198
- self.vals[index].save(filepath, format='JPEG')
208
+ image = self.vals[index]
209
+ assert isinstance(image, PIL.Image.Image)
210
+ # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
211
+ # In that case, use WebP instead.
212
+ format = 'webp' if image.has_transparency_data else 'jpeg'
213
+ image.save(filepath, format=format)
199
214
  else:
200
215
  # we discard the content of this cell
201
216
  self.has_val[index] = False
pixeltable/exprs/expr.py CHANGED
@@ -7,10 +7,11 @@ import inspect
7
7
  import json
8
8
  import sys
9
9
  import typing
10
- from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple, Type, TypeVar, Union, overload
10
+ from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, TypeVar, Union, overload
11
11
  from uuid import UUID
12
12
 
13
13
  import sqlalchemy as sql
14
+ from typing_extensions import Self
14
15
 
15
16
  import pixeltable
16
17
  import pixeltable.catalog as catalog
@@ -21,6 +22,8 @@ import pixeltable.type_system as ts
21
22
  from .data_row import DataRow
22
23
  from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
23
24
 
25
+ if TYPE_CHECKING:
26
+ from pixeltable import exprs
24
27
 
25
28
  class ExprScope:
26
29
  """
@@ -49,23 +52,31 @@ class Expr(abc.ABC):
49
52
  - during eval(), components can only be accessed via self.components; any Exprs outside of that won't
50
53
  have slot_idx set
51
54
  """
52
- def __init__(self, col_type: ts.ColumnType):
53
- self.col_type = col_type
54
55
 
55
- # each instance has an id that is used for equality comparisons
56
- # - set by the subclass's __init__()
57
- # - produced by _create_id()
58
- # - not expected to survive a serialize()/deserialize() roundtrip
59
- self.id: Optional[int] = None
56
+ col_type: ts.ColumnType
57
+
58
+ # the subexprs are needed to construct this expr
59
+ components: list[Expr]
60
+
61
+ # each instance has an id that is used for equality comparisons
62
+ # - set by the subclass's __init__()
63
+ # - produced by _create_id()
64
+ # - not expected to survive a serialize()/deserialize() roundtrip
65
+ id: Optional[int]
60
66
 
61
- # index of the expr's value in the data row:
62
- # - set for all materialized exprs
63
- # - None: not executable
64
- # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
65
- self.slot_idx: Optional[int] = None
66
- self.components: List[Expr] = [] # the subexprs that are needed to construct this expr
67
+ # index of the expr's value in the data row:
68
+ # - set for all materialized exprs
69
+ # - None: not executable
70
+ # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
71
+ slot_idx: Optional[int]
72
+
73
+ def __init__(self, col_type: ts.ColumnType):
74
+ self.col_type = col_type
75
+ self.components = []
76
+ self.id = None
77
+ self.slot_idx = None
67
78
 
68
- def dependencies(self) -> List[Expr]:
79
+ def dependencies(self) -> list[Expr]:
69
80
  """
70
81
  Returns all exprs that need to have been evaluated before eval() can be called on this one.
71
82
  """
@@ -115,7 +126,7 @@ class Expr(abc.ABC):
115
126
  # override this
116
127
  return True
117
128
 
118
- def _id_attrs(self) -> List[Tuple[str, Any]]:
129
+ def _id_attrs(self) -> list[tuple[str, Any]]:
119
130
  """Returns attribute name/value pairs that are used to construct the instance id.
120
131
 
121
132
  Attribute values must be immutable and have str() defined.
@@ -137,7 +148,7 @@ class Expr(abc.ABC):
137
148
  return self.id
138
149
 
139
150
  @classmethod
140
- def list_equals(cls, a: List[Expr], b: List[Expr]) -> bool:
151
+ def list_equals(cls, a: list[Expr], b: list[Expr]) -> bool:
141
152
  if len(a) != len(b):
142
153
  return False
143
154
  for i in range(len(a)):
@@ -158,7 +169,7 @@ class Expr(abc.ABC):
158
169
  return result
159
170
 
160
171
  @classmethod
161
- def copy_list(cls, expr_list: Optional[List[Expr]]) -> Optional[List[Expr]]:
172
+ def copy_list(cls, expr_list: Optional[list[Expr]]) -> Optional[list[Expr]]:
162
173
  if expr_list is None:
163
174
  return None
164
175
  return [e.copy() for e in expr_list]
@@ -183,11 +194,11 @@ class Expr(abc.ABC):
183
194
  return self
184
195
 
185
196
  @classmethod
186
- def list_substitute(cls, expr_list: List[Expr], spec: dict[Expr, Expr]) -> None:
197
+ def list_substitute(cls, expr_list: list[Expr], spec: dict[Expr, Expr]) -> None:
187
198
  for i in range(len(expr_list)):
188
199
  expr_list[i] = expr_list[i].substitute(spec)
189
200
 
190
- def resolve_computed_cols(self, resolve_cols: Optional[Set[catalog.Column]] = None) -> Expr:
201
+ def resolve_computed_cols(self, resolve_cols: Optional[set[catalog.Column]] = None) -> Expr:
191
202
  """
192
203
  Recursively replace ColRefs to unstored computed columns with their value exprs.
193
204
  Also replaces references to stored computed columns in resolve_cols.
@@ -215,12 +226,12 @@ class Expr(abc.ABC):
215
226
  return False
216
227
  return True
217
228
 
218
- def retarget(self, tbl: catalog.TableVersionPath) -> Expr:
229
+ def retarget(self, tbl: catalog.TableVersionPath) -> Self:
219
230
  """Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
220
231
  tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
221
232
  return self._retarget(tbl_versions)
222
233
 
223
- def _retarget(self, tbl_versions: Dict[UUID, catalog.TableVersion]) -> Expr:
234
+ def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
224
235
  from .column_ref import ColumnRef
225
236
  if isinstance(self, ColumnRef):
226
237
  target = tbl_versions[self.col.tbl.id]
@@ -299,7 +310,7 @@ class Expr(abc.ABC):
299
310
  for e in expr_list:
300
311
  yield from e.subexprs(expr_class=expr_class, filter=filter, traverse_matches=traverse_matches)
301
312
 
302
- def _contains(self, cls: Optional[Type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
313
+ def _contains(self, cls: Optional[type[Expr]] = None, filter: Optional[Callable[[Expr], bool]] = None) -> bool:
303
314
  """
304
315
  Returns True if any subexpr is an instance of cls.
305
316
  """
@@ -312,15 +323,15 @@ class Expr(abc.ABC):
312
323
  except StopIteration:
313
324
  return False
314
325
 
315
- def tbl_ids(self) -> Set[UUID]:
326
+ def tbl_ids(self) -> set[UUID]:
316
327
  """Returns table ids referenced by this expr."""
317
328
  from .column_ref import ColumnRef
318
329
  from .rowid_ref import RowidRef
319
330
  return {ref.col.tbl.id for ref in self.subexprs(ColumnRef)} | {ref.tbl.id for ref in self.subexprs(RowidRef)}
320
331
 
321
332
  @classmethod
322
- def list_tbl_ids(cls, expr_list: List[Expr]) -> Set[UUID]:
323
- ids: Set[UUID] = set()
333
+ def list_tbl_ids(cls, expr_list: list[Expr]) -> set[UUID]:
334
+ ids: set[UUID] = set()
324
335
  for e in expr_list:
325
336
  ids.update(e.tbl_ids())
326
337
  return ids
@@ -345,15 +356,14 @@ class Expr(abc.ABC):
345
356
  """
346
357
  if isinstance(o, Expr):
347
358
  return o
348
- # Try to create a literal. We need to check for InlineArray/InlineDict
349
- # first, to prevent arrays from inappropriately being interpreted as JsonType
359
+ # Try to create a literal. We need to check for InlineList/InlineDict
360
+ # first, to prevent them from inappropriately being interpreted as JsonType
350
361
  # literals.
351
- # TODO: general cleanup of InlineArray/InlineDict
352
362
  if isinstance(o, list):
353
- from .inline_array import InlineArray
354
- return InlineArray(tuple(o))
363
+ from .inline_expr import InlineList
364
+ return InlineList(o)
355
365
  if isinstance(o, dict):
356
- from .inline_dict import InlineDict
366
+ from .inline_expr import InlineDict
357
367
  return InlineDict(o)
358
368
  obj_type = ts.ColumnType.infer_literal_type(o)
359
369
  if obj_type is not None:
@@ -362,7 +372,7 @@ class Expr(abc.ABC):
362
372
  return None
363
373
 
364
374
  @abc.abstractmethod
365
- def sql_expr(self) -> Optional[sql.ColumnElement]:
375
+ def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
366
376
  """
367
377
  If this expr can be materialized directly in SQL:
368
378
  - returns a ColumnElement
@@ -389,14 +399,14 @@ class Expr(abc.ABC):
389
399
  c.release()
390
400
 
391
401
  @classmethod
392
- def release_list(cls, expr_list: List[Expr]) -> None:
402
+ def release_list(cls, expr_list: list[Expr]) -> None:
393
403
  for e in expr_list:
394
404
  e.release()
395
405
 
396
406
  def serialize(self) -> str:
397
407
  return json.dumps(self.as_dict())
398
408
 
399
- def as_dict(self) -> Dict:
409
+ def as_dict(self) -> dict:
400
410
  """
401
411
  Turn Expr object into a dict that can be passed to json.dumps().
402
412
  Subclasses override _as_dict().
@@ -407,10 +417,10 @@ class Expr(abc.ABC):
407
417
  }
408
418
 
409
419
  @classmethod
410
- def as_dict_list(self, expr_list: List[Expr]) -> List[Dict]:
420
+ def as_dict_list(self, expr_list: list[Expr]) -> list[dict]:
411
421
  return [e.as_dict() for e in expr_list]
412
422
 
413
- def _as_dict(self) -> Dict:
423
+ def _as_dict(self) -> dict:
414
424
  if len(self.components) > 0:
415
425
  return {'components': [c.as_dict() for c in self.components]}
416
426
  return {}
@@ -420,24 +430,24 @@ class Expr(abc.ABC):
420
430
  return cls.from_dict(json.loads(dict_str))
421
431
 
422
432
  @classmethod
423
- def from_dict(cls, d: Dict) -> Expr:
433
+ def from_dict(cls, d: dict) -> Self:
424
434
  """
425
435
  Turn dict that was produced by calling Expr.as_dict() into an instance of the correct Expr subclass.
426
436
  """
427
437
  assert '_classname' in d
428
438
  exprs_module = importlib.import_module(cls.__module__.rsplit('.', 1)[0])
429
439
  type_class = getattr(exprs_module, d['_classname'])
430
- components: List[Expr] = []
440
+ components: list[Expr] = []
431
441
  if 'components' in d:
432
442
  components = [cls.from_dict(component_dict) for component_dict in d['components']]
433
443
  return type_class._from_dict(d, components)
434
444
 
435
445
  @classmethod
436
- def from_dict_list(cls, dict_list: List[Dict]) -> List[Expr]:
446
+ def from_dict_list(cls, dict_list: list[dict]) -> list[Expr]:
437
447
  return [cls.from_dict(d) for d in dict_list]
438
448
 
439
449
  @classmethod
440
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
450
+ def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
441
451
  assert False, 'not implemented'
442
452
 
443
453
  def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
pixeltable/exprs/expr_set.py CHANGED
@@ -1,25 +1,36 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, Dict, Iterable, Iterator
2
+
3
+ from typing import Optional, Iterable, Iterator
3
4
 
4
5
  from .expr import Expr
5
6
 
6
7
 
7
8
  class ExprSet:
8
- """A set that also supports indexed lookup (by slot_idx and Expr.id)"""
9
+ """
10
+ A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
11
+ """
12
+ exprs: dict[int, Expr] # key: Expr.id
13
+ exprs_by_idx: dict[int, Expr] # key: slot_idx
14
+
9
15
  def __init__(self, elements: Optional[Iterable[Expr]] = None):
10
- self.exprs: Dict[int, Expr] = {} # Expr.id -> Expr
16
+ self.exprs = {}
17
+ self.exprs_by_idx = {}
11
18
  if elements is not None:
12
19
  for e in elements:
13
- self.append(e)
20
+ self.add(e)
14
21
 
15
- def append(self, expr: Expr) -> None:
22
+ def add(self, expr: Expr) -> None:
16
23
  if expr.id in self.exprs:
17
24
  return
18
25
  self.exprs[expr.id] = expr
26
+ if expr.slot_idx is None:
27
+ return
28
+ self.exprs_by_idx[expr.slot_idx] = expr
19
29
 
20
- def extend(self, elements: Iterable[Expr]) -> None:
21
- for e in elements:
22
- self.append(e)
30
+ def update(self, *others: Iterable[Expr]) -> None:
31
+ for other in others:
32
+ for e in other:
33
+ self.add(e)
23
34
 
24
35
  def __contains__(self, item: Expr) -> bool:
25
36
  return item.id in self.exprs
@@ -31,9 +42,21 @@ class ExprSet:
31
42
  return iter(self.exprs.values())
32
43
 
33
44
  def __getitem__(self, index: object) -> Optional[Expr]:
45
+ """Indexed lookup by slot_idx or Expr.id."""
46
+ if not isinstance(index, int) and not isinstance(index, Expr):
47
+ pass
34
48
  assert isinstance(index, int) or isinstance(index, Expr)
35
49
  if isinstance(index, int):
36
50
  # return expr with matching slot_idx
37
- return list(self.exprs.values())[index]
51
+ return self.exprs_by_idx.get(index)
38
52
  else:
39
53
  return self.exprs.get(index.id)
54
+
55
+ def issuperset(self, other: ExprSet) -> bool:
56
+ return self.exprs.keys() >= other.exprs.keys()
57
+
58
+ def __ge__(self, other: ExprSet) -> bool:
59
+ return self.issuperset(other)
60
+
61
+ def __le__(self, other: ExprSet) -> bool:
62
+ return other.issuperset(self)
pixeltable/exprs/function_call.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import inspect
4
4
  import json
5
5
  import sys
6
- from typing import Optional, List, Any, Dict, Tuple
6
+ from typing import Any, Optional
7
7
 
8
8
  import sqlalchemy as sql
9
9
 
@@ -11,34 +11,40 @@ import pixeltable.catalog as catalog
11
11
  import pixeltable.exceptions as excs
12
12
  import pixeltable.func as func
13
13
  import pixeltable.type_system as ts
14
+
14
15
  from .data_row import DataRow
15
16
  from .expr import Expr
16
- from .inline_array import InlineArray
17
- from .inline_dict import InlineDict
17
+ from .inline_expr import InlineDict, InlineList
18
18
  from .row_builder import RowBuilder
19
19
  from .rowid_ref import RowidRef
20
+ from .sql_element_cache import SqlElementCache
20
21
 
21
22
 
22
23
  class FunctionCall(Expr):
23
24
 
24
25
  fn: func.Function
25
26
  is_method_call: bool
26
- agg_init_args: Dict[str, Any]
27
- args: List[Tuple[Optional[int], Optional[Any]]]
28
- kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]]
29
- arg_types: List[ts.ColumnType]
30
- kwarg_types: Dict[str, ts.ColumnType]
27
+ agg_init_args: dict[str, Any]
28
+
29
+ # tuple[Optional[int], Optional[Any]]:
30
+ # - for Exprs: (index into components, None)
31
+ # - otherwise: (None, val)
32
+ args: list[tuple[Optional[int], Optional[Any]]]
33
+ kwargs: dict[str, tuple[Optional[int], Optional[Any]]]
34
+
35
+ arg_types: list[ts.ColumnType]
36
+ kwarg_types: dict[str, ts.ColumnType]
31
37
  group_by_start_idx: int
32
38
  group_by_stop_idx: int
33
39
  fn_expr_idx: int
34
40
  order_by_start_idx: int
35
41
  constant_args: set[str]
36
42
  aggregator: Optional[Any]
37
- current_partition_vals: Optional[List[Any]]
43
+ current_partition_vals: Optional[list[Any]]
38
44
 
39
45
  def __init__(
40
- self, fn: func.Function, bound_args: Dict[str, Any], order_by_clause: Optional[List[Any]] = None,
41
- group_by_clause: Optional[List[Any]] = None, is_method_call: bool = False):
46
+ self, fn: func.Function, bound_args: dict[str, Any], order_by_clause: Optional[list[Any]] = None,
47
+ group_by_clause: Optional[list[Any]] = None, is_method_call: bool = False):
42
48
  if order_by_clause is None:
43
49
  order_by_clause = []
44
50
  if group_by_clause is None:
@@ -47,7 +53,7 @@ class FunctionCall(Expr):
47
53
  super().__init__(fn.call_return_type(bound_args))
48
54
  self.fn = fn
49
55
  self.is_method_call = is_method_call
50
- self.normalize_args(signature, bound_args)
56
+ self.normalize_args(fn.name, signature, bound_args)
51
57
 
52
58
  self.agg_init_args = {}
53
59
  if self.is_agg_fn_call:
@@ -58,10 +64,6 @@ class FunctionCall(Expr):
58
64
  bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
59
65
 
60
66
  # construct components, args, kwargs
61
-
62
- # Tuple[int, Any]:
63
- # - for Exprs: (index into components, None)
64
- # - otherwise: (None, val)
65
67
  self.args = []
66
68
  self.kwargs = {}
67
69
 
@@ -131,7 +133,7 @@ class FunctionCall(Expr):
131
133
 
132
134
  self.id = self._create_id()
133
135
 
134
- def _create_rowid_refs(self, tbl: catalog.Table) -> List[Expr]:
136
+ def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
135
137
  target = tbl._tbl_version_path.tbl_version
136
138
  return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
137
139
 
@@ -141,7 +143,7 @@ class FunctionCall(Expr):
141
143
  return super().default_column_name()
142
144
 
143
145
  @classmethod
144
- def normalize_args(cls, signature: func.Signature, bound_args: Dict[str, Any]) -> None:
146
+ def normalize_args(cls, fn_name: str, signature: func.Signature, bound_args: dict[str, Any]) -> None:
145
147
  """Converts all args to Exprs and checks that they are compatible with signature.
146
148
 
147
149
  Updates bound_args in place, where necessary.
@@ -161,9 +163,7 @@ class FunctionCall(Expr):
161
163
 
162
164
  if isinstance(arg, list) or isinstance(arg, tuple):
163
165
  try:
164
- # If the column type is JsonType, force the literal to be JSON
165
- is_json = is_var_param or (param.col_type is not None and param.col_type.is_json_type())
166
- arg = InlineArray(arg, force_json=is_json)
166
+ arg = InlineList(arg)
167
167
  bound_args[param_name] = arg
168
168
  continue
169
169
  except excs.Error:
@@ -175,7 +175,7 @@ class FunctionCall(Expr):
175
175
  try:
176
176
  _ = json.dumps(arg)
177
177
  except TypeError:
178
- raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg}')
178
+ raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg} (of type {type(arg)})')
179
179
  if arg is not None:
180
180
  try:
181
181
  param_type = param.col_type
@@ -213,7 +213,7 @@ class FunctionCall(Expr):
213
213
  or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
214
214
  ):
215
215
  raise excs.Error(
216
- f'Parameter {param_name}: argument type {arg.col_type} does not match parameter type '
216
+ f'Parameter {param_name} (in function {fn_name}): argument type {arg.col_type} does not match parameter type '
217
217
  f'{param.col_type}')
218
218
 
219
219
  def _equals(self, other: FunctionCall) -> bool:
@@ -232,7 +232,7 @@ class FunctionCall(Expr):
232
232
  return False
233
233
  return True
234
234
 
235
- def _id_attrs(self) -> List[Tuple[str, Any]]:
235
+ def _id_attrs(self) -> list[tuple[str, Any]]:
236
236
  return super()._id_attrs() + [
237
237
  ('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
238
238
  ('args', self.args),
@@ -253,11 +253,11 @@ class FunctionCall(Expr):
253
253
  return f'{fn_name}({self._print_args()})'
254
254
 
255
255
  def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
256
+ def print_arg(arg: Any) -> str:
257
+ return repr(arg) if isinstance(arg, str) else str(arg)
256
258
  arg_strs = [
257
- str(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
259
+ print_arg(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
258
260
  ]
259
- def print_arg(arg: Any) -> str:
260
- return f"'{arg}'" if isinstance(arg, str) else str(arg)
261
261
  arg_strs.extend([
262
262
  f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
263
263
  for param_name, (idx, arg) in self.kwargs.items()
@@ -273,15 +273,15 @@ class FunctionCall(Expr):
273
273
  separator = ', ' if inline else ',\n '
274
274
  return separator.join(arg_strs)
275
275
 
276
- def has_group_by(self) -> List[Expr]:
276
+ def has_group_by(self) -> list[Expr]:
277
277
  return self.group_by_stop_idx != 0
278
278
 
279
279
  @property
280
- def group_by(self) -> List[Expr]:
280
+ def group_by(self) -> list[Expr]:
281
281
  return self.components[self.group_by_start_idx:self.group_by_stop_idx]
282
282
 
283
283
  @property
284
- def order_by(self) -> List[Expr]:
284
+ def order_by(self) -> list[Expr]:
285
285
  return self.components[self.order_by_start_idx:]
286
286
 
287
287
  @property
@@ -291,20 +291,42 @@ class FunctionCall(Expr):
291
291
  or self.has_group_by() \
292
292
  or (len(self.order_by) > 0 and not self.fn.requires_order_by))
293
293
 
294
- def get_window_sort_exprs(self) -> Tuple[List[Expr], List[Expr]]:
294
+ def get_window_sort_exprs(self) -> tuple[list[Expr], list[Expr]]:
295
295
  return self.group_by, self.order_by
296
296
 
297
297
  @property
298
298
  def is_agg_fn_call(self) -> bool:
299
299
  return isinstance(self.fn, func.AggregateFunction)
300
300
 
301
- def get_agg_order_by(self) -> List[Expr]:
301
+ def get_agg_order_by(self) -> list[Expr]:
302
302
  assert self.is_agg_fn_call
303
303
  return self.order_by
304
304
 
305
- def sql_expr(self) -> Optional[sql.ClauseElement]:
306
- # TODO: implement for standard aggregate functions
307
- return None
305
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
306
+ # try to construct args and kwargs to call self.fn._to_sql()
307
+ kwargs: dict[str, sql.ColumnElement] = {}
308
+ for param_name, (component_idx, arg) in self.kwargs.items():
309
+ param = self.fn.signature.parameters[param_name]
310
+ assert param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD
311
+ if component_idx is None:
312
+ kwargs[param_name] = sql.literal(arg)
313
+ else:
314
+ arg_element = sql_elements.get(self.components[component_idx])
315
+ if arg_element is None:
316
+ return None
317
+ kwargs[param_name] = arg_element
318
+
319
+ args: list[sql.ColumnElement] = []
320
+ for _, (component_idx, arg) in enumerate(self.args):
321
+ if component_idx is None:
322
+ args.append(sql.literal(arg))
323
+ else:
324
+ arg_element = sql_elements.get(self.components[component_idx])
325
+ if arg_element is None:
326
+ return None
327
+ args.append(arg_element)
328
+ result = self.fn._to_sql(*args, **kwargs)
329
+ return result
308
330
 
309
331
  def reset_agg(self) -> None:
310
332
  """
@@ -322,9 +344,9 @@ class FunctionCall(Expr):
322
344
  args, kwargs = self._make_args(data_row)
323
345
  self.aggregator.update(*args, **kwargs)
324
346
 
325
- def _make_args(self, data_row: DataRow) -> Tuple[List[Any], Dict[str, Any]]:
347
+ def _make_args(self, data_row: DataRow) -> tuple[list[Any], dict[str, Any]]:
326
348
  """Return args and kwargs, constructed for data_row"""
327
- kwargs: Dict[str, Any] = {}
349
+ kwargs: dict[str, Any] = {}
328
350
  for param_name, (component_idx, arg) in self.kwargs.items():
329
351
  val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
330
352
  param = self.fn.signature.parameters[param_name]
@@ -335,7 +357,7 @@ class FunctionCall(Expr):
335
357
  assert param.kind != inspect.Parameter.VAR_POSITIONAL
336
358
  kwargs[param_name] = val
337
359
 
338
- args: List[Any] = []
360
+ args: list[Any] = []
339
361
  for param_idx, (component_idx, arg) in enumerate(self.args):
340
362
  val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
341
363
  param = self.fn.signature.parameters_by_pos[param_idx]
@@ -393,7 +415,7 @@ class FunctionCall(Expr):
393
415
  else:
394
416
  data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
395
417
 
396
- def _as_dict(self) -> Dict:
418
+ def _as_dict(self) -> dict:
397
419
  result = {
398
420
  'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
399
421
  'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
@@ -403,7 +425,7 @@ class FunctionCall(Expr):
403
425
  return result
404
426
 
405
427
  @classmethod
406
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
428
+ def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
407
429
  assert 'fn' in d
408
430
  assert 'args' in d
409
431
  assert 'kwargs' in d