pixeltable 0.2.24__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (101)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/dir.py +6 -0
  5. pixeltable/catalog/globals.py +25 -0
  6. pixeltable/catalog/named_function.py +4 -0
  7. pixeltable/catalog/path_dict.py +37 -11
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +531 -251
  10. pixeltable/catalog/table_version.py +22 -8
  11. pixeltable/catalog/view.py +8 -7
  12. pixeltable/dataframe.py +439 -105
  13. pixeltable/env.py +19 -5
  14. pixeltable/exec/__init__.py +1 -1
  15. pixeltable/exec/exec_node.py +6 -7
  16. pixeltable/exec/expr_eval_node.py +1 -1
  17. pixeltable/exec/sql_node.py +92 -45
  18. pixeltable/exprs/__init__.py +1 -0
  19. pixeltable/exprs/arithmetic_expr.py +1 -1
  20. pixeltable/exprs/array_slice.py +1 -1
  21. pixeltable/exprs/column_property_ref.py +1 -1
  22. pixeltable/exprs/column_ref.py +29 -2
  23. pixeltable/exprs/comparison.py +1 -1
  24. pixeltable/exprs/compound_predicate.py +1 -1
  25. pixeltable/exprs/expr.py +12 -5
  26. pixeltable/exprs/expr_set.py +8 -0
  27. pixeltable/exprs/function_call.py +147 -39
  28. pixeltable/exprs/in_predicate.py +1 -1
  29. pixeltable/exprs/inline_expr.py +25 -5
  30. pixeltable/exprs/is_null.py +1 -1
  31. pixeltable/exprs/json_mapper.py +1 -1
  32. pixeltable/exprs/json_path.py +1 -1
  33. pixeltable/exprs/method_ref.py +1 -1
  34. pixeltable/exprs/row_builder.py +1 -1
  35. pixeltable/exprs/rowid_ref.py +1 -1
  36. pixeltable/exprs/similarity_expr.py +17 -7
  37. pixeltable/exprs/sql_element_cache.py +4 -0
  38. pixeltable/exprs/type_cast.py +2 -2
  39. pixeltable/exprs/variable.py +3 -0
  40. pixeltable/func/__init__.py +5 -4
  41. pixeltable/func/aggregate_function.py +151 -68
  42. pixeltable/func/callable_function.py +48 -16
  43. pixeltable/func/expr_template_function.py +64 -23
  44. pixeltable/func/function.py +227 -23
  45. pixeltable/func/function_registry.py +2 -1
  46. pixeltable/func/query_template_function.py +51 -9
  47. pixeltable/func/signature.py +65 -7
  48. pixeltable/func/tools.py +153 -0
  49. pixeltable/func/udf.py +57 -35
  50. pixeltable/functions/__init__.py +2 -2
  51. pixeltable/functions/anthropic.py +51 -4
  52. pixeltable/functions/gemini.py +85 -0
  53. pixeltable/functions/globals.py +54 -34
  54. pixeltable/functions/huggingface.py +10 -28
  55. pixeltable/functions/json.py +3 -8
  56. pixeltable/functions/math.py +67 -0
  57. pixeltable/functions/mistralai.py +0 -2
  58. pixeltable/functions/ollama.py +8 -8
  59. pixeltable/functions/openai.py +51 -4
  60. pixeltable/functions/timestamp.py +1 -1
  61. pixeltable/functions/video.py +3 -9
  62. pixeltable/functions/vision.py +1 -1
  63. pixeltable/globals.py +374 -89
  64. pixeltable/index/embedding_index.py +106 -29
  65. pixeltable/io/__init__.py +1 -1
  66. pixeltable/io/label_studio.py +1 -1
  67. pixeltable/io/parquet.py +39 -19
  68. pixeltable/iterators/__init__.py +1 -0
  69. pixeltable/iterators/document.py +12 -0
  70. pixeltable/iterators/image.py +100 -0
  71. pixeltable/iterators/video.py +7 -8
  72. pixeltable/metadata/__init__.py +1 -1
  73. pixeltable/metadata/converters/convert_16.py +2 -1
  74. pixeltable/metadata/converters/convert_17.py +2 -1
  75. pixeltable/metadata/converters/convert_22.py +17 -0
  76. pixeltable/metadata/converters/convert_23.py +35 -0
  77. pixeltable/metadata/converters/convert_24.py +56 -0
  78. pixeltable/metadata/converters/convert_25.py +19 -0
  79. pixeltable/metadata/converters/util.py +4 -2
  80. pixeltable/metadata/notes.py +4 -0
  81. pixeltable/metadata/schema.py +1 -0
  82. pixeltable/plan.py +129 -51
  83. pixeltable/store.py +1 -1
  84. pixeltable/type_system.py +196 -54
  85. pixeltable/utils/arrow.py +8 -3
  86. pixeltable/utils/description_helper.py +89 -0
  87. pixeltable/utils/documents.py +14 -0
  88. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/METADATA +32 -22
  89. pixeltable-0.3.0.dist-info/RECORD +155 -0
  90. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
  91. pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
  92. pixeltable/tool/create_test_db_dump.py +0 -308
  93. pixeltable/tool/create_test_video.py +0 -81
  94. pixeltable/tool/doc_plugins/griffe.py +0 -50
  95. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  96. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  97. pixeltable/tool/embed_udf.py +0 -9
  98. pixeltable/tool/mypy_plugin.py +0 -55
  99. pixeltable-0.2.24.dist-info/RECORD +0 -153
  100. pixeltable-0.2.24.dist-info/entry_points.txt +0 -3
  101. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0
@@ -15,6 +15,7 @@ import pixeltable.type_system as ts
15
15
  from .data_row import DataRow
16
16
  from .expr import Expr
17
17
  from .inline_expr import InlineDict, InlineList
18
+ from .literal import Literal
18
19
  from .row_builder import RowBuilder
19
20
  from .rowid_ref import RowidRef
20
21
  from .sql_element_cache import SqlElementCache
@@ -34,6 +35,7 @@ class FunctionCall(Expr):
34
35
 
35
36
  arg_types: list[ts.ColumnType]
36
37
  kwarg_types: dict[str, ts.ColumnType]
38
+ return_type: ts.ColumnType
37
39
  group_by_start_idx: int
38
40
  group_by_stop_idx: int
39
41
  fn_expr_idx: int
@@ -43,17 +45,25 @@ class FunctionCall(Expr):
43
45
  current_partition_vals: Optional[list[Any]]
44
46
 
45
47
  def __init__(
46
- self, fn: func.Function, bound_args: dict[str, Any], order_by_clause: Optional[list[Any]] = None,
47
- group_by_clause: Optional[list[Any]] = None, is_method_call: bool = False):
48
+ self,
49
+ fn: func.Function,
50
+ bound_args: dict[str, Any],
51
+ return_type: ts.ColumnType,
52
+ order_by_clause: Optional[list[Any]] = None,
53
+ group_by_clause: Optional[list[Any]] = None,
54
+ is_method_call: bool = False
55
+ ):
48
56
  if order_by_clause is None:
49
57
  order_by_clause = []
50
58
  if group_by_clause is None:
51
59
  group_by_clause = []
52
- signature = fn.signature
53
- return_type = fn.call_return_type(bound_args)
60
+
61
+ assert not fn.is_polymorphic
62
+
54
63
  self.fn = fn
55
64
  self.is_method_call = is_method_call
56
- self.normalize_args(fn.name, signature, bound_args)
65
+
66
+ signature = fn.signature
57
67
 
58
68
  # If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
59
69
  # parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
@@ -67,6 +77,8 @@ class FunctionCall(Expr):
67
77
  return_type = return_type.copy(nullable=True)
68
78
  break
69
79
 
80
+ self.return_type = return_type
81
+
70
82
  super().__init__(return_type)
71
83
 
72
84
  self.agg_init_args = {}
@@ -74,9 +86,9 @@ class FunctionCall(Expr):
74
86
  # we separate out the init args for the aggregator
75
87
  assert isinstance(fn, func.AggregateFunction)
76
88
  self.agg_init_args = {
77
- arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
89
+ arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names[0]
78
90
  }
79
- bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
91
+ bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]}
80
92
 
81
93
  # construct components, args, kwargs
82
94
  self.args = []
@@ -85,8 +97,10 @@ class FunctionCall(Expr):
85
97
  # we record the types of non-variable parameters for runtime type checks
86
98
  self.arg_types = []
87
99
  self.kwarg_types = {}
100
+
88
101
  # the prefix of parameters that are bound can be passed by position
89
- for py_param in fn.signature.py_signature.parameters.values():
102
+ processed_args: set[str] = set()
103
+ for py_param in signature.py_signature.parameters.values():
90
104
  if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
91
105
  break
92
106
  arg = bound_args[py_param.name]
@@ -97,18 +111,19 @@ class FunctionCall(Expr):
97
111
  self.args.append((None, arg))
98
112
  if py_param.kind != inspect.Parameter.VAR_POSITIONAL and py_param.kind != inspect.Parameter.VAR_KEYWORD:
99
113
  self.arg_types.append(signature.parameters[py_param.name].col_type)
114
+ processed_args.add(py_param.name)
100
115
 
101
116
  # the remaining args are passed as keywords
102
- kw_param_names = set(bound_args.keys()) - set(list(fn.signature.py_signature.parameters.keys())[:len(self.args)])
103
- for param_name in kw_param_names:
104
- arg = bound_args[param_name]
105
- if isinstance(arg, Expr):
106
- self.kwargs[param_name] = (len(self.components), None)
107
- self.components.append(arg.copy())
108
- else:
109
- self.kwargs[param_name] = (None, arg)
110
- if fn.signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
111
- self.kwarg_types[param_name] = signature.parameters[param_name].col_type
117
+ for param_name in bound_args.keys():
118
+ if param_name not in processed_args:
119
+ arg = bound_args[param_name]
120
+ if isinstance(arg, Expr):
121
+ self.kwargs[param_name] = (len(self.components), None)
122
+ self.components.append(arg.copy())
123
+ else:
124
+ self.kwargs[param_name] = (None, arg)
125
+ if signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
126
+ self.kwarg_types[param_name] = signature.parameters[param_name].col_type
112
127
 
113
128
  # window function state:
114
129
  # self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
@@ -126,7 +141,7 @@ class FunctionCall(Expr):
126
141
 
127
142
  if isinstance(self.fn, func.ExprTemplateFunction):
128
143
  # we instantiate the template to create an Expr that can be evaluated and record that as a component
129
- fn_expr = self.fn.instantiate(**bound_args)
144
+ fn_expr = self.fn.instantiate([], bound_args)
130
145
  self.components.append(fn_expr)
131
146
  self.fn_expr_idx = len(self.components) - 1
132
147
  else:
@@ -184,11 +199,6 @@ class FunctionCall(Expr):
184
199
  pass
185
200
 
186
201
  if not isinstance(arg, Expr):
187
- # make sure that non-Expr args are json-serializable and are literals of the correct type
188
- try:
189
- _ = json.dumps(arg)
190
- except TypeError:
191
- raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg} (of type {type(arg)})')
192
202
  if arg is not None:
193
203
  try:
194
204
  param_type = param.col_type
@@ -255,7 +265,7 @@ class FunctionCall(Expr):
255
265
  ('order_by_start_idx', self.order_by_start_idx)
256
266
  ]
257
267
 
258
- def __str__(self) -> str:
268
+ def __repr__(self) -> str:
259
269
  return self.display_str()
260
270
 
261
271
  def display_str(self, inline: bool = True) -> str:
@@ -357,7 +367,7 @@ class FunctionCall(Expr):
357
367
  """
358
368
  assert self.is_agg_fn_call
359
369
  assert isinstance(self.fn, func.AggregateFunction)
360
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
370
+ self.aggregator = self.fn.agg_class(**self.agg_init_args)
361
371
 
362
372
  def update(self, data_row: DataRow) -> None:
363
373
  """
@@ -429,27 +439,32 @@ class FunctionCall(Expr):
429
439
  data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
430
440
  elif self.is_window_fn_call:
431
441
  assert isinstance(self.fn, func.AggregateFunction)
442
+ agg_cls = self.fn.agg_class
432
443
  if self.has_group_by():
433
444
  if self.current_partition_vals is None:
434
445
  self.current_partition_vals = [None] * len(self.group_by)
435
446
  partition_vals = [data_row[e.slot_idx] for e in self.group_by]
436
447
  if partition_vals != self.current_partition_vals:
437
448
  # new partition
438
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
449
+ self.aggregator = agg_cls(**self.agg_init_args)
439
450
  self.current_partition_vals = partition_vals
440
451
  elif self.aggregator is None:
441
- self.aggregator = self.fn.agg_cls(**self.agg_init_args)
452
+ self.aggregator = agg_cls(**self.agg_init_args)
442
453
  self.aggregator.update(*args)
443
454
  data_row[self.slot_idx] = self.aggregator.value()
444
455
  else:
445
- data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
456
+ data_row[self.slot_idx] = self.fn.exec(args, kwargs)
446
457
 
447
458
  def _as_dict(self) -> dict:
448
459
  result = {
449
- 'fn': self.fn.as_dict(), 'args': self.args, 'kwargs': self.kwargs,
450
- 'group_by_start_idx': self.group_by_start_idx, 'group_by_stop_idx': self.group_by_stop_idx,
460
+ 'fn': self.fn.as_dict(),
461
+ 'args': self.args,
462
+ 'kwargs': self.kwargs,
463
+ 'return_type': self.return_type.as_dict(),
464
+ 'group_by_start_idx': self.group_by_start_idx,
465
+ 'group_by_stop_idx': self.group_by_stop_idx,
451
466
  'order_by_start_idx': self.order_by_start_idx,
452
- **super()._as_dict()
467
+ **super()._as_dict(),
453
468
  }
454
469
  return result
455
470
 
@@ -458,15 +473,108 @@ class FunctionCall(Expr):
458
473
  assert 'fn' in d
459
474
  assert 'args' in d
460
475
  assert 'kwargs' in d
461
- # reassemble bound args
476
+
462
477
  fn = func.Function.from_dict(d['fn'])
463
- param_names = list(fn.signature.parameters.keys())
464
- bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
465
- bound_args.update(
466
- {param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
478
+ assert not fn.is_polymorphic
479
+ return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
467
480
  group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
468
481
  order_by_exprs = components[d['order_by_start_idx']:]
482
+
483
+ args = [
484
+ expr if idx is None else components[idx]
485
+ for idx, expr in d['args']
486
+ ]
487
+ kwargs = {
488
+ param_name: (expr if idx is None else components[idx])
489
+ for param_name, (idx, expr) in d['kwargs'].items()
490
+ }
491
+
492
+ # `Function.from_dict()` does signature matching, so it is safe to assume that `args` and `kwargs` are
493
+ # consistent with its signature.
494
+
495
+ # Reassemble bound_args. Note that args and kwargs represent "already bound arguments": they are not bindable
496
+ # in the Python sense, because variable args (such as *args and **kwargs) have already been condensed.
497
+ param_names = list(fn.signature.parameters.keys())
498
+ bound_args = {param_names[i]: arg for i, arg in enumerate(args)}
499
+ bound_args.update(kwargs.items())
500
+
501
+ # TODO: In order to properly invoke call_return_type, we need to ensure that any InlineLists or InlineDicts
502
+ # in bound_args are unpacked into Python lists/dicts. There is an open task to ensure this is true in general;
503
+ # for now, as a hack, we do the unpacking here for the specific case of an InlineList of Literals (the only
504
+ # case where this is necessary to support existing conditional_return_type implementations). Once the general
505
+ # pattern is implemented, we can remove this hack.
506
+ unpacked_bound_args = {
507
+ param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()
508
+ }
509
+
510
+ # Evaluate the call_return_type as defined in the current codebase.
511
+ call_return_type = fn.call_return_type([], unpacked_bound_args)
512
+
513
+ if return_type is None:
514
+ # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
515
+ # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
516
+ # the call_return_type that we just inferred (which matches the deserialization behavior prior to
517
+ # version 25).
518
+ return_type = call_return_type
519
+ else:
520
+ # There is a return_type stored in metadata (schema version >= 25).
521
+ # Check that the stored return_type of the UDF call matches the column type of the FunctionCall, and
522
+ # fail-fast if it doesn't (otherwise we risk getting downstream database errors).
523
+ # TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
524
+ # mark this FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or Function
525
+ # signature mismatch.
526
+ if not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
527
+ raise excs.Error(
528
+ f'The return type stored in the database for a UDF call to `{fn.self_path}` no longer matches the '
529
+ f'return type of the UDF as currently defined in the code.\nThis probably means that the code for '
530
+ f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
531
+ f'Return type in database: `{return_type}`\n'
532
+ f'Return type as currently defined: `{call_return_type}`'
533
+ )
534
+
469
535
  fn_call = cls(
470
- func.Function.from_dict(d['fn']), bound_args, group_by_clause=group_by_exprs,
471
- order_by_clause=order_by_exprs)
536
+ fn,
537
+ bound_args,
538
+ return_type,
539
+ group_by_clause=group_by_exprs,
540
+ order_by_clause=order_by_exprs
541
+ )
472
542
  return fn_call
543
+
544
+ @classmethod
545
+ def __find_matching_signature(cls, fn: func.Function, args: list[Any], kwargs: dict[str, Any]) -> Optional[int]:
546
+ for idx, sig in enumerate(fn.signatures):
547
+ if cls.__signature_matches(sig, args, kwargs):
548
+ return idx
549
+ return None
550
+
551
+ @classmethod
552
+ def __signature_matches(cls, sig: func.Signature, args: list[Any], kwargs: dict[str, Any]) -> bool:
553
+ unbound_parameters = set(sig.parameters.keys())
554
+ for i, arg in enumerate(args):
555
+ if i >= len(sig.parameters_by_pos):
556
+ return False
557
+ param = sig.parameters_by_pos[i]
558
+ arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
559
+ if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
560
+ return False
561
+ unbound_parameters.remove(param.name)
562
+ for param_name, arg in kwargs.items():
563
+ if param_name not in unbound_parameters:
564
+ return False
565
+ param = sig.parameters[param_name]
566
+ arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
567
+ if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
568
+ return False
569
+ unbound_parameters.remove(param_name)
570
+ for param_name in unbound_parameters:
571
+ param = sig.parameters[param_name]
572
+ if not param.has_default:
573
+ return False
574
+ return True
575
+
576
+ @classmethod
577
+ def __unpack_bound_arg(cls, arg: Any) -> Any:
578
+ if isinstance(arg, InlineList) and all(isinstance(el, Literal) for el in arg.components):
579
+ return [el.val for el in arg.components]
580
+ return arg
@@ -61,7 +61,7 @@ class InPredicate(Expr):
61
61
  pass
62
62
  return result
63
63
 
64
- def __str__(self) -> str:
64
+ def __repr__(self) -> str:
65
65
  if self.value_list is not None:
66
66
  return f'{self.components[0]}.isin({self.value_list})'
67
67
  return f'{self.components[0]}.isin({self.components[1]})'
@@ -56,7 +56,7 @@ class InlineArray(Expr):
56
56
  self.components.extend(exprs)
57
57
  self.id = self._create_id()
58
58
 
59
- def __str__(self) -> str:
59
+ def __repr__(self) -> str:
60
60
  elem_strs = [str(expr) for expr in self.components]
61
61
  return f'[{", ".join(elem_strs)}]'
62
62
 
@@ -101,11 +101,17 @@ class InlineList(Expr):
101
101
  else:
102
102
  exprs.append(Literal(el))
103
103
 
104
- super().__init__(ts.JsonType())
104
+ json_schema = {
105
+ 'type': 'array',
106
+ 'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
107
+ 'items': False # No additional items (fixed length)
108
+ }
109
+
110
+ super().__init__(ts.JsonType(json_schema))
105
111
  self.components.extend(exprs)
106
112
  self.id = self._create_id()
107
113
 
108
- def __str__(self) -> str:
114
+ def __repr__(self) -> str:
109
115
  elem_strs = [str(expr) for expr in self.components]
110
116
  return f'[{", ".join(elem_strs)}]'
111
117
 
@@ -149,11 +155,25 @@ class InlineDict(Expr):
149
155
  else:
150
156
  exprs.append(Literal(val))
151
157
 
152
- super().__init__(ts.JsonType())
158
+ json_schema: Optional[dict[str, Any]]
159
+ try:
160
+ json_schema = {
161
+ 'type': 'object',
162
+ 'properties': {
163
+ key: expr.col_type.to_json_schema()
164
+ for key, expr in zip(self.keys, exprs)
165
+ },
166
+ }
167
+ except excs.Error:
168
+ # InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
169
+ # so we can't always construct a valid schema.
170
+ json_schema = None
171
+
172
+ super().__init__(ts.JsonType(json_schema))
153
173
  self.components.extend(exprs)
154
174
  self.id = self._create_id()
155
175
 
156
- def __str__(self) -> str:
176
+ def __repr__(self) -> str:
157
177
  item_strs = list(f"'{key}': {str(expr)}" for key, expr in zip(self.keys, self.components))
158
178
  return '{' + ', '.join(item_strs) + '}'
159
179
 
@@ -18,7 +18,7 @@ class IsNull(Expr):
18
18
  self.components = [e]
19
19
  self.id = self._create_id()
20
20
 
21
- def __str__(self) -> str:
21
+ def __repr__(self) -> str:
22
22
  return f'{str(self.components[0])} == None'
23
23
 
24
24
  def _equals(self, other: IsNull) -> bool:
@@ -69,7 +69,7 @@ class JsonMapper(Expr):
69
69
  return False
70
70
  return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
71
71
 
72
- def __str__(self) -> str:
72
+ def __repr__(self) -> str:
73
73
  return f'{str(self._src_expr)} >> {str(self._target_expr)}'
74
74
 
75
75
  @property
@@ -42,7 +42,7 @@ class JsonPath(Expr):
42
42
  # this is not a problem, because _create_id() shouldn't be called after init()
43
43
  self.id = self._create_id()
44
44
 
45
- def __str__(self) -> str:
45
+ def __repr__(self) -> str:
46
46
  # else "R": the anchor is RELATIVE_PATH_ROOT
47
47
  return (f'{str(self._anchor) if self._anchor is not None else "R"}'
48
48
  f'{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}')
@@ -60,5 +60,5 @@ class MethodRef(Expr):
60
60
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
61
61
  assert False, 'MethodRef cannot be evaluated directly'
62
62
 
63
- def __str__(self) -> str:
63
+ def __repr__(self) -> str:
64
64
  return f'{self.base_expr}.{self.method_name}'
@@ -368,7 +368,7 @@ class RowBuilder:
368
368
  if not ignore_errors:
369
369
  input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
370
370
  raise excs.ExprEvalError(
371
- expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)
371
+ expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0) from exc
372
372
 
373
373
  def create_table_row(self, data_row: DataRow, exc_col_ids: set[int]) -> tuple[dict[str, Any], int]:
374
374
  """Create a table row from the slots that have an output column assigned
@@ -55,7 +55,7 @@ class RowidRef(Expr):
55
55
  return super()._id_attrs() +\
56
56
  [('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
57
57
 
58
- def __str__(self) -> str:
58
+ def __repr__(self) -> str:
59
59
  # check if this is the pos column of a component view
60
60
  tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
61
61
  if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx: # type: ignore[attr-defined]
@@ -23,7 +23,6 @@ class SimilarityExpr(Expr):
23
23
  assert item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()
24
24
 
25
25
  self.components = [col_ref, item_expr]
26
- self.id = self._create_id()
27
26
 
28
27
  # determine index to use
29
28
  idx_info = col_ref.col.get_idx_info()
@@ -48,16 +47,23 @@ class SimilarityExpr(Expr):
48
47
 
49
48
  if item_expr.col_type.is_string_type() and idx.string_embed is None:
50
49
  raise excs.Error(
51
- f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} was created without the '
52
- f"'string_embed' parameter and does not support string queries")
50
+ f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} does not have a '
51
+ f"string embedding and does not support string queries")
53
52
  if item_expr.col_type.is_image_type() and idx.image_embed is None:
54
53
  raise excs.Error(
55
- f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} was created without the '
56
- f"'image_embed' parameter and does not support image queries")
54
+ f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} does not have an '
55
+ f"image embedding and does not support image queries")
56
+ self.id = self._create_id()
57
57
 
58
- def __str__(self) -> str:
58
+ def __repr__(self) -> str:
59
59
  return f'{self.components[0]}.similarity({self.components[1]})'
60
60
 
61
+ def _id_attrs(self):
62
+ return super()._id_attrs() + [('idx_name', self.idx_info.name)]
63
+
64
+ def default_column_name(self) -> str:
65
+ return 'similarity'
66
+
61
67
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
62
68
  if not isinstance(self.components[1], Literal):
63
69
  raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
@@ -78,8 +84,12 @@ class SimilarityExpr(Expr):
78
84
  # this should never get called
79
85
  assert False
80
86
 
87
+ def _as_dict(self) -> dict:
88
+ return {'idx_name': self.idx_info.name, **super()._as_dict()}
89
+
81
90
  @classmethod
82
91
  def _from_dict(cls, d: dict, components: list[Expr]) -> 'SimilarityExpr':
92
+ iname = d['idx_name'] if 'idx_name' in d else None
83
93
  assert len(components) == 2
84
94
  assert isinstance(components[0], ColumnRef)
85
- return cls(components[0], components[1])
95
+ return cls(components[0], components[1], idx_name=iname)
@@ -17,6 +17,10 @@ class SqlElementCache:
17
17
  for e, el in elements.items():
18
18
  self.cache[e.id] = el
19
19
 
20
+ def extend(self, elements: ExprDict[sql.ColumnElement]):
21
+ for e, el in elements.items():
22
+ self.cache[e.id] = el
23
+
20
24
  def get(self, e: Expr) -> Optional[sql.ColumnElement]:
21
25
  """Returns the sql.ColumnElement for the given Expr, or None if Expr.to_sql() returns None."""
22
26
  try:
@@ -51,5 +51,5 @@ class TypeCast(Expr):
51
51
  assert len(components) == 1
52
52
  return cls(components[0], ts.ColumnType.from_dict(d['new_type']))
53
53
 
54
- def __str__(self) -> str:
55
- return f'{self._underlying}.astype({self.col_type})'
54
+ def __repr__(self) -> str:
55
+ return f'{self._underlying}.astype({self.col_type._to_str(as_schema=True)})'
@@ -33,6 +33,9 @@ class Variable(Expr):
33
33
  def __str__(self) -> str:
34
34
  return self.name
35
35
 
36
+ def __repr__(self) -> str:
37
+ return f"Variable('{self.name}')"
38
+
36
39
  def sql_expr(self, _: SqlElementCache) -> NoReturn:
37
40
  raise NotImplementedError()
38
41
 
@@ -1,8 +1,9 @@
1
- from .aggregate_function import Aggregator, AggregateFunction, uda
1
+ from .aggregate_function import AggregateFunction, Aggregator, uda
2
2
  from .callable_function import CallableFunction
3
3
  from .expr_template_function import ExprTemplateFunction
4
4
  from .function import Function
5
5
  from .function_registry import FunctionRegistry
6
- from .query_template_function import QueryTemplateFunction
7
- from .signature import Signature, Parameter, Batch
8
- from .udf import udf, make_function, expr_udf
6
+ from .query_template_function import QueryTemplateFunction, query
7
+ from .signature import Batch, Parameter, Signature
8
+ from .tools import Tool, ToolChoice, Tools
9
+ from .udf import expr_udf, make_function, udf