pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (127)
  1. pixeltable/__init__.py +5 -3
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -0
  4. pixeltable/catalog/catalog.py +335 -128
  5. pixeltable/catalog/column.py +22 -5
  6. pixeltable/catalog/dir.py +19 -6
  7. pixeltable/catalog/insertable_table.py +34 -37
  8. pixeltable/catalog/named_function.py +0 -4
  9. pixeltable/catalog/schema_object.py +28 -42
  10. pixeltable/catalog/table.py +193 -158
  11. pixeltable/catalog/table_version.py +191 -232
  12. pixeltable/catalog/table_version_handle.py +50 -0
  13. pixeltable/catalog/table_version_path.py +49 -33
  14. pixeltable/catalog/view.py +56 -96
  15. pixeltable/config.py +103 -0
  16. pixeltable/dataframe.py +89 -89
  17. pixeltable/env.py +98 -168
  18. pixeltable/exec/aggregation_node.py +5 -4
  19. pixeltable/exec/cache_prefetch_node.py +1 -1
  20. pixeltable/exec/component_iteration_node.py +13 -9
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +0 -4
  23. pixeltable/exec/exec_node.py +3 -2
  24. pixeltable/exec/expr_eval/schedulers.py +2 -1
  25. pixeltable/exec/in_memory_data_node.py +9 -4
  26. pixeltable/exec/row_update_node.py +1 -2
  27. pixeltable/exec/sql_node.py +20 -16
  28. pixeltable/exprs/__init__.py +2 -0
  29. pixeltable/exprs/arithmetic_expr.py +7 -11
  30. pixeltable/exprs/array_slice.py +1 -1
  31. pixeltable/exprs/column_property_ref.py +3 -3
  32. pixeltable/exprs/column_ref.py +12 -13
  33. pixeltable/exprs/comparison.py +3 -6
  34. pixeltable/exprs/compound_predicate.py +4 -4
  35. pixeltable/exprs/expr.py +31 -22
  36. pixeltable/exprs/expr_dict.py +3 -3
  37. pixeltable/exprs/expr_set.py +1 -1
  38. pixeltable/exprs/function_call.py +110 -80
  39. pixeltable/exprs/globals.py +3 -3
  40. pixeltable/exprs/in_predicate.py +1 -1
  41. pixeltable/exprs/inline_expr.py +3 -3
  42. pixeltable/exprs/is_null.py +1 -1
  43. pixeltable/exprs/json_mapper.py +2 -2
  44. pixeltable/exprs/json_path.py +17 -10
  45. pixeltable/exprs/literal.py +1 -1
  46. pixeltable/exprs/method_ref.py +2 -2
  47. pixeltable/exprs/row_builder.py +8 -17
  48. pixeltable/exprs/rowid_ref.py +21 -10
  49. pixeltable/exprs/similarity_expr.py +5 -5
  50. pixeltable/exprs/sql_element_cache.py +1 -1
  51. pixeltable/exprs/type_cast.py +2 -3
  52. pixeltable/exprs/variable.py +2 -2
  53. pixeltable/ext/__init__.py +2 -0
  54. pixeltable/ext/functions/__init__.py +2 -0
  55. pixeltable/ext/functions/yolox.py +3 -3
  56. pixeltable/func/__init__.py +3 -1
  57. pixeltable/func/aggregate_function.py +9 -9
  58. pixeltable/func/callable_function.py +3 -4
  59. pixeltable/func/expr_template_function.py +6 -16
  60. pixeltable/func/function.py +48 -14
  61. pixeltable/func/function_registry.py +1 -3
  62. pixeltable/func/query_template_function.py +5 -12
  63. pixeltable/func/signature.py +23 -22
  64. pixeltable/func/tools.py +3 -3
  65. pixeltable/func/udf.py +6 -4
  66. pixeltable/functions/__init__.py +2 -0
  67. pixeltable/functions/fireworks.py +7 -4
  68. pixeltable/functions/globals.py +4 -5
  69. pixeltable/functions/huggingface.py +1 -5
  70. pixeltable/functions/image.py +17 -7
  71. pixeltable/functions/llama_cpp.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +4 -4
  74. pixeltable/functions/openai.py +19 -19
  75. pixeltable/functions/string.py +23 -30
  76. pixeltable/functions/timestamp.py +11 -6
  77. pixeltable/functions/together.py +14 -12
  78. pixeltable/functions/util.py +1 -1
  79. pixeltable/functions/video.py +5 -4
  80. pixeltable/functions/vision.py +6 -9
  81. pixeltable/functions/whisper.py +3 -3
  82. pixeltable/globals.py +246 -260
  83. pixeltable/index/__init__.py +2 -0
  84. pixeltable/index/base.py +1 -1
  85. pixeltable/index/btree.py +3 -1
  86. pixeltable/index/embedding_index.py +11 -5
  87. pixeltable/io/external_store.py +11 -12
  88. pixeltable/io/label_studio.py +4 -3
  89. pixeltable/io/parquet.py +57 -56
  90. pixeltable/iterators/__init__.py +4 -2
  91. pixeltable/iterators/audio.py +11 -11
  92. pixeltable/iterators/document.py +10 -10
  93. pixeltable/iterators/string.py +1 -2
  94. pixeltable/iterators/video.py +14 -15
  95. pixeltable/metadata/__init__.py +9 -5
  96. pixeltable/metadata/converters/convert_10.py +0 -1
  97. pixeltable/metadata/converters/convert_15.py +0 -2
  98. pixeltable/metadata/converters/convert_23.py +0 -2
  99. pixeltable/metadata/converters/convert_24.py +3 -3
  100. pixeltable/metadata/converters/convert_25.py +1 -1
  101. pixeltable/metadata/converters/convert_27.py +0 -2
  102. pixeltable/metadata/converters/convert_28.py +0 -2
  103. pixeltable/metadata/converters/convert_29.py +7 -8
  104. pixeltable/metadata/converters/util.py +7 -7
  105. pixeltable/metadata/schema.py +27 -19
  106. pixeltable/plan.py +68 -40
  107. pixeltable/share/__init__.py +2 -0
  108. pixeltable/share/packager.py +15 -12
  109. pixeltable/share/publish.py +3 -5
  110. pixeltable/store.py +37 -38
  111. pixeltable/type_system.py +41 -28
  112. pixeltable/utils/coco.py +4 -4
  113. pixeltable/utils/console_output.py +1 -3
  114. pixeltable/utils/description_helper.py +1 -1
  115. pixeltable/utils/documents.py +3 -3
  116. pixeltable/utils/filecache.py +20 -9
  117. pixeltable/utils/formatter.py +2 -3
  118. pixeltable/utils/media_store.py +1 -1
  119. pixeltable/utils/pytorch.py +1 -1
  120. pixeltable/utils/sql.py +4 -4
  121. pixeltable/utils/transactional_directory.py +2 -1
  122. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
  123. pixeltable-0.3.8.dist-info/RECORD +174 -0
  124. pixeltable-0.3.6.dist-info/RECORD +0 -172
  125. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
  126. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
  127. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
@@ -1,15 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
+ import logging
4
5
  import sys
6
+ from textwrap import dedent
5
7
  from typing import Any, Optional, Sequence, Union
6
8
 
7
9
  import sqlalchemy as sql
8
10
 
9
- import pixeltable.catalog as catalog
10
- import pixeltable.exceptions as excs
11
- import pixeltable.func as func
12
- import pixeltable.type_system as ts
11
+ from pixeltable import catalog, exceptions as excs, func, type_system as ts
13
12
 
14
13
  from .data_row import DataRow
15
14
  from .expr import Expr
@@ -18,6 +17,8 @@ from .row_builder import RowBuilder
18
17
  from .rowid_ref import RowidRef
19
18
  from .sql_element_cache import SqlElementCache
20
19
 
20
+ _logger = logging.getLogger('pixeltable')
21
+
21
22
 
22
23
  class FunctionCall(Expr):
23
24
  fn: func.Function
@@ -45,6 +46,8 @@ class FunctionCall(Expr):
45
46
  aggregator: Optional[Any]
46
47
  current_partition_vals: Optional[list[Any]]
47
48
 
49
+ _validation_error: Optional[str]
50
+
48
51
  def __init__(
49
52
  self,
50
53
  fn: func.Function,
@@ -54,6 +57,7 @@ class FunctionCall(Expr):
54
57
  order_by_clause: Optional[list[Any]] = None,
55
58
  group_by_clause: Optional[list[Any]] = None,
56
59
  is_method_call: bool = False,
60
+ validation_error: Optional[str] = None,
57
61
  ):
58
62
  assert not fn.is_polymorphic
59
63
  assert all(isinstance(arg, Expr) for arg in args)
@@ -76,26 +80,6 @@ class FunctionCall(Expr):
76
80
  self.components.extend(arg.copy() for arg in kwargs.values())
77
81
  self.kwarg_idxs = {name: i + len(args) for i, name in enumerate(kwargs.keys())}
78
82
 
79
- # Now generate bound_idxs for the args and kwargs indices.
80
- # This is guaranteed to work, because at this point the call has already been validated.
81
- # These will be used later to dereference specific parameter values.
82
- bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
83
- self.bound_idxs = bindings.arguments
84
-
85
- # Separately generate bound_args for purposes of determining the resource pool.
86
- bindings = fn.signature.py_signature.bind(*args, **kwargs)
87
- bound_args = bindings.arguments
88
- self.resource_pool = fn.call_resource_pool(bound_args)
89
-
90
- self.agg_init_args = {}
91
- if self.is_agg_fn_call:
92
- # We separate out the init args for the aggregator. Unpack Literals in init args.
93
- assert isinstance(fn, func.AggregateFunction)
94
- for arg_name, arg in bound_args.items():
95
- if arg_name in fn.init_param_names[0]:
96
- assert isinstance(arg, Literal) # This was checked during validate_call
97
- self.agg_init_args[arg_name] = arg.val
98
-
99
83
  # window function state:
100
84
  # self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
101
85
  self.group_by_start_idx, self.group_by_stop_idx = 0, 0
@@ -125,10 +109,35 @@ class FunctionCall(Expr):
125
109
  raise excs.Error(
126
110
  f'order_by argument needs to be a Pixeltable expression, but instead is a {type(order_by_clause[0])}'
127
111
  )
128
- # don't add components after this, everthing after order_by_start_idx is part of the order_by clause
129
112
  self.order_by_start_idx = len(self.components)
130
113
  self.components.extend(order_by_clause)
131
114
 
115
+ self._validation_error = validation_error
116
+
117
+ if validation_error is not None:
118
+ self.resource_pool = None
119
+ return
120
+
121
+ # Now generate bound_idxs for the args and kwargs indices.
122
+ # This is guaranteed to work, because at this point the call has already been validated.
123
+ # These will be used later to dereference specific parameter values.
124
+ bindings = fn.signature.py_signature.bind(*self.arg_idxs, **self.kwarg_idxs)
125
+ self.bound_idxs = bindings.arguments
126
+
127
+ # Separately generate bound_args for purposes of determining the resource pool.
128
+ bindings = fn.signature.py_signature.bind(*args, **kwargs)
129
+ bound_args = bindings.arguments
130
+ self.resource_pool = fn.call_resource_pool(bound_args)
131
+
132
+ self.agg_init_args = {}
133
+ if self.is_agg_fn_call:
134
+ # We separate out the init args for the aggregator. Unpack Literals in init args.
135
+ assert isinstance(fn, func.AggregateFunction)
136
+ for arg_name, arg in bound_args.items():
137
+ if arg_name in fn.init_param_names[0]:
138
+ assert isinstance(arg, Literal) # This was checked during validate_call
139
+ self.agg_init_args[arg_name] = arg.val
140
+
132
141
  # execution state for aggregate functions
133
142
  self.aggregator = None
134
143
  self.current_partition_vals = None
@@ -137,50 +146,50 @@ class FunctionCall(Expr):
137
146
 
138
147
  def _create_rowid_refs(self, tbl: catalog.Table) -> list[Expr]:
139
148
  target = tbl._tbl_version_path.tbl_version
140
- return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
149
+ return [RowidRef(target, i) for i in range(target.get().num_rowid_columns())]
141
150
 
142
151
  def default_column_name(self) -> Optional[str]:
143
152
  return self.fn.name
144
153
 
145
154
  def _equals(self, other: FunctionCall) -> bool:
146
- if self.fn != other.fn:
147
- return False
148
- if self.arg_idxs != other.arg_idxs:
149
- return False
150
- if self.kwarg_idxs != other.kwarg_idxs:
151
- return False
152
- if self.group_by_start_idx != other.group_by_start_idx:
153
- return False
154
- if self.group_by_stop_idx != other.group_by_stop_idx:
155
- return False
156
- if self.order_by_start_idx != other.order_by_start_idx:
157
- return False
158
- return True
155
+ return (
156
+ self.fn == other.fn
157
+ and self.arg_idxs == other.arg_idxs
158
+ and self.kwarg_idxs == other.kwarg_idxs
159
+ and self.group_by_start_idx == other.group_by_start_idx
160
+ and self.group_by_stop_idx == other.group_by_stop_idx
161
+ and self.order_by_start_idx == other.order_by_start_idx
162
+ )
159
163
 
160
164
  def _id_attrs(self) -> list[tuple[str, Any]]:
161
- return super()._id_attrs() + [
165
+ return [
166
+ *super()._id_attrs(),
162
167
  ('fn', id(self.fn)), # use the function pointer, not the fqn, which isn't set for lambdas
163
168
  ('args', self.arg_idxs),
164
169
  ('kwargs', self.kwarg_idxs),
165
170
  ('group_by_start_idx', self.group_by_start_idx),
166
171
  ('group_by_stop_idx', self.group_by_stop_idx),
167
172
  ('fn_expr_idx', self.fn_expr_idx),
168
- ('order_by_idx', self.order_by_start_idx),
173
+ ('order_by_start_idx', self.order_by_start_idx),
169
174
  ]
170
175
 
171
176
  def __repr__(self) -> str:
172
177
  return self.display_str()
173
178
 
179
+ @property
180
+ def validation_error(self) -> Optional[str]:
181
+ return self._validation_error or super().validation_error
182
+
174
183
  def display_str(self, inline: bool = True) -> str:
175
184
  if self.is_method_call:
176
185
  return f'{self.components[0]}.{self.fn.name}({self._print_args(1, inline)})'
177
186
  else:
178
- fn_name = self.fn.display_name if self.fn.display_name != '' else 'anonymous_fn'
187
+ fn_name = self.fn.display_name or 'anonymous_fn'
179
188
  return f'{fn_name}({self._print_args()})'
180
189
 
181
190
  def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
182
191
  arg_strs = [str(self.components[idx]) for idx in self.arg_idxs[start_idx:]]
183
- arg_strs.extend([f'{param_name}={str(self.components[idx])}' for param_name, idx in self.kwarg_idxs.items()])
192
+ arg_strs.extend([f'{param_name}={self.components[idx]}' for param_name, idx in self.kwarg_idxs.items()])
184
193
  if len(self.order_by) > 0:
185
194
  assert isinstance(self.fn, func.AggregateFunction)
186
195
  if self.fn.requires_order_by:
@@ -232,6 +241,8 @@ class FunctionCall(Expr):
232
241
  return self.order_by
233
242
 
234
243
  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
244
+ assert self.is_valid
245
+
235
246
  # we currently can't translate aggregate functions with grouping and/or ordering to SQL
236
247
  if self.has_group_by() or len(self.order_by) > 0:
237
248
  return None
@@ -278,7 +289,7 @@ class FunctionCall(Expr):
278
289
  if (
279
290
  val is None
280
291
  and parameters_by_pos[idx].kind
281
- in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
292
+ in {inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD}
282
293
  and not parameters_by_pos[idx].col_type.nullable
283
294
  ):
284
295
  return None
@@ -291,7 +302,7 @@ class FunctionCall(Expr):
291
302
  if (
292
303
  val is None
293
304
  and parameters[param_name].kind
294
- in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
305
+ in {inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD}
295
306
  and not parameters[param_name].col_type.nullable
296
307
  ):
297
308
  return None
@@ -304,6 +315,7 @@ class FunctionCall(Expr):
304
315
  Returns a list of dicts mapping each param name to its value when this FunctionCall is evaluated against
305
316
  data_rows
306
317
  """
318
+ assert self.is_valid
307
319
  assert all(name in self.fn.signature.parameters for name in param_names), f'{param_names}, {self.fn.signature}'
308
320
  result: list[dict[str, Any]] = []
309
321
  for row in data_rows:
@@ -327,6 +339,8 @@ class FunctionCall(Expr):
327
339
  return result
328
340
 
329
341
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
342
+ assert self.is_valid
343
+
330
344
  if isinstance(self.fn, func.ExprTemplateFunction):
331
345
  # we need to evaluate the template
332
346
  # TODO: can we get rid of this extra copy?
@@ -396,51 +410,66 @@ class FunctionCall(Expr):
396
410
  group_by_exprs = components[group_by_start_idx:group_by_stop_idx]
397
411
  order_by_exprs = components[order_by_start_idx:]
398
412
 
413
+ validation_error: Optional[str] = None
414
+
415
+ if isinstance(fn, func.InvalidFunction):
416
+ validation_error = (
417
+ dedent(
418
+ f"""
419
+ The UDF '{fn.self_path}' cannot be located, because
420
+ {{errormsg}}
421
+ """
422
+ )
423
+ .strip()
424
+ .format(errormsg=fn.errormsg)
425
+ )
426
+ return cls(fn, args, kwargs, return_type, is_method_call=is_method_call, validation_error=validation_error)
427
+
399
428
  # Now re-bind args and kwargs using the version of `fn` that is currently represented in code. This ensures
400
429
  # that we get a valid binding even if the signatures of `fn` have changed since the FunctionCall was
401
430
  # serialized.
402
431
 
403
- resolved_fn: func.Function
404
- bound_args: dict[str, Expr]
432
+ resolved_fn: func.Function = fn
405
433
 
406
434
  try:
435
+ # Bind args and kwargs to the function signature in the current codebase.
407
436
  resolved_fn, bound_args = fn._bind_to_matching_signature(args, kwargs)
408
437
  except (TypeError, excs.Error):
409
- # TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
410
- # mark any enclosing FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or
411
- # FunctionCall return type mismatch.
412
438
  signature_note_str = 'any of its signatures' if fn.is_polymorphic else 'its signature'
413
- instance_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
414
- raise excs.Error(
415
- f'The signature stored in the database for the UDF `{fn.self_path}` no longer matches '
416
- f'{signature_note_str} as currently defined in the code.\nThis probably means that the code for '
417
- f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
418
- f'Signature in database: {fn}\n'
419
- f'Signature as currently defined in code: {instance_signature_str}'
420
- )
421
-
422
- # Evaluate the call_return_type as defined in the current codebase.
423
- call_return_type = resolved_fn.call_return_type(bound_args)
424
-
425
- if return_type is None:
426
- # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
427
- # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
428
- # the call_return_type that we just inferred (which matches the deserialization behavior prior to
429
- # version 25).
430
- return_type = call_return_type
439
+ args_str = [str(arg.col_type) for arg in args]
440
+ args_str.extend(f'{name}: {arg.col_type}' for name, arg in kwargs.items())
441
+ call_signature_str = f'({", ".join(args_str)}) -> {return_type}'
442
+ fn_signature_str = f'{len(fn.signatures)} signatures' if fn.is_polymorphic else str(fn.signature)
443
+ validation_error = dedent(
444
+ f"""
445
+ The signature stored in the database for a UDF call to {fn.self_path!r} no longer
446
+ matches {signature_note_str} as currently defined in the code. This probably means that the
447
+ code for {fn.self_path!r} has changed in a backward-incompatible way.
448
+ Signature of UDF call in the database: {call_signature_str}
449
+ Signature of UDF as currently defined in code: {fn_signature_str}
450
+ """
451
+ ).strip()
431
452
  else:
432
- # There is a return_type stored in metadata (schema version >= 25).
433
- # Check that the stored return_type of the UDF call matches the column type of the FunctionCall, and
434
- # fail-fast if it doesn't (otherwise we risk getting downstream database errors).
435
- # TODO: Handle this more gracefully (as noted above).
436
- if not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
437
- raise excs.Error(
438
- f'The return type stored in the database for a UDF call to `{fn.self_path}` no longer matches the '
439
- f'return type of the UDF as currently defined in the code.\nThis probably means that the code for '
440
- f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
441
- f'Return type in database: `{return_type}`\n'
442
- f'Return type as currently defined in code: `{call_return_type}`'
443
- )
453
+ # Evaluate the call_return_type as defined in the current codebase.
454
+ call_return_type = resolved_fn.call_return_type(bound_args)
455
+ if return_type is None:
456
+ # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
457
+ # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
458
+ # the call_return_type that we just inferred (which matches the deserialization behavior prior to
459
+ # version 25).
460
+ return_type = call_return_type
461
+ elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
462
+ # There is a return_type stored in metadata (schema version >= 25),
463
+ # and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
464
+ validation_error = dedent(
465
+ f"""
466
+ The return type stored in the database for a UDF call to {fn.self_path!r} no longer
467
+ matches its return type as currently defined in the code. This probably means that the
468
+ code for {fn.self_path!r} has changed in a backward-incompatible way.
469
+ Return type of UDF call in the database: {return_type}
470
+ Return type of UDF as currently defined in code: {call_return_type}
471
+ """
472
+ ).strip()
444
473
 
445
474
  fn_call = cls(
446
475
  resolved_fn,
@@ -450,6 +479,7 @@ class FunctionCall(Expr):
450
479
  group_by_clause=group_by_exprs,
451
480
  order_by_clause=order_by_exprs,
452
481
  is_method_call=is_method_call,
482
+ validation_error=validation_error,
453
483
  )
454
484
 
455
485
  return fn_call
@@ -36,7 +36,7 @@ class ComparisonOperator(enum.Enum):
36
36
  return '>'
37
37
  if self == self.GE:
38
38
  return '>='
39
- assert False
39
+ raise AssertionError()
40
40
 
41
41
  def reverse(self) -> ComparisonOperator:
42
42
  if self == self.LT:
@@ -62,7 +62,7 @@ class LogicalOperator(enum.Enum):
62
62
  return '|'
63
63
  if self == self.NOT:
64
64
  return '~'
65
- assert False
65
+ raise AssertionError()
66
66
 
67
67
 
68
68
  class ArithmeticOperator(enum.Enum):
@@ -86,4 +86,4 @@ class ArithmeticOperator(enum.Enum):
86
86
  return '%'
87
87
  if self == self.FLOORDIV:
88
88
  return '//'
89
- assert False
89
+ raise AssertionError()
@@ -71,7 +71,7 @@ class InPredicate(Expr):
71
71
  return self.value_list == other.value_list
72
72
 
73
73
  def _id_attrs(self) -> list[tuple[str, Any]]:
74
- return super()._id_attrs() + [('value_list', self.value_list)]
74
+ return [*super()._id_attrs(), ('value_list', self.value_list)]
75
75
 
76
76
  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
77
77
  lhs_sql_exprs = sql_elements.get(self.components[0])
@@ -131,7 +131,7 @@ class InlineList(Expr):
131
131
  def as_literal(self) -> Optional[Literal]:
132
132
  if not all(isinstance(comp, Literal) for comp in self.components):
133
133
  return None
134
- return Literal(list(c.as_literal().val for c in self.components), self.col_type)
134
+ return Literal([c.as_literal().val for c in self.components], self.col_type)
135
135
 
136
136
 
137
137
  class InlineDict(Expr):
@@ -166,7 +166,7 @@ class InlineDict(Expr):
166
166
  self.id = self._create_id()
167
167
 
168
168
  def __repr__(self) -> str:
169
- item_strs = list(f"'{key}': {str(expr)}" for key, expr in zip(self.keys, self.components))
169
+ item_strs = [f"'{key}': {expr}" for key, expr in zip(self.keys, self.components)]
170
170
  return '{' + ', '.join(item_strs) + '}'
171
171
 
172
172
  def _equals(self, other: InlineDict) -> bool:
@@ -174,7 +174,7 @@ class InlineDict(Expr):
174
174
  return self.keys == other.keys
175
175
 
176
176
  def _id_attrs(self) -> list[tuple[str, Any]]:
177
- return super()._id_attrs() + [('keys', self.keys)]
177
+ return [*super()._id_attrs(), ('keys', self.keys)]
178
178
 
179
179
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
180
180
  return None
@@ -19,7 +19,7 @@ class IsNull(Expr):
19
19
  self.id = self._create_id()
20
20
 
21
21
  def __repr__(self) -> str:
22
- return f'{str(self.components[0])} == None'
22
+ return f'{self.components[0]} == None'
23
23
 
24
24
  def _equals(self, other: IsNull) -> bool:
25
25
  return True
@@ -81,12 +81,12 @@ class JsonMapper(Expr):
81
81
  """
82
82
  We override equals() because we need to avoid comparing our scope anchor.
83
83
  """
84
- if type(self) != type(other):
84
+ if type(self) is not type(other):
85
85
  return False
86
86
  return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
87
87
 
88
88
  def __repr__(self) -> str:
89
- return f'{str(self._src_expr)} >> {str(self._target_expr)}'
89
+ return f'{self._src_expr} >> {self._target_expr}'
90
90
 
91
91
  @property
92
92
  def _src_expr(self) -> Expr:
@@ -6,14 +6,13 @@ import jmespath
6
6
  import sqlalchemy as sql
7
7
 
8
8
  import pixeltable as pxt
9
- import pixeltable.catalog as catalog
10
- import pixeltable.exceptions as excs
11
- import pixeltable.type_system as ts
9
+ from pixeltable import catalog, exceptions as excs, type_system as ts
12
10
 
13
11
  from .data_row import DataRow
14
12
  from .expr import Expr
15
13
  from .globals import print_slice
16
14
  from .json_mapper import JsonMapper
15
+ from .object_ref import ObjectRef
17
16
  from .row_builder import RowBuilder
18
17
  from .sql_element_cache import SqlElementCache
19
18
 
@@ -50,8 +49,16 @@ class JsonPath(Expr):
50
49
  return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
51
50
 
52
51
  def _as_dict(self) -> dict:
52
+ assert len(self.components) <= 1
53
+ components_dict: dict[str, Any]
54
+ if len(self.components) == 0 or isinstance(self.components[0], ObjectRef):
55
+ # If the anchor is an ObjectRef, it means this JsonPath is a bound relative path. We store it as a relative
56
+ # path, *not* a bound path (which has no meaning in the dict).
57
+ components_dict = {}
58
+ else:
59
+ components_dict = super()._as_dict()
53
60
  path_elements = [[el.start, el.stop, el.step] if isinstance(el, slice) else el for el in self.path_elements]
54
- return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **super()._as_dict()}
61
+ return {'path_elements': path_elements, 'scope_idx': self.scope_idx, **components_dict}
55
62
 
56
63
  @classmethod
57
64
  def _from_dict(cls, d: dict, components: list[Expr]) -> JsonPath:
@@ -84,18 +91,18 @@ class JsonPath(Expr):
84
91
  Construct a relative path that references an ancestor of the immediately enclosing JsonMapper.
85
92
  """
86
93
  if not self.is_relative_path():
87
- raise excs.Error(f'() for an absolute path is invalid')
94
+ raise excs.Error('() for an absolute path is invalid')
88
95
  if len(args) != 1 or not isinstance(args[0], int) or args[0] >= 0:
89
- raise excs.Error(f'R() requires a negative index')
96
+ raise excs.Error('R() requires a negative index')
90
97
  return JsonPath(None, [], args[0])
91
98
 
92
99
  def __getattr__(self, name: str) -> 'JsonPath':
93
100
  assert isinstance(name, str)
94
- return JsonPath(self._anchor, self.path_elements + [name])
101
+ return JsonPath(self._anchor, [*self.path_elements, name])
95
102
 
96
103
  def __getitem__(self, index: object) -> 'JsonPath':
97
104
  if isinstance(index, (int, slice, str)):
98
- return JsonPath(self._anchor, self.path_elements + [index])
105
+ return JsonPath(self._anchor, [*self.path_elements, index])
99
106
  raise excs.Error(f'Invalid json list index: {index}')
100
107
 
101
108
  def __rshift__(self, other: object) -> 'JsonMapper':
@@ -120,7 +127,7 @@ class JsonPath(Expr):
120
127
 
121
128
  clean_name = ''.join(map(cleanup_char, ret_name))
122
129
  clean_name = clean_name.lstrip('_') # remove leading underscore
123
- if clean_name == '':
130
+ if not clean_name: # Replace '' with None
124
131
  clean_name = None
125
132
 
126
133
  assert clean_name is None or catalog.is_valid_identifier(clean_name)
@@ -130,7 +137,7 @@ class JsonPath(Expr):
130
137
  return self.path_elements == other.path_elements
131
138
 
132
139
  def _id_attrs(self) -> list[tuple[str, Any]]:
133
- return super()._id_attrs() + [('path_elements', self.path_elements)]
140
+ return [*super()._id_attrs(), ('path_elements', self.path_elements)]
134
141
 
135
142
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
136
143
  """
@@ -62,7 +62,7 @@ class Literal(Expr):
62
62
  return self.val == other.val
63
63
 
64
64
  def _id_attrs(self) -> list[tuple[str, Any]]:
65
- return super()._id_attrs() + [('val', self.val)]
65
+ return [*super()._id_attrs(), ('val', self.val)]
66
66
 
67
67
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
68
68
  # Return a sql object so that constants can participate in SQL expressions
@@ -53,13 +53,13 @@ class MethodRef(Expr):
53
53
  return self.base_expr.id == other.base_expr.id and self.method_name == other.method_name
54
54
 
55
55
  def _id_attrs(self) -> list[tuple[str, Any]]:
56
- return super()._id_attrs() + [('method_name', self.method_name)]
56
+ return [*super()._id_attrs(), ('method_name', self.method_name)]
57
57
 
58
58
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
59
59
  return None
60
60
 
61
61
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
62
- assert False, 'MethodRef cannot be evaluated directly'
62
+ raise AssertionError('MethodRef cannot be evaluated directly')
63
63
 
64
64
  def __repr__(self) -> str:
65
65
  return f'{self.base_expr}.{self.method_name}'
@@ -7,12 +7,8 @@ from typing import Any, Iterable, Optional, Sequence
7
7
  from uuid import UUID
8
8
 
9
9
  import numpy as np
10
- import sqlalchemy as sql
11
10
 
12
- import pixeltable.catalog as catalog
13
- import pixeltable.exceptions as excs
14
- import pixeltable.func as func
15
- import pixeltable.utils as utils
11
+ from pixeltable import catalog, exceptions as excs, utils
16
12
  from pixeltable.env import Env
17
13
  from pixeltable.utils.media_store import MediaStore
18
14
 
@@ -174,11 +170,13 @@ class RowBuilder:
174
170
 
175
171
  def refs_unstored_iter_col(col_ref: ColumnRef) -> bool:
176
172
  tbl = col_ref.col.tbl
177
- return tbl.is_component_view() and tbl.is_iterator_column(col_ref.col) and not col_ref.col.is_stored
173
+ return (
174
+ tbl.get().is_component_view and tbl.get().is_iterator_column(col_ref.col) and not col_ref.col.is_stored
175
+ )
178
176
 
179
177
  unstored_iter_col_refs = [col_ref for col_ref in col_refs if refs_unstored_iter_col(col_ref)]
180
178
  component_views = [col_ref.col.tbl for col_ref in unstored_iter_col_refs]
181
- unstored_iter_args = {view.id: view.iterator_args.copy() for view in component_views}
179
+ unstored_iter_args = {view.id: view.get().iterator_args.copy() for view in component_views}
182
180
  self.unstored_iter_args = {
183
181
  id: self._record_unique_expr(arg, recursive=True) for id, arg in unstored_iter_args.items()
184
182
  }
@@ -236,13 +234,6 @@ class RowBuilder:
236
234
  """Return ColumnSlotIdx for output columns"""
237
235
  return self.table_columns
238
236
 
239
- def set_conn(self, conn: sql.engine.Connection) -> None:
240
- from .function_call import FunctionCall
241
-
242
- for expr in self.unique_exprs:
243
- if isinstance(expr, FunctionCall) and isinstance(expr.fn, func.QueryTemplateFunction):
244
- expr.fn.set_conn(conn)
245
-
246
237
  @property
247
238
  def num_materialized(self) -> int:
248
239
  return self.next_slot_idx
@@ -373,8 +364,8 @@ class RowBuilder:
373
364
  def set_exc(self, data_row: DataRow, slot_idx: int, exc: Exception) -> None:
374
365
  """Record an exception in data_row and propagate it to dependents"""
375
366
  data_row.set_exc(slot_idx, exc)
376
- for slot_idx in self._exc_dependents[slot_idx]:
377
- data_row.set_exc(slot_idx, exc)
367
+ for idx in self._exc_dependents[slot_idx]:
368
+ data_row.set_exc(idx, exc)
378
369
 
379
370
  def eval(
380
371
  self,
@@ -432,7 +423,7 @@ class RowBuilder:
432
423
  else:
433
424
  if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
434
425
  # we have yet to store this image
435
- filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
426
+ filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.get().version))
436
427
  data_row.flush_img(slot_idx, filepath)
437
428
  val = data_row.get_stored_val(slot_idx, col.sa_col.type)
438
429
  table_row[col.store_name()] = val
@@ -1,12 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ from typing import Any, Optional, cast
4
4
  from uuid import UUID
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
- import pixeltable.catalog as catalog
9
- import pixeltable.type_system as ts
8
+ from pixeltable import catalog, type_system as ts
10
9
 
11
10
  from .data_row import DataRow
12
11
  from .expr import Expr
@@ -23,9 +22,15 @@ class RowidRef(Expr):
23
22
  (with and without a TableVersion).
24
23
  """
25
24
 
25
+ tbl: Optional[catalog.TableVersionHandle]
26
+ normalized_base: Optional[catalog.TableVersionHandle]
27
+ tbl_id: UUID
28
+ normalized_base_id: UUID
29
+ rowid_component_idx: int
30
+
26
31
  def __init__(
27
32
  self,
28
- tbl: catalog.TableVersion,
33
+ tbl: catalog.TableVersionHandle,
29
34
  idx: int,
30
35
  tbl_id: Optional[UUID] = None,
31
36
  normalized_base_id: Optional[UUID] = None,
@@ -37,8 +42,8 @@ class RowidRef(Expr):
37
42
  # (which has the same values as all its descendent views)
38
43
  normalized_base = tbl
39
44
  # don't try to reference tbl.store_tbl here
40
- while normalized_base.base is not None and normalized_base.base.num_rowid_columns() > idx:
41
- normalized_base = normalized_base.base
45
+ while normalized_base.get().base is not None and normalized_base.get().base.get().num_rowid_columns() > idx:
46
+ normalized_base = normalized_base.get().base
42
47
  self.normalized_base = normalized_base
43
48
  else:
44
49
  self.normalized_base = None
@@ -59,15 +64,21 @@ class RowidRef(Expr):
59
64
  )
60
65
 
61
66
  def _id_attrs(self) -> list[tuple[str, Any]]:
62
- return super()._id_attrs() + [
67
+ return [
68
+ *super()._id_attrs(),
63
69
  ('normalized_base_id', self.normalized_base_id),
64
70
  ('idx', self.rowid_component_idx),
65
71
  ]
66
72
 
67
73
  def __repr__(self) -> str:
68
74
  # check if this is the pos column of a component view
69
- tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
70
- if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx: # type: ignore[attr-defined]
75
+ from pixeltable import store
76
+
77
+ tbl = self.tbl.get() if self.tbl is not None else catalog.Catalog.get().get_tbl_version(self.tbl_id, None)
78
+ if (
79
+ tbl.is_component_view
80
+ and self.rowid_component_idx == cast(store.StoreComponentView, tbl.store_tbl).pos_col_idx
81
+ ):
71
82
  return catalog.globals._POS_COLUMN_NAME
72
83
  return ''
73
84
 
@@ -85,7 +96,7 @@ class RowidRef(Expr):
85
96
  self.tbl_id = self.tbl.id
86
97
 
87
98
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
88
- tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
99
+ tbl = self.tbl.get() if self.tbl is not None else catalog.Catalog.get().get_tbl_version(self.tbl_id, None)
89
100
  rowid_cols = tbl.store_tbl.rowid_columns()
90
101
  return rowid_cols[self.rowid_component_idx]
91
102