pixeltable-0.3.10-py3-none-any.whl → pixeltable-0.3.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable has been flagged as potentially problematic. (The original page linked to further details; the link target is not preserved in this text.)

Files changed (106):
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +2 -1
  4. pixeltable/catalog/catalog.py +63 -36
  5. pixeltable/catalog/column.py +6 -4
  6. pixeltable/catalog/dir.py +5 -5
  7. pixeltable/catalog/globals.py +12 -14
  8. pixeltable/catalog/insertable_table.py +4 -7
  9. pixeltable/catalog/path.py +2 -2
  10. pixeltable/catalog/table.py +64 -56
  11. pixeltable/catalog/table_version.py +42 -40
  12. pixeltable/catalog/table_version_handle.py +3 -0
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/view.py +8 -7
  15. pixeltable/dataframe.py +5 -3
  16. pixeltable/env.py +108 -42
  17. pixeltable/exec/__init__.py +2 -0
  18. pixeltable/exec/aggregation_node.py +6 -8
  19. pixeltable/exec/cache_prefetch_node.py +4 -7
  20. pixeltable/exec/component_iteration_node.py +1 -3
  21. pixeltable/exec/data_row_batch.py +1 -2
  22. pixeltable/exec/exec_context.py +1 -1
  23. pixeltable/exec/exec_node.py +1 -2
  24. pixeltable/exec/expr_eval/__init__.py +2 -0
  25. pixeltable/exec/expr_eval/evaluators.py +137 -20
  26. pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
  27. pixeltable/exec/expr_eval/globals.py +68 -7
  28. pixeltable/exec/expr_eval/schedulers.py +25 -23
  29. pixeltable/exec/in_memory_data_node.py +8 -6
  30. pixeltable/exec/row_update_node.py +3 -4
  31. pixeltable/exec/sql_node.py +16 -17
  32. pixeltable/exprs/__init__.py +1 -1
  33. pixeltable/exprs/column_property_ref.py +1 -1
  34. pixeltable/exprs/column_ref.py +3 -3
  35. pixeltable/exprs/compound_predicate.py +1 -1
  36. pixeltable/exprs/data_row.py +17 -1
  37. pixeltable/exprs/expr.py +12 -12
  38. pixeltable/exprs/function_call.py +34 -2
  39. pixeltable/exprs/json_mapper.py +95 -48
  40. pixeltable/exprs/json_path.py +3 -4
  41. pixeltable/exprs/method_ref.py +2 -2
  42. pixeltable/exprs/object_ref.py +2 -2
  43. pixeltable/exprs/row_builder.py +33 -6
  44. pixeltable/exprs/similarity_expr.py +1 -1
  45. pixeltable/exprs/sql_element_cache.py +1 -1
  46. pixeltable/exprs/string_op.py +2 -2
  47. pixeltable/ext/__init__.py +1 -1
  48. pixeltable/ext/functions/__init__.py +1 -1
  49. pixeltable/ext/functions/whisperx.py +1 -1
  50. pixeltable/ext/functions/yolox.py +1 -1
  51. pixeltable/func/aggregate_function.py +1 -1
  52. pixeltable/func/callable_function.py +2 -5
  53. pixeltable/func/expr_template_function.py +22 -2
  54. pixeltable/func/function.py +4 -5
  55. pixeltable/func/function_registry.py +1 -1
  56. pixeltable/func/signature.py +1 -1
  57. pixeltable/func/udf.py +2 -2
  58. pixeltable/functions/__init__.py +1 -1
  59. pixeltable/functions/anthropic.py +2 -2
  60. pixeltable/functions/audio.py +1 -1
  61. pixeltable/functions/deepseek.py +1 -1
  62. pixeltable/functions/fireworks.py +1 -1
  63. pixeltable/functions/globals.py +6 -6
  64. pixeltable/functions/huggingface.py +1 -1
  65. pixeltable/functions/image.py +1 -1
  66. pixeltable/functions/json.py +1 -1
  67. pixeltable/functions/llama_cpp.py +1 -1
  68. pixeltable/functions/math.py +1 -1
  69. pixeltable/functions/mistralai.py +1 -1
  70. pixeltable/functions/ollama.py +1 -1
  71. pixeltable/functions/openai.py +2 -2
  72. pixeltable/functions/replicate.py +1 -1
  73. pixeltable/functions/string.py +1 -1
  74. pixeltable/functions/timestamp.py +1 -1
  75. pixeltable/functions/together.py +1 -1
  76. pixeltable/functions/util.py +1 -1
  77. pixeltable/functions/video.py +2 -2
  78. pixeltable/functions/vision.py +2 -2
  79. pixeltable/index/embedding_index.py +12 -1
  80. pixeltable/io/__init__.py +5 -3
  81. pixeltable/io/fiftyone.py +6 -7
  82. pixeltable/io/label_studio.py +21 -20
  83. pixeltable/io/pandas.py +6 -5
  84. pixeltable/iterators/__init__.py +1 -1
  85. pixeltable/metadata/__init__.py +5 -3
  86. pixeltable/metadata/converters/convert_24.py +3 -3
  87. pixeltable/metadata/converters/convert_25.py +1 -1
  88. pixeltable/metadata/converters/convert_29.py +1 -1
  89. pixeltable/store.py +2 -2
  90. pixeltable/type_system.py +19 -7
  91. pixeltable/utils/console_output.py +3 -2
  92. pixeltable/utils/coroutine.py +3 -3
  93. pixeltable/utils/dbms.py +66 -0
  94. pixeltable/utils/documents.py +61 -67
  95. pixeltable/utils/filecache.py +1 -1
  96. pixeltable/utils/http_server.py +3 -2
  97. pixeltable/utils/pytorch.py +1 -1
  98. pixeltable/utils/sql.py +1 -1
  99. pixeltable-0.3.11.dist-info/METADATA +436 -0
  100. pixeltable-0.3.11.dist-info/RECORD +179 -0
  101. pixeltable/catalog/path_dict.py +0 -169
  102. pixeltable-0.3.10.dist-info/METADATA +0 -382
  103. pixeltable-0.3.10.dist-info/RECORD +0 -179
  104. {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
  105. {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +0 -0
  106. {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
@@ -11,7 +11,7 @@ from typing import Awaitable, Collection, Optional
11
11
  from pixeltable import env, func
12
12
  from pixeltable.config import Config
13
13
 
14
- from .globals import Dispatcher, FnCallArgs, Scheduler
14
+ from .globals import Dispatcher, ExecCtx, FnCallArgs, Scheduler
15
15
 
16
16
  _logger = logging.getLogger('pixeltable')
17
17
 
@@ -62,9 +62,6 @@ class RateLimitsScheduler(Scheduler):
62
62
  def matches(cls, resource_pool: str) -> bool:
63
63
  return resource_pool.startswith('rate-limits:')
64
64
 
65
- def submit(self, item: FnCallArgs) -> None:
66
- self.queue.put_nowait(self.QueueItem(item, 0))
67
-
68
65
  def _set_pool_info(self) -> None:
69
66
  """Initialize pool_info with the RateLimitsInfo for the resource pool, if available"""
70
67
  if self.pool_info is not None:
@@ -76,7 +73,7 @@ class RateLimitsScheduler(Scheduler):
76
73
  assert hasattr(self.pool_info, 'get_request_resources')
77
74
  sig = inspect.signature(self.pool_info.get_request_resources)
78
75
  self.get_request_resources_param_names = [p.name for p in sig.parameters.values()]
79
- self.est_usage = {r: 0 for r in self._resources}
76
+ self.est_usage = dict.fromkeys(self._resources, 0)
80
77
 
81
78
  async def _main_loop(self) -> None:
82
79
  item: Optional[RateLimitsScheduler.QueueItem] = None
@@ -90,7 +87,7 @@ class RateLimitsScheduler(Scheduler):
90
87
  if self.pool_info is None or not self.pool_info.is_initialized():
91
88
  # wait for a single request to get rate limits
92
89
  _logger.debug(f'initializing rate limits for {self.resource_pool}')
93
- await self._exec(item.request, item.num_retries, is_task=False)
90
+ await self._exec(item.request, item.exec_ctx, item.num_retries, is_task=False)
94
91
  _logger.debug(f'initialized rate limits for {self.resource_pool}')
95
92
  item = None
96
93
  # if this was the first request, it created the pool_info
@@ -141,7 +138,7 @@ class RateLimitsScheduler(Scheduler):
141
138
  self.est_usage[resource] += val
142
139
  _logger.debug(f'creating task for {self.resource_pool}')
143
140
  self.num_in_flight += 1
144
- task = asyncio.create_task(self._exec(item.request, item.num_retries, is_task=True))
141
+ task = asyncio.create_task(self._exec(item.request, item.exec_ctx, item.num_retries, is_task=True))
145
142
  self.dispatcher.register_task(task)
146
143
  item = None
147
144
 
@@ -171,7 +168,7 @@ class RateLimitsScheduler(Scheduler):
171
168
  return None
172
169
  return min(candidates, key=lambda x: x[1])[0]
173
170
 
174
- async def _exec(self, request: FnCallArgs, num_retries: int, is_task: bool) -> None:
171
+ async def _exec(self, request: FnCallArgs, exec_ctx: ExecCtx, num_retries: int, is_task: bool) -> None:
175
172
  assert all(not row.has_val[request.fn_call.slot_idx] for row in request.rows)
176
173
  assert all(not row.has_exc(request.fn_call.slot_idx) for row in request.rows)
177
174
 
@@ -180,7 +177,8 @@ class RateLimitsScheduler(Scheduler):
180
177
  pxt_fn = request.fn_call.fn
181
178
  assert isinstance(pxt_fn, func.CallableFunction)
182
179
  _logger.debug(
183
- f'scheduler {self.resource_pool}: start evaluating slot {request.fn_call.slot_idx}, batch_size={len(request.rows)}'
180
+ f'scheduler {self.resource_pool}: '
181
+ f'start evaluating slot {request.fn_call.slot_idx}, batch_size={len(request.rows)}'
184
182
  )
185
183
  self.total_requests += 1
186
184
  if request.is_batched:
@@ -193,13 +191,14 @@ class RateLimitsScheduler(Scheduler):
193
191
  request.row[request.fn_call.slot_idx] = result
194
192
  end_ts = datetime.datetime.now(tz=datetime.timezone.utc)
195
193
  _logger.debug(
196
- f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx} in {end_ts - start_ts}, batch_size={len(request.rows)}'
194
+ f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx} '
195
+ f'in {end_ts - start_ts}, batch_size={len(request.rows)}'
197
196
  )
198
197
 
199
198
  # purge accumulated usage estimate, now that we have a new report
200
- self.est_usage = {r: 0 for r in self._resources}
199
+ self.est_usage = dict.fromkeys(self._resources, 0)
201
200
 
202
- self.dispatcher.dispatch(request.rows)
201
+ self.dispatcher.dispatch(request.rows, exec_ctx)
203
202
  except Exception as exc:
204
203
  _logger.debug(f'scheduler {self.resource_pool}: exception in slot {request.fn_call.slot_idx}: {exc}')
205
204
  if self.pool_info is None:
@@ -212,7 +211,7 @@ class RateLimitsScheduler(Scheduler):
212
211
  self.total_retried += 1
213
212
  _logger.debug(f'scheduler {self.resource_pool}: retrying in {retry_delay} seconds')
214
213
  await asyncio.sleep(retry_delay)
215
- self.queue.put_nowait(self.QueueItem(request, num_retries + 1))
214
+ self.queue.put_nowait(self.QueueItem(request, num_retries + 1, exec_ctx))
216
215
  return
217
216
  # TODO: update resource limits reported in exc.response.headers, if present
218
217
 
@@ -220,7 +219,7 @@ class RateLimitsScheduler(Scheduler):
220
219
  _, _, exc_tb = sys.exc_info()
221
220
  for row in request.rows:
222
221
  row.set_exc(request.fn_call.slot_idx, exc)
223
- self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb)
222
+ self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb, exec_ctx)
224
223
  finally:
225
224
  _logger.debug(f'Scheduler stats: #requests={self.total_requests}, #retried={self.total_retried}')
226
225
  if is_task:
@@ -301,15 +300,15 @@ class RequestRateScheduler(Scheduler):
301
300
  if item.num_retries > 0:
302
301
  # the last request encountered some problem: retry it synchronously, to wait for the problem to pass
303
302
  _logger.debug(f'retrying request for {self.resource_pool}: #retries={item.num_retries}')
304
- await self._exec(item.request, item.num_retries, is_task=False)
303
+ await self._exec(item.request, item.exec_ctx, item.num_retries, is_task=False)
305
304
  _logger.debug(f'retried request for {self.resource_pool}: #retries={item.num_retries}')
306
305
  else:
307
306
  _logger.debug(f'creating task for {self.resource_pool}')
308
307
  self.num_in_flight += 1
309
- task = asyncio.create_task(self._exec(item.request, item.num_retries, is_task=True))
308
+ task = asyncio.create_task(self._exec(item.request, item.exec_ctx, item.num_retries, is_task=True))
310
309
  self.dispatcher.register_task(task)
311
310
 
312
- async def _exec(self, request: FnCallArgs, num_retries: int, is_task: bool) -> None:
311
+ async def _exec(self, request: FnCallArgs, exec_ctx: ExecCtx, num_retries: int, is_task: bool) -> None:
313
312
  assert all(not row.has_val[request.fn_call.slot_idx] for row in request.rows)
314
313
  assert all(not row.has_exc(request.fn_call.slot_idx) for row in request.rows)
315
314
 
@@ -318,7 +317,8 @@ class RequestRateScheduler(Scheduler):
318
317
  pxt_fn = request.fn_call.fn
319
318
  assert isinstance(pxt_fn, func.CallableFunction)
320
319
  _logger.debug(
321
- f'scheduler {self.resource_pool}: start evaluating slot {request.fn_call.slot_idx}, batch_size={len(request.rows)}'
320
+ f'scheduler {self.resource_pool}: '
321
+ f'start evaluating slot {request.fn_call.slot_idx}, batch_size={len(request.rows)}'
322
322
  )
323
323
  self.total_requests += 1
324
324
  if request.is_batched:
@@ -331,9 +331,10 @@ class RequestRateScheduler(Scheduler):
331
331
  request.row[request.fn_call.slot_idx] = result
332
332
  end_ts = datetime.datetime.now(tz=datetime.timezone.utc)
333
333
  _logger.debug(
334
- f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx} in {end_ts - start_ts}, batch_size={len(request.rows)}'
334
+ f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx} '
335
+ f'in {end_ts - start_ts}, batch_size={len(request.rows)}'
335
336
  )
336
- self.dispatcher.dispatch(request.rows)
337
+ self.dispatcher.dispatch(request.rows, exec_ctx)
337
338
 
338
339
  except Exception as exc:
339
340
  # TODO: which exception can be retried?
@@ -341,17 +342,18 @@ class RequestRateScheduler(Scheduler):
341
342
  status = getattr(exc, 'status', None)
342
343
  _logger.debug(f'type={type(exc)} has_status={hasattr(exc, "status")} status={status}')
343
344
  if num_retries < self.MAX_RETRIES:
344
- self.queue.put_nowait(self.QueueItem(request, num_retries + 1))
345
+ self.queue.put_nowait(self.QueueItem(request, num_retries + 1, exec_ctx))
345
346
  return
346
347
 
347
348
  # record the exception
348
349
  _, _, exc_tb = sys.exc_info()
349
350
  for row in request.rows:
350
351
  row.set_exc(request.fn_call.slot_idx, exc)
351
- self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb)
352
+ self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb, exec_ctx)
352
353
  finally:
353
354
  _logger.debug(
354
- f'Scheduler stats: #in-flight={self.num_in_flight} #requests={self.total_requests}, #retried={self.total_retried}'
355
+ f'Scheduler stats: #in-flight={self.num_in_flight} #requests={self.total_requests}, '
356
+ f'#retried={self.total_retried}'
355
357
  )
356
358
  if is_task:
357
359
  self.num_in_flight -= 1
@@ -1,8 +1,7 @@
1
1
  import logging
2
- from typing import Any, AsyncIterator, Iterator, Optional
2
+ from typing import Any, AsyncIterator, Optional
3
3
 
4
- import pixeltable.catalog as catalog
5
- import pixeltable.exprs as exprs
4
+ from pixeltable import catalog, exprs
6
5
  from pixeltable.utils.media_store import MediaStore
7
6
 
8
7
  from .data_row_batch import DataRowBatch
@@ -68,9 +67,12 @@ class InMemoryDataNode(ExecNode):
68
67
  if col_info.col.col_type.is_image_type() and isinstance(val, bytes):
69
68
  # this is a literal image, ie, a sequence of bytes; we save this as a media file and store the path
70
69
  path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.get().version))
71
- open(path, 'wb').write(val)
72
- val = path
73
- self.output_rows[row_idx][col_info.slot_idx] = val
70
+ with open(path, 'wb') as fp:
71
+ fp.write(val)
72
+ self.output_rows[row_idx][col_info.slot_idx] = path
73
+ else:
74
+ self.output_rows[row_idx][col_info.slot_idx] = val
75
+
74
76
  input_slot_idxs.add(col_info.slot_idx)
75
77
 
76
78
  # set the remaining output slots to their default values (presently None)
@@ -1,8 +1,7 @@
1
1
  import logging
2
2
  from typing import Any, AsyncIterator
3
3
 
4
- import pixeltable.catalog as catalog
5
- import pixeltable.exprs as exprs
4
+ from pixeltable import catalog, exprs
6
5
 
7
6
  from .data_row_batch import DataRowBatch
8
7
  from .exec_node import ExecNode
@@ -29,7 +28,7 @@ class RowUpdateNode(ExecNode):
29
28
  input: ExecNode,
30
29
  ):
31
30
  super().__init__(row_builder, [], [], input)
32
- self.updates = {key_vals: col_vals for key_vals, col_vals in zip(key_vals_batch, col_vals_batch)}
31
+ self.updates = dict(zip(key_vals_batch, col_vals_batch))
33
32
  self.is_rowid_key = is_rowid_key
34
33
  # determine slot idxs of all columns we need to read or write
35
34
  # retrieve ColumnRefs from the RowBuilder (has slot_idx set)
@@ -38,7 +37,7 @@ class RowUpdateNode(ExecNode):
38
37
  for col_ref in row_builder.unique_exprs
39
38
  if isinstance(col_ref, exprs.ColumnRef)
40
39
  }
41
- self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
40
+ self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0]}
42
41
  self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.get().primary_key_columns()}
43
42
  self.matched_key_vals: set[tuple] = set()
44
43
 
@@ -6,8 +6,7 @@ from uuid import UUID
6
6
 
7
7
  import sqlalchemy as sql
8
8
 
9
- import pixeltable.catalog as catalog
10
- import pixeltable.exprs as exprs
9
+ from pixeltable import catalog, exprs
11
10
  from pixeltable.env import Env
12
11
 
13
12
  from .data_row_batch import DataRowBatch
@@ -217,31 +216,31 @@ class SqlNode(ExecNode):
217
216
  candidates = tbl.get_tbl_versions()
218
217
  assert len(candidates) > 0
219
218
  joined_tbls: list[catalog.TableVersionHandle] = [candidates[0]]
220
- for tbl in candidates[1:]:
221
- if tbl.id in refd_tbl_ids:
222
- joined_tbls.append(tbl)
219
+ for t in candidates[1:]:
220
+ if t.id in refd_tbl_ids:
221
+ joined_tbls.append(t)
223
222
 
224
223
  first = True
225
- prev_tbl: catalog.TableVersionHandle
226
- for tbl in joined_tbls[::-1]:
224
+ prev_tbl: Optional[catalog.TableVersionHandle] = None
225
+ for t in joined_tbls[::-1]:
227
226
  if first:
228
- stmt = stmt.select_from(tbl.get().store_tbl.sa_tbl)
227
+ stmt = stmt.select_from(t.get().store_tbl.sa_tbl)
229
228
  first = False
230
229
  else:
231
230
  # join tbl to prev_tbl on prev_tbl's rowid cols
232
231
  prev_tbl_rowid_cols = prev_tbl.get().store_tbl.rowid_columns()
233
- tbl_rowid_cols = tbl.get().store_tbl.rowid_columns()
232
+ tbl_rowid_cols = t.get().store_tbl.rowid_columns()
234
233
  rowid_clauses = [
235
234
  c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[: len(prev_tbl_rowid_cols)])
236
235
  ]
237
- stmt = stmt.join(tbl.get().store_tbl.sa_tbl, sql.and_(*rowid_clauses))
238
- if tbl.id in exact_version_only:
239
- stmt = stmt.where(tbl.get().store_tbl.v_min_col == tbl.get().version)
236
+ stmt = stmt.join(t.get().store_tbl.sa_tbl, sql.and_(*rowid_clauses))
237
+ if t.id in exact_version_only:
238
+ stmt = stmt.where(t.get().store_tbl.v_min_col == t.get().version)
240
239
  else:
241
- stmt = stmt.where(tbl.get().store_tbl.v_min_col <= tbl.get().version).where(
242
- tbl.get().store_tbl.v_max_col > tbl.get().version
240
+ stmt = stmt.where(t.get().store_tbl.v_min_col <= t.get().version).where(
241
+ t.get().store_tbl.v_max_col > t.get().version
243
242
  )
244
- prev_tbl = tbl
243
+ prev_tbl = t
245
244
  return stmt
246
245
 
247
246
  def set_where(self, where_clause: exprs.Expr) -> None:
@@ -291,7 +290,7 @@ class SqlNode(ExecNode):
291
290
 
292
291
  conn = Env.get().conn
293
292
  result_cursor = conn.execute(stmt)
294
- for warning in w:
293
+ for _ in w:
295
294
  pass
296
295
 
297
296
  tbl_version = self.tbl.tbl_version if self.tbl is not None else None
@@ -494,7 +493,7 @@ class SqlJoinNode(SqlNode):
494
493
  if join_clause.join_type != plan.JoinType.CROSS
495
494
  else sql.sql.expression.literal(True)
496
495
  )
497
- is_outer = join_clause.join_type == plan.JoinType.LEFT or join_clause.join_type == plan.JoinType.FULL_OUTER
496
+ is_outer = join_clause.join_type in (plan.JoinType.LEFT, plan.JoinType.FULL_OUTER)
498
497
  stmt = stmt.join(
499
498
  self.input_ctes[i + 1],
500
499
  onclause=on_clause,
@@ -15,7 +15,7 @@ from .globals import ArithmeticOperator, ComparisonOperator, LogicalOperator
15
15
  from .in_predicate import InPredicate
16
16
  from .inline_expr import InlineArray, InlineDict, InlineList
17
17
  from .is_null import IsNull
18
- from .json_mapper import JsonMapper
18
+ from .json_mapper import JsonMapper, JsonMapperDispatch
19
19
  from .json_path import JsonPath
20
20
  from .literal import Literal
21
21
  from .method_ref import MethodRef
@@ -52,7 +52,7 @@ class ColumnPropertyRef(Expr):
52
52
  return f'{self._col_ref}.{self.prop.name.lower()}'
53
53
 
54
54
  def is_error_prop(self) -> bool:
55
- return self.prop in {self.Property.ERRORTYPE, self.Property.ERRORMSG}
55
+ return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG)
56
56
 
57
57
  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
58
58
  if not self._col_ref.col.is_stored:
@@ -176,13 +176,13 @@ class ColumnRef(Expr):
176
176
  tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
177
177
  return tbl.select(self)
178
178
 
179
- def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
179
+ def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
180
180
  return self._df().show(*args, **kwargs)
181
181
 
182
- def head(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
182
+ def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
183
183
  return self._df().head(*args, **kwargs)
184
184
 
185
- def tail(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
185
+ def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
186
186
  return self._df().tail(*args, **kwargs)
187
187
 
188
188
  def count(self) -> int:
@@ -61,7 +61,7 @@ class CompoundPredicate(Expr):
61
61
  return [*super()._id_attrs(), ('operator', self.operator.value)]
62
62
 
63
63
  def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
64
- if self.operator in {LogicalOperator.OR, LogicalOperator.NOT}:
64
+ if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
65
65
  return super().split_conjuncts(condition)
66
66
  matches = [op for op in self.components if condition(op)]
67
67
  non_matches = [op for op in self.components if not condition(op)]
@@ -63,11 +63,25 @@ class DataRow:
63
63
  # - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
64
64
  file_paths: np.ndarray # of str
65
65
 
66
- def __init__(self, size: int, img_slot_idxs: list[int], media_slot_idxs: list[int], array_slot_idxs: list[int]):
66
+ # for nested rows (ie, those produced by JsonMapperDispatcher)
67
+ parent_row: Optional[DataRow]
68
+ parent_slot_idx: Optional[int]
69
+
70
+ def __init__(
71
+ self,
72
+ size: int,
73
+ img_slot_idxs: list[int],
74
+ media_slot_idxs: list[int],
75
+ array_slot_idxs: list[int],
76
+ parent_row: Optional[DataRow] = None,
77
+ parent_slot_idx: Optional[int] = None,
78
+ ):
67
79
  self.img_slot_idxs = img_slot_idxs
68
80
  self.media_slot_idxs = media_slot_idxs
69
81
  self.array_slot_idxs = array_slot_idxs
70
82
  self.init(size)
83
+ self.parent_row = parent_row
84
+ self.parent_slot_idx = parent_slot_idx
71
85
 
72
86
  def init(self, num_slots: int) -> None:
73
87
  self.vals = np.full(num_slots, None, dtype=object)
@@ -79,6 +93,8 @@ class DataRow:
79
93
  self.pk = None
80
94
  self.file_urls = np.full(num_slots, None, dtype=object)
81
95
  self.file_paths = np.full(num_slots, None, dtype=object)
96
+ self.parent_row = None
97
+ self.parent_slot_idx = None
82
98
 
83
99
  def clear(self, idxs: Optional[np.ndarray] = None) -> None:
84
100
  if idxs is not None:
pixeltable/exprs/expr.py CHANGED
@@ -69,6 +69,8 @@ class Expr(abc.ABC):
69
69
  # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
70
70
  slot_idx: Optional[int]
71
71
 
72
+ T = TypeVar('T', bound='Expr')
73
+
72
74
  def __init__(self, col_type: ts.ColumnType):
73
75
  self.col_type = col_type
74
76
  self.components = []
@@ -97,9 +99,11 @@ class Expr(abc.ABC):
97
99
  by the immediately containing JsonMapper during initialization.
98
100
  """
99
101
  self._bind_rel_paths()
100
- assert not self._has_relative_path, self._expr_tree()
102
+ has_rel_path = self._has_relative_path()
103
+ assert not has_rel_path, self._expr_tree()
104
+ assert not self._has_relative_path(), self._expr_tree()
101
105
 
102
- def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapper'] = None) -> None:
106
+ def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapperDispatch'] = None) -> None:
103
107
  for c in self.components:
104
108
  c._bind_rel_paths(mapper)
105
109
 
@@ -188,7 +192,7 @@ class Expr(abc.ABC):
188
192
  return False
189
193
  return all(a[i].equals(b[i]) for i in range(len(a)))
190
194
 
191
- def copy(self) -> Expr:
195
+ def copy(self: T) -> T:
192
196
  """
193
197
  Creates a copy that can be evaluated separately: it doesn't share any eval context (slot_idx)
194
198
  but shares everything else (catalog objects, etc.)
@@ -206,7 +210,7 @@ class Expr(abc.ABC):
206
210
  return None
207
211
  return [e.copy() for e in expr_list]
208
212
 
209
- def __deepcopy__(self, memo=None) -> Expr:
213
+ def __deepcopy__(self, memo: Optional[dict[int, Any]] = None) -> Expr:
210
214
  # we don't need to create an actual deep copy because all state other than execution state is read-only
211
215
  if memo is None:
212
216
  memo = {}
@@ -296,8 +300,6 @@ class Expr(abc.ABC):
296
300
  # instances of that subclass; and another that returns all subexpressions that match the given filter.
297
301
  # In order for type checking to behave correctly on both forms, we provide two overloaded signatures.
298
302
 
299
- T = TypeVar('T', bound='Expr')
300
-
301
303
  @overload
302
304
  def subexprs(
303
305
  self, *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
@@ -370,9 +372,8 @@ class Expr(abc.ABC):
370
372
  except StopIteration:
371
373
  return False
372
374
 
373
- @property
374
375
  def _has_relative_path(self) -> bool:
375
- return any(c._has_relative_path for c in self.components)
376
+ return any(c._has_relative_path() for c in self.components)
376
377
 
377
378
  def tbl_ids(self) -> set[UUID]:
378
379
  """Returns table ids referenced by this expr."""
@@ -459,7 +460,6 @@ class Expr(abc.ABC):
459
460
  return Literal(o, col_type=obj_type)
460
461
  return None
461
462
 
462
- @abc.abstractmethod
463
463
  def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
464
464
  """
465
465
  If this expr can be materialized directly in SQL:
@@ -469,7 +469,7 @@ class Expr(abc.ABC):
469
469
  - returns None
470
470
  - eval() will be called
471
471
  """
472
- pass
472
+ return None
473
473
 
474
474
  @abc.abstractmethod
475
475
  def eval(self, data_row: DataRow, row_builder: 'exprs.RowBuilder') -> None:
@@ -835,13 +835,13 @@ class Expr(abc.ABC):
835
835
  first_param = next(params_iter) if len(params) >= 1 else None
836
836
  second_param = next(params_iter) if len(params) >= 2 else None
837
837
  # Check that fn has at least one positional parameter
838
- if len(params) == 0 or first_param.kind in {inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD}:
838
+ if len(params) == 0 or first_param.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
839
839
  raise excs.Error(f'Function `{fn.__name__}` has no positional parameters.')
840
840
  # Check that fn has at most one required parameter, i.e., its second parameter
841
841
  # has no default and is not a varargs
842
842
  if (
843
843
  len(params) >= 2
844
- and second_param.kind not in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}
844
+ and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
845
845
  and second_param.default is inspect.Parameter.empty
846
846
  ):
847
847
  raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
@@ -205,6 +205,10 @@ class FunctionCall(Expr):
205
205
  def has_group_by(self) -> bool:
206
206
  return self.group_by_stop_idx != 0
207
207
 
208
+ @property
209
+ def is_async(self) -> bool:
210
+ return self.fn.is_async
211
+
208
212
  @property
209
213
  def group_by(self) -> list[Expr]:
210
214
  return self.components[self.group_by_start_idx : self.group_by_stop_idx]
@@ -272,6 +276,34 @@ class FunctionCall(Expr):
272
276
  assert isinstance(self.fn, func.AggregateFunction)
273
277
  self.aggregator = self.fn.agg_class(**self.agg_init_args)
274
278
 
279
+ @property
280
+ def bound_args(self) -> dict[str, Expr]:
281
+ """
282
+ Reconstructs bound arguments from the components of this FunctionCall.
283
+ """
284
+ bound_args: dict[str, Expr] = {}
285
+ for name, idx in self.bound_idxs.items():
286
+ if isinstance(idx, int):
287
+ bound_args[name] = self.components[idx]
288
+ elif isinstance(idx, Sequence):
289
+ bound_args[name] = Expr.from_object([self.components[i] for i in idx])
290
+ elif isinstance(idx, dict):
291
+ bound_args[name] = Expr.from_object({k: self.components[i] for k, i in idx.items()})
292
+ else:
293
+ raise AssertionError(f'{name}: {idx} (of type `{type(idx)}`)')
294
+ return bound_args
295
+
296
+ def substitute(self, spec: dict[Expr, Expr]) -> Expr:
297
+ """
298
+ Substitution of FunctionCall arguments could cause the return value to become more specific, in the case
299
+ where a variable is replaced with a specific value.
300
+ """
301
+ res = super().substitute(spec)
302
+ assert res is self
303
+ self.return_type = self.fn.call_return_type(self.bound_args)
304
+ self.col_type = self.return_type
305
+ return self
306
+
275
307
  def update(self, data_row: DataRow) -> None:
276
308
  """
277
309
  Update agg state
@@ -289,7 +321,7 @@ class FunctionCall(Expr):
289
321
  if (
290
322
  val is None
291
323
  and parameters_by_pos[idx].kind
292
- in {inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD}
324
+ in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
293
325
  and not parameters_by_pos[idx].col_type.nullable
294
326
  ):
295
327
  return None
@@ -302,7 +334,7 @@ class FunctionCall(Expr):
302
334
  if (
303
335
  val is None
304
336
  and parameters[param_name].kind
305
- in {inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD}
337
+ in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
306
338
  and not parameters[param_name].col_type.nullable
307
339
  ):
308
340
  return None