pixeltable 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +63 -36
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +12 -14
- pixeltable/catalog/insertable_table.py +4 -7
- pixeltable/catalog/path.py +2 -2
- pixeltable/catalog/table.py +64 -56
- pixeltable/catalog/table_version.py +42 -40
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +8 -7
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -17
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +3 -4
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -5
- pixeltable/func/expr_template_function.py +22 -2
- pixeltable/func/function.py +4 -5
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/signature.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +5 -3
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.11.dist-info/METADATA +436 -0
- pixeltable-0.3.11.dist-info/RECORD +179 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
|
@@ -11,7 +11,7 @@ from typing import Awaitable, Collection, Optional
|
|
|
11
11
|
from pixeltable import env, func
|
|
12
12
|
from pixeltable.config import Config
|
|
13
13
|
|
|
14
|
-
from .globals import Dispatcher, FnCallArgs, Scheduler
|
|
14
|
+
from .globals import Dispatcher, ExecCtx, FnCallArgs, Scheduler
|
|
15
15
|
|
|
16
16
|
_logger = logging.getLogger('pixeltable')
|
|
17
17
|
|
|
@@ -62,9 +62,6 @@ class RateLimitsScheduler(Scheduler):
|
|
|
62
62
|
def matches(cls, resource_pool: str) -> bool:
|
|
63
63
|
return resource_pool.startswith('rate-limits:')
|
|
64
64
|
|
|
65
|
-
def submit(self, item: FnCallArgs) -> None:
|
|
66
|
-
self.queue.put_nowait(self.QueueItem(item, 0))
|
|
67
|
-
|
|
68
65
|
def _set_pool_info(self) -> None:
|
|
69
66
|
"""Initialize pool_info with the RateLimitsInfo for the resource pool, if available"""
|
|
70
67
|
if self.pool_info is not None:
|
|
@@ -76,7 +73,7 @@ class RateLimitsScheduler(Scheduler):
|
|
|
76
73
|
assert hasattr(self.pool_info, 'get_request_resources')
|
|
77
74
|
sig = inspect.signature(self.pool_info.get_request_resources)
|
|
78
75
|
self.get_request_resources_param_names = [p.name for p in sig.parameters.values()]
|
|
79
|
-
self.est_usage =
|
|
76
|
+
self.est_usage = dict.fromkeys(self._resources, 0)
|
|
80
77
|
|
|
81
78
|
async def _main_loop(self) -> None:
|
|
82
79
|
item: Optional[RateLimitsScheduler.QueueItem] = None
|
|
@@ -90,7 +87,7 @@ class RateLimitsScheduler(Scheduler):
|
|
|
90
87
|
if self.pool_info is None or not self.pool_info.is_initialized():
|
|
91
88
|
# wait for a single request to get rate limits
|
|
92
89
|
_logger.debug(f'initializing rate limits for {self.resource_pool}')
|
|
93
|
-
await self._exec(item.request, item.num_retries, is_task=False)
|
|
90
|
+
await self._exec(item.request, item.exec_ctx, item.num_retries, is_task=False)
|
|
94
91
|
_logger.debug(f'initialized rate limits for {self.resource_pool}')
|
|
95
92
|
item = None
|
|
96
93
|
# if this was the first request, it created the pool_info
|
|
@@ -141,7 +138,7 @@ class RateLimitsScheduler(Scheduler):
|
|
|
141
138
|
self.est_usage[resource] += val
|
|
142
139
|
_logger.debug(f'creating task for {self.resource_pool}')
|
|
143
140
|
self.num_in_flight += 1
|
|
144
|
-
task = asyncio.create_task(self._exec(item.request, item.num_retries, is_task=True))
|
|
141
|
+
task = asyncio.create_task(self._exec(item.request, item.exec_ctx, item.num_retries, is_task=True))
|
|
145
142
|
self.dispatcher.register_task(task)
|
|
146
143
|
item = None
|
|
147
144
|
|
|
@@ -171,7 +168,7 @@ class RateLimitsScheduler(Scheduler):
|
|
|
171
168
|
return None
|
|
172
169
|
return min(candidates, key=lambda x: x[1])[0]
|
|
173
170
|
|
|
174
|
-
async def _exec(self, request: FnCallArgs, num_retries: int, is_task: bool) -> None:
|
|
171
|
+
async def _exec(self, request: FnCallArgs, exec_ctx: ExecCtx, num_retries: int, is_task: bool) -> None:
|
|
175
172
|
assert all(not row.has_val[request.fn_call.slot_idx] for row in request.rows)
|
|
176
173
|
assert all(not row.has_exc(request.fn_call.slot_idx) for row in request.rows)
|
|
177
174
|
|
|
@@ -180,7 +177,8 @@ class RateLimitsScheduler(Scheduler):
|
|
|
180
177
|
pxt_fn = request.fn_call.fn
|
|
181
178
|
assert isinstance(pxt_fn, func.CallableFunction)
|
|
182
179
|
_logger.debug(
|
|
183
|
-
f'scheduler {self.resource_pool}:
|
|
180
|
+
f'scheduler {self.resource_pool}: '
|
|
181
|
+
f'start evaluating slot {request.fn_call.slot_idx}, batch_size={len(request.rows)}'
|
|
184
182
|
)
|
|
185
183
|
self.total_requests += 1
|
|
186
184
|
if request.is_batched:
|
|
@@ -193,13 +191,14 @@ class RateLimitsScheduler(Scheduler):
|
|
|
193
191
|
request.row[request.fn_call.slot_idx] = result
|
|
194
192
|
end_ts = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
195
193
|
_logger.debug(
|
|
196
|
-
f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx}
|
|
194
|
+
f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx} '
|
|
195
|
+
f'in {end_ts - start_ts}, batch_size={len(request.rows)}'
|
|
197
196
|
)
|
|
198
197
|
|
|
199
198
|
# purge accumulated usage estimate, now that we have a new report
|
|
200
|
-
self.est_usage =
|
|
199
|
+
self.est_usage = dict.fromkeys(self._resources, 0)
|
|
201
200
|
|
|
202
|
-
self.dispatcher.dispatch(request.rows)
|
|
201
|
+
self.dispatcher.dispatch(request.rows, exec_ctx)
|
|
203
202
|
except Exception as exc:
|
|
204
203
|
_logger.debug(f'scheduler {self.resource_pool}: exception in slot {request.fn_call.slot_idx}: {exc}')
|
|
205
204
|
if self.pool_info is None:
|
|
@@ -212,7 +211,7 @@ class RateLimitsScheduler(Scheduler):
|
|
|
212
211
|
self.total_retried += 1
|
|
213
212
|
_logger.debug(f'scheduler {self.resource_pool}: retrying in {retry_delay} seconds')
|
|
214
213
|
await asyncio.sleep(retry_delay)
|
|
215
|
-
self.queue.put_nowait(self.QueueItem(request, num_retries + 1))
|
|
214
|
+
self.queue.put_nowait(self.QueueItem(request, num_retries + 1, exec_ctx))
|
|
216
215
|
return
|
|
217
216
|
# TODO: update resource limits reported in exc.response.headers, if present
|
|
218
217
|
|
|
@@ -220,7 +219,7 @@ class RateLimitsScheduler(Scheduler):
|
|
|
220
219
|
_, _, exc_tb = sys.exc_info()
|
|
221
220
|
for row in request.rows:
|
|
222
221
|
row.set_exc(request.fn_call.slot_idx, exc)
|
|
223
|
-
self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb)
|
|
222
|
+
self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb, exec_ctx)
|
|
224
223
|
finally:
|
|
225
224
|
_logger.debug(f'Scheduler stats: #requests={self.total_requests}, #retried={self.total_retried}')
|
|
226
225
|
if is_task:
|
|
@@ -301,15 +300,15 @@ class RequestRateScheduler(Scheduler):
|
|
|
301
300
|
if item.num_retries > 0:
|
|
302
301
|
# the last request encountered some problem: retry it synchronously, to wait for the problem to pass
|
|
303
302
|
_logger.debug(f'retrying request for {self.resource_pool}: #retries={item.num_retries}')
|
|
304
|
-
await self._exec(item.request, item.num_retries, is_task=False)
|
|
303
|
+
await self._exec(item.request, item.exec_ctx, item.num_retries, is_task=False)
|
|
305
304
|
_logger.debug(f'retried request for {self.resource_pool}: #retries={item.num_retries}')
|
|
306
305
|
else:
|
|
307
306
|
_logger.debug(f'creating task for {self.resource_pool}')
|
|
308
307
|
self.num_in_flight += 1
|
|
309
|
-
task = asyncio.create_task(self._exec(item.request, item.num_retries, is_task=True))
|
|
308
|
+
task = asyncio.create_task(self._exec(item.request, item.exec_ctx, item.num_retries, is_task=True))
|
|
310
309
|
self.dispatcher.register_task(task)
|
|
311
310
|
|
|
312
|
-
async def _exec(self, request: FnCallArgs, num_retries: int, is_task: bool) -> None:
|
|
311
|
+
async def _exec(self, request: FnCallArgs, exec_ctx: ExecCtx, num_retries: int, is_task: bool) -> None:
|
|
313
312
|
assert all(not row.has_val[request.fn_call.slot_idx] for row in request.rows)
|
|
314
313
|
assert all(not row.has_exc(request.fn_call.slot_idx) for row in request.rows)
|
|
315
314
|
|
|
@@ -318,7 +317,8 @@ class RequestRateScheduler(Scheduler):
|
|
|
318
317
|
pxt_fn = request.fn_call.fn
|
|
319
318
|
assert isinstance(pxt_fn, func.CallableFunction)
|
|
320
319
|
_logger.debug(
|
|
321
|
-
f'scheduler {self.resource_pool}:
|
|
320
|
+
f'scheduler {self.resource_pool}: '
|
|
321
|
+
f'start evaluating slot {request.fn_call.slot_idx}, batch_size={len(request.rows)}'
|
|
322
322
|
)
|
|
323
323
|
self.total_requests += 1
|
|
324
324
|
if request.is_batched:
|
|
@@ -331,9 +331,10 @@ class RequestRateScheduler(Scheduler):
|
|
|
331
331
|
request.row[request.fn_call.slot_idx] = result
|
|
332
332
|
end_ts = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
333
333
|
_logger.debug(
|
|
334
|
-
f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx}
|
|
334
|
+
f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx} '
|
|
335
|
+
f'in {end_ts - start_ts}, batch_size={len(request.rows)}'
|
|
335
336
|
)
|
|
336
|
-
self.dispatcher.dispatch(request.rows)
|
|
337
|
+
self.dispatcher.dispatch(request.rows, exec_ctx)
|
|
337
338
|
|
|
338
339
|
except Exception as exc:
|
|
339
340
|
# TODO: which exception can be retried?
|
|
@@ -341,17 +342,18 @@ class RequestRateScheduler(Scheduler):
|
|
|
341
342
|
status = getattr(exc, 'status', None)
|
|
342
343
|
_logger.debug(f'type={type(exc)} has_status={hasattr(exc, "status")} status={status}')
|
|
343
344
|
if num_retries < self.MAX_RETRIES:
|
|
344
|
-
self.queue.put_nowait(self.QueueItem(request, num_retries + 1))
|
|
345
|
+
self.queue.put_nowait(self.QueueItem(request, num_retries + 1, exec_ctx))
|
|
345
346
|
return
|
|
346
347
|
|
|
347
348
|
# record the exception
|
|
348
349
|
_, _, exc_tb = sys.exc_info()
|
|
349
350
|
for row in request.rows:
|
|
350
351
|
row.set_exc(request.fn_call.slot_idx, exc)
|
|
351
|
-
self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb)
|
|
352
|
+
self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb, exec_ctx)
|
|
352
353
|
finally:
|
|
353
354
|
_logger.debug(
|
|
354
|
-
f'Scheduler stats: #in-flight={self.num_in_flight} #requests={self.total_requests},
|
|
355
|
+
f'Scheduler stats: #in-flight={self.num_in_flight} #requests={self.total_requests}, '
|
|
356
|
+
f'#retried={self.total_retried}'
|
|
355
357
|
)
|
|
356
358
|
if is_task:
|
|
357
359
|
self.num_in_flight -= 1
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any, AsyncIterator,
|
|
2
|
+
from typing import Any, AsyncIterator, Optional
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
import pixeltable.exprs as exprs
|
|
4
|
+
from pixeltable import catalog, exprs
|
|
6
5
|
from pixeltable.utils.media_store import MediaStore
|
|
7
6
|
|
|
8
7
|
from .data_row_batch import DataRowBatch
|
|
@@ -68,9 +67,12 @@ class InMemoryDataNode(ExecNode):
|
|
|
68
67
|
if col_info.col.col_type.is_image_type() and isinstance(val, bytes):
|
|
69
68
|
# this is a literal image, ie, a sequence of bytes; we save this as a media file and store the path
|
|
70
69
|
path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.get().version))
|
|
71
|
-
open(path, 'wb')
|
|
72
|
-
|
|
73
|
-
|
|
70
|
+
with open(path, 'wb') as fp:
|
|
71
|
+
fp.write(val)
|
|
72
|
+
self.output_rows[row_idx][col_info.slot_idx] = path
|
|
73
|
+
else:
|
|
74
|
+
self.output_rows[row_idx][col_info.slot_idx] = val
|
|
75
|
+
|
|
74
76
|
input_slot_idxs.add(col_info.slot_idx)
|
|
75
77
|
|
|
76
78
|
# set the remaining output slots to their default values (presently None)
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Any, AsyncIterator
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
import pixeltable.exprs as exprs
|
|
4
|
+
from pixeltable import catalog, exprs
|
|
6
5
|
|
|
7
6
|
from .data_row_batch import DataRowBatch
|
|
8
7
|
from .exec_node import ExecNode
|
|
@@ -29,7 +28,7 @@ class RowUpdateNode(ExecNode):
|
|
|
29
28
|
input: ExecNode,
|
|
30
29
|
):
|
|
31
30
|
super().__init__(row_builder, [], [], input)
|
|
32
|
-
self.updates =
|
|
31
|
+
self.updates = dict(zip(key_vals_batch, col_vals_batch))
|
|
33
32
|
self.is_rowid_key = is_rowid_key
|
|
34
33
|
# determine slot idxs of all columns we need to read or write
|
|
35
34
|
# retrieve ColumnRefs from the RowBuilder (has slot_idx set)
|
|
@@ -38,7 +37,7 @@ class RowUpdateNode(ExecNode):
|
|
|
38
37
|
for col_ref in row_builder.unique_exprs
|
|
39
38
|
if isinstance(col_ref, exprs.ColumnRef)
|
|
40
39
|
}
|
|
41
|
-
self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0]
|
|
40
|
+
self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0]}
|
|
42
41
|
self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.get().primary_key_columns()}
|
|
43
42
|
self.matched_key_vals: set[tuple] = set()
|
|
44
43
|
|
pixeltable/exec/sql_node.py
CHANGED
|
@@ -6,8 +6,7 @@ from uuid import UUID
|
|
|
6
6
|
|
|
7
7
|
import sqlalchemy as sql
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
import pixeltable.exprs as exprs
|
|
9
|
+
from pixeltable import catalog, exprs
|
|
11
10
|
from pixeltable.env import Env
|
|
12
11
|
|
|
13
12
|
from .data_row_batch import DataRowBatch
|
|
@@ -217,31 +216,31 @@ class SqlNode(ExecNode):
|
|
|
217
216
|
candidates = tbl.get_tbl_versions()
|
|
218
217
|
assert len(candidates) > 0
|
|
219
218
|
joined_tbls: list[catalog.TableVersionHandle] = [candidates[0]]
|
|
220
|
-
for
|
|
221
|
-
if
|
|
222
|
-
joined_tbls.append(
|
|
219
|
+
for t in candidates[1:]:
|
|
220
|
+
if t.id in refd_tbl_ids:
|
|
221
|
+
joined_tbls.append(t)
|
|
223
222
|
|
|
224
223
|
first = True
|
|
225
|
-
prev_tbl: catalog.TableVersionHandle
|
|
226
|
-
for
|
|
224
|
+
prev_tbl: Optional[catalog.TableVersionHandle] = None
|
|
225
|
+
for t in joined_tbls[::-1]:
|
|
227
226
|
if first:
|
|
228
|
-
stmt = stmt.select_from(
|
|
227
|
+
stmt = stmt.select_from(t.get().store_tbl.sa_tbl)
|
|
229
228
|
first = False
|
|
230
229
|
else:
|
|
231
230
|
# join tbl to prev_tbl on prev_tbl's rowid cols
|
|
232
231
|
prev_tbl_rowid_cols = prev_tbl.get().store_tbl.rowid_columns()
|
|
233
|
-
tbl_rowid_cols =
|
|
232
|
+
tbl_rowid_cols = t.get().store_tbl.rowid_columns()
|
|
234
233
|
rowid_clauses = [
|
|
235
234
|
c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[: len(prev_tbl_rowid_cols)])
|
|
236
235
|
]
|
|
237
|
-
stmt = stmt.join(
|
|
238
|
-
if
|
|
239
|
-
stmt = stmt.where(
|
|
236
|
+
stmt = stmt.join(t.get().store_tbl.sa_tbl, sql.and_(*rowid_clauses))
|
|
237
|
+
if t.id in exact_version_only:
|
|
238
|
+
stmt = stmt.where(t.get().store_tbl.v_min_col == t.get().version)
|
|
240
239
|
else:
|
|
241
|
-
stmt = stmt.where(
|
|
242
|
-
|
|
240
|
+
stmt = stmt.where(t.get().store_tbl.v_min_col <= t.get().version).where(
|
|
241
|
+
t.get().store_tbl.v_max_col > t.get().version
|
|
243
242
|
)
|
|
244
|
-
prev_tbl =
|
|
243
|
+
prev_tbl = t
|
|
245
244
|
return stmt
|
|
246
245
|
|
|
247
246
|
def set_where(self, where_clause: exprs.Expr) -> None:
|
|
@@ -291,7 +290,7 @@ class SqlNode(ExecNode):
|
|
|
291
290
|
|
|
292
291
|
conn = Env.get().conn
|
|
293
292
|
result_cursor = conn.execute(stmt)
|
|
294
|
-
for
|
|
293
|
+
for _ in w:
|
|
295
294
|
pass
|
|
296
295
|
|
|
297
296
|
tbl_version = self.tbl.tbl_version if self.tbl is not None else None
|
|
@@ -494,7 +493,7 @@ class SqlJoinNode(SqlNode):
|
|
|
494
493
|
if join_clause.join_type != plan.JoinType.CROSS
|
|
495
494
|
else sql.sql.expression.literal(True)
|
|
496
495
|
)
|
|
497
|
-
is_outer = join_clause.join_type
|
|
496
|
+
is_outer = join_clause.join_type in (plan.JoinType.LEFT, plan.JoinType.FULL_OUTER)
|
|
498
497
|
stmt = stmt.join(
|
|
499
498
|
self.input_ctes[i + 1],
|
|
500
499
|
onclause=on_clause,
|
pixeltable/exprs/__init__.py
CHANGED
|
@@ -15,7 +15,7 @@ from .globals import ArithmeticOperator, ComparisonOperator, LogicalOperator
|
|
|
15
15
|
from .in_predicate import InPredicate
|
|
16
16
|
from .inline_expr import InlineArray, InlineDict, InlineList
|
|
17
17
|
from .is_null import IsNull
|
|
18
|
-
from .json_mapper import JsonMapper
|
|
18
|
+
from .json_mapper import JsonMapper, JsonMapperDispatch
|
|
19
19
|
from .json_path import JsonPath
|
|
20
20
|
from .literal import Literal
|
|
21
21
|
from .method_ref import MethodRef
|
|
@@ -52,7 +52,7 @@ class ColumnPropertyRef(Expr):
|
|
|
52
52
|
return f'{self._col_ref}.{self.prop.name.lower()}'
|
|
53
53
|
|
|
54
54
|
def is_error_prop(self) -> bool:
|
|
55
|
-
return self.prop in
|
|
55
|
+
return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG)
|
|
56
56
|
|
|
57
57
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
58
58
|
if not self._col_ref.col.is_stored:
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -176,13 +176,13 @@ class ColumnRef(Expr):
|
|
|
176
176
|
tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
|
|
177
177
|
return tbl.select(self)
|
|
178
178
|
|
|
179
|
-
def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
179
|
+
def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
180
180
|
return self._df().show(*args, **kwargs)
|
|
181
181
|
|
|
182
|
-
def head(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
182
|
+
def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
183
183
|
return self._df().head(*args, **kwargs)
|
|
184
184
|
|
|
185
|
-
def tail(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
|
|
185
|
+
def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
|
|
186
186
|
return self._df().tail(*args, **kwargs)
|
|
187
187
|
|
|
188
188
|
def count(self) -> int:
|
|
@@ -61,7 +61,7 @@ class CompoundPredicate(Expr):
|
|
|
61
61
|
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
62
62
|
|
|
63
63
|
def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
|
|
64
|
-
if self.operator in
|
|
64
|
+
if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
|
|
65
65
|
return super().split_conjuncts(condition)
|
|
66
66
|
matches = [op for op in self.components if condition(op)]
|
|
67
67
|
non_matches = [op for op in self.components if not condition(op)]
|
pixeltable/exprs/data_row.py
CHANGED
|
@@ -63,11 +63,25 @@ class DataRow:
|
|
|
63
63
|
# - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
|
|
64
64
|
file_paths: np.ndarray # of str
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
# for nested rows (ie, those produced by JsonMapperDispatcher)
|
|
67
|
+
parent_row: Optional[DataRow]
|
|
68
|
+
parent_slot_idx: Optional[int]
|
|
69
|
+
|
|
70
|
+
def __init__(
|
|
71
|
+
self,
|
|
72
|
+
size: int,
|
|
73
|
+
img_slot_idxs: list[int],
|
|
74
|
+
media_slot_idxs: list[int],
|
|
75
|
+
array_slot_idxs: list[int],
|
|
76
|
+
parent_row: Optional[DataRow] = None,
|
|
77
|
+
parent_slot_idx: Optional[int] = None,
|
|
78
|
+
):
|
|
67
79
|
self.img_slot_idxs = img_slot_idxs
|
|
68
80
|
self.media_slot_idxs = media_slot_idxs
|
|
69
81
|
self.array_slot_idxs = array_slot_idxs
|
|
70
82
|
self.init(size)
|
|
83
|
+
self.parent_row = parent_row
|
|
84
|
+
self.parent_slot_idx = parent_slot_idx
|
|
71
85
|
|
|
72
86
|
def init(self, num_slots: int) -> None:
|
|
73
87
|
self.vals = np.full(num_slots, None, dtype=object)
|
|
@@ -79,6 +93,8 @@ class DataRow:
|
|
|
79
93
|
self.pk = None
|
|
80
94
|
self.file_urls = np.full(num_slots, None, dtype=object)
|
|
81
95
|
self.file_paths = np.full(num_slots, None, dtype=object)
|
|
96
|
+
self.parent_row = None
|
|
97
|
+
self.parent_slot_idx = None
|
|
82
98
|
|
|
83
99
|
def clear(self, idxs: Optional[np.ndarray] = None) -> None:
|
|
84
100
|
if idxs is not None:
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -69,6 +69,8 @@ class Expr(abc.ABC):
|
|
|
69
69
|
# - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
|
|
70
70
|
slot_idx: Optional[int]
|
|
71
71
|
|
|
72
|
+
T = TypeVar('T', bound='Expr')
|
|
73
|
+
|
|
72
74
|
def __init__(self, col_type: ts.ColumnType):
|
|
73
75
|
self.col_type = col_type
|
|
74
76
|
self.components = []
|
|
@@ -97,9 +99,11 @@ class Expr(abc.ABC):
|
|
|
97
99
|
by the immediately containing JsonMapper during initialization.
|
|
98
100
|
"""
|
|
99
101
|
self._bind_rel_paths()
|
|
100
|
-
|
|
102
|
+
has_rel_path = self._has_relative_path()
|
|
103
|
+
assert not has_rel_path, self._expr_tree()
|
|
104
|
+
assert not self._has_relative_path(), self._expr_tree()
|
|
101
105
|
|
|
102
|
-
def _bind_rel_paths(self, mapper: Optional['exprs.
|
|
106
|
+
def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapperDispatch'] = None) -> None:
|
|
103
107
|
for c in self.components:
|
|
104
108
|
c._bind_rel_paths(mapper)
|
|
105
109
|
|
|
@@ -188,7 +192,7 @@ class Expr(abc.ABC):
|
|
|
188
192
|
return False
|
|
189
193
|
return all(a[i].equals(b[i]) for i in range(len(a)))
|
|
190
194
|
|
|
191
|
-
def copy(self) ->
|
|
195
|
+
def copy(self: T) -> T:
|
|
192
196
|
"""
|
|
193
197
|
Creates a copy that can be evaluated separately: it doesn't share any eval context (slot_idx)
|
|
194
198
|
but shares everything else (catalog objects, etc.)
|
|
@@ -206,7 +210,7 @@ class Expr(abc.ABC):
|
|
|
206
210
|
return None
|
|
207
211
|
return [e.copy() for e in expr_list]
|
|
208
212
|
|
|
209
|
-
def __deepcopy__(self, memo=None) -> Expr:
|
|
213
|
+
def __deepcopy__(self, memo: Optional[dict[int, Any]] = None) -> Expr:
|
|
210
214
|
# we don't need to create an actual deep copy because all state other than execution state is read-only
|
|
211
215
|
if memo is None:
|
|
212
216
|
memo = {}
|
|
@@ -296,8 +300,6 @@ class Expr(abc.ABC):
|
|
|
296
300
|
# instances of that subclass; and another that returns all subexpressions that match the given filter.
|
|
297
301
|
# In order for type checking to behave correctly on both forms, we provide two overloaded signatures.
|
|
298
302
|
|
|
299
|
-
T = TypeVar('T', bound='Expr')
|
|
300
|
-
|
|
301
303
|
@overload
|
|
302
304
|
def subexprs(
|
|
303
305
|
self, *, filter: Optional[Callable[[Expr], bool]] = None, traverse_matches: bool = True
|
|
@@ -370,9 +372,8 @@ class Expr(abc.ABC):
|
|
|
370
372
|
except StopIteration:
|
|
371
373
|
return False
|
|
372
374
|
|
|
373
|
-
@property
|
|
374
375
|
def _has_relative_path(self) -> bool:
|
|
375
|
-
return any(c._has_relative_path for c in self.components)
|
|
376
|
+
return any(c._has_relative_path() for c in self.components)
|
|
376
377
|
|
|
377
378
|
def tbl_ids(self) -> set[UUID]:
|
|
378
379
|
"""Returns table ids referenced by this expr."""
|
|
@@ -459,7 +460,6 @@ class Expr(abc.ABC):
|
|
|
459
460
|
return Literal(o, col_type=obj_type)
|
|
460
461
|
return None
|
|
461
462
|
|
|
462
|
-
@abc.abstractmethod
|
|
463
463
|
def sql_expr(self, sql_elements: 'exprs.SqlElementCache') -> Optional[sql.ColumnElement]:
|
|
464
464
|
"""
|
|
465
465
|
If this expr can be materialized directly in SQL:
|
|
@@ -469,7 +469,7 @@ class Expr(abc.ABC):
|
|
|
469
469
|
- returns None
|
|
470
470
|
- eval() will be called
|
|
471
471
|
"""
|
|
472
|
-
|
|
472
|
+
return None
|
|
473
473
|
|
|
474
474
|
@abc.abstractmethod
|
|
475
475
|
def eval(self, data_row: DataRow, row_builder: 'exprs.RowBuilder') -> None:
|
|
@@ -835,13 +835,13 @@ class Expr(abc.ABC):
|
|
|
835
835
|
first_param = next(params_iter) if len(params) >= 1 else None
|
|
836
836
|
second_param = next(params_iter) if len(params) >= 2 else None
|
|
837
837
|
# Check that fn has at least one positional parameter
|
|
838
|
-
if len(params) == 0 or first_param.kind in
|
|
838
|
+
if len(params) == 0 or first_param.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
|
|
839
839
|
raise excs.Error(f'Function `{fn.__name__}` has no positional parameters.')
|
|
840
840
|
# Check that fn has at most one required parameter, i.e., its second parameter
|
|
841
841
|
# has no default and is not a varargs
|
|
842
842
|
if (
|
|
843
843
|
len(params) >= 2
|
|
844
|
-
and second_param.kind not in
|
|
844
|
+
and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
845
845
|
and second_param.default is inspect.Parameter.empty
|
|
846
846
|
):
|
|
847
847
|
raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
|
|
@@ -205,6 +205,10 @@ class FunctionCall(Expr):
|
|
|
205
205
|
def has_group_by(self) -> bool:
|
|
206
206
|
return self.group_by_stop_idx != 0
|
|
207
207
|
|
|
208
|
+
@property
|
|
209
|
+
def is_async(self) -> bool:
|
|
210
|
+
return self.fn.is_async
|
|
211
|
+
|
|
208
212
|
@property
|
|
209
213
|
def group_by(self) -> list[Expr]:
|
|
210
214
|
return self.components[self.group_by_start_idx : self.group_by_stop_idx]
|
|
@@ -272,6 +276,34 @@ class FunctionCall(Expr):
|
|
|
272
276
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
273
277
|
self.aggregator = self.fn.agg_class(**self.agg_init_args)
|
|
274
278
|
|
|
279
|
+
@property
|
|
280
|
+
def bound_args(self) -> dict[str, Expr]:
|
|
281
|
+
"""
|
|
282
|
+
Reconstructs bound arguments from the components of this FunctionCall.
|
|
283
|
+
"""
|
|
284
|
+
bound_args: dict[str, Expr] = {}
|
|
285
|
+
for name, idx in self.bound_idxs.items():
|
|
286
|
+
if isinstance(idx, int):
|
|
287
|
+
bound_args[name] = self.components[idx]
|
|
288
|
+
elif isinstance(idx, Sequence):
|
|
289
|
+
bound_args[name] = Expr.from_object([self.components[i] for i in idx])
|
|
290
|
+
elif isinstance(idx, dict):
|
|
291
|
+
bound_args[name] = Expr.from_object({k: self.components[i] for k, i in idx.items()})
|
|
292
|
+
else:
|
|
293
|
+
raise AssertionError(f'{name}: {idx} (of type `{type(idx)}`)')
|
|
294
|
+
return bound_args
|
|
295
|
+
|
|
296
|
+
def substitute(self, spec: dict[Expr, Expr]) -> Expr:
|
|
297
|
+
"""
|
|
298
|
+
Substitution of FunctionCall arguments could cause the return value to become more specific, in the case
|
|
299
|
+
where a variable is replaced with a specific value.
|
|
300
|
+
"""
|
|
301
|
+
res = super().substitute(spec)
|
|
302
|
+
assert res is self
|
|
303
|
+
self.return_type = self.fn.call_return_type(self.bound_args)
|
|
304
|
+
self.col_type = self.return_type
|
|
305
|
+
return self
|
|
306
|
+
|
|
275
307
|
def update(self, data_row: DataRow) -> None:
|
|
276
308
|
"""
|
|
277
309
|
Update agg state
|
|
@@ -289,7 +321,7 @@ class FunctionCall(Expr):
|
|
|
289
321
|
if (
|
|
290
322
|
val is None
|
|
291
323
|
and parameters_by_pos[idx].kind
|
|
292
|
-
in
|
|
324
|
+
in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
293
325
|
and not parameters_by_pos[idx].col_type.nullable
|
|
294
326
|
):
|
|
295
327
|
return None
|
|
@@ -302,7 +334,7 @@ class FunctionCall(Expr):
|
|
|
302
334
|
if (
|
|
303
335
|
val is None
|
|
304
336
|
and parameters[param_name].kind
|
|
305
|
-
in
|
|
337
|
+
in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
306
338
|
and not parameters[param_name].col_type.nullable
|
|
307
339
|
):
|
|
308
340
|
return None
|