pixeltable 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +64 -11
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +50 -27
- pixeltable/catalog/column.py +27 -11
- pixeltable/catalog/dir.py +6 -4
- pixeltable/catalog/globals.py +8 -1
- pixeltable/catalog/insertable_table.py +25 -15
- pixeltable/catalog/named_function.py +10 -6
- pixeltable/catalog/path.py +3 -2
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +123 -103
- pixeltable/catalog/table_version.py +292 -143
- pixeltable/catalog/table_version_path.py +8 -5
- pixeltable/catalog/view.py +68 -27
- pixeltable/dataframe.py +102 -72
- pixeltable/env.py +39 -23
- pixeltable/exec/__init__.py +2 -2
- pixeltable/exec/aggregation_node.py +10 -4
- pixeltable/exec/cache_prefetch_node.py +5 -3
- pixeltable/exec/component_iteration_node.py +9 -8
- pixeltable/exec/data_row_batch.py +21 -10
- pixeltable/exec/exec_context.py +10 -3
- pixeltable/exec/exec_node.py +23 -12
- pixeltable/exec/expr_eval/evaluators.py +18 -17
- pixeltable/exec/expr_eval/expr_eval_node.py +29 -16
- pixeltable/exec/expr_eval/globals.py +33 -11
- pixeltable/exec/expr_eval/row_buffer.py +5 -6
- pixeltable/exec/expr_eval/schedulers.py +170 -42
- pixeltable/exec/in_memory_data_node.py +8 -7
- pixeltable/exec/row_update_node.py +15 -5
- pixeltable/exec/sql_node.py +56 -27
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +57 -26
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +2 -1
- pixeltable/exprs/column_ref.py +20 -15
- pixeltable/exprs/comparison.py +6 -2
- pixeltable/exprs/compound_predicate.py +1 -3
- pixeltable/exprs/data_row.py +2 -2
- pixeltable/exprs/expr.py +101 -72
- pixeltable/exprs/expr_dict.py +2 -1
- pixeltable/exprs/expr_set.py +3 -1
- pixeltable/exprs/function_call.py +39 -41
- pixeltable/exprs/globals.py +1 -0
- pixeltable/exprs/in_predicate.py +2 -2
- pixeltable/exprs/inline_expr.py +20 -17
- pixeltable/exprs/json_mapper.py +4 -2
- pixeltable/exprs/json_path.py +12 -18
- pixeltable/exprs/literal.py +5 -9
- pixeltable/exprs/method_ref.py +1 -0
- pixeltable/exprs/object_ref.py +1 -1
- pixeltable/exprs/row_builder.py +31 -16
- pixeltable/exprs/rowid_ref.py +14 -5
- pixeltable/exprs/similarity_expr.py +11 -6
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +24 -9
- pixeltable/ext/__init__.py +1 -0
- pixeltable/ext/functions/__init__.py +1 -0
- pixeltable/ext/functions/whisperx.py +2 -2
- pixeltable/ext/functions/yolox.py +11 -11
- pixeltable/func/aggregate_function.py +17 -13
- pixeltable/func/callable_function.py +6 -6
- pixeltable/func/expr_template_function.py +15 -14
- pixeltable/func/function.py +16 -16
- pixeltable/func/function_registry.py +11 -8
- pixeltable/func/globals.py +4 -2
- pixeltable/func/query_template_function.py +12 -13
- pixeltable/func/signature.py +18 -9
- pixeltable/func/tools.py +10 -17
- pixeltable/func/udf.py +106 -11
- pixeltable/functions/__init__.py +21 -2
- pixeltable/functions/anthropic.py +21 -15
- pixeltable/functions/fireworks.py +63 -5
- pixeltable/functions/gemini.py +13 -3
- pixeltable/functions/globals.py +18 -6
- pixeltable/functions/huggingface.py +20 -38
- pixeltable/functions/image.py +7 -3
- pixeltable/functions/json.py +1 -0
- pixeltable/functions/llama_cpp.py +1 -4
- pixeltable/functions/mistralai.py +31 -20
- pixeltable/functions/ollama.py +4 -18
- pixeltable/functions/openai.py +214 -109
- pixeltable/functions/replicate.py +11 -10
- pixeltable/functions/string.py +70 -7
- pixeltable/functions/timestamp.py +21 -8
- pixeltable/functions/together.py +66 -52
- pixeltable/functions/video.py +1 -0
- pixeltable/functions/vision.py +14 -11
- pixeltable/functions/whisper.py +2 -1
- pixeltable/globals.py +61 -28
- pixeltable/index/__init__.py +1 -1
- pixeltable/index/btree.py +5 -3
- pixeltable/index/embedding_index.py +15 -14
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +30 -25
- pixeltable/io/fiftyone.py +6 -14
- pixeltable/io/globals.py +33 -27
- pixeltable/io/hf_datasets.py +3 -2
- pixeltable/io/label_studio.py +80 -71
- pixeltable/io/pandas.py +33 -9
- pixeltable/io/parquet.py +10 -13
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +205 -0
- pixeltable/iterators/document.py +19 -8
- pixeltable/iterators/image.py +6 -24
- pixeltable/iterators/string.py +3 -6
- pixeltable/iterators/video.py +1 -7
- pixeltable/metadata/__init__.py +9 -2
- pixeltable/metadata/converters/convert_10.py +2 -2
- pixeltable/metadata/converters/convert_15.py +1 -5
- pixeltable/metadata/converters/convert_16.py +2 -4
- pixeltable/metadata/converters/convert_17.py +2 -4
- pixeltable/metadata/converters/convert_18.py +2 -4
- pixeltable/metadata/converters/convert_19.py +2 -5
- pixeltable/metadata/converters/convert_20.py +1 -4
- pixeltable/metadata/converters/convert_21.py +4 -6
- pixeltable/metadata/converters/convert_22.py +1 -0
- pixeltable/metadata/converters/convert_23.py +5 -5
- pixeltable/metadata/converters/convert_24.py +12 -13
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/util.py +3 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +13 -2
- pixeltable/plan.py +173 -98
- pixeltable/store.py +42 -26
- pixeltable/type_system.py +130 -85
- pixeltable/utils/arrow.py +1 -7
- pixeltable/utils/coco.py +16 -17
- pixeltable/utils/code.py +1 -1
- pixeltable/utils/console_output.py +44 -0
- pixeltable/utils/description_helper.py +7 -7
- pixeltable/utils/documents.py +3 -1
- pixeltable/utils/filecache.py +13 -8
- pixeltable/utils/http_server.py +9 -8
- pixeltable/utils/media_store.py +2 -1
- pixeltable/utils/pytorch.py +11 -14
- pixeltable/utils/s3.py +1 -0
- pixeltable/utils/sql.py +1 -0
- pixeltable/utils/transactional_directory.py +2 -2
- {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/METADATA +7 -8
- pixeltable-0.3.3.dist-info/RECORD +163 -0
- pixeltable-0.3.1.dist-info/RECORD +0 -160
- {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0

pixeltable/exec/expr_eval/expr_eval_node.py
@@ -4,24 +4,23 @@ import asyncio
 import logging
 import traceback
 from types import TracebackType
-from typing import
+from typing import AsyncIterator, Iterable, Optional, Union

 import numpy as np

 import pixeltable.exceptions as excs
-from pixeltable import exprs
-
+from pixeltable import exprs, func
+
+from ..data_row_batch import DataRowBatch
+from ..exec_node import ExecNode
 from .evaluators import DefaultExprEvaluator, FnCallEvaluator
 from .globals import Evaluator, Scheduler
 from .row_buffer import RowBuffer
 from .schedulers import SCHEDULERS
-from ..data_row_batch import DataRowBatch
-from ..exec_node import ExecNode

 _logger = logging.getLogger('pixeltable')


-
 class ExprEvalNode(ExecNode):
     """
     Expression evaluation
@@ -35,10 +34,13 @@ class ExprEvalNode(ExecNode):
     TODO:
     - Literal handling: currently, Literal values are copied into slots via the normal evaluation mechanism, which is
       needless overhead; instead: pre-populate Literal slots in _init_row()
+    - dynamically determine MAX_BUFFERED_ROWS, based on the avg memory consumption of a row and our configured memory
+      limit
     - local model inference on gpu: currently, no attempt is made to ensure that models can fit onto the gpu
       simultaneously, which will cause errors; instead, the execution should be divided into sequential phases, each
       of which only contains a subset of the models which is known to fit onto the gpu simultaneously
     """
+
     maintain_input_order: bool  # True if we're returning rows in the order we received them from our input
     num_dependencies: np.ndarray  # number of dependencies for our output slots; indexed by slot idx
     outputs: np.ndarray  # bool per slot; True if this slot is part of our output
@@ -68,11 +70,15 @@ class ExprEvalNode(ExecNode):
     num_output_rows: int

     BATCH_SIZE = 64
-    MAX_BUFFERED_ROWS =
+    MAX_BUFFERED_ROWS = 2048  # maximum number of rows that have been dispatched but not yet returned

     def __init__(
-        self,
-
+        self,
+        row_builder: exprs.RowBuilder,
+        output_exprs: Iterable[exprs.Expr],
+        input_exprs: Iterable[exprs.Expr],
+        input: ExecNode,
+        maintain_input_order: bool = True,
     ):
         super().__init__(row_builder, output_exprs, input_exprs, input)
         self.maintain_input_order = maintain_input_order
@@ -148,7 +154,9 @@ class ExprEvalNode(ExecNode):
                 self.row_pos_map[id(row)] = self.num_input_rows + idx
             self.num_input_rows += len(batch)
             self.avail_input_rows += len(batch)
-            _logger.debug(
+            _logger.debug(
+                f'adding input: batch_size={len(batch)} #input_rows={self.num_input_rows} #avail={self.avail_input_rows}'
+            )
         except StopAsyncIteration:
             self.input_complete = True
             _logger.debug(f'finished input: #input_rows={self.num_input_rows}, #avail={self.avail_input_rows}')
@@ -175,11 +183,11 @@ class ExprEvalNode(ExecNode):
         rows: list[exprs.DataRow]
         if avail_current_batch_rows > num_rows:
             # we only need rows from current_input_batch
-            rows = self.current_input_batch.rows[self.input_row_idx:self.input_row_idx + num_rows]
+            rows = self.current_input_batch.rows[self.input_row_idx : self.input_row_idx + num_rows]
             self.input_row_idx += num_rows
         else:
             # we need rows from both current_/next_input_batch
-            rows = self.current_input_batch.rows[self.input_row_idx:]
+            rows = self.current_input_batch.rows[self.input_row_idx :]
             self.current_input_batch = self.next_input_batch
             self.next_input_batch = None
             self.input_row_idx = 0
@@ -236,6 +244,7 @@ class ExprEvalNode(ExecNode):
         exc_event_aw = asyncio.create_task(self.exc_event.wait(), name='exc_event.wait()')
         input_batch_aw: Optional[asyncio.Task] = None
         completed_aw: Optional[asyncio.Task] = None
+        closed_evaluators = False  # True after calling Evaluator.close()

         try:
             while True:
@@ -275,11 +284,12 @@ class ExprEvalNode(ExecNode):
                     assert self.output_buffer.num_rows == 0
                     return

-                if self.input_complete and self.avail_input_rows == 0:
+                if self.input_complete and self.avail_input_rows == 0 and not closed_evaluators:
                     # no more input rows to dispatch, but we're still waiting for rows to finish:
                     # close all slot evaluators to flush queued rows
                     for evaluator in self.slot_evaluators.values():
                         evaluator.close()
+                    closed_evaluators = True

                 # we don't have a full batch of rows at this point and need to wait
                 aws = {exc_event_aw}  # always wait for an exception
@@ -335,8 +345,7 @@ class ExprEvalNode(ExecNode):
         first_row = rows[0]
         input_vals = [first_row[idx] for idx in dependency_idxs]
         e = self.row_builder.unique_exprs[slot_with_exc]
-        self.error = excs.ExprEvalError(
-            e, f'expression {e}', first_row.get_exc(e.slot_idx), exc_tb, input_vals, 0)
+        self.error = excs.ExprEvalError(e, f'expression {e}', first_row.get_exc(e.slot_idx), exc_tb, input_vals, 0)
         self.exc_event.set()
         return

@@ -391,7 +400,11 @@ class ExprEvalNode(ExecNode):
             _logger.debug(f'Scheduling {len(ready_rows)} rows for slot {slot_idx}')
             self.slot_evaluators[slot_idx].schedule(ready_rows, slot_idx)

-    def
+    def register_task(self, t: asyncio.Task) -> None:
+        self.tasks.add(t)
+        t.add_done_callback(self._done_cb)
+
+    def _done_cb(self, t: asyncio.Task) -> None:
         self.tasks.discard(t)
         # end the main loop if we had an unhandled exception
         try:
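
The register_task()/_done_cb() pair added above centralizes bookkeeping that callers previously did by hand (dispatcher.tasks.add() followed by task.add_done_callback(dispatcher.done_cb)). A minimal standalone sketch of that pattern, using illustrative names rather than the actual ExprEvalNode/Dispatcher API:

import asyncio

class TaskRegistry:
    """Holds strong references to in-flight tasks and drops them when they complete."""

    def __init__(self) -> None:
        self.tasks: set[asyncio.Task] = set()

    def register_task(self, t: asyncio.Task) -> None:
        # keep a reference so the task isn't garbage-collected while running
        self.tasks.add(t)
        t.add_done_callback(self._done_cb)

    def _done_cb(self, t: asyncio.Task) -> None:
        self.tasks.discard(t)
        # surface unhandled exceptions instead of losing them silently
        if not t.cancelled() and t.exception() is not None:
            print(f'task failed: {t.exception()!r}')

async def main() -> None:
    registry = TaskRegistry()
    registry.register_task(asyncio.create_task(asyncio.sleep(0.01)))
    await asyncio.sleep(0.1)
    assert not registry.tasks  # completed tasks were discarded by the done callback

asyncio.run(main())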

pixeltable/exec/expr_eval/globals.py
@@ -1,16 +1,18 @@
+from __future__ import annotations
+
 import abc
 import asyncio
 from dataclasses import dataclass
 from types import TracebackType
-from typing import Any,
+from typing import Any, Optional, Protocol

-from pixeltable import exprs
-from pixeltable import func
+from pixeltable import exprs, func


 @dataclass
 class FnCallArgs:
     """Container for everything needed to execute a FunctionCall against one or more DataRows"""
+
     fn_call: exprs.FunctionCall
     rows: list[exprs.DataRow]
     # single call
@@ -37,16 +39,36 @@ class FnCallArgs:

 class Scheduler(abc.ABC):
     """
-    Base class for schedulers. A scheduler executes FunctionCalls against a limited resource pool.
+    Base class for queueing schedulers. A scheduler executes FunctionCalls against a limited resource pool.

     Expected behavior:
     - all created tasks must be recorded in dispatcher.tasks
     - schedulers are responsible for aborting execution when a) the task is cancelled or b) when an exception occurred
       elsewhere (indicated by dispatcher.exc_event)
     """
-
+
+    @dataclass(frozen=True)
+    class QueueItem:
+        """Container of work items for queueing schedulers"""
+
+        request: FnCallArgs
+        num_retries: int
+
+        def __lt__(self, other: Scheduler.QueueItem) -> bool:
+            # prioritize by number of retries (more retries = higher priority)
+            return self.num_retries > other.num_retries
+
+    resource_pool: str
+    queue: asyncio.PriorityQueue[QueueItem]  # prioritizes retries
+    dispatcher: Dispatcher
+
+    def __init__(self, resource_pool: str, dispatcher: Dispatcher):
+        self.resource_pool = resource_pool
+        self.queue = asyncio.PriorityQueue()
+        self.dispatcher = dispatcher
+
     def submit(self, item: FnCallArgs) -> None:
-
+        self.queue.put_nowait(self.QueueItem(item, 0))

     @classmethod
     @abc.abstractmethod
@@ -59,11 +81,11 @@ class Dispatcher(Protocol):
     """
     Row dispatcher used by Evaluators/Schedulers for post-processing after slot materialization and for task management.

-    Task management: all tasks need to be
+    Task management: all tasks need to be registered via register_task()
     Exceptions: evaluators/schedulers need to check exc_event prior to starting long-running (non-interruptible)
       computations
     """
-
+
     row_builder: exprs.RowBuilder
     exc_event: asyncio.Event
     schedulers: dict[str, Scheduler]  # key: resource pool id
@@ -76,8 +98,8 @@ class Dispatcher(Protocol):
         """Propagates exception in slot_with_exc to all dependent slots and dispatches the rest; does not block"""
         ...

-    def
-        """
+    def register_task(self, f: asyncio.Task) -> None:
+        """Register task with dispatcher for subsequent cleanup; does not block"""
         ...


@@ -91,6 +113,7 @@ class Evaluator(abc.ABC):
     - evaluators are responsible for aborting execution when a) the task is cancelled or b) when an exception occurred
       elsewhere (indicated by dispatcher.exc_event)
     """
+
     dispatcher: Dispatcher
     is_closed: bool

@@ -111,4 +134,3 @@ class Evaluator(abc.ABC):
         """Indicates that there may not be any more rows getting scheduled"""
         self.is_closed = True
         self._close()
-
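
Scheduler.QueueItem.__lt__ above inverts the usual comparison so that items with more retries sort as "smaller"; asyncio.PriorityQueue is a heap ordered by <, so retried work is handed back to the scheduler before fresh submissions. A small self-contained illustration of that ordering, with a plain string standing in for FnCallArgs:

import asyncio
from dataclasses import dataclass

@dataclass(frozen=True)
class QueueItem:
    request: str  # placeholder payload; the real class carries FnCallArgs
    num_retries: int

    def __lt__(self, other: 'QueueItem') -> bool:
        # more retries == higher priority
        return self.num_retries > other.num_retries

async def main() -> None:
    queue: asyncio.PriorityQueue[QueueItem] = asyncio.PriorityQueue()
    queue.put_nowait(QueueItem('fresh request', 0))
    queue.put_nowait(QueueItem('second retry', 2))
    queue.put_nowait(QueueItem('first retry', 1))
    while not queue.empty():
        print(await queue.get())  # second retry, then first retry, then fresh request

asyncio.run(main())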

pixeltable/exec/expr_eval/row_buffer.py
@@ -62,15 +62,14 @@ class RowBuffer:
             return []
         rows: list[exprs.DataRow]
         if self.head_idx + n <= self.size:
-            rows = self.buffer[self.head_idx:self.head_idx + n].tolist()
-            self.buffer[self.head_idx:self.head_idx + n] = None
+            rows = self.buffer[self.head_idx : self.head_idx + n].tolist()
+            self.buffer[self.head_idx : self.head_idx + n] = None
         else:
-            rows = np.concatenate([self.buffer[self.head_idx:], self.buffer[:self.head_idx + n - self.size]]).tolist()
-            self.buffer[self.head_idx:] = None
-            self.buffer[:self.head_idx + n - self.size] = None
+            rows = np.concatenate([self.buffer[self.head_idx :], self.buffer[: self.head_idx + n - self.size]]).tolist()
+            self.buffer[self.head_idx :] = None
+            self.buffer[: self.head_idx + n - self.size] = None
         self.head_pos += n
         self.head_idx = (self.head_idx + n) % self.size
         self.num_rows -= n
         self.num_ready -= n
         return rows
-

pixeltable/exec/expr_eval/schedulers.py
@@ -5,12 +5,12 @@ import datetime
 import inspect
 import logging
 import sys
-
-from typing import
+import time
+from typing import Awaitable, Collection, Optional

-from pixeltable import env
-
-from .globals import
+from pixeltable import env, func
+
+from .globals import Dispatcher, FnCallArgs, Scheduler

 _logger = logging.getLogger('pixeltable')

@@ -29,19 +29,7 @@ class RateLimitsScheduler(Scheduler):
     TODO:
     - limit the number of in-flight requests based on the open file limit
     """
-
-    class QueueItem:
-        request: FnCallArgs
-        num_retries: int
-
-        def __lt__(self, other: RateLimitsScheduler.QueueItem) -> bool:
-            # prioritize by number of retries
-            return self.num_retries > other.num_retries
-
-    resource_pool: str
-    queue: asyncio.PriorityQueue[QueueItem]  # prioritizes retries
-    loop_task: asyncio.Task
-    dispatcher: Dispatcher
+
     get_request_resources_param_names: list[str]  # names of parameters of RateLimitsInfo.get_request_resources()

     # scheduling-related state
@@ -58,12 +46,9 @@ class RateLimitsScheduler(Scheduler):
     MAX_RETRIES = 10

     def __init__(self, resource_pool: str, dispatcher: Dispatcher):
-
-
-        self.dispatcher
-        self.loop_task = asyncio.create_task(self._main_loop())
-        self.dispatcher.tasks.add(self.loop_task)
-        self.loop_task.add_done_callback(self.dispatcher.done_cb)
+        super().__init__(resource_pool, dispatcher)
+        loop_task = asyncio.create_task(self._main_loop())
+        self.dispatcher.register_task(loop_task)
         self.pool_info = None  # initialized in _main_loop by the first request
         self.est_usage = {}
         self.num_in_flight = 0
@@ -79,6 +64,19 @@ class RateLimitsScheduler(Scheduler):
     def submit(self, item: FnCallArgs) -> None:
         self.queue.put_nowait(self.QueueItem(item, 0))

+    def _set_pool_info(self) -> None:
+        """Initialize pool_info with the RateLimitsInfo for the resource pool, if available"""
+        if self.pool_info is not None:
+            return
+        self.pool_info = env.Env.get().get_resource_pool_info(self.resource_pool, None)
+        if self.pool_info is None:
+            return
+        assert isinstance(self.pool_info, env.RateLimitsInfo)
+        assert hasattr(self.pool_info, 'get_request_resources')
+        sig = inspect.signature(self.pool_info.get_request_resources)
+        self.get_request_resources_param_names = [p.name for p in sig.parameters.values()]
+        self.est_usage = {r: 0 for r in self._resources}
+
     async def _main_loop(self) -> None:
         item: Optional[RateLimitsScheduler.QueueItem] = None
         while True:
@@ -92,21 +90,15 @@ class RateLimitsScheduler(Scheduler):
                 # wait for a single request to get rate limits
                 _logger.debug(f'initializing rate limits for {self.resource_pool}')
                 await self._exec(item.request, item.num_retries, is_task=False)
+                _logger.debug(f'initialized rate limits for {self.resource_pool}')
                 item = None
                 # if this was the first request, it created the pool_info
                 if self.pool_info is None:
-                    self.
-                    if self.pool_info is None:
-                        # we still don't have rate limits, wait for the next request
-                        continue
-                    assert isinstance(self.pool_info, env.RateLimitsInfo)
-                    assert hasattr(self.pool_info, 'get_request_resources')
-                    sig = inspect.signature(self.pool_info.get_request_resources)
-                    self.get_request_resources_param_names = [p.name for p in sig.parameters.values()]
-                    self.est_usage = {r: 0 for r in self._resources}
+                    self._set_pool_info()
                 continue

             # check rate limits
+            _logger.debug(f'checking rate limits for {self.resource_pool}')
             request_resources = self._get_request_resources(item.request)
             limits_info = self._check_resource_limits(request_resources)
             aws: list[Awaitable[None]] = []
@@ -149,8 +141,7 @@ class RateLimitsScheduler(Scheduler):
                 _logger.debug(f'creating task for {self.resource_pool}')
                 self.num_in_flight += 1
                 task = asyncio.create_task(self._exec(item.request, item.num_retries, is_task=True))
-                self.dispatcher.
-                task.add_done_callback(self.dispatcher.done_cb)
+                self.dispatcher.register_task(task)
                 item = None

     @property
@@ -166,7 +157,6 @@ class RateLimitsScheduler(Scheduler):
         constant_kwargs, batch_kwargs = request.pxt_fn.create_batch_kwargs(batch_kwargs)
         return self.pool_info.get_request_resources(**constant_kwargs, **batch_kwargs)

-
     def _check_resource_limits(self, request_resources: dict[str, int]) -> Optional[env.RateLimitInfo]:
         """Returns the most depleted resource, relative to its limit, or None if all resources are within limits"""
         candidates: list[tuple[env.RateLimitInfo, float]] = []  # (info, relative usage)
@@ -188,7 +178,9 @@ class RateLimitsScheduler(Scheduler):
             start_ts = datetime.datetime.now(tz=datetime.timezone.utc)
             pxt_fn = request.fn_call.fn
             assert isinstance(pxt_fn, func.CallableFunction)
-            _logger.debug(
+            _logger.debug(
+                f'scheduler {self.resource_pool}: start evaluating slot {request.fn_call.slot_idx}, batch_size={len(request.rows)}'
+            )
             self.total_requests += 1
             if request.is_batched:
                 batch_result = await pxt_fn.aexec_batch(*request.batch_args, **request.batch_kwargs)
@@ -199,17 +191,25 @@ class RateLimitsScheduler(Scheduler):
                 result = await pxt_fn.aexec(*request.args, **request.kwargs)
                 request.row[request.fn_call.slot_idx] = result
             end_ts = datetime.datetime.now(tz=datetime.timezone.utc)
-            _logger.debug(
+            _logger.debug(
+                f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx} in {end_ts - start_ts}, batch_size={len(request.rows)}'
+            )

             # purge accumulated usage estimate, now that we have a new report
             self.est_usage = {r: 0 for r in self._resources}

             self.dispatcher.dispatch(request.rows)
         except Exception as exc:
-
+            _logger.debug(f'scheduler {self.resource_pool}: exception in slot {request.fn_call.slot_idx}: {exc}')
+            if self.pool_info is None:
+                # our pool info should be available at this point
+                self._set_pool_info()
+                assert self.pool_info is not None
+            if num_retries < self.MAX_RETRIES:
                 retry_delay = self.pool_info.get_retry_delay(exc)
                 if retry_delay is not None:
                     self.total_retried += 1
+                    _logger.debug(f'scheduler {self.resource_pool}: retrying in {retry_delay} seconds')
                     await asyncio.sleep(retry_delay)
                     self.queue.put_nowait(self.QueueItem(request, num_retries + 1))
                     return
@@ -221,12 +221,140 @@ class RateLimitsScheduler(Scheduler):
                 row.set_exc(request.fn_call.slot_idx, exc)
             self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb)
         finally:
-            _logger.debug(
-                f'Scheduler stats: #requests={self.total_requests}, #retried={self.total_retried}')
+            _logger.debug(f'Scheduler stats: #requests={self.total_requests}, #retried={self.total_retried}')
             if is_task:
                 self.num_in_flight -= 1
                 self.request_completed.set()


+class RequestRateScheduler(Scheduler):
+    """
+    Scheduler for FunctionCalls with a fixed request rate limit and no runtime resource usage reports.
+
+    Rate limits are supplied in the config, in one of two ways:
+    - resource_pool='request-rate:<endpoint>':
+      * a single rate limit for all calls against that endpoint
+      * in the config: section '<endpoint>', key 'rate_limit'
+    - resource_pool='request-rate:<endpoint>:<model>':
+      * a single rate limit for all calls against that model
+      * in the config: section '<endpoint>.rate_limits', key '<model>'
+    - if no rate limit is found in the config, uses a default of 600 RPM
+
+    TODO:
+    - adaptive rate limiting based on 429 errors
+    """
+
+    secs_per_request: float  # inverted rate limit
+    num_in_flight: int
+    total_requests: int
+    total_retried: int
+
+    TIME_FORMAT = '%H:%M.%S %f'
+    MAX_RETRIES = 10
+    DEFAULT_RATE_LIMIT = 600  # requests per minute
+
+    def __init__(self, resource_pool: str, dispatcher: Dispatcher):
+        super().__init__(resource_pool, dispatcher)
+        loop_task = asyncio.create_task(self._main_loop())
+        self.dispatcher.register_task(loop_task)
+        self.num_in_flight = 0
+        self.total_requests = 0
+        self.total_retried = 0
+
+        # try to get the rate limit from the config
+        elems = resource_pool.split(':')
+        section: str
+        key: str
+        if len(elems) == 2:
+            # resource_pool: request-rate:endpoint
+            _, endpoint = elems
+            section = endpoint
+            key = 'rate_limit'
+        else:
+            # resource_pool: request-rate:endpoint:model
+            assert len(elems) == 3
+            _, endpoint, model = elems
+            section = f'{endpoint}.rate_limits'
+            key = model
+        requests_per_min = env.Env.get().config.get_int_value(key, section=section)
+        requests_per_min = requests_per_min or self.DEFAULT_RATE_LIMIT
+        self.secs_per_request = 1 / (requests_per_min / 60)
+
+    @classmethod
+    def matches(cls, resource_pool: str) -> bool:
+        return resource_pool.startswith('request-rate:')
+
+    async def _main_loop(self) -> None:
+        last_request_ts = 0.0
+        while True:
+            item = await self.queue.get()
+            if item.num_retries > 0:
+                self.total_retried += 1
+            now = time.monotonic()
+            if now - last_request_ts < self.secs_per_request:
+                wait_duration = self.secs_per_request - (now - last_request_ts)
+                _logger.debug(f'waiting for {wait_duration} for {self.resource_pool}')
+                await asyncio.sleep(wait_duration)
+
+            last_request_ts = time.monotonic()
+            if item.num_retries > 0:
+                # the last request encountered some problem: retry it synchronously, to wait for the problem to pass
+                _logger.debug(f'retrying request for {self.resource_pool}: #retries={item.num_retries}')
+                await self._exec(item.request, item.num_retries, is_task=False)
+                _logger.debug(f'retried request for {self.resource_pool}: #retries={item.num_retries}')
+            else:
+                _logger.debug(f'creating task for {self.resource_pool}')
+                self.num_in_flight += 1
+                task = asyncio.create_task(self._exec(item.request, item.num_retries, is_task=True))
+                self.dispatcher.register_task(task)
+
+    async def _exec(self, request: FnCallArgs, num_retries: int, is_task: bool) -> None:
+        assert all(not row.has_val[request.fn_call.slot_idx] for row in request.rows)
+        assert all(not row.has_exc(request.fn_call.slot_idx) for row in request.rows)
+
+        try:
+            start_ts = datetime.datetime.now(tz=datetime.timezone.utc)
+            pxt_fn = request.fn_call.fn
+            assert isinstance(pxt_fn, func.CallableFunction)
+            _logger.debug(
+                f'scheduler {self.resource_pool}: start evaluating slot {request.fn_call.slot_idx}, batch_size={len(request.rows)}'
+            )
+            self.total_requests += 1
+            if request.is_batched:
+                batch_result = await pxt_fn.aexec_batch(*request.batch_args, **request.batch_kwargs)
+                assert len(batch_result) == len(request.rows)
+                for row, result in zip(request.rows, batch_result):
+                    row[request.fn_call.slot_idx] = result
+            else:
+                result = await pxt_fn.aexec(*request.args, **request.kwargs)
+                request.row[request.fn_call.slot_idx] = result
+            end_ts = datetime.datetime.now(tz=datetime.timezone.utc)
+            _logger.debug(
+                f'scheduler {self.resource_pool}: evaluated slot {request.fn_call.slot_idx} in {end_ts - start_ts}, batch_size={len(request.rows)}'
+            )
+            self.dispatcher.dispatch(request.rows)
+
+        except Exception as exc:
+            # TODO: which exception can be retried?
+            _logger.debug(f'exception for {self.resource_pool}: {exc}')
+            status = getattr(exc, 'status', None)
+            _logger.debug(f'type={type(exc)} has_status={hasattr(exc, "status")} status={status}')
+            if num_retries < self.MAX_RETRIES:
+                self.queue.put_nowait(self.QueueItem(request, num_retries + 1))
+                return
+
+            # record the exception
+            _, _, exc_tb = sys.exc_info()
+            for row in request.rows:
+                row.set_exc(request.fn_call.slot_idx, exc)
+            self.dispatcher.dispatch_exc(request.rows, request.fn_call.slot_idx, exc_tb)
+        finally:
+            _logger.debug(
+                f'Scheduler stats: #in-flight={self.num_in_flight} #requests={self.total_requests}, #retried={self.total_retried}'
+            )
+            if is_task:
+                self.num_in_flight -= 1
+
+
 # all concrete Scheduler subclasses that implement matches()
-SCHEDULERS = [RateLimitsScheduler]
+SCHEDULERS = [RateLimitsScheduler, RequestRateScheduler]
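
RequestRateScheduler.__init__ above derives its pacing from the config: a pool named 'request-rate:<endpoint>' reads key 'rate_limit' from section '<endpoint>', a pool named 'request-rate:<endpoint>:<model>' reads key '<model>' from section '<endpoint>.rate_limits', and the RPM value is inverted into a per-request delay (60 / RPM seconds). A standalone sketch of that mapping, with the config replaced by a plain dict and purely illustrative endpoint/model names:

DEFAULT_RATE_LIMIT = 600  # requests per minute, as in the class above

def resolve_secs_per_request(resource_pool: str, config: dict[str, dict[str, int]]) -> float:
    """Returns the minimum number of seconds between requests for this pool."""
    elems = resource_pool.split(':')
    if len(elems) == 2:
        # 'request-rate:<endpoint>' -> section '<endpoint>', key 'rate_limit'
        _, endpoint = elems
        section, key = endpoint, 'rate_limit'
    else:
        # 'request-rate:<endpoint>:<model>' -> section '<endpoint>.rate_limits', key '<model>'
        _, endpoint, model = elems
        section, key = f'{endpoint}.rate_limits', model
    requests_per_min = config.get(section, {}).get(key) or DEFAULT_RATE_LIMIT
    return 1 / (requests_per_min / 60)  # == 60 / requests_per_min

cfg = {'openai.rate_limits': {'gpt-4o-mini': 120}}  # hypothetical 120-RPM limit for one model
assert resolve_secs_per_request('request-rate:openai:gpt-4o-mini', cfg) == 0.5
assert resolve_secs_per_request('request-rate:openai', cfg) == 0.1  # falls back to 600 RPM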

pixeltable/exec/in_memory_data_node.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Iterator, Optional
+from typing import Any, AsyncIterator, Iterator, Optional

 import pixeltable.catalog as catalog
 import pixeltable.exprs as exprs
@@ -10,6 +10,7 @@ from .exec_node import ExecNode

 _logger = logging.getLogger('pixeltable')

+
 class InMemoryDataNode(ExecNode):
     """
     Outputs in-memory data as a DataRowBatch of a particular table.
@@ -18,6 +19,7 @@ class InMemoryDataNode(ExecNode):
     - with the values provided in the input rows
     - if an input row doesn't provide a value, sets the slot to the column default
     """
+
     tbl: catalog.TableVersion
     input_rows: list[dict[str, Any]]
     start_row_id: int
@@ -27,8 +29,7 @@ class InMemoryDataNode(ExecNode):
     output_exprs: list[exprs.ColumnRef]

     def __init__(
-        self, tbl: catalog.TableVersion, rows: list[dict[str, Any]],
-        row_builder: exprs.RowBuilder, start_row_id: int,
+        self, tbl: catalog.TableVersion, rows: list[dict[str, Any]], row_builder: exprs.RowBuilder, start_row_id: int
     ):
         # we materialize the input slots
         output_exprs = list(row_builder.input_exprs)
@@ -43,11 +44,11 @@ class InMemoryDataNode(ExecNode):
         """Create row batch and populate with self.input_rows"""
         user_cols_by_name = {
             col_ref.col.name: exprs.ColumnSlotIdx(col_ref.col, col_ref.slot_idx)
-            for col_ref in self.output_exprs
+            for col_ref in self.output_exprs
+            if col_ref.col.name is not None
         }
         output_cols_by_idx = {
-            col_ref.slot_idx: exprs.ColumnSlotIdx(col_ref.col, col_ref.slot_idx)
-            for col_ref in self.output_exprs
+            col_ref.slot_idx: exprs.ColumnSlotIdx(col_ref.col, col_ref.slot_idx) for col_ref in self.output_exprs
         }
         output_slot_idxs = {e.slot_idx for e in self.output_exprs}

@@ -68,7 +69,7 @@ class InMemoryDataNode(ExecNode):
             input_slot_idxs.add(col_info.slot_idx)

         # set the remaining output slots to their default values (presently None)
-        missing_slot_idxs =
+        missing_slot_idxs = output_slot_idxs - input_slot_idxs
         for slot_idx in missing_slot_idxs:
             col_info = output_cols_by_idx.get(slot_idx)
             assert col_info is not None

pixeltable/exec/row_update_node.py
@@ -4,11 +4,13 @@ from typing import Any, AsyncIterator
 import pixeltable.catalog as catalog
 import pixeltable.exprs as exprs
 from pixeltable.utils.media_store import MediaStore
+
 from .data_row_batch import DataRowBatch
 from .exec_node import ExecNode

 _logger = logging.getLogger('pixeltable')

+
 class RowUpdateNode(ExecNode):
     """
     Update individual rows in the input batches, identified by key columns.
@@ -17,9 +19,15 @@ class RowUpdateNode(ExecNode):
     The node assumes that all update dicts contain the same keys, and it populates the slots of the columns present in
     the update list.
     """
+
     def __init__(
-
-
+        self,
+        tbl: catalog.TableVersionPath,
+        key_vals_batch: list[tuple],
+        is_rowid_key: bool,
+        col_vals_batch: list[dict[catalog.Column, Any]],
+        row_builder: exprs.RowBuilder,
+        input: ExecNode,
     ):
         super().__init__(row_builder, [], [], input)
         self.updates = {key_vals: col_vals for key_vals, col_vals in zip(key_vals_batch, col_vals_batch)}
@@ -28,7 +36,8 @@ class RowUpdateNode(ExecNode):
         # retrieve ColumnRefs from the RowBuilder (has slot_idx set)
         all_col_slot_idxs = {
             col_ref.col: col_ref.slot_idx
-            for col_ref in row_builder.unique_exprs
+            for col_ref in row_builder.unique_exprs
+            if isinstance(col_ref, exprs.ColumnRef)
         }
         self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
         self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.primary_key_columns()}
@@ -37,8 +46,9 @@ class RowUpdateNode(ExecNode):
     async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
         async for batch in self.input:
             for row in batch:
-                key_vals =
-                    tuple(row[slot_idx] for slot_idx in self.key_slot_idxs.values())
+                key_vals = (
+                    row.rowid if self.is_rowid_key else tuple(row[slot_idx] for slot_idx in self.key_slot_idxs.values())
+                )
                 if key_vals not in self.updates:
                     continue
                 self.matched_key_vals.add(key_vals)