sentry-arroyo 2.20.10__py3-none-any.whl → 2.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arroyo/backends/abstract.py +10 -33
- arroyo/processing/strategies/run_task_with_multiprocessing.py +26 -3
- {sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/METADATA +1 -1
- {sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/RECORD +8 -8
- tests/backends/test_kafka.py +9 -0
- {sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/WHEEL +0 -0
- {sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/licenses/LICENSE +0 -0
- {sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/top_level.txt +0 -0
arroyo/backends/abstract.py
CHANGED

@@ -4,16 +4,7 @@ import logging
 import time
 from abc import ABC, abstractmethod, abstractproperty
 from concurrent.futures import Future
-from typing import (
-    Callable,
-    Generic,
-    Mapping,
-    Optional,
-    Protocol,
-    Sequence,
-    TypeVar,
-    Union,
-)
+from typing import Callable, Generic, Mapping, Optional, Sequence, TypeVar, Union

 from arroyo.types import BrokerValue, Partition, Topic, TStrategyPayload


@@ -187,32 +178,11 @@ class Consumer(Generic[TStrategyPayload], ABC):
         raise NotImplementedError


-class ProducerFuture(Protocol, Generic[T]):
-    """
-    An abstract interface for a kind of Future. Stdlib futures are too slow to
-    construct, so we use these.
-    """
-
-    def done(self) -> bool:
-        ...
-
-    def result(self, timeout: float | None = None) -> T:
-        """
-        Return result or raise exception. May block, but does not have to.
-        """
-        ...
-
-    def set_result(self, result: T) -> None:
-        ...
-
-    def set_exception(self, exception: Exception) -> None:
-        ...
-
-
 class SimpleProducerFuture(Generic[T]):
     """
     A stub for concurrent.futures.Future that does not construct any Condition
-    variables, therefore is faster to construct.
+    variables, therefore is faster to construct. However, some methods are
+    missing, and result() in particular is not efficient with timeout > 0.
     """

     def __init__(self) -> None:

@@ -232,6 +202,10 @@ class SimpleProducerFuture(Generic[T]):
         # only in tests at most. It is only here for the sake of implementing
         # the contract. If you really need result with timeout>0, you should
         # use the stdlib future.
+        #
+        # If this becomes performance sensitive, we can potentially implement
+        # something more sophisticated such as lazily creating the condition
+        # variable, and synchronizing the creation of that using a global lock.
         while deadline is None or time.time() < deadline:
             if self.result_exception is not None:
                 raise self.result_exception

@@ -248,6 +222,9 @@ class SimpleProducerFuture(Generic[T]):
         self.result_exception = exception


+ProducerFuture = Union[SimpleProducerFuture[T], Future[T]]
+
+
 class Producer(Generic[TStrategyPayload], ABC):
     @abstractmethod
     def produce(
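The net effect of this change: the `ProducerFuture` Protocol is removed and `ProducerFuture` becomes a plain `Union` of `SimpleProducerFuture` and the stdlib `Future`. The sketch below illustrates why a Condition-free future is cheap to construct and why its `result()` can only honor a timeout by polling; the class and attribute names here are illustrative, not Arroyo's actual implementation.

```python
import time
from concurrent.futures import Future
from typing import Generic, Optional, TypeVar, Union

T = TypeVar("T")


class _ConditionFreeFuture(Generic[T]):
    """Illustrative stand-in for SimpleProducerFuture: no threading.Condition
    is allocated, so creating one future per produced message stays cheap."""

    def __init__(self) -> None:
        self._result: Optional[T] = None
        self._exception: Optional[Exception] = None
        self._done = False

    def done(self) -> bool:
        return self._done

    def set_result(self, result: T) -> None:
        self._result = result
        self._done = True

    def set_exception(self, exception: Exception) -> None:
        self._exception = exception
        self._done = True

    def result(self, timeout: Optional[float] = None) -> T:
        # Without a condition variable there is nothing to wait on, so a
        # timeout can only be honored by polling -- hence the new docstring's
        # warning that result() with timeout > 0 is not efficient.
        deadline = None if timeout is None else time.time() + timeout
        while deadline is None or time.time() < deadline:
            if self._exception is not None:
                raise self._exception
            if self._done:
                return self._result  # type: ignore[return-value]
            time.sleep(0.01)
        raise TimeoutError("future did not complete in time")


# Mirrors the shape of the new alias: callers may receive either kind of
# future from a producer (the name here is illustrative).
ProducerFutureLike = Union[_ConditionFreeFuture[T], Future[T]]
```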
arroyo/processing/strategies/run_task_with_multiprocessing.py
CHANGED

@@ -196,7 +196,7 @@ class BatchBuilder(Generic[TBatchValue]):


 def parallel_worker_initializer(
-    custom_initialize_func: Optional[Callable[[], None]] = None
+    custom_initialize_func: Optional[Callable[[], None]] = None,
 ) -> None:
     # Worker process should ignore ``SIGINT`` so that processing is not
     # interrupted by ``KeyboardInterrupt`` during graceful shutdown.

@@ -468,6 +468,20 @@ class RunTaskWithMultiprocessing(
     is applying backpressure. You can likely reduce ``num_processes`` and won't
     notice a performance regression.

+    Prefetching
+    ~~~~~~~~~~~
+
+    If you set ``prefetch_batches`` to `True`, Arroyo will allocate twice as
+    many input blocks as processes, and will prefetch the next batch while the
+    current batch is being processed. This can help saturate the process pool to
+    increase throughput, but it also increases memory usage.
+
+    Use this option if your consumer is bottlenecked on the multiprocessing step
+    but also runs time-consuming tasks in the other steps, like ``Produce`` or
+    ``Unfold``. By prefetching batches, the pool can immediately start working
+    on the next batch while the current batch is being sent through the next
+    steps.
+
     How to tune your consumer
     ~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -507,6 +521,7 @@ class RunTaskWithMultiprocessing(
         output_block_size: Optional[int] = None,
         max_input_block_size: Optional[int] = None,
         max_output_block_size: Optional[int] = None,
+        prefetch_batches: bool = False,
     ) -> None:
         self.__transform_function = function
         self.__next_step = next_step

@@ -525,18 +540,26 @@ class RunTaskWithMultiprocessing(
         self.__shared_memory_manager = SharedMemoryManager()
         self.__shared_memory_manager.start()

+        block_count = num_processes
+        if prefetch_batches:
+            # Allocate twice as many blocks as processes to ensure that every
+            # process can immediately continue to handle another batch while the
+            # main strategy is busy to submit the transformed messages to the
+            # next step.
+            block_count *= 2
+
         self.__input_blocks = [
             self.__shared_memory_manager.SharedMemory(
                 input_block_size or DEFAULT_INPUT_BLOCK_SIZE
             )
-            for _ in range(
+            for _ in range(block_count)
         ]

         self.__output_blocks = [
             self.__shared_memory_manager.SharedMemory(
                 output_block_size or DEFAULT_OUTPUT_BLOCK_SIZE
            )
-            for _ in range(
+            for _ in range(block_count)
         ]

         self.__batch_builder: Optional[
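For context, the allocation logic behind the new `prefetch_batches` flag can be reproduced in isolation with the stdlib `SharedMemoryManager` that the strategy already uses. This is a standalone sketch, not Arroyo's API: the helper function and block sizes below are made up for illustration, and only the doubling of `block_count` mirrors the actual change.

```python
from multiprocessing.managers import SharedMemoryManager

# Hypothetical block sizes for illustration only; Arroyo uses
# DEFAULT_INPUT_BLOCK_SIZE / DEFAULT_OUTPUT_BLOCK_SIZE internally.
INPUT_BLOCK_SIZE = 1024 * 1024
OUTPUT_BLOCK_SIZE = 1024 * 1024


def allocate_blocks(smm, num_processes, prefetch_batches):
    # With prefetching enabled, allocate twice as many shared-memory blocks
    # as worker processes so that a worker can start on the next batch while
    # the main strategy is still forwarding the previous batch downstream.
    # The trade-off is roughly double the shared-memory footprint.
    block_count = num_processes * (2 if prefetch_batches else 1)
    input_blocks = [smm.SharedMemory(INPUT_BLOCK_SIZE) for _ in range(block_count)]
    output_blocks = [smm.SharedMemory(OUTPUT_BLOCK_SIZE) for _ in range(block_count)]
    return input_blocks, output_blocks


if __name__ == "__main__":
    smm = SharedMemoryManager()
    smm.start()
    try:
        inputs, outputs = allocate_blocks(smm, num_processes=4, prefetch_batches=True)
        print(len(inputs), len(outputs))  # -> 8 8
    finally:
        smm.shutdown()
```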
{sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/RECORD
CHANGED

@@ -5,7 +5,7 @@ arroyo/errors.py,sha256=IbtoIbz_m5QrxNRBLOxiy-hOfJQTEwNPCyq6yqedJYk,1059
 arroyo/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arroyo/types.py,sha256=sLY0x030np4UmbaW5C1KH1se7Z2pjQiPvAe5x2sXf7A,5684
 arroyo/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arroyo/backends/abstract.py,sha256=
+arroyo/backends/abstract.py,sha256=NQ5xG9rjchSUW8URl1WtSYSzMFtwRKB6wtJhWmDyR9E,9386
 arroyo/backends/kafka/__init__.py,sha256=TZ0omd3LFXcZUaN_soFTuTgbjNEQYF1mF_i6_KIfCNo,306
 arroyo/backends/kafka/commit.py,sha256=LPsjvX5PPXR62DT6sa5GuSF78qk9F_L--Fz4kw7-m-s,3060
 arroyo/backends/kafka/configuration.py,sha256=D9zre9H2xagUsk7qBA-bm38V3_4Mg_X5hpKsKx2BkM8,3048

@@ -30,7 +30,7 @@ arroyo/processing/strategies/produce.py,sha256=LUsg2bsVsguc2fTbueTbqLeg3mbnk_FOf
 arroyo/processing/strategies/reduce.py,sha256=xv9bYisgHHyS8fVD1PdGi4TJsaK-4RAhMEDh4WHhYfI,3933
 arroyo/processing/strategies/run_task.py,sha256=MGe2UcIWN7FkPc9plKzRVUNbZ7Sk0jWjw1z2vVOFI_I,2160
 arroyo/processing/strategies/run_task_in_threads.py,sha256=f1sb2AG-BLz11X78jfhtERIkdFogrV8vtdT3pyJdkx0,6144
-arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=
+arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=YQ-H3b-9ARnUzeMBzIFSc0TO9c6QIwpZp6A1xlw6JGE,35612
 arroyo/processing/strategies/unfold.py,sha256=bi47pwmKGT0Irsx0HdB7Bhc5hb-yYqLF_xcv3g1ewTk,4231
 arroyo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arroyo/utils/clock.py,sha256=r2EMO4nL5qIb1xnAd1sTAk2yK1UltyUi04lk5BqWKIc,944

@@ -46,11 +46,11 @@ examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
 examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
 examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
-sentry_arroyo-2.
+sentry_arroyo-2.22.0.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
 tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
 tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
-tests/backends/test_kafka.py,sha256=
+tests/backends/test_kafka.py,sha256=6W6EA41X-ECTfcOeivhQxURnmV2Y0fYy-UzDCnJgDsU,12830
 tests/backends/test_local.py,sha256=Mfd4DFuWVSVtl1GomQ6TIoWuJNcAliKqKU0BShPlEMY,3363
 tests/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/processing/test_processor.py,sha256=2uQcbNeiyuPUlpThq9KTBs_fz5JY-BqZE5Fz4Dxspl0,23274

@@ -72,7 +72,7 @@ tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
 tests/utils/test_metrics.py,sha256=bI0EtGgPokMQyEqX58i0-8zvLfxRP2nWaWr2wLMaJ_o,917
 tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
-sentry_arroyo-2.
-sentry_arroyo-2.
-sentry_arroyo-2.
-sentry_arroyo-2.
+sentry_arroyo-2.22.0.dist-info/METADATA,sha256=zDfd2uzTtPpa0GqIxsFpKwZZtSUAEx3GUlKw0mKXBHQ,2200
+sentry_arroyo-2.22.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+sentry_arroyo-2.22.0.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
+sentry_arroyo-2.22.0.dist-info/RECORD,,
tests/backends/test_kafka.py
CHANGED

@@ -167,6 +167,15 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
         else:
             raise AssertionError("expected EndOfPartition error")

+    @pytest.mark.parametrize("use_simple_futures", [True, False])
+    def test_producer_future_behavior(self, use_simple_futures: bool) -> None:
+        with self.get_topic() as topic:
+            with closing(self.get_producer(use_simple_futures)) as producer:
+                future = producer.produce(topic, next(self.get_payloads()))
+                assert not future.done()
+                assert future.result(5.0)
+                assert future.done()
+
     def test_lenient_offset_reset_latest(self) -> None:
         payload = KafkaPayload(b"a", b"0", [])
         with self.get_topic() as topic:
{sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/WHEEL
File without changes

{sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/licenses/LICENSE
File without changes

{sentry_arroyo-2.20.10.dist-info → sentry_arroyo-2.22.0.dist-info}/top_level.txt
File without changes