sentry-arroyo 2.20.10__py3-none-any.whl → 2.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,16 +4,7 @@ import logging
4
4
  import time
5
5
  from abc import ABC, abstractmethod, abstractproperty
6
6
  from concurrent.futures import Future
7
- from typing import (
8
- Callable,
9
- Generic,
10
- Mapping,
11
- Optional,
12
- Protocol,
13
- Sequence,
14
- TypeVar,
15
- Union,
16
- )
7
+ from typing import Callable, Generic, Mapping, Optional, Sequence, TypeVar, Union
17
8
 
18
9
  from arroyo.types import BrokerValue, Partition, Topic, TStrategyPayload
19
10
 
@@ -187,32 +178,11 @@ class Consumer(Generic[TStrategyPayload], ABC):
187
178
  raise NotImplementedError
188
179
 
189
180
 
190
- class ProducerFuture(Protocol, Generic[T]):
191
- """
192
- An abstract interface for a kind of Future. Stdlib futures are too slow to
193
- construct, so we use these.
194
- """
195
-
196
- def done(self) -> bool:
197
- ...
198
-
199
- def result(self, timeout: float | None = None) -> T:
200
- """
201
- Return result or raise exception. May block, but does not have to.
202
- """
203
- ...
204
-
205
- def set_result(self, result: T) -> None:
206
- ...
207
-
208
- def set_exception(self, exception: Exception) -> None:
209
- ...
210
-
211
-
212
181
  class SimpleProducerFuture(Generic[T]):
213
182
  """
214
183
  A stub for concurrent.futures.Future that does not construct any Condition
215
- variables, therefore is faster to construct.
184
+ variables, therefore is faster to construct. However, some methods are
185
+ missing, and result() in particular is not efficient with timeout > 0.
216
186
  """
217
187
 
218
188
  def __init__(self) -> None:
@@ -232,6 +202,10 @@ class SimpleProducerFuture(Generic[T]):
232
202
  # only in tests at most. It is only here for the sake of implementing
233
203
  # the contract. If you really need result with timeout>0, you should
234
204
  # use the stdlib future.
205
+ #
206
+ # If this becomes performance sensitive, we can potentially implement
207
+ # something more sophisticated such as lazily creating the condition
208
+ # variable, and synchronizing the creation of that using a global lock.
235
209
  while deadline is None or time.time() < deadline:
236
210
  if self.result_exception is not None:
237
211
  raise self.result_exception
@@ -248,6 +222,9 @@ class SimpleProducerFuture(Generic[T]):
248
222
  self.result_exception = exception
249
223
 
250
224
 
225
+ ProducerFuture = Union[SimpleProducerFuture[T], Future[T]]
226
+
227
+
251
228
  class Producer(Generic[TStrategyPayload], ABC):
252
229
  @abstractmethod
253
230
  def produce(
@@ -196,7 +196,7 @@ class BatchBuilder(Generic[TBatchValue]):
196
196
 
197
197
 
198
198
  def parallel_worker_initializer(
199
- custom_initialize_func: Optional[Callable[[], None]] = None
199
+ custom_initialize_func: Optional[Callable[[], None]] = None,
200
200
  ) -> None:
201
201
  # Worker process should ignore ``SIGINT`` so that processing is not
202
202
  # interrupted by ``KeyboardInterrupt`` during graceful shutdown.
@@ -468,6 +468,20 @@ class RunTaskWithMultiprocessing(
468
468
  is applying backpressure. You can likely reduce ``num_processes`` and won't
469
469
  notice a performance regression.
470
470
 
471
+ Prefetching
472
+ ~~~~~~~~~~~
473
+
474
+ If you set ``prefetch_batches`` to ``True``, Arroyo will allocate twice as
475
+ many input blocks as processes, and will prefetch the next batch while the
476
+ current batch is being processed. This can help saturate the process pool to
477
+ increase throughput, but it also increases memory usage.
478
+
479
+ Use this option if your consumer is bottlenecked on the multiprocessing step
480
+ but also runs time-consuming tasks in the other steps, like ``Produce`` or
481
+ ``Unfold``. By prefetching batches, the pool can immediately start working
482
+ on the next batch while the current batch is being sent through the next
483
+ steps.
484
+
471
485
  How to tune your consumer
472
486
  ~~~~~~~~~~~~~~~~~~~~~~~~~
473
487
 
@@ -507,6 +521,7 @@ class RunTaskWithMultiprocessing(
507
521
  output_block_size: Optional[int] = None,
508
522
  max_input_block_size: Optional[int] = None,
509
523
  max_output_block_size: Optional[int] = None,
524
+ prefetch_batches: bool = False,
510
525
  ) -> None:
511
526
  self.__transform_function = function
512
527
  self.__next_step = next_step
@@ -525,18 +540,26 @@ class RunTaskWithMultiprocessing(
525
540
  self.__shared_memory_manager = SharedMemoryManager()
526
541
  self.__shared_memory_manager.start()
527
542
 
543
+ block_count = num_processes
544
+ if prefetch_batches:
545
+ # Allocate twice as many blocks as processes to ensure that every
546
+ # process can immediately continue to handle another batch while the
547
+ # main strategy is busy submitting the transformed messages to the
548
+ # next step.
549
+ block_count *= 2
550
+
528
551
  self.__input_blocks = [
529
552
  self.__shared_memory_manager.SharedMemory(
530
553
  input_block_size or DEFAULT_INPUT_BLOCK_SIZE
531
554
  )
532
- for _ in range(num_processes)
555
+ for _ in range(block_count)
533
556
  ]
534
557
 
535
558
  self.__output_blocks = [
536
559
  self.__shared_memory_manager.SharedMemory(
537
560
  output_block_size or DEFAULT_OUTPUT_BLOCK_SIZE
538
561
  )
539
- for _ in range(num_processes)
562
+ for _ in range(block_count)
540
563
  ]
541
564
 
542
565
  self.__batch_builder: Optional[
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.20.10
3
+ Version: 2.22.0
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -5,7 +5,7 @@ arroyo/errors.py,sha256=IbtoIbz_m5QrxNRBLOxiy-hOfJQTEwNPCyq6yqedJYk,1059
5
5
  arroyo/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  arroyo/types.py,sha256=sLY0x030np4UmbaW5C1KH1se7Z2pjQiPvAe5x2sXf7A,5684
7
7
  arroyo/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- arroyo/backends/abstract.py,sha256=qnyuAcbxTIImEXTDA9T1V0Vh4GH_CImnXezK0E3tH5E,9539
8
+ arroyo/backends/abstract.py,sha256=NQ5xG9rjchSUW8URl1WtSYSzMFtwRKB6wtJhWmDyR9E,9386
9
9
  arroyo/backends/kafka/__init__.py,sha256=TZ0omd3LFXcZUaN_soFTuTgbjNEQYF1mF_i6_KIfCNo,306
10
10
  arroyo/backends/kafka/commit.py,sha256=LPsjvX5PPXR62DT6sa5GuSF78qk9F_L--Fz4kw7-m-s,3060
11
11
  arroyo/backends/kafka/configuration.py,sha256=D9zre9H2xagUsk7qBA-bm38V3_4Mg_X5hpKsKx2BkM8,3048
@@ -30,7 +30,7 @@ arroyo/processing/strategies/produce.py,sha256=LUsg2bsVsguc2fTbueTbqLeg3mbnk_FOf
30
30
  arroyo/processing/strategies/reduce.py,sha256=xv9bYisgHHyS8fVD1PdGi4TJsaK-4RAhMEDh4WHhYfI,3933
31
31
  arroyo/processing/strategies/run_task.py,sha256=MGe2UcIWN7FkPc9plKzRVUNbZ7Sk0jWjw1z2vVOFI_I,2160
32
32
  arroyo/processing/strategies/run_task_in_threads.py,sha256=f1sb2AG-BLz11X78jfhtERIkdFogrV8vtdT3pyJdkx0,6144
33
- arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=jOL6SKLyrTuCsOkFDl5eOLQzr79bo_yZiuE1d9rhnfA,34558
33
+ arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=YQ-H3b-9ARnUzeMBzIFSc0TO9c6QIwpZp6A1xlw6JGE,35612
34
34
  arroyo/processing/strategies/unfold.py,sha256=bi47pwmKGT0Irsx0HdB7Bhc5hb-yYqLF_xcv3g1ewTk,4231
35
35
  arroyo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  arroyo/utils/clock.py,sha256=r2EMO4nL5qIb1xnAd1sTAk2yK1UltyUi04lk5BqWKIc,944
@@ -46,11 +46,11 @@ examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
46
46
  examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
47
47
  examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
48
48
  examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
49
- sentry_arroyo-2.20.10.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
49
+ sentry_arroyo-2.22.0.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
50
50
  tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
52
52
  tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
53
- tests/backends/test_kafka.py,sha256=XGljy72Juesc1qbVs0D02OJitFnry72fRWQdbTqmBp4,12371
53
+ tests/backends/test_kafka.py,sha256=6W6EA41X-ECTfcOeivhQxURnmV2Y0fYy-UzDCnJgDsU,12830
54
54
  tests/backends/test_local.py,sha256=Mfd4DFuWVSVtl1GomQ6TIoWuJNcAliKqKU0BShPlEMY,3363
55
55
  tests/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
56
  tests/processing/test_processor.py,sha256=2uQcbNeiyuPUlpThq9KTBs_fz5JY-BqZE5Fz4Dxspl0,23274
@@ -72,7 +72,7 @@ tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
72
  tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
73
73
  tests/utils/test_metrics.py,sha256=bI0EtGgPokMQyEqX58i0-8zvLfxRP2nWaWr2wLMaJ_o,917
74
74
  tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
75
- sentry_arroyo-2.20.10.dist-info/METADATA,sha256=zxvIwG5Fnbol5xK6YUeGT4_R2YwQgYjHXEudCdO74mA,2201
76
- sentry_arroyo-2.20.10.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
77
- sentry_arroyo-2.20.10.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
78
- sentry_arroyo-2.20.10.dist-info/RECORD,,
75
+ sentry_arroyo-2.22.0.dist-info/METADATA,sha256=zDfd2uzTtPpa0GqIxsFpKwZZtSUAEx3GUlKw0mKXBHQ,2200
76
+ sentry_arroyo-2.22.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
77
+ sentry_arroyo-2.22.0.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
78
+ sentry_arroyo-2.22.0.dist-info/RECORD,,
@@ -167,6 +167,15 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
167
167
  else:
168
168
  raise AssertionError("expected EndOfPartition error")
169
169
 
170
+ @pytest.mark.parametrize("use_simple_futures", [True, False])
171
+ def test_producer_future_behavior(self, use_simple_futures: bool) -> None:
172
+ with self.get_topic() as topic:
173
+ with closing(self.get_producer(use_simple_futures)) as producer:
174
+ future = producer.produce(topic, next(self.get_payloads()))
175
+ assert not future.done()
176
+ assert future.result(5.0)
177
+ assert future.done()
178
+
170
179
  def test_lenient_offset_reset_latest(self) -> None:
171
180
  payload = KafkaPayload(b"a", b"0", [])
172
181
  with self.get_topic() as topic: