sentry-arroyo 2.21.0__py3-none-any.whl → 2.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -236,7 +236,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
236
236
  logger.info("New partitions assigned: %r", partitions)
237
237
  logger.info("Member id: %r", self.__consumer.member_id)
238
238
  self.__metrics_buffer.metrics.increment(
239
- "arroyo.consumer.partitions_assigned.count", len(partitions)
239
+ "arroyo.consumer.partitions_assigned.count", len(partitions), tags={"consumer_member_id": self.__consumer.member_id}
240
240
  )
241
241
 
242
242
  current_partitions = dict(self.__consumer.tell())
@@ -262,7 +262,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
262
262
  logger.info("Partitions to revoke: %r", partitions)
263
263
 
264
264
  self.__metrics_buffer.metrics.increment(
265
- "arroyo.consumer.partitions_revoked.count", len(partitions)
265
+ "arroyo.consumer.partitions_revoked.count", len(partitions), tags={"consumer_member_id": self.__consumer.member_id}
266
266
  )
267
267
 
268
268
  if partitions:
@@ -196,7 +196,7 @@ class BatchBuilder(Generic[TBatchValue]):
196
196
 
197
197
 
198
198
  def parallel_worker_initializer(
199
- custom_initialize_func: Optional[Callable[[], None]] = None
199
+ custom_initialize_func: Optional[Callable[[], None]] = None,
200
200
  ) -> None:
201
201
  # Worker process should ignore ``SIGINT`` so that processing is not
202
202
  # interrupted by ``KeyboardInterrupt`` during graceful shutdown.
@@ -468,6 +468,20 @@ class RunTaskWithMultiprocessing(
468
468
  is applying backpressure. You can likely reduce ``num_processes`` and won't
469
469
  notice a performance regression.
470
470
 
471
+ Prefetching
472
+ ~~~~~~~~~~~
473
+
474
+ If you set ``prefetch_batches`` to ``True``, Arroyo will allocate twice as
475
+ many input blocks as processes, and will prefetch the next batch while the
476
+ current batch is being processed. This can help saturate the process pool to
477
+ increase throughput, but it also increases memory usage.
478
+
479
+ Use this option if your consumer is bottlenecked on the multiprocessing step
480
+ but also runs time-consuming tasks in the other steps, like ``Produce`` or
481
+ ``Unfold``. By prefetching batches, the pool can immediately start working
482
+ on the next batch while the current batch is being sent through the next
483
+ steps.
484
+
471
485
  How to tune your consumer
472
486
  ~~~~~~~~~~~~~~~~~~~~~~~~~
473
487
 
@@ -507,6 +521,7 @@ class RunTaskWithMultiprocessing(
507
521
  output_block_size: Optional[int] = None,
508
522
  max_input_block_size: Optional[int] = None,
509
523
  max_output_block_size: Optional[int] = None,
524
+ prefetch_batches: bool = False,
510
525
  ) -> None:
511
526
  self.__transform_function = function
512
527
  self.__next_step = next_step
@@ -525,18 +540,26 @@ class RunTaskWithMultiprocessing(
525
540
  self.__shared_memory_manager = SharedMemoryManager()
526
541
  self.__shared_memory_manager.start()
527
542
 
543
+ block_count = num_processes
544
+ if prefetch_batches:
545
+ # Allocate twice as many blocks as processes to ensure that every
546
+ # process can immediately continue to handle another batch while the
547
+ # main strategy is busy submitting the transformed messages to the
548
+ # next step.
549
+ block_count *= 2
550
+
528
551
  self.__input_blocks = [
529
552
  self.__shared_memory_manager.SharedMemory(
530
553
  input_block_size or DEFAULT_INPUT_BLOCK_SIZE
531
554
  )
532
- for _ in range(num_processes)
555
+ for _ in range(block_count)
533
556
  ]
534
557
 
535
558
  self.__output_blocks = [
536
559
  self.__shared_memory_manager.SharedMemory(
537
560
  output_block_size or DEFAULT_OUTPUT_BLOCK_SIZE
538
561
  )
539
- for _ in range(num_processes)
562
+ for _ in range(block_count)
540
563
  ]
541
564
 
542
565
  self.__batch_builder: Optional[
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.21.0
3
+ Version: 2.23.0
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python
13
13
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
14
  Description-Content-Type: text/markdown
15
15
  License-File: LICENSE
16
- Requires-Dist: confluent-kafka>=2.7.0
16
+ Requires-Dist: confluent-kafka<2.10.0,>=2.7.0
17
17
  Dynamic: author
18
18
  Dynamic: author-email
19
19
  Dynamic: classifier
@@ -16,7 +16,7 @@ arroyo/backends/local/storages/__init__.py,sha256=AGYujdAAcn3osoj9jq84IzTywYbkID
16
16
  arroyo/backends/local/storages/abstract.py,sha256=1qVQp6roxHkK6XT2aklZyZk1qq7RzcPN6Db_CA5--kg,2901
17
17
  arroyo/backends/local/storages/memory.py,sha256=AoKDsVZzBXkOJyWArKWp3vfGfU9xLlKFXE9gsJiMIzQ,2613
18
18
  arroyo/processing/__init__.py,sha256=vZVg0wJvJfoVzlzGvnL59bT6YNIRJNQ5t7oU045Qbk4,87
19
- arroyo/processing/processor.py,sha256=N5b3Nv_PIH_wnxb_5DOOd7NQTbq4Kyh6K1D9-sD5GF0,20598
19
+ arroyo/processing/processor.py,sha256=beBp0lv8bac31ZEDwb6UAMKpYU4S_jOR-xNETdBuqv8,20710
20
20
  arroyo/processing/strategies/__init__.py,sha256=EU_JMb54eOxMxaC5mIFpI-sAF-X2ZScbE8czBZ7bQkY,1106
21
21
  arroyo/processing/strategies/abstract.py,sha256=nu7juEz_aQmQIH35Z8u--FBuLjkK8_LQ1hIG2xpw9AA,4808
22
22
  arroyo/processing/strategies/batching.py,sha256=s89xC6lQpBseEaApu1iNTipXGKeO95OMwinj2VBKn9s,4778
@@ -30,7 +30,7 @@ arroyo/processing/strategies/produce.py,sha256=LUsg2bsVsguc2fTbueTbqLeg3mbnk_FOf
30
30
  arroyo/processing/strategies/reduce.py,sha256=xv9bYisgHHyS8fVD1PdGi4TJsaK-4RAhMEDh4WHhYfI,3933
31
31
  arroyo/processing/strategies/run_task.py,sha256=MGe2UcIWN7FkPc9plKzRVUNbZ7Sk0jWjw1z2vVOFI_I,2160
32
32
  arroyo/processing/strategies/run_task_in_threads.py,sha256=f1sb2AG-BLz11X78jfhtERIkdFogrV8vtdT3pyJdkx0,6144
33
- arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=jOL6SKLyrTuCsOkFDl5eOLQzr79bo_yZiuE1d9rhnfA,34558
33
+ arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=YQ-H3b-9ARnUzeMBzIFSc0TO9c6QIwpZp6A1xlw6JGE,35612
34
34
  arroyo/processing/strategies/unfold.py,sha256=bi47pwmKGT0Irsx0HdB7Bhc5hb-yYqLF_xcv3g1ewTk,4231
35
35
  arroyo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  arroyo/utils/clock.py,sha256=r2EMO4nL5qIb1xnAd1sTAk2yK1UltyUi04lk5BqWKIc,944
@@ -46,7 +46,7 @@ examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
46
46
  examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
47
47
  examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
48
48
  examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
49
- sentry_arroyo-2.21.0.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
49
+ sentry_arroyo-2.23.0.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
50
50
  tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
52
52
  tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
@@ -72,7 +72,7 @@ tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
72
  tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
73
73
  tests/utils/test_metrics.py,sha256=bI0EtGgPokMQyEqX58i0-8zvLfxRP2nWaWr2wLMaJ_o,917
74
74
  tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
75
- sentry_arroyo-2.21.0.dist-info/METADATA,sha256=Bm-pDoQfyoW8OzHLKb4J9aYoUdQCVTq6HFUHdAp1Bj4,2200
76
- sentry_arroyo-2.21.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
77
- sentry_arroyo-2.21.0.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
78
- sentry_arroyo-2.21.0.dist-info/RECORD,,
75
+ sentry_arroyo-2.23.0.dist-info/METADATA,sha256=aZWbyEkmo8zKhcic0xlLFNTlyTHxEwT7eEtRZ53K7VM,2208
76
+ sentry_arroyo-2.23.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
77
+ sentry_arroyo-2.23.0.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
78
+ sentry_arroyo-2.23.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5