sentry-arroyo 2.21.0__tar.gz → 2.23.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {sentry_arroyo-2.21.0/sentry_arroyo.egg-info → sentry_arroyo-2.23.0}/PKG-INFO +2 -2
  2. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/processor.py +2 -2
  3. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/run_task_with_multiprocessing.py +26 -3
  4. sentry_arroyo-2.23.0/requirements.txt +3 -0
  5. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0/sentry_arroyo.egg-info}/PKG-INFO +2 -2
  6. sentry_arroyo-2.23.0/sentry_arroyo.egg-info/requires.txt +1 -0
  7. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/setup.py +1 -1
  8. sentry_arroyo-2.21.0/requirements.txt +0 -1
  9. sentry_arroyo-2.21.0/sentry_arroyo.egg-info/requires.txt +0 -1
  10. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/LICENSE +0 -0
  11. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/MANIFEST.in +0 -0
  12. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/README.md +0 -0
  13. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/__init__.py +0 -0
  14. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/__init__.py +0 -0
  15. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/abstract.py +0 -0
  16. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/kafka/__init__.py +0 -0
  17. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/kafka/commit.py +0 -0
  18. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/kafka/configuration.py +0 -0
  19. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/kafka/consumer.py +0 -0
  20. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/local/__init__.py +0 -0
  21. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/local/backend.py +0 -0
  22. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/local/storages/__init__.py +0 -0
  23. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/local/storages/abstract.py +0 -0
  24. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/backends/local/storages/memory.py +0 -0
  25. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/commit.py +0 -0
  26. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/dlq.py +0 -0
  27. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/errors.py +0 -0
  28. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/__init__.py +0 -0
  29. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/__init__.py +0 -0
  30. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/abstract.py +0 -0
  31. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/batching.py +0 -0
  32. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/buffer.py +0 -0
  33. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/commit.py +0 -0
  34. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/filter.py +0 -0
  35. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/guard.py +0 -0
  36. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/healthcheck.py +0 -0
  37. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/noop.py +0 -0
  38. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/produce.py +0 -0
  39. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/reduce.py +0 -0
  40. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/run_task.py +0 -0
  41. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/run_task_in_threads.py +0 -0
  42. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/processing/strategies/unfold.py +0 -0
  43. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/py.typed +0 -0
  44. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/types.py +0 -0
  45. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/__init__.py +0 -0
  46. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/clock.py +0 -0
  47. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/codecs.py +0 -0
  48. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/concurrent.py +0 -0
  49. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/logging.py +0 -0
  50. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/metricDefs.json +0 -0
  51. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/metric_defs.py +0 -0
  52. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/metrics.py +0 -0
  53. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/profiler.py +0 -0
  54. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/arroyo/utils/retries.py +0 -0
  55. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/examples/transform_and_produce/__init__.py +0 -0
  56. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/examples/transform_and_produce/batched.py +0 -0
  57. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/examples/transform_and_produce/script.py +0 -0
  58. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/examples/transform_and_produce/simple.py +0 -0
  59. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/sentry_arroyo.egg-info/SOURCES.txt +0 -0
  60. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/sentry_arroyo.egg-info/dependency_links.txt +0 -0
  61. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/sentry_arroyo.egg-info/not-zip-safe +0 -0
  62. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/sentry_arroyo.egg-info/top_level.txt +0 -0
  63. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/setup.cfg +0 -0
  64. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/backends/__init__.py +0 -0
  65. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/backends/mixins.py +0 -0
  66. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/backends/test_commit.py +0 -0
  67. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/backends/test_kafka.py +0 -0
  68. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/backends/test_local.py +0 -0
  69. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/__init__.py +0 -0
  70. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/__init__.py +0 -0
  71. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_all.py +0 -0
  72. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_batching.py +0 -0
  73. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_buffer.py +0 -0
  74. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_commit.py +0 -0
  75. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_filter.py +0 -0
  76. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_guard.py +0 -0
  77. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_noop.py +0 -0
  78. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_produce.py +0 -0
  79. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_reduce.py +0 -0
  80. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_run_task.py +0 -0
  81. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_run_task_in_threads.py +0 -0
  82. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_run_task_with_multiprocessing.py +0 -0
  83. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/strategies/test_unfold.py +0 -0
  84. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/processing/test_processor.py +0 -0
  85. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/test_commit.py +0 -0
  86. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/test_dlq.py +0 -0
  87. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/test_kip848_e2e.py +0 -0
  88. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/test_types.py +0 -0
  89. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/utils/__init__.py +0 -0
  90. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/utils/test_concurrent.py +0 -0
  91. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/utils/test_metrics.py +0 -0
  92. {sentry_arroyo-2.21.0 → sentry_arroyo-2.23.0}/tests/utils/test_retries.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.21.0
3
+ Version: 2.23.0
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python
13
13
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
14
  Description-Content-Type: text/markdown
15
15
  License-File: LICENSE
16
- Requires-Dist: confluent-kafka>=2.7.0
16
+ Requires-Dist: confluent-kafka<2.10.0,>=2.7.0
17
17
  Dynamic: author
18
18
  Dynamic: author-email
19
19
  Dynamic: classifier
@@ -236,7 +236,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
236
236
  logger.info("New partitions assigned: %r", partitions)
237
237
  logger.info("Member id: %r", self.__consumer.member_id)
238
238
  self.__metrics_buffer.metrics.increment(
239
- "arroyo.consumer.partitions_assigned.count", len(partitions)
239
+ "arroyo.consumer.partitions_assigned.count", len(partitions), tags={"consumer_member_id": self.__consumer.member_id}
240
240
  )
241
241
 
242
242
  current_partitions = dict(self.__consumer.tell())
@@ -262,7 +262,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
262
262
  logger.info("Partitions to revoke: %r", partitions)
263
263
 
264
264
  self.__metrics_buffer.metrics.increment(
265
- "arroyo.consumer.partitions_revoked.count", len(partitions)
265
+ "arroyo.consumer.partitions_revoked.count", len(partitions), tags={"consumer_member_id": self.__consumer.member_id}
266
266
  )
267
267
 
268
268
  if partitions:
@@ -196,7 +196,7 @@ class BatchBuilder(Generic[TBatchValue]):
196
196
 
197
197
 
198
198
  def parallel_worker_initializer(
199
- custom_initialize_func: Optional[Callable[[], None]] = None
199
+ custom_initialize_func: Optional[Callable[[], None]] = None,
200
200
  ) -> None:
201
201
  # Worker process should ignore ``SIGINT`` so that processing is not
202
202
  # interrupted by ``KeyboardInterrupt`` during graceful shutdown.
@@ -468,6 +468,20 @@ class RunTaskWithMultiprocessing(
468
468
  is applying backpressure. You can likely reduce ``num_processes`` and won't
469
469
  notice a performance regression.
470
470
 
471
+ Prefetching
472
+ ~~~~~~~~~~~
473
+
474
+ If you set ``prefetch_batches`` to ``True``, Arroyo will allocate twice as
475
+ many input and output blocks as processes, and will prefetch the next batch while the
476
+ current batch is being processed. This can help saturate the process pool to
477
+ increase throughput, but it also increases memory usage.
478
+
479
+ Use this option if your consumer is bottlenecked on the multiprocessing step
480
+ but also runs time-consuming tasks in the other steps, like ``Produce`` or
481
+ ``Unfold``. By prefetching batches, the pool can immediately start working
482
+ on the next batch while the current batch is being sent through the next
483
+ steps.
484
+
471
485
  How to tune your consumer
472
486
  ~~~~~~~~~~~~~~~~~~~~~~~~~
473
487
 
@@ -507,6 +521,7 @@ class RunTaskWithMultiprocessing(
507
521
  output_block_size: Optional[int] = None,
508
522
  max_input_block_size: Optional[int] = None,
509
523
  max_output_block_size: Optional[int] = None,
524
+ prefetch_batches: bool = False,
510
525
  ) -> None:
511
526
  self.__transform_function = function
512
527
  self.__next_step = next_step
@@ -525,18 +540,26 @@ class RunTaskWithMultiprocessing(
525
540
  self.__shared_memory_manager = SharedMemoryManager()
526
541
  self.__shared_memory_manager.start()
527
542
 
543
+ block_count = num_processes
544
+ if prefetch_batches:
545
+ # Allocate twice as many blocks as processes to ensure that every
546
+ # process can immediately continue to handle another batch while the
547
+ # main strategy is busy submitting the transformed messages to the
548
+ # next step.
549
+ block_count *= 2
550
+
528
551
  self.__input_blocks = [
529
552
  self.__shared_memory_manager.SharedMemory(
530
553
  input_block_size or DEFAULT_INPUT_BLOCK_SIZE
531
554
  )
532
- for _ in range(num_processes)
555
+ for _ in range(block_count)
533
556
  ]
534
557
 
535
558
  self.__output_blocks = [
536
559
  self.__shared_memory_manager.SharedMemory(
537
560
  output_block_size or DEFAULT_OUTPUT_BLOCK_SIZE
538
561
  )
539
- for _ in range(num_processes)
562
+ for _ in range(block_count)
540
563
  ]
541
564
 
542
565
  self.__batch_builder: Optional[
@@ -0,0 +1,3 @@
1
+ # confluent-kafka 2.10.0 seems to be problematic for the KIP-848 tests.
2
+ # Stay below 2.10.0 until that is fixed.
3
+ confluent-kafka>=2.7.0,<2.10.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.21.0
3
+ Version: 2.23.0
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python
13
13
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
14
  Description-Content-Type: text/markdown
15
15
  License-File: LICENSE
16
- Requires-Dist: confluent-kafka>=2.7.0
16
+ Requires-Dist: confluent-kafka<2.10.0,>=2.7.0
17
17
  Dynamic: author
18
18
  Dynamic: author-email
19
19
  Dynamic: classifier
@@ -0,0 +1 @@
1
+ confluent-kafka<2.10.0,>=2.7.0
@@ -10,7 +10,7 @@ def get_requirements() -> Sequence[str]:
10
10
 
11
11
  setup(
12
12
  name="sentry-arroyo",
13
- version="2.21.0",
13
+ version="2.23.0",
14
14
  author="Sentry",
15
15
  author_email="oss@sentry.io",
16
16
  license="Apache-2.0",
@@ -1 +0,0 @@
1
- confluent-kafka>=2.7.0
@@ -1 +0,0 @@
1
- confluent-kafka>=2.7.0
File without changes
File without changes
File without changes