sentry-arroyo 2.32.3__py3-none-any.whl → 2.32.5__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
--- arroyo/backends/kafka/configuration.py (2.32.3)
+++ arroyo/backends/kafka/configuration.py (2.32.5)
@@ -238,6 +238,7 @@ def build_kafka_consumer_configuration(
     override_params: Optional[Mapping[str, Any]] = None,
     strict_offset_reset: Optional[bool] = None,
     enable_auto_commit: bool = False,
+    retry_handle_destroyed: bool = False,
 ) -> KafkaBrokerConfig:
 
     if auto_offset_reset is None:
@@ -263,6 +264,8 @@ def build_kafka_consumer_configuration(
         "arroyo.strict.offset.reset": strict_offset_reset,
         # this is an arroyo specific flag to enable auto-commit mode
         "arroyo.enable.auto.commit": enable_auto_commit,
+        # arroyo specific flag to enable retries when hitting `KafkaError._DESTROY` while committing
+        "arroyo.retry.broker.handle.destroyed": retry_handle_destroyed,
         # overridden to reduce memory usage when there's a large backlog
         "queued.max.messages.kbytes": queued_max_messages_kbytes,
         "queued.min.messages": queued_min_messages,
--- arroyo/processing/processor.py (2.32.3)
+++ arroyo/processing/processor.py (2.32.5)
@@ -141,14 +141,15 @@ class StreamProcessor(Generic[TStrategyPayload]):
         dlq_policy: Optional[DlqPolicy[TStrategyPayload]] = None,
         join_timeout: Optional[float] = None,
         shutdown_strategy_before_consumer: bool = False,
+        handle_poll_while_paused: Optional[bool] = False,
     ) -> None:
         self.__consumer = consumer
         self.__processor_factory = processor_factory
         self.__metrics_buffer = MetricsBuffer()
 
-        self.__processing_strategy: Optional[ProcessingStrategy[TStrategyPayload]] = (
-            None
-        )
+        self.__processing_strategy: Optional[
+            ProcessingStrategy[TStrategyPayload]
+        ] = None
 
         self.__message: Optional[BrokerValue[TStrategyPayload]] = None
 
@@ -167,6 +168,8 @@ class StreamProcessor(Generic[TStrategyPayload]):
         self.__shutdown_requested = False
         self.__shutdown_strategy_before_consumer = shutdown_strategy_before_consumer
 
+        self.__handle_poll_while_paused = handle_poll_while_paused
+
         # Buffers messages for DLQ. Messages are added when they are submitted for processing and
         # removed once the commit callback is fired as they are guaranteed to be valid at that point.
         self.__buffered_messages: BufferedMessages[TStrategyPayload] = BufferedMessages(
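
Enabling the new behavior is a one-line change at construction time. A minimal sketch mirroring the usage in the new tests further down this diff (the consumer, factory, and topic name are placeholders):

    from arroyo.commit import IMMEDIATE
    from arroyo.processing.processor import StreamProcessor
    from arroyo.types import Topic

    processor = StreamProcessor(
        consumer,           # a KafkaConsumer built from the configuration above
        Topic("my-topic"),  # hypothetical topic
        factory,            # a ProcessingStrategyFactory
        IMMEDIATE,
        handle_poll_while_paused=True,  # new in 2.32.5; defaults to False
    )
    processor.run()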
@@ -464,40 +467,59 @@ class StreamProcessor(Generic[TStrategyPayload]):
                     self.__is_paused = True
 
                 elif self.__is_paused:
-                    paused_partitions = set(self.__consumer.paused())
-                    all_partitions = set(self.__consumer.tell())
-                    unpaused_partitions = (
-                        all_partitions - paused_partitions
-                    )
-                    if unpaused_partitions:
-                        logger.warning(
-                            "Processor in paused state while consumer is partially unpaused: %s, paused: %s",
-                            unpaused_partitions,
-                            paused_partitions,
-                        )
-                        self.__is_paused = False
-                        # unpause paused partitions... just in case a subset is paused
-                        self.__metrics_buffer.incr_counter(
-                            "arroyo.consumer.resume", 1
-                        )
-                        self.__consumer.resume([*paused_partitions])
-                    else:
-                        # A paused consumer should still poll periodically to avoid it's partitions
-                        # getting revoked by the broker after reaching the max.poll.interval.ms
-                        # Polling a paused consumer should never yield a message.
-                        logger.warning("consumer.tell() value right before poll() is: %s", self.__consumer.tell())
+                    if self.__handle_poll_while_paused:
                         maybe_message = self.__consumer.poll(0.1)
                         if maybe_message is not None:
-                            logger.warning("Received a message from partition: %s, \
-                                consumer.tell() value right after poll() is: %s \
-                                Some lines above consumer.tell() was called, all_partitons value was: %s \
-                                Some lines above consumer.paused() was called, paused_partitions value is: %s",
-                                maybe_message.partition,
-                                self.__consumer.tell(),
-                                all_partitions,
-                                paused_partitions
-                            )
-                        assert maybe_message is None
+                            # The paused consumer, in the above poll, has
+                            # gone through rebalancing. In this case we
+                            # expect that partition revocation cleared the
+                            # pause flag and the carried over message.
+                            # this assumption will not hold for cooperative-sticky rebalancing.
+                            assert (
+                                not self.__is_paused
+                            ), "consumer unpaused itself without rebalancing"
+                            assert (
+                                self.__message is None
+                            ), "consumer unpaused itself without rebalancing"
+                            # this path might raise AssertionErrors
+                    else:
+                        paused_partitions = set(self.__consumer.paused())
+                        all_partitions = set(self.__consumer.tell())
+                        unpaused_partitions = all_partitions - paused_partitions
+                        if unpaused_partitions:
+                            logger.warning(
+                                "Processor in paused state while consumer is partially unpaused: %s, paused: %s",
+                                unpaused_partitions,
+                                paused_partitions,
+                            )
+                            self.__is_paused = False
+                            # unpause paused partitions... just in case a subset is paused
+                            self.__metrics_buffer.incr_counter(
+                                "arroyo.consumer.resume", 1
+                            )
+                            self.__consumer.resume([*paused_partitions])
+                        else:
+                            # A paused consumer should still poll periodically to avoid it's partitions
+                            # getting revoked by the broker after reaching the max.poll.interval.ms
+                            # Polling a paused consumer should never yield a message.
+                            logger.warning(
+                                "consumer.tell() value right before poll() is: %s",
+                                self.__consumer.tell(),
+                            )
+                            maybe_message = self.__consumer.poll(0.1)
+                            if maybe_message is not None:
+                                logger.warning(
+                                    "Received a message from partition: %s, \
+                                    consumer.tell() value right after poll() is: %s \
+                                    Some lines above consumer.tell() was called, all_partitons value was: %s \
+                                    Some lines above consumer.paused() was called, paused_partitions value is: %s",
+                                    maybe_message.partition,
+                                    self.__consumer.tell(),
+                                    all_partitions,
+                                    paused_partitions,
+                                )
+                            assert maybe_message is None
+
                 else:
                     time.sleep(0.01)
 
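In short, the restructured branch makes the keep-alive poll the primary, opt-in path and keeps the old diagnostic-heavy behavior behind the else. A simplified paraphrase of the new control flow (not the literal source):

    if self.__is_paused:
        if self.__handle_poll_while_paused:
            # Poll even while paused so the broker does not revoke our
            # partitions after max.poll.interval.ms elapses. A non-None
            # result means a rebalance happened inside poll(); revocation
            # should have cleared both the pause flag and the carried-over
            # message, which the assertions verify.
            if self.__consumer.poll(0.1) is not None:
                assert not self.__is_paused
                assert self.__message is None
        else:
            # Legacy behavior: resume any partitions that slipped out of
            # the paused set; otherwise poll, log diagnostics, and assert
            # that a paused consumer never yields a message.
            ...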
--- sentry_arroyo-2.32.3.dist-info/METADATA
+++ sentry_arroyo-2.32.5.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sentry-arroyo
-Version: 2.32.3
+Version: 2.32.5
 Summary: Arroyo is a Python library for working with streaming data.
 Home-page: https://github.com/getsentry/arroyo
 Author: Sentry
--- sentry_arroyo-2.32.3.dist-info/RECORD
+++ sentry_arroyo-2.32.5.dist-info/RECORD
@@ -8,7 +8,7 @@ arroyo/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arroyo/backends/abstract.py,sha256=Wy9xhE1dtFiumG8Cz3JhksJ0rF74uJWZWq10UO1rxOI,9524
 arroyo/backends/kafka/__init__.py,sha256=xgf-AqHbQkJsh73YokO2uoyyHfZf8XwUp6BULtM8stI,445
 arroyo/backends/kafka/commit.py,sha256=LPsjvX5PPXR62DT6sa5GuSF78qk9F_L--Fz4kw7-m-s,3060
-arroyo/backends/kafka/configuration.py,sha256=zB54w7qsyVeMVkH5MpV6F8ztXfEzIXrex6aKYX-GcqA,9141
+arroyo/backends/kafka/configuration.py,sha256=42FQyrpIQGRaECBPljSaNO8E1RRyEoG1-a5cg3PwRe0,9356
 arroyo/backends/kafka/consumer.py,sha256=dzypkibGGieLg819TOoxtbVpVaxKYF9dazorQv5FxPg,34688
 arroyo/backends/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arroyo/backends/local/backend.py,sha256=hUXdCV6B5e7s4mjFC6HnIuUhjENU2tNZt5vuEOJmGZQ,13888
@@ -16,7 +16,7 @@ arroyo/backends/local/storages/__init__.py,sha256=AGYujdAAcn3osoj9jq84IzTywYbkID
 arroyo/backends/local/storages/abstract.py,sha256=1qVQp6roxHkK6XT2aklZyZk1qq7RzcPN6Db_CA5--kg,2901
 arroyo/backends/local/storages/memory.py,sha256=AoKDsVZzBXkOJyWArKWp3vfGfU9xLlKFXE9gsJiMIzQ,2613
 arroyo/processing/__init__.py,sha256=vZVg0wJvJfoVzlzGvnL59bT6YNIRJNQ5t7oU045Qbk4,87
-arroyo/processing/processor.py,sha256=raPgmhl9K9kwI9ZlhCZELJGaba3xK525gmTi4bABenc,22377
+arroyo/processing/processor.py,sha256=CHBvpAxH6T3MrUtoCJR7nTFJppqmIAx0DTosADrUAEM,23711
 arroyo/processing/strategies/__init__.py,sha256=EU_JMb54eOxMxaC5mIFpI-sAF-X2ZScbE8czBZ7bQkY,1106
 arroyo/processing/strategies/abstract.py,sha256=nu7juEz_aQmQIH35Z8u--FBuLjkK8_LQ1hIG2xpw9AA,4808
 arroyo/processing/strategies/batching.py,sha256=s89xC6lQpBseEaApu1iNTipXGKeO95OMwinj2VBKn9s,4778
@@ -46,17 +46,17 @@ examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
 examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
 examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
-sentry_arroyo-2.32.3.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
+sentry_arroyo-2.32.5.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
 tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
 tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
 tests/backends/test_confluent_producer.py,sha256=KWqgvjDvqAdd0HxngdWKsUJaV7Hl1L5vAVQhBYlHeHU,3146
-tests/backends/test_kafka.py,sha256=wBFCKEHoP6h0uG1bgDuzk84IZmrV_UVOFCrtbxztmJg,15506
+tests/backends/test_kafka.py,sha256=I5TOC1R1usWGyd4r3C9W_W6lbeKRVID1m5CaW6Vi4lM,18780
 tests/backends/test_kafka_commit_callback.py,sha256=svpY3T7FvhXvz2jp729e60LLn8MyFa8w88FQ1Y0Ursw,1987
 tests/backends/test_kafka_producer.py,sha256=LpwkqnstcCDxemlKZ0FpzNKrP-1UuXXY15P7P-spjhE,3912
 tests/backends/test_local.py,sha256=Mfd4DFuWVSVtl1GomQ6TIoWuJNcAliKqKU0BShPlEMY,3363
 tests/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/processing/test_processor.py,sha256=Gj86_WYQ-Eybz2YwLjNLonBfzJQQI9Vqh1WiYTbmcQk,23265
+tests/processing/test_processor.py,sha256=Ku_3z5aUeCRPRfeltHTSDLUMiATWmM_UXylhbE4kupg,25963
 tests/processing/strategies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/processing/strategies/test_all.py,sha256=ahAF-nbdmqVkYGNCg0OFCD6fzNTA-XxYrW8NQHajCDU,10167
 tests/processing/strategies/test_batching.py,sha256=nyyX0y6qYHX7jT4gCgsUjT5RzBMDrBp790SCmOizQ0Q,11787
@@ -75,7 +75,7 @@ tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
 tests/utils/test_metrics.py,sha256=bI0EtGgPokMQyEqX58i0-8zvLfxRP2nWaWr2wLMaJ_o,917
 tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
-sentry_arroyo-2.32.3.dist-info/METADATA,sha256=EfJLX_0UhKzQRBftakhcDvPKZOqlR-JS17uEN-hNV4w,2208
-sentry_arroyo-2.32.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sentry_arroyo-2.32.3.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
-sentry_arroyo-2.32.3.dist-info/RECORD,,
+sentry_arroyo-2.32.5.dist-info/METADATA,sha256=Ld6VXfYDMWphFkcbBijnnHx4GebcIT91If8tqP_6Tho,2208
+sentry_arroyo-2.32.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sentry_arroyo-2.32.5.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
+sentry_arroyo-2.32.5.dist-info/RECORD,,
--- tests/backends/test_kafka.py (2.32.3)
+++ tests/backends/test_kafka.py (2.32.5)
@@ -80,7 +80,7 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
     @property
     def configuration(self) -> KafkaBrokerConfig:
         config = {
-            "bootstrap.servers": os.environ.get("DEFAULT_BROKERS", "localhost:9092"),
+            "bootstrap.servers": os.environ.get("DEFAULT_BROKERS", "127.0.0.1:9092"),
         }
 
         return build_kafka_configuration(config)
@@ -231,6 +231,93 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
         with pytest.raises(RuntimeError):
             processor.run()
 
+    @mock.patch("arroyo.processing.processor.BACKPRESSURE_THRESHOLD", 0)
+    def test_assign_partition_during_pause(self) -> None:
+        if self.cooperative_sticky or self.kip_848:
+            pytest.skip("test does not work with cooperative-sticky rebalancing")
+
+        payloads = self.get_payloads()
+
+        strategy = mock.Mock()
+        strategy.submit.side_effect = MessageRejected()
+        factory = mock.Mock()
+        factory.create_with_partitions.return_value = strategy
+
+        partition_count = 2
+
+        with self.get_topic(partition_count) as topic, closing(
+            self.get_producer()
+        ) as producer, closing(
+            self.get_consumer(
+                "test_assign_partition_during_pause", enable_end_of_partition=True
+            )
+        ) as consumer_a, closing(
+            self.get_consumer(
+                "test_assign_partition_during_pause", enable_end_of_partition=True
+            )
+        ) as consumer_b:
+            for i in range(partition_count):
+                producer.produce(Partition(topic, i), next(payloads)).result(
+                    timeout=5.0
+                )
+
+            processor_a = StreamProcessor(
+                consumer_a, topic, factory, IMMEDIATE, handle_poll_while_paused=True
+            )
+
+            def wait_until_consumer_pauses(processor: StreamProcessor[Any]) -> None:
+                for _ in range(20):
+                    try:
+                        processor._run_once()
+                    except EndOfPartition:
+                        pass
+
+                    if processor._StreamProcessor__is_paused:  # type:ignore
+                        return
+                raise RuntimeError("processor was not paused")
+
+            # calling _run_once will pause both consumers because of the MessageRejected strategy above
+            wait_until_consumer_pauses(processor_a)
+
+            # consumer A has all the partitions
+            assert len(consumer_a.tell()) == 2
+            assert len(consumer_b.tell()) == 0
+
+            # consumer A has all partitions paused (both from consumer and from
+            # StreamProcessor POV)
+            assert consumer_a.paused()
+            assert processor_a._StreamProcessor__is_paused is True  # type:ignore
+
+            # subscribe with another consumer, now we should have rebalancing in the next few polls
+            processor_b = StreamProcessor(
+                consumer_b, topic, factory, IMMEDIATE, handle_poll_while_paused=True
+            )
+
+            for _ in range(10):
+                try:
+                    processor_a._run_once()
+                except EndOfPartition:
+                    pass
+                try:
+                    processor_b._run_once()
+                except EndOfPartition:
+                    pass
+
+            # balanced
+            assert len(consumer_a.tell()) == 1
+            assert len(consumer_b.tell()) == 1
+
+            # close B, but A has not polled yet, so it only has one partition still
+            consumer_b.close()
+            assert len(consumer_a.tell()) == 1
+
+            for _ in range(20):
+                try:
+                    processor_a._run_once()
+                except EndOfPartition:
+                    pass
+            assert len(consumer_a.tell()) == 2
+
     def test_consumer_polls_when_paused(self) -> None:
         strategy = mock.Mock()
         factory = mock.Mock()
--- tests/processing/test_processor.py (2.32.3)
+++ tests/processing/test_processor.py (2.32.5)
@@ -690,3 +690,75 @@ def test_processor_pause_with_invalid_message() -> None:
 
     processor._run_once()
     assert strategy.submit.call_args_list[-1] == mock.call(new_message)
+
+def test_processor_poll_while_paused() -> None:
+
+    topic = Topic("topic")
+
+    consumer = mock.Mock()
+    strategy = mock.Mock()
+    factory = mock.Mock()
+    factory.create_with_partitions.return_value = strategy
+
+    processor: StreamProcessor[int] = StreamProcessor(
+        consumer, topic, factory, IMMEDIATE, handle_poll_while_paused=True
+    )
+
+    # Subscribe to topic
+    subscribe_args, subscribe_kwargs = consumer.subscribe.call_args
+    assert subscribe_args[0] == [topic]
+
+    # Partition assignment
+    partition = Partition(topic, 0)
+    new_partition = Partition(topic, 1)
+    consumer.tell.return_value = {}
+    assignment_callback = subscribe_kwargs["on_assign"]
+    offsets = {partition: 0}
+    assignment_callback(offsets)
+
+    # Message that we will get from polling
+    message = Message(BrokerValue(0, partition, 0, datetime.now()))
+
+    # Message will be rejected
+    consumer.poll.return_value = message.value
+    strategy.submit.side_effect = MessageRejected()
+    with assert_changes(lambda: int(consumer.pause.call_count), 0, 1):
+        processor._run_once()
+    assert strategy.submit.call_args_list[-1] == mock.call(message)
+
+    with mock.patch("time.time", return_value=time.time() + 5):
+        processor._run_once()  # Should pause now
+
+    # Consumer is in paused state
+    # The same rejected message should be carried over
+
+    # All partitions are paused
+    consumer.paused.return_value = set(p for p in offsets)
+    # Simulate a continuous backpressure state where messages are being rejected
+    strategy.submit.side_effect = MessageRejected()
+
+    # Simulate Kafka returning nothing since the consumer is paused
+    consumer.poll.return_value = None
+
+    # The next poll returns nothing, but we are still carrying over the rejected message
+    processor._run_once()
+    assert consumer.poll.return_value is None
+
+    # At this point, let's say the message carried over is invalid (e.g. it could be stale)
+    strategy.submit.side_effect = InvalidMessage(partition, 0, needs_commit=False)
+
+    # Handles the invalid message and unpauses the consumer
+    with assert_changes(lambda: int(consumer.resume.call_count), 0, 1):
+        processor._run_once()
+
+    # Poll for the next message from Kafka, but this time the partition has changed
+    new_message = Message(BrokerValue(0, new_partition, 1, datetime.now()))
+    consumer.poll.return_value = new_message.value
+    processor._run_once()
+    assert processor._StreamProcessor__is_paused is False  # type:ignore
+
+    strategy.submit.return_value = None
+    strategy.submit.side_effect = None
+
+    processor._run_once()
+    assert strategy.submit.call_args_list[-1] == mock.call(new_message)