sentry-arroyo 2.32.4__py3-none-any.whl → 2.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/arroyo/backends/kafka/configuration.py
+++ b/arroyo/backends/kafka/configuration.py
@@ -237,8 +237,7 @@ def build_kafka_consumer_configuration(
     bootstrap_servers: Optional[Sequence[str]] = None,
     override_params: Optional[Mapping[str, Any]] = None,
     strict_offset_reset: Optional[bool] = None,
-    enable_auto_commit: bool = False,
-    retry_handle_destroyed: bool = False,
+    enable_auto_commit: bool = True,
 ) -> KafkaBrokerConfig:
 
     if auto_offset_reset is None:
@@ -264,8 +263,6 @@ def build_kafka_consumer_configuration(
         "arroyo.strict.offset.reset": strict_offset_reset,
         # this is an arroyo specific flag to enable auto-commit mode
         "arroyo.enable.auto.commit": enable_auto_commit,
-        # arroyo specific flag to enable retries when hitting `KafkaError._DESTROY` while committing
-        "arroyo.retry.broker.handle.destroyed": retry_handle_destroyed,
         # overridden to reduce memory usage when there's a large backlog
         "queued.max.messages.kbytes": queued_max_messages_kbytes,
         "queued.min.messages": queued_min_messages,
--- a/arroyo/backends/kafka/consumer.py
+++ b/arroyo/backends/kafka/consumer.py
@@ -159,22 +159,13 @@ class KafkaConsumer(Consumer[KafkaPayload]):
     ) -> None:
         configuration = dict(configuration)
 
-        # Feature flag to enable retrying on `Broker handle destroyed` errors
-        # which can occur if we attempt to commit during a rebalance when
-        # the consumer group coordinator changed
-        self.__retry_handle_destroyed = as_kafka_configuration_bool(
-            configuration.pop("arroyo.retry.broker.handle.destroyed", False)
-        )
-
-        retryable_errors: Tuple[int, ...] = (
+        retryable_errors = (
             KafkaError.REQUEST_TIMED_OUT,
             KafkaError.NOT_COORDINATOR,
             KafkaError._WAIT_COORD,
             KafkaError.STALE_MEMBER_EPOCH,  # kip-848
             KafkaError.COORDINATOR_LOAD_IN_PROGRESS,
         )
-        if self.__retry_handle_destroyed:
-            retryable_errors += (KafkaError._DESTROY,)
 
         commit_retry_policy = BasicRetryPolicy(
             3,
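The consumer now has a single, fixed set of retryable commit errors; `KafkaError._DESTROY` is no longer retried under any configuration. For illustration, here is a standalone sketch of that retry shape against confluent_kafka's public API (this is not arroyo's actual `BasicRetryPolicy`, whose full signature sits outside this diff; the 3 attempts mirror the `BasicRetryPolicy(3, ...)` call in the hunk):

```python
import time

from confluent_kafka import Consumer, KafkaError, KafkaException

# the retryable codes listed in the hunk above; STALE_MEMBER_EPOCH (KIP-848)
# only exists in recent confluent_kafka releases, hence the guarded add
RETRYABLE_ERRORS = {
    KafkaError.REQUEST_TIMED_OUT,
    KafkaError.NOT_COORDINATOR,
    KafkaError._WAIT_COORD,
    KafkaError.COORDINATOR_LOAD_IN_PROGRESS,
}
if hasattr(KafkaError, "STALE_MEMBER_EPOCH"):
    RETRYABLE_ERRORS.add(KafkaError.STALE_MEMBER_EPOCH)


def commit_with_retries(consumer: Consumer, attempts: int = 3) -> None:
    """Synchronous commit that retries only transient coordinator errors."""
    for attempt in range(attempts):
        try:
            consumer.commit(asynchronous=False)
            return
        except KafkaException as exc:
            # KafkaException wraps the underlying KafkaError as its first arg
            if exc.args[0].code() not in RETRYABLE_ERRORS or attempt == attempts - 1:
                raise
            time.sleep(1.0)  # simple fixed backoff between attempts
```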
--- a/arroyo/processing/processor.py
+++ b/arroyo/processing/processor.py
@@ -140,15 +140,14 @@ class StreamProcessor(Generic[TStrategyPayload]):
         commit_policy: CommitPolicy = ONCE_PER_SECOND,
         dlq_policy: Optional[DlqPolicy[TStrategyPayload]] = None,
         join_timeout: Optional[float] = None,
-        shutdown_strategy_before_consumer: bool = False,
     ) -> None:
         self.__consumer = consumer
         self.__processor_factory = processor_factory
         self.__metrics_buffer = MetricsBuffer()
 
-        self.__processing_strategy: Optional[ProcessingStrategy[TStrategyPayload]] = (
-            None
-        )
+        self.__processing_strategy: Optional[
+            ProcessingStrategy[TStrategyPayload]
+        ] = None
 
         self.__message: Optional[BrokerValue[TStrategyPayload]] = None
 
@@ -165,7 +164,6 @@ class StreamProcessor(Generic[TStrategyPayload]):
         )
 
         self.__shutdown_requested = False
-        self.__shutdown_strategy_before_consumer = shutdown_strategy_before_consumer
 
         # Buffers messages for DLQ. Messages are added when they are submitted for processing and
         # removed once the commit callback is fired as they are guaranteed to be valid at that point.
@@ -464,40 +462,20 @@ class StreamProcessor(Generic[TStrategyPayload]):
                 self.__is_paused = True
 
             elif self.__is_paused:
-                paused_partitions = set(self.__consumer.paused())
-                all_partitions = set(self.__consumer.tell())
-                unpaused_partitions = (
-                    all_partitions - paused_partitions
-                )
-                if unpaused_partitions:
-                    logger.warning(
-                        "Processor in paused state while consumer is partially unpaused: %s, paused: %s",
-                        unpaused_partitions,
-                        paused_partitions,
-                    )
-                    self.__is_paused = False
-                    # unpause paused partitions... just in case a subset is paused
-                    self.__metrics_buffer.incr_counter(
-                        "arroyo.consumer.resume", 1
-                    )
-                    self.__consumer.resume([*paused_partitions])
-                else:
-                    # A paused consumer should still poll periodically to avoid it's partitions
-                    # getting revoked by the broker after reaching the max.poll.interval.ms
-                    # Polling a paused consumer should never yield a message.
-                    logger.warning("consumer.tell() value right before poll() is: %s", self.__consumer.tell())
-                    maybe_message = self.__consumer.poll(0.1)
-                    if maybe_message is not None:
-                        logger.warning("Received a message from partition: %s, \
-                            consumer.tell() value right after poll() is: %s \
-                            Some lines above consumer.tell() was called, all_partitons value was: %s \
-                            Some lines above consumer.paused() was called, paused_partitions value is: %s",
-                            maybe_message.partition,
-                            self.__consumer.tell(),
-                            all_partitions,
-                            paused_partitions
-                        )
-                    assert maybe_message is None
+                maybe_message = self.__consumer.poll(0.1)
+                if maybe_message is not None:
+                    # The paused consumer, in the above poll, has
+                    # gone through rebalancing. In this case we
+                    # expect that partition revocation cleared the
+                    # pause flag and the carried over message.
+                    # this assumption will not hold for cooperative-sticky rebalancing.
+                    assert (
+                        not self.__is_paused
+                    ), "consumer unpaused itself without rebalancing"
+                    assert (
+                        self.__message is None
+                    ), "consumer unpaused itself without rebalancing"
+
             else:
                 time.sleep(0.01)
 
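The replacement branch leans on a librdkafka contract worth spelling out: a paused consumer must still call `poll()` regularly so the broker does not evict it from the group after `max.poll.interval.ms`, and polling fully paused partitions delivers no records unless a rebalance has resumed them in the meantime. A minimal confluent_kafka sketch of that contract (broker address, group id, and topic are illustrative):

```python
from confluent_kafka import Consumer

consumer = Consumer(
    {
        "bootstrap.servers": "127.0.0.1:9092",  # illustrative
        "group.id": "paused-poll-demo",         # illustrative
    }
)
consumer.subscribe(["demo-topic"])  # illustrative topic

# poll until the group assigns us partitions, then pause all of them
while not consumer.assignment():
    consumer.poll(0.1)
consumer.pause(consumer.assignment())

# keep polling while paused: this services the poll deadline without
# delivering records, so normally nothing comes back; a rebalance that
# revokes and re-assigns partitions is the exception, which is exactly
# the case the new asserts above are guarding against
msg = consumer.poll(0.1)
print("poll on paused consumer returned:", msg)  # expected: None
```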
@@ -538,12 +516,11 @@ class StreamProcessor(Generic[TStrategyPayload]):
         self.__shutdown_requested = True
 
     def _shutdown(self) -> None:
-        # If shutdown_strategy_before_consumer is set, work around an issue
-        # where rdkafka would revoke our partition, but then also immediately
-        # revoke our member ID as well, causing join() of the CommitStrategy
-        # (that is running in the partition revocation callback) to crash.
-        if self.__shutdown_strategy_before_consumer:
-            self._close_processing_strategy()
+        # when we close() a consumer, rdkafka would would revoke our partition
+        # and call revocation callbacks, but also immediately revoke our member
+        # ID as well, causing join() of the CommitStrategy (that is running in
+        # the partition revocation callback) to crash.
+        self._close_processing_strategy()
 
         # close the consumer
         logger.info("Stopping consumer")
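Condensed to its essentials, the shutdown path now always closes the strategy first. A sketch of the ordering this hunk establishes; the trailing `self.__consumer.close()` step is an assumption inferred from the `# close the consumer` context lines, and the real method does more than shown:

```python
# illustrative condensation of StreamProcessor._shutdown after this change
def _shutdown(self) -> None:
    # 1. join and close the processing strategy while the consumer still
    #    holds its group membership, so the revocation callbacks fired by
    #    closing the consumer cannot crash the strategy's join()
    self._close_processing_strategy()

    # 2. only then close the consumer, triggering partition revocation
    logger.info("Stopping consumer")
    self.__consumer.close()
```

Previously this ordering was gated behind `shutdown_strategy_before_consumer=True`; in 2.33.0 it is unconditional, which is why the constructor parameter could be deleted in the earlier hunks.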
--- a/sentry_arroyo-2.32.4.dist-info/METADATA
+++ b/sentry_arroyo-2.33.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sentry-arroyo
-Version: 2.32.4
+Version: 2.33.0
 Summary: Arroyo is a Python library for working with streaming data.
 Home-page: https://github.com/getsentry/arroyo
 Author: Sentry
--- a/sentry_arroyo-2.32.4.dist-info/RECORD
+++ b/sentry_arroyo-2.33.0.dist-info/RECORD
@@ -8,15 +8,15 @@ arroyo/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arroyo/backends/abstract.py,sha256=Wy9xhE1dtFiumG8Cz3JhksJ0rF74uJWZWq10UO1rxOI,9524
 arroyo/backends/kafka/__init__.py,sha256=xgf-AqHbQkJsh73YokO2uoyyHfZf8XwUp6BULtM8stI,445
 arroyo/backends/kafka/commit.py,sha256=LPsjvX5PPXR62DT6sa5GuSF78qk9F_L--Fz4kw7-m-s,3060
-arroyo/backends/kafka/configuration.py,sha256=42FQyrpIQGRaECBPljSaNO8E1RRyEoG1-a5cg3PwRe0,9356
-arroyo/backends/kafka/consumer.py,sha256=dzypkibGGieLg819TOoxtbVpVaxKYF9dazorQv5FxPg,34688
+arroyo/backends/kafka/configuration.py,sha256=voSOnySPlIjmd9LafpnfE_X4s2Bkxd6YTpdsTiZ2CNk,9140
+arroyo/backends/kafka/consumer.py,sha256=6exjpmSQxWVAdFCz52iIFQW3l3to-eJl9rCumY65e1o,34216
 arroyo/backends/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arroyo/backends/local/backend.py,sha256=hUXdCV6B5e7s4mjFC6HnIuUhjENU2tNZt5vuEOJmGZQ,13888
 arroyo/backends/local/storages/__init__.py,sha256=AGYujdAAcn3osoj9jq84IzTywYbkIDv9wRg2rLhLXeg,104
 arroyo/backends/local/storages/abstract.py,sha256=1qVQp6roxHkK6XT2aklZyZk1qq7RzcPN6Db_CA5--kg,2901
 arroyo/backends/local/storages/memory.py,sha256=AoKDsVZzBXkOJyWArKWp3vfGfU9xLlKFXE9gsJiMIzQ,2613
 arroyo/processing/__init__.py,sha256=vZVg0wJvJfoVzlzGvnL59bT6YNIRJNQ5t7oU045Qbk4,87
-arroyo/processing/processor.py,sha256=raPgmhl9K9kwI9ZlhCZELJGaba3xK525gmTi4bABenc,22377
+arroyo/processing/processor.py,sha256=l8nGwRpVe5JWZ-zA6KTrrsJezbjKvfKvsDHBrtUtTLw,20632
 arroyo/processing/strategies/__init__.py,sha256=EU_JMb54eOxMxaC5mIFpI-sAF-X2ZScbE8czBZ7bQkY,1106
 arroyo/processing/strategies/abstract.py,sha256=nu7juEz_aQmQIH35Z8u--FBuLjkK8_LQ1hIG2xpw9AA,4808
 arroyo/processing/strategies/batching.py,sha256=s89xC6lQpBseEaApu1iNTipXGKeO95OMwinj2VBKn9s,4778
@@ -46,17 +46,17 @@ examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
 examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
 examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
-sentry_arroyo-2.32.4.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
+sentry_arroyo-2.33.0.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
 tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
 tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
 tests/backends/test_confluent_producer.py,sha256=KWqgvjDvqAdd0HxngdWKsUJaV7Hl1L5vAVQhBYlHeHU,3146
-tests/backends/test_kafka.py,sha256=wBFCKEHoP6h0uG1bgDuzk84IZmrV_UVOFCrtbxztmJg,15506
+tests/backends/test_kafka.py,sha256=YL6CAPW8EtAJpjMhY71tyqHwcKtVkruB9cRCj9xx0Dk,18658
 tests/backends/test_kafka_commit_callback.py,sha256=svpY3T7FvhXvz2jp729e60LLn8MyFa8w88FQ1Y0Ursw,1987
 tests/backends/test_kafka_producer.py,sha256=LpwkqnstcCDxemlKZ0FpzNKrP-1UuXXY15P7P-spjhE,3912
 tests/backends/test_local.py,sha256=Mfd4DFuWVSVtl1GomQ6TIoWuJNcAliKqKU0BShPlEMY,3363
 tests/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/processing/test_processor.py,sha256=Gj86_WYQ-Eybz2YwLjNLonBfzJQQI9Vqh1WiYTbmcQk,23265
+tests/processing/test_processor.py,sha256=f4WsNBo8RfNK2sPGWnwoJie1vfgaf74W1VAaazojwJ8,25934
 tests/processing/strategies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/processing/strategies/test_all.py,sha256=ahAF-nbdmqVkYGNCg0OFCD6fzNTA-XxYrW8NQHajCDU,10167
 tests/processing/strategies/test_batching.py,sha256=nyyX0y6qYHX7jT4gCgsUjT5RzBMDrBp790SCmOizQ0Q,11787
@@ -75,7 +75,7 @@ tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
 tests/utils/test_metrics.py,sha256=bI0EtGgPokMQyEqX58i0-8zvLfxRP2nWaWr2wLMaJ_o,917
 tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
-sentry_arroyo-2.32.4.dist-info/METADATA,sha256=pvsMvCmDOq3xqVGXTolHjmZhwwER_sVRLFBSuFpV-ms,2208
-sentry_arroyo-2.32.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sentry_arroyo-2.32.4.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
-sentry_arroyo-2.32.4.dist-info/RECORD,,
+sentry_arroyo-2.33.0.dist-info/METADATA,sha256=ycSuuLcy8YTgaDmntDbj5HWy9iFaIKweYmeqWUTpiCk,2208
+sentry_arroyo-2.33.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sentry_arroyo-2.33.0.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
+sentry_arroyo-2.33.0.dist-info/RECORD,,
--- a/tests/backends/test_kafka.py
+++ b/tests/backends/test_kafka.py
@@ -80,7 +80,7 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
     @property
     def configuration(self) -> KafkaBrokerConfig:
         config = {
-            "bootstrap.servers": os.environ.get("DEFAULT_BROKERS", "localhost:9092"),
+            "bootstrap.servers": os.environ.get("DEFAULT_BROKERS", "127.0.0.1:9092"),
         }
 
         return build_kafka_configuration(config)
@@ -231,6 +231,89 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
         with pytest.raises(RuntimeError):
             processor.run()
 
+    @mock.patch("arroyo.processing.processor.BACKPRESSURE_THRESHOLD", 0)
+    def test_assign_partition_during_pause(self) -> None:
+        if self.cooperative_sticky or self.kip_848:
+            pytest.skip("test does not work with cooperative-sticky rebalancing")
+
+        payloads = self.get_payloads()
+
+        strategy = mock.Mock()
+        strategy.submit.side_effect = MessageRejected()
+        factory = mock.Mock()
+        factory.create_with_partitions.return_value = strategy
+
+        partition_count = 2
+
+        with self.get_topic(partition_count) as topic, closing(
+            self.get_producer()
+        ) as producer, closing(
+            self.get_consumer(
+                "test_assign_partition_during_pause", enable_end_of_partition=True
+            )
+        ) as consumer_a, closing(
+            self.get_consumer(
+                "test_assign_partition_during_pause", enable_end_of_partition=True
+            )
+        ) as consumer_b:
+            for i in range(partition_count):
+                producer.produce(Partition(topic, i), next(payloads)).result(
+                    timeout=5.0
+                )
+
+            processor_a = StreamProcessor(consumer_a, topic, factory, IMMEDIATE)
+
+            def wait_until_consumer_pauses(processor: StreamProcessor[Any]) -> None:
+                for _ in range(20):
+                    try:
+                        processor._run_once()
+                    except EndOfPartition:
+                        pass
+
+                    if processor._StreamProcessor__is_paused:  # type:ignore
+                        return
+                raise RuntimeError("processor was not paused")
+
+            # calling _run_once will pause both consumers because of the MessageRejected strategy above
+            wait_until_consumer_pauses(processor_a)
+
+            # consumer A has all the partitions
+            assert len(consumer_a.tell()) == 2
+            assert len(consumer_b.tell()) == 0
+
+            # consumer A has all partitions paused (both from consumer and from
+            # StreamProcessor POV)
+            assert consumer_a.paused()
+            assert processor_a._StreamProcessor__is_paused is True  # type:ignore
+
+            # subscribe with another consumer, now we should have rebalancing in the next few polls
+            processor_b = StreamProcessor(consumer_b, topic, factory, IMMEDIATE)
+
+            for _ in range(10):
+                try:
+                    processor_a._run_once()
+                except EndOfPartition:
+                    pass
+                try:
+                    processor_b._run_once()
+                except EndOfPartition:
+                    pass
+
+            # balanced
+            assert len(consumer_a.tell()) == 1
+            assert len(consumer_b.tell()) == 1
+
+            # close B, but A has not polled yet, so it only has one partition still
+            consumer_b.close()
+            assert len(consumer_a.tell()) == 1
+
+            for _ in range(20):
+                try:
+                    processor_a._run_once()
+                except EndOfPartition:
+                    pass
+            assert len(consumer_a.tell()) == 2
+
     def test_consumer_polls_when_paused(self) -> None:
         strategy = mock.Mock()
         factory = mock.Mock()
--- a/tests/processing/test_processor.py
+++ b/tests/processing/test_processor.py
@@ -690,3 +690,76 @@ def test_processor_pause_with_invalid_message() -> None:
 
     processor._run_once()
     assert strategy.submit.call_args_list[-1] == mock.call(new_message)
+
+
+def test_processor_poll_while_paused() -> None:
+
+    topic = Topic("topic")
+
+    consumer = mock.Mock()
+    strategy = mock.Mock()
+    factory = mock.Mock()
+    factory.create_with_partitions.return_value = strategy
+
+    processor: StreamProcessor[int] = StreamProcessor(
+        consumer, topic, factory, IMMEDIATE
+    )
+
+    # Subscribe to topic
+    subscribe_args, subscribe_kwargs = consumer.subscribe.call_args
+    assert subscribe_args[0] == [topic]
+
+    # Partition assignment
+    partition = Partition(topic, 0)
+    new_partition = Partition(topic, 1)
+    consumer.tell.return_value = {}
+    assignment_callback = subscribe_kwargs["on_assign"]
+    offsets = {partition: 0}
+    assignment_callback(offsets)
+
+    # Message that we will get from polling
+    message = Message(BrokerValue(0, partition, 0, datetime.now()))
+
+    # Message will be rejected
+    consumer.poll.return_value = message.value
+    strategy.submit.side_effect = MessageRejected()
+    with assert_changes(lambda: int(consumer.pause.call_count), 0, 1):
+        processor._run_once()
+    assert strategy.submit.call_args_list[-1] == mock.call(message)
+
+    with mock.patch("time.time", return_value=time.time() + 5):
+        processor._run_once()  # Should pause now
+
+    # Consumer is in paused state
+    # The same rejected message should be carried over
+
+    # All partitions are paused
+    consumer.paused.return_value = set(p for p in offsets)
+    # Simulate a continuous backpressure state where messages are being rejected
+    strategy.submit.side_effect = MessageRejected()
+
+    # Simulate Kafka returning nothing since the consumer is paused
+    consumer.poll.return_value = None
+
+    # The next poll returns nothing, but we are still carrying over the rejected message
+    processor._run_once()
+    assert consumer.poll.return_value is None
+
+    # At this point, let's say the message carried over is invalid (e.g. it could be stale)
+    strategy.submit.side_effect = InvalidMessage(partition, 0, needs_commit=False)
+
+    # Handles the invalid message and unpauses the consumer
+    with assert_changes(lambda: int(consumer.resume.call_count), 0, 1):
+        processor._run_once()
+
+    # Poll for the next message from Kafka, but this time the partition has changed
+    new_message = Message(BrokerValue(0, new_partition, 1, datetime.now()))
+    consumer.poll.return_value = new_message.value
+    processor._run_once()
+    assert processor._StreamProcessor__is_paused is False  # type:ignore
+
+    strategy.submit.return_value = None
+    strategy.submit.side_effect = None
+
+    processor._run_once()
+    assert strategy.submit.call_args_list[-1] == mock.call(new_message)