sentry-arroyo 2.20.7__tar.gz → 2.20.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {sentry_arroyo-2.20.7/sentry_arroyo.egg-info → sentry_arroyo-2.20.9}/PKG-INFO +1 -1
  2. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/abstract.py +10 -33
  3. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/processor.py +6 -3
  4. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/run_task_with_multiprocessing.py +8 -3
  5. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9/sentry_arroyo.egg-info}/PKG-INFO +1 -1
  6. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/setup.py +1 -1
  7. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/backends/test_kafka.py +9 -0
  8. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_run_task_with_multiprocessing.py +8 -1
  9. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/test_processor.py +69 -0
  10. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/LICENSE +0 -0
  11. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/MANIFEST.in +0 -0
  12. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/README.md +0 -0
  13. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/__init__.py +0 -0
  14. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/__init__.py +0 -0
  15. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/kafka/__init__.py +0 -0
  16. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/kafka/commit.py +0 -0
  17. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/kafka/configuration.py +0 -0
  18. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/kafka/consumer.py +0 -0
  19. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/local/__init__.py +0 -0
  20. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/local/backend.py +0 -0
  21. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/local/storages/__init__.py +0 -0
  22. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/local/storages/abstract.py +0 -0
  23. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/backends/local/storages/memory.py +0 -0
  24. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/commit.py +0 -0
  25. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/dlq.py +0 -0
  26. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/errors.py +0 -0
  27. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/__init__.py +0 -0
  28. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/__init__.py +0 -0
  29. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/abstract.py +0 -0
  30. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/batching.py +0 -0
  31. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/buffer.py +0 -0
  32. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/commit.py +0 -0
  33. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/filter.py +0 -0
  34. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/guard.py +0 -0
  35. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/healthcheck.py +0 -0
  36. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/noop.py +0 -0
  37. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/produce.py +0 -0
  38. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/reduce.py +0 -0
  39. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/run_task.py +0 -0
  40. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/run_task_in_threads.py +0 -0
  41. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/unfold.py +0 -0
  42. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/py.typed +0 -0
  43. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/types.py +0 -0
  44. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/__init__.py +0 -0
  45. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/clock.py +0 -0
  46. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/codecs.py +0 -0
  47. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/concurrent.py +0 -0
  48. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/logging.py +0 -0
  49. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/metricDefs.json +0 -0
  50. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/metric_defs.py +0 -0
  51. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/metrics.py +0 -0
  52. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/profiler.py +0 -0
  53. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/arroyo/utils/retries.py +0 -0
  54. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/examples/transform_and_produce/__init__.py +0 -0
  55. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/examples/transform_and_produce/batched.py +0 -0
  56. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/examples/transform_and_produce/script.py +0 -0
  57. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/examples/transform_and_produce/simple.py +0 -0
  58. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/requirements.txt +0 -0
  59. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/SOURCES.txt +0 -0
  60. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/dependency_links.txt +0 -0
  61. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/not-zip-safe +0 -0
  62. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/requires.txt +0 -0
  63. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/top_level.txt +0 -0
  64. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/setup.cfg +0 -0
  65. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/backends/__init__.py +0 -0
  66. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/backends/mixins.py +0 -0
  67. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/backends/test_commit.py +0 -0
  68. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/backends/test_local.py +0 -0
  69. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/__init__.py +0 -0
  70. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/__init__.py +0 -0
  71. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_all.py +0 -0
  72. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_batching.py +0 -0
  73. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_buffer.py +0 -0
  74. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_commit.py +0 -0
  75. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_filter.py +0 -0
  76. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_guard.py +0 -0
  77. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_noop.py +0 -0
  78. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_produce.py +0 -0
  79. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_reduce.py +0 -0
  80. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_run_task.py +0 -0
  81. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_run_task_in_threads.py +0 -0
  82. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_unfold.py +0 -0
  83. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/test_commit.py +0 -0
  84. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/test_dlq.py +0 -0
  85. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/test_kip848_e2e.py +0 -0
  86. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/test_types.py +0 -0
  87. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/utils/__init__.py +0 -0
  88. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/utils/test_concurrent.py +0 -0
  89. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/utils/test_metrics.py +0 -0
  90. {sentry_arroyo-2.20.7 → sentry_arroyo-2.20.9}/tests/utils/test_retries.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.20.7
3
+ Version: 2.20.9
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -4,16 +4,7 @@ import logging
4
4
  import time
5
5
  from abc import ABC, abstractmethod, abstractproperty
6
6
  from concurrent.futures import Future
7
- from typing import (
8
- Callable,
9
- Generic,
10
- Mapping,
11
- Optional,
12
- Protocol,
13
- Sequence,
14
- TypeVar,
15
- Union,
16
- )
7
+ from typing import Callable, Generic, Mapping, Optional, Sequence, TypeVar, Union
17
8
 
18
9
  from arroyo.types import BrokerValue, Partition, Topic, TStrategyPayload
19
10
 
@@ -187,32 +178,11 @@ class Consumer(Generic[TStrategyPayload], ABC):
187
178
  raise NotImplementedError
188
179
 
189
180
 
190
- class ProducerFuture(Protocol, Generic[T]):
191
- """
192
- An abstract interface for a kind of Future. Stdlib futures are too slow to
193
- construct, so we use these.
194
- """
195
-
196
- def done(self) -> bool:
197
- ...
198
-
199
- def result(self, timeout: float | None = None) -> T:
200
- """
201
- Return result or raise exception. May block, but does not have to.
202
- """
203
- ...
204
-
205
- def set_result(self, result: T) -> None:
206
- ...
207
-
208
- def set_exception(self, exception: Exception) -> None:
209
- ...
210
-
211
-
212
181
  class SimpleProducerFuture(Generic[T]):
213
182
  """
214
183
  A stub for concurrent.futures.Future that does not construct any Condition
215
- variables, therefore is faster to construct.
184
+ variables, therefore is faster to construct. However, some methods are
185
+ missing, and result() in particular is not efficient with timeout > 0.
216
186
  """
217
187
 
218
188
  def __init__(self) -> None:
@@ -232,6 +202,10 @@ class SimpleProducerFuture(Generic[T]):
232
202
  # only in tests at most. It is only here for the sake of implementing
233
203
  # the contract. If you really need result with timeout>0, you should
234
204
  # use the stdlib future.
205
+ #
206
+ # If this becomes performance sensitive, we can potentially implement
207
+ # something more sophisticated such as lazily creating the condition
208
+ # variable, and synchronizing the creation of that using a global lock.
235
209
  while deadline is None or time.time() < deadline:
236
210
  if self.result_exception is not None:
237
211
  raise self.result_exception
@@ -248,6 +222,9 @@ class SimpleProducerFuture(Generic[T]):
248
222
  self.result_exception = exception
249
223
 
250
224
 
225
+ ProducerFuture = Union[SimpleProducerFuture[T], Future[T]]
226
+
227
+
251
228
  class Producer(Generic[TStrategyPayload], ABC):
252
229
  @abstractmethod
253
230
  def produce(
@@ -425,9 +425,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
425
425
  if self.__message is not None:
426
426
  try:
427
427
  start_submit = time.time()
428
- message = (
429
- Message(self.__message) if self.__message is not None else None
430
- )
428
+ message = Message(self.__message)
431
429
  self.__processing_strategy.submit(message)
432
430
 
433
431
  self.__metrics_buffer.incr_timing(
@@ -483,6 +481,11 @@ class StreamProcessor(Generic[TStrategyPayload]):
483
481
  except InvalidMessage as e:
484
482
  self._handle_invalid_message(e)
485
483
 
484
+ if self.__is_paused:
485
+ self.__metrics_buffer.incr_counter("arroyo.consumer.resume", 1)
486
+ self.__consumer.resume([*self.__consumer.tell().keys()])
487
+ self.__is_paused = False
488
+
486
489
  else:
487
490
  # Resume if we are currently in a paused state
488
491
  if self.__is_paused:
@@ -581,7 +581,7 @@ class RunTaskWithMultiprocessing(
581
581
  self.__metrics.increment("sigchld.detected")
582
582
  raise ChildProcessTerminated(signum)
583
583
 
584
- signal.signal(signal.SIGCHLD, handle_sigchld)
584
+ self.original_sigchld = signal.signal(signal.SIGCHLD, handle_sigchld)
585
585
 
586
586
  def __submit_batch(self, input_block_too_small: bool) -> None:
587
587
  assert self.__batch_builder is not None
@@ -839,14 +839,19 @@ class RunTaskWithMultiprocessing(
839
839
  # compression.)
840
840
  self.__batch_builder.append(message)
841
841
 
842
- def close(self) -> None:
842
+ def _do_close(self) -> None:
843
843
  self.__closed = True
844
844
 
845
+ signal.signal(signal.SIGCHLD, self.original_sigchld)
846
+
847
+ def close(self) -> None:
848
+ self._do_close()
849
+
845
850
  if self.__batch_builder is not None and len(self.__batch_builder) > 0:
846
851
  self.__submit_batch(False)
847
852
 
848
853
  def terminate(self) -> None:
849
- self.__closed = True
854
+ self._do_close()
850
855
 
851
856
  logger.info("Terminating %r...", self.__pool)
852
857
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.20.7
3
+ Version: 2.20.9
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -10,7 +10,7 @@ def get_requirements() -> Sequence[str]:
10
10
 
11
11
  setup(
12
12
  name="sentry-arroyo",
13
- version="2.20.7",
13
+ version="2.20.9",
14
14
  author="Sentry",
15
15
  author_email="oss@sentry.io",
16
16
  license="Apache-2.0",
@@ -167,6 +167,15 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
167
167
  else:
168
168
  raise AssertionError("expected EndOfPartition error")
169
169
 
170
+ @pytest.mark.parametrize("use_simple_futures", [True, False])
171
+ def test_producer_future_behavior(self, use_simple_futures: bool) -> None:
172
+ with self.get_topic() as topic:
173
+ with closing(self.get_producer(use_simple_futures)) as producer:
174
+ future = producer.produce(topic, next(self.get_payloads()))
175
+ assert not future.done()
176
+ assert future.result(5.0)
177
+ assert future.done()
178
+
170
179
  def test_lenient_offset_reset_latest(self) -> None:
171
180
  payload = KafkaPayload(b"a", b"0", [])
172
181
  with self.get_topic() as topic:
@@ -1,8 +1,9 @@
1
1
  import multiprocessing
2
+ import signal
2
3
  import time
3
4
  from datetime import datetime
4
5
  from multiprocessing.managers import SharedMemoryManager
5
- from typing import Any
6
+ from typing import Any, Generator
6
7
  from unittest.mock import Mock, call
7
8
 
8
9
  import pytest
@@ -25,6 +26,12 @@ from tests.metrics import TestingMetricsBackend
25
26
  from tests.metrics import Timing as TimingCall
26
27
 
27
28
 
29
+ @pytest.fixture(autouse=True)
30
+ def does_not_leak_sigchild_handler() -> Generator[None, None, None]:
31
+ yield
32
+ assert isinstance(signal.getsignal(signal.SIGCHLD), int)
33
+
34
+
28
35
  def test_message_batch() -> None:
29
36
  partition = Partition(Topic("test"), 0)
30
37
 
@@ -622,3 +622,72 @@ def test_healthcheck(tmpdir: py.path.local) -> None:
622
622
 
623
623
  processor._run_once()
624
624
  assert tmpdir.join("health.txt").mtime() == health_mtime
625
+
626
+
627
+ def test_processor_pause_with_invalid_message() -> None:
628
+
629
+ topic = Topic("topic")
630
+
631
+ consumer = mock.Mock()
632
+ strategy = mock.Mock()
633
+ factory = mock.Mock()
634
+ factory.create_with_partitions.return_value = strategy
635
+
636
+ processor: StreamProcessor[int] = StreamProcessor(
637
+ consumer, topic, factory, IMMEDIATE
638
+ )
639
+
640
+ # Subscribe to topic
641
+ subscribe_args, subscribe_kwargs = consumer.subscribe.call_args
642
+ assert subscribe_args[0] == [topic]
643
+
644
+ # Partition assignment
645
+ partition = Partition(topic, 0)
646
+ consumer.tell.return_value = {}
647
+ assignment_callback = subscribe_kwargs["on_assign"]
648
+ offsets = {partition: 0}
649
+ assignment_callback(offsets)
650
+
651
+ # Message that we will get from polling
652
+ message = Message(BrokerValue(0, partition, 0, datetime.now()))
653
+
654
+ # Message will be rejected
655
+ consumer.poll.return_value = message.value
656
+ strategy.submit.side_effect = MessageRejected()
657
+ with assert_changes(lambda: int(consumer.pause.call_count), 0, 1):
658
+ processor._run_once()
659
+ assert strategy.submit.call_args_list[-1] == mock.call(message)
660
+
661
+ with mock.patch("time.time", return_value=time.time() + 5):
662
+ processor._run_once() # Should pause now
663
+
664
+ # Consumer is in paused state
665
+ # The same rejected message should be carried over
666
+
667
+ # All partitions are paused
668
+ consumer.paused.return_value = set(p for p in offsets)
669
+ # Simulate a continuous backpressure state where messages are being rejected
670
+ strategy.submit.side_effect = MessageRejected()
671
+
672
+ # Simulate Kafka returning nothing since the consumer is paused
673
+ consumer.poll.return_value = None
674
+
675
+ # The next poll returns nothing, but we are still carrying over the rejected message
676
+ processor._run_once()
677
+ assert consumer.poll.return_value is None
678
+
679
+ # At this point, let's say the message carried over is invalid (e.g. it could be stale)
680
+ strategy.submit.side_effect = InvalidMessage(partition, 0, needs_commit=False)
681
+
682
+ # Handles the invalid message and unpauses the consumer
683
+ with assert_changes(lambda: int(consumer.resume.call_count), 0, 1):
684
+ processor._run_once()
685
+
686
+ # Poll for the next message from Kafka
687
+ new_message = Message(BrokerValue(0, partition, 1, datetime.now()))
688
+ consumer.poll.return_value = new_message.value
689
+ strategy.submit.return_value = None
690
+ strategy.submit.side_effect = None
691
+
692
+ processor._run_once()
693
+ assert strategy.submit.call_args_list[-1] == mock.call(new_message)
File without changes
File without changes
File without changes