sentry-arroyo 2.20.8__tar.gz → 2.20.9__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (90)
  1. {sentry_arroyo-2.20.8/sentry_arroyo.egg-info → sentry_arroyo-2.20.9}/PKG-INFO +1 -1
  2. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/abstract.py +10 -33
  3. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/processor.py +6 -3
  4. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9/sentry_arroyo.egg-info}/PKG-INFO +1 -1
  5. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/setup.py +1 -1
  6. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/backends/test_kafka.py +9 -0
  7. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/test_processor.py +69 -0
  8. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/LICENSE +0 -0
  9. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/MANIFEST.in +0 -0
  10. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/README.md +0 -0
  11. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/__init__.py +0 -0
  12. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/__init__.py +0 -0
  13. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/kafka/__init__.py +0 -0
  14. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/kafka/commit.py +0 -0
  15. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/kafka/configuration.py +0 -0
  16. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/kafka/consumer.py +0 -0
  17. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/local/__init__.py +0 -0
  18. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/local/backend.py +0 -0
  19. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/local/storages/__init__.py +0 -0
  20. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/local/storages/abstract.py +0 -0
  21. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/backends/local/storages/memory.py +0 -0
  22. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/commit.py +0 -0
  23. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/dlq.py +0 -0
  24. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/errors.py +0 -0
  25. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/__init__.py +0 -0
  26. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/__init__.py +0 -0
  27. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/abstract.py +0 -0
  28. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/batching.py +0 -0
  29. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/buffer.py +0 -0
  30. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/commit.py +0 -0
  31. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/filter.py +0 -0
  32. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/guard.py +0 -0
  33. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/healthcheck.py +0 -0
  34. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/noop.py +0 -0
  35. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/produce.py +0 -0
  36. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/reduce.py +0 -0
  37. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/run_task.py +0 -0
  38. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/run_task_in_threads.py +0 -0
  39. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/run_task_with_multiprocessing.py +0 -0
  40. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/processing/strategies/unfold.py +0 -0
  41. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/py.typed +0 -0
  42. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/types.py +0 -0
  43. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/__init__.py +0 -0
  44. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/clock.py +0 -0
  45. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/codecs.py +0 -0
  46. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/concurrent.py +0 -0
  47. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/logging.py +0 -0
  48. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/metricDefs.json +0 -0
  49. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/metric_defs.py +0 -0
  50. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/metrics.py +0 -0
  51. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/profiler.py +0 -0
  52. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/arroyo/utils/retries.py +0 -0
  53. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/examples/transform_and_produce/__init__.py +0 -0
  54. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/examples/transform_and_produce/batched.py +0 -0
  55. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/examples/transform_and_produce/script.py +0 -0
  56. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/examples/transform_and_produce/simple.py +0 -0
  57. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/requirements.txt +0 -0
  58. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/SOURCES.txt +0 -0
  59. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/dependency_links.txt +0 -0
  60. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/not-zip-safe +0 -0
  61. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/requires.txt +0 -0
  62. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/sentry_arroyo.egg-info/top_level.txt +0 -0
  63. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/setup.cfg +0 -0
  64. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/backends/__init__.py +0 -0
  65. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/backends/mixins.py +0 -0
  66. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/backends/test_commit.py +0 -0
  67. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/backends/test_local.py +0 -0
  68. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/__init__.py +0 -0
  69. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/__init__.py +0 -0
  70. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_all.py +0 -0
  71. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_batching.py +0 -0
  72. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_buffer.py +0 -0
  73. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_commit.py +0 -0
  74. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_filter.py +0 -0
  75. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_guard.py +0 -0
  76. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_noop.py +0 -0
  77. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_produce.py +0 -0
  78. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_reduce.py +0 -0
  79. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_run_task.py +0 -0
  80. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_run_task_in_threads.py +0 -0
  81. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_run_task_with_multiprocessing.py +0 -0
  82. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/processing/strategies/test_unfold.py +0 -0
  83. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/test_commit.py +0 -0
  84. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/test_dlq.py +0 -0
  85. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/test_kip848_e2e.py +0 -0
  86. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/test_types.py +0 -0
  87. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/utils/__init__.py +0 -0
  88. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/utils/test_concurrent.py +0 -0
  89. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/utils/test_metrics.py +0 -0
  90. {sentry_arroyo-2.20.8 → sentry_arroyo-2.20.9}/tests/utils/test_retries.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.20.8
3
+ Version: 2.20.9
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -4,16 +4,7 @@ import logging
4
4
  import time
5
5
  from abc import ABC, abstractmethod, abstractproperty
6
6
  from concurrent.futures import Future
7
- from typing import (
8
- Callable,
9
- Generic,
10
- Mapping,
11
- Optional,
12
- Protocol,
13
- Sequence,
14
- TypeVar,
15
- Union,
16
- )
7
+ from typing import Callable, Generic, Mapping, Optional, Sequence, TypeVar, Union
17
8
 
18
9
  from arroyo.types import BrokerValue, Partition, Topic, TStrategyPayload
19
10
 
@@ -187,32 +178,11 @@ class Consumer(Generic[TStrategyPayload], ABC):
187
178
  raise NotImplementedError
188
179
 
189
180
 
190
- class ProducerFuture(Protocol, Generic[T]):
191
- """
192
- An abstract interface for a kind of Future. Stdlib futures are too slow to
193
- construct, so we use these.
194
- """
195
-
196
- def done(self) -> bool:
197
- ...
198
-
199
- def result(self, timeout: float | None = None) -> T:
200
- """
201
- Return result or raise exception. May block, but does not have to.
202
- """
203
- ...
204
-
205
- def set_result(self, result: T) -> None:
206
- ...
207
-
208
- def set_exception(self, exception: Exception) -> None:
209
- ...
210
-
211
-
212
181
  class SimpleProducerFuture(Generic[T]):
213
182
  """
214
183
  A stub for concurrent.futures.Future that does not construct any Condition
215
- variables, therefore is faster to construct.
184
+ variables, therefore is faster to construct. However, some methods are
185
+ missing, and result() in particular is not efficient with timeout > 0.
216
186
  """
217
187
 
218
188
  def __init__(self) -> None:
@@ -232,6 +202,10 @@ class SimpleProducerFuture(Generic[T]):
232
202
  # only in tests at most. It is only here for the sake of implementing
233
203
  # the contract. If you really need result with timeout>0, you should
234
204
  # use the stdlib future.
205
+ #
206
+ # If this becomes performance sensitive, we can potentially implement
207
+ # something more sophisticated such as lazily creating the condition
208
+ # variable, and synchronizing the creation of that using a global lock.
235
209
  while deadline is None or time.time() < deadline:
236
210
  if self.result_exception is not None:
237
211
  raise self.result_exception
@@ -248,6 +222,9 @@ class SimpleProducerFuture(Generic[T]):
248
222
  self.result_exception = exception
249
223
 
250
224
 
225
+ ProducerFuture = Union[SimpleProducerFuture[T], Future[T]]
226
+
227
+
251
228
  class Producer(Generic[TStrategyPayload], ABC):
252
229
  @abstractmethod
253
230
  def produce(
@@ -425,9 +425,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
425
425
  if self.__message is not None:
426
426
  try:
427
427
  start_submit = time.time()
428
- message = (
429
- Message(self.__message) if self.__message is not None else None
430
- )
428
+ message = Message(self.__message)
431
429
  self.__processing_strategy.submit(message)
432
430
 
433
431
  self.__metrics_buffer.incr_timing(
@@ -483,6 +481,11 @@ class StreamProcessor(Generic[TStrategyPayload]):
483
481
  except InvalidMessage as e:
484
482
  self._handle_invalid_message(e)
485
483
 
484
+ if self.__is_paused:
485
+ self.__metrics_buffer.incr_counter("arroyo.consumer.resume", 1)
486
+ self.__consumer.resume([*self.__consumer.tell().keys()])
487
+ self.__is_paused = False
488
+
486
489
  else:
487
490
  # Resume if we are currently in a paused state
488
491
  if self.__is_paused:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.20.8
3
+ Version: 2.20.9
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -10,7 +10,7 @@ def get_requirements() -> Sequence[str]:
10
10
 
11
11
  setup(
12
12
  name="sentry-arroyo",
13
- version="2.20.8",
13
+ version="2.20.9",
14
14
  author="Sentry",
15
15
  author_email="oss@sentry.io",
16
16
  license="Apache-2.0",
@@ -167,6 +167,15 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
167
167
  else:
168
168
  raise AssertionError("expected EndOfPartition error")
169
169
 
170
+ @pytest.mark.parametrize("use_simple_futures", [True, False])
171
+ def test_producer_future_behavior(self, use_simple_futures: bool) -> None:
172
+ with self.get_topic() as topic:
173
+ with closing(self.get_producer(use_simple_futures)) as producer:
174
+ future = producer.produce(topic, next(self.get_payloads()))
175
+ assert not future.done()
176
+ assert future.result(5.0)
177
+ assert future.done()
178
+
170
179
  def test_lenient_offset_reset_latest(self) -> None:
171
180
  payload = KafkaPayload(b"a", b"0", [])
172
181
  with self.get_topic() as topic:
@@ -622,3 +622,72 @@ def test_healthcheck(tmpdir: py.path.local) -> None:
622
622
 
623
623
  processor._run_once()
624
624
  assert tmpdir.join("health.txt").mtime() == health_mtime
625
+
626
+
627
+ def test_processor_pause_with_invalid_message() -> None:
628
+
629
+ topic = Topic("topic")
630
+
631
+ consumer = mock.Mock()
632
+ strategy = mock.Mock()
633
+ factory = mock.Mock()
634
+ factory.create_with_partitions.return_value = strategy
635
+
636
+ processor: StreamProcessor[int] = StreamProcessor(
637
+ consumer, topic, factory, IMMEDIATE
638
+ )
639
+
640
+ # Subscribe to topic
641
+ subscribe_args, subscribe_kwargs = consumer.subscribe.call_args
642
+ assert subscribe_args[0] == [topic]
643
+
644
+ # Partition assignment
645
+ partition = Partition(topic, 0)
646
+ consumer.tell.return_value = {}
647
+ assignment_callback = subscribe_kwargs["on_assign"]
648
+ offsets = {partition: 0}
649
+ assignment_callback(offsets)
650
+
651
+ # Message that we will get from polling
652
+ message = Message(BrokerValue(0, partition, 0, datetime.now()))
653
+
654
+ # Message will be rejected
655
+ consumer.poll.return_value = message.value
656
+ strategy.submit.side_effect = MessageRejected()
657
+ with assert_changes(lambda: int(consumer.pause.call_count), 0, 1):
658
+ processor._run_once()
659
+ assert strategy.submit.call_args_list[-1] == mock.call(message)
660
+
661
+ with mock.patch("time.time", return_value=time.time() + 5):
662
+ processor._run_once() # Should pause now
663
+
664
+ # Consumer is in paused state
665
+ # The same rejected message should be carried over
666
+
667
+ # All partitions are paused
668
+ consumer.paused.return_value = set(p for p in offsets)
669
+ # Simulate a continuous backpressure state where messages are being rejected
670
+ strategy.submit.side_effect = MessageRejected()
671
+
672
+ # Simulate Kafka returning nothing since the consumer is paused
673
+ consumer.poll.return_value = None
674
+
675
+ # The next poll returns nothing, but we are still carrying over the rejected message
676
+ processor._run_once()
677
+ assert consumer.poll.return_value is None
678
+
679
+ # At this point, let's say the message carried over is invalid (e.g. it could be stale)
680
+ strategy.submit.side_effect = InvalidMessage(partition, 0, needs_commit=False)
681
+
682
+ # Handles the invalid message and unpauses the consumer
683
+ with assert_changes(lambda: int(consumer.resume.call_count), 0, 1):
684
+ processor._run_once()
685
+
686
+ # Poll for the next message from Kafka
687
+ new_message = Message(BrokerValue(0, partition, 1, datetime.now()))
688
+ consumer.poll.return_value = new_message.value
689
+ strategy.submit.return_value = None
690
+ strategy.submit.side_effect = None
691
+
692
+ processor._run_once()
693
+ assert strategy.submit.call_args_list[-1] == mock.call(new_message)
File without changes
File without changes
File without changes