sentry-arroyo 2.30.0__tar.gz → 2.32.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {sentry_arroyo-2.30.0/sentry_arroyo.egg-info → sentry_arroyo-2.32.0}/PKG-INFO +1 -1
  2. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/abstract.py +4 -1
  3. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/__init__.py +2 -1
  4. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/configuration.py +20 -16
  5. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/consumer.py +153 -19
  6. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/backend.py +4 -4
  7. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/processor.py +17 -4
  8. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0/sentry_arroyo.egg-info}/PKG-INFO +1 -1
  9. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/SOURCES.txt +1 -0
  10. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/setup.py +1 -1
  11. sentry_arroyo-2.32.0/tests/backends/test_confluent_producer.py +79 -0
  12. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_kafka.py +64 -0
  13. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/LICENSE +0 -0
  14. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/MANIFEST.in +0 -0
  15. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/README.md +0 -0
  16. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/__init__.py +0 -0
  17. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/__init__.py +0 -0
  18. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/commit.py +0 -0
  19. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/__init__.py +0 -0
  20. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/storages/__init__.py +0 -0
  21. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/storages/abstract.py +0 -0
  22. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/storages/memory.py +0 -0
  23. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/commit.py +0 -0
  24. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/dlq.py +0 -0
  25. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/errors.py +0 -0
  26. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/__init__.py +0 -0
  27. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/__init__.py +0 -0
  28. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/abstract.py +0 -0
  29. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/batching.py +0 -0
  30. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/buffer.py +0 -0
  31. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/commit.py +0 -0
  32. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/filter.py +0 -0
  33. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/guard.py +0 -0
  34. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/healthcheck.py +0 -0
  35. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/noop.py +0 -0
  36. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/produce.py +0 -0
  37. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/reduce.py +0 -0
  38. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/run_task.py +0 -0
  39. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/run_task_in_threads.py +0 -0
  40. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/run_task_with_multiprocessing.py +0 -0
  41. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/unfold.py +0 -0
  42. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/py.typed +0 -0
  43. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/types.py +0 -0
  44. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/__init__.py +0 -0
  45. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/clock.py +0 -0
  46. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/codecs.py +0 -0
  47. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/concurrent.py +0 -0
  48. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/logging.py +0 -0
  49. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/metricDefs.json +0 -0
  50. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/metric_defs.py +0 -0
  51. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/metrics.py +0 -0
  52. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/profiler.py +0 -0
  53. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/retries.py +0 -0
  54. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/examples/transform_and_produce/__init__.py +0 -0
  55. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/examples/transform_and_produce/batched.py +0 -0
  56. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/examples/transform_and_produce/script.py +0 -0
  57. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/examples/transform_and_produce/simple.py +0 -0
  58. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/requirements.txt +0 -0
  59. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/dependency_links.txt +0 -0
  60. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/not-zip-safe +0 -0
  61. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/requires.txt +0 -0
  62. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/top_level.txt +0 -0
  63. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/setup.cfg +0 -0
  64. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/__init__.py +0 -0
  65. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/mixins.py +0 -0
  66. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_commit.py +0 -0
  67. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_kafka_producer.py +0 -0
  68. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_local.py +0 -0
  69. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/__init__.py +0 -0
  70. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/__init__.py +0 -0
  71. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_all.py +0 -0
  72. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_batching.py +0 -0
  73. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_buffer.py +0 -0
  74. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_commit.py +0 -0
  75. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_filter.py +0 -0
  76. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_guard.py +0 -0
  77. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_noop.py +0 -0
  78. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_produce.py +0 -0
  79. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_reduce.py +0 -0
  80. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_run_task.py +0 -0
  81. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_run_task_in_threads.py +0 -0
  82. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_run_task_with_multiprocessing.py +0 -0
  83. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_unfold.py +0 -0
  84. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/test_processor.py +0 -0
  85. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/test_commit.py +0 -0
  86. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/test_dlq.py +0 -0
  87. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/test_kip848_e2e.py +0 -0
  88. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/test_types.py +0 -0
  89. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/utils/__init__.py +0 -0
  90. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/utils/test_concurrent.py +0 -0
  91. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/utils/test_metrics.py +0 -0
  92. {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/utils/test_retries.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.30.0
3
+ Version: 2.32.0
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -153,10 +153,13 @@ class Consumer(Generic[TStrategyPayload], ABC):
153
153
  raise NotImplementedError
154
154
 
155
155
  @abstractmethod
156
- def commit_offsets(self) -> Mapping[Partition, int]:
156
+ def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
157
157
  """
158
158
  Commit staged offsets. The return value of this method is a mapping
159
159
  of streams with their committed offsets as values.
160
+
161
+ When auto-commit is enabled (in Kafka consumers), returns None since
162
+ the broker handles commits automatically.
160
163
  """
161
164
  raise NotImplementedError
162
165
 
@@ -3,12 +3,13 @@ from .configuration import (
3
3
  build_kafka_consumer_configuration,
4
4
  build_kafka_producer_configuration,
5
5
  )
6
- from .consumer import KafkaConsumer, KafkaPayload, KafkaProducer
6
+ from .consumer import ConfluentProducer, KafkaConsumer, KafkaPayload, KafkaProducer
7
7
 
8
8
  __all__ = [
9
9
  "build_kafka_configuration",
10
10
  "build_kafka_consumer_configuration",
11
11
  "build_kafka_producer_configuration",
12
+ "ConfluentProducer",
12
13
  "KafkaConsumer",
13
14
  "KafkaPayload",
14
15
  "KafkaProducer",
@@ -237,6 +237,7 @@ def build_kafka_consumer_configuration(
237
237
  bootstrap_servers: Optional[Sequence[str]] = None,
238
238
  override_params: Optional[Mapping[str, Any]] = None,
239
239
  strict_offset_reset: Optional[bool] = None,
240
+ enable_auto_commit: bool = False,
240
241
  ) -> KafkaBrokerConfig:
241
242
 
242
243
  if auto_offset_reset is None:
@@ -252,20 +253,23 @@ def build_kafka_consumer_configuration(
252
253
  default_config, bootstrap_servers, override_params
253
254
  )
254
255
 
255
- broker_config.update(
256
- {
257
- "enable.auto.commit": False,
258
- "enable.auto.offset.store": False,
259
- "group.id": group_id,
260
- "auto.offset.reset": auto_offset_reset,
261
- # this is an arroyo specific flag that only affects the consumer.
262
- "arroyo.strict.offset.reset": strict_offset_reset,
263
- # overridden to reduce memory usage when there's a large backlog
264
- "queued.max.messages.kbytes": queued_max_messages_kbytes,
265
- "queued.min.messages": queued_min_messages,
266
- "enable.partition.eof": False,
267
- "statistics.interval.ms": STATS_COLLECTION_FREQ_MS,
268
- "stats_cb": stats_callback,
269
- }
270
- )
256
+ # Default configuration with manual commit management
257
+ config_update = {
258
+ "enable.auto.commit": False,
259
+ "enable.auto.offset.store": False,
260
+ "group.id": group_id,
261
+ "auto.offset.reset": auto_offset_reset,
262
+ # this is an arroyo specific flag that only affects the consumer.
263
+ "arroyo.strict.offset.reset": strict_offset_reset,
264
+ # this is an arroyo specific flag to enable auto-commit mode
265
+ "arroyo.enable.auto.commit": enable_auto_commit,
266
+ # overridden to reduce memory usage when there's a large backlog
267
+ "queued.max.messages.kbytes": queued_max_messages_kbytes,
268
+ "queued.min.messages": queued_min_messages,
269
+ "enable.partition.eof": False,
270
+ "statistics.interval.ms": STATS_COLLECTION_FREQ_MS,
271
+ "stats_cb": stats_callback,
272
+ }
273
+
274
+ broker_config.update(config_update)
271
275
  return broker_config
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ import time
5
+ from collections import defaultdict
4
6
  from concurrent.futures import Future
5
7
  from datetime import datetime
6
8
  from enum import Enum
@@ -21,6 +23,7 @@ from typing import (
21
23
  Tuple,
22
24
  Type,
23
25
  Union,
26
+ cast,
24
27
  )
25
28
 
26
29
  from confluent_kafka import (
@@ -33,7 +36,7 @@ from confluent_kafka import (
33
36
  from confluent_kafka import Consumer as ConfluentConsumer
34
37
  from confluent_kafka import KafkaError, KafkaException
35
38
  from confluent_kafka import Message as ConfluentMessage
36
- from confluent_kafka import Producer as ConfluentProducer
39
+ from confluent_kafka import Producer as ConfluentKafkaProducer
37
40
  from confluent_kafka import TopicPartition as ConfluentTopicPartition
38
41
 
39
42
  from arroyo.backends.abstract import (
@@ -50,6 +53,7 @@ from arroyo.errors import (
50
53
  )
51
54
  from arroyo.types import BrokerValue, Partition, Topic
52
55
  from arroyo.utils.concurrent import execute
56
+ from arroyo.utils.metrics import get_metrics
53
57
  from arroyo.utils.retries import BasicRetryPolicy
54
58
 
55
59
  logger = logging.getLogger(__name__)
@@ -182,6 +186,13 @@ class KafkaConsumer(Consumer[KafkaPayload]):
182
186
  if self.__strict_offset_reset is None:
183
187
  self.__strict_offset_reset = True
184
188
 
189
+ # Feature flag to enable rdkafka auto-commit with store_offsets
190
+ # When enabled, offsets are stored via store_offsets() and rdkafka
191
+ # automatically commits them periodically
192
+ self.__use_auto_commit = as_kafka_configuration_bool(
193
+ configuration.pop("arroyo.enable.auto.commit", False)
194
+ )
195
+
185
196
  if auto_offset_reset in {"smallest", "earliest", "beginning"}:
186
197
  self.__resolve_partition_starting_offset = (
187
198
  self.__resolve_partition_offset_earliest
@@ -197,21 +208,32 @@ class KafkaConsumer(Consumer[KafkaPayload]):
197
208
  else:
198
209
  raise ValueError("invalid value for 'auto.offset.reset' configuration")
199
210
 
200
- if (
201
- as_kafka_configuration_bool(configuration.get("enable.auto.commit", "true"))
202
- is not False
203
- ):
204
- raise ValueError("invalid value for 'enable.auto.commit' configuration")
211
+ # When auto-commit is disabled (default), we require explicit configuration
212
+ # When auto-commit is enabled, we allow rdkafka to handle commits
213
+ if not self.__use_auto_commit:
214
+ if (
215
+ as_kafka_configuration_bool(
216
+ configuration.get("enable.auto.commit", "true")
217
+ )
218
+ is not False
219
+ ):
220
+ raise ValueError("invalid value for 'enable.auto.commit' configuration")
205
221
 
206
- if (
207
- as_kafka_configuration_bool(
208
- configuration.get("enable.auto.offset.store", "true")
209
- )
210
- is not False
211
- ):
212
- raise ValueError(
213
- "invalid value for 'enable.auto.offset.store' configuration"
214
- )
222
+ if (
223
+ as_kafka_configuration_bool(
224
+ configuration.get("enable.auto.offset.store", "true")
225
+ )
226
+ is not False
227
+ ):
228
+ raise ValueError(
229
+ "invalid value for 'enable.auto.offset.store' configuration"
230
+ )
231
+ else:
232
+ # In auto-commit mode, enable auto.commit and keep auto.offset.store disabled
233
+ # We'll use store_offsets() manually to control which offsets get committed
234
+ configuration["enable.auto.commit"] = True
235
+ configuration["enable.auto.offset.store"] = False
236
+ configuration["on_commit"] = self.__on_commit_callback
215
237
 
216
238
  # NOTE: Offsets are explicitly managed as part of the assignment
217
239
  # callback, so preemptively resetting offsets is not enabled when
@@ -231,6 +253,19 @@ class KafkaConsumer(Consumer[KafkaPayload]):
231
253
 
232
254
  self.__state = KafkaConsumerState.CONSUMING
233
255
 
256
+ def __on_commit_callback(
257
+ self,
258
+ error: Optional[KafkaException],
259
+ partitions: Sequence[ConfluentTopicPartition],
260
+ ) -> None:
261
+ if error:
262
+ partition_info = [f"{p.topic}:{p.partition}" for p in partitions]
263
+ logger.warning(
264
+ "Commit failed: %s. Partitions: %s",
265
+ error,
266
+ partition_info,
267
+ )
268
+
234
269
  def __resolve_partition_offset_earliest(
235
270
  self, partition: ConfluentTopicPartition
236
271
  ) -> ConfluentTopicPartition:
@@ -568,7 +603,21 @@ class KafkaConsumer(Consumer[KafkaPayload]):
568
603
  # TODO: Maybe log a warning if these offsets exceed the current
569
604
  # offsets, since that's probably a side effect of an incorrect usage
570
605
  # pattern?
571
- self.__staged_offsets.update(offsets)
606
+ if self.__use_auto_commit:
607
+ # When auto-commit is enabled, use store_offsets to stage offsets
608
+ # for rdkafka to auto-commit
609
+ if offsets:
610
+ self.__consumer.store_offsets(
611
+ offsets=[
612
+ ConfluentTopicPartition(
613
+ partition.topic.name, partition.index, offset
614
+ )
615
+ for partition, offset in offsets.items()
616
+ ]
617
+ )
618
+ else:
619
+ # Default behavior: manually track staged offsets
620
+ self.__staged_offsets.update(offsets)
572
621
 
573
622
  def __commit(self) -> Mapping[Partition, int]:
574
623
  if self.__state in {KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR}:
@@ -616,15 +665,24 @@ class KafkaConsumer(Consumer[KafkaPayload]):
616
665
 
617
666
  return offsets
618
667
 
619
- def commit_offsets(self) -> Mapping[Partition, int]:
668
+ def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
620
669
  """
621
670
  Commit staged offsets for all partitions that this consumer is
622
671
  assigned to. The return value of this method is a mapping of
623
672
  partitions with their committed offsets as values.
624
673
 
674
+ When auto-commit is enabled, returns None since rdkafka handles
675
+ commits automatically and we don't track which offsets were committed.
676
+
625
677
  Raises an ``InvalidState`` if called on a closed consumer.
626
678
  """
627
- return self.__commit_retry_policy.call(self.__commit)
679
+ if self.__use_auto_commit:
680
+ # When auto-commit is enabled, rdkafka commits automatically
681
+ # We don't track what was committed, so return None
682
+ # The offsets have already been staged via store_offsets()
683
+ return None
684
+ else:
685
+ return self.__commit_retry_policy.call(self.__commit)
628
686
 
629
687
  def close(self, timeout: Optional[float] = None) -> None:
630
688
  """
@@ -657,7 +715,7 @@ class KafkaProducer(Producer[KafkaPayload]):
657
715
  self, configuration: Mapping[str, Any], use_simple_futures: bool = False
658
716
  ) -> None:
659
717
  self.__configuration = configuration
660
- self.__producer = ConfluentProducer(configuration)
718
+ self.__producer = ConfluentKafkaProducer(configuration)
661
719
  self.__shutdown_requested = Event()
662
720
 
663
721
  # The worker must execute in a separate thread to ensure that callbacks
@@ -742,3 +800,79 @@ class KafkaProducer(Producer[KafkaPayload]):
742
800
  def close(self) -> Future[None]:
743
801
  self.__shutdown_requested.set()
744
802
  return self.__result
803
+
804
+
805
+ # Type alias for the delivery callback function
806
+ DeliveryCallback = Callable[[Optional[KafkaError], ConfluentMessage], None]
807
+
808
+ # Interval between metric flushes (in seconds)
809
+ METRICS_FREQUENCY_SEC = 1.0
810
+
811
+
812
+ class ConfluentProducer(ConfluentKafkaProducer): # type: ignore[misc]
813
+ """
814
+ A thin wrapper for confluent_kafka.Producer that adds metrics reporting.
815
+ """
816
+
817
+ def __init__(self, configuration: Mapping[str, Any]) -> None:
818
+ super().__init__(configuration)
819
+ self.producer_name = configuration.get("client.id") or None
820
+ self.__metrics = get_metrics()
821
+ self.__produce_counters: MutableMapping[str, int] = defaultdict(int)
822
+ self.__reset_metrics()
823
+
824
+ def __metrics_delivery_callback(
825
+ self,
826
+ error: Optional[KafkaError],
827
+ _message: ConfluentMessage,
828
+ ) -> None:
829
+ if error is not None:
830
+ status = "error"
831
+ else:
832
+ status = "success"
833
+ self.__produce_counters[status] += 1
834
+ self.__throttled_record()
835
+
836
+ def __delivery_callback(
837
+ self,
838
+ user_callback: Optional[DeliveryCallback],
839
+ ) -> DeliveryCallback:
840
+ def wrapped(error: Optional[KafkaError], message: ConfluentMessage) -> None:
841
+ self.__metrics_delivery_callback(error, message)
842
+ if user_callback is not None:
843
+ user_callback(error, message)
844
+
845
+ return wrapped
846
+
847
+ def produce(self, *args: Any, **kwargs: Any) -> None:
848
+ # callback and on_delivery are aliases, callback takes precedence over on_delivery
849
+ callback = kwargs.pop("callback", None)
850
+ on_delivery = kwargs.pop("on_delivery", None)
851
+ user_callback = callback or on_delivery
852
+ wrapped_callback = self.__delivery_callback(user_callback)
853
+ super().produce(*args, on_delivery=wrapped_callback, **kwargs)
854
+
855
+ def __flush_metrics(self) -> None:
856
+ for status, count in self.__produce_counters.items():
857
+ tags = {"status": status}
858
+ if self.producer_name:
859
+ tags["producer_name"] = self.producer_name
860
+ self.__metrics.increment(
861
+ name="arroyo.producer.produce_status",
862
+ value=count,
863
+ tags=tags,
864
+ )
865
+ self.__reset_metrics()
866
+
867
+ def flush(self, timeout: float = -1) -> int:
868
+ # Kafka producer flush should flush metrics too
869
+ self.__flush_metrics()
870
+ return cast(int, super().flush(timeout))
871
+
872
+ def __reset_metrics(self) -> None:
873
+ self.__produce_counters.clear()
874
+ self.__last_record_time = time.time()
875
+
876
+ def __throttled_record(self) -> None:
877
+ if time.time() - self.__last_record_time > METRICS_FREQUENCY_SEC:
878
+ self.__flush_metrics()
@@ -38,9 +38,9 @@ class LocalBroker(Generic[TStrategyPayload]):
38
38
  self.__message_storage = message_storage
39
39
  self.__clock = clock
40
40
 
41
- self.__offsets: MutableMapping[str, MutableMapping[Partition, int]] = (
42
- defaultdict(dict)
43
- )
41
+ self.__offsets: MutableMapping[
42
+ str, MutableMapping[Partition, int]
43
+ ] = defaultdict(dict)
44
44
 
45
45
  # The active subscriptions are stored by consumer group as a mapping
46
46
  # between the consumer and it's subscribed topics.
@@ -326,7 +326,7 @@ class LocalConsumer(Consumer[TStrategyPayload]):
326
326
  # atomic
327
327
  self.__staged_offsets.update(offsets)
328
328
 
329
- def commit_offsets(self) -> Mapping[Partition, int]:
329
+ def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
330
330
  with self.__lock:
331
331
  if self.__closed:
332
332
  raise RuntimeError("consumer is closed")
@@ -146,9 +146,9 @@ class StreamProcessor(Generic[TStrategyPayload]):
146
146
  self.__processor_factory = processor_factory
147
147
  self.__metrics_buffer = MetricsBuffer()
148
148
 
149
- self.__processing_strategy: Optional[
150
- ProcessingStrategy[TStrategyPayload]
151
- ] = None
149
+ self.__processing_strategy: Optional[ProcessingStrategy[TStrategyPayload]] = (
150
+ None
151
+ )
152
152
 
153
153
  self.__message: Optional[BrokerValue[TStrategyPayload]] = None
154
154
 
@@ -465,8 +465,9 @@ class StreamProcessor(Generic[TStrategyPayload]):
465
465
 
466
466
  elif self.__is_paused:
467
467
  paused_partitions = set(self.__consumer.paused())
468
+ all_partitions = set(self.__consumer.tell())
468
469
  unpaused_partitions = (
469
- set(self.__consumer.tell()) - paused_partitions
470
+ all_partitions - paused_partitions
470
471
  )
471
472
  if unpaused_partitions:
472
473
  logger.warning(
@@ -484,6 +485,18 @@ class StreamProcessor(Generic[TStrategyPayload]):
484
485
  # A paused consumer should still poll periodically to avoid it's partitions
485
486
  # getting revoked by the broker after reaching the max.poll.interval.ms
486
487
  # Polling a paused consumer should never yield a message.
488
+ logger.warning("consumer.tell() value right before poll() is: %s", self.__consumer.tell())
489
+ maybe_message = self.__consumer.poll(0.1)
490
+ if maybe_message is not None:
491
+ logger.warning("Received a message from partition: %s, \
492
+ consumer.tell() value right after poll() is: %s \
493
+ Some lines above consumer.tell() was called, all_partitons value was: %s \
494
+ Some lines above consumer.paused() was called, paused_partitions value is: %s",
495
+ maybe_message.partition,
496
+ self.__consumer.tell(),
497
+ all_partitions,
498
+ paused_partitions
499
+ )
487
500
  assert self.__consumer.poll(0.1) is None
488
501
  else:
489
502
  time.sleep(0.01)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.30.0
3
+ Version: 2.32.0
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -64,6 +64,7 @@ tests/test_types.py
64
64
  tests/backends/__init__.py
65
65
  tests/backends/mixins.py
66
66
  tests/backends/test_commit.py
67
+ tests/backends/test_confluent_producer.py
67
68
  tests/backends/test_kafka.py
68
69
  tests/backends/test_kafka_producer.py
69
70
  tests/backends/test_local.py
@@ -10,7 +10,7 @@ def get_requirements() -> Sequence[str]:
10
10
 
11
11
  setup(
12
12
  name="sentry-arroyo",
13
- version="2.30.0",
13
+ version="2.32.0",
14
14
  author="Sentry",
15
15
  author_email="oss@sentry.io",
16
16
  license="Apache-2.0",
@@ -0,0 +1,79 @@
1
+ from typing import Optional
2
+ from unittest import mock
3
+
4
+ from confluent_kafka import KafkaError
5
+ from confluent_kafka import Message as ConfluentMessage
6
+ from confluent_kafka import Producer as ConfluentKafkaProducer
7
+
8
+ from arroyo.backends.kafka.consumer import ConfluentProducer
9
+ from tests.metrics import Increment, TestingMetricsBackend
10
+
11
+
12
+ class TestConfluentProducer:
13
+ """
14
+ Tests for ConfluentProducer wrapper around confluent_kafka.Producer.
15
+ """
16
+
17
+ def test_init(self) -> None:
18
+ """Test that ConfluentProducer can be instantiated"""
19
+ config = {"bootstrap.servers": "fake:9092"}
20
+ producer = ConfluentProducer(config)
21
+
22
+ assert isinstance(producer, ConfluentProducer)
23
+ assert isinstance(producer, ConfluentKafkaProducer)
24
+
25
+ def test_metrics_callback_records_success(self) -> None:
26
+ """Test that the metrics callback records success metric"""
27
+ producer = ConfluentProducer(
28
+ {"bootstrap.servers": "fake:9092", "client.id": "test-producer-name"}
29
+ )
30
+ mock_message = mock.Mock(spec=ConfluentMessage)
31
+ producer._ConfluentProducer__metrics_delivery_callback(None, mock_message)
32
+ producer.flush() # Flush buffered metrics
33
+ assert (
34
+ Increment(
35
+ "arroyo.producer.produce_status",
36
+ 1,
37
+ {"status": "success", "producer_name": "test-producer-name"},
38
+ )
39
+ in TestingMetricsBackend.calls
40
+ )
41
+
42
+ def test_metrics_callback_records_error(self) -> None:
43
+ """Test that the metrics callback records error metric"""
44
+ producer = ConfluentProducer({"bootstrap.servers": "fake:9092"})
45
+ mock_error = mock.Mock(spec=KafkaError)
46
+ mock_message = mock.Mock(spec=ConfluentMessage)
47
+ producer._ConfluentProducer__metrics_delivery_callback(mock_error, mock_message)
48
+ producer.flush() # Flush buffered metrics
49
+ assert (
50
+ Increment("arroyo.producer.produce_status", 1, {"status": "error"})
51
+ in TestingMetricsBackend.calls
52
+ )
53
+
54
+ def test_delivery_callback_wraps_user_callback(self) -> None:
55
+ """Test that the delivery callback wrapper calls both metrics and user callbacks"""
56
+ producer = ConfluentProducer(
57
+ {"bootstrap.servers": "fake:9092", "client.id": "test-producer-name"}
58
+ )
59
+ user_callback_invoked = []
60
+
61
+ def user_callback(
62
+ error: Optional[KafkaError], message: ConfluentMessage
63
+ ) -> None:
64
+ user_callback_invoked.append((error, message))
65
+
66
+ wrapped = producer._ConfluentProducer__delivery_callback(user_callback)
67
+ mock_message = mock.Mock(spec=ConfluentMessage)
68
+ wrapped(None, mock_message)
69
+ producer.flush() # Flush buffered metrics
70
+ assert (
71
+ Increment(
72
+ "arroyo.producer.produce_status",
73
+ 1,
74
+ {"status": "success", "producer_name": "test-producer-name"},
75
+ )
76
+ in TestingMetricsBackend.calls
77
+ )
78
+ assert len(user_callback_invoked) == 1
79
+ assert user_callback_invoked[0] == (None, mock_message)
@@ -275,6 +275,70 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
275
275
  processor._run_once()
276
276
  assert consumer.paused() == []
277
277
 
278
+ def test_auto_commit_mode(self) -> None:
279
+ """Test that auto-commit mode uses store_offsets and commits on close"""
280
+ group_id = uuid.uuid1().hex
281
+
282
+ with self.get_topic() as topic:
283
+ # Produce some messages
284
+ with closing(self.get_producer()) as producer:
285
+ for i in range(5):
286
+ payload = KafkaPayload(None, f"msg_{i}".encode("utf8"), [])
287
+ producer.produce(topic, payload).result(5.0)
288
+
289
+ # Create consumer with auto-commit enabled
290
+ configuration = {
291
+ **self.configuration,
292
+ "auto.offset.reset": "earliest",
293
+ "arroyo.enable.auto.commit": True,
294
+ "group.id": group_id,
295
+ "session.timeout.ms": 10000,
296
+ }
297
+
298
+ # First consumer: consume messages and close
299
+ consumed_offsets = []
300
+ with closing(KafkaConsumer(configuration)) as consumer:
301
+ consumer.subscribe([topic])
302
+
303
+ # Consume all 5 messages and stage their offsets
304
+ for i in range(5):
305
+ value = consumer.poll(10.0)
306
+ assert value is not None
307
+ consumed_offsets.append(value.offset)
308
+
309
+ # Stage offsets (will use store_offsets internally in auto-commit mode)
310
+ consumer.stage_offsets(value.committable)
311
+
312
+ # commit_offsets should return None in auto-commit mode
313
+ result = consumer.commit_offsets()
314
+ assert result is None
315
+
316
+ # Close will commit any stored offsets
317
+
318
+ # Verify we consumed offsets 0-4
319
+ assert consumed_offsets == [0, 1, 2, 3, 4]
320
+
321
+ # Second consumer: verify offsets were committed on close
322
+ # This consumer uses manual commit to verify the committed offset
323
+ with closing(
324
+ self.get_consumer(
325
+ group=group_id,
326
+ auto_offset_reset="earliest",
327
+ enable_end_of_partition=True,
328
+ )
329
+ ) as consumer:
330
+ consumer.subscribe([topic])
331
+
332
+ # Should start from offset 5, hitting EndOfPartition immediately
333
+ # If we got a message with offset < 5, auto-commit didn't work
334
+ try:
335
+ consumer.poll(10.0)
336
+ pytest.fail("Expected EndOfPartition, but poll succeeded")
337
+ except EndOfPartition as e:
338
+ # Verify we got EndOfPartition at offset 5
339
+ assert e.offset == 5
340
+ assert e.partition == Partition(topic, 0)
341
+
278
342
 
279
343
  class TestKafkaStreamsIncrementalRebalancing(TestKafkaStreams):
280
344
  # re-test the kafka consumer with cooperative-sticky rebalancing
File without changes
File without changes
File without changes