sentry-arroyo 2.30.0__tar.gz → 2.32.0__tar.gz
This diff shows the differences between publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- {sentry_arroyo-2.30.0/sentry_arroyo.egg-info → sentry_arroyo-2.32.0}/PKG-INFO +1 -1
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/abstract.py +4 -1
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/__init__.py +2 -1
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/configuration.py +20 -16
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/consumer.py +153 -19
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/backend.py +4 -4
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/processor.py +17 -4
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0/sentry_arroyo.egg-info}/PKG-INFO +1 -1
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/SOURCES.txt +1 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/setup.py +1 -1
- sentry_arroyo-2.32.0/tests/backends/test_confluent_producer.py +79 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_kafka.py +64 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/LICENSE +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/MANIFEST.in +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/README.md +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/commit.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/storages/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/storages/abstract.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/storages/memory.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/commit.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/dlq.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/errors.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/abstract.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/batching.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/buffer.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/commit.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/filter.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/guard.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/healthcheck.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/noop.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/produce.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/reduce.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/run_task.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/run_task_in_threads.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/run_task_with_multiprocessing.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/strategies/unfold.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/py.typed +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/types.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/clock.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/codecs.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/concurrent.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/logging.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/metricDefs.json +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/metric_defs.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/metrics.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/profiler.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/utils/retries.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/examples/transform_and_produce/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/examples/transform_and_produce/batched.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/examples/transform_and_produce/script.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/examples/transform_and_produce/simple.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/requirements.txt +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/dependency_links.txt +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/not-zip-safe +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/requires.txt +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/sentry_arroyo.egg-info/top_level.txt +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/setup.cfg +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/mixins.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_commit.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_kafka_producer.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_local.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_all.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_batching.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_buffer.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_commit.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_filter.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_guard.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_noop.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_produce.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_reduce.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_run_task.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_run_task_in_threads.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_run_task_with_multiprocessing.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/strategies/test_unfold.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/processing/test_processor.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/test_commit.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/test_dlq.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/test_kip848_e2e.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/test_types.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/utils/__init__.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/utils/test_concurrent.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/utils/test_metrics.py +0 -0
- {sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/utils/test_retries.py +0 -0
{sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/abstract.py

```diff
@@ -153,10 +153,13 @@ class Consumer(Generic[TStrategyPayload], ABC):
         raise NotImplementedError
 
     @abstractmethod
-    def commit_offsets(self) -> Mapping[Partition, int]:
+    def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
         """
         Commit staged offsets. The return value of this method is a mapping
         of streams with their committed offsets as values.
+
+        When auto-commit is enabled (in Kafka consumers), returns None since
+        the broker handles commits automatically.
         """
         raise NotImplementedError
 
```
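Because `commit_offsets()` may now return `None`, code that consumes its return value has to tolerate both shapes. A small illustrative sketch of such a caller (the helper name and the logging are assumptions for illustration, not part of the package):

```python
from typing import Mapping, Optional

from arroyo.types import Partition


def log_commit(committed: Optional[Mapping[Partition, int]]) -> None:
    # None means the broker is auto-committing; there is nothing to report.
    if committed is None:
        print("auto-commit enabled; offsets are committed by rdkafka")
        return
    for partition, offset in committed.items():
        print(f"committed {partition}: {offset}")


# Usage (assuming `consumer` implements arroyo's Consumer interface):
# log_commit(consumer.commit_offsets())
```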
{sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/__init__.py

```diff
@@ -3,12 +3,13 @@ from .configuration import (
     build_kafka_consumer_configuration,
     build_kafka_producer_configuration,
 )
-from .consumer import KafkaConsumer, KafkaPayload, KafkaProducer
+from .consumer import ConfluentProducer, KafkaConsumer, KafkaPayload, KafkaProducer
 
 __all__ = [
     "build_kafka_configuration",
     "build_kafka_consumer_configuration",
     "build_kafka_producer_configuration",
+    "ConfluentProducer",
     "KafkaConsumer",
     "KafkaPayload",
     "KafkaProducer",
```
{sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/configuration.py

```diff
@@ -237,6 +237,7 @@ def build_kafka_consumer_configuration(
     bootstrap_servers: Optional[Sequence[str]] = None,
     override_params: Optional[Mapping[str, Any]] = None,
     strict_offset_reset: Optional[bool] = None,
+    enable_auto_commit: bool = False,
 ) -> KafkaBrokerConfig:
 
     if auto_offset_reset is None:
@@ -252,20 +253,23 @@ def build_kafka_consumer_configuration(
         default_config, bootstrap_servers, override_params
     )
 
-    … (16 removed lines; their content is not captured in this view)
+    # Default configuration with manual commit management
+    config_update = {
+        "enable.auto.commit": False,
+        "enable.auto.offset.store": False,
+        "group.id": group_id,
+        "auto.offset.reset": auto_offset_reset,
+        # this is an arroyo specific flag that only affects the consumer.
+        "arroyo.strict.offset.reset": strict_offset_reset,
+        # this is an arroyo specific flag to enable auto-commit mode
+        "arroyo.enable.auto.commit": enable_auto_commit,
+        # overridden to reduce memory usage when there's a large backlog
+        "queued.max.messages.kbytes": queued_max_messages_kbytes,
+        "queued.min.messages": queued_min_messages,
+        "enable.partition.eof": False,
+        "statistics.interval.ms": STATS_COLLECTION_FREQ_MS,
+        "stats_cb": stats_callback,
+    }
+
+    broker_config.update(config_update)
     return broker_config
```
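A minimal sketch of how the new `enable_auto_commit` keyword might be used. The `default_config` and `group_id` parameter names are suggested by the hunk's context lines but not shown in full, and the broker/group values are placeholders:

```python
from arroyo.backends.kafka.configuration import build_kafka_consumer_configuration

# Hypothetical values; only keyword names visible in the diff are relied on.
config = build_kafka_consumer_configuration(
    default_config={},
    group_id="example-group",
    auto_offset_reset="earliest",
    bootstrap_servers=["localhost:9092"],
    enable_auto_commit=True,
)

# The flag is carried through as an arroyo-specific key that KafkaConsumer
# pops off and interprets itself; it is not forwarded to librdkafka.
assert config["arroyo.enable.auto.commit"] is True
```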
{sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/kafka/consumer.py

```diff
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import logging
+import time
+from collections import defaultdict
 from concurrent.futures import Future
 from datetime import datetime
 from enum import Enum
@@ -21,6 +23,7 @@ from typing import (
     Tuple,
     Type,
     Union,
+    cast,
 )
 
 from confluent_kafka import (
@@ -33,7 +36,7 @@ from confluent_kafka import (
 from confluent_kafka import Consumer as ConfluentConsumer
 from confluent_kafka import KafkaError, KafkaException
 from confluent_kafka import Message as ConfluentMessage
-from confluent_kafka import Producer as …
+from confluent_kafka import Producer as ConfluentKafkaProducer
 from confluent_kafka import TopicPartition as ConfluentTopicPartition
 
 from arroyo.backends.abstract import (
@@ -50,6 +53,7 @@ from arroyo.errors import (
 )
 from arroyo.types import BrokerValue, Partition, Topic
 from arroyo.utils.concurrent import execute
+from arroyo.utils.metrics import get_metrics
 from arroyo.utils.retries import BasicRetryPolicy
 
 logger = logging.getLogger(__name__)
@@ -182,6 +186,13 @@ class KafkaConsumer(Consumer[KafkaPayload]):
         if self.__strict_offset_reset is None:
             self.__strict_offset_reset = True
 
+        # Feature flag to enable rdkafka auto-commit with store_offsets
+        # When enabled, offsets are stored via store_offsets() and rdkafka
+        # automatically commits them periodically
+        self.__use_auto_commit = as_kafka_configuration_bool(
+            configuration.pop("arroyo.enable.auto.commit", False)
+        )
+
         if auto_offset_reset in {"smallest", "earliest", "beginning"}:
             self.__resolve_partition_starting_offset = (
                 self.__resolve_partition_offset_earliest
@@ -197,21 +208,32 @@ class KafkaConsumer(Consumer[KafkaPayload]):
         else:
             raise ValueError("invalid value for 'auto.offset.reset' configuration")
 
-        … (5 removed lines; content not captured in this view)
+        # When auto-commit is disabled (default), we require explicit configuration
+        # When auto-commit is enabled, we allow rdkafka to handle commits
+        if not self.__use_auto_commit:
+            if (
+                as_kafka_configuration_bool(
+                    configuration.get("enable.auto.commit", "true")
+                )
+                is not False
+            ):
+                raise ValueError("invalid value for 'enable.auto.commit' configuration")
 
-        … (9 removed lines; content not captured in this view)
+            if (
+                as_kafka_configuration_bool(
+                    configuration.get("enable.auto.offset.store", "true")
+                )
+                is not False
+            ):
+                raise ValueError(
+                    "invalid value for 'enable.auto.offset.store' configuration"
+                )
+        else:
+            # In auto-commit mode, enable auto.commit and keep auto.offset.store disabled
+            # We'll use store_offsets() manually to control which offsets get committed
+            configuration["enable.auto.commit"] = True
+            configuration["enable.auto.offset.store"] = False
+            configuration["on_commit"] = self.__on_commit_callback
 
         # NOTE: Offsets are explicitly managed as part of the assignment
         # callback, so preemptively resetting offsets is not enabled when
@@ -231,6 +253,19 @@ class KafkaConsumer(Consumer[KafkaPayload]):
 
         self.__state = KafkaConsumerState.CONSUMING
 
+    def __on_commit_callback(
+        self,
+        error: Optional[KafkaException],
+        partitions: Sequence[ConfluentTopicPartition],
+    ) -> None:
+        if error:
+            partition_info = [f"{p.topic}:{p.partition}" for p in partitions]
+            logger.warning(
+                "Commit failed: %s. Partitions: %s",
+                error,
+                partition_info,
+            )
+
     def __resolve_partition_offset_earliest(
         self, partition: ConfluentTopicPartition
     ) -> ConfluentTopicPartition:
@@ -568,7 +603,21 @@ class KafkaConsumer(Consumer[KafkaPayload]):
         # TODO: Maybe log a warning if these offsets exceed the current
         # offsets, since that's probably a side effect of an incorrect usage
         # pattern?
-        self.__staged_offsets.update(offsets)
+        if self.__use_auto_commit:
+            # When auto-commit is enabled, use store_offsets to stage offsets
+            # for rdkafka to auto-commit
+            if offsets:
+                self.__consumer.store_offsets(
+                    offsets=[
+                        ConfluentTopicPartition(
+                            partition.topic.name, partition.index, offset
+                        )
+                        for partition, offset in offsets.items()
+                    ]
+                )
+        else:
+            # Default behavior: manually track staged offsets
+            self.__staged_offsets.update(offsets)
 
     def __commit(self) -> Mapping[Partition, int]:
         if self.__state in {KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR}:
@@ -616,15 +665,24 @@ class KafkaConsumer(Consumer[KafkaPayload]):
 
         return offsets
 
-    def commit_offsets(self) -> Mapping[Partition, int]:
+    def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
         """
         Commit staged offsets for all partitions that this consumer is
         assigned to. The return value of this method is a mapping of
         partitions with their committed offsets as values.
 
+        When auto-commit is enabled, returns None since rdkafka handles
+        commits automatically and we don't track which offsets were committed.
+
         Raises an ``InvalidState`` if called on a closed consumer.
         """
-        return self.__commit_retry_policy.call(self.__commit)
+        if self.__use_auto_commit:
+            # When auto-commit is enabled, rdkafka commits automatically
+            # We don't track what was committed, so return None
+            # The offsets have already been staged via store_offsets()
+            return None
+        else:
+            return self.__commit_retry_policy.call(self.__commit)
 
     def close(self, timeout: Optional[float] = None) -> None:
         """
@@ -657,7 +715,7 @@ class KafkaProducer(Producer[KafkaPayload]):
         self, configuration: Mapping[str, Any], use_simple_futures: bool = False
     ) -> None:
         self.__configuration = configuration
-        self.__producer = …
+        self.__producer = ConfluentKafkaProducer(configuration)
         self.__shutdown_requested = Event()
 
         # The worker must execute in a separate thread to ensure that callbacks
@@ -742,3 +800,79 @@ class KafkaProducer(Producer[KafkaPayload]):
     def close(self) -> Future[None]:
         self.__shutdown_requested.set()
         return self.__result
+
+
+# Type alias for the delivery callback function
+DeliveryCallback = Callable[[Optional[KafkaError], ConfluentMessage], None]
+
+# Interval between metric flushes (in seconds)
+METRICS_FREQUENCY_SEC = 1.0
+
+
+class ConfluentProducer(ConfluentKafkaProducer):  # type: ignore[misc]
+    """
+    A thin wrapper for confluent_kafka.Producer that adds metrics reporting.
+    """
+
+    def __init__(self, configuration: Mapping[str, Any]) -> None:
+        super().__init__(configuration)
+        self.producer_name = configuration.get("client.id") or None
+        self.__metrics = get_metrics()
+        self.__produce_counters: MutableMapping[str, int] = defaultdict(int)
+        self.__reset_metrics()
+
+    def __metrics_delivery_callback(
+        self,
+        error: Optional[KafkaError],
+        _message: ConfluentMessage,
+    ) -> None:
+        if error is not None:
+            status = "error"
+        else:
+            status = "success"
+        self.__produce_counters[status] += 1
+        self.__throttled_record()
+
+    def __delivery_callback(
+        self,
+        user_callback: Optional[DeliveryCallback],
+    ) -> DeliveryCallback:
+        def wrapped(error: Optional[KafkaError], message: ConfluentMessage) -> None:
+            self.__metrics_delivery_callback(error, message)
+            if user_callback is not None:
+                user_callback(error, message)
+
+        return wrapped
+
+    def produce(self, *args: Any, **kwargs: Any) -> None:
+        # callback and on_delivery are aliases, callback takes precedence over on_delivery
+        callback = kwargs.pop("callback", None)
+        on_delivery = kwargs.pop("on_delivery", None)
+        user_callback = callback or on_delivery
+        wrapped_callback = self.__delivery_callback(user_callback)
+        super().produce(*args, on_delivery=wrapped_callback, **kwargs)
+
+    def __flush_metrics(self) -> None:
+        for status, count in self.__produce_counters.items():
+            tags = {"status": status}
+            if self.producer_name:
+                tags["producer_name"] = self.producer_name
+            self.__metrics.increment(
+                name="arroyo.producer.produce_status",
+                value=count,
+                tags=tags,
+            )
+        self.__reset_metrics()
+
+    def flush(self, timeout: float = -1) -> int:
+        # Kafka producer flush should flush metrics too
+        self.__flush_metrics()
+        return cast(int, super().flush(timeout))
+
+    def __reset_metrics(self) -> None:
+        self.__produce_counters.clear()
+        self.__last_record_time = time.time()
+
+    def __throttled_record(self) -> None:
+        if time.time() - self.__last_record_time > METRICS_FREQUENCY_SEC:
+            self.__flush_metrics()
```
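Read together, these consumer changes let a caller opt into rdkafka-managed commits via the arroyo-specific flag. A minimal sketch of that flow under assumed connection details (broker, group, and topic names are placeholders; in practice the configuration would normally come from `build_kafka_consumer_configuration` as shown earlier):

```python
from arroyo.backends.kafka import KafkaConsumer
from arroyo.types import Topic

# Placeholder broker/group/topic values; not taken from the diff.
configuration = {
    "bootstrap.servers": "localhost:9092",
    "group.id": "example-group",
    "auto.offset.reset": "earliest",
    # arroyo-specific flag: consumed by KafkaConsumer, not passed to librdkafka
    "arroyo.enable.auto.commit": True,
}

consumer = KafkaConsumer(configuration)
consumer.subscribe([Topic("example-topic")])

value = consumer.poll(10.0)
if value is not None:
    # In auto-commit mode, staging goes through rdkafka's store_offsets(),
    # and commit_offsets() returns None because rdkafka commits periodically.
    consumer.stage_offsets(value.committable)
    assert consumer.commit_offsets() is None

consumer.close()
```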
{sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/backends/local/backend.py

```diff
@@ -38,9 +38,9 @@ class LocalBroker(Generic[TStrategyPayload]):
         self.__message_storage = message_storage
         self.__clock = clock
 
-        self.__offsets: MutableMapping[…
-        …
-        )
+        self.__offsets: MutableMapping[
+            str, MutableMapping[Partition, int]
+        ] = defaultdict(dict)
 
         # The active subscriptions are stored by consumer group as a mapping
         # between the consumer and it's subscribed topics.
@@ -326,7 +326,7 @@ class LocalConsumer(Consumer[TStrategyPayload]):
         # atomic
         self.__staged_offsets.update(offsets)
 
-    def commit_offsets(self) -> Mapping[Partition, int]:
+    def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
         with self.__lock:
             if self.__closed:
                 raise RuntimeError("consumer is closed")
```
{sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/arroyo/processing/processor.py

```diff
@@ -146,9 +146,9 @@ class StreamProcessor(Generic[TStrategyPayload]):
         self.__processor_factory = processor_factory
         self.__metrics_buffer = MetricsBuffer()
 
-        self.__processing_strategy: Optional[…
-        …
-        …
+        self.__processing_strategy: Optional[ProcessingStrategy[TStrategyPayload]] = (
+            None
+        )
 
         self.__message: Optional[BrokerValue[TStrategyPayload]] = None
 
@@ -465,8 +465,9 @@ class StreamProcessor(Generic[TStrategyPayload]):
 
         elif self.__is_paused:
             paused_partitions = set(self.__consumer.paused())
+            all_partitions = set(self.__consumer.tell())
             unpaused_partitions = (
-                …
+                all_partitions - paused_partitions
             )
             if unpaused_partitions:
                 logger.warning(
@@ -484,6 +485,18 @@ class StreamProcessor(Generic[TStrategyPayload]):
             # A paused consumer should still poll periodically to avoid it's partitions
             # getting revoked by the broker after reaching the max.poll.interval.ms
             # Polling a paused consumer should never yield a message.
+            logger.warning("consumer.tell() value right before poll() is: %s", self.__consumer.tell())
+            maybe_message = self.__consumer.poll(0.1)
+            if maybe_message is not None:
+                logger.warning("Received a message from partition: %s, \
+                    consumer.tell() value right after poll() is: %s \
+                    Some lines above consumer.tell() was called, all_partitons value was: %s \
+                    Some lines above consumer.paused() was called, paused_partitions value is: %s",
+                    maybe_message.partition,
+                    self.__consumer.tell(),
+                    all_partitions,
+                    paused_partitions
+                )
             assert self.__consumer.poll(0.1) is None
         else:
             time.sleep(0.01)
```
sentry_arroyo-2.32.0/tests/backends/test_confluent_producer.py (new file)

```diff
@@ -0,0 +1,79 @@
+from typing import Optional
+from unittest import mock
+
+from confluent_kafka import KafkaError
+from confluent_kafka import Message as ConfluentMessage
+from confluent_kafka import Producer as ConfluentKafkaProducer
+
+from arroyo.backends.kafka.consumer import ConfluentProducer
+from tests.metrics import Increment, TestingMetricsBackend
+
+
+class TestConfluentProducer:
+    """
+    Tests for ConfluentProducer wrapper around confluent_kafka.Producer.
+    """
+
+    def test_init(self) -> None:
+        """Test that ConfluentProducer can be instantiated"""
+        config = {"bootstrap.servers": "fake:9092"}
+        producer = ConfluentProducer(config)
+
+        assert isinstance(producer, ConfluentProducer)
+        assert isinstance(producer, ConfluentKafkaProducer)
+
+    def test_metrics_callback_records_success(self) -> None:
+        """Test that the metrics callback records success metric"""
+        producer = ConfluentProducer(
+            {"bootstrap.servers": "fake:9092", "client.id": "test-producer-name"}
+        )
+        mock_message = mock.Mock(spec=ConfluentMessage)
+        producer._ConfluentProducer__metrics_delivery_callback(None, mock_message)
+        producer.flush()  # Flush buffered metrics
+        assert (
+            Increment(
+                "arroyo.producer.produce_status",
+                1,
+                {"status": "success", "producer_name": "test-producer-name"},
+            )
+            in TestingMetricsBackend.calls
+        )
+
+    def test_metrics_callback_records_error(self) -> None:
+        """Test that the metrics callback records error metric"""
+        producer = ConfluentProducer({"bootstrap.servers": "fake:9092"})
+        mock_error = mock.Mock(spec=KafkaError)
+        mock_message = mock.Mock(spec=ConfluentMessage)
+        producer._ConfluentProducer__metrics_delivery_callback(mock_error, mock_message)
+        producer.flush()  # Flush buffered metrics
+        assert (
+            Increment("arroyo.producer.produce_status", 1, {"status": "error"})
+            in TestingMetricsBackend.calls
+        )
+
+    def test_delivery_callback_wraps_user_callback(self) -> None:
+        """Test that the delivery callback wrapper calls both metrics and user callbacks"""
+        producer = ConfluentProducer(
+            {"bootstrap.servers": "fake:9092", "client.id": "test-producer-name"}
+        )
+        user_callback_invoked = []
+
+        def user_callback(
+            error: Optional[KafkaError], message: ConfluentMessage
+        ) -> None:
+            user_callback_invoked.append((error, message))
+
+        wrapped = producer._ConfluentProducer__delivery_callback(user_callback)
+        mock_message = mock.Mock(spec=ConfluentMessage)
+        wrapped(None, mock_message)
+        producer.flush()  # Flush buffered metrics
+        assert (
+            Increment(
+                "arroyo.producer.produce_status",
+                1,
+                {"status": "success", "producer_name": "test-producer-name"},
+            )
+            in TestingMetricsBackend.calls
+        )
+        assert len(user_callback_invoked) == 1
+        assert user_callback_invoked[0] == (None, mock_message)
```
{sentry_arroyo-2.30.0 → sentry_arroyo-2.32.0}/tests/backends/test_kafka.py

```diff
@@ -275,6 +275,70 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
             processor._run_once()
             assert consumer.paused() == []
 
+    def test_auto_commit_mode(self) -> None:
+        """Test that auto-commit mode uses store_offsets and commits on close"""
+        group_id = uuid.uuid1().hex
+
+        with self.get_topic() as topic:
+            # Produce some messages
+            with closing(self.get_producer()) as producer:
+                for i in range(5):
+                    payload = KafkaPayload(None, f"msg_{i}".encode("utf8"), [])
+                    producer.produce(topic, payload).result(5.0)
+
+            # Create consumer with auto-commit enabled
+            configuration = {
+                **self.configuration,
+                "auto.offset.reset": "earliest",
+                "arroyo.enable.auto.commit": True,
+                "group.id": group_id,
+                "session.timeout.ms": 10000,
+            }
+
+            # First consumer: consume messages and close
+            consumed_offsets = []
+            with closing(KafkaConsumer(configuration)) as consumer:
+                consumer.subscribe([topic])
+
+                # Consume all 5 messages and stage their offsets
+                for i in range(5):
+                    value = consumer.poll(10.0)
+                    assert value is not None
+                    consumed_offsets.append(value.offset)
+
+                    # Stage offsets (will use store_offsets internally in auto-commit mode)
+                    consumer.stage_offsets(value.committable)
+
+                # commit_offsets should return None in auto-commit mode
+                result = consumer.commit_offsets()
+                assert result is None
+
+                # Close will commit any stored offsets
+
+            # Verify we consumed offsets 0-4
+            assert consumed_offsets == [0, 1, 2, 3, 4]
+
+            # Second consumer: verify offsets were committed on close
+            # This consumer uses manual commit to verify the committed offset
+            with closing(
+                self.get_consumer(
+                    group=group_id,
+                    auto_offset_reset="earliest",
+                    enable_end_of_partition=True,
+                )
+            ) as consumer:
+                consumer.subscribe([topic])
+
+                # Should start from offset 5, hitting EndOfPartition immediately
+                # If we got a message with offset < 5, auto-commit didn't work
+                try:
+                    consumer.poll(10.0)
+                    pytest.fail("Expected EndOfPartition, but poll succeeded")
+                except EndOfPartition as e:
+                    # Verify we got EndOfPartition at offset 5
+                    assert e.offset == 5
+                    assert e.partition == Partition(topic, 0)
+
 
 class TestKafkaStreamsIncrementalRebalancing(TestKafkaStreams):
     # re-test the kafka consumer with cooperative-sticky rebalancing
```