sentry-arroyo 2.31.2__py3-none-any.whl → 2.32.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arroyo/backends/abstract.py +4 -1
- arroyo/backends/kafka/configuration.py +20 -16
- arroyo/backends/kafka/consumer.py +71 -17
- arroyo/backends/local/backend.py +4 -4
- arroyo/processing/processor.py +14 -1
- arroyo/processing/strategies/run_task_with_multiprocessing.py +12 -4
- arroyo/utils/metricDefs.json +1 -1
- arroyo/utils/metric_defs.py +2 -3
- {sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/METADATA +1 -1
- {sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/RECORD +15 -15
- tests/backends/test_kafka.py +64 -0
- tests/processing/strategies/test_run_task_with_multiprocessing.py +20 -5
- {sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/WHEEL +0 -0
- {sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/licenses/LICENSE +0 -0
- {sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/top_level.txt +0 -0
arroyo/backends/abstract.py
CHANGED
|
@@ -153,10 +153,13 @@ class Consumer(Generic[TStrategyPayload], ABC):
|
|
|
153
153
|
raise NotImplementedError
|
|
154
154
|
|
|
155
155
|
@abstractmethod
|
|
156
|
-
def commit_offsets(self) -> Mapping[Partition, int]:
|
|
156
|
+
def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
|
|
157
157
|
"""
|
|
158
158
|
Commit staged offsets. The return value of this method is a mapping
|
|
159
159
|
of streams with their committed offsets as values.
|
|
160
|
+
|
|
161
|
+
When auto-commit is enabled (in Kafka consumers), returns None since
|
|
162
|
+
the broker handles commits automatically.
|
|
160
163
|
"""
|
|
161
164
|
raise NotImplementedError
|
|
162
165
|
|
|
@@ -237,6 +237,7 @@ def build_kafka_consumer_configuration(
|
|
|
237
237
|
bootstrap_servers: Optional[Sequence[str]] = None,
|
|
238
238
|
override_params: Optional[Mapping[str, Any]] = None,
|
|
239
239
|
strict_offset_reset: Optional[bool] = None,
|
|
240
|
+
enable_auto_commit: bool = False,
|
|
240
241
|
) -> KafkaBrokerConfig:
|
|
241
242
|
|
|
242
243
|
if auto_offset_reset is None:
|
|
@@ -252,20 +253,23 @@ def build_kafka_consumer_configuration(
|
|
|
252
253
|
default_config, bootstrap_servers, override_params
|
|
253
254
|
)
|
|
254
255
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
256
|
+
# Default configuration with manual commit management
|
|
257
|
+
config_update = {
|
|
258
|
+
"enable.auto.commit": False,
|
|
259
|
+
"enable.auto.offset.store": False,
|
|
260
|
+
"group.id": group_id,
|
|
261
|
+
"auto.offset.reset": auto_offset_reset,
|
|
262
|
+
# this is an arroyo specific flag that only affects the consumer.
|
|
263
|
+
"arroyo.strict.offset.reset": strict_offset_reset,
|
|
264
|
+
# this is an arroyo specific flag to enable auto-commit mode
|
|
265
|
+
"arroyo.enable.auto.commit": enable_auto_commit,
|
|
266
|
+
# overridden to reduce memory usage when there's a large backlog
|
|
267
|
+
"queued.max.messages.kbytes": queued_max_messages_kbytes,
|
|
268
|
+
"queued.min.messages": queued_min_messages,
|
|
269
|
+
"enable.partition.eof": False,
|
|
270
|
+
"statistics.interval.ms": STATS_COLLECTION_FREQ_MS,
|
|
271
|
+
"stats_cb": stats_callback,
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
broker_config.update(config_update)
|
|
271
275
|
return broker_config
|
|
@@ -186,6 +186,13 @@ class KafkaConsumer(Consumer[KafkaPayload]):
|
|
|
186
186
|
if self.__strict_offset_reset is None:
|
|
187
187
|
self.__strict_offset_reset = True
|
|
188
188
|
|
|
189
|
+
# Feature flag to enable rdkafka auto-commit with store_offsets
|
|
190
|
+
# When enabled, offsets are stored via store_offsets() and rdkafka
|
|
191
|
+
# automatically commits them periodically
|
|
192
|
+
self.__use_auto_commit = as_kafka_configuration_bool(
|
|
193
|
+
configuration.pop("arroyo.enable.auto.commit", False)
|
|
194
|
+
)
|
|
195
|
+
|
|
189
196
|
if auto_offset_reset in {"smallest", "earliest", "beginning"}:
|
|
190
197
|
self.__resolve_partition_starting_offset = (
|
|
191
198
|
self.__resolve_partition_offset_earliest
|
|
@@ -201,21 +208,32 @@ class KafkaConsumer(Consumer[KafkaPayload]):
|
|
|
201
208
|
else:
|
|
202
209
|
raise ValueError("invalid value for 'auto.offset.reset' configuration")
|
|
203
210
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
211
|
+
# When auto-commit is disabled (default), we require explicit configuration
|
|
212
|
+
# When auto-commit is enabled, we allow rdkafka to handle commits
|
|
213
|
+
if not self.__use_auto_commit:
|
|
214
|
+
if (
|
|
215
|
+
as_kafka_configuration_bool(
|
|
216
|
+
configuration.get("enable.auto.commit", "true")
|
|
217
|
+
)
|
|
218
|
+
is not False
|
|
219
|
+
):
|
|
220
|
+
raise ValueError("invalid value for 'enable.auto.commit' configuration")
|
|
209
221
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
222
|
+
if (
|
|
223
|
+
as_kafka_configuration_bool(
|
|
224
|
+
configuration.get("enable.auto.offset.store", "true")
|
|
225
|
+
)
|
|
226
|
+
is not False
|
|
227
|
+
):
|
|
228
|
+
raise ValueError(
|
|
229
|
+
"invalid value for 'enable.auto.offset.store' configuration"
|
|
230
|
+
)
|
|
231
|
+
else:
|
|
232
|
+
# In auto-commit mode, enable auto.commit and keep auto.offset.store disabled
|
|
233
|
+
# We'll use store_offsets() manually to control which offsets get committed
|
|
234
|
+
configuration["enable.auto.commit"] = True
|
|
235
|
+
configuration["enable.auto.offset.store"] = False
|
|
236
|
+
configuration["on_commit"] = self.__on_commit_callback
|
|
219
237
|
|
|
220
238
|
# NOTE: Offsets are explicitly managed as part of the assignment
|
|
221
239
|
# callback, so preemptively resetting offsets is not enabled when
|
|
@@ -235,6 +253,19 @@ class KafkaConsumer(Consumer[KafkaPayload]):
|
|
|
235
253
|
|
|
236
254
|
self.__state = KafkaConsumerState.CONSUMING
|
|
237
255
|
|
|
256
|
+
def __on_commit_callback(
|
|
257
|
+
self,
|
|
258
|
+
error: Optional[KafkaException],
|
|
259
|
+
partitions: Sequence[ConfluentTopicPartition],
|
|
260
|
+
) -> None:
|
|
261
|
+
if error:
|
|
262
|
+
partition_info = [f"{p.topic}:{p.partition}" for p in partitions]
|
|
263
|
+
logger.warning(
|
|
264
|
+
"Commit failed: %s. Partitions: %s",
|
|
265
|
+
error,
|
|
266
|
+
partition_info,
|
|
267
|
+
)
|
|
268
|
+
|
|
238
269
|
def __resolve_partition_offset_earliest(
|
|
239
270
|
self, partition: ConfluentTopicPartition
|
|
240
271
|
) -> ConfluentTopicPartition:
|
|
@@ -572,7 +603,21 @@ class KafkaConsumer(Consumer[KafkaPayload]):
|
|
|
572
603
|
# TODO: Maybe log a warning if these offsets exceed the current
|
|
573
604
|
# offsets, since that's probably a side effect of an incorrect usage
|
|
574
605
|
# pattern?
|
|
575
|
-
self.
|
|
606
|
+
if self.__use_auto_commit:
|
|
607
|
+
# When auto-commit is enabled, use store_offsets to stage offsets
|
|
608
|
+
# for rdkafka to auto-commit
|
|
609
|
+
if offsets:
|
|
610
|
+
self.__consumer.store_offsets(
|
|
611
|
+
offsets=[
|
|
612
|
+
ConfluentTopicPartition(
|
|
613
|
+
partition.topic.name, partition.index, offset
|
|
614
|
+
)
|
|
615
|
+
for partition, offset in offsets.items()
|
|
616
|
+
]
|
|
617
|
+
)
|
|
618
|
+
else:
|
|
619
|
+
# Default behavior: manually track staged offsets
|
|
620
|
+
self.__staged_offsets.update(offsets)
|
|
576
621
|
|
|
577
622
|
def __commit(self) -> Mapping[Partition, int]:
|
|
578
623
|
if self.__state in {KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR}:
|
|
@@ -620,15 +665,24 @@ class KafkaConsumer(Consumer[KafkaPayload]):
|
|
|
620
665
|
|
|
621
666
|
return offsets
|
|
622
667
|
|
|
623
|
-
def commit_offsets(self) -> Mapping[Partition, int]:
|
|
668
|
+
def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
|
|
624
669
|
"""
|
|
625
670
|
Commit staged offsets for all partitions that this consumer is
|
|
626
671
|
assigned to. The return value of this method is a mapping of
|
|
627
672
|
partitions with their committed offsets as values.
|
|
628
673
|
|
|
674
|
+
When auto-commit is enabled, returns None since rdkafka handles
|
|
675
|
+
commits automatically and we don't track which offsets were committed.
|
|
676
|
+
|
|
629
677
|
Raises an ``InvalidState`` if called on a closed consumer.
|
|
630
678
|
"""
|
|
631
|
-
|
|
679
|
+
if self.__use_auto_commit:
|
|
680
|
+
# When auto-commit is enabled, rdkafka commits automatically
|
|
681
|
+
# We don't track what was committed, so return None
|
|
682
|
+
# The offsets have already been staged via store_offsets()
|
|
683
|
+
return None
|
|
684
|
+
else:
|
|
685
|
+
return self.__commit_retry_policy.call(self.__commit)
|
|
632
686
|
|
|
633
687
|
def close(self, timeout: Optional[float] = None) -> None:
|
|
634
688
|
"""
|
arroyo/backends/local/backend.py
CHANGED
|
@@ -38,9 +38,9 @@ class LocalBroker(Generic[TStrategyPayload]):
|
|
|
38
38
|
self.__message_storage = message_storage
|
|
39
39
|
self.__clock = clock
|
|
40
40
|
|
|
41
|
-
self.__offsets: MutableMapping[
|
|
42
|
-
|
|
43
|
-
)
|
|
41
|
+
self.__offsets: MutableMapping[
|
|
42
|
+
str, MutableMapping[Partition, int]
|
|
43
|
+
] = defaultdict(dict)
|
|
44
44
|
|
|
45
45
|
# The active subscriptions are stored by consumer group as a mapping
|
|
46
46
|
# between the consumer and its subscribed topics.
|
|
@@ -326,7 +326,7 @@ class LocalConsumer(Consumer[TStrategyPayload]):
|
|
|
326
326
|
# atomic
|
|
327
327
|
self.__staged_offsets.update(offsets)
|
|
328
328
|
|
|
329
|
-
def commit_offsets(self) -> Mapping[Partition, int]:
|
|
329
|
+
def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
|
|
330
330
|
with self.__lock:
|
|
331
331
|
if self.__closed:
|
|
332
332
|
raise RuntimeError("consumer is closed")
|
arroyo/processing/processor.py
CHANGED
|
@@ -465,8 +465,9 @@ class StreamProcessor(Generic[TStrategyPayload]):
|
|
|
465
465
|
|
|
466
466
|
elif self.__is_paused:
|
|
467
467
|
paused_partitions = set(self.__consumer.paused())
|
|
468
|
+
all_partitions = set(self.__consumer.tell())
|
|
468
469
|
unpaused_partitions = (
|
|
469
|
-
|
|
470
|
+
all_partitions - paused_partitions
|
|
470
471
|
)
|
|
471
472
|
if unpaused_partitions:
|
|
472
473
|
logger.warning(
|
|
@@ -484,6 +485,18 @@ class StreamProcessor(Generic[TStrategyPayload]):
|
|
|
484
485
|
# A paused consumer should still poll periodically to avoid its partitions
|
|
485
486
|
# getting revoked by the broker after reaching the max.poll.interval.ms
|
|
486
487
|
# Polling a paused consumer should never yield a message.
|
|
488
|
+
logger.warning("consumer.tell() value right before poll() is: %s", self.__consumer.tell())
|
|
489
|
+
maybe_message = self.__consumer.poll(0.1)
|
|
490
|
+
if maybe_message is not None:
|
|
491
|
+
logger.warning("Received a message from partition: %s, \
|
|
492
|
+
consumer.tell() value right after poll() is: %s \
|
|
493
|
+
Some lines above consumer.tell() was called, all_partitons value was: %s \
|
|
494
|
+
Some lines above consumer.paused() was called, paused_partitions value is: %s",
|
|
495
|
+
maybe_message.partition,
|
|
496
|
+
self.__consumer.tell(),
|
|
497
|
+
all_partitions,
|
|
498
|
+
paused_partitions
|
|
499
|
+
)
|
|
487
500
|
assert self.__consumer.poll(0.1) is None
|
|
488
501
|
else:
|
|
489
502
|
time.sleep(0.01)
|
|
@@ -424,13 +424,16 @@ class RunTaskWithMultiprocessing(
|
|
|
424
424
|
point.
|
|
425
425
|
|
|
426
426
|
The metric ``arroyo.strategies.run_task_with_multiprocessing.processes``
|
|
427
|
-
shows
|
|
427
|
+
shows the total number of available processes in the pool. This is recorded
|
|
428
|
+
as a gauge alongside ``batches_in_progress`` so you can calculate the ratio
|
|
429
|
+
of processes in use to total available processes.
|
|
428
430
|
|
|
429
431
|
If those two metrics don't line up, your consumer is not bottlenecked on
|
|
430
432
|
number of processes. That's a good thing, you want to have some reserve
|
|
431
433
|
capacity. But it means that increasing ``num_processes`` will not make your
|
|
432
434
|
consumer faster.
|
|
433
435
|
|
|
436
|
+
|
|
434
437
|
Batching
|
|
435
438
|
~~~~~~~~
|
|
436
439
|
|
|
@@ -602,9 +605,6 @@ class RunTaskWithMultiprocessing(
|
|
|
602
605
|
)
|
|
603
606
|
self.__pool_waiting_time: Optional[float] = None
|
|
604
607
|
self.__pool_waiting_log_time: Optional[float] = None
|
|
605
|
-
self.__metrics.gauge(
|
|
606
|
-
"arroyo.strategies.run_task_with_multiprocessing.processes", num_processes
|
|
607
|
-
)
|
|
608
608
|
|
|
609
609
|
self.__closed = False
|
|
610
610
|
|
|
@@ -641,6 +641,10 @@ class RunTaskWithMultiprocessing(
|
|
|
641
641
|
end_time - start_time,
|
|
642
642
|
)
|
|
643
643
|
self.__batches_in_progress.increment()
|
|
644
|
+
self.__metrics.gauge(
|
|
645
|
+
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
646
|
+
self.__pool.num_processes,
|
|
647
|
+
)
|
|
644
648
|
self.__metrics.timing(
|
|
645
649
|
"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", len(batch)
|
|
646
650
|
)
|
|
@@ -835,6 +839,10 @@ class RunTaskWithMultiprocessing(
|
|
|
835
839
|
self.__input_blocks.append(new_input_block)
|
|
836
840
|
self.__output_blocks.append(new_output_block)
|
|
837
841
|
self.__batches_in_progress.decrement()
|
|
842
|
+
self.__metrics.gauge(
|
|
843
|
+
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
844
|
+
self.__pool.num_processes,
|
|
845
|
+
)
|
|
838
846
|
|
|
839
847
|
del self.__processes[0]
|
|
840
848
|
|
arroyo/utils/metricDefs.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time", "type": "Time", "description": "How long it took to submit a batch to multiprocessing"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to 
multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. 
This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nGauge: Shows how many processes the multiprocessing strategy is\nconfigured with."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": 
"(unitless) spent in librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. 
Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}, "arroyo.consumer.dlq_buffer.exceeded": {"name": "arroyo.consumer.dlq_buffer.exceeded", "type": "Counter", "description": "Number of times the DLQ buffer size has been exceeded, causing messages to be dropped"}, "arroyo.consumer.dlq_buffer.assigned_partitions": {"name": "arroyo.consumer.dlq_buffer.assigned_partitions", "type": "Gauge", "description": "Number of partitions being tracked in the DLQ buffer"}, "arroyo.producer.librdkafka.p99_int_latency": {"name": "arroyo.producer.librdkafka.p99_int_latency", "type": "Time", "description": 
"Internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_outbuf_latency": {"name": "arroyo.producer.librdkafka.p99_outbuf_latency", "type": "Time", "description": "Output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_rtt": {"name": "arroyo.producer.librdkafka.p99_rtt", "type": "Time", "description": "Round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_int_latency": {"name": "arroyo.producer.librdkafka.avg_int_latency", "type": "Time", "description": "Average internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_outbuf_latency": {"name": "arroyo.producer.librdkafka.avg_outbuf_latency", "type": "Time", "description": "Average output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_rtt": {"name": "arroyo.producer.librdkafka.avg_rtt", "type": "Time", "description": "Average round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.produce_status": {"name": "arroyo.producer.produce_status", "type": "Counter", "description": "Number of times the produce strategy failed to produce a message"}, "arroyo.producer.librdkafka.message_count": {"name": "arroyo.producer.librdkafka.message_count", "type": "Gauge", "description": "Producer message count metric from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_count_max": {"name": "arroyo.producer.librdkafka.message_count_max", "type": "Gauge", "description": "Maximum producer message count from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size": {"name": "arroyo.producer.librdkafka.message_size", "type": "Gauge", "description": "Producer message size from librdkafka statistics\nTagged by producer_name"}, 
"arroyo.producer.librdkafka.message_size_max": {"name": "arroyo.producer.librdkafka.message_size_max", "type": "Gauge", "description": "Maximum producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.txmsgs": {"name": "arroyo.producer.librdkafka.txmsgs", "type": "Gauge", "description": "Total number of messages transmitted from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.broker_tx": {"name": "arroyo.producer.librdkafka.broker_tx", "type": "Gauge", "description": "Total number of transmission requests from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txbytes": {"name": "arroyo.producer.librdkafka.broker_txbytes", "type": "Gauge", "description": "Total number of bytes transmitted from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_requests": {"name": "arroyo.producer.librdkafka.broker_outbuf_requests", "type": "Gauge", "description": "Number of requests awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_messages": {"name": "arroyo.producer.librdkafka.broker_outbuf_messages", "type": "Gauge", "description": "Number of messages awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_connects": {"name": "arroyo.producer.librdkafka.broker_connects", "type": "Gauge", "description": "Number of connection attempts to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_disconnects": {"name": "arroyo.producer.librdkafka.broker_disconnects", "type": "Gauge", "description": "Number of disconnections from broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txerrs": {"name": "arroyo.producer.librdkafka.broker_txerrs", 
"type": "Gauge", "description": "Total number of transmission errors from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txretries": {"name": "arroyo.producer.librdkafka.broker_txretries", "type": "Gauge", "description": "Total number of request retries from librdkafka statistics\nTagged by broker_id, producer_name"}}
|
|
1
|
+
{"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time", "type": "Time", "description": "How long it took to submit a batch to multiprocessing"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to 
multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. 
This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Gauge", "description": "Shows the total number of available processes in the pool."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nCounter: Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": "(unitless) spent in 
librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. 
Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}, "arroyo.consumer.dlq_buffer.exceeded": {"name": "arroyo.consumer.dlq_buffer.exceeded", "type": "Counter", "description": "Number of times the DLQ buffer size has been exceeded, causing messages to be dropped"}, "arroyo.consumer.dlq_buffer.assigned_partitions": {"name": "arroyo.consumer.dlq_buffer.assigned_partitions", "type": "Gauge", "description": "Number of partitions being tracked in the DLQ buffer"}, "arroyo.producer.librdkafka.p99_int_latency": {"name": "arroyo.producer.librdkafka.p99_int_latency", "type": "Time", "description": 
"Internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_outbuf_latency": {"name": "arroyo.producer.librdkafka.p99_outbuf_latency", "type": "Time", "description": "Output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_rtt": {"name": "arroyo.producer.librdkafka.p99_rtt", "type": "Time", "description": "Round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_int_latency": {"name": "arroyo.producer.librdkafka.avg_int_latency", "type": "Time", "description": "Average internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_outbuf_latency": {"name": "arroyo.producer.librdkafka.avg_outbuf_latency", "type": "Time", "description": "Average output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_rtt": {"name": "arroyo.producer.librdkafka.avg_rtt", "type": "Time", "description": "Average round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.produce_status": {"name": "arroyo.producer.produce_status", "type": "Counter", "description": "Number of times the produce strategy failed to produce a message"}, "arroyo.producer.librdkafka.message_count": {"name": "arroyo.producer.librdkafka.message_count", "type": "Gauge", "description": "Producer message count metric from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_count_max": {"name": "arroyo.producer.librdkafka.message_count_max", "type": "Gauge", "description": "Maximum producer message count from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size": {"name": "arroyo.producer.librdkafka.message_size", "type": "Gauge", "description": "Producer message size from librdkafka statistics\nTagged by producer_name"}, 
"arroyo.producer.librdkafka.message_size_max": {"name": "arroyo.producer.librdkafka.message_size_max", "type": "Gauge", "description": "Maximum producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.txmsgs": {"name": "arroyo.producer.librdkafka.txmsgs", "type": "Gauge", "description": "Total number of messages transmitted from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.broker_tx": {"name": "arroyo.producer.librdkafka.broker_tx", "type": "Gauge", "description": "Total number of transmission requests from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txbytes": {"name": "arroyo.producer.librdkafka.broker_txbytes", "type": "Gauge", "description": "Total number of bytes transmitted from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_requests": {"name": "arroyo.producer.librdkafka.broker_outbuf_requests", "type": "Gauge", "description": "Number of requests awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_messages": {"name": "arroyo.producer.librdkafka.broker_outbuf_messages", "type": "Gauge", "description": "Number of messages awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_connects": {"name": "arroyo.producer.librdkafka.broker_connects", "type": "Gauge", "description": "Number of connection attempts to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_disconnects": {"name": "arroyo.producer.librdkafka.broker_disconnects", "type": "Gauge", "description": "Number of disconnections from broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txerrs": {"name": "arroyo.producer.librdkafka.broker_txerrs", 
"type": "Gauge", "description": "Total number of transmission errors from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txretries": {"name": "arroyo.producer.librdkafka.broker_txretries", "type": "Gauge", "description": "Total number of request retries from librdkafka statistics\nTagged by broker_id, producer_name"}}
|
arroyo/utils/metric_defs.py
CHANGED
|
@@ -40,11 +40,10 @@ MetricName = Literal[
|
|
|
40
40
|
"arroyo.strategies.run_task_with_multiprocessing.batch.output.resize",
|
|
41
41
|
# Gauge: How many batches are being processed in parallel by multiprocessing.
|
|
42
42
|
"arroyo.strategies.run_task_with_multiprocessing.batches_in_progress",
|
|
43
|
+
# Gauge: Shows the total number of available processes in the pool.
|
|
44
|
+
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
43
45
|
# Counter: A subprocess by multiprocessing unexpectedly died.
|
|
44
46
|
"sigchld.detected",
|
|
45
|
-
# Gauge: Shows how many processes the multiprocessing strategy is
|
|
46
|
-
# configured with.
|
|
47
|
-
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
48
47
|
# Counter: Incremented when the multiprocessing pool is created (or re-created).
|
|
49
48
|
"arroyo.strategies.run_task_with_multiprocessing.pool.create",
|
|
50
49
|
# Time: (unitless) spent polling librdkafka for new messages.
|
|
@@ -5,18 +5,18 @@ arroyo/errors.py,sha256=IbtoIbz_m5QrxNRBLOxiy-hOfJQTEwNPCyq6yqedJYk,1059
|
|
|
5
5
|
arroyo/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
arroyo/types.py,sha256=sLY0x030np4UmbaW5C1KH1se7Z2pjQiPvAe5x2sXf7A,5684
|
|
7
7
|
arroyo/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
arroyo/backends/abstract.py,sha256=
|
|
8
|
+
arroyo/backends/abstract.py,sha256=Wy9xhE1dtFiumG8Cz3JhksJ0rF74uJWZWq10UO1rxOI,9524
|
|
9
9
|
arroyo/backends/kafka/__init__.py,sha256=xgf-AqHbQkJsh73YokO2uoyyHfZf8XwUp6BULtM8stI,445
|
|
10
10
|
arroyo/backends/kafka/commit.py,sha256=LPsjvX5PPXR62DT6sa5GuSF78qk9F_L--Fz4kw7-m-s,3060
|
|
11
|
-
arroyo/backends/kafka/configuration.py,sha256=
|
|
12
|
-
arroyo/backends/kafka/consumer.py,sha256=
|
|
11
|
+
arroyo/backends/kafka/configuration.py,sha256=zB54w7qsyVeMVkH5MpV6F8ztXfEzIXrex6aKYX-GcqA,9141
|
|
12
|
+
arroyo/backends/kafka/consumer.py,sha256=zZ2ZoDaurLDBN9l9QR0fFWL16RJcf0D8Apaa3aff22k,33534
|
|
13
13
|
arroyo/backends/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
arroyo/backends/local/backend.py,sha256=
|
|
14
|
+
arroyo/backends/local/backend.py,sha256=hUXdCV6B5e7s4mjFC6HnIuUhjENU2tNZt5vuEOJmGZQ,13888
|
|
15
15
|
arroyo/backends/local/storages/__init__.py,sha256=AGYujdAAcn3osoj9jq84IzTywYbkIDv9wRg2rLhLXeg,104
|
|
16
16
|
arroyo/backends/local/storages/abstract.py,sha256=1qVQp6roxHkK6XT2aklZyZk1qq7RzcPN6Db_CA5--kg,2901
|
|
17
17
|
arroyo/backends/local/storages/memory.py,sha256=AoKDsVZzBXkOJyWArKWp3vfGfU9xLlKFXE9gsJiMIzQ,2613
|
|
18
18
|
arroyo/processing/__init__.py,sha256=vZVg0wJvJfoVzlzGvnL59bT6YNIRJNQ5t7oU045Qbk4,87
|
|
19
|
-
arroyo/processing/processor.py,sha256=
|
|
19
|
+
arroyo/processing/processor.py,sha256=BtNaIxBApuUAtSH-syGJnpeKADHUafut9Ve1KMe8JM0,22389
|
|
20
20
|
arroyo/processing/strategies/__init__.py,sha256=EU_JMb54eOxMxaC5mIFpI-sAF-X2ZScbE8czBZ7bQkY,1106
|
|
21
21
|
arroyo/processing/strategies/abstract.py,sha256=nu7juEz_aQmQIH35Z8u--FBuLjkK8_LQ1hIG2xpw9AA,4808
|
|
22
22
|
arroyo/processing/strategies/batching.py,sha256=s89xC6lQpBseEaApu1iNTipXGKeO95OMwinj2VBKn9s,4778
|
|
@@ -30,15 +30,15 @@ arroyo/processing/strategies/produce.py,sha256=w4GI7KC-CGn2bLG_qPcuKJo0EbZ4PF2TJ
|
|
|
30
30
|
arroyo/processing/strategies/reduce.py,sha256=xv9bYisgHHyS8fVD1PdGi4TJsaK-4RAhMEDh4WHhYfI,3933
|
|
31
31
|
arroyo/processing/strategies/run_task.py,sha256=MGe2UcIWN7FkPc9plKzRVUNbZ7Sk0jWjw1z2vVOFI_I,2160
|
|
32
32
|
arroyo/processing/strategies/run_task_in_threads.py,sha256=f1sb2AG-BLz11X78jfhtERIkdFogrV8vtdT3pyJdkx0,6144
|
|
33
|
-
arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=
|
|
33
|
+
arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=IH21qpXKxN0DN4XBsG26piw84ZJaisx7rBup7nci8m8,37272
|
|
34
34
|
arroyo/processing/strategies/unfold.py,sha256=bi47pwmKGT0Irsx0HdB7Bhc5hb-yYqLF_xcv3g1ewTk,4231
|
|
35
35
|
arroyo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
arroyo/utils/clock.py,sha256=r2EMO4nL5qIb1xnAd1sTAk2yK1UltyUi04lk5BqWKIc,944
|
|
37
37
|
arroyo/utils/codecs.py,sha256=x-8SJK0GLTOH4c_k24K97JPjBckxyQJcSpgoEViGUy0,541
|
|
38
38
|
arroyo/utils/concurrent.py,sha256=dbdPinjqmxCQ7izUGFNbGjB3OxfSIO01bnCSTANaVOE,1187
|
|
39
39
|
arroyo/utils/logging.py,sha256=Y1PnhYcI9XNNEK0H13Ct2xKLr2Niuw0dxayc6sWnui8,606
|
|
40
|
-
arroyo/utils/metricDefs.json,sha256=
|
|
41
|
-
arroyo/utils/metric_defs.py,sha256=
|
|
40
|
+
arroyo/utils/metricDefs.json,sha256=xsDxmGMLo4nhNqMdVNefOEgEa0zLFQFIS7xQwonb8pI,13674
|
|
41
|
+
arroyo/utils/metric_defs.py,sha256=y36K1VmBj5ZI2wUcQDZiM6g0H56j8pnBjBAxqhG2Ns4,9634
|
|
42
42
|
arroyo/utils/metrics.py,sha256=kcyUR5cacoPMoU80RHSUhTMNzEcMBDpTXzcyW7yWZBk,3308
|
|
43
43
|
arroyo/utils/profiler.py,sha256=aiYy2RRPX_IiDIO7AnFM3hARaHCctS3rqUS5nrHXbSg,2452
|
|
44
44
|
arroyo/utils/retries.py,sha256=4MRhHUR7da9x1ytlo7YETo8S9HEebXmPF2-mKP4xYz0,3445
|
|
@@ -46,12 +46,12 @@ examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
46
46
|
examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
|
|
47
47
|
examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
|
|
48
48
|
examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
|
|
49
|
-
sentry_arroyo-2.
|
|
49
|
+
sentry_arroyo-2.32.1.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
|
|
50
50
|
tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
51
|
tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
|
|
52
52
|
tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
|
|
53
53
|
tests/backends/test_confluent_producer.py,sha256=KWqgvjDvqAdd0HxngdWKsUJaV7Hl1L5vAVQhBYlHeHU,3146
|
|
54
|
-
tests/backends/test_kafka.py,sha256=
|
|
54
|
+
tests/backends/test_kafka.py,sha256=wBFCKEHoP6h0uG1bgDuzk84IZmrV_UVOFCrtbxztmJg,15506
|
|
55
55
|
tests/backends/test_kafka_producer.py,sha256=LpwkqnstcCDxemlKZ0FpzNKrP-1UuXXY15P7P-spjhE,3912
|
|
56
56
|
tests/backends/test_local.py,sha256=Mfd4DFuWVSVtl1GomQ6TIoWuJNcAliKqKU0BShPlEMY,3363
|
|
57
57
|
tests/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -68,13 +68,13 @@ tests/processing/strategies/test_produce.py,sha256=UQ03quIAvfnsg8Og7US6D4ERs-J8n
|
|
|
68
68
|
tests/processing/strategies/test_reduce.py,sha256=crPFtGp7cyD8QOsmfVsyYh8KLOTzb8ryI7XtYg0vQSQ,1101
|
|
69
69
|
tests/processing/strategies/test_run_task.py,sha256=bWIy4U6QyOBtqdiJdGLMAadlEME-W2aE_ZzDbU_BsGo,2805
|
|
70
70
|
tests/processing/strategies/test_run_task_in_threads.py,sha256=5nwzF1iV6MTK1xETzWvMEOwAcZWrMOQaIPSWbiAjKFo,1457
|
|
71
|
-
tests/processing/strategies/test_run_task_with_multiprocessing.py,sha256=
|
|
71
|
+
tests/processing/strategies/test_run_task_with_multiprocessing.py,sha256=qwlFx1Twd7uAZitxVoGlFKtz1aAH9L8aS3fFfsrHhqk,20826
|
|
72
72
|
tests/processing/strategies/test_unfold.py,sha256=mbC4XhT6GkJRuC7vPR0h7jqwt4cu20q7Z114EJ6J9mQ,2009
|
|
73
73
|
tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
74
|
tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
|
|
75
75
|
tests/utils/test_metrics.py,sha256=bI0EtGgPokMQyEqX58i0-8zvLfxRP2nWaWr2wLMaJ_o,917
|
|
76
76
|
tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
|
|
77
|
-
sentry_arroyo-2.
|
|
78
|
-
sentry_arroyo-2.
|
|
79
|
-
sentry_arroyo-2.
|
|
80
|
-
sentry_arroyo-2.
|
|
77
|
+
sentry_arroyo-2.32.1.dist-info/METADATA,sha256=CnhY-1mwt8qNk1ezNLHxmAeq7U-3mZzLMksUiOARCLY,2208
|
|
78
|
+
sentry_arroyo-2.32.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
79
|
+
sentry_arroyo-2.32.1.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
|
|
80
|
+
sentry_arroyo-2.32.1.dist-info/RECORD,,
|
tests/backends/test_kafka.py
CHANGED
|
@@ -275,6 +275,70 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
|
|
|
275
275
|
processor._run_once()
|
|
276
276
|
assert consumer.paused() == []
|
|
277
277
|
|
|
278
|
+
def test_auto_commit_mode(self) -> None:
|
|
279
|
+
"""Test that auto-commit mode uses store_offsets and commits on close"""
|
|
280
|
+
group_id = uuid.uuid1().hex
|
|
281
|
+
|
|
282
|
+
with self.get_topic() as topic:
|
|
283
|
+
# Produce some messages
|
|
284
|
+
with closing(self.get_producer()) as producer:
|
|
285
|
+
for i in range(5):
|
|
286
|
+
payload = KafkaPayload(None, f"msg_{i}".encode("utf8"), [])
|
|
287
|
+
producer.produce(topic, payload).result(5.0)
|
|
288
|
+
|
|
289
|
+
# Create consumer with auto-commit enabled
|
|
290
|
+
configuration = {
|
|
291
|
+
**self.configuration,
|
|
292
|
+
"auto.offset.reset": "earliest",
|
|
293
|
+
"arroyo.enable.auto.commit": True,
|
|
294
|
+
"group.id": group_id,
|
|
295
|
+
"session.timeout.ms": 10000,
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
# First consumer: consume messages and close
|
|
299
|
+
consumed_offsets = []
|
|
300
|
+
with closing(KafkaConsumer(configuration)) as consumer:
|
|
301
|
+
consumer.subscribe([topic])
|
|
302
|
+
|
|
303
|
+
# Consume all 5 messages and stage their offsets
|
|
304
|
+
for i in range(5):
|
|
305
|
+
value = consumer.poll(10.0)
|
|
306
|
+
assert value is not None
|
|
307
|
+
consumed_offsets.append(value.offset)
|
|
308
|
+
|
|
309
|
+
# Stage offsets (will use store_offsets internally in auto-commit mode)
|
|
310
|
+
consumer.stage_offsets(value.committable)
|
|
311
|
+
|
|
312
|
+
# commit_offsets should return None in auto-commit mode
|
|
313
|
+
result = consumer.commit_offsets()
|
|
314
|
+
assert result is None
|
|
315
|
+
|
|
316
|
+
# Close will commit any stored offsets
|
|
317
|
+
|
|
318
|
+
# Verify we consumed offsets 0-4
|
|
319
|
+
assert consumed_offsets == [0, 1, 2, 3, 4]
|
|
320
|
+
|
|
321
|
+
# Second consumer: verify offsets were committed on close
|
|
322
|
+
# This consumer uses manual commit to verify the committed offset
|
|
323
|
+
with closing(
|
|
324
|
+
self.get_consumer(
|
|
325
|
+
group=group_id,
|
|
326
|
+
auto_offset_reset="earliest",
|
|
327
|
+
enable_end_of_partition=True,
|
|
328
|
+
)
|
|
329
|
+
) as consumer:
|
|
330
|
+
consumer.subscribe([topic])
|
|
331
|
+
|
|
332
|
+
# Should start from offset 5, hitting EndOfPartition immediately
|
|
333
|
+
# If we got a message with offset < 5, auto-commit didn't work
|
|
334
|
+
try:
|
|
335
|
+
consumer.poll(10.0)
|
|
336
|
+
pytest.fail("Expected EndOfPartition, but poll succeeded")
|
|
337
|
+
except EndOfPartition as e:
|
|
338
|
+
# Verify we got EndOfPartition at offset 5
|
|
339
|
+
assert e.offset == 5
|
|
340
|
+
assert e.partition == Partition(topic, 0)
|
|
341
|
+
|
|
278
342
|
|
|
279
343
|
class TestKafkaStreamsIncrementalRebalancing(TestKafkaStreams):
|
|
280
344
|
# re-test the kafka consumer with cooperative-sticky rebalancing
|
|
@@ -176,11 +176,6 @@ def test_parallel_transform_step() -> None:
|
|
|
176
176
|
0.0,
|
|
177
177
|
tags=None,
|
|
178
178
|
),
|
|
179
|
-
GaugeCall(
|
|
180
|
-
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
181
|
-
2.0,
|
|
182
|
-
tags=None,
|
|
183
|
-
),
|
|
184
179
|
IncrementCall(
|
|
185
180
|
name="arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow",
|
|
186
181
|
value=1,
|
|
@@ -191,6 +186,11 @@ def test_parallel_transform_step() -> None:
|
|
|
191
186
|
1.0,
|
|
192
187
|
tags=None,
|
|
193
188
|
),
|
|
189
|
+
GaugeCall(
|
|
190
|
+
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
191
|
+
2.0,
|
|
192
|
+
tags=None,
|
|
193
|
+
),
|
|
194
194
|
TimingCall(
|
|
195
195
|
"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg",
|
|
196
196
|
3,
|
|
@@ -206,6 +206,11 @@ def test_parallel_transform_step() -> None:
|
|
|
206
206
|
2.0,
|
|
207
207
|
tags=None,
|
|
208
208
|
),
|
|
209
|
+
GaugeCall(
|
|
210
|
+
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
211
|
+
2.0,
|
|
212
|
+
tags=None,
|
|
213
|
+
),
|
|
209
214
|
TimingCall(
|
|
210
215
|
"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg",
|
|
211
216
|
1,
|
|
@@ -275,6 +280,11 @@ def test_parallel_transform_step() -> None:
|
|
|
275
280
|
1.0,
|
|
276
281
|
tags=None,
|
|
277
282
|
),
|
|
283
|
+
GaugeCall(
|
|
284
|
+
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
285
|
+
2.0,
|
|
286
|
+
tags=None,
|
|
287
|
+
),
|
|
278
288
|
TimingCall(
|
|
279
289
|
name="arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg",
|
|
280
290
|
value=1,
|
|
@@ -290,6 +300,11 @@ def test_parallel_transform_step() -> None:
|
|
|
290
300
|
0.0,
|
|
291
301
|
tags=None,
|
|
292
302
|
),
|
|
303
|
+
GaugeCall(
|
|
304
|
+
"arroyo.strategies.run_task_with_multiprocessing.processes",
|
|
305
|
+
2.0,
|
|
306
|
+
tags=None,
|
|
307
|
+
),
|
|
293
308
|
],
|
|
294
309
|
):
|
|
295
310
|
transform_step.join()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|