PyPI - sentry-arroyo - Versions diffs - 2.31.2__py3-none-any.whl → 2.32.1__py3-none-any.whl - Mend

sentry-arroyo 2.31.2__py3-none-any.whl → 2.32.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

arroyo/backends/abstract.py CHANGED Viewed

@@ -153,10 +153,13 @@ class Consumer(Generic[TStrategyPayload], ABC):
         raise NotImplementedError
     @abstractmethod
-    def commit_offsets(self) -> Mapping[Partition, int]:
+    def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
         """
         Commit staged offsets. The return value of this method is a mapping
         of streams with their committed offsets as values.
+        When auto-commit is enabled (in Kafka consumers), returns None since
+        the broker handles commits automatically.
         """
         raise NotImplementedError

arroyo/backends/kafka/configuration.py CHANGED Viewed

@@ -237,6 +237,7 @@ def build_kafka_consumer_configuration(
     bootstrap_servers: Optional[Sequence[str]] = None,
     override_params: Optional[Mapping[str, Any]] = None,
     strict_offset_reset: Optional[bool] = None,
+    enable_auto_commit: bool = False,
 ) -> KafkaBrokerConfig:
     if auto_offset_reset is None:
@@ -252,20 +253,23 @@ def build_kafka_consumer_configuration(
         default_config, bootstrap_servers, override_params
     )
-    broker_config.update(
-        {
-            "enable.auto.commit": False,
-            "enable.auto.offset.store": False,
-            "group.id": group_id,
-            "auto.offset.reset": auto_offset_reset,
-            # this is an arroyo specific flag that only affects the consumer.
-            "arroyo.strict.offset.reset": strict_offset_reset,
-            # overridden to reduce memory usage when there's a large backlog
-            "queued.max.messages.kbytes": queued_max_messages_kbytes,
-            "queued.min.messages": queued_min_messages,
-            "enable.partition.eof": False,
-            "statistics.interval.ms": STATS_COLLECTION_FREQ_MS,
-            "stats_cb": stats_callback,
-        }
-    )
+    # Default configuration with manual commit management
+    config_update = {
+        "enable.auto.commit": False,
+        "enable.auto.offset.store": False,
+        "group.id": group_id,
+        "auto.offset.reset": auto_offset_reset,
+        # this is an arroyo specific flag that only affects the consumer.
+        "arroyo.strict.offset.reset": strict_offset_reset,
+        # this is an arroyo specific flag to enable auto-commit mode
+        "arroyo.enable.auto.commit": enable_auto_commit,
+        # overridden to reduce memory usage when there's a large backlog
+        "queued.max.messages.kbytes": queued_max_messages_kbytes,
+        "queued.min.messages": queued_min_messages,
+        "enable.partition.eof": False,
+        "statistics.interval.ms": STATS_COLLECTION_FREQ_MS,
+        "stats_cb": stats_callback,
+    }
+    broker_config.update(config_update)
     return broker_config

arroyo/backends/kafka/consumer.py CHANGED Viewed

@@ -186,6 +186,13 @@ class KafkaConsumer(Consumer[KafkaPayload]):
         if self.__strict_offset_reset is None:
             self.__strict_offset_reset = True
+        # Feature flag to enable rdkafka auto-commit with store_offsets
+        # When enabled, offsets are stored via store_offsets() and rdkafka
+        # automatically commits them periodically
+        self.__use_auto_commit = as_kafka_configuration_bool(
+            configuration.pop("arroyo.enable.auto.commit", False)
+        )
         if auto_offset_reset in {"smallest", "earliest", "beginning"}:
             self.__resolve_partition_starting_offset = (
                 self.__resolve_partition_offset_earliest
@@ -201,21 +208,32 @@ class KafkaConsumer(Consumer[KafkaPayload]):
         else:
             raise ValueError("invalid value for 'auto.offset.reset' configuration")
-        if (
-            as_kafka_configuration_bool(configuration.get("enable.auto.commit", "true"))
-            is not False
-        ):
-            raise ValueError("invalid value for 'enable.auto.commit' configuration")
+        # When auto-commit is disabled (default), we require explicit configuration
+        # When auto-commit is enabled, we allow rdkafka to handle commits
+        if not self.__use_auto_commit:
+            if (
+                as_kafka_configuration_bool(
+                    configuration.get("enable.auto.commit", "true")
+                )
+                is not False
+            ):
+                raise ValueError("invalid value for 'enable.auto.commit' configuration")
-        if (
-            as_kafka_configuration_bool(
-                configuration.get("enable.auto.offset.store", "true")
-            )
-            is not False
-        ):
-            raise ValueError(
-                "invalid value for 'enable.auto.offset.store' configuration"
-            )
+            if (
+                as_kafka_configuration_bool(
+                    configuration.get("enable.auto.offset.store", "true")
+                )
+                is not False
+            ):
+                raise ValueError(
+                    "invalid value for 'enable.auto.offset.store' configuration"
+                )
+        else:
+            # In auto-commit mode, enable auto.commit and keep auto.offset.store disabled
+            # We'll use store_offsets() manually to control which offsets get committed
+            configuration["enable.auto.commit"] = True
+            configuration["enable.auto.offset.store"] = False
+            configuration["on_commit"] = self.__on_commit_callback
         # NOTE: Offsets are explicitly managed as part of the assignment
         # callback, so preemptively resetting offsets is not enabled when
@@ -235,6 +253,19 @@ class KafkaConsumer(Consumer[KafkaPayload]):
         self.__state = KafkaConsumerState.CONSUMING
+    def __on_commit_callback(
+        self,
+        error: Optional[KafkaException],
+        partitions: Sequence[ConfluentTopicPartition],
+    ) -> None:
+        if error:
+            partition_info = [f"{p.topic}:{p.partition}" for p in partitions]
+            logger.warning(
+                "Commit failed: %s. Partitions: %s",
+                error,
+                partition_info,
+            )
     def __resolve_partition_offset_earliest(
         self, partition: ConfluentTopicPartition
     ) -> ConfluentTopicPartition:
@@ -572,7 +603,21 @@ class KafkaConsumer(Consumer[KafkaPayload]):
         # TODO: Maybe log a warning if these offsets exceed the current
         # offsets, since that's probably a side effect of an incorrect usage
         # pattern?
-        self.__staged_offsets.update(offsets)
+        if self.__use_auto_commit:
+            # When auto-commit is enabled, use store_offsets to stage offsets
+            # for rdkafka to auto-commit
+            if offsets:
+                self.__consumer.store_offsets(
+                    offsets=[
+                        ConfluentTopicPartition(
+                            partition.topic.name, partition.index, offset
+                        )
+                        for partition, offset in offsets.items()
+                    ]
+                )
+        else:
+            # Default behavior: manually track staged offsets
+            self.__staged_offsets.update(offsets)
     def __commit(self) -> Mapping[Partition, int]:
         if self.__state in {KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR}:
@@ -620,15 +665,24 @@ class KafkaConsumer(Consumer[KafkaPayload]):
         return offsets
-    def commit_offsets(self) -> Mapping[Partition, int]:
+    def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
         """
         Commit staged offsets for all partitions that this consumer is
         assigned to. The return value of this method is a mapping of
         partitions with their committed offsets as values.
+        When auto-commit is enabled, returns None since rdkafka handles
+        commits automatically and we don't track which offsets were committed.
         Raises an ``InvalidState`` if called on a closed consumer.
         """
-        return self.__commit_retry_policy.call(self.__commit)
+        if self.__use_auto_commit:
+            # When auto-commit is enabled, rdkafka commits automatically
+            # We don't track what was committed, so return None
+            # The offsets have already been staged via store_offsets()
+            return None
+        else:
+            return self.__commit_retry_policy.call(self.__commit)
     def close(self, timeout: Optional[float] = None) -> None:
         """

arroyo/backends/local/backend.py CHANGED Viewed

@@ -38,9 +38,9 @@ class LocalBroker(Generic[TStrategyPayload]):
         self.__message_storage = message_storage
         self.__clock = clock
-        self.__offsets: MutableMapping[str, MutableMapping[Partition, int]] = (
-            defaultdict(dict)
-        )
+        self.__offsets: MutableMapping[
+            str, MutableMapping[Partition, int]
+        ] = defaultdict(dict)
         # The active subscriptions are stored by consumer group as a mapping
         # between the consumer and it's subscribed topics.
@@ -326,7 +326,7 @@ class LocalConsumer(Consumer[TStrategyPayload]):
             # atomic
             self.__staged_offsets.update(offsets)
-    def commit_offsets(self) -> Mapping[Partition, int]:
+    def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
         with self.__lock:
             if self.__closed:
                 raise RuntimeError("consumer is closed")

arroyo/processing/processor.py CHANGED Viewed

@@ -465,8 +465,9 @@ class StreamProcessor(Generic[TStrategyPayload]):
                     elif self.__is_paused:
                         paused_partitions = set(self.__consumer.paused())
+                        all_partitions = set(self.__consumer.tell())
                         unpaused_partitions = (
-                            set(self.__consumer.tell()) - paused_partitions
+                            all_partitions - paused_partitions
                         )
                         if unpaused_partitions:
                             logger.warning(
@@ -484,6 +485,18 @@ class StreamProcessor(Generic[TStrategyPayload]):
                             # A paused consumer should still poll periodically to avoid it's partitions
                             # getting revoked by the broker after reaching the max.poll.interval.ms
                             # Polling a paused consumer should never yield a message.
+                            logger.warning("consumer.tell() value right before poll() is: %s", self.__consumer.tell())
+                            maybe_message = self.__consumer.poll(0.1)
+                            if maybe_message is not None:
+                                logger.warning("Received a message from partition: %s, \
+                                                consumer.tell() value right after poll() is: %s \
+                                                Some lines above consumer.tell() was called, all_partitons value was: %s \
+                                                Some lines above consumer.paused() was called, paused_partitions value is: %s",
+                                                maybe_message.partition,
+                                                self.__consumer.tell(),
+                                                all_partitions,
+                                                paused_partitions
+                                                )
                             assert self.__consumer.poll(0.1) is None
                     else:
                         time.sleep(0.01)

arroyo/processing/strategies/run_task_with_multiprocessing.py CHANGED Viewed

@@ -424,13 +424,16 @@ class RunTaskWithMultiprocessing(
     point.
     The metric ``arroyo.strategies.run_task_with_multiprocessing.processes``
-    shows how many processes arroyo was configured with.
+    shows the total number of available processes in the pool. This is recorded
+    as a gauge alongside ``batches_in_progress`` so you can calculate the ratio
+    of processes in use to total available processes.
     If those two metrics don't line up, your consumer is not bottlenecked on
     number of processes. That's a good thing, you want to have some reserve
     capacity. But it means that increasing ``num_processes`` will not make your
     consumer faster.
     Batching
     ~~~~~~~~
@@ -602,9 +605,6 @@ class RunTaskWithMultiprocessing(
         )
         self.__pool_waiting_time: Optional[float] = None
         self.__pool_waiting_log_time: Optional[float] = None
-        self.__metrics.gauge(
-            "arroyo.strategies.run_task_with_multiprocessing.processes", num_processes
-        )
         self.__closed = False
@@ -641,6 +641,10 @@ class RunTaskWithMultiprocessing(
             end_time - start_time,
         )
         self.__batches_in_progress.increment()
+        self.__metrics.gauge(
+            "arroyo.strategies.run_task_with_multiprocessing.processes",
+            self.__pool.num_processes,
+        )
         self.__metrics.timing(
             "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", len(batch)
         )
@@ -835,6 +839,10 @@ class RunTaskWithMultiprocessing(
         self.__input_blocks.append(new_input_block)
         self.__output_blocks.append(new_output_block)
         self.__batches_in_progress.decrement()
+        self.__metrics.gauge(
+            "arroyo.strategies.run_task_with_multiprocessing.processes",
+            self.__pool.num_processes,
+        )
         del self.__processes[0]

arroyo/utils/metricDefs.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time", "type": "Time", "description": "How long it took to submit a batch to multiprocessing"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to 
multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. 
This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nGauge: Shows how many processes the multiprocessing strategy is\nconfigured with."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": 
"(unitless) spent in librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. 
Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}, "arroyo.consumer.dlq_buffer.exceeded": {"name": "arroyo.consumer.dlq_buffer.exceeded", "type": "Counter", "description": "Number of times the DLQ buffer size has been exceeded, causing messages to be dropped"}, "arroyo.consumer.dlq_buffer.assigned_partitions": {"name": "arroyo.consumer.dlq_buffer.assigned_partitions", "type": "Gauge", "description": "Number of partitions being tracked in the DLQ buffer"}, "arroyo.producer.librdkafka.p99_int_latency": {"name": "arroyo.producer.librdkafka.p99_int_latency", "type": "Time", "description": 
"Internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_outbuf_latency": {"name": "arroyo.producer.librdkafka.p99_outbuf_latency", "type": "Time", "description": "Output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_rtt": {"name": "arroyo.producer.librdkafka.p99_rtt", "type": "Time", "description": "Round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_int_latency": {"name": "arroyo.producer.librdkafka.avg_int_latency", "type": "Time", "description": "Average internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_outbuf_latency": {"name": "arroyo.producer.librdkafka.avg_outbuf_latency", "type": "Time", "description": "Average output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_rtt": {"name": "arroyo.producer.librdkafka.avg_rtt", "type": "Time", "description": "Average round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.produce_status": {"name": "arroyo.producer.produce_status", "type": "Counter", "description": "Number of times the produce strategy failed to produce a message"}, "arroyo.producer.librdkafka.message_count": {"name": "arroyo.producer.librdkafka.message_count", "type": "Gauge", "description": "Producer message count metric from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_count_max": {"name": "arroyo.producer.librdkafka.message_count_max", "type": "Gauge", "description": "Maximum producer message count from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size": {"name": "arroyo.producer.librdkafka.message_size", "type": "Gauge", "description": "Producer message size from librdkafka statistics\nTagged by producer_name"}, 
"arroyo.producer.librdkafka.message_size_max": {"name": "arroyo.producer.librdkafka.message_size_max", "type": "Gauge", "description": "Maximum producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.txmsgs": {"name": "arroyo.producer.librdkafka.txmsgs", "type": "Gauge", "description": "Total number of messages transmitted from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.broker_tx": {"name": "arroyo.producer.librdkafka.broker_tx", "type": "Gauge", "description": "Total number of transmission requests from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txbytes": {"name": "arroyo.producer.librdkafka.broker_txbytes", "type": "Gauge", "description": "Total number of bytes transmitted from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_requests": {"name": "arroyo.producer.librdkafka.broker_outbuf_requests", "type": "Gauge", "description": "Number of requests awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_messages": {"name": "arroyo.producer.librdkafka.broker_outbuf_messages", "type": "Gauge", "description": "Number of messages awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_connects": {"name": "arroyo.producer.librdkafka.broker_connects", "type": "Gauge", "description": "Number of connection attempts to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_disconnects": {"name": "arroyo.producer.librdkafka.broker_disconnects", "type": "Gauge", "description": "Number of disconnections from broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txerrs": {"name": "arroyo.producer.librdkafka.broker_txerrs", 
"type": "Gauge", "description": "Total number of transmission errors from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txretries": {"name": "arroyo.producer.librdkafka.broker_txretries", "type": "Gauge", "description": "Total number of request retries from librdkafka statistics\nTagged by broker_id, producer_name"}}
1	+ {"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time", "type": "Time", "description": "How long it took to submit a batch to multiprocessing"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to 
multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. 
This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Gauge", "description": "Shows the total number of available processes in the pool."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nCounter: Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": "(unitless) spent in 
librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. 
Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}, "arroyo.consumer.dlq_buffer.exceeded": {"name": "arroyo.consumer.dlq_buffer.exceeded", "type": "Counter", "description": "Number of times the DLQ buffer size has been exceeded, causing messages to be dropped"}, "arroyo.consumer.dlq_buffer.assigned_partitions": {"name": "arroyo.consumer.dlq_buffer.assigned_partitions", "type": "Gauge", "description": "Number of partitions being tracked in the DLQ buffer"}, "arroyo.producer.librdkafka.p99_int_latency": {"name": "arroyo.producer.librdkafka.p99_int_latency", "type": "Time", "description": 
"Internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_outbuf_latency": {"name": "arroyo.producer.librdkafka.p99_outbuf_latency", "type": "Time", "description": "Output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_rtt": {"name": "arroyo.producer.librdkafka.p99_rtt", "type": "Time", "description": "Round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_int_latency": {"name": "arroyo.producer.librdkafka.avg_int_latency", "type": "Time", "description": "Average internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_outbuf_latency": {"name": "arroyo.producer.librdkafka.avg_outbuf_latency", "type": "Time", "description": "Average output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_rtt": {"name": "arroyo.producer.librdkafka.avg_rtt", "type": "Time", "description": "Average round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.produce_status": {"name": "arroyo.producer.produce_status", "type": "Counter", "description": "Number of times the produce strategy failed to produce a message"}, "arroyo.producer.librdkafka.message_count": {"name": "arroyo.producer.librdkafka.message_count", "type": "Gauge", "description": "Producer message count metric from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_count_max": {"name": "arroyo.producer.librdkafka.message_count_max", "type": "Gauge", "description": "Maximum producer message count from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size": {"name": "arroyo.producer.librdkafka.message_size", "type": "Gauge", "description": "Producer message size from librdkafka statistics\nTagged by producer_name"}, 
"arroyo.producer.librdkafka.message_size_max": {"name": "arroyo.producer.librdkafka.message_size_max", "type": "Gauge", "description": "Maximum producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.txmsgs": {"name": "arroyo.producer.librdkafka.txmsgs", "type": "Gauge", "description": "Total number of messages transmitted from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.broker_tx": {"name": "arroyo.producer.librdkafka.broker_tx", "type": "Gauge", "description": "Total number of transmission requests from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txbytes": {"name": "arroyo.producer.librdkafka.broker_txbytes", "type": "Gauge", "description": "Total number of bytes transmitted from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_requests": {"name": "arroyo.producer.librdkafka.broker_outbuf_requests", "type": "Gauge", "description": "Number of requests awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_messages": {"name": "arroyo.producer.librdkafka.broker_outbuf_messages", "type": "Gauge", "description": "Number of messages awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_connects": {"name": "arroyo.producer.librdkafka.broker_connects", "type": "Gauge", "description": "Number of connection attempts to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_disconnects": {"name": "arroyo.producer.librdkafka.broker_disconnects", "type": "Gauge", "description": "Number of disconnections from broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txerrs": {"name": "arroyo.producer.librdkafka.broker_txerrs", 
"type": "Gauge", "description": "Total number of transmission errors from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txretries": {"name": "arroyo.producer.librdkafka.broker_txretries", "type": "Gauge", "description": "Total number of request retries from librdkafka statistics\nTagged by broker_id, producer_name"}}

arroyo/utils/metric_defs.py CHANGED Viewed

@@ -40,11 +40,10 @@ MetricName = Literal[
     "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize",
     # Gauge: How many batches are being processed in parallel by multiprocessing.
     "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress",
+    # Gauge: Shows the total number of available processes in the pool.
+    "arroyo.strategies.run_task_with_multiprocessing.processes",
     # Counter: A subprocess by multiprocessing unexpectedly died.
     "sigchld.detected",
-    # Gauge: Shows how many processes the multiprocessing strategy is
-    # configured with.
-    "arroyo.strategies.run_task_with_multiprocessing.processes",
     # Counter: Incremented when the multiprocessing pool is created (or re-created).
     "arroyo.strategies.run_task_with_multiprocessing.pool.create",
     # Time: (unitless) spent polling librdkafka for new messages.

{sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sentry-arroyo
-Version: 2.31.2
+Version: 2.32.1
 Summary: Arroyo is a Python library for working with streaming data.
 Home-page: https://github.com/getsentry/arroyo
 Author: Sentry

{sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/RECORD RENAMED Viewed

@@ -5,18 +5,18 @@ arroyo/errors.py,sha256=IbtoIbz_m5QrxNRBLOxiy-hOfJQTEwNPCyq6yqedJYk,1059
 arroyo/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arroyo/types.py,sha256=sLY0x030np4UmbaW5C1KH1se7Z2pjQiPvAe5x2sXf7A,5684
 arroyo/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arroyo/backends/abstract.py,sha256=NQ5xG9rjchSUW8URl1WtSYSzMFtwRKB6wtJhWmDyR9E,9386
+arroyo/backends/abstract.py,sha256=Wy9xhE1dtFiumG8Cz3JhksJ0rF74uJWZWq10UO1rxOI,9524
 arroyo/backends/kafka/__init__.py,sha256=xgf-AqHbQkJsh73YokO2uoyyHfZf8XwUp6BULtM8stI,445
 arroyo/backends/kafka/commit.py,sha256=LPsjvX5PPXR62DT6sa5GuSF78qk9F_L--Fz4kw7-m-s,3060
-arroyo/backends/kafka/configuration.py,sha256=g1Y-vdu3OT9pqWuYRp9fw29Nmm1KBBExQLr3VhDsp90,8950
-arroyo/backends/kafka/consumer.py,sha256=hABMHwTFm0IMziSCTxDQkepvATjcKCjDB0l_NFPypKs,31093
+arroyo/backends/kafka/configuration.py,sha256=zB54w7qsyVeMVkH5MpV6F8ztXfEzIXrex6aKYX-GcqA,9141
+arroyo/backends/kafka/consumer.py,sha256=zZ2ZoDaurLDBN9l9QR0fFWL16RJcf0D8Apaa3aff22k,33534
 arroyo/backends/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arroyo/backends/local/backend.py,sha256=7odjCnzoGgdo8JHLgG1ntaXa-ZR9GteGkquiA2WAWmM,13880
+arroyo/backends/local/backend.py,sha256=hUXdCV6B5e7s4mjFC6HnIuUhjENU2tNZt5vuEOJmGZQ,13888
 arroyo/backends/local/storages/__init__.py,sha256=AGYujdAAcn3osoj9jq84IzTywYbkIDv9wRg2rLhLXeg,104
 arroyo/backends/local/storages/abstract.py,sha256=1qVQp6roxHkK6XT2aklZyZk1qq7RzcPN6Db_CA5--kg,2901
 arroyo/backends/local/storages/memory.py,sha256=AoKDsVZzBXkOJyWArKWp3vfGfU9xLlKFXE9gsJiMIzQ,2613
 arroyo/processing/__init__.py,sha256=vZVg0wJvJfoVzlzGvnL59bT6YNIRJNQ5t7oU045Qbk4,87
-arroyo/processing/processor.py,sha256=PeuCnnwp2Ehxkl3Wj_81ICKbWe45OJya1rByYcYJNlA,21323
+arroyo/processing/processor.py,sha256=BtNaIxBApuUAtSH-syGJnpeKADHUafut9Ve1KMe8JM0,22389
 arroyo/processing/strategies/__init__.py,sha256=EU_JMb54eOxMxaC5mIFpI-sAF-X2ZScbE8czBZ7bQkY,1106
 arroyo/processing/strategies/abstract.py,sha256=nu7juEz_aQmQIH35Z8u--FBuLjkK8_LQ1hIG2xpw9AA,4808
 arroyo/processing/strategies/batching.py,sha256=s89xC6lQpBseEaApu1iNTipXGKeO95OMwinj2VBKn9s,4778
@@ -30,15 +30,15 @@ arroyo/processing/strategies/produce.py,sha256=w4GI7KC-CGn2bLG_qPcuKJo0EbZ4PF2TJ
 arroyo/processing/strategies/reduce.py,sha256=xv9bYisgHHyS8fVD1PdGi4TJsaK-4RAhMEDh4WHhYfI,3933
 arroyo/processing/strategies/run_task.py,sha256=MGe2UcIWN7FkPc9plKzRVUNbZ7Sk0jWjw1z2vVOFI_I,2160
 arroyo/processing/strategies/run_task_in_threads.py,sha256=f1sb2AG-BLz11X78jfhtERIkdFogrV8vtdT3pyJdkx0,6144
-arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=a9zo8kgOkX7V-4tE1nc1bpr0XxPyWBiqO9Ao3GU-cfY,36937
+arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=IH21qpXKxN0DN4XBsG26piw84ZJaisx7rBup7nci8m8,37272
 arroyo/processing/strategies/unfold.py,sha256=bi47pwmKGT0Irsx0HdB7Bhc5hb-yYqLF_xcv3g1ewTk,4231
 arroyo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arroyo/utils/clock.py,sha256=r2EMO4nL5qIb1xnAd1sTAk2yK1UltyUi04lk5BqWKIc,944
 arroyo/utils/codecs.py,sha256=x-8SJK0GLTOH4c_k24K97JPjBckxyQJcSpgoEViGUy0,541
 arroyo/utils/concurrent.py,sha256=dbdPinjqmxCQ7izUGFNbGjB3OxfSIO01bnCSTANaVOE,1187
 arroyo/utils/logging.py,sha256=Y1PnhYcI9XNNEK0H13Ct2xKLr2Niuw0dxayc6sWnui8,606
-arroyo/utils/metricDefs.json,sha256=Bx2Y-T0JFcmcjX9s8sUHppZXgtiFcTTVORMfwecs6qk,13690
-arroyo/utils/metric_defs.py,sha256=Cjzz40bQJ_6M6B_zCG78Tdnw72QNzEhuy0u9yr-KpGI,9655
+arroyo/utils/metricDefs.json,sha256=xsDxmGMLo4nhNqMdVNefOEgEa0zLFQFIS7xQwonb8pI,13674
+arroyo/utils/metric_defs.py,sha256=y36K1VmBj5ZI2wUcQDZiM6g0H56j8pnBjBAxqhG2Ns4,9634
 arroyo/utils/metrics.py,sha256=kcyUR5cacoPMoU80RHSUhTMNzEcMBDpTXzcyW7yWZBk,3308
 arroyo/utils/profiler.py,sha256=aiYy2RRPX_IiDIO7AnFM3hARaHCctS3rqUS5nrHXbSg,2452
 arroyo/utils/retries.py,sha256=4MRhHUR7da9x1ytlo7YETo8S9HEebXmPF2-mKP4xYz0,3445
@@ -46,12 +46,12 @@ examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
 examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
 examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
-sentry_arroyo-2.31.2.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
+sentry_arroyo-2.32.1.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
 tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
 tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
 tests/backends/test_confluent_producer.py,sha256=KWqgvjDvqAdd0HxngdWKsUJaV7Hl1L5vAVQhBYlHeHU,3146
-tests/backends/test_kafka.py,sha256=6W6EA41X-ECTfcOeivhQxURnmV2Y0fYy-UzDCnJgDsU,12830
+tests/backends/test_kafka.py,sha256=wBFCKEHoP6h0uG1bgDuzk84IZmrV_UVOFCrtbxztmJg,15506
 tests/backends/test_kafka_producer.py,sha256=LpwkqnstcCDxemlKZ0FpzNKrP-1UuXXY15P7P-spjhE,3912
 tests/backends/test_local.py,sha256=Mfd4DFuWVSVtl1GomQ6TIoWuJNcAliKqKU0BShPlEMY,3363
 tests/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -68,13 +68,13 @@ tests/processing/strategies/test_produce.py,sha256=UQ03quIAvfnsg8Og7US6D4ERs-J8n
 tests/processing/strategies/test_reduce.py,sha256=crPFtGp7cyD8QOsmfVsyYh8KLOTzb8ryI7XtYg0vQSQ,1101
 tests/processing/strategies/test_run_task.py,sha256=bWIy4U6QyOBtqdiJdGLMAadlEME-W2aE_ZzDbU_BsGo,2805
 tests/processing/strategies/test_run_task_in_threads.py,sha256=5nwzF1iV6MTK1xETzWvMEOwAcZWrMOQaIPSWbiAjKFo,1457
-tests/processing/strategies/test_run_task_with_multiprocessing.py,sha256=QLEERw-26BSjN4Y7TVrPeuREFzKg6Y_t5hjEXpE9-u8,20337
+tests/processing/strategies/test_run_task_with_multiprocessing.py,sha256=qwlFx1Twd7uAZitxVoGlFKtz1aAH9L8aS3fFfsrHhqk,20826
 tests/processing/strategies/test_unfold.py,sha256=mbC4XhT6GkJRuC7vPR0h7jqwt4cu20q7Z114EJ6J9mQ,2009
 tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
 tests/utils/test_metrics.py,sha256=bI0EtGgPokMQyEqX58i0-8zvLfxRP2nWaWr2wLMaJ_o,917
 tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
-sentry_arroyo-2.31.2.dist-info/METADATA,sha256=3pXtE_zg-qbvUHSaMkpixJnBkFHmdNI-2Ln8YBlZkX0,2208
-sentry_arroyo-2.31.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sentry_arroyo-2.31.2.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
-sentry_arroyo-2.31.2.dist-info/RECORD,,
+sentry_arroyo-2.32.1.dist-info/METADATA,sha256=CnhY-1mwt8qNk1ezNLHxmAeq7U-3mZzLMksUiOARCLY,2208
+sentry_arroyo-2.32.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sentry_arroyo-2.32.1.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
+sentry_arroyo-2.32.1.dist-info/RECORD,,

tests/backends/test_kafka.py CHANGED Viewed

@@ -275,6 +275,70 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
                 processor._run_once()
                 assert consumer.paused() == []
+    def test_auto_commit_mode(self) -> None:
+        """Test that auto-commit mode uses store_offsets and commits on close"""
+        group_id = uuid.uuid1().hex
+        with self.get_topic() as topic:
+            # Produce some messages
+            with closing(self.get_producer()) as producer:
+                for i in range(5):
+                    payload = KafkaPayload(None, f"msg_{i}".encode("utf8"), [])
+                    producer.produce(topic, payload).result(5.0)
+            # Create consumer with auto-commit enabled
+            configuration = {
+                **self.configuration,
+                "auto.offset.reset": "earliest",
+                "arroyo.enable.auto.commit": True,
+                "group.id": group_id,
+                "session.timeout.ms": 10000,
+            }
+            # First consumer: consume messages and close
+            consumed_offsets = []
+            with closing(KafkaConsumer(configuration)) as consumer:
+                consumer.subscribe([topic])
+                # Consume all 5 messages and stage their offsets
+                for i in range(5):
+                    value = consumer.poll(10.0)
+                    assert value is not None
+                    consumed_offsets.append(value.offset)
+                    # Stage offsets (will use store_offsets internally in auto-commit mode)
+                    consumer.stage_offsets(value.committable)
+                # commit_offsets should return None in auto-commit mode
+                result = consumer.commit_offsets()
+                assert result is None
+                # Close will commit any stored offsets
+            # Verify we consumed offsets 0-4
+            assert consumed_offsets == [0, 1, 2, 3, 4]
+            # Second consumer: verify offsets were committed on close
+            # This consumer uses manual commit to verify the committed offset
+            with closing(
+                self.get_consumer(
+                    group=group_id,
+                    auto_offset_reset="earliest",
+                    enable_end_of_partition=True,
+                )
+            ) as consumer:
+                consumer.subscribe([topic])
+                # Should start from offset 5, hitting EndOfPartition immediately
+                # If we got a message with offset < 5, auto-commit didn't work
+                try:
+                    consumer.poll(10.0)
+                    pytest.fail("Expected EndOfPartition, but poll succeeded")
+                except EndOfPartition as e:
+                    # Verify we got EndOfPartition at offset 5
+                    assert e.offset == 5
+                    assert e.partition == Partition(topic, 0)
 class TestKafkaStreamsIncrementalRebalancing(TestKafkaStreams):
     # re-test the kafka consumer with cooperative-sticky rebalancing

tests/processing/strategies/test_run_task_with_multiprocessing.py CHANGED Viewed

@@ -176,11 +176,6 @@ def test_parallel_transform_step() -> None:
                 0.0,
                 tags=None,
             ),
-            GaugeCall(
-                "arroyo.strategies.run_task_with_multiprocessing.processes",
-                2.0,
-                tags=None,
-            ),
             IncrementCall(
                 name="arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow",
                 value=1,
@@ -191,6 +186,11 @@ def test_parallel_transform_step() -> None:
                 1.0,
                 tags=None,
             ),
+            GaugeCall(
+                "arroyo.strategies.run_task_with_multiprocessing.processes",
+                2.0,
+                tags=None,
+            ),
             TimingCall(
                 "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg",
                 3,
@@ -206,6 +206,11 @@ def test_parallel_transform_step() -> None:
                 2.0,
                 tags=None,
             ),
+            GaugeCall(
+                "arroyo.strategies.run_task_with_multiprocessing.processes",
+                2.0,
+                tags=None,
+            ),
             TimingCall(
                 "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg",
                 1,
@@ -275,6 +280,11 @@ def test_parallel_transform_step() -> None:
                 1.0,
                 tags=None,
             ),
+            GaugeCall(
+                "arroyo.strategies.run_task_with_multiprocessing.processes",
+                2.0,
+                tags=None,
+            ),
             TimingCall(
                 name="arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg",
                 value=1,
@@ -290,6 +300,11 @@ def test_parallel_transform_step() -> None:
                 0.0,
                 tags=None,
             ),
+            GaugeCall(
+                "arroyo.strategies.run_task_with_multiprocessing.processes",
+                2.0,
+                tags=None,
+            ),
         ],
     ):
         transform_step.join()

{sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{sentry_arroyo-2.31.2.dist-info → sentry_arroyo-2.32.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

sentry-arroyo 2.31.2__py3-none-any.whl → 2.32.1__py3-none-any.whl

sentry-arroyo 2.31.2py3-none-any.whl → 2.32.1py3-none-any.whl