sentry-arroyo 2.31.2__py3-none-any.whl → 2.32.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -153,10 +153,13 @@ class Consumer(Generic[TStrategyPayload], ABC):
153
153
  raise NotImplementedError
154
154
 
155
155
  @abstractmethod
156
- def commit_offsets(self) -> Mapping[Partition, int]:
156
+ def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
157
157
  """
158
158
  Commit staged offsets. The return value of this method is a mapping
159
159
  of streams with their committed offsets as values.
160
+
161
+ When auto-commit is enabled (in Kafka consumers), returns None since
162
+ the client library (rdkafka) commits the stored offsets automatically.
160
163
  """
161
164
  raise NotImplementedError
162
165
 
@@ -237,6 +237,7 @@ def build_kafka_consumer_configuration(
237
237
  bootstrap_servers: Optional[Sequence[str]] = None,
238
238
  override_params: Optional[Mapping[str, Any]] = None,
239
239
  strict_offset_reset: Optional[bool] = None,
240
+ enable_auto_commit: bool = False,
240
241
  ) -> KafkaBrokerConfig:
241
242
 
242
243
  if auto_offset_reset is None:
@@ -252,20 +253,23 @@ def build_kafka_consumer_configuration(
252
253
  default_config, bootstrap_servers, override_params
253
254
  )
254
255
 
255
- broker_config.update(
256
- {
257
- "enable.auto.commit": False,
258
- "enable.auto.offset.store": False,
259
- "group.id": group_id,
260
- "auto.offset.reset": auto_offset_reset,
261
- # this is an arroyo specific flag that only affects the consumer.
262
- "arroyo.strict.offset.reset": strict_offset_reset,
263
- # overridden to reduce memory usage when there's a large backlog
264
- "queued.max.messages.kbytes": queued_max_messages_kbytes,
265
- "queued.min.messages": queued_min_messages,
266
- "enable.partition.eof": False,
267
- "statistics.interval.ms": STATS_COLLECTION_FREQ_MS,
268
- "stats_cb": stats_callback,
269
- }
270
- )
256
+ # Default configuration with manual commit management
257
+ config_update = {
258
+ "enable.auto.commit": False,
259
+ "enable.auto.offset.store": False,
260
+ "group.id": group_id,
261
+ "auto.offset.reset": auto_offset_reset,
262
+ # this is an arroyo specific flag that only affects the consumer.
263
+ "arroyo.strict.offset.reset": strict_offset_reset,
264
+ # this is an arroyo specific flag to enable auto-commit mode
265
+ "arroyo.enable.auto.commit": enable_auto_commit,
266
+ # overridden to reduce memory usage when there's a large backlog
267
+ "queued.max.messages.kbytes": queued_max_messages_kbytes,
268
+ "queued.min.messages": queued_min_messages,
269
+ "enable.partition.eof": False,
270
+ "statistics.interval.ms": STATS_COLLECTION_FREQ_MS,
271
+ "stats_cb": stats_callback,
272
+ }
273
+
274
+ broker_config.update(config_update)
271
275
  return broker_config
@@ -186,6 +186,13 @@ class KafkaConsumer(Consumer[KafkaPayload]):
186
186
  if self.__strict_offset_reset is None:
187
187
  self.__strict_offset_reset = True
188
188
 
189
+ # Feature flag to enable rdkafka auto-commit with store_offsets
190
+ # When enabled, offsets are stored via store_offsets() and rdkafka
191
+ # automatically commits them periodically
192
+ self.__use_auto_commit = as_kafka_configuration_bool(
193
+ configuration.pop("arroyo.enable.auto.commit", False)
194
+ )
195
+
189
196
  if auto_offset_reset in {"smallest", "earliest", "beginning"}:
190
197
  self.__resolve_partition_starting_offset = (
191
198
  self.__resolve_partition_offset_earliest
@@ -201,21 +208,32 @@ class KafkaConsumer(Consumer[KafkaPayload]):
201
208
  else:
202
209
  raise ValueError("invalid value for 'auto.offset.reset' configuration")
203
210
 
204
- if (
205
- as_kafka_configuration_bool(configuration.get("enable.auto.commit", "true"))
206
- is not False
207
- ):
208
- raise ValueError("invalid value for 'enable.auto.commit' configuration")
211
+ # When auto-commit is disabled (default), we require explicit configuration
212
+ # When auto-commit is enabled, we allow rdkafka to handle commits
213
+ if not self.__use_auto_commit:
214
+ if (
215
+ as_kafka_configuration_bool(
216
+ configuration.get("enable.auto.commit", "true")
217
+ )
218
+ is not False
219
+ ):
220
+ raise ValueError("invalid value for 'enable.auto.commit' configuration")
209
221
 
210
- if (
211
- as_kafka_configuration_bool(
212
- configuration.get("enable.auto.offset.store", "true")
213
- )
214
- is not False
215
- ):
216
- raise ValueError(
217
- "invalid value for 'enable.auto.offset.store' configuration"
218
- )
222
+ if (
223
+ as_kafka_configuration_bool(
224
+ configuration.get("enable.auto.offset.store", "true")
225
+ )
226
+ is not False
227
+ ):
228
+ raise ValueError(
229
+ "invalid value for 'enable.auto.offset.store' configuration"
230
+ )
231
+ else:
232
+ # In auto-commit mode, enable auto.commit and keep auto.offset.store disabled
233
+ # We'll use store_offsets() manually to control which offsets get committed
234
+ configuration["enable.auto.commit"] = True
235
+ configuration["enable.auto.offset.store"] = False
236
+ configuration["on_commit"] = self.__on_commit_callback
219
237
 
220
238
  # NOTE: Offsets are explicitly managed as part of the assignment
221
239
  # callback, so preemptively resetting offsets is not enabled when
@@ -235,6 +253,19 @@ class KafkaConsumer(Consumer[KafkaPayload]):
235
253
 
236
254
  self.__state = KafkaConsumerState.CONSUMING
237
255
 
256
+ def __on_commit_callback(
257
+ self,
258
+ error: Optional[KafkaException],
259
+ partitions: Sequence[ConfluentTopicPartition],
260
+ ) -> None:
261
+ if error:
262
+ partition_info = [f"{p.topic}:{p.partition}" for p in partitions]
263
+ logger.warning(
264
+ "Commit failed: %s. Partitions: %s",
265
+ error,
266
+ partition_info,
267
+ )
268
+
238
269
  def __resolve_partition_offset_earliest(
239
270
  self, partition: ConfluentTopicPartition
240
271
  ) -> ConfluentTopicPartition:
@@ -572,7 +603,21 @@ class KafkaConsumer(Consumer[KafkaPayload]):
572
603
  # TODO: Maybe log a warning if these offsets exceed the current
573
604
  # offsets, since that's probably a side effect of an incorrect usage
574
605
  # pattern?
575
- self.__staged_offsets.update(offsets)
606
+ if self.__use_auto_commit:
607
+ # When auto-commit is enabled, use store_offsets to stage offsets
608
+ # for rdkafka to auto-commit
609
+ if offsets:
610
+ self.__consumer.store_offsets(
611
+ offsets=[
612
+ ConfluentTopicPartition(
613
+ partition.topic.name, partition.index, offset
614
+ )
615
+ for partition, offset in offsets.items()
616
+ ]
617
+ )
618
+ else:
619
+ # Default behavior: manually track staged offsets
620
+ self.__staged_offsets.update(offsets)
576
621
 
577
622
  def __commit(self) -> Mapping[Partition, int]:
578
623
  if self.__state in {KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR}:
@@ -620,15 +665,24 @@ class KafkaConsumer(Consumer[KafkaPayload]):
620
665
 
621
666
  return offsets
622
667
 
623
- def commit_offsets(self) -> Mapping[Partition, int]:
668
+ def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
624
669
  """
625
670
  Commit staged offsets for all partitions that this consumer is
626
671
  assigned to. The return value of this method is a mapping of
627
672
  partitions with their committed offsets as values.
628
673
 
674
+ When auto-commit is enabled, returns None since rdkafka handles
675
+ commits automatically and we don't track which offsets were committed.
676
+
629
677
  Raises an ``InvalidState`` if called on a closed consumer.
630
678
  """
631
- return self.__commit_retry_policy.call(self.__commit)
679
+ if self.__use_auto_commit:
680
+ # When auto-commit is enabled, rdkafka commits automatically
681
+ # We don't track what was committed, so return None
682
+ # The offsets have already been staged via store_offsets()
683
+ return None
684
+ else:
685
+ return self.__commit_retry_policy.call(self.__commit)
632
686
 
633
687
  def close(self, timeout: Optional[float] = None) -> None:
634
688
  """
@@ -38,9 +38,9 @@ class LocalBroker(Generic[TStrategyPayload]):
38
38
  self.__message_storage = message_storage
39
39
  self.__clock = clock
40
40
 
41
- self.__offsets: MutableMapping[str, MutableMapping[Partition, int]] = (
42
- defaultdict(dict)
43
- )
41
+ self.__offsets: MutableMapping[
42
+ str, MutableMapping[Partition, int]
43
+ ] = defaultdict(dict)
44
44
 
45
45
  # The active subscriptions are stored by consumer group as a mapping
46
46
  # between the consumer and its subscribed topics.
@@ -326,7 +326,7 @@ class LocalConsumer(Consumer[TStrategyPayload]):
326
326
  # atomic
327
327
  self.__staged_offsets.update(offsets)
328
328
 
329
- def commit_offsets(self) -> Mapping[Partition, int]:
329
+ def commit_offsets(self) -> Optional[Mapping[Partition, int]]:
330
330
  with self.__lock:
331
331
  if self.__closed:
332
332
  raise RuntimeError("consumer is closed")
@@ -465,8 +465,9 @@ class StreamProcessor(Generic[TStrategyPayload]):
465
465
 
466
466
  elif self.__is_paused:
467
467
  paused_partitions = set(self.__consumer.paused())
468
+ all_partitions = set(self.__consumer.tell())
468
469
  unpaused_partitions = (
469
- set(self.__consumer.tell()) - paused_partitions
470
+ all_partitions - paused_partitions
470
471
  )
471
472
  if unpaused_partitions:
472
473
  logger.warning(
@@ -484,6 +485,18 @@ class StreamProcessor(Generic[TStrategyPayload]):
484
485
  # A paused consumer should still poll periodically to avoid its partitions
485
486
  # getting revoked by the broker after reaching the max.poll.interval.ms
486
487
  # Polling a paused consumer should never yield a message.
488
+ logger.warning("consumer.tell() value right before poll() is: %s", self.__consumer.tell())
489
+ maybe_message = self.__consumer.poll(0.1)
490
+ if maybe_message is not None:
491
+ logger.warning("Received a message from partition: %s, \
492
+ consumer.tell() value right after poll() is: %s \
493
+ Some lines above consumer.tell() was called, all_partitons value was: %s \
494
+ Some lines above consumer.paused() was called, paused_partitions value is: %s",
495
+ maybe_message.partition,
496
+ self.__consumer.tell(),
497
+ all_partitions,
498
+ paused_partitions
499
+ )
487
500
  assert self.__consumer.poll(0.1) is None
488
501
  else:
489
502
  time.sleep(0.01)
@@ -424,13 +424,16 @@ class RunTaskWithMultiprocessing(
424
424
  point.
425
425
 
426
426
  The metric ``arroyo.strategies.run_task_with_multiprocessing.processes``
427
- shows how many processes arroyo was configured with.
427
+ shows the total number of available processes in the pool. This is recorded
428
+ as a gauge alongside ``batches_in_progress`` so you can calculate the ratio
429
+ of processes in use to total available processes.
428
430
 
429
431
  If those two metrics don't line up, your consumer is not bottlenecked on
430
432
  number of processes. That's a good thing, you want to have some reserve
431
433
  capacity. But it means that increasing ``num_processes`` will not make your
432
434
  consumer faster.
433
435
 
436
+
434
437
  Batching
435
438
  ~~~~~~~~
436
439
 
@@ -602,9 +605,6 @@ class RunTaskWithMultiprocessing(
602
605
  )
603
606
  self.__pool_waiting_time: Optional[float] = None
604
607
  self.__pool_waiting_log_time: Optional[float] = None
605
- self.__metrics.gauge(
606
- "arroyo.strategies.run_task_with_multiprocessing.processes", num_processes
607
- )
608
608
 
609
609
  self.__closed = False
610
610
 
@@ -641,6 +641,10 @@ class RunTaskWithMultiprocessing(
641
641
  end_time - start_time,
642
642
  )
643
643
  self.__batches_in_progress.increment()
644
+ self.__metrics.gauge(
645
+ "arroyo.strategies.run_task_with_multiprocessing.processes",
646
+ self.__pool.num_processes,
647
+ )
644
648
  self.__metrics.timing(
645
649
  "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", len(batch)
646
650
  )
@@ -835,6 +839,10 @@ class RunTaskWithMultiprocessing(
835
839
  self.__input_blocks.append(new_input_block)
836
840
  self.__output_blocks.append(new_output_block)
837
841
  self.__batches_in_progress.decrement()
842
+ self.__metrics.gauge(
843
+ "arroyo.strategies.run_task_with_multiprocessing.processes",
844
+ self.__pool.num_processes,
845
+ )
838
846
 
839
847
  del self.__processes[0]
840
848
 
@@ -1 +1 @@
1
- {"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time", "type": "Time", "description": "How long it took to submit a batch to multiprocessing"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to 
multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. 
This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nGauge: Shows how many processes the multiprocessing strategy is\nconfigured with."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": 
"(unitless) spent in librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. 
Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}, "arroyo.consumer.dlq_buffer.exceeded": {"name": "arroyo.consumer.dlq_buffer.exceeded", "type": "Counter", "description": "Number of times the DLQ buffer size has been exceeded, causing messages to be dropped"}, "arroyo.consumer.dlq_buffer.assigned_partitions": {"name": "arroyo.consumer.dlq_buffer.assigned_partitions", "type": "Gauge", "description": "Number of partitions being tracked in the DLQ buffer"}, "arroyo.producer.librdkafka.p99_int_latency": {"name": "arroyo.producer.librdkafka.p99_int_latency", "type": "Time", "description": 
"Internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_outbuf_latency": {"name": "arroyo.producer.librdkafka.p99_outbuf_latency", "type": "Time", "description": "Output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_rtt": {"name": "arroyo.producer.librdkafka.p99_rtt", "type": "Time", "description": "Round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_int_latency": {"name": "arroyo.producer.librdkafka.avg_int_latency", "type": "Time", "description": "Average internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_outbuf_latency": {"name": "arroyo.producer.librdkafka.avg_outbuf_latency", "type": "Time", "description": "Average output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_rtt": {"name": "arroyo.producer.librdkafka.avg_rtt", "type": "Time", "description": "Average round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.produce_status": {"name": "arroyo.producer.produce_status", "type": "Counter", "description": "Number of times the produce strategy failed to produce a message"}, "arroyo.producer.librdkafka.message_count": {"name": "arroyo.producer.librdkafka.message_count", "type": "Gauge", "description": "Producer message count metric from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_count_max": {"name": "arroyo.producer.librdkafka.message_count_max", "type": "Gauge", "description": "Maximum producer message count from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size": {"name": "arroyo.producer.librdkafka.message_size", "type": "Gauge", "description": "Producer message size from librdkafka statistics\nTagged by producer_name"}, 
"arroyo.producer.librdkafka.message_size_max": {"name": "arroyo.producer.librdkafka.message_size_max", "type": "Gauge", "description": "Maximum producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.txmsgs": {"name": "arroyo.producer.librdkafka.txmsgs", "type": "Gauge", "description": "Total number of messages transmitted from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.broker_tx": {"name": "arroyo.producer.librdkafka.broker_tx", "type": "Gauge", "description": "Total number of transmission requests from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txbytes": {"name": "arroyo.producer.librdkafka.broker_txbytes", "type": "Gauge", "description": "Total number of bytes transmitted from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_requests": {"name": "arroyo.producer.librdkafka.broker_outbuf_requests", "type": "Gauge", "description": "Number of requests awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_messages": {"name": "arroyo.producer.librdkafka.broker_outbuf_messages", "type": "Gauge", "description": "Number of messages awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_connects": {"name": "arroyo.producer.librdkafka.broker_connects", "type": "Gauge", "description": "Number of connection attempts to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_disconnects": {"name": "arroyo.producer.librdkafka.broker_disconnects", "type": "Gauge", "description": "Number of disconnections from broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txerrs": {"name": "arroyo.producer.librdkafka.broker_txerrs", 
"type": "Gauge", "description": "Total number of transmission errors from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txretries": {"name": "arroyo.producer.librdkafka.broker_txretries", "type": "Gauge", "description": "Total number of request retries from librdkafka statistics\nTagged by broker_id, producer_name"}}
1
+ {"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time", "type": "Time", "description": "How long it took to submit a batch to multiprocessing"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to 
multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. 
This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Gauge", "description": "Shows the total number of available processes in the pool."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nCounter: Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": "(unitless) spent in 
librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. 
Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}, "arroyo.consumer.dlq_buffer.exceeded": {"name": "arroyo.consumer.dlq_buffer.exceeded", "type": "Counter", "description": "Number of times the DLQ buffer size has been exceeded, causing messages to be dropped"}, "arroyo.consumer.dlq_buffer.assigned_partitions": {"name": "arroyo.consumer.dlq_buffer.assigned_partitions", "type": "Gauge", "description": "Number of partitions being tracked in the DLQ buffer"}, "arroyo.producer.librdkafka.p99_int_latency": {"name": "arroyo.producer.librdkafka.p99_int_latency", "type": "Time", "description": 
"Internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_outbuf_latency": {"name": "arroyo.producer.librdkafka.p99_outbuf_latency", "type": "Time", "description": "Output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_rtt": {"name": "arroyo.producer.librdkafka.p99_rtt", "type": "Time", "description": "Round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_int_latency": {"name": "arroyo.producer.librdkafka.avg_int_latency", "type": "Time", "description": "Average internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_outbuf_latency": {"name": "arroyo.producer.librdkafka.avg_outbuf_latency", "type": "Time", "description": "Average output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_rtt": {"name": "arroyo.producer.librdkafka.avg_rtt", "type": "Time", "description": "Average round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.produce_status": {"name": "arroyo.producer.produce_status", "type": "Counter", "description": "Number of times the produce strategy failed to produce a message"}, "arroyo.producer.librdkafka.message_count": {"name": "arroyo.producer.librdkafka.message_count", "type": "Gauge", "description": "Producer message count metric from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_count_max": {"name": "arroyo.producer.librdkafka.message_count_max", "type": "Gauge", "description": "Maximum producer message count from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size": {"name": "arroyo.producer.librdkafka.message_size", "type": "Gauge", "description": "Producer message size from librdkafka statistics\nTagged by producer_name"}, 
"arroyo.producer.librdkafka.message_size_max": {"name": "arroyo.producer.librdkafka.message_size_max", "type": "Gauge", "description": "Maximum producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.txmsgs": {"name": "arroyo.producer.librdkafka.txmsgs", "type": "Gauge", "description": "Total number of messages transmitted from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.broker_tx": {"name": "arroyo.producer.librdkafka.broker_tx", "type": "Gauge", "description": "Total number of transmission requests from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txbytes": {"name": "arroyo.producer.librdkafka.broker_txbytes", "type": "Gauge", "description": "Total number of bytes transmitted from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_requests": {"name": "arroyo.producer.librdkafka.broker_outbuf_requests", "type": "Gauge", "description": "Number of requests awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_messages": {"name": "arroyo.producer.librdkafka.broker_outbuf_messages", "type": "Gauge", "description": "Number of messages awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_connects": {"name": "arroyo.producer.librdkafka.broker_connects", "type": "Gauge", "description": "Number of connection attempts to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_disconnects": {"name": "arroyo.producer.librdkafka.broker_disconnects", "type": "Gauge", "description": "Number of disconnections from broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txerrs": {"name": "arroyo.producer.librdkafka.broker_txerrs", 
"type": "Gauge", "description": "Total number of transmission errors from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txretries": {"name": "arroyo.producer.librdkafka.broker_txretries", "type": "Gauge", "description": "Total number of request retries from librdkafka statistics\nTagged by broker_id, producer_name"}}
@@ -40,11 +40,10 @@ MetricName = Literal[
40
40
  "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize",
41
41
  # Gauge: How many batches are being processed in parallel by multiprocessing.
42
42
  "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress",
43
+ # Gauge: Shows the total number of available processes in the pool.
44
+ "arroyo.strategies.run_task_with_multiprocessing.processes",
43
45
  # Counter: A subprocess by multiprocessing unexpectedly died.
44
46
  "sigchld.detected",
45
- # Gauge: Shows how many processes the multiprocessing strategy is
46
- # configured with.
47
- "arroyo.strategies.run_task_with_multiprocessing.processes",
48
47
  # Counter: Incremented when the multiprocessing pool is created (or re-created).
49
48
  "arroyo.strategies.run_task_with_multiprocessing.pool.create",
50
49
  # Time: (unitless) spent polling librdkafka for new messages.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sentry-arroyo
3
- Version: 2.31.2
3
+ Version: 2.32.1
4
4
  Summary: Arroyo is a Python library for working with streaming data.
5
5
  Home-page: https://github.com/getsentry/arroyo
6
6
  Author: Sentry
@@ -5,18 +5,18 @@ arroyo/errors.py,sha256=IbtoIbz_m5QrxNRBLOxiy-hOfJQTEwNPCyq6yqedJYk,1059
5
5
  arroyo/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  arroyo/types.py,sha256=sLY0x030np4UmbaW5C1KH1se7Z2pjQiPvAe5x2sXf7A,5684
7
7
  arroyo/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- arroyo/backends/abstract.py,sha256=NQ5xG9rjchSUW8URl1WtSYSzMFtwRKB6wtJhWmDyR9E,9386
8
+ arroyo/backends/abstract.py,sha256=Wy9xhE1dtFiumG8Cz3JhksJ0rF74uJWZWq10UO1rxOI,9524
9
9
  arroyo/backends/kafka/__init__.py,sha256=xgf-AqHbQkJsh73YokO2uoyyHfZf8XwUp6BULtM8stI,445
10
10
  arroyo/backends/kafka/commit.py,sha256=LPsjvX5PPXR62DT6sa5GuSF78qk9F_L--Fz4kw7-m-s,3060
11
- arroyo/backends/kafka/configuration.py,sha256=g1Y-vdu3OT9pqWuYRp9fw29Nmm1KBBExQLr3VhDsp90,8950
12
- arroyo/backends/kafka/consumer.py,sha256=hABMHwTFm0IMziSCTxDQkepvATjcKCjDB0l_NFPypKs,31093
11
+ arroyo/backends/kafka/configuration.py,sha256=zB54w7qsyVeMVkH5MpV6F8ztXfEzIXrex6aKYX-GcqA,9141
12
+ arroyo/backends/kafka/consumer.py,sha256=zZ2ZoDaurLDBN9l9QR0fFWL16RJcf0D8Apaa3aff22k,33534
13
13
  arroyo/backends/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- arroyo/backends/local/backend.py,sha256=7odjCnzoGgdo8JHLgG1ntaXa-ZR9GteGkquiA2WAWmM,13880
14
+ arroyo/backends/local/backend.py,sha256=hUXdCV6B5e7s4mjFC6HnIuUhjENU2tNZt5vuEOJmGZQ,13888
15
15
  arroyo/backends/local/storages/__init__.py,sha256=AGYujdAAcn3osoj9jq84IzTywYbkIDv9wRg2rLhLXeg,104
16
16
  arroyo/backends/local/storages/abstract.py,sha256=1qVQp6roxHkK6XT2aklZyZk1qq7RzcPN6Db_CA5--kg,2901
17
17
  arroyo/backends/local/storages/memory.py,sha256=AoKDsVZzBXkOJyWArKWp3vfGfU9xLlKFXE9gsJiMIzQ,2613
18
18
  arroyo/processing/__init__.py,sha256=vZVg0wJvJfoVzlzGvnL59bT6YNIRJNQ5t7oU045Qbk4,87
19
- arroyo/processing/processor.py,sha256=PeuCnnwp2Ehxkl3Wj_81ICKbWe45OJya1rByYcYJNlA,21323
19
+ arroyo/processing/processor.py,sha256=BtNaIxBApuUAtSH-syGJnpeKADHUafut9Ve1KMe8JM0,22389
20
20
  arroyo/processing/strategies/__init__.py,sha256=EU_JMb54eOxMxaC5mIFpI-sAF-X2ZScbE8czBZ7bQkY,1106
21
21
  arroyo/processing/strategies/abstract.py,sha256=nu7juEz_aQmQIH35Z8u--FBuLjkK8_LQ1hIG2xpw9AA,4808
22
22
  arroyo/processing/strategies/batching.py,sha256=s89xC6lQpBseEaApu1iNTipXGKeO95OMwinj2VBKn9s,4778
@@ -30,15 +30,15 @@ arroyo/processing/strategies/produce.py,sha256=w4GI7KC-CGn2bLG_qPcuKJo0EbZ4PF2TJ
30
30
  arroyo/processing/strategies/reduce.py,sha256=xv9bYisgHHyS8fVD1PdGi4TJsaK-4RAhMEDh4WHhYfI,3933
31
31
  arroyo/processing/strategies/run_task.py,sha256=MGe2UcIWN7FkPc9plKzRVUNbZ7Sk0jWjw1z2vVOFI_I,2160
32
32
  arroyo/processing/strategies/run_task_in_threads.py,sha256=f1sb2AG-BLz11X78jfhtERIkdFogrV8vtdT3pyJdkx0,6144
33
- arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=a9zo8kgOkX7V-4tE1nc1bpr0XxPyWBiqO9Ao3GU-cfY,36937
33
+ arroyo/processing/strategies/run_task_with_multiprocessing.py,sha256=IH21qpXKxN0DN4XBsG26piw84ZJaisx7rBup7nci8m8,37272
34
34
  arroyo/processing/strategies/unfold.py,sha256=bi47pwmKGT0Irsx0HdB7Bhc5hb-yYqLF_xcv3g1ewTk,4231
35
35
  arroyo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  arroyo/utils/clock.py,sha256=r2EMO4nL5qIb1xnAd1sTAk2yK1UltyUi04lk5BqWKIc,944
37
37
  arroyo/utils/codecs.py,sha256=x-8SJK0GLTOH4c_k24K97JPjBckxyQJcSpgoEViGUy0,541
38
38
  arroyo/utils/concurrent.py,sha256=dbdPinjqmxCQ7izUGFNbGjB3OxfSIO01bnCSTANaVOE,1187
39
39
  arroyo/utils/logging.py,sha256=Y1PnhYcI9XNNEK0H13Ct2xKLr2Niuw0dxayc6sWnui8,606
40
- arroyo/utils/metricDefs.json,sha256=Bx2Y-T0JFcmcjX9s8sUHppZXgtiFcTTVORMfwecs6qk,13690
41
- arroyo/utils/metric_defs.py,sha256=Cjzz40bQJ_6M6B_zCG78Tdnw72QNzEhuy0u9yr-KpGI,9655
40
+ arroyo/utils/metricDefs.json,sha256=xsDxmGMLo4nhNqMdVNefOEgEa0zLFQFIS7xQwonb8pI,13674
41
+ arroyo/utils/metric_defs.py,sha256=y36K1VmBj5ZI2wUcQDZiM6g0H56j8pnBjBAxqhG2Ns4,9634
42
42
  arroyo/utils/metrics.py,sha256=kcyUR5cacoPMoU80RHSUhTMNzEcMBDpTXzcyW7yWZBk,3308
43
43
  arroyo/utils/profiler.py,sha256=aiYy2RRPX_IiDIO7AnFM3hARaHCctS3rqUS5nrHXbSg,2452
44
44
  arroyo/utils/retries.py,sha256=4MRhHUR7da9x1ytlo7YETo8S9HEebXmPF2-mKP4xYz0,3445
@@ -46,12 +46,12 @@ examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
46
46
  examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
47
47
  examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
48
48
  examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
49
- sentry_arroyo-2.31.2.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
49
+ sentry_arroyo-2.32.1.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
50
50
  tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
52
52
  tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
53
53
  tests/backends/test_confluent_producer.py,sha256=KWqgvjDvqAdd0HxngdWKsUJaV7Hl1L5vAVQhBYlHeHU,3146
54
- tests/backends/test_kafka.py,sha256=6W6EA41X-ECTfcOeivhQxURnmV2Y0fYy-UzDCnJgDsU,12830
54
+ tests/backends/test_kafka.py,sha256=wBFCKEHoP6h0uG1bgDuzk84IZmrV_UVOFCrtbxztmJg,15506
55
55
  tests/backends/test_kafka_producer.py,sha256=LpwkqnstcCDxemlKZ0FpzNKrP-1UuXXY15P7P-spjhE,3912
56
56
  tests/backends/test_local.py,sha256=Mfd4DFuWVSVtl1GomQ6TIoWuJNcAliKqKU0BShPlEMY,3363
57
57
  tests/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -68,13 +68,13 @@ tests/processing/strategies/test_produce.py,sha256=UQ03quIAvfnsg8Og7US6D4ERs-J8n
68
68
  tests/processing/strategies/test_reduce.py,sha256=crPFtGp7cyD8QOsmfVsyYh8KLOTzb8ryI7XtYg0vQSQ,1101
69
69
  tests/processing/strategies/test_run_task.py,sha256=bWIy4U6QyOBtqdiJdGLMAadlEME-W2aE_ZzDbU_BsGo,2805
70
70
  tests/processing/strategies/test_run_task_in_threads.py,sha256=5nwzF1iV6MTK1xETzWvMEOwAcZWrMOQaIPSWbiAjKFo,1457
71
- tests/processing/strategies/test_run_task_with_multiprocessing.py,sha256=QLEERw-26BSjN4Y7TVrPeuREFzKg6Y_t5hjEXpE9-u8,20337
71
+ tests/processing/strategies/test_run_task_with_multiprocessing.py,sha256=qwlFx1Twd7uAZitxVoGlFKtz1aAH9L8aS3fFfsrHhqk,20826
72
72
  tests/processing/strategies/test_unfold.py,sha256=mbC4XhT6GkJRuC7vPR0h7jqwt4cu20q7Z114EJ6J9mQ,2009
73
73
  tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
74
  tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
75
75
  tests/utils/test_metrics.py,sha256=bI0EtGgPokMQyEqX58i0-8zvLfxRP2nWaWr2wLMaJ_o,917
76
76
  tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
77
- sentry_arroyo-2.31.2.dist-info/METADATA,sha256=3pXtE_zg-qbvUHSaMkpixJnBkFHmdNI-2Ln8YBlZkX0,2208
78
- sentry_arroyo-2.31.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
- sentry_arroyo-2.31.2.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
80
- sentry_arroyo-2.31.2.dist-info/RECORD,,
77
+ sentry_arroyo-2.32.1.dist-info/METADATA,sha256=CnhY-1mwt8qNk1ezNLHxmAeq7U-3mZzLMksUiOARCLY,2208
78
+ sentry_arroyo-2.32.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
+ sentry_arroyo-2.32.1.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
80
+ sentry_arroyo-2.32.1.dist-info/RECORD,,
@@ -275,6 +275,70 @@ class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
275
275
  processor._run_once()
276
276
  assert consumer.paused() == []
277
277
 
278
+ def test_auto_commit_mode(self) -> None:
279
+ """Test that auto-commit mode uses store_offsets and commits on close"""
280
+ group_id = uuid.uuid1().hex
281
+
282
+ with self.get_topic() as topic:
283
+ # Produce some messages
284
+ with closing(self.get_producer()) as producer:
285
+ for i in range(5):
286
+ payload = KafkaPayload(None, f"msg_{i}".encode("utf8"), [])
287
+ producer.produce(topic, payload).result(5.0)
288
+
289
+ # Create consumer with auto-commit enabled
290
+ configuration = {
291
+ **self.configuration,
292
+ "auto.offset.reset": "earliest",
293
+ "arroyo.enable.auto.commit": True,
294
+ "group.id": group_id,
295
+ "session.timeout.ms": 10000,
296
+ }
297
+
298
+ # First consumer: consume messages and close
299
+ consumed_offsets = []
300
+ with closing(KafkaConsumer(configuration)) as consumer:
301
+ consumer.subscribe([topic])
302
+
303
+ # Consume all 5 messages and stage their offsets
304
+ for i in range(5):
305
+ value = consumer.poll(10.0)
306
+ assert value is not None
307
+ consumed_offsets.append(value.offset)
308
+
309
+ # Stage offsets (will use store_offsets internally in auto-commit mode)
310
+ consumer.stage_offsets(value.committable)
311
+
312
+ # commit_offsets should return None in auto-commit mode
313
+ result = consumer.commit_offsets()
314
+ assert result is None
315
+
316
+ # Close will commit any stored offsets
317
+
318
+ # Verify we consumed offsets 0-4
319
+ assert consumed_offsets == [0, 1, 2, 3, 4]
320
+
321
+ # Second consumer: verify offsets were committed on close
322
+ # This consumer uses manual commit to verify the committed offset
323
+ with closing(
324
+ self.get_consumer(
325
+ group=group_id,
326
+ auto_offset_reset="earliest",
327
+ enable_end_of_partition=True,
328
+ )
329
+ ) as consumer:
330
+ consumer.subscribe([topic])
331
+
332
+ # Should start from offset 5, hitting EndOfPartition immediately
333
+ # If we got a message with offset < 5, auto-commit didn't work
334
+ try:
335
+ consumer.poll(10.0)
336
+ pytest.fail("Expected EndOfPartition, but poll succeeded")
337
+ except EndOfPartition as e:
338
+ # Verify we got EndOfPartition at offset 5
339
+ assert e.offset == 5
340
+ assert e.partition == Partition(topic, 0)
341
+
278
342
 
279
343
  class TestKafkaStreamsIncrementalRebalancing(TestKafkaStreams):
280
344
  # re-test the kafka consumer with cooperative-sticky rebalancing
@@ -176,11 +176,6 @@ def test_parallel_transform_step() -> None:
176
176
  0.0,
177
177
  tags=None,
178
178
  ),
179
- GaugeCall(
180
- "arroyo.strategies.run_task_with_multiprocessing.processes",
181
- 2.0,
182
- tags=None,
183
- ),
184
179
  IncrementCall(
185
180
  name="arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow",
186
181
  value=1,
@@ -191,6 +186,11 @@ def test_parallel_transform_step() -> None:
191
186
  1.0,
192
187
  tags=None,
193
188
  ),
189
+ GaugeCall(
190
+ "arroyo.strategies.run_task_with_multiprocessing.processes",
191
+ 2.0,
192
+ tags=None,
193
+ ),
194
194
  TimingCall(
195
195
  "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg",
196
196
  3,
@@ -206,6 +206,11 @@ def test_parallel_transform_step() -> None:
206
206
  2.0,
207
207
  tags=None,
208
208
  ),
209
+ GaugeCall(
210
+ "arroyo.strategies.run_task_with_multiprocessing.processes",
211
+ 2.0,
212
+ tags=None,
213
+ ),
209
214
  TimingCall(
210
215
  "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg",
211
216
  1,
@@ -275,6 +280,11 @@ def test_parallel_transform_step() -> None:
275
280
  1.0,
276
281
  tags=None,
277
282
  ),
283
+ GaugeCall(
284
+ "arroyo.strategies.run_task_with_multiprocessing.processes",
285
+ 2.0,
286
+ tags=None,
287
+ ),
278
288
  TimingCall(
279
289
  name="arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg",
280
290
  value=1,
@@ -290,6 +300,11 @@ def test_parallel_transform_step() -> None:
290
300
  0.0,
291
301
  tags=None,
292
302
  ),
303
+ GaugeCall(
304
+ "arroyo.strategies.run_task_with_multiprocessing.processes",
305
+ 2.0,
306
+ tags=None,
307
+ ),
293
308
  ],
294
309
  ):
295
310
  transform_step.join()