sentry-arroyo 2.29.3__tar.gz → 2.29.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {sentry_arroyo-2.29.3/sentry_arroyo.egg-info → sentry_arroyo-2.29.5}/PKG-INFO +1 -1
  2. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/run_task_with_multiprocessing.py +17 -0
  3. sentry_arroyo-2.29.5/arroyo/utils/metricDefs.json +1 -0
  4. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/metric_defs.py +2 -0
  5. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5/sentry_arroyo.egg-info}/PKG-INFO +1 -1
  6. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/setup.py +1 -1
  7. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_run_task_with_multiprocessing.py +11 -68
  8. sentry_arroyo-2.29.3/arroyo/utils/metricDefs.json +0 -1
  9. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/LICENSE +0 -0
  10. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/MANIFEST.in +0 -0
  11. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/README.md +0 -0
  12. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/__init__.py +0 -0
  13. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/__init__.py +0 -0
  14. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/abstract.py +0 -0
  15. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/kafka/__init__.py +0 -0
  16. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/kafka/commit.py +0 -0
  17. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/kafka/configuration.py +0 -0
  18. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/kafka/consumer.py +0 -0
  19. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/local/__init__.py +0 -0
  20. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/local/backend.py +0 -0
  21. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/local/storages/__init__.py +0 -0
  22. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/local/storages/abstract.py +0 -0
  23. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/backends/local/storages/memory.py +0 -0
  24. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/commit.py +0 -0
  25. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/dlq.py +0 -0
  26. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/errors.py +0 -0
  27. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/__init__.py +0 -0
  28. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/processor.py +0 -0
  29. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/__init__.py +0 -0
  30. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/abstract.py +0 -0
  31. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/batching.py +0 -0
  32. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/buffer.py +0 -0
  33. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/commit.py +0 -0
  34. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/filter.py +0 -0
  35. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/guard.py +0 -0
  36. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/healthcheck.py +0 -0
  37. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/noop.py +0 -0
  38. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/produce.py +0 -0
  39. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/reduce.py +0 -0
  40. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/run_task.py +0 -0
  41. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/run_task_in_threads.py +0 -0
  42. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/unfold.py +0 -0
  43. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/py.typed +0 -0
  44. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/types.py +0 -0
  45. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/__init__.py +0 -0
  46. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/clock.py +0 -0
  47. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/codecs.py +0 -0
  48. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/concurrent.py +0 -0
  49. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/logging.py +0 -0
  50. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/metrics.py +0 -0
  51. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/profiler.py +0 -0
  52. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/retries.py +0 -0
  53. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/examples/transform_and_produce/__init__.py +0 -0
  54. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/examples/transform_and_produce/batched.py +0 -0
  55. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/examples/transform_and_produce/script.py +0 -0
  56. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/examples/transform_and_produce/simple.py +0 -0
  57. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/requirements.txt +0 -0
  58. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/sentry_arroyo.egg-info/SOURCES.txt +0 -0
  59. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/sentry_arroyo.egg-info/dependency_links.txt +0 -0
  60. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/sentry_arroyo.egg-info/not-zip-safe +0 -0
  61. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/sentry_arroyo.egg-info/requires.txt +0 -0
  62. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/sentry_arroyo.egg-info/top_level.txt +0 -0
  63. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/setup.cfg +0 -0
  64. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/backends/__init__.py +0 -0
  65. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/backends/mixins.py +0 -0
  66. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/backends/test_commit.py +0 -0
  67. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/backends/test_kafka.py +0 -0
  68. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/backends/test_kafka_producer.py +0 -0
  69. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/backends/test_local.py +0 -0
  70. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/__init__.py +0 -0
  71. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/__init__.py +0 -0
  72. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_all.py +0 -0
  73. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_batching.py +0 -0
  74. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_buffer.py +0 -0
  75. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_commit.py +0 -0
  76. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_filter.py +0 -0
  77. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_guard.py +0 -0
  78. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_noop.py +0 -0
  79. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_produce.py +0 -0
  80. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_reduce.py +0 -0
  81. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_run_task.py +0 -0
  82. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_run_task_in_threads.py +0 -0
  83. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_unfold.py +0 -0
  84. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/test_processor.py +0 -0
  85. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/test_commit.py +0 -0
  86. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/test_dlq.py +0 -0
  87. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/test_kip848_e2e.py +0 -0
  88. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/test_types.py +0 -0
  89. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/utils/__init__.py +0 -0
  90. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/utils/test_concurrent.py +0 -0
  91. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/utils/test_metrics.py +0 -0
  92. {sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/utils/test_retries.py +0 -0
{sentry_arroyo-2.29.3/sentry_arroyo.egg-info → sentry_arroyo-2.29.5}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sentry-arroyo
- Version: 2.29.3
+ Version: 2.29.5
  Summary: Arroyo is a Python library for working with streaming data.
  Home-page: https://github.com/getsentry/arroyo
  Author: Sentry
{sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/processing/strategies/run_task_with_multiprocessing.py
@@ -198,12 +198,19 @@ class BatchBuilder(Generic[TBatchValue]):
  def parallel_worker_initializer(
      custom_initialize_func: Optional[Callable[[], None]] = None,
  ) -> None:
+     start_time = time.time()
      # Worker process should ignore ``SIGINT`` so that processing is not
      # interrupted by ``KeyboardInterrupt`` during graceful shutdown.
      signal.signal(signal.SIGINT, signal.SIG_IGN)

      if custom_initialize_func is not None:
          custom_initialize_func()
+     elapsed_time = time.time() - start_time
+     logger.info(
+         "Finished parallel_worker_initializer function with pid %d, elapsed time %f",
+         multiprocessing.current_process().pid,
+         elapsed_time,
+     )


  @dataclass
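For reference, a minimal sketch of how a custom initializer reaches this timed code path. It assumes only the public `MultiprocessingPool` constructor and `close()` shown in the hunks of this file; the `warm_up` function and process count are hypothetical.

```python
import logging

from arroyo.processing.strategies.run_task_with_multiprocessing import (
    MultiprocessingPool,
)


def warm_up() -> None:
    # Hypothetical per-worker setup. Configuring logging here also makes the
    # worker-side "Finished parallel_worker_initializer ..." INFO line (added
    # above) visible, since it is emitted right after this function returns.
    logging.basicConfig(level=logging.INFO)
    import json  # stand-in for expensive one-time setup work

    json.dumps({"warmed": True})


if __name__ == "__main__":
    # Each spawned worker runs warm_up() once via parallel_worker_initializer
    # and logs its pid together with the elapsed initialization time.
    pool = MultiprocessingPool(num_processes=2, initializer=warm_up)
    pool.close()
```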
@@ -311,13 +318,16 @@ class MultiprocessingPool:
          num_processes: int,
          initializer: Optional[Callable[[], None]] = None,
      ) -> None:
+         logger.info("Starting init MultiprocessingPool class")
          self.__num_processes = num_processes
          self.__initializer = initializer
          self.__pool: Optional[Pool] = None
          self.__metrics = get_metrics()
          self.maybe_create_pool()
+         logger.info("Finished init MultiprocessingPool class")

      def maybe_create_pool(self) -> None:
+         logger.info("Starting maybe_create_pool function")
          if self.__pool is None:
              self.__metrics.increment(
                  "arroyo.strategies.run_task_with_multiprocessing.pool.create"
@@ -327,6 +337,7 @@ class MultiprocessingPool:
                  initializer=partial(parallel_worker_initializer, self.__initializer),
                  context=multiprocessing.get_context("spawn"),
              )
+         logger.info("Finished maybe_create_pool function")

      @property
      def num_processes(self) -> int:
@@ -610,6 +621,7 @@ class RunTaskWithMultiprocessing(
          assert self.__batch_builder is not None
          batch = self.__batch_builder.build()
          logger.debug("Submitting %r to %r...", batch, self.__pool)
+         start_time = time.time()
          self.__processes.append(
              (
                  batch,
@@ -621,6 +633,11 @@
                  False,
              )
          )
+         end_time = time.time()
+         self.__metrics.timing(
+             "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time",
+             end_time - start_time,
+         )
          self.__batches_in_progress.increment()
          self.__metrics.timing(
              "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", len(batch)
sentry_arroyo-2.29.5/arroyo/utils/metricDefs.json
@@ -0,0 +1 @@
+ {"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time", "type": "Time", "description": "How long it took to submit a batch to multiprocessing"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. 
This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nGauge: Shows how many processes the multiprocessing strategy is\nconfigured with."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": "(unitless) spent in librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. 
Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}, "arroyo.consumer.dlq_buffer.exceeded": {"name": "arroyo.consumer.dlq_buffer.exceeded", "type": "Counter", "description": "Number of times the DLQ buffer size has been exceeded, causing messages to be dropped"}, "arroyo.consumer.dlq_buffer.assigned_partitions": {"name": "arroyo.consumer.dlq_buffer.assigned_partitions", "type": "Gauge", "description": "Number of partitions being tracked in the DLQ buffer"}, "arroyo.producer.librdkafka.p99_int_latency": {"name": "arroyo.producer.librdkafka.p99_int_latency", "type": "Time", "description": "Internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_outbuf_latency": {"name": "arroyo.producer.librdkafka.p99_outbuf_latency", "type": "Time", "description": "Output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_rtt": {"name": "arroyo.producer.librdkafka.p99_rtt", "type": "Time", "description": "Round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_int_latency": {"name": 
"arroyo.producer.librdkafka.avg_int_latency", "type": "Time", "description": "Average internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_outbuf_latency": {"name": "arroyo.producer.librdkafka.avg_outbuf_latency", "type": "Time", "description": "Average output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_rtt": {"name": "arroyo.producer.librdkafka.avg_rtt", "type": "Time", "description": "Average round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.produce_status": {"name": "arroyo.producer.produce_status", "type": "Counter", "description": "Number of times the produce strategy failed to produce a message"}, "arroyo.producer.librdkafka.message_count": {"name": "arroyo.producer.librdkafka.message_count", "type": "Gauge", "description": "Producer message count metric from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_count_max": {"name": "arroyo.producer.librdkafka.message_count_max", "type": "Gauge", "description": "Maximum producer message count from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size": {"name": "arroyo.producer.librdkafka.message_size", "type": "Gauge", "description": "Producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size_max": {"name": "arroyo.producer.librdkafka.message_size_max", "type": "Gauge", "description": "Maximum producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.txmsgs": {"name": "arroyo.producer.librdkafka.txmsgs", "type": "Gauge", "description": "Total number of messages transmitted from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.broker_tx": {"name": "arroyo.producer.librdkafka.broker_tx", "type": "Gauge", "description": "Total number of transmission requests from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txbytes": {"name": "arroyo.producer.librdkafka.broker_txbytes", "type": "Gauge", "description": "Total number of bytes transmitted from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_requests": {"name": "arroyo.producer.librdkafka.broker_outbuf_requests", "type": "Gauge", "description": "Number of requests awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_messages": {"name": "arroyo.producer.librdkafka.broker_outbuf_messages", "type": "Gauge", "description": "Number of messages awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_connects": {"name": "arroyo.producer.librdkafka.broker_connects", "type": "Gauge", "description": "Number of connection attempts to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_disconnects": {"name": "arroyo.producer.librdkafka.broker_disconnects", "type": "Gauge", "description": "Number of disconnections from broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txerrs": {"name": "arroyo.producer.librdkafka.broker_txerrs", "type": "Gauge", "description": "Total number of transmission errors from librdkafka statistics\nTagged by broker_id, producer_name"}, 
"arroyo.producer.librdkafka.broker_txretries": {"name": "arroyo.producer.librdkafka.broker_txretries", "type": "Gauge", "description": "Total number of request retries from librdkafka statistics\nTagged by broker_id, producer_name"}}
{sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/arroyo/utils/metric_defs.py
@@ -5,6 +5,8 @@ MetricName = Literal[
      "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg",
      # Time: Number of bytes in a multiprocessing batch
      "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes",
+     # Time: How long it took to submit a batch to multiprocessing
+     "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time",
      # Time: Number of messages in a multiprocessing batch after the message transformation
      "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg",
      # Time: Number of bytes in a multiprocessing batch after the message transformation
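A small sketch of what the Literal addition buys: the new name now type-checks as a `MetricName` (import path taken from the file above); purely illustrative.

```python
from arroyo.utils.metric_defs import MetricName

# The new metric name is a valid MetricName member, so metrics backends typed
# against MetricName accept it without a "type: ignore".
name: MetricName = (
    "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time"
)
print(name)
```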
{sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5/sentry_arroyo.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sentry-arroyo
- Version: 2.29.3
+ Version: 2.29.5
  Summary: Arroyo is a Python library for working with streaming data.
  Home-page: https://github.com/getsentry/arroyo
  Author: Sentry
{sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/setup.py
@@ -10,7 +10,7 @@ def get_requirements() -> Sequence[str]:

  setup(
      name="sentry-arroyo",
-     version="2.29.3",
+     version="2.29.5",
      author="Sentry",
      author_email="oss@sentry.io",
      license="Apache-2.0",
{sentry_arroyo-2.29.3 → sentry_arroyo-2.29.5}/tests/processing/strategies/test_run_task_with_multiprocessing.py
@@ -22,7 +22,7 @@ from arroyo.types import Message, Partition, Topic, Value
  from tests.assertions import assert_changes, assert_does_not_change
  from tests.metrics import Gauge as GaugeCall
  from tests.metrics import Increment as IncrementCall
- from tests.metrics import TestingMetricsBackend
+ from tests.metrics import MetricCall, TestingMetricsBackend
  from tests.metrics import Timing as TimingCall


@@ -150,12 +150,20 @@ def test_parallel_transform_step() -> None:
      manager_processes = 1
      metrics = TestingMetricsBackend

+     def get_trimmed_metrics() -> list[MetricCall]:
+         return [
+             call
+             for call in metrics.calls
+             if call.name
+             != "arroyo.strategies.run_task_with_multiprocessing.batch.submit.time"
+         ]
+
      with assert_changes(
          get_subprocess_count,
          starting_processes,
          starting_processes + worker_processes + manager_processes,
      ), assert_changes(
-         lambda: metrics.calls,
+         get_trimmed_metrics,
          [],
          [
              IncrementCall(
@@ -234,7 +242,7 @@ def test_parallel_transform_step() -> None:
          starting_processes + worker_processes + manager_processes,
          starting_processes,
      ), assert_changes(
-         lambda: metrics.calls,
+         get_trimmed_metrics,
          [],
          [
              TimingCall(
@@ -405,71 +413,6 @@ def test_message_rejected_multiple() -> None:
          call(Message(Value(-98, {}, now))),
      ]

-     assert TestingMetricsBackend.calls == [
-         IncrementCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.pool.create",
-             value=1,
-             tags=None,
-         ),
-         GaugeCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.batches_in_progress",
-             value=0.0,
-             tags=None,
-         ),
-         GaugeCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.processes",
-             value=1,
-             tags=None,
-         ),
-         GaugeCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.batches_in_progress",
-             value=1.0,
-             tags=None,
-         ),
-         TimingCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.batch.size.msg",
-             value=2,
-             tags=None,
-         ),
-         TimingCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes",
-             value=0,
-             tags=None,
-         ),
-     ] + [
-         TimingCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg",
-             value=2,
-             tags=None,
-         ),
-         TimingCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes",
-             value=0,
-             tags=None,
-         ),
-         IncrementCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.batch.backpressure",
-             value=1,
-             tags=None,
-         ),
-     ] * 5 + [
-         TimingCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg",
-             value=2,
-             tags=None,
-         ),
-         TimingCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes",
-             value=0,
-             tags=None,
-         ),
-         GaugeCall(
-             name="arroyo.strategies.run_task_with_multiprocessing.batches_in_progress",
-             value=0.0,
-             tags=None,
-         ),
-     ]
-
      pool.close()


sentry_arroyo-2.29.3/arroyo/utils/metricDefs.json
@@ -1 +0,0 @@
- {"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. 
This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nGauge: Shows how many processes the multiprocessing strategy is\nconfigured with."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": "(unitless) spent in librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. 
Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}, "arroyo.consumer.dlq_buffer.exceeded": {"name": "arroyo.consumer.dlq_buffer.exceeded", "type": "Counter", "description": "Number of times the DLQ buffer size has been exceeded, causing messages to be dropped"}, "arroyo.consumer.dlq_buffer.assigned_partitions": {"name": "arroyo.consumer.dlq_buffer.assigned_partitions", "type": "Gauge", "description": "Number of partitions being tracked in the DLQ buffer"}, "arroyo.producer.librdkafka.p99_int_latency": {"name": "arroyo.producer.librdkafka.p99_int_latency", "type": "Time", "description": "Internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_outbuf_latency": {"name": "arroyo.producer.librdkafka.p99_outbuf_latency", "type": "Time", "description": "Output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.p99_rtt": {"name": "arroyo.producer.librdkafka.p99_rtt", "type": "Time", "description": "Round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_int_latency": {"name": 
"arroyo.producer.librdkafka.avg_int_latency", "type": "Time", "description": "Average internal producer queue latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_outbuf_latency": {"name": "arroyo.producer.librdkafka.avg_outbuf_latency", "type": "Time", "description": "Average output buffer latency from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.librdkafka.avg_rtt": {"name": "arroyo.producer.librdkafka.avg_rtt", "type": "Time", "description": "Average round-trip time to brokers from librdkafka statistics.\nTagged by broker_id."}, "arroyo.producer.produce_status": {"name": "arroyo.producer.produce_status", "type": "Counter", "description": "Number of times the produce strategy failed to produce a message"}, "arroyo.producer.librdkafka.message_count": {"name": "arroyo.producer.librdkafka.message_count", "type": "Gauge", "description": "Producer message count metric from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_count_max": {"name": "arroyo.producer.librdkafka.message_count_max", "type": "Gauge", "description": "Maximum producer message count from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size": {"name": "arroyo.producer.librdkafka.message_size", "type": "Gauge", "description": "Producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.message_size_max": {"name": "arroyo.producer.librdkafka.message_size_max", "type": "Gauge", "description": "Maximum producer message size from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.txmsgs": {"name": "arroyo.producer.librdkafka.txmsgs", "type": "Gauge", "description": "Total number of messages transmitted from librdkafka statistics\nTagged by producer_name"}, "arroyo.producer.librdkafka.broker_tx": {"name": "arroyo.producer.librdkafka.broker_tx", "type": "Gauge", "description": "Total number of transmission requests from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txbytes": {"name": "arroyo.producer.librdkafka.broker_txbytes", "type": "Gauge", "description": "Total number of bytes transmitted from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_requests": {"name": "arroyo.producer.librdkafka.broker_outbuf_requests", "type": "Gauge", "description": "Number of requests awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_outbuf_messages": {"name": "arroyo.producer.librdkafka.broker_outbuf_messages", "type": "Gauge", "description": "Number of messages awaiting transmission to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_connects": {"name": "arroyo.producer.librdkafka.broker_connects", "type": "Gauge", "description": "Number of connection attempts to broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_disconnects": {"name": "arroyo.producer.librdkafka.broker_disconnects", "type": "Gauge", "description": "Number of disconnections from broker from librdkafka statistics\nTagged by broker_id, producer_name"}, "arroyo.producer.librdkafka.broker_txerrs": {"name": "arroyo.producer.librdkafka.broker_txerrs", "type": "Gauge", "description": "Total number of transmission errors from librdkafka statistics\nTagged by broker_id, producer_name"}, 
"arroyo.producer.librdkafka.broker_txretries": {"name": "arroyo.producer.librdkafka.broker_txretries", "type": "Gauge", "description": "Total number of request retries from librdkafka statistics\nTagged by broker_id, producer_name"}}