sentry-arroyo 2.19.11.tar.gz → 2.20.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {sentry_arroyo-2.19.11/sentry_arroyo.egg-info → sentry_arroyo-2.20.0}/PKG-INFO +1 -1
  2. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/dlq.py +13 -1
  3. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/processor.py +6 -1
  4. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/unfold.py +25 -3
  5. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/metricDefs.json +1 -1
  6. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/metric_defs.py +4 -0
  7. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0/sentry_arroyo.egg-info}/PKG-INFO +1 -1
  8. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/setup.py +1 -1
  9. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_unfold.py +6 -4
  10. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/test_processor.py +1 -0
  11. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/LICENSE +0 -0
  12. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/MANIFEST.in +0 -0
  13. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/README.md +0 -0
  14. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/__init__.py +0 -0
  15. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/__init__.py +0 -0
  16. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/abstract.py +0 -0
  17. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/kafka/__init__.py +0 -0
  18. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/kafka/commit.py +0 -0
  19. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/kafka/configuration.py +0 -0
  20. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/kafka/consumer.py +0 -0
  21. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/local/__init__.py +0 -0
  22. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/local/backend.py +0 -0
  23. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/local/storages/__init__.py +0 -0
  24. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/local/storages/abstract.py +0 -0
  25. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/backends/local/storages/memory.py +0 -0
  26. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/commit.py +0 -0
  27. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/errors.py +0 -0
  28. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/__init__.py +0 -0
  29. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/__init__.py +0 -0
  30. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/abstract.py +0 -0
  31. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/batching.py +0 -0
  32. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/buffer.py +0 -0
  33. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/commit.py +0 -0
  34. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/filter.py +0 -0
  35. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/guard.py +0 -0
  36. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/healthcheck.py +0 -0
  37. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/noop.py +0 -0
  38. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/produce.py +0 -0
  39. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/reduce.py +0 -0
  40. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/run_task.py +0 -0
  41. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/run_task_in_threads.py +0 -0
  42. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/processing/strategies/run_task_with_multiprocessing.py +0 -0
  43. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/py.typed +0 -0
  44. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/types.py +0 -0
  45. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/__init__.py +0 -0
  46. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/clock.py +0 -0
  47. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/codecs.py +0 -0
  48. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/concurrent.py +0 -0
  49. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/logging.py +0 -0
  50. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/metrics.py +0 -0
  51. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/profiler.py +0 -0
  52. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/arroyo/utils/retries.py +0 -0
  53. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/examples/transform_and_produce/__init__.py +0 -0
  54. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/examples/transform_and_produce/batched.py +0 -0
  55. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/examples/transform_and_produce/script.py +0 -0
  56. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/examples/transform_and_produce/simple.py +0 -0
  57. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/requirements.txt +0 -0
  58. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/sentry_arroyo.egg-info/SOURCES.txt +0 -0
  59. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/sentry_arroyo.egg-info/dependency_links.txt +0 -0
  60. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/sentry_arroyo.egg-info/not-zip-safe +0 -0
  61. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/sentry_arroyo.egg-info/requires.txt +0 -0
  62. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/sentry_arroyo.egg-info/top_level.txt +0 -0
  63. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/setup.cfg +0 -0
  64. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/backends/__init__.py +0 -0
  65. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/backends/mixins.py +0 -0
  66. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/backends/test_commit.py +0 -0
  67. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/backends/test_kafka.py +0 -0
  68. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/backends/test_local.py +0 -0
  69. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/__init__.py +0 -0
  70. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/__init__.py +0 -0
  71. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_all.py +0 -0
  72. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_batching.py +0 -0
  73. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_buffer.py +0 -0
  74. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_commit.py +0 -0
  75. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_filter.py +0 -0
  76. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_guard.py +0 -0
  77. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_noop.py +0 -0
  78. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_produce.py +0 -0
  79. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_reduce.py +0 -0
  80. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_run_task.py +0 -0
  81. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_run_task_in_threads.py +0 -0
  82. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/processing/strategies/test_run_task_with_multiprocessing.py +0 -0
  83. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/test_commit.py +0 -0
  84. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/test_dlq.py +0 -0
  85. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/test_kip848_e2e.py +0 -0
  86. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/test_types.py +0 -0
  87. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/utils/__init__.py +0 -0
  88. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/utils/test_concurrent.py +0 -0
  89. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/utils/test_metrics.py +0 -0
  90. {sentry_arroyo-2.19.11 → sentry_arroyo-2.20.0}/tests/utils/test_retries.py +0 -0
--- sentry_arroyo-2.19.11/sentry_arroyo.egg-info/PKG-INFO
+++ sentry_arroyo-2.20.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: sentry-arroyo
-Version: 2.19.11
+Version: 2.20.0
 Summary: Arroyo is a Python library for working with streaming data.
 Home-page: https://github.com/getsentry/arroyo
 Author: Sentry
--- sentry_arroyo-2.19.11/arroyo/dlq.py
+++ sentry_arroyo-2.20.0/arroyo/dlq.py
@@ -28,6 +28,7 @@ from arroyo.types import (
     TStrategyPayload,
     Value,
 )
+from arroyo.utils.metrics import get_metrics
 
 logger = logging.getLogger(__name__)
 
@@ -287,6 +288,11 @@ class BufferedMessages(Generic[TStrategyPayload]):
         self.__buffered_messages: MutableMapping[
             Partition, Deque[BrokerValue[TStrategyPayload]]
         ] = defaultdict(deque)
+        self.__metrics = get_metrics()
+
+    def report_partition_metrics(self, buffered: Deque[BrokerValue[TStrategyPayload]]) -> None:
+        self.__metrics.gauge("arroyo.consumer.dlq_buffer.len", len(buffered))
 
     def append(self, message: BrokerValue[TStrategyPayload]) -> None:
         """
@@ -304,6 +310,7 @@ class BufferedMessages(Generic[TStrategyPayload]):
             buffered.popleft()
 
         self.__buffered_messages[message.partition].append(message)
+        self.report_partition_metrics(self.__buffered_messages[message.partition])
 
     def pop(
         self, partition: Partition, offset: int
@@ -317,9 +324,14 @@ class BufferedMessages(Generic[TStrategyPayload]):
 
         while buffered:
             if buffered[0].offset == offset:
-                return buffered.popleft()
+                msg = buffered.popleft()
+                self.report_partition_metrics(buffered)
+                return msg
             if buffered[0].offset > offset:
+                self.report_partition_metrics(buffered)
                 break
+
+            self.report_partition_metrics(buffered)
             self.__buffered_messages[partition].popleft()
 
         return None
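With the dlq.py changes above, the consumer emits an arroyo.consumer.dlq_buffer.len gauge every time the DLQ buffer is appended to or popped from. The sketch below is illustrative only and not taken from the package's documentation: the PrintingMetrics stub, the "events" topic, and passing None for the buffer's DLQ-policy argument are assumptions; real code would typically subclass arroyo's Metrics interface and forward to a statsd-style backend.

from datetime import datetime
from typing import Mapping, Optional, Union

from arroyo.dlq import BufferedMessages
from arroyo.types import BrokerValue, Partition, Topic
from arroyo.utils.metrics import configure_metrics


class PrintingMetrics:
    # Illustrative stand-in for a real metrics backend (statsd, Datadog, ...).
    def increment(self, name: str, value: Union[int, float] = 1,
                  tags: Optional[Mapping[str, str]] = None) -> None:
        print("counter", name, value)

    def gauge(self, name: str, value: Union[int, float],
              tags: Optional[Mapping[str, str]] = None) -> None:
        # Expect "arroyo.consumer.dlq_buffer.len" to show up here.
        print("gauge", name, value)

    def timing(self, name: str, value: Union[int, float],
               tags: Optional[Mapping[str, str]] = None) -> None:
        print("timing", name, value)


configure_metrics(PrintingMetrics())

partition = Partition(Topic("events"), 0)  # hypothetical topic name
buffer: BufferedMessages[bytes] = BufferedMessages(None)  # None DLQ policy is an assumption

# append() and pop() now both call report_partition_metrics(), emitting the gauge.
buffer.append(BrokerValue(b"raw payload", partition, 0, datetime.now()))
assert buffer.pop(partition, 0) is not None

In a normal deployment nothing has to be called manually: the StreamProcessor maintains this buffer itself, so the gauge starts appearing once a metrics backend is configured.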
--- sentry_arroyo-2.19.11/arroyo/processing/processor.py
+++ sentry_arroyo-2.20.0/arroyo/processing/processor.py
@@ -84,6 +84,7 @@ ConsumerCounter = Literal[
     "arroyo.consumer.invalid_message.count",
     "arroyo.consumer.pause",
     "arroyo.consumer.resume",
+    "arroyo.consumer.dlq.dropped_messages",
 ]
 
 
@@ -377,7 +378,11 @@ class StreamProcessor(Generic[TStrategyPayload]):
                 ) from None
 
             # XXX: This blocks if there are more than MAX_PENDING_FUTURES in the queue.
-            self.__dlq_policy.produce(invalid_message, exc.reason)
+            try:
+                self.__dlq_policy.produce(invalid_message, exc.reason)
+            except Exception:
+                logger.exception(f"Failed to produce message (partition: {exc.partition} offset: {exc.offset}) to DLQ topic, dropping")
+                self.__metrics_buffer.incr_counter("arroyo.consumer.dlq.dropped_messages", 1)
 
             self.__metrics_buffer.incr_timing(
                 "arroyo.consumer.dlq.time", time.time() - start_dlq
--- sentry_arroyo-2.19.11/arroyo/processing/strategies/unfold.py
+++ sentry_arroyo-2.20.0/arroyo/processing/strategies/unfold.py
@@ -3,7 +3,7 @@ from collections import deque
 from typing import Callable, Deque, Generic, Iterable, Optional, TypeVar, Union, cast
 
 from arroyo.processing.strategies.abstract import MessageRejected, ProcessingStrategy
-from arroyo.types import BaseValue, FilteredPayload, Message
+from arroyo.types import BaseValue, FilteredPayload, Message, Value
 
 TInput = TypeVar("TInput")
 TOutput = TypeVar("TOutput")
@@ -17,9 +17,20 @@ class Unfold(
     messages submitting them one by one to the next step. The generated
     messages are created according to the generator function provided by the user.
 
+    ::
+
+        def generator(num: int) -> Sequence[Value[int]]:
+            return [Value(i, {}, None) for i in range(num)]
+
+        unfold = Unfold(generator, next_step)
+
     The generator function provided must return an iterable (i.e. a class that
     implements `__iter__` ).
 
+    The generator can choose to set its own committable on the return value. If
+    the `committable` is empty, `Unfold` will use the offsets of the
+    original message.
+
     If this step receives a `MessageRejected` exception from the next
     step it keeps the remaining messages and attempts to submit
     them on subsequent calls to `poll`
@@ -47,12 +58,23 @@ class Unfold(
             )
             return
 
-        iterable = self.__generator(message.payload)
+        iterable = list(self.__generator(message.payload))
+        num_messages = len(iterable)
 
         store_remaining_messages = False
 
-        for value in iterable:
+        for i, value in enumerate(iterable):
             next_message = Message(value=value)
+            # If generator did not provide committable, patch our own
+            # committable onto it
+            if i == num_messages - 1 and not next_message.committable:
+                next_message = Message(
+                    Value(
+                        next_message.payload,
+                        message.committable,
+                        next_message.timestamp,
+                    )
+                )
 
             if store_remaining_messages == False:
                 try:
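The net effect, adapted from the test changes further down rather than from any official example, is sketched below: when the generator returns Values with empty committables, Unfold now attaches the original message's committable to the last generated message, so offsets only become committable once the whole unfolded batch has been forwarded. The "topic" name and the payload of 2 are arbitrary choices for illustration.

from datetime import datetime
from typing import Sequence
from unittest.mock import Mock, call

from arroyo.processing.strategies.unfold import Unfold
from arroyo.types import Message, Partition, Topic, Value

NOW = datetime.now()
PARTITION = Partition(Topic("topic"), 0)


def generator(num: int) -> Sequence[Value[int]]:
    # Empty committables: Unfold patches the original offsets onto the last value.
    return [Value(i, {}, NOW) for i in range(num)]


next_step = Mock()
strategy = Unfold(generator, next_step)
strategy.submit(Message(Value(2, {PARTITION: 1}, NOW)))

assert next_step.submit.call_args_list == [
    call(Message(Value(0, {}, NOW))),              # not yet committable
    call(Message(Value(1, {PARTITION: 1}, NOW))),  # inherits the original committable
]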
--- sentry_arroyo-2.19.11/arroyo/utils/metricDefs.json
+++ sentry_arroyo-2.20.0/arroyo/utils/metricDefs.json
@@ -1 +1 @@
-{"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nGauge: Shows how many processes the multiprocessing strategy is\nconfigured with."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": "(unitless) spent in librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}}
+{"arroyo.strategies.run_task_with_multiprocessing.batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.msg", "type": "Time", "description": "Number of messages in a multiprocessing batch after the message transformation"}, "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes": {"name": "arroyo.strategies.run_task_with_multiprocessing.output_batch.size.bytes", "type": "Time", "description": "Number of bytes in a multiprocessing batch after the message transformation"}, "arroyo.consumer.run.count": {"name": "arroyo.consumer.run.count", "type": "Counter", "description": "Number of times the consumer is spinning"}, "arroyo.consumer.invalid_message.count": {"name": "arroyo.consumer.invalid_message.count", "type": "Counter", "description": "Number of times the consumer encountered an invalid message."}, "arroyo.strategies.reduce.batch_time": {"name": "arroyo.strategies.reduce.batch_time", "type": "Time", "description": "How long it took the Reduce step to fill up a batch"}, "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.backpressure", "type": "Counter", "description": "Incremented when a strategy after multiprocessing applies\nbackpressure to multiprocessing. May be a reason why CPU cannot be\nsaturated."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot fill the input batch\nbecause not enough memory was allocated. This results in batches smaller\nthan configured. Increase `input_block_size` to fix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.overflow", "type": "Counter", "description": "Incremented when multiprocessing cannot pull results in batches\nequal to the input batch size, because not enough memory was allocated.\nThis can be devastating for throughput. Increase `output_block_size` to\nfix."}, "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.input.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat input\nbuffer overflow. This behavior can be disabled by explicitly setting\n`input_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize": {"name": "arroyo.strategies.run_task_with_multiprocessing.batch.output.resize", "type": "Counter", "description": "Arroyo has decided to re-allocate a block in order to combat output\nbuffer overflow. This behavior can be disabled by explicitly setting\n`output_block_size` to a not-None value in `RunTaskWithMultiprocessing`."}, "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress": {"name": "arroyo.strategies.run_task_with_multiprocessing.batches_in_progress", "type": "Gauge", "description": "How many batches are being processed in parallel by multiprocessing."}, "arroyo.strategies.run_task_with_multiprocessing.processes": {"name": "arroyo.strategies.run_task_with_multiprocessing.processes", "type": "Counter", "description": "A subprocess by multiprocessing unexpectedly died.\n\"sigchld.detected\",\nGauge: Shows how many processes the multiprocessing strategy is\nconfigured with."}, "arroyo.strategies.run_task_with_multiprocessing.pool.create": {"name": "arroyo.strategies.run_task_with_multiprocessing.pool.create", "type": "Counter", "description": "Incremented when the multiprocessing pool is created (or re-created)."}, "arroyo.consumer.poll.time": {"name": "arroyo.consumer.poll.time", "type": "Time", "description": "(unitless) spent polling librdkafka for new messages."}, "arroyo.consumer.processing.time": {"name": "arroyo.consumer.processing.time", "type": "Time", "description": "(unitless) spent in strategies (blocking in strategy.submit or\nstrategy.poll)"}, "arroyo.consumer.backpressure.time": {"name": "arroyo.consumer.backpressure.time", "type": "Time", "description": "(unitless) spent pausing the consumer due to backpressure (MessageRejected)"}, "arroyo.consumer.dlq.time": {"name": "arroyo.consumer.dlq.time", "type": "Time", "description": "(unitless) spent in handling `InvalidMessage` exceptions and sending\nmessages to the the DLQ."}, "arroyo.consumer.join.time": {"name": "arroyo.consumer.join.time", "type": "Time", "description": "(unitless) spent in waiting for the strategy to exit, such as during\nshutdown or rebalancing."}, "arroyo.consumer.callback.time": {"name": "arroyo.consumer.callback.time", "type": "Time", "description": "(unitless) spent in librdkafka callbacks. This metric's timings\noverlap other timings, and might spike at the same time."}, "arroyo.consumer.shutdown.time": {"name": "arroyo.consumer.shutdown.time", "type": "Time", "description": "(unitless) spent in shutting down the consumer. This metric's\ntimings overlap other timings, and might spike at the same time."}, "arroyo.consumer.run.callback": {"name": "arroyo.consumer.run.callback", "type": "Time", "description": "A regular duration metric where each datapoint is measuring the time it\ntook to execute a single callback. This metric is distinct from the\narroyo.consumer.*.time metrics as it does not attempt to accumulate time\nspent per second in an attempt to keep monitoring overhead low.\nThe metric is tagged by the name of the internal callback function being\nexecuted, as 'callback_name'. Possible values are on_partitions_assigned\nand on_partitions_revoked."}, "arroyo.consumer.run.close_strategy": {"name": "arroyo.consumer.run.close_strategy", "type": "Time", "description": "Duration metric measuring the time it took to flush in-flight messages\nand shut down the strategies."}, "arroyo.consumer.run.create_strategy": {"name": "arroyo.consumer.run.create_strategy", "type": "Time", "description": "Duration metric measuring the time it took to create the processing strategy."}, "arroyo.consumer.partitions_revoked.count": {"name": "arroyo.consumer.partitions_revoked.count", "type": "Counter", "description": "How many partitions have been revoked just now."}, "arroyo.consumer.partitions_assigned.count": {"name": "arroyo.consumer.partitions_assigned.count", "type": "Counter", "description": "How many partitions have been assigned just now."}, "arroyo.consumer.latency": {"name": "arroyo.consumer.latency", "type": "Time", "description": "Consumer latency in seconds. Recorded by the commit offsets strategy."}, "arroyo.consumer.pause": {"name": "arroyo.consumer.pause", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being paused.\nThis flushes internal prefetch buffers."}, "arroyo.consumer.resume": {"name": "arroyo.consumer.resume", "type": "Counter", "description": "Metric for when the underlying rdkafka consumer is being resumed.\nThis might cause increased network usage as messages are being re-fetched."}, "arroyo.consumer.librdkafka.total_queue_size": {"name": "arroyo.consumer.librdkafka.total_queue_size", "type": "Gauge", "description": "Queue size of background queue that librdkafka uses to prefetch messages."}, "arroyo.processing.strategies.healthcheck.touch": {"name": "arroyo.processing.strategies.healthcheck.touch", "type": "Counter", "description": "Counter metric to measure how often the healthcheck file has been touched."}, "arroyo.strategies.filter.dropped_messages": {"name": "arroyo.strategies.filter.dropped_messages", "type": "Counter", "description": "Number of messages dropped in the FilterStep strategy"}, "arroyo.consumer.dlq.dropped_messages": {"name": "arroyo.consumer.dlq.dropped_messages", "type": "Counter", "description": "how many messages are dropped due to errors producing to the dlq"}, "arroyo.consumer.dlq_buffer.len": {"name": "arroyo.consumer.dlq_buffer.len", "type": "Gauge", "description": "Current length of the DLQ buffer deque"}}
--- sentry_arroyo-2.19.11/arroyo/utils/metric_defs.py
+++ sentry_arroyo-2.20.0/arroyo/utils/metric_defs.py
@@ -98,4 +98,8 @@ MetricName = Literal[
     "arroyo.processing.strategies.healthcheck.touch",
     # Counter: Number of messages dropped in the FilterStep strategy
     "arroyo.strategies.filter.dropped_messages",
+    # Counter: how many messages are dropped due to errors producing to the dlq
+    "arroyo.consumer.dlq.dropped_messages",
+    # Gauge: Current length of the DLQ buffer deque
+    "arroyo.consumer.dlq_buffer.len",
 ]
--- sentry_arroyo-2.19.11/PKG-INFO
+++ sentry_arroyo-2.20.0/sentry_arroyo.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: sentry-arroyo
-Version: 2.19.11
+Version: 2.20.0
 Summary: Arroyo is a Python library for working with streaming data.
 Home-page: https://github.com/getsentry/arroyo
 Author: Sentry
--- sentry_arroyo-2.19.11/setup.py
+++ sentry_arroyo-2.20.0/setup.py
@@ -10,7 +10,7 @@ def get_requirements() -> Sequence[str]:
 
 setup(
     name="sentry-arroyo",
-    version="2.19.11",
+    version="2.20.0",
     author="Sentry",
     author_email="oss@sentry.io",
     license="Apache-2.0",
--- sentry_arroyo-2.19.11/tests/processing/strategies/test_unfold.py
+++ sentry_arroyo-2.20.0/tests/processing/strategies/test_unfold.py
@@ -11,7 +11,7 @@ NOW = datetime.now()
 
 
 def generator(num: int) -> Sequence[Value[int]]:
-    return [Value(i, {PARTITION: i}, NOW) for i in range(num)]
+    return [Value(i, {}, NOW) for i in range(num)]
 
 
 def test_unfold() -> None:
@@ -23,7 +23,9 @@ def test_unfold() -> None:
     strategy.submit(message)
 
     assert next_step.submit.call_args_list == [
-        call(Message(Value(0, {PARTITION: 0}, NOW))),
+        # first message has no committable since the original message has not fully been processed
+        call(Message(Value(0, {}, NOW))),
+        # second message is last message from batch, so we can say the original msg was fully processed
         call(Message(Value(1, {PARTITION: 1}, NOW))),
     ]
 
@@ -44,7 +46,7 @@ def test_message_rejected() -> None:
 
     # Message doesn't actually go through since it was rejected
     assert next_step.submit.call_args_list == [
-        call(Message(Value(0, {PARTITION: 0}, NOW))),
+        call(Message(Value(0, {}, NOW))),
     ]
 
     # clear the side effect, both messages should be submitted now
@@ -53,7 +55,7 @@ def test_message_rejected() -> None:
     strategy.poll()
 
     assert next_step.submit.call_args_list == [
-        call(Message(Value(0, {PARTITION: 0}, NOW))),
+        call(Message(Value(0, {}, NOW))),
         call(Message(Value(1, {PARTITION: 1}, NOW))),
     ]
 
--- sentry_arroyo-2.19.11/tests/processing/test_processor.py
+++ sentry_arroyo-2.20.0/tests/processing/test_processor.py
@@ -583,6 +583,7 @@ def test_dlq() -> None:
     assert dlq_policy.producer.produce.call_count == 1
 
 
+
 def test_healthcheck(tmpdir: py.path.local) -> None:
     """
     Test healthcheck strategy e2e with StreamProcessor, to ensure the