sentry-arroyo 2.25.0__py3-none-any.whl → 2.26.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arroyo/processing/processor.py CHANGED
@@ -89,9 +89,8 @@ ConsumerCounter = Literal[
 
 
 class MetricsBuffer:
-    def __init__(self, consumer: Consumer[Any]) -> None:
-        self.metrics = get_consumer_metrics(consumer.member_id)
-        self.__consumer = consumer
+    def __init__(self) -> None:
+        self.metrics = get_consumer_metrics()
         self.__timers: MutableMapping[ConsumerTiming, float] = defaultdict(float)
         self.__counters: MutableMapping[ConsumerCounter, int] = defaultdict(int)
         self.__reset()
@@ -140,10 +139,11 @@ class StreamProcessor(Generic[TStrategyPayload]):
         commit_policy: CommitPolicy = ONCE_PER_SECOND,
         dlq_policy: Optional[DlqPolicy[TStrategyPayload]] = None,
         join_timeout: Optional[float] = None,
+        shutdown_strategy_before_consumer: bool = False,
     ) -> None:
         self.__consumer = consumer
         self.__processor_factory = processor_factory
-        self.__metrics_buffer = MetricsBuffer(consumer)
+        self.__metrics_buffer = MetricsBuffer()
 
         self.__processing_strategy: Optional[
             ProcessingStrategy[TStrategyPayload]
@@ -159,6 +159,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
         self.__commit_policy_state = commit_policy.get_state_machine()
         self.__join_timeout = join_timeout
         self.__shutdown_requested = False
+        self.__shutdown_strategy_before_consumer = shutdown_strategy_before_consumer
 
         # Buffers messages for DLQ. Messages are added when they are submitted for processing and
         # removed once the commit callback is fired as they are guaranteed to be valid at that point.
@@ -171,49 +172,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
         )
 
         def _close_strategy() -> None:
-            start_close = time.time()
-
-            if self.__processing_strategy is None:
-                # Partitions are revoked when the consumer is shutting down, at
-                # which point we already have closed the consumer.
-                return
-
-            logger.info("Closing %r...", self.__processing_strategy)
-            self.__processing_strategy.close()
-
-            logger.info("Waiting for %r to exit...", self.__processing_strategy)
-
-            while True:
-                start_join = time.time()
-
-                try:
-                    self.__processing_strategy.join(self.__join_timeout)
-                    self.__metrics_buffer.incr_timing(
-                        "arroyo.consumer.join.time", time.time() - start_join
-                    )
-                    break
-                except InvalidMessage as e:
-                    self.__metrics_buffer.incr_timing(
-                        "arroyo.consumer.join.time", time.time() - start_join
-                    )
-                    self._handle_invalid_message(e)
-
-            logger.info(
-                "%r exited successfully, releasing assignment.",
-                self.__processing_strategy,
-            )
-            self.__processing_strategy = None
-            self.__message = None  # avoid leaking buffered messages across assignments
-            self.__is_paused = False
-            self._clear_backpressure()
-
-            value = time.time() - start_close
-
-            self.__metrics_buffer.metrics.timing(
-                "arroyo.consumer.run.close_strategy", value
-            )
-
-            self.__metrics_buffer.incr_timing("arroyo.consumer.shutdown.time", value)
+            self._close_processing_strategy()
 
         def _create_strategy(partitions: Mapping[Partition, int]) -> None:
             start_create = time.time()
@@ -236,6 +195,8 @@ class StreamProcessor(Generic[TStrategyPayload]):
        def on_partitions_assigned(partitions: Mapping[Partition, int]) -> None:
            logger.info("New partitions assigned: %r", partitions)
            logger.info("Member id: %r", self.__consumer.member_id)
+           self.__metrics_buffer.metrics.consumer_member_id = self.__consumer.member_id
+
            self.__metrics_buffer.metrics.increment(
                "arroyo.consumer.partitions_assigned.count", len(partitions)
            )
@@ -245,6 +206,7 @@ class StreamProcessor(Generic[TStrategyPayload]):
 
            if self.__dlq_policy:
                self.__dlq_policy.reset_dlq_limits(current_partitions)
+
            if current_partitions:
                if self.__processing_strategy is not None:
                    # TODO: for cooperative-sticky rebalancing this can happen
@@ -300,6 +262,48 @@ class StreamProcessor(Generic[TStrategyPayload]):
            [topic], on_assign=on_partitions_assigned, on_revoke=on_partitions_revoked
        )
 
+   def _close_processing_strategy(self) -> None:
+       """Close the processing strategy and wait for it to exit."""
+       start_close = time.time()
+
+       if self.__processing_strategy is None:
+           # Partitions are revoked when the consumer is shutting down, at
+           # which point we already have closed the consumer.
+           return
+
+       logger.info("Closing %r...", self.__processing_strategy)
+       logger.info("Member id: %r", self.__consumer.member_id)
+       self.__processing_strategy.close()
+
+       logger.info("Waiting for %r to exit...", self.__processing_strategy)
+
+       while True:
+           start_join = time.time()
+
+           try:
+               self.__processing_strategy.join(self.__join_timeout)
+               self.__metrics_buffer.incr_timing(
+                   "arroyo.consumer.join.time", time.time() - start_join
+               )
+               break
+           except InvalidMessage as e:
+               self.__metrics_buffer.incr_timing(
+                   "arroyo.consumer.join.time", time.time() - start_join
+               )
+               self._handle_invalid_message(e)
+
+       logger.info("%r exited successfully", self.__processing_strategy)
+       self.__processing_strategy = None
+       self.__message = None
+       self.__is_paused = False
+       self._clear_backpressure()
+
+       value = time.time() - start_close
+       self.__metrics_buffer.metrics.timing(
+           "arroyo.consumer.run.close_strategy", value
+       )
+       self.__metrics_buffer.incr_timing("arroyo.consumer.shutdown.time", value)
+
    def __commit(self, offsets: Mapping[Partition, int], force: bool = False) -> None:
        """
        If force is passed, commit immediately and do not throttle. This should
@@ -516,6 +520,13 @@ class StreamProcessor(Generic[TStrategyPayload]):
        self.__shutdown_requested = True
 
    def _shutdown(self) -> None:
+       # If shutdown_strategy_before_consumer is set, work around an issue
+       # where rdkafka would revoke our partition, but then also immediately
+       # revoke our member ID as well, causing join() of the CommitStrategy
+       # (that is running in the partition revocation callback) to crash.
+       if self.__shutdown_strategy_before_consumer:
+           self._close_processing_strategy()
+
        # close the consumer
        logger.info("Stopping consumer")
        self.__metrics_buffer.flush()
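
Note: the new flag is opt-in and defaults to False, so existing deployments are unaffected. A rough sketch of how an application might enable it is shown below; the topic name, consumer settings, and the no-op strategy factory are illustrative placeholders, not part of this diff:

from typing import Mapping

from arroyo.backends.kafka import KafkaConsumer, KafkaPayload
from arroyo.commit import ONCE_PER_SECOND
from arroyo.processing import StreamProcessor
from arroyo.processing.strategies import (
    CommitOffsets,
    ProcessingStrategy,
    ProcessingStrategyFactory,
)
from arroyo.types import Commit, Partition, Topic


class NoopFactory(ProcessingStrategyFactory[KafkaPayload]):
    # Minimal placeholder factory: commit offsets for every message, do nothing else.
    def create_with_partitions(
        self, commit: Commit, partitions: Mapping[Partition, int]
    ) -> ProcessingStrategy[KafkaPayload]:
        return CommitOffsets(commit)


# Placeholder librdkafka settings; real applications typically build these
# with arroyo's Kafka configuration helpers.
consumer = KafkaConsumer(
    {"bootstrap.servers": "localhost:9092", "group.id": "example-group"}
)

processor = StreamProcessor(
    consumer=consumer,
    topic=Topic("example-topic"),
    processor_factory=NoopFactory(),
    commit_policy=ONCE_PER_SECOND,
    # New in 2.26.0: close and join the processing strategy before the consumer
    # itself is shut down (see the workaround comment in _shutdown above).
    shutdown_strategy_before_consumer=True,
)
processor.run()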
arroyo/utils/metrics.py CHANGED
@@ -49,18 +49,21 @@ class ConsumerMetricsWrapper(Metrics):
    """
    A wrapper around a metrics backend that automatically adds consumer_member_id
    to all metrics calls.
+
+   Right now we only use this to add tags to the metrics emitted by
+   StreamProcessor, but ideally all metrics, even those emitted by strategies
+   and application code, would get this tag. The metrics abstraction in arroyo
+   is not sufficient for this. We'd have to add a "add_global_tags" method
+   (similar to the concept of global tags in sentry) and users would have to
+   implement it.
    """
 
-   def __init__(self, metrics: Metrics, consumer_member_id: str) -> None:
+   def __init__(self, metrics: Metrics) -> None:
        self.__metrics = metrics
-       self.__consumer_member_id = consumer_member_id
+       self.consumer_member_id = ""
 
    def _add_consumer_tag(self, tags: Optional[Tags]) -> Tags:
-       """Add consumer_member_id to the provided tags."""
-       consumer_tags = {"consumer_member_id": self.__consumer_member_id}
-       if tags:
-           return {**consumer_tags, **tags}
-       return consumer_tags
+       return {**(tags or {}), "consumer_member_id": self.consumer_member_id}
 
    def increment(
        self,
@@ -169,12 +172,12 @@ def get_metrics() -> Metrics:
    return _metrics_backend
 
 
-def get_consumer_metrics(consumer_member_id: str) -> Metrics:
+def get_consumer_metrics() -> ConsumerMetricsWrapper:
    """
    Get a metrics backend that automatically adds consumer_member_id to all metrics.
    """
    base_metrics = get_metrics()
-   return ConsumerMetricsWrapper(base_metrics, consumer_member_id)
+   return ConsumerMetricsWrapper(base_metrics)
 
 
 __all__ = ["configure_metrics", "Metrics", "MetricName", "Tags", "get_consumer_metrics"]
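
Taken together, these metrics.py changes mean get_consumer_metrics() no longer needs the member id up front: the wrapper starts with an empty consumer_member_id and StreamProcessor fills it in once partitions are assigned. A small sketch of the resulting behaviour, assuming the application registers some Metrics implementation of its own (MyMetricsBackend below is a placeholder):

from arroyo.utils.metrics import configure_metrics, get_consumer_metrics

# MyMetricsBackend is a placeholder for an application-provided Metrics
# implementation (e.g. one that forwards to statsd or Datadog).
configure_metrics(MyMetricsBackend())

metrics = get_consumer_metrics()
metrics.increment("arroyo.consumer.run.count")
# Emitted with tags {"consumer_member_id": ""} -- the id is not known yet.

# StreamProcessor sets this in on_partitions_assigned; shown here manually:
metrics.consumer_member_id = "rdkafka-1234-abcd"
metrics.increment("arroyo.consumer.run.count", tags={"extra": "tag"})
# Emitted with tags {"extra": "tag", "consumer_member_id": "rdkafka-1234-abcd"}.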
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sentry-arroyo
-Version: 2.25.0
+Version: 2.26.0
 Summary: Arroyo is a Python library for working with streaming data.
 Home-page: https://github.com/getsentry/arroyo
 Author: Sentry
@@ -16,7 +16,7 @@ arroyo/backends/local/storages/__init__.py,sha256=AGYujdAAcn3osoj9jq84IzTywYbkID
 arroyo/backends/local/storages/abstract.py,sha256=1qVQp6roxHkK6XT2aklZyZk1qq7RzcPN6Db_CA5--kg,2901
 arroyo/backends/local/storages/memory.py,sha256=AoKDsVZzBXkOJyWArKWp3vfGfU9xLlKFXE9gsJiMIzQ,2613
 arroyo/processing/__init__.py,sha256=vZVg0wJvJfoVzlzGvnL59bT6YNIRJNQ5t7oU045Qbk4,87
-arroyo/processing/processor.py,sha256=cx8bPE41ZPT4hEhGdwCNAOe2ZAQ9lxZlr82goNkFBZ0,20702
+arroyo/processing/processor.py,sha256=PddFijN7uJyNVq1XMBeOSlD1Z0L27kLBplH0X_HF4Kg,21235
 arroyo/processing/strategies/__init__.py,sha256=EU_JMb54eOxMxaC5mIFpI-sAF-X2ZScbE8czBZ7bQkY,1106
 arroyo/processing/strategies/abstract.py,sha256=nu7juEz_aQmQIH35Z8u--FBuLjkK8_LQ1hIG2xpw9AA,4808
 arroyo/processing/strategies/batching.py,sha256=s89xC6lQpBseEaApu1iNTipXGKeO95OMwinj2VBKn9s,4778
@@ -39,14 +39,14 @@ arroyo/utils/concurrent.py,sha256=dbdPinjqmxCQ7izUGFNbGjB3OxfSIO01bnCSTANaVOE,11
 arroyo/utils/logging.py,sha256=Y1PnhYcI9XNNEK0H13Ct2xKLr2Niuw0dxayc6sWnui8,606
 arroyo/utils/metricDefs.json,sha256=5LR4hiHwD9JLaKm8JcpshmVjKCmIZzCxwa5oJxRkzH0,10264
 arroyo/utils/metric_defs.py,sha256=hZ98tCimeW8W6Gpo_LVvYI4RGtAPRsM6kruf8TtvdEY,7283
-arroyo/utils/metrics.py,sha256=eQwfdsbMbhHbTzrUgO-z1Cg0viPwFqhEbcTN-RqsMZk,4836
+arroyo/utils/metrics.py,sha256=UucA2igsosNtjA2L8h8EOkWyjBQNai5H4Vqah6KcgN8,5021
 arroyo/utils/profiler.py,sha256=aiYy2RRPX_IiDIO7AnFM3hARaHCctS3rqUS5nrHXbSg,2452
 arroyo/utils/retries.py,sha256=4MRhHUR7da9x1ytlo7YETo8S9HEebXmPF2-mKP4xYz0,3445
 examples/transform_and_produce/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 examples/transform_and_produce/batched.py,sha256=st2R6qTneAtV0JFbKP30Ti3sJDYj8Jkbmta9JckKdZU,2636
 examples/transform_and_produce/script.py,sha256=8kSMIjQNqGYEVyE0PvrfJh-a_UYCrJSstTp_De7kyyg,2306
 examples/transform_and_produce/simple.py,sha256=H7xqxItjl4tx34wVW5dy6mB9G39QucAtxkJSBzVmjgA,1637
-sentry_arroyo-2.25.0.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
+sentry_arroyo-2.26.0.dist-info/licenses/LICENSE,sha256=0Ng3MFdEcnz0sVD1XvGBBzbavvNp_7OAM5yVObB46jU,10829
 tests/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/backends/mixins.py,sha256=sfNyE0VTeiD3GHOnBYl-9urvPuURI2G1BWke0cz7Dvc,20445
 tests/backends/test_commit.py,sha256=iTHfK1qsBxim0XwxgMvNNSMqDUMEHoYkYBDcgxGBFbs,831
@@ -71,9 +71,9 @@ tests/processing/strategies/test_run_task_with_multiprocessing.py,sha256=eyv3O5X
 tests/processing/strategies/test_unfold.py,sha256=mbC4XhT6GkJRuC7vPR0h7jqwt4cu20q7Z114EJ6J9mQ,2009
 tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/utils/test_concurrent.py,sha256=Gwdzym2UZ1HO3rhOSGmzxImWcLFygY8P7MXHT3Q0xTE,455
-tests/utils/test_metrics.py,sha256=CL9cfWhBFm63Z5svNFIiLaddKsL1bINcOeKiekziQbs,2050
+tests/utils/test_metrics.py,sha256=y54LVGIkXok5cNVKKrdCpHmpZ0uJQOefT-PLvqZzjcw,2280
 tests/utils/test_retries.py,sha256=AxJLkXWeL9AjHv_p1n0pe8CXXJp24ZQIuYBHfNcmiz4,3075
-sentry_arroyo-2.25.0.dist-info/METADATA,sha256=7i3y9LsMIaUw4QFDsMzVn0V09ISVaB9s29hYeHXVqiQ,2208
-sentry_arroyo-2.25.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sentry_arroyo-2.25.0.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
-sentry_arroyo-2.25.0.dist-info/RECORD,,
+sentry_arroyo-2.26.0.dist-info/METADATA,sha256=OiWrlIZUPspZgkXQ3UgNef4O8bRi3JfXDgh-Sm6etyk,2208
+sentry_arroyo-2.26.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sentry_arroyo-2.26.0.dist-info/top_level.txt,sha256=DVdMZKysL_iIxm5aY0sYgZtP5ZXMg9YBaBmGQHVmDXA,22
+sentry_arroyo-2.26.0.dist-info/RECORD,,
tests/utils/test_metrics.py CHANGED
@@ -1,8 +1,19 @@
 import pytest
 
-from arroyo.utils.metrics import Gauge, MetricName, configure_metrics, get_metrics, get_consumer_metrics
+from arroyo.utils.metrics import (
+    Gauge,
+    MetricName,
+    configure_metrics,
+    get_consumer_metrics,
+    get_metrics,
+)
 from tests.metrics import Gauge as GaugeCall
-from tests.metrics import Increment, Timing, TestingMetricsBackend, _TestingMetricsBackend
+from tests.metrics import (
+    Increment,
+    TestingMetricsBackend,
+    Timing,
+    _TestingMetricsBackend,
+)
 
 
 def test_gauge_simple() -> None:
@@ -40,7 +51,8 @@ def test_consumer_metrics_wrapper() -> None:
    configure_metrics(backend, force=True)
 
    consumer_member_id = "test-consumer-123"
-   consumer_metrics = get_consumer_metrics(consumer_member_id)
+   consumer_metrics = get_consumer_metrics()
+   consumer_metrics.consumer_member_id = consumer_member_id
 
    # Test increment
    consumer_metrics.increment("arroyo.consumer.run.count", 5, tags={"extra": "tag"})
@@ -52,9 +64,21 @@ def test_consumer_metrics_wrapper() -> None:
 
    consumer_metrics.timing("arroyo.consumer.poll.time", 100, tags={"another": "tag"})
    expected_calls = [
-       Increment("arroyo.consumer.run.count", 5, {"consumer_member_id": consumer_member_id, "extra": "tag"}),
-       GaugeCall("arroyo.consumer.librdkafka.total_queue_size", 10.5, {"consumer_member_id": consumer_member_id}),
-       Timing("arroyo.consumer.poll.time", 100, {"consumer_member_id": consumer_member_id, "another": "tag"}),
+       Increment(
+           "arroyo.consumer.run.count",
+           5,
+           {"consumer_member_id": consumer_member_id, "extra": "tag"},
+       ),
+       GaugeCall(
+           "arroyo.consumer.librdkafka.total_queue_size",
+           10.5,
+           {"consumer_member_id": consumer_member_id},
+       ),
+       Timing(
+           "arroyo.consumer.poll.time",
+           100,
+           {"consumer_member_id": consumer_member_id, "another": "tag"},
+       ),
    ]
 
    assert backend.calls == expected_calls