statsd-instrument 3.9.3 → 3.9.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a392592155d75787a30ea7cb16342a84cda2a1c63deb978550f4f9118e997cbb
4
- data.tar.gz: 61a5e84e4b025fead6941adef95d7ddd341a97597644719ce74d49ecfba1a595
3
+ metadata.gz: 8248b85fead25773388f7bec728c5a6347f7b7af61e37e064ee8ed93ba31397d
4
+ data.tar.gz: 110ab2e5db1b66ef7c76e11ed7b82c925f767df0b09a1ca442afe95c31e30a40
5
5
  SHA512:
6
- metadata.gz: 7b703ad60f188205c6672498bb0791766e77763c867371025a01c4d4469895e11cc5a9b7ab4180e9db259066125392fe6074bf0cb8836c133a2a7dee8232bf61
7
- data.tar.gz: cb9afebc8101618965e419a4ba841863d4c20df54fced19f4d71c726bf168930f74b4b5559538d797f4736a7f02342ded3b51aa7fad1116fe82e2dd78f38e1a2
6
+ metadata.gz: e85d2333cae7f6e843b25b3a1e64cf8bd1e68e47bcb530b6483dfa05e2672374791209c65f888cc6c9354f0e5bca997098f776c5db10f4dfd155005dd3f22bc9
7
+ data.tar.gz: 330d5d152a42dd76fae018e4af8c4daa64f9be50391468f64bd338ddd91ecd668a6775f590e45452b552c6fe28f0e82e125a3a91476d5269e52d19ad0adee092
data/.gitignore CHANGED
@@ -6,3 +6,4 @@ Gemfile.lock
6
6
  pkg/*
7
7
  vendor/
8
8
  tmp/*
9
+ coverage/*
data/CHANGELOG.md CHANGED
@@ -6,9 +6,14 @@ section below.
6
6
 
7
7
  ## Unreleased changes
8
8
 
9
+ ## Version 3.9.4
10
+
11
+ - [#384](https://github.com/Shopify/statsd-instrument/pull/384) - Aggregation: fixing bug when sending metrics synchronously
12
+ e.g. when the main thread is killed and we are forced to flush the metrics.
13
+
9
14
  ## Version 3.9.3
10
15
 
11
- - [#384](https://github.com/Shopify/statsd-instrument/pull/382) - Fix warnings in Rubocop cops.
16
+ - [#382](https://github.com/Shopify/statsd-instrument/pull/382) - Fix warnings in Rubocop cops.
12
17
 
13
18
  ## Version 3.9.2
14
19
 
data/Gemfile CHANGED
@@ -12,6 +12,8 @@ gem "rubocop", ">= 1.0"
12
12
  gem "rubocop-shopify", require: false
13
13
  gem "benchmark-ips"
14
14
  gem "dogstatsd-ruby", "~> 5.0", require: false
15
+ gem "simplecov", require: false, group: :test
16
+
15
17
  platform :mri do
16
18
  # only if Ruby is MRI && >= 3.2
17
19
  if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.2")
@@ -105,17 +105,7 @@ module StatsD
105
105
 
106
106
  @pid = Process.pid
107
107
  @flush_interval = flush_interval
108
- @flush_thread = Thread.new do
109
- Thread.current.abort_on_exception = true
110
- loop do
111
- sleep(@flush_interval)
112
- thread_healthcheck
113
- flush
114
- rescue => e
115
- StatsD.logger.error { "[#{self.class.name}] Error in flush thread: #{e}" }
116
- raise e
117
- end
118
- end
108
+ start_flush_thread
119
109
 
120
110
  ObjectSpace.define_finalizer(
121
111
  self,
@@ -131,7 +121,7 @@ module StatsD
131
121
  # @return [void]
132
122
  def increment(name, value = 1, tags: [], no_prefix: false)
133
123
  unless thread_healthcheck
134
- sink << datagram_builder(no_prefix: no_prefix).c(name, value, CONST_SAMPLE_RATE, tags)
124
+ @sink << datagram_builder(no_prefix: no_prefix).c(name, value, CONST_SAMPLE_RATE, tags)
135
125
  return
136
126
  end
137
127
 
@@ -146,8 +136,8 @@ module StatsD
146
136
 
147
137
  def aggregate_timing(name, value, tags: [], no_prefix: false, type: DISTRIBUTION)
148
138
  unless thread_healthcheck
149
- sink << datagram_builder(no_prefix: no_prefix).timing_value_packed(
150
- name, type, [value], CONST_SAMPLE_RATE, tags
139
+ @sink << datagram_builder(no_prefix: no_prefix).timing_value_packed(
140
+ name, type.to_s, [value], CONST_SAMPLE_RATE, tags
151
141
  )
152
142
  return
153
143
  end
@@ -166,7 +156,7 @@ module StatsD
166
156
 
167
157
  def gauge(name, value, tags: [], no_prefix: false)
168
158
  unless thread_healthcheck
169
- sink << datagram_builder(no_prefix: no_prefix).g(name, value, CONST_SAMPLE_RATE, tags)
159
+ @sink << datagram_builder(no_prefix: no_prefix).g(name, value, CONST_SAMPLE_RATE, tags)
170
160
  return
171
161
  end
172
162
 
@@ -240,26 +230,41 @@ module StatsD
240
230
  )
241
231
  end
242
232
 
233
+ def start_flush_thread
234
+ @flush_thread = Thread.new do
235
+ Thread.current.abort_on_exception = true
236
+ loop do
237
+ sleep(@flush_interval)
238
+ thread_healthcheck
239
+ flush
240
+ end
241
+ rescue => e
242
+ StatsD.logger.error { "[#{self.class.name}] Error in flush thread: #{e}" }
243
+ raise e
244
+ end
245
+ end
246
+
243
247
  def thread_healthcheck
244
248
  @mutex.synchronize do
245
249
  unless @flush_thread&.alive?
250
+ # The main thread is dead, fallback to direct writes
246
251
  return false unless Thread.main.alive?
247
252
 
253
+ # If the PID changed, the process forked, reset the aggregator state
248
254
  if @pid != Process.pid
249
- StatsD.logger.debug { "[#{self.class.name}] Restarting the flush thread after fork" }
255
+ # TODO: Investigate/replace this with Process._fork hook.
256
+ # https://github.com/ruby/ruby/pull/5017
257
+ StatsD.logger.debug do
258
+ "[#{self.class.name}] Restarting the flush thread after fork. State size: #{@aggregation_state.size}"
259
+ end
250
260
  @pid = Process.pid
261
+ # Clear the aggregation state to avoid duplicate metrics
251
262
  @aggregation_state.clear
252
263
  else
253
264
  StatsD.logger.debug { "[#{self.class.name}] Restarting the flush thread" }
254
265
  end
255
- @flush_thread = Thread.new do
256
- Thread.current.abort_on_exception = true
257
- loop do
258
- sleep(@flush_interval)
259
- thread_healthcheck
260
- flush
261
- end
262
- end
266
+ # Restart the flush thread
267
+ start_flush_thread
263
268
  end
264
269
  true
265
270
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module StatsD
4
4
  module Instrument
5
- VERSION = "3.9.3"
5
+ VERSION = "3.9.4"
6
6
  end
7
7
  end
@@ -1,9 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "test_helper"
4
+ require "ostruct"
4
5
 
5
6
  class AggregatorTest < Minitest::Test
7
+ class CaptureLogger
8
+ attr_reader :messages
9
+
10
+ def initialize
11
+ @messages = []
12
+ end
13
+
14
+ [:debug, :info, :warn, :error, :fatal].each do |severity|
15
+ define_method(severity) do |message = nil, &block|
16
+ message = block.call if message.nil? && block
17
+ @messages << { severity: severity, message: message }
18
+ end
19
+ end
20
+ end
21
+
6
22
  def setup
23
+ @logger = CaptureLogger.new
24
+ @old_logger = StatsD.logger
25
+ StatsD.logger = @logger
26
+
7
27
  @sink = StatsD::Instrument::CaptureSink.new(parent: StatsD::Instrument::NullSink.new)
8
28
  @subject = StatsD::Instrument::Aggregator.new(
9
29
  @sink, StatsD::Instrument::DatagramBuilder, nil, [], flush_interval: 0.1
@@ -12,6 +32,7 @@ class AggregatorTest < Minitest::Test
12
32
 
13
33
  def teardown
14
34
  @sink.clear
35
+ StatsD.logger = @old_logger
15
36
  end
16
37
 
17
38
  def test_increment_simple
@@ -139,4 +160,174 @@ class AggregatorTest < Minitest::Test
139
160
  assert_equal("foo", @sink.datagrams.last.name)
140
161
  assert_equal(1, @sink.datagrams.last.value)
141
162
  end
163
+
164
+ def test_synchronous_operation_on_thread_failure
165
+ # Force thread_healthcheck to return false
166
+ @subject.stubs(:thread_healthcheck).returns(false)
167
+
168
+ # Stub methods on @aggregation_state to ensure they are not called
169
+ aggregation_state = @subject.instance_variable_get(:@aggregation_state)
170
+ aggregation_state.stubs(:[]=).never
171
+ aggregation_state.stubs(:clear).never
172
+
173
+ @subject.increment("foo", 1, tags: { foo: "bar" })
174
+ @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
175
+ @subject.gauge("baz", 100, tags: { foo: "bar" })
176
+
177
+ # Verify metrics were sent immediately
178
+ assert_equal(3, @sink.datagrams.size)
179
+
180
+ counter_datagram = @sink.datagrams.find { |d| d.name == "foo" }
181
+ assert_equal(1, counter_datagram.value)
182
+ assert_equal(["foo:bar"], counter_datagram.tags)
183
+
184
+ timing_datagram = @sink.datagrams.find { |d| d.name == "bar" }
185
+ assert_equal([100.0], [timing_datagram.value])
186
+ assert_equal(["foo:bar"], timing_datagram.tags)
187
+
188
+ gauge_datagram = @sink.datagrams.find { |d| d.name == "baz" }
189
+ assert_equal(100, gauge_datagram.value)
190
+ assert_equal(["foo:bar"], gauge_datagram.tags)
191
+
192
+ # Additional metrics should also go through synchronously
193
+ @subject.increment("foo", 1, tags: { foo: "bar" })
194
+ @subject.aggregate_timing("bar", 200, tags: { foo: "bar" })
195
+
196
+ # Verify new metrics were also sent immediately
197
+ assert_equal(5, @sink.datagrams.size)
198
+
199
+ counter_datagram = @sink.datagrams.select { |d| d.name == "foo" }.last
200
+ assert_equal(1, counter_datagram.value)
201
+ assert_equal(["foo:bar"], counter_datagram.tags)
202
+
203
+ timing_datagram = @sink.datagrams.select { |d| d.name == "bar" }.last
204
+ assert_equal([200.0], [timing_datagram.value])
205
+ assert_equal(["foo:bar"], timing_datagram.tags)
206
+
207
+ # undo the stubbing
208
+ @subject.unstub(:thread_healthcheck)
209
+ end
210
+
211
+ def test_recreate_thread_after_fork
212
+ skip("#{RUBY_ENGINE} not supported for this test. Reason: fork()") if RUBY_ENGINE != "ruby"
213
+ # Record initial metrics
214
+ @subject.increment("foo", 1, tags: { foo: "bar" })
215
+ @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
216
+
217
+ # kill the flush thread
218
+ @subject.instance_variable_get(:@flush_thread).kill
219
+
220
+ # Fork the process
221
+ pid = Process.fork do
222
+ # In forked process, send more metrics
223
+ @subject.increment("foo", 2, tags: { foo: "bar" })
224
+ @subject.aggregate_timing("bar", 200, tags: { foo: "bar" })
225
+ @subject.flush
226
+
227
+ assert_equal(2, @sink.datagrams.size)
228
+ exit!
229
+ end
230
+
231
+ # Wait for forked process to complete
232
+ Process.wait(pid)
233
+
234
+ # Send metrics in parent process
235
+ @subject.increment("foo", 3, tags: { foo: "bar" })
236
+ @subject.aggregate_timing("bar", 300, tags: { foo: "bar" })
237
+ @subject.flush
238
+
239
+ assert_equal(2, @sink.datagrams.size)
240
+
241
+ # Verify metrics were properly aggregated in parent process
242
+ counter_datagrams = @sink.datagrams.select { |d| d.name == "foo" }
243
+ timing_datagrams = @sink.datagrams.select { |d| d.name == "bar" }
244
+
245
+ assert_equal(1, counter_datagrams.size)
246
+ assert_equal(1, timing_datagrams.size)
247
+
248
+ # Aggregate despite fork
249
+ assert_equal(4, counter_datagrams.last.value)
250
+ assert_equal([100.0, 300.0], timing_datagrams.last.value)
251
+ end
252
+
253
+ def test_race_condition_during_forking
254
+ skip("#{RUBY_ENGINE} not supported for this test. Reason: fork()") if RUBY_ENGINE != "ruby"
255
+ # Record initial metrics
256
+ @subject.increment("before_fork.count", 1, tags: { foo: "bar" })
257
+ @subject.aggregate_timing("before_fork.timing", 100, tags: { foo: "bar" })
258
+
259
+ # Fork the process
260
+ pid = Process.fork do
261
+ # In forked process, send more metrics
262
+ @subject.increment("in_child.count", 2, tags: { foo: "bar" })
263
+ @subject.aggregate_timing("in_child.timing", 200, tags: { foo: "bar" })
264
+
265
+ # Simulate thread waiting for flush
266
+ sleep(0.1)
267
+ @subject.flush
268
+
269
+ assert_equal(2, @sink.datagrams.size)
270
+ exit!
271
+ end
272
+
273
+ # Call flush concurrently in parent process
274
+ @subject.flush
275
+
276
+ # Wait for forked process to complete
277
+ Process.wait(pid)
278
+
279
+ # Send metrics in parent process
280
+ @subject.increment("after_fork.count", 3, tags: { foo: "bar" })
281
+ @subject.aggregate_timing("after_fork.timing", 300, tags: { foo: "bar" })
282
+ @subject.flush
283
+
284
+ assert_equal(4, @sink.datagrams.size)
285
+
286
+ # Verify metrics were properly aggregated in parent process
287
+ counter_datagrams = @sink.datagrams.select { |d| d.name == "before_fork.count" }
288
+ timing_datagrams = @sink.datagrams.select { |d| d.name == "before_fork.timing" }
289
+ assert_equal(
290
+ 1,
291
+ counter_datagrams.size,
292
+ "Expected to find 1 counter datagram. Datagrams: #{@sink.datagrams.inspect}",
293
+ )
294
+ assert_equal(1, timing_datagrams.size)
295
+
296
+ # After fork metrics
297
+ counter_datagrams = @sink.datagrams.select { |d| d.name == "after_fork.count" }
298
+ timing_datagrams = @sink.datagrams.select { |d| d.name == "after_fork.timing" }
299
+ assert_equal(1, counter_datagrams.size)
300
+ assert_equal(1, timing_datagrams.size)
301
+ end
302
+
303
+ def test_finalizer_flushes_pending_metrics
304
+ @subject.increment("foo", 1, tags: { foo: "bar" })
305
+ @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
306
+ @subject.gauge("baz", 100, tags: { foo: "bar" })
307
+
308
+ # Manually trigger the finalizer
309
+ finalizer = StatsD::Instrument::Aggregator.finalize(
310
+ @subject.instance_variable_get(:@aggregation_state),
311
+ @subject.instance_variable_get(:@sink),
312
+ @subject.instance_variable_get(:@datagram_builders),
313
+ StatsD::Instrument::DatagramBuilder,
314
+ [],
315
+ )
316
+ finalizer.call
317
+
318
+ # Verify that all pending metrics are sent
319
+ assert_equal(3, @sink.datagrams.size)
320
+
321
+ counter_datagram = @sink.datagrams.find { |d| d.name == "foo" }
322
+ assert_equal(1, counter_datagram.value)
323
+ assert_equal(["foo:bar"], counter_datagram.tags)
324
+
325
+ timing_datagram = @sink.datagrams.find { |d| d.name == "bar" }
326
+ assert_equal([100.0], [timing_datagram.value])
327
+ assert_equal(["foo:bar"], timing_datagram.tags)
328
+
329
+ gauge_datagram = @sink.datagrams.find { |d| d.name == "baz" }
330
+ assert_equal(100, gauge_datagram.value)
331
+ assert_equal(["foo:bar"], gauge_datagram.tags)
332
+ end
142
333
  end
data/test/test_helper.rb CHANGED
@@ -6,8 +6,9 @@ end
6
6
 
7
7
  ENV["ENV"] = "test"
8
8
 
9
- unless ENV.key?("CI")
10
- require "minitest/pride"
9
+ if ENV["COVERAGE"]
10
+ require "simplecov"
11
+ SimpleCov.start
11
12
  end
12
13
  require "minitest/autorun"
13
14
  unless ENV.key?("CI")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsd-instrument
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.9.3
4
+ version: 3.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jesse Storimer
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2024-10-22 00:00:00.000000000 Z
13
+ date: 2024-10-29 00:00:00.000000000 Z
14
14
  dependencies: []
15
15
  description: A StatsD client for Ruby apps. Provides metaprogramming methods to inject
16
16
  StatsD instrumentation into your code.