statsd-instrument 3.9.3 → 3.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: a392592155d75787a30ea7cb16342a84cda2a1c63deb978550f4f9118e997cbb
-   data.tar.gz: 61a5e84e4b025fead6941adef95d7ddd341a97597644719ce74d49ecfba1a595
+   metadata.gz: 8248b85fead25773388f7bec728c5a6347f7b7af61e37e064ee8ed93ba31397d
+   data.tar.gz: 110ab2e5db1b66ef7c76e11ed7b82c925f767df0b09a1ca442afe95c31e30a40
  SHA512:
-   metadata.gz: 7b703ad60f188205c6672498bb0791766e77763c867371025a01c4d4469895e11cc5a9b7ab4180e9db259066125392fe6074bf0cb8836c133a2a7dee8232bf61
-   data.tar.gz: cb9afebc8101618965e419a4ba841863d4c20df54fced19f4d71c726bf168930f74b4b5559538d797f4736a7f02342ded3b51aa7fad1116fe82e2dd78f38e1a2
+   metadata.gz: e85d2333cae7f6e843b25b3a1e64cf8bd1e68e47bcb530b6483dfa05e2672374791209c65f888cc6c9354f0e5bca997098f776c5db10f4dfd155005dd3f22bc9
+   data.tar.gz: 330d5d152a42dd76fae018e4af8c4daa64f9be50391468f64bd338ddd91ecd668a6775f590e45452b552c6fe28f0e82e125a3a91476d5269e52d19ad0adee092
data/.gitignore CHANGED
@@ -6,3 +6,4 @@ Gemfile.lock
  pkg/*
  vendor/
  tmp/*
+ coverage/*
data/CHANGELOG.md CHANGED
@@ -6,9 +6,14 @@ section below.
 
  ## Unreleased changes
 
+ ## Version 3.9.4
+
+ - [#384](https://github.com/Shopify/statsd-instrument/pull/384) - Aggregation: fixing bug when sending metrics synchronously
+   e.g. when the main thread is killed and we are forced to flush the metrics.
+
  ## Version 3.9.3
 
- - [#384](https://github.com/Shopify/statsd-instrument/pull/382) - Fix warnings in Rubocop cops.
+ - [#382](https://github.com/Shopify/statsd-instrument/pull/382) - Fix warnings in Rubocop cops.
 
  ## Version 3.9.2
 
data/Gemfile CHANGED
@@ -12,6 +12,8 @@ gem "rubocop", ">= 1.0"
  gem "rubocop-shopify", require: false
  gem "benchmark-ips"
  gem "dogstatsd-ruby", "~> 5.0", require: false
+ gem "simplecov", require: false, group: :test
+
  platform :mri do
    # only if Ruby is MRI && >= 3.2
    if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.2")
data/lib/statsd/instrument/aggregator.rb CHANGED
@@ -105,17 +105,7 @@ module StatsD
 
      @pid = Process.pid
      @flush_interval = flush_interval
-     @flush_thread = Thread.new do
-       Thread.current.abort_on_exception = true
-       loop do
-         sleep(@flush_interval)
-         thread_healthcheck
-         flush
-       rescue => e
-         StatsD.logger.error { "[#{self.class.name}] Error in flush thread: #{e}" }
-         raise e
-       end
-     end
+     start_flush_thread
 
      ObjectSpace.define_finalizer(
        self,
@@ -131,7 +121,7 @@ module StatsD
    # @return [void]
    def increment(name, value = 1, tags: [], no_prefix: false)
      unless thread_healthcheck
-       sink << datagram_builder(no_prefix: no_prefix).c(name, value, CONST_SAMPLE_RATE, tags)
+       @sink << datagram_builder(no_prefix: no_prefix).c(name, value, CONST_SAMPLE_RATE, tags)
        return
      end
 
@@ -146,8 +136,8 @@ module StatsD
 
    def aggregate_timing(name, value, tags: [], no_prefix: false, type: DISTRIBUTION)
      unless thread_healthcheck
-       sink << datagram_builder(no_prefix: no_prefix).timing_value_packed(
-         name, type, [value], CONST_SAMPLE_RATE, tags
+       @sink << datagram_builder(no_prefix: no_prefix).timing_value_packed(
+         name, type.to_s, [value], CONST_SAMPLE_RATE, tags
        )
        return
      end
@@ -166,7 +156,7 @@ module StatsD
 
    def gauge(name, value, tags: [], no_prefix: false)
      unless thread_healthcheck
-       sink << datagram_builder(no_prefix: no_prefix).g(name, value, CONST_SAMPLE_RATE, tags)
+       @sink << datagram_builder(no_prefix: no_prefix).g(name, value, CONST_SAMPLE_RATE, tags)
        return
      end
 
@@ -240,26 +230,41 @@ module StatsD
      )
    end
 
+   def start_flush_thread
+     @flush_thread = Thread.new do
+       Thread.current.abort_on_exception = true
+       loop do
+         sleep(@flush_interval)
+         thread_healthcheck
+         flush
+       end
+     rescue => e
+       StatsD.logger.error { "[#{self.class.name}] Error in flush thread: #{e}" }
+       raise e
+     end
+   end
+
    def thread_healthcheck
      @mutex.synchronize do
        unless @flush_thread&.alive?
+         # The main thread is dead, fallback to direct writes
          return false unless Thread.main.alive?
 
+         # If the PID changed, the process forked, reset the aggregator state
          if @pid != Process.pid
-           StatsD.logger.debug { "[#{self.class.name}] Restarting the flush thread after fork" }
+           # TODO: Investigate/replace this with Process._fork hook.
+           # https://github.com/ruby/ruby/pull/5017
+           StatsD.logger.debug do
+             "[#{self.class.name}] Restarting the flush thread after fork. State size: #{@aggregation_state.size}"
+           end
            @pid = Process.pid
+           # Clear the aggregation state to avoid duplicate metrics
            @aggregation_state.clear
          else
            StatsD.logger.debug { "[#{self.class.name}] Restarting the flush thread" }
          end
-         @flush_thread = Thread.new do
-           Thread.current.abort_on_exception = true
-           loop do
-             sleep(@flush_interval)
-             thread_healthcheck
-             flush
-           end
-         end
+         # Restart the flush thread
+         start_flush_thread
        end
        true
      end
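
Read together, the aggregator hunks above extract the flush loop into a start_flush_thread helper and funnel every fallback decision through thread_healthcheck: if the flush thread has died but the main thread is still alive, the thread is restarted (clearing any state inherited across a fork); if the main thread is also gone, datagrams are written straight to the sink. Below is a minimal, hypothetical Ruby sketch of that control flow; the names (MiniAggregator, @state) and the simplified counter format are illustrative only, not the gem's actual implementation.

  # Illustrative sketch of the restart/fallback logic shown in the diff above.
  class MiniAggregator
    def initialize(sink, flush_interval: 5.0)
      @sink = sink                 # anything responding to <<, e.g. an Array
      @flush_interval = flush_interval
      @pid = Process.pid
      @state = Hash.new(0)         # pending counter aggregations
      @mutex = Mutex.new
      start_flush_thread
    end

    def increment(name)
      unless thread_healthcheck
        @sink << "#{name}:1|c"     # no flush thread available: write synchronously
        return
      end
      @state[name] += 1
    end

    def flush
      @state.each { |name, count| @sink << "#{name}:#{count}|c" }
      @state.clear
    end

    private

    def start_flush_thread
      @flush_thread = Thread.new do
        loop do
          sleep(@flush_interval)
          flush
        end
      rescue => e
        warn("flush thread died: #{e}")
        raise
      end
    end

    def thread_healthcheck
      @mutex.synchronize do
        next true if @flush_thread&.alive?
        # Main thread gone (e.g. process shutting down): fall back to direct writes.
        next false unless Thread.main.alive?

        # After a fork, drop state inherited from the parent to avoid double counting.
        @state.clear if @pid != Process.pid
        @pid = Process.pid
        start_flush_thread
        true
      end
    end
  end

  # Example use: agg = MiniAggregator.new([], flush_interval: 0.1); agg.increment("page_views")
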
data/lib/statsd/instrument/version.rb CHANGED
@@ -2,6 +2,6 @@
 
  module StatsD
    module Instrument
-     VERSION = "3.9.3"
+     VERSION = "3.9.4"
    end
  end
data/test/aggregator_test.rb CHANGED
@@ -1,9 +1,29 @@
  # frozen_string_literal: true
 
  require "test_helper"
+ require "ostruct"
 
  class AggregatorTest < Minitest::Test
+   class CaptureLogger
+     attr_reader :messages
+
+     def initialize
+       @messages = []
+     end
+
+     [:debug, :info, :warn, :error, :fatal].each do |severity|
+       define_method(severity) do |message = nil, &block|
+         message = block.call if message.nil? && block
+         @messages << { severity: severity, message: message }
+       end
+     end
+   end
+
    def setup
+     @logger = CaptureLogger.new
+     @old_logger = StatsD.logger
+     StatsD.logger = @logger
+
      @sink = StatsD::Instrument::CaptureSink.new(parent: StatsD::Instrument::NullSink.new)
      @subject = StatsD::Instrument::Aggregator.new(
        @sink, StatsD::Instrument::DatagramBuilder, nil, [], flush_interval: 0.1
@@ -12,6 +32,7 @@ class AggregatorTest < Minitest::Test
 
    def teardown
      @sink.clear
+     StatsD.logger = @old_logger
    end
 
    def test_increment_simple
@@ -139,4 +160,174 @@ class AggregatorTest < Minitest::Test
      assert_equal("foo", @sink.datagrams.last.name)
      assert_equal(1, @sink.datagrams.last.value)
    end
+
+   def test_synchronous_operation_on_thread_failure
+     # Force thread_healthcheck to return false
+     @subject.stubs(:thread_healthcheck).returns(false)
+
+     # Stub methods on @aggregation_state to ensure they are not called
+     aggregation_state = @subject.instance_variable_get(:@aggregation_state)
+     aggregation_state.stubs(:[]=).never
+     aggregation_state.stubs(:clear).never
+
+     @subject.increment("foo", 1, tags: { foo: "bar" })
+     @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
+     @subject.gauge("baz", 100, tags: { foo: "bar" })
+
+     # Verify metrics were sent immediately
+     assert_equal(3, @sink.datagrams.size)
+
+     counter_datagram = @sink.datagrams.find { |d| d.name == "foo" }
+     assert_equal(1, counter_datagram.value)
+     assert_equal(["foo:bar"], counter_datagram.tags)
+
+     timing_datagram = @sink.datagrams.find { |d| d.name == "bar" }
+     assert_equal([100.0], [timing_datagram.value])
+     assert_equal(["foo:bar"], timing_datagram.tags)
+
+     gauge_datagram = @sink.datagrams.find { |d| d.name == "baz" }
+     assert_equal(100, gauge_datagram.value)
+     assert_equal(["foo:bar"], gauge_datagram.tags)
+
+     # Additional metrics should also go through synchronously
+     @subject.increment("foo", 1, tags: { foo: "bar" })
+     @subject.aggregate_timing("bar", 200, tags: { foo: "bar" })
+
+     # Verify new metrics were also sent immediately
+     assert_equal(5, @sink.datagrams.size)
+
+     counter_datagram = @sink.datagrams.select { |d| d.name == "foo" }.last
+     assert_equal(1, counter_datagram.value)
+     assert_equal(["foo:bar"], counter_datagram.tags)
+
+     timing_datagram = @sink.datagrams.select { |d| d.name == "bar" }.last
+     assert_equal([200.0], [timing_datagram.value])
+     assert_equal(["foo:bar"], timing_datagram.tags)
+
+     # undo the stubbing
+     @subject.unstub(:thread_healthcheck)
+   end
+
+   def test_recreate_thread_after_fork
+     skip("#{RUBY_ENGINE} not supported for this test. Reason: fork()") if RUBY_ENGINE != "ruby"
+     # Record initial metrics
+     @subject.increment("foo", 1, tags: { foo: "bar" })
+     @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
+
+     # kill the flush thread
+     @subject.instance_variable_get(:@flush_thread).kill
+
+     # Fork the process
+     pid = Process.fork do
+       # In forked process, send more metrics
+       @subject.increment("foo", 2, tags: { foo: "bar" })
+       @subject.aggregate_timing("bar", 200, tags: { foo: "bar" })
+       @subject.flush
+
+       assert_equal(2, @sink.datagrams.size)
+       exit!
+     end
+
+     # Wait for forked process to complete
+     Process.wait(pid)
+
+     # Send metrics in parent process
+     @subject.increment("foo", 3, tags: { foo: "bar" })
+     @subject.aggregate_timing("bar", 300, tags: { foo: "bar" })
+     @subject.flush
+
+     assert_equal(2, @sink.datagrams.size)
+
+     # Verify metrics were properly aggregated in parent process
+     counter_datagrams = @sink.datagrams.select { |d| d.name == "foo" }
+     timing_datagrams = @sink.datagrams.select { |d| d.name == "bar" }
+
+     assert_equal(1, counter_datagrams.size)
+     assert_equal(1, timing_datagrams.size)
+
+     # Aggregate despite fork
+     assert_equal(4, counter_datagrams.last.value)
+     assert_equal([100.0, 300.0], timing_datagrams.last.value)
+   end
+
+   def test_race_condition_during_forking
+     skip("#{RUBY_ENGINE} not supported for this test. Reason: fork()") if RUBY_ENGINE != "ruby"
+     # Record initial metrics
+     @subject.increment("before_fork.count", 1, tags: { foo: "bar" })
+     @subject.aggregate_timing("before_fork.timing", 100, tags: { foo: "bar" })
+
+     # Fork the process
+     pid = Process.fork do
+       # In forked process, send more metrics
+       @subject.increment("in_child.count", 2, tags: { foo: "bar" })
+       @subject.aggregate_timing("in_child.timing", 200, tags: { foo: "bar" })
+
+       # Simulate thread waiting for flush
+       sleep(0.1)
+       @subject.flush
+
+       assert_equal(2, @sink.datagrams.size)
+       exit!
+     end
+
+     # Call flush concurrently in parent process
+     @subject.flush
+
+     # Wait for forked process to complete
+     Process.wait(pid)
+
+     # Send metrics in parent process
+     @subject.increment("after_fork.count", 3, tags: { foo: "bar" })
+     @subject.aggregate_timing("after_fork.timing", 300, tags: { foo: "bar" })
+     @subject.flush
+
+     assert_equal(4, @sink.datagrams.size)
+
+     # Verify metrics were properly aggregated in parent process
+     counter_datagrams = @sink.datagrams.select { |d| d.name == "before_fork.count" }
+     timing_datagrams = @sink.datagrams.select { |d| d.name == "before_fork.count" }
+     assert_equal(
+       1,
+       counter_datagrams.size,
+       "Expected to find 1 counter datagram. Datagrams: #{@sink.datagrams.inspect}",
+     )
+     assert_equal(1, timing_datagrams.size)
+
+     # After fork metrics
+     counter_datagrams = @sink.datagrams.select { |d| d.name == "after_fork.count" }
+     timing_datagrams = @sink.datagrams.select { |d| d.name == "after_fork.count" }
+     assert_equal(1, counter_datagrams.size)
+     assert_equal(1, timing_datagrams.size)
+   end
+
+   def test_finalizer_flushes_pending_metrics
+     @subject.increment("foo", 1, tags: { foo: "bar" })
+     @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
+     @subject.gauge("baz", 100, tags: { foo: "bar" })
+
+     # Manually trigger the finalizer
+     finalizer = StatsD::Instrument::Aggregator.finalize(
+       @subject.instance_variable_get(:@aggregation_state),
+       @subject.instance_variable_get(:@sink),
+       @subject.instance_variable_get(:@datagram_builders),
+       StatsD::Instrument::DatagramBuilder,
+       [],
+     )
+     finalizer.call
+
+     # Verify that all pending metrics are sent
+     assert_equal(3, @sink.datagrams.size)
+
+     counter_datagram = @sink.datagrams.find { |d| d.name == "foo" }
+     assert_equal(1, counter_datagram.value)
+     assert_equal(["foo:bar"], counter_datagram.tags)
+
+     timing_datagram = @sink.datagrams.find { |d| d.name == "bar" }
+     assert_equal([100.0], [timing_datagram.value])
+     assert_equal(["foo:bar"], timing_datagram.tags)
+
+     gauge_datagram = @sink.datagrams.find { |d| d.name == "baz" }
+     assert_equal(100, gauge_datagram.value)
+     assert_equal(["foo:bar"], gauge_datagram.tags)
+   end
  end
data/test/test_helper.rb CHANGED
@@ -6,8 +6,9 @@ end
 
  ENV["ENV"] = "test"
 
- unless ENV.key?("CI")
-   require "minitest/pride"
+ if ENV["COVERAGE"]
+   require "simplecov"
+   SimpleCov.start
  end
  require "minitest/autorun"
  unless ENV.key?("CI")
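
Taken together with the Gemfile and .gitignore changes above, this wires up optional SimpleCov reporting: coverage is collected only when the COVERAGE environment variable is set (for example, something like COVERAGE=1 bundle exec rake test, assuming the gem's default Rake test task), and the generated coverage/ directory is now ignored by git.
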
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: statsd-instrument
  version: !ruby/object:Gem::Version
-   version: 3.9.3
+   version: 3.9.4
  platform: ruby
  authors:
  - Jesse Storimer
@@ -10,7 +10,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-10-22 00:00:00.000000000 Z
+ date: 2024-10-29 00:00:00.000000000 Z
  dependencies: []
  description: A StatsD client for Ruby apps. Provides metaprogramming methods to inject
    StatsD instrumentation into your code.