statsd-instrument 3.9.4 → 3.9.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8248b85fead25773388f7bec728c5a6347f7b7af61e37e064ee8ed93ba31397d
-  data.tar.gz: 110ab2e5db1b66ef7c76e11ed7b82c925f767df0b09a1ca442afe95c31e30a40
+  metadata.gz: 62a2601a6b425e15a8f3b616fcb182d48548179d95bc5b9bbdef74f5cdabcc1d
+  data.tar.gz: 64a4fd724bcee0e6fe8cac4001439c6a9e0b4b9e5234cd5b654b3df4045171cf
 SHA512:
-  metadata.gz: e85d2333cae7f6e843b25b3a1e64cf8bd1e68e47bcb530b6483dfa05e2672374791209c65f888cc6c9354f0e5bca997098f776c5db10f4dfd155005dd3f22bc9
-  data.tar.gz: 330d5d152a42dd76fae018e4af8c4daa64f9be50391468f64bd338ddd91ecd668a6775f590e45452b552c6fe28f0e82e125a3a91476d5269e52d19ad0adee092
+  metadata.gz: d74d19d5e29895763d423dd0daf67bedfeeb3e05ebff891f87eda90d4482648f5cd8bb3b163c000f2fe70473ece32f9f4febf36530bcfd16ab2d9d699cf1d49d
+  data.tar.gz: 18d287d1d24792d9e63d7d2f2db3bee74fd929d7e4fd5ea1dccb50447a7278de7cfe8f088184064298809e70ef82b3d719dc27c5d6dc07edbc01b5747dff7246
data/CHANGELOG.md CHANGED
@@ -6,6 +6,16 @@ section below.
 
 ## Unreleased changes
 
+## Version 3.9.6
+
+- [#388](https://github.com/Shopify/statsd-instrument/pull/388) - Properly fix the bug when using aggregation and sending sampled
+  histograms: the client now respects the sampling rate when sending metrics and passes it down to the aggregator.
+
+## Version 3.9.5
+
+- [#387](https://github.com/Shopify/statsd-instrument/pull/387) - Fix a bug when using aggregation and sending sampled
+  histogram metrics: they were not scaled properly because the sampling rate was missing from the final sample sent.
+
 ## Version 3.9.4
 
 - [#384](https://github.com/Shopify/statsd-instrument/pull/384) - Aggregation: fixing bug when sending metrics synchronously
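
To make the fix concrete: with aggregation enabled, a sampled distribution is now flushed with its sample rate attached, so the backend can scale the recorded values back up. A minimal sketch, reusing the environment-driven setup from the integration test further below; the address, metric name, and rate are illustrative:

```ruby
require "statsd-instrument"

# Assumes a local DogStatsD-compatible listener on 127.0.0.1:8125.
client = StatsD::Instrument::Environment.new(
  "STATSD_ADDR" => "127.0.0.1:8125",
  "STATSD_IMPLEMENTATION" => "dogstatsd",
  "STATSD_ENV" => "production",
  "STATSD_ENABLE_AGGREGATION" => "true",
).client

# Roughly 10% of these calls survive client-side sampling. Before 3.9.5/3.9.6
# the flushed datagram omitted the rate ("...|d"); now it keeps "|@0.1".
100.times { client.distribution("request.duration", 42.0, sample_rate: 0.1) }
```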
data/lib/statsd/instrument/aggregator.rb CHANGED
@@ -3,13 +3,14 @@
 module StatsD
   module Instrument
     class AggregationKey
-      attr_reader :name, :tags, :no_prefix, :type, :hash
+      attr_reader :name, :tags, :no_prefix, :type, :hash, :sample_rate
 
-      def initialize(name, tags, no_prefix, type)
+      def initialize(name, tags, no_prefix, type, sample_rate: 1.0)
         @name = name
         @tags = tags
         @no_prefix = no_prefix
         @type = type
+        @sample_rate = sample_rate
         @hash = [@name, @tags, @no_prefix, @type].hash
       end
 
@@ -56,7 +57,7 @@ module StatsD
           key.name,
           key.type.to_s,
           agg_value,
-          CONST_SAMPLE_RATE,
+          key.sample_rate,
           key.tags,
         )
       when GAUGE
@@ -134,16 +135,16 @@ module StatsD
         end
       end
 
-      def aggregate_timing(name, value, tags: [], no_prefix: false, type: DISTRIBUTION)
+      def aggregate_timing(name, value, tags: [], no_prefix: false, type: DISTRIBUTION, sample_rate: CONST_SAMPLE_RATE)
         unless thread_healthcheck
           @sink << datagram_builder(no_prefix: no_prefix).timing_value_packed(
-            name, type.to_s, [value], CONST_SAMPLE_RATE, tags
+            name, type.to_s, [value], sample_rate, tags
           )
           return
         end
 
         tags = tags_sorted(tags)
-        key = packet_key(name, tags, no_prefix, type)
+        key = packet_key(name, tags, no_prefix, type, sample_rate: sample_rate)
 
         @mutex.synchronize do
           values = @aggregation_state[key] ||= []
@@ -176,6 +177,9 @@ module StatsD
 
       EMPTY_ARRAY = [].freeze
 
+      # Flushes the aggregated metrics to the sink.
+      # Iterates over the aggregation state and sends each metric to the sink.
+      # If you change this method, you need to update the logic in the finalizer as well.
       def do_flush
         @aggregation_state.each do |key, value|
           case key.type
@@ -191,7 +195,7 @@ module StatsD
             key.name,
             key.type.to_s,
             value,
-            CONST_SAMPLE_RATE,
+            key.sample_rate,
             key.tags,
           )
         when GAUGE
@@ -219,8 +223,14 @@ module StatsD
         datagram_builder(no_prefix: false).normalize_tags(tags)
       end
 
-      def packet_key(name, tags = "".b, no_prefix = false, type = COUNT)
-        AggregationKey.new(DatagramBuilder.normalize_string(name), tags, no_prefix, type).freeze
+      def packet_key(name, tags = "".b, no_prefix = false, type = COUNT, sample_rate: CONST_SAMPLE_RATE)
+        AggregationKey.new(
+          DatagramBuilder.normalize_string(name),
+          tags,
+          no_prefix,
+          type,
+          sample_rate: sample_rate,
+        ).freeze
       end
 
       def datagram_builder(no_prefix:)
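
Downstream, `key.sample_rate` ends up in the packed datagram built by `timing_value_packed`. A sketch of what this looks like from the outside, using the library's capture and null sinks; the metric name, value, and rate are illustrative:

```ruby
require "statsd-instrument"

sink = StatsD::Instrument::CaptureSink.new(parent: StatsD::Instrument::NullSink.new)
client = StatsD::Instrument::Client.new(
  sink: sink,
  implementation: "dogstatsd",
  enable_aggregation: true,
)

# Values that survive client-side sampling are packed into one datagram
# per flush interval.
100.times { client.distribution("metric", 60, sample_rate: 0.5) }
client.force_flush

# With this change the packed datagram keeps the rate suffix, e.g.
# "metric:60:60:...|d|@0.5", instead of dropping it ("metric:60:60:...|d").
puts sink.datagrams.map(&:source)
```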
data/lib/statsd/instrument/client.rb CHANGED
@@ -318,25 +318,27 @@ module StatsD
     # @param tags (see #increment)
     # @return [void]
     def distribution(name, value = nil, sample_rate: nil, tags: nil, no_prefix: false, &block)
+      if block_given?
+        return latency(name, sample_rate: sample_rate, tags: tags, metric_type: :d, no_prefix: no_prefix, &block)
+      end
+
+      # For all timing metrics, we have to use the sampling logic.
+      # Not doing so would impact performance and CPU usage.
+      # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
       sample_rate ||= @default_sample_rate
       if sample_rate && !sample?(sample_rate)
-        # For all timing metrics, we have to use the sampling logic.
-        # Not doing so would impact performance and CPU usage.
-        # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
-
-        if block_given?
-          return yield
-        end
-
         return StatsD::Instrument::VOID
       end
 
-      if block_given?
-        return latency(name, sample_rate: sample_rate, tags: tags, metric_type: :d, no_prefix: no_prefix, &block)
-      end
-
       if @enable_aggregation
-        @aggregator.aggregate_timing(name, value, tags: tags, no_prefix: no_prefix, type: :d)
+        @aggregator.aggregate_timing(
+          name,
+          value,
+          tags: tags,
+          no_prefix: no_prefix,
+          type: :d,
+          sample_rate: sample_rate,
+        )
         return StatsD::Instrument::VOID
       end
 
@@ -392,13 +394,26 @@ module StatsD
     ensure
       stop = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
 
-      metric_type ||= datagram_builder(no_prefix: no_prefix).latency_metric_type
-      latency_in_ms = stop - start
-      if @enable_aggregation
-        @aggregator.aggregate_timing(name, latency_in_ms, tags: tags, no_prefix: no_prefix, type: metric_type)
-      else
-        sample_rate ||= @default_sample_rate
-        if sample_rate.nil? || sample?(sample_rate)
+      # For all timing metrics, we have to use the sampling logic.
+      # Not doing so would impact performance and CPU usage.
+      # See Datadog's documentation for more details:
+      # https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
+      sample_rate ||= @default_sample_rate
+      if sample_rate.nil? || sample?(sample_rate)
+
+        metric_type ||= datagram_builder(no_prefix: no_prefix).latency_metric_type
+        latency_in_ms = stop - start
+
+        if @enable_aggregation
+          @aggregator.aggregate_timing(
+            name,
+            latency_in_ms,
+            tags: tags,
+            no_prefix: no_prefix,
+            type: metric_type,
+            sample_rate: sample_rate,
+          )
+        else
           emit(datagram_builder(no_prefix: no_prefix).send(metric_type, name, latency_in_ms, sample_rate, tags))
         end
       end
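
Both hunks converge on the same ordering: decide sampling first, then either hand the value to the aggregator (keeping the rate) or emit it directly. A hypothetical condensation of that flow, not the library's literal code:

```ruby
# Hypothetical condensation of the post-3.9.6 timing flow.
class TimingFlow
  def initialize(aggregator: nil)
    @aggregator = aggregator # nil stands in for "aggregation disabled"
  end

  def record(name, value, sample_rate:)
    return :skipped unless rand < sample_rate # stands in for Client#sample?

    if @aggregator
      # The rate travels with the aggregation key so flush can re-attach it.
      @aggregator.aggregate_timing(name, value, type: :d, sample_rate: sample_rate)
    else
      puts "#{name}:#{value}|d|@#{sample_rate}" # stands in for emit(...)
    end
  end
end
```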
data/lib/statsd/instrument/version.rb CHANGED
@@ -2,6 +2,6 @@
 
 module StatsD
   module Instrument
-    VERSION = "3.9.4"
+    VERSION = "3.9.6"
   end
 end
data/test/aggregator_test.rb CHANGED
@@ -1,7 +1,6 @@
 # frozen_string_literal: true
 
 require "test_helper"
-require "ostruct"
 
 class AggregatorTest < Minitest::Test
   class CaptureLogger
@@ -58,6 +57,24 @@ class AggregatorTest < Minitest::Test
     assert_equal([1.0, 100.0], datagram.value)
   end
 
+  def test_timing_sampling_scaling
+    @subject.aggregate_timing("timing.sampled", 60.0, sample_rate: 0.01)
+    @subject.aggregate_timing("timing.sampled", 80.0, sample_rate: 0.01)
+    @subject.aggregate_timing("timing.unsampled", 60.0, sample_rate: 1.0)
+
+    @subject.flush
+
+    assert_equal(2, @sink.datagrams.size)
+
+    sampled_datagram = @sink.datagrams.find { |d| d.name == "timing.sampled" }
+    assert_equal([60.0, 80.0], sampled_datagram.value)
+    assert_equal(0.01, sampled_datagram.sample_rate)
+    assert_equal("timing.sampled:60.0:80.0|d|@0.01", sampled_datagram.source)
+
+    unsampled_datagram = @sink.datagrams.find { |d| d.name == "timing.unsampled" }
+    assert_equal(60.0, unsampled_datagram.value)
+  end
+
   def test_mixed_type_timings
     @subject.aggregate_timing("foo_ms", 1, tags: { foo: "bar" }, type: :ms)
     @subject.aggregate_timing("foo_ms", 100, tags: { foo: "bar" }, type: :ms)
@@ -191,7 +208,7 @@ class AggregatorTest < Minitest::Test
 
     # Additional metrics should also go through synchronously
     @subject.increment("foo", 1, tags: { foo: "bar" })
-    @subject.aggregate_timing("bar", 200, tags: { foo: "bar" })
+    @subject.aggregate_timing("bar", 200, tags: { foo: "bar" }, sample_rate: 0.5)
 
     # Verify new metrics were also sent immediately
     assert_equal(5, @sink.datagrams.size)
@@ -203,6 +220,7 @@ class AggregatorTest < Minitest::Test
     timing_datagram = @sink.datagrams.select { |d| d.name == "bar" }.last
     assert_equal([200.0], [timing_datagram.value])
     assert_equal(["foo:bar"], timing_datagram.tags)
+    assert_equal(0.5, timing_datagram.sample_rate)
 
     # undo the stubbing
     @subject.unstub(:thread_healthcheck)
@@ -304,6 +322,7 @@ class AggregatorTest < Minitest::Test
     @subject.increment("foo", 1, tags: { foo: "bar" })
     @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
     @subject.gauge("baz", 100, tags: { foo: "bar" })
+    @subject.aggregate_timing("sampled_timing", 100, tags: { foo: "bar" }, sample_rate: 0.01)
 
     # Manually trigger the finalizer
     finalizer = StatsD::Instrument::Aggregator.finalize(
@@ -316,7 +335,7 @@ class AggregatorTest < Minitest::Test
     finalizer.call
 
     # Verify that all pending metrics are sent
-    assert_equal(3, @sink.datagrams.size)
+    assert_equal(4, @sink.datagrams.size)
 
     counter_datagram = @sink.datagrams.find { |d| d.name == "foo" }
     assert_equal(1, counter_datagram.value)
@@ -329,5 +348,9 @@ class AggregatorTest < Minitest::Test
     gauge_datagram = @sink.datagrams.find { |d| d.name == "baz" }
     assert_equal(100, gauge_datagram.value)
     assert_equal(["foo:bar"], gauge_datagram.tags)
+
+    sampled_timing_datagram = @sink.datagrams.find { |d| d.name == "sampled_timing" }
+    assert_equal(100.0, sampled_timing_datagram.value)
+    assert_equal(0.01, sampled_timing_datagram.sample_rate)
   end
 end
data/test/client_test.rb CHANGED
@@ -90,7 +90,7 @@ class ClientTest < Minitest::Test
     client.measure("block_duration_example") { 1 + 1 }
     client.force_flush
 
-    datagram = client.sink.datagrams.first
+    datagram = client.sink.datagrams.find { |d| d.name == "bar.foo" }
     assert_equal("bar.foo", datagram.name)
     assert_equal(2, datagram.value)
 
@@ -249,12 +249,17 @@ class ClientTest < Minitest::Test
     mock_sink = mock("sink")
     mock_sink.stubs(:sample?).returns(false, true, false, false, true)
     # Since we are aggregating, we only expect a single datagram.
-    mock_sink.expects(:<<).with("metric:60:60|d").once
+    mock_sink.expects(:<<).with("metric:60:60|d|@0.5").once
     mock_sink.expects(:flush).once
 
     client = StatsD::Instrument::Client.new(sink: mock_sink, default_sample_rate: 0.5, enable_aggregation: true)
     5.times { client.distribution("metric", 60) }
     client.force_flush
+
+    # undo mock
+    mock_sink.unstub(:sample?)
+    mock_sink.unstub(:<<)
+    mock_sink.unstub(:flush)
   end
 
   def test_clone_with_prefix_option
data/test/integration_test.rb CHANGED
@@ -77,4 +77,30 @@ class IntegrationTest < Minitest::Test
     assert_equal("counter:20|c", @server.recvfrom(100).first)
     assert_operator(Time.now - before_flush, :<, 0.3, "Flush and ingest should have happened within 0.3s")
   end
+
+  def test_live_local_udp_socket_with_aggregation_sampled_scenario
+    client = StatsD::Instrument::Environment.new(
+      "STATSD_ADDR" => "#{@server.addr[2]}:#{@server.addr[1]}",
+      "STATSD_IMPLEMENTATION" => "dogstatsd",
+      "STATSD_ENV" => "production",
+      "STATSD_ENABLE_AGGREGATION" => "true",
+      "STATSD_AGGREGATION_INTERVAL" => "0.1",
+    ).client
+
+    100.times do
+      client.increment("counter", 2)
+      client.distribution("test_distribution", 3, sample_rate: 0.1)
+    end
+
+    sleep(0.2)
+
+    packets = []
+    while IO.select([@server], nil, nil, 0.1)
+      packets << @server.recvfrom(300).first
+    end
+    packets = packets.flat_map { |packet| packet.split("\n") }
+
+    assert_match(/counter:\d+\|c/, packets.find { |packet| packet.start_with?("counter:") })
+    assert_match(/test_distribution:\d+(:\d+)*\|d/, packets.find { |packet| packet.start_with?("test_distribution:") })
+  end
 end
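
For reference, the assertions above parse DogStatsD's plain-text wire format: one UDP packet may carry several newline-separated datagrams, and aggregated timings pack multiple values into a single datagram with the rate suffix re-attached. An illustrative payload (values invented):

```
counter:200|c
test_distribution:3:3:3|d|@0.1
```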
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsd-instrument
 version: !ruby/object:Gem::Version
-  version: 3.9.4
+  version: 3.9.6
 platform: ruby
 authors:
 - Jesse Storimer
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-29 00:00:00.000000000 Z
+date: 2024-10-31 00:00:00.000000000 Z
 dependencies: []
 description: A StatsD client for Ruby apps. Provides metaprogramming methods to inject
   StatsD instrumentation into your code.