statsd-instrument 3.9.4 → 3.9.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8248b85fead25773388f7bec728c5a6347f7b7af61e37e064ee8ed93ba31397d
-  data.tar.gz: 110ab2e5db1b66ef7c76e11ed7b82c925f767df0b09a1ca442afe95c31e30a40
+  metadata.gz: 62a2601a6b425e15a8f3b616fcb182d48548179d95bc5b9bbdef74f5cdabcc1d
+  data.tar.gz: 64a4fd724bcee0e6fe8cac4001439c6a9e0b4b9e5234cd5b654b3df4045171cf
 SHA512:
-  metadata.gz: e85d2333cae7f6e843b25b3a1e64cf8bd1e68e47bcb530b6483dfa05e2672374791209c65f888cc6c9354f0e5bca997098f776c5db10f4dfd155005dd3f22bc9
-  data.tar.gz: 330d5d152a42dd76fae018e4af8c4daa64f9be50391468f64bd338ddd91ecd668a6775f590e45452b552c6fe28f0e82e125a3a91476d5269e52d19ad0adee092
+  metadata.gz: d74d19d5e29895763d423dd0daf67bedfeeb3e05ebff891f87eda90d4482648f5cd8bb3b163c000f2fe70473ece32f9f4febf36530bcfd16ab2d9d699cf1d49d
+  data.tar.gz: 18d287d1d24792d9e63d7d2f2db3bee74fd929d7e4fd5ea1dccb50447a7278de7cfe8f088184064298809e70ef82b3d719dc27c5d6dc07edbc01b5747dff7246
data/CHANGELOG.md CHANGED
@@ -6,6 +6,16 @@ section below.
 
 ## Unreleased changes
 
+## Version 3.9.6
+
+- [#388](https://github.com/Shopify/statsd-instrument/pull/388) - Properly fixing the bug when using aggregation with sampled
+  histograms: the client now respects the sampling rate when sending metrics and passes it down to the aggregator.
+
+## Version 3.9.5
+
+- [#387](https://github.com/Shopify/statsd-instrument/pull/387) - Fixing a bug when using aggregation with sampled histogram
+  metrics: they were not scaled properly because the sampling rate was missing from the final sent sample.
+
 ## Version 3.9.4
 
 - [#384](https://github.com/Shopify/statsd-instrument/pull/384) - Aggregation: fixing bug when sending metrics synchronously
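For context, the behavior change is easiest to see from the client's public API. Below is a minimal sketch, not part of the release: the sink wiring and metric names are illustrative, and `CaptureSink` is assumed (as in this gem's tests) to pass every sampling check so the example is deterministic.

```ruby
require "statsd-instrument"

# Illustrative wiring: CaptureSink records datagrams in memory.
client = StatsD::Instrument::Client.new(
  sink: StatsD::Instrument::CaptureSink.new(parent: StatsD::Instrument::NullSink.new),
  enable_aggregation: true,
)

client.distribution("api.latency", 60.0, sample_rate: 0.01)
client.distribution("api.latency", 80.0, sample_rate: 0.01)
client.force_flush

# Before 3.9.5/3.9.6 the flushed datagram ended in "|d" with no rate, so
# backends could not scale the two values back up by 1 / 0.01 = 100x.
client.sink.datagrams.map(&:source)
# => ["api.latency:60.0:80.0|d|@0.01"]
```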
data/lib/statsd/instrument/aggregator.rb CHANGED
@@ -3,13 +3,14 @@
 module StatsD
   module Instrument
     class AggregationKey
-      attr_reader :name, :tags, :no_prefix, :type, :hash
+      attr_reader :name, :tags, :no_prefix, :type, :hash, :sample_rate
 
-      def initialize(name, tags, no_prefix, type)
+      def initialize(name, tags, no_prefix, type, sample_rate: 1.0)
         @name = name
         @tags = tags
         @no_prefix = no_prefix
         @type = type
+        @sample_rate = sample_rate
         @hash = [@name, @tags, @no_prefix, @type].hash
       end
 
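A small sketch of the key's new shape (internal API, shown only to make the change concrete). Note that the precomputed `@hash` still covers only name, tags, prefix flag, and type, so the sample rate does not influence which bucket a metric aggregates into:

```ruby
key = StatsD::Instrument::AggregationKey.new(
  "api.latency", # normalized metric name
  "foo:bar",     # normalized tag string
  false,         # no_prefix
  :d,            # metric type (distribution)
  sample_rate: 0.01,
)
key.sample_rate # => 0.01
key.hash        # computed from [name, tags, no_prefix, type] only
```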
@@ -56,7 +57,7 @@ module StatsD
             key.name,
             key.type.to_s,
             agg_value,
-            CONST_SAMPLE_RATE,
+            key.sample_rate,
             key.tags,
           )
         when GAUGE
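On the wire this means the packed timing datagram now carries the rate of its aggregation key instead of a hard-coded 1.0. The DogStatsD format is `<name>:<v1>:<v2>|<type>|@<rate>`, so the tests below expect datagrams such as:

```
timing.sampled:60.0:80.0|d|@0.01
metric:60:60|d|@0.5
```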
@@ -134,16 +135,16 @@ module StatsD
         end
       end
 
-      def aggregate_timing(name, value, tags: [], no_prefix: false, type: DISTRIBUTION)
+      def aggregate_timing(name, value, tags: [], no_prefix: false, type: DISTRIBUTION, sample_rate: CONST_SAMPLE_RATE)
        unless thread_healthcheck
          @sink << datagram_builder(no_prefix: no_prefix).timing_value_packed(
-            name, type.to_s, [value], CONST_SAMPLE_RATE, tags
+            name, type.to_s, [value], sample_rate, tags
          )
          return
        end
 
        tags = tags_sorted(tags)
-        key = packet_key(name, tags, no_prefix, type)
+        key = packet_key(name, tags, no_prefix, type, sample_rate: sample_rate)
 
        @mutex.synchronize do
          values = @aggregation_state[key] ||= []
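A usage sketch of this method, mirroring the new test further down (`aggregator` stands for a constructed `StatsD::Instrument::Aggregator`, as `@subject` is in the tests):

```ruby
aggregator.aggregate_timing("timing.sampled", 60.0, sample_rate: 0.01)
aggregator.aggregate_timing("timing.sampled", 80.0, sample_rate: 0.01)
aggregator.flush
# The sink receives one packed datagram: "timing.sampled:60.0:80.0|d|@0.01"
```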
@@ -176,6 +177,9 @@ module StatsD
 
       EMPTY_ARRAY = [].freeze
 
+      # Flushes the aggregated metrics to the sink.
+      # Iterates over the aggregation state and sends each metric to the sink.
+      # If you change this function, you need to update the logic in the finalizer as well.
       def do_flush
         @aggregation_state.each do |key, value|
           case key.type
@@ -191,7 +195,7 @@ module StatsD
               key.name,
               key.type.to_s,
               value,
-              CONST_SAMPLE_RATE,
+              key.sample_rate,
               key.tags,
             )
           when GAUGE
@@ -219,8 +223,14 @@ module StatsD
         datagram_builder(no_prefix: false).normalize_tags(tags)
       end
 
-      def packet_key(name, tags = "".b, no_prefix = false, type = COUNT)
-        AggregationKey.new(DatagramBuilder.normalize_string(name), tags, no_prefix, type).freeze
+      def packet_key(name, tags = "".b, no_prefix = false, type = COUNT, sample_rate: CONST_SAMPLE_RATE)
+        AggregationKey.new(
+          DatagramBuilder.normalize_string(name),
+          tags,
+          no_prefix,
+          type,
+          sample_rate: sample_rate,
+        ).freeze
       end
 
       def datagram_builder(no_prefix:)
data/lib/statsd/instrument/client.rb CHANGED
@@ -318,25 +318,27 @@ module StatsD
     # @param tags (see #increment)
     # @return [void]
     def distribution(name, value = nil, sample_rate: nil, tags: nil, no_prefix: false, &block)
+      if block_given?
+        return latency(name, sample_rate: sample_rate, tags: tags, metric_type: :d, no_prefix: no_prefix, &block)
+      end
+
+      # For all timing metrics, we have to use the sampling logic.
+      # Not doing so would impact performance and CPU usage.
+      # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
       sample_rate ||= @default_sample_rate
       if sample_rate && !sample?(sample_rate)
-        # For all timing metrics, we have to use the sampling logic.
-        # Not doing so would impact performance and CPU usage.
-        # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
-
-        if block_given?
-          return yield
-        end
-
         return StatsD::Instrument::VOID
       end
 
-      if block_given?
-        return latency(name, sample_rate: sample_rate, tags: tags, metric_type: :d, no_prefix: no_prefix, &block)
-      end
-
       if @enable_aggregation
-        @aggregator.aggregate_timing(name, value, tags: tags, no_prefix: no_prefix, type: :d)
+        @aggregator.aggregate_timing(
+          name,
+          value,
+          tags: tags,
+          no_prefix: no_prefix,
+          type: :d,
+          sample_rate: sample_rate,
+        )
         return StatsD::Instrument::VOID
       end
 
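The reordering matters: the block form is delegated to `latency` before any sampling decision, and the sampling gate now runs before aggregation, so surviving values reach the aggregator together with their rate. A behavioral sketch (the metric name, rate, and `run_query` helper are illustrative, not from this release):

```ruby
client.distribution("db.query.time", 12.5, sample_rate: 0.1)
# 1. No block given, so no latency() delegation.
# 2. ~90% of calls fail sample?(0.1) and return VOID immediately.
# 3. A surviving value is aggregated under a key carrying 0.1, so the
#    flushed datagram ends in "|@0.1" and backends multiply counts by
#    1 / 0.1 == 10 to estimate the true call volume.

client.distribution("db.query.time") { run_query } # block form is timed via latency()
```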
@@ -392,13 +394,26 @@ module StatsD
     ensure
       stop = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
 
-      metric_type ||= datagram_builder(no_prefix: no_prefix).latency_metric_type
-      latency_in_ms = stop - start
-      if @enable_aggregation
-        @aggregator.aggregate_timing(name, latency_in_ms, tags: tags, no_prefix: no_prefix, type: metric_type)
-      else
-        sample_rate ||= @default_sample_rate
-        if sample_rate.nil? || sample?(sample_rate)
+      # For all timing metrics, we have to use the sampling logic.
+      # Not doing so would impact performance and CPU usage.
+      # See Datadog's documentation for more details:
+      # https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
+      sample_rate ||= @default_sample_rate
+      if sample_rate.nil? || sample?(sample_rate)
+
+        metric_type ||= datagram_builder(no_prefix: no_prefix).latency_metric_type
+        latency_in_ms = stop - start
+
+        if @enable_aggregation
+          @aggregator.aggregate_timing(
+            name,
+            latency_in_ms,
+            tags: tags,
+            no_prefix: no_prefix,
+            type: metric_type,
+            sample_rate: sample_rate,
+          )
+        else
          emit(datagram_builder(no_prefix: no_prefix).send(metric_type, name, latency_in_ms, sample_rate, tags))
        end
      end
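`measure`/`latency` now apply the same gate, but only in the `ensure` block after the timed code has run: a sampled-out call still executes the block, it just skips emitting the metric. A sketch (names illustrative):

```ruby
client.measure("payment.capture", sample_rate: 0.5) do
  charge_card # always executed; the timing metric is emitted for ~50% of calls
end
```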
data/lib/statsd/instrument/version.rb CHANGED
@@ -2,6 +2,6 @@
 
 module StatsD
   module Instrument
-    VERSION = "3.9.4"
+    VERSION = "3.9.6"
   end
 end
data/test/aggregator_test.rb CHANGED
@@ -1,7 +1,6 @@
 # frozen_string_literal: true
 
 require "test_helper"
-require "ostruct"
 
 class AggregatorTest < Minitest::Test
   class CaptureLogger
@@ -58,6 +57,24 @@ class AggregatorTest < Minitest::Test
     assert_equal([1.0, 100.0], datagram.value)
   end
 
+  def test_timing_sampling_scaling
+    @subject.aggregate_timing("timing.sampled", 60.0, sample_rate: 0.01)
+    @subject.aggregate_timing("timing.sampled", 80.0, sample_rate: 0.01)
+    @subject.aggregate_timing("timing.unsampled", 60.0, sample_rate: 1.0)
+
+    @subject.flush
+
+    assert_equal(2, @sink.datagrams.size)
+
+    sampled_datagram = @sink.datagrams.find { |d| d.name == "timing.sampled" }
+    assert_equal([60.0, 80.0], sampled_datagram.value)
+    assert_equal(0.01, sampled_datagram.sample_rate)
+    assert_equal("timing.sampled:60.0:80.0|d|@0.01", sampled_datagram.source)
+
+    unsampled_datagram = @sink.datagrams.find { |d| d.name == "timing.unsampled" }
+    assert_equal(60.0, unsampled_datagram.value)
+  end
+
   def test_mixed_type_timings
     @subject.aggregate_timing("foo_ms", 1, tags: { foo: "bar" }, type: :ms)
     @subject.aggregate_timing("foo_ms", 100, tags: { foo: "bar" }, type: :ms)
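The arithmetic behind that expectation: values are sent unscaled, and the `@0.01` suffix tells the backend how much each one counts, so the two flushed values stand in for roughly two hundred real observations:

```ruby
values = [60.0, 80.0]  # from "timing.sampled:60.0:80.0|d|@0.01"
rate = 0.01
values.size / rate     # => 200.0 estimated observations behind this datagram
```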
@@ -191,7 +208,7 @@ class AggregatorTest < Minitest::Test
 
     # Additional metrics should also go through synchronously
     @subject.increment("foo", 1, tags: { foo: "bar" })
-    @subject.aggregate_timing("bar", 200, tags: { foo: "bar" })
+    @subject.aggregate_timing("bar", 200, tags: { foo: "bar" }, sample_rate: 0.5)
 
     # Verify new metrics were also sent immediately
     assert_equal(5, @sink.datagrams.size)
@@ -203,6 +220,7 @@ class AggregatorTest < Minitest::Test
     timing_datagram = @sink.datagrams.select { |d| d.name == "bar" }.last
     assert_equal([200.0], [timing_datagram.value])
     assert_equal(["foo:bar"], timing_datagram.tags)
+    assert_equal(0.5, timing_datagram.sample_rate)
 
     # undo the stubbing
     @subject.unstub(:thread_healthcheck)
@@ -304,6 +322,7 @@ class AggregatorTest < Minitest::Test
     @subject.increment("foo", 1, tags: { foo: "bar" })
     @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
     @subject.gauge("baz", 100, tags: { foo: "bar" })
+    @subject.aggregate_timing("sampled_timing", 100, tags: { foo: "bar" }, sample_rate: 0.01)
 
     # Manually trigger the finalizer
     finalizer = StatsD::Instrument::Aggregator.finalize(
@@ -316,7 +335,7 @@ class AggregatorTest < Minitest::Test
     finalizer.call
 
     # Verify that all pending metrics are sent
-    assert_equal(3, @sink.datagrams.size)
+    assert_equal(4, @sink.datagrams.size)
 
     counter_datagram = @sink.datagrams.find { |d| d.name == "foo" }
     assert_equal(1, counter_datagram.value)
@@ -329,5 +348,9 @@ class AggregatorTest < Minitest::Test
     gauge_datagram = @sink.datagrams.find { |d| d.name == "baz" }
     assert_equal(100, gauge_datagram.value)
     assert_equal(["foo:bar"], gauge_datagram.tags)
+
+    sampled_timing_datagram = @sink.datagrams.find { |d| d.name == "sampled_timing" }
+    assert_equal(100.0, sampled_timing_datagram.value)
+    assert_equal(0.01, sampled_timing_datagram.sample_rate)
   end
 end
data/test/client_test.rb CHANGED
@@ -90,7 +90,7 @@ class ClientTest < Minitest::Test
     client.measure("block_duration_example") { 1 + 1 }
     client.force_flush
 
-    datagram = client.sink.datagrams.first
+    datagram = client.sink.datagrams.find { |d| d.name == "bar.foo" }
     assert_equal("bar.foo", datagram.name)
     assert_equal(2, datagram.value)
 
@@ -249,12 +249,17 @@ class ClientTest < Minitest::Test
     mock_sink = mock("sink")
     mock_sink.stubs(:sample?).returns(false, true, false, false, true)
     # Since we are aggregating, we only expect a single datagram.
-    mock_sink.expects(:<<).with("metric:60:60|d").once
+    mock_sink.expects(:<<).with("metric:60:60|d|@0.5").once
     mock_sink.expects(:flush).once
 
     client = StatsD::Instrument::Client.new(sink: mock_sink, default_sample_rate: 0.5, enable_aggregation: true)
     5.times { client.distribution("metric", 60) }
     client.force_flush
+
+    # undo mock
+    mock_sink.unstub(:sample?)
+    mock_sink.unstub(:<<)
+    mock_sink.unstub(:flush)
   end
 
   def test_clone_with_prefix_option
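To unpack the expectation above: `sample?` is stubbed to return the sequence false, true, false, false, true, so exactly two of the five `distribution` calls are recorded. Flush packs both surviving values into one datagram and, with this fix, appends the client's default rate of 0.5:

```ruby
recorded = [60, 60]  # the two calls that passed the stubbed sample?
rate = 0.5
recorded.size / rate # => 4.0 estimated calls (5 were actually made)
```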
data/test/integration_test.rb CHANGED
@@ -77,4 +77,30 @@ class IntegrationTest < Minitest::Test
     assert_equal("counter:20|c", @server.recvfrom(100).first)
     assert_operator(Time.now - before_flush, :<, 0.3, "Flush and ingest should have happened within 0.3s")
   end
+
+  def test_live_local_udp_socket_with_aggregation_sampled_scenario
+    client = StatsD::Instrument::Environment.new(
+      "STATSD_ADDR" => "#{@server.addr[2]}:#{@server.addr[1]}",
+      "STATSD_IMPLEMENTATION" => "dogstatsd",
+      "STATSD_ENV" => "production",
+      "STATSD_ENABLE_AGGREGATION" => "true",
+      "STATSD_AGGREGATION_INTERVAL" => "0.1",
+    ).client
+
+    100.times do
+      client.increment("counter", 2)
+      client.distribution("test_distribution", 3, sample_rate: 0.1)
+    end
+
+    sleep(0.2)
+
+    packets = []
+    while IO.select([@server], nil, nil, 0.1)
+      packets << @server.recvfrom(300).first
+    end
+    packets = packets.map { |packet| packet.split("\n") }.flatten
+
+    assert_match(/counter:\d+\|c/, packets.find { |packet| packet.start_with?("counter:") })
+    assert_match(/test_distribution:\d+:3\|d/, packets.find { |packet| packet.start_with?("test_distribution:") })
+  end
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsd-instrument
 version: !ruby/object:Gem::Version
-  version: 3.9.4
+  version: 3.9.6
 platform: ruby
 authors:
 - Jesse Storimer
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-29 00:00:00.000000000 Z
+date: 2024-10-31 00:00:00.000000000 Z
 dependencies: []
 description: A StatsD client for Ruby apps. Provides metaprogramming methods to inject
   StatsD instrumentation into your code.