statsd-instrument 3.9.4 → 3.9.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8248b85fead25773388f7bec728c5a6347f7b7af61e37e064ee8ed93ba31397d
-  data.tar.gz: 110ab2e5db1b66ef7c76e11ed7b82c925f767df0b09a1ca442afe95c31e30a40
+  metadata.gz: 62a2601a6b425e15a8f3b616fcb182d48548179d95bc5b9bbdef74f5cdabcc1d
+  data.tar.gz: 64a4fd724bcee0e6fe8cac4001439c6a9e0b4b9e5234cd5b654b3df4045171cf
 SHA512:
-  metadata.gz: e85d2333cae7f6e843b25b3a1e64cf8bd1e68e47bcb530b6483dfa05e2672374791209c65f888cc6c9354f0e5bca997098f776c5db10f4dfd155005dd3f22bc9
-  data.tar.gz: 330d5d152a42dd76fae018e4af8c4daa64f9be50391468f64bd338ddd91ecd668a6775f590e45452b552c6fe28f0e82e125a3a91476d5269e52d19ad0adee092
+  metadata.gz: d74d19d5e29895763d423dd0daf67bedfeeb3e05ebff891f87eda90d4482648f5cd8bb3b163c000f2fe70473ece32f9f4febf36530bcfd16ab2d9d699cf1d49d
+  data.tar.gz: 18d287d1d24792d9e63d7d2f2db3bee74fd929d7e4fd5ea1dccb50447a7278de7cfe8f088184064298809e70ef82b3d719dc27c5d6dc07edbc01b5747dff7246
data/CHANGELOG.md CHANGED
@@ -6,6 +6,16 @@ section below.
 
 ## Unreleased changes
 
+## Version 3.9.6
+
+- [#388](https://github.com/Shopify/statsd-instrument/pull/388) - Properly fixing the bug when using aggregation with sampled
+  histograms: the client now respects the sampling rate when sending metrics and passes it down to the aggregator.
+
+## Version 3.9.5
+
+- [#387](https://github.com/Shopify/statsd-instrument/pull/387) - Fixing a bug when using aggregation with sampled histogram
+  metrics: they were not scaled properly because the sampling rate was missing from the final sent sample.
+
 ## Version 3.9.4
 
 - [#384](https://github.com/Shopify/statsd-instrument/pull/384) - Aggregation: fixing bug when sending metrics synchronously
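For context, the behavior change is easiest to see from the client's public API. Below is a minimal sketch, not part of the release: the sink wiring and metric names are illustrative, and `CaptureSink` is assumed (as in this gem's tests) to pass every sampling check so the example is deterministic.

```ruby
require "statsd-instrument"

# Illustrative wiring: CaptureSink records datagrams in memory.
client = StatsD::Instrument::Client.new(
  sink: StatsD::Instrument::CaptureSink.new(parent: StatsD::Instrument::NullSink.new),
  enable_aggregation: true,
)

client.distribution("api.latency", 60.0, sample_rate: 0.01)
client.distribution("api.latency", 80.0, sample_rate: 0.01)
client.force_flush

# Before 3.9.5/3.9.6 the flushed datagram ended in "|d" with no rate, so
# backends could not scale the two values back up by 1 / 0.01 = 100x.
client.sink.datagrams.map(&:source)
# => ["api.latency:60.0:80.0|d|@0.01"]
```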
data/lib/statsd/instrument/aggregator.rb CHANGED
@@ -3,13 +3,14 @@
 module StatsD
   module Instrument
     class AggregationKey
-      attr_reader :name, :tags, :no_prefix, :type, :hash
+      attr_reader :name, :tags, :no_prefix, :type, :hash, :sample_rate
 
-      def initialize(name, tags, no_prefix, type)
+      def initialize(name, tags, no_prefix, type, sample_rate: 1.0)
         @name = name
         @tags = tags
         @no_prefix = no_prefix
         @type = type
+        @sample_rate = sample_rate
         @hash = [@name, @tags, @no_prefix, @type].hash
       end
 
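A small sketch of the key's new shape (internal API, shown only to make the change concrete). Note that the precomputed `@hash` still covers only name, tags, prefix flag, and type, so the sample rate does not influence which bucket a metric aggregates into:

```ruby
key = StatsD::Instrument::AggregationKey.new(
  "api.latency", # normalized metric name
  "foo:bar",     # normalized tag string
  false,         # no_prefix
  :d,            # metric type (distribution)
  sample_rate: 0.01,
)
key.sample_rate # => 0.01
key.hash        # computed from [name, tags, no_prefix, type] only
```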
@@ -56,7 +57,7 @@ module StatsD
             key.name,
             key.type.to_s,
             agg_value,
-            CONST_SAMPLE_RATE,
+            key.sample_rate,
             key.tags,
           )
         when GAUGE
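On the wire this means the packed timing datagram now carries the rate of its aggregation key instead of a hard-coded 1.0. The DogStatsD format is `<name>:<v1>:<v2>|<type>|@<rate>`, so the tests below expect datagrams such as:

```
timing.sampled:60.0:80.0|d|@0.01
metric:60:60|d|@0.5
```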
@@ -134,16 +135,16 @@ module StatsD
         end
       end
 
-      def aggregate_timing(name, value, tags: [], no_prefix: false, type: DISTRIBUTION)
+      def aggregate_timing(name, value, tags: [], no_prefix: false, type: DISTRIBUTION, sample_rate: CONST_SAMPLE_RATE)
        unless thread_healthcheck
          @sink << datagram_builder(no_prefix: no_prefix).timing_value_packed(
-            name, type.to_s, [value], CONST_SAMPLE_RATE, tags
+            name, type.to_s, [value], sample_rate, tags
          )
          return
        end
 
        tags = tags_sorted(tags)
-        key = packet_key(name, tags, no_prefix, type)
+        key = packet_key(name, tags, no_prefix, type, sample_rate: sample_rate)
 
        @mutex.synchronize do
          values = @aggregation_state[key] ||= []
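A usage sketch of this method, mirroring the new test further down (`aggregator` stands for a constructed `StatsD::Instrument::Aggregator`, as `@subject` is in the tests):

```ruby
aggregator.aggregate_timing("timing.sampled", 60.0, sample_rate: 0.01)
aggregator.aggregate_timing("timing.sampled", 80.0, sample_rate: 0.01)
aggregator.flush
# The sink receives one packed datagram: "timing.sampled:60.0:80.0|d|@0.01"
```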
@@ -176,6 +177,9 @@ module StatsD
 
       EMPTY_ARRAY = [].freeze
 
+      # Flushes the aggregated metrics to the sink.
+      # Iterates over the aggregation state and sends each metric to the sink.
+      # If you change this function, you need to update the logic in the finalizer as well.
       def do_flush
         @aggregation_state.each do |key, value|
           case key.type
@@ -191,7 +195,7 @@ module StatsD
               key.name,
               key.type.to_s,
               value,
-              CONST_SAMPLE_RATE,
+              key.sample_rate,
               key.tags,
             )
           when GAUGE
@@ -219,8 +223,14 @@ module StatsD
         datagram_builder(no_prefix: false).normalize_tags(tags)
       end
 
-      def packet_key(name, tags = "".b, no_prefix = false, type = COUNT)
-        AggregationKey.new(DatagramBuilder.normalize_string(name), tags, no_prefix, type).freeze
+      def packet_key(name, tags = "".b, no_prefix = false, type = COUNT, sample_rate: CONST_SAMPLE_RATE)
+        AggregationKey.new(
+          DatagramBuilder.normalize_string(name),
+          tags,
+          no_prefix,
+          type,
+          sample_rate: sample_rate,
+        ).freeze
       end
 
       def datagram_builder(no_prefix:)
data/lib/statsd/instrument/client.rb CHANGED
@@ -318,25 +318,27 @@ module StatsD
     # @param tags (see #increment)
     # @return [void]
     def distribution(name, value = nil, sample_rate: nil, tags: nil, no_prefix: false, &block)
+      if block_given?
+        return latency(name, sample_rate: sample_rate, tags: tags, metric_type: :d, no_prefix: no_prefix, &block)
+      end
+
+      # For all timing metrics, we have to use the sampling logic.
+      # Not doing so would impact performance and CPU usage.
+      # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
       sample_rate ||= @default_sample_rate
       if sample_rate && !sample?(sample_rate)
-        # For all timing metrics, we have to use the sampling logic.
-        # Not doing so would impact performance and CPU usage.
-        # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
-
-        if block_given?
-          return yield
-        end
-
         return StatsD::Instrument::VOID
       end
 
-      if block_given?
-        return latency(name, sample_rate: sample_rate, tags: tags, metric_type: :d, no_prefix: no_prefix, &block)
-      end
-
       if @enable_aggregation
-        @aggregator.aggregate_timing(name, value, tags: tags, no_prefix: no_prefix, type: :d)
+        @aggregator.aggregate_timing(
+          name,
+          value,
+          tags: tags,
+          no_prefix: no_prefix,
+          type: :d,
+          sample_rate: sample_rate,
+        )
         return StatsD::Instrument::VOID
       end
 
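The reordering matters: the block form is delegated to `latency` before any sampling decision, and the sampling gate now runs before aggregation, so surviving values reach the aggregator together with their rate. A behavioral sketch (the metric name, rate, and `run_query` helper are illustrative, not from this release):

```ruby
client.distribution("db.query.time", 12.5, sample_rate: 0.1)
# 1. No block given, so no latency() delegation.
# 2. ~90% of calls fail sample?(0.1) and return VOID immediately.
# 3. A surviving value is aggregated under a key carrying 0.1, so the
#    flushed datagram ends in "|@0.1" and backends multiply counts by
#    1 / 0.1 == 10 to estimate the true call volume.

client.distribution("db.query.time") { run_query } # block form is timed via latency()
```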
@@ -392,13 +394,26 @@ module StatsD
     ensure
       stop = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
 
-      metric_type ||= datagram_builder(no_prefix: no_prefix).latency_metric_type
-      latency_in_ms = stop - start
-      if @enable_aggregation
-        @aggregator.aggregate_timing(name, latency_in_ms, tags: tags, no_prefix: no_prefix, type: metric_type)
-      else
-        sample_rate ||= @default_sample_rate
-        if sample_rate.nil? || sample?(sample_rate)
+      # For all timing metrics, we have to use the sampling logic.
+      # Not doing so would impact performance and CPU usage.
+      # See Datadog's documentation for more details:
+      # https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
+      sample_rate ||= @default_sample_rate
+      if sample_rate.nil? || sample?(sample_rate)
+
+        metric_type ||= datagram_builder(no_prefix: no_prefix).latency_metric_type
+        latency_in_ms = stop - start
+
+        if @enable_aggregation
+          @aggregator.aggregate_timing(
+            name,
+            latency_in_ms,
+            tags: tags,
+            no_prefix: no_prefix,
+            type: metric_type,
+            sample_rate: sample_rate,
+          )
+        else
          emit(datagram_builder(no_prefix: no_prefix).send(metric_type, name, latency_in_ms, sample_rate, tags))
        end
      end
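`measure`/`latency` now apply the same gate, but only in the `ensure` block after the timed code has run: a sampled-out call still executes the block, it just skips emitting the metric. A sketch (names illustrative):

```ruby
client.measure("payment.capture", sample_rate: 0.5) do
  charge_card # always executed; the timing metric is emitted for ~50% of calls
end
```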
data/lib/statsd/instrument/version.rb CHANGED
@@ -2,6 +2,6 @@
 
 module StatsD
   module Instrument
-    VERSION = "3.9.4"
+    VERSION = "3.9.6"
   end
 end
data/test/aggregator_test.rb CHANGED
@@ -1,7 +1,6 @@
 # frozen_string_literal: true
 
 require "test_helper"
-require "ostruct"
 
 class AggregatorTest < Minitest::Test
   class CaptureLogger
@@ -58,6 +57,24 @@ class AggregatorTest < Minitest::Test
     assert_equal([1.0, 100.0], datagram.value)
   end
 
+  def test_timing_sampling_scaling
+    @subject.aggregate_timing("timing.sampled", 60.0, sample_rate: 0.01)
+    @subject.aggregate_timing("timing.sampled", 80.0, sample_rate: 0.01)
+    @subject.aggregate_timing("timing.unsampled", 60.0, sample_rate: 1.0)
+
+    @subject.flush
+
+    assert_equal(2, @sink.datagrams.size)
+
+    sampled_datagram = @sink.datagrams.find { |d| d.name == "timing.sampled" }
+    assert_equal([60.0, 80.0], sampled_datagram.value)
+    assert_equal(0.01, sampled_datagram.sample_rate)
+    assert_equal("timing.sampled:60.0:80.0|d|@0.01", sampled_datagram.source)
+
+    unsampled_datagram = @sink.datagrams.find { |d| d.name == "timing.unsampled" }
+    assert_equal(60.0, unsampled_datagram.value)
+  end
+
   def test_mixed_type_timings
     @subject.aggregate_timing("foo_ms", 1, tags: { foo: "bar" }, type: :ms)
     @subject.aggregate_timing("foo_ms", 100, tags: { foo: "bar" }, type: :ms)
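The arithmetic behind that expectation: values are sent unscaled, and the `@0.01` suffix tells the backend how much each one counts, so the two flushed values stand in for roughly two hundred real observations:

```ruby
values = [60.0, 80.0]  # from "timing.sampled:60.0:80.0|d|@0.01"
rate = 0.01
values.size / rate     # => 200.0 estimated observations behind this datagram
```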
@@ -191,7 +208,7 @@ class AggregatorTest < Minitest::Test
 
     # Additional metrics should also go through synchronously
     @subject.increment("foo", 1, tags: { foo: "bar" })
-    @subject.aggregate_timing("bar", 200, tags: { foo: "bar" })
+    @subject.aggregate_timing("bar", 200, tags: { foo: "bar" }, sample_rate: 0.5)
 
     # Verify new metrics were also sent immediately
     assert_equal(5, @sink.datagrams.size)
@@ -203,6 +220,7 @@ class AggregatorTest < Minitest::Test
     timing_datagram = @sink.datagrams.select { |d| d.name == "bar" }.last
     assert_equal([200.0], [timing_datagram.value])
     assert_equal(["foo:bar"], timing_datagram.tags)
+    assert_equal(0.5, timing_datagram.sample_rate)
 
     # undo the stubbing
     @subject.unstub(:thread_healthcheck)
@@ -304,6 +322,7 @@ class AggregatorTest < Minitest::Test
     @subject.increment("foo", 1, tags: { foo: "bar" })
     @subject.aggregate_timing("bar", 100, tags: { foo: "bar" })
     @subject.gauge("baz", 100, tags: { foo: "bar" })
+    @subject.aggregate_timing("sampled_timing", 100, tags: { foo: "bar" }, sample_rate: 0.01)
 
     # Manually trigger the finalizer
     finalizer = StatsD::Instrument::Aggregator.finalize(
@@ -316,7 +335,7 @@ class AggregatorTest < Minitest::Test
     finalizer.call
 
     # Verify that all pending metrics are sent
-    assert_equal(3, @sink.datagrams.size)
+    assert_equal(4, @sink.datagrams.size)
 
     counter_datagram = @sink.datagrams.find { |d| d.name == "foo" }
     assert_equal(1, counter_datagram.value)
@@ -329,5 +348,9 @@ class AggregatorTest < Minitest::Test
     gauge_datagram = @sink.datagrams.find { |d| d.name == "baz" }
     assert_equal(100, gauge_datagram.value)
     assert_equal(["foo:bar"], gauge_datagram.tags)
+
+    sampled_timing_datagram = @sink.datagrams.find { |d| d.name == "sampled_timing" }
+    assert_equal(100.0, sampled_timing_datagram.value)
+    assert_equal(0.01, sampled_timing_datagram.sample_rate)
   end
 end
data/test/client_test.rb CHANGED
@@ -90,7 +90,7 @@ class ClientTest < Minitest::Test
     client.measure("block_duration_example") { 1 + 1 }
     client.force_flush
 
-    datagram = client.sink.datagrams.first
+    datagram = client.sink.datagrams.find { |d| d.name == "bar.foo" }
     assert_equal("bar.foo", datagram.name)
     assert_equal(2, datagram.value)
 
@@ -249,12 +249,17 @@ class ClientTest < Minitest::Test
     mock_sink = mock("sink")
     mock_sink.stubs(:sample?).returns(false, true, false, false, true)
     # Since we are aggregating, we only expect a single datagram.
-    mock_sink.expects(:<<).with("metric:60:60|d").once
+    mock_sink.expects(:<<).with("metric:60:60|d|@0.5").once
     mock_sink.expects(:flush).once
 
     client = StatsD::Instrument::Client.new(sink: mock_sink, default_sample_rate: 0.5, enable_aggregation: true)
     5.times { client.distribution("metric", 60) }
     client.force_flush
+
+    # undo mock
+    mock_sink.unstub(:sample?)
+    mock_sink.unstub(:<<)
+    mock_sink.unstub(:flush)
   end
 
   def test_clone_with_prefix_option
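To unpack the expectation above: `sample?` is stubbed to return the sequence false, true, false, false, true, so exactly two of the five `distribution` calls are recorded. Flush packs both surviving values into one datagram and, with this fix, appends the client's default rate of 0.5:

```ruby
recorded = [60, 60]  # the two calls that passed the stubbed sample?
rate = 0.5
recorded.size / rate # => 4.0 estimated calls (5 were actually made)
```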
data/test/integration_test.rb CHANGED
@@ -77,4 +77,30 @@ class IntegrationTest < Minitest::Test
     assert_equal("counter:20|c", @server.recvfrom(100).first)
     assert_operator(Time.now - before_flush, :<, 0.3, "Flush and ingest should have happened within 0.3s")
   end
+
+  def test_live_local_udp_socket_with_aggregation_sampled_scenario
+    client = StatsD::Instrument::Environment.new(
+      "STATSD_ADDR" => "#{@server.addr[2]}:#{@server.addr[1]}",
+      "STATSD_IMPLEMENTATION" => "dogstatsd",
+      "STATSD_ENV" => "production",
+      "STATSD_ENABLE_AGGREGATION" => "true",
+      "STATSD_AGGREGATION_INTERVAL" => "0.1",
+    ).client
+
+    100.times do
+      client.increment("counter", 2)
+      client.distribution("test_distribution", 3, sample_rate: 0.1)
+    end
+
+    sleep(0.2)
+
+    packets = []
+    while IO.select([@server], nil, nil, 0.1)
+      packets << @server.recvfrom(300).first
+    end
+    packets = packets.map { |packet| packet.split("\n") }.flatten
+
+    assert_match(/counter:\d+\|c/, packets.find { |packet| packet.start_with?("counter:") })
+    assert_match(/test_distribution:\d+:3\|d/, packets.find { |packet| packet.start_with?("test_distribution:") })
+  end
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsd-instrument
 version: !ruby/object:Gem::Version
-  version: 3.9.4
+  version: 3.9.6
 platform: ruby
 authors:
 - Jesse Storimer
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-29 00:00:00.000000000 Z
+date: 2024-10-31 00:00:00.000000000 Z
 dependencies: []
 description: A StatsD client for Ruby apps. Provides metaprogramming methods to inject
   StatsD instrumentation into your code.