statsd-instrument 3.9.0 → 3.9.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9776ac7b9f3dc12364a1ba286d7b77330c52b0aa8560c29c0ff47c8033c066df
-  data.tar.gz: ecfc23f01f345eabc82d1c696c2fb280cc3185f2d30b47e8e5c75326077f10bd
+  metadata.gz: 436100176f41a4557dfad3169208a34581a4a51ed03532857ce0192ef0586f95
+  data.tar.gz: 888b9d6dcab8741aede0db4044f9a4d5ce0eb146ce016037b5ee278296f1f2a1
 SHA512:
-  metadata.gz: e831388ae5824bd7d1e53b9eefc399dd639210648976adeafd48fabd9d4a37705af2d5814868e75922cc27840cba42d5066054751a8120e4ea8d055efdaea379
-  data.tar.gz: 91f63290ef5d9dcc032e0fcdad8e34acdc5448d563e1eedd71b23a68330060ba58af3999ace24b4a82567f2e5c96182996c3f8dda940684a1df93fe9acee02ef
+  metadata.gz: 181b505253e000ae9f4b457716e4715227dbdc81d85357bedc335296af681d2975e4da0062abd4028b87d37462cd3ae5280f1033886a1b96a9bf566c32cf7930
+  data.tar.gz: cbc3e3cb9ed41abfda94075eeb4e25e29445f42447af826616cc3545419be2e4ceeab273183bbb56af3e166066ef5bb02ed19ac09b4467782c0184e5251e454f
data/.ruby-version CHANGED
@@ -1 +1 @@
-3.3.1
+3.3.3
data/CHANGELOG.md CHANGED
@@ -6,6 +6,12 @@ section below.
 
 ## Unreleased changes
 
+## Version 3.9.1
+
+- [#378](https://github.com/Shopify/statsd-instrument/pull/378) - Respect the sampling rate when aggregation is enabled, but only for timing metrics.
+  Not respecting the sampling rate incurs a performance penalty: we send more metrics than expected,
+  and we overload the StatsD server, which then has to process and relay more metrics than expected.
+
 ## Version 3.9.0
 
 - Introduced an experimental aggregation feature to improve the efficiency of metrics reporting by aggregating
@@ -15,7 +21,7 @@ decrease the overhead associated with high-frequency metric reporting. To enable
 - Added support for sending StatsD via Unix domain sockets. This feature is enabled by
   setting the `STATSD_SOCKET` environment variable to the path of the Unix domain socket.
 - :warning: **Possible breaking change**: We removed/renamed some classes and now Sinks are generic, so the classes `UDPSink` and `UDPBatchedSink` are now called
-  `StatsD::Instrument::Sink` and `StatsD::Instrument::Sink` respectively.
+  `StatsD::Instrument::Sink` and `StatsD::Instrument::BatchedSink` respectively.
   If you used those internal classes, you will need to update your code to use the new classes.
 
 ## Version 3.8.0
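
The practical effect of the 3.9.1 entry, as a minimal sketch. It only uses constructor keywords that appear elsewhere in this diff (`sink:`, `default_sample_rate:`, `enable_aggregation:`) and the library's `NullSink`; the metric name is illustrative:

```ruby
require "statsd-instrument"

# With aggregation enabled, timing metrics (measure, distribution, histogram)
# now run the sample check up front. At a 0.1 sample rate, on average only
# ~100 of the 1000 calls below are aggregated; 3.9.0 aggregated all of them.
client = StatsD::Instrument::Client.new(
  sink: StatsD::Instrument::NullSink.new,
  default_sample_rate: 0.1,
  enable_aggregation: true,
)

1_000.times { client.distribution("page.load_time", 120) }
client.force_flush # flush whatever survived sampling
```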
data/lib/statsd/instrument/aggregator.rb CHANGED
@@ -24,12 +24,15 @@ module StatsD
   end
 
   class Aggregator
+    DEFAULT_MAX_CONTEXT_SIZE = 250
+
     CONST_SAMPLE_RATE = 1.0
     COUNT = :c
     DISTRIBUTION = :d
     MEASURE = :ms
     HISTOGRAM = :h
     GAUGE = :g
+    private_constant :COUNT, :DISTRIBUTION, :MEASURE, :HISTOGRAM, :GAUGE, :CONST_SAMPLE_RATE
 
     class << self
       def finalize(aggregation_state, sink, datagram_builders, datagram_builder_class, default_tags)
@@ -78,7 +81,14 @@ module StatsD
     # @param default_tags [Array<String>] The tags to add to all metrics.
     # @param flush_interval [Float] The interval at which to flush the aggregated metrics.
     # @param max_values [Integer] The maximum number of values to aggregate before flushing.
-    def initialize(sink, datagram_builder_class, prefix, default_tags, flush_interval: 5.0, max_values: 100)
+    def initialize(
+      sink,
+      datagram_builder_class,
+      prefix,
+      default_tags,
+      flush_interval: 5.0,
+      max_values: DEFAULT_MAX_CONTEXT_SIZE
+    )
       @sink = sink
       @datagram_builder_class = datagram_builder_class
       @metric_prefix = prefix
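
For reference, a sketch of the reshaped initializer called directly. The argument order is taken verbatim from the hunk above; using `StatsD::Instrument::DatagramBuilder` as the builder class is an assumption for illustration, and in normal use the client constructs the aggregator rather than user code:

```ruby
aggregator = StatsD::Instrument::Aggregator.new(
  StatsD::Instrument::NullSink.new,    # sink
  StatsD::Instrument::DatagramBuilder, # datagram_builder_class (assumed)
  nil,                                 # prefix
  [],                                  # default_tags
  flush_interval: 5.0,
  max_values: StatsD::Instrument::Aggregator::DEFAULT_MAX_CONTEXT_SIZE,
)

# Call shape as used by the client elsewhere in this diff.
aggregator.aggregate_timing("db.query", 12.3, tags: nil, no_prefix: false, type: :ms)
```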
data/lib/statsd/instrument/client.rb CHANGED
@@ -156,7 +156,8 @@ module StatsD
      sink: StatsD::Instrument::NullSink.new,
      datagram_builder_class: self.class.datagram_builder_class_for_implementation(implementation),
      enable_aggregation: false,
-      aggregation_flush_interval: 2.0
+      aggregation_flush_interval: 2.0,
+      aggregation_max_context_size: StatsD::Instrument::Aggregator::DEFAULT_MAX_CONTEXT_SIZE
    )
      @sink = sink
      @datagram_builder_class = datagram_builder_class
@@ -176,6 +177,7 @@ module StatsD
        prefix,
        default_tags,
        flush_interval: @aggregation_flush_interval,
+        max_values: aggregation_max_context_size,
      )
    end
  end
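
With this wiring, the context limit becomes a per-client setting; a sketch using the new keyword (per the aggregator docstring, `max_values` is the maximum number of values to aggregate before flushing):

```ruby
# Flush once 500 values are being held, instead of the default 250,
# without waiting for the 2-second flush interval.
client = StatsD::Instrument::Client.new(
  sink: StatsD::Instrument::NullSink.new,
  enable_aggregation: true,
  aggregation_flush_interval: 2.0,
  aggregation_max_context_size: 500,
)
```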
@@ -237,6 +239,19 @@ module StatsD
    # @param tags (see #increment)
    # @return [void]
    def measure(name, value = nil, sample_rate: nil, tags: nil, no_prefix: false, &block)
+      sample_rate ||= @default_sample_rate
+      if sample_rate && !sample?(sample_rate)
+        # For all timing metrics, we have to use the sampling logic.
+        # Not doing so would impact performance and CPU usage.
+        # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
+
+        if block_given?
+          return yield
+        end
+
+        return StatsD::Instrument::VOID
+      end
+
      if block_given?
        return latency(name, sample_rate: sample_rate, tags: tags, metric_type: :ms, no_prefix: no_prefix, &block)
      end
@@ -245,10 +260,7 @@ module StatsD
        @aggregator.aggregate_timing(name, value, tags: tags, no_prefix: no_prefix, type: :ms)
        return StatsD::Instrument::VOID
      end
-      sample_rate ||= @default_sample_rate
-      if sample_rate.nil? || sample?(sample_rate)
-        emit(datagram_builder(no_prefix: no_prefix).ms(name, value, sample_rate, tags))
-      end
+      emit(datagram_builder(no_prefix: no_prefix).ms(name, value, sample_rate, tags))
      StatsD::Instrument::VOID
    end
 
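
One subtlety in the guard above: when the sample check fails and a block is given, the block is still executed and its value returned, so sampled-out code paths behave exactly like sampled-in ones. A sketch (`client` as in the earlier example; the block body is a hypothetical workload):

```ruby
result = client.measure("users.query", sample_rate: 0.01) do
  run_expensive_query # hypothetical workload; always runs
end
# `result` is the block's return value whether or not the timing was kept.
```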
@@ -306,6 +318,19 @@ module StatsD
    # @param tags (see #increment)
    # @return [void]
    def distribution(name, value = nil, sample_rate: nil, tags: nil, no_prefix: false, &block)
+      sample_rate ||= @default_sample_rate
+      if sample_rate && !sample?(sample_rate)
+        # For all timing metrics, we have to use the sampling logic.
+        # Not doing so would impact performance and CPU usage.
+        # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
+
+        if block_given?
+          return yield
+        end
+
+        return StatsD::Instrument::VOID
+      end
+
      if block_given?
        return latency(name, sample_rate: sample_rate, tags: tags, metric_type: :d, no_prefix: no_prefix, &block)
      end
@@ -315,10 +340,7 @@ module StatsD
        return StatsD::Instrument::VOID
      end
 
-      sample_rate ||= @default_sample_rate
-      if sample_rate.nil? || sample?(sample_rate)
-        emit(datagram_builder(no_prefix: no_prefix).d(name, value, sample_rate, tags))
-      end
+      emit(datagram_builder(no_prefix: no_prefix).d(name, value, sample_rate, tags))
      StatsD::Instrument::VOID
    end
 
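
Because the guard reads `sample_rate ||= @default_sample_rate`, an explicit per-call rate wins over the client default. A sketch, reusing `client` from the earlier example with illustrative metric names:

```ruby
# Keep roughly 1% of this high-volume metric, regardless of the client default.
client.distribution("http.response_size", 2_048, sample_rate: 0.01)

# A rate of 1.0 always passes the sample check, so nothing is dropped here.
client.distribution("job.duration", 153, sample_rate: 1.0)
```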
@@ -334,14 +356,20 @@ module StatsD
    # @param tags (see #increment)
    # @return [void]
    def histogram(name, value, sample_rate: nil, tags: nil, no_prefix: false)
+      sample_rate ||= @default_sample_rate
+      if sample_rate && !sample?(sample_rate)
+        # For all timing metrics, we have to use the sampling logic.
+        # Not doing so would impact performance and CPU usage.
+        # See Datadog's documentation for more details: https://github.com/DataDog/datadog-go/blob/20af2dbfabbbe6bd0347780cd57ed931f903f223/statsd/aggregator.go#L281-L283
+        return StatsD::Instrument::VOID
+      end
+
      if @enable_aggregation
        @aggregator.aggregate_timing(name, value, tags: tags, no_prefix: no_prefix, type: :h)
+        return StatsD::Instrument::VOID
      end
 
-      sample_rate ||= @default_sample_rate
-      if sample_rate.nil? || sample?(sample_rate)
-        emit(datagram_builder(no_prefix: no_prefix).h(name, value, sample_rate, tags))
-      end
+      emit(datagram_builder(no_prefix: no_prefix).h(name, value, sample_rate, tags))
      StatsD::Instrument::VOID
    end
 
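
Note that this hunk fixes a second issue besides sampling: the `return StatsD::Instrument::VOID` added inside the `@enable_aggregation` branch stops `histogram` from falling through and emitting the datagram again after it has already been aggregated. And since `histogram` takes no block, its sampled-out path returns `StatsD::Instrument::VOID` directly, without the `block_given?` handling that `measure` and `distribution` need.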
data/lib/statsd/instrument/environment.rb CHANGED
@@ -125,6 +125,13 @@ module StatsD
      Float(env.fetch("STATSD_AGGREGATION_INTERVAL", 2.0))
    end
 
+    def aggregation_max_context_size
+      Integer(env.fetch(
+        "STATSD_AGGREGATION_MAX_CONTEXT_SIZE",
+        StatsD::Instrument::Aggregator::DEFAULT_MAX_CONTEXT_SIZE,
+      ))
+    end
+
    def client
      StatsD::Instrument::Client.from_env(self)
    end
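
A sketch of driving the new limit through the environment. `StatsD::Instrument::Environment.current` as the entry point is an assumption (it is not shown in this diff); the variable names come straight from the hunk above:

```ruby
# These would normally be set in the process environment, not in Ruby.
ENV["STATSD_AGGREGATION_MAX_CONTEXT_SIZE"] = "500"
ENV["STATSD_AGGREGATION_INTERVAL"] = "2.0"

# Integer() raises on malformed input, so a typo in the value fails fast
# at boot instead of being silently ignored.
client = StatsD::Instrument::Environment.current.client
```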
data/lib/statsd/instrument/version.rb CHANGED
@@ -2,6 +2,6 @@
 
 module StatsD
   module Instrument
-    VERSION = "3.9.0"
+    VERSION = "3.9.1"
   end
 end
data/test/client_test.rb CHANGED
@@ -245,6 +245,18 @@ class ClientTest < Minitest::Test
    5.times { client.increment("metric") }
  end
 
+  def test_sampling_with_aggregation
+    mock_sink = mock("sink")
+    mock_sink.stubs(:sample?).returns(false, true, false, false, true)
+    # Since we are aggregating, we only expect a single datagram.
+    mock_sink.expects(:<<).with("metric:60:60|d").once
+    mock_sink.expects(:flush).once
+
+    client = StatsD::Instrument::Client.new(sink: mock_sink, default_sample_rate: 0.5, enable_aggregation: true)
+    5.times { client.distribution("metric", 60) }
+    client.force_flush
+  end
+
  def test_clone_with_prefix_option
    # Both clients will use the same sink.
    mock_sink = mock("sink")
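
The stubbed `sample?` sequence yields `true` twice, so at the 0.5 sample rate two of the five calls are recorded; aggregation then packs both values into the single datagram `metric:60:60|d` instead of two separate ones.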
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsd-instrument
 version: !ruby/object:Gem::Version
-  version: 3.9.0
+  version: 3.9.1
 platform: ruby
 authors:
 - Jesse Storimer
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-08-19 00:00:00.000000000 Z
+date: 2024-09-16 00:00:00.000000000 Z
 dependencies: []
 description: A StatsD client for Ruby apps. Provides metaprogramming methods to inject
   StatsD instrumentation into your code.
@@ -130,7 +130,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.5.17
+rubygems_version: 3.5.18
 signing_key:
 specification_version: 4
 summary: A StatsD client for Ruby apps