statsd-instrument 3.8.0 → 3.9.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 7ce05bd8d34026227e2960ccabca96119580dc2d5737fff81a2bdfd8ea18f826
- data.tar.gz: d619b08700bb735922673013d7e6314f32d76e8744dcfa07966a35595aa27cf4
+ metadata.gz: 9776ac7b9f3dc12364a1ba286d7b77330c52b0aa8560c29c0ff47c8033c066df
+ data.tar.gz: ecfc23f01f345eabc82d1c696c2fb280cc3185f2d30b47e8e5c75326077f10bd
  SHA512:
- metadata.gz: 42317b00c680ffc079e89bad712225b8a9656faedf1f8743d9031c26070ae05af0c2ef8a10f1200d94a708023e6cd667d91fbf64a972fecb7d97d327edc31f22
- data.tar.gz: 540c9a8bccc54633f40e9b2830748e764d6bf5034791bcdacb611be6a7fc615e66a9fa138e2ce58f4a6461f46bb40114fb7507f3f4357d86fccc5d9d6614d244
+ metadata.gz: e831388ae5824bd7d1e53b9eefc399dd639210648976adeafd48fabd9d4a37705af2d5814868e75922cc27840cba42d5066054751a8120e4ea8d055efdaea379
+ data.tar.gz: 91f63290ef5d9dcc032e0fcdad8e34acdc5448d563e1eedd71b23a68330060ba58af3999ace24b4a82567f2e5c96182996c3f8dda940684a1df93fe9acee02ef
@@ -0,0 +1,14 @@
+ ## ✅ What
+ <!-- A brief description of the changes in this PR. -->
+
+ ## 🤔 Why
+ <!-- A brief description of the reason for these changes. -->
+
+ ## 👩‍🔬 How to validate
+ <!-- Step-by-step instructions for how reviewers can verify these changes work as expected. -->
+
+ ## Checklist
+
+ - [ ] I documented the changes in the CHANGELOG file.
+ <!-- If this is a user-facing change, you must update the CHANGELOG file. OR -->
+ <!-- - [ ] This change is not user-facing and does not require a CHANGELOG update. -->
@@ -8,7 +8,7 @@ jobs:
  runs-on: ubuntu-latest

  steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4

  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
@@ -21,7 +21,7 @@ jobs:
  - name: Run throughput benchmark on branch
  run: benchmark/local-udp-throughput

- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
  with:
  ref: 'main'

@@ -8,7 +8,7 @@ jobs:
  runs-on: ubuntu-latest

  steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4

  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
@@ -9,14 +9,14 @@ jobs:
  strategy:
  fail-fast: false
  matrix:
- ruby: ['2.6', '2.7', '3.0', '3.1', '3.2', '3.3', 'ruby-head', 'jruby-9.3.7.0', 'truffleruby-22.2.0']
+ ruby: ['2.6', '2.7', '3.0', '3.1', '3.2', '3.3', 'ruby-head', 'jruby-9.4.8.0', 'truffleruby-22.3.1']
  # Windows on macOS builds started failing, so they are disabled for now
  # platform: [windows-2019, macOS-10.14, ubuntu-18.04]
  # exclude:
  # ...

  steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4

  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
data/.ruby-version CHANGED
@@ -1 +1 @@
- 3.3.0
+ 3.3.1
data/CHANGELOG.md CHANGED
@@ -6,6 +6,18 @@ section below.

  ## Unreleased changes

+ ## Version 3.9.0
+
+ - Introduced an experimental aggregation feature that improves the efficiency of metrics reporting by aggregating
+ multiple metric events into a single sample. This reduces the number of network requests and can significantly
+ decrease the overhead associated with high-frequency metric reporting. To enable metric aggregation, set the
+ `STATSD_ENABLE_AGGREGATION` environment variable to `true`. More information on this feature is available in the README.
+ - Added support for sending StatsD datagrams over Unix domain sockets. This feature is enabled by
+ setting the `STATSD_SOCKET_PATH` environment variable to the path of the Unix domain socket.
+ - :warning: **Possible breaking change**: Some classes were removed or renamed, and sinks are now generic over the transport: the classes `UDPSink` and `UDPBatchedSink` are now called
+ `StatsD::Instrument::Sink` and `StatsD::Instrument::BatchedSink` respectively.
+ If you used these internal classes, you will need to update your code to use the new names.
+
  ## Version 3.8.0

  - UDP batching will now track statistics about its own batching performance, and
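For code that constructed these sinks directly, the rename is essentially a class-name swap. A minimal migration sketch, assuming `for_addr` and the `Client` `sink:` keyword keep their 3.8.0 signatures (verify against the 3.9.0 source before relying on this):

```ruby
require "statsd-instrument"

addr = "127.0.0.1:8125"

# statsd-instrument <= 3.8 (old names, as given in the CHANGELOG entry above):
#   sink = StatsD::Instrument::UDPSink.for_addr(addr)
#   sink = StatsD::Instrument::UDPBatchedSink.for_addr(addr)

# statsd-instrument 3.9 (sinks are now generic over the transport):
sink = StatsD::Instrument::BatchedSink.for_addr(addr)

StatsD.singleton_client = StatsD::Instrument::Client.new(sink: sink)
```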
data/Gemfile CHANGED
@@ -11,3 +11,10 @@ gem "yard"
  gem "rubocop", ">= 1.0"
  gem "rubocop-shopify", require: false
  gem "benchmark-ips"
+ gem "dogstatsd-ruby", "~> 5.0", require: false
+ platform :mri do
+ # only if Ruby is MRI && >= 3.2
+ if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.2")
+ gem "vernier", require: false
+ end
+ end
data/README.md CHANGED
@@ -65,6 +65,52 @@ The following environment variables are supported:
65
65
  - `statsd_instrument.batched_udp_sink.avg_batch_length`: The average number of statsd lines per batch.
66
66
 
67
67
 
68
+ ### Experimental aggregation feature
69
+
70
+ The aggregation feature is currently experimental and aims to improve the efficiency of metrics reporting by aggregating
71
+ multiple metric events into a single sample. This reduces the number of network requests and can significantly decrease the overhead
72
+ associated with high-frequency metric reporting.
73
+
74
+ This means that instead of sending each metric event individually, the library will aggregate multiple events into a single sample and send it to the StatsD server.
75
+ Example:
76
+
77
+ Instead of sending counters in multiple packets like this:
78
+ ```
79
+ my.counter:1|c
80
+ my.counter:1|c
81
+ my.counter:1|c
82
+ ```
83
+
84
+ The library will aggregate them into a single packet like this:
85
+ ```
86
+ my.counter:3|c
87
+ ```
88
+
89
+ and for histograms/distributions:
90
+ ```
91
+ my.histogram:1|h
92
+ my.histogram:2|h
93
+ my.histogram:3|h
94
+ ```
95
+
96
+ The library will aggregate them into a single packet like this:
97
+ ```
98
+ my.histogram:1:2:3|h
99
+ ```
100
+
101
+ #### Enabling Aggregation
102
+
103
+ To enable metric aggregation, set the following environment variables:
104
+
105
+ - `STATSD_ENABLE_AGGREGATION`: Set this to `true` to enable the experimental aggregation feature. Aggregation is disabled by default.
106
+ - `STATSD_AGGREGATION_INTERVAL`: Specifies the interval (in seconds) at which aggregated metrics are flushed and sent to the StatsD server.
107
+ For example, setting this to `2` will aggregate and send metrics every 2 seconds. Two seconds is also the default value if this environment variable is not set.
108
+
109
+ Please note that since aggregation is an experimental feature, it should be used with caution in production environments.
110
+
111
+ > [!WARNING]
112
+ > This feature is only compatible with Datadog Agent's version >=6.25.0 && <7.0.0 or Agent's versions >=7.25.0.
113
+
68
114
  ## StatsD keys
69
115
 
70
116
  StatsD keys look like 'admin.logins.api.success'. Dots are used as namespace separators.
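The feature added in the README hunk above is driven entirely by environment variables, so enabling it takes no code changes beyond setting them before the client boots. A minimal sketch (the metric name is a placeholder):

```ruby
# Minimal sketch: enable experimental aggregation via the documented env vars.
# They must be set before statsd-instrument initializes its client.
ENV["STATSD_ENABLE_AGGREGATION"] = "true"
ENV["STATSD_AGGREGATION_INTERVAL"] = "2" # flush every 2 seconds (the default)

require "statsd-instrument"

# With aggregation enabled, these three events can be flushed as a single
# "my.counter:3|c" sample instead of three separate packets.
3.times { StatsD.increment("my.counter") }
```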
data/Rakefile CHANGED
@@ -2,6 +2,7 @@

  require "bundler/gem_tasks"
  require "rake/testtask"
+ require "rubocop/rake_task"

  Rake::TestTask.new("test") do |t|
  t.ruby_opts << "-r rubygems"
@@ -9,4 +10,14 @@ Rake::TestTask.new("test") do |t|
  t.test_files = FileList["test/**/*_test.rb"]
  end

+ RuboCop::RakeTask.new(:lint) do |task|
+ task.options = ["-D", "-S", "-E"]
+ end
+
+ RuboCop::RakeTask.new(:lint_fix) do |task|
+ task.options = ["-a"]
+ end
+
+ task lf: :lint_fix
+
  task(default: :test)
@@ -6,28 +6,113 @@ require "benchmark/ips"
  require "tmpdir"
  require "socket"
  require "statsd-instrument"
+ require "datadog/statsd"
+ require "forwardable"
+ require "vernier"
+
+ class DatadogShim
+ extend Forwardable
+
+ def_delegator :@client, :close
+ # This is a shim to make the Datadog client compatible with the StatsD client
+ # interface. It's not a complete implementation, but it's enough to run the
+ # benchmarks.
+ # @param [Datadog::Statsd] client
+ def initialize(client)
+ @client = client
+ end
+
+ class NullSink
+ def flush(blocking: false)
+ end
+ end
+
+ def sink
+ @sink ||= NullSink.new
+ end
+
+ def increment(stat, value = 1, tags: nil)
+ @client.increment(stat, value: value, tags: tags)
+ end
+
+ def measure(stat, value = nil, tags: nil, &block)
+ @client.time(stat, value: value, tags: tags, &block)
+ end
+
+ def histogram(stat, value = nil, tags: nil, &block)
+ @client.histogram(stat, value: value, tags: tags, &block)
+ end
+
+ def gauge(stat, value, tags: nil)
+ @client.gauge(stat, value: value, tags: tags)
+ end
+
+ def set(stat, value, tags: nil)
+ @client.set(stat, value: value, tags: tags)
+ end
+
+ def event(title, text, tags: nil)
+ @client.event(title, text, tags: tags)
+ end
+
+ def service_check(name, status, tags: nil)
+ @client.service_check(name, status, tags: tags)
+ end
+ end

  def send_metrics(client)
  client.increment("StatsD.increment", 10)
  client.measure("StatsD.measure") { 1 + 1 }
  client.gauge("StatsD.gauge", 12.0, tags: ["foo:bar", "quc"])
- client.set("StatsD.set", "value", tags: { foo: "bar", baz: "quc" })
- client.event("StasD.event", "12345")
- client.service_check("StatsD.service_check", "ok")
  end

+ def send_metrics_high_cardinality(client)
+ SERIES_COUNT.times do |i|
+ tags = ["series:#{i}", "foo:bar", "baz:quc"]
+ client.increment("StatsD.increment", 10, tags: tags)
+ client.measure("StatsD.measure", tags: tags) { 1 + 1 }
+ client.gauge("StatsD.gauge", 12.0, tags: tags)
+ end
+ end
+
+ SOCKET_PATH = File.join(Dir.pwd, "tmp/metric.sock")
  THREAD_COUNT = Integer(ENV.fetch("THREAD_COUNT", 5))
- EVENTS_PER_ITERATION = 6
- ITERATIONS = 50_000
- def benchmark_implementation(name, env = {})
+ EVENTS_PER_ITERATION = 3
+ ITERATIONS = Integer(ENV.fetch("ITERATIONS", 10_000))
+ SERIES_COUNT = Integer(ENV.fetch("SERIES_COUNT", 0))
+ ENABLE_PROFILING = ENV.key?("ENABLE_PROFILING")
+ UDS_MAX_SEND_SIZE = 32_768
+
+ LOG_DIR = File.join(Dir.tmpdir, "statsd-instrument-benchmarks")
+ FileUtils.mkdir_p(LOG_DIR)
+ puts "Logs are stored in #{LOG_DIR}"
+
+ def benchmark_implementation(name, env = {}, datadog_client = false)
  intermediate_results_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/"
- log_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/#{File.basename($PROGRAM_NAME)}-#{name}.log"
+ log_filename = File.join(LOG_DIR, "#{File.basename($PROGRAM_NAME)}-#{name}.log".tr(" ", "_"))
  FileUtils.mkdir_p(File.dirname(intermediate_results_filename))
+ FileUtils.mkdir_p(File.dirname(log_filename))

  # Set up an UDP listener to which we can send StatsD packets
  receiver = UDPSocket.new
  receiver.bind("localhost", 0)

+ FileUtils.mkdir_p(File.dirname(SOCKET_PATH))
+ FileUtils.rm_f(SOCKET_PATH)
+ receiver_uds = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM)
+ receiver_uds.setsockopt(Socket::SOL_SOCKET, Socket::SO_REUSEADDR, true)
+ receiver_uds.setsockopt(Socket::SOL_SOCKET, Socket::SO_RCVBUF, UDS_MAX_SEND_SIZE * THREAD_COUNT)
+ receiver_uds.bind(Socket.pack_sockaddr_un(SOCKET_PATH))
+ # With UDS we have to keep draining the socket, otherwise it fills up and
+ # writes start to block (and the write path is exactly what we are benchmarking)
+ consume = Thread.new do
+ loop do
+ receiver_uds.recv(32768)
+ rescue
+ # Ignored
+ end
+ end
+
  log_file = File.open(log_filename, "w+", level: Logger::WARN)
  StatsD.logger = Logger.new(log_file)
@@ -37,23 +122,103 @@ def benchmark_implementation(name, env = {})
  "STATSD_ENV" => "production",
  ).merge(env)).client

- puts "===== #{name} throughtput (#{THREAD_COUNT} threads) ====="
+ if datadog_client
+ statsd = Datadog::Statsd.new(receiver.addr[2], receiver.addr[1], **env)
+ udp_client = DatadogShim.new(statsd)
+ end
+
+ series = SERIES_COUNT.zero? ? 1 : SERIES_COUNT
+ events_sent = THREAD_COUNT * EVENTS_PER_ITERATION * ITERATIONS * series
+ puts "===== #{name} throughput (#{THREAD_COUNT} threads) - total events: #{events_sent} ====="
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  threads = THREAD_COUNT.times.map do
  Thread.new do
  count = ITERATIONS
  while (count -= 1) > 0
- send_metrics(udp_client)
+ if SERIES_COUNT.zero?
+ send_metrics(udp_client)
+ else
+ send_metrics_high_cardinality(udp_client)
+ end
  end
  end
  end
- start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
  threads.each(&:join)
+ udp_client.shutdown if udp_client.respond_to?(:shutdown)
+ if datadog_client
+ udp_client.close
+ end
+
  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
- events_sent = THREAD_COUNT * EVENTS_PER_ITERATION * ITERATIONS
- puts "events: #{(events_sent / duration).round(1)}/s"
+
+ consume.kill
  receiver.close
- udp_client.shutdown if udp_client.respond_to?(:shutdown)
+ receiver_uds.close
+
+ series = SERIES_COUNT.zero? ? 1 : SERIES_COUNT
+ events_sent = THREAD_COUNT * EVENTS_PER_ITERATION * ITERATIONS * series
+ puts "events: #{(events_sent / duration).round(1).to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse}/s"
  end

+ if ENABLE_PROFILING
+ Vernier.start_profile(out: "tmp/benchmark_profile_udp_sync.json")
+ end
  benchmark_implementation("UDP sync", "STATSD_BUFFER_CAPACITY" => "0")
+ if ENABLE_PROFILING
+ Vernier.stop_profile
+ end
+
+ if ENABLE_PROFILING
+ Vernier.start_profile(out: "tmp/benchmark_profile_udp_async.json")
+ end
  benchmark_implementation("UDP batched")
+ if ENABLE_PROFILING
+ Vernier.stop_profile
+ end
+
+ if ENABLE_PROFILING
+ Vernier.start_profile(out: "tmp/benchmark_profile_uds_small_packet.json")
+ end
+ benchmark_implementation("UDS batched with small packet", "STATSD_SOCKET_PATH" => SOCKET_PATH)
+ if ENABLE_PROFILING
+ Vernier.stop_profile
+ end
+
+ if ENABLE_PROFILING
+ Vernier.start_profile(out: "tmp/benchmark_profile_uds_batched_async.json")
+ end
+ benchmark_implementation(
+ "UDS batched with jumbo packet",
+ "STATSD_SOCKET_PATH" => SOCKET_PATH,
+ "STATSD_MAX_PACKET_SIZE" => UDS_MAX_SEND_SIZE.to_s,
+ )
+ if ENABLE_PROFILING
+ Vernier.stop_profile
+ end
+
+ if ENABLE_PROFILING
+ Vernier.start_profile(out: "tmp/benchmark_udp_batched_with_aggregation.json")
+ end
+ benchmark_implementation(
+ "UDP batched with aggregation and 5 second interval",
+ "STATSD_ENABLE_AGGREGATION" => "true",
+ "STATSD_AGGREGATION_FLUSH_INTERVAL" => "5",
+ )
+ if ENABLE_PROFILING
+ Vernier.stop_profile
+ end
+
+ if ENABLE_PROFILING
+ Vernier.start_profile(out: "tmp/benchmark_uds_with_aggregation.json")
+ end
+ benchmark_implementation(
+ "UDS batched with aggregation and 5 second interval",
+ "STATSD_ENABLE_AGGREGATION" => "true",
+ "STATSD_AGGREGATION_FLUSH_INTERVAL" => "5",
+ "STATSD_SOCKET_PATH" => SOCKET_PATH,
+ "STATSD_MAX_PACKET_SIZE" => UDS_MAX_SEND_SIZE.to_s,
+ )
+ if ENABLE_PROFILING
+ Vernier.stop_profile
+ end
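The UDS benchmarks above configure the transport purely through environment variables; the same configuration in application code could look like the following sketch (the socket path is a placeholder, and the variable names are the ones the benchmark passes to `benchmark_implementation`):

```ruby
# Minimal sketch: send StatsD datagrams over a Unix domain socket.
# STATSD_SOCKET_PATH and STATSD_MAX_PACKET_SIZE mirror the benchmark above;
# /tmp/statsd.sock is a placeholder path.
ENV["STATSD_SOCKET_PATH"] = "/tmp/statsd.sock"
ENV["STATSD_MAX_PACKET_SIZE"] = "32768" # the benchmark's "jumbo packet" size

require "statsd-instrument"

StatsD.increment("uds.counter") # delivered via the socket instead of UDP
```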
@@ -27,9 +27,10 @@ def benchmark_implementation(name, env = {})
  %x(git rev-parse --abbrev-ref HEAD).rstrip
  end

- intermediate_results_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/#{File.basename($PROGRAM_NAME)}-#{name}"
- log_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/#{File.basename($PROGRAM_NAME)}-#{name}.log"
- FileUtils.mkdir_p(File.dirname(intermediate_results_filename))
+ log_dir = "#{Dir.tmpdir}/statsd-instrument-benchmarks"
+ intermediate_results_filename = File.join(log_dir, "#{File.basename($PROGRAM_NAME)}-#{name}")
+ log_filename = File.join(log_dir, "#{File.basename($PROGRAM_NAME)}-#{name}.log")
+ FileUtils.mkdir_p(log_dir)

  # Set up an UDP listener to which we can send StatsD packets
  receiver = UDPSocket.new
@@ -69,7 +70,7 @@ def benchmark_implementation(name, env = {})
  File.unlink(intermediate_results_filename)
  end

- log_file.close
+ # log_file.close
  logs = File.read(log_filename)
  unless logs.empty?
  puts
@@ -81,3 +82,4 @@ end

  benchmark_implementation("UDP sync", "STATSD_BUFFER_CAPACITY" => "0")
  benchmark_implementation("UDP batched")
+ benchmark_implementation("UDP batched with aggregation", "STATSD_ENABLE_AGGREGATION" => "true")