statsd-instrument 3.2.0 → 3.4.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: f42d327300430dd5926c971c7d27d860acd99800cc8eeb4552892f755037553f
-   data.tar.gz: 88c22b7ef591313d0e159a24f3875ddc7a63acf0c6d10551cac6bf069a6c0ad3
+   metadata.gz: bcbaac2cd4178c61bfcb484c45931bb387d81dfa632fbd114bba126c393beb75
+   data.tar.gz: d1101fbc534b6612ff76a282ab45d37aad3a0c185b3ccedf6dcf2ef78ce85bb1
  SHA512:
-   metadata.gz: 8a0d04e0acf3ddb8ae85bb3d5b4a560dbb713c6c8d26654cca6d52b5b3d70a8570b424b489e33d5bcc301f419db8e1de448daaee863fb0624ff52aa10c67c9a4
-   data.tar.gz: f56d3bd3ecd117d9a359de5445f3b00750d11151a40da26f6e902cd1c5800c8688ae1bab05f2457eb519751c642f69278425ba4541a6fe6caa41dd766dd3a21c
+   metadata.gz: 4cd62e31fe1dc59ae49345e598de791ba1295a18d527d88f840de7349d31e8d325cf00543e59eddf02c3fed56a58f9020ace88ca7554de2340d193e8e62cb1d4
+   data.tar.gz: 98fcae4a9b924ab432d745ad2c02f4ac5ed6d0e94ad1f002e80afc0f4f741f01412dbf605151813432dc828ad57f9c1747e26a7127c32950df9ffddeb90e9523
data/.github/workflows/benchmark.yml CHANGED
@@ -13,12 +13,15 @@ jobs:
        - name: Set up Ruby
          uses: ruby/setup-ruby@v1
          with:
-           ruby-version: 2.6
+           ruby-version: 3.1
            bundler-cache: true
  
        - name: Run benchmark on branch
          run: benchmark/send-metrics-to-local-udp-receiver
  
+       - name: Run throughput benchmark on branch
+         run: benchmark/local-udp-throughput
+
        - uses: actions/checkout@v1
          with:
            ref: 'master'
@@ -28,3 +31,6 @@ jobs:
  
        - name: Run benchmark on master
          run: benchmark/send-metrics-to-local-udp-receiver
+
+       - name: Run throughput benchmark on master
+         run: benchmark/local-udp-throughput
data/.github/workflows/cla.yml ADDED
@@ -0,0 +1,22 @@
+ name: Contributor License Agreement (CLA)
+
+ on:
+   pull_request_target:
+     types: [opened, synchronize]
+   issue_comment:
+     types: [created]
+
+ jobs:
+   cla:
+     runs-on: ubuntu-latest
+     if: |
+       (github.event.issue.pull_request
+         && !github.event.issue.pull_request.merged_at
+         && contains(github.event.comment.body, 'signed')
+       )
+       || (github.event.pull_request && !github.event.pull_request.merged)
+     steps:
+       - uses: Shopify/shopify-cla-action@v1
+         with:
+           github-token: ${{ secrets.GITHUB_TOKEN }}
+           cla-token: ${{ secrets.CLA_TOKEN }}
data/.github/workflows/tests.yml CHANGED
@@ -9,9 +9,8 @@ jobs:
      strategy:
        fail-fast: false
        matrix:
-         ruby: ['2.6', '2.7', '3.0', '3.1']
-
-         # Windows on macOS builds started failing, so they are disabled for noew
+         ruby: ['2.6', '2.7', '3.0', '3.1', 'ruby-head', 'jruby-9.3.7.0', 'truffleruby-22.2.0']
+         # Windows on macOS builds started failing, so they are disabled for now
          # platform: [windows-2019, macOS-10.14, ubuntu-18.04]
          # exclude:
          #   ...
data/.rubocop.yml CHANGED
@@ -17,6 +17,18 @@ Naming/FileName:
    Exclude:
      - lib/statsd-instrument.rb
  
+ Metrics/ParameterLists:
+   Enabled: false
+
+ Metrics/BlockNesting:
+   Enabled: false
+
+ Style/WhileUntilModifier:
+   Enabled: false
+
+ Style/IdenticalConditionalBranches:
+   Enabled: false
+
  # Enable our own cops on our own repo
  
  StatsD/MetricReturnValue:
data/CHANGELOG.md CHANGED
@@ -6,7 +6,25 @@ section below.
  
  ### Unreleased changes
  
- _Nothing yet_
+ - UDP batching has been largely refactored again. The `STATSD_FLUSH_INTERVAL` environment variable
+   is deprecated. It still disables batching if set to `0`, but otherwise it has no effect.
+   Setting `STATSD_BUFFER_CAPACITY` to `0` is now the recommended way to disable batching.
+ - The synchronous UDP sink now uses one socket per thread, instead of a single socket
+   protected by a mutex.
+
+ ## Version 3.3.0
+
+ - UDP batching now has a max queue size, and emitter threads will block if the queue
+   reaches the limit. This prevents the queue from growing unbounded.
+   More generally, the UDP batching mode was optimized to improve throughput and to
+   flush the queue more eagerly (#309).
+ - Added the `STATSD_BUFFER_CAPACITY` configuration.
+ - Added the `STATSD_MAX_PACKET_SIZE` configuration.
+ - Require `set` explicitly, to avoid breaking tests for users of this library (#311).
+
+ ## Version 3.2.1
+
+ - Fix a bug in UDP batching that could cause the Ruby process to be stuck on exit (#291).
  
  ## Version 3.2.0
  
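To make the migration described in the unreleased changes concrete, here is a minimal sketch using the `StatsD::Instrument::Environment` API that appears elsewhere in this diff (the address and metric name are placeholders):

```ruby
require "statsd-instrument"

# The deprecated knob, STATSD_FLUSH_INTERVAL=0, still disables batching but
# now triggers a deprecation warning. The recommended equivalent is
# STATSD_BUFFER_CAPACITY=0, which selects the synchronous UDP sink:
client = StatsD::Instrument::Environment.new(
  "STATSD_ENV" => "production",      # production/staging selects a UDP sink
  "STATSD_ADDR" => "127.0.0.1:8125", # placeholder address
  "STATSD_BUFFER_CAPACITY" => "0",   # 0 => no batching, one packet per event
).client

client.increment("example.counter")
```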
data/README.md CHANGED
@@ -42,9 +42,18 @@ The following environment variables are supported:
    overridden in a metric method call.
  - `STATSD_DEFAULT_TAGS`: A comma-separated list of tags to apply to all metrics.
    (Note: tags are not supported by all implementations.)
- - `STATSD_FLUSH_INTERVAL`: (default: `1.0`) The interval in seconds at which
-   events are sent in batch. Only applicable to the UDP configuration. If set
-   to `0.0`, metrics are sent immediately.
+ - `STATSD_BUFFER_CAPACITY`: (default: `5000`) The maximum number of events that
+   may be buffered before emitting threads start to block. Increasing this
+   value may help for applications generating spikes of events. However, if the
+   application emits events faster than they can be sent, increasing it won't help.
+   If set to `0`, batching is disabled, and events are sent in individual
+   UDP packets, which is much slower.
+ - `STATSD_FLUSH_INTERVAL`: (default: `1`) Deprecated. Setting this to `0` is
+   equivalent to setting `STATSD_BUFFER_CAPACITY` to `0`.
+ - `STATSD_MAX_PACKET_SIZE`: (default: `1472`) The maximum size of UDP packets.
+   If your network is properly configured to handle larger packets, you may try
+   to increase this value for better performance, but most networks can't handle
+   larger packets.
  
  ## StatsD keys
  
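For readers wiring these knobs up in code rather than through the environment, here is a minimal sketch using the sink constructor this release introduces (the address and values are illustrative):

```ruby
require "statsd-instrument"

# BatchedUDPSink.for_addr now forwards keyword arguments to the constructor,
# so the buffer capacity and packet size documented above can be set directly.
sink = StatsD::Instrument::BatchedUDPSink.for_addr(
  "127.0.0.1:8125",        # placeholder address
  buffer_capacity: 10_000, # larger buffer to absorb spikes of events
  max_packet_size: 1472,   # the default; raise it only if your network allows
)

sink << "example.counter:1|c" # datagrams are buffered and sent in batches
sink.shutdown                 # drains the buffer, e.g. before process exit
```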
data/benchmark/local-udp-throughput ADDED
@@ -0,0 +1,59 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require "bundler/setup"
+ require "benchmark/ips"
+ require "tmpdir"
+ require "socket"
+ require "statsd-instrument"
+
+ def send_metrics(client)
+   client.increment("StatsD.increment", 10)
+   client.measure("StatsD.measure") { 1 + 1 }
+   client.gauge("StatsD.gauge", 12.0, tags: ["foo:bar", "quc"])
+   client.set("StatsD.set", "value", tags: { foo: "bar", baz: "quc" })
+   client.event("StasD.event", "12345")
+   client.service_check("StatsD.service_check", "ok")
+ end
+
+ THREAD_COUNT = Integer(ENV.fetch("THREAD_COUNT", 5))
+ EVENTS_PER_ITERATION = 6
+ ITERATIONS = 50_000
+ def benchmark_implementation(name, env = {})
+   intermediate_results_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/"
+   log_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/#{File.basename($PROGRAM_NAME)}-#{name}.log"
+   FileUtils.mkdir_p(File.dirname(intermediate_results_filename))
+
+   # Set up a UDP listener to which we can send StatsD packets
+   receiver = UDPSocket.new
+   receiver.bind("localhost", 0)
+
+   log_file = File.open(log_filename, "w+", level: Logger::WARN)
+   StatsD.logger = Logger.new(log_file)
+
+   udp_client = StatsD::Instrument::Environment.new(ENV.to_h.merge(
+     "STATSD_ADDR" => "#{receiver.addr[2]}:#{receiver.addr[1]}",
+     "STATSD_IMPLEMENTATION" => "dogstatsd",
+     "STATSD_ENV" => "production",
+   ).merge(env)).client
+
+   puts "===== #{name} throughput (#{THREAD_COUNT} threads) ====="
+   threads = THREAD_COUNT.times.map do
+     Thread.new do
+       count = ITERATIONS
+       while (count -= 1) > 0
+         send_metrics(udp_client)
+       end
+     end
+   end
+   start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+   threads.each(&:join)
+   duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
+   events_sent = THREAD_COUNT * EVENTS_PER_ITERATION * ITERATIONS
+   puts "events: #{(events_sent / duration).round(1)}/s"
+   receiver.close
+   udp_client.shutdown if udp_client.respond_to?(:shutdown)
+ end
+
+ benchmark_implementation("UDP sync", "STATSD_BUFFER_CAPACITY" => "0")
+ benchmark_implementation("UDP batched")
data/benchmark/send-metrics-to-local-udp-receiver CHANGED
@@ -7,53 +7,77 @@ require "tmpdir"
  require "socket"
  require "statsd-instrument"
  
- revision = %x(git rev-parse HEAD).rstrip
- base_revision = %x(git rev-parse origin/master).rstrip
- branch = if revision == base_revision
-   "master"
- else
-   %x(git rev-parse --abbrev-ref HEAD).rstrip
+ def send_metrics(client)
+   client.increment("StatsD.increment", 10)
+   client.measure("StatsD.measure") { 1 + 1 }
+   client.gauge("StatsD.gauge", 12.0, tags: ["foo:bar", "quc"])
+   client.set("StatsD.set", "value", tags: { foo: "bar", baz: "quc" })
+   if client.datagram_builder_class == StatsD::Instrument::DogStatsDDatagramBuilder
+     client.event("StasD.event", "12345")
+     client.service_check("StatsD.service_check", "ok")
+   end
  end
  
- intermediate_results_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/#{File.basename($PROGRAM_NAME)}"
- FileUtils.mkdir_p(File.dirname(intermediate_results_filename))
-
- # Set up an UDP listener to which we can send StatsD packets
- receiver = UDPSocket.new
- receiver.bind("localhost", 0)
-
- StatsD.singleton_client = StatsD::Instrument::Environment.new(
-   "STATSD_ADDR" => "#{receiver.addr[2]}:#{receiver.addr[1]}",
-   "STATSD_IMPLEMENTATION" => "dogstatsd",
-   "STATSD_ENV" => "production",
- ).client
-
- report = Benchmark.ips do |bench|
-   bench.report("StatsD metrics to local UDP receiver (branch: #{branch}, sha: #{revision[0, 7]})") do
-     StatsD.increment("StatsD.increment", 10)
-     StatsD.measure("StatsD.measure") { 1 + 1 }
-     StatsD.gauge("StatsD.gauge", 12.0, tags: ["foo:bar", "quc"])
-     StatsD.set("StatsD.set", "value", tags: { foo: "bar", baz: "quc" })
-     if StatsD.singleton_client.datagram_builder_class == StatsD::Instrument::DogStatsDDatagramBuilder
-       StatsD.event("StasD.event", "12345")
-       StatsD.service_check("StatsD.service_check", "ok")
+ def benchmark_implementation(name, env = {})
+   revision = %x(git rev-parse HEAD).rstrip
+   base_revision = %x(git rev-parse origin/master).rstrip
+   branch = if revision == base_revision
+     "master"
+   else
+     %x(git rev-parse --abbrev-ref HEAD).rstrip
+   end
+
+   intermediate_results_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/#{File.basename($PROGRAM_NAME)}-#{name}"
+   log_filename = "#{Dir.tmpdir}/statsd-instrument-benchmarks/#{File.basename($PROGRAM_NAME)}-#{name}.log"
+   FileUtils.mkdir_p(File.dirname(intermediate_results_filename))
+
+   # Set up a UDP listener to which we can send StatsD packets
+   receiver = UDPSocket.new
+   receiver.bind("localhost", 0)
+
+   log_file = File.open(log_filename, "w+", level: Logger::WARN)
+   StatsD.logger = Logger.new(log_file)
+
+   udp_client = StatsD::Instrument::Environment.new(ENV.to_h.merge(
+     "STATSD_ADDR" => "#{receiver.addr[2]}:#{receiver.addr[1]}",
+     "STATSD_IMPLEMENTATION" => "dogstatsd",
+     "STATSD_ENV" => "production",
+   ).merge(env)).client
+
+   puts "===== #{name} ====="
+   report = Benchmark.ips do |bench|
+     bench.report("#{name} (branch: #{branch}, sha: #{revision[0, 7]})") do
+       send_metrics(udp_client)
      end
+
+     # Store the results in between runs
+     bench.save!(intermediate_results_filename)
+     bench.compare!
    end
  
-   # Store the results in between runs
-   bench.save!(intermediate_results_filename)
-   bench.compare!
- end
+   receiver.close
+   udp_client.shutdown if udp_client.respond_to?(:shutdown)
+
+   if report.entries.length == 1
+     puts
+     puts "To compare the performance of this revision against another revision (e.g. master),"
+     puts "check out a different branch and run this benchmark script again."
+   elsif ENV["KEEP_RESULTS"]
+     puts
+     puts "The intermediate results have been stored in #{intermediate_results_filename}"
+   else
+     File.unlink(intermediate_results_filename)
+   end
  
- receiver.close
-
- if report.entries.length == 1
-   puts
-   puts "To compare the performance of this revision against another revision (e.g. master),"
-   puts "check out a different branch and run this benchmark script again."
- elsif ENV["KEEP_RESULTS"]
-   puts
-   puts "The intermediate results have been stored in #{intermediate_results_filename}"
- else
-   File.unlink(intermediate_results_filename)
+   log_file.close
+   logs = File.read(log_filename)
+   unless logs.empty?
+     puts
+     puts "==== logs ===="
+     puts logs
+   end
+   puts "================"
  end
+
+ benchmark_implementation("UDP sync", "STATSD_BUFFER_CAPACITY" => "0")
+ benchmark_implementation("UDP batched")
data/lib/statsd/instrument/batched_udp_sink.rb CHANGED
@@ -5,12 +5,14 @@ module StatsD
      # @note This class is part of the new Client implementation that is intended
      # to become the new default in the next major release of this library.
      class BatchedUDPSink
-       DEFAULT_FLUSH_INTERVAL = 1.0
-       MAX_PACKET_SIZE = 508
+       DEFAULT_THREAD_PRIORITY = 100
+       DEFAULT_BUFFER_CAPACITY = 5_000
+       # https://docs.datadoghq.com/developers/dogstatsd/high_throughput/?code-lang=ruby#ensure-proper-packet-sizes
+       DEFAULT_MAX_PACKET_SIZE = 1472
  
-       def self.for_addr(addr, flush_interval: DEFAULT_FLUSH_INTERVAL)
+       def self.for_addr(addr, **kwargs)
          host, port_as_string = addr.split(":", 2)
-         new(host, Integer(port_as_string), flush_interval: flush_interval)
+         new(host, Integer(port_as_string), **kwargs)
        end
  
        attr_reader :host, :port
@@ -21,10 +23,22 @@ module StatsD
          end
        end
  
-       def initialize(host, port, flush_interval: DEFAULT_FLUSH_INTERVAL)
+       def initialize(
+         host,
+         port,
+         thread_priority: DEFAULT_THREAD_PRIORITY,
+         buffer_capacity: DEFAULT_BUFFER_CAPACITY,
+         max_packet_size: DEFAULT_MAX_PACKET_SIZE
+       )
          @host = host
          @port = port
-         @dispatcher = Dispatcher.new(host, port, flush_interval)
+         @dispatcher = Dispatcher.new(
+           host,
+           port,
+           buffer_capacity,
+           thread_priority,
+           max_packet_size,
+         )
          ObjectSpace.define_finalizer(self, self.class.finalize(@dispatcher))
        end
  
@@ -37,83 +51,125 @@ module StatsD
          self
        end
  
-       class Dispatcher
-         BUFFER_CLASS = if !::Object.const_defined?(:RUBY_ENGINE) || RUBY_ENGINE == "ruby"
-           ::Array
-         else
-           begin
-             gem("concurrent-ruby")
-           rescue Gem::MissingSpecError
-             raise Gem::MissingSpecError, "statsd-instrument depends on `concurrent-ruby` on #{RUBY_ENGINE}"
-           end
-           require "concurrent/array"
-           Concurrent::Array
+       def shutdown(*args)
+         @dispatcher.shutdown(*args)
+       end
+
+       class Buffer < SizedQueue
+         def push_nonblock(item)
+           push(item, true)
+         rescue ThreadError, ClosedQueueError
+           nil
          end
  
-         def initialize(host, port, flush_interval)
-           @host = host
-           @port = port
+         def inspect
+           "<#{self.class.name}:#{object_id} capacity=#{max} size=#{size}>"
+         end
+
+         def pop_nonblock
+           pop(true)
+         rescue ThreadError
+           nil
+         end
+       end
+
+       class Dispatcher
+         def initialize(host, port, buffer_capacity, thread_priority, max_packet_size)
+           @udp_sink = UDPSink.new(host, port)
            @interrupted = false
-           @flush_interval = flush_interval
-           @buffer = BUFFER_CLASS.new
+           @thread_priority = thread_priority
+           @max_packet_size = max_packet_size
+           @buffer_capacity = buffer_capacity
+           @buffer = Buffer.new(buffer_capacity)
            @dispatcher_thread = Thread.new { dispatch }
+           @pid = Process.pid
          end
  
          def <<(datagram)
-           unless @dispatcher_thread&.alive?
-             # If the dispatcher thread is dead, we assume it is because
-             # the process was forked. So to avoid sending datagrams twice
-             # we clear the buffer.
-             @buffer.clear
-             @dispatcher_thread = Thread.new { dispatch }
+           if !thread_healthcheck || !@buffer.push_nonblock(datagram)
+             # The buffer is full or the thread can't be respawned,
+             # so we send the datagram synchronously.
+             @udp_sink << datagram
            end
-           @buffer << datagram
+
            self
          end
  
-         def shutdown(wait = @flush_interval * 2)
+         def shutdown(wait = 2)
            @interrupted = true
+           @buffer.close
           if @dispatcher_thread&.alive?
             @dispatcher_thread.join(wait)
-           else
-             flush
           end
+           flush(blocking: false)
         end
  
         private
  
         NEWLINE = "\n".b.freeze
-         def flush
-           return if @buffer.empty?
  
-           datagrams = @buffer.shift(@buffer.size)
-
-           until datagrams.empty?
-             packet = String.new(datagrams.pop, encoding: Encoding::BINARY, capacity: MAX_PACKET_SIZE)
+         def flush(blocking:)
+           packet = "".b
+           next_datagram = nil
+           until @buffer.closed? && @buffer.empty? && next_datagram.nil?
+             if blocking
+               next_datagram ||= @buffer.pop
+               break if next_datagram.nil? # queue was closed
+             else
+               next_datagram ||= @buffer.pop_nonblock
+               break if next_datagram.nil? # no datagram in buffer
+             end
  
-             until datagrams.empty? || packet.bytesize + datagrams.first.bytesize + 1 > MAX_PACKET_SIZE
-               packet << NEWLINE << datagrams.shift
+             packet << next_datagram
+             next_datagram = nil
+             unless packet.bytesize > @max_packet_size
+               while (next_datagram = @buffer.pop_nonblock)
+                 if @max_packet_size - packet.bytesize - 1 > next_datagram.bytesize
+                   packet << NEWLINE << next_datagram
+                 else
+                   break
+                 end
+               end
             end
  
-             send_packet(packet)
+             @udp_sink << packet
+             packet.clear
+           end
+         end
+
+         def thread_healthcheck
+           # TODO: We have a race condition on JRuby / TruffleRuby here. It could cause multiple
+           # dispatcher threads to be spawned, which would cause problems.
+           # However, we can't simply lock here as we might be called from a trap context.
+           unless @dispatcher_thread&.alive?
+             # If the main thread is dead, the VM is shutting down, so we won't be able
+             # to spawn a new thread; we fall back to sending our datagram directly.
+             return false unless Thread.main.alive?
+
+             # If the dispatcher thread is dead, it might be because the process was forked.
+             # So to avoid sending datagrams twice, we clear the buffer.
+             if @pid != Process.pid
+               StatsD.logger.info { "[#{self.class.name}] Restarting the dispatcher thread after fork" }
+               @pid = Process.pid
+               @buffer.clear
+             else
+               StatsD.logger.info { "[#{self.class.name}] Restarting the dispatcher thread" }
+             end
+             @dispatcher_thread = Thread.new { dispatch }.tap { |t| t.priority = @thread_priority }
           end
+           true
         end
  
         def dispatch
           until @interrupted
             begin
-               start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
-               flush
-               next_sleep_duration = @flush_interval - (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
-
-               sleep(next_sleep_duration) if next_sleep_duration > 0
+               flush(blocking: true)
             rescue => error
               report_error(error)
             end
           end
  
-           flush
-           invalidate_socket
+           flush(blocking: false)
         end
  
         def report_error(error)
@@ -121,38 +177,6 @@ module StatsD
             "[#{self.class.name}] The dispatcher thread encountered an error #{error.class}: #{error.message}"
           end
         end
-
-         def send_packet(packet)
-           retried = false
-           socket.send(packet, 0)
-         rescue SocketError, IOError, SystemCallError => error
-           StatsD.logger.debug do
-             "[#{self.class.name}] Resetting connection because of #{error.class}: #{error.message}"
-           end
-           invalidate_socket
-           if retried
-             StatsD.logger.warning do
-               "[#{self.class.name}] Events were dropped because of #{error.class}: #{error.message}"
-             end
-           else
-             retried = true
-             retry
-           end
-         end
-
-         def socket
-           @socket ||= begin
-             socket = UDPSocket.new
-             socket.connect(@host, @port)
-             socket
-           end
-         end
-
-         def invalidate_socket
-           @socket&.close
-         ensure
-           @socket = nil
-         end
       end
     end
   end
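The `Buffer` class above is a thin non-blocking wrapper around Ruby's core `SizedQueue`. A standalone sketch of the same pattern (independent of this gem) shows why the dispatcher can treat a full or closed queue as a signal to fall back to a synchronous send:

```ruby
queue = SizedQueue.new(2) # SizedQueue is part of Ruby core

# Non-blocking push: SizedQueue#push(item, true) raises ThreadError when the
# queue is full and ClosedQueueError once it has been closed; returning nil
# lets the caller fall back to sending the datagram synchronously.
def push_nonblock(queue, item)
  queue.push(item, true)
rescue ThreadError, ClosedQueueError
  nil
end

push_nonblock(queue, "foo:1|c") # => the queue (truthy)
push_nonblock(queue, "bar:1|c") # => the queue (truthy)
push_nonblock(queue, "baz:1|c") # => nil, the queue is at capacity

# Non-blocking pop, as flush(blocking: false) uses to drain what is left:
while (datagram = (queue.pop(true) rescue nil))
  puts datagram
end
```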
data/lib/statsd/instrument/environment.rb CHANGED
@@ -35,6 +35,14 @@ module StatsD
  
      def initialize(env)
        @env = env
+       if env.key?("STATSD_FLUSH_INTERVAL")
+         value = env["STATSD_FLUSH_INTERVAL"]
+         if Float(value) == 0.0
+           warn("STATSD_FLUSH_INTERVAL=#{value} is deprecated, please set STATSD_BUFFER_CAPACITY=0 instead.")
+         else
+           warn("STATSD_FLUSH_INTERVAL=#{value} is deprecated and has no effect, please remove it.")
+         end
+       end
      end
  
      # Detects the current environment, either by asking Rails, or by inspecting environment variables.
@@ -78,8 +86,16 @@ module StatsD
        env.key?("STATSD_DEFAULT_TAGS") ? env.fetch("STATSD_DEFAULT_TAGS").split(",") : nil
      end
  
-     def statsd_flush_interval
-       Float(env.fetch("STATSD_FLUSH_INTERVAL", 1.0))
+     def statsd_buffer_capacity
+       Integer(env.fetch("STATSD_BUFFER_CAPACITY", StatsD::Instrument::BatchedUDPSink::DEFAULT_BUFFER_CAPACITY))
+     end
+
+     def statsd_batching?
+       statsd_buffer_capacity > 0 && Float(env.fetch("STATSD_FLUSH_INTERVAL", 1.0)) > 0.0
+     end
+
+     def statsd_max_packet_size
+       Float(env.fetch("STATSD_MAX_PACKET_SIZE", StatsD::Instrument::BatchedUDPSink::DEFAULT_MAX_PACKET_SIZE))
      end
  
      def client
@@ -89,8 +105,12 @@ module StatsD
      def default_sink_for_environment
        case environment
        when "production", "staging"
-         if statsd_flush_interval > 0.0
-           StatsD::Instrument::BatchedUDPSink.for_addr(statsd_addr, flush_interval: statsd_flush_interval)
+         if statsd_batching?
+           StatsD::Instrument::BatchedUDPSink.for_addr(
+             statsd_addr,
+             buffer_capacity: statsd_buffer_capacity,
+             max_packet_size: statsd_max_packet_size,
+           )
          else
            StatsD::Instrument::UDPSink.for_addr(statsd_addr)
          end
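A quick illustration of the deprecation path implemented above, mirroring how the tests in this diff construct an environment (the address is a placeholder): `STATSD_FLUSH_INTERVAL=0` still yields a synchronous sink, but now warns and steers users toward `STATSD_BUFFER_CAPACITY=0`:

```ruby
require "statsd-instrument"

env = StatsD::Instrument::Environment.new(
  "STATSD_USE_NEW_CLIENT" => "1",
  "STATSD_ENV" => "staging",
  "STATSD_ADDR" => "127.0.0.1:8125", # placeholder address
  "STATSD_FLUSH_INTERVAL" => "0",    # warns: please set STATSD_BUFFER_CAPACITY=0 instead
)
env.client.sink # => a StatsD::Instrument::UDPSink, i.e. batching disabled
```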
data/lib/statsd/instrument/rubocop.rb CHANGED
@@ -1,5 +1,7 @@
  # frozen_string_literal: true
  
+ require "set"
+
  module StatsD
    module Instrument
      # @private
data/lib/statsd/instrument/udp_sink.rb CHANGED
@@ -12,11 +12,18 @@ module StatsD
  
      attr_reader :host, :port
  
+     FINALIZER = ->(object_id) do
+       Thread.list.each do |thread|
+         if (store = thread["StatsD::UDPSink"])
+           store.delete(object_id)&.close
+         end
+       end
+     end
+
      def initialize(host, port)
+       ObjectSpace.define_finalizer(self, FINALIZER)
        @host = host
        @port = port
-       @mutex = Mutex.new
-       @socket = nil
      end
  
      def sample?(sample_rate)
@@ -24,38 +31,43 @@ module StatsD
      end
  
      def <<(datagram)
-       with_socket { |socket| socket.send(datagram, 0) }
-       self
-     rescue ThreadError
-       # In cases where a TERM or KILL signal has been sent, and we send stats as
-       # part of a signal handler, locks cannot be acquired, so we do our best
-       # to try and send the datagram without a lock.
-       socket.send(datagram, 0) > 0
-     rescue SocketError, IOError, SystemCallError => error
-       StatsD.logger.debug do
-         "[StatsD::Instrument::UDPSink] Resetting connection because of #{error.class}: #{error.message}"
+       retried = false
+       begin
+         socket.send(datagram, 0)
+       rescue SocketError, IOError, SystemCallError => error
+         StatsD.logger.debug do
+           "[StatsD::Instrument::UDPSink] Resetting connection because of #{error.class}: #{error.message}"
+         end
+         invalidate_socket
+         if retried
+           StatsD.logger.warn do
+             "[#{self.class.name}] Events were dropped because of #{error.class}: #{error.message}"
+           end
+         else
+           retried = true
+           retry
+         end
        end
-       invalidate_socket
+       self
      end
  
      private
  
-     def with_socket
-       @mutex.synchronize { yield(socket) }
+     def invalidate_socket
+       socket = thread_store.delete(object_id)
+       socket&.close
      end
  
      def socket
-       @socket ||= begin
+       thread_store[object_id] ||= begin
          socket = UDPSocket.new
          socket.connect(@host, @port)
          socket
        end
      end
  
-     def invalidate_socket
-       @mutex.synchronize do
-         @socket = nil
-       end
+     def thread_store
+       Thread.current["StatsD::UDPSink"] ||= {}
      end
    end
  end
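The rewritten `UDPSink` replaces the mutex-guarded shared socket with one socket per thread, stored in `Thread#[]` storage and keyed by the sink's `object_id`. A simplified sketch of the same pattern outside the gem (class and key names here are illustrative):

```ruby
require "socket"

class PerThreadUDPWriter
  def initialize(host, port)
    @host = host
    @port = port
  end

  def write(payload)
    socket.send(payload, 0)
  end

  private

  # Thread#[] is per-thread (fiber-local) storage, so each thread lazily
  # opens and caches its own connected socket. No lock is needed around
  # send(2), and signal/trap handlers can't deadlock on a mutex.
  def socket
    store = (Thread.current["PerThreadUDPWriter"] ||= {})
    store[object_id] ||= UDPSocket.new.tap { |s| s.connect(@host, @port) }
  end
end
```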
data/lib/statsd/instrument/version.rb CHANGED
@@ -2,6 +2,6 @@
  
  module StatsD
    module Instrument
-     VERSION = "3.2.0"
+     VERSION = "3.4.0"
    end
  end
data/statsd-instrument.gemspec CHANGED
@@ -21,6 +21,4 @@ Gem::Specification.new do |spec|
    spec.require_paths = ["lib"]
  
    spec.metadata['allowed_push_host'] = "https://rubygems.org"
-
-   spec.add_development_dependency 'concurrent-ruby'
  end
data/test/environment_test.rb CHANGED
@@ -64,4 +64,13 @@ class EnvironmentTest < Minitest::Test
      )
      assert_kind_of(StatsD::Instrument::UDPSink, env.client.sink)
    end
+
+   def test_client_from_env_uses_regular_udp_sink_when_buffer_capacity_is_0
+     env = StatsD::Instrument::Environment.new(
+       "STATSD_USE_NEW_CLIENT" => "1",
+       "STATSD_ENV" => "staging",
+       "STATSD_BUFFER_CAPACITY" => "0",
+     )
+     assert_kind_of(StatsD::Instrument::UDPSink, env.client.sink)
+   end
  end
data/test/udp_sink_test.rb CHANGED
@@ -32,17 +32,18 @@ module UDPSinkTests
      refute(udp_sink.sample?(0.5))
    end
  
-   def test_parallelism
+   def test_concurrency
      udp_sink = build_sink(@host, @port)
-     50.times.map { |i| Thread.new { udp_sink << "foo:#{i}|c" << "bar:#{i}|c" } }
-     datagrams = []
-
-     while @receiver.wait_readable(2)
-       datagram, _source = @receiver.recvfrom(4000)
-       datagrams += datagram.split("\n")
+     threads = 10.times.map do |i|
+       Thread.new do
+         udp_sink << "foo:#{i}|c" << "bar:#{i}|c" << "baz:#{i}|c" << "plop:#{i}|c"
+       end
      end
-
-     assert_equal(100, datagrams.size)
+     threads.each(&:join)
+     udp_sink.shutdown if udp_sink.respond_to?(:shutdown)
+     assert_equal(40, read_datagrams(40).size)
+   ensure
+     threads&.each(&:kill)
    end
  
    class SimpleFormatter < ::Logger::Formatter
@@ -53,31 +54,39 @@ module UDPSinkTests
  
    def test_sends_datagram_in_signal_handler
      udp_sink = build_sink(@host, @port)
-     Signal.trap("USR1") { udp_sink << "exiting:1|c" }
-
-     pid = fork do
-       sleep(5)
+     Signal.trap("USR1") do
+       udp_sink << "exiting:1|c"
+       udp_sink << "exiting:1|d"
      end
  
+     Process.kill("USR1", Process.pid)
+     assert_equal(["exiting:1|c", "exiting:1|d"], read_datagrams(2))
+   ensure
      Signal.trap("USR1", "DEFAULT")
-
-     Process.kill("USR1", pid)
-     @receiver.wait_readable(1)
-     assert_equal("exiting:1|c", @receiver.recvfrom_nonblock(100).first)
-     Process.kill("KILL", pid)
-   rescue NotImplementedError
-     pass("Fork is not implemented on #{RUBY_PLATFORM}")
    end
  
    def test_sends_datagram_before_exit
      udp_sink = build_sink(@host, @port)
-     fork do
+     pid = fork do
        udp_sink << "exiting:1|c"
-       Process.exit(0)
+       udp_sink << "exiting:1|d"
      end
+     Process.wait(pid)
+     assert_equal(["exiting:1|c", "exiting:1|d"], read_datagrams(2))
+   rescue NotImplementedError
+     pass("Fork is not implemented on #{RUBY_PLATFORM}")
+   end
  
-     @receiver.wait_readable(1)
-     assert_equal("exiting:1|c", @receiver.recvfrom_nonblock(100).first)
+   def test_sends_datagram_in_at_exit_callback
+     udp_sink = build_sink(@host, @port)
+     pid = fork do
+       at_exit do
+         udp_sink << "exiting:1|c"
+         udp_sink << "exiting:1|d"
+       end
+     end
+     Process.wait(pid)
+     assert_equal(["exiting:1|c", "exiting:1|d"], read_datagrams(2))
    rescue NotImplementedError
      pass("Fork is not implemented on #{RUBY_PLATFORM}")
    end
@@ -86,11 +95,11 @@ module UDPSinkTests
      udp_sink = build_sink(@host, @port)
      fork do
        udp_sink << "exiting:1|c"
+       udp_sink << "exiting:1|d"
        Process.kill("TERM", Process.pid)
      end
  
-     @receiver.wait_readable(1)
-     assert_equal("exiting:1|c", @receiver.recvfrom_nonblock(100).first)
+     assert_equal(["exiting:1|c", "exiting:1|d"], read_datagrams(2))
    rescue NotImplementedError
      pass("Fork is not implemented on #{RUBY_PLATFORM}")
    end
@@ -101,6 +110,19 @@ module UDPSinkTests
      @sink_class.new(host, port)
    end
  
+   def read_datagrams(count, timeout: ENV["CI"] ? 5 : 1)
+     datagrams = []
+     count.times do
+       if @receiver.wait_readable(timeout)
+         datagrams += @receiver.recvfrom(2000).first.lines(chomp: true)
+         break if datagrams.size >= count
+       else
+         break
+       end
+     end
+     datagrams
+   end
+
    class UDPSinkTest < Minitest::Test
      include UDPSinkTests
  
@@ -127,8 +149,9 @@ module UDPSinkTests
        seq = sequence("connect_fail_connect_succeed")
        socket.expects(:connect).with("localhost", 8125).in_sequence(seq)
        socket.expects(:send).raises(Errno::EDESTADDRREQ).in_sequence(seq)
+       socket.expects(:close).in_sequence(seq)
        socket.expects(:connect).with("localhost", 8125).in_sequence(seq)
-       socket.expects(:send).returns(1).in_sequence(seq)
+       socket.expects(:send).twice.returns(1).in_sequence(seq)
  
        udp_sink = build_sink("localhost", 8125)
        udp_sink << "foo:1|c"
@@ -145,7 +168,7 @@ module UDPSinkTests
      end
    end
  
-   class BatchedUDPSinkTest < Minitest::Test
+   module BatchedUDPSinkTests
      include UDPSinkTests
  
      def setup
@@ -154,28 +177,24 @@ module UDPSinkTests
        @host = @receiver.addr[2]
        @port = @receiver.addr[1]
        @sink_class = StatsD::Instrument::BatchedUDPSink
+       @sinks = []
      end
  
      def teardown
        @receiver.close
+       @sinks.each(&:shutdown)
      end
  
-     def test_parallelism_buffering
-       udp_sink = build_sink(@host, @port)
-       50.times.map do |i|
-         Thread.new do
-           udp_sink << "foo:#{i}|c" << "bar:#{i}|c" << "baz:#{i}|c" << "plop:#{i}|c"
-         end
-       end
-
-       datagrams = []
-
-       while @receiver.wait_readable(2)
-         datagram, _source = @receiver.recvfrom(1000)
-         datagrams += datagram.split("\n")
-       end
+     private
  
-       assert_equal(200, datagrams.size)
+     def build_sink(host = @host, port = @port)
+       sink = @sink_class.new(host, port, buffer_capacity: 50)
+       @sinks << sink
+       sink
      end
    end
+
+   class BatchedUDPSinkTest < Minitest::Test
+     include BatchedUDPSinkTests
+   end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: statsd-instrument
  version: !ruby/object:Gem::Version
-   version: 3.2.0
+   version: 3.4.0
  platform: ruby
  authors:
  - Jesse Storimer
@@ -10,22 +10,8 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-06-22 00:00:00.000000000 Z
- dependencies:
- - !ruby/object:Gem::Dependency
-   name: concurrent-ruby
-   requirement: !ruby/object:Gem::Requirement
-     requirements:
-     - - ">="
-       - !ruby/object:Gem::Version
-         version: '0'
-   type: :development
-   prerelease: false
-   version_requirements: !ruby/object:Gem::Requirement
-     requirements:
-     - - ">="
-       - !ruby/object:Gem::Version
-         version: '0'
+ date: 2022-08-29 00:00:00.000000000 Z
+ dependencies: []
  description: A StatsD client for Ruby apps. Provides metaprogramming methods to inject
    StatsD instrumentation into your code.
  email:
@@ -35,8 +21,8 @@ extensions: []
  extra_rdoc_files: []
  files:
  - ".github/CODEOWNERS"
- - ".github/probots.yml"
  - ".github/workflows/benchmark.yml"
+ - ".github/workflows/cla.yml"
  - ".github/workflows/lint.yml"
  - ".github/workflows/tests.yml"
  - ".gitignore"
@@ -49,6 +35,7 @@ files:
  - README.md
  - Rakefile
  - benchmark/README.md
+ - benchmark/local-udp-throughput
  - benchmark/send-metrics-to-dev-null-log
  - benchmark/send-metrics-to-local-udp-receiver
  - bin/rake
data/.github/probots.yml DELETED
@@ -1,2 +0,0 @@
- enabled:
- - cla