speedshop-cloudwatch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module Speedshop
  module Cloudwatch
    # Collector that reads Puma's runtime statistics (`::Puma.stats_hash`) and
    # hands them to the Reporter singleton.
    #
    # When the stats hash contains a `:worker_status` entry, the worker counts
    # are reported as plain values and the per-worker thread pool numbers are
    # folded into one statistic set per metric. Otherwise the thread pool
    # numbers are read directly from the top level of the hash.
    class Puma
      # Worker-count keys read from the top level of the stats hash when
      # `:worker_status` is present.
      CLUSTER_STATS = %i[workers booted_workers old_workers].freeze
      # Thread pool keys, read per worker or from the top level of the hash.
      THREAD_POOL_STATS = %i[running backlog pool_capacity max_threads].freeze
      private_constant :CLUSTER_STATS, :THREAD_POOL_STATS

      # Reads the current Puma stats and reports them.
      #
      # Any StandardError is logged through Speedshop::Cloudwatch.log_error
      # instead of being raised.
      #
      # @return [void]
      def collect
        stats = ::Puma.stats_hash

        if stats[:worker_status]
          report_values(stats, CLUSTER_STATS)
          report_aggregate_worker_stats(stats)
        else
          # Single mode - report worker stats without dimensions
          report_values(stats, THREAD_POOL_STATS)
        end
      rescue => e
        Speedshop::Cloudwatch.log_error("Failed to collect Puma metrics: #{e.message}", e)
      end

      private

      # Reports each of +keys+ as a single value, substituting 0 for missing
      # entries. The integration is passed explicitly (as the aggregate path
      # does) so the metric is never attributed by name lookup alone.
      def report_values(stats, keys)
        keys.each do |key|
          Reporter.instance.report(metric: metric_name_for(key), value: stats[key] || 0, integration: :puma)
        end
      end

      # Folds the `:last_status` hashes of all workers into one statistic set
      # (sample count, sum, minimum, maximum) per thread pool metric. Workers
      # without a `:last_status` contribute nothing; a metric with no values
      # at all is skipped.
      def report_aggregate_worker_stats(stats)
        statuses = stats[:worker_status].map { |worker| worker[:last_status] || {} }

        THREAD_POOL_STATS.each do |key|
          values = statuses.map { |status| status[key] }.compact
          next if values.empty?

          Reporter.instance.report(
            metric: metric_name_for(key),
            statistic_values: {
              sample_count: values.length,
              sum: values.sum(&:to_f),
              minimum: values.min.to_f,
              maximum: values.max.to_f
            },
            integration: :puma
          )
        end
      end

      # Converts a snake_case stats key into its CamelCase metric name,
      # e.g. :pool_capacity => :PoolCapacity.
      def metric_name_for(symbol)
        symbol.to_s.split("_").map(&:capitalize).join.to_sym
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Speedshop
  module Cloudwatch
    # Rack middleware that reports how long a request waited before reaching
    # the application, based on the X-Request-Start / X-Queue-Start request
    # header, and then passes the request on unchanged.
    class Rack
      # @param app [#call] the next Rack application in the stack
      def initialize(app)
        @app = app
      end

      # Reports the queue time (when a start header is present) and delegates
      # to the wrapped application. Metric collection never prevents the
      # request from being served.
      #
      # @param env [Hash] the Rack environment
      # @return [Object] whatever the wrapped application returns
      def call(env)
        report_queue_time(env)
        @app.call(env)
      end

      private

      # Computes "now minus header timestamp" in milliseconds and reports it
      # as :RequestQueueTime. The header value (with any "t=" removed) is read
      # as a millisecond timestamp.
      #
      # A header that does not parse to a positive number is ignored: String#to_f
      # returns 0.0 for such input, which would otherwise be reported as a
      # queue time equal to the current epoch in milliseconds.
      #
      # Errors are logged through Speedshop::Cloudwatch.log_error, not raised.
      def report_queue_time(env)
        header = env["HTTP_X_REQUEST_START"] || env["HTTP_X_QUEUE_START"]
        return unless header

        started_at_ms = header.gsub("t=", "").to_f
        return unless started_at_ms.positive?

        queue_time = (Time.now.to_f * 1000) - started_at_ms
        Reporter.instance.report(metric: :RequestQueueTime, value: queue_time)
      rescue => e
        Speedshop::Cloudwatch.log_error("Failed to collect Rack metrics: #{e.message}", e)
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Speedshop
  module Cloudwatch
    # Railtie that places Speedshop::Cloudwatch::Rack at the very front of the
    # middleware stack, except when the process was started through the Rails
    # console or runner commands or is running Rake tasks.
    class Railtie < ::Rails::Railtie
      initializer "speedshop.cloudwatch.insert_middleware", before: :build_middleware_stack do |app|
        # The console/runner commands are recognised by their file names
        # appearing in the current call stack.
        started_by_command = caller.any? do |frame|
          frame.include?("console_command.rb") || frame.include?("runner_command.rb")
        end
        next if started_by_command || self.class.in_rake_task?

        app.config.middleware.insert_before 0, Speedshop::Cloudwatch::Rack
      end

      # @return [Boolean] true when Rake is loaded and its application has at
      #   least one top-level task; false otherwise
      def self.in_rake_task?
        rake_available = defined?(::Rake) && ::Rake.respond_to?(:application)
        return false unless rake_available

        top_level = ::Rake.application&.top_level_tasks
        top_level&.any? || false
      end
    end
  end
end
@@ -0,0 +1,315 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "singleton"
4
+ require_relative "metrics"
5
+
6
module Speedshop
  module Cloudwatch
    # Process-wide singleton that buffers metric datums in an in-memory queue
    # and sends them to CloudWatch from a background thread.
    #
    # Callers hand metrics to #report; the background thread wakes up every
    # `config.interval` seconds, runs the configured collectors, merges datums
    # that share a name, unit and dimension set, and sends the result with
    # `config.client.put_metric_data`.
    class Reporter
      include Singleton

      # Number of datums sent per put_metric_data call.
      BATCH_SIZE = 20
      private_constant :BATCH_SIZE

      def initialize
        @mutex = Mutex.new
        @condition_variable = ConditionVariable.new
        @queue = []
        @collectors = []
        @thread = nil
        @pid = Process.pid
        @running = false
        @dropped_since_last_flush = 0
        @last_overflow_log = nil
      end

      # Starts the background reporter thread unless the environment is
      # disabled or the reporter is already running.
      #
      # @return [void]
      def start!
        return if !config.environment_enabled? || started?

        @mutex.synchronize do
          # Re-check under the lock: another thread may have started us.
          return if started?

          initialize_collectors
          # NOTE(review): when the pid differs from the one recorded at
          # construction, the collectors just built and the queued datums are
          # discarded. Presumably a forked child should only relay metrics
          # reported to it - confirm this is intended.
          if forked?
            @collectors.clear
            @queue.clear
          end

          Speedshop::Cloudwatch.log_info("Starting metric reporter (collectors: #{@collectors.map(&:class).join(", ")})")
          @running = true
          @thread = Thread.new do
            Thread.current.thread_variable_set(:fork_safe, true)
            Thread.current.name = "scw_reporter"
            run_loop
          end
        end
      end

      # @return [Boolean, nil] truthy when the reporter is flagged as running
      #   and its thread is alive
      def started?
        @running && @thread&.alive?
      end

      # Stops the background thread, waiting up to 2 seconds for it to finish
      # its final flush. Does nothing when the reporter is not running.
      #
      # @return [void]
      def stop!
        thread_to_join = nil
        @mutex.synchronize do
          return unless @running
          Speedshop::Cloudwatch.log_info("Stopping metric reporter")
          @running = false
          # Wake the thread if it is sleeping between intervals.
          @condition_variable.signal
          thread_to_join = @thread
          @thread = @pid = nil
          @collectors.clear
        end

        return unless thread_to_join

        # Join outside the lock so the thread can take it for its last flush.
        result = thread_to_join.join(2)
        if result.nil?
          Speedshop::Cloudwatch.log_info("Reporter thread did not finish within 2s timeout")
        else
          Speedshop::Cloudwatch.log_info("Reporter thread stopped gracefully")
        end
      end

      # Queues one datum for the next flush and starts the reporter if needed.
      #
      # The datum is silently ignored when the environment is disabled, when no
      # integration can be determined for the metric, or when the metric is not
      # in the integration's configured metric list. When the queue is full the
      # oldest datum is dropped to make room.
      #
      # @param metric [#to_sym] metric name
      # @param value [Numeric, nil] single value (used when +statistic_values+ is nil)
      # @param statistic_values [Hash, nil] pre-aggregated statistic set
      # @param dimensions [Hash] extra dimensions, name => value
      # @param integration [Symbol, nil] owning integration; looked up in
      #   METRICS by metric name when omitted
      # @return [void]
      def report(metric:, value: nil, statistic_values: nil, dimensions: {}, integration: nil)
        return unless config.environment_enabled?

        metric_name = metric.to_sym
        int = integration || find_integration_for_metric(metric_name)
        return unless int
        return unless metric_allowed?(int, metric_name)

        metric_object = METRICS[int]&.find { |m| m.name == metric_name }
        ns = config.namespaces[int]
        unit = metric_object&.unit || "None"

        dimensions_array = dimensions.map { |k, v| {name: k.to_s, value: v.to_s} }
        all_dimensions = dimensions_array + custom_dimensions

        datum = {metric_name: metric_name.to_s, namespace: ns, unit: unit,
                 dimensions: all_dimensions, timestamp: Time.now}
        if statistic_values
          datum[:statistic_values] = statistic_values
        else
          datum[:value] = value
        end

        @mutex.synchronize do
          if @queue.size >= config.queue_max_size
            @queue.shift
            @dropped_since_last_flush += 1
          end
          @queue << datum
        end

        start! unless started?
      end

      # Discards all queued datums and all collectors.
      #
      # @return [void]
      def clear_all
        @mutex.synchronize do
          @queue.clear
          @collectors.clear
        end
      end

      # Force immediate metrics collection and flush (for testing)
      # This bypasses the normal interval-based flushing
      def flush_now!
        return unless @running

        collect_metrics
        flush_metrics
      end

      # Stops and empties the current singleton instance (if one exists) and
      # clears the Singleton slot so the next `instance` call builds a new one.
      def self.reset
        if instance_variable_defined?(:@singleton__instance__)
          reporter = instance_variable_get(:@singleton__instance__)
          reporter&.stop! if reporter&.started?
          reporter&.clear_all
        end
        instance_variable_set(:@singleton__instance__, nil)
      end

      private

      def config
        Config.instance
      end

      # True when the current pid differs from the one stored in @pid.
      def forked?
        @pid != Process.pid
      end

      # Rebuilds @collectors from `config.collectors`.
      #
      # The list is emptied first so that calling this again (for example when
      # the reporter is restarted after its thread died) cannot register the
      # same collector twice and double-report its metrics. A collector whose
      # construction fails is logged and skipped.
      def initialize_collectors
        @collectors.clear
        config.collectors.each do |integration|
          @collectors << Speedshop::Cloudwatch::Puma.new if integration == :puma
          @collectors << Speedshop::Cloudwatch::Sidekiq.new if integration == :sidekiq
        rescue => e
          Speedshop::Cloudwatch.log_error("Failed to initialize collector for #{integration}: #{e.message}", e)
        end
      end

      # Body of the background thread: sleep for one interval (or until
      # signalled), then collect and flush, until @running is cleared. One
      # last flush is performed on the way out.
      def run_loop
        while @running
          @mutex.synchronize do
            @condition_variable.wait(@mutex, config.interval) if @running
          end
          break unless @running
          collect_metrics
          flush_metrics
        end

        flush_metrics
      rescue => e
        Speedshop::Cloudwatch.log_error("Reporter error: #{e.message}", e)
      end

      # Runs every collector; a failing collector is logged and does not stop
      # the others.
      def collect_metrics
        @collectors.each do |collector|
          collector.collect
        rescue => e
          Speedshop::Cloudwatch.log_error("Collector error: #{e.message}", e)
        end
      end

      # Empties the queue and sends its contents, grouped by namespace.
      # Datums are marked high resolution when the interval is under 60s.
      def flush_metrics
        metrics = drain_queue
        log_overflow_if_needed
        return unless metrics

        high_resolution = config.interval.to_i < 60
        metrics.group_by { |m| m[:namespace] }.each do |namespace, ns_metrics|
          process_namespace(namespace, ns_metrics, high_resolution)
        end
      rescue => e
        Speedshop::Cloudwatch.log_error("Failed to send metrics: #{e.message}", e)
      end

      # Swaps the queue for an empty one under the lock.
      #
      # @return [Array<Hash>, nil] the queued datums, or nil when there were none
      def drain_queue
        @mutex.synchronize do
          next nil if @queue.empty?

          drained = @queue
          @queue = []
          drained
        end
      end

      def process_namespace(namespace, ns_metrics, high_resolution)
        config.logger.debug "Speedshop::Cloudwatch: Sending #{ns_metrics.size} metrics to namespace #{namespace}"
        aggregated = aggregate_namespace_metrics(ns_metrics)
        metric_data = build_metric_data(aggregated, high_resolution)
        send_batches(namespace, metric_data)
      end

      # Converts aggregated datums into the hashes passed to put_metric_data
      # (the :namespace key is left out; it is sent once per request).
      def build_metric_data(aggregated, high_resolution)
        aggregated.map do |m|
          datum = {
            metric_name: m[:metric_name],
            unit: m[:unit],
            timestamp: m[:timestamp],
            dimensions: m[:dimensions]
          }
          if m[:statistic_values]
            datum[:statistic_values] = m[:statistic_values]
          else
            datum[:value] = m[:value]
          end
          datum[:storage_resolution] = 1 if high_resolution
          datum
        end
      end

      # Sends +metric_data+ in slices of BATCH_SIZE.
      #
      # Each slice is rescued on its own: the datums have already been removed
      # from the queue, so letting one rejected request abort the loop would
      # silently discard every remaining slice and namespace as well.
      def send_batches(namespace, metric_data)
        metric_data.each_slice(BATCH_SIZE) do |batch|
          config.client.put_metric_data(namespace: namespace, metric_data: batch)
        rescue => e
          Speedshop::Cloudwatch.log_error("Failed to send metrics: #{e.message}", e)
        end
      end

      def aggregate_namespace_metrics(ns_metrics)
        group_metrics(ns_metrics).map { |items| aggregate_group(items) }
      end

      # Groups datums that share metric name, unit and (order-independent)
      # dimensions.
      def group_metrics(ns_metrics)
        ns_metrics
          .group_by { |m| [m[:metric_name], m[:unit], normalized_dimensions_key(m[:dimensions])] }
          .values
      end

      # Collapses a group into one datum. A single datum is returned as is;
      # several are merged into a statistic set stamped with the current time.
      def aggregate_group(items)
        return items.first if items.size == 1

        sample_count, sum, minimum, maximum = aggregate_values(items)
        {
          metric_name: items.first[:metric_name],
          unit: items.first[:unit],
          dimensions: items.first[:dimensions],
          timestamp: Time.now,
          statistic_values: build_statistic_values(sample_count, sum, minimum, maximum)
        }
      end

      # Folds single values and statistic sets into
      # [sample_count, sum, minimum, maximum]. Items carrying neither are
      # ignored; with no contributing item the minimum/maximum stay infinite.
      def aggregate_values(items)
        sample_count = 0.0
        sum = 0.0
        minimum = Float::INFINITY
        maximum = -Float::INFINITY

        items.each do |item|
          if item[:statistic_values]
            sv = item[:statistic_values]
            sample_count += sv[:sample_count].to_f
            sum += sv[:sum].to_f
            minimum = [minimum, sv[:minimum].to_f].min
            maximum = [maximum, sv[:maximum].to_f].max
          elsif item.key?(:value)
            v = item[:value].to_f
            sample_count += 1.0
            sum += v
            minimum = [minimum, v].min
            maximum = [maximum, v].max
          end
        end

        [sample_count, sum, minimum, maximum]
      end

      # Builds the statistic set hash, replacing a non-finite minimum or
      # maximum with 0.0.
      def build_statistic_values(sample_count, sum, minimum, maximum)
        {
          sample_count: sample_count,
          sum: sum,
          minimum: minimum.finite? ? minimum : 0.0,
          maximum: maximum.finite? ? maximum : 0.0
        }
      end

      # Stable string key for a dimension list, independent of its order.
      def normalized_dimensions_key(dims)
        (dims || []).sort_by { |d| d[:name].to_s }.map { |d| "#{d[:name]}=#{d[:value]}" }.join("|")
      end

      # True when +metric_name+ is in the metric list configured for
      # +integration+. An integration with no configured list allows nothing
      # (rather than raising NoMethodError on nil).
      def metric_allowed?(integration, metric_name)
        allowed = config.metrics[integration]
        return false unless allowed

        allowed.include?(metric_name.to_sym)
      end

      def custom_dimensions
        config.dimensions.map { |name, value| {name: name.to_s, value: value.to_s} }
      end

      # First integration in METRICS that defines +metric_name+, or nil.
      def find_integration_for_metric(metric_name)
        METRICS.find { |_int, metrics| metrics.any? { |m| m.name == metric_name } }&.first
      end

      # Logs (and resets) the number of datums dropped since the last flush.
      def log_overflow_if_needed
        dropped = nil
        @mutex.synchronize do
          dropped = @dropped_since_last_flush
          @dropped_since_last_flush = 0
        end
        return unless dropped > 0

        Speedshop::Cloudwatch.log_error("Queue overflow: dropped #{dropped} oldest metric(s) (max queue size: #{config.queue_max_size})")
      end
    end
  end
end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Portions of this code adapted from sidekiq-cloudwatchmetrics
4
+ # Copyright (c) 2018 Samuel Cochran
5
+ # https://github.com/sj26/sidekiq-cloudwatchmetrics
6
+ #
7
+ # The MIT License (MIT)
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ # of this software and associated documentation files (the "Software"), to deal
11
+ # in the Software without restriction, including without limitation the rights
12
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ # copies of the Software, and to permit persons to whom the Software is
14
+ # furnished to do so, subject to the following conditions:
15
+ #
16
+ # The above copyright notice and this permission notice shall be included in
17
+ # all copies or substantial portions of the Software.
18
+ #
19
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
+ # THE SOFTWARE.
26
+
27
+ require "sidekiq/api" if defined?(::Sidekiq)
28
+
29
module Speedshop
  module Cloudwatch
    # Collector that reads Sidekiq's global statistics, process set and queues
    # and reports them through the Reporter, always under the :sidekiq
    # integration.
    class Sidekiq
      # Reads the current Sidekiq state and reports it.
      #
      # Any StandardError is logged through Speedshop::Cloudwatch.log_error
      # instead of being raised.
      #
      # @return [void]
      def collect
        stats = ::Sidekiq::Stats.new
        processes = ::Sidekiq::ProcessSet.new.to_a

        report_stats(stats)
        report_utilization(processes)
        report_queue_metrics
      rescue => e
        Speedshop::Cloudwatch.log_error("Failed to collect Sidekiq metrics: #{e.message}", e)
      end

      class << self
        # Registers Sidekiq server lifecycle hooks that start the reporter
        # with the :sidekiq collector enabled and stop it on :quiet and
        # :shutdown.
        #
        # The start hook is attached to :leader when ::Sidekiq::Enterprise is
        # defined and to :startup otherwise. The constant must be written with
        # a leading `::`: inside this class a bare `Sidekiq` resolves to
        # Speedshop::Cloudwatch::Sidekiq, so `Sidekiq::Enterprise` could never
        # be found and the :leader branch would be unreachable.
        #
        # @return [void]
        def setup_lifecycle_hooks
          ::Sidekiq.configure_server do |sidekiq_config|
            start_event = defined?(::Sidekiq::Enterprise) ? :leader : :startup

            sidekiq_config.on(start_event) do
              Speedshop::Cloudwatch.configure do |c|
                # The hook may run after the collector was already enabled;
                # a duplicate entry would create a second collector.
                c.collectors << :sidekiq unless c.collectors.include?(:sidekiq)
              end
              Speedshop::Cloudwatch.start!
            end

            %i[quiet shutdown].each do |event|
              sidekiq_config.on(event) { Speedshop::Cloudwatch.stop! }
            end
          end
        end
      end

      private

      def reporter
        Speedshop::Cloudwatch.reporter
      end

      # Reports the global counters exposed by Sidekiq::Stats.
      def report_stats(stats)
        {
          EnqueuedJobs: stats.enqueued, ProcessedJobs: stats.processed, FailedJobs: stats.failed,
          ScheduledJobs: stats.scheduled_size, RetryJobs: stats.retry_size, DeadJobs: stats.dead_size,
          Workers: stats.workers_size, Processes: stats.processes_size,
          DefaultQueueLatency: stats.default_queue_latency
        }.each { |m, v| reporter.report(metric: m, value: v, integration: :sidekiq) }
      end

      # Reports total concurrency (Capacity) and the average busy/concurrency
      # ratio as a percentage (Utilization). Utilization is skipped when it
      # cannot be computed (no usable process).
      def report_utilization(processes)
        capacity = processes.sum { |p| p["concurrency"] }
        reporter.report(metric: :Capacity, value: capacity, integration: :sidekiq)

        utilization = avg_utilization(processes) * 100.0
        return if utilization.nan?

        reporter.report(metric: :Utilization, value: utilization, integration: :sidekiq)
      end

      # Reports latency and size for each monitored queue, tagged with a
      # QueueName dimension.
      def report_queue_metrics
        queues_to_monitor.each do |q|
          queue_dimensions = {QueueName: q.name}
          reporter.report(metric: :QueueLatency, value: q.latency, dimensions: queue_dimensions, integration: :sidekiq)
          reporter.report(metric: :QueueSize, value: q.size, dimensions: queue_dimensions, integration: :sidekiq)
        end
      end

      # All queues, or only those named in `config.sidekiq_queues` when that
      # setting is non-empty.
      def queues_to_monitor
        all_queues = ::Sidekiq::Queue.all
        configured = Speedshop::Cloudwatch.config.sidekiq_queues
        return all_queues if configured.nil? || configured.empty?

        all_queues.select { |q| configured.include?(q.name) }
      end

      # Mean of busy/concurrency over all processes. Ratios that are not
      # finite (0/0 gives NaN, n/0 gives Infinity) are left out; the result is
      # NaN when no ratio remains.
      def avg_utilization(processes)
        utils = processes.map { |p| p["busy"] / p["concurrency"].to_f }.select(&:finite?)
        utils.sum / utils.size.to_f
      end
    end
  end
end
117
+
118
# Register the Sidekiq server lifecycle hooks as soon as this file is loaded.
Speedshop::Cloudwatch::Sidekiq.setup_lifecycle_hooks
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Speedshop
  module Cloudwatch
    # Version string of the speedshop-cloudwatch gem.
    VERSION = "0.1.0"
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "aws-sdk-cloudwatch"
4
+ require "speedshop/cloudwatch/config"
5
+ require "speedshop/cloudwatch/reporter"
6
+ require "speedshop/cloudwatch/version"
7
+
8
module Speedshop
  # Entry points of the gem: configuration access, reporter lifecycle and
  # logging helpers.
  module Cloudwatch
    # Base error class of the gem.
    class Error < StandardError; end

    # Yields the Config singleton to the block (when one is given).
    #
    # @return [Config] the Config singleton
    def self.configure
      yield config if block_given?
      config
    end

    # @return [Config] the Config singleton
    def self.config
      Config.instance
    end

    # @return [Reporter] the Reporter singleton
    def self.reporter
      Reporter.instance
    end

    # Starts the reporter (see Reporter#start!).
    def self.start!
      reporter.start!
    end

    # Stops the reporter (see Reporter#stop!).
    def self.stop!
      reporter.stop!
    end

    # Writes +msg+ to the configured logger at info level, prefixed with the
    # gem name.
    def self.log_info(msg)
      config.logger.info "Speedshop::Cloudwatch: #{msg}"
    end

    # Writes +msg+ at error level and, when +exception+ carries a backtrace,
    # the backtrace at debug level.
    #
    # @param msg [String]
    # @param exception [Exception, nil]
    def self.log_error(msg, exception = nil)
      logger = config.logger
      logger.error "Speedshop::Cloudwatch: #{msg}"

      trace = exception&.backtrace
      logger.debug trace.join("\n") if trace
    end
  end
end
45
+
46
# Stop the reporter when the process exits.
at_exit { Speedshop::Cloudwatch.stop! }
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "speedshop/cloudwatch/all"
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/speedshop/cloudwatch/version"
4
+
5
# Gem specification for speedshop-cloudwatch.
Gem::Specification.new do |gem|
  gem.name = "speedshop-cloudwatch"
  gem.version = Speedshop::Cloudwatch::VERSION
  gem.license = "MIT"
  gem.authors = ["Nate Berkopec"]
  gem.email = ["nate.berkopec@speedshop.co"]
  gem.homepage = "https://github.com/nateberkopec/speedshop-cloudwatch"

  gem.summary = "Ruby application integration with AWS CloudWatch for Puma, Rack, Sidekiq, and ActiveJob"
  gem.description = "This gem helps integrate your Ruby application with AWS CloudWatch, reporting metrics from Puma, Rack, Sidekiq, and ActiveJob in background threads to avoid adding latency to requests and jobs."

  gem.required_ruby_version = ">= 2.7.0"
  gem.require_paths = ["lib"]

  gem.metadata["allowed_push_host"] = "https://rubygems.org"
  gem.metadata["homepage_uri"] = gem.homepage
  gem.metadata["source_code_uri"] = "https://github.com/nateberkopec/speedshop-cloudwatch"

  # Package only git-tracked files that live under lib/, bin/ or docs/, plus
  # a fixed set of top-level files.
  top_level_files = %w[README.md LICENSE.txt speedshop-cloudwatch.gemspec Rakefile]
  gem.files = IO.popen(%w[git ls-files -z], chdir: __dir__, err: IO::NULL) do |listing|
    tracked = listing.readlines("\x0", chomp: true)
    tracked.select do |path|
      path.start_with?("lib/", "bin/", "docs/") || top_level_files.include?(path)
    end
  end

  gem.add_dependency "aws-sdk-cloudwatch", ">= 1.81.0"
end