speedshop-cloudwatch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +278 -0
- data/Rakefile +40 -0
- data/bin/console +11 -0
- data/bin/setup +15 -0
- data/lib/speedshop/cloudwatch/active_job.rb +24 -0
- data/lib/speedshop/cloudwatch/all.rb +8 -0
- data/lib/speedshop/cloudwatch/config.rb +61 -0
- data/lib/speedshop/cloudwatch/metrics.rb +181 -0
- data/lib/speedshop/cloudwatch/puma.rb +57 -0
- data/lib/speedshop/cloudwatch/rack.rb +23 -0
- data/lib/speedshop/cloudwatch/railtie.rb +19 -0
- data/lib/speedshop/cloudwatch/reporter.rb +315 -0
- data/lib/speedshop/cloudwatch/sidekiq.rb +118 -0
- data/lib/speedshop/cloudwatch/version.rb +7 -0
- data/lib/speedshop/cloudwatch.rb +48 -0
- data/lib/speedshop-cloudwatch.rb +3 -0
- data/speedshop-cloudwatch.gemspec +30 -0
- metadata +81 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Speedshop
  module Cloudwatch
    # Collects Puma server statistics and forwards them to the Reporter.
    class Puma
      # Snapshot ::Puma.stats_hash and report either per-cluster worker
      # counts plus aggregated per-worker thread-pool stats (cluster mode),
      # or plain thread-pool stats (single mode). Errors are logged, never
      # raised, so a stats hiccup cannot take down the reporter loop.
      def collect
        stats = ::Puma.stats_hash

        if stats[:worker_status]
          # Cluster mode: top-level worker counts first, then fold the
          # per-worker thread-pool numbers into statistic sets.
          %i[workers booted_workers old_workers].each do |key|
            Reporter.instance.report(metric: metric_name_for(key), value: stats[key] || 0)
          end
          report_aggregate_worker_stats(stats)
        else
          # Single mode - report worker stats without dimensions
          %i[running backlog pool_capacity max_threads].each do |key|
            Reporter.instance.report(metric: metric_name_for(key), value: stats[key] || 0)
          end
        end
      rescue => e
        Speedshop::Cloudwatch.log_error("Failed to collect Puma metrics: #{e.message}", e)
      end

      private

      # Fold every worker's last_status into one CloudWatch statistic set
      # (sample_count/sum/min/max) per thread-pool metric. Workers that have
      # not reported a given value yet are simply skipped.
      def report_aggregate_worker_stats(stats)
        statuses = stats[:worker_status].map { |worker| worker[:last_status] || {} }

        %i[running backlog pool_capacity max_threads].each do |key|
          values = statuses.filter_map { |status| status[key] }
          next if values.empty?

          floats = values.map(&:to_f)
          Reporter.instance.report(
            metric: metric_name_for(key),
            statistic_values: {
              sample_count: values.length,
              sum: floats.reduce(0.0, :+),
              minimum: floats.min,
              maximum: floats.max
            },
            integration: :puma
          )
        end
      end

      # :pool_capacity -> :PoolCapacity
      def metric_name_for(symbol)
        symbol.to_s.split("_").map(&:capitalize).join.to_sym
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Speedshop
  module Cloudwatch
    # Rack middleware that reports request queue time (time the request
    # spent between the fronting web server/load balancer and the app).
    class Rack
      def initialize(app)
        @app = app
      end

      # Reads the X-Request-Start / X-Queue-Start header ("t=<epoch>"),
      # reports the elapsed milliseconds, then always forwards to the app —
      # metric failures are logged and never affect the request.
      # NOTE(review): assumes the header epoch is in milliseconds; servers
      # vary (seconds and microseconds exist in the wild) — confirm upstream.
      def call(env)
        begin
          header = env["HTTP_X_REQUEST_START"] || env["HTTP_X_QUEUE_START"]
          if header
            started_at_ms = header.gsub("t=", "").to_f
            elapsed_ms = (Time.now.to_f * 1000) - started_at_ms
            Reporter.instance.report(metric: :RequestQueueTime, value: elapsed_ms)
          end
        rescue => e
          Speedshop::Cloudwatch.log_error("Failed to collect Rack metrics: #{e.message}", e)
        end
        @app.call(env)
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Speedshop
  module Cloudwatch
    # Installs the Rack middleware into Rails apps, skipping console,
    # runner, and rake processes where request-queue metrics are meaningless.
    class Railtie < ::Rails::Railtie
      initializer "speedshop.cloudwatch.insert_middleware", before: :build_middleware_stack do |app|
        # Console/runner invocations are detected via their command frames
        # in the call stack.
        console_or_runner = caller.any? do |frame|
          frame.include?("console_command.rb") || frame.include?("runner_command.rb")
        end
        unless console_or_runner || self.class.in_rake_task?
          app.config.middleware.insert_before 0, Speedshop::Cloudwatch::Rack
        end
      end

      # True when this process was started via rake with at least one
      # top-level task (e.g. `rake db:migrate`).
      def self.in_rake_task?
        return false unless defined?(::Rake) && ::Rake.respond_to?(:application)
        tasks = ::Rake.application&.top_level_tasks
        tasks&.any? || false
      end
    end
  end
end
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "singleton"
require_relative "metrics"

module Speedshop
  module Cloudwatch
    # Buffers metric data points in a bounded in-memory queue and flushes
    # them to CloudWatch from a background thread on a fixed interval.
    # Fork-aware: a forked child discards state inherited from its parent
    # and starts its own reporter thread.
    class Reporter
      include Singleton

      def initialize
        @mutex = Mutex.new
        @condition_variable = ConditionVariable.new
        @queue = []                   # pending metric datum hashes
        @collectors = []              # periodic collectors (Puma, Sidekiq, ...)
        @thread = nil                 # background flush thread
        @pid = Process.pid            # pid that owns this state (fork detection)
        @running = false
        @dropped_since_last_flush = 0 # data points evicted due to queue overflow
        @last_overflow_log = nil
      end

      # Start the background flush thread. No-op when reporting is disabled
      # for this environment or the thread is already alive.
      def start!
        return if !config.environment_enabled? || started?

        @mutex.synchronize do
          return if started?

          # BUGFIX: discard state inherited across fork (or left behind by
          # stop!, which nils @pid) BEFORE building collectors, and record
          # our own pid so later fork checks compare against this process.
          # Previously collectors were initialized first and then cleared
          # when forked? was true — and @pid was never reset — leaving a
          # forked/restarted reporter permanently running with no collectors.
          if forked?
            @collectors.clear
            @queue.clear
            @pid = Process.pid
          end
          initialize_collectors

          Speedshop::Cloudwatch.log_info("Starting metric reporter (collectors: #{@collectors.map(&:class).join(", ")})")
          @running = true
          @thread = Thread.new do
            Thread.current.thread_variable_set(:fork_safe, true)
            Thread.current.name = "scw_reporter"
            run_loop
          end
        end
      end

      def started?
        @running && @thread&.alive?
      end

      # Signal the background thread to stop, then wait up to 2s for it to
      # drain. Collectors are cleared and @pid nilled so that a subsequent
      # start! rebuilds fresh state.
      def stop!
        thread_to_join = nil
        @mutex.synchronize do
          return unless @running
          Speedshop::Cloudwatch.log_info("Stopping metric reporter")
          @running = false
          @condition_variable.signal
          thread_to_join = @thread
          @thread = @pid = nil
          @collectors.clear
        end

        return unless thread_to_join

        result = thread_to_join.join(2)
        if result.nil?
          Speedshop::Cloudwatch.log_info("Reporter thread did not finish within 2s timeout")
        else
          Speedshop::Cloudwatch.log_info("Reporter thread stopped gracefully")
        end
      end

      # Enqueue one data point. Exactly one of value / statistic_values is
      # used. Metrics not registered in METRICS (when integration is not
      # given) or filtered out by config are dropped silently. When the
      # queue is full the oldest entry is evicted and counted for the
      # overflow log.
      def report(metric:, value: nil, statistic_values: nil, dimensions: {}, integration: nil)
        return unless config.environment_enabled?

        metric_name = metric.to_sym
        int = integration || find_integration_for_metric(metric_name)
        return unless int
        return unless metric_allowed?(int, metric_name)

        metric_object = METRICS[int]&.find { |m| m.name == metric_name }
        ns = config.namespaces[int]
        unit = metric_object&.unit || "None"

        dimensions_array = dimensions.map { |k, v| {name: k.to_s, value: v.to_s} }
        all_dimensions = dimensions_array + custom_dimensions

        datum = {metric_name: metric_name.to_s, namespace: ns, unit: unit,
                 dimensions: all_dimensions, timestamp: Time.now}
        if statistic_values
          datum[:statistic_values] = statistic_values
        else
          datum[:value] = value
        end

        @mutex.synchronize do
          if @queue.size >= config.queue_max_size
            @queue.shift
            @dropped_since_last_flush += 1
          end
          @queue << datum
        end

        start! unless started?
      end

      # Drop all buffered metrics and collectors (used by reset/tests).
      def clear_all
        @mutex.synchronize do
          @queue.clear
          @collectors.clear
        end
      end

      # Force immediate metrics collection and flush (for testing)
      # This bypasses the normal interval-based flushing
      def flush_now!
        return unless @running

        collect_metrics
        flush_metrics
      end

      # Tear down the singleton so the next .instance starts clean.
      def self.reset
        if instance_variable_defined?(:@singleton__instance__)
          reporter = instance_variable_get(:@singleton__instance__)
          reporter&.stop! if reporter&.started?
          reporter&.clear_all
        end
        instance_variable_set(:@singleton__instance__, nil)
      end

      private

      def config
        Config.instance
      end

      # True when this process is not the one that owns the reporter state.
      def forked?
        @pid != Process.pid
      end

      def initialize_collectors
        config.collectors.each do |integration|
          @collectors << Speedshop::Cloudwatch::Puma.new if integration == :puma
          @collectors << Speedshop::Cloudwatch::Sidekiq.new if integration == :sidekiq
        rescue => e
          Speedshop::Cloudwatch.log_error("Failed to initialize collector for #{integration}: #{e.message}", e)
        end
      end

      # Sleep for config.interval (or until stop! signals), then collect and
      # flush. A final flush after the loop drains the queue on shutdown.
      def run_loop
        while @running
          @mutex.synchronize do
            @condition_variable.wait(@mutex, config.interval) if @running
          end
          break unless @running
          collect_metrics
          flush_metrics
        end

        flush_metrics
      rescue => e
        Speedshop::Cloudwatch.log_error("Reporter error: #{e.message}", e)
      end

      def collect_metrics
        @collectors.each do |collector|
          collector.collect
        rescue => e
          Speedshop::Cloudwatch.log_error("Collector error: #{e.message}", e)
        end
      end

      # Drain the queue and push everything to CloudWatch grouped by
      # namespace. Intervals under 60s request high-resolution (1s) storage.
      def flush_metrics
        metrics = drain_queue
        log_overflow_if_needed
        return unless metrics

        high_resolution = config.interval.to_i < 60
        metrics.group_by { |m| m[:namespace] }.each do |namespace, ns_metrics|
          process_namespace(namespace, ns_metrics, high_resolution)
        end
      rescue => e
        Speedshop::Cloudwatch.log_error("Failed to send metrics: #{e.message}", e)
      end

      # Atomically swap the queue for a fresh array; nil when empty.
      def drain_queue
        buf = nil
        @mutex.synchronize do
          return nil if @queue.empty?
          buf = @queue
          @queue = []
        end
        buf
      end

      def process_namespace(namespace, ns_metrics, high_resolution)
        config.logger.debug "Speedshop::Cloudwatch: Sending #{ns_metrics.size} metrics to namespace #{namespace}"
        aggregated = aggregate_namespace_metrics(ns_metrics)
        metric_data = build_metric_data(aggregated, high_resolution)
        send_batches(namespace, metric_data)
      end

      # Shape aggregated entries into put_metric_data datum hashes.
      def build_metric_data(aggregated, high_resolution)
        aggregated.map do |m|
          datum = {
            metric_name: m[:metric_name],
            unit: m[:unit],
            timestamp: m[:timestamp],
            dimensions: m[:dimensions]
          }
          if m[:statistic_values]
            datum[:statistic_values] = m[:statistic_values]
          else
            datum[:value] = m[:value]
          end
          datum[:storage_resolution] = 1 if high_resolution
          datum
        end
      end

      # CloudWatch accepts at most 20 data points per PutMetricData call.
      def send_batches(namespace, metric_data)
        metric_data.each_slice(20) do |batch|
          config.client.put_metric_data(namespace: namespace, metric_data: batch)
        end
      end

      def aggregate_namespace_metrics(ns_metrics)
        group_metrics(ns_metrics).map { |items| aggregate_group(items) }
      end

      # Group by (name, unit, normalized dimensions) so duplicate data
      # points collapse into a single statistic set.
      def group_metrics(ns_metrics)
        groups = {}
        ns_metrics.each do |m|
          key = [m[:metric_name], m[:unit], normalized_dimensions_key(m[:dimensions])]
          (groups[key] ||= []) << m
        end
        groups.values
      end

      def aggregate_group(items)
        return items.first if items.size == 1

        sample_count, sum, minimum, maximum = aggregate_values(items)
        {
          metric_name: items.first[:metric_name],
          unit: items.first[:unit],
          dimensions: items.first[:dimensions],
          timestamp: Time.now,
          statistic_values: build_statistic_values(sample_count, sum, minimum, maximum)
        }
      end

      # Merge plain values and pre-aggregated statistic sets into one
      # (sample_count, sum, min, max) tuple.
      def aggregate_values(items)
        sample_count = 0.0
        sum = 0.0
        minimum = Float::INFINITY
        maximum = -Float::INFINITY

        items.each do |item|
          if item[:statistic_values]
            sv = item[:statistic_values]
            sample_count += sv[:sample_count].to_f
            sum += sv[:sum].to_f
            minimum = [minimum, sv[:minimum].to_f].min
            maximum = [maximum, sv[:maximum].to_f].max
          elsif item.key?(:value)
            v = item[:value].to_f
            sample_count += 1.0
            sum += v
            minimum = [minimum, v].min
            maximum = [maximum, v].max
          end
        end

        [sample_count, sum, minimum, maximum]
      end

      # Infinities (from an all-empty group) are normalized to 0.0 so the
      # payload is always valid for CloudWatch.
      def build_statistic_values(sample_count, sum, minimum, maximum)
        {
          sample_count: sample_count,
          sum: sum,
          minimum: minimum.finite? ? minimum : 0.0,
          maximum: maximum.finite? ? maximum : 0.0
        }
      end

      # Canonical, order-insensitive string for a dimensions array.
      def normalized_dimensions_key(dims)
        (dims || []).sort_by { |d| d[:name].to_s }.map { |d| "#{d[:name]}=#{d[:value]}" }.join("|")
      end

      def metric_allowed?(integration, metric_name)
        config.metrics[integration].include?(metric_name.to_sym)
      end

      def custom_dimensions
        config.dimensions.map { |name, value| {name: name.to_s, value: value.to_s} }
      end

      def find_integration_for_metric(metric_name)
        METRICS.find { |int, metrics| metrics.any? { |m| m.name == metric_name } }&.first
      end

      # Log (at most once per flush) how many data points were evicted.
      def log_overflow_if_needed
        dropped = nil
        @mutex.synchronize do
          dropped = @dropped_since_last_flush
          @dropped_since_last_flush = 0
        end
        return unless dropped > 0

        Speedshop::Cloudwatch.log_error("Queue overflow: dropped #{dropped} oldest metric(s) (max queue size: #{config.queue_max_size})")
      end
    end
  end
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true

# Portions of this code adapted from sidekiq-cloudwatchmetrics
# Copyright (c) 2018 Samuel Cochran
# https://github.com/sj26/sidekiq-cloudwatchmetrics
#
# The MIT License (MIT)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

require "sidekiq/api" if defined?(::Sidekiq)

module Speedshop
  module Cloudwatch
    # Collects Sidekiq statistics (global stats, process utilization,
    # per-queue latency and size) and reports them to CloudWatch.
    class Sidekiq
      # Snapshot Sidekiq's stats and process set, report everything;
      # failures are logged, never raised.
      def collect
        stats = ::Sidekiq::Stats.new
        processes = ::Sidekiq::ProcessSet.new.to_a

        report_stats(stats)
        report_utilization(processes)
        report_queue_metrics
      rescue => e
        Speedshop::Cloudwatch.log_error("Failed to collect Sidekiq metrics: #{e.message}", e)
      end

      class << self
        # Register reporter start/stop around the Sidekiq server lifecycle.
        # With Sidekiq Enterprise only the leader process reports (global
        # stats would otherwise be duplicated); otherwise every process does.
        def setup_lifecycle_hooks
          # BUGFIX: no-op when Sidekiq isn't loaded. This file is guarded at
          # the top with `require "sidekiq/api" if defined?(::Sidekiq)` but
          # previously called ::Sidekiq.configure_server unconditionally,
          # raising NameError when loaded without Sidekiq.
          return unless defined?(::Sidekiq) && ::Sidekiq.respond_to?(:configure_server)

          ::Sidekiq.configure_server do |sidekiq_config|
            # BUGFIX: must be ::Sidekiq::Enterprise. A bare `Sidekiq` here
            # resolves lexically to this class (Speedshop::Cloudwatch::Sidekiq),
            # so the Enterprise leader-only branch could never be taken.
            if defined?(::Sidekiq::Enterprise)
              sidekiq_config.on(:leader) do
                Speedshop::Cloudwatch.configure { |c| c.collectors << :sidekiq }
                Speedshop::Cloudwatch.start!
              end
            else
              sidekiq_config.on(:startup) do
                Speedshop::Cloudwatch.configure { |c| c.collectors << :sidekiq }
                Speedshop::Cloudwatch.start!
              end
            end

            sidekiq_config.on(:quiet) do
              Speedshop::Cloudwatch.stop!
            end

            sidekiq_config.on(:shutdown) do
              Speedshop::Cloudwatch.stop!
            end
          end
        end
      end

      private

      def reporter
        Speedshop::Cloudwatch.reporter
      end

      # Global Sidekiq::Stats counters and gauges.
      def report_stats(stats)
        {
          EnqueuedJobs: stats.enqueued, ProcessedJobs: stats.processed, FailedJobs: stats.failed,
          ScheduledJobs: stats.scheduled_size, RetryJobs: stats.retry_size, DeadJobs: stats.dead_size,
          Workers: stats.workers_size, Processes: stats.processes_size,
          DefaultQueueLatency: stats.default_queue_latency
        }.each { |m, v| reporter.report(metric: m, value: v, integration: :sidekiq) }
      end

      # Total thread capacity plus average busy/concurrency across processes
      # (as a percentage). Utilization is skipped when no process reports.
      def report_utilization(processes)
        capacity = processes.sum { |p| p["concurrency"] }
        reporter.report(metric: :Capacity, value: capacity)

        utilization = avg_utilization(processes) * 100.0
        reporter.report(metric: :Utilization, value: utilization) unless utilization.nan?
      end

      def report_queue_metrics
        queues_to_monitor.each do |q|
          reporter.report(metric: :QueueLatency, value: q.latency, dimensions: {QueueName: q.name})
          reporter.report(metric: :QueueSize, value: q.size, dimensions: {QueueName: q.name})
        end
      end

      # All queues, or only those named in config.sidekiq_queues when set.
      def queues_to_monitor
        all_queues = ::Sidekiq::Queue.all
        configured = Speedshop::Cloudwatch.config.sidekiq_queues

        if configured.nil? || configured.empty?
          all_queues
        else
          all_queues.select { |q| configured.include?(q.name) }
        end
      end

      # Mean busy/concurrency ratio per process; NaN when the list is empty.
      def avg_utilization(processes)
        utils = processes.map { |p| p["busy"] / p["concurrency"].to_f }.reject(&:nan?)
        utils.sum / utils.size.to_f
      end
    end
  end
end

Speedshop::Cloudwatch::Sidekiq.setup_lifecycle_hooks
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "aws-sdk-cloudwatch"
require "speedshop/cloudwatch/config"
require "speedshop/cloudwatch/reporter"
require "speedshop/cloudwatch/version"

module Speedshop
  # Top-level API: configuration access, reporter lifecycle, and logging.
  module Cloudwatch
    class Error < StandardError; end

    class << self
      # Yields the singleton Config for mutation (if a block is given) and
      # returns it either way.
      def configure
        yield Config.instance if block_given?
        Config.instance
      end

      def config
        Config.instance
      end

      def reporter
        Reporter.instance
      end

      # Start the background reporter thread.
      def start!
        reporter.start!
      end

      # Stop the background reporter thread, draining pending metrics.
      def stop!
        reporter.stop!
      end

      def log_info(msg)
        config.logger.info "Speedshop::Cloudwatch: #{msg}"
      end

      # Message goes to the error level; the backtrace (when present) is
      # kept at debug level to avoid noisy logs.
      def log_error(msg, exception = nil)
        config.logger.error "Speedshop::Cloudwatch: #{msg}"
        config.logger.debug exception.backtrace.join("\n") if exception&.backtrace
      end
    end
  end
end

# Drain and stop the background reporter when the process exits.
at_exit do
  Speedshop::Cloudwatch.stop!
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true

require_relative "lib/speedshop/cloudwatch/version"

Gem::Specification.new do |spec|
  spec.name = "speedshop-cloudwatch"
  spec.version = Speedshop::Cloudwatch::VERSION
  spec.authors = ["Nate Berkopec"]
  spec.email = ["nate.berkopec@speedshop.co"]

  spec.summary = "Ruby application integration with AWS CloudWatch for Puma, Rack, Sidekiq, and ActiveJob"
  spec.description = "This gem helps integrate your Ruby application with AWS CloudWatch, reporting metrics from Puma, Rack, Sidekiq, and ActiveJob in background threads to avoid adding latency to requests and jobs."
  spec.homepage = "https://github.com/nateberkopec/speedshop-cloudwatch"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 2.7.0"

  spec.metadata["allowed_push_host"] = "https://rubygems.org"
  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/nateberkopec/speedshop-cloudwatch"

  # Package only git-tracked files under lib/, bin/, docs/ plus the usual
  # top-level files; git errors are suppressed (e.g. building outside a repo).
  top_level_files = %w[README.md LICENSE.txt speedshop-cloudwatch.gemspec Rakefile]
  spec.files = IO.popen(%w[git ls-files -z], chdir: __dir__, err: IO::NULL) do |ls|
    ls.readlines("\x0", chomp: true).select do |path|
      path.start_with?("lib/", "bin/", "docs/") || top_level_files.include?(path)
    end
  end
  spec.require_paths = ["lib"]

  spec.add_dependency "aws-sdk-cloudwatch", ">= 1.81.0"
end
|