rails_autoscale_agent 0.7.0 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/tasks.json +85 -0
- data/CHANGELOG.md +115 -0
- data/Gemfile +13 -1
- data/README.md +65 -17
- data/lib/rails_autoscale_agent/autoscale_api.rb +13 -4
- data/lib/rails_autoscale_agent/config.rb +29 -8
- data/lib/rails_autoscale_agent/logger.rb +28 -14
- data/lib/rails_autoscale_agent/measurement.rb +5 -3
- data/lib/rails_autoscale_agent/middleware.rb +8 -13
- data/lib/rails_autoscale_agent/railtie.rb +4 -0
- data/lib/rails_autoscale_agent/registration.rb +4 -2
- data/lib/rails_autoscale_agent/report.rb +7 -9
- data/lib/rails_autoscale_agent/reporter.rb +37 -23
- data/lib/rails_autoscale_agent/request.rb +29 -15
- data/lib/rails_autoscale_agent/store.rb +7 -2
- data/lib/rails_autoscale_agent/version.rb +1 -1
- data/lib/rails_autoscale_agent/worker_adapters/delayed_job.rb +97 -0
- data/lib/rails_autoscale_agent/worker_adapters/que.rb +71 -0
- data/lib/rails_autoscale_agent/worker_adapters/resque.rb +50 -0
- data/lib/rails_autoscale_agent/worker_adapters/sidekiq.rb +69 -18
- data/rails_autoscale_agent.gemspec +1 -10
- metadata +14 -135
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'active_support/core_ext/module/delegation'
|
4
3
|
require 'rails_autoscale_agent/config'
|
4
|
+
require 'logger'
|
5
5
|
|
6
6
|
module RailsAutoscaleAgent
|
7
7
|
module Logger
|
@@ -11,23 +11,37 @@ module RailsAutoscaleAgent
|
|
11
11
|
end
|
12
12
|
|
13
13
|
class LoggerProxy < Struct.new(:logger)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
TAG = '[RailsAutoscale]'
|
15
|
+
|
16
|
+
def error(msg)
|
17
|
+
logger.error tag(msg)
|
18
|
+
end
|
19
|
+
|
20
|
+
def warn(msg)
|
21
|
+
logger.warn tag(msg)
|
21
22
|
end
|
22
23
|
|
23
|
-
def
|
24
|
-
|
25
|
-
# to be chatty by default.
|
26
|
-
logger.debug(*args) if ENV['RAILS_AUTOSCALE_LOG_LEVEL'] == 'DEBUG'
|
24
|
+
def info(msg)
|
25
|
+
logger.info tag(msg) unless Config.instance.quiet?
|
27
26
|
end
|
28
27
|
|
29
|
-
def
|
30
|
-
|
28
|
+
def debug(msg)
|
29
|
+
# Silence debug logs by default to avoid being overly chatty (Rails logger defaults
|
30
|
+
# to DEBUG level in production). Setting RAILS_AUTOSCALE_DEBUG=true enables debug logs,
|
31
|
+
# even if the underlying logger severity level is INFO.
|
32
|
+
if Config.instance.debug?
|
33
|
+
if logger.respond_to?(:debug?) && logger.debug?
|
34
|
+
logger.debug tag(msg)
|
35
|
+
elsif logger.respond_to?(:info?) && logger.info?
|
36
|
+
logger.info tag("[DEBUG] #{msg}")
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def tag(msg)
|
44
|
+
"#{TAG} #{msg}"
|
31
45
|
end
|
32
46
|
end
|
33
47
|
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RailsAutoscaleAgent
|
4
|
-
class Measurement < Struct.new(:time, :value, :queue_name)
|
5
|
-
|
6
|
-
|
4
|
+
class Measurement < Struct.new(:time, :value, :queue_name, :metric)
|
5
|
+
# No queue_name is assumed to be a web request measurement
|
6
|
+
# Metrics: qt = queue time (default), qd = queue depth (needed for Resque support)
|
7
|
+
def initialize(time, value, queue_name = nil, metric = nil)
|
8
|
+
super time.utc, value.to_i, queue_name, metric
|
7
9
|
end
|
8
10
|
end
|
9
11
|
end
|
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'rails_autoscale_agent/logger'
|
4
3
|
require 'rails_autoscale_agent/store'
|
5
4
|
require 'rails_autoscale_agent/reporter'
|
6
5
|
require 'rails_autoscale_agent/config'
|
@@ -8,25 +7,21 @@ require 'rails_autoscale_agent/request'
|
|
8
7
|
|
9
8
|
module RailsAutoscaleAgent
|
10
9
|
class Middleware
|
11
|
-
include Logger
|
12
|
-
|
13
10
|
def initialize(app)
|
14
11
|
@app = app
|
15
12
|
end
|
16
13
|
|
17
14
|
def call(env)
|
18
|
-
|
19
|
-
|
20
|
-
request = Request.new(env, config)
|
15
|
+
config = Config.instance
|
16
|
+
request = Request.new(env, config)
|
21
17
|
|
22
|
-
|
23
|
-
|
18
|
+
store = Store.instance
|
19
|
+
Reporter.start(config, store)
|
24
20
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
end
|
21
|
+
if !request.ignore? && queue_time = request.queue_time
|
22
|
+
# NOTE: Expose queue time to the app
|
23
|
+
env['queue_time'] = queue_time
|
24
|
+
store.push queue_time
|
30
25
|
end
|
31
26
|
|
32
27
|
@app.call(env)
|
@@ -1,10 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'rails_autoscale_agent/middleware'
|
4
|
+
require 'rails_autoscale_agent/logger'
|
4
5
|
|
5
6
|
module RailsAutoscaleAgent
|
6
7
|
class Railtie < Rails::Railtie
|
8
|
+
include Logger
|
9
|
+
|
7
10
|
initializer "rails_autoscale_agent.middleware" do |app|
|
11
|
+
logger.info "Preparing middleware"
|
8
12
|
app.middleware.insert_before Rack::Runtime, Middleware
|
9
13
|
end
|
10
14
|
end
|
@@ -3,15 +3,17 @@
|
|
3
3
|
require 'rails_autoscale_agent/version'
|
4
4
|
|
5
5
|
module RailsAutoscaleAgent
|
6
|
-
class Registration < Struct.new(:config)
|
6
|
+
class Registration < Struct.new(:config, :worker_adapters)
|
7
7
|
|
8
8
|
def to_params
|
9
9
|
{
|
10
10
|
dyno: config.dyno,
|
11
|
-
pid:
|
11
|
+
pid: Process.pid,
|
12
12
|
ruby_version: RUBY_VERSION,
|
13
13
|
rails_version: defined?(Rails) && Rails.version,
|
14
14
|
gem_version: VERSION,
|
15
|
+
# example: { worker_adapters: 'Sidekiq,Que' }
|
16
|
+
worker_adapters: worker_adapters.map { |o| o.class.name.split('::').last }.join(','),
|
15
17
|
}
|
16
18
|
end
|
17
19
|
end
|
@@ -12,21 +12,19 @@ module RailsAutoscaleAgent
|
|
12
12
|
def to_params(config)
|
13
13
|
{
|
14
14
|
dyno: config.dyno,
|
15
|
-
pid:
|
15
|
+
pid: Process.pid,
|
16
16
|
}
|
17
17
|
end
|
18
18
|
|
19
19
|
def to_csv
|
20
20
|
String.new.tap do |result|
|
21
21
|
@measurements.each do |measurement|
|
22
|
-
result <<
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
result << measurement.queue_name
|
29
|
-
end
|
22
|
+
result << [
|
23
|
+
measurement.time.to_i,
|
24
|
+
measurement.value,
|
25
|
+
measurement.queue_name,
|
26
|
+
measurement.metric,
|
27
|
+
].join(',')
|
30
28
|
|
31
29
|
result << "\n"
|
32
30
|
end
|
@@ -5,7 +5,6 @@ require 'rails_autoscale_agent/logger'
|
|
5
5
|
require 'rails_autoscale_agent/autoscale_api'
|
6
6
|
require 'rails_autoscale_agent/time_rounder'
|
7
7
|
require 'rails_autoscale_agent/registration'
|
8
|
-
require 'rails_autoscale_agent/worker_adapters/sidekiq'
|
9
8
|
|
10
9
|
# Reporter wakes up every minute to send metrics to the RailsAutoscale API
|
11
10
|
|
@@ -21,31 +20,29 @@ module RailsAutoscaleAgent
|
|
21
20
|
def start!(config, store)
|
22
21
|
@started = true
|
23
22
|
@worker_adapters = config.worker_adapters.select(&:enabled?)
|
23
|
+
@dyno_num = config.dyno.to_s.split('.').last.to_i
|
24
24
|
|
25
|
-
if !config.api_base_url
|
25
|
+
if !config.api_base_url && !config.dev_mode?
|
26
26
|
logger.info "Reporter not started: #{config.addon_name}_URL is not set"
|
27
27
|
return
|
28
28
|
end
|
29
29
|
|
30
30
|
Thread.new do
|
31
|
-
|
32
|
-
register!(config)
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
rescue => ex
|
43
|
-
# Exceptions in threads other than the main thread will fail silently
|
44
|
-
# https://ruby-doc.org/core-2.2.0/Thread.html#class-Thread-label-Exception+handling
|
45
|
-
logger.error "Reporter error: #{ex.inspect}"
|
46
|
-
logger.error ex.backtrace.join("\n")
|
31
|
+
loop do
|
32
|
+
register!(config, @worker_adapters) unless @registered
|
33
|
+
|
34
|
+
# Stagger reporting to spread out reports from many processes
|
35
|
+
multiplier = 1 - (rand / 4) # between 0.75 and 1.0
|
36
|
+
sleep config.report_interval * multiplier
|
37
|
+
|
38
|
+
# It's redundant to report worker metrics from every web dyno, so only report from web.1
|
39
|
+
if @dyno_num == 1
|
40
|
+
@worker_adapters.map do |adapter|
|
41
|
+
report_exceptions(config) { adapter.collect!(store) }
|
47
42
|
end
|
48
43
|
end
|
44
|
+
|
45
|
+
report_exceptions(config) { report!(config, store) }
|
49
46
|
end
|
50
47
|
end
|
51
48
|
end
|
@@ -54,6 +51,8 @@ module RailsAutoscaleAgent
|
|
54
51
|
@started
|
55
52
|
end
|
56
53
|
|
54
|
+
private
|
55
|
+
|
57
56
|
def report!(config, store)
|
58
57
|
report = store.pop_report
|
59
58
|
|
@@ -61,7 +60,7 @@ module RailsAutoscaleAgent
|
|
61
60
|
logger.info "Reporting #{report.measurements.size} measurements"
|
62
61
|
|
63
62
|
params = report.to_params(config)
|
64
|
-
result = AutoscaleApi.new(config
|
63
|
+
result = AutoscaleApi.new(config).report_metrics!(params, report.to_csv)
|
65
64
|
|
66
65
|
case result
|
67
66
|
when AutoscaleApi::SuccessResponse
|
@@ -74,20 +73,35 @@ module RailsAutoscaleAgent
|
|
74
73
|
end
|
75
74
|
end
|
76
75
|
|
77
|
-
def register!(config)
|
78
|
-
params = Registration.new(config).to_params
|
79
|
-
result = AutoscaleApi.new(config
|
76
|
+
def register!(config, worker_adapters)
|
77
|
+
params = Registration.new(config, worker_adapters).to_params
|
78
|
+
result = AutoscaleApi.new(config).register_reporter!(params)
|
80
79
|
|
81
80
|
case result
|
82
81
|
when AutoscaleApi::SuccessResponse
|
82
|
+
@registered = true
|
83
83
|
config.report_interval = result.data['report_interval'] if result.data['report_interval']
|
84
84
|
config.max_request_size = result.data['max_request_size'] if result.data['max_request_size']
|
85
|
-
worker_adapters_msg =
|
85
|
+
worker_adapters_msg = worker_adapters.map { |a| a.class.name }.join(', ')
|
86
86
|
logger.info "Reporter starting, will report every #{config.report_interval} seconds or so. Worker adapters: [#{worker_adapters_msg}]"
|
87
87
|
when AutoscaleApi::FailureResponse
|
88
88
|
logger.error "Reporter failed to register: #{result.failure_message}"
|
89
89
|
end
|
90
90
|
end
|
91
91
|
|
92
|
+
def report_exceptions(config)
|
93
|
+
begin
|
94
|
+
yield
|
95
|
+
rescue => ex
|
96
|
+
# Exceptions in threads other than the main thread will fail silently
|
97
|
+
# https://ruby-doc.org/core-2.2.0/Thread.html#class-Thread-label-Exception+handling
|
98
|
+
logger.error "Reporter error: #{ex.inspect}"
|
99
|
+
AutoscaleApi.new(config).report_exception!(ex)
|
100
|
+
end
|
101
|
+
rescue => ex
|
102
|
+
# An exception was encountered while trying to report the original exception.
|
103
|
+
# Swallow the error so the reporter continues to report.
|
104
|
+
logger.error "Exception reporting error: #{ex.inspect}"
|
105
|
+
end
|
92
106
|
end
|
93
107
|
end
|
@@ -1,35 +1,49 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'rails_autoscale_agent/logger'
|
4
|
+
|
3
5
|
module RailsAutoscaleAgent
|
4
6
|
class Request
|
5
7
|
include Logger
|
6
8
|
|
7
|
-
attr_reader :id, :entered_queue_at, :path, :method, :size
|
8
|
-
|
9
9
|
def initialize(env, config)
|
10
10
|
@config = config
|
11
11
|
@id = env['HTTP_X_REQUEST_ID']
|
12
|
-
@path = env['PATH_INFO']
|
13
|
-
@method = env['REQUEST_METHOD'].downcase
|
14
12
|
@size = env['rack.input'].respond_to?(:size) ? env['rack.input'].size : 0
|
15
|
-
|
16
|
-
|
17
|
-
@entered_queue_at = Time.at(unix_millis.to_f / 1000)
|
18
|
-
end
|
13
|
+
@request_body_wait = env['puma.request_body_wait'].to_i
|
14
|
+
@request_start_header = env['HTTP_X_REQUEST_START']
|
19
15
|
end
|
20
16
|
|
21
17
|
def ignore?
|
22
18
|
@config.ignore_large_requests? && @size > @config.max_request_size
|
23
19
|
end
|
24
20
|
|
25
|
-
def
|
26
|
-
if
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
21
|
+
def started_at
|
22
|
+
if @request_start_header
|
23
|
+
# Heroku sets the header as an integer, measured in milliseconds.
|
24
|
+
# If nginx is involved, it might be in seconds with fractional milliseconds,
|
25
|
+
# and it might be preceded by "t=". We can handle all cases by removing non-digits
|
26
|
+
# and treating as milliseconds.
|
27
|
+
Time.at(@request_start_header.gsub(/\D/, '').to_i / 1000.0)
|
28
|
+
elsif @config.dev_mode?
|
29
|
+
# In dev mode, fake a queue time of 0-1000ms
|
30
|
+
Time.now - rand + @request_body_wait
|
32
31
|
end
|
33
32
|
end
|
33
|
+
|
34
|
+
def queue_time(now = Time.now)
|
35
|
+
return if started_at.nil?
|
36
|
+
|
37
|
+
queue_time = ((now - started_at) * 1000).to_i
|
38
|
+
|
39
|
+
# Subtract the time Puma spent waiting on the request body. It's irrelevant to capacity-related queue time.
|
40
|
+
# Without this, slow clients and large request payloads will skew queue time.
|
41
|
+
queue_time -= @request_body_wait
|
42
|
+
|
43
|
+
logger.debug "Request queue_time=#{queue_time}ms body_wait=#{@request_body_wait}ms request_id=#{@id} size=#{@size}"
|
44
|
+
|
45
|
+
# Safeguard against negative queue times (should not happen in practice)
|
46
|
+
queue_time > 0 ? queue_time : 0
|
47
|
+
end
|
34
48
|
end
|
35
49
|
end
|
@@ -15,11 +15,16 @@ module RailsAutoscaleAgent
|
|
15
15
|
@measurements = []
|
16
16
|
end
|
17
17
|
|
18
|
-
def push(value, time = Time.now, queue_name = nil)
|
19
|
-
|
18
|
+
def push(value, time = Time.now, queue_name = nil, metric = nil)
|
19
|
+
# If it's been two minutes since clearing out the store, stop collecting measurements.
|
20
|
+
# There could be an issue with the reporter, and continuing to collect will consume linear memory.
|
21
|
+
return if @last_pop && @last_pop < Time.now - 120
|
22
|
+
|
23
|
+
@measurements << Measurement.new(time, value, queue_name, metric)
|
20
24
|
end
|
21
25
|
|
22
26
|
def pop_report
|
27
|
+
@last_pop = Time.now
|
23
28
|
report = Report.new
|
24
29
|
|
25
30
|
while measurement = @measurements.shift
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rails_autoscale_agent/logger'
|
4
|
+
|
5
|
+
module RailsAutoscaleAgent
|
6
|
+
module WorkerAdapters
|
7
|
+
class DelayedJob
|
8
|
+
include RailsAutoscaleAgent::Logger
|
9
|
+
include Singleton
|
10
|
+
|
11
|
+
attr_writer :queues
|
12
|
+
|
13
|
+
def enabled?
|
14
|
+
if defined?(::Delayed::Job) && defined?(::Delayed::Backend::ActiveRecord)
|
15
|
+
log_msg = String.new("DelayedJob enabled (#{::ActiveRecord::Base.default_timezone})")
|
16
|
+
log_msg << " with long-running job support" if track_long_running_jobs?
|
17
|
+
logger.info log_msg
|
18
|
+
true
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def collect!(store)
|
23
|
+
log_msg = String.new
|
24
|
+
t = Time.now.utc
|
25
|
+
sql = <<~SQL
|
26
|
+
SELECT COALESCE(queue, 'default'), min(run_at)
|
27
|
+
FROM delayed_jobs
|
28
|
+
WHERE locked_at IS NULL
|
29
|
+
AND failed_at IS NULL
|
30
|
+
GROUP BY queue
|
31
|
+
SQL
|
32
|
+
|
33
|
+
run_at_by_queue = Hash[select_rows(sql)]
|
34
|
+
|
35
|
+
# Don't collect worker metrics if there is an unreasonable number of queues
|
36
|
+
if run_at_by_queue.size > 50
|
37
|
+
logger.debug "Skipping DelayedJob metrics - #{run_at_by_queue.size} queues"
|
38
|
+
return
|
39
|
+
end
|
40
|
+
|
41
|
+
self.queues = queues | run_at_by_queue.keys
|
42
|
+
|
43
|
+
if track_long_running_jobs?
|
44
|
+
sql = <<~SQL
|
45
|
+
SELECT COALESCE(queue, 'default'), count(*)
|
46
|
+
FROM delayed_jobs
|
47
|
+
WHERE locked_at IS NOT NULL
|
48
|
+
AND locked_by IS NOT NULL
|
49
|
+
AND failed_at IS NULL
|
50
|
+
GROUP BY 1
|
51
|
+
SQL
|
52
|
+
|
53
|
+
busy_count_by_queue = Hash[select_rows(sql)]
|
54
|
+
self.queues = queues | busy_count_by_queue.keys
|
55
|
+
end
|
56
|
+
|
57
|
+
queues.each do |queue|
|
58
|
+
run_at = run_at_by_queue[queue]
|
59
|
+
# DateTime.parse assumes a UTC string
|
60
|
+
run_at = DateTime.parse(run_at) if run_at.is_a?(String)
|
61
|
+
latency_ms = run_at ? ((t - run_at)*1000).ceil : 0
|
62
|
+
latency_ms = 0 if latency_ms < 0
|
63
|
+
|
64
|
+
store.push latency_ms, t, queue
|
65
|
+
log_msg << "dj-qt.#{queue}=#{latency_ms} "
|
66
|
+
|
67
|
+
if track_long_running_jobs?
|
68
|
+
busy_count = busy_count_by_queue[queue] || 0
|
69
|
+
store.push busy_count, Time.now, queue, :busy
|
70
|
+
log_msg << "dj-busy.#{queue}=#{busy_count} "
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
logger.debug log_msg unless log_msg.empty?
|
75
|
+
end
|
76
|
+
|
77
|
+
private
|
78
|
+
|
79
|
+
def queues
|
80
|
+
# Track the known queues so we can continue reporting on queues that don't
|
81
|
+
# have enqueued jobs at the time of reporting.
|
82
|
+
# Assume a "default" queue so we always report *something*, even when nothing
|
83
|
+
# is enqueued.
|
84
|
+
@queues ||= Set.new(['default'])
|
85
|
+
end
|
86
|
+
|
87
|
+
def track_long_running_jobs?
|
88
|
+
Config.instance.track_long_running_jobs
|
89
|
+
end
|
90
|
+
|
91
|
+
def select_rows(sql)
|
92
|
+
# This ensures the agent doesn't hold onto a DB connection any longer than necessary
|
93
|
+
ActiveRecord::Base.connection_pool.with_connection { |c| c.select_rows(sql) }
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|