rails_autoscale_agent 0.7.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'active_support/core_ext/module/delegation'
4
3
  require 'rails_autoscale_agent/config'
4
+ require 'logger'
5
5
 
6
6
  module RailsAutoscaleAgent
7
7
  module Logger
@@ -11,23 +11,37 @@ module RailsAutoscaleAgent
11
11
  end
12
12
 
13
13
  class LoggerProxy < Struct.new(:logger)
14
- def tagged(*tags, &block)
15
- if logger.respond_to?(:tagged)
16
- logger.tagged *tags, &block
17
- else
18
- # NOTE: Quack like ActiveSupport::TaggedLogging, but don't reimplement
19
- yield self
20
- end
14
+ TAG = '[RailsAutoscale]'
15
+
16
+ def error(msg)
17
+ logger.error tag(msg)
18
+ end
19
+
20
+ def warn(msg)
21
+ logger.warn tag(msg)
21
22
  end
22
23
 
23
- def debug(*args)
24
- # Rails logger defaults to DEBUG level in production, but I don't want
25
- # to be chatty by default.
26
- logger.debug(*args) if ENV['RAILS_AUTOSCALE_LOG_LEVEL'] == 'DEBUG'
24
+ def info(msg)
25
+ logger.info tag(msg) unless Config.instance.quiet?
27
26
  end
28
27
 
29
- def method_missing(name, *args, &block)
30
- logger.send name, *args, &block
28
+ def debug(msg)
29
+ # Silence debug logs by default to avoid being overly chatty (Rails logger defaults
30
+ # to DEBUG level in production). Setting RAILS_AUTOSCALE_DEBUG=true enables debug logs,
31
+ # even if the underlying logger severity level is INFO.
32
+ if Config.instance.debug?
33
+ if logger.respond_to?(:debug?) && logger.debug?
34
+ logger.debug tag(msg)
35
+ elsif logger.respond_to?(:info?) && logger.info?
36
+ logger.info tag("[DEBUG] #{msg}")
37
+ end
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def tag(msg)
44
+ "#{TAG} #{msg}"
31
45
  end
32
46
  end
33
47
  end
@@ -1,9 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RailsAutoscaleAgent
4
- class Measurement < Struct.new(:time, :value, :queue_name)
5
- def initialize(time, value, queue_name = nil)
6
- super time.utc, value.to_i, queue_name
4
+ class Measurement < Struct.new(:time, :value, :queue_name, :metric)
5
+ # No queue_name is assumed to be a web request measurement
6
+ # Metrics: qt = queue time (default), qd = queue depth (needed for Resque support)
7
+ def initialize(time, value, queue_name = nil, metric = nil)
8
+ super time.utc, value.to_i, queue_name, metric
7
9
  end
8
10
  end
9
11
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rails_autoscale_agent/logger'
4
3
  require 'rails_autoscale_agent/store'
5
4
  require 'rails_autoscale_agent/reporter'
6
5
  require 'rails_autoscale_agent/config'
@@ -8,25 +7,21 @@ require 'rails_autoscale_agent/request'
8
7
 
9
8
  module RailsAutoscaleAgent
10
9
  class Middleware
11
- include Logger
12
-
13
10
  def initialize(app)
14
11
  @app = app
15
12
  end
16
13
 
17
14
  def call(env)
18
- logger.tagged 'RailsAutoscale' do
19
- config = Config.instance
20
- request = Request.new(env, config)
15
+ config = Config.instance
16
+ request = Request.new(env, config)
21
17
 
22
- store = Store.instance
23
- Reporter.start(config, store)
18
+ store = Store.instance
19
+ Reporter.start(config, store)
24
20
 
25
- if !request.ignore? && queue_time = request.queue_time
26
- # NOTE: Expose queue time to the app
27
- env['queue_time'] = queue_time
28
- store.push queue_time
29
- end
21
+ if !request.ignore? && queue_time = request.queue_time
22
+ # NOTE: Expose queue time to the app
23
+ env['queue_time'] = queue_time
24
+ store.push queue_time
30
25
  end
31
26
 
32
27
  @app.call(env)
@@ -1,10 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'rails_autoscale_agent/middleware'
4
+ require 'rails_autoscale_agent/logger'
4
5
 
5
6
  module RailsAutoscaleAgent
6
7
  class Railtie < Rails::Railtie
8
+ include Logger
9
+
7
10
  initializer "rails_autoscale_agent.middleware" do |app|
11
+ logger.info "Preparing middleware"
8
12
  app.middleware.insert_before Rack::Runtime, Middleware
9
13
  end
10
14
  end
@@ -3,15 +3,17 @@
3
3
  require 'rails_autoscale_agent/version'
4
4
 
5
5
  module RailsAutoscaleAgent
6
- class Registration < Struct.new(:config)
6
+ class Registration < Struct.new(:config, :worker_adapters)
7
7
 
8
8
  def to_params
9
9
  {
10
10
  dyno: config.dyno,
11
- pid: config.pid,
11
+ pid: Process.pid,
12
12
  ruby_version: RUBY_VERSION,
13
13
  rails_version: defined?(Rails) && Rails.version,
14
14
  gem_version: VERSION,
15
+ # example: { worker_adapters: 'Sidekiq,Que' }
16
+ worker_adapters: worker_adapters.map { |o| o.class.name.split('::').last }.join(','),
15
17
  }
16
18
  end
17
19
  end
@@ -12,21 +12,19 @@ module RailsAutoscaleAgent
12
12
  def to_params(config)
13
13
  {
14
14
  dyno: config.dyno,
15
- pid: config.pid,
15
+ pid: Process.pid,
16
16
  }
17
17
  end
18
18
 
19
19
  def to_csv
20
20
  String.new.tap do |result|
21
21
  @measurements.each do |measurement|
22
- result << measurement.time.to_i.to_s
23
- result << ','
24
- result << measurement.value.to_s
25
-
26
- if measurement.queue_name
27
- result << ','
28
- result << measurement.queue_name
29
- end
22
+ result << [
23
+ measurement.time.to_i,
24
+ measurement.value,
25
+ measurement.queue_name,
26
+ measurement.metric,
27
+ ].join(',')
30
28
 
31
29
  result << "\n"
32
30
  end
@@ -5,7 +5,6 @@ require 'rails_autoscale_agent/logger'
5
5
  require 'rails_autoscale_agent/autoscale_api'
6
6
  require 'rails_autoscale_agent/time_rounder'
7
7
  require 'rails_autoscale_agent/registration'
8
- require 'rails_autoscale_agent/worker_adapters/sidekiq'
9
8
 
10
9
  # Reporter wakes up every minute to send metrics to the RailsAutoscale API
11
10
 
@@ -21,31 +20,29 @@ module RailsAutoscaleAgent
21
20
  def start!(config, store)
22
21
  @started = true
23
22
  @worker_adapters = config.worker_adapters.select(&:enabled?)
23
+ @dyno_num = config.dyno.to_s.split('.').last.to_i
24
24
 
25
- if !config.api_base_url
25
+ if !config.api_base_url && !config.dev_mode?
26
26
  logger.info "Reporter not started: #{config.addon_name}_URL is not set"
27
27
  return
28
28
  end
29
29
 
30
30
  Thread.new do
31
- logger.tagged 'RailsAutoscale' do
32
- register!(config)
33
-
34
- loop do
35
- # Stagger reporting to spread out reports from many processes
36
- multiplier = 1 - (rand / 4) # between 0.75 and 1.0
37
- sleep config.report_interval * multiplier
38
-
39
- begin
40
- @worker_adapters.map { |a| a.collect!(store) }
41
- report!(config, store)
42
- rescue => ex
43
- # Exceptions in threads other than the main thread will fail silently
44
- # https://ruby-doc.org/core-2.2.0/Thread.html#class-Thread-label-Exception+handling
45
- logger.error "Reporter error: #{ex.inspect}"
46
- logger.error ex.backtrace.join("\n")
31
+ loop do
32
+ register!(config, @worker_adapters) unless @registered
33
+
34
+ # Stagger reporting to spread out reports from many processes
35
+ multiplier = 1 - (rand / 4) # between 0.75 and 1.0
36
+ sleep config.report_interval * multiplier
37
+
38
+ # It's redundant to report worker metrics from every web dyno, so only report from web.1
39
+ if @dyno_num == 1
40
+ @worker_adapters.map do |adapter|
41
+ report_exceptions(config) { adapter.collect!(store) }
47
42
  end
48
43
  end
44
+
45
+ report_exceptions(config) { report!(config, store) }
49
46
  end
50
47
  end
51
48
  end
@@ -54,6 +51,8 @@ module RailsAutoscaleAgent
54
51
  @started
55
52
  end
56
53
 
54
+ private
55
+
57
56
  def report!(config, store)
58
57
  report = store.pop_report
59
58
 
@@ -61,7 +60,7 @@ module RailsAutoscaleAgent
61
60
  logger.info "Reporting #{report.measurements.size} measurements"
62
61
 
63
62
  params = report.to_params(config)
64
- result = AutoscaleApi.new(config.api_base_url).report_metrics!(params, report.to_csv)
63
+ result = AutoscaleApi.new(config).report_metrics!(params, report.to_csv)
65
64
 
66
65
  case result
67
66
  when AutoscaleApi::SuccessResponse
@@ -74,20 +73,35 @@ module RailsAutoscaleAgent
74
73
  end
75
74
  end
76
75
 
77
- def register!(config)
78
- params = Registration.new(config).to_params
79
- result = AutoscaleApi.new(config.api_base_url).register_reporter!(params)
76
+ def register!(config, worker_adapters)
77
+ params = Registration.new(config, worker_adapters).to_params
78
+ result = AutoscaleApi.new(config).register_reporter!(params)
80
79
 
81
80
  case result
82
81
  when AutoscaleApi::SuccessResponse
82
+ @registered = true
83
83
  config.report_interval = result.data['report_interval'] if result.data['report_interval']
84
84
  config.max_request_size = result.data['max_request_size'] if result.data['max_request_size']
85
- worker_adapters_msg = @worker_adapters.map { |a| a.class.name }.join(', ')
85
+ worker_adapters_msg = worker_adapters.map { |a| a.class.name }.join(', ')
86
86
  logger.info "Reporter starting, will report every #{config.report_interval} seconds or so. Worker adapters: [#{worker_adapters_msg}]"
87
87
  when AutoscaleApi::FailureResponse
88
88
  logger.error "Reporter failed to register: #{result.failure_message}"
89
89
  end
90
90
  end
91
91
 
92
+ def report_exceptions(config)
93
+ begin
94
+ yield
95
+ rescue => ex
96
+ # Exceptions in threads other than the main thread will fail silently
97
+ # https://ruby-doc.org/core-2.2.0/Thread.html#class-Thread-label-Exception+handling
98
+ logger.error "Reporter error: #{ex.inspect}"
99
+ AutoscaleApi.new(config).report_exception!(ex)
100
+ end
101
+ rescue => ex
102
+ # An exception was encountered while trying to report the original exception.
103
+ # Swallow the error so the reporter continues to report.
104
+ logger.error "Exception reporting error: #{ex.inspect}"
105
+ end
92
106
  end
93
107
  end
@@ -1,35 +1,49 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'rails_autoscale_agent/logger'
4
+
3
5
  module RailsAutoscaleAgent
4
6
  class Request
5
7
  include Logger
6
8
 
7
- attr_reader :id, :entered_queue_at, :path, :method, :size
8
-
9
9
  def initialize(env, config)
10
10
  @config = config
11
11
  @id = env['HTTP_X_REQUEST_ID']
12
- @path = env['PATH_INFO']
13
- @method = env['REQUEST_METHOD'].downcase
14
12
  @size = env['rack.input'].respond_to?(:size) ? env['rack.input'].size : 0
15
-
16
- if unix_millis = env['HTTP_X_REQUEST_START']
17
- @entered_queue_at = Time.at(unix_millis.to_f / 1000)
18
- end
13
+ @request_body_wait = env['puma.request_body_wait'].to_i
14
+ @request_start_header = env['HTTP_X_REQUEST_START']
19
15
  end
20
16
 
21
17
  def ignore?
22
18
  @config.ignore_large_requests? && @size > @config.max_request_size
23
19
  end
24
20
 
25
- def queue_time
26
- if entered_queue_at
27
- queue_time = ((Time.now - entered_queue_at) * 1000).to_i
28
- queue_time = 0 if queue_time < 0
29
- logger.debug "Collected queue_time=#{queue_time}ms request_id=#{id} request_size=#{size}"
30
-
31
- queue_time
21
+ def started_at
22
+ if @request_start_header
23
+ # Heroku sets the header as an integer, measured in milliseconds.
24
+ # If nginx is involved, it might be in seconds with fractional milliseconds,
25
+ # and it might be preceded by "t=". We can handle all cases by removing non-digits
26
+ # and treating as milliseconds.
27
+ Time.at(@request_start_header.gsub(/\D/, '').to_i / 1000.0)
28
+ elsif @config.dev_mode?
29
+ # In dev mode, fake a queue time of 0-1000ms
30
+ Time.now - rand + @request_body_wait
32
31
  end
33
32
  end
33
+
34
+ def queue_time(now = Time.now)
35
+ return if started_at.nil?
36
+
37
+ queue_time = ((now - started_at) * 1000).to_i
38
+
39
+ # Subtract the time Puma spent waiting on the request body. It's irrelevant to capacity-related queue time.
40
+ # Without this, slow clients and large request payloads will skew queue time.
41
+ queue_time -= @request_body_wait
42
+
43
+ logger.debug "Request queue_time=#{queue_time}ms body_wait=#{@request_body_wait}ms request_id=#{@id} size=#{@size}"
44
+
45
+ # Safeguard against negative queue times (should not happen in practice)
46
+ queue_time > 0 ? queue_time : 0
47
+ end
34
48
  end
35
49
  end
@@ -15,11 +15,16 @@ module RailsAutoscaleAgent
15
15
  @measurements = []
16
16
  end
17
17
 
18
- def push(value, time = Time.now, queue_name = nil)
19
- @measurements << Measurement.new(time, value, queue_name)
18
+ def push(value, time = Time.now, queue_name = nil, metric = nil)
19
+ # If it's been two minutes since clearing out the store, stop collecting measurements.
20
+ # There could be an issue with the reporter, and continuing to collect will consume linear memory.
21
+ return if @last_pop && @last_pop < Time.now - 120
22
+
23
+ @measurements << Measurement.new(time, value, queue_name, metric)
20
24
  end
21
25
 
22
26
  def pop_report
27
+ @last_pop = Time.now
23
28
  report = Report.new
24
29
 
25
30
  while measurement = @measurements.shift
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RailsAutoscaleAgent
4
- VERSION = "0.7.0"
4
+ VERSION = "0.10.2"
5
5
  end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails_autoscale_agent/logger'
4
+
5
+ module RailsAutoscaleAgent
6
+ module WorkerAdapters
7
+ class DelayedJob
8
+ include RailsAutoscaleAgent::Logger
9
+ include Singleton
10
+
11
+ attr_writer :queues
12
+
13
+ def enabled?
14
+ if defined?(::Delayed::Job) && defined?(::Delayed::Backend::ActiveRecord)
15
+ log_msg = String.new("DelayedJob enabled (#{::ActiveRecord::Base.default_timezone})")
16
+ log_msg << " with long-running job support" if track_long_running_jobs?
17
+ logger.info log_msg
18
+ true
19
+ end
20
+ end
21
+
22
+ def collect!(store)
23
+ log_msg = String.new
24
+ t = Time.now.utc
25
+ sql = <<~SQL
26
+ SELECT COALESCE(queue, 'default'), min(run_at)
27
+ FROM delayed_jobs
28
+ WHERE locked_at IS NULL
29
+ AND failed_at IS NULL
30
+ GROUP BY queue
31
+ SQL
32
+
33
+ run_at_by_queue = Hash[select_rows(sql)]
34
+
35
+ # Don't collect worker metrics if there is an unreasonable number of queues
36
+ if run_at_by_queue.size > 50
37
+ logger.debug "Skipping DelayedJob metrics - #{run_at_by_queue.size} queues"
38
+ return
39
+ end
40
+
41
+ self.queues = queues | run_at_by_queue.keys
42
+
43
+ if track_long_running_jobs?
44
+ sql = <<~SQL
45
+ SELECT COALESCE(queue, 'default'), count(*)
46
+ FROM delayed_jobs
47
+ WHERE locked_at IS NOT NULL
48
+ AND locked_by IS NOT NULL
49
+ AND failed_at IS NULL
50
+ GROUP BY 1
51
+ SQL
52
+
53
+ busy_count_by_queue = Hash[select_rows(sql)]
54
+ self.queues = queues | busy_count_by_queue.keys
55
+ end
56
+
57
+ queues.each do |queue|
58
+ run_at = run_at_by_queue[queue]
59
+ # DateTime.parse assumes a UTC string
60
+ run_at = DateTime.parse(run_at) if run_at.is_a?(String)
61
+ latency_ms = run_at ? ((t - run_at)*1000).ceil : 0
62
+ latency_ms = 0 if latency_ms < 0
63
+
64
+ store.push latency_ms, t, queue
65
+ log_msg << "dj-qt.#{queue}=#{latency_ms} "
66
+
67
+ if track_long_running_jobs?
68
+ busy_count = busy_count_by_queue[queue] || 0
69
+ store.push busy_count, Time.now, queue, :busy
70
+ log_msg << "dj-busy.#{queue}=#{busy_count} "
71
+ end
72
+ end
73
+
74
+ logger.debug log_msg unless log_msg.empty?
75
+ end
76
+
77
+ private
78
+
79
+ def queues
80
+ # Track the known queues so we can continue reporting on queues that don't
81
+ # have enqueued jobs at the time of reporting.
82
+ # Assume a "default" queue so we always report *something*, even when nothing
83
+ # is enqueued.
84
+ @queues ||= Set.new(['default'])
85
+ end
86
+
87
+ def track_long_running_jobs?
88
+ Config.instance.track_long_running_jobs
89
+ end
90
+
91
+ def select_rows(sql)
92
+ # This ensures the agent doesn't hold onto a DB connection any longer than necessary
93
+ ActiveRecord::Base.connection_pool.with_connection { |c| c.select_rows(sql) }
94
+ end
95
+ end
96
+ end
97
+ end