neetodeploy-autoscale 2.0.6 → 2.0.9

This diff shows the changes between publicly released versions of this package as they appear in the supported public registries, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: bffc52d124f0b677da6669f6b34b26347b0710396d362a17af10cb22dbf029a9
- data.tar.gz: dd98640212220a0d4426d2fce3066f4764e1194a79da602b03428bc0c10e7dc3
+ metadata.gz: 5064a81417af764a12450cf56db7a8dd1e6b15c784536808ffcb84be9603b255
+ data.tar.gz: d2bcff1b1bc78f3dc00996c3149d3cba1bd25b78bfbfc6ef8f08e66357464068
  SHA512:
- metadata.gz: 1e1b0fe71a3a89605214d79c971f63f52bbd75804dabf4b75b817824e65941a861fe7f32f159efc7126f5d53da8b1755b954d96efe8bb66341097a9afcad38a6
- data.tar.gz: fa8e816a7be8dbdb7395272cd1c4ce67509b1048fb41feca137a62802ffb8607ebc3edc399c17faaf1a9fdbcfce5d5c6ef39ca5f0bf7cd959badcf1ebc46ce78
+ metadata.gz: 738f8a948ac3a6767b200c41fe4175a17e5a57c253931cc50c0cc875115a9bc9decfe0851a28933254928107c7f7993b2825c735649f565d83358891bbe12acb
+ data.tar.gz: d631a416a5c2e441545081db9545eca86ebe52756e273bb6124083b05d77c9a4a5d8225f2bf80a5d951752fcb148b102fd182fd87a6f733fb4c0c5299c6a1613
@@ -2,29 +2,35 @@ module Neetodeploy
  class Config
  include Singleton

- attr_accessor :disable_auto_scale_gem, :disable_sidekiq_metrics, :app_name, :metrics_server_url,
- :metrics_server_auth_token, :report_interval_seconds, :use_puma_queue_size
+ DEFAULT_METRICS_URL = "http://nd-queue-time-exporter-web-deployment:3000/metrics"
+ DEFAULT_BATCH_URL = "http://nd-queue-time-exporter-web-deployment:3000/metrics/batch"
+ DEFAULT_AUTH_TOKEN = "K0An3O3MSyEEMTCnRd1IHgGjdGQkzy"
+ DEFAULT_REPORT_INTERVAL = 10
+
+ attr_reader :app_name, :metrics_server_url, :metrics_server_batch_url,
+ :metrics_server_auth_token, :report_interval_seconds

  def initialize
- @disable_auto_scale_gem = ENV["DISABLE_NEETO_DEPLOY_AUTOSCALE"]
- @disable_sidekiq_metrics = ENV["DISABLE_NEETO_DEPLOY_SIDEKIQ_METRICS"]
  @app_name = ENV["NEETODEPLOY_APP_NAME"]
- @metrics_server_url = "http://nd-queue-time-exporter-web-deployment:3000/metrics"
- @metrics_server_auth_token = "K0An3O3MSyEEMTCnRd1IHgGjdGQkzy"
- @report_interval_seconds = 10
- @use_puma_queue_size = ENV["NEETODEPLOY_USE_PUMA_QUEUE_SIZE"] == "true"
+ @metrics_server_url = DEFAULT_METRICS_URL
+ @metrics_server_batch_url = DEFAULT_BATCH_URL
+ @metrics_server_auth_token = DEFAULT_AUTH_TOKEN
+ @report_interval_seconds = (ENV["NEETODEPLOY_REPORT_INTERVAL_SECONDS"]&.to_i || DEFAULT_REPORT_INTERVAL)
+ @gem_disabled = ENV["DISABLE_NEETO_DEPLOY_AUTOSCALE"] == "true"
+ @sidekiq_metrics_disabled = ENV["DISABLE_NEETO_DEPLOY_SIDEKIQ_METRICS"] == "true"
+ @batch_processing_enabled = ENV["NEETODEPLOY_BATCH_PROCESSING_ENABLED"] == "true"
  end

  def gem_disabled?
- disable_auto_scale_gem == "true"
+ @gem_disabled
  end

  def sidekiq_metrics_disabled?
- disable_sidekiq_metrics == "true"
+ @sidekiq_metrics_disabled
  end

- def use_puma_queue_size?
- use_puma_queue_size
+ def batch_processing_enabled?
+ @batch_processing_enabled
  end
  end
  end
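For context, a minimal usage sketch of the reworked configuration. It is not from the gem's documentation; the constant and method names mirror the diff above, and since Config is a Singleton that reads ENV in initialize, the variables have to be set before the first Config.instance call.

    # Hypothetical usage sketch of Neetodeploy::Config after this change.
    # Assumes the gem is already loaded; env vars must be set before the
    # Singleton is first instantiated, because initialize reads ENV once.
    ENV["NEETODEPLOY_APP_NAME"] = "my-app"
    ENV["NEETODEPLOY_REPORT_INTERVAL_SECONDS"] = "30"
    ENV["NEETODEPLOY_BATCH_PROCESSING_ENABLED"] = "true"

    config = Neetodeploy::Config.instance
    config.report_interval_seconds   # => 30
    config.batch_processing_enabled? # => true
    config.gem_disabled?             # => false (DISABLE_NEETO_DEPLOY_AUTOSCALE is unset)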
@@ -1,7 +1,6 @@
  # frozen_string_literal: true

  require "singleton"
- require "json"
  require "neetodeploy/autoscale/reporter"
  require "neetodeploy/autoscale/logger"
  require "neetodeploy/autoscale/config"
@@ -13,16 +12,17 @@ module Neetodeploy
  include Singleton
  include NeetoDeploy::Logger

+ PERCENTILES = { p90: 0.90, p95: 0.95, p99: 0.99 }.freeze
+ RESTART_DELAY = 5
+
  def self.start
  instance.start! unless instance.running?
  end

  def start!
- config = Config.instance
- mode = config.use_puma_queue_size? ? "Puma queue size" : "response time"
- logger.info("Starting background worker to collect #{mode} metrics")
+ logger.info("Starting background worker to collect metrics")
  @pid = Process.pid
- start_thread_with_collector_loop
+ start_collector_thread
  end

  def stop!
@@ -35,83 +35,74 @@ module Neetodeploy
  @pid == Process.pid && @thread&.alive?
  end

- def start_thread_with_collector_loop(config = Config.instance)
+ private
+
+ def start_collector_thread(config = Config.instance)
  @thread = Thread.new do
  loop do
  begin
- metrics_store = MetricsStore.instance unless config.use_puma_queue_size?
- loop do
- if config.use_puma_queue_size?
- run_puma_metrics_collection(config)
- else
- run_queue_time_collection(metrics_store, config)
- end
- multiplier = 1 - (rand / 4)
- sleep config.report_interval_seconds * multiplier
- end
+ metrics_store = MetricsStore.instance
+ collect_and_report_loop(metrics_store, config)
  rescue StandardError => e
- logger.error("Rails metrics collector thread terminated with error: #{e.message}")
- logger.error(e.backtrace.join("\n")) if e.backtrace
-
- # Only restart if we're still the same process and thread should continue
- break unless @pid == Process.pid && !@thread.nil?
-
- logger.info("Restarting Rails metrics collector thread")
- sleep(5)
+ handle_collection_error(e, config)
  end
  end
  end
  end

+ def collect_and_report_loop(metrics_store, config)
+ loop do
+ run_queue_time_collection(metrics_store, config)
+ sleep(config.report_interval_seconds)
+ end
+ end
+
  def run_queue_time_collection(metrics_store, config)
  return if config.gem_disabled?

  data = metrics_store.flush
  return if data.empty?

- average_queue_time = data.sum / data.size
- Reporter.new(average_queue_time, "queue_time", "web").report
+ if config.batch_processing_enabled?
+ report_batch_metrics(data, config)
+ else
+ report_average_metric(data, config)
+ end
  end

- def run_puma_metrics_collection(config)
- return unless puma_available?
- return if config.gem_disabled?
+ def report_batch_metrics(data, config)
+ sorted_data = data.sort
+ size = sorted_data.size

- begin
- queue_size = get_puma_queue_size
- return if queue_size.nil?
+ metrics = [
+ { metric_name: "queue_time_p90", metric_value: percentile_value(sorted_data, size, 0.90) },
+ { metric_name: "queue_time_p95", metric_value: percentile_value(sorted_data, size, 0.95) },
+ { metric_name: "queue_time_p99", metric_value: percentile_value(sorted_data, size, 0.99) },
+ { metric_name: "queue_time", metric_value: data.sum / data.size }
+ ]

- Reporter.new(queue_size, "puma_queue_size", "web").report
- rescue StandardError => e
- logger.error("Error collecting Puma queue size metrics: #{e.message}")
- end
+ Reporter.report_batch(metrics, "web", nil, config)
  end

- private
+ def report_average_metric(data, config)
+ average_queue_time = data.sum.fdiv(data.size).round
+ Reporter.new(average_queue_time, "queue_time", "web", nil, config).report
+ end

- def puma_available?
- defined?(::Puma) && ::Puma.respond_to?(:stats)
- end
+ def percentile_value(sorted_data, size, percentile)
+ index = [(size * percentile).ceil - 1, 0].max
+ sorted_data[index]
+ end

- def get_puma_queue_size
- stats_json = ::Puma.stats
- return nil if stats_json.nil? || stats_json.empty?
+ def handle_collection_error(error, config)
+ logger.error("Rails metrics collector thread error: #{error.message}")
+ logger.error(error.backtrace.join("\n")) if error.backtrace

- stats = JSON.parse(stats_json)
+ should_restart = @pid == Process.pid && @thread
+ logger.info("Restarting Rails metrics collector thread") if should_restart

- # For clustered mode (multiple workers)
- if stats["worker_status"]
- # Sum backlog from all workers
- stats["worker_status"].sum do |worker|
- worker.dig("last_status", "backlog") || 0
- end
- # For single mode
- elsif stats["backlog"]
- stats["backlog"]
- else
- nil
- end
- end
+ sleep(RESTART_DELAY) if should_restart
+ end
  end
  end
  end
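The nearest-rank percentile used by report_batch_metrics is easy to check by hand. Here is a standalone sketch that mirrors percentile_value from the diff; the sample queue times are made up.

    # Mirrors percentile_value from the diff: nearest-rank selection on sorted data.
    def percentile_value(sorted_data, size, percentile)
      index = [(size * percentile).ceil - 1, 0].max
      sorted_data[index]
    end

    queue_times = [12, 18, 25, 31, 40, 52, 60, 75, 90, 120]  # made-up values, already sorted
    size = queue_times.size

    percentile_value(queue_times, size, 0.90) # => 90  (index (10 * 0.90).ceil - 1 = 8)
    percentile_value(queue_times, size, 0.95) # => 120 (index (10 * 0.95).ceil - 1 = 9)
    percentile_value(queue_times, size, 0.99) # => 120 (index (10 * 0.99).ceil - 1 = 9)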
@@ -11,16 +11,21 @@ module Neetodeploy

  def initialize
  @metrics = []
+ @mutex = Mutex.new
  end

  def push(queue_time)
- @metrics << queue_time
+ @mutex.synchronize do
+ @metrics << queue_time
+ end
  end

  def flush
- result = @metrics
- @metrics = []
- result
+ @mutex.synchronize do
+ result = @metrics
+ @metrics = []
+ result
+ end
  end
  end
  end
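A standalone sketch of the push/flush pattern now guarded by a Mutex. TinyStore is a stand-in class, not the gem's MetricsStore; the point is that the read-and-reset swap in flush becomes atomic with respect to concurrent pushes from request threads.

    require "singleton"

    class TinyStore
      include Singleton

      def initialize
        @metrics = []
        @mutex = Mutex.new
      end

      def push(value)
        @mutex.synchronize { @metrics << value }
      end

      def flush
        # Returning the old array and swapping in a fresh one inside the
        # synchronize block means no pushed value can be lost mid-flush.
        @mutex.synchronize do
          result = @metrics
          @metrics = []
          result
        end
      end
    end

    threads = 4.times.map { |i| Thread.new { 100.times { TinyStore.instance.push(i) } } }
    threads.each(&:join)
    TinyStore.instance.flush.size # => 400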
@@ -3,7 +3,6 @@
  require "neetodeploy/autoscale/rails/metrics"
  require "neetodeploy/autoscale/rails/metrics_store"
  require "neetodeploy/autoscale/rails/metrics_collector"
- require "neetodeploy/autoscale/config"

  module Neetodeploy
  module Rails
@@ -13,18 +12,13 @@ module Neetodeploy
  end

  def call(env)
- config = Config.instance
+ metrics = Metrics.new(env)
+ queue_time = metrics.queue_time unless metrics.ignore?
  MetricsCollector.start

- # Only collect queue time per request if using the old response time method
- unless config.use_puma_queue_size?
- metrics = Metrics.new(env)
- queue_time = metrics.queue_time unless metrics.ignore?
-
- if queue_time
- store = MetricsStore.instance
- store.push queue_time
- end
+ if queue_time
+ store = MetricsStore.instance
+ store.push queue_time
  end

  @app.call(env)
@@ -2,6 +2,7 @@

  require "net/http"
  require "time"
+ require "json"
  require "neetodeploy/autoscale/config"
  require "neetodeploy/autoscale/logger"

@@ -18,39 +19,100 @@ module Neetodeploy
  end

  def report
- params = common_params
- params[:queue_name] = @queue_name if @queue_name
- url = build_url(params)
- logger.info("Reporting #{@metric_name} for #{@process_type} dyno: #{@metric_value}")
-
+ url = build_url(common_params)
+ logger.info("Reporting #{@metric_name} for #{@process_type}: #{@metric_value}")
  post_request(url)
  end

- private
+ def self.report_batch(metrics, process_type, queue_name = nil, config = Config.instance)
+ return if metrics.empty?

- def common_params
- {
- app_name: @config.app_name,
- queue_time: @metric_value, # Keep queue_time parameter name for backward compatibility with metrics server
- metric_name: @metric_name,
- process_type: @process_type
- }
+ payload = build_batch_payload(metrics, process_type, queue_name, config)
+ url = config.batch_processing_enabled? ? config.metrics_server_batch_url : config.metrics_server_url
+
+ logger.info("Batch reporting #{metrics.size} metrics for #{process_type}")
+ post_batch_request(url, payload, config)
  end

- def post_request(url)
- post = Net::HTTP::Post.new(url)
- post["AuthToken"] = @config.metrics_server_auth_token
+ private

- Net::HTTP.start(url.host, url.port, use_ssl: false) do |http|
- http.request(post)
+ def common_params
+ {
+ app_name: @config.app_name,
+ queue_time: @metric_value,
+ metric_name: @metric_name,
+ process_type: @process_type,
+ queue_name: @queue_name
+ }.compact
  end
- end

- def build_url(params = {})
- exporter_url = URI.parse(@config.metrics_server_url)
- exporter_url.query = URI.encode_www_form(params)
+ def build_url(params)
+ url = URI.parse(@config.metrics_server_url)
+ url.query = URI.encode_www_form(params)
+ url
+ end

- exporter_url
- end
+ def post_request(url)
+ http_request(url, build_post_request(url, @config.metrics_server_auth_token))
+ end
+
+ def self.build_batch_payload(metrics, process_type, queue_name, config)
+ {
+ app_name: config.app_name,
+ process_type: process_type,
+ queue_name: queue_name,
+ metrics: metrics.map { |m| { metric_name: m[:metric_name], metric_value: m[:metric_value].to_i } }
+ }.compact
+ end
+
+ def self.post_batch_request(url, payload, config)
+ uri = URI.parse(url)
+ request = build_post_request(uri, config.metrics_server_auth_token)
+ request.body = encode_form_data(payload)
+ request["Content-Type"] = "application/x-www-form-urlencoded"
+
+ http_request(uri, request)
+ rescue StandardError => e
+ logger.error("Error batch reporting metrics: #{e.message}")
+ raise
+ end
+
+ def self.build_post_request(uri, auth_token)
+ post = Net::HTTP::Post.new(uri)
+ post["AuthToken"] = auth_token
+ post
+ end
+
+ def self.encode_form_data(payload)
+ form_parts = [
+ "app_name=#{encode_param(payload[:app_name])}",
+ "process_type=#{encode_param(payload[:process_type])}"
+ ]
+
+ form_parts << "queue_name=#{encode_param(payload[:queue_name])}" if payload[:queue_name]
+
+ if payload[:metrics].is_a?(Array)
+ payload[:metrics].each do |metric|
+ form_parts << "metrics[][metric_name]=#{encode_param(metric[:metric_name])}"
+ form_parts << "metrics[][metric_value]=#{encode_param(metric[:metric_value])}"
+ end
+ end
+
+ form_parts.join("&")
+ end
+
+ def self.encode_param(value)
+ URI.encode_www_form_component(value.to_s)
+ end
+
+ def self.http_request(uri, request)
+ Net::HTTP.start(uri.host, uri.port, use_ssl: false) do |http|
+ http.request(request)
+ end
+ end
+
+ def http_request(uri, request)
+ self.class.http_request(uri, request)
+ end
  end
  end
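For illustration, a sketch of the form body that encode_form_data produces for a made-up batch payload. The repeated Rack-style metrics[][...] keys are presumably what the batch endpoint parses back into an array of hashes; the app name and metric values below are invented.

    require "uri"

    payload = {
      app_name: "my-app",      # made-up values
      process_type: "web",
      metrics: [
        { metric_name: "queue_time_p95", metric_value: 42 },
        { metric_name: "queue_time", metric_value: 17 }
      ]
    }

    # Same encoding steps as Reporter.encode_form_data in the diff above.
    form_parts = [
      "app_name=#{URI.encode_www_form_component(payload[:app_name])}",
      "process_type=#{URI.encode_www_form_component(payload[:process_type])}"
    ]
    payload[:metrics].each do |metric|
      form_parts << "metrics[][metric_name]=#{URI.encode_www_form_component(metric[:metric_name].to_s)}"
      form_parts << "metrics[][metric_value]=#{URI.encode_www_form_component(metric[:metric_value].to_s)}"
    end

    form_parts.join("&")
    # => "app_name=my-app&process_type=web&metrics[][metric_name]=queue_time_p95&metrics[][metric_value]=42&metrics[][metric_name]=queue_time&metrics[][metric_value]=17"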
@@ -29,30 +29,24 @@ module Neetodeploy
  end

  def running?
- @pid == Process.pid && @thread&.alive?
+ @pid == Process.pid and @thread&.alive?
  end

  def start_thread_with_collector_loop(config = Config.instance)
  @thread = Thread.new do
- # Use a non-recursive loop pattern to avoid SystemStackError and ThreadError on shutdown
  loop do
- begin
- loop do
- run_sidekiq_metrics_collection
- multiplier = 1 - (rand / 4)
- sleep config.report_interval_seconds * multiplier
- end
- rescue StandardError => e
- logger.error("Sidekiq metrics collector thread terminated with error: #{e.message}")
- logger.error(e.backtrace.join("\n")) if e.backtrace
-
- # Only restart if we're still the same process and thread should continue
- # Check if Sidekiq is shutting down to avoid ThreadError during shutdown
- break unless @pid == Process.pid && !@thread.nil? && sidekiq_running?
-
- logger.info("Restarting Sidekiq metrics collector thread")
- sleep(5)
- end
+ run_sidekiq_metrics_collection
+ multiplier = 1 - (rand / 4)
+ sleep config.report_interval_seconds * multiplier
+ end
+ rescue StandardError => e
+ logger.error("Sidekiq metrics collector thread terminated with error: #{e.message}")
+ logger.error(e.backtrace.join("\n")) if e.backtrace
+ ensure
+ if @pid == Process.pid && !@thread.nil?
+ logger.info("Restarting Sidekiq metrics collector thread")
+ sleep(5)
+ start_thread_with_collector_loop
  end
  end
  end
@@ -79,11 +73,6 @@ module Neetodeploy
  defined?(::Sidekiq) && ::Sidekiq.respond_to?(:redis)
  end

- def sidekiq_running?
- # Check if Sidekiq server is still running to avoid restarting during shutdown
- defined?(::Sidekiq) && ::Sidekiq.respond_to?(:server?) && ::Sidekiq.server?
- end
-
  def collect_queue_metrics(queue)
  queue_name = queue.name
  latency_ms = (queue.latency * 1000).ceil
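One detail worth noting: the Sidekiq collector keeps the jittered sleep from before, so reports are spread out rather than firing at exact intervals. A tiny sketch of the multiplier's range; the interval value below is chosen arbitrarily.

    # rand returns a Float in [0, 1), so rand / 4 is in [0, 0.25) and the
    # multiplier is in (0.75, 1.0]: each cycle sleeps between roughly 75%
    # and 100% of report_interval_seconds.
    report_interval_seconds = 10

    3.times do
      multiplier = 1 - (rand / 4)
      puts format("sleeping %.2fs", report_interval_seconds * multiplier)
    end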
@@ -2,6 +2,6 @@

  module Neetodeploy
  module Autoscale
- VERSION = "2.0.6"
+ VERSION = "2.0.9"
  end
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: neetodeploy-autoscale
  version: !ruby/object:Gem::Version
- version: 2.0.6
+ version: 2.0.9
  platform: ruby
  authors:
  - Sreeram Venkitesh
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2025-11-14 00:00:00.000000000 Z
+ date: 2025-11-25 00:00:00.000000000 Z
  dependencies: []
  description: For automatically scaling your Rails application based on network metrics
  email: