process_balancer 1.0.0
- checksums.yaml +7 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.adoc +90 -0
- data/Rakefile +8 -0
- data/TODO.adoc +1 -0
- data/exe/process_balancer +17 -0
- data/lib/process_balancer.rb +138 -0
- data/lib/process_balancer/base.rb +80 -0
- data/lib/process_balancer/cli.rb +257 -0
- data/lib/process_balancer/lock/advisory_lock.rb +21 -0
- data/lib/process_balancer/lock/simple_redis.rb +79 -0
- data/lib/process_balancer/manager.rb +185 -0
- data/lib/process_balancer/private/cancellation.rb +116 -0
- data/lib/process_balancer/rails.rb +29 -0
- data/lib/process_balancer/redis_connection.rb +56 -0
- data/lib/process_balancer/util.rb +37 -0
- data/lib/process_balancer/version.rb +5 -0
- data/lib/process_balancer/watcher.rb +113 -0
- data/lib/process_balancer/worker.rb +76 -0
- data/process_balancer.gemspec +45 -0
- metadata +273 -0
data/lib/process_balancer/lock/advisory_lock.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module ProcessBalancer
+  module Lock
+    # This is a lock implementation using advisory locks on the database via the with_advisory_lock gem
+    module AdvisoryLock
+      # class to wrap the lock handling and provide the "extend!" method contract
+      class DummyLock
+        def extend!; end
+      end
+
+      def worker_lock
+        key = "worker_lock_#{job_id}_#{worker_index}"
+        lock = DummyLock.new
+        ActiveRecord::Base.with_advisory_lock(key) do
+          yield lock
+        end
+      end
+    end
+  end
+end
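To show how this module is meant to be mixed in, here is a minimal sketch of a hypothetical worker class; the ReportWorker name, job_id, and worker_index values are illustrative, and it assumes ActiveRecord plus the with_advisory_lock gem are loaded:

# Hypothetical worker mixing in the advisory lock (not part of the gem).
class ReportWorker
  include ProcessBalancer::Lock::AdvisoryLock

  def job_id
    :reports # hypothetical job identifier used in the lock key
  end

  def worker_index
    0 # hypothetical index of this worker
  end

  def run
    worker_lock do |lock|
      lock.extend! # a no-op for DummyLock; kept for the method contract
      # ... work that must not run concurrently for this job/worker pair ...
    end
  end
end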
data/lib/process_balancer/lock/simple_redis.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+module ProcessBalancer
+  module Lock
+    # This is a simple implementation of a lock to ensure only one job runner is running for a worker
+    # This is only safe for a single redis instance setup
+    # something more resilient should be used instead,
+    # e.g. an advisory lock in a DB or using RedLock ( https://github.com/leandromoreira/redlock-rb )
+    module SimpleRedis
+      def self.time_source
+        @time_source ||= if defined?(Process::CLOCK_MONOTONIC)
+                           proc { (Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1000).to_i }
+                         else
+                           proc { (Time.now.to_f * 1000).to_i }
+                         end
+      end
+
+      # class to wrap the lock handling and provide the "extend!" method contract
+      class LockHandler
+        def initialize(key, value, ttl)
+          @key = key
+          @value = value
+          @ttl = ttl
+          @acquired = false
+        end
+
+        def acquire!
+          time_source = ProcessBalancer::Lock::SimpleRedis.time_source
+
+          timeout_ms = 5000
+          wait_time = 0.02..0.1
+          start = time_source.call
+
+          sleep(rand(wait_time)) while !(@acquired = try_lock) && (time_source.call - start) < timeout_ms
+        end
+
+        def release!
+          ProcessBalancer.redis do |c|
+            c.del(@key)
+          end
+        end
+
+        def acquired?
+          @acquired
+        end
+
+        def try_lock
+          ProcessBalancer.redis do |c|
+            c.set(@key, @value, nx: true, ex: @ttl)
+          end
+        end
+
+        def extend!
+          ProcessBalancer.redis do |c|
+            c.watch(@key)
+            if c.get(@key) == @value
+              c.multi do
+                c.set(@key, @value, ex: @ttl)
+              end
+            end
+          end
+        end
+      end
+
+      def worker_lock
+        lock = LockHandler.new("lock_#{job_id}_#{worker_index}", ProcessBalancer.identity, runtime_lock_timeout)
+        lock.acquire!
+
+        if lock.acquired?
+          begin
+            yield lock
+          ensure
+            lock.release!
+          end
+        end
+      end
+    end
+  end
+end
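As a rough, non-authoritative illustration of the LockHandler lifecycle above (the key and TTL values here are made up; ProcessBalancer.redis and ProcessBalancer.identity are assumed to be configured as elsewhere in the gem):

# Acquire, extend, and release the simple Redis lock by hand.
lock = ProcessBalancer::Lock::SimpleRedis::LockHandler.new(
  'lock_example_0',         # hypothetical lock key
  ProcessBalancer.identity, # value identifying this process
  30                        # hypothetical TTL in seconds
)

lock.acquire!               # retries SET NX EX for up to ~5 seconds
if lock.acquired?
  begin
    # ... long-running work ...
    lock.extend!            # refresh the TTL while the work continues
  ensure
    lock.release!           # delete the key when finished
  end
end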
data/lib/process_balancer/manager.rb
@@ -0,0 +1,185 @@
+# frozen_string_literal: true
+
+require 'English'
+require 'json'
+
+require_relative 'util'
+require_relative 'watcher'
+
+require 'concurrent/atomic/atomic_fixnum'
+require 'concurrent/executor/thread_pool_executor'
+
+module ProcessBalancer
+  class Manager # :nodoc:
+    include Util
+
+    def initialize(options)
+      @options = options
+      @done = false
+      @process_index = Concurrent::AtomicFixnum.new(-1)
+      @process_count = Concurrent::AtomicFixnum.new(0)
+      @pool = Concurrent::ThreadPoolExecutor.new(max_threads: @options[:max_threads], fallback_policy: :discard)
+
+      setup_job_watchers
+    end
+
+    def process_count
+      @process_count.value
+    end
+
+    def process_index
+      v = @process_index.value
+      v == -1 ? nil : v
+    end
+
+    def workers_for_job(job_id)
+      stopping? ? 0 : ProcessBalancer.scheduled_workers(job_id)
+    end
+
+    def run
+      @thread = start_thread('heartbeat', &method(:run_heartbeat))
+    end
+
+    def quiet
+      return if @done
+
+      @done = true
+
+      update_jobs
+    end
+
+    def stop
+      quiet
+
+      @pool.shutdown
+      @pool.wait_for_termination(ProcessBalancer.options[:shutdown_timeout])
+      @pool.kill
+
+      clear_heartbeat
+    end
+
+    def stopping?
+      @done
+    end
+
+    private
+
+    def process_count=(value)
+      @process_count.value = value
+    end
+
+    def process_index=(value)
+      v = value.nil? ? -1 : value
+
+      @process_index.value = v
+    end
+
+    def run_heartbeat
+      loop do
+        heartbeat
+        sleep 5
+      end
+      logger.info('Heartbeat stopping...')
+    end
+
+    def clear_heartbeat
+      redis do |c|
+        c.lrem(PROCESSES_KEY, 0, identity)
+      end
+    rescue StandardError
+      # ignore errors
+    end
+
+    def heartbeat
+      update_process_index
+
+      _exists, msg = update_state
+
+      if msg
+        ::Process.kill(msg, $PID)
+      else
+        update_jobs
+      end
+    rescue StandardError => e
+      logger.error("heartbeat: #{e.message} @ #{e.backtrace_locations&.first || ''}")
+    end
+
+    def update_state
+      exists, _, _, msg = redis do |c|
+        c.multi do
+          c.exists(identity)
+          c.hmset(identity, 'info', info_json, 'beat', Time.now.to_f, 'quiet', @done, 'worker', process_index)
+          c.expire(identity, 60)
+          c.rpop("#{identity}-signals")
+        end
+      end
+
+      [exists, msg]
+    end
+
+    def update_process_index
+      redis do |c|
+        c.watch(PROCESSES_KEY)
+
+        workers = c.lrange(PROCESSES_KEY, 0, -1)
+        num_workers = workers.size
+        index = workers.find_index(identity)
+
+        if index.nil?
+          new_length = c.multi do
+            c.rpush(PROCESSES_KEY, identity)
+          end
+          unless new_length.nil?
+            num_workers = new_length.first
+            index = new_length.first - 1
+          end
+        else
+          c.unwatch
+        end
+        self.process_index = index
+        self.process_count = num_workers
+      end
+    end
+
+    def update_jobs
+      watcher_stats = {}
+      @watchers.each do |job_id, watcher|
+        watcher.update_worker_config(process_index, process_count, workers_for_job(job_id))
+        watcher_stats[job_id] = JSON.dump(watcher.stats)
+      end
+
+      workers_key = "#{identity}:workers"
+      redis do |c|
+        c.multi do
+          c.del(workers_key)
+          watcher_stats.each do |job_id, stats_data|
+            c.hset(workers_key, job_id, stats_data)
+          end
+          c.expire(workers_key, 60)
+        end
+      end
+    end
+
+    def setup_job_watchers
+      @watchers = {}
+      @options.fetch(:job_sets, []).each do |job_config|
+        job_id = job_config[:id]
+        logger.debug "Starting watcher for #{job_id}"
+        @watchers[job_id] = Watcher.new(@pool, job_config)
+      end
+    end
+
+    def info
+      @info ||= {
+        hostname: hostname,
+        pid: ::Process.pid,
+        identity: identity,
+        max_threads: @options[:max_threads],
+      }
+    end
+
+    def info_json
+      @info_json ||= JSON.dump(info)
+    end
+  end
+end
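A hedged sketch of driving the Manager directly; the option keys (max_threads, job_sets, shutdown_timeout) come from the code above, but the values and the surrounding lifecycle are illustrative only, since in practice the gem's CLI builds and runs the manager:

# Illustrative only: build a manager, start its heartbeat, then shut it down.
manager = ProcessBalancer::Manager.new(
  max_threads: 10,               # sizes the worker thread pool
  job_sets:    [{ id: :my_job }] # hypothetical job configuration
)

manager.run   # starts the 'heartbeat' thread
sleep 60      # ... the process runs its jobs ...
manager.quiet # stop scheduling workers (sets @done, zeroes worker counts)
manager.stop  # drain the pool, wait for shutdown_timeout, clear the heartbeat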
data/lib/process_balancer/private/cancellation.rb
@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+require 'concurrent/options'
+require 'concurrent/atomic/atomic_reference'
+require 'concurrent/promises'
+
+# This is a private copy of Concurrent::Cancellation from concurrent-ruby-edge so we do not depend on the edge gem
+
+module ProcessBalancer
+  module Private
+    Synchronization = Concurrent::Synchronization
+    Promises = Concurrent::Promises
+    CancelledOperationError = Concurrent::CancelledOperationError
+
+    # TODO (pitr-ch 27-Mar-2016): cooperation with mutex, condition, select etc?
+    # TODO (pitr-ch 10-Dec-2018): integrate with enumerator?
+    #   token.cancelable(array.each_with_index).each do |v, i|
+    #     # stops iterating when cancelled
+    #   end
+    #   token.cancelable(array).each_with_index do |v, i|
+    #     # stops iterating when cancelled
+    #   end
+
+    # The Cancellation abstraction provides cooperative cancellation.
+    #
+    # The standard methods `Thread#raise` or `Thread#kill` available in Ruby
+    # are very dangerous (see the linked blog posts below).
+    # Therefore concurrent-ruby provides an alternative.
+    # * <https://jvns.ca/blog/2015/11/27/why-rubys-timeout-is-dangerous-and-thread-dot-raise-is-terrifying/>
+    # * <http://www.mikeperham.com/2015/05/08/timeout-rubys-most-dangerous-api/>
+    # * <http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html>
+    #
+    # It provides an object which represents a task which can be executed,
+    # the task has to get the reference to the object and periodically cooperatively check that it is not cancelled.
+    # Good practices to make tasks cancellable:
+    # * check cancellation every cycle of a loop which does significant work,
+    # * do all blocking actions in a loop with a timeout then on timeout check cancellation
+    #   and if ok block again with the timeout
+    #
+    # The idea was inspired by <https://msdn.microsoft.com/en-us/library/dd537607(v=vs.110).aspx>
+    # @!macro warn.edge
+    #
+    # {include:file:docs-source/cancellation.out.md}
+    class Cancellation < Synchronization::Object
+      safe_initialization!
+
+      # Create Cancellation which will cancel itself in given time
+      #
+      # @!macro promises.param.intended_time
+      # @return [Cancellation]
+      def self.timeout(intended_time)
+        new Concurrent::Promises.schedule(intended_time)
+      end
+
+      # Creates the cancellation object.
+      #
+      # @param [Promises::Future, Promises::Event] origin of the cancellation.
+      #   When it is resolved the cancellation is canceled.
+      # @example
+      #   cancellation, origin = Concurrent::Cancellation.new
+      # @see #to_ary
+      def initialize(origin = Promises.resolvable_event)
+        super()
+        @Origin = origin
+      end
+
+      # Allow to multi-assign the Cancellation object
+      # @return [Array(Cancellation, Promises::Future), Array(Cancellation, Promises::Event)]
+      # @example
+      #   cancellation = Concurrent::Cancellation.new
+      #   cancellation, origin = Concurrent::Cancellation.new
+      def to_ary
+        [self, @Origin]
+      end
+
+      # The event or future which is the origin of the cancellation
+      # @return [Promises::Future, Promises::Event]
+      def origin
+        @Origin
+      end
+
+      # Is the cancellation cancelled?
+      # Respectively, was the origin of the cancellation resolved.
+      # @return [true, false]
+      def canceled?
+        @Origin.resolved?
+      end
+
+      # Raise error when cancelled
+      # @param [#exception] error to be raised
+      # @raise the error
+      # @return [self]
+      def check!(error = CancelledOperationError)
+        raise error if canceled?
+        self
+      end
+
+      # Creates a new Cancellation which is cancelled when first
+      # of the supplied cancellations or self is cancelled.
+      #
+      # @param [Cancellation] cancellations to combine
+      # @return [Cancellation] new cancellation
+      def join(*cancellations)
+        Cancellation.new Promises.any_event(*[@Origin, *cancellations.map(&:origin)])
+      end
+
+      # Short string representation.
+      # @return [String]
+      def to_s
+        format '%s %s>', super[0..-2], canceled? ? 'canceled' : 'pending'
+      end
+
+      alias_method :inspect, :to_s
+    end
+  end
+end
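For orientation, a small cooperative-cancellation example in the style of the upstream Concurrent::Cancellation documentation; it is not taken from this gem's own code:

# The origin is a resolvable event; resolving it cancels the token.
cancellation, origin = ProcessBalancer::Private::Cancellation.new

worker = Thread.new do
  until cancellation.canceled? # cooperative check on every iteration
    # ... one unit of work ...
    sleep 0.1
  end
end

origin.resolve # request cancellation
worker.join    # the thread notices the flag and exits cleanly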
data/lib/process_balancer/rails.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module ProcessBalancer
+  # Rails integration
+  class Rails < ::Rails::Engine
+    config.after_initialize do
+      ProcessBalancer.configure do |config|
+        if config.server?
+          config.options[:reloader] = ProcessBalancer::Rails::Reloader.new
+        end
+      end
+    end
+
+    # cleanup active record connections
+    class Reloader
+      def initialize(app = ::Rails.application)
+        @app = app
+      end
+
+      def call
+        @app.reloader.wrap do
+          yield
+        end
+        # ensure
+        #   ActiveRecord::Base.clear_active_connections!
+      end
+    end
+  end
+end
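A minimal sketch of how the Reloader is intended to be called around a unit of job work; the MyJob constant is hypothetical, and in the gem the worker code performs this wrapping internally:

# Wrap job execution so Rails code reloading happens around each run.
reloader = ProcessBalancer::Rails::Reloader.new
reloader.call do
  MyJob.perform # hypothetical job invocation
end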
data/lib/process_balancer/redis_connection.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+require 'connection_pool'
+require 'redis'
+
+module ProcessBalancer
+  module RedisConnection # :nodoc:
+    def self.create(options = {})
+      options[:url] = determine_redis_provider
+      size = options[:size] || 2
+      pool_timeout = options[:pool_timeout] || 1
+      ConnectionPool.new(timeout: pool_timeout, size: size) do
+        build_client(options)
+      end
+    end
+
+    class << self
+      private
+
+      def determine_redis_provider
+        if ENV['REDIS_PROVIDER'] =~ /[^A-Za-z_]/
+          ProcessBalancer.logger.error 'REDIS_PROVIDER should be set to the name of the environment variable that contains the redis URL'
+        end
+        ENV[
+          ENV['REDIS_PROVIDER'] || 'REDIS_URL'
+        ]
+      end
+
+      def client_opts(options)
+        opts = options.dup
+        opts.delete(:namespace)
+
+        opts[:driver] ||= Redis::Connection.drivers.last || 'ruby'
+        opts[:reconnect_attempts] ||= 1
+        opts
+      end
+
+      def build_client(options)
+        namespace = options[:namespace]
+
+        client = Redis.new client_opts(options)
+        if namespace
+          begin
+            require 'redis/namespace'
+            Redis::Namespace.new(namespace, redis: client)
+          rescue LoadError
+            ProcessBalancer.logger.error "Your redis configuration uses namespace '#{namespace}' but redis-namespace gem is not in your Gemfile"
+            exit(-127)
+          end
+        else
+          client
+        end
+      end
+    end
+  end
+end
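A short usage sketch, assuming REDIS_URL (or the variable named by REDIS_PROVIDER) points at a reachable Redis; the pool size and timeout below are arbitrary:

# Create a pooled Redis connection and check a client out of the pool.
pool = ProcessBalancer::RedisConnection.create(size: 5, pool_timeout: 2)

pool.with do |conn| # ConnectionPool#with yields a Redis client
  conn.ping         # => "PONG"
end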