process_balancer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.adoc +90 -0
- data/Rakefile +8 -0
- data/TODO.adoc +1 -0
- data/exe/process_balancer +17 -0
- data/lib/process_balancer.rb +138 -0
- data/lib/process_balancer/base.rb +80 -0
- data/lib/process_balancer/cli.rb +257 -0
- data/lib/process_balancer/lock/advisory_lock.rb +21 -0
- data/lib/process_balancer/lock/simple_redis.rb +79 -0
- data/lib/process_balancer/manager.rb +185 -0
- data/lib/process_balancer/private/cancellation.rb +116 -0
- data/lib/process_balancer/rails.rb +29 -0
- data/lib/process_balancer/redis_connection.rb +56 -0
- data/lib/process_balancer/util.rb +37 -0
- data/lib/process_balancer/version.rb +5 -0
- data/lib/process_balancer/watcher.rb +113 -0
- data/lib/process_balancer/worker.rb +76 -0
- data/process_balancer.gemspec +45 -0
- metadata +273 -0
data/lib/process_balancer/lock/advisory_lock.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module ProcessBalancer
+  module Lock
+    # This is lock implementation using advisory locks on the database via the with_advisory_lock gem
+    module AdvisoryLock
+      # class to wrap the lock handling and provide the "extend!" method contract
+      class DummyLock
+        def extend!; end
+      end
+
+      def worker_lock
+        key = "worker_lock_#{job_id}_#{worker_index}"
+        lock = DummyLock.new
+        ActiveRecord::Base.with_advisory_lock(key) do
+          yield lock
+        end
+      end
+    end
+  end
+end
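As a usage note (not part of the diff): a minimal sketch of how this strategy might be mixed into a job runner, assuming ActiveRecord and the with_advisory_lock gem are already set up; the class name, job_id, worker_index, and sync_orders below are illustrative.

require 'with_advisory_lock'
require 'process_balancer/lock/advisory_lock'

class OrderSyncRunner
  include ProcessBalancer::Lock::AdvisoryLock

  # worker_lock builds the advisory-lock key from these two values
  def job_id
    :order_sync
  end

  def worker_index
    0
  end

  def perform
    worker_lock do |lock|
      lock.extend! # no-op for DummyLock; the DB holds the advisory lock for the whole block
      sync_orders  # hypothetical unit of work
    end
  end
end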
data/lib/process_balancer/lock/simple_redis.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+module ProcessBalancer
+  module Lock
+    # This is a simple implementation of a lock to ensure only one job runner is running for a worker
+    # This is only save for a single redis instance setup
+    # something more resilient should be used instead,
+    # e.g. and advisory lock in a DB or using RedLock ( https://github.com/leandromoreira/redlock-rb )
+    module SimpleRedis
+      def self.time_source
+        @time_source ||= if defined?(Process::CLOCK_MONOTONIC)
+                           proc { (Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1000).to_i }
+                         else
+                           proc { (Time.now.to_f * 1000).to_i }
+                         end
+      end
+
+      # class to wrap the lock handling and provide the "extend!" method contract
+      class LockHandler
+        def initialize(key, value, ttl)
+          @key = key
+          @value = value
+          @ttl = ttl
+          @acquired = false
+        end
+
+        def acquire!
+          time_source = ProcessBalancer::Lock::SimpleRedis.time_source
+
+          timeout_ms = 5000
+          wait_time = 0.02..0.1
+          start = time_source.call
+
+          sleep(rand(wait_time)) while !(@acquired = try_lock) && (time_source.call - start) < timeout_ms
+        end
+
+        def release!
+          ProcessBalancer.redis do |c|
+            c.del(@key)
+          end
+        end
+
+        def acquired?
+          @acquired
+        end
+
+        def try_lock
+          ProcessBalancer.redis do |c|
+            c.set(@key, @value, nx: true, ex: @ttl)
+          end
+        end
+
+        def extend!
+          ProcessBalancer.redis do |c|
+            c.watch(@key)
+            if c.get(@key) == @value
+              c.multi do
+                c.set(@key, @value, ex: @ttl)
+              end
+            end
+          end
+        end
+      end
+
+      def worker_lock
+        lock = LockHandler.new("lock_#{job_id}_#{worker_index}", ProcessBalancer.identity, runtime_lock_timeout)
+        lock.acquire!
+
+        if lock.acquired?
+          begin
+            yield lock
+          ensure
+            lock.release!
+          end
+        end
+      end
+    end
+  end
+end
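Again as a usage note rather than part of the diff: the Redis-backed strategy follows the same worker_lock contract, but here extend! actually refreshes the lock's TTL, so long-running work would call it periodically. A minimal sketch, assuming the host class supplies job_id, worker_index, and runtime_lock_timeout (the TTL in seconds); the batch-processing body and process method are illustrative.

class ImportRunner
  include ProcessBalancer::Lock::SimpleRedis

  def job_id
    :import
  end

  def worker_index
    0
  end

  def runtime_lock_timeout
    30 # seconds; used as the Redis key TTL
  end

  def perform(records)
    worker_lock do |lock|
      records.each_slice(100) do |batch|
        process(batch) # hypothetical work
        lock.extend!   # re-set the key's TTL so the lock outlives long batches
      end
    end
  end
end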
data/lib/process_balancer/manager.rb
@@ -0,0 +1,185 @@
+# frozen_string_literal: true
+
+require 'English'
+require 'json'
+
+require_relative 'util'
+require_relative 'watcher'
+
+require 'concurrent/atomic/atomic_fixnum'
+require 'concurrent/executor/thread_pool_executor'
+
+module ProcessBalancer
+  class Manager # :nodoc:
+    include Util
+
+    def initialize(options)
+      @options = options
+      @done = false
+      @process_index = Concurrent::AtomicFixnum.new(-1)
+      @process_count = Concurrent::AtomicFixnum.new(0)
+      @pool = Concurrent::ThreadPoolExecutor.new(max_threads: @options[:max_threads], fallback_policy: :discard)
+
+      setup_job_watchers
+    end
+
+    def process_count
+      @process_count.value
+    end
+
+    def process_index
+      v = @process_index.value
+      v == -1 ? nil : v
+    end
+
+    def workers_for_job(job_id)
+      stopping? ? 0 : ProcessBalancer.scheduled_workers(job_id)
+    end
+
+    def run
+      @thread = start_thread('heartbeat', &method(:run_heartbeat))
+    end
+
+    def quiet
+      return if @done
+
+      @done = true
+
+      update_jobs
+    end
+
+    def stop
+      quiet
+
+      @pool.shutdown
+      @pool.wait_for_termination(ProcessBalancer.options[:shutdown_timeout])
+      @pool.kill
+
+      clear_heartbeat
+    end
+
+    def stopping?
+      @done
+    end
+
+    private
+
+    def process_count=(value)
+      @process_count.value = value
+    end
+
+    def process_index=(value)
+      v = value.nil? ? -1 : value
+
+      @process_index.value = v
+    end
+
+    def run_heartbeat
+      loop do
+        heartbeat
+        sleep 5
+      end
+      logger.info('Heartbeat stopping...')
+    end
+
+    def clear_heartbeat
+      redis do |c|
+        c.lrem(PROCESSES_KEY, 0, identity)
+      end
+    rescue StandardError
+      # ignore errors
+    end
+
+    def heartbeat
+      update_process_index
+
+      _exists, msg = update_state
+
+      if msg
+        ::Process.kill(msg, $PID)
+      else
+        update_jobs
+      end
+    rescue StandardError => e
+      logger.error("heartbeat: #{e.message} @ #{e.backtrace_locations&.first || ''}")
+    end
+
+    def update_state
+      exists, _, _, msg = redis do |c|
+        c.multi do
+          c.exists(identity)
+          c.hmset(identity, 'info', info_json, 'beat', Time.now.to_f, 'quiet', @done, 'worker', process_index)
+          c.expire(identity, 60)
+          c.rpop("#{identity}-signals")
+        end
+      end
+
+      [exists, msg]
+    end
+
+    def update_process_index
+      redis do |c|
+        c.watch(PROCESSES_KEY)
+
+        workers = c.lrange(PROCESSES_KEY, 0, -1)
+        num_workers = workers.size
+        index = workers.find_index(identity)
+
+        if index.nil?
+          new_length = c.multi do
+            c.rpush(PROCESSES_KEY, identity)
+          end
+          unless new_length.nil?
+            num_workers = new_length.first
+            index = new_length.first - 1
+          end
+        else
+          c.unwatch
+        end
+        self.process_index = index
+        self.process_count = num_workers
+      end
+    end
+
+    def update_jobs
+      watcher_stats = {}
+      @watchers.each do |job_id, watcher|
+        watcher.update_worker_config(process_index, process_count, workers_for_job(job_id))
+        watcher_stats[job_id] = JSON.dump(watcher.stats)
+      end
+
+      workers_key = "#{identity}:workers"
+      redis do |c|
+        c.multi do
+          c.del(workers_key)
+          watcher_stats.each do |job_id, stats_data|
+            c.hset(workers_key, job_id, stats_data)
+          end
+          c.expire(workers_key, 60)
+        end
+      end
+    end
+
+    def setup_job_watchers
+      @watchers = {}
+      @options.fetch(:job_sets, []).each do |job_config|
+        job_id = job_config[:id]
+        logger.debug "Starting watcher for #{job_id}"
+        @watchers[job_id] = Watcher.new(@pool, job_config)
+      end
+    end
+
+    def info
+      @info ||= {
+        hostname: hostname,
+        pid: ::Process.pid,
+        identity: identity,
+        max_threads: @options[:max_threads],
+      }
+    end
+
+    def info_json
+      @info_json ||= JSON.dump(info)
+    end
+  end
+end
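For orientation (outside the diff): a rough sketch of the lifecycle the methods above imply. The option values are illustrative; only :max_threads, :job_sets (each entry carrying an :id), and the global :shutdown_timeout are keys the class is shown reading.

manager = ProcessBalancer::Manager.new(
  max_threads: 10,
  job_sets:    [{ id: :my_job }], # watcher-specific options omitted (illustrative)
)

manager.run    # starts the heartbeat thread: each beat registers this process,
               # finds its index in the shared process list, and rebalances workers

# ... later, on shutdown ...
manager.quiet  # stop scheduling workers but keep heartbeating
manager.stop   # quiet + drain the thread pool (up to :shutdown_timeout), then clear the heartbeat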
data/lib/process_balancer/private/cancellation.rb
@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+require 'concurrent/options'
+require 'concurrent/atomic/atomic_reference'
+require 'concurrent/promises'
+
+# This is a private copy of Concurrent::Cancellation from concurrent-ruby-edge so we do not depend on the edge gem
+
+module ProcessBalancer
+  module Private
+    Synchronization = Concurrent::Synchronization
+    Promises = Concurrent::Promises
+    CancelledOperationError = Concurrent::CancelledOperationError
+
+    # TODO (pitr-ch 27-Mar-2016): cooperation with mutex, condition, select etc?
+    # TODO (pitr-ch 10-Dec-2018): integrate with enumerator?
+    #   token.cancelable(array.each_with_index).each do |v, i|
+    #     # stops iterating when cancelled
+    #   end
+    #   token.cancelable(array).each_with_index do |v, i|
+    #     # stops iterating when cancelled
+    #   end
+
+    # The Cancellation abstraction provides cooperative cancellation.
+    #
+    # The standard methods `Thread#raise` of `Thread#kill` available in Ruby
+    # are very dangerous (see linked the blog posts bellow).
+    # Therefore concurrent-ruby provides an alternative.
+    # * <https://jvns.ca/blog/2015/11/27/why-rubys-timeout-is-dangerous-and-thread-dot-raise-is-terrifying/>
+    # * <http://www.mikeperham.com/2015/05/08/timeout-rubys-most-dangerous-api/>
+    # * <http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html>
+    #
+    # It provides an object which represents a task which can be executed,
+    # the task has to get the reference to the object and periodically cooperatively check that it is not cancelled.
+    # Good practices to make tasks cancellable:
+    # * check cancellation every cycle of a loop which does significant work,
+    # * do all blocking actions in a loop with a timeout then on timeout check cancellation
+    #   and if ok block again with the timeout
+    #
+    # The idea was inspired by <https://msdn.microsoft.com/en-us/library/dd537607(v=vs.110).aspx>
+    # @!macro warn.edge
+    #
+    # {include:file:docs-source/cancellation.out.md}
+    class Cancellation < Synchronization::Object
+      safe_initialization!
+
+      # Create Cancellation which will cancel itself in given time
+      #
+      # @!macro promises.param.intended_time
+      # @return [Cancellation]
+      def self.timeout(intended_time)
+        new Concurrent::Promises.schedule(intended_time)
+      end
+
+      # Creates the cancellation object.
+      #
+      # @param [Promises::Future, Promises::Event] origin of the cancellation.
+      #   When it is resolved the cancellation is canceled.
+      # @example
+      #   cancellation, origin = Concurrent::Cancellation.new
+      # @see #to_ary
+      def initialize(origin = Promises.resolvable_event)
+        super()
+        @Origin = origin
+      end
+
+      # Allow to multi-assign the Cancellation object
+      # @return [Array(Cancellation, Promises::Future), Array(Cancellation, Promises::Event)]
+      # @example
+      #   cancellation = Concurrent::Cancellation.new
+      #   cancellation, origin = Concurrent::Cancellation.new
+      def to_ary
+        [self, @Origin]
+      end
+
+      # The event or future which is the origin of the cancellation
+      # @return [Promises::Future, Promises::Event]
+      def origin
+        @Origin
+      end
+
+      # Is the cancellation cancelled?
+      # Respective, was the origin of the cancellation resolved.
+      # @return [true, false]
+      def canceled?
+        @Origin.resolved?
+      end
+
+      # Raise error when cancelled
+      # @param [#exception] error to be risen
+      # @raise the error
+      # @return [self]
+      def check!(error = CancelledOperationError)
+        raise error if canceled?
+        self
+      end
+
+      # Creates a new Cancellation which is cancelled when first
+      # of the supplied cancellations or self is cancelled.
+      #
+      # @param [Cancellation] cancellations to combine
+      # @return [Cancellation] new cancellation
+      def join(*cancellations)
+        Cancellation.new Promises.any_event(*[@Origin, *cancellations.map(&:origin)])
+      end
+
+      # Short string representation.
+      # @return [String]
+      def to_s
+        format '%s %s>', super[0..-2], canceled? ? 'canceled' : 'pending'
+      end
+
+      alias_method :inspect, :to_s
+    end
+  end
+end
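The docstring above describes cooperative checking; a minimal sketch of that pattern with this private copy (the worker loop and do_one_step are illustrative):

cancellation, origin = ProcessBalancer::Private::Cancellation.new

worker = Thread.new do
  do_one_step until cancellation.canceled? # hypothetical short, non-blocking step
end

origin.resolve # cancel: the loop exits after the current step
worker.join

# Deadline-based variant:
timed = ProcessBalancer::Private::Cancellation.timeout(30) # cancels itself after ~30 s
timed.check! # raises Concurrent::CancelledOperationError once canceled, else returns self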
data/lib/process_balancer/rails.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module ProcessBalancer
+  # Rails integration
+  class Rails < ::Rails::Engine
+    config.after_initialize do
+      ProcessBalancer.configure do |config|
+        if config.server?
+          config.options[:reloader] = ProcessBalancer::Rails::Reloader.new
+        end
+      end
+    end
+
+    # cleanup active record connections
+    class Reloader
+      def initialize(app = ::Rails.application)
+        @app = app
+      end
+
+      def call
+        @app.reloader.wrap do
+          yield
+        end
+      # ensure
+      #   ActiveRecord::Base.clear_active_connections!
+      end
+    end
+  end
+end
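A hedged sketch of how a worker loop might use the reloader registered above, assuming the configured options are reachable via ProcessBalancer.options (as manager.rb does for :shutdown_timeout); run_one_job is illustrative.

reloader = ProcessBalancer.options[:reloader]

if reloader
  reloader.call { run_one_job } # wraps the work in Rails' reloader (code reloading, AR housekeeping)
else
  run_one_job
end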
data/lib/process_balancer/redis_connection.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+require 'connection_pool'
+require 'redis'
+
+module ProcessBalancer
+  module RedisConnection # :nodoc:
+    def self.create(options = {})
+      options[:url] = determine_redis_provider
+      size = options[:size] || 2
+      pool_timeout = options[:pool_timeout] || 1
+      ConnectionPool.new(timeout: pool_timeout, size: size) do
+        build_client(options)
+      end
+    end
+
+    class << self
+      private
+
+      def determine_redis_provider
+        if ENV['REDIS_PROVIDER'] =~ /[^A-Za-z_]/
+          ProcessBalancer.logger.error 'REDIS_PROVIDER should be set to the name of the environment variable that contains the redis URL'
+        end
+        ENV[
+          ENV['REDIS_PROVIDER'] || 'REDIS_URL'
+        ]
+      end
+
+      def client_opts(options)
+        opts = options.dup
+        opts.delete(:namespace)
+
+        opts[:driver] ||= Redis::Connection.drivers.last || 'ruby'
+        opts[:reconnect_attempts] ||= 1
+        opts
+      end
+
+      def build_client(options)
+        namespace = options[:namespace]
+
+        client = Redis.new client_opts(options)
+        if namespace
+          begin
+            require 'redis/namespace'
+            Redis::Namespace.new(namespace, redis: client)
+          rescue LoadError
+            ProcessBalancer.logger.error "Your redis configuration uses namespace '#{namespace}' but redis-namespace gem is not in your Gemfile"
+            exit(-127)
+          end
+        else
+          client
+        end
+      end
+    end
+  end
end
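Finally, a minimal sketch of building and using the pool; the sizes are illustrative, and the URL is read from REDIS_URL (or from the variable named by REDIS_PROVIDER), exactly as determine_redis_provider does above.

# e.g. REDIS_URL=redis://localhost:6379/0 in the environment
pool = ProcessBalancer::RedisConnection.create(size: 5, pool_timeout: 2)

pool.with do |conn|
  conn.ping # => "PONG"
end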