collective 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +13 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/Gemfile +16 -0
- data/Guardfile +6 -0
- data/README +7 -0
- data/Rakefile +11 -0
- data/bin/collective +37 -0
- data/collective.gemspec +23 -0
- data/demo/demo +36 -0
- data/demo/demo.rb +30 -0
- data/demo/demo3 +36 -0
- data/demo/job1.rb +31 -0
- data/demo/job2.rb +42 -0
- data/demo/job3.rb +44 -0
- data/demo/populate.rb +22 -0
- data/lib/collective.rb +52 -0
- data/lib/collective/checker.rb +51 -0
- data/lib/collective/configuration.rb +219 -0
- data/lib/collective/idler.rb +81 -0
- data/lib/collective/key.rb +48 -0
- data/lib/collective/lifecycle_observer.rb +25 -0
- data/lib/collective/log.rb +29 -0
- data/lib/collective/messager.rb +218 -0
- data/lib/collective/mocks/storage.rb +108 -0
- data/lib/collective/monitor.rb +58 -0
- data/lib/collective/policy.rb +60 -0
- data/lib/collective/pool.rb +180 -0
- data/lib/collective/redis/storage.rb +142 -0
- data/lib/collective/registry.rb +123 -0
- data/lib/collective/squiggly.rb +20 -0
- data/lib/collective/utilities/airbrake_observer.rb +26 -0
- data/lib/collective/utilities/hoptoad_observer.rb +26 -0
- data/lib/collective/utilities/log_observer.rb +40 -0
- data/lib/collective/utilities/observeable.rb +18 -0
- data/lib/collective/utilities/observer_base.rb +59 -0
- data/lib/collective/utilities/process.rb +82 -0
- data/lib/collective/utilities/signal_hook.rb +47 -0
- data/lib/collective/utilities/storage_base.rb +41 -0
- data/lib/collective/version.rb +3 -0
- data/lib/collective/worker.rb +161 -0
- data/spec/checker_spec.rb +20 -0
- data/spec/configuration_spec.rb +24 -0
- data/spec/helper.rb +33 -0
- data/spec/idler_spec.rb +58 -0
- data/spec/key_spec.rb +41 -0
- data/spec/messager_spec.rb +131 -0
- data/spec/mocks/storage_spec.rb +108 -0
- data/spec/monitor_spec.rb +15 -0
- data/spec/policy_spec.rb +43 -0
- data/spec/pool_spec.rb +119 -0
- data/spec/redis/storage_spec.rb +133 -0
- data/spec/registry_spec.rb +52 -0
- data/spec/support/jobs.rb +58 -0
- data/spec/support/redis.rb +22 -0
- data/spec/support/timing.rb +32 -0
- data/spec/utilities/observer_base_spec.rb +50 -0
- data/spec/utilities/process_spec.rb +17 -0
- data/spec/worker_spec.rb +121 -0
- data/unused/times.rb +45 -0
- metadata +148 -0
@@ -0,0 +1,108 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

require "timeout"   # Timeout::Error is raised below; was used without being required

# In-memory stand-in for the Redis-backed storage, used by tests and the
# :mock storage policy. Implements the same interface: simple values,
# sets, maps, and a score-ordered priority queue, all held in one Hash.
# Not thread-safe; intended for single-process test use.
class Collective::Mocks::Storage

  def initialize
    @storage = {}
  end

  # The mock holds no connections, so forking requires no action.
  def reconnect_after_fork
    # nop
  end

  # Simple values

  def put( key, value )
    @storage[key] = value
  end

  def get( key )
    @storage[key]
  end

  def del( key )
    @storage.delete( key )
  end

  # Sets

  # Add value to the set at key; duplicates are ignored.
  def set_add( key, value )
    @storage[key] ||= []
    @storage[key] << value unless @storage[key].member?(value)
  end

  def set_size( key )
    (@storage[key] || [] ).size
  end

  def set_remove( key, value )
    (@storage[key] || [] ).delete( value )
  end

  def set_member?( key, value )
    (@storage[key] || []).member?( value )
  end

  def set_get_all( key )
    @storage[key] || []
  end

  # Maps

  def map_set( key, name, value )
    @storage[key] ||= {}
    @storage[key][name] = value
  end

  def map_get( key, name )
    (@storage[key] || {}) [name]
  end

  def map_get_all_keys( key )
    (@storage[key] || {}).keys
  end

  def map_size( key )
    (@storage[key] || {} ).size
  end

  def map_del( key )
    @storage.delete( key )
  end

  # Priority Queue
  # Stored as an array of [item, score] pairs kept sorted by score,
  # mirroring a Redis sorted set.

  def queue_add( queue_name, item, score )
    queue = @storage[queue_name] ||= []
    queue << [ item, score ]
    queue.sort_by! { |it| it.last }   # keep lowest score first
  end

  # pop the lowest item from the queue IFF it scores <= max_score
  def queue_pop( queue_name, max_score = Time.now.to_i )
    queue = @storage[queue_name] || []
    return nil if queue.size == 0
    if queue.first.last <= max_score then
      queue.shift.first
    else
      nil
    end
  end

  # Like queue_pop, but polls until an item is available or the timeout
  # expires.
  # @param options [Hash] :timeout in seconds (default 1)
  # @raise [Timeout::Error] when nothing can be popped before the deadline
  def queue_pop_sync( queue_name, max_score = Time.now.to_i, options = {} )
    timeout  = options[:timeout] || 1
    deadline = Time.now.to_f + timeout

    loop do
      result = queue_pop( queue_name, max_score )
      return result if result

      raise Timeout::Error if Time.now.to_f > deadline
      sleep 0.01   # was a hot spin; yield the CPU between polls
    end
  end

  def queue_del( queue_name )
    @storage.delete( queue_name )
  end

end # Collective::Mocks::Storage
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# NOTE: removed leftover `require 'ruby-debug'` — a debugger dependency
# accidentally shipped in production code.

# Supervises one pool per configured job kind, periodically
# synchronizing each pool against its policy until TERM is received.
class Collective::Monitor

  include Collective::Log

  attr :pools

  # @param configuration responds to #jobs, a map of kind => options
  def initialize( configuration )
    @pools = configuration.jobs.map do |kind,options|
      Collective::Pool.new( kind, Collective::Policy.resolve(options) )
    end
  end


  # Check every pool in a loop; remember the last status per pool so
  # the idler can back off while nothing changes.
  def monitor
    status = {}

    job = ->() do
      changed = false
      pools.each do |pool|

        log pool.name
        previous = status[pool.name]
        current  = pool.synchronize log: true

        if previous != current then
          status[pool.name] = current
          changed = true
        end

      end
      changed
    end

    # Idler sleeps between passes, backing off toward max_sleep while
    # the job reports no changes.
    job = Collective::Idler.new( job, min_sleep: 1, max_sleep: 10 )

    ok = true
    trap("TERM") { ok = false }
    while ok do
      job.call
    end
  end # monitor

  # Tell every pool to shut down all of its workers.
  def stop_all
    pools.each do |pool|
      pool.stop_all
    end
  end

  def restart
    pools.each do |pool|
      pool.restart
    end
  end

end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

require "ostruct"

# Policy wraps the tunable knobs that control pool and worker behavior.
# Reading an unknown option returns nil (see Instance#method_missing).
class Collective::Policy

  DEFAULTS = {
    pool_min_workers: 1,
    pool_max_workers: 10,
    worker_idle_max_sleep: 64.0,
    worker_idle_min_sleep: 0.125,
    worker_idle_spin_down: 900,
    worker_none_spin_up: 86400,
    worker_max_jobs: 100,        # a worker should automatically exit after this many jobs
    worker_max_lifetime: 1000,   # a worker should automatically exit after this time
    worker_late: 10,             # a worker is overdue after this time with no heartbeat
    worker_hung: 100,            # a worker will be killed after this time
    storage: :mock,
    observers: []
  }.freeze                       # constant; merge below is non-destructive

  class Instance

    # including options[:policy] will merge over these options
    def initialize( options = {} )
      if options[:policy] then
        policy   = options.delete(:policy)
        defaults = policy.dup
      else
        defaults = DEFAULTS
      end

      options  = Hash[ options.map { |k,v| [ k.to_sym, v ] } ] # poor man's symbolize keys
      @options = defaults.merge( options )
    end

    # Resolve the configured storage symbol/instance into a storage object.
    def storage
      Collective::Utilities::StorageBase.resolve @options[:storage]
    end

    # Any unrecognized message reads the option of the same name;
    # missing options read as nil.
    def method_missing( symbol, *arguments )
      @options[symbol.to_sym]
    end

    # Keep respond_to? consistent with method_missing (was missing,
    # so respond_to? lied about the dynamic option readers).
    def respond_to_missing?( symbol, include_private = false )
      @options.key?( symbol.to_sym ) || super
    end

    # NOTE: returns a copy of the underlying options Hash, not an
    # Instance; resolve() relies on this to re-wrap either form.
    def dup
      @options.dup
    end

  end # Instance

  class << self

    def resolve( options = {} )
      # this will dup either an Instance or a Hash
      Collective::Policy::Instance.new(options.dup)
    end

  end # class

end # Collective::Policy
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

=begin

A pool is a collection of workers, each of which is a separate process.
All workers are of the same kind (class).

=end

class Collective::Pool

  include Collective::Log

  attr :kind       # job class
  attr :name
  attr :policy
  attr :registry
  attr :storage    # where to store worker details

  # @param kind the job class, or a [kind, policy] pair
  # @param policy_prototype a Policy::Instance or options Hash
  # @raise [Collective::ConfigurationError] when no name can be derived
  def initialize( kind, policy_prototype = {} )
    if kind.kind_of?(Array) then
      kind, policy_prototype = kind.first, kind.last
    end
    @kind = kind
    @policy = Collective::Policy.resolve(policy_prototype) or raise
    @name = @policy.name || kind.name or raise Collective::ConfigurationError, "Pool or Job must have a name"
    @storage = policy.storage
    @registry = Collective::Registry.new( name, storage )

    # type checks
    policy.pool_min_workers
    registry.workers
  end


  # Reconcile the live worker count with the policy: spawn when under
  # pool_min_workers, reap when over pool_max_workers (preferring
  # local workers so a TERM signal can be used instead of a message).
  # @param options[:log] can be true
  # @returns the checked worker lists
  def synchronize( options = {} )
    do_log = options.delete(:log)
    raise if options.size > 0

    checklist = registry.checked_workers( policy )
    live_count = checklist.live.size

    if do_log then
      check_live_workers( checklist )
      check_late_workers( checklist )
      check_hung_workers( checklist )
      check_dead_workers( checklist )
    end

    if (need = policy.pool_min_workers - live_count) > 0 then
      # launch workers
      need.times do
        spawn wait: true
      end

    elsif (excess = live_count - policy.pool_max_workers) > 0 then
      # spin down some workers
      # try to find LOCAL workers to spin down first
      locals = checklist.live.select { |k| k.host == Collective::Key.local_host }
      if locals.size > 0 then
        reap locals.first, wait: true
      else
        reap checklist.live.first, wait: true
      end
    end

    # re-check after any spawn/reap so the caller sees current state
    checklist = registry.checked_workers( policy )
  end


  # Lazily-built message queue endpoint for this pool, used to ask
  # remote (non-local-host) workers to quit.
  def mq
    @mq ||= begin
      key = Collective::Key.new( "#{name}-pool", Process.pid )
      me = Collective::Messager.new storage, my_address: key
    end
  end


  # tell all workers to quit
  def stop_all
    checklist = registry.checked_workers( policy )
    checklist.live.each { |key| reap(key) }
    checklist.late.each { |key| reap(key) }
    checklist.hung.each { |key| reap(key) }
    checklist.dead.each { |key| reap(key) }
  end


  # this really should be protected but it's convenient to be able to force a spawn
  # param options[:wait] can true to wait until after the process is spawned
  def spawn( options = {} )
    wait = options.delete(:wait)
    raise if options.size > 0

    if ! wait then
      Collective::Worker.spawn kind, registry: registry, policy: policy, name: name
      return
    end

    # snapshot the live set so the new worker can be detected below
    before = registry.checked_workers( policy ).live

    Collective::Worker.spawn kind, registry: registry, policy: policy, name: name

    # block (up to 10s) until a NEW local worker appears in the registry
    Collective::Idler.wait_until( 10 ) do
      after = registry.checked_workers( policy ).live
      diff = ( after - before ).select { |k| k.host == Collective::Key.local_host }
      diff.size > 0
    end
  end


  # shut down a worker
  # Local workers get SIGTERM (then forced termination after a grace
  # period); remote workers are asked to quit via the message queue.
  def reap( key, options = {} )
    wait = options.delete(:wait)
    raise if options.size > 0

    if key.host == Collective::Key.local_host then
      ::Process.kill( "TERM", key.pid )
      Collective::Utilities::Process.wait_and_terminate key.pid, timeout: 10
    else
      mq.send "Quit", to: key
    end

    if wait then
      # block (up to 10s) until the worker drops out of the live list
      Collective::Idler.wait_until( 10 ) do
        live = registry.checked_workers( policy ).live
        ! live.member? key
      end
    end
  end

  # ----------------------------------------------------------------------------
  protected
  # ----------------------------------------------------------------------------

  # Log the live worker list; returns the live count.
  def check_live_workers( checked )
    if live = checked.live and live.size > 0 then
      log "Live worker count #{live.size}; members: #{live.inspect}"
      live.size
    else
      0
    end
  end


  # Log the late (heartbeat overdue) worker list; returns the late count.
  def check_late_workers( checked )
    if late = checked.late and late.size > 0 then
      log "Late worker count #{late.size}; members: #{late.inspect}"
      late.size
    else
      0
    end
  end


  # Kill and unregister hung workers; always returns 0.
  def check_hung_workers( checked )
    if hung = checked.hung and hung.size > 0 then
      log "Hung worker count #{hung.size}"
      hung.each do |key|
        log "Killing #{key}"
        Collective::Utilities::Process.wait_and_terminate( key.pid )
        registry.unregister(key)
      end
    end
    0
  end


  # Log the dead worker list; returns the dead count.
  def check_dead_workers( checked )
    if dead = checked.dead and dead.size > 0 then
      log "Dead worker count #{dead.size}; members: #{dead.inspect}"
      dead.size
    else
      0
    end
  end

end # Collective::Pool
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

require "redis"
require "redis-namespace"
require "timeout"

# Redis-backed implementation of the storage interface: simple values,
# sets (Redis sets), maps (Redis hashes), and a score-ordered priority
# queue (Redis sorted set).
class Collective::Redis::Storage

  def initialize( redis = nil )
    self.redis = redis if redis
  end

  # Drop the socket inherited across fork; the client reconnects lazily.
  def reconnect_after_fork
    redis.client.disconnect
  end


  # Simple values

  def put( key, value )
    redis.set( key, value )
  end

  def get( key )
    redis.get( key )
  end

  def del( key )
    redis.del( key )
  end

  # Sets

  def set_add( set_name, value )
    redis.sadd( set_name, value )
  end

  def set_size( set_name )
    redis.scard( set_name )
  end

  def set_remove( set_name, value )
    redis.srem( set_name, value )
  end

  def set_get_all( set_name )
    redis.smembers( set_name )
  end

  def set_member?( set_name, value )
    redis.sismember( set_name, value )
  end

  # Priority Queue

  # Add item with the given score (defaults to "now").
  # Re-raises any zadd failure with the arguments included for context.
  def queue_add( queue_name, item, score = Time.now.to_i )
    score = score.to_f
    begin
      redis.zadd( queue_name, score, item )
    rescue StandardError => x   # was `rescue Exception`; never swallow signals/exits
      raise x, "Failed zadd( #{queue_name.inspect}, #{score.inspect}, #{item.inspect} ) because of an error: #{x.message}", x.backtrace
    end
  end

  # pop the lowest item from the queue IFF it scores <= max_score
  # Uses WATCH/MULTI so a concurrent pop aborts the removal (nil return).
  def queue_pop( queue_name, max_score = Time.now.to_i )
    # Option 1: zrange, check score, accept or discard
    # Option 2: zrangebyscore with limit, then zremrangebyrank

    redis.watch( queue_name )
    it = redis.zrangebyscore( queue_name, 0, max_score, limit: [0,1] ).first
    if it then
      ok = redis.multi { |r| r.zremrangebyrank( queue_name, 0, 0 ) }
      it = nil if ! ok   # transaction aborted: someone else changed the queue
    else
      redis.unwatch
    end
    it
  end

  # Like queue_pop, but polls until an item is available or the timeout
  # expires.
  # @param options [Hash] :timeout in seconds (default 1)
  # @raise [Timeout::Error] when nothing can be popped before the deadline
  def queue_pop_sync( queue_name, max_score = Time.now.to_i, options = {} )
    timeout  = options[:timeout] || 1
    deadline = Time.now.to_f + timeout

    loop do
      result = queue_pop( queue_name, max_score )
      return result if result

      raise Timeout::Error if Time.now.to_f > deadline
      sleep 0.01   # was a hot spin hammering Redis; back off between polls
    end
  end

  def queue_del( queue_name )
    redis.del( queue_name )
  end

  # Maps

  def map_set( map_name, key, value )
    redis.hset( map_name, key, value )
  end

  def map_get( map_name, key )
    redis.hget( map_name, key )
  end

  def map_get_all_keys( map_name )
    redis.hkeys( map_name )
  end

  def map_size( map_name )
    redis.hlen( map_name )
  end

  def map_del( map_name, key )
    redis.hdel( map_name, key )
  end

  # ----------------------------------------------------------------------------
  # Redis
  # ----------------------------------------------------------------------------

  # @param redis_client can only be set once
  # Accepts either a connected client or an options Hash; an optional
  # :namespace option wraps the client in Redis::Namespace.
  def redis=( redis_or_options )
    raise Collective::ConfigurationError if @redis

    case redis_or_options
    when Hash
      options = redis_or_options.dup
      namespace = options.delete(:namespace)
      @redis = Redis.connect(options)
      @redis = Redis::Namespace.new( namespace, redis: @redis ) if namespace
    else
      @redis = redis_or_options
    end
  end

  # Lazily connects to a local default database when none was configured.
  def redis
    @redis ||= ::Redis.connect( url: "redis://127.0.0.1:6379/1" )
  end

end # Collective::Redis::Storage
|