collective 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/.autotest +13 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +16 -0
  5. data/Guardfile +6 -0
  6. data/README +7 -0
  7. data/Rakefile +11 -0
  8. data/bin/collective +37 -0
  9. data/collective.gemspec +23 -0
  10. data/demo/demo +36 -0
  11. data/demo/demo.rb +30 -0
  12. data/demo/demo3 +36 -0
  13. data/demo/job1.rb +31 -0
  14. data/demo/job2.rb +42 -0
  15. data/demo/job3.rb +44 -0
  16. data/demo/populate.rb +22 -0
  17. data/lib/collective.rb +52 -0
  18. data/lib/collective/checker.rb +51 -0
  19. data/lib/collective/configuration.rb +219 -0
  20. data/lib/collective/idler.rb +81 -0
  21. data/lib/collective/key.rb +48 -0
  22. data/lib/collective/lifecycle_observer.rb +25 -0
  23. data/lib/collective/log.rb +29 -0
  24. data/lib/collective/messager.rb +218 -0
  25. data/lib/collective/mocks/storage.rb +108 -0
  26. data/lib/collective/monitor.rb +58 -0
  27. data/lib/collective/policy.rb +60 -0
  28. data/lib/collective/pool.rb +180 -0
  29. data/lib/collective/redis/storage.rb +142 -0
  30. data/lib/collective/registry.rb +123 -0
  31. data/lib/collective/squiggly.rb +20 -0
  32. data/lib/collective/utilities/airbrake_observer.rb +26 -0
  33. data/lib/collective/utilities/hoptoad_observer.rb +26 -0
  34. data/lib/collective/utilities/log_observer.rb +40 -0
  35. data/lib/collective/utilities/observeable.rb +18 -0
  36. data/lib/collective/utilities/observer_base.rb +59 -0
  37. data/lib/collective/utilities/process.rb +82 -0
  38. data/lib/collective/utilities/signal_hook.rb +47 -0
  39. data/lib/collective/utilities/storage_base.rb +41 -0
  40. data/lib/collective/version.rb +3 -0
  41. data/lib/collective/worker.rb +161 -0
  42. data/spec/checker_spec.rb +20 -0
  43. data/spec/configuration_spec.rb +24 -0
  44. data/spec/helper.rb +33 -0
  45. data/spec/idler_spec.rb +58 -0
  46. data/spec/key_spec.rb +41 -0
  47. data/spec/messager_spec.rb +131 -0
  48. data/spec/mocks/storage_spec.rb +108 -0
  49. data/spec/monitor_spec.rb +15 -0
  50. data/spec/policy_spec.rb +43 -0
  51. data/spec/pool_spec.rb +119 -0
  52. data/spec/redis/storage_spec.rb +133 -0
  53. data/spec/registry_spec.rb +52 -0
  54. data/spec/support/jobs.rb +58 -0
  55. data/spec/support/redis.rb +22 -0
  56. data/spec/support/timing.rb +32 -0
  57. data/spec/utilities/observer_base_spec.rb +50 -0
  58. data/spec/utilities/process_spec.rb +17 -0
  59. data/spec/worker_spec.rb +121 -0
  60. data/unused/times.rb +45 -0
  61. metadata +148 -0
@@ -0,0 +1,108 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
require "timeout" # Timeout::Error is raised by #queue_pop_sync

# The namespace is spelled out in nested form so this file can be loaded
# before (or without) the file that declares Collective::Mocks.
module Collective
  module Mocks

    # In-memory storage backed by a single Hash.
    #
    # It offers simple values, sets, maps and a priority queue under the same
    # method names as Collective::Redis::Storage. Sets and queues are kept as
    # Arrays and maps as Hashes, all stored in the one Hash by key.
    class Storage

      def initialize
        @storage = {}
      end

      # Nothing to reconnect; the data is held in an instance variable.
      def reconnect_after_fork
        # nop
      end

      # Simple values

      # Stores +value+ under +key+ and returns +value+.
      def put( key, value )
        @storage[key] = value
      end

      # @return the value stored under +key+, or nil
      def get( key )
        @storage[key]
      end

      # Removes +key+ and returns what was stored there (nil if nothing).
      def del( key )
        @storage.delete( key )
      end

      # Sets

      # Adds +value+ to the set at +key+ unless it is already a member.
      # @return the set's Array when the value was added, nil otherwise
      def set_add( key, value )
        members = ( @storage[key] ||= [] )
        members << value unless members.member?( value )
      end

      # @return the number of members; 0 when the set does not exist
      def set_size( key )
        ( @storage[key] || [] ).size
      end

      # @return the removed value, or nil when it was not a member
      def set_remove( key, value )
        ( @storage[key] || [] ).delete( value )
      end

      def set_member?( key, value )
        ( @storage[key] || [] ).member?( value )
      end

      # Returns a copy of the members so that callers holding the result
      # cannot change the stored set by mutating it.
      def set_get_all( key )
        ( @storage[key] || [] ).dup
      end

      # Maps

      # Sets field +name+ of the map at +key+; returns +value+.
      def map_set( key, name, value )
        ( @storage[key] ||= {} )[name] = value
      end

      # @return the value of field +name+, or nil
      def map_get( key, name )
        ( @storage[key] || {} )[name]
      end

      # @return the field names of the map at +key+ (empty if there is none)
      def map_get_all_keys( key )
        ( @storage[key] || {} ).keys
      end

      def map_size( key )
        ( @storage[key] || {} ).size
      end

      # Deletes one field, or the whole map.
      #
      # Collective::Redis::Storage#map_del takes (map_name, key); the second
      # argument is accepted here so the same call works against both.
      #
      # @param key  the map
      # @param name the field to delete; when omitted the whole map is deleted,
      #             which is what the one-argument form has always done
      # @return the deleted value (or map), nil when nothing was deleted
      def map_del( key, name = nil )
        return @storage.delete( key ) if name.nil?
        ( @storage[key] || {} ).delete( name )
      end

      # Priority Queue

      # Appends [item, score] and re-sorts the queue by ascending score.
      #
      # NOTE(review): adding the same item twice keeps both entries; confirm
      # whether callers need this to behave like a sorted-set update.
      #
      # @param score defaults to the current time in whole seconds, like
      #              Collective::Redis::Storage#queue_add
      # @return the sorted queue
      def queue_add( queue_name, item, score = Time.now.to_i )
        queue = ( @storage[queue_name] ||= [] )
        queue << [ item, score ]
        queue.sort_by! { |entry| entry.last }
      end

      # pop the lowest item from the queue IFF it scores <= max_score
      # @return the item, or nil when the queue is empty or nothing is due
      def queue_pop( queue_name, max_score = Time.now.to_i )
        queue = @storage[queue_name]
        return nil if queue.nil? || queue.empty?
        return nil unless queue.first.last <= max_score
        queue.shift.first
      end

      # Polls #queue_pop until it yields an item or the timeout passes.
      #
      # @param options[:timeout]       seconds to keep trying (default 1)
      # @param options[:poll_interval] seconds to sleep between attempts
      #                                (default 0.01); 0 polls continuously
      # @raise [Timeout::Error] when nothing could be popped in time
      def queue_pop_sync( queue_name, max_score = Time.now.to_i, options = {} )
        timeout  = options[:timeout] || 1
        pause    = options[:poll_interval] || 0.01
        deadline = Time.now.to_f + timeout

        loop do
          result = queue_pop( queue_name, max_score )
          return result if result

          remaining = deadline - Time.now.to_f
          raise Timeout::Error if remaining < 0

          # never sleep past the deadline
          sleep( [ pause, remaining ].min )
        end
      end

      def queue_del( queue_name )
        @storage.delete( queue_name )
      end

    end # Storage

  end # Mocks
end # Collective
@@ -0,0 +1,58 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'ruby-debug'
3
+
4
# Builds one Collective::Pool per configured job and keeps the pools
# synchronized until the process receives TERM.
class Collective::Monitor

  include Collective::Log

  attr_reader :pools

  # @param configuration must respond to #jobs with pairs of
  #   (kind, options); each pair becomes one pool
  def initialize( configuration )
    @pools = configuration.jobs.map do |kind, options|
      Collective::Pool.new( kind, Collective::Policy.resolve(options) )
    end
  end


  # Synchronizes every pool over and over until TERM arrives.
  #
  # The work is wrapped in a Collective::Idler with min_sleep 1 and
  # max_sleep 10; the block reports whether any pool's result changed.
  # NOTE(review): presumably the Idler uses that flag to decide how long to
  # sleep between rounds - confirm in Collective::Idler.
  def monitor
    status = {}

    check = lambda do
      changed = false
      pools.each do |pool|
        log pool.name
        current = pool.synchronize log: true

        if status[pool.name] != current
          status[pool.name] = current
          changed = true
        end
      end
      changed
    end

    job = Collective::Idler.new( check, min_sleep: 1, max_sleep: 10 )

    ok = true
    previous = trap("TERM") { ok = false }
    begin
      job.call while ok
    ensure
      # Put back whatever handler was installed before; otherwise the process
      # would keep swallowing TERM after this method has returned.
      trap( "TERM", previous ) if previous
    end
  end # monitor

  # Asks every pool to stop all of its workers.
  def stop_all
    pools.each { |pool| pool.stop_all }
  end

  # Asks every pool to restart.
  def restart
    pools.each { |pool| pool.restart }
  end

end
@@ -0,0 +1,60 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "ostruct"
4
+
5
+ class Collective::Policy
6
+
7
+ DEFAULTS = {
8
+ pool_min_workers: 1,
9
+ pool_max_workers: 10,
10
+ worker_idle_max_sleep: 64.0,
11
+ worker_idle_min_sleep: 0.125,
12
+ worker_idle_spin_down: 900,
13
+ worker_none_spin_up: 86400,
14
+ worker_max_jobs: 100, # a worker should automatically exit after this many jobs
15
+ worker_max_lifetime: 1000, # a worker should automatically exit after this time
16
+ worker_late: 10, # a worker is overdue after this time with no heartbeat
17
+ worker_hung: 100, # a worker will be killed after this time
18
+ storage: :mock,
19
+ observers: []
20
+ }
21
+
22
+ class Instance
23
+
24
+ # including options[:policy] will merge over these options
25
+ def initialize( options = {} )
26
+ if options[:policy] then
27
+ policy = options.delete(:policy)
28
+ defaults = policy.dup
29
+ else
30
+ defaults = DEFAULTS
31
+ end
32
+
33
+ options = Hash[ options.map { |k,v| [ k.to_sym, v ] } ] # poor man's symbolize keys
34
+ @options = defaults.merge( options )
35
+ end
36
+
37
+ def storage
38
+ Collective::Utilities::StorageBase.resolve @options[:storage]
39
+ end
40
+
41
+ def method_missing( symbol, *arguments )
42
+ @options[symbol.to_sym]
43
+ end
44
+
45
+ def dup
46
+ @options.dup
47
+ end
48
+
49
+ end # Instance
50
+
51
+ class << self
52
+
53
+ def resolve( options = {} )
54
+ # this will dup either an Instance or a Hash
55
+ Collective::Policy::Instance.new(options.dup)
56
+ end
57
+
58
+ end # class
59
+
60
+ end # Collective::Policy
@@ -0,0 +1,180 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ =begin
4
+
5
+ A pool is a collection of workers, each of which is a separate process.
6
+ All workers are of the same kind (class).
7
+
8
+ =end
9
+
10
# A pool of worker processes that all run the same kind of job.
# It compares the registry's view of the workers with the policy and
# spawns or shuts down workers to match.
class Collective::Pool

  include Collective::Log

  attr_reader :kind      # job class
  attr_reader :name
  attr_reader :policy
  attr_reader :registry
  attr_reader :storage   # where to store worker details

  # @param kind the job class, or a [kind, policy_prototype] pair
  # @param policy_prototype anything Collective::Policy.resolve accepts
  # @raise [Collective::ConfigurationError] when neither the policy nor
  #   the job class supplies a name
  def initialize( kind, policy_prototype = {} )
    kind, policy_prototype = kind.first, kind.last if kind.kind_of?(Array)

    @kind     = kind
    @policy   = Collective::Policy.resolve(policy_prototype)
    @name     = @policy.name || kind.name or raise Collective::ConfigurationError, "Pool or Job must have a name"
    @storage  = policy.storage
    @registry = Collective::Registry.new( name, storage )

    # type checks
    policy.pool_min_workers
    registry.workers
  end


  # Brings the number of live workers within the policy's bounds.
  #
  # Below pool_min_workers: spawns the missing workers. Above
  # pool_max_workers: shuts down the surplus, workers on this host first.
  #
  # NOTE: the check_* helpers only run when :log is set, and
  # check_hung_workers also terminates hung local workers.
  #
  # @param options[:log] can be true
  # @returns the checked worker lists, re-read after any changes
  def synchronize( options = {} )
    options = options.dup # leave the caller's hash alone
    do_log  = options.delete(:log)
    raise "Unknown options: #{options.keys.inspect}" unless options.empty?

    checklist  = registry.checked_workers( policy )
    live_count = checklist.live.size

    if do_log
      check_live_workers( checklist )
      check_late_workers( checklist )
      check_hung_workers( checklist )
      check_dead_workers( checklist )
    end

    if (need = policy.pool_min_workers - live_count) > 0
      # launch workers
      need.times { spawn wait: true }

    elsif (excess = live_count - policy.pool_max_workers) > 0
      # spin down the surplus; prefer LOCAL workers
      locals, remotes = checklist.live.partition { |k| k.host == Collective::Key.local_host }
      ( locals + remotes ).first( excess ).each { |key| reap key, wait: true }
    end

    registry.checked_workers( policy )
  end


  # Messager used to reach workers on other hosts; created on first use and
  # addressed as "<name>-pool" plus this process's pid.
  def mq
    @mq ||= begin
      key = Collective::Key.new( "#{name}-pool", Process.pid )
      Collective::Messager.new storage, my_address: key
    end
  end


  # tell all workers to quit
  def stop_all
    checklist = registry.checked_workers( policy )
    [ checklist.live, checklist.late, checklist.hung, checklist.dead ].each do |keys|
      keys.each { |key| reap(key) }
    end
  end


  # Stops every known worker, then synchronizes so the pool is brought back
  # up to pool_min_workers. Collective::Monitor#restart calls this.
  #
  # NOTE(review): workers that are still listed as live when #synchronize
  # runs are not replaced in this pass; presumably the next synchronize
  # catches up - confirm this is acceptable.
  #
  # @returns the checked worker lists from #synchronize
  def restart
    stop_all
    synchronize
  end


  # this really should be protected but it's convenient to be able to force a spawn
  # param options[:wait] can be true to wait until after the process is spawned
  def spawn( options = {} )
    options = options.dup
    wait    = options.delete(:wait)
    raise "Unknown options: #{options.keys.inspect}" unless options.empty?

    before = registry.checked_workers( policy ).live if wait

    Collective::Worker.spawn kind, registry: registry, policy: policy, name: name
    return unless wait

    # wait (up to 10) for a new local worker to show up as live
    Collective::Idler.wait_until( 10 ) do
      after = registry.checked_workers( policy ).live
      ( after - before ).any? { |k| k.host == Collective::Key.local_host }
    end
  end


  # shut down a worker
  #
  # A worker on this host is sent TERM and then handed to
  # wait_and_terminate; a worker elsewhere is sent a "Quit" message.
  #
  # @param options[:wait] can be true to wait (up to 10) until the worker
  #   is no longer listed as live
  def reap( key, options = {} )
    options = options.dup
    wait    = options.delete(:wait)
    raise "Unknown options: #{options.keys.inspect}" unless options.empty?

    if key.host == Collective::Key.local_host
      begin
        ::Process.kill( "TERM", key.pid )
        Collective::Utilities::Process.wait_and_terminate key.pid, timeout: 10
      rescue Errno::ESRCH
        # The process no longer exists (e.g. a dead worker); nothing to stop.
        log "Worker #{key} is already gone"
      end
    else
      mq.send "Quit", to: key
    end

    if wait
      Collective::Idler.wait_until( 10 ) do
        live = registry.checked_workers( policy ).live
        ! live.member? key
      end
    end
  end

  # ----------------------------------------------------------------------------
  protected
  # ----------------------------------------------------------------------------

  # Logs the live workers. @returns how many there are
  def check_live_workers( checked )
    live = checked.live
    return 0 unless live and live.size > 0

    log "Live worker count #{live.size}; members: #{live.inspect}"
    live.size
  end


  # Logs the late workers. @returns how many there are
  def check_late_workers( checked )
    late = checked.late
    return 0 unless late and late.size > 0

    log "Late worker count #{late.size}; members: #{late.inspect}"
    late.size
  end


  # Terminates and unregisters hung workers that run on this host.
  #
  # A pid only means something on the host it came from, so hung workers
  # registered from other hosts are logged and left alone.
  #
  # @returns 0, always
  def check_hung_workers( checked )
    hung = checked.hung
    if hung and hung.size > 0
      log "Hung worker count #{hung.size}"
      hung.each do |key|
        if key.host == Collective::Key.local_host
          log "Killing #{key}"
          Collective::Utilities::Process.wait_and_terminate( key.pid )
          registry.unregister(key)
        else
          log "Hung worker #{key} is on another host; not killing it from here"
        end
      end
    end
    0
  end


  # Logs the dead workers. @returns how many there are
  def check_dead_workers( checked )
    dead = checked.dead
    return 0 unless dead and dead.size > 0

    log "Dead worker count #{dead.size}; members: #{dead.inspect}"
    dead.size
  end

end # Collective::Pool
@@ -0,0 +1,142 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "redis"
4
+ require "redis-namespace"
5
+ require "timeout"
6
+
7
# The namespace is spelled out in nested form so this file can be loaded
# before (or without) the file that declares Collective::Redis.
#
# NOTE: inside this namespace a bare `Redis` would name Collective::Redis,
# so the client library is always written as ::Redis.
module Collective
  module Redis

    # Storage backed by a Redis connection: simple values, sets, maps
    # (Redis hashes) and a priority queue (a Redis sorted set).
    class Storage

      # @param redis an existing client, or a Hash of connection options
      #   (see #redis=); when omitted a default connection is made on first use
      def initialize( redis = nil )
        self.redis = redis if redis
      end

      # Disconnects the client; call in a forked child.
      # NOTE(review): presumably the client reconnects on its next command,
      # so the child stops sharing the parent's socket - confirm.
      def reconnect_after_fork
        redis.client.disconnect
      end


      # Simple values

      def put( key, value )
        redis.set( key, value )
      end

      def get( key )
        redis.get( key )
      end

      def del( key )
        redis.del( key )
      end

      # Sets

      def set_add( set_name, value )
        redis.sadd( set_name, value )
      end

      def set_size( set_name )
        redis.scard( set_name )
      end

      def set_remove( set_name, value )
        redis.srem( set_name, value )
      end

      def set_get_all( set_name )
        redis.smembers( set_name )
      end

      def set_member?( set_name, value )
        redis.sismember( set_name, value )
      end

      # Priority Queue

      # Adds +item+ to the sorted set with +score+ (converted to a Float).
      # Failures are re-raised as the same exception class, with the
      # arguments added to the message and the original backtrace kept.
      def queue_add( queue_name, item, score = Time.now.to_i )
        score = score.to_f
        begin
          redis.zadd( queue_name, score, item )
        rescue StandardError => error
          raise error, "Failed zadd( #{queue_name.inspect}, #{score.inspect}, #{item.inspect} ) because of an error: #{error.message}", error.backtrace
        end
      end

      # pop the lowest item from the queue IFF it scores <= max_score
      #
      # The queue is WATCHed so the removal only goes through when nobody
      # else changed it in between; a lost race returns nil.
      #
      # @return the item, or nil when nothing is due or the race was lost
      def queue_pop( queue_name, max_score = Time.now.to_i )
        redis.watch( queue_name )

        # "-inf" rather than 0, so members with negative scores are seen too
        it = redis.zrangebyscore( queue_name, "-inf", max_score, limit: [0,1] ).first
        if it
          # remove exactly the member that was read
          ok = redis.multi { |r| r.zrem( queue_name, it ) }
          it = nil unless ok
        else
          redis.unwatch
        end
        it
      end

      # Polls #queue_pop until it yields an item or the timeout passes.
      #
      # @param options[:timeout]       seconds to keep trying (default 1)
      # @param options[:poll_interval] seconds to sleep between attempts
      #                                (default 0.01); 0 polls continuously
      # @raise [Timeout::Error] when nothing could be popped in time
      def queue_pop_sync( queue_name, max_score = Time.now.to_i, options = {} )
        timeout  = options[:timeout] || 1
        pause    = options[:poll_interval] || 0.01
        deadline = Time.now.to_f + timeout

        loop do
          result = queue_pop( queue_name, max_score )
          return result if result

          remaining = deadline - Time.now.to_f
          raise Timeout::Error if remaining < 0

          # pause between polls instead of issuing commands back to back,
          # but never sleep past the deadline
          sleep( [ pause, remaining ].min )
        end
      end

      def queue_del( queue_name )
        redis.del( queue_name )
      end

      # Maps

      def map_set( map_name, key, value )
        redis.hset( map_name, key, value )
      end

      def map_get( map_name, key )
        redis.hget( map_name, key )
      end

      def map_get_all_keys( map_name )
        redis.hkeys( map_name )
      end

      def map_size( map_name )
        redis.hlen( map_name )
      end

      # Deletes one field of the map, or the whole map when +key+ is omitted
      # (the form Collective::Mocks::Storage#map_del accepts).
      def map_del( map_name, key = nil )
        return redis.del( map_name ) if key.nil?
        redis.hdel( map_name, key )
      end

      # ----------------------------------------------------------------------------
      # Redis
      # ----------------------------------------------------------------------------

      # The connection can only be set once.
      #
      # @param redis_or_options a ready client, or a Hash of options for
      #   ::Redis.connect; a :namespace entry wraps the connection in a
      #   ::Redis::Namespace
      # @raise [Collective::ConfigurationError] when a connection is already set
      def redis=( redis_or_options )
        raise Collective::ConfigurationError, "redis has already been set" if @redis

        case redis_or_options
        when Hash
          options   = redis_or_options.dup
          namespace = options.delete(:namespace)
          client    = ::Redis.connect(options)
          client    = ::Redis::Namespace.new( namespace, redis: client ) if namespace
          # assign only once fully built, so a failure leaves @redis unset
          @redis    = client
        else
          @redis = redis_or_options
        end
      end

      # @return the client; connects to redis://127.0.0.1:6379/1 when none was set
      def redis
        @redis ||= ::Redis.connect( url: "redis://127.0.0.1:6379/1" )
      end

    end # Storage

  end # Redis
end # Collective