mlanett-hive 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.autotest +13 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +16 -0
  5. data/Guardfile +6 -0
  6. data/README +9 -0
  7. data/Rakefile +11 -0
  8. data/bin/hive +37 -0
  9. data/demo/demo +36 -0
  10. data/demo/demo.rb +30 -0
  11. data/demo/demo3 +36 -0
  12. data/demo/job1.rb +31 -0
  13. data/demo/job2.rb +42 -0
  14. data/demo/job3.rb +44 -0
  15. data/demo/populate.rb +22 -0
  16. data/hive.gemspec +21 -0
  17. data/lib/hive.rb +42 -0
  18. data/lib/hive/checker.rb +51 -0
  19. data/lib/hive/configuration.rb +251 -0
  20. data/lib/hive/idler.rb +81 -0
  21. data/lib/hive/key.rb +48 -0
  22. data/lib/hive/lifecycle_observer.rb +25 -0
  23. data/lib/hive/log.rb +29 -0
  24. data/lib/hive/messager.rb +217 -0
  25. data/lib/hive/mocks/storage.rb +112 -0
  26. data/lib/hive/monitor.rb +57 -0
  27. data/lib/hive/policy.rb +68 -0
  28. data/lib/hive/pool.rb +180 -0
  29. data/lib/hive/redis/storage.rb +145 -0
  30. data/lib/hive/registry.rb +123 -0
  31. data/lib/hive/squiggly.rb +20 -0
  32. data/lib/hive/trace.rb +5 -0
  33. data/lib/hive/utilities/airbrake_observer.rb +26 -0
  34. data/lib/hive/utilities/hoptoad_observer.rb +26 -0
  35. data/lib/hive/utilities/log_observer.rb +40 -0
  36. data/lib/hive/utilities/observeable.rb +18 -0
  37. data/lib/hive/utilities/observer_base.rb +59 -0
  38. data/lib/hive/utilities/process.rb +82 -0
  39. data/lib/hive/utilities/resolver.rb +12 -0
  40. data/lib/hive/utilities/signal_hook.rb +47 -0
  41. data/lib/hive/utilities/storage_base.rb +41 -0
  42. data/lib/hive/version.rb +3 -0
  43. data/lib/hive/worker.rb +162 -0
  44. data/spec/checker_spec.rb +20 -0
  45. data/spec/configuration_spec.rb +50 -0
  46. data/spec/helper.rb +33 -0
  47. data/spec/idler_spec.rb +58 -0
  48. data/spec/key_spec.rb +41 -0
  49. data/spec/messager_spec.rb +131 -0
  50. data/spec/mocks/storage_spec.rb +108 -0
  51. data/spec/monitor_spec.rb +15 -0
  52. data/spec/policy_spec.rb +43 -0
  53. data/spec/pool_spec.rb +119 -0
  54. data/spec/redis/storage_spec.rb +133 -0
  55. data/spec/registry_spec.rb +52 -0
  56. data/spec/support/jobs.rb +68 -0
  57. data/spec/support/redis.rb +22 -0
  58. data/spec/support/timing.rb +32 -0
  59. data/spec/utilities/observer_base_spec.rb +50 -0
  60. data/spec/utilities/process_spec.rb +17 -0
  61. data/spec/worker_spec.rb +121 -0
  62. data/unused/times.rb +45 -0
  63. metadata +148 -0
@@ -0,0 +1,251 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "optparse"
4
+
5
+ =begin
6
+
7
+ Evaluate a ruby configuration file in the context of a Hive Configuration instance.
8
+ Offers a DSL to build the jobs as well as setting before/after-fork hooks.
9
+
10
+ Hive configuration:
11
+
12
+ env()
13
+ set_env(ENV)
14
+ --env=ENV
15
+ Sets the environment.
16
+ Used in pid file and log file naming.
17
+ Defaults to RAILS_ENV || RACK_ENV || "test".
18
+
19
+ chdir(DIR)
20
+ --chdir=DIR
21
+ Changes the working directory. Creates it if necessary.
22
+ Takes effect immediately.
23
+ Can only be set once. Has no effect if specified more than once.
24
+ Defaults to /tmp/$NAME, or to the current directory (".") when env is "development" or "test".
25
+
26
+ name()
27
+ name=(NAME)
28
+ --name=NAME
29
+ Sets the name of the process.
30
+ Defaults to the base name of the configuration file.
31
+ Used in pid file and log file naming.
32
+
33
+ --path=PATH
34
+ add_path(PATH)
35
+ Adds a path to the Ruby load path.
36
+ Can be used multiple times.
37
+
38
+ --require=LIB
39
+ Requires a library or Ruby gem.
40
+ Can be used multiple times.
41
+
42
+ =end
43
+
44
# Holds the settings for a Hive daemon: environment, name, working directory
# (root), per-pool options and fork hooks. Instances are populated either from
# the command line (see .parse) or by evaluating configuration files/scripts
# in the context of the instance, which exposes the DSL methods below.
class Hive::Configuration

  # Builds a finalized (frozen) configuration from command-line arguments.
  #
  # Options are consumed from +argv+ (it is mutated). After option parsing,
  # leading arguments that name existing files are loaded as configuration
  # files; whatever remains is stored in #args.
  #
  # @param argv [Array<String>] command-line arguments, defaults to ARGV
  # @return [Hive::Configuration] the finalized configuration
  def self.parse( argv = ARGV )
    us = new

    OptionParser.new do |opts|
      # Show the invoked program's name rather than this library file's path.
      opts.banner = "Usage: #{opts.program_name} [options]* configuration_file_rb"
      opts.on( "-c", "--chdir DIR", "Change working directory." ) { |d| us.chdir(d) }
      opts.on( "-e", "--env ENV", "Set environment (env).") { |e| us.set_env(e) }
      opts.on( "-h", "--help", "Display this usage summary." ) { puts opts; exit }
      opts.on( "-n", "--name NAME", "Set daemon's name.") { |n| us.set_name(n) }
      opts.on( "-p", "--path PATH", "Add to load path.") { |d| us.add_path(d) }
      opts.on( "-r", "--require LIB", "Require a library.") { |l| us.require_lib(l) }
      opts.on( "-s", "--script DSL", "Include DSL script.") { |s| us.load_script(s) }
      opts.on( "-v", "--verbose", "Print stuff out.") { us.verbose += 1 }
      opts.on( "--dry-run", "Don't launch the daemon.") { us.dry_run = true }
    end.parse!(argv)

    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    while argv.any? && File.exist?(argv.first) do
      us.load_file( argv.shift )
    end

    us.args = argv
    us.finalize
  end

  include Hive::Log

  attr_reader   :env
  attr_reader   :root
  attr_accessor :name
  attr_accessor :verbose
  attr_accessor :dry_run
  attr_accessor :args
  attr_reader   :before_forks
  attr_reader   :after_forks

  attr_reader   :defaults
  attr_reader   :pools

  # Enumerates [ pool_name, policy ] pairs, resolving each pool's options
  # through Hive::Policy.resolve at iteration time.
  class PoolEnumerator
    include Enumerable

    def initialize( pools )
      @pools = pools
    end

    # Yields a two-element array [ name, policy ] for each pool.
    # Returns an Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?
      @pools.each_pair do |pool_name, options|
        yield [ pool_name, Hive::Policy.resolve(options) ]
      end
      self
    end
  end

  # @return [PoolEnumerator] enumerable of [ pool_name, policy ] pairs
  def policies
    PoolEnumerator.new(pools)
  end

  # @param filename [String, nil] optional configuration file to load immediately
  def initialize( filename = nil )
    @verbose  = 0
    @dry_run  = false
    @defaults = {}
    @pools    = {}
    load_file(filename) if filename
  end

  # Evaluates a string of DSL code in the context of this configuration.
  # NOTE: this is instance_eval on the given text; only pass trusted input.
  def load_script(string)
    log "Loading #{string}" if verbose >= 2
    instance_eval(string)
  end

  # Evaluates a configuration file in the context of this configuration.
  # If no name has been set yet, the file's base name (minus its extension)
  # becomes the name.
  # NOTE: this is instance_eval on the file's contents; only load trusted files.
  def load_file(filename)
    log "Loading #{filename}" if verbose >= 1
    instance_eval(File.read(filename),filename)
    unless name
      base = File.basename(filename).sub(/\.[^.]*$/,'')
      @name = base if base.size > 0
    end
  end

  # Fills in defaults for env, name and root, then freezes the configuration.
  #
  # @return [Hive::Configuration] self
  def finalize()
    unless env
      @env = ( ENV["RAILS_ENV"] || ENV["RACK_ENV"] || "test" )
      log "Defaulting env to #{env}" if verbose >= 1
    end
    unless name
      @name = "hive"
      log "Defaulting name to #{name}" if verbose >= 1
    end
    chdir(default_root) unless @root
    log inspect if verbose >= 2
    freeze
    self
  end

  # Creates the log and pid directories under root (unless dry_run) and
  # returns the option hash describing working dir, log file and pid file.
  def options_for_daemon_spawn
    mkdirp root, "#{root}/log", "#{root}/tmp", "#{root}/tmp/pids" unless dry_run
    return {
      working_dir: root,
      log_file:    "#{root}/log/#{name}_#{env}.log",
      pid_file:    "#{root}/tmp/pids/#{name}_#{env}.pid",
      sync_log:    local?
    }
  end

  # @return [Array] the leftover command-line arguments followed by this configuration
  def args_for_daemon_spawn
    args + [self]
  end

  # ----------------------------------------------------------------------------
  # DSL
  # ----------------------------------------------------------------------------

  def set_env(env)
    @env = env
  end

  def set_name(name)
    @name = name
  end

  # Changes the working directory (root); takes effect immediately.
  # Only the first call has an effect; later calls log a warning and are ignored.
  # The directory is created first unless dry_run is set.
  def chdir(path)
    if @root
      log "Warning: working directory already set to #{root}; not changing to #{path}"
    else
      dir = File.expand_path(path)
      mkdirp(dir) unless dry_run
      Dir.chdir(dir)
      log "Changed working directory (root) to #{dir}" if verbose >= 1
      @root = dir
    end
  end

  # Appends a directory to the Ruby load path; takes effect immediately.
  def add_path(path)
    dir = File.expand_path(path)
    log "Added #{dir} to load path" if verbose >= 2
    $LOAD_PATH.push(dir) unless $LOAD_PATH.include?(dir)
  end

  # convenience for -r on the command line
  def require_lib(r)
    require(r)
    log "Required #{r}" if verbose >= 2
  end

  # Sets a default pool option.
  # Array values are appended to any existing default; any other value
  # (including nil) overwrites it.
  def set_default(key,value)
    case value
    when Array
      @defaults[key] = (@defaults[key] || []) + value
    else
      @defaults[key] = value
    end
  end

  def set_defaults(options)
    options.each { |k,v| set_default(k,v) }
  end

  # Registers a pool. The stored options are the current defaults overlaid
  # with +options+; the pool's own fork hooks come before the hooks
  # registered so far through before_fork / after_fork.
  def add_pool( name, options = {} )
    before_forks = (options[:before_forks] || []) + (self.before_forks || [])
    after_forks  = (options[:after_forks] || []) + (self.after_forks || [])
    options = defaults.merge(options).merge before_forks: before_forks, after_forks: after_forks
    pools[name] = options
    log "Added pool for #{name}" if verbose == 1
    log "Added pool for #{name} with #{options}" if verbose >= 2
  end

  # Registers a hook block in the before_forks list.
  def before_fork(&block)
    @before_forks ||= []
    @before_forks << block
  end

  # Registers a hook block in the after_forks list.
  def after_fork(&block)
    @after_forks ||= []
    @after_forks << block
  end

  # ----------------------------------------------------------------------------
  private
  # ----------------------------------------------------------------------------

  LOCAL_ENVS = [ "development", "test" ].freeze

  def local?
    LOCAL_ENVS.member?(env)
  end

  def default_root
    local? ? "." : "/tmp/#{name}"
  end

  # Creates each directory that does not exist yet (one level at a time;
  # parents must already exist or be listed earlier).
  def mkdirp(*ps)
    # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
    ps.each { |dir| Dir.mkdir(dir) unless Dir.exist?(dir) }
  end

end
data/lib/hive/idler.rb ADDED
@@ -0,0 +1,81 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ =begin
4
+
5
+ Idler wraps some other callable (a proc or object which responds to #call)
6
+ The callable should return a falsy value when it did nothing,
7
+ or a truthy value when it did something.
8
+ The idler will sleep when there is nothing to do.
9
+
10
+ =end
11
+
12
# Wraps a callable and backs off (sleeps) while the callable reports that it
# had nothing to do. Each consecutive idle call doubles the sleep, starting
# at min_sleep and capped at max_sleep; a productive call resets the back-off.
class Hive::Idler

  MIN_SLEEP = 0.125
  MAX_SLEEP = 1.0

  # Current back-off interval in seconds, or nil when not backing off.
  # This reader shadows Kernel#sleep inside instances, which is why the
  # implementation calls Kernel.sleep explicitly.
  attr_reader :sleep

  # @param callable [#call, nil] the work to wrap; a block may be given instead
  # @param options [Hash] :min_sleep and :max_sleep (seconds, both > 0, min <= max)
  # @raise [RuntimeError] when no callable is given or the sleep bounds are invalid
  def initialize( callable = nil, options = {}, &callable_block )
    @callable = callable || callable_block
    raise "callable must respond to #call" unless @callable.respond_to?(:call)

    @max_sleep = options[:max_sleep] || MAX_SLEEP
    raise "max_sleep must be greater than zero" if @max_sleep <= 0

    @min_sleep = options[:min_sleep] || MIN_SLEEP
    raise "min_sleep must be greater than zero" if @min_sleep <= 0
    raise "max_sleep must not be less than min_sleep" if @max_sleep < @min_sleep

    @sleep = nil
  end

  # Invokes the wrapped callable with the given arguments.
  # A truthy result resets the back-off; a falsy result sleeps before returning.
  #
  # @return whatever the wrapped callable returned
  def call( *args, &block )
    result = call_with_wakefulness( @callable, *args, &block )

    if result
      wake
    else
      sleep_more
    end

    result
  end

  # Invokes +callable+; if it raises, the back-off is reset to min_sleep and
  # the exception is re-raised untouched.
  def call_with_wakefulness( callable, *args, &block )
    callable.call(*args,&block)
  rescue Exception      # deliberately broad: nothing is swallowed, see raise below
    @sleep = @min_sleep # reduce sleeping almost all the way (but not to 0)
    raise               # do not consume any exceptions
  end

  # Doubles the back-off (bounded by max_sleep), or starts it at min_sleep,
  # then sleeps for that long.
  def sleep_more
    @sleep = @sleep ? [ @sleep * 2, @max_sleep ].min : @min_sleep
    Kernel.sleep(@sleep) # Interrupt will propagate through sleep().
  end

  # Resets the back-off.
  def wake
    @sleep = nil
  end

  module Utilities
    # Executes the block repeatedly (with Idler back-off between attempts)
    # until it returns a truthy value or +timeout+ seconds have elapsed.
    # The deadline is only checked between attempts, so the call can overrun
    # the timeout by up to one back-off sleep.
    #
    # @param timeout [Numeric] seconds to keep trying
    # @return the block's truthy result, or nil if the timeout elapsed first
    def wait_until( timeout = 1, &test )
      tester = Hive::Idler.new(test)
      finish = Time.now.to_f + timeout
      loop do
        outcome = tester.call
        return outcome if outcome
        return nil if finish < Time.now.to_f
      end
    end
  end
  extend Utilities

end # Hive::Idler
data/lib/hive/key.rb ADDED
@@ -0,0 +1,48 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ =begin
4
+
5
+ A key uniquely identifies a worker.
6
+
7
+ =end
8
+
9
# A key uniquely identifies a worker by name, process id and host.
# Its string form is "name-pid@host", e.g. processor-1234@foo.example.com.
class Hive::Key

  attr_reader :name
  attr_reader :pid
  attr_reader :host

  # @param name [String] worker name
  # @param pid [#to_i] process id (coerced with to_i)
  # @param host [String] host name; defaults to the local host
  def initialize( name, pid, host = Hive::Key.local_host )
    @name = name
    @pid  = pid.to_i
    @host = host
  end

  # Two keys are equal when name, pid and host all match.
  # Anything that is not a Hive::Key (including nil) is simply unequal.
  def ==(other)
    return true if equal?(other)
    other.is_a?(Hive::Key) && name == other.name && pid == other.pid && host == other.host
  end
  alias_method :eql?, :==

  # Consistent with ==, so equal keys land in the same Hash / Set slot.
  def hash
    [ name, pid, host ].hash
  end

  # e.g. processor-1234@foo.example.com
  def to_s
    "%s-%i@%s" % [ name, pid, host ]
  end

  # Parses the string form produced by #to_s.
  # The whole string must be a key (\A..\z); a key hidden on a later line of
  # a multi-line string is rejected.
  #
  # @param key_string [String]
  # @return [Hive::Key]
  # @raise [Hive::Key::MalformedKey] when the string is not of the form name-pid@host
  def self.parse(key_string)
    parts = /\A(.*)-([0-9]+)@([^@]+)\z/.match(key_string.to_s) or raise MalformedKey.new(key_string)
    new( parts[1], parts[2], parts[3] )
  end

  # ----------------------------------------------------------------------------
  # Utilities
  # ----------------------------------------------------------------------------

  # Output of the `hostname` command with surrounding whitespace removed;
  # computed once and cached on the class.
  # @return [String] something like foo.example.com
  def self.local_host
    @local_host ||= `hostname`.strip
  end

  class MalformedKey < StandardError
  end

end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Observer that mirrors a worker's lifecycle callbacks into a registry:
# started -> register, heartbeat -> update, stopped -> unregister,
# always for the key given at construction.
class Hive::LifecycleObserver < Hive::Utilities::ObserverBase

  attr_reader :key, :registry

  # @param key the worker key passed to every registry call
  # @param registry object responding to register, update and unregister
  def initialize(key, registry)
    @key, @registry = key, registry
  end

  # Registers the key with the registry.
  def worker_started
    registry.register(key)
  end

  # Refreshes the key's registry entry; +upcount+ is accepted but not used.
  def worker_heartbeat(upcount = 0)
    registry.update(key)
  end

  # Removes the key from the registry.
  def worker_stopped
    registry.unregister(key)
  end

end # Hive::LifecycleObserver
data/lib/hive/log.rb ADDED
@@ -0,0 +1,29 @@
1
# Mixin providing a minimal logger: timestamped, pid-tagged lines written to
# a configurable IO-like object (STDOUT unless overridden).
module Hive::Log

  # Formats the arguments as one log line, writes it to #logger and flushes.
  def log( *args )
    logger.print(format_for_logging(*args))
    logger.flush
  end

  # Builds a line of the form "<epoch seconds> [<pid>] <args joined by ', '>\n".
  # Off the main thread the tag becomes "[<pid>:<thread>]", where <thread> is
  # Thread.current[:name] or, failing that, the thread's object_id. The ":"
  # keeps the pid and the thread identifier from running together.
  #
  # @return [String] the formatted line, including the trailing newline
  def format_for_logging( *args )
    thread = Thread.current
    thread_tag = ":#{thread[:name] || thread.object_id}" unless thread == Thread.main
    "#{Time.now.to_i} [#{Process.pid}#{thread_tag}] #{args.join(", ")}\n"
  end

  # The log destination. Falls back to STDOUT without assigning an instance
  # variable, so it is safe to call on frozen objects.
  def logger
    (defined?(@logger) && @logger) || STDOUT
  end

  # @param other [#print, #flush] the new log destination
  def logger=( other )
    @logger = other
  end

end # Hive::Log
@@ -0,0 +1,217 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "digest/md5"
4
+ require "json"
5
+
6
+ =begin
7
+
8
+ Messager is used to send messages between processes, and receive responses.
9
+ Messager messages are asynchronous and not ordered.
10
+
11
+ =end
12
+
13
# Messager is used to send messages between processes, and receive responses.
# Messages are JSON blobs kept in per-address queues ("messages:<address>") of
# the given storage. Messager messages are asynchronous and not ordered.
#
# NOTE: #send intentionally shadows Object#send on instances of this class;
# use __send__ / public_send for reflective calls.
class Hive::Messager

  attr_reader :callbacks
  attr_reader :storage
  attr_reader :my_address
  attr_reader :to_address

  # @param storage object providing get, queue_add and queue_pop
  # @param options[:to_address] is optional
  # @param options[:my_address] is required
  # @raise [RuntimeError] when :my_address is missing
  def initialize( storage, options = {} )
    @callbacks  = {}
    @storage    = storage
    @to_address = options[:to_address]
    @my_address = options[:my_address] || raise("must specify my address")
    # type checking: fails early if storage does not respond to #get
    storage.get("test")
  end

  # write to another queue
  # @param body JSON-compatible payload
  # @param options[:to] is required if :to_address was not given
  # @param options[:from] overrides the sender address (defaults to my_address)
  # @param options[:at] overrides the message time (defaults to Time.now)
  # @returns an id
  def send( body, options = {} )
    to      = options[:to]   || to_address || raise("must specify to address")
    from    = options[:from] || my_address || raise("must specify from address")
    now     = options[:at]   || Time.now
    message = Message.new( options.merge( to: to, from: from, at: now, body: body ) )

    storage.queue_add( queue_name(to), message.to_json, now.to_i )
    message.id
  end

  # register a handler for messages matching +match+
  # +match+ may be a String or Regexp (compared against the body) or a Hash
  # (compared key by key against the message's hash form).
  # Handlers stay registered after they have been called.
  # @returns self
  def expect( match, &callback )
    @callbacks[match] = callback
    self
  end

  # sends a new message to the original message source and with reply_to_id from the original message
  # @param options[:to] must be the original message
  # @e.g. reply "Ok", to: question
  def reply( body, options )
    original = options[:to] || raise("must reply to: message")
    send( body, to: original.from, reply_to_id: original.id )
  end

  # Not implemented.
  # @param reply_block takes (body, headers)
  # @raise [RuntimeError] always
  def expect_reply( src_id, &reply_block )
    raise "expect_reply is not implemented"
  end

  # read from my queue
  # check to see if there are any messages, and dispatch them
  # NOTE: the message is popped before a handler is looked up, so a message
  # without a matching handler raises NoMatch and is not put back.
  # @returns true if processed a message, false otherwise
  # @raise [NoMatch] when no registered handler matches the message
  def receive()
    json = storage.queue_pop( queue_name, Time.now.to_i )
    return false unless json

    message = Message.parse(json)
    find_callback( message ).call( message )
    true
  end

  # ----------------------------------------------------------------------------
  # Message contains the body and critical headers for Messager
  # ----------------------------------------------------------------------------

  class Message

    attr_reader :to           # destination host
    attr_reader :from         # source host
    attr_reader :at           # timestamp of message generation
    attr_reader :body         # JSON-compatible
    attr_reader :id           # autogenerated if not supplied
    attr_reader :reply_to_id  # optional

    # @param data [Hash] string- or symbol-keyed; :to and :from are required
    def initialize( data )
      data         = ::Hive::Messager.symbolize(data)
      @to          = data[:to]   || raise("must specify to address")
      @from        = data[:from] || raise("must specify from address")
      @at          = (data[:at] || Time.now).to_f
      @body        = data[:body]
      @id          = data[:id] || Digest::MD5.hexdigest([from,at,body].join)
      @reply_to_id = data[:reply_to_id]
    end

    # @return [Hash] symbol-keyed; :reply_to_id is present only when set
    def to_hash
      blob = { to: to, from: from, at: at, body: body, id: id }
      blob[:reply_to_id] = reply_to_id if reply_to_id
      blob
    end

    # Accepts (and forwards) the generator arguments the json library passes
    # when a message is serialized as part of a larger structure.
    def to_json( *args )
      to_hash.to_json( *args )
    end

    def to_s
      to_json
    end

    def self.parse( json )
      new( JSON.parse(json) )
    end

  end # Message

  # ----------------------------------------------------------------------------
  # Utilities
  # ----------------------------------------------------------------------------

  # @return [Hash] copy of +map+ with every key converted via to_s
  def self.stringify(map)
    Hash[ map.map { |k,v| [ k.to_s, v ] } ]
  end

  # @return [Hash] copy of +map+ with every key converted via to_sym
  def self.symbolize(map)
    Hash[ map.map { |k,v| [ k.to_sym, v ] } ]
  end

  # ----------------------------------------------------------------------------
  protected
  # ----------------------------------------------------------------------------

  # Queue name for +other_address+, or for my own address when omitted.
  def queue_name( other_address = nil )
    if other_address
      "messages:#{other_address}"
    else
      @queue_name ||= "messages:#{my_address}"
    end
  end

  # ----------------------------------------------------------------------------
  # Match
  # ----------------------------------------------------------------------------

  # Raised when a message matches no registered handler.
  class NoMatch < StandardError
  end

  # Scores a pattern against a message: every matching leaf adds one point,
  # any mismatch aborts the comparison with NoMatch.
  class Counter
    def match
      @value ||= 0
      @value += 1
    end
    def fail
      @value = nil
      raise NoMatch
    end
    def value
      raise NoMatch unless @value
      @value
    end
  end

  # Returns the handler whose pattern matches +message+ with the highest
  # score; on a tie the handler registered first wins.
  # @raise [NoMatch] when no pattern matches
  def find_callback( message )
    best_callback = nil
    best_score    = nil
    callbacks.each do |match,callback|
      begin
        counter = Counter.new
        compare_match( message, match, counter )
        score = counter.value
        if best_score.nil? || score > best_score
          best_score    = score
          best_callback = callback
        end
      rescue NoMatch
        # this pattern does not apply; try the next one
      end
    end
    return best_callback if best_callback
    raise NoMatch
  end

  # String / Regexp patterns are compared with the message body, Hash patterns
  # with the message's hash form. Other pattern types never score.
  def compare_match( message, match, counter )
    case match
    when String, Regexp
      compare( message.body, match, counter )
    when Hash
      compare( message.to_hash, match, counter )
    end
  end

  # Recursively compares +item+ with the pattern +match+, recording each hit
  # on +counter+ and aborting via counter.fail on the first mismatch.
  def compare( item, match, counter )
    case match
    when Numeric
      return item.kind_of?(Numeric) && item == match ? counter.match : counter.fail
    when String
      return item.kind_of?(String) && item == match ? counter.match : counter.fail
    when Regexp
      return item.kind_of?(String) && item =~ match ? counter.match : counter.fail
    when Hash
      counter.fail unless item.kind_of?(Hash)
      match.each do |k,v|
        counter.fail unless item.has_key?(k)
        compare( item[k], v, counter )
      end
    else
      raise "Can not compare using #{match.inspect}"
    end
  end

end