droid 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/conf ADDED
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+ require 'socket'
3
+
4
# Poll a TCP port until something accepts a connection on it.
#
# port    - port number to connect to.
# options - :host (filled in as "localhost" when absent) and :timeout, used
#           as the number of connection attempts (filled in as 30 when
#           absent). Defaults are written into the hash that was passed.
#
# Returns nil after the first successful connect. One second is slept after
# every failed attempt; raises RuntimeError once all attempts have failed.
def wait_for_tcp_port(port, options={})
  options[:timeout] ||= 30
  options[:host] ||= "localhost"

  options[:timeout].times do
    begin
      # Connecting is the whole probe; the socket is closed straight away.
      TCPSocket.new(options[:host], port).close
      return
    rescue
      sleep 1
    end
  end

  raise "#{options[:host]}:#{port} did not come up after #{options[:timeout]} seconds"
end
21
+
22
# Wait until something is listening on port 5672 before running rabbitmqctl.
puts "Waiting for RabbitMQ"
wait_for_tcp_port(5672)

# Create the /heroku vhost, then add each agent user and set its three
# permission patterns on that vhost to ".*". Command output is echoed.
puts `rabbitmqctl add_vhost /heroku`

agents = %w[hermes core]
agents.each do |name|
  puts `rabbitmqctl add_user #{name} thisisheroku!`
  puts `rabbitmqctl set_permissions -p /heroku #{name} \".*\" \".*\" \".*\"`
end
31
+
data/lib/droid.rb ADDED
@@ -0,0 +1,531 @@
1
+ require 'socket'
2
+ require 'digest/md5'
3
+ require File.dirname(__FILE__) + '/../vendor/logger_client/init'
4
+
5
+ $:.unshift *Dir[File.dirname(__FILE__) + '/../vendor/*/lib']
6
+ require 'json'
7
+ require 'mq'
8
+ require 'time'
9
+ require 'bunny'
10
+
11
+ require File.dirname(__FILE__) + '/utilization'
12
+
13
+ class Droid
14
+
15
# Raised when a payload that is not a Hash is handed to a publish path.
class BadPayload < RuntimeError
end
16
+
17
+ ## basic ops
18
+ ## ######
19
+ ## publish / broadcast
20
+ ## listen / subscribe
21
+
22
# Connection mode stored for the current thread under 'con_type'.
# When nothing is stored yet, :sync is stored and returned.
def self.con_type
  Thread.current['con_type'] || (Thread.current['con_type'] = :sync)
end
25
+
26
# Store the connection mode for the current thread only.
def self.con_type=(type)
  thread = Thread.current
  thread['con_type'] = type
end
29
+
30
# True when the current thread's connection mode is :async.
#
# The previous body read a class variable (@@async) that this file never
# assigns and hid the resulting NameError behind `rescue false`, so it
# always answered false. The mode is tracked per thread by con_type.
def self.async?
  con_type == :async
end
33
+
34
# Run the block with the connection mode switched to :async; the switching
# and restoring is done by ensure_con_type.
def self.async(&block)
  ensure_con_type(:async, &block)
end
37
+
38
# Run the block with the connection mode switched to :sync; the switching
# and restoring is done by ensure_con_type.
def self.sync(&block)
  ensure_con_type(:sync, &block)
end
41
+
42
# Switch the connection mode to +type+ for the duration of the block and put
# the previous mode back afterwards, also when the block raises.
# Returns the block's value.
def self.ensure_con_type(type, &blk)
  previous = con_type
  self.con_type = type
  begin
    blk.call
  ensure
    # Always restore, so a failing block cannot leave the mode switched.
    self.con_type = previous
  end
end
51
+
52
# Build a fresh identifier for an event: the MD5 hex digest (32 lowercase
# hex characters) of a seed that is very unlikely to repeat.
#
# The seed used to be Time.now.to_s + object_id + rand(100). The time has
# one-second resolution and the object id is constant, so only 100 distinct
# hashes could be produced per second. The seed now uses a sub-second
# timestamp, the process id and a 64-bit random number.
def self.new_event_hash
  seed = [Time.now.to_f, Process.pid, self.object_id, rand(2**64)].join('-')
  Digest::MD5.hexdigest(seed)
end
56
+
57
# Look up / declare a queue by name on the backend matching the current
# connection mode: MQ when the mode is :async, the bunny client otherwise.
# The call runs inside reconnect_on_error.
def self.queue(name, options = {})
  reconnect_on_error do
    backend = (con_type == :async) ? MQ : bunny
    backend.queue(name, options)
  end
end
66
+
67
if defined?(Bunny::Client) # the block below doesn't work with newer versions of bunny
  # Replace bunny's socket connect timeout with zero, because
  # reconnect_on_error already wraps calls in its own Timeout. Per the
  # original note, a zero timeout means "infinite" and avoids starting
  # another thread.
  ::Bunny::Client.class_eval do
    remove_const(:CONNECT_TIMEOUT)
    const_set(:CONNECT_TIMEOUT, 0)
  end
end
75
+
76
# Run the block, retrying it on Bunny errors, all within a 20 second
# Timeout. Every retry is preceded by a half-second pause.
#
# Bunny::ProtocolError                     - retry on the same connection.
# Bunny::ConnectionError / ServerDownError - drop the cached bunny client
#                                            first, then retry.
#
# Returns the block's value; other exceptions propagate.
def self.reconnect_on_error
  Timeout::timeout(20) do
    begin
      yield
    rescue Bunny::ProtocolError
      sleep 0.5
      retry
    rescue Bunny::ConnectionError, Bunny::ServerDownError
      sleep 0.5
      @@bunny = nil
      retry
    end
  end
end
94
+
95
# Publish +payload+ under +key+ and wait for the reply, in :sync mode.
#
# A reply queue named "<key>.reply.<gensym>" is declared and bound to the
# exchange, its name is written into payload[:reply_to] (the caller's hash
# is modified), the message is published and the reply is popped.
#
# Returns whatever pop returns for the reply queue.
#
# The reply queue used to be kept in the class-level instance variable @q,
# which every caller and thread shared; it is now a local.
def self.call(key, payload, options={})
  sync do
    reply_to = key + '.reply.' + Droid.gensym
    reply_queue = nil
    begin
      reconnect_on_error do
        # The queue should only need popping, not binding; kept as is until
        # the consumer side supports that.
        reply_queue = queue(reply_to, :auto_delete => true)
        reply_queue.bind(exchange, :key => reply_to)

        payload[:reply_to] = reply_to
        publish(key, payload, options)

        pop(reply_to)
      end
    ensure
      # for some reason the auto_delete flag is not working correctly with Bunny
      # so we're deleting the queue manually here
      reply_queue.delete if reply_queue
    end
  end
end
118
+
119
# Poll the named queue until a message arrives and return it parsed as JSON.
# Raises unless the connection mode is :sync. When the pop result is a Hash
# its :payload entry is used; an empty queue is retried after 0.1 seconds.
def self.pop(queue)
  loop do
    raise "POP must be sync" if con_type != :sync

    message = self.queue(queue).pop
    message = message[:payload] if message.is_a?(Hash)

    if message == :queue_empty
      sleep 0.1
    else
      return JSON.parse(message)
    end
  end
end
128
+
129
# Serialize +payload+ with payload_to_data and publish it directly on the
# named queue, inside reconnect_on_error. Returns the publish result.
def self.push(queue_name, payload, options={})
  reconnect_on_error do
    target = queue(queue_name)
    target.publish(payload_to_data(payload, options))
  end
end
134
+
135
# Stamp +payload+ with its bookkeeping fields and serialize it to JSON.
#
# payload - Hash; modified in place. Raises BadPayload for anything else.
# options - :published_on overrides the publish timestamp (defaults to the
#           current GMT time as an integer); :ttl supplies the TTL when the
#           payload does not carry one already.
#
# :event_hash and :ttl are only filled in when missing; :published_on is
# always overwritten.
#
# The TTL fallback used to be `options[:ttl].to_i || DEFAULT_TTL`. Since
# nil.to_i is 0 the fallback was never taken, and DEFAULT_TTL lives in
# Basic, so it would not have resolved here. A missing TTL now defaults to
# Basic::DEFAULT_TTL.
def self.payload_to_data(payload, options)
  raise BadPayload unless payload.is_a?(Hash)

  payload[:event_hash] ||= new_event_hash
  payload[:published_on] = options[:published_on] || Time.now.getgm.to_i
  payload[:ttl] ||= (options[:ttl].nil? ? Basic::DEFAULT_TTL : options[:ttl].to_i)

  payload.to_json
end
144
+
145
# Publish +payload+ on the exchange under routing key +key+.
#
# options - :immediate is passed through to the exchange; :log => false
#           suppresses the log line; the whole hash also goes to
#           payload_to_data.
#
# Returns the result of the exchange publish call.
def self.publish(key, payload, options={})
  res = reconnect_on_error do
    topic = exchange
    topic.publish(payload_to_data(payload, options), :key => key, :immediate => options[:immediate])
  end

  if options[:log] != false
    summary = payload_summary(payload)
    Log.notice "#{key} published #{summary}",
      :event_hash => payload[:event_hash],
      :addendum => payload.inspect
  end

  res
end
159
+
160
# Keys that payload_summary leaves out of its output. The list is built
# once and memoized.
def self.header_keys
  return @header_keys if @header_keys
  @header_keys = [:exchange, :delivery_mode, :delivery_tag, :redelivered, :consumer_tag, :content_type, :key, :priority]
end
163
+
164
# One-line, log-friendly rendering of a payload.
#
# Entries whose key is listed in header_keys are left out. Values longer
# than 20 characters are cut to 18 characters plus '...'. At most 15
# entries are listed; the remainder is reported as a count.
def self.payload_summary(payload)
  visible = payload.reject { |name, _| header_keys.include?(name.to_sym) }
  return ' -> (empty payload)' if visible.empty?

  pairs = visible.map do |name, value|
    text = value.to_s
    text = text[0..17] + '...' if text.size > 20
    "#{name}=#{text}"
  end

  summary = pairs.first(15).join(', ')
  summary << " (and #{visible.size - 15} more)" if visible.size > 15
  " -> #{summary}"
end
177
+
178
# The 'amq.topic' exchange from the backend matching the current connection
# mode: MQ when the mode is :async, the bunny client otherwise.
def self.exchange
  return MQ.topic('amq.topic') if con_type == :async
  bunny.exchange("amq.topic")
end
185
+
186
# Connection settings taken from the AMQP_URI environment variable, falling
# back to 'rabbit://guest:guest@localhost:5672/'.
#
# Returns a Hash with :vhost (the URI path), :host, :user, :port and :pass.
# Raises RuntimeError when the URI scheme is not "rabbit".
def self.default_options
  # URI is used here but was never required by this file; load it locally,
  # as wait_for_tcp_port does for 'timeout'.
  require 'uri'

  uri = URI.parse(ENV["AMQP_URI"] || 'rabbit://guest:guest@localhost:5672/')
  raise "invalid AMQP_URI [#{uri.to_s}]" unless uri.scheme == "rabbit"
  {
    :vhost => uri.path,
    :host => uri.host,
    :user => uri.user,
    :port => uri.port,
    :pass => uri.password
  }
end
197
+
198
# Alias for default_options.
def self.default_config
  self.default_options
end
201
+
202
# Create a Bunny client from default_options, start it and return it.
def self.new_bunny
  Bunny.new(default_options).tap { |client| client.start }
end
207
+
208
# The cached Bunny client, created through new_bunny on first use and again
# after reconnect_on_error has reset the cache to nil.
def self.bunny
  @@bunny = new_bunny unless defined?(@@bunny) && @@bunny
  @@bunny
end
211
+
212
# Run the EventMachine loop with an AMQP connection, in :async mode.
#
# options - connection settings for AMQP.start; default_options when nil.
# blk     - optional; called inside EM.run after AMQP.start.
#
# INT and TERM are trapped to stop AMQP and then EventMachine. An
# AMQP::Error is reported on STDERR and re-raised after a 5 second pause.
def self.start(options = nil, &blk)
  async do
    begin
      %w[INT TERM].each do |sig|
        Signal.trap(sig) { AMQP.stop { EM.stop } }
      end

      EM.run do
        AMQP.start(options || default_options)
        blk.call if blk
      end
    rescue AMQP::Error => e
      STDERR.puts "Caught #{e.class}, sleeping to avoid inittab thrashing"
      sleep 5
      STDERR.puts "Done."
      raise
    end
  end
end
229
+
230
# Schedule a shutdown one second from now: stop AMQP, then EventMachine.
def self.stop_safe
  EM.add_timer(1) do
    AMQP.stop { EM.stop }
  end
end
233
+
234
# Build a queue name of the form "<gen_instance_queue(key)>.<droid name>".
#
# droid - an object responding to #name, or the name itself; "d" is used
#         when no name is available. Spaces are removed from the name.
# key   - routing key handed to gen_instance_queue.
#
# The name is copied before the spaces are stripped. The previous gsub!
# edited the droid's own name string in place and raised on frozen strings.
def self.gen_queue(droid, key)
  dn = droid
  dn = dn.name if dn.respond_to?(:name)
  dn ||= "d"
  dn = dn.to_s.gsub(" ", "")
  "#{self.gen_instance_queue(key)}.#{dn}"
end
241
+
242
# Queue name prefix: "<key>.<LocalStats.slot>.<LocalStats.ion_instance_id>".
def self.gen_instance_queue(key)
  parts = [key.to_s, LocalStats.slot.to_s, LocalStats.ion_instance_id.to_s]
  parts.join('.')
end
245
+
246
# Random 32 character lowercase hex token: five 16-bit values followed by
# two 24-bit values, each zero padded.
def self.gensym
  values = Array.new(5) { rand(0x0010000) } + Array.new(2) { rand(0x1000000) }
  "%04x%04x%04x%04x%04x%06x%06x" % values
end
258
+
259
# Wait until a TCP connection to host:port succeeds.
#
# options - :retries, number of attempts (default 5), and :timeout, seconds
#           allowed per attempt (default 5). Either key may be omitted.
#
# Returns nil on the first successful connect. After each failed attempt a
# notice is logged and one second is slept; raises RuntimeError when every
# attempt has failed.
#
# Defaults are merged with the caller's hash. Before, passing only
# :retries dropped the default :timeout, and Timeout.timeout(nil) means no
# timeout at all; passing only :timeout failed on nil.times. Only ordinary
# errors and Timeout::Error are retried now; the former `rescue Object`
# also swallowed Interrupt and SystemExit.
def self.wait_for_tcp_port(host, port, options={:retries => 5, :timeout => 5})
  require 'timeout'
  opts = {:retries => 5, :timeout => 5}.merge(options)

  opts[:retries].times do
    begin
      Timeout::timeout(opts[:timeout]) {
        TCPSocket.new(host.to_s, port).close
      }
      return
    rescue StandardError, Timeout::Error
      Log.notice "#{host}:#{port} not available, waiting..."
      sleep 1
    end
  end

  raise "#{host}:#{port} did not come up after #{opts[:retries]} retries"
end
275
+
276
# Run the block; a StandardError leaving it is handed to Log.default_error
# and not re-raised. Returns the block's value, or the logger's return
# value when an error was trapped.
def self.trap_exceptions
  begin
    yield
  rescue => failure
    Log.default_error failure
  end
end
282
+
283
# Schedule the block to run once after +duration+ via EM.add_timer.
# The block runs under trap_exceptions.
def self.timer(duration, &bk)
  EM.add_timer(duration) do
    trap_exceptions(&bk)
  end
end
287
+
288
# Schedule the block to run every +duration+ via EM.add_periodic_timer, under
# trap_exceptions. When +now+ is true the block is additionally scheduled
# once through timer with a 1 second delay (it is not called inline).
# Returns the value of EM.add_periodic_timer.
def self.periodic_timer(duration, now=false, &bk)
  timer(1, &bk) if now
  EM.add_periodic_timer(duration) do
    trap_exceptions(&bk)
  end
end
294
+
295
# Instance-level shortcut for the class-level timer.
def timer(duration, &bk)
  self.class.timer(duration, &bk)
end
296
# Instance-level shortcut for the class-level periodic_timer.
def periodic_timer(duration, now=false, &bk)
  self.class.periodic_timer(duration, now, &bk)
end
297
+
298
# Publisher bound to one droid. It keeps a lazily built header hash (seeded
# with an event hash) that is merged into every payload it publishes.
class Basic
  # Presumably seconds, since listeners compare it against epoch-second
  # timestamps - confirm against the consumers.
  DEFAULT_TTL = 300

  # droid   - owner; used to register reply listeners through listen4.
  # options - accepted for interface compatibility; not read here.
  def initialize(droid, options={})
    @droid = droid
  end

  # The exchange selected by Droid.exchange.
  def exchange
    Droid.exchange
  end

  # Memoized header hash, created with this object's event hash.
  def headers
    @headers = { :event_hash => self.event_hash } unless @headers
    @headers
  end

  # Memoized event hash obtained from Droid.new_event_hash.
  def event_hash
    @event_hash = Droid.new_event_hash unless @event_hash
    @event_hash
  end

  # Publish +payload+ (a Hash, otherwise BadPayload is raised) merged over
  # the headers. With a block, a temporary reply listener is registered
  # first and its routing key is stored in headers[:reply_to].
  #
  # Returns the listen4 result when a block was given, nil otherwise.
  def publish(key, payload, options={}, &blk)
    raise BadPayload unless payload.is_a?(Hash)

    listener = nil
    if blk
      headers[:reply_to] = key + '.reply.' + Droid.gensym
      listener = @droid.listen4(headers[:reply_to], { :temp => true }, &blk)
    end

    Droid.publish(key, headers.merge(payload), {:log => true}.merge(options))

    listener
  end

  # Shortcut for Droid.payload_summary.
  def payload_summary(payload)
    Droid.payload_summary(payload)
  end
end
334
+
335
+ class Listener < Basic
336
+ attr_accessor :params
337
+
338
# Open a channel and declare the queue this listener consumes from.
#
# droid   - owner, passed on to the superclass.
# key     - routing key the queue will be bound to by listen.
# options - :queue (name; generated by Droid.gen_queue when absent),
#           :auto_delete (default true) and :prefetch. :queue and
#           :auto_delete are removed from the hash that was passed.
def initialize(droid, key, options={})
  @key = key
  @options = options

  queue_name = @options.delete(:queue) || Droid.gen_queue(droid, key)
  auto_delete =
    if @options.has_key?(:auto_delete)
      @options.delete(:auto_delete) ? true : false
    else
      true
    end

  @mq = MQ.new
  @prefetch = @options[:prefetch] ? true : false
  @mq.prefetch(@options[:prefetch]) if @prefetch
  @q = @mq.queue(queue_name, :auto_delete => auto_delete)

  super(droid, options)
end
349
+
350
# Stop consuming from the queue, then close the channel.
def destroy
  @q.unsubscribe
  @mq.close
end
354
+
355
# The MQ channel created for this listener.
def mq
  return @mq
end
358
+
359
# The exchange selected by Droid.exchange.
def exchange
  return Droid.exchange
end
362
+
363
# Best-effort reporting of an exception raised while handling a message:
# publish "event.error", echo the details on STDERR, log them, then call
# the droid's error handler when one is set.
#
# Anything raised while reporting (Exception, not only StandardError) is
# written to STDERR and swallowed.
# NOTE(review): @message is not assigned anywhere in the code visible here,
# so the handler presumably receives nil as its first argument - confirm.
def error(e)
  begin
    event = headers[:event_hash]
    publish("event.error", :event_hash => event) # hermes fail whale
    msg = "#{e.class}: #{e.message}\n #{e.backtrace.join("\n ")}\n"
    stderr_puts "About to log error #{headers[:event_hash]}"
    stderr_puts e.message
    stderr_puts msg
    Log.error e.message, :addendum => msg, :event_hash => headers[:event_hash], :exception => e
    @droid.error_handler.call(@message, e, self) if @droid.error_handler
  rescue Exception => failure
    stderr_puts "error handling error! #{failure.inspect}"
  end
end
376
+
377
# Write one line to STDERR.
def stderr_puts(msg)
  STDERR.puts(msg)
end
380
+
381
# Hand the block to EM.defer, wrapped so that a StandardError raised by it
# goes to #error instead of escaping.
def defer(&blk)
  guarded = lambda do
    begin
      blk.call
    rescue => failure
      error(failure)
    end
  end
  EM.defer(guarded)
end
390
+
391
# Publish +payload+ to the routing key stored in headers[:reply_to].
def reply(payload, options={})
  target = headers[:reply_to]
  publish(target, payload, options)
end
394
+
395
# Bind the queue to the exchange under @key and subscribe to it.
#
# opts - :temp   only a literal true enables it; the queue is then unbound
#                and deleted after each handled message.
#        :ack    only a literal true enables acknowledgements.
#        :detail when set, the block also receives info and data.
#        With prefetch configured, :ack is forced on and :temp off.
#
# For each message: parse it, log it, then either run the droid's
# before_filter and the block (with a copy of this listener as first
# argument), or - when the TTL has expired - log a timeout and ack if acks
# are on. A TTL of -1 never expires. Errors go to #error.
#
# Returns self.
def listen(opts={}, &blk)
  opts[:temp] = opts[:temp] === true
  opts[:ack] = opts[:ack] === true

  if @prefetch
    opts[:ack] = true # we must ack messages received in order for prefetch to work
    opts[:temp] = false # doesn't make sense for it to be temporary if we're setting prefetch
  end

  bound = @q.bind(exchange, :key => @key)
  bound.subscribe(:ack => opts[:ack]) do |info, data|
    Utilization.monitor(@key, :temp => opts[:temp]) do
      begin
        parse(info, data)

        callargs = opts[:detail] ? [self.dup, info, data] : [self.dup]

        ttl = headers[:ttl]
        now = Time.now.getgm.to_i
        published_on = headers[:published_on]

        Log.notice "#{@key} received #{payload_summary(params)}"

        expired = ttl != -1 && now > (published_on + ttl)
        if expired
          Log.error "#{@key} message timeout", :addendum => payload_summary(params)
          info.ack if opts[:ack]
        else
          @droid.before_filter.call(*callargs) if @droid.before_filter
          blk.call(*callargs)
        end

        duration = Time.now.getgm.to_i - now

        Log.notice "(#{duration} seconds) SLOW AMQP #{@key} #{payload_summary(params)}" if duration > 3
      rescue => e
        error(e)
      ensure
        # Temporary listeners tear their queue down after one message.
        if opts[:temp]
          @q.unbind(exchange)
          @q.delete
        end
      end
    end
  end

  self
end
444
+
445
# Put the current message back on this listener's queue.
#
# opts - :ttl for the requeued copy (set to 10 on the passed hash when
#        missing).
#
# The copy carries the parsed params plus extra_headers, with the string
# 'ttl' entry dropped in favour of the new :ttl. The original publish
# timestamp is kept when known, otherwise the current GMT time is used.
def requeue(opts={})
  opts[:ttl] ||= 10

  now = Time.now.getgm.to_i

  payload = @params.merge(extra_headers).
    reject { |name, _| name == 'ttl' }.
    merge(:ttl => opts[:ttl])

  stamp = headers[:published_on] || now
  @q.publish(Droid.payload_to_data(payload, :published_on => stamp))
end
457
+
458
# Decode an incoming message and split it into params and headers.
#
# data - JSON text; the decoded object becomes @params.
# info - its #properties are merged into the headers last.
#
# 'event_hash', 'reply_to', 'published_on' and 'ttl' are moved out of the
# params into the headers. published_on falls back to 0 and ttl to -1 when
# the value cannot be converted with to_i; a ttl of 0 is also stored as -1.
# Returns the header hash.
def parse(info, data)
  @headers = nil
  @params = JSON.parse(data)

  ['event_hash', 'reply_to'].each do |name|
    headers[name.to_sym] = @params.delete(name) if @params[name]
  end

  headers[:published_on] =
    begin
      @params.delete('published_on').to_i
    rescue
      0
    end

  headers[:ttl] =
    begin
      @params.delete('ttl').to_i
    rescue
      -1
    end
  headers[:ttl] = -1 if headers[:ttl] == 0

  headers.merge!(info.properties) # add protocol headers
end
469
+
470
# The subset of headers that requeue copies into a requeued payload.
def extra_headers
  {
    :event_hash => headers[:event_hash],
    :reply_to => headers[:reply_to],
    :published_on => headers[:published_on]
  }
end
477
+
478
# Read one entry of the parsed params; the key is looked up as a String.
def [](key)
  name = key.to_s
  @params[name]
end
481
+
482
# Stop consuming from this listener's queue.
def unsubscribe
  return @q.unsubscribe
end
485
+ end
486
+
487
+ attr_accessor :name
488
+ attr_reader :error_handler, :before_filter
489
+
490
# Wait for the broker to accept connections, then start the event loop.
#
# name        - droid name, used in log lines and stored as @name.
# credentials - connection settings passed to the class-level start;
#               :port is set to 5672 on the passed hash when missing.
# blk         - called with this droid once the loop is running.
#
# The port check is asked for 6 attempts before giving up.
def initialize(name, credentials, &blk)
  Log.notice "=== #{name} droid initializing"

  credentials[:port] ||= 5672
  host, port = credentials.values_at(:host, :port)
  self.class.wait_for_tcp_port(host, port, :retries => 6)

  @name = name
  Log.notice "=== #{name} droid starting"
  self.class.start(credentials) { blk.call(self) }
  self
end
502
+
503
# Publish through a fresh Basic publisher bound to this droid. A block, when
# given, is passed on to Basic#publish.
def publish(key, payload={}, options={}, &blk)
  publisher = Basic.new(self, options)
  publisher.publish(key, payload, options, &blk)
end
506
+
507
# Create a Listener for +key+ and start it listening with the block.
#
# The listener is built from the full options hash first; :temp, :detail
# and :ack are then removed from that hash and passed to Listener#listen.
# Returns what Listener#listen returns.
def listen4(key, options={}, &blk)
  listener = Listener.new(self, key, options)
  listen_opts = {
    :temp => options.delete(:temp),
    :detail => options.delete(:detail),
    :ack => options.delete(:ack),
  }
  listener.listen(listen_opts, &blk)
end
514
+
515
# Register the block as this droid's error handler (read back through
# error_handler).
def on_error(&handler)
  @error_handler = handler
end
518
+
519
# With a block: store it as the before filter. Either way, return the
# filter currently stored.
def before_filter(&blk)
  @before_filter = blk if blk
  @before_filter
end
522
+
523
# Store the block as the stats callback and log its current value.
def stats(&blk)
  @stats = blk
  Log.notice(call_stats)
end
527
+
528
# Value of the stats callback, or nil when none has been registered.
def call_stats
  @stats.call if @stats
end
531
+ end