roundhouse-x 0.1.0

Files changed (168)
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +16 -0
  4. data/3.0-Upgrade.md +70 -0
  5. data/Changes.md +1127 -0
  6. data/Gemfile +27 -0
  7. data/LICENSE +7 -0
  8. data/README.md +52 -0
  9. data/Rakefile +9 -0
  10. data/bin/roundhouse +19 -0
  11. data/bin/roundhousectl +93 -0
  12. data/lib/generators/roundhouse/templates/worker.rb.erb +9 -0
  13. data/lib/generators/roundhouse/templates/worker_spec.rb.erb +6 -0
  14. data/lib/generators/roundhouse/templates/worker_test.rb.erb +8 -0
  15. data/lib/generators/roundhouse/worker_generator.rb +49 -0
  16. data/lib/roundhouse/actor.rb +39 -0
  17. data/lib/roundhouse/api.rb +859 -0
  18. data/lib/roundhouse/cli.rb +396 -0
  19. data/lib/roundhouse/client.rb +210 -0
  20. data/lib/roundhouse/core_ext.rb +105 -0
  21. data/lib/roundhouse/exception_handler.rb +30 -0
  22. data/lib/roundhouse/fetch.rb +154 -0
  23. data/lib/roundhouse/launcher.rb +98 -0
  24. data/lib/roundhouse/logging.rb +104 -0
  25. data/lib/roundhouse/manager.rb +236 -0
  26. data/lib/roundhouse/middleware/chain.rb +149 -0
  27. data/lib/roundhouse/middleware/i18n.rb +41 -0
  28. data/lib/roundhouse/middleware/server/active_record.rb +13 -0
  29. data/lib/roundhouse/middleware/server/logging.rb +40 -0
  30. data/lib/roundhouse/middleware/server/retry_jobs.rb +206 -0
  31. data/lib/roundhouse/monitor.rb +124 -0
  32. data/lib/roundhouse/paginator.rb +42 -0
  33. data/lib/roundhouse/processor.rb +159 -0
  34. data/lib/roundhouse/rails.rb +24 -0
  35. data/lib/roundhouse/redis_connection.rb +77 -0
  36. data/lib/roundhouse/scheduled.rb +115 -0
  37. data/lib/roundhouse/testing/inline.rb +28 -0
  38. data/lib/roundhouse/testing.rb +193 -0
  39. data/lib/roundhouse/util.rb +68 -0
  40. data/lib/roundhouse/version.rb +3 -0
  41. data/lib/roundhouse/web.rb +264 -0
  42. data/lib/roundhouse/web_helpers.rb +249 -0
  43. data/lib/roundhouse/worker.rb +90 -0
  44. data/lib/roundhouse.rb +177 -0
  45. data/roundhouse.gemspec +27 -0
  46. data/test/config.yml +9 -0
  47. data/test/env_based_config.yml +11 -0
  48. data/test/fake_env.rb +0 -0
  49. data/test/fixtures/en.yml +2 -0
  50. data/test/helper.rb +49 -0
  51. data/test/test_api.rb +521 -0
  52. data/test/test_cli.rb +389 -0
  53. data/test/test_client.rb +294 -0
  54. data/test/test_exception_handler.rb +55 -0
  55. data/test/test_fetch.rb +206 -0
  56. data/test/test_logging.rb +34 -0
  57. data/test/test_manager.rb +169 -0
  58. data/test/test_middleware.rb +160 -0
  59. data/test/test_monitor.rb +258 -0
  60. data/test/test_processor.rb +176 -0
  61. data/test/test_rails.rb +23 -0
  62. data/test/test_redis_connection.rb +127 -0
  63. data/test/test_retry.rb +390 -0
  64. data/test/test_roundhouse.rb +87 -0
  65. data/test/test_scheduled.rb +120 -0
  66. data/test/test_scheduling.rb +75 -0
  67. data/test/test_testing.rb +78 -0
  68. data/test/test_testing_fake.rb +240 -0
  69. data/test/test_testing_inline.rb +65 -0
  70. data/test/test_util.rb +18 -0
  71. data/test/test_web.rb +605 -0
  72. data/test/test_web_helpers.rb +52 -0
  73. data/web/assets/images/bootstrap/glyphicons-halflings-white.png +0 -0
  74. data/web/assets/images/bootstrap/glyphicons-halflings.png +0 -0
  75. data/web/assets/images/logo.png +0 -0
  76. data/web/assets/images/status/active.png +0 -0
  77. data/web/assets/images/status/idle.png +0 -0
  78. data/web/assets/images/status-sd8051fd480.png +0 -0
  79. data/web/assets/javascripts/application.js +83 -0
  80. data/web/assets/javascripts/dashboard.js +300 -0
  81. data/web/assets/javascripts/locales/README.md +27 -0
  82. data/web/assets/javascripts/locales/jquery.timeago.ar.js +96 -0
  83. data/web/assets/javascripts/locales/jquery.timeago.bg.js +18 -0
  84. data/web/assets/javascripts/locales/jquery.timeago.bs.js +49 -0
  85. data/web/assets/javascripts/locales/jquery.timeago.ca.js +18 -0
  86. data/web/assets/javascripts/locales/jquery.timeago.cs.js +18 -0
  87. data/web/assets/javascripts/locales/jquery.timeago.cy.js +20 -0
  88. data/web/assets/javascripts/locales/jquery.timeago.da.js +18 -0
  89. data/web/assets/javascripts/locales/jquery.timeago.de.js +18 -0
  90. data/web/assets/javascripts/locales/jquery.timeago.el.js +18 -0
  91. data/web/assets/javascripts/locales/jquery.timeago.en-short.js +20 -0
  92. data/web/assets/javascripts/locales/jquery.timeago.en.js +20 -0
  93. data/web/assets/javascripts/locales/jquery.timeago.es.js +18 -0
  94. data/web/assets/javascripts/locales/jquery.timeago.et.js +18 -0
  95. data/web/assets/javascripts/locales/jquery.timeago.fa.js +22 -0
  96. data/web/assets/javascripts/locales/jquery.timeago.fi.js +28 -0
  97. data/web/assets/javascripts/locales/jquery.timeago.fr-short.js +16 -0
  98. data/web/assets/javascripts/locales/jquery.timeago.fr.js +17 -0
  99. data/web/assets/javascripts/locales/jquery.timeago.he.js +18 -0
  100. data/web/assets/javascripts/locales/jquery.timeago.hr.js +49 -0
  101. data/web/assets/javascripts/locales/jquery.timeago.hu.js +18 -0
  102. data/web/assets/javascripts/locales/jquery.timeago.hy.js +18 -0
  103. data/web/assets/javascripts/locales/jquery.timeago.id.js +18 -0
  104. data/web/assets/javascripts/locales/jquery.timeago.it.js +16 -0
  105. data/web/assets/javascripts/locales/jquery.timeago.ja.js +19 -0
  106. data/web/assets/javascripts/locales/jquery.timeago.ko.js +17 -0
  107. data/web/assets/javascripts/locales/jquery.timeago.lt.js +20 -0
  108. data/web/assets/javascripts/locales/jquery.timeago.mk.js +20 -0
  109. data/web/assets/javascripts/locales/jquery.timeago.nl.js +20 -0
  110. data/web/assets/javascripts/locales/jquery.timeago.no.js +18 -0
  111. data/web/assets/javascripts/locales/jquery.timeago.pl.js +31 -0
  112. data/web/assets/javascripts/locales/jquery.timeago.pt-br.js +16 -0
  113. data/web/assets/javascripts/locales/jquery.timeago.pt.js +16 -0
  114. data/web/assets/javascripts/locales/jquery.timeago.ro.js +18 -0
  115. data/web/assets/javascripts/locales/jquery.timeago.rs.js +49 -0
  116. data/web/assets/javascripts/locales/jquery.timeago.ru.js +34 -0
  117. data/web/assets/javascripts/locales/jquery.timeago.sk.js +18 -0
  118. data/web/assets/javascripts/locales/jquery.timeago.sl.js +44 -0
  119. data/web/assets/javascripts/locales/jquery.timeago.sv.js +18 -0
  120. data/web/assets/javascripts/locales/jquery.timeago.th.js +20 -0
  121. data/web/assets/javascripts/locales/jquery.timeago.tr.js +16 -0
  122. data/web/assets/javascripts/locales/jquery.timeago.uk.js +34 -0
  123. data/web/assets/javascripts/locales/jquery.timeago.uz.js +19 -0
  124. data/web/assets/javascripts/locales/jquery.timeago.zh-cn.js +20 -0
  125. data/web/assets/javascripts/locales/jquery.timeago.zh-tw.js +20 -0
  126. data/web/assets/stylesheets/application.css +746 -0
  127. data/web/assets/stylesheets/bootstrap.css +9 -0
  128. data/web/locales/cs.yml +68 -0
  129. data/web/locales/da.yml +68 -0
  130. data/web/locales/de.yml +69 -0
  131. data/web/locales/el.yml +68 -0
  132. data/web/locales/en.yml +77 -0
  133. data/web/locales/es.yml +69 -0
  134. data/web/locales/fr.yml +69 -0
  135. data/web/locales/hi.yml +75 -0
  136. data/web/locales/it.yml +69 -0
  137. data/web/locales/ja.yml +69 -0
  138. data/web/locales/ko.yml +68 -0
  139. data/web/locales/nl.yml +68 -0
  140. data/web/locales/no.yml +69 -0
  141. data/web/locales/pl.yml +59 -0
  142. data/web/locales/pt-br.yml +68 -0
  143. data/web/locales/pt.yml +67 -0
  144. data/web/locales/ru.yml +75 -0
  145. data/web/locales/sv.yml +68 -0
  146. data/web/locales/ta.yml +75 -0
  147. data/web/locales/zh-cn.yml +68 -0
  148. data/web/locales/zh-tw.yml +68 -0
  149. data/web/views/_footer.erb +22 -0
  150. data/web/views/_job_info.erb +84 -0
  151. data/web/views/_nav.erb +66 -0
  152. data/web/views/_paging.erb +23 -0
  153. data/web/views/_poll_js.erb +5 -0
  154. data/web/views/_poll_link.erb +7 -0
  155. data/web/views/_status.erb +4 -0
  156. data/web/views/_summary.erb +40 -0
  157. data/web/views/busy.erb +90 -0
  158. data/web/views/dashboard.erb +75 -0
  159. data/web/views/dead.erb +34 -0
  160. data/web/views/layout.erb +31 -0
  161. data/web/views/morgue.erb +71 -0
  162. data/web/views/queue.erb +45 -0
  163. data/web/views/queues.erb +27 -0
  164. data/web/views/retries.erb +74 -0
  165. data/web/views/retry.erb +34 -0
  166. data/web/views/scheduled.erb +54 -0
  167. data/web/views/scheduled_job_info.erb +8 -0
  168. metadata +404 -0
data/lib/roundhouse/exception_handler.rb
@@ -0,0 +1,30 @@
+ require 'roundhouse'
+
+ module Roundhouse
+   module ExceptionHandler
+
+     class Logger
+       def call(ex, ctxHash)
+         Roundhouse.logger.warn(ctxHash) if !ctxHash.empty?
+         Roundhouse.logger.warn "#{ex.class.name}: #{ex.message}"
+         Roundhouse.logger.warn ex.backtrace.join("\n") unless ex.backtrace.nil?
+       end
+
+       # Set up default handler which just logs the error
+       Roundhouse.error_handlers << Roundhouse::ExceptionHandler::Logger.new
+     end
+
+     def handle_exception(ex, ctxHash={})
+       Roundhouse.error_handlers.each do |handler|
+         begin
+           handler.call(ex, ctxHash)
+         rescue => ex
+           Roundhouse.logger.error "!!! ERROR HANDLER THREW AN ERROR !!!"
+           Roundhouse.logger.error ex
+           Roundhouse.logger.error ex.backtrace.join("\n") unless ex.backtrace.nil?
+         end
+       end
+     end
+
+   end
+ end
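Any object that responds to `call(ex, ctxHash)` can be appended to `Roundhouse.error_handlers`; `handle_exception` invokes every registered handler and, because each call is wrapped in its own `rescue`, a failing handler is logged rather than crashing the process. A minimal sketch, assuming only the API shown above (the counting handler itself is hypothetical):

```ruby
require 'roundhouse'

# Hypothetical extra handler: tally errors by exception class.
# Any callable works, so a plain proc is enough.
ERROR_COUNTS = Hash.new(0)

Roundhouse.error_handlers << proc do |ex, _ctx|
  ERROR_COUNTS[ex.class.name] += 1
end
```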
data/lib/roundhouse/fetch.rb
@@ -0,0 +1,154 @@
+ require 'roundhouse'
+ require 'roundhouse/monitor'
+ require 'roundhouse/util'
+ require 'roundhouse/actor'
+
+ module Roundhouse
+   ##
+   # The Fetcher blocks on Redis, waiting for a message to process
+   # from the queues. It gets the message and hands it to the Manager
+   # to assign to a ready Processor.
+   class Fetcher
+     include Util
+     include Actor
+
+     TIMEOUT = 1
+
+     attr_reader :down
+
+     def initialize(mgr, options)
+       @down = nil
+       @mgr = mgr
+       @strategy = Fetcher.strategy.new(options)
+     end
+
+     # Fetching is straightforward: the Manager makes a fetch
+     # request for each idle processor when Roundhouse starts and
+     # then issues a new fetch request every time a Processor
+     # finishes a message.
+     #
+     # Because we have to shut down cleanly, we can't block
+     # forever and we can't loop forever. Instead we reschedule
+     # a new fetch if the current fetch turned up nothing.
+     def fetch
+       watchdog('Fetcher#fetch died') do
+         return if Roundhouse::Fetcher.done?
+
+         begin
+           work = @strategy.retrieve_work
+           ::Roundhouse.logger.info("Redis is online, #{Time.now - @down} sec downtime") if @down
+           @down = nil
+
+           if work
+             @mgr.async.assign(work)
+           else
+             after(0) { fetch }
+           end
+         rescue => ex
+           handle_fetch_exception(ex)
+         end
+
+       end
+     end
+
+     private
+
+     def pause
+       sleep(TIMEOUT)
+     end
+
+     def handle_fetch_exception(ex)
+       if !@down
+         logger.error("Error fetching message: #{ex}")
+         ex.backtrace.each do |bt|
+           logger.error(bt)
+         end
+       end
+       @down ||= Time.now
+       pause
+       after(0) { fetch }
+     rescue Celluloid::TaskTerminated
+       # If redis is down when we try to shut down, all the fetch backlog
+       # raises these errors. Haven't been able to figure out what I'm doing wrong.
+     end
+
+     # Ugh. Say hello to a bloody hack.
+     # Can't find a clean way to get the fetcher to just stop processing
+     # its mailbox when shutdown starts.
+     def self.done!
+       @done = true
+     end
+
+     def self.reset # testing only
+       @done = nil
+     end
+
+     def self.done?
+       defined?(@done) && @done
+     end
+
+     def self.strategy
+       Roundhouse.options[:fetch] || RoundRobinFetch
+     end
+   end
+
+   class RoundRobinFetch
+     def initialize(options = nil)
+     end
+
+     def retrieve_work
+       work = Roundhouse.redis { |conn| Roundhouse::Monitor.await_next_job(conn) }
+       UnitOfWork.new(*work) if work
+     end
+
+     # By leaving this as a class method, it can be pluggable and used by the
+     # Manager actor. Making it an instance method would make it async to the
+     # Fetcher actor.
+     def self.bulk_requeue(inprogress, options)
+       return if inprogress.empty?
+
+       Roundhouse.logger.debug { "Re-queueing terminated jobs" }
+       jobs_to_requeue = {}
+       inprogress.each do |unit_of_work|
+         jobs_to_requeue[unit_of_work.queue_id] ||= []
+         jobs_to_requeue[unit_of_work.queue_id] << unit_of_work.message
+       end
+
+       Roundhouse.redis do |conn|
+         conn.pipelined do
+           jobs_to_requeue.each do |queue_id, jobs|
+             Roundhouse::Monitor.requeue(conn, queue_id, jobs)
+           end
+         end
+         # REFACTOR NOTE: This has to happen outside the pipelining since
+         # we need to read. We can refactor to put this back
+         # after converting the Monitor operations to EVAL scripts.
+         jobs_to_requeue.keys.each do |queue_id|
+           Roundhouse::Monitor.push(conn, queue_id)
+         end
+       end
+       Roundhouse.logger.info("Pushed #{inprogress.size} messages back to Redis")
+     rescue => ex
+       Roundhouse.logger.warn("Failed to requeue #{inprogress.size} jobs: #{ex.message}")
+     end
+
+     UnitOfWork = Struct.new(:full_queue_name, :message) do
+       QUEUE_REGEX = /.*#{Roundhouse::Monitor::QUEUE}:/.freeze
+
+       def acknowledge
+         # nothing to do
+       end
+
+       def queue_id
+         full_queue_name.gsub(QUEUE_REGEX, '')
+       end
+
+       def requeue
+         Roundhouse.redis do |conn|
+           Roundhouse::Monitor.requeue(conn, queue_id, message)
+           Roundhouse::Monitor.push(conn, queue_id)
+         end
+       end
+     end
+
+   end
+ end
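`Fetcher.strategy` makes fetching pluggable: whatever class is assigned to `Roundhouse.options[:fetch]` is instantiated with the server options, asked for work via `retrieve_work`, and expected to expose a class-level `bulk_requeue(inprogress, options)` for shutdown. A sketch of a custom strategy under that contract (`SampledFetch` is hypothetical):

```ruby
require 'roundhouse'
require 'roundhouse/fetch'

# Hypothetical strategy: reuse the round-robin behaviour but log every
# 100th fetch attempt. bulk_requeue is inherited as a class method.
class SampledFetch < Roundhouse::RoundRobinFetch
  def initialize(options = nil)
    super
    @attempts = 0
  end

  def retrieve_work
    @attempts += 1
    Roundhouse.logger.debug { "fetch attempt ##{@attempts}" } if (@attempts % 100).zero?
    super
  end
end

Roundhouse.options[:fetch] = SampledFetch
```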
data/lib/roundhouse/launcher.rb
@@ -0,0 +1,98 @@
+ require 'roundhouse/actor'
+ require 'roundhouse/manager'
+ require 'roundhouse/fetch'
+ require 'roundhouse/scheduled'
+
+ module Roundhouse
+   # The Launcher is a very simple Actor whose job is to
+   # start, monitor and stop the core Actors in Roundhouse.
+   # If any of these actors die, the Roundhouse process exits
+   # immediately.
+   class Launcher
+     include Actor
+     include Util
+
+     trap_exit :actor_died
+
+     attr_reader :manager, :poller, :fetcher
+
+     def initialize(options)
+       @condvar = Celluloid::Condition.new
+       @manager = Roundhouse::Manager.new_link(@condvar, options)
+       @poller = Roundhouse::Scheduled::Poller.new_link
+       @fetcher = Roundhouse::Fetcher.new_link(@manager, options)
+       @manager.fetcher = @fetcher
+       @done = false
+       @options = options
+     end
+
+     def actor_died(actor, reason)
+       # https://github.com/mperham/sidekiq/issues/2057#issuecomment-66485477
+       return if @done || !reason
+
+       Roundhouse.logger.warn("Roundhouse died due to the following error, cannot recover, process exiting")
+       handle_exception(reason)
+       exit(1)
+     end
+
+     def run
+       watchdog('Launcher#run') do
+         manager.async.start
+         poller.async.poll(true)
+
+         start_heartbeat
+       end
+     end
+
+     def stop
+       watchdog('Launcher#stop') do
+         @done = true
+         Roundhouse::Fetcher.done!
+         fetcher.terminate if fetcher.alive?
+         poller.terminate if poller.alive?
+
+         manager.async.stop(:shutdown => true, :timeout => @options[:timeout])
+         @condvar.wait
+         manager.terminate
+
+         # Requeue everything in case there was a worker who grabbed work while stopped
+         # This call is a no-op in Roundhouse but necessary for Roundhouse Pro.
+         Roundhouse::Fetcher.strategy.bulk_requeue([], @options)
+
+         stop_heartbeat
+       end
+     end
+
+     private
+
+     def start_heartbeat
+       key = identity
+       data = {
+         'hostname' => hostname,
+         'started_at' => Time.now.to_f,
+         'pid' => $$,
+         'tag' => @options[:tag] || '',
+         'concurrency' => @options[:concurrency],
+         'queues' => @options[:queues].uniq,
+         'labels' => Roundhouse.options[:labels],
+         'identity' => identity,
+       }
+       # this data doesn't change so dump it to a string
+       # now so we don't need to dump it every heartbeat.
+       json = Roundhouse.dump_json(data)
+       manager.heartbeat(key, data, json)
+     end
+
+     def stop_heartbeat
+       Roundhouse.redis do |conn|
+         conn.pipelined do
+           conn.srem('processes', identity)
+           conn.del("#{identity}:workers")
+         end
+       end
+     rescue
+       # best effort, ignore network errors
+     end
+
+   end
+ end
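The heartbeat registered here is what makes a process visible to the rest of the system: `identity` lands in the `processes` set and its hash carries the dumped JSON plus live `busy`/`beat` fields (written by `Manager#❤` below). A sketch of reading it back, assuming a `Roundhouse.load_json` counterpart to the `dump_json` call above:

```ruby
require 'roundhouse'

# Walk the 'processes' set and print each process's heartbeat data.
Roundhouse.redis do |conn|
  conn.smembers('processes').each do |identity|
    info, busy, beat = conn.hmget(identity, 'info', 'busy', 'beat')
    next unless info # the 60-second TTL may have expired
    data = Roundhouse.load_json(info)
    puts "#{identity}: #{busy} of #{data['concurrency']} busy, last beat at #{Time.at(beat.to_f)}"
  end
end
```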
data/lib/roundhouse/logging.rb
@@ -0,0 +1,104 @@
+ require 'time'
+ require 'logger'
+
+ module Roundhouse
+   module Logging
+
+     class Pretty < Logger::Formatter
+       SPACE = " "
+
+       # Provide a call() method that returns the formatted message.
+       def call(severity, time, program_name, message)
+         "#{time.utc.iso8601(3)} #{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
+       end
+
+       def context
+         c = Thread.current[:roundhouse_context]
+         " #{c.join(SPACE)}" if c && c.any?
+       end
+     end
+
+     class WithoutTimestamp < Pretty
+       def call(severity, time, program_name, message)
+         "#{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
+       end
+     end
+
+     def self.with_context(msg)
+       Thread.current[:roundhouse_context] ||= []
+       Thread.current[:roundhouse_context] << msg
+       yield
+     ensure
+       Thread.current[:roundhouse_context].pop
+     end
+
+     def self.initialize_logger(log_target = STDOUT)
+       oldlogger = defined?(@logger) ? @logger : nil
+       @logger = Logger.new(log_target)
+       @logger.level = Logger::INFO
+       @logger.formatter = ENV['DYNO'] ? WithoutTimestamp.new : Pretty.new
+       oldlogger.close if oldlogger && !$TESTING # don't want to close testing's STDOUT logging
+       @logger
+     end
+
+     def self.logger
+       defined?(@logger) ? @logger : initialize_logger
+     end
+
+     def self.logger=(log)
+       @logger = (log ? log : Logger.new('/dev/null'))
+     end
+
+     # This reopens ALL logfiles in the process that have been rotated
+     # using logrotate(8) (without copytruncate) or similar tools.
+     # A +File+ object is considered for reopening if it is:
+     #   1) opened with the O_APPEND and O_WRONLY flags
+     #   2) the current open file handle does not match its original open path
+     #   3) unbuffered (as far as userspace buffering goes, not O_SYNC)
+     # Returns the number of files reopened
+     def self.reopen_logs
+       to_reopen = []
+       append_flags = File::WRONLY | File::APPEND
+
+       ObjectSpace.each_object(File) do |fp|
+         begin
+           if !fp.closed? && fp.stat.file? && fp.sync && (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags
+             to_reopen << fp
+           end
+         rescue IOError, Errno::EBADF
+         end
+       end
+
+       nr = 0
+       to_reopen.each do |fp|
+         orig_st = begin
+           fp.stat
+         rescue IOError, Errno::EBADF
+           next
+         end
+
+         begin
+           b = File.stat(fp.path)
+           next if orig_st.ino == b.ino && orig_st.dev == b.dev
+         rescue Errno::ENOENT
+         end
+
+         begin
+           File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) }
+           fp.sync = true
+           nr += 1
+         rescue IOError, Errno::EBADF
+           # not much we can do...
+         end
+       end
+       nr
+     rescue RuntimeError => ex
+       # RuntimeError: ObjectSpace is disabled; each_object will only work with Class, pass -X+O to enable
+       puts "Unable to reopen logs: #{ex.message}"
+     end
+
+     def logger
+       Roundhouse::Logging.logger
+     end
+   end
+ end
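`with_context` is how job-scoped tags end up in log lines: `Pretty#context` joins the thread-local stack and splices it between the TID and the severity. A small usage sketch (the `JID-...` tag is just an example value):

```ruby
require 'roundhouse'

Roundhouse::Logging.with_context("JID-24601") do
  Roundhouse.logger.info("job started")
  # e.g. 2015-06-01T12:00:00.000Z 4321 TID-oxn1k JID-24601 INFO: job started
end
# The ensure block pops the tag, so it never leaks into later log lines.
```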
data/lib/roundhouse/manager.rb
@@ -0,0 +1,236 @@
+ # encoding: utf-8
+ require 'roundhouse/util'
+ require 'roundhouse/actor'
+ require 'roundhouse/processor'
+ require 'roundhouse/fetch'
+
+ module Roundhouse
+
+   ##
+   # The main router in the system. This
+   # manages the processor state and accepts messages
+   # from Redis to be dispatched to an idle processor.
+   #
+   class Manager
+     include Util
+     include Actor
+     trap_exit :processor_died
+
+     attr_reader :ready
+     attr_reader :busy
+     attr_accessor :fetcher
+
+     SPIN_TIME_FOR_GRACEFUL_SHUTDOWN = 1
+     JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
+
+     def initialize(condvar, options={})
+       logger.debug { options.inspect }
+       @options = options
+       @count = options[:concurrency] || 25
+       raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
+       @done_callback = nil
+       @finished = condvar
+
+       @in_progress = {}
+       @threads = {}
+       @done = false
+       @busy = []
+       @ready = @count.times.map do
+         p = Processor.new_link(current_actor)
+         p.proxy_id = p.object_id
+         p
+       end
+     end
+
+     def stop(options={})
+       watchdog('Manager#stop died') do
+         should_shutdown = options[:shutdown]
+         timeout = options[:timeout]
+
+         @done = true
+
+         logger.info { "Terminating #{@ready.size} quiet workers" }
+         @ready.each { |x| x.terminate if x.alive? }
+         @ready.clear
+
+         return if clean_up_for_graceful_shutdown
+
+         hard_shutdown_in timeout if should_shutdown
+       end
+     end
+
+     def clean_up_for_graceful_shutdown
+       if @busy.empty?
+         shutdown
+         return true
+       end
+
+       after(SPIN_TIME_FOR_GRACEFUL_SHUTDOWN) { clean_up_for_graceful_shutdown }
+       false
+     end
+
+     def start
+       @ready.each { dispatch }
+     end
+
+     def when_done(&blk)
+       @done_callback = blk
+     end
+
+     def processor_done(processor)
+       watchdog('Manager#processor_done died') do
+         @done_callback.call(processor) if @done_callback
+         @in_progress.delete(processor.object_id)
+         @threads.delete(processor.object_id)
+         @busy.delete(processor)
+         if stopped?
+           processor.terminate if processor.alive?
+           shutdown if @busy.empty?
+         else
+           @ready << processor if processor.alive?
+         end
+         dispatch
+       end
+     end
+
+     def processor_died(processor, reason)
+       watchdog("Manager#processor_died died") do
+         @in_progress.delete(processor.object_id)
+         @threads.delete(processor.object_id)
+         @busy.delete(processor)
+
+         unless stopped?
+           p = Processor.new_link(current_actor)
+           p.proxy_id = p.object_id
+           @ready << p
+           dispatch
+         else
+           shutdown if @busy.empty?
+         end
+       end
+     end
+
+     def assign(work)
+       watchdog("Manager#assign died") do
+         if stopped?
+           # Race condition with Manager#stop: if the Fetcher
+           # is blocked on redis, it can get a message after
+           # all the ready Processors have been stopped.
+           # Push the message back to redis.
+           work.requeue
+         else
+           processor = @ready.pop
+           @in_progress[processor.object_id] = work
+           @busy << processor
+           processor.async.process(work)
+         end
+       end
+     end
+
+     # A hack worthy of Rube Goldberg. We need to be able
+     # to hard stop a working thread. But there's no way for us to
+     # get a handle to the underlying thread performing work for a processor,
+     # so we have it call us and tell us.
+     def real_thread(proxy_id, thr)
+       @threads[proxy_id] = thr
+     end
+
+     PROCTITLES = [
+       proc { 'roundhouse'.freeze },
+       proc { Roundhouse::VERSION },
+       proc { |mgr, data| data['tag'] },
+       proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
+       proc { |mgr, data| "stopping" if mgr.stopped? },
+     ]
+
+     def heartbeat(key, data, json)
+       results = PROCTITLES.map { |x| x.(self, data) }
+       results.compact!
+       $0 = results.join(' ')
+
+       ❤(key, json)
+       after(5) do
+         heartbeat(key, data, json)
+       end
+     end
+
+     def stopped?
+       @done
+     end
+
+     private
+
+     def ❤(key, json)
+       begin
+         _, _, _, msg = Roundhouse.redis do |conn|
+           conn.multi do
+             conn.sadd('processes', key)
+             conn.hmset(key, 'info', json, 'busy', @busy.size, 'beat', Time.now.to_f)
+             conn.expire(key, 60)
+             conn.rpop("#{key}-signals")
+           end
+         end
+
+         return unless msg
+
+         if JVM_RESERVED_SIGNALS.include?(msg)
+           Roundhouse::CLI.instance.handle_signal(msg)
+         else
+           ::Process.kill(msg, $$)
+         end
+       rescue => e
+         # ignore all redis/network issues
+         logger.error("heartbeat: #{e.message}")
+       end
+     end
+
+     def hard_shutdown_in(delay)
+       logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }
+
+       after(delay) do
+         watchdog("Manager#hard_shutdown_in died") do
+           # We've reached the timeout and we still have busy workers.
+           # They must die but their messages shall live on.
+           logger.warn { "Terminating #{@busy.size} busy worker threads" }
+           logger.warn { "Work still in progress #{@in_progress.values.inspect}" }
+
+           requeue
+
+           @busy.each do |processor|
+             if processor.alive? && t = @threads.delete(processor.object_id)
+               t.raise Shutdown
+             end
+           end
+
+           @finished.signal
+         end
+       end
+     end
+
+     def dispatch
+       return if stopped?
+       # This is a safety check to ensure we haven't leaked
+       # processors somehow.
+       raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
+       raise "No ready processor!?" if @ready.empty?
+
+       @fetcher.async.fetch
+     end
+
+     def shutdown
+       requeue
+       @finished.signal
+     end
+
+     def requeue
+       # Re-enqueue terminated jobs
+       # NOTE: You may notice that we may push a job back to redis before
+       # the worker thread is terminated. This is ok because Roundhouse's
+       # contract says that jobs are run AT LEAST once. Process termination
+       # is delayed until we're certain the jobs are back in Redis because
+       # it is worse to lose a job than to run it twice.
+       Roundhouse::Fetcher.strategy.bulk_requeue(@in_progress.values, @options)
+       @in_progress.clear
+     end
+   end
+ end
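Note how `❤` doubles as a control channel: every beat RPOPs one entry from the `#{key}-signals` list, routing USR1/USR2 through `Roundhouse::CLI.instance.handle_signal` and delivering anything else with `Process.kill`. A sketch of signalling every registered process through Redis, assuming the key layout shown above:

```ruby
require 'roundhouse'

# Queue a USR1 signal for every registered process; each process's next
# heartbeat (within ~5 seconds) pops and handles it.
Roundhouse.redis do |conn|
  conn.smembers('processes').each do |identity|
    conn.lpush("#{identity}-signals", 'USR1')
  end
end
```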