roundhouse-x 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +16 -0
  4. data/3.0-Upgrade.md +70 -0
  5. data/Changes.md +1127 -0
  6. data/Gemfile +27 -0
  7. data/LICENSE +7 -0
  8. data/README.md +52 -0
  9. data/Rakefile +9 -0
  10. data/bin/roundhouse +19 -0
  11. data/bin/roundhousectl +93 -0
  12. data/lib/generators/roundhouse/templates/worker.rb.erb +9 -0
  13. data/lib/generators/roundhouse/templates/worker_spec.rb.erb +6 -0
  14. data/lib/generators/roundhouse/templates/worker_test.rb.erb +8 -0
  15. data/lib/generators/roundhouse/worker_generator.rb +49 -0
  16. data/lib/roundhouse/actor.rb +39 -0
  17. data/lib/roundhouse/api.rb +859 -0
  18. data/lib/roundhouse/cli.rb +396 -0
  19. data/lib/roundhouse/client.rb +210 -0
  20. data/lib/roundhouse/core_ext.rb +105 -0
  21. data/lib/roundhouse/exception_handler.rb +30 -0
  22. data/lib/roundhouse/fetch.rb +154 -0
  23. data/lib/roundhouse/launcher.rb +98 -0
  24. data/lib/roundhouse/logging.rb +104 -0
  25. data/lib/roundhouse/manager.rb +236 -0
  26. data/lib/roundhouse/middleware/chain.rb +149 -0
  27. data/lib/roundhouse/middleware/i18n.rb +41 -0
  28. data/lib/roundhouse/middleware/server/active_record.rb +13 -0
  29. data/lib/roundhouse/middleware/server/logging.rb +40 -0
  30. data/lib/roundhouse/middleware/server/retry_jobs.rb +206 -0
  31. data/lib/roundhouse/monitor.rb +124 -0
  32. data/lib/roundhouse/paginator.rb +42 -0
  33. data/lib/roundhouse/processor.rb +159 -0
  34. data/lib/roundhouse/rails.rb +24 -0
  35. data/lib/roundhouse/redis_connection.rb +77 -0
  36. data/lib/roundhouse/scheduled.rb +115 -0
  37. data/lib/roundhouse/testing/inline.rb +28 -0
  38. data/lib/roundhouse/testing.rb +193 -0
  39. data/lib/roundhouse/util.rb +68 -0
  40. data/lib/roundhouse/version.rb +3 -0
  41. data/lib/roundhouse/web.rb +264 -0
  42. data/lib/roundhouse/web_helpers.rb +249 -0
  43. data/lib/roundhouse/worker.rb +90 -0
  44. data/lib/roundhouse.rb +177 -0
  45. data/roundhouse.gemspec +27 -0
  46. data/test/config.yml +9 -0
  47. data/test/env_based_config.yml +11 -0
  48. data/test/fake_env.rb +0 -0
  49. data/test/fixtures/en.yml +2 -0
  50. data/test/helper.rb +49 -0
  51. data/test/test_api.rb +521 -0
  52. data/test/test_cli.rb +389 -0
  53. data/test/test_client.rb +294 -0
  54. data/test/test_exception_handler.rb +55 -0
  55. data/test/test_fetch.rb +206 -0
  56. data/test/test_logging.rb +34 -0
  57. data/test/test_manager.rb +169 -0
  58. data/test/test_middleware.rb +160 -0
  59. data/test/test_monitor.rb +258 -0
  60. data/test/test_processor.rb +176 -0
  61. data/test/test_rails.rb +23 -0
  62. data/test/test_redis_connection.rb +127 -0
  63. data/test/test_retry.rb +390 -0
  64. data/test/test_roundhouse.rb +87 -0
  65. data/test/test_scheduled.rb +120 -0
  66. data/test/test_scheduling.rb +75 -0
  67. data/test/test_testing.rb +78 -0
  68. data/test/test_testing_fake.rb +240 -0
  69. data/test/test_testing_inline.rb +65 -0
  70. data/test/test_util.rb +18 -0
  71. data/test/test_web.rb +605 -0
  72. data/test/test_web_helpers.rb +52 -0
  73. data/web/assets/images/bootstrap/glyphicons-halflings-white.png +0 -0
  74. data/web/assets/images/bootstrap/glyphicons-halflings.png +0 -0
  75. data/web/assets/images/logo.png +0 -0
  76. data/web/assets/images/status/active.png +0 -0
  77. data/web/assets/images/status/idle.png +0 -0
  78. data/web/assets/images/status-sd8051fd480.png +0 -0
  79. data/web/assets/javascripts/application.js +83 -0
  80. data/web/assets/javascripts/dashboard.js +300 -0
  81. data/web/assets/javascripts/locales/README.md +27 -0
  82. data/web/assets/javascripts/locales/jquery.timeago.ar.js +96 -0
  83. data/web/assets/javascripts/locales/jquery.timeago.bg.js +18 -0
  84. data/web/assets/javascripts/locales/jquery.timeago.bs.js +49 -0
  85. data/web/assets/javascripts/locales/jquery.timeago.ca.js +18 -0
  86. data/web/assets/javascripts/locales/jquery.timeago.cs.js +18 -0
  87. data/web/assets/javascripts/locales/jquery.timeago.cy.js +20 -0
  88. data/web/assets/javascripts/locales/jquery.timeago.da.js +18 -0
  89. data/web/assets/javascripts/locales/jquery.timeago.de.js +18 -0
  90. data/web/assets/javascripts/locales/jquery.timeago.el.js +18 -0
  91. data/web/assets/javascripts/locales/jquery.timeago.en-short.js +20 -0
  92. data/web/assets/javascripts/locales/jquery.timeago.en.js +20 -0
  93. data/web/assets/javascripts/locales/jquery.timeago.es.js +18 -0
  94. data/web/assets/javascripts/locales/jquery.timeago.et.js +18 -0
  95. data/web/assets/javascripts/locales/jquery.timeago.fa.js +22 -0
  96. data/web/assets/javascripts/locales/jquery.timeago.fi.js +28 -0
  97. data/web/assets/javascripts/locales/jquery.timeago.fr-short.js +16 -0
  98. data/web/assets/javascripts/locales/jquery.timeago.fr.js +17 -0
  99. data/web/assets/javascripts/locales/jquery.timeago.he.js +18 -0
  100. data/web/assets/javascripts/locales/jquery.timeago.hr.js +49 -0
  101. data/web/assets/javascripts/locales/jquery.timeago.hu.js +18 -0
  102. data/web/assets/javascripts/locales/jquery.timeago.hy.js +18 -0
  103. data/web/assets/javascripts/locales/jquery.timeago.id.js +18 -0
  104. data/web/assets/javascripts/locales/jquery.timeago.it.js +16 -0
  105. data/web/assets/javascripts/locales/jquery.timeago.ja.js +19 -0
  106. data/web/assets/javascripts/locales/jquery.timeago.ko.js +17 -0
  107. data/web/assets/javascripts/locales/jquery.timeago.lt.js +20 -0
  108. data/web/assets/javascripts/locales/jquery.timeago.mk.js +20 -0
  109. data/web/assets/javascripts/locales/jquery.timeago.nl.js +20 -0
  110. data/web/assets/javascripts/locales/jquery.timeago.no.js +18 -0
  111. data/web/assets/javascripts/locales/jquery.timeago.pl.js +31 -0
  112. data/web/assets/javascripts/locales/jquery.timeago.pt-br.js +16 -0
  113. data/web/assets/javascripts/locales/jquery.timeago.pt.js +16 -0
  114. data/web/assets/javascripts/locales/jquery.timeago.ro.js +18 -0
  115. data/web/assets/javascripts/locales/jquery.timeago.rs.js +49 -0
  116. data/web/assets/javascripts/locales/jquery.timeago.ru.js +34 -0
  117. data/web/assets/javascripts/locales/jquery.timeago.sk.js +18 -0
  118. data/web/assets/javascripts/locales/jquery.timeago.sl.js +44 -0
  119. data/web/assets/javascripts/locales/jquery.timeago.sv.js +18 -0
  120. data/web/assets/javascripts/locales/jquery.timeago.th.js +20 -0
  121. data/web/assets/javascripts/locales/jquery.timeago.tr.js +16 -0
  122. data/web/assets/javascripts/locales/jquery.timeago.uk.js +34 -0
  123. data/web/assets/javascripts/locales/jquery.timeago.uz.js +19 -0
  124. data/web/assets/javascripts/locales/jquery.timeago.zh-cn.js +20 -0
  125. data/web/assets/javascripts/locales/jquery.timeago.zh-tw.js +20 -0
  126. data/web/assets/stylesheets/application.css +746 -0
  127. data/web/assets/stylesheets/bootstrap.css +9 -0
  128. data/web/locales/cs.yml +68 -0
  129. data/web/locales/da.yml +68 -0
  130. data/web/locales/de.yml +69 -0
  131. data/web/locales/el.yml +68 -0
  132. data/web/locales/en.yml +77 -0
  133. data/web/locales/es.yml +69 -0
  134. data/web/locales/fr.yml +69 -0
  135. data/web/locales/hi.yml +75 -0
  136. data/web/locales/it.yml +69 -0
  137. data/web/locales/ja.yml +69 -0
  138. data/web/locales/ko.yml +68 -0
  139. data/web/locales/nl.yml +68 -0
  140. data/web/locales/no.yml +69 -0
  141. data/web/locales/pl.yml +59 -0
  142. data/web/locales/pt-br.yml +68 -0
  143. data/web/locales/pt.yml +67 -0
  144. data/web/locales/ru.yml +75 -0
  145. data/web/locales/sv.yml +68 -0
  146. data/web/locales/ta.yml +75 -0
  147. data/web/locales/zh-cn.yml +68 -0
  148. data/web/locales/zh-tw.yml +68 -0
  149. data/web/views/_footer.erb +22 -0
  150. data/web/views/_job_info.erb +84 -0
  151. data/web/views/_nav.erb +66 -0
  152. data/web/views/_paging.erb +23 -0
  153. data/web/views/_poll_js.erb +5 -0
  154. data/web/views/_poll_link.erb +7 -0
  155. data/web/views/_status.erb +4 -0
  156. data/web/views/_summary.erb +40 -0
  157. data/web/views/busy.erb +90 -0
  158. data/web/views/dashboard.erb +75 -0
  159. data/web/views/dead.erb +34 -0
  160. data/web/views/layout.erb +31 -0
  161. data/web/views/morgue.erb +71 -0
  162. data/web/views/queue.erb +45 -0
  163. data/web/views/queues.erb +27 -0
  164. data/web/views/retries.erb +74 -0
  165. data/web/views/retry.erb +34 -0
  166. data/web/views/scheduled.erb +54 -0
  167. data/web/views/scheduled_job_info.erb +8 -0
  168. metadata +404 -0
@@ -0,0 +1,30 @@
1
+ require 'roundhouse'
2
+
3
+ module Roundhouse
4
+ module ExceptionHandler
5
+
6
# Default error handler. Reports an exception through Roundhouse.logger at
# WARN level: first the context (when not empty), then "Class: message",
# then the backtrace (when the exception has one).
class Logger
  # ex      - the exception to report.
  # ctxHash - context for the failure; must respond to #empty?.
  def call(ex, ctxHash)
    log = Roundhouse.logger
    log.warn(ctxHash) unless ctxHash.empty?
    log.warn "#{ex.class.name}: #{ex.message}"

    trace = ex.backtrace
    log.warn trace.join("\n") unless trace.nil?
  end

  # Register an instance as a default handler as soon as the class body
  # is evaluated, so errors are at least logged.
  Roundhouse.error_handlers << new
end
16
+
17
# Hands +ex+ and its context to every handler in Roundhouse.error_handlers.
#
# ex      - the exception being reported.
# ctxHash - optional context passed through to each handler (default: {}).
#
# A handler that raises is logged at ERROR level and skipped. The rescued
# error is bound to its own local variable: rescuing into +ex+ would
# overwrite the method argument (blocks share the method's locals), and
# every remaining handler would then be given the handler's failure instead
# of the exception that is actually being reported.
def handle_exception(ex, ctxHash={})
  Roundhouse.error_handlers.each do |handler|
    begin
      handler.call(ex, ctxHash)
    rescue => handler_error
      Roundhouse.logger.error "!!! ERROR HANDLER THREW AN ERROR !!!"
      Roundhouse.logger.error handler_error
      Roundhouse.logger.error handler_error.backtrace.join("\n") unless handler_error.backtrace.nil?
    end
  end
end
28
+
29
+ end
30
+ end
@@ -0,0 +1,154 @@
1
+ require 'roundhouse'
2
+ require 'roundhouse/monitor'
3
+ require 'roundhouse/util'
4
+ require 'roundhouse/actor'
5
+
6
+ module Roundhouse
7
+ ##
8
+ # The Fetcher blocks on Redis, waiting for a message to process
9
+ # from the queues. It gets the message and hands it to the Manager
10
+ # to assign to a ready Processor.
11
##
# Actor that asks the configured fetch strategy for the next unit of work
# and hands whatever it gets to the Manager.
class Fetcher
  include Util
  include Actor

  # Seconds to sleep after a failed fetch before trying again.
  TIMEOUT = 1

  # Time at which fetching started to fail, or nil while fetches succeed.
  attr_reader :down

  # Class-level shutdown flag. Once set, #fetch returns without doing
  # anything.
  def self.done!
    @done = true
  end

  def self.reset # testing only
    @done = nil
  end

  # Returns the flag's value, or nil when it has never been assigned.
  def self.done?
    @done if defined?(@done)
  end

  # The class used to retrieve work: Roundhouse.options[:fetch] when set,
  # RoundRobinFetch otherwise.
  def self.strategy
    configured = Roundhouse.options[:fetch]
    configured || RoundRobinFetch
  end

  # mgr     - the manager that receives fetched work via #assign.
  # options - passed unchanged to the strategy's constructor.
  def initialize(mgr, options)
    @down = nil
    @mgr = mgr
    @strategy = Fetcher.strategy.new(options)
  end

  # Performs a single fetch attempt. The Manager requests one fetch per
  # idle processor; rather than blocking or looping forever (which would
  # prevent a clean shutdown), an attempt that turns up nothing simply
  # schedules another attempt.
  def fetch
    watchdog('Fetcher#fetch died') do
      return if Roundhouse::Fetcher.done?

      begin
        job = @strategy.retrieve_work
        if @down
          ::Roundhouse.logger.info("Redis is online, #{Time.now - @down} sec downtime")
        end
        @down = nil

        if job
          @mgr.async.assign(job)
        else
          after(0) { fetch }
        end
      rescue => ex
        handle_fetch_exception(ex)
      end
    end
  end

  private

  def pause
    sleep(TIMEOUT)
  end

  # Logs the failure only when it is the first one since the last
  # successful fetch, remembers when the outage began, waits, and then
  # schedules another fetch.
  def handle_fetch_exception(ex)
    unless @down
      logger.error("Error fetching message: #{ex}")
      ex.backtrace.each { |frame| logger.error(frame) }
    end
    @down ||= Time.now
    pause
    after(0) { fetch }
  rescue Celluloid::TaskTerminated
    # If redis is down when we try to shut down, all the fetch backlog
    # raises these errors; they are deliberately ignored.
  end
end
94
+
95
# Default fetch strategy: asks Roundhouse::Monitor for the next job and
# wraps it in a UnitOfWork.
class RoundRobinFetch
  # options - accepted for interface compatibility; not used here.
  def initialize(options = nil)
  end

  # Returns a UnitOfWork built from whatever Monitor.await_next_job
  # yields, or nil when it yields nothing.
  def retrieve_work
    fetched = Roundhouse.redis { |conn| Roundhouse::Monitor.await_next_job(conn) }
    return unless fetched

    UnitOfWork.new(*fetched)
  end

  # Pushes the messages of all given units of work back to Redis.
  #
  # By leaving this as a class method, it can be pluggable and used by the
  # Manager actor. Making it an instance method will make it async to the
  # Fetcher actor.
  #
  # inprogress - collection of units of work (each responding to #queue_id
  #              and #message).
  # options    - unused by this strategy.
  #
  # Failures are logged at WARN level and not re-raised.
  def self.bulk_requeue(inprogress, options)
    return if inprogress.empty?

    Roundhouse.logger.debug { "Re-queueing terminated jobs" }

    # Group the messages by the queue they came from, keeping arrival order.
    messages_by_queue = inprogress.each_with_object({}) do |unit, grouped|
      (grouped[unit.queue_id] ||= []) << unit.message
    end

    Roundhouse.redis do |conn|
      conn.pipelined do
        messages_by_queue.each do |queue_id, jobs|
          Roundhouse::Monitor.requeue(conn, queue_id, jobs)
        end
      end
      # REFACTOR NOTE: This has to happen outside the pipelining since
      # we need to read. We can refactor to put this back
      # after converting the Monitor operations as EVAL scripts
      messages_by_queue.each_key do |queue_id|
        Roundhouse::Monitor.push(conn, queue_id)
      end
    end
    Roundhouse.logger.info("Pushed #{inprogress.size} messages back to Redis")
  rescue => ex
    Roundhouse.logger.warn("Failed to requeue #{inprogress.size} jobs: #{ex.message}")
  end

  # A fetched message together with the full name of the queue it was
  # taken from.
  UnitOfWork = Struct.new(:full_queue_name, :message) do
    # Matches everything up to and including "<Monitor::QUEUE>:".
    QUEUE_REGEX = /.*#{Roundhouse::Monitor::QUEUE}:/.freeze

    def acknowledge
      # nothing to do
    end

    # The queue name with the QUEUE_REGEX prefix removed.
    def queue_id
      full_queue_name.gsub(QUEUE_REGEX, '')
    end

    # Returns this single message to its queue and pushes the queue.
    def requeue
      Roundhouse.redis do |conn|
        Roundhouse::Monitor.requeue(conn, queue_id, message)
        Roundhouse::Monitor.push(conn, queue_id)
      end
    end
  end

end
154
+ end
@@ -0,0 +1,98 @@
1
+ require 'roundhouse/actor'
2
+ require 'roundhouse/manager'
3
+ require 'roundhouse/fetch'
4
+ require 'roundhouse/scheduled'
5
+
6
+ module Roundhouse
7
+ # The Launcher is a very simple Actor whose job is to
8
+ # start, monitor and stop the core Actors in Roundhouse.
9
+ # If any of these actors die, the Roundhouse process exits
10
+ # immediately.
11
# Actor that creates, links, starts and stops the core Roundhouse actors:
# the manager, the scheduled-job poller and the fetcher. When a linked
# actor dies with a reason before shutdown has begun, the process exits.
class Launcher
  include Actor
  include Util

  trap_exit :actor_died

  attr_reader :manager, :poller, :fetcher

  # options - configuration hash; :timeout, :tag, :concurrency and :queues
  #           are read by this class, and the hash is also passed on to the
  #           manager and the fetcher.
  def initialize(options)
    @options = options
    @done = false
    @condvar = Celluloid::Condition.new
    @manager = Roundhouse::Manager.new_link(@condvar, options)
    @poller = Roundhouse::Scheduled::Poller.new_link
    @fetcher = Roundhouse::Fetcher.new_link(@manager, options)
    @manager.fetcher = @fetcher
  end

  # Exit callback registered through trap_exit. Ignored once #stop has
  # begun or when no reason is supplied.
  def actor_died(actor, reason)
    # https://github.com/mperham/sidekiq/issues/2057#issuecomment-66485477
    return if @done
    return unless reason

    Roundhouse.logger.warn("Roundhouse died due to the following error, cannot recover, process exiting")
    handle_exception(reason)
    exit(1)
  end

  # Starts the manager and the poller asynchronously, then begins the
  # heartbeat.
  def run
    watchdog('Launcher#run') do
      manager.async.start
      poller.async.poll(true)

      start_heartbeat
    end
  end

  # Shuts everything down: flags the fetcher as done, terminates fetcher
  # and poller, asks the manager to stop and waits on the condition
  # variable before terminating it, then removes the heartbeat data.
  def stop
    watchdog('Launcher#stop') do
      @done = true
      Roundhouse::Fetcher.done!
      fetcher.terminate if fetcher.alive?
      poller.terminate if poller.alive?

      manager.async.stop(shutdown: true, timeout: @options[:timeout])
      @condvar.wait
      manager.terminate

      # Requeue everything in case there was a worker who grabbed work while stopped
      # This call is a no-op in Roundhouse but necessary for Roundhouse Pro.
      Roundhouse::Fetcher.strategy.bulk_requeue([], @options)

      stop_heartbeat
    end
  end

  private

  # Builds the process description and hands it to the manager's heartbeat.
  def start_heartbeat
    key = identity
    info = {
      'hostname' => hostname,
      'started_at' => Time.now.to_f,
      'pid' => $$,
      'tag' => @options[:tag] || '',
      'concurrency' => @options[:concurrency],
      'queues' => @options[:queues].uniq,
      'labels' => Roundhouse.options[:labels],
      'identity' => identity,
    }
    # this data doesn't change so dump it to a string
    # now so we don't need to dump it every heartbeat.
    manager.heartbeat(key, info, Roundhouse.dump_json(info))
  end

  # Removes this process from the 'processes' set and deletes its
  # "<identity>:workers" key.
  def stop_heartbeat
    Roundhouse.redis do |conn|
      conn.pipelined do
        conn.srem('processes', identity)
        conn.del("#{identity}:workers")
      end
    end
  rescue
    # best effort, ignore network errors
  end

end
98
+ end
@@ -0,0 +1,104 @@
1
require 'fcntl'
require 'logger'
require 'time'
3
+
4
module Roundhouse
  # Logging support: log formatters, a module-level logger, per-thread
  # context tags and reopening of rotated log files. Including the module
  # gives the includer a #logger instance method.
  module Logging

    # Formats a log line as
    # "<UTC ISO-8601 time, ms> <pid> TID-<thread id><context> <severity>: <message>\n".
    class Pretty < Logger::Formatter
      SPACE = " "

      # Provide a call() method that returns the formatted message.
      # program_name is accepted for Logger compatibility and not used.
      def call(severity, time, program_name, message)
        "#{time.utc.iso8601(3)} #{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
      end

      # Returns the current thread's context tags joined by spaces and
      # prefixed with one space, or nil when there are none.
      def context
        c = Thread.current[:roundhouse_context]
        " #{c.join(SPACE)}" if c && c.any?
      end
    end

    # Same layout as Pretty without the leading timestamp.
    class WithoutTimestamp < Pretty
      def call(severity, time, program_name, message)
        "#{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
      end
    end

    # Adds +msg+ to the current thread's context for the duration of the
    # block; the tag is removed again even when the block raises.
    def self.with_context(msg)
      Thread.current[:roundhouse_context] ||= []
      Thread.current[:roundhouse_context] << msg
      yield
    ensure
      Thread.current[:roundhouse_context].pop
    end

    # Replaces the module logger with a new INFO-level Logger writing to
    # +log_target+ and returns it. The formatter is WithoutTimestamp when
    # ENV['DYNO'] is set, Pretty otherwise. A previous logger is closed
    # unless $TESTING is set.
    def self.initialize_logger(log_target = STDOUT)
      oldlogger = defined?(@logger) ? @logger : nil
      @logger = Logger.new(log_target)
      @logger.level = Logger::INFO
      @logger.formatter = ENV['DYNO'] ? WithoutTimestamp.new : Pretty.new
      oldlogger.close if oldlogger && !$TESTING # don't want to close testing's STDOUT logging
      @logger
    end

    # Returns the module logger, creating the default one on first use.
    def self.logger
      defined?(@logger) ? @logger : initialize_logger
    end

    # Sets the module logger. Passing nil/false installs a logger that
    # discards everything; File::NULL is used instead of a hard-coded
    # '/dev/null' so this also works where that path does not exist.
    def self.logger=(log)
      @logger = (log ? log : Logger.new(File::NULL))
    end

    # This reopens ALL logfiles in the process that have been rotated
    # using logrotate(8) (without copytruncate) or similar tools.
    # A +File+ object is considered for reopening if it is:
    #   1) opened with the O_APPEND and O_WRONLY flags
    #   2) the current open file handle does not match its original open path
    #   3) unbuffered (as far as userspace buffering goes, not O_SYNC)
    # Returns the number of files reopened, or nil when ObjectSpace
    # enumeration raised a RuntimeError.
    #
    # Reading the open flags needs the Fcntl constants, so the file must
    # require 'fcntl'; without it the flag check raises NameError.
    def self.reopen_logs
      to_reopen = []
      append_flags = File::WRONLY | File::APPEND

      ObjectSpace.each_object(File) do |fp|
        begin
          if !fp.closed? && fp.stat.file? && fp.sync && (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags
            to_reopen << fp
          end
        rescue IOError, Errno::EBADF
          # handle went away while we were inspecting it; skip it
        end
      end

      nr = 0
      to_reopen.each do |fp|
        orig_st = begin
          fp.stat
        rescue IOError, Errno::EBADF
          next
        end

        begin
          b = File.stat(fp.path)
          # Same inode and device: the path still points at the open file.
          next if orig_st.ino == b.ino && orig_st.dev == b.dev
        rescue Errno::ENOENT
          # Path no longer exists; fall through and recreate it below.
        end

        begin
          File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) }
          fp.sync = true
          nr += 1
        rescue IOError, Errno::EBADF
          # not much we can do...
        end
      end
      nr
    rescue RuntimeError => ex
      # RuntimeError: ObjectSpace is disabled; each_object will only work with Class, pass -X+O to enable
      puts "Unable to reopen logs: #{ex.message}"
    end

    # Instance-level shortcut to the module logger.
    def logger
      Roundhouse::Logging.logger
    end
  end
end
@@ -0,0 +1,236 @@
1
+ # encoding: utf-8
2
+ require 'roundhouse/util'
3
+ require 'roundhouse/actor'
4
+ require 'roundhouse/processor'
5
+ require 'roundhouse/fetch'
6
+
7
+ module Roundhouse
8
+
9
+ ##
10
+ # The main router in the system. This
11
+ # manages the processor state and accepts messages
12
+ # from Redis to be dispatched to an idle processor.
13
+ #
14
##
# The main router in the system. Keeps track of which processors are
# ready and which are busy, and passes fetched work to a ready processor.
class Manager
  include Util
  include Actor
  trap_exit :processor_died

  attr_reader :ready, :busy
  attr_accessor :fetcher

  SPIN_TIME_FOR_GRACEFUL_SHUTDOWN = 1
  JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API

  # Pieces of the process title ($0); nil results are dropped.
  PROCTITLES = [
    proc { 'roundhouse'.freeze },
    proc { Roundhouse::VERSION },
    proc { |mgr, data| data['tag'] },
    proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
    proc { |mgr, data| "stopping" if mgr.stopped? },
  ]

  # condvar - condition that is signalled once shutdown has finished.
  # options - configuration hash; :concurrency (default 25) is the number
  #           of processors created.
  #
  # Raises ArgumentError when the concurrency is below 1.
  def initialize(condvar, options={})
    logger.debug { options.inspect }
    @options = options
    @count = options[:concurrency] || 25
    raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
    @done_callback = nil
    @finished = condvar

    @in_progress = {}
    @threads = {}
    @done = false
    @busy = []
    @ready = Array.new(@count) { spawn_processor }
  end

  # Marks the manager as stopped and terminates all ready processors.
  # When processors are still busy and options[:shutdown] is set, a hard
  # shutdown is scheduled after options[:timeout] seconds.
  def stop(options={})
    watchdog('Manager#stop died') do
      force = options[:shutdown]
      grace_period = options[:timeout]

      @done = true

      logger.info { "Terminating #{@ready.size} quiet workers" }
      @ready.each { |processor| processor.terminate if processor.alive? }
      @ready.clear

      return if clean_up_for_graceful_shutdown

      hard_shutdown_in grace_period if force
    end
  end

  # Shuts down and returns true when nothing is busy; otherwise schedules
  # another check and returns false.
  def clean_up_for_graceful_shutdown
    if @busy.empty?
      shutdown
      return true
    end

    after(SPIN_TIME_FOR_GRACEFUL_SHUTDOWN) { clean_up_for_graceful_shutdown }
    false
  end

  # Requests one fetch per ready processor.
  def start
    @ready.each { dispatch }
  end

  # Registers a block that is called with the processor each time
  # #processor_done runs.
  def when_done(&blk)
    @done_callback = blk
  end

  # Called when a processor has finished its work: forgets the work and
  # thread recorded for it, then either retires it (when stopped) or puts
  # it back on the ready list, and finally dispatches.
  def processor_done(processor)
    watchdog('Manager#processor_done died') do
      @done_callback.call(processor) if @done_callback
      forget(processor)
      if stopped?
        processor.terminate if processor.alive?
        shutdown if @busy.empty?
      else
        @ready << processor if processor.alive?
      end
      dispatch
    end
  end

  # Exit callback for linked processors. While running, the dead
  # processor is replaced by a new one; while stopping, shutdown
  # completes once nothing is busy.
  def processor_died(processor, reason)
    watchdog("Manager#processor_died died") do
      forget(processor)

      if stopped?
        shutdown if @busy.empty?
      else
        @ready << spawn_processor
        dispatch
      end
    end
  end

  # Gives +work+ to a ready processor, or requeues it when stopped.
  def assign(work)
    watchdog("Manager#assign died") do
      if stopped?
        # Race condition between Manager#stop if Fetcher
        # is blocked on redis and gets a message after
        # all the ready Processors have been stopped.
        # Push the message back to redis.
        work.requeue
      else
        worker = @ready.pop
        @in_progress[worker.object_id] = work
        @busy << worker
        worker.async.process(work)
      end
    end
  end

  # A hack worthy of Rube Goldberg. We need to be able
  # to hard stop a working thread. But there's no way for us to
  # get handle to the underlying thread performing work for a processor
  # so we have it call us and tell us.
  def real_thread(proxy_id, thr)
    @threads[proxy_id] = thr
  end

  # Updates the process title, sends one heartbeat and schedules the
  # next one in 5 seconds.
  def heartbeat(key, data, json)
    $0 = PROCTITLES.map { |part| part.call(self, data) }.compact.join(' ')

    ❤(key, json)
    after(5) { heartbeat(key, data, json) }
  end

  def stopped?
    @done
  end

  private

  # Creates a processor linked to this actor and records its object_id
  # as its proxy_id.
  def spawn_processor
    processor = Processor.new_link(current_actor)
    processor.proxy_id = processor.object_id
    processor
  end

  # Drops every record kept for +processor+.
  def forget(processor)
    @in_progress.delete(processor.object_id)
    @threads.delete(processor.object_id)
    @busy.delete(processor)
  end

  # Writes this process's heartbeat data in one MULTI and pops a pending
  # message from "<key>-signals". A popped message is handled as a signal:
  # names in JVM_RESERVED_SIGNALS go to the CLI, anything else is sent to
  # this process with Process.kill.
  def ❤(key, json)
    _, _, _, signal = Roundhouse.redis do |conn|
      conn.multi do
        conn.sadd('processes', key)
        conn.hmset(key, 'info', json, 'busy', @busy.size, 'beat', Time.now.to_f)
        conn.expire(key, 60)
        conn.rpop("#{key}-signals")
      end
    end

    return unless signal

    if JVM_RESERVED_SIGNALS.include?(signal)
      Roundhouse::CLI.instance.handle_signal(signal)
    else
      ::Process.kill(signal, $$)
    end
  rescue => e
    # ignore all redis/network issues
    logger.error("heartbeat: #{e.message}")
  end

  # After +delay+ seconds: requeues unfinished work, raises Shutdown in
  # the recorded thread of every busy processor that is still alive and
  # signals completion.
  def hard_shutdown_in(delay)
    logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }

    after(delay) do
      watchdog("Manager#hard_shutdown_in died") do
        # We've reached the timeout and we still have busy workers.
        # They must die but their messages shall live on.
        logger.warn { "Terminating #{@busy.size} busy worker threads" }
        logger.warn { "Work still in progress #{@in_progress.values.inspect}" }

        requeue

        @busy.each do |processor|
          next unless processor.alive?

          thread = @threads.delete(processor.object_id)
          thread.raise Shutdown if thread
        end

        @finished.signal
      end
    end
  end

  # Asks the fetcher for more work unless the manager is stopped.
  def dispatch
    return if stopped?
    # This is a safety check to ensure we haven't leaked
    # processors somehow.
    raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
    raise "No ready processor!?" if @ready.empty?

    @fetcher.async.fetch
  end

  def shutdown
    requeue
    @finished.signal
  end

  def requeue
    # Re-enqueue terminated jobs
    # NOTE: You may notice that we may push a job back to redis before
    # the worker thread is terminated. This is ok because Roundhouse's
    # contract says that jobs are run AT LEAST once. Process termination
    # is delayed until we're certain the jobs are back in Redis because
    # it is worse to lose a job than to run it twice.
    Roundhouse::Fetcher.strategy.bulk_requeue(@in_progress.values, @options)
    @in_progress.clear
  end
end
236
+ end