roundhouse-x 0.1.0
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +16 -0
- data/3.0-Upgrade.md +70 -0
- data/Changes.md +1127 -0
- data/Gemfile +27 -0
- data/LICENSE +7 -0
- data/README.md +52 -0
- data/Rakefile +9 -0
- data/bin/roundhouse +19 -0
- data/bin/roundhousectl +93 -0
- data/lib/generators/roundhouse/templates/worker.rb.erb +9 -0
- data/lib/generators/roundhouse/templates/worker_spec.rb.erb +6 -0
- data/lib/generators/roundhouse/templates/worker_test.rb.erb +8 -0
- data/lib/generators/roundhouse/worker_generator.rb +49 -0
- data/lib/roundhouse/actor.rb +39 -0
- data/lib/roundhouse/api.rb +859 -0
- data/lib/roundhouse/cli.rb +396 -0
- data/lib/roundhouse/client.rb +210 -0
- data/lib/roundhouse/core_ext.rb +105 -0
- data/lib/roundhouse/exception_handler.rb +30 -0
- data/lib/roundhouse/fetch.rb +154 -0
- data/lib/roundhouse/launcher.rb +98 -0
- data/lib/roundhouse/logging.rb +104 -0
- data/lib/roundhouse/manager.rb +236 -0
- data/lib/roundhouse/middleware/chain.rb +149 -0
- data/lib/roundhouse/middleware/i18n.rb +41 -0
- data/lib/roundhouse/middleware/server/active_record.rb +13 -0
- data/lib/roundhouse/middleware/server/logging.rb +40 -0
- data/lib/roundhouse/middleware/server/retry_jobs.rb +206 -0
- data/lib/roundhouse/monitor.rb +124 -0
- data/lib/roundhouse/paginator.rb +42 -0
- data/lib/roundhouse/processor.rb +159 -0
- data/lib/roundhouse/rails.rb +24 -0
- data/lib/roundhouse/redis_connection.rb +77 -0
- data/lib/roundhouse/scheduled.rb +115 -0
- data/lib/roundhouse/testing/inline.rb +28 -0
- data/lib/roundhouse/testing.rb +193 -0
- data/lib/roundhouse/util.rb +68 -0
- data/lib/roundhouse/version.rb +3 -0
- data/lib/roundhouse/web.rb +264 -0
- data/lib/roundhouse/web_helpers.rb +249 -0
- data/lib/roundhouse/worker.rb +90 -0
- data/lib/roundhouse.rb +177 -0
- data/roundhouse.gemspec +27 -0
- data/test/config.yml +9 -0
- data/test/env_based_config.yml +11 -0
- data/test/fake_env.rb +0 -0
- data/test/fixtures/en.yml +2 -0
- data/test/helper.rb +49 -0
- data/test/test_api.rb +521 -0
- data/test/test_cli.rb +389 -0
- data/test/test_client.rb +294 -0
- data/test/test_exception_handler.rb +55 -0
- data/test/test_fetch.rb +206 -0
- data/test/test_logging.rb +34 -0
- data/test/test_manager.rb +169 -0
- data/test/test_middleware.rb +160 -0
- data/test/test_monitor.rb +258 -0
- data/test/test_processor.rb +176 -0
- data/test/test_rails.rb +23 -0
- data/test/test_redis_connection.rb +127 -0
- data/test/test_retry.rb +390 -0
- data/test/test_roundhouse.rb +87 -0
- data/test/test_scheduled.rb +120 -0
- data/test/test_scheduling.rb +75 -0
- data/test/test_testing.rb +78 -0
- data/test/test_testing_fake.rb +240 -0
- data/test/test_testing_inline.rb +65 -0
- data/test/test_util.rb +18 -0
- data/test/test_web.rb +605 -0
- data/test/test_web_helpers.rb +52 -0
- data/web/assets/images/bootstrap/glyphicons-halflings-white.png +0 -0
- data/web/assets/images/bootstrap/glyphicons-halflings.png +0 -0
- data/web/assets/images/logo.png +0 -0
- data/web/assets/images/status/active.png +0 -0
- data/web/assets/images/status/idle.png +0 -0
- data/web/assets/images/status-sd8051fd480.png +0 -0
- data/web/assets/javascripts/application.js +83 -0
- data/web/assets/javascripts/dashboard.js +300 -0
- data/web/assets/javascripts/locales/README.md +27 -0
- data/web/assets/javascripts/locales/jquery.timeago.ar.js +96 -0
- data/web/assets/javascripts/locales/jquery.timeago.bg.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.bs.js +49 -0
- data/web/assets/javascripts/locales/jquery.timeago.ca.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.cs.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.cy.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.da.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.de.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.el.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.en-short.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.en.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.es.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.et.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.fa.js +22 -0
- data/web/assets/javascripts/locales/jquery.timeago.fi.js +28 -0
- data/web/assets/javascripts/locales/jquery.timeago.fr-short.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.fr.js +17 -0
- data/web/assets/javascripts/locales/jquery.timeago.he.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.hr.js +49 -0
- data/web/assets/javascripts/locales/jquery.timeago.hu.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.hy.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.id.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.it.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.ja.js +19 -0
- data/web/assets/javascripts/locales/jquery.timeago.ko.js +17 -0
- data/web/assets/javascripts/locales/jquery.timeago.lt.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.mk.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.nl.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.no.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.pl.js +31 -0
- data/web/assets/javascripts/locales/jquery.timeago.pt-br.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.pt.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.ro.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.rs.js +49 -0
- data/web/assets/javascripts/locales/jquery.timeago.ru.js +34 -0
- data/web/assets/javascripts/locales/jquery.timeago.sk.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.sl.js +44 -0
- data/web/assets/javascripts/locales/jquery.timeago.sv.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.th.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.tr.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.uk.js +34 -0
- data/web/assets/javascripts/locales/jquery.timeago.uz.js +19 -0
- data/web/assets/javascripts/locales/jquery.timeago.zh-cn.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.zh-tw.js +20 -0
- data/web/assets/stylesheets/application.css +746 -0
- data/web/assets/stylesheets/bootstrap.css +9 -0
- data/web/locales/cs.yml +68 -0
- data/web/locales/da.yml +68 -0
- data/web/locales/de.yml +69 -0
- data/web/locales/el.yml +68 -0
- data/web/locales/en.yml +77 -0
- data/web/locales/es.yml +69 -0
- data/web/locales/fr.yml +69 -0
- data/web/locales/hi.yml +75 -0
- data/web/locales/it.yml +69 -0
- data/web/locales/ja.yml +69 -0
- data/web/locales/ko.yml +68 -0
- data/web/locales/nl.yml +68 -0
- data/web/locales/no.yml +69 -0
- data/web/locales/pl.yml +59 -0
- data/web/locales/pt-br.yml +68 -0
- data/web/locales/pt.yml +67 -0
- data/web/locales/ru.yml +75 -0
- data/web/locales/sv.yml +68 -0
- data/web/locales/ta.yml +75 -0
- data/web/locales/zh-cn.yml +68 -0
- data/web/locales/zh-tw.yml +68 -0
- data/web/views/_footer.erb +22 -0
- data/web/views/_job_info.erb +84 -0
- data/web/views/_nav.erb +66 -0
- data/web/views/_paging.erb +23 -0
- data/web/views/_poll_js.erb +5 -0
- data/web/views/_poll_link.erb +7 -0
- data/web/views/_status.erb +4 -0
- data/web/views/_summary.erb +40 -0
- data/web/views/busy.erb +90 -0
- data/web/views/dashboard.erb +75 -0
- data/web/views/dead.erb +34 -0
- data/web/views/layout.erb +31 -0
- data/web/views/morgue.erb +71 -0
- data/web/views/queue.erb +45 -0
- data/web/views/queues.erb +27 -0
- data/web/views/retries.erb +74 -0
- data/web/views/retry.erb +34 -0
- data/web/views/scheduled.erb +54 -0
- data/web/views/scheduled_job_info.erb +8 -0
- metadata +404 -0
data/lib/roundhouse/exception_handler.rb
@@ -0,0 +1,30 @@
+require 'roundhouse'
+
+module Roundhouse
+  module ExceptionHandler
+
+    class Logger
+      def call(ex, ctxHash)
+        Roundhouse.logger.warn(ctxHash) if !ctxHash.empty?
+        Roundhouse.logger.warn "#{ex.class.name}: #{ex.message}"
+        Roundhouse.logger.warn ex.backtrace.join("\n") unless ex.backtrace.nil?
+      end
+
+      # Set up default handler which just logs the error
+      Roundhouse.error_handlers << Roundhouse::ExceptionHandler::Logger.new
+    end
+
+    def handle_exception(ex, ctxHash={})
+      Roundhouse.error_handlers.each do |handler|
+        begin
+          handler.call(ex, ctxHash)
+        rescue => ex
+          Roundhouse.logger.error "!!! ERROR HANDLER THREW AN ERROR !!!"
+          Roundhouse.logger.error ex
+          Roundhouse.logger.error ex.backtrace.join("\n") unless ex.backtrace.nil?
+        end
+      end
+    end
+
+  end
+end
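As the hunk above shows, any object responding to `call(ex, ctxHash)` can be appended to `Roundhouse.error_handlers` alongside the default `Logger` handler. A minimal sketch of registering a custom handler; `ErrorCounter` is a hypothetical example, not part of the gem:

```ruby
# Hypothetical custom error handler: counts exceptions by class and re-logs them.
# The only contract (per exception_handler.rb above) is responding to call(ex, ctxHash).
class ErrorCounter
  def initialize
    @counts = Hash.new(0)
  end

  def call(ex, ctxHash)
    @counts[ex.class.name] += 1
    Roundhouse.logger.warn("#{ex.class.name} seen #{@counts[ex.class.name]} times (context: #{ctxHash.inspect})")
  end
end

Roundhouse.error_handlers << ErrorCounter.new
```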
data/lib/roundhouse/fetch.rb
@@ -0,0 +1,154 @@
+require 'roundhouse'
+require 'roundhouse/monitor'
+require 'roundhouse/util'
+require 'roundhouse/actor'
+
+module Roundhouse
+  ##
+  # The Fetcher blocks on Redis, waiting for a message to process
+  # from the queues. It gets the message and hands it to the Manager
+  # to assign to a ready Processor.
+  class Fetcher
+    include Util
+    include Actor
+
+    TIMEOUT = 1
+
+    attr_reader :down
+
+    def initialize(mgr, options)
+      @down = nil
+      @mgr = mgr
+      @strategy = Fetcher.strategy.new(options)
+    end
+
+    # Fetching is straightforward: the Manager makes a fetch
+    # request for each idle processor when Roundhouse starts and
+    # then issues a new fetch request every time a Processor
+    # finishes a message.
+    #
+    # Because we have to shut down cleanly, we can't block
+    # forever and we can't loop forever. Instead we reschedule
+    # a new fetch if the current fetch turned up nothing.
+    def fetch
+      watchdog('Fetcher#fetch died') do
+        return if Roundhouse::Fetcher.done?
+
+        begin
+          work = @strategy.retrieve_work
+          ::Roundhouse.logger.info("Redis is online, #{Time.now - @down} sec downtime") if @down
+          @down = nil
+
+          if work
+            @mgr.async.assign(work)
+          else
+            after(0) { fetch }
+          end
+        rescue => ex
+          handle_fetch_exception(ex)
+        end
+
+      end
+    end
+
+    private
+
+    def pause
+      sleep(TIMEOUT)
+    end
+
+    def handle_fetch_exception(ex)
+      if !@down
+        logger.error("Error fetching message: #{ex}")
+        ex.backtrace.each do |bt|
+          logger.error(bt)
+        end
+      end
+      @down ||= Time.now
+      pause
+      after(0) { fetch }
+    rescue Celluloid::TaskTerminated
+      # If redis is down when we try to shut down, all the fetch backlog
+      # raises these errors. Haven't been able to figure out what I'm doing wrong.
+    end
+
+    # Ugh. Say hello to a bloody hack.
+    # Can't find a clean way to get the fetcher to just stop processing
+    # its mailbox when shutdown starts.
+    def self.done!
+      @done = true
+    end
+
+    def self.reset # testing only
+      @done = nil
+    end
+
+    def self.done?
+      defined?(@done) && @done
+    end
+
+    def self.strategy
+      Roundhouse.options[:fetch] || RoundRobinFetch
+    end
+  end
+
+  class RoundRobinFetch
+    def initialize(options = nil)
+    end
+
+    def retrieve_work
+      work = Roundhouse.redis { |conn| Roundhouse::Monitor.await_next_job(conn) }
+      UnitOfWork.new(*work) if work
+    end
+
+    # By leaving this as a class method, it can be pluggable and used by the Manager actor. Making it
+    # an instance method will make it async to the Fetcher actor
+    def self.bulk_requeue(inprogress, options)
+      return if inprogress.empty?
+
+      Roundhouse.logger.debug { "Re-queueing terminated jobs" }
+      jobs_to_requeue = {}
+      inprogress.each do |unit_of_work|
+        jobs_to_requeue[unit_of_work.queue_id] ||= []
+        jobs_to_requeue[unit_of_work.queue_id] << unit_of_work.message
+      end
+
+      Roundhouse.redis do |conn|
+        conn.pipelined do
+          jobs_to_requeue.each do |queue_id, jobs|
+            Roundhouse::Monitor.requeue(conn, queue_id, jobs)
+          end
+        end
+        # REFACTOR NOTE: This has to happen outside the pipelining since
+        # we need to read. We can refactor to put this back
+        # after converting the Monitor operations as EVAL scripts
+        jobs_to_requeue.keys.each do |queue_id|
+          Roundhouse::Monitor.push(conn, queue_id)
+        end
+      end
+      Roundhouse.logger.info("Pushed #{inprogress.size} messages back to Redis")
+    rescue => ex
+      Roundhouse.logger.warn("Failed to requeue #{inprogress.size} jobs: #{ex.message}")
+    end
+
+    UnitOfWork = Struct.new(:full_queue_name, :message) do
+      QUEUE_REGEX = /.*#{Roundhouse::Monitor::QUEUE}:/.freeze
+
+      def acknowledge
+        # nothing to do
+      end
+
+      def queue_id
+        full_queue_name.gsub(QUEUE_REGEX, '')
+      end
+
+      def requeue
+        Roundhouse.redis do |conn|
+          Roundhouse::Monitor.requeue(conn, queue_id, message)
+          Roundhouse::Monitor.push(conn, queue_id)
+        end
+      end
+    end
+
+  end
+end
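Since `Fetcher.strategy` falls back to `RoundRobinFetch` only when `Roundhouse.options[:fetch]` is unset, the fetch strategy is pluggable. A minimal sketch of a drop-in strategy; `LoggingFetch` is hypothetical, and the interface it satisfies is exactly what the Fetcher and Manager call above (`#retrieve_work` on an instance, `.bulk_requeue` on the class):

```ruby
# Hypothetical strategy that delegates to RoundRobinFetch but logs each fetch.
class LoggingFetch
  def initialize(options)
    @inner = Roundhouse::RoundRobinFetch.new(options)
  end

  # Must return a UnitOfWork-like object (or nil), as RoundRobinFetch does.
  def retrieve_work
    work = @inner.retrieve_work
    Roundhouse.logger.debug { "Fetched work from queue #{work.queue_id}" } if work
    work
  end

  # Called by the Manager on shutdown to push in-progress jobs back to Redis.
  def self.bulk_requeue(inprogress, options)
    Roundhouse::RoundRobinFetch.bulk_requeue(inprogress, options)
  end
end

# Assumption: mutating the options hash before the server boots is how the
# strategy is selected, per Fetcher.strategy above.
Roundhouse.options[:fetch] = LoggingFetch
```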
data/lib/roundhouse/launcher.rb
@@ -0,0 +1,98 @@
+require 'roundhouse/actor'
+require 'roundhouse/manager'
+require 'roundhouse/fetch'
+require 'roundhouse/scheduled'
+
+module Roundhouse
+  # The Launcher is a very simple Actor whose job is to
+  # start, monitor and stop the core Actors in Roundhouse.
+  # If any of these actors die, the Roundhouse process exits
+  # immediately.
+  class Launcher
+    include Actor
+    include Util
+
+    trap_exit :actor_died
+
+    attr_reader :manager, :poller, :fetcher
+
+    def initialize(options)
+      @condvar = Celluloid::Condition.new
+      @manager = Roundhouse::Manager.new_link(@condvar, options)
+      @poller = Roundhouse::Scheduled::Poller.new_link
+      @fetcher = Roundhouse::Fetcher.new_link(@manager, options)
+      @manager.fetcher = @fetcher
+      @done = false
+      @options = options
+    end
+
+    def actor_died(actor, reason)
+      # https://github.com/mperham/sidekiq/issues/2057#issuecomment-66485477
+      return if @done || !reason
+
+      Roundhouse.logger.warn("Roundhouse died due to the following error, cannot recover, process exiting")
+      handle_exception(reason)
+      exit(1)
+    end
+
+    def run
+      watchdog('Launcher#run') do
+        manager.async.start
+        poller.async.poll(true)
+
+        start_heartbeat
+      end
+    end
+
+    def stop
+      watchdog('Launcher#stop') do
+        @done = true
+        Roundhouse::Fetcher.done!
+        fetcher.terminate if fetcher.alive?
+        poller.terminate if poller.alive?
+
+        manager.async.stop(:shutdown => true, :timeout => @options[:timeout])
+        @condvar.wait
+        manager.terminate
+
+        # Requeue everything in case there was a worker who grabbed work while stopped
+        # This call is a no-op in Roundhouse but necessary for Roundhouse Pro.
+        Roundhouse::Fetcher.strategy.bulk_requeue([], @options)
+
+        stop_heartbeat
+      end
+    end
+
+    private
+
+    def start_heartbeat
+      key = identity
+      data = {
+        'hostname' => hostname,
+        'started_at' => Time.now.to_f,
+        'pid' => $$,
+        'tag' => @options[:tag] || '',
+        'concurrency' => @options[:concurrency],
+        'queues' => @options[:queues].uniq,
+        'labels' => Roundhouse.options[:labels],
+        'identity' => identity,
+      }
+      # this data doesn't change so dump it to a string
+      # now so we don't need to dump it every heartbeat.
+      json = Roundhouse.dump_json(data)
+      manager.heartbeat(key, data, json)
+    end
+
+    def stop_heartbeat
+      Roundhouse.redis do |conn|
+        conn.pipelined do
+          conn.srem('processes', identity)
+          conn.del("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
+
+  end
+end
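The Launcher is normally driven by the CLI, but its lifecycle boils down to `run` followed by `stop`. A rough sketch, assuming Celluloid is already booted and that the option keys read above (`:concurrency`, `:queues`, `:timeout`, `:tag`) are supplied by the caller; the values shown are placeholders:

```ruby
# Rough sketch only: in practice Roundhouse::CLI builds this hash and handles signals.
options = {
  :concurrency => 10,
  :queues      => ['default'],
  :timeout     => 8,
  :tag         => 'example'
}

launcher = Roundhouse::Launcher.new(options)
launcher.run    # starts the Manager, the scheduled Poller, and the heartbeat

# ... later, on shutdown:
launcher.stop   # marks the Fetcher done, drains workers, requeues leftovers, stops the heartbeat
```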
data/lib/roundhouse/logging.rb
@@ -0,0 +1,104 @@
+require 'time'
+require 'logger'
+
+module Roundhouse
+  module Logging
+
+    class Pretty < Logger::Formatter
+      SPACE = " "
+
+      # Provide a call() method that returns the formatted message.
+      def call(severity, time, program_name, message)
+        "#{time.utc.iso8601(3)} #{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
+      end
+
+      def context
+        c = Thread.current[:roundhouse_context]
+        " #{c.join(SPACE)}" if c && c.any?
+      end
+    end
+
+    class WithoutTimestamp < Pretty
+      def call(severity, time, program_name, message)
+        "#{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
+      end
+    end
+
+    def self.with_context(msg)
+      Thread.current[:roundhouse_context] ||= []
+      Thread.current[:roundhouse_context] << msg
+      yield
+    ensure
+      Thread.current[:roundhouse_context].pop
+    end
+
+    def self.initialize_logger(log_target = STDOUT)
+      oldlogger = defined?(@logger) ? @logger : nil
+      @logger = Logger.new(log_target)
+      @logger.level = Logger::INFO
+      @logger.formatter = ENV['DYNO'] ? WithoutTimestamp.new : Pretty.new
+      oldlogger.close if oldlogger && !$TESTING # don't want to close testing's STDOUT logging
+      @logger
+    end
+
+    def self.logger
+      defined?(@logger) ? @logger : initialize_logger
+    end
+
+    def self.logger=(log)
+      @logger = (log ? log : Logger.new('/dev/null'))
+    end
+
+    # This reopens ALL logfiles in the process that have been rotated
+    # using logrotate(8) (without copytruncate) or similar tools.
+    # A +File+ object is considered for reopening if it is:
+    # 1) opened with the O_APPEND and O_WRONLY flags
+    # 2) the current open file handle does not match its original open path
+    # 3) unbuffered (as far as userspace buffering goes, not O_SYNC)
+    # Returns the number of files reopened
+    def self.reopen_logs
+      to_reopen = []
+      append_flags = File::WRONLY | File::APPEND
+
+      ObjectSpace.each_object(File) do |fp|
+        begin
+          if !fp.closed? && fp.stat.file? && fp.sync && (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags
+            to_reopen << fp
+          end
+        rescue IOError, Errno::EBADF
+        end
+      end
+
+      nr = 0
+      to_reopen.each do |fp|
+        orig_st = begin
+          fp.stat
+        rescue IOError, Errno::EBADF
+          next
+        end
+
+        begin
+          b = File.stat(fp.path)
+          next if orig_st.ino == b.ino && orig_st.dev == b.dev
+        rescue Errno::ENOENT
+        end
+
+        begin
+          File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) }
+          fp.sync = true
+          nr += 1
+        rescue IOError, Errno::EBADF
+          # not much we can do...
+        end
+      end
+      nr
+    rescue RuntimeError => ex
+      # RuntimeError: ObjectSpace is disabled; each_object will only work with Class, pass -X+O to enable
+      puts "Unable to reopen logs: #{ex.message}"
+    end
+
+    def logger
+      Roundhouse::Logging.logger
+    end
+  end
+end
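`with_context` and the `Pretty` formatter combine so that every line logged inside the block carries the pushed context string. A small illustration; the worker name, JID, and sample output line are made up:

```ruby
# Every log line inside the block is tagged with the pushed context, e.g.
# "2015-05-01T12:00:00.000Z 4321 TID-oveahmcxw HardWorker JID-abc123 INFO: start"
Roundhouse::Logging.with_context("HardWorker JID-abc123") do
  Roundhouse.logger.info("start")
  # ... perform the job ...
  Roundhouse.logger.info("done")
end
```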
data/lib/roundhouse/manager.rb
@@ -0,0 +1,236 @@
+# encoding: utf-8
+require 'roundhouse/util'
+require 'roundhouse/actor'
+require 'roundhouse/processor'
+require 'roundhouse/fetch'
+
+module Roundhouse
+
+  ##
+  # The main router in the system. This
+  # manages the processor state and accepts messages
+  # from Redis to be dispatched to an idle processor.
+  #
+  class Manager
+    include Util
+    include Actor
+    trap_exit :processor_died
+
+    attr_reader :ready
+    attr_reader :busy
+    attr_accessor :fetcher
+
+    SPIN_TIME_FOR_GRACEFUL_SHUTDOWN = 1
+    JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
+
+    def initialize(condvar, options={})
+      logger.debug { options.inspect }
+      @options = options
+      @count = options[:concurrency] || 25
+      raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
+      @done_callback = nil
+      @finished = condvar
+
+      @in_progress = {}
+      @threads = {}
+      @done = false
+      @busy = []
+      @ready = @count.times.map do
+        p = Processor.new_link(current_actor)
+        p.proxy_id = p.object_id
+        p
+      end
+    end
+
+    def stop(options={})
+      watchdog('Manager#stop died') do
+        should_shutdown = options[:shutdown]
+        timeout = options[:timeout]
+
+        @done = true
+
+        logger.info { "Terminating #{@ready.size} quiet workers" }
+        @ready.each { |x| x.terminate if x.alive? }
+        @ready.clear
+
+        return if clean_up_for_graceful_shutdown
+
+        hard_shutdown_in timeout if should_shutdown
+      end
+    end
+
+    def clean_up_for_graceful_shutdown
+      if @busy.empty?
+        shutdown
+        return true
+      end
+
+      after(SPIN_TIME_FOR_GRACEFUL_SHUTDOWN) { clean_up_for_graceful_shutdown }
+      false
+    end
+
+    def start
+      @ready.each { dispatch }
+    end
+
+    def when_done(&blk)
+      @done_callback = blk
+    end
+
+    def processor_done(processor)
+      watchdog('Manager#processor_done died') do
+        @done_callback.call(processor) if @done_callback
+        @in_progress.delete(processor.object_id)
+        @threads.delete(processor.object_id)
+        @busy.delete(processor)
+        if stopped?
+          processor.terminate if processor.alive?
+          shutdown if @busy.empty?
+        else
+          @ready << processor if processor.alive?
+        end
+        dispatch
+      end
+    end
+
+    def processor_died(processor, reason)
+      watchdog("Manager#processor_died died") do
+        @in_progress.delete(processor.object_id)
+        @threads.delete(processor.object_id)
+        @busy.delete(processor)
+
+        unless stopped?
+          p = Processor.new_link(current_actor)
+          p.proxy_id = p.object_id
+          @ready << p
+          dispatch
+        else
+          shutdown if @busy.empty?
+        end
+      end
+    end
+
+    def assign(work)
+      watchdog("Manager#assign died") do
+        if stopped?
+          # Race condition between Manager#stop if Fetcher
+          # is blocked on redis and gets a message after
+          # all the ready Processors have been stopped.
+          # Push the message back to redis.
+          work.requeue
+        else
+          processor = @ready.pop
+          @in_progress[processor.object_id] = work
+          @busy << processor
+          processor.async.process(work)
+        end
+      end
+    end
+
+    # A hack worthy of Rube Goldberg. We need to be able
+    # to hard stop a working thread. But there's no way for us to
+    # get handle to the underlying thread performing work for a processor
+    # so we have it call us and tell us.
+    def real_thread(proxy_id, thr)
+      @threads[proxy_id] = thr
+    end
+
+    PROCTITLES = [
+      proc { 'roundhouse'.freeze },
+      proc { Roundhouse::VERSION },
+      proc { |mgr, data| data['tag'] },
+      proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
+      proc { |mgr, data| "stopping" if mgr.stopped? },
+    ]
+
+    def heartbeat(key, data, json)
+      results = PROCTITLES.map {|x| x.(self, data) }
+      results.compact!
+      $0 = results.join(' ')
+
+      ❤(key, json)
+      after(5) do
+        heartbeat(key, data, json)
+      end
+    end
+
+    def stopped?
+      @done
+    end
+
+    private
+
+    def ❤(key, json)
+      begin
+        _, _, _, msg = Roundhouse.redis do |conn|
+          conn.multi do
+            conn.sadd('processes', key)
+            conn.hmset(key, 'info', json, 'busy', @busy.size, 'beat', Time.now.to_f)
+            conn.expire(key, 60)
+            conn.rpop("#{key}-signals")
+          end
+        end
+
+        return unless msg
+
+        if JVM_RESERVED_SIGNALS.include?(msg)
+          Roundhouse::CLI.instance.handle_signal(msg)
+        else
+          ::Process.kill(msg, $$)
+        end
+      rescue => e
+        # ignore all redis/network issues
+        logger.error("heartbeat: #{e.message}")
+      end
+    end
+
+    def hard_shutdown_in(delay)
+      logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }
+
+      after(delay) do
+        watchdog("Manager#hard_shutdown_in died") do
+          # We've reached the timeout and we still have busy workers.
+          # They must die but their messages shall live on.
+          logger.warn { "Terminating #{@busy.size} busy worker threads" }
+          logger.warn { "Work still in progress #{@in_progress.values.inspect}" }
+
+          requeue
+
+          @busy.each do |processor|
+            if processor.alive? && t = @threads.delete(processor.object_id)
+              t.raise Shutdown
+            end
+          end
+
+          @finished.signal
+        end
+      end
+    end
+
+    def dispatch
+      return if stopped?
+      # This is a safety check to ensure we haven't leaked
+      # processors somehow.
+      raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
+      raise "No ready processor!?" if @ready.empty?
+
+      @fetcher.async.fetch
+    end
+
+    def shutdown
+      requeue
+      @finished.signal
+    end
+
+    def requeue
+      # Re-enqueue terminated jobs
+      # NOTE: You may notice that we may push a job back to redis before
+      # the worker thread is terminated. This is ok because Roundhouse's
+      # contract says that jobs are run AT LEAST once. Process termination
+      # is delayed until we're certain the jobs are back in Redis because
+      # it is worse to lose a job than to run it twice.
+      Roundhouse::Fetcher.strategy.bulk_requeue(@in_progress.values, @options)
+      @in_progress.clear
+    end
+  end
+end
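Because the `❤` heartbeat above RPOPs from `#{key}-signals` every five seconds and forwards the value to `Process.kill` (or to `Roundhouse::CLI` for the JVM-reserved `USR1`/`USR2`), another process can signal a worker purely through Redis. A hedged sketch, assuming you pick the target's identity string out of the `processes` set and that the CLI maps the signal name to a behaviour such as quieting or shutting down:

```ruby
# Sketch: remotely signal a running Roundhouse process via its signal list.
# The identity must match the key the process registered in the 'processes' set.
identity = Roundhouse.redis { |conn| conn.smembers('processes') }.first

Roundhouse.redis do |conn|
  # LPUSH pairs with the heartbeat's RPOP, so signals are consumed in FIFO order
  # within roughly one 5-second heartbeat interval.
  conn.lpush("#{identity}-signals", 'USR1')
end
```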