roundhouse-x 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +16 -0
- data/3.0-Upgrade.md +70 -0
- data/Changes.md +1127 -0
- data/Gemfile +27 -0
- data/LICENSE +7 -0
- data/README.md +52 -0
- data/Rakefile +9 -0
- data/bin/roundhouse +19 -0
- data/bin/roundhousectl +93 -0
- data/lib/generators/roundhouse/templates/worker.rb.erb +9 -0
- data/lib/generators/roundhouse/templates/worker_spec.rb.erb +6 -0
- data/lib/generators/roundhouse/templates/worker_test.rb.erb +8 -0
- data/lib/generators/roundhouse/worker_generator.rb +49 -0
- data/lib/roundhouse/actor.rb +39 -0
- data/lib/roundhouse/api.rb +859 -0
- data/lib/roundhouse/cli.rb +396 -0
- data/lib/roundhouse/client.rb +210 -0
- data/lib/roundhouse/core_ext.rb +105 -0
- data/lib/roundhouse/exception_handler.rb +30 -0
- data/lib/roundhouse/fetch.rb +154 -0
- data/lib/roundhouse/launcher.rb +98 -0
- data/lib/roundhouse/logging.rb +104 -0
- data/lib/roundhouse/manager.rb +236 -0
- data/lib/roundhouse/middleware/chain.rb +149 -0
- data/lib/roundhouse/middleware/i18n.rb +41 -0
- data/lib/roundhouse/middleware/server/active_record.rb +13 -0
- data/lib/roundhouse/middleware/server/logging.rb +40 -0
- data/lib/roundhouse/middleware/server/retry_jobs.rb +206 -0
- data/lib/roundhouse/monitor.rb +124 -0
- data/lib/roundhouse/paginator.rb +42 -0
- data/lib/roundhouse/processor.rb +159 -0
- data/lib/roundhouse/rails.rb +24 -0
- data/lib/roundhouse/redis_connection.rb +77 -0
- data/lib/roundhouse/scheduled.rb +115 -0
- data/lib/roundhouse/testing/inline.rb +28 -0
- data/lib/roundhouse/testing.rb +193 -0
- data/lib/roundhouse/util.rb +68 -0
- data/lib/roundhouse/version.rb +3 -0
- data/lib/roundhouse/web.rb +264 -0
- data/lib/roundhouse/web_helpers.rb +249 -0
- data/lib/roundhouse/worker.rb +90 -0
- data/lib/roundhouse.rb +177 -0
- data/roundhouse.gemspec +27 -0
- data/test/config.yml +9 -0
- data/test/env_based_config.yml +11 -0
- data/test/fake_env.rb +0 -0
- data/test/fixtures/en.yml +2 -0
- data/test/helper.rb +49 -0
- data/test/test_api.rb +521 -0
- data/test/test_cli.rb +389 -0
- data/test/test_client.rb +294 -0
- data/test/test_exception_handler.rb +55 -0
- data/test/test_fetch.rb +206 -0
- data/test/test_logging.rb +34 -0
- data/test/test_manager.rb +169 -0
- data/test/test_middleware.rb +160 -0
- data/test/test_monitor.rb +258 -0
- data/test/test_processor.rb +176 -0
- data/test/test_rails.rb +23 -0
- data/test/test_redis_connection.rb +127 -0
- data/test/test_retry.rb +390 -0
- data/test/test_roundhouse.rb +87 -0
- data/test/test_scheduled.rb +120 -0
- data/test/test_scheduling.rb +75 -0
- data/test/test_testing.rb +78 -0
- data/test/test_testing_fake.rb +240 -0
- data/test/test_testing_inline.rb +65 -0
- data/test/test_util.rb +18 -0
- data/test/test_web.rb +605 -0
- data/test/test_web_helpers.rb +52 -0
- data/web/assets/images/bootstrap/glyphicons-halflings-white.png +0 -0
- data/web/assets/images/bootstrap/glyphicons-halflings.png +0 -0
- data/web/assets/images/logo.png +0 -0
- data/web/assets/images/status/active.png +0 -0
- data/web/assets/images/status/idle.png +0 -0
- data/web/assets/images/status-sd8051fd480.png +0 -0
- data/web/assets/javascripts/application.js +83 -0
- data/web/assets/javascripts/dashboard.js +300 -0
- data/web/assets/javascripts/locales/README.md +27 -0
- data/web/assets/javascripts/locales/jquery.timeago.ar.js +96 -0
- data/web/assets/javascripts/locales/jquery.timeago.bg.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.bs.js +49 -0
- data/web/assets/javascripts/locales/jquery.timeago.ca.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.cs.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.cy.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.da.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.de.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.el.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.en-short.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.en.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.es.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.et.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.fa.js +22 -0
- data/web/assets/javascripts/locales/jquery.timeago.fi.js +28 -0
- data/web/assets/javascripts/locales/jquery.timeago.fr-short.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.fr.js +17 -0
- data/web/assets/javascripts/locales/jquery.timeago.he.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.hr.js +49 -0
- data/web/assets/javascripts/locales/jquery.timeago.hu.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.hy.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.id.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.it.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.ja.js +19 -0
- data/web/assets/javascripts/locales/jquery.timeago.ko.js +17 -0
- data/web/assets/javascripts/locales/jquery.timeago.lt.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.mk.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.nl.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.no.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.pl.js +31 -0
- data/web/assets/javascripts/locales/jquery.timeago.pt-br.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.pt.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.ro.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.rs.js +49 -0
- data/web/assets/javascripts/locales/jquery.timeago.ru.js +34 -0
- data/web/assets/javascripts/locales/jquery.timeago.sk.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.sl.js +44 -0
- data/web/assets/javascripts/locales/jquery.timeago.sv.js +18 -0
- data/web/assets/javascripts/locales/jquery.timeago.th.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.tr.js +16 -0
- data/web/assets/javascripts/locales/jquery.timeago.uk.js +34 -0
- data/web/assets/javascripts/locales/jquery.timeago.uz.js +19 -0
- data/web/assets/javascripts/locales/jquery.timeago.zh-cn.js +20 -0
- data/web/assets/javascripts/locales/jquery.timeago.zh-tw.js +20 -0
- data/web/assets/stylesheets/application.css +746 -0
- data/web/assets/stylesheets/bootstrap.css +9 -0
- data/web/locales/cs.yml +68 -0
- data/web/locales/da.yml +68 -0
- data/web/locales/de.yml +69 -0
- data/web/locales/el.yml +68 -0
- data/web/locales/en.yml +77 -0
- data/web/locales/es.yml +69 -0
- data/web/locales/fr.yml +69 -0
- data/web/locales/hi.yml +75 -0
- data/web/locales/it.yml +69 -0
- data/web/locales/ja.yml +69 -0
- data/web/locales/ko.yml +68 -0
- data/web/locales/nl.yml +68 -0
- data/web/locales/no.yml +69 -0
- data/web/locales/pl.yml +59 -0
- data/web/locales/pt-br.yml +68 -0
- data/web/locales/pt.yml +67 -0
- data/web/locales/ru.yml +75 -0
- data/web/locales/sv.yml +68 -0
- data/web/locales/ta.yml +75 -0
- data/web/locales/zh-cn.yml +68 -0
- data/web/locales/zh-tw.yml +68 -0
- data/web/views/_footer.erb +22 -0
- data/web/views/_job_info.erb +84 -0
- data/web/views/_nav.erb +66 -0
- data/web/views/_paging.erb +23 -0
- data/web/views/_poll_js.erb +5 -0
- data/web/views/_poll_link.erb +7 -0
- data/web/views/_status.erb +4 -0
- data/web/views/_summary.erb +40 -0
- data/web/views/busy.erb +90 -0
- data/web/views/dashboard.erb +75 -0
- data/web/views/dead.erb +34 -0
- data/web/views/layout.erb +31 -0
- data/web/views/morgue.erb +71 -0
- data/web/views/queue.erb +45 -0
- data/web/views/queues.erb +27 -0
- data/web/views/retries.erb +74 -0
- data/web/views/retry.erb +34 -0
- data/web/views/scheduled.erb +54 -0
- data/web/views/scheduled_job_info.erb +8 -0
- metadata +404 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require 'roundhouse'
|
|
2
|
+
|
|
3
|
+
module Roundhouse
|
|
4
|
+
module ExceptionHandler
|
|
5
|
+
|
|
6
|
+
class Logger
|
|
7
|
+
# Logs an exception at WARN level through Roundhouse.logger.
#
# ex      - the exception to report; its class name and message are always
#           logged, its backtrace only when one is present.
# ctxHash - context object, logged first unless it reports itself as empty.
def call(ex, ctxHash)
  unless ctxHash.empty?
    Roundhouse.logger.warn(ctxHash)
  end

  summary = "#{ex.class.name}: #{ex.message}"
  Roundhouse.logger.warn summary

  trace = ex.backtrace
  Roundhouse.logger.warn trace.join("\n") if trace
end
|
|
12
|
+
|
|
13
|
+
# Set up default handler which just logs the error
|
|
14
|
+
Roundhouse.error_handlers << Roundhouse::ExceptionHandler::Logger.new
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Passes an exception to every handler in Roundhouse.error_handlers.
#
# ex      - the exception to report.
# ctxHash - optional context handed unchanged to each handler (default: {}).
#
# A handler that raises is logged at ERROR level and skipped; the remaining
# handlers still run and still receive the original +ex+.
def handle_exception(ex, ctxHash={})
  Roundhouse.error_handlers.each do |handler|
    begin
      handler.call(ex, ctxHash)
    rescue => handler_error
      # Bind the handler's own failure to a separate local. Rescuing into
      # `ex` would overwrite the method argument, so every later handler
      # would be given the handler's error instead of the original one.
      Roundhouse.logger.error "!!! ERROR HANDLER THREW AN ERROR !!!"
      Roundhouse.logger.error handler_error
      Roundhouse.logger.error handler_error.backtrace.join("\n") unless handler_error.backtrace.nil?
    end
  end
end
|
|
28
|
+
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
require 'roundhouse'
|
|
2
|
+
require 'roundhouse/monitor'
|
|
3
|
+
require 'roundhouse/util'
|
|
4
|
+
require 'roundhouse/actor'
|
|
5
|
+
|
|
6
|
+
module Roundhouse
|
|
7
|
+
##
|
|
8
|
+
# The Fetcher blocks on Redis, waiting for a message to process
|
|
9
|
+
# from the queues. It gets the message and hands it to the Manager
|
|
10
|
+
# to assign to a ready Processor.
|
|
11
|
+
class Fetcher
|
|
12
|
+
include Util
|
|
13
|
+
include Actor
|
|
14
|
+
|
|
15
|
+
TIMEOUT = 1
|
|
16
|
+
|
|
17
|
+
attr_reader :down
|
|
18
|
+
|
|
19
|
+
# Builds a fetcher bound to a manager.
#
# mgr     - stored as @mgr; #fetch hands retrieved work to it.
# options - passed unchanged to the constructor of the class returned by
#           Fetcher.strategy.
def initialize(mgr, options)
  @mgr      = mgr
  @down     = nil # becomes a Time once a fetch attempt has failed
  @strategy = Fetcher.strategy.new(options)
end
|
|
24
|
+
|
|
25
|
+
# Fetching is straightforward: the Manager makes a fetch
|
|
26
|
+
# request for each idle processor when Roundhouse starts and
|
|
27
|
+
# then issues a new fetch request every time a Processor
|
|
28
|
+
# finishes a message.
|
|
29
|
+
#
|
|
30
|
+
# Because we have to shut down cleanly, we can't block
|
|
31
|
+
# forever and we can't loop forever. Instead we reschedule
|
|
32
|
+
# a new fetch if the current fetch turned up nothing.
|
|
33
|
+
# Performs a single fetch attempt. Does nothing once Fetcher.done? is set.
# Otherwise asks the strategy for work and either hands it to the manager
# or schedules another attempt; any error goes to handle_fetch_exception.
def fetch
  watchdog('Fetcher#fetch died') do
    return if Roundhouse::Fetcher.done?

    begin
      unit = @strategy.retrieve_work

      # Getting here after a recorded failure means the outage is over.
      if @down
        ::Roundhouse.logger.info("Redis is online, #{Time.now - @down} sec downtime")
      end
      @down = nil

      unit ? @mgr.async.assign(unit) : after(0) { fetch }
    rescue => ex
      handle_fetch_exception(ex)
    end
  end
end
|
|
53
|
+
|
|
54
|
+
private
|
|
55
|
+
|
|
56
|
+
# Sleeps for TIMEOUT seconds; called by handle_fetch_exception before it
# schedules the next fetch attempt.
def pause
  sleep TIMEOUT
end
|
|
59
|
+
|
|
60
|
+
# Handles a failed fetch attempt.
#
# ex - the exception raised while fetching.
#
# The error and its backtrace are logged only for the first failure of an
# outage (while @down is unset). @down then records when the outage began,
# the fetcher pauses, and another fetch is scheduled.
def handle_fetch_exception(ex)
  unless @down
    logger.error("Error fetching message: #{ex}")
    # An exception that was never raised has a nil backtrace; Array() turns
    # that into an empty list instead of failing inside the error handler.
    Array(ex.backtrace).each { |bt| logger.error(bt) }
  end
  @down ||= Time.now
  pause
  after(0) { fetch }
rescue Celluloid::TaskTerminated
  # If redis is down when we try to shut down, all the fetch backlog
  # raises these errors. Haven't been able to figure out what I'm doing wrong.
end
|
|
74
|
+
|
|
75
|
+
# Ugh. Say hello to a bloody hack.
|
|
76
|
+
# Can't find a clean way to get the fetcher to just stop processing
|
|
77
|
+
# its mailbox when shutdown starts.
|
|
78
|
+
# Sets the class-level done flag; Fetcher#fetch returns immediately while
# done? is truthy.
def self.done!
  @done = true
end
|
|
81
|
+
|
|
82
|
+
# Clears the class-level done flag again.
def self.reset # testing only
  @done = nil
end
|
|
85
|
+
|
|
86
|
+
# Returns the done flag, or nil when it has never been assigned.
def self.done?
  @done if defined?(@done)
end
|
|
89
|
+
|
|
90
|
+
# Returns the fetch strategy class: Roundhouse.options[:fetch] when set,
# otherwise RoundRobinFetch.
def self.strategy
  configured = Roundhouse.options[:fetch]
  configured ? configured : RoundRobinFetch
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
class RoundRobinFetch
|
|
96
|
+
# Accepts the fetcher options for interface compatibility and ignores them.
def initialize(options = nil)
  # intentionally empty
end
|
|
98
|
+
|
|
99
|
+
# Asks Roundhouse::Monitor for the next job over a Redis connection.
#
# Returns a UnitOfWork built from the Monitor's reply, or nil when the
# Monitor returned nothing.
def retrieve_work
  payload = Roundhouse.redis do |conn|
    Roundhouse::Monitor.await_next_job(conn)
  end
  return unless payload

  UnitOfWork.new(*payload)
end
|
|
103
|
+
|
|
104
|
+
# By leaving this as a class method, it can be pluggable and used by the Manager actor. Making it
|
|
105
|
+
# an instance method will make it async to the Fetcher actor
|
|
106
|
+
# Pushes a batch of in-progress units of work back to Redis.
#
# inprogress - collection of objects responding to #queue_id and #message.
# options    - accepted but not used by this strategy.
#
# Returns nil immediately for an empty batch. Any StandardError is logged
# as a warning rather than raised.
def self.bulk_requeue(inprogress, options)
  return if inprogress.empty?

  Roundhouse.logger.debug { "Re-queueing terminated jobs" }

  # Collect the raw messages per originating queue.
  by_queue = inprogress.each_with_object({}) do |unit, grouped|
    (grouped[unit.queue_id] ||= []) << unit.message
  end

  Roundhouse.redis do |conn|
    conn.pipelined do
      by_queue.each_pair do |queue_id, jobs|
        Roundhouse::Monitor.requeue(conn, queue_id, jobs)
      end
    end
    # REFACTOR NOTE: This has to happen outside the pipelining since
    # we need to read. We can refactor to put this back
    # after converting the Monitor operations as EVAL scripts
    by_queue.each_key do |queue_id|
      Roundhouse::Monitor.push(conn, queue_id)
    end
  end
  Roundhouse.logger.info("Pushed #{inprogress.size} messages back to Redis")
rescue => ex
  Roundhouse.logger.warn("Failed to requeue #{inprogress.size} jobs: #{ex.message}")
end
|
|
133
|
+
|
|
134
|
+
# A message taken from a queue together with the full name of that queue.
UnitOfWork = Struct.new(:full_queue_name, :message) do
  # Matches everything up to and including "<Roundhouse::Monitor::QUEUE>:".
  QUEUE_REGEX = /.*#{Roundhouse::Monitor::QUEUE}:/.freeze

  # full_queue_name with every QUEUE_REGEX match removed.
  def queue_id
    full_queue_name.gsub(QUEUE_REGEX, '')
  end

  def acknowledge
    # nothing to do
  end

  # Hands the message back to the Monitor for its queue, then calls
  # Monitor.push for that queue, all on one Redis connection.
  def requeue
    Roundhouse.redis do |conn|
      Roundhouse::Monitor.requeue(conn, queue_id, message)
      Roundhouse::Monitor.push(conn, queue_id)
    end
  end
end
|
|
152
|
+
|
|
153
|
+
end
|
|
154
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
require 'roundhouse/actor'
|
|
2
|
+
require 'roundhouse/manager'
|
|
3
|
+
require 'roundhouse/fetch'
|
|
4
|
+
require 'roundhouse/scheduled'
|
|
5
|
+
|
|
6
|
+
module Roundhouse
|
|
7
|
+
# The Launcher is a very simple Actor whose job is to
|
|
8
|
+
# start, monitor and stop the core Actors in Roundhouse.
|
|
9
|
+
# If any of these actors die, the Roundhouse process exits
|
|
10
|
+
# immediately.
|
|
11
|
+
class Launcher
|
|
12
|
+
include Actor
|
|
13
|
+
include Util
|
|
14
|
+
|
|
15
|
+
trap_exit :actor_died
|
|
16
|
+
|
|
17
|
+
attr_reader :manager, :poller, :fetcher
|
|
18
|
+
|
|
19
|
+
# Creates the manager, poller and fetcher actors and wires them together.
#
# options - stored as @options and passed to the Manager and the Fetcher.
def initialize(options)
  @options = options
  @done    = false
  @condvar = Celluloid::Condition.new

  # All three actors are created with new_link, in this order.
  @manager = Roundhouse::Manager.new_link(@condvar, options)
  @poller  = Roundhouse::Scheduled::Poller.new_link
  @fetcher = Roundhouse::Fetcher.new_link(@manager, options)

  # The manager is built before the fetcher, so it gets its fetcher here.
  @manager.fetcher = @fetcher
end
|
|
28
|
+
|
|
29
|
+
# Exit handler registered with trap_exit.
#
# actor  - the actor that exited (not used here).
# reason - the error behind the exit, or a falsy value when there is none.
#
# Does nothing once #stop has set @done or when no reason is given;
# otherwise logs, reports the error and exits the process with status 1.
def actor_died(actor, reason)
  # https://github.com/mperham/sidekiq/issues/2057#issuecomment-66485477
  return unless reason && !@done

  Roundhouse.logger.warn("Roundhouse died due to the following error, cannot recover, process exiting")
  handle_exception(reason)
  exit(1)
end
|
|
37
|
+
|
|
38
|
+
# Starts the manager and the poller asynchronously, then begins the
# heartbeat, all under a watchdog.
def run
  watchdog('Launcher#run') do
    manager.async.start
    poller.async.poll(true)
    start_heartbeat
  end
end
|
|
46
|
+
|
|
47
|
+
# Shuts the launcher down: marks itself and the Fetcher class as done,
# terminates the fetcher and poller if they are alive, asks the manager to
# stop, waits on the condition variable, terminates the manager, and
# finally removes the heartbeat data.
def stop
  watchdog('Launcher#stop') do
    @done = true
    Roundhouse::Fetcher.done!

    fetcher.terminate if fetcher.alive?
    poller.terminate if poller.alive?

    manager.async.stop(shutdown: true, timeout: @options[:timeout])
    @condvar.wait
    manager.terminate

    # Requeue everything in case there was a worker who grabbed work while stopped
    # This call is a no-op in Roundhouse but necessary for Roundhouse Pro.
    Roundhouse::Fetcher.strategy.bulk_requeue([], @options)

    stop_heartbeat
  end
end
|
|
65
|
+
|
|
66
|
+
private
|
|
67
|
+
|
|
68
|
+
# Builds the process description and hands it to the manager's heartbeat
# together with its JSON form.
def start_heartbeat
  key = identity
  data = {
    'hostname'    => hostname,
    'started_at'  => Time.now.to_f,
    'pid'         => $$,
    'tag'         => @options[:tag] || '',
    'concurrency' => @options[:concurrency],
    'queues'      => @options[:queues].uniq,
    'labels'      => Roundhouse.options[:labels],
    'identity'    => identity,
  }
  # this data doesn't change so dump it to a string
  # now so we don't need to dump it every heartbeat.
  manager.heartbeat(key, data, Roundhouse.dump_json(data))
end
|
|
85
|
+
|
|
86
|
+
# Removes this process from the 'processes' set and deletes its
# "<identity>:workers" key in one pipelined Redis call. Failures are
# swallowed on purpose and yield nil.
def stop_heartbeat
  begin
    Roundhouse.redis do |conn|
      conn.pipelined do
        conn.srem('processes', identity)
        conn.del("#{identity}:workers")
      end
    end
  rescue StandardError
    # best effort, ignore network errors
    nil
  end
end
|
|
96
|
+
|
|
97
|
+
end
|
|
98
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
require 'logger'
|
|
3
|
+
|
|
4
|
+
module Roundhouse
|
|
5
|
+
module Logging
|
|
6
|
+
|
|
7
|
+
# Log formatter producing
# "<UTC time> <pid> TID-<thread id><context> <severity>: <message>\n".
class Pretty < Logger::Formatter
  SPACE = " "

  # Provide a call() method that returns the formatted message.
  # program_name is accepted for the formatter interface but not printed.
  def call(severity, time, program_name, message)
    stamp  = time.utc.iso8601(3)
    thread = Thread.current.object_id.to_s(36)
    "#{stamp} #{::Process.pid} TID-#{thread}#{context} #{severity}: #{message}\n"
  end

  # The current thread's :roundhouse_context entries joined by spaces with
  # one leading space, or nil when there are none.
  def context
    entries = Thread.current[:roundhouse_context]
    return unless entries && entries.any?

    " #{entries.join(SPACE)}"
  end
end
|
|
20
|
+
|
|
21
|
+
# Same layout as Pretty minus the leading timestamp; the time argument is
# ignored.
class WithoutTimestamp < Pretty
  def call(severity, time, program_name, message)
    thread = Thread.current.object_id.to_s(36)
    "#{::Process.pid} TID-#{thread}#{context} #{severity}: #{message}\n"
  end
end
|
|
26
|
+
|
|
27
|
+
# Runs the block with +msg+ appended to the current thread's
# :roundhouse_context list, removing the last entry again afterwards even
# when the block raises. Returns the block's value.
def self.with_context(msg)
  (Thread.current[:roundhouse_context] ||= []) << msg
  yield
ensure
  Thread.current[:roundhouse_context].pop
end
|
|
34
|
+
|
|
35
|
+
# Replaces the module-level logger with a new INFO-level Logger writing to
# +log_target+ (default STDOUT).
#
# The formatter is WithoutTimestamp when ENV['DYNO'] is set, Pretty
# otherwise. A previously installed logger is closed unless $TESTING is
# set. Returns the new logger.
def self.initialize_logger(log_target = STDOUT)
  previous = (@logger if defined?(@logger))

  @logger = Logger.new(log_target)
  @logger.level = Logger::INFO
  @logger.formatter =
    if ENV['DYNO']
      WithoutTimestamp.new
    else
      Pretty.new
    end

  # don't want to close testing's STDOUT logging
  previous.close unless $TESTING || !previous
  @logger
end
|
|
43
|
+
|
|
44
|
+
# Returns the module-level logger, creating one with initialize_logger the
# first time @logger has not been assigned at all.
def self.logger
  return @logger if defined?(@logger)

  initialize_logger
end
|
|
47
|
+
|
|
48
|
+
# Installs +log+ as the module-level logger.
#
# log - the logger to use; nil or false installs a Logger that writes to
#       the null device, i.e. logging is discarded.
def self.logger=(log)
  # File::NULL names the platform's null device, so the silent logger does
  # not depend on a literal '/dev/null' path existing.
  @logger = log || Logger.new(File::NULL)
end
|
|
51
|
+
|
|
52
|
+
# This reopens ALL logfiles in the process that have been rotated
|
|
53
|
+
# using logrotate(8) (without copytruncate) or similar tools.
|
|
54
|
+
# A +File+ object is considered for reopening if it is:
|
|
55
|
+
# 1) opened with the O_APPEND and O_WRONLY flags
|
|
56
|
+
# 2) the current open file handle does not match its original open path
|
|
57
|
+
# 3) unbuffered (as far as userspace buffering goes, not O_SYNC)
|
|
58
|
+
# Returns the number of files reopened
|
|
59
|
+
# Reopens every open, synced, append-mode regular File in the process whose
# path no longer points at the inode it has open.
#
# Returns the number of files reopened. When ObjectSpace iteration raises a
# RuntimeError, a message is printed with puts instead and the result of
# puts is returned.
def self.reopen_logs
  # Fcntl::F_GETFL is used below, but 'time' and 'logger' do not load fcntl.
  # Without it the lookup raises NameError, which neither rescue clause in
  # this method catches.
  require 'fcntl'

  to_reopen = []
  append_flags = File::WRONLY | File::APPEND

  # Pass 1: collect candidate files.
  ObjectSpace.each_object(File) do |fp|
    begin
      if !fp.closed? && fp.stat.file? && fp.sync && (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags
        to_reopen << fp
      end
    rescue IOError, Errno::EBADF
    end
  end

  # Pass 2: reopen the candidates whose path was moved or removed.
  nr = 0
  to_reopen.each do |fp|
    orig_st = begin
      fp.stat
    rescue IOError, Errno::EBADF
      next
    end

    begin
      b = File.stat(fp.path)
      # Same inode and device: the path still refers to the open file.
      next if orig_st.ino == b.ino && orig_st.dev == b.dev
    rescue Errno::ENOENT
      # Path is gone; fall through and recreate it.
    end

    begin
      File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) }
      fp.sync = true
      nr += 1
    rescue IOError, Errno::EBADF
      # not much we can do...
    end
  end
  nr
rescue RuntimeError => ex
  # RuntimeError: ObjectSpace is disabled; each_object will only work with Class, pass -X+O to enable
  puts "Unable to reopen logs: #{ex.message}"
end
|
|
99
|
+
|
|
100
|
+
# Instance-level shortcut to the module-level Roundhouse::Logging.logger.
def logger
  Roundhouse::Logging.logger
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require 'roundhouse/util'
|
|
3
|
+
require 'roundhouse/actor'
|
|
4
|
+
require 'roundhouse/processor'
|
|
5
|
+
require 'roundhouse/fetch'
|
|
6
|
+
|
|
7
|
+
module Roundhouse
|
|
8
|
+
|
|
9
|
+
##
|
|
10
|
+
# The main router in the system. This
|
|
11
|
+
# manages the processor state and accepts messages
|
|
12
|
+
# from Redis to be dispatched to an idle processor.
|
|
13
|
+
#
|
|
14
|
+
class Manager
|
|
15
|
+
include Util
|
|
16
|
+
include Actor
|
|
17
|
+
trap_exit :processor_died
|
|
18
|
+
|
|
19
|
+
attr_reader :ready
|
|
20
|
+
attr_reader :busy
|
|
21
|
+
attr_accessor :fetcher
|
|
22
|
+
|
|
23
|
+
SPIN_TIME_FOR_GRACEFUL_SHUTDOWN = 1
|
|
24
|
+
JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
|
|
25
|
+
|
|
26
|
+
# Sets up manager state and creates the pool of linked processors.
#
# condvar - stored as @finished and signalled when shutdown completes.
# options - hash; :concurrency gives the number of processors (default 25).
#
# Raises ArgumentError when the concurrency is below 1.
def initialize(condvar, options={})
  logger.debug { options.inspect }
  @options = options
  @count = options[:concurrency] || 25
  raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1

  @finished = condvar
  @done_callback = nil
  @done = false

  @in_progress = {} # processor object_id => work being processed
  @threads = {}     # proxy_id => thread reported through real_thread
  @busy = []
  @ready = @count.times.map do
    processor = Processor.new_link(current_actor)
    processor.proxy_id = processor.object_id
    processor
  end
end
|
|
44
|
+
|
|
45
|
+
# Stops the manager: marks it done, terminates all idle processors, and
# finishes at once when nothing is busy. Otherwise, when options[:shutdown]
# is set, schedules a hard shutdown after options[:timeout] seconds.
def stop(options={})
  watchdog('Manager#stop died') do
    @done = true

    logger.info { "Terminating #{@ready.size} quiet workers" }
    @ready.each do |idle|
      idle.terminate if idle.alive?
    end
    @ready.clear

    return if clean_up_for_graceful_shutdown

    hard_shutdown_in(options[:timeout]) if options[:shutdown]
  end
end
|
|
61
|
+
|
|
62
|
+
# Completes the shutdown when no processor is busy and returns true.
# Otherwise schedules itself again after SPIN_TIME_FOR_GRACEFUL_SHUTDOWN
# seconds and returns false.
def clean_up_for_graceful_shutdown
  unless @busy.empty?
    after(SPIN_TIME_FOR_GRACEFUL_SHUTDOWN) { clean_up_for_graceful_shutdown }
    return false
  end

  shutdown
  true
end
|
|
71
|
+
|
|
72
|
+
# Calls dispatch once for every processor currently in @ready.
def start
  @ready.each { |_idle| dispatch }
end
|
|
75
|
+
|
|
76
|
+
# Registers the block that processor_done calls with the finished
# processor. A later call replaces the earlier block.
def when_done(&blk)
  @done_callback = blk
end
|
|
79
|
+
|
|
80
|
+
# Bookkeeping after a processor finishes a unit of work: runs the
# when_done callback, drops the processor from the in-progress, thread and
# busy records, then either returns it to the ready pool or, while
# stopping, terminates it. Always ends with a dispatch call.
def processor_done(processor)
  watchdog('Manager#processor_done died') do
    @done_callback.call(processor) if @done_callback

    key = processor.object_id
    @in_progress.delete(key)
    @threads.delete(key)
    @busy.delete(processor)

    if !stopped?
      @ready << processor if processor.alive?
    else
      processor.terminate if processor.alive?
      shutdown if @busy.empty?
    end

    dispatch
  end
end
|
|
95
|
+
|
|
96
|
+
# Exit handler registered with trap_exit.
#
# processor - the processor that exited.
# reason    - not used here.
#
# Drops the processor from all bookkeeping. While running, a new linked
# processor takes its place and dispatch is called; while stopping,
# shutdown runs once nothing is busy.
def processor_died(processor, reason)
  watchdog("Manager#processor_died died") do
    @in_progress.delete(processor.object_id)
    @threads.delete(processor.object_id)
    @busy.delete(processor)

    if stopped?
      shutdown if @busy.empty?
    else
      replacement = Processor.new_link(current_actor)
      replacement.proxy_id = replacement.object_id
      @ready << replacement
      dispatch
    end
  end
end
|
|
112
|
+
|
|
113
|
+
# Hands a unit of work to an idle processor, or requeues it when the
# manager is already stopping.
#
# work - object responding to #requeue; passed to the processor's #process.
def assign(work)
  watchdog("Manager#assign died") do
    unless stopped?
      worker = @ready.pop
      @in_progress[worker.object_id] = work
      @busy << worker
      worker.async.process(work)
    else
      # Race condition between Manager#stop if Fetcher
      # is blocked on redis and gets a message after
      # all the ready Processors have been stopped.
      # Push the message back to redis.
      work.requeue
    end
  end
end
|
|
129
|
+
|
|
130
|
+
# A hack worthy of Rube Goldberg. We need to be able
|
|
131
|
+
# to hard stop a working thread. But there's no way for us to
|
|
132
|
+
# get a handle to the underlying thread performing work for a processor
|
|
133
|
+
# so we have it call us and tell us.
|
|
134
|
+
# Records +thr+ under +proxy_id+ in @threads so hard_shutdown_in can later
# raise Shutdown in it.
def real_thread(proxy_id, thr)
  @threads[proxy_id] = thr
end
|
|
137
|
+
|
|
138
|
+
# Pieces of the process title. #heartbeat calls each proc with
# (manager, heartbeat data hash), drops nil results and joins the rest
# with spaces into $0.
PROCTITLES = [
  proc { 'roundhouse'.freeze },
  proc { Roundhouse::VERSION },
  proc { |_mgr, data| data['tag'] },
  proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
  proc { |mgr, _data| mgr.stopped? ? "stopping" : nil },
]
|
|
145
|
+
|
|
146
|
+
# Refreshes the process title from PROCTITLES, sends one heartbeat, and
# schedules itself again in 5 seconds.
#
# key  - passed to the beat as the process key.
# data - hash handed to each PROCTITLES proc.
# json - pre-serialized process info passed to the beat.
def heartbeat(key, data, json)
  title_parts = PROCTITLES.map { |part| part.call(self, data) }.compact
  $0 = title_parts.join(' ')

  ❤(key, json)
  after(5) { heartbeat(key, data, json) }
end
|
|
156
|
+
|
|
157
|
+
# The @done flag: false after initialize, true once #stop has run.
def stopped?
  @done
end
|
|
160
|
+
|
|
161
|
+
private
|
|
162
|
+
|
|
163
|
+
# Sends one heartbeat to Redis inside a MULTI: registers +key+ in the
# 'processes' set, stores info/busy/beat under +key+ with a 60 second
# expiry, and pops one entry from "<key>-signals".
#
# A popped entry is treated as a signal name: names listed in
# JVM_RESERVED_SIGNALS go to the CLI's handle_signal, anything else is sent
# to this process with Process.kill. Errors are logged, not raised.
def ❤(key, json)
  _, _, _, msg = Roundhouse.redis do |conn|
    conn.multi do
      conn.sadd('processes', key)
      conn.hmset(key, 'info', json, 'busy', @busy.size, 'beat', Time.now.to_f)
      conn.expire(key, 60)
      conn.rpop("#{key}-signals")
    end
  end

  return unless msg

  if JVM_RESERVED_SIGNALS.include?(msg)
    Roundhouse::CLI.instance.handle_signal(msg)
  else
    ::Process.kill(msg, $$)
  end
rescue => e
  # ignore all redis/network issues
  logger.error("heartbeat: #{e.message}")
end
|
|
186
|
+
|
|
187
|
+
# Schedules a forced shutdown +delay+ seconds from now. When the timer
# fires, in-progress work is requeued, Shutdown is raised in the recorded
# thread of every busy processor that is still alive, and @finished is
# signalled.
def hard_shutdown_in(delay)
  logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }

  after(delay) do
    watchdog("Manager#hard_shutdown_in died") do
      # We've reached the timeout and we still have busy workers.
      # They must die but their messages shall live on.
      logger.warn { "Terminating #{@busy.size} busy worker threads" }
      logger.warn { "Work still in progress #{@in_progress.values.inspect}" }

      requeue

      @busy.each do |processor|
        next unless processor.alive?

        worker_thread = @threads.delete(processor.object_id)
        worker_thread.raise Shutdown if worker_thread
      end

      @finished.signal
    end
  end
end
|
|
209
|
+
|
|
210
|
+
# Asks the fetcher for one more unit of work unless the manager is
# stopping. Raises RuntimeError when no processor is ready to take it.
def dispatch
  return if stopped?

  if @ready.empty?
    # This is a safety check to ensure we haven't leaked
    # processors somehow.
    raise(@busy.empty? ? "BUG: No processors, cannot continue!" : "No ready processor!?")
  end

  @fetcher.async.fetch
end
|
|
219
|
+
|
|
220
|
+
# Final shutdown step: requeues whatever is still recorded as in progress,
# then signals @finished.
def shutdown
  requeue
  @finished.signal
end
|
|
224
|
+
|
|
225
|
+
# Hands every in-progress unit of work to the fetch strategy's
# bulk_requeue, then empties the in-progress record.
def requeue
  # Re-enqueue terminated jobs
  # NOTE: You may notice that we may push a job back to redis before
  # the worker thread is terminated. This is ok because Roundhouse's
  # contract says that jobs are run AT LEAST once. Process termination
  # is delayed until we're certain the jobs are back in Redis because
  # it is worse to lose a job than to run it twice.
  pending = @in_progress.values
  Roundhouse::Fetcher.strategy.bulk_requeue(pending, @options)
  @in_progress.clear
end
|
|
235
|
+
end
|
|
236
|
+
end
|