patriot-workflow-scheduler 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/bin/patriot +8 -0
- data/bin/patriot-init +35 -0
- data/lib/patriot.rb +11 -0
- data/lib/patriot/command.rb +71 -0
- data/lib/patriot/command/base.rb +199 -0
- data/lib/patriot/command/command_group.rb +43 -0
- data/lib/patriot/command/command_macro.rb +141 -0
- data/lib/patriot/command/composite.rb +49 -0
- data/lib/patriot/command/parser.rb +78 -0
- data/lib/patriot/command/sh_command.rb +42 -0
- data/lib/patriot/controller.rb +2 -0
- data/lib/patriot/controller/package_controller.rb +81 -0
- data/lib/patriot/controller/worker_admin_controller.rb +159 -0
- data/lib/patriot/job_store.rb +66 -0
- data/lib/patriot/job_store/base.rb +159 -0
- data/lib/patriot/job_store/factory.rb +19 -0
- data/lib/patriot/job_store/in_memory_store.rb +252 -0
- data/lib/patriot/job_store/job.rb +118 -0
- data/lib/patriot/job_store/job_ticket.rb +30 -0
- data/lib/patriot/job_store/rdb_job_store.rb +353 -0
- data/lib/patriot/tool.rb +2 -0
- data/lib/patriot/tool/batch_parser.rb +102 -0
- data/lib/patriot/tool/patriot_command.rb +48 -0
- data/lib/patriot/tool/patriot_commands/execute.rb +92 -0
- data/lib/patriot/tool/patriot_commands/job.rb +62 -0
- data/lib/patriot/tool/patriot_commands/plugin.rb +41 -0
- data/lib/patriot/tool/patriot_commands/register.rb +77 -0
- data/lib/patriot/tool/patriot_commands/upgrade.rb +24 -0
- data/lib/patriot/tool/patriot_commands/validate.rb +84 -0
- data/lib/patriot/tool/patriot_commands/worker.rb +35 -0
- data/lib/patriot/tool/patriot_commands/worker_admin.rb +60 -0
- data/lib/patriot/util.rb +14 -0
- data/lib/patriot/util/config.rb +58 -0
- data/lib/patriot/util/config/base.rb +22 -0
- data/lib/patriot/util/config/inifile_config.rb +63 -0
- data/lib/patriot/util/cron_format_parser.rb +104 -0
- data/lib/patriot/util/date_util.rb +200 -0
- data/lib/patriot/util/db_client.rb +65 -0
- data/lib/patriot/util/db_client/base.rb +142 -0
- data/lib/patriot/util/db_client/hash_record.rb +53 -0
- data/lib/patriot/util/db_client/record.rb +25 -0
- data/lib/patriot/util/logger.rb +24 -0
- data/lib/patriot/util/logger/facade.rb +33 -0
- data/lib/patriot/util/logger/factory.rb +59 -0
- data/lib/patriot/util/logger/log4r_factory.rb +111 -0
- data/lib/patriot/util/logger/webrick_log_factory.rb +47 -0
- data/lib/patriot/util/param.rb +73 -0
- data/lib/patriot/util/retry.rb +30 -0
- data/lib/patriot/util/script.rb +52 -0
- data/lib/patriot/util/system.rb +120 -0
- data/lib/patriot/worker.rb +35 -0
- data/lib/patriot/worker/base.rb +153 -0
- data/lib/patriot/worker/info_server.rb +90 -0
- data/lib/patriot/worker/job_store_server.rb +32 -0
- data/lib/patriot/worker/multi_node_worker.rb +157 -0
- data/lib/patriot/worker/servlet.rb +23 -0
- data/lib/patriot/worker/servlet/job_servlet.rb +128 -0
- data/lib/patriot/worker/servlet/worker_status_servlet.rb +44 -0
- data/skel/batch/sample/daily/test.pbc +4 -0
- data/skel/config/patriot.ini +21 -0
- data/skel/public/css/bootstrap.css +2495 -0
- data/skel/public/css/original.css +54 -0
- data/skel/public/js/bootstrap-alerts.js +124 -0
- data/skel/public/js/bootstrap-buttons.js +64 -0
- data/skel/public/js/bootstrap-dropdown.js +55 -0
- data/skel/public/js/bootstrap-modal.js +260 -0
- data/skel/public/js/bootstrap-popover.js +90 -0
- data/skel/public/js/bootstrap-scrollspy.js +107 -0
- data/skel/public/js/bootstrap-tabs.js +80 -0
- data/skel/public/js/bootstrap-twipsy.js +321 -0
- data/skel/public/js/jquery-1.6.4.min.js +4 -0
- data/skel/public/templates/_jobs.erb +97 -0
- data/skel/public/templates/job.erb +119 -0
- data/skel/public/templates/jobs.erb +21 -0
- data/skel/public/templates/jobs_deleted.erb +6 -0
- data/skel/public/templates/layout.erb +103 -0
- data/skel/public/templates/state_updated.erb +6 -0
- metadata +235 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'webrick'
|
3
|
+
require 'singleton'
|
4
|
+
require 'patriot/util/logger/factory'
|
5
|
+
|
6
|
+
module Patriot
|
7
|
+
module Util
|
8
|
+
module Logger
|
9
|
+
# a logger factory implementation based on Webrick Logger
|
10
|
+
class WebrickLogFactory < Patriot::Util::Logger::Factory
|
11
|
+
# configuration key for log file
|
12
|
+
LOG_FILE_KEY = :log_file
|
13
|
+
# configuration key for log level
|
14
|
+
LOG_LEVEL_KEY = :log_level
|
15
|
+
|
16
|
+
include Singleton
|
17
|
+
|
18
|
+
# Creates a WEBrick-based logger from the given configuration.
# @see Patriot::Util::Logger::Factory
# @param name logger name (not used by this implementation)
# @param config [Patriot::Util::Config::Base] source of the log file and log level settings
# @return [WEBrick::BasicLog] logger built from the configured file and level
def build(name, config)
  WEBrick::BasicLog.new(get_log_file(config), get_log_level(config))
end
|
25
|
+
private :build
|
26
|
+
|
27
|
+
# Looks up the log file setting (LOG_FILE_KEY) in the configuration.
# @param config [Patriot::Util::Config::Base]
# @return [String] path to the log file
def get_log_file(config)
  config.get(LOG_FILE_KEY)
end
|
33
|
+
private :get_log_file
|
34
|
+
|
35
|
+
# Gets the log level from the configuration and resolves it to the
# corresponding WEBrick::BasicLog level constant.
#
# The configured value is matched case-insensitively against the level
# names FATAL, ERROR, WARN, INFO and DEBUG.
#
# @param config [Patriot::Util::Config::Base]
# @return [Integer, nil] the value of the WEBrick::BasicLog level constant,
#   or nil when no level is configured (the logger then applies its own default)
# @raise [ArgumentError] if the configured level is not a known level name
def get_log_level(config)
  log_level = config.get(LOG_LEVEL_KEY)
  level_name = log_level.to_s.strip.upcase
  return nil if level_name.empty?
  unless %w[FATAL ERROR WARN INFO DEBUG].include?(level_name)
    raise ArgumentError, "unknown log level: #{log_level.inspect}"
  end
  # resolve the actual constant; a Symbol holding the constant's name cannot be
  # compared with the integer levels used by the logger
  WEBrick::BasicLog.const_get(level_name)
end
|
42
|
+
private :get_log_level
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Patriot
|
2
|
+
module Util
|
3
|
+
# namespace for parameter handling functions
|
4
|
+
module Param
|
5
|
+
|
6
|
+
# Replaces parameter values in a command attribute value.
# Hashes (keys and values) and Arrays are traversed recursively, Strings are
# passed to eval_string_attr, and any other object is returned untouched.
# @param attr_val an attribute value to be evaluated
# @return the evaluated attribute value
def eval_attr(attr_val)
  case attr_val
  when Hash
    attr_val.each_with_object({}) do |(key, value), evaluated|
      evaluated[eval_attr(key)] = eval_attr(value)
    end
  when Array
    attr_val.map { |element| eval_attr(element) }
  when String
    eval_string_attr(attr_val)
  else
    # only String attributes are evaluated
    attr_val
  end
end
|
22
|
+
|
23
|
+
# Evaluates the variable expressions (written as #{...}) in a string.
# Only the first expression is handled per invocation; the text after it
# is processed by a recursive call.
# @param str [String] a string expression to be evaluated
# @param vars [Hash] variables used in the evaluation
# @return [String] the evaluated string expression
def eval_string_attr(str, vars = {})
  # NOTE: local variable names are kept as they are because the fallback
  # binding below exposes them to the evaluated expression
  s = StringScanner.new(str)
  s.scan(/(.*?)\#\{/m)
  # nothing to evaluate when the string holds no variable expression
  return str unless s.matched?
  prefix = s[1]
  nest = 1 # depth of braces
  var = "" # variable expression
  prev_rest = s.rest
  until nest.zero?
    tmp = s.scan(/(.*?)[\{\}]/m) # braces may belong to hash literals, etc
    if s.matched?
      if /.*?\{/ =~ tmp
        nest += 1
        var << s[0]
      else
        nest -= 1
        # the brace closing the variable expression itself is dropped
        var << (nest > 0 ? s[0] : s[1])
      end
    end
    # the scanner did not advance: the expression is never closed
    raise "infinte loop #{str} : rest #{s.rest} : #{nest}" if prev_rest == s.rest
    prev_rest = s.rest
  end
  # evaluate the expression with the given variables, or in this scope if none
  var_binding = build_var_binding(vars) || binding
  evaled_var = eval(var, var_binding)
  # the remaining expressions are handled by the next invocation
  "#{prefix}#{evaled_var}#{eval_string_attr(prev_rest, vars)}"
end
|
62
|
+
|
63
|
+
# Builds a binding in which every entry of vars is available as a local variable.
# Keys may be Strings or Symbols; each key must be usable as a variable name.
# @param vars [Hash] variable names mapped to their values
# @return [Binding, nil] a binding holding the variables, or nil if vars is empty
# @raise [RuntimeError] if vars contains the reserved name "vars"
def build_var_binding(vars)
  return nil if vars.empty?
  # "vars" is the name of the hash the generated assignments read from
  if vars.has_key?('vars') || vars.has_key?(:vars)
    raise "illegal key var exist in #{vars.inspect}"
  end
  # look each value up with the key's own literal so that Symbol keys are
  # resolved too (a String-only lookup silently yields nil for them)
  assign_exps = vars.keys.map { |k| "#{k} = vars.fetch(#{k.inspect})" }
  assign_exps << "binding"
  eval(assign_exps.join(";"))
end
|
69
|
+
private :build_var_binding
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
module Patriot
|
3
|
+
module Util
|
4
|
+
# a module that enables operations to be retried
|
5
|
+
module Retry
|
6
|
+
# Executes the block, retrying it when it raises.
# Failures are logged to @logger when one is set.
# @param retry_config [Hash] :num_retry the max number of attempts (default 3),
#   :wait_time interval between attempts in seconds (default 3)
# @raise the exception raised by the last attempt when every attempt fails
# @return return value of the block
def execute_with_retry(retry_config = {}, &blk)
  retry_config = {:num_retry => 3, :wait_time => 3}.merge(retry_config)
  max_attempts = retry_config[:num_retry]
  last_error = nil
  1.upto(max_attempts) do |i|
    begin
      return yield
    # Exception (not only StandardError) is rescued on purpose so that errors
    # deriving directly from Exception are retried as well
    rescue Exception => e
      last_error = e
      if @logger
        @logger.error "fail to execute (#{i}) #{blk.to_s}"
        @logger.error e
        (e.backtrace || []).each{|m| @logger.error m}
      end
    end
    # no point in waiting once there is no attempt left
    sleep retry_config[:wait_time] if i < max_attempts
  end
  raise last_error unless last_error.nil?
end
|
27
|
+
module_function :execute_with_retry
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Patriot
|
2
|
+
module Util
|
3
|
+
# a module to find target files
|
4
|
+
module Script
|
5
|
+
include Patriot::Util::DateUtil
|
6
|
+
|
7
|
+
# Collects the target batch (.pbc) files from a given path.
# A path pointing at a single .pbc file is returned as is; otherwise the path
# is searched recursively and the files are filtered with target_file?.
# @param path [String] path to target directory (or to a single .pbc file)
# @param date [String] target date in '%Y-%m-%d'
# @param opt [Hash]
# @option opt :all [Boolean] force target all files
# @return [Array<String>] a list of target files
def get_batch_files(path, date, opt = {})
  single_file = File.file?(path) && File.extname(path) == ".pbc"
  return [path] if single_file
  criteria = target_option(date, opt)
  Dir.glob("#{path}/**/*.pbc").select { |candidate| target_file?(candidate, criteria) }
end
|
22
|
+
|
23
|
+
# Completes the file selection options for a target date.
# Unless :all is set, missing keys are filled in: :day defaults to true,
# :month is true when the day after the date is the first of a month,
# and :week is the day of the week of the date (Date#wday).
# @param date [String] target date in '%Y-%m-%d'
# @param opt [Hash] explicitly given options (take precedence over defaults)
# @return [Hash] the completed options
def target_option(date, opt = {})
  resolved = {:all => false}.merge(opt)
  return resolved if resolved[:all]

  parts = date.split('-')
  resolved[:day] = true unless resolved.has_key?(:day)
  unless resolved.has_key?(:month)
    resolved[:month] = !(date_add(date,1) =~ /[\d]{4}-[\d]{2}-01/).nil?
  end
  unless resolved.has_key?(:week)
    resolved[:week] = Date.new(parts[0].to_i, parts[1].to_i, parts[2].to_i).wday
  end
  resolved
end
|
37
|
+
private :target_option
|
38
|
+
|
39
|
+
# Tells whether a batch file is a target under the given options.
# @param file [String] path to a batch file
# @param options [Hash] selection options (:all, :day, :month, :week)
# @return [Boolean] true if the file is selected: always when :all is set,
#   otherwise files under /daily/ (with :day), /monthly/ (with :month)
#   or /weekly/<:week>/
def target_file?(file, options)
  return true if options[:all]
  return true if file =~ %r{/daily/} && options[:day]
  return true if file =~ %r{/monthly/} && options[:month]
  return true if file =~ %r{/weekly/#{options[:week]}/}
  false
end
|
48
|
+
private :target_file?
|
49
|
+
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'time'
|
3
|
+
module Patriot
|
4
|
+
module Util
|
5
|
+
# module for interaction with OS
|
6
|
+
module System
|
7
|
+
# exception in case of OS command failed
|
8
|
+
class ExternalCommandException < Exception; end
|
9
|
+
|
10
|
+
# suffix of file where stdout is written
|
11
|
+
STDOUT_SUFFIX=".stdout"
|
12
|
+
# suffix of file where stderr is written
|
13
|
+
STDERR_SUFFIX=".stderr"
|
14
|
+
|
15
|
+
# configuration key for tmp directory where stdout/stderr are written
|
16
|
+
PATRIOT_TMP_DIR_KEY = "patriot.tmp.dir"
|
17
|
+
# default path to the tmp directory
|
18
|
+
DEFAULT_PATRIOT_TMP_DIR = "/tmp/patriot-workflow-scheduler"
|
19
|
+
# max size of error message included in exceptions
|
20
|
+
MAX_ERROR_MSG_SIZE_KEY = "patriot.max.error.size"
|
21
|
+
# default max size of error message
|
22
|
+
DEFAULT_MAX_ERROR_MSG_SIZE = 256
|
23
|
+
|
24
|
+
# @private
# Builds the path of the directory where the output of a command is stored
# (visible for test).
# The directory name starts with "j<job id>" when the current thread holds a
# job id in its thread-local variable, otherwise with "p<pid>".
# @param pid process id used in the directory name
# @param dt [String] name of the intermediate (date) directory
# @param ts timestamp (seconds since epoch) appended as %Y%m%d_%H%M%S
# @param tmp_dir [String] base directory
# @return [String] path to the directory
def tmp_dir(pid, dt, ts, tmp_dir = DEFAULT_PATRIOT_TMP_DIR)
  job_id = Thread.current[Patriot::Worker::JOB_ID_IN_EXECUTION]
  prefix = job_id ? "j#{job_id}" : "p#{pid.to_s}"
  timestamp = Time.at(ts).strftime("%Y%m%d_%H%M%S")
  File.join(tmp_dir, dt, "#{prefix}_#{timestamp}")
end
|
32
|
+
|
33
|
+
# @private
# Forks a child process and executes the command in it (visible for test).
# The child creates its output directory (see tmp_dir), redirects its
# stdout/stderr to "<pid>.stdout" / "<pid>.stderr" in that directory and
# then replaces itself with the command.
# @param cmd [String] command to be executed
# @param dt [String] name of the intermediate (date) directory
# @param ts timestamp used in the output directory name
# @param tmp_dir [String] base directory for the output
# @return [Integer] pid of the forked child process
def do_fork(cmd, dt, ts, tmp_dir = DEFAULT_PATRIOT_TMP_DIR)
  cid = fork do
    tmpdir = tmp_dir($$, dt, ts, tmp_dir)
    # pass :mode as an option, not as a positional Hash: a braced Hash is
    # rejected with ArgumentError since Ruby 3.0
    FileUtils.mkdir_p(tmpdir, :mode => 0777)
    std_out = File.join(tmpdir, "#{$$.to_i}#{STDOUT_SUFFIX}")
    std_err = File.join(tmpdir, "#{$$.to_i}#{STDERR_SUFFIX}")
    STDOUT.reopen(std_out,"w")
    STDERR.reopen(std_err,"w")
    exec(cmd)
  end
  return cid
end
|
47
|
+
|
48
|
+
# Executes a command on the OS in a forked child process and waits for it.
# stdout/stderr of the command are written to files in a directory below the
# configured tmp directory (PATRIOT_TMP_DIR_KEY).
# @param command [String] command to be executed
# @yield [status, so, se] block for error handling, called when the command
#   ends with a non-zero exit status; receives the status returned by
#   Process.waitpid2 and the paths to the stdout and stderr files
# @return [String] path to the stdout file if the exit status is 0,
#   otherwise the return value of the block
# @raise [ExternalCommandException] if the command fails and no block is given
def execute_command(command, &blk)
  so, se = nil

  time_obj = Time.now
  ts = time_obj.to_i
  dt = time_obj.strftime("%Y-%m-%d")

  tmp_dir_base = @config.get(PATRIOT_TMP_DIR_KEY, DEFAULT_PATRIOT_TMP_DIR)

  # the forked variable is used for checking whether fork invocation hangs.
  # (due to https://redmine.ruby-lang.org/issues/5240 ?)
  forked = false
  until forked
    cid = do_fork(command, dt, ts, tmp_dir_base)
    tmpdir = tmp_dir(cid, dt, ts, tmp_dir_base)
    i = 0
    # If fork hangs, the output directory would not be created.
    # poll once per second until the directory shows up or the attempts run out
    until forked || i > 5
      sleep(1)
      forked = File.exist?(tmpdir)
      i = i+1
    end
    # fork hanged, kill the hanged process and fork again.
    unless forked
      # check whether cid is id of child process to avoid to kill unrelated processes
      begin
        if Process.waitpid(cid, Process::WNOHANG).nil?
          @logger.warn("forked process :#{cid} hanged. kill #{cid}")
          Process.kill("KILL", cid)
          @logger.warn("SIGKILL sent to #{cid}")
          Process.waitpid(cid)
          @logger.warn("#{cid} is killed")
        else
          raise ExternalCommandException, "#{cid} is not a child of this"
        end
      rescue Exception => e
        @logger.warn "failed to kill hanged process #{cid}"
        raise e
      end
    end
  end

  @logger.info "executing #{command}: results stored in #{tmpdir}"
  _pid, status = Process.waitpid2(cid)
  so = File.join(tmpdir, "#{cid.to_i}#{STDOUT_SUFFIX}")
  se = File.join(tmpdir, "#{cid.to_i}#{STDERR_SUFFIX}")

  @logger.info "#{command} is finished"
  return so if status.exitstatus == 0
  @logger.warn "#{command} end with exit status #{status.exitstatus}"
  if block_given?
    yield(status, so, se)
  else
    # the configured value may not be an Integer, so coerce it before comparing
    max_err_size = @config.get(MAX_ERROR_MSG_SIZE_KEY, DEFAULT_MAX_ERROR_MSG_SIZE).to_i
    err_size = File.stat(se).size
    err_msg = "#{command}\n#{se} :"
    if err_size < max_err_size
      # small enough: embed the content of stderr in the exception message
      File.open(se){|f| err_msg = "#{err_msg}\n#{f.read}"}
    else
      err_msg = "#{err_msg} \n the size of stderr is #{err_size} (> #{max_err_size})"
    end
    raise ExternalCommandException, err_msg
  end
end
|
116
|
+
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Patriot
|
2
|
+
# namespace for workers
|
3
|
+
module Worker
|
4
|
+
# states of worker
|
5
|
+
module Status
|
6
|
+
# worker is active
|
7
|
+
ACTIVE = 'ACTIVE'
|
8
|
+
# worker sleeps (don't execute jobs)
|
9
|
+
SLEEP = 'SLEEP'
|
10
|
+
# worker is shutting down
|
11
|
+
SHUTDOWN = 'SHUTDOWN'
|
12
|
+
end
|
13
|
+
|
14
|
+
# name of thread local variable for job_id executed by the thread
|
15
|
+
JOB_ID_IN_EXECUTION = :JOB_ID_IN_EXECUTION
|
16
|
+
|
17
|
+
# SIGNAL used for graceful shutdown
|
18
|
+
SIGNAL_FOR_GRACEFUL_SHUTDOWN = ['INT', 'TERM']
|
19
|
+
# SIGNAL used for getting thread dump
|
20
|
+
SIGNAL_FOR_THREAD_DUMP = ['QUIT']
|
21
|
+
|
22
|
+
# default fetch cycle for getting job tickets in minutes
|
23
|
+
DEFAULT_FETCH_CYCLE = 300
|
24
|
+
# default limit of the number of jobs fetched at once
|
25
|
+
DEFAULT_FETCH_LIMIT = 200
|
26
|
+
# default worker name (for logging)
|
27
|
+
DEFAULT_WORKER_NAME = 'default'
|
28
|
+
|
29
|
+
require 'patriot/worker/servlet'
|
30
|
+
require 'patriot/worker/info_server'
|
31
|
+
require 'patriot/worker/base'
|
32
|
+
autoload :MultiNodeWorker, 'patriot/worker/multi_node_worker'
|
33
|
+
autoload :JobStoreServer, 'patriot/worker/job_store_server'
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'patriot/command'
|
3
|
+
|
4
|
+
module Patriot
|
5
|
+
module Worker
|
6
|
+
|
7
|
+
# @abstract
|
8
|
+
# base class for worker implementations
|
9
|
+
class Base
|
10
|
+
|
11
|
+
include Patriot::Util::Logger
|
12
|
+
include Patriot::Util::Retry
|
13
|
+
include Patriot::JobStore::Factory
|
14
|
+
|
15
|
+
attr_accessor :host, :status, :cycle, :job_store
|
16
|
+
|
17
|
+
# Sets up the logger, job store, host name, fetch settings, pid file path
# and info server of the worker from the configuration.
# @param config [Patriot::Util::Config::Base]
# @raise [RuntimeError] if config is nil
def initialize(config)
  raise "configuration is nil" if config.nil?

  @logger    = create_logger(config)
  @config    = config
  @job_store = create_jobstore(Patriot::JobStore::ROOT_STORE_ID, @config)
  @host      = %x(hostname).chomp

  # settings fall back to the defaults declared in Patriot::Worker
  @cycle       = config.get('fetch_cycle', Patriot::Worker::DEFAULT_FETCH_CYCLE).to_i
  @fetch_limit = config.get('fetch_limit', Patriot::Worker::DEFAULT_FETCH_LIMIT).to_i
  @worker_name = config.get('worker_name', Patriot::Worker::DEFAULT_WORKER_NAME)

  pid_file_name = "patriot-worker_#{@worker_name}.pid"
  @pid_file     = File.join($home, 'run', pid_file_name)
  @info_server  = Patriot::Worker::InfoServer.new(self,@config)
end
|
30
|
+
|
31
|
+
# Executes a job.
# The job is first offered to the job store; when the offer is accepted the
# command is executed and the completion status is reported to the job store.
# @param [Patriot::JobStore::JobTicket] job_ticket a ticket of job to be executed
# @return [Patriot::Command::ExitCode] FAILED if the offer itself fails,
#   SKIPPED if the job store returns no response, otherwise the exit code
#   set on the ticket
def execute_job(job_ticket)
  job_ticket.exec_host = @host
  job_ticket.exec_node = Thread.current[:name]
  offer = begin
    @job_store.offer_to_execute(job_ticket)
  rescue Exception => e
    @logger.error e
    return Patriot::Command::ExitCode::FAILED
  end

  # already executed by other node
  return Patriot::Command::ExitCode::SKIPPED if offer.nil?

  @logger.info " executing job: #{job_ticket.job_id}"
  command = offer[:command]
  job_ticket.execution_id = offer[:execution_id]
  # assume failure until the command has actually succeeded
  if command.skip_on_fail?
    job_ticket.exit_code = Patriot::Command::ExitCode::FAILURE_SKIPPED
  else
    job_ticket.exit_code = Patriot::Command::ExitCode::FAILED
  end
  begin
    command.execute
    job_ticket.exit_code = Patriot::Command::ExitCode::SUCCEEDED
  rescue Exception => e
    @logger.warn " job : #{job_ticket.job_id} failed"
    job_ticket.description = e.to_s
  else
    job_ticket.description = command.description
  ensure
    # the status is reported whether the command succeeded or not;
    # a failure to report is logged and does not change the exit code
    begin
      execute_with_retry{ @job_store.report_completion_status(job_ticket) }
    rescue Exception => job_store_error
      @logger.error job_store_error
    end
  end
  job_ticket.exit_code
end
|
68
|
+
|
69
|
+
# Reads the pid of the worker from the pid file and checks that the process exists.
# @return [Integer] pid if the worker is running, otherwise nil
#   (no pid file, no valid pid in the file, or no such process)
def get_pid
  return nil unless File.exist?(@pid_file)
  pid = File.read(@pid_file).strip.to_i
  # an empty or corrupt pid file is read as 0; 0 must never be reported as a
  # worker pid because sending a signal to pid 0 targets the whole process group
  unless pid > 0
    @logger.warn("pid file #{@pid_file} does not contain a valid pid")
    return nil
  end
  begin
    Process.getpgid(pid)
  rescue Errno::ESRCH
    @logger.warn("process #{pid} not exist but pid file remains")
    return nil
  end
  pid
end
|
82
|
+
|
83
|
+
# Sends a graceful shutdown request (the first signal of
# SIGNAL_FOR_GRACEFUL_SHUTDOWN) to a running worker.
# @return [Boolean] true if the worker is running and the request is sent, otherwise false
def request_shutdown
  pid = get_pid
  unless pid.nil?
    Process.kill(SIGNAL_FOR_GRACEFUL_SHUTDOWN[0], pid.to_i)
    return true
  end
  @logger.info("worker #{@worker_name} does not exist")
  false
end
|
94
|
+
|
95
|
+
# Main entry point of worker processing.
# Does nothing when get_pid reports a running worker. Otherwise the pid is
# saved, signal traps and the info server are set up, and the worker is
# initiated, run and stopped in this order.
def start_worker
  return unless get_pid.nil?

  @logger.info "starting worker #{@node}@#{@host}"
  # save pid for shutdown
  File.open(@pid_file, 'w') { |pid_io| pid_io.write(Process.pid) }
  set_traps
  @info_server.start_server

  @logger.info "initiating worker #{@node}@#{@host}"
  init_worker
  @status = Patriot::Worker::Status::ACTIVE

  @logger.info "start worker #{@node}@#{@host}"
  run_worker

  @logger.info "shutting down worker #{@node}@#{@host}"
  stop_worker
  # should be last since worker_admin judge availability from the info_server
  @info_server.shutdown_server
end
|
113
|
+
|
114
|
+
|
115
|
+
# Hook for implementation-specific configuration.
# Must be overridden in a subclass.
# @raise [NotImplementedError] always in this base implementation
def init_worker
  raise NotImplementedError, "#{self.class}#init_worker is not implemented"
end
|
120
|
+
|
121
|
+
# Main loop in which the worker fetches and executes jobs.
# Must be overridden in a subclass.
# @raise [NotImplementedError] always in this base implementation
def run_worker
  raise NotImplementedError, "#{self.class}#run_worker is not implemented"
end
|
126
|
+
|
127
|
+
# Tasks for tearing down the worker.
# Must be overridden in a subclass.
# @raise [NotImplementedError] always in this base implementation
def stop_worker
  raise NotImplementedError, "#{self.class}#stop_worker is not implemented"
end
|
132
|
+
|
133
|
+
# Installs the signal handlers of the worker:
# the graceful shutdown signals switch the status to SHUTDOWN and
# the thread dump signals log the backtrace of every thread.
def set_traps
  shutdown_signals = Patriot::Worker::SIGNAL_FOR_GRACEFUL_SHUTDOWN
  dump_signals = Patriot::Worker::SIGNAL_FOR_THREAD_DUMP

  shutdown_signals.each do |sig|
    Signal.trap(sig) do
      @logger.info "SIG#{sig}: worker will terminate"
      @status = Patriot::Worker::Status::SHUTDOWN
    end
  end

  dump_signals.each do |sig|
    Signal.trap(sig) do
      # TODO output to separated stream
      Thread.list.each do |thread|
        trace = thread.backtrace.map{|bt| "\t#{bt}"}.join("\n")
        @logger.info("Thread #{thread[:name]}\n#{trace}")
      end
    end
  end
end
|
149
|
+
private :set_traps
|
150
|
+
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|