patriot-workflow-scheduler 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/bin/patriot +8 -0
- data/bin/patriot-init +35 -0
- data/lib/patriot.rb +11 -0
- data/lib/patriot/command.rb +71 -0
- data/lib/patriot/command/base.rb +199 -0
- data/lib/patriot/command/command_group.rb +43 -0
- data/lib/patriot/command/command_macro.rb +141 -0
- data/lib/patriot/command/composite.rb +49 -0
- data/lib/patriot/command/parser.rb +78 -0
- data/lib/patriot/command/sh_command.rb +42 -0
- data/lib/patriot/controller.rb +2 -0
- data/lib/patriot/controller/package_controller.rb +81 -0
- data/lib/patriot/controller/worker_admin_controller.rb +159 -0
- data/lib/patriot/job_store.rb +66 -0
- data/lib/patriot/job_store/base.rb +159 -0
- data/lib/patriot/job_store/factory.rb +19 -0
- data/lib/patriot/job_store/in_memory_store.rb +252 -0
- data/lib/patriot/job_store/job.rb +118 -0
- data/lib/patriot/job_store/job_ticket.rb +30 -0
- data/lib/patriot/job_store/rdb_job_store.rb +353 -0
- data/lib/patriot/tool.rb +2 -0
- data/lib/patriot/tool/batch_parser.rb +102 -0
- data/lib/patriot/tool/patriot_command.rb +48 -0
- data/lib/patriot/tool/patriot_commands/execute.rb +92 -0
- data/lib/patriot/tool/patriot_commands/job.rb +62 -0
- data/lib/patriot/tool/patriot_commands/plugin.rb +41 -0
- data/lib/patriot/tool/patriot_commands/register.rb +77 -0
- data/lib/patriot/tool/patriot_commands/upgrade.rb +24 -0
- data/lib/patriot/tool/patriot_commands/validate.rb +84 -0
- data/lib/patriot/tool/patriot_commands/worker.rb +35 -0
- data/lib/patriot/tool/patriot_commands/worker_admin.rb +60 -0
- data/lib/patriot/util.rb +14 -0
- data/lib/patriot/util/config.rb +58 -0
- data/lib/patriot/util/config/base.rb +22 -0
- data/lib/patriot/util/config/inifile_config.rb +63 -0
- data/lib/patriot/util/cron_format_parser.rb +104 -0
- data/lib/patriot/util/date_util.rb +200 -0
- data/lib/patriot/util/db_client.rb +65 -0
- data/lib/patriot/util/db_client/base.rb +142 -0
- data/lib/patriot/util/db_client/hash_record.rb +53 -0
- data/lib/patriot/util/db_client/record.rb +25 -0
- data/lib/patriot/util/logger.rb +24 -0
- data/lib/patriot/util/logger/facade.rb +33 -0
- data/lib/patriot/util/logger/factory.rb +59 -0
- data/lib/patriot/util/logger/log4r_factory.rb +111 -0
- data/lib/patriot/util/logger/webrick_log_factory.rb +47 -0
- data/lib/patriot/util/param.rb +73 -0
- data/lib/patriot/util/retry.rb +30 -0
- data/lib/patriot/util/script.rb +52 -0
- data/lib/patriot/util/system.rb +120 -0
- data/lib/patriot/worker.rb +35 -0
- data/lib/patriot/worker/base.rb +153 -0
- data/lib/patriot/worker/info_server.rb +90 -0
- data/lib/patriot/worker/job_store_server.rb +32 -0
- data/lib/patriot/worker/multi_node_worker.rb +157 -0
- data/lib/patriot/worker/servlet.rb +23 -0
- data/lib/patriot/worker/servlet/job_servlet.rb +128 -0
- data/lib/patriot/worker/servlet/worker_status_servlet.rb +44 -0
- data/skel/batch/sample/daily/test.pbc +4 -0
- data/skel/config/patriot.ini +21 -0
- data/skel/public/css/bootstrap.css +2495 -0
- data/skel/public/css/original.css +54 -0
- data/skel/public/js/bootstrap-alerts.js +124 -0
- data/skel/public/js/bootstrap-buttons.js +64 -0
- data/skel/public/js/bootstrap-dropdown.js +55 -0
- data/skel/public/js/bootstrap-modal.js +260 -0
- data/skel/public/js/bootstrap-popover.js +90 -0
- data/skel/public/js/bootstrap-scrollspy.js +107 -0
- data/skel/public/js/bootstrap-tabs.js +80 -0
- data/skel/public/js/bootstrap-twipsy.js +321 -0
- data/skel/public/js/jquery-1.6.4.min.js +4 -0
- data/skel/public/templates/_jobs.erb +97 -0
- data/skel/public/templates/job.erb +119 -0
- data/skel/public/templates/jobs.erb +21 -0
- data/skel/public/templates/jobs_deleted.erb +6 -0
- data/skel/public/templates/layout.erb +103 -0
- data/skel/public/templates/state_updated.erb +6 -0
- metadata +235 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'webrick'
|
3
|
+
require 'singleton'
|
4
|
+
require 'patriot/util/logger/factory'
|
5
|
+
|
6
|
+
module Patriot
  module Util
    module Logger
      # a logger factory implementation based on WEBrick::BasicLog
      class WebrickLogFactory < Patriot::Util::Logger::Factory
        # configuration key for log file
        LOG_FILE_KEY = :log_file
        # configuration key for log level
        LOG_LEVEL_KEY = :log_level

        include Singleton

        # Build a WEBrick::BasicLog from the given configuration.
        # @see Patriot::Util::Logger::Factory
        # @param name name of the logger (not used by this implementation)
        # @param config [Patriot::Util::Config::Base]
        # @return [WEBrick::BasicLog]
        def build(name, config)
          log_file  = get_log_file(config)
          log_level = get_log_level(config)
          return WEBrick::BasicLog.new(log_file, log_level)
        end
        private :build

        # get the log destination from configuration
        # @param config [Patriot::Util::Config::Base]
        # @return the value configured for LOG_FILE_KEY (passed as is to WEBrick::BasicLog)
        def get_log_file(config)
          return config.get(LOG_FILE_KEY)
        end
        private :get_log_file

        # get log level from configuration
        #
        # The configured level name (e.g. "INFO") is resolved to the matching
        # WEBrick::BasicLog constant. WEBrick::BasicLog compares its level
        # numerically, so a Symbol or String must not be handed to it.
        # @param config [Patriot::Util::Config::Base]
        # @return [Integer, nil] the level constant, or nil when no level is
        #   configured (WEBrick::BasicLog then applies its own default)
        # @raise [ArgumentError] if the configured value is not a WEBrick::BasicLog level
        def get_log_level(config)
          log_level = config.get(LOG_LEVEL_KEY)
          return nil if log_level.nil?
          # an already numeric level needs no lookup
          return log_level if log_level.is_a?(Integer)
          level_name = log_level.to_s.strip.upcase
          return nil if level_name.empty?
          begin
            # inherit=false: accept only constants defined on BasicLog itself
            return WEBrick::BasicLog.const_get(level_name, false)
          rescue NameError
            raise ArgumentError, "unknown log level: #{log_level.inspect}"
          end
        end
        private :get_log_level

      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'strscan' # StringScanner is used by eval_string_attr

module Patriot
  module Util
    # namespace for parameter handling functions
    module Param

      # Replace parameter expressions in a command attribute value.
      # Hashes (both keys and values) and Arrays are processed recursively,
      # Strings are expanded by eval_string_attr, and any other object is
      # returned untouched.
      # @param attr_val an attribute value to be evaluated
      # @return the attribute value with embedded expressions expanded
      def eval_attr(attr_val)
        case attr_val
        when Hash
          entries = {}
          attr_val.each{|k,v| entries[eval_attr(k)] = eval_attr(v) }
          return entries
        when Array
          return attr_val.map{|e| eval_attr(e)}
        when String
          return eval_string_attr(attr_val)
        else
          # only evaluate attributes in String
          return attr_val
        end
      end

      # Evaluate the "#{...}" expressions contained in a string.
      #
      # The first expression is located, evaluated with Kernel#eval and
      # substituted; the remainder of the string is handled by a recursive call.
      # When vars is empty the expression is evaluated in this method's own
      # binding, otherwise in a binding where each entry of vars is a local variable.
      #
      # NOTE(review): the expression text is passed to eval unchanged, so this
      # must only be applied to trusted input.
      # @param str [String] a string expression to be evaluated
      # @param vars [Hash] variables used in the evaluation (String or Symbol keys)
      # @return [String] the evaluated string expression
      # @raise [RuntimeError] if the braces of an expression are not balanced
      def eval_string_attr(str, vars = {})
        # local variable names are kept stable on purpose: they are visible
        # to expressions evaluated in this method's binding
        s = StringScanner.new(str)
        s.scan(/(.*?)\#\{/m)
        # return immediately if no variable is contained
        return str unless s.matched?
        prefix = s[1]
        nest = 1 # depth of braces
        var = "" # variable expression
        prev_rest = s.rest
        while nest > 0
          tmp = s.scan(/(.*?)[\{\}]/m) # for hash objects, etc
          if s.matched?
            if /.*?\{/ =~ tmp
              # an opening brace inside the expression (e.g. a hash literal)
              nest = nest + 1
              var << s[0]
            else
              nest = nest - 1
              if nest > 0
                var << s[0]
              else
                # the last closing brace marks the end of the expression and is not part of it
                var << s[1]
              end
            end
          end
          # no progress means that the closing brace is missing
          raise "infinte loop #{str} : rest #{s.rest} : #{nest}" if prev_rest == s.rest
          prev_rest = s.rest
        end
        # evaluate the variable
        var_binding = build_var_binding(vars)
        var_binding = binding if var_binding.nil?
        evaled_var = eval var, var_binding
        # further variables are handled by the next invocation
        return "#{prefix}#{evaled_var}#{eval_string_attr(prev_rest, vars)}"
      end

      # Create a binding in which every entry of vars is a local variable.
      # @param vars [Hash] variable name (String or Symbol) to value
      # @return [Binding, nil] nil if vars is empty
      # @raise [RuntimeError] if vars uses the reserved name "vars"
      def build_var_binding(vars)
        return nil if vars.empty?
        # "vars" is the name of the local the assignments read from
        raise "illegal key var exist in #{vars.inspect}" if vars.keys.any?{|k| k.to_s == 'vars'}
        # k.inspect keeps the key type, so Symbol keys are looked up as Symbols
        assign_exps = vars.keys.map{|k| "#{k} = vars[#{k.inspect}]"}
        assign_exps << "binding"
        return eval assign_exps.join(";")
      end
      private :build_var_binding

    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
module Patriot
  module Util
    # a module that enables operations to be retried
    module Retry
      # Execute the block and run it again whenever it raises.
      #
      # The block is attempted up to :num_retry times. Between two attempts
      # the method sleeps for :wait_time seconds; it does not sleep after the
      # final attempt. Failures are logged to @logger if one is set.
      # With :num_retry below 1 the block is never executed and nil is returned.
      #
      # NOTE(review): every Exception is retried (not only StandardError),
      # which also covers exceptions deriving directly from Exception.
      # @param retry_config [Hash] :num_retry the max number of attempts (default 3),
      #   :wait_time retry interval in seconds (default 3)
      # @raise the exception raised by the last attempt if all attempts failed
      # @return return value of the block
      def execute_with_retry(retry_config = {}, &blk)
        retry_config = {:num_retry => 3, :wait_time => 3}.merge(retry_config)
        max_attempts = retry_config[:num_retry]
        last_error = nil
        1.upto(max_attempts) do |attempt|
          begin
            return yield
          rescue Exception => e
            last_error = e
            if @logger
              @logger.error "fail to execute (#{attempt}) #{blk.to_s}"
              @logger.error e
              Array(e.backtrace).each{|m| @logger.error m}
            end
          end
          # waiting is pointless once the last attempt has failed
          sleep retry_config[:wait_time] if attempt < max_attempts
        end
        raise last_error unless last_error.nil?
      end
      module_function :execute_with_retry
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Patriot
  module Util
    # a module to find target files
    module Script
      include Patriot::Util::DateUtil

      # Collect the batch files (*.pbc) to be processed for a date.
      # A path pointing at a single .pbc file is returned as the only target;
      # otherwise the path is searched recursively and filtered by target_file?.
      # @param path [String] path to target directory (or a single .pbc file)
      # @param date [String] target date in '%Y-%m-%d'
      # @param opt [Hash]
      # @option opt :all [Boolean] force target all files
      # @return [Array<String>] a list of target files
      def get_batch_files(path, date, opt = {})
        return [path] if File.file?(path) && File.extname(path) == ".pbc"
        criteria = target_option(date, opt)
        return Dir.glob("#{path}/**/*.pbc").select { |candidate| target_file?(candidate, criteria) }
      end

      # Complete the selection options for a date.
      # Unless :all is set, missing entries are filled in: :day defaults to
      # true, :month is true when the following day is the first of a month,
      # and :week is the day of the week (Date#wday) of the date.
      # @param date [String] target date in '%Y-%m-%d'
      # @param opt [Hash] options given by the caller (not modified)
      # @return [Hash] the completed options
      def target_option(date, opt = {})
        merged = {:all => false}.merge(opt)
        return merged if merged[:all]

        ymd = date.split('-')
        merged[:day] = true unless merged.has_key?(:day)
        unless merged.has_key?(:month)
          merged[:month] = !!(date_add(date,1) =~ /[\d]{4}-[\d]{2}-01/)
        end
        unless merged.has_key?(:week)
          merged[:week] = Date.new(*ymd.values_at(0, 1, 2).map { |part| part.to_i }).wday
        end
        return merged
      end
      private :target_option

      # Decide whether a file is a target under the given options.
      # @param file [String] path of a candidate file
      # @param options [Hash] options completed by target_option
      # @return [Boolean] true if the file should be processed
      def target_file?(file, options)
        return true if options[:all]
        return true if file =~ /\/daily\// && options[:day]
        return true if file =~ /\/monthly\// && options[:month]
        return true if file =~ /\/weekly\/#{options[:week]}\//
        return false
      end
      private :target_file?

    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'time'
|
3
|
+
module Patriot
  module Util
    # module for interaction with OS
    module System
      # exception raised when an OS command failed
      class ExternalCommandException < Exception; end

      # suffix of file where stdout is written
      STDOUT_SUFFIX=".stdout"
      # suffix of file where stderr is written
      STDERR_SUFFIX=".stderr"

      # configuration key for tmp directory where stdout/stderr are written
      PATRIOT_TMP_DIR_KEY = "patriot.tmp.dir"
      # default path to the tmp directory
      DEFAULT_PATRIOT_TMP_DIR = "/tmp/patriot-workflow-scheduler"
      # configuration key for max size of error message included in exceptions
      MAX_ERROR_MSG_SIZE_KEY = "patriot.max.error.size"
      # default max size of error message
      DEFAULT_MAX_ERROR_MSG_SIZE = 256

      # @private
      # get path to tmp directory (visible for test)
      #
      # The directory name starts with "j<job id>" when the current thread
      # carries a job id, and with "p<pid>" otherwise.
      # @param pid process id used for the "p" prefix
      # @param dt [String] name of the date sub directory
      # @param ts [Integer] timestamp (seconds since epoch) used in the directory name
      # @param tmp_dir [String] base directory
      # @return [String] path to the directory
      def tmp_dir(pid, dt, ts, tmp_dir = DEFAULT_PATRIOT_TMP_DIR)
        job_id = Thread.current[Patriot::Worker::JOB_ID_IN_EXECUTION]
        prefix = job_id ? "j#{job_id}" : "p#{pid.to_s}"
        ts_exp = Time.at(ts).strftime("%Y%m%d_%H%M%S")
        return File.join(tmp_dir, dt, "#{prefix}_#{ts_exp}")
      end

      # @private
      # fork and execute the command (visible for test)
      #
      # The child creates its output directory, redirects stdout/stderr to
      # "<pid>.stdout" / "<pid>.stderr" in that directory and then replaces
      # itself with the command.
      # @param cmd [String] command to be executed
      # @param dt [String] name of the date sub directory
      # @param ts [Integer] timestamp used in the directory name
      # @param tmp_dir [String] base directory
      # @return [Integer] pid of the child process
      def do_fork(cmd, dt, ts, tmp_dir = DEFAULT_PATRIOT_TMP_DIR)
        cid = fork do
          tmpdir = tmp_dir($$, dt, ts, tmp_dir)
          # options are passed as keywords: a positional Hash is rejected by Ruby 3
          FileUtils.mkdir_p(tmpdir, :mode => 0777)
          std_out = File.join(tmpdir, "#{$$.to_i}#{STDOUT_SUFFIX}")
          std_err = File.join(tmpdir, "#{$$.to_i}#{STDERR_SUFFIX}")
          STDOUT.reopen(std_out,"w")
          STDERR.reopen(std_err,"w")
          exec(cmd)
        end
        return cid
      end

      # execute command on OS
      #
      # Reads @config and writes to @logger of the including object.
      # @param command [String] command to be executed
      # @yield [status, so, se] block for error handling, called with the
      #   process status and the paths of the stdout/stderr files when the
      #   command exits with a non-zero status
      # @return [String] path to the file holding stdout if the command succeeded,
      #   otherwise the value of the block
      # @raise [ExternalCommandException] if the command failed and no block is given
      def execute_command(command, &blk)
        time_obj = Time.now
        ts = time_obj.to_i
        dt = time_obj.strftime("%Y-%m-%d")

        tmp_dir_base = @config.get(PATRIOT_TMP_DIR_KEY, DEFAULT_PATRIOT_TMP_DIR)

        cid = nil
        tmpdir = nil
        # the forked variable is used for checking whether fork invocation hangs.
        # (due to https://redmine.ruby-lang.org/issues/5240 ?)
        forked = false
        until forked
          cid = do_fork(command, dt, ts, tmp_dir_base)
          tmpdir = tmp_dir(cid, dt, ts, tmp_dir_base)
          i = 0
          # If fork hangs, output directory would not be created.
          # poll once per second (up to six times) for the directory to appear.
          until forked || i > 5
            sleep(1)
            forked = File.exist?(tmpdir)
            i = i+1
          end
          # fork hanged, kill the hanged process.
          unless forked
            # check whether cid is id of child process to avoid to kill unrelated processes
            begin
              if Process.waitpid(cid, Process::WNOHANG).nil?
                @logger.warn("forked process :#{cid} hanged. kill #{cid}")
                Process.kill("KILL", cid)
                @logger.warn("SIGKILL sent to #{cid}")
                Process.waitpid(cid)
                @logger.warn("#{cid} is killed")
              else
                raise ExternalCommandException, "#{cid} is not a child of this"
              end
            rescue Exception => e
              @logger.warn "failed to kill hanged process #{cid}"
              raise e
            end
          end
        end

        @logger.info "executing #{command}: results stored in #{tmpdir}"
        pid, status = Process.waitpid2(cid)
        so = File.join(tmpdir, "#{cid.to_i}#{STDOUT_SUFFIX}")
        se = File.join(tmpdir, "#{cid.to_i}#{STDERR_SUFFIX}")

        @logger.info "#{command} is finished"
        return so if status.exitstatus == 0
        @logger.warn "#{command} end with exit status #{status.exitstatus}"
        if block_given?
          yield(status, so, se)
        else
          # the configured value may be a String, so coerce it before comparing
          max_err_size = @config.get(MAX_ERROR_MSG_SIZE_KEY, DEFAULT_MAX_ERROR_MSG_SIZE).to_i
          err_size = File.stat(se).size
          err_msg = "#{command}\n#{se} :"
          if err_size < max_err_size
            File.open(se){|f| err_msg = "#{err_msg}\n#{f.read}"}
          else
            # stderr is too large to embed; report only its size
            err_msg = "#{err_msg} \n the size of stderr is #{err_size} (> #{max_err_size})"
          end
          raise ExternalCommandException, err_msg
        end
      end

    end
  end
end
|
120
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Patriot
  # namespace for workers
  module Worker

    # states a worker can be in
    module Status
      # the worker is active
      ACTIVE   = 'ACTIVE'
      # the worker sleeps (does not execute jobs)
      SLEEP    = 'SLEEP'
      # the worker is going down
      SHUTDOWN = 'SHUTDOWN'
    end

    # key of the thread local variable holding the id of the job executed by the thread
    JOB_ID_IN_EXECUTION = :JOB_ID_IN_EXECUTION

    # signals which trigger a graceful shutdown
    SIGNAL_FOR_GRACEFUL_SHUTDOWN = %w[INT TERM]
    # signals which trigger a thread dump
    SIGNAL_FOR_THREAD_DUMP = %w[QUIT]

    # default fetch cycle for getting job tickets
    # NOTE(review): the original comment gives the unit as minutes;
    # confirm against the code consuming this value
    DEFAULT_FETCH_CYCLE = 300
    # default limit of the number of jobs fetched at once
    DEFAULT_FETCH_LIMIT = 200
    # default worker name (for logging)
    DEFAULT_WORKER_NAME = 'default'

    # loaded after the constants above have been defined
    require 'patriot/worker/servlet'
    require 'patriot/worker/info_server'
    require 'patriot/worker/base'
    autoload :MultiNodeWorker, 'patriot/worker/multi_node_worker'
    autoload :JobStoreServer, 'patriot/worker/job_store_server'
  end
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'patriot/command'
|
3
|
+
|
4
|
+
module Patriot
  module Worker

    # @abstract
    # base class for worker implementations
    #
    # Sub classes implement init_worker, run_worker and stop_worker.
    class Base

      include Patriot::Util::Logger
      include Patriot::Util::Retry
      include Patriot::JobStore::Factory

      attr_accessor :host, :status, :cycle, :job_store

      # @param config [Patriot::Util::Config::Base]
      # @raise [RuntimeError] if config is nil
      def initialize(config)
        raise "configuration is nil" if config.nil?
        @logger      = create_logger(config)
        @config      = config
        @job_store   = create_jobstore(Patriot::JobStore::ROOT_STORE_ID, @config)
        @host        = `hostname`.chomp
        @cycle       = config.get('fetch_cycle', Patriot::Worker::DEFAULT_FETCH_CYCLE).to_i
        @fetch_limit = config.get('fetch_limit', Patriot::Worker::DEFAULT_FETCH_LIMIT).to_i
        @worker_name = config.get('worker_name', Patriot::Worker::DEFAULT_WORKER_NAME)
        # the pid file is located under the directory held by the global $home
        @pid_file    = File.join($home, 'run', "patriot-worker_#{@worker_name}.pid")
        @info_server = Patriot::Worker::InfoServer.new(self,@config)
      end

      # execute a job
      #
      # The job store is first asked whether this worker may execute the job.
      # The completion status is reported back to the job store (with retry)
      # regardless of the outcome of the command.
      # @param [Patriot::JobStore::JobTicket] job_ticket a ticket of job to be executed
      # @return [Patriot::Command::ExitCode]
      def execute_job(job_ticket)
        job_ticket.exec_host = @host
        job_ticket.exec_node = Thread.current[:name]
        begin
          response = @job_store.offer_to_execute(job_ticket)
        rescue Exception => e
          @logger.error e
          return Patriot::Command::ExitCode::FAILED
        end

        # already executed by other node
        return Patriot::Command::ExitCode::SKIPPED if response.nil?

        @logger.info " executing job: #{job_ticket.job_id}"
        command = response[:command]
        job_ticket.execution_id = response[:execution_id]
        # assume a failure until the command has finished successfully
        job_ticket.exit_code = command.skip_on_fail? ? Patriot::Command::ExitCode::FAILURE_SKIPPED : Patriot::Command::ExitCode::FAILED
        begin
          command.execute
          job_ticket.exit_code = Patriot::Command::ExitCode::SUCCEEDED
        rescue Exception => e
          @logger.warn " job : #{job_ticket.job_id} failed"
          job_ticket.description = e.to_s
        else
          job_ticket.description = command.description
        ensure
          begin
            execute_with_retry{ @job_store.report_completion_status(job_ticket) }
          rescue Exception => job_store_error
            # a failed report is only logged; the exit code is still returned
            @logger.error job_store_error
          end
        end
        return job_ticket.exit_code
      end

      # @return [Integer] pid if the worker is running, otherwise nil
      def get_pid
        return nil unless File.exist?(@pid_file)
        pid = File.read(@pid_file).strip.to_i
        # an empty or garbled pid file yields 0, which Process.getpgid and
        # Process.kill would interpret as the calling process (group)
        unless pid > 0
          @logger.warn("pid file #{@pid_file} does not contain a valid pid")
          return nil
        end
        begin
          Process.getpgid(pid)
        rescue Errno::ESRCH
          @logger.warn("process #{pid} not exist but pid file remains")
          return nil
        end
        return pid
      end

      # send a request graceful shutdown to a running worker
      # @return [Boolean] true worker is running and request is sent, otherwise false
      def request_shutdown
        pid = get_pid
        if pid.nil?
          @logger.info("worker #{@worker_name} does not exist")
          return false
        end
        Process.kill(SIGNAL_FOR_GRACEFUL_SHUTDOWN[0], pid.to_i)
        return true
      end

      # main entry point of worker processing
      #
      # Does nothing if a worker with the same pid file is already running.
      # NOTE(review): @node is not assigned in this class; presumably it is
      # set by sub classes.
      def start_worker
        return unless get_pid.nil?

        @logger.info "starting worker #{@node}@#{@host}"
        File.open(@pid_file, 'w') {|f| f.write($$)} # save pid for shutdown
        set_traps
        @info_server.start_server
        @logger.info "initiating worker #{@node}@#{@host}"
        init_worker
        @status = Patriot::Worker::Status::ACTIVE
        @logger.info "start worker #{@node}@#{@host}"
        run_worker
        @logger.info "shutting down worker #{@node}@#{@host}"
        stop_worker
        # should be last since worker_admin judge availability from the info_server
        @info_server.shutdown_server
      end


      # should be overridden in sub class
      # This method is for implementation-specific configuration
      def init_worker
        raise NotImplementedError
      end

      # should be overridden in sub class
      # Main loop in which the worker fetches and executes jobs should be implemented here
      def run_worker
        raise NotImplementedError
      end

      # should be overridden in sub class
      # Tasks for tearing down the worker should be implemented here
      def stop_worker
        raise NotImplementedError
      end

      # Install the signal handlers of the worker:
      # the graceful shutdown signals set the status to SHUTDOWN and
      # the thread dump signals log the backtrace of every thread.
      def set_traps
        Patriot::Worker::SIGNAL_FOR_GRACEFUL_SHUTDOWN.each do |s|
          Signal.trap(s) do
            @logger.info "SIG#{s}: worker will terminate"
            @status = Patriot::Worker::Status::SHUTDOWN
          end
        end
        Patriot::Worker::SIGNAL_FOR_THREAD_DUMP.each do |s|
          Signal.trap(s) do
            # TODO output to separated stream
            Thread.list.each do |t|
              @logger.info("Thread #{t[:name]}\n#{t.backtrace.map{|bt| "\t#{bt}"}.join("\n")}")
            end
          end
        end
      end
      private :set_traps

    end
  end
end
|