batch-kit 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +22 -0
- data/README.md +165 -0
- data/lib/batch-kit.rb +9 -0
- data/lib/batch-kit/arguments.rb +57 -0
- data/lib/batch-kit/config.rb +517 -0
- data/lib/batch-kit/configurable.rb +68 -0
- data/lib/batch-kit/core_ext/enumerable.rb +97 -0
- data/lib/batch-kit/core_ext/file.rb +69 -0
- data/lib/batch-kit/core_ext/file_utils.rb +103 -0
- data/lib/batch-kit/core_ext/hash.rb +17 -0
- data/lib/batch-kit/core_ext/numeric.rb +17 -0
- data/lib/batch-kit/core_ext/string.rb +88 -0
- data/lib/batch-kit/database.rb +133 -0
- data/lib/batch-kit/database/java_util_log_handler.rb +65 -0
- data/lib/batch-kit/database/log4r_outputter.rb +57 -0
- data/lib/batch-kit/database/models.rb +548 -0
- data/lib/batch-kit/database/schema.rb +229 -0
- data/lib/batch-kit/encryption.rb +7 -0
- data/lib/batch-kit/encryption/java_encryption.rb +178 -0
- data/lib/batch-kit/encryption/ruby_encryption.rb +175 -0
- data/lib/batch-kit/events.rb +157 -0
- data/lib/batch-kit/framework/acts_as_job.rb +197 -0
- data/lib/batch-kit/framework/acts_as_sequence.rb +123 -0
- data/lib/batch-kit/framework/definable.rb +169 -0
- data/lib/batch-kit/framework/job.rb +121 -0
- data/lib/batch-kit/framework/job_definition.rb +105 -0
- data/lib/batch-kit/framework/job_run.rb +145 -0
- data/lib/batch-kit/framework/runnable.rb +235 -0
- data/lib/batch-kit/framework/sequence.rb +87 -0
- data/lib/batch-kit/framework/sequence_definition.rb +38 -0
- data/lib/batch-kit/framework/sequence_run.rb +48 -0
- data/lib/batch-kit/framework/task_definition.rb +89 -0
- data/lib/batch-kit/framework/task_run.rb +53 -0
- data/lib/batch-kit/helpers/date_time.rb +54 -0
- data/lib/batch-kit/helpers/email.rb +198 -0
- data/lib/batch-kit/helpers/html.rb +175 -0
- data/lib/batch-kit/helpers/process.rb +101 -0
- data/lib/batch-kit/helpers/zip.rb +30 -0
- data/lib/batch-kit/job.rb +11 -0
- data/lib/batch-kit/lockable.rb +138 -0
- data/lib/batch-kit/loggable.rb +78 -0
- data/lib/batch-kit/logging.rb +169 -0
- data/lib/batch-kit/logging/java_util_logger.rb +87 -0
- data/lib/batch-kit/logging/log4r_logger.rb +71 -0
- data/lib/batch-kit/logging/null_logger.rb +35 -0
- data/lib/batch-kit/logging/stdout_logger.rb +96 -0
- data/lib/batch-kit/resources.rb +191 -0
- data/lib/batch-kit/sequence.rb +7 -0
- metadata +122 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
require_relative 'events'
|
2
|
+
require_relative 'database/schema'
|
3
|
+
|
4
|
+
|
5
|
+
class BatchKit
|
6
|
+
|
7
|
+
# Implements functionality for persisting details of jobs run in a relational
|
8
|
+
# database, via the Sequel database library.
|
9
|
+
class Database

  # Number of keys sent per DELETE/UPDATE statement when purging.
  PURGE_SLICE_SIZE = 1000

  # Age (in seconds) after which silent executing jobs are treated as
  # zombies, and after which expired locks are purged.
  STALE_AFTER_SECS = 6 * 60 * 60

  # Instantiate a database back-end for persisting job and task runs.
  #
  # @param options [Hash] An options hash, passed on to the
  #   {BatchKit::Database::Schema#initialize Schema} instance. The keys
  #   +:log_retention_days+ (default 60), +:job_run_retention_days+
  #   (default 365) and +:request_retention_days+ (default 90) are also
  #   read by {#perform_housekeeping}.
  def initialize(options = {})
    @options = options
    @schema = Schema.new(options)
  end

  # Log database messages under the batch.database namespace.
  def log
    @log ||= BatchKit::LogManager.logger('batch.database')
  end

  # Connect to a back-end database for persistence, then verify the
  # schema and run housekeeping.
  #
  # @param args [Array<String>] Connection details to be passed to
  #   the {BatchKit::Database::Schema#connect} method.
  def connect(*args)
    @schema.connect(*args)

    # We can only include the models once we have connected
    require_relative 'database/models'

    # Check if the database schema is up-to-date
    MD5.check_schema(@schema)

    # Perform housekeeping tasks
    perform_housekeeping
  end

  # Purges detail records that are older than the retention threshold.
  #
  # Does nothing if any job run has already started today, so that the
  # housekeeping runs at most once per day. Each housekeeping step runs
  # in its own transaction.
  def perform_housekeeping
    # Only do housekeeping once per day
    return if JobRun.where { job_start_time > Date.today }.count > 0

    log.info "Performing batch database housekeeping"

    timeout_zombie_jobs
    purge_expired_locks
    purge_job_run_logs
    purge_job_runs
    purge_requests
    purge_jobs_without_runs
  end

  private

  # Abort jobs in Executing state that have not logged for 6+ hours.
  def timeout_zombie_jobs
    @schema.connection.transaction do
      cutoff = Time.now - STALE_AFTER_SECS
      # A job that started after the cutoff cannot have been silent for
      # the whole period, even if it has not written a log record yet.
      exec_jobs = JobRun.where(job_status: 'EXECUTING').
        where { job_start_time < cutoff }.map(:job_run)
      curr_jobs = JobRunLog.select_group(:job_run).
        where(job_run: exec_jobs).having { max(log_time) > cutoff }.map(:job_run)
      abort_jobs = JobRun.where(job_run: exec_jobs - curr_jobs).all
      unless abort_jobs.empty?
        log.detail "Cleaning up #{abort_jobs.count} zombie jobs"
        # Job runs are keyed by the job_run column; Sequel::Model#id only
        # reads a column literally named :id, so it must not be used here.
        abort_run_ids = abort_jobs.map(&:job_run)
        abort_tasks = TaskRun.where(job_run: abort_run_ids, task_status: 'EXECUTING')
        abort_tasks.each(&:timeout)
        abort_jobs.each(&:timeout)
      end
    end
  end

  # Purge locks that expired 6+ hours ago.
  def purge_expired_locks
    @schema.connection.transaction do
      purge_date = Time.now - STALE_AFTER_SECS
      Lock.where { lock_expires_at < purge_date }.delete
    end
  end

  # Purge log records for old job runs, flagging each run as purged.
  def purge_job_run_logs
    @schema.connection.transaction do
      purge_date = Date.today - @options.fetch(:log_retention_days, 60)
      purge_job_runs = JobRun.where(job_purged_flag: false).
        where { job_start_time < purge_date }.map(:job_run)
      unless purge_job_runs.empty?
        log.detail "Purging log records for #{purge_job_runs.count} job runs"
        purge_job_runs.each_slice(PURGE_SLICE_SIZE) do |purge_ids|
          JobRunLog.where(job_run: purge_ids).delete
          JobRun.where(job_run: purge_ids).update(job_purged_flag: true)
        end
      end
    end
  end

  # Purge old job runs together with their arguments and task runs.
  def purge_job_runs
    @schema.connection.transaction do
      purge_date = Date.today - @options.fetch(:job_run_retention_days, 365)
      purge_job_runs = JobRun.where { job_start_time < purge_date }.map(:job_run)
      unless purge_job_runs.empty?
        log.detail "Purging job and task run records for #{purge_job_runs.count} job runs"
        purge_job_runs.each_slice(PURGE_SLICE_SIZE) do |purge_ids|
          JobRunArg.where(job_run: purge_ids).delete
          TaskRun.where(job_run: purge_ids).delete
          JobRun.where(job_run: purge_ids).delete
        end
      end
    end
  end

  # Purge old request runs.
  def purge_requests
    @schema.connection.transaction do
      purge_date = Date.today - @options.fetch(:request_retention_days, 90)
      purge_requests = Request.where { request_launched_at < purge_date }.map(:request_id)
      unless purge_requests.empty?
        log.detail "Purging request records for #{purge_requests.count} requests"
        purge_requests.each_slice(PURGE_SLICE_SIZE) do |purge_ids|
          # Remove dependent rows first, as the other purges do.
          # NOTE(review): presumably batch_requestor references
          # batch_request via request_id - confirm against schema.rb.
          Requestor.where(request_id: purge_ids).delete
          Request.where(request_id: purge_ids).delete
        end
      end
    end
  end

  # Purge jobs with no runs, together with their failures and tasks.
  def purge_jobs_without_runs
    @schema.connection.transaction do
      purge_jobs = Job.left_join(:batch_job_run, :job_id => :job_id).
        where(Sequel.qualify(:batch_job_run, :job_id) => nil).
        select(Sequel.qualify(:batch_job, :job_id)).map(:job_id)
      unless purge_jobs.empty?
        log.detail "Purging #{purge_jobs.count} old jobs"
        purge_jobs.each_slice(PURGE_SLICE_SIZE) do |purge_ids|
          JobRunFailure.where(job_id: purge_ids).delete
          Task.where(job_id: purge_ids).delete
          Job.where(job_id: purge_ids).delete
        end
      end
    end
  end

end
|
132
|
+
|
133
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
class BatchKit
|
2
|
+
|
3
|
+
class Database
|
4
|
+
|
5
|
+
|
6
|
+
class JavaUtilLogHandler < Java::JavaUtilLogging::Handler

  # Create a new java.util.logging handler for recording log records
  # to the database.
  #
  # @param job_run [JobRun] A JobRun object representing the job run
  #   that is to be logged.
  # @param opts [Hash] An options hash.
  # @option opts [Fixnum] :max_lines The maximum number of lines to
  #   log to the database. Default is 10,000.
  # @option opts [Fixnum] :max_errors The maximum number of errors to
  #   ignore before disabling further attempts to store log messages.
  #   Default is 3.
  def initialize(job_run, opts = {})
    super()
    @job_run_id = job_run.job_run_id
    @log_line = 0
    @errors = 0
    @max_lines = opts.fetch(:max_lines, 10_000)
    @max_errors = opts.fetch(:max_errors, 3)
  end

  # Stops any further records from being stored by this handler.
  def close
    @job_run_id = nil
  end

  # Nothing is buffered, so there is nothing to flush.
  def flush
  end

  # Stores a log record against the job run, provided the handler is
  # still open, has not hit the error limit, and the record is at FINE
  # level or above. Once +max_lines+ lines have been stored, only
  # records at WARNING or above are kept. Messages are truncated to
  # 1000 characters and blank messages are skipped.
  #
  # @param event [java.util.logging.LogRecord] The record to store.
  # @raise Re-raises any error from saving the record, after counting it
  #   towards +max_errors+.
  def publish(event)
    return unless @job_run_id && @errors < @max_errors

    # java.util.logging.Level is not Comparable, so levels are compared
    # via their integer values throughout.
    severity = event.level.intValue
    return unless severity >= Java::JavaUtilLogging::Level::FINE.intValue
    return unless @log_line < @max_lines ||
      severity >= Java::JavaUtilLogging::Level::WARNING.intValue

    # getMessage and getLoggerName may both be null
    msg = event.getMessage.to_s[0...1000].strip
    return if msg.empty?

    @log_line += 1
    logger_name = event.getLoggerName.to_s
    # Keep only the last 40 characters of long logger names
    log_name = logger_name[-40..-1] || logger_name
    level = event.level
    begin
      JobRunLog.new(job_run: @job_run_id, log_line: @log_line,
                    thread_id: event.getThreadID,
                    log_time: Time.at(event.getMillis / 1000.0), log_name: log_name,
                    log_level: level, log_message: msg).save
    rescue
      # Count the failure; logging is disabled once max_errors is reached
      @errors += 1
      raise
    end
  end

end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
class BatchKit
|
2
|
+
|
3
|
+
class Database
|
4
|
+
|
5
|
+
# Outputs Log4r log events to the BATCH_JOB_RUN_LOG table.
|
6
|
+
class Log4ROutputter < Log4r::Outputter

  # Create a new database outputter for a single job run.
  #
  # @param job_run [JobRun] A JobRun object representing the job run
  #   that is to be logged.
  # @param opts [Hash] An options hash.
  # @option opts [Fixnum] :max_lines The maximum number of lines to
  #   log to the database. Default is 10,000.
  # @option opts [Fixnum] :max_errors The maximum number of errors to
  #   ignore before disabling further attempts to store log messages.
  #   Default is 3.
  def initialize(job_run, opts = {})
    super('db_output')
    @job_run_id = job_run.job_run_id
    @log_line = @errors = 0
    @max_lines = opts.fetch(:max_lines, 10_000)
    @max_errors = opts.fetch(:max_errors, 3)
  end

  # Formats a log event, and writes it to the BATCH_JOB_RUN_LOG table.
  #
  # Events below DETAIL level are ignored, as is everything once the
  # error limit has been reached. After +max_lines+ lines, only events
  # at WARN or above are stored. Messages are truncated to 1000
  # characters and blank messages are skipped.
  #
  # @param event [Log4r::LogEvent] The event to store.
  # @raise Re-raises any error from saving the record, after counting it
  #   towards +max_errors+.
  def format(event)
    return unless @errors < @max_errors && event.level >= Log4r::DETAIL
    return unless @log_line < @max_lines || event.level >= Log4r::WARN

    text = event.data.to_s[0...1000].strip
    return unless text.length > 0

    @log_line += 1
    # Keep only the last 40 characters of long logger names
    source = (event.fullname[-40..-1] || event.fullname).gsub('::', '.')
    tid = Log4r::MDC.get(:thread_id)
    level_name = Log4r::LNAMES[event.level]
    begin
      record = JobRunLog.new(job_run: @job_run_id, log_line: @log_line,
                             log_time: Time.now, log_name: source,
                             log_level: level_name, thread_id: tid && tid[0..8],
                             log_message: text)
      record.save
    rescue
      # Count the failure; logging is disabled once max_errors is reached
      @errors += 1
      raise
    end
  end

end
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
@@ -0,0 +1,548 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
|
4
|
+
class BatchKit
|
5
|
+
|
6
|
+
class Database
|
7
|
+
|
8
|
+
Sequel::Model.plugin :dirty
|
9
|
+
|
10
|
+
# Records an MD5 hash of String objects, which are used to detect when
|
11
|
+
# items such as jobs have changed. This in turn is used to increment a
|
12
|
+
# version number on objects.
|
13
|
+
class MD5 < Sequel::Model(:batch_md5)

  # Locate the MD5 record for the object named +obj_name+ whose type
  # is +obj_type+ and whose recorded digest is +digest+. Name and type
  # are matched case-insensitively.
  #
  # @return [MD5, nil] The matching record, or nil if there is none.
  def self.for(obj_name, obj_type, digest)
    self.where(Sequel.function(:upper, :object_name) => obj_name.upcase,
               Sequel.function(:upper, :object_type) => obj_type.upcase,
               :md5_digest => digest).first
  end

  # Checks that the BatchKit database tables have been deployed and match
  # the table definitions in schema.rb. If no MD5 record matches the
  # current schema.rb, all tables are dropped and re-created, and the
  # new digest is recorded.
  #
  # @param schema [Schema] Object responding to +drop_tables+ and
  #   +create_tables+.
  def self.check_schema(schema)
    schema_file = File.read("#{File.dirname(__FILE__)}/schema.rb")
    ok, md5 = self.check('SCHEMA', 'schema.rb', schema_file)
    return if ok

    # TODO: Find a better way to update schema for table changes;
    # This method throws away all history
    schema.drop_tables
    schema.create_tables
    md5.save
  end

  # Checks to see if an MD5 record exists whose digest matches the MD5
  # digest of +string+ as calculated by Digest::MD5.
  #
  # @return [Array] Two values: the +md5_id+ of the matching record (or
  #   nil when there is no match), and an MD5 instance - the matching
  #   record, or a new unsaved one holding the calculated digest. Callers
  #   use the first value both as a truthy "matched" flag and as an id.
  def self.check(obj_type, obj_name, string)
    digest = Digest::MD5.hexdigest(string)
    # Attempt to retrieve the MD5 for the object; the lookup could fail
    # if the schema is not deployed, which is treated as "no match".
    md5 = begin
      self.for(obj_name, obj_type, digest)
    rescue StandardError
      nil
    end
    if md5
      [md5.md5_id, md5]
    else
      [nil, self.new(obj_type, obj_name, string, digest)]
    end
  end

  # Create a new MD5 hash of an object. The object version is set to one
  # more than the highest version already recorded for the same object
  # name and type (matched case-insensitively), starting from 1.
  #
  # @param obj_type [String] The type of object being hashed.
  # @param obj_name [String] The name of the object being hashed.
  # @param string [String] The content to digest when +digest+ is nil.
  # @param digest [String, nil] A pre-computed MD5 hex digest.
  def initialize(obj_type, obj_name, string, digest = nil)
    obj_ver = self.class.where(Sequel.function(:upper, :object_name) => obj_name.upcase,
                               Sequel.function(:upper, :object_type) => obj_type.upcase).
      max(:object_version) || 0
    super(object_type: obj_type, object_name: obj_name,
          object_version: obj_ver + 1,
          md5_digest: digest || Digest::MD5.hexdigest(string),
          md5_created_at: model.dataset.current_datetime)
  end

end
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
# Records details of job definitions
|
74
|
+
class Job < Sequel::Model(:batch_job)

  many_to_one :md5, class: MD5, key: :job_file_md5_id

  plugin :timestamps, create: :job_created_at, update: :job_modified_at,
    update_on_create: true

  # Ensures that the job described by +job_def+ has been registered in
  # the batch database. A job is identified by its class name and host.
  # An existing job is updated when the MD5 of its source file no longer
  # matches the one recorded against it; otherwise a new job is created.
  # The resulting job id and version are written back to +job_def+.
  #
  # @param job_def [Job::Definition] The definition of the job.
  # @return [Job] The registered job record.
  def self.register(job_def)
    job = self.where(job_class: job_def.job_class.name,
                     job_host: job_def.computer).first
    # File.read rather than IO.read: IO.read treats a path beginning
    # with "|" as a command to run.
    job_file = File.read(job_def.file)
    ok, md5 = MD5.check('JOB', "//#{job_def.computer}/#{job_def.file}", job_file)
    md5.save unless ok
    if job
      # Existing job; +ok+ is the id of the matching MD5 record, or nil
      unless ok == job.job_file_md5_id
        job.update(job_name: job_def.name, job_method: job_def.method_name,
                   job_desc: job_def.description, job_file: job_def.file,
                   job_version: md5.object_version, md5: md5)
      end
    else
      # New job
      job = self.new(job_def, md5).save
    end
    job_def.job_id = job.job_id
    job_def.job_version = job.job_version
    job
  end

  # Log job messages under the batch-kit.job namespace.
  def log
    @log ||= LogManager.logger('batch-kit.job')
  end

  # Create a new job record from a job definition, with all run
  # counters and duration statistics initialised to zero.
  #
  # @param job_def [Job::Definition] The definition of the job.
  # @param md5 [MD5] The MD5 record for the job's source file.
  def initialize(job_def, md5)
    log.detail "Registering job '#{job_def.name}' on #{job_def.computer} in batch database"
    super(job_name: job_def.name, job_class: job_def.job_class.name,
          job_method: job_def.method_name, job_desc: job_def.description,
          job_host: job_def.computer, job_file: job_def.file,
          job_version: md5.object_version, md5: md5,
          job_run_count: 0, job_success_count: 0, job_fail_count: 0,
          job_abort_count: 0, job_min_success_duration_ms: 0,
          job_max_success_duration_ms: 0, job_mean_success_duration_ms: 0,
          job_m2_success_duration_ms: 0)
  end

  # Record the start of a job run.
  #
  # @param job_run [JobRun] The JobRun instance that has commenced.
  def job_start(job_run)
    self.job_last_run_at = job_run.start_time
    self.job_run_count += 1
    self.save
  end

  # Record the successful completion of the JobRun, updating the
  # minimum, maximum, running mean and M2 (sum of squared differences
  # from the mean) of successful run durations in milliseconds.
  #
  # @param job_run [JobRun] The JobRun instance that has completed.
  def job_success(job_run)
    self.job_success_count += 1
    n = self.job_success_count
    ms = job_run.elapsed * 1000
    delta = ms - self.job_mean_success_duration_ms
    # A stored value of 0 means no duration has been recorded yet
    self.job_min_success_duration_ms = self.job_min_success_duration_ms == 0 ?
      ms : [self.job_min_success_duration_ms, ms].min
    self.job_max_success_duration_ms = self.job_max_success_duration_ms == 0 ?
      ms : [self.job_max_success_duration_ms, ms].max
    # Incremental mean / M2 update
    mean = self.job_mean_success_duration_ms += delta / n
    self.job_m2_success_duration_ms += delta * (ms - mean)
    self.save
  end

  # Record the failure of a JobRun.
  #
  # @param job_run [JobRun] The JobRun instance that has failed.
  def job_failure(job_run)
    self.job_fail_count += 1
    self.save
  end

  # Record that a JobRun has been aborted.
  #
  # @param job_run [JobRun] The JobRun instance that has aborted.
  def job_abort(job_run)
    self.job_abort_count += 1
    self.save
  end

  # Record that a JobRun has timed out. This happens when the database
  # finds an instance in the table that has been running for a long
  # period without any activity. Timeouts are counted as aborts.
  #
  # @param job_run [JobRun] The JobRun instance that has timed out.
  def job_timeout(job_run)
    self.job_abort_count += 1
    self.save
  end

  # Keep the job record in step with job run lifecycle events; nothing
  # is recorded for runs that are not persisted.
  Events.subscribe(nil, 'job_run.pre-execute') do |job_obj, job_run, *args|
    Job.register(job_run.definition) if job_run.persist?
    true
  end
  Events.subscribe(nil, 'job_run.execute') do |job_obj, job_run, *args|
    Job[job_run.job_id].job_start(job_run) if job_run.persist?
  end
  Events.subscribe(nil, 'job_run.success') do |job_obj, job_run, result|
    Job[job_run.job_id].job_success(job_run) if job_run.persist?
  end
  Events.subscribe(nil, 'job_run.failure') do |job_obj, job_run, ex|
    Job[job_run.job_id].job_failure(job_run) if job_run.persist?
  end
  Events.subscribe(nil, 'job_run.abort') do |job_obj, job_run|
    Job[job_run.job_id].job_abort(job_run) if job_run.persist?
  end

end
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
# Records details of Task definitions
|
204
|
+
class Task < Sequel::Model(:batch_task)

  many_to_one :job, class: Job, key: :job_id

  plugin :timestamps, create: :task_created_at, update: :task_modified_at,
    update_on_create: true

  # Ensures that every task of the job described by +job_def+ has been
  # registered in the batch database. All existing tasks for the job are
  # first flagged as not current; each task in the definition is then
  # updated (and flagged current) or created, and its id is written back
  # to the task definition.
  #
  # @param job_def [Job::Definition] The job definition whose tasks are
  #   to be registered.
  def self.register(job_def)
    Task.where(job_id: job_def.job_id).update(task_current_flag: false)
    job_def.tasks.each do |task_key, task_def|
      task = self.where(job_id: job_def.job_id,
                        task_method: task_def.method_name.to_s).first
      if task
        # NOTE(review): the flag is cleared with +false+ above but set
        # with 'Y' here, and is not set at all for newly created tasks -
        # confirm the column type and default in schema.rb.
        task.update(task_name: task_def.name, task_class: task_def.task_class.name,
                    task_desc: task_def.description, task_current_flag: 'Y')
      else
        task = Task.new(task_def).save
      end
      task_def.task_id = task.task_id
    end
  end

  # Create a new task record from a task definition, with all run
  # counters and duration statistics initialised to zero.
  #
  # @param task_def [Task::Definition] The definition of the task.
  def initialize(task_def)
    super(job_id: task_def.job.job_id, job_version: task_def.job.job_version,
          task_name: task_def.name, task_class: task_def.task_class.name,
          task_method: task_def.method_name.to_s, task_desc: task_def.description,
          task_run_count: 0, task_success_count: 0, task_fail_count: 0,
          task_abort_count: 0, task_min_success_duration_ms: 0,
          task_max_success_duration_ms: 0, task_mean_success_duration_ms: 0,
          task_m2_success_duration_ms: 0)
  end

  # Record the start of a task run.
  #
  # @param task_run [TaskRun] The task run that has commenced.
  def task_start(task_run)
    self.task_last_run_at = task_run.start_time
    self.task_run_count += 1
    self.save
  end

  # Record the successful completion of a task run, updating the
  # minimum, maximum, running mean and M2 (sum of squared differences
  # from the mean) of successful run durations in milliseconds.
  #
  # @param task_run [TaskRun] The task run that has completed.
  def task_success(task_run)
    self.task_success_count += 1
    n = self.task_success_count
    ms = task_run.elapsed * 1000
    delta = ms - self.task_mean_success_duration_ms
    # A stored value of 0 means no duration has been recorded yet
    self.task_min_success_duration_ms = self.task_min_success_duration_ms == 0 ?
      ms : [self.task_min_success_duration_ms, ms].min
    self.task_max_success_duration_ms = self.task_max_success_duration_ms == 0 ?
      ms : [self.task_max_success_duration_ms, ms].max
    # Incremental mean / M2 update
    mean = self.task_mean_success_duration_ms += delta / n
    self.task_m2_success_duration_ms += delta * (ms - mean)
    self.save
  end

  # Record the failure of a task run.
  #
  # @param task_run [TaskRun] The task run that has failed.
  def task_failure(task_run)
    self.task_fail_count += 1
    self.save
  end

  # Record that a task run has been aborted.
  #
  # @param task_run [TaskRun] The task run that has aborted.
  def task_abort(task_run)
    self.task_abort_count += 1
    self.save
  end

  # Record that a task run has timed out; counted as an abort.
  #
  # @param task_run [TaskRun] The task run that has timed out.
  def task_timeout(task_run)
    self.task_abort_count += 1
    self.save
  end

  # Keep the task records in step with run lifecycle events; nothing is
  # recorded for runs that are not persisted.
  Events.subscribe(nil, 'job_run.pre-execute') do |job_obj, job_run, *args|
    Task.register(job_run.definition) if job_run.persist?
    # Return true explicitly, as the Job pre-execute handler does, so
    # that a non-persisted run does not produce a nil result.
    # NOTE(review): confirm how Events.publish treats handler results.
    true
  end

  Events.subscribe(nil, 'task_run.execute') do |job_obj, task_run, *args|
    Task[task_run.task_id].task_start(task_run) if task_run.persist?
  end
  Events.subscribe(nil, 'task_run.success') do |job_obj, task_run, result|
    Task[task_run.task_id].task_success(task_run) if task_run.persist?
  end
  Events.subscribe(nil, 'task_run.failure') do |job_obj, task_run, ex|
    Task[task_run.task_id].task_failure(task_run) if task_run.persist?
  end
  Events.subscribe(nil, 'task_run.abort') do |job_obj, task_run|
    Task[task_run.task_id].task_abort(task_run) if task_run.persist?
  end

end
|
297
|
+
|
298
|
+
|
299
|
+
|
300
|
+
# Records details of job runs
|
301
|
+
class JobRun < Sequel::Model(:batch_job_run)

  many_to_one :job, class: Job, key: :job_id

  # Build a job run record from a framework job run.
  #
  # @param job_run [Job::Run] The run whose details are to be recorded.
  def initialize(job_run)
    super(
      job_id: job_run.job_id,
      job_instance: job_run.instance,
      job_version: job_run.job_version,
      job_run_by: job_run.run_by,
      job_cmd_line: job_run.cmd_line,
      job_start_time: job_run.start_time,
      job_status: job_run.status.to_s.upcase,
      job_pid: job_run.pid
    )
  end

  # Saves this record and hands its key back to the framework run.
  #
  # @param job_run [Job::Run] The run that has commenced.
  def job_start(job_run)
    save
    # The parameter shadows the column accessor, hence the explicit self
    job_run.job_run_id = self.job_run
  end

  # Records the end time, final status and exit code of the run, and
  # clears the process id.
  #
  # @param job_run [Job::Run] The run that has finished.
  def job_end(job_run)
    self.job_end_time = job_run.end_time
    self.job_status = job_run.status.to_s.upcase
    self.job_pid = nil
    self.job_exit_code = job_run.exit_code
    save
  end

  # Marks this run as timed out as of now, with exit code -1, and
  # records the timeout against the owning job.
  def timeout
    self.job_end_time = Time.now
    self.job_status = 'TIMEOUT'
    self.job_pid = nil
    self.job_exit_code = -1
    save

    Job[job_id].job_timeout(self)
  end

  # Create the run record at the start of execution, and complete it
  # once execution has finished; only for runs that are persisted.
  Events.subscribe(nil, 'job_run.execute', position: 0) do |_job_obj, run, *_args|
    JobRun.new(run).job_start(run) if run.persist?
  end
  Events.subscribe(nil, 'job_run.post-execute') do |_job_obj, run, _ok|
    JobRun[run.job_run_id].job_end(run) if run.persist?
  end

end
|
348
|
+
|
349
|
+
|
350
|
+
|
351
|
+
# Captures the value of all defined command-line arguments to the job
|
352
|
+
class JobRunArg < Sequel::Model(:batch_job_run_arg)

  unrestrict_primary_key

  # Saves one record per argument of +job_run+. Strings, numbers and
  # booleans are stored as-is; any other value is stored as the result
  # of calling #inspect on it. Does nothing if the run has no arguments.
  #
  # @param job_run [Job::Run] The run whose arguments are to be saved.
  def self.from(job_run)
    job_args = job_run.job_args
    return job_args unless job_args

    job_args.each_pair do |arg_name, raw|
      stored =
        case raw
        when String, Numeric, true, false then raw
        else raw.inspect
        end
      JobRunArg.new(job_run.job_run_id, arg_name, stored).save
    end
  end

  # @param job_run [Object] The key of the job run the argument belongs to.
  # @param name [Object] The argument name.
  # @param val [Object] The argument value to store.
  def initialize(job_run, name, val)
    super(job_run: job_run, job_arg_name: name, job_arg_value: val)
  end

  # Capture the arguments of persisted runs when execution starts.
  Events.subscribe(nil, 'job_run.execute') do |_job_obj, run, *_args|
    JobRunArg.from(run) if run.persist?
  end

end
|
378
|
+
|
379
|
+
|
380
|
+
|
381
|
+
# Captures details of a job run exception
|
382
|
+
class JobRunFailure < Sequel::Model(:batch_job_run_failure)

  many_to_one :job, class: Job, key: :job_id

  # Build a failure record for a job run. The exception message is
  # truncated to 500 characters and the backtrace to 4000 characters.
  #
  # @param job_run [Job::Run] The job run that failed.
  # @param ex [Exception] The exception that caused the failure.
  def initialize(job_run, ex)
    # An exception that was never raised has a nil backtrace; Array()
    # turns that into an empty backtrace rather than failing here.
    backtrace = Array(ex.backtrace).join("\n")
    super(job_run: job_run.job_run_id, job_id: job_run.definition.job_id,
          job_version: job_run.definition.job_version, job_failed_at: Time.now,
          exception_message: ex.message[0...500],
          exception_backtrace: backtrace[0...4000])
  end

  # Record the exception whenever a persisted job run fails.
  Events.subscribe(nil, 'job_run.failure') do |job_obj, job_run, ex|
    JobRunFailure.new(job_run, ex).save if job_run.persist?
  end

end
|
400
|
+
|
401
|
+
|
402
|
+
|
403
|
+
# Capture details of a task run
|
404
|
+
class TaskRun < Sequel::Model(:batch_task_run)

  many_to_one :task, class: Task, key: :task_id

  # Build a task run record from a framework task run.
  #
  # @param task_run [Task::Run] The run whose details are to be recorded.
  def initialize(task_run)
    super(
      task_id: task_run.task_id,
      job_run: task_run.job_run.job_run_id,
      task_instance: task_run.instance,
      task_start_time: task_run.start_time,
      task_status: task_run.status.to_s.upcase
    )
  end

  # Saves this record and hands its key back to the framework run.
  #
  # @param task_run [Task::Run] The run that has commenced.
  def task_start(task_run)
    save
    # The parameter shadows the column accessor, hence the explicit self
    task_run.task_run_id = self.task_run
  end

  # Records the end time, final status and exit code of the run.
  #
  # @param task_run [Task::Run] The run that has finished.
  def task_end(task_run)
    self.task_end_time = task_run.end_time
    self.task_status = task_run.status.to_s.upcase
    self.task_exit_code = task_run.exit_code
    save
  end

  # Marks this run as timed out as of now, with exit code -1, and
  # records the timeout against the owning task.
  def timeout
    self.task_end_time = Time.now
    self.task_status = 'TIMEOUT'
    self.task_exit_code = -1
    save

    Task[task_id].task_timeout(self)
  end

  # Create the run record at the start of execution, and complete it
  # once execution has finished; only for runs that are persisted.
  Events.subscribe(nil, 'task_run.execute', position: 0) do |_job_obj, run, *_args|
    TaskRun.new(run).task_start(run) if run.persist?
  end
  Events.subscribe(nil, 'task_run.post-execute') do |_job_obj, run, _ok|
    TaskRun[run.task_run_id].task_end(run) if run.persist?
  end

end
|
449
|
+
|
450
|
+
|
451
|
+
|
452
|
+
# Model for a single log message
|
453
|
+
class JobRunLog < Sequel::Model(:batch_job_run_log)

  unrestrict_primary_key

  # Attaches a database-backed handler to +logger+, so that messages it
  # logs are stored against +job_run+. The handler type depends on the
  # log framework in use; for any other framework nothing is installed.
  #
  # @param job_run [Job::Run] The job run whose messages are to be stored.
  # @param logger [Object] The logger to attach the handler to.
  def self.install_log_handler(job_run, logger)
    framework = LogManager.log_framework
    if framework == :java_util_logging
      require_relative 'java_util_log_handler'
      logger.addHandler(JavaUtilLogHandler.new(job_run))
    elsif framework == :log4r
      require_relative 'log4r_outputter'
      logger.add(Log4ROutputter.new(job_run))
    end
  end

  # When a persisted run starts, capture the log output of the job
  # object, provided it exposes a logger via #log.
  Events.subscribe(nil, 'job_run.execute') do |job_obj, run, *_args|
    next unless run.persist?

    logger = job_obj.respond_to?(:log) && job_obj.log
    JobRunLog.install_log_handler(run, logger) if logger
  end

end
|
478
|
+
|
479
|
+
|
480
|
+
|
481
|
+
# Model for a lock
|
482
|
+
class Lock < Sequel::Model(:batch_lock)

  unrestrict_primary_key

  # Attempts to obtain the lock named +lock_name+ for +job_run+.
  #
  # An existing lock that has already expired is deleted and treated as
  # free. If the lock is free, an expiry time +lock_timeout+ seconds from
  # now is returned, and a lock record is saved when the run is
  # persisted. If the lock is held, nil is returned and, when supplied,
  # +lock_holder+ is filled in with details of the current holder.
  #
  # @param job_run [Job::Run] The run requesting the lock.
  # @param lock_name [String] The name of the lock.
  # @param lock_timeout [Numeric] Seconds until the lock expires.
  # @param lock_holder [Hash, nil] Optional hash that receives
  #   +:lock_expires_at+ and +:lock_holder+ when the lock is held.
  # @return [Time, nil] The expiry time if the lock was obtained.
  def self.lock?(job_run, lock_name, lock_timeout, lock_holder = nil)
    expires_at = nil
    dataset.db.transaction do
      current = where(lock_name: lock_name).first

      # An expired lock is removed and treated as if it never existed
      if current && current.lock_expires_at < Time.now
        where(lock_name: lock_name).delete
        current = nil
      end

      if current
        holder_run = JobRun.join(Job, :job_id => :job_id).where(job_run: current.job_run).first
        if lock_holder
          lock_holder[:lock_expires_at] = current.lock_expires_at.getlocal
          lock_holder[:lock_holder] = "job '#{holder_run[:job_name]}' (job run #{current.job_run})"
        end
      else
        expires_at = Time.now + lock_timeout
        if job_run.persist?
          new(lock_name: lock_name, job_run: job_run.job_run_id,
              lock_created_at: Time.now,
              lock_expires_at: expires_at).save
        end
      end
    end
    expires_at
  end

  # Releases the lock named +lock_name+ held by +job_run+.
  #
  # @param job_run [Job::Run] The run releasing the lock.
  # @param lock_name [String] The name of the lock.
  # @return [Boolean] true if the run is persisted (whether or not a
  #   lock record was actually deleted), false otherwise.
  def self.unlock?(job_run, lock_name)
    return false unless job_run.persist?

    where(lock_name: lock_name, job_run: job_run.job_run_id).delete
    true
  end

  # Answer lock and unlock requests published by runnables.
  Events.subscribe(Runnable, 'lock?') do |run, name, timeout, holder|
    Lock.lock?(run, name, timeout, holder)
  end
  Events.subscribe(Runnable, 'unlock?') do |run, name|
    Lock.unlock?(run, name)
  end

end
|
535
|
+
|
536
|
+
|
537
|
+
|
538
|
+
# Sequel model over the batch_request table; no behaviour beyond the model defaults.
class Request < Sequel::Model(:batch_request); end
|
540
|
+
|
541
|
+
|
542
|
+
# Sequel model over the batch_requestor table; no behaviour beyond the model defaults.
class Requestor < Sequel::Model(:batch_requestor); end
|
544
|
+
|
545
|
+
|
546
|
+
end
|
547
|
+
|
548
|
+
end
|