batch-kit 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +22 -0
- data/README.md +165 -0
- data/lib/batch-kit.rb +9 -0
- data/lib/batch-kit/arguments.rb +57 -0
- data/lib/batch-kit/config.rb +517 -0
- data/lib/batch-kit/configurable.rb +68 -0
- data/lib/batch-kit/core_ext/enumerable.rb +97 -0
- data/lib/batch-kit/core_ext/file.rb +69 -0
- data/lib/batch-kit/core_ext/file_utils.rb +103 -0
- data/lib/batch-kit/core_ext/hash.rb +17 -0
- data/lib/batch-kit/core_ext/numeric.rb +17 -0
- data/lib/batch-kit/core_ext/string.rb +88 -0
- data/lib/batch-kit/database.rb +133 -0
- data/lib/batch-kit/database/java_util_log_handler.rb +65 -0
- data/lib/batch-kit/database/log4r_outputter.rb +57 -0
- data/lib/batch-kit/database/models.rb +548 -0
- data/lib/batch-kit/database/schema.rb +229 -0
- data/lib/batch-kit/encryption.rb +7 -0
- data/lib/batch-kit/encryption/java_encryption.rb +178 -0
- data/lib/batch-kit/encryption/ruby_encryption.rb +175 -0
- data/lib/batch-kit/events.rb +157 -0
- data/lib/batch-kit/framework/acts_as_job.rb +197 -0
- data/lib/batch-kit/framework/acts_as_sequence.rb +123 -0
- data/lib/batch-kit/framework/definable.rb +169 -0
- data/lib/batch-kit/framework/job.rb +121 -0
- data/lib/batch-kit/framework/job_definition.rb +105 -0
- data/lib/batch-kit/framework/job_run.rb +145 -0
- data/lib/batch-kit/framework/runnable.rb +235 -0
- data/lib/batch-kit/framework/sequence.rb +87 -0
- data/lib/batch-kit/framework/sequence_definition.rb +38 -0
- data/lib/batch-kit/framework/sequence_run.rb +48 -0
- data/lib/batch-kit/framework/task_definition.rb +89 -0
- data/lib/batch-kit/framework/task_run.rb +53 -0
- data/lib/batch-kit/helpers/date_time.rb +54 -0
- data/lib/batch-kit/helpers/email.rb +198 -0
- data/lib/batch-kit/helpers/html.rb +175 -0
- data/lib/batch-kit/helpers/process.rb +101 -0
- data/lib/batch-kit/helpers/zip.rb +30 -0
- data/lib/batch-kit/job.rb +11 -0
- data/lib/batch-kit/lockable.rb +138 -0
- data/lib/batch-kit/loggable.rb +78 -0
- data/lib/batch-kit/logging.rb +169 -0
- data/lib/batch-kit/logging/java_util_logger.rb +87 -0
- data/lib/batch-kit/logging/log4r_logger.rb +71 -0
- data/lib/batch-kit/logging/null_logger.rb +35 -0
- data/lib/batch-kit/logging/stdout_logger.rb +96 -0
- data/lib/batch-kit/resources.rb +191 -0
- data/lib/batch-kit/sequence.rb +7 -0
- metadata +122 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
require_relative 'events'
|
2
|
+
require_relative 'database/schema'
|
3
|
+
|
4
|
+
|
5
|
+
class BatchKit
|
6
|
+
|
7
|
+
# Implements functionality for persisting details of jobs run in a relational
|
8
|
+
# database, via the Sequel database library.
|
9
|
+
class Database
|
10
|
+
|
11
|
+
|
12
|
+
# Instantiate a database back-end for persisting job and task runs.
|
13
|
+
#
|
14
|
+
# @param options [Hash] An options hash, passed on to the
|
15
|
+
# {BatchKit::Database::Schema#initialize Schema} instance.
|
16
|
+
def initialize(options = {})
|
17
|
+
@options = options
|
18
|
+
@schema = Schema.new(options)
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
# Log database messages under the batch.database namespace.
|
23
|
+
def log
|
24
|
+
@log ||= BatchKit::LogManager.logger('batch.database')
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
# Connect to a back-end database for persistence.
|
29
|
+
#
|
30
|
+
# @param args [Array<String>] Connection details to be passed to
|
31
|
+
# the {BatchKit::Database::Schema#connect} method.
|
32
|
+
def connect(*args)
|
33
|
+
@schema.connect(*args)
|
34
|
+
|
35
|
+
# We can only include the models once we have connected
|
36
|
+
require_relative 'database/models'
|
37
|
+
|
38
|
+
# Check if the database schema is up-to-date
|
39
|
+
MD5.check_schema(@schema)
|
40
|
+
|
41
|
+
# Perform housekeeping tasks
|
42
|
+
perform_housekeeping
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
# Purges detail records that are older than the retention threshhold.
|
47
|
+
def perform_housekeeping
|
48
|
+
# Only do housekeeping once per day
|
49
|
+
return if JobRun.where{job_start_time > Date.today}.count > 0
|
50
|
+
|
51
|
+
log.info "Performing batch database housekeeping"
|
52
|
+
|
53
|
+
# Abort jobs in Executing state that have not logged for 6+ hours
|
54
|
+
@schema.connection.transaction do
|
55
|
+
cutoff = Time.now - 6 * 60 * 60
|
56
|
+
exec_jobs = JobRun.where(job_status: 'EXECUTING').map(:job_run)
|
57
|
+
curr_jobs = JobRunLog.select_group(:job_run).
|
58
|
+
where(job_run: exec_jobs).having{max(log_time) > cutoff}.map(:job_run)
|
59
|
+
abort_jobs = JobRun.where(job_run: exec_jobs - curr_jobs).all
|
60
|
+
if abort_jobs.count > 0
|
61
|
+
log.detail "Cleaning up #{abort_jobs.count} zombie jobs"
|
62
|
+
abort_tasks = TaskRun.where(job_run: abort_jobs.map(&:id), task_status: 'EXECUTING')
|
63
|
+
abort_tasks.each(&:timeout)
|
64
|
+
abort_jobs.each(&:timeout)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# Purge locks that expired 6+ hours ago
|
69
|
+
@schema.connection.transaction do
|
70
|
+
purge_date = Time.now - 6 * 60 * 60
|
71
|
+
Lock.where{lock_expires_at < purge_date}.delete
|
72
|
+
end
|
73
|
+
|
74
|
+
# Purge log records for old job runs
|
75
|
+
@schema.connection.transaction do
|
76
|
+
purge_date = Date.today - @options.fetch(:log_retention_days, 60)
|
77
|
+
purge_job_runs = JobRun.where(job_purged_flag: false).
|
78
|
+
where{job_start_time < purge_date}.map(:job_run)
|
79
|
+
if purge_job_runs.count > 0
|
80
|
+
log.detail "Purging log records for #{purge_job_runs.count} job runs"
|
81
|
+
purge_job_runs.each_slice(1000).each do |purge_ids|
|
82
|
+
JobRunLog.where(job_run: purge_ids).delete
|
83
|
+
JobRun.where(job_run: purge_ids).update(job_purged_flag: true)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
# Purge old task and job runs
|
89
|
+
@schema.connection.transaction do
|
90
|
+
purge_date = Date.today - @options.fetch(:job_run_retention_days, 365)
|
91
|
+
purge_job_runs = JobRun.where{job_start_time < purge_date}.map(:job_run)
|
92
|
+
if purge_job_runs.count > 0
|
93
|
+
log.detail "Purging job and task run records for #{purge_job_runs.count} job runs"
|
94
|
+
purge_job_runs.each_slice(1000).each do |purge_ids|
|
95
|
+
JobRunArg.where(job_run: purge_ids).delete
|
96
|
+
TaskRun.where(job_run: purge_ids).delete
|
97
|
+
JobRun.where(job_run: purge_ids).delete
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# Purge old request runs
|
103
|
+
@schema.connection.transaction do
|
104
|
+
purge_date = Date.today - @options.fetch(:request_retention_days, 90)
|
105
|
+
purge_requests = Request.where{request_launched_at < purge_date}.map(:request_id)
|
106
|
+
if purge_requests.count > 0
|
107
|
+
log.detail "Purging request records for #{purge_requests.count} requests"
|
108
|
+
purge_requests.each_slice(1000).each do |purge_ids|
|
109
|
+
Request.where(request_id: purge_ids).delete
|
110
|
+
Requestor.where(request_id: purge_ids).delete
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# Purge jobs with no runs
|
116
|
+
@schema.connection.transaction do
|
117
|
+
purge_jobs = Job.left_join(:batch_job_run, :job_id => :job_id).
|
118
|
+
where(Sequel.qualify(:batch_job_run, :job_id) => nil).
|
119
|
+
select(Sequel.qualify(:batch_job, :job_id)).map(:job_id)
|
120
|
+
if purge_jobs.count > 0
|
121
|
+
log.detail "Purging #{purge_jobs.count} old jobs"
|
122
|
+
purge_jobs.each_slice(1000).each do |purge_ids|
|
123
|
+
JobRunFailure.where(job_id: purge_ids).delete
|
124
|
+
Task.where(job_id: purge_ids).delete
|
125
|
+
Job.where(job_id: purge_ids).delete
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
class BatchKit
|
2
|
+
|
3
|
+
class Database
|
4
|
+
|
5
|
+
|
6
|
+
class JavaUtilLogHandler < Java::JavaUtilLogging::Handler

  # Create a new java.util.logging handler for recording log records
  # to the database.
  #
  # @param job_run [JobRun] A JobRun object representing the job run
  #   that is to be logged.
  # @param opts [Hash] An options hash.
  # @option opts [Fixnum] :max_lines The maximum number of lines to
  #   log to the database. Default is 10,000. Records at WARNING level
  #   or above are still stored once this limit has been reached.
  # @option opts [Fixnum] :max_errors The number of failed attempts to
  #   store a log record after which the handler stops trying.
  #   Default is 3.
  def initialize(job_run, opts = {})
    super()
    @job_run_id = job_run.job_run_id
    @log_line = 0
    @errors = 0
    @max_lines = opts.fetch(:max_lines, 10_000)
    @max_errors = opts.fetch(:max_errors, 3)
  end


  # Close the handler. Clears the job run id, so that #publish ignores
  # any further records.
  def close
    @job_run_id = nil
  end


  # Nothing is buffered, so there is nothing to flush.
  def flush
  end


  # Store a log record in the job run log table.
  #
  # Records are ignored when the handler is closed, when the error limit
  # has been reached, when the level is below FINE, or when the message is
  # blank. Messages are truncated to 1000 characters and logger names to
  # their last 40 characters.
  #
  # @param event [java.util.logging.LogRecord] The record to be stored.
  # @raise Any error raised while saving the record is counted and then
  #   re-raised.
  def publish(event)
    return unless @job_run_id && @errors < @max_errors

    # java.util.logging.Level is not Comparable, so levels must be compared
    # by their integer values.
    severity = event.level.intValue
    return unless severity >= Java::JavaUtilLogging::Level::FINE.intValue
    return unless @log_line < @max_lines ||
      severity >= Java::JavaUtilLogging::Level::WARNING.intValue

    # getMessage and getLoggerName may return null; to_s maps that to ''
    msg = event.getMessage.to_s[0...1000].strip
    return unless msg.length > 0

    @log_line += 1
    logger_name = event.getLoggerName.to_s
    log_name = (logger_name[-40..-1] || logger_name)
    level = event.level
    begin
      JobRunLog.new(job_run: @job_run_id, log_line: @log_line,
                    thread_id: event.getThreadID,
                    log_time: Time.at(event.getMillis / 1000.0), log_name: log_name,
                    log_level: level, log_message: msg).save
    rescue
      # Count the failure; once @max_errors is reached no further records
      # are stored. The error itself still propagates to the caller.
      @errors += 1
      raise
    end
  end

end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
class BatchKit
|
2
|
+
|
3
|
+
class Database
|
4
|
+
|
5
|
+
# Outputs Log4r log events to the BATCH_JOB_RUN_LOG table.
|
6
|
+
class Log4ROutputter < Log4r::Outputter

  # Create a new database outputter for a single job run.
  #
  # @param job_run [JobRun] A JobRun object representing the job run
  #   that is to be logged.
  # @param opts [Hash] An options hash.
  # @option opts [Fixnum] :max_lines The maximum number of lines to
  #   log to the database. Default is 10,000. Events at WARN level or
  #   above are still stored once this limit has been reached.
  # @option opts [Fixnum] :max_errors The number of failed attempts to
  #   store a log message after which the outputter stops trying.
  #   Default is 3.
  def initialize(job_run, opts = {})
    super('db_output')
    @job_run_id = job_run.job_run_id
    @log_line = 0
    @errors = 0
    @max_lines = opts.fetch(:max_lines, 10_000)
    @max_errors = opts.fetch(:max_errors, 3)
  end


  # Formats a log event, and writes it to the BATCH_JOB_RUN_LOG table.
  #
  # Events below DETAIL level and blank messages are skipped, as is
  # everything once the error limit has been reached. Messages are cut to
  # 1000 characters and logger names to their last 40 characters.
  #
  # @param event [Log4r::LogEvent] The event to be stored.
  # @raise Any error raised while saving the record is counted and then
  #   re-raised.
  def format(event)
    return unless @errors < @max_errors && event.level >= Log4r::DETAIL
    return unless @log_line < @max_lines || event.level >= Log4r::WARN

    text = event.data.to_s[0...1000].strip
    return unless text.length > 0

    @log_line += 1
    source = (event.fullname[-40..-1] || event.fullname).gsub('::', '.')
    thread = Log4r::MDC.get(:thread_id)
    level_name = Log4r::LNAMES[event.level]
    begin
      record = JobRunLog.new(job_run: @job_run_id, log_line: @log_line,
                             log_time: Time.now, log_name: source,
                             log_level: level_name,
                             thread_id: thread && thread[0..8],
                             log_message: text)
      record.save
    rescue
      # Count the failure; once @max_errors is reached no further events
      # are stored. The error itself still propagates to the caller.
      @errors += 1
      raise
    end
  end

end
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
@@ -0,0 +1,548 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
|
4
|
+
class BatchKit
|
5
|
+
|
6
|
+
class Database
|
7
|
+
|
8
|
+
# Load Sequel's :dirty plugin on Sequel::Model itself, before any of the
# model classes below are defined.
Sequel::Model.plugin :dirty
|
9
|
+
|
10
|
+
# Records an MD5 hash of String objects, which are used to detect when
|
11
|
+
# items such as jobs have changed. This in turn is used to increment a
|
12
|
+
# version number on objects.
|
13
|
+
class MD5 < Sequel::Model(:batch_md5)

  # Locate the MD5 record for the object named +obj_name+ whose type
  # is +obj_type+ and whose recorded digest equals +digest+. Name and type
  # are compared case-insensitively.
  #
  # @return [MD5, nil] The first matching record, or nil if there is none.
  def self.for(obj_name, obj_type, digest)
    matches = where(Sequel.function(:upper, :object_name) => obj_name.upcase,
                    Sequel.function(:upper, :object_type) => obj_type.upcase,
                    :md5_digest => digest)
    matches.first
  end


  # Checks that the BatchKit database tables have been deployed and match
  # the table definitions in schema.rb. If no MD5 record matches the
  # current contents of schema.rb, all tables are dropped and re-created
  # and a new MD5 record is saved.
  #
  # @param schema [Schema] The schema object used to drop and create tables.
  def self.check_schema(schema)
    schema_source = IO.read("#{File.dirname(__FILE__)}/schema.rb")
    matched_id, md5 = check('SCHEMA', 'schema.rb', schema_source)
    return if matched_id

    # TODO: Find a better way to update schema for table changes;
    # This method throws away all history
    schema.drop_tables
    schema.create_tables
    md5.save
  end


  # Checks to see if an MD5 record exists for +string+ under the given
  # object type and name.
  #
  # @return [Array] Two values: the md5_id of the matching record (or nil
  #   if no record matches), and an MD5 object. When no record matches,
  #   the MD5 object is new and has not been saved.
  def self.check(obj_type, obj_name, string)
    digest = Digest::MD5.hexdigest(string)
    # Attempt to retrieve the MD5 for the schema; could fail if not deployed
    existing = begin
      self.for(obj_name, obj_type, digest)
    rescue StandardError
      nil
    end
    return [existing.md5_id, existing] if existing

    [nil, new(obj_type, obj_name, string, digest)]
  end


  # Create a new MD5 hash of an object. The object version is one more
  # than the highest version recorded for the same name and type.
  #
  # @param obj_type [String] The type of object being hashed.
  # @param obj_name [String] The name of the object being hashed.
  # @param string [String] The content to hash when +digest+ is not given.
  # @param digest [String, nil] A pre-computed MD5 hex digest of +string+.
  def initialize(obj_type, obj_name, string, digest = nil)
    versions = self.class.where(Sequel.function(:upper, :object_name) => obj_name.upcase,
                                Sequel.function(:upper, :object_type) => obj_type.upcase)
    latest = versions.max(:object_version) || 0
    super(object_type: obj_type, object_name: obj_name,
          object_version: latest + 1,
          md5_digest: digest || Digest::MD5.hexdigest(string),
          md5_created_at: model.dataset.current_datetime)
  end

end
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
# Records details of job definitions
|
74
|
+
class Job < Sequel::Model(:batch_job)

  many_to_one :md5, class: MD5, key: :job_file_md5_id

  plugin :timestamps, create: :job_created_at, update: :job_modified_at,
    update_on_create: true


  # Ensures that the job described by +job_def+ has been registered in
  # the batch database. An existing job (matched on class and host) is
  # updated when the MD5 of its source file differs from the one recorded;
  # otherwise a new job record is created. The job id and version are
  # copied back onto +job_def+.
  #
  # @param job_def [Job::Definition] The definition of the job.
  # @return [Job] The job record.
  def self.register(job_def)
    existing = where(job_class: job_def.job_class.name,
                     job_host: job_def.computer).first
    source = IO.read(job_def.file)
    md5_id, md5 = MD5.check('JOB', "//#{job_def.computer}/#{job_def.file}", source)
    md5.save unless md5_id

    job =
      if existing
        # Existing job
        if md5_id != existing.job_file_md5_id
          existing.update(job_name: job_def.name, job_method: job_def.method_name,
                          job_desc: job_def.description, job_file: job_def.file,
                          job_version: md5.object_version, md5: md5)
        end
        existing
      else
        # New job
        new(job_def, md5).save
      end

    job_def.job_id = job.job_id
    job_def.job_version = job.job_version
    job
  end


  # Logger used when registering jobs.
  def log
    @log ||= LogManager.logger('batch-kit.job')
  end


  # Create a job record from a job definition, with all run statistics
  # set to zero.
  #
  # @param job_def [Job::Definition] The definition of the job.
  # @param md5 [MD5] The MD5 record for the job's source file.
  def initialize(job_def, md5)
    log.detail "Registering job '#{job_def.name}' on #{job_def.computer} in batch database"
    super(job_name: job_def.name, job_class: job_def.job_class.name,
          job_method: job_def.method_name, job_desc: job_def.description,
          job_host: job_def.computer, job_file: job_def.file,
          job_version: md5.object_version, md5: md5,
          job_run_count: 0, job_success_count: 0, job_fail_count: 0,
          job_abort_count: 0, job_min_success_duration_ms: 0,
          job_max_success_duration_ms: 0, job_mean_success_duration_ms: 0,
          job_m2_success_duration_ms: 0)
  end


  # Record the start of a job run
  #
  # @param job_run [JobRun] The JobRun instance that has commenced.
  def job_start(job_run)
    self.job_last_run_at = job_run.start_time
    self.job_run_count += 1
    save
  end


  # Record the successful completion of the JobRun, updating the minimum,
  # maximum, running mean and M2 accumulator of successful run durations.
  #
  # @param job_run [JobRun] The JobRun instance that has completed.
  def job_success(job_run)
    self.job_success_count += 1
    successes = job_success_count
    elapsed_ms = job_run.elapsed * 1000
    delta = elapsed_ms - job_mean_success_duration_ms

    # A stored value of 0 means no duration has been recorded yet
    self.job_min_success_duration_ms =
      if job_min_success_duration_ms == 0
        elapsed_ms
      else
        [job_min_success_duration_ms, elapsed_ms].min
      end
    self.job_max_success_duration_ms =
      if job_max_success_duration_ms == 0
        elapsed_ms
      else
        [job_max_success_duration_ms, elapsed_ms].max
      end

    mean = job_mean_success_duration_ms + delta / successes
    self.job_mean_success_duration_ms = mean
    self.job_m2_success_duration_ms += delta * (elapsed_ms - mean)
    save
  end


  # Record the failure of a JobRun.
  #
  # @param job_run [JobRun] The JobRun instance that has failed.
  def job_failure(job_run)
    self.job_fail_count += 1
    save
  end


  # Record that a JobRun has been aborted.
  #
  # @param job_run [JobRun] The JobRun instance that has aborted.
  def job_abort(job_run)
    self.job_abort_count += 1
    save
  end


  # Record that a JobRun has timed out. This happens when the database
  # finds an instance in the table that has been running for a long
  # period without any activity. Timeouts are counted as aborts.
  #
  # @param job_run [JobRun] The job run that has timed out.
  def job_timeout(job_run)
    self.job_abort_count += 1
    save
  end


  # Register the job before it executes; this subscriber always returns true.
  Events.subscribe(nil, 'job_run.pre-execute') do |job_obj, job_run, *args|
    Job.register(job_run.definition) if job_run.persist?
    true
  end

  # Update the job statistics as persisted job runs start and finish.
  Events.subscribe(nil, 'job_run.execute') do |job_obj, job_run, *args|
    Job[job_run.job_id].job_start(job_run) if job_run.persist?
  end
  Events.subscribe(nil, 'job_run.success') do |job_obj, job_run, result|
    Job[job_run.job_id].job_success(job_run) if job_run.persist?
  end
  Events.subscribe(nil, 'job_run.failure') do |job_obj, job_run, ex|
    Job[job_run.job_id].job_failure(job_run) if job_run.persist?
  end
  Events.subscribe(nil, 'job_run.abort') do |job_obj, job_run|
    Job[job_run.job_id].job_abort(job_run) if job_run.persist?
  end

end
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
# Records details of Task definitions
|
204
|
+
class Task < Sequel::Model(:batch_task)

  many_to_one :job, class: Job, key: :job_id

  plugin :timestamps, create: :task_created_at, update: :task_modified_at,
    update_on_create: true


  # Ensures that every task of the job described by +job_def+ has been
  # registered in the batch database. Tasks are matched on job id and
  # method name; the id of each task record is copied back onto its task
  # definition.
  #
  # @param job_def [Job::Definition] The definition of the job whose tasks
  #   are to be registered. Its job_id must already be set.
  def self.register(job_def)
    # Clear the current flag on all tasks of the job; tasks that are still
    # defined have it set again below.
    Task.where(job_id: job_def.job_id).update(task_current_flag: false)
    job_def.tasks.each do |task_key, task_def|
      task = self.where(job_id: job_def.job_id,
                        task_method: task_def.method_name.to_s).first
      if task
        # NOTE(review): the flag is cleared with +false+ above but set with
        # 'Y' here, and is not set at all for newly created tasks - confirm
        # the column type and default in schema.rb.
        task.update(task_name: task_def.name, task_class: task_def.task_class.name,
                    task_desc: task_def.description, task_current_flag: 'Y')
      else
        task = Task.new(task_def).save
      end
      task_def.task_id = task.task_id
    end
  end


  # Create a task record from a task definition, with all run statistics
  # set to zero.
  #
  # @param task_def [Task::Definition] The definition of the task.
  def initialize(task_def)
    super(job_id: task_def.job.job_id, job_version: task_def.job.job_version,
          task_name: task_def.name, task_class: task_def.task_class.name,
          task_method: task_def.method_name.to_s, task_desc: task_def.description,
          task_run_count: 0, task_success_count: 0, task_fail_count: 0,
          task_abort_count: 0, task_min_success_duration_ms: 0,
          task_max_success_duration_ms: 0, task_mean_success_duration_ms: 0,
          task_m2_success_duration_ms: 0)
  end


  # Record the start of a task run.
  #
  # @param task_run [TaskRun] The task run that has commenced.
  def task_start(task_run)
    self.task_last_run_at = task_run.start_time
    self.task_run_count += 1
    self.save
  end


  # Record the successful completion of a task run, updating the minimum,
  # maximum, running mean and M2 accumulator of successful run durations.
  #
  # @param task_run [TaskRun] The task run that has completed.
  def task_success(task_run)
    self.task_success_count += 1
    n = self.task_success_count
    ms = task_run.elapsed * 1000
    delta = ms - self.task_mean_success_duration_ms
    # A stored value of 0 means no duration has been recorded yet
    self.task_min_success_duration_ms = self.task_min_success_duration_ms == 0 ?
      ms : [self.task_min_success_duration_ms, ms].min
    self.task_max_success_duration_ms = self.task_max_success_duration_ms == 0 ?
      ms : [self.task_max_success_duration_ms, ms].max
    mean = self.task_mean_success_duration_ms += delta / n
    self.task_m2_success_duration_ms += delta * (ms - mean)
    self.save
  end


  # Record the failure of a task run.
  #
  # @param task_run [TaskRun] The task run that has failed.
  def task_failure(task_run)
    self.task_fail_count += 1
    self.save
  end


  # Record that a task run has been aborted.
  #
  # @param task_run [TaskRun] The task run that has aborted.
  def task_abort(task_run)
    self.task_abort_count += 1
    self.save
  end


  # Record that a task run has timed out. Timeouts are counted as aborts.
  #
  # @param task_run [TaskRun] The task run that has timed out.
  def task_timeout(task_run)
    self.task_abort_count += 1
    self.save
  end


  # Register the job's tasks before the job executes. Returns true in all
  # cases, as the Job pre-execute subscriber does, so that the block's
  # value does not depend on whether the run is persisted.
  Events.subscribe(nil, 'job_run.pre-execute') do |job_obj, job_run, *args|
    Task.register(job_run.definition) if job_run.persist?
    true
  end

  # Update the task statistics as persisted task runs start and finish.
  Events.subscribe(nil, 'task_run.execute') do |job_obj, task_run, *args|
    Task[task_run.task_id].task_start(task_run) if task_run.persist?
  end
  Events.subscribe(nil, 'task_run.success') do |job_obj, task_run, result|
    Task[task_run.task_id].task_success(task_run) if task_run.persist?
  end
  Events.subscribe(nil, 'task_run.failure') do |job_obj, task_run, ex|
    Task[task_run.task_id].task_failure(task_run) if task_run.persist?
  end
  Events.subscribe(nil, 'task_run.abort') do |job_obj, task_run|
    Task[task_run.task_id].task_abort(task_run) if task_run.persist?
  end

end
|
297
|
+
|
298
|
+
|
299
|
+
|
300
|
+
# Records details of job runs
|
301
|
+
class JobRun < Sequel::Model(:batch_job_run)

  many_to_one :job, class: Job, key: :job_id


  # Create a job run record from a framework job run.
  #
  # @param job_run [Job::Run] The job run to be recorded.
  def initialize(job_run)
    super(job_id: job_run.job_id,
          job_instance: job_run.instance,
          job_version: job_run.job_version,
          job_run_by: job_run.run_by,
          job_cmd_line: job_run.cmd_line,
          job_start_time: job_run.start_time,
          job_status: job_run.status.to_s.upcase,
          job_pid: job_run.pid)
  end


  # Save this record, and copy its job_run key back onto the framework
  # job run as its job_run_id.
  #
  # @param job_run [Job::Run] The job run that has commenced.
  def job_start(job_run)
    save
    # The parameter shadows the job_run column reader, hence the self.
    job_run.job_run_id = self.job_run
  end


  # Record the end time, final status and exit code of the job run, and
  # clear its process id.
  #
  # @param job_run [Job::Run] The job run that has ended.
  def job_end(job_run)
    self.job_end_time = job_run.end_time
    self.job_status = job_run.status.to_s.upcase
    self.job_pid = nil
    self.job_exit_code = job_run.exit_code
    save
  end


  # Mark this job run as timed out, ending now with an exit code of -1,
  # and record the timeout against the owning job.
  def timeout
    self.job_end_time = Time.now
    self.job_status = 'TIMEOUT'
    self.job_pid = nil
    self.job_exit_code = -1
    save

    Job[job_id].job_timeout(self)
  end


  # Create the job run record when a persisted job run starts executing.
  Events.subscribe(nil, 'job_run.execute', position: 0) do |job_obj, job_run, *args|
    next unless job_run.persist?
    JobRun.new(job_run).job_start(job_run)
  end

  # Complete the job run record once the job run has finished.
  Events.subscribe(nil, 'job_run.post-execute') do |job_obj, job_run, ok|
    next unless job_run.persist?
    JobRun[job_run.job_run_id].job_end(job_run)
  end

end
|
348
|
+
|
349
|
+
|
350
|
+
|
351
|
+
# Captures the value of all defined command-line arguments to the job
|
352
|
+
class JobRunArg < Sequel::Model(:batch_job_run_arg)

  unrestrict_primary_key


  # Save one record for each argument of +job_run+. String, numeric and
  # boolean values are stored as-is; any other value is stored as the
  # result of calling #inspect on it.
  #
  # @param job_run [Job::Run] The job run whose arguments are recorded.
  def self.from(job_run)
    job_args = job_run.job_args
    return job_args unless job_args

    job_args.each_pair do |name, val|
      storable = [String, Numeric, TrueClass, FalseClass].any? { |type| type === val }
      JobRunArg.new(job_run.job_run_id, name, storable ? val : val.inspect).save
    end
  end


  # Create a record for a single argument value.
  #
  # @param job_run [Object] The job run id the argument belongs to.
  # @param name [Object] The name of the argument.
  # @param val [Object] The value of the argument.
  def initialize(job_run, name, val)
    super(job_run: job_run, job_arg_name: name, job_arg_value: val)
  end


  # Record the arguments when a persisted job run starts executing.
  Events.subscribe(nil, 'job_run.execute') do |job_obj, job_run, *args|
    next unless job_run.persist?
    JobRunArg.from(job_run)
  end

end
|
378
|
+
|
379
|
+
|
380
|
+
|
381
|
+
# Captures details of a job run exception
|
382
|
+
class JobRunFailure < Sequel::Model(:batch_job_run_failure)

  many_to_one :job, class: Job, key: :job_id


  # Create a failure record for a job run. The exception message is cut
  # to 500 characters and the backtrace to 4000 characters.
  #
  # @param job_run [Job::Run] The job run that failed.
  # @param ex [Exception] The exception that caused the failure.
  def initialize(job_run, ex)
    # Exception#backtrace is nil for an exception that was never raised;
    # Array() turns that into an empty backtrace rather than an error.
    backtrace = Array(ex.backtrace).join("\n")
    super(job_run: job_run.job_run_id, job_id: job_run.definition.job_id,
          job_version: job_run.definition.job_version, job_failed_at: Time.now,
          exception_message: ex.message.to_s[0...500],
          exception_backtrace: backtrace[0...4000])
  end


  # Record the exception when a persisted job run fails.
  Events.subscribe(nil, 'job_run.failure') do |job_obj, job_run, ex|
    JobRunFailure.new(job_run, ex).save if job_run.persist?
  end

end
|
400
|
+
|
401
|
+
|
402
|
+
|
403
|
+
# Capture details of a task run
|
404
|
+
class TaskRun < Sequel::Model(:batch_task_run)

  many_to_one :task, class: Task, key: :task_id


  # Create a task run record from a framework task run.
  #
  # @param task_run [Task::Run] The task run to be recorded.
  def initialize(task_run)
    super(task_id: task_run.task_id,
          job_run: task_run.job_run.job_run_id,
          task_instance: task_run.instance,
          task_start_time: task_run.start_time,
          task_status: task_run.status.to_s.upcase)
  end


  # Save this record, and copy its task_run key back onto the framework
  # task run as its task_run_id.
  #
  # @param task_run [Task::Run] The task run that has commenced.
  def task_start(task_run)
    save
    # The parameter shadows the task_run column reader, hence the self.
    task_run.task_run_id = self.task_run
  end


  # Record the end time, final status and exit code of the task run.
  #
  # @param task_run [Task::Run] The task run that has ended.
  def task_end(task_run)
    self.task_end_time = task_run.end_time
    self.task_status = task_run.status.to_s.upcase
    self.task_exit_code = task_run.exit_code
    save
  end


  # Mark this task run as timed out, ending now with an exit code of -1,
  # and record the timeout against the owning task.
  def timeout
    self.task_end_time = Time.now
    self.task_status = 'TIMEOUT'
    self.task_exit_code = -1
    save

    Task[task_id].task_timeout(self)
  end


  # Create the task run record when a persisted task run starts executing.
  Events.subscribe(nil, 'task_run.execute', position: 0) do |job_obj, task_run, *args|
    next unless task_run.persist?
    TaskRun.new(task_run).task_start(task_run)
  end

  # Complete the task run record once the task run has finished.
  Events.subscribe(nil, 'task_run.post-execute') do |job_obj, task_run, ok|
    next unless task_run.persist?
    TaskRun[task_run.task_run_id].task_end(task_run)
  end

end
|
449
|
+
|
450
|
+
|
451
|
+
|
452
|
+
# Model for a single log message
|
453
|
+
class JobRunLog < Sequel::Model(:batch_job_run_log)

  unrestrict_primary_key


  # Attach a database log handler for +job_run+ to +logger+. The kind of
  # handler depends on the log framework reported by LogManager; for any
  # framework other than java.util.logging or Log4r nothing is attached.
  #
  # @param job_run [Job::Run] The job run whose log output is recorded.
  # @param logger [Object] The logger to which the handler is added.
  def self.install_log_handler(job_run, logger)
    framework = LogManager.log_framework
    if framework == :java_util_logging
      require_relative 'java_util_log_handler'
      logger.addHandler(JavaUtilLogHandler.new(job_run))
    elsif framework == :log4r
      require_relative 'log4r_outputter'
      logger.add(Log4ROutputter.new(job_run))
    end
  end


  # Install the log handler when a persisted job run starts executing,
  # provided the job object exposes a logger via #log.
  Events.subscribe(nil, 'job_run.execute') do |job_obj, job_run, *args|
    next unless job_run.persist?
    logger = job_obj.respond_to?(:log) && job_obj.log
    JobRunLog.install_log_handler(job_run, logger) if logger
  end
end
|
478
|
+
|
479
|
+
|
480
|
+
|
481
|
+
# Model for a lock
|
482
|
+
class Lock < Sequel::Model(:batch_lock)

  unrestrict_primary_key


  # Attempt to obtain the lock named +lock_name+ for +job_run+.
  #
  # An existing lock that has already expired is deleted and treated as
  # absent. If an unexpired lock exists, no lock is granted, and details
  # of the current holder are written to +lock_holder+ when it is given.
  # Otherwise the lock is granted; a lock record is only saved when the
  # job run is persisted.
  #
  # @param job_run [Job::Run] The job run requesting the lock.
  # @param lock_name [String] The name of the lock.
  # @param lock_timeout [Numeric] Added to the current time to give the
  #   expiry time of the lock.
  # @param lock_holder [Hash, nil] Optional hash that receives
  #   +:lock_expires_at+ and +:lock_holder+ when the lock is already held.
  # @return [Time, nil] The time at which the granted lock expires, or nil
  #   if the lock is held by another job run.
  def self.lock?(job_run, lock_name, lock_timeout, lock_holder = nil)
    granted_until = nil
    dataset.db.transaction do
      held = where(lock_name: lock_name).first
      if held && held.lock_expires_at < Time.now
        # Expired lock; remove it and carry on as if it never existed
        where(lock_name: lock_name).delete
        held = nil
      end

      if held
        owner = JobRun.join(Job, :job_id => :job_id).where(job_run: held.job_run).first
        if lock_holder
          lock_holder[:lock_expires_at] = held.lock_expires_at.getlocal
          lock_holder[:lock_holder] = "job '#{owner[:job_name]}' (job run #{held.job_run})"
        end
      else
        granted_until = Time.now + lock_timeout
        if job_run.persist?
          new(lock_name: lock_name, job_run: job_run.job_run_id,
              lock_created_at: Time.now,
              lock_expires_at: granted_until).save
        end
      end
    end
    granted_until
  end


  # Release the lock named +lock_name+ held by +job_run+.
  #
  # @param job_run [Job::Run] The job run releasing the lock.
  # @param lock_name [String] The name of the lock.
  # @return [Boolean] true if the job run is persisted (and any matching
  #   lock record was deleted), false otherwise.
  def self.unlock?(job_run, lock_name)
    return false unless job_run.persist?

    where(lock_name: lock_name, job_run: job_run.job_run_id).delete
    true
  end


  # Answer lock and unlock requests published by Runnable.
  Events.subscribe(Runnable, 'lock?') do |job_run, lock_name, lock_timeout, lock_holder|
    Lock.lock?(job_run, lock_name, lock_timeout, lock_holder)
  end
  Events.subscribe(Runnable, 'unlock?') do |job_run, lock_name|
    Lock.unlock?(job_run, lock_name)
  end

end
|
535
|
+
|
536
|
+
|
537
|
+
|
538
|
+
# Model for the batch_request table; defines no behaviour of its own.
class Request < Sequel::Model(:batch_request); end
|
540
|
+
|
541
|
+
|
542
|
+
# Model for the batch_requestor table; defines no behaviour of its own.
class Requestor < Sequel::Model(:batch_requestor); end
|
544
|
+
|
545
|
+
|
546
|
+
end
|
547
|
+
|
548
|
+
end
|