ood_core 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/lib/ood_core/batch_connect/template.rb +17 -6
- data/lib/ood_core/batch_connect/templates/vnc.rb +2 -2
- data/lib/ood_core/job/adapters/drmaa.rb +1002 -0
- data/lib/ood_core/job/adapters/helper.rb +18 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +4 -3
- data/lib/ood_core/job/adapters/lsf.rb +4 -2
- data/lib/ood_core/job/adapters/pbspro.rb +19 -8
- data/lib/ood_core/job/adapters/sge/batch.rb +203 -0
- data/lib/ood_core/job/adapters/sge/helper.rb +65 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +116 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +138 -0
- data/lib/ood_core/job/adapters/sge.rb +163 -0
- data/lib/ood_core/job/adapters/slurm.rb +16 -5
- data/lib/ood_core/job/adapters/torque/attributes.rb +109 -0
- data/lib/ood_core/job/adapters/torque/batch.rb +470 -0
- data/lib/ood_core/job/adapters/torque/error.rb +403 -0
- data/lib/ood_core/job/adapters/torque/ffi.rb +430 -0
- data/lib/ood_core/job/adapters/torque.rb +23 -18
- data/lib/ood_core/job/status.rb +3 -13
- data/lib/ood_core/refinements/drmaa_extensions.rb +21 -0
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +3 -3
- metadata +23 -9
@@ -0,0 +1,18 @@
|
|
1
|
+
require "pathname"

module OodCore
  module Job
    module Adapters
      # Shared utilities for the job adapters
      #
      # @api private
      class Helper
        # Get the configured path to a command allowing overrides from bin_overrides
        #
        # @param cmd [#to_s] the desired command
        # @param bin_default [#to_s] the default directory in which to find cmd on the file system
        # @param bin_overrides [#to_h, nil] commands associated with the full path to their replacement
        #   e.g. {'squeue' => '/usr/local/slurm/bin/squeue'}
        # @return [String] path to the configured command
        def self.bin_path(cmd, bin_default, bin_overrides)
          # nil.to_h is {}, so an unset/empty bin_overrides setting falls
          # through to the default location instead of raising NoMethodError
          bin_overrides.to_h.fetch(cmd.to_s) do
            Pathname.new(bin_default.to_s).join(cmd.to_s).to_s
          end
        end
      end
    end
  end
end
|
@@ -2,20 +2,21 @@
|
|
2
2
|
#
|
3
3
|
# @api private
|
4
4
|
class OodCore::Job::Adapters::Lsf::Batch
|
5
|
-
attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster
|
5
|
+
attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides
|
6
6
|
|
7
7
|
# The root exception class that all LSF-specific exceptions inherit
|
8
8
|
# from
|
9
9
|
class Error < StandardError; end
|
10
10
|
|
11
11
|
# @param bindir [#to_s] path to LSF installation binaries
|
12
|
-
def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", **_)
|
12
|
+
def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, **_)
|
13
13
|
@bindir = Pathname.new(bindir.to_s)
|
14
14
|
|
15
15
|
@envdir = Pathname.new(envdir.to_s)
|
16
16
|
@libdir = Pathname.new(libdir.to_s)
|
17
17
|
@serverdir = Pathname.new(serverdir.to_s)
|
18
18
|
@cluster = cluster.to_s
|
19
|
+
@bin_overrides = bin_overrides
|
19
20
|
end
|
20
21
|
|
21
22
|
def default_env
|
@@ -139,7 +140,7 @@ class OodCore::Job::Adapters::Lsf::Batch
|
|
139
140
|
private
|
140
141
|
# Call a forked Lsf command for a given cluster
|
141
142
|
def call(cmd, *args, env: {}, stdin: "")
|
142
|
-
cmd =
|
143
|
+
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
|
143
144
|
args = cluster_args + args
|
144
145
|
env = default_env.merge(env.to_h)
|
145
146
|
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/job/adapters/helper"
|
2
3
|
|
3
4
|
module OodCore
|
4
5
|
module Job
|
@@ -12,6 +13,7 @@ module OodCore
|
|
12
13
|
# @option config [#to_s] :envdir ('') Path to lsf client conf dir
|
13
14
|
# @option config [#to_s] :serverdir ('') Path to lsf client etc dir
|
14
15
|
# @option config [#to_s] :cluster ('') name of cluster, if in multi-cluster mode
|
16
|
+
# @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
|
15
17
|
def self.build_lsf(config)
|
16
18
|
batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
|
17
19
|
Adapters::Lsf.new(batch: batch)
|
@@ -123,8 +125,8 @@ module OodCore
|
|
123
125
|
elsif owners.count == 0
|
124
126
|
[]
|
125
127
|
else
|
126
|
-
|
127
|
-
|
128
|
+
batch.get_jobs_for_user(owners.first).map { |v| info_for_batch_hash(v) }
|
129
|
+
end
|
128
130
|
rescue Batch::Error => e
|
129
131
|
raise JobAdapterError, e.message
|
130
132
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "time"
|
2
2
|
require "ood_core/refinements/hash_extensions"
|
3
|
+
require "ood_core/job/adapters/helper"
|
3
4
|
|
4
5
|
module OodCore
|
5
6
|
module Job
|
@@ -12,12 +13,14 @@ module OodCore
|
|
12
13
|
# @option config [Object] :exec (nil) Path to PBS Pro executables
|
13
14
|
# @option config [Object] :qstat_factor (nil) Deciding factor on how to
|
14
15
|
# call qstat for a user
|
16
|
+
# @option config [#to_h] :bin_overrides ({}) Optional overrides to PBS Pro client executables
|
15
17
|
def self.build_pbspro(config)
|
16
18
|
c = config.to_h.compact.symbolize_keys
|
17
19
|
host = c.fetch(:host, nil)
|
18
|
-
|
20
|
+
pbs_exec = c.fetch(:exec, nil)
|
19
21
|
qstat_factor = c.fetch(:qstat_factor, nil)
|
20
|
-
|
22
|
+
bin_overrides = c.fetch(:bin_overrides, {})
|
23
|
+
pbspro = Adapters::PBSPro::Batch.new(host: host, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
|
21
24
|
Adapters::PBSPro.new(pbspro: pbspro, qstat_factor: qstat_factor)
|
22
25
|
end
|
23
26
|
end
|
@@ -40,9 +43,15 @@ module OodCore
|
|
40
43
|
|
41
44
|
# The path containing the PBS executables
|
42
45
|
# @example
|
43
|
-
# my_batch.
|
46
|
+
# my_batch.pbs_exec.to_s #=> "/usr/local/pbspro/10.0.0"
|
44
47
|
# @return [Pathname, nil] path to pbs executables
|
45
|
-
attr_reader :
|
48
|
+
attr_reader :pbs_exec
|
49
|
+
|
50
|
+
# Optional overrides for PBS Pro client executables
|
51
|
+
# @example
|
52
|
+
# {'qsub' => '/usr/local/bin/qsub'}
|
53
|
+
# @return Hash<String, String>
|
54
|
+
attr_reader :bin_overrides
|
46
55
|
|
47
56
|
# The root exception class that all PBS Pro-specific exceptions
|
48
57
|
# inherit from
|
@@ -50,9 +59,10 @@ module OodCore
|
|
50
59
|
|
51
60
|
# @param host [#to_s, nil] the batch server host
|
52
61
|
# @param pbs_exec [#to_s, nil] path to pbs executables
|
53
|
-
def initialize(host: nil,
|
62
|
+
def initialize(host: nil, pbs_exec: nil, bin_overrides: {})
|
54
63
|
@host = host && host.to_s
|
55
|
-
@
|
64
|
+
@pbs_exec = pbs_exec && Pathname.new(pbs_exec.to_s)
|
65
|
+
@bin_overrides = bin_overrides
|
56
66
|
end
|
57
67
|
|
58
68
|
# Get a list of hashes detailing each of the jobs on the batch server
|
@@ -147,11 +157,12 @@ module OodCore
|
|
147
157
|
# Call a forked PBS Pro command for a given batch server
|
148
158
|
def call(cmd, *args, env: {}, stdin: "", chdir: nil)
|
149
159
|
cmd = cmd.to_s
|
150
|
-
|
160
|
+
bindir = (!!pbs_exec) ? pbs_exec.join("bin").to_s : ''
|
161
|
+
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
|
151
162
|
args = args.map(&:to_s)
|
152
163
|
env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
|
153
164
|
env["PBS_DEFAULT"] = host.to_s if host
|
154
|
-
env["PBS_EXEC"] =
|
165
|
+
env["PBS_EXEC"] = pbs_exec.to_s if pbs_exec
|
155
166
|
chdir ||= "."
|
156
167
|
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
|
157
168
|
s.success? ? o : raise(Error, e)
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# Patch to allow runtime setting of the libdrmaa path
module FFI_DRMAA
  class << self
    # Override the shared library name/path reported by libdrmaa_path
    attr_writer :libdrmaa_path

    # @return [String] the configured library path, or 'libdrmaa.so' when none
    #   has been set
    def libdrmaa_path
      @libdrmaa_path || 'libdrmaa.so'
    end
  end
end
|
11
|
+
|
12
|
+
# Object used for simplified communication with a SGE batch server
#
# @api private
class OodCore::Job::Adapters::Sge::Batch
  using OodCore::Refinements::HashExtensions

  attr_reader :bin, :bin_overrides, :conf, :cluster, :sge_root, :helper

  require "ood_core/job/adapters/sge/qstat_xml_j_r_listener"
  require "ood_core/job/adapters/sge/qstat_xml_r_listener"
  require "ood_core/job/adapters/sge/helper"
  require "ood_core/job/adapters/helper"
  require 'open3'
  require 'pathname'
  require 'time'

  # The root exception class raised when an SGE command fails
  class Error < StandardError; end

  # @param config [#fetch] the options defining this batch object
  # @option config [Object] :cluster (nil) the cluster this object talks to
  # @option config [#to_s] :conf (nil) path stored in {#conf}
  # @option config [#to_s] :bin (nil) directory containing the SGE client executables
  # @option config [String, Pathname] :sge_root (nil) SGE root; DRMAA is only loaded when set
  # @option config [Hash<String, String>] :bin_overrides ({}) full paths replacing individual executables
  # @option config [String] :libdrmaa_path (nil) location of libdrmaa, used only when :sge_root is set
  #
  # @api private
  # @see Factory.build_sge
  def initialize(config)
    @cluster = config.fetch(:cluster, nil)
    # to_s keeps a missing setting from raising TypeError in Pathname.new(nil);
    # an empty bin directory leaves commands to be resolved through PATH
    @conf = Pathname.new(config.fetch(:conf, nil).to_s)
    @bin = Pathname.new(config.fetch(:bin, nil).to_s)
    root = config.fetch(:sge_root, nil)
    @sge_root = root ? Pathname.new(root) : nil
    @bin_overrides = config.fetch(:bin_overrides, {})

    load_drmaa(config[:libdrmaa_path]) if sge_root

    @helper = OodCore::Job::Adapters::Sge::Helper.new
  end

  # Load the DRMAA bindings, optionally pointing them at a specific libdrmaa
  # @param libdrmaa_path [String, nil] path handed to FFI_DRMAA before loading
  # @return [void]
  def load_drmaa(libdrmaa_path)
    # the path must be set before the bindings are required
    FFI_DRMAA.libdrmaa_path = libdrmaa_path if libdrmaa_path
    require "ood_core/job/adapters/drmaa"
    require "ood_core/refinements/drmaa_extensions"
  end

  # Get OodCore::Job::Info for every enqueued job, optionally filtering on owner
  # @param owner [#to_s] the owner or owner list
  # @return [Array<OodCore::Job::Info>]
  def get_all(owner: nil)
    listener = QstatXmlRListener.new
    argv = ['qstat', '-r', '-xml']
    argv += ['-u', owner] unless owner.nil?
    REXML::Parsers::StreamParser.new(call(*argv), listener).parse

    listener.parsed_jobs.map do |job_hash|
      OodCore::Job::Info.new(**post_process_qstat_job_hash(job_hash))
    end
  end

  # Get OodCore::Job::Info for a job_id that may still be in the queue
  #
  # If @sge_root is nil or libdrmaa is not loaded then we cannot use DRMAA. Using
  # DRMAA provides better job status and should always be chosen if it is possible.
  #
  # When qstat is called in XML mode for a job id that is not in the queue invalid XML
  # is returned. The second line of the invalid XML contains the string '<unknown_jobs'
  # which will be used to recognize this case.
  #
  # @param job_id [#to_s]
  # @raise [Error] when DRMAA rejects the job id
  # @return [OodCore::Job::Info] a :completed placeholder when qstat does not know the job
  def get_info_enqueued_job(job_id)
    job_info = OodCore::Job::Info.new(id: job_id.to_s, status: :completed)
    argv = ['qstat', '-r', '-xml', '-j', job_id.to_s]

    begin
      results = call(*argv)
      listener = QstatXmlJRListener.new
      REXML::Parsers::StreamParser.new(results, listener).parse

      job_hash = listener.parsed_job
      job_hash[:status] = get_status_from_drmma(job_id) if can_use_drmaa?

      job_info = OodCore::Job::Info.new(**job_hash)
    rescue REXML::ParseException => e
      # If the error is something other than a job not being found by qstat re-raise the error
      unless results =~ /unknown_jobs/
        raise e, "REXML::ParseException error and command '#{argv.join(' ')}' produced results that didn't contain string 'unknown_jobs'. ParseException: #{e.message}"
      end
    rescue StandardError => e
      # DRMAA is only defined once libdrmaa has been loaded, so its error class
      # cannot be named in the rescue clause itself: doing so would raise
      # NameError and hide the original exception
      raise Error, e.message if can_use_drmaa? && e.is_a?(DRMAA::DRMAAInvalidArgumentError)
      raise
    end

    job_info
  end

  # Whether job status can be looked up through DRMAA
  # @return [Object] truthy only when sge_root is set and the DRMAA constant is defined
  def can_use_drmaa?
    sge_root && Object.const_defined?('DRMAA')
  end

  # Call qhold
  # @param job_id [#to_s]
  # @return [void]
  def hold(job_id)
    call('qhold', job_id)
  end

  # Call qrls
  # @param job_id [#to_s]
  # @return [void]
  def release(job_id)
    call('qrls', job_id)
  end

  # Call qdel
  # @param job_id [#to_s]
  # @return [void]
  def delete(job_id)
    call('qdel', job_id)
  end

  # Call qsub with arguments and the scripts content
  # @param content [#to_s] the script, passed to qsub on stdin
  # @param args [Array<#to_s>] the qsub argument vector
  # @return job_id [String]
  def submit(content, args)
    cmd = ['qsub'] + args
    @helper.parse_job_id_from_qsub(call(*cmd, :stdin => content))
  end

  # Call a forked SGE command for a given batch server
  # @param cmd [#to_s] command name, resolved through bin and bin_overrides
  # @param args [Array<#to_s>] arguments passed to the command
  # @param env [#to_h] environment variables for the child process
  # @param stdin [#to_s] data written to the command's stdin
  # @param chdir [#to_s, nil] working directory for the command ("." when nil)
  # @raise [Error] with the command's stderr when it exits unsuccessfully
  # @return [String] the command's stdout
  def call(cmd, *args, env: {}, stdin: "", chdir: nil)
    cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
    args = args.map(&:to_s)

    env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
    chdir ||= "."
    o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
    s.success? ? o : raise(Error, e)
  end

  # Adapted from http://www.softpanorama.org/HPC/Grid_engine/Queues/queue_states.shtml
  STATE_MAP = {
    'EhRqw' => :undetermined, # all pending states with error
    'Ehqw'  => :undetermined, # all pending states with error
    'Eqw'   => :undetermined, # all pending states with error
    'RS'    => :suspended,    # all suspended with re-submit
    'RT'    => :suspended,    # all suspended with re-submit
    'Rr'    => :running,      # running, re-submit
    'Rs'    => :suspended,    # all suspended with re-submit
    'Rt'    => :running,      # transferring, re-submit
    'RtS'   => :suspended,    # all suspended with re-submit
    'RtT'   => :suspended,    # all suspended with re-submit
    'Rts'   => :suspended,    # all suspended with re-submit
    'S'     => :suspended,    # queue suspended
    'T'     => :suspended,    # queue suspended by alarm
    'dRS'   => :completed,    # all running and suspended states with deletion
    'dRT'   => :completed,    # all running and suspended states with deletion
    'dRr'   => :completed,    # all running and suspended states with deletion
    'dRs'   => :completed,    # all running and suspended states with deletion
    'dRt'   => :completed,    # all running and suspended states with deletion
    'dS'    => :completed,    # all running and suspended states with deletion
    'dT'    => :completed,    # all running and suspended states with deletion
    'dr'    => :completed,    # all running and suspended states with deletion
    'ds'    => :completed,    # all running and suspended states with deletion
    'dt'    => :completed,    # all running and suspended states with deletion
    'hRwq'  => :queued_held,  # pending, system hold, re-queue
    'hqw'   => :queued_held,  # pending, system hold
    'qw'    => :queued,       # pending
    'r'     => :running,      # running
    's'     => :suspended,    # suspended
    't'     => :running,      # transferring
    'tS'    => :suspended,    # queue suspended
    'tT'    => :suspended,    # queue suspended by alarm
    'ts'    => :suspended,    # obsuspended
  }.freeze

  # Map a qstat state code onto an OodCore status symbol
  # @param sge_state_code [String]
  # @return [Symbol] :undetermined for codes missing from STATE_MAP
  def translate_sge_state(sge_state_code)
    STATE_MAP.fetch(sge_state_code, :undetermined)
  end

  # Map a DRMAA state code onto an OodCore status symbol
  # @param drmaa_state_code [Object] key into DRMAA::DRMMA_TO_OOD_STATE_MAP
  # @return [Symbol] :undetermined for unknown codes
  def translate_drmaa_state(drmaa_state_code)
    DRMAA::DRMMA_TO_OOD_STATE_MAP.fetch(drmaa_state_code, :undetermined)
  end

  # Fill in :wallclock_time and translate the native :status of a job hash
  # produced by QstatXmlRListener. The hash is modified in place.
  # @param job_hash [Hash]
  # @return [Hash] the same hash
  def post_process_qstat_job_hash(job_hash)
    # dispatch is not set if the job is not running
    unless job_hash.key?(:wallclock_time)
      job_hash[:wallclock_time] = job_hash.key?(:dispatch_time) ? Time.now.to_i - job_hash[:dispatch_time] : 0
    end

    job_hash[:status] = translate_sge_state(job_hash[:status])

    job_hash
  end

  # Get the job status using DRMAA
  # @param job_id [#to_s]
  # @return [Symbol]
  def get_status_from_drmma(job_id)
    # exports SGE_ROOT into this process's environment before querying DRMAA
    ENV['SGE_ROOT'] = sge_root.to_s
    translate_drmaa_state(DRMAA::SessionSingleton.instance.job_ps(job_id.to_s))
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Helpers that translate between OodCore objects and SGE's qsub interface
class OodCore::Job::Adapters::Sge::Helper
  require 'ood_core/job/adapters/sge'

  using OodCore::Refinements::ArrayExtensions

  # Convert seconds to duration
  # @param time [#to_i]
  # @return [String] an SGE qsub compatible wallclock limit
  def seconds_to_duration(time)
    time = time.to_i
    "%02d:%02d:%02d" % [time/3600, time/60%60, time%60]
  end

  # Convert script and job dependencies to qsub argument vector
  # @param script [Object] the job script whose attributes are translated
  # @param after [Array] unsupported; must be empty
  # @param afterok [#to_s, Array<#to_s>] job ids this job is held on
  # @param afternotok [Array] unsupported; must be empty
  # @param afterany [Array] unsupported; must be empty
  # @raise [Error] when an unsupported dependency type is requested
  # @return args [Array<String>]
  def batch_submit_args(script, after: [], afterok: [], afternotok: [], afterany: [])
    raise_error_on_unsupported_args(script, after: after, afterok: afterok, afternotok: afternotok, afterany: afterany)

    args = []
    args += ['-h'] if script.submit_as_hold
    args += ['-r', 'yes'] if script.rerunnable
    script.job_environment.each_pair {|k, v| args += ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
    args += ['-wd', script.workdir] unless script.workdir.nil?

    on_event_email = []
    on_event_email << 'b' if script.email_on_started # beginning
    on_event_email << 'ea' if script.email_on_terminated # end, aborted

    # only the first address is passed on; an empty list would otherwise
    # produce a nil -M argument
    if script.email && ! script.email.empty? && ! on_event_email.empty?
      args += ['-M', script.email.first, '-m', on_event_email.join]
    end

    afterok = Array(afterok).map(&:to_s)
    args += ['-hold_jid_ad', afterok.join(',')] unless afterok.empty?

    args += ['-N', script.job_name] unless script.job_name.nil?
    args += ['-e', script.error_path] unless script.error_path.nil?
    args += ['-o', script.output_path] unless script.output_path.nil?
    args += ['-ar', script.reservation_id] unless script.reservation_id.nil?
    args += ['-q', script.queue_name] unless script.queue_name.nil?
    args += ['-p', script.priority] unless script.priority.nil?
    args += ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
    args += ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
    args += ['-P', script.accounting_id] unless script.accounting_id.nil?
    args += Array.wrap(script.native) if script.native

    args
  end

  # Raise exceptions when adapter is asked to perform an action that SGE does not support
  # @raise [Error] when an incompatible action is requested
  def raise_error_on_unsupported_args(script, after:, afterok:, afternotok:, afterany:)
    # SGE job dependencies only supports one kind of event: completion
    raise OodCore::Job::Adapters::Sge::Error.new('SGE does not support job dependencies on after start') if after && ! after.empty?
    raise OodCore::Job::Adapters::Sge::Error.new('SGE does not support job dependencies on after not ok') if afternotok && ! afternotok.empty?
    raise OodCore::Job::Adapters::Sge::Error.new('SGE does not support job dependencies on after any') if afterany && ! afterany.empty?
  end

  # Extract the job id from qsub's output
  # e.g. Your job 1043 ("job_16") has been submitted
  # @param qsub_output [#to_s]
  # @raise [Error] when the output does not contain a job id
  # @return job_id [String]
  def parse_job_id_from_qsub(qsub_output)
    output = qsub_output.to_s
    match = /Your job(?:-array)? (?<job_id>[0-9]+)/.match(output)
    # fail with the offending output rather than NoMethodError on nil
    raise OodCore::Job::Adapters::Sge::Error.new("Unable to parse job id from qsub output: #{output.inspect}") if match.nil?

    match[:job_id]
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'rexml/document'
require 'rexml/streamlistener'
require 'date'

# An XML stream listener that collects the attributes of a single job from
# qstat output into a Hash
#
# Handles parsing `qstat -xml -r -j` which provides:
#   :accounting_id
#   :id
#   :job_name
#   :job_owner
#   :procs
#   :queue_name
#   :status
#   :wallclock_limit
#   :wallclock_time (time elapsed since JAT_start_time, when that tag is present)
class QstatXmlJRListener
  # @return [Hash] the attributes collected so far
  attr_reader :parsed_job

  include REXML::StreamListener

  def initialize
    @parsed_job = {
      :status => :queued,
      :procs => 1,  # un-knowable from SGE qstat output
      :native => {}  # TODO: improve native attribute reporting
    }
    @current_text = nil
    @current_request = nil
  end

  # Dispatch a closing tag to its handler; unrecognised tags are ignored.
  # Only tags with a handler defined on this class are listed, so no closing
  # tag can trigger a NameError.
  # @param name [String] the name of the tag being closed
  def tag_end(name)
    case name
    when 'JB_ja_tasks'
      end_JB_ja_tasks
    when 'JB_job_number'
      end_JB_job_number
    when 'JB_job_name'
      end_JB_job_name
    when 'JB_owner'
      end_JB_owner
    when 'JB_project'
      end_JB_project
    when 'JB_submission_time'
      end_JB_submission_time
    when 'JAT_start_time'
      end_JAT_start_time
    when 'CE_name'
      end_CE_name
    when 'CE_stringval'
      end_CE_stringval
    when 'QR_name'
      end_QR_name
    end
  end

  # Always store text nodes temporarily
  def text(text)
    @current_text = text
  end

  # Attributes we need
  def end_JB_job_number
    @parsed_job[:id] = @current_text
  end

  def end_JB_owner
    @parsed_job[:job_owner] = @current_text
  end

  def end_JB_project
    @parsed_job[:accounting_id] = @current_text
  end

  def end_JB_job_name
    @parsed_job[:job_name] = @current_text
  end

  def end_JB_submission_time
    @parsed_job[:submission_time] = @current_text.to_i
  end

  # The presence of a JB_ja_tasks element marks the job as running
  def end_JB_ja_tasks
    @parsed_job[:status] = :running
  end

  # A start time marks the job as running and fixes its elapsed wallclock time
  def end_JAT_start_time
    @parsed_job[:status] = :running
    @parsed_job[:dispatch_time] = @current_text.to_i
    @parsed_job[:wallclock_time] = Time.now.to_i - @parsed_job[:dispatch_time]
  end

  # Remember which resource request the next CE_stringval belongs to
  def end_CE_name
    @current_request = @current_text
  end

  def end_CE_stringval
    return nil if @current_request.nil?

    case @current_request
    when 'h_rt'  # hard run time limit
      @parsed_job[:wallclock_limit] = @current_text.to_i
    end
  end

  def end_QR_name
    @parsed_job[:queue_name] = @current_text
  end
end
|
116
|
+
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'rexml/document'
require 'rexml/streamlistener'
require 'date'
require 'time'

# An XML stream listener that collects one Hash of job attributes per
# job_list element found in qstat output
#
# Handles parsing `qstat -xml -r` which provides:
#   :accounting_id
#   :id
#   :job_name
#   :job_owner
#   :procs
#   :queue_name
#   :status
#   :wallclock_limit
class QstatXmlRListener
  # @return [Array<Hash>] one Hash per completed job_list element
  attr_reader :parsed_jobs

  include REXML::StreamListener

  def initialize
    @parsed_jobs = []
    @current_job = blank_job
    @current_text = nil

    @current_request = nil
  end

  # Dispatch an opening tag to its handler; unrecognised tags are ignored
  # @param name [String] the name of the tag being opened
  # @param attributes [Hash] the tag's attributes
  def tag_start(name, attributes)
    case name
    when 'hard_request'
      start_hard_request(attributes)
    end
  end

  # Dispatch a closing tag to its handler; unrecognised tags are ignored
  # @param name [String] the name of the tag being closed
  def tag_end(name)
    case name
    when 'job_list'
      end_job_list
    when 'JB_job_number'
      end_JB_job_number
    when 'JB_name'
      end_JB_name
    when 'JB_owner'
      end_JB_owner
    when 'JB_project'
      end_JB_project
    when 'state'
      end_state
    when 'slots'
      end_slots
    when 'JB_submission_time'
      end_JB_submission_time
    when 'hard_req_queue'
      end_hard_req_queue
    when 'JAT_start_time'
      end_JAT_start_time
    when 'hard_request'
      end_hard_request
    end
  end

  # Always store text nodes temporarily
  def text(text)
    @current_text = text
  end

  # Handle hard_request tags
  #
  # Multiple hard_request tags may be present and will be differentiated using their name attribute
  def start_hard_request(attributes)
    @current_request = attributes.key?('name') ? attributes['name'] : nil
  end

  # Attributes we need
  def end_JB_job_number
    @current_job[:id] = @current_text
  end

  def end_JB_owner
    @current_job[:job_owner] = @current_text
  end

  def end_JB_project
    @current_job[:accounting_id] = @current_text
  end

  def end_JB_name
    @current_job[:job_name] = @current_text
  end

  # Note that this is the native SGE type
  def end_state
    @current_job[:status] = @current_text
  end

  def end_slots
    @current_job[:procs] = @current_text.to_i
  end

  def end_hard_req_queue
    @current_job[:queue_name] = @current_text
  end

  def end_JB_submission_time
    @current_job[:submission_time] = timestamp_to_epoch(@current_text)
  end

  def end_JAT_start_time
    @current_job[:dispatch_time] = timestamp_to_epoch(@current_text)
  end

  def end_hard_request
    return nil if @current_request.nil?

    case @current_request
    when 'h_rt'  # hard run time limit
      @current_job[:wallclock_limit] = @current_text.to_i
    end
  end

  # Store a completed job and reset current_job for the next pass
  def end_job_list
    @parsed_jobs << @current_job
    @current_job = blank_job
  end

  private

  # @return [Hash] the starting attributes of a job that has not been parsed yet
  def blank_job
    {
      :native => {}  # TODO: improve native reporting
    }
  end

  # Convert a qstat timestamp into seconds since the epoch.
  #
  # Time.parse reads a timestamp without a UTC offset as local time, whereas
  # DateTime.parse would read it as UTC and skew the result by the host's
  # offset. NOTE(review): assumes qstat prints local time and that this
  # process shares the time zone of the host running qstat - confirm.
  #
  # @param timestamp [String]
  # @return [Integer]
  def timestamp_to_epoch(timestamp)
    Time.parse(timestamp).to_i
  end
end
|
138
|
+
|