ood_core 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/lib/ood_core/batch_connect/template.rb +17 -6
- data/lib/ood_core/batch_connect/templates/vnc.rb +2 -2
- data/lib/ood_core/job/adapters/drmaa.rb +1002 -0
- data/lib/ood_core/job/adapters/helper.rb +18 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +4 -3
- data/lib/ood_core/job/adapters/lsf.rb +4 -2
- data/lib/ood_core/job/adapters/pbspro.rb +19 -8
- data/lib/ood_core/job/adapters/sge/batch.rb +203 -0
- data/lib/ood_core/job/adapters/sge/helper.rb +65 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +116 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +138 -0
- data/lib/ood_core/job/adapters/sge.rb +163 -0
- data/lib/ood_core/job/adapters/slurm.rb +16 -5
- data/lib/ood_core/job/adapters/torque/attributes.rb +109 -0
- data/lib/ood_core/job/adapters/torque/batch.rb +470 -0
- data/lib/ood_core/job/adapters/torque/error.rb +403 -0
- data/lib/ood_core/job/adapters/torque/ffi.rb +430 -0
- data/lib/ood_core/job/adapters/torque.rb +23 -18
- data/lib/ood_core/job/status.rb +3 -13
- data/lib/ood_core/refinements/drmaa_extensions.rb +21 -0
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +3 -3
- metadata +23 -9
@@ -0,0 +1,18 @@
|
|
1
|
+
require "pathname"

module OodCore
  module Job
    module Adapters
      # Utility methods shared by the job adapters.
      #
      # @api private
      class Helper
        # Get the configured path to a command allowing overrides from bin_overrides
        #
        # @example
        #   Helper.bin_path('squeue', '/usr/bin', {'squeue' => '/usr/local/slurm/bin/squeue'})
        #   #=> "/usr/local/slurm/bin/squeue"
        #   Helper.bin_path('squeue', '/usr/bin', {})
        #   #=> "/usr/bin/squeue"
        #
        # @param cmd [#to_s] the desired command
        # @param bin_default [#to_s] the default place to find cmd on the file system
        # @param bin_overrides [#to_h, nil] commands associated with the full path to their replacement
        #   e.g. {'squeue' => '/usr/local/slurm/bin/squeue'}
        # @return [String] path to the configured command
        def self.bin_path(cmd, bin_default, bin_overrides)
          name = cmd.to_s

          # `nil.to_h` is `{}`, so an unset `bin_overrides` option falls back
          # to the default location instead of raising NoMethodError.
          bin_overrides.to_h.fetch(name) { Pathname.new(bin_default.to_s).join(name).to_s }
        end
      end
    end
  end
end
|
@@ -2,20 +2,21 @@
|
|
2
2
|
#
|
3
3
|
# @api private
|
4
4
|
class OodCore::Job::Adapters::Lsf::Batch
|
5
|
-
attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster
|
5
|
+
attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides
|
6
6
|
|
7
7
|
# The root exception class that all LSF-specific exceptions inherit
|
8
8
|
# from
|
9
9
|
class Error < StandardError; end
|
10
10
|
|
11
11
|
# @param bin [#to_s] path to LSF installation binaries
|
12
|
-
def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", **_)
|
12
|
+
def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, **_)
|
13
13
|
@bindir = Pathname.new(bindir.to_s)
|
14
14
|
|
15
15
|
@envdir = Pathname.new(envdir.to_s)
|
16
16
|
@libdir = Pathname.new(libdir.to_s)
|
17
17
|
@serverdir = Pathname.new(serverdir.to_s)
|
18
18
|
@cluster = cluster.to_s
|
19
|
+
@bin_overrides = bin_overrides
|
19
20
|
end
|
20
21
|
|
21
22
|
def default_env
|
@@ -139,7 +140,7 @@ class OodCore::Job::Adapters::Lsf::Batch
|
|
139
140
|
private
|
140
141
|
# Call a forked Lsf command for a given cluster
|
141
142
|
def call(cmd, *args, env: {}, stdin: "")
|
142
|
-
cmd =
|
143
|
+
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
|
143
144
|
args = cluster_args + args
|
144
145
|
env = default_env.merge(env.to_h)
|
145
146
|
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/job/adapters/helper"
|
2
3
|
|
3
4
|
module OodCore
|
4
5
|
module Job
|
@@ -12,6 +13,7 @@ module OodCore
|
|
12
13
|
# @option config [#to_s] :envdir ('') Path to lsf client conf dir
|
13
14
|
# @option config [#to_s] :serverdir ('') Path to lsf client etc dir
|
14
15
|
# @option config [#to_s] :cluster ('') name of cluster, if in multi-cluster mode
|
16
|
+
# @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
|
15
17
|
def self.build_lsf(config)
|
16
18
|
batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
|
17
19
|
Adapters::Lsf.new(batch: batch)
|
@@ -123,8 +125,8 @@ module OodCore
|
|
123
125
|
elsif owners.count == 0
|
124
126
|
[]
|
125
127
|
else
|
126
|
-
|
127
|
-
|
128
|
+
batch.get_jobs_for_user(owners.first).map { |v| info_for_batch_hash(v) }
|
129
|
+
end
|
128
130
|
rescue Batch::Error => e
|
129
131
|
raise JobAdapterError, e.message
|
130
132
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "time"
|
2
2
|
require "ood_core/refinements/hash_extensions"
|
3
|
+
require "ood_core/job/adapters/helper"
|
3
4
|
|
4
5
|
module OodCore
|
5
6
|
module Job
|
@@ -12,12 +13,14 @@ module OodCore
|
|
12
13
|
# @option config [Object] :exec (nil) Path to PBS Pro executables
|
13
14
|
# @option config [Object] :qstat_factor (nil) Deciding factor on how to
|
14
15
|
# call qstat for a user
|
16
|
+
# @option config [#to_h] :bin_overrides ({}) Optional overrides to PBS Pro client executables
|
15
17
|
def self.build_pbspro(config)
|
16
18
|
c = config.to_h.compact.symbolize_keys
|
17
19
|
host = c.fetch(:host, nil)
|
18
|
-
|
20
|
+
pbs_exec = c.fetch(:exec, nil)
|
19
21
|
qstat_factor = c.fetch(:qstat_factor, nil)
|
20
|
-
|
22
|
+
bin_overrides = c.fetch(:bin_overrides, {})
|
23
|
+
pbspro = Adapters::PBSPro::Batch.new(host: host, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
|
21
24
|
Adapters::PBSPro.new(pbspro: pbspro, qstat_factor: qstat_factor)
|
22
25
|
end
|
23
26
|
end
|
@@ -40,9 +43,15 @@ module OodCore
|
|
40
43
|
|
41
44
|
# The path containing the PBS executables
|
42
45
|
# @example
|
43
|
-
# my_batch.
|
46
|
+
# my_batch.pbs_exec.to_s #=> "/usr/local/pbspro/10.0.0
|
44
47
|
# @return [Pathname, nil] path to pbs executables
|
45
|
-
attr_reader :
|
48
|
+
attr_reader :pbs_exec
|
49
|
+
|
50
|
+
# Optional overrides for PBS Pro client executables
|
51
|
+
# @example
|
52
|
+
# {'qsub' => '/usr/local/bin/qsub'}
|
53
|
+
# @return Hash<String, String>
|
54
|
+
attr_reader :bin_overrides
|
46
55
|
|
47
56
|
# The root exception class that all PBS Pro-specific exceptions
|
48
57
|
# inherit from
|
@@ -50,9 +59,10 @@ module OodCore
|
|
50
59
|
|
51
60
|
# @param host [#to_s, nil] the batch server host
|
52
61
|
# @param exec [#to_s, nil] path to pbs executables
|
53
|
-
def initialize(host: nil,
|
62
|
+
def initialize(host: nil, pbs_exec: nil, bin_overrides: {})
|
54
63
|
@host = host && host.to_s
|
55
|
-
@
|
64
|
+
@pbs_exec = pbs_exec && Pathname.new(pbs_exec.to_s)
|
65
|
+
@bin_overrides = bin_overrides
|
56
66
|
end
|
57
67
|
|
58
68
|
# Get a list of hashes detailing each of the jobs on the batch server
|
@@ -147,11 +157,12 @@ module OodCore
|
|
147
157
|
# Call a forked PBS Pro command for a given batch server
|
148
158
|
def call(cmd, *args, env: {}, stdin: "", chdir: nil)
|
149
159
|
cmd = cmd.to_s
|
150
|
-
|
160
|
+
bindir = (!!pbs_exec) ? pbs_exec.join("bin").to_s : ''
|
161
|
+
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
|
151
162
|
args = args.map(&:to_s)
|
152
163
|
env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
|
153
164
|
env["PBS_DEFAULT"] = host.to_s if host
|
154
|
-
env["PBS_EXEC"] =
|
165
|
+
env["PBS_EXEC"] = pbs_exec.to_s if pbs_exec
|
155
166
|
chdir ||= "."
|
156
167
|
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
|
157
168
|
s.success? ? o : raise(Error, e)
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# Patch that lets the libdrmaa path be chosen at runtime instead of being
# fixed ahead of time.
module FFI_DRMAA
  class << self
    # Set the libdrmaa path to use.
    # @param path [String, nil] the path to libdrmaa; a nil value restores the default
    attr_writer :libdrmaa_path

    # The libdrmaa path currently configured.
    # @return [String] the configured path, or 'libdrmaa.so' when none has been set
    def libdrmaa_path
      @libdrmaa_path || 'libdrmaa.so'
    end
  end
end
|
11
|
+
|
12
|
+
# Object used for simplified communication with a SGE batch server
#
# @api private
class OodCore::Job::Adapters::Sge::Batch
  using OodCore::Refinements::HashExtensions

  attr_reader :bin, :bin_overrides, :conf, :cluster, :sge_root, :helper

  require "ood_core/job/adapters/sge/qstat_xml_j_r_listener"
  require "ood_core/job/adapters/sge/qstat_xml_r_listener"
  require "ood_core/job/adapters/sge/helper"
  require "ood_core/job/adapters/helper"
  require 'open3'
  require 'pathname'
  require 'time'

  # Raised when a forked SGE command exits unsuccessfully or DRMAA rejects
  # its arguments
  class Error < StandardError; end

  # @param config [Hash] the options defining this batch object
  # @option config [Object] :cluster (nil) stored as-is and exposed through #cluster
  # @option config [#to_s] :conf (nil) converted to a Pathname and exposed through #conf
  # @option config [#to_s] :bin (nil) directory searched for the SGE client executables
  # @option config [#to_s] :sge_root (nil) when set, DRMAA is loaded and used for job status
  # @option config [Hash<String, String>] :bin_overrides ({}) full paths replacing individual executables
  # @option config [String] :libdrmaa_path (nil) location of libdrmaa, only read when :sge_root is set
  #
  # @api private
  # @see Factory.build_sge
  def initialize(config)
    @cluster = config.fetch(:cluster, nil)
    # `to_s` keeps a missing option from raising TypeError in Pathname.new(nil)
    @conf = Pathname.new(config.fetch(:conf, nil).to_s)
    @bin = Pathname.new(config.fetch(:bin, nil).to_s)
    root = config.fetch(:sge_root, nil)
    @sge_root = root ? Pathname.new(root) : nil
    @bin_overrides = config.fetch(:bin_overrides, {})

    load_drmaa(config[:libdrmaa_path]) if sge_root

    @helper = OodCore::Job::Adapters::Sge::Helper.new
  end

  # Load the DRMAA bindings, optionally pointing them at a specific libdrmaa
  # @param libdrmaa_path [String, nil] path handed to FFI_DRMAA before the bindings are required
  # @return [void]
  def load_drmaa(libdrmaa_path)
    # The path has to be set before the require below
    FFI_DRMAA.libdrmaa_path = libdrmaa_path if libdrmaa_path
    require "ood_core/job/adapters/drmaa"
    require "ood_core/refinements/drmaa_extensions"
  end

  # Get OodCore::Job::Info for every enqueued job, optionally filtering on owner
  # @param owner [#to_s] the owner or owner list
  # @return [Array<OodCore::Job::Info>]
  def get_all(owner: nil)
    listener = QstatXmlRListener.new
    argv = ['qstat', '-r', '-xml']
    argv += ['-u', owner] unless owner.nil?
    REXML::Parsers::StreamParser.new(call(*argv), listener).parse

    listener.parsed_jobs.map do |job_hash|
      OodCore::Job::Info.new(**post_process_qstat_job_hash(job_hash))
    end
  end

  # Get OodCore::Job::Info for a job_id that may still be in the queue
  #
  # If @sge_root is nil or libdrmaa is not loaded then we cannot use DRMAA. Using
  # DRMAA provides better job status and should always be chosen if it is possible.
  #
  # When qstat is called in XML mode for a job id that is not in the queue invalid XML
  # is returned. The second line of the invalid XML contains the string '<unknown_jobs'
  # which will be used to recognize this case.
  #
  # @param job_id [#to_s]
  # @raise [Error] when DRMAA rejects the job id as an invalid argument
  # @return [OodCore::Job::Info] a :completed placeholder when qstat does not know the job
  def get_info_enqueued_job(job_id)
    job_info = OodCore::Job::Info.new(id: job_id.to_s, status: :completed)
    argv = ['qstat', '-r', '-xml', '-j', job_id.to_s]

    begin
      results = call(*argv)
      listener = QstatXmlJRListener.new
      REXML::Parsers::StreamParser.new(results, listener).parse

      job_hash = listener.parsed_job
      job_hash[:status] = get_status_from_drmma(job_id) if can_use_drmaa?

      job_info = OodCore::Job::Info.new(**job_hash)
    rescue REXML::ParseException => e
      # If the error is something other than a job not being found by qstat re-raise the error
      unless results =~ /unknown_jobs/
        raise e, "REXML::ParseException error and command '#{argv.join(' ')}' produced results that didn't contain string 'unknown_jobs'. ParseException: #{e.message}"
      end
    rescue StandardError => e
      # DRMAA is only loaded when sge_root is configured. Naming its error
      # class directly in a rescue clause would raise NameError whenever an
      # unrelated error reached this point without DRMAA loaded, hiding the
      # real failure. Check that the constant exists before matching.
      raise unless defined?(DRMAA::DRMAAInvalidArgumentError) && e.is_a?(DRMAA::DRMAAInvalidArgumentError)

      raise Error, e.message
    end

    job_info
  end

  # Whether job status can be looked up through DRMAA
  # @return [Object] truthy when sge_root is set and the DRMAA constant is defined
  def can_use_drmaa?
    sge_root && Object.const_defined?('DRMAA')
  end

  # Call qhold
  # @param job_id [#to_s]
  # @return [void]
  def hold(job_id)
    call('qhold', job_id)
  end

  # Call qrls
  # @param job_id [#to_s]
  # @return [void]
  def release(job_id)
    call('qrls', job_id)
  end

  # Call qdel
  # @param job_id [#to_s]
  # @return [void]
  def delete(job_id)
    call('qdel', job_id)
  end

  # Call qsub with arguments and the scripts content
  # @param content [#to_s] the script content, passed to qsub on stdin
  # @param args [Array<#to_s>] the qsub argument vector
  # @return job_id [String]
  def submit(content, args)
    cmd = ['qsub'] + args
    @helper.parse_job_id_from_qsub(call(*cmd, :stdin => content))
  end

  # Call a forked SGE command for a given batch server
  # @param cmd [#to_s] command name, resolved against #bin and #bin_overrides
  # @param args [Array<#to_s>] arguments passed to the command
  # @param env [#to_h] environment variables for the child process
  # @param stdin [#to_s] data written to the command's stdin
  # @param chdir [#to_s, nil] working directory for the command, "." when nil
  # @raise [Error] with the command's stderr when it exits unsuccessfully
  # @return [String] the command's stdout
  def call(cmd, *args, env: {}, stdin: "", chdir: nil)
    cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
    args = args.map(&:to_s)

    env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
    chdir ||= "."
    o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
    s.success? ? o : raise(Error, e)
  end

  # Adapted from http://www.softpanorama.org/HPC/Grid_engine/Queues/queue_states.shtml
  STATE_MAP = {
    'EhRqw' => :undetermined, # all pending states with error
    'Ehqw'  => :undetermined, # all pending states with error
    'Eqw'   => :undetermined, # all pending states with error
    'RS'    => :suspended,    # all suspended with re-submit
    'RT'    => :suspended,    # all suspended with re-submit
    'Rr'    => :running,      # running, re-submit
    'Rs'    => :suspended,    # all suspended with re-submit
    'Rt'    => :running,      # transferring, re-submit
    'RtS'   => :suspended,    # all suspended with re-submit
    'RtT'   => :suspended,    # all suspended with re-submit
    'Rts'   => :suspended,    # all suspended with re-submit
    'S'     => :suspended,    # queue suspended
    'T'     => :suspended,    # queue suspended by alarm
    'dRS'   => :completed,    # all running and suspended states with deletion
    'dRT'   => :completed,    # all running and suspended states with deletion
    'dRr'   => :completed,    # all running and suspended states with deletion
    'dRs'   => :completed,    # all running and suspended states with deletion
    'dRt'   => :completed,    # all running and suspended states with deletion
    'dS'    => :completed,    # all running and suspended states with deletion
    'dT'    => :completed,    # all running and suspended states with deletion
    'dr'    => :completed,    # all running and suspended states with deletion
    'ds'    => :completed,    # all running and suspended states with deletion
    'dt'    => :completed,    # all running and suspended states with deletion
    'hRwq'  => :queued_held,  # pending, system hold, re-queue
    'hqw'   => :queued_held,  # pending, system hold
    'qw'    => :queued,       # pending
    'r'     => :running,      # running
    's'     => :suspended,    # suspended
    't'     => :running,      # transferring
    'tS'    => :suspended,    # queue suspended
    'tT'    => :suspended,    # queue suspended by alarm
    'ts'    => :suspended,    # obsuspended
  }.freeze

  # Translate a native SGE state code into an OodCore status
  # @param sge_state_code [String] state code as reported by qstat
  # @return [Symbol] the mapped status, :undetermined for unknown codes
  def translate_sge_state(sge_state_code)
    STATE_MAP.fetch(sge_state_code, :undetermined)
  end

  # Translate a DRMAA state code into an OodCore status
  # @param drmaa_state_code [Object] state as returned by the DRMAA session
  # @return [Symbol] the mapped status, :undetermined for unknown codes
  def translate_drmaa_state(drmaa_state_code)
    DRMAA::DRMMA_TO_OOD_STATE_MAP.fetch(drmaa_state_code, :undetermined)
  end

  # Fill in derived attributes of a job hash parsed from `qstat -r -xml`
  # @param job_hash [Hash] job attributes; modified in place
  # @return [Hash] the same hash with :wallclock_time set and :status translated
  def post_process_qstat_job_hash(job_hash)
    # dispatch is not set if the job is not running
    unless job_hash.key?(:wallclock_time)
      job_hash[:wallclock_time] = job_hash.key?(:dispatch_time) ? Time.now.to_i - job_hash[:dispatch_time] : 0
    end

    job_hash[:status] = translate_sge_state(job_hash[:status])

    job_hash
  end

  # Get the job status using DRMAA
  # @param job_id [#to_s]
  # @return [Symbol] the OodCore status for the job
  def get_status_from_drmma(job_id)
    # Sets SGE_ROOT for the whole process before the DRMAA session is used
    ENV['SGE_ROOT'] = sge_root.to_s
    translate_drmaa_state(DRMAA::SessionSingleton.instance.job_ps(job_id.to_s))
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Helper methods that turn an OodCore job script into a `qsub` argument
# vector and read the job id back out of `qsub` output
class OodCore::Job::Adapters::Sge::Helper
  require 'ood_core/job/adapters/sge'

  using OodCore::Refinements::ArrayExtensions

  # Convert seconds to duration
  # @param time [#to_i]
  # @return [String] an SGE qsub compatible wallclock limit
  def seconds_to_duration(time)
    time = time.to_i
    "%02d:%02d:%02d" % [time / 3600, time / 60 % 60, time % 60]
  end

  # Convert script and job dependencies to qsub argument vector
  # @param script [Object] the job script description whose attributes are read below
  # @param after [Array<#to_s>] unsupported; must be empty
  # @param afterok [Array<#to_s>] job ids that this job should be held on
  # @param afternotok [Array<#to_s>] unsupported; must be empty
  # @param afterany [Array<#to_s>] unsupported; must be empty
  # @raise [Error] when an unsupported dependency type is requested
  # @return args [Array<String>]
  def batch_submit_args(script, after: [], afterok: [], afternotok: [], afterany: [])
    raise_error_on_unsupported_args(script, after: after, afterok: afterok, afternotok: afternotok, afterany: afterany)

    args = []
    args += ['-h'] if script.submit_as_hold
    args += ['-r', 'yes'] if script.rerunnable
    script.job_environment.each_pair { |k, v| args += ['-v', "#{k}=#{v}"] } unless script.job_environment.nil?
    args += ['-wd', script.workdir] unless script.workdir.nil?

    on_event_email = []
    on_event_email << 'b' if script.email_on_started     # beginning
    on_event_email << 'ea' if script.email_on_terminated # end, aborted

    # Only the first address is used; skip -M entirely when there is none so
    # qsub is never handed an empty recipient
    recipient = script.email && script.email.first
    args += ['-M', recipient, '-m', on_event_email.join] if recipient && !on_event_email.empty?

    afterok = Array(afterok).map(&:to_s)
    args += ['-hold_jid_ad', afterok.join(',')] unless afterok.empty?

    args += ['-N', script.job_name] unless script.job_name.nil?
    args += ['-e', script.error_path] unless script.error_path.nil?
    args += ['-o', script.output_path] unless script.output_path.nil?
    args += ['-ar', script.reservation_id] unless script.reservation_id.nil?
    args += ['-q', script.queue_name] unless script.queue_name.nil?
    args += ['-p', script.priority] unless script.priority.nil?
    args += ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
    args += ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
    args += ['-P', script.accounting_id] unless script.accounting_id.nil?
    args += Array.wrap(script.native) if script.native

    args
  end

  # Raise exceptions when adapter is asked to perform an action that SGE does not support
  # @raise [Error] when an incompatible action is requested
  def raise_error_on_unsupported_args(script, after:, afterok:, afternotok:, afterany:)
    # SGE job dependencies only supports one kind of event: completion
    raise OodCore::Job::Adapters::Sge::Error.new('SGE does not support job dependencies on after start') if after && !after.empty?
    raise OodCore::Job::Adapters::Sge::Error.new('SGE does not support job dependencies on after not ok') if afternotok && !afternotok.empty?
    raise OodCore::Job::Adapters::Sge::Error.new('SGE does not support job dependencies on after any') if afterany && !afterany.empty?
  end

  # Extract the job id from qsub's output
  # e.g. Your job 1043 ("job_16") has been submitted
  # @param qsub_output [#to_s]
  # @raise [Error] when the output does not contain a recognizable job id
  # @return job_id [String]
  def parse_job_id_from_qsub(qsub_output)
    output = qsub_output.to_s
    found = /Your job(?:-array)? (?<job_id>[0-9]+)/.match(output)
    # Report the unexpected output rather than failing with NoMethodError on nil
    raise OodCore::Job::Adapters::Sge::Error.new("Unable to parse a job id from qsub output: #{output.inspect}") if found.nil?

    found[:job_id]
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'rexml/streamlistener'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
# An XML stream listener to build a single Hash of job attributes from qstat output
#
# Handles parsing `qstat -xml -r -j` which provides:
#   :accounting_id
#   :id
#   :job_name
#   :job_owner
#   :procs
#   :queue_name
#   :status
#   :wallclock_limit
#   :wallclock_time # HOW LONG HAS IT BEEN RUNNING?
class QstatXmlJRListener
  # [Hash]
  attr_reader :parsed_job

  include REXML::StreamListener

  def initialize
    @parsed_job = {
      :status => :queued,
      :procs => 1,  # un-knowable from SGE qstat output
      :native => {} # TODO: improve native attribute reporting
    }
    @current_text = nil
    @current_request = nil
  end

  # Dispatch a closing tag to its handler; tags without a handler are ignored
  # @param name [String] the name of the element that just ended
  def tag_end(name)
    # There is deliberately no 'hard_request' entry: this listener reads the
    # requested resources from the CE_name / CE_stringval elements, and it has
    # no end_hard_request handler to dispatch to.
    case name
    when 'JB_ja_tasks'
      end_JB_ja_tasks
    when 'JB_job_number'
      end_JB_job_number
    when 'JB_job_name'
      end_JB_job_name
    when 'JB_owner'
      end_JB_owner
    when 'JB_project'
      end_JB_project
    when 'JB_submission_time'
      end_JB_submission_time
    when 'JAT_start_time'
      end_JAT_start_time
    when 'CE_name'
      end_CE_name
    when 'CE_stringval'
      end_CE_stringval
    when 'QR_name'
      end_QR_name
    end
  end

  # Always store text nodes temporarily
  def text(text)
    @current_text = text
  end

  # Attributes we need
  def end_JB_job_number
    @parsed_job[:id] = @current_text
  end

  def end_JB_owner
    @parsed_job[:job_owner] = @current_text
  end

  def end_JB_project
    @parsed_job[:accounting_id] = @current_text
  end

  def end_JB_job_name
    @parsed_job[:job_name] = @current_text
  end

  def end_JB_submission_time
    @parsed_job[:submission_time] = @current_text.to_i
  end

  # A closing JB_ja_tasks tag is taken to mean that the job is running
  def end_JB_ja_tasks
    @parsed_job[:status] = :running
  end

  # The start time is read as an integer and the elapsed wallclock time is
  # derived from it
  def end_JAT_start_time
    @parsed_job[:status] = :running
    @parsed_job[:dispatch_time] = @current_text.to_i
    @parsed_job[:wallclock_time] = Time.now.to_i - @parsed_job[:dispatch_time]
  end

  # Remember which resource request the next CE_stringval belongs to
  def end_CE_name
    @current_request = @current_text
  end

  def end_CE_stringval
    return nil if @current_request.nil?

    case @current_request
    when 'h_rt' # hard run time limit
      @parsed_job[:wallclock_limit] = @current_text.to_i
    end
  end

  def end_QR_name
    @parsed_job[:queue_name] = @current_text
  end
end
|
116
|
+
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'rexml/streamlistener'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
require 'time'

# An XML stream listener to build an array of job attribute Hashes from qstat output
#
# Handles parsing `qstat -xml -r` which provides:
#   :accounting_id
#   :id
#   :job_name
#   :job_owner
#   :procs
#   :queue_name
#   :status
#   :wallclock_limit
class QstatXmlRListener
  # [Array<Hash>]
  attr_reader :parsed_jobs

  include REXML::StreamListener

  def initialize
    @parsed_jobs = []
    @current_job = {
      :native => {} # TODO: improve native reporting
    }
    @current_text = nil

    @current_request = nil
  end

  # Dispatch an opening tag to its handler; tags without a handler are ignored
  # @param name [String] the name of the element that just started
  # @param attributes [Hash] the attributes of that element
  def tag_start(name, attributes)
    case name
    when 'hard_request'
      start_hard_request(attributes)
    end
  end

  # Dispatch a closing tag to its handler; tags without a handler are ignored
  # @param name [String] the name of the element that just ended
  def tag_end(name)
    case name
    when 'job_list'
      end_job_list
    when 'JB_job_number'
      end_JB_job_number
    when 'JB_name'
      end_JB_name
    when 'JB_owner'
      end_JB_owner
    when 'JB_project'
      end_JB_project
    when 'state'
      end_state
    when 'slots'
      end_slots
    when 'JB_submission_time'
      end_JB_submission_time
    when 'hard_req_queue'
      end_hard_req_queue
    when 'JAT_start_time'
      end_JAT_start_time
    when 'hard_request'
      end_hard_request
    end
  end

  # Always store text nodes temporarily
  def text(text)
    @current_text = text
  end

  # Handle hard_request tags
  #
  # Multiple hard_request tags may be present and will be differentiated using their name attribute
  def start_hard_request(attributes)
    @current_request = attributes.key?('name') ? attributes['name'] : nil
  end

  # Attributes we need
  def end_JB_job_number
    @current_job[:id] = @current_text
  end

  def end_JB_owner
    @current_job[:job_owner] = @current_text
  end

  def end_JB_project
    @current_job[:accounting_id] = @current_text
  end

  def end_JB_name
    @current_job[:job_name] = @current_text
  end

  # Note that this is the native SGE type
  def end_state
    @current_job[:status] = @current_text
  end

  def end_slots
    @current_job[:procs] = @current_text.to_i
  end

  def end_hard_req_queue
    @current_job[:queue_name] = @current_text
  end

  def end_JB_submission_time
    @current_job[:submission_time] = epoch_seconds(@current_text)
  end

  def end_JAT_start_time
    @current_job[:dispatch_time] = epoch_seconds(@current_text)
  end

  def end_hard_request
    return nil if @current_request.nil?

    case @current_request
    when 'h_rt' # hard run time limit
      @current_job[:wallclock_limit] = @current_text.to_i
    end
  end

  # Store a completed job and reset current_job for the next pass
  def end_job_list
    @parsed_jobs << @current_job
    @current_job = {
      :native => {}
    }
  end

  private

  # Convert a timestamp string into seconds since the epoch
  #
  # Time.parse reads a timestamp that carries no zone in the local time zone,
  # where DateTime.parse would read it as UTC and shift the result by the
  # local UTC offset. An explicit offset in the text is still honoured.
  # NOTE(review): presumes qstat prints these timestamps in local time - confirm.
  #
  # @param timestamp [String] e.g. "2018-10-10T14:37:16"
  # @return [Integer] seconds since the epoch
  def epoch_seconds(timestamp)
    Time.parse(timestamp).to_i
  end
end
|
138
|
+
|