ood_core 0.7.1 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/CHANGELOG.md +11 -1
- data/lib/ood_core.rb +1 -0
- data/lib/ood_core/job/adapter.rb +65 -3
- data/lib/ood_core/job/adapters/lsf.rb +6 -2
- data/lib/ood_core/job/adapters/pbspro.rb +6 -2
- data/lib/ood_core/job/adapters/sge.rb +4 -4
- data/lib/ood_core/job/adapters/sge/batch.rb +18 -7
- data/lib/ood_core/job/adapters/sge/helper.rb +1 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +77 -3
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +10 -0
- data/lib/ood_core/job/adapters/slurm.rb +22 -8
- data/lib/ood_core/job/adapters/torque.rb +44 -8
- data/lib/ood_core/job/array_ids.rb +55 -0
- data/lib/ood_core/job/info.rb +22 -2
- data/lib/ood_core/job/script.rb +23 -17
- data/lib/ood_core/job/status.rb +15 -2
- data/lib/ood_core/job/task.rb +24 -0
- data/lib/ood_core/version.rb +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8a526602e6c6b59b6d943d299dc4e442cfd354a768669b4bc03a9423e12cf418
|
4
|
+
data.tar.gz: 5220c4b20c1de287afdcad2eece623952c58aea735c1786f9956912563277e85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f63f8aff330f033ef8fe0dad0d07629e3704463441f1a910920f37a86d48a4cab059182403b9cb6f1bd6a300213b1cff45315b43354fa0d2a9aaaba2f7bc54c8
|
7
|
+
data.tar.gz: d77d8d5130a3f20ac9e54667b10de5a476322c38491f63abe58e0ab192d23b22fc764481254abaf9def825a4ee6707646f1b3c264ed9a5ec4fa76add0f34295a
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.8.0] - 2019-01-29
|
10
|
+
### Added
|
11
|
+
- info_all_each and info_where_owner_each super class methods
|
12
|
+
- job array support for Torque, Slurm, and SGE (currently missing from LSF and PBSPro)
|
13
|
+
- `OodCore::Job::Status#precedence` for the ability to get an overall status for a group of jobs
|
14
|
+
|
15
|
+
### Fixed
|
16
|
+
- Fix SGE adapter to specify `-u '*'` when calling qstat to get all jobs
|
17
|
+
|
9
18
|
## [0.7.1] - 2019-01-11
|
10
19
|
### Fixed
|
11
20
|
- Fixed crash when libdrmaa is used to query for a job no longer in the queue
|
@@ -156,7 +165,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
156
165
|
### Added
|
157
166
|
- Initial release!
|
158
167
|
|
159
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
168
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.8.0...HEAD
|
169
|
+
[0.8.0]: https://github.com/OSC/ood_core/compare/v0.7.1...v0.8.0
|
160
170
|
[0.7.1]: https://github.com/OSC/ood_core/compare/v0.7.0...v0.7.1
|
161
171
|
[0.7.0]: https://github.com/OSC/ood_core/compare/v0.6.0...v0.7.0
|
162
172
|
[0.6.0]: https://github.com/OSC/ood_core/compare/v0.5.1...v0.6.0
|
data/lib/ood_core.rb
CHANGED
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -36,18 +36,80 @@ module OodCore
|
|
36
36
|
# Retrieve info for all jobs from the resource manager
|
37
37
|
# @abstract Subclass is expected to implement {#info_all}
|
38
38
|
# @raise [NotImplementedError] if subclass did not define {#info_all}
|
39
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
40
|
+
# This array specifies only attrs you want, in addition to id and status.
|
41
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
42
|
+
# to have a value for any attr besides the ones specified and id and status.
|
43
|
+
#
|
44
|
+
# For certain adapters this may speed up the response since
|
45
|
+
# adapters can get by without populating the entire Info object
|
39
46
|
# @return [Array<Info>] information describing submitted jobs
|
40
|
-
def info_all
|
47
|
+
def info_all(attrs: nil)
|
41
48
|
raise NotImplementedError, "subclass did not define #info_all"
|
42
49
|
end
|
43
50
|
|
44
51
|
# Retrieve info for all jobs for a given owner or owners from the
|
45
52
|
# resource manager
|
46
53
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
54
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
55
|
+
# This array specifies only attrs you want, in addition to id and status.
|
56
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
57
|
+
# to have a value for any attr besides the ones specified and id and status.
|
58
|
+
#
|
59
|
+
# For certain adapters this may speed up the response since
|
60
|
+
# adapters can get by without populating the entire Info object
|
47
61
|
# @return [Array<Info>] information describing submitted jobs
|
48
|
-
def info_where_owner(owner)
|
62
|
+
def info_where_owner(owner, attrs: nil)
|
49
63
|
owner = Array.wrap(owner).map(&:to_s)
|
50
|
-
|
64
|
+
|
65
|
+
# must at least have job_owner to filter by job_owner
|
66
|
+
attrs = Array.wrap(attrs) | [:job_owner] unless attrs.nil?
|
67
|
+
|
68
|
+
info_all(attrs: attrs).select { |info| owner.include? info.job_owner }
|
69
|
+
end
|
70
|
+
|
71
|
+
# Iterate over each job Info object
|
72
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
73
|
+
# This array specifies only attrs you want, in addition to id and status.
|
74
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
75
|
+
# to have a value for any attr besides the ones specified and id and status.
|
76
|
+
#
|
77
|
+
# For certain adapters this may speed up the response since
|
78
|
+
# adapters can get by without populating the entire Info object
|
79
|
+
# @yield [Info] of each job to block
|
80
|
+
# @return [Enumerator] if no block given
|
81
|
+
def info_all_each(attrs: nil)
|
82
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
83
|
+
|
84
|
+
info_all(attrs: attrs).each do |job|
|
85
|
+
yield job
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# Iterate over each job Info object
|
90
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
91
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
92
|
+
# This array specifies only attrs you want, in addition to id and status.
|
93
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
94
|
+
# to have a value for any attr besides the ones specified and id and status.
|
95
|
+
#
|
96
|
+
# For certain adapters this may speed up the response since
|
97
|
+
# adapters can get by without populating the entire Info object
|
98
|
+
# @yield [Info] of each job to block
|
99
|
+
# @return [Enumerator] if no block given
|
100
|
+
def info_where_owner_each(owner, attrs: nil)
|
101
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
102
|
+
|
103
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
104
|
+
yield job
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Whether the adapter supports job arrays
|
109
|
+
# @return [Boolean] - assumes true; but can be overridden by adapters that
|
110
|
+
# explicitly do not
|
111
|
+
def supports_job_arrays?
|
112
|
+
true
|
51
113
|
end
|
52
114
|
|
53
115
|
# Retrieve job info from the resource manager
|
@@ -108,7 +108,7 @@ module OodCore
|
|
108
108
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
109
109
|
# @return [Array<Info>] information describing submitted jobs
|
110
110
|
# @see Adapter#info_all
|
111
|
-
def info_all
|
111
|
+
def info_all(attrs: nil)
|
112
112
|
batch.get_jobs.map { |v| info_for_batch_hash(v) }
|
113
113
|
rescue Batch::Error => e
|
114
114
|
raise JobAdapterError, e.message
|
@@ -118,7 +118,7 @@ module OodCore
|
|
118
118
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
119
119
|
# @return [Array<Info>] information describing submitted jobs
|
120
120
|
# @see Adapter#info_where_owner
|
121
|
-
def info_where_owner(owner)
|
121
|
+
def info_where_owner(owner, attrs: nil)
|
122
122
|
owners = Array.wrap(owner).map(&:to_s)
|
123
123
|
if owners.count > 1
|
124
124
|
super
|
@@ -131,6 +131,10 @@ module OodCore
|
|
131
131
|
raise JobAdapterError, e.message
|
132
132
|
end
|
133
133
|
|
134
|
+
def supports_job_arrays?
|
135
|
+
false
|
136
|
+
end
|
137
|
+
|
134
138
|
# Retrieve job status from resource manager
|
135
139
|
# @param id [#to_s] the id of the job
|
136
140
|
# @raise [JobAdapterError] if something goes wrong getting job status
|
@@ -279,7 +279,7 @@ module OodCore
|
|
279
279
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
280
280
|
# @return [Array<Info>] information describing submitted jobs
|
281
281
|
# @see Adapter#info_all
|
282
|
-
def info_all
|
282
|
+
def info_all(attrs: nil)
|
283
283
|
@pbspro.get_jobs.map do |v|
|
284
284
|
parse_job_info(v)
|
285
285
|
end
|
@@ -292,7 +292,7 @@ module OodCore
|
|
292
292
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
293
293
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
294
294
|
# @return [Array<Info>] information describing submitted jobs
|
295
|
-
def info_where_owner(owner)
|
295
|
+
def info_where_owner(owner, attrs: nil)
|
296
296
|
owner = Array.wrap(owner).map(&:to_s)
|
297
297
|
|
298
298
|
usr_jobs = @pbspro.select_jobs(args: ["-u", owner.join(",")])
|
@@ -307,6 +307,10 @@ module OodCore
|
|
307
307
|
end
|
308
308
|
end
|
309
309
|
|
310
|
+
def supports_job_arrays?
|
311
|
+
false
|
312
|
+
end
|
313
|
+
|
310
314
|
# Retrieve job info from the resource manager
|
311
315
|
# @param id [#to_s] the id of the job
|
312
316
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
@@ -90,8 +90,8 @@ module OodCore
|
|
90
90
|
|
91
91
|
# Retrieve info for all jobs from the resource manager
|
92
92
|
# @return [Array<Info>] information describing submitted jobs
|
93
|
-
def info_all
|
94
|
-
@batch.get_all
|
93
|
+
def info_all(attrs: nil)
|
94
|
+
@batch.get_all(owner: '*')
|
95
95
|
rescue Batch::Error => e
|
96
96
|
raise JobAdapterError, e.message
|
97
97
|
end
|
@@ -101,7 +101,7 @@ module OodCore
|
|
101
101
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
102
102
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
103
103
|
# @return [Array<Info>] information describing submitted jobs
|
104
|
-
def info_where_owner(owner)
|
104
|
+
def info_where_owner(owner, attrs: nil)
|
105
105
|
owner = Array.wrap(owner).map(&:to_s).join(',')
|
106
106
|
@batch.get_all(owner: owner)
|
107
107
|
rescue Batch::Error => e
|
@@ -160,4 +160,4 @@ module OodCore
|
|
160
160
|
end
|
161
161
|
end
|
162
162
|
end
|
163
|
-
end
|
163
|
+
end
|
@@ -95,13 +95,7 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
95
95
|
|
96
96
|
job_hash = listener.parsed_job
|
97
97
|
|
98
|
-
|
99
|
-
begin
|
100
|
-
job_hash[:status] = get_status_from_drmma(job_id)
|
101
|
-
rescue DRMAA::DRMAAInvalidArgumentError => e
|
102
|
-
raise Error, e.message
|
103
|
-
end
|
104
|
-
end
|
98
|
+
update_job_hash_status!(job_hash)
|
105
99
|
|
106
100
|
job_info = OodCore::Job::Info.new(**job_hash)
|
107
101
|
rescue REXML::ParseException => e
|
@@ -117,6 +111,22 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
117
111
|
job_info
|
118
112
|
end
|
119
113
|
|
114
|
+
def update_job_hash_status!(job_hash)
|
115
|
+
if get_status_from_drmaa?(job_hash)
|
116
|
+
begin
|
117
|
+
job_hash[:status] = get_status_from_drmma(job_hash[:id])
|
118
|
+
rescue DRMAA::DRMAAInvalidArgumentError => e
|
119
|
+
raise Error, e.message
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
def get_status_from_drmaa?(job_hash)
|
125
|
+
# DRMAA does not recognize the parent task in job arrays
|
126
|
+
# e.g. 123 is invalid if it is an array job, while 123.4 is valid
|
127
|
+
can_use_drmaa? && job_hash[:tasks].empty?
|
128
|
+
end
|
129
|
+
|
120
130
|
def can_use_drmaa?
|
121
131
|
@can_use_drmaa
|
122
132
|
end
|
@@ -212,6 +222,7 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
212
222
|
end
|
213
223
|
|
214
224
|
job_hash[:status] = translate_sge_state(job_hash[:status])
|
225
|
+
update_job_hash_status!(job_hash)
|
215
226
|
|
216
227
|
job_hash
|
217
228
|
end
|
@@ -41,6 +41,7 @@ class OodCore::Job::Adapters::Sge::Helper
|
|
41
41
|
args += ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
|
42
42
|
args += ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
43
43
|
args += ['-P', script.accounting_id] unless script.accounting_id.nil?
|
44
|
+
args += ['-t', script.job_array_request] unless script.job_array_request.nil?
|
44
45
|
args += Array.wrap(script.native) if script.native
|
45
46
|
|
46
47
|
args
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rexml/document'
|
2
2
|
require 'rexml/streamlistener'
|
3
3
|
require 'date'
|
4
|
+
require 'ood_core'
|
5
|
+
require 'ood_core/job/array_ids'
|
4
6
|
|
5
7
|
# An XML stream listener to build an array of OodCore::Job::Info from qstat output
|
6
8
|
#
|
@@ -13,9 +15,7 @@ require 'date'
|
|
13
15
|
# :queue_name
|
14
16
|
# :status
|
15
17
|
# :wallclock_limit
|
16
|
-
|
17
|
-
|
18
|
-
# :wallclock_time # HOW LONG HAS IT BEEN RUNNING?
|
18
|
+
# :wallclock_time
|
19
19
|
|
20
20
|
class QstatXmlJRListener
|
21
21
|
# [Hash]
|
@@ -25,12 +25,28 @@ class QstatXmlJRListener
|
|
25
25
|
|
26
26
|
def initialize
|
27
27
|
@parsed_job = {
|
28
|
+
:tasks => [],
|
28
29
|
:status => :queued,
|
29
30
|
:procs => 1, # un-knowable from SGE qstat output
|
30
31
|
:native => {} # TODO: improve native attribute reporting
|
31
32
|
}
|
32
33
|
@current_text = nil
|
33
34
|
@current_request = nil
|
35
|
+
|
36
|
+
@processing_job_array_spec = false
|
37
|
+
@job_array_spec = {
|
38
|
+
start: nil,
|
39
|
+
stop: nil,
|
40
|
+
step: 1, # Step can have a default of 1
|
41
|
+
}
|
42
|
+
@running_tasks = []
|
43
|
+
end
|
44
|
+
|
45
|
+
def tag_start(name, attrs)
|
46
|
+
case name
|
47
|
+
when 'task_id_range'
|
48
|
+
toggle_processing_array_spec
|
49
|
+
end
|
34
50
|
end
|
35
51
|
|
36
52
|
def tag_end(name)
|
@@ -57,6 +73,18 @@ class QstatXmlJRListener
|
|
57
73
|
end_CE_stringval
|
58
74
|
when 'QR_name'
|
59
75
|
end_QR_name
|
76
|
+
when 'JAT_task_number'
|
77
|
+
end_JAT_task_number
|
78
|
+
when 'djob_info'
|
79
|
+
finalize_parsed_job
|
80
|
+
when 'RN_min'
|
81
|
+
set_job_array_piece(:start)
|
82
|
+
when 'RN_max'
|
83
|
+
set_job_array_piece(:stop)
|
84
|
+
when 'RN_step'
|
85
|
+
set_job_array_piece(:step)
|
86
|
+
when 'task_id_range'
|
87
|
+
toggle_processing_array_spec
|
60
88
|
end
|
61
89
|
end
|
62
90
|
|
@@ -112,5 +140,51 @@ class QstatXmlJRListener
|
|
112
140
|
def end_QR_name
|
113
141
|
@parsed_job[:queue_name] = @current_text
|
114
142
|
end
|
143
|
+
|
144
|
+
# Used to record a running Job Array task
|
145
|
+
def end_JAT_task_number
|
146
|
+
@running_tasks << @current_text
|
147
|
+
end
|
148
|
+
|
149
|
+
def set_job_array_piece(key)
|
150
|
+
@job_array_spec[key] = @current_text if @processing_job_array_spec
|
151
|
+
end
|
152
|
+
|
153
|
+
def spec_string
|
154
|
+
# If any of the job_array_spec values are nil then return a default spec_string
|
155
|
+
if @job_array_spec.values.any? { |value| value.nil? }
|
156
|
+
'1-1:1'
|
157
|
+
else
|
158
|
+
'%{start}-%{stop}:%{step}' % @job_array_spec
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
# Build the list of child task hashes for a job array.
#
# Every task number recorded in @running_tasks is reported as :running.
# Every id from the array spec that is greater than the highest running id
# is reported as :queued. Ids below the highest running id that are not
# themselves running are omitted.
#
# @return [Array<Hash>] hashes of the form { :id => Integer, :status => Symbol }
def build_tasks
  all_task_ids = OodCore::Job::ArrayIds.new(spec_string).ids

  # Task numbers are collected from XML text (presumably Strings), so they
  # must be compared numerically: sorted as text, "10" comes before "9",
  # which would pick the wrong highest id and list a running task as queued.
  running_ids = @running_tasks.map(&:to_i).sort
  highest_id_running = running_ids.last.to_i # nil.to_i == 0 when nothing is running

  running = running_ids.map { |task_id| { :id => task_id, :status => :running } }
  queued = all_task_ids
    .select { |task_id| task_id > highest_id_running }
    .map { |task_id| { :id => task_id, :status => :queued } }

  running + queued
end
|
174
|
+
|
175
|
+
# Used to finalize the parsed job
|
176
|
+
def finalize_parsed_job
|
177
|
+
@parsed_job[:tasks] = build_tasks if need_to_build_job_array?
|
178
|
+
end
|
179
|
+
|
180
|
+
# The XML output will always contain nodes for task_id_range, even when the
|
181
|
+
# job is not an array job.
|
182
|
+
def need_to_build_job_array?
|
183
|
+
spec_string != '1-1:1'
|
184
|
+
end
|
185
|
+
|
186
|
+
def toggle_processing_array_spec
|
187
|
+
@processing_job_array_spec = ! @processing_job_array_spec
|
188
|
+
end
|
115
189
|
end
|
116
190
|
|
@@ -23,6 +23,7 @@ class QstatXmlRListener
|
|
23
23
|
def initialize
|
24
24
|
@parsed_jobs = []
|
25
25
|
@current_job = {
|
26
|
+
:tasks => [],
|
26
27
|
:native => {} # TODO: improve native reporting
|
27
28
|
}
|
28
29
|
@current_text = nil
|
@@ -61,6 +62,8 @@ class QstatXmlRListener
|
|
61
62
|
end_JAT_start_time
|
62
63
|
when 'hard_request'
|
63
64
|
end_hard_request
|
65
|
+
when 'tasks'
|
66
|
+
add_child_tasks
|
64
67
|
end
|
65
68
|
end
|
66
69
|
|
@@ -131,8 +134,15 @@ class QstatXmlRListener
|
|
131
134
|
def end_job_list
|
132
135
|
@parsed_jobs << @current_job
|
133
136
|
@current_job = {
|
137
|
+
:tasks => [],
|
134
138
|
:native => {}
|
135
139
|
}
|
136
140
|
end
|
141
|
+
|
142
|
+
def add_child_tasks
|
143
|
+
@current_job[:tasks] = OodCore::Job::ArrayIds.new(@current_text).ids.sort.map{
|
144
|
+
|task_id| { :id => task_id, :status => :queued }
|
145
|
+
}
|
146
|
+
end
|
137
147
|
end
|
138
148
|
|
@@ -292,6 +292,7 @@ module OodCore
|
|
292
292
|
args += ["--begin", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
|
293
293
|
args += ["-A", script.accounting_id] unless script.accounting_id.nil?
|
294
294
|
args += ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
295
|
+
args += ['-a', script.job_array_request] unless script.job_array_request.nil?
|
295
296
|
# ignore nodes, don't know how to do this for slurm
|
296
297
|
|
297
298
|
# Set dependencies
|
@@ -326,7 +327,7 @@ module OodCore
|
|
326
327
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
327
328
|
# @return [Array<Info>] information describing submitted jobs
|
328
329
|
# @see Adapter#info_all
|
329
|
-
def info_all
|
330
|
+
def info_all(attrs: nil)
|
330
331
|
@slurm.get_jobs.map do |v|
|
331
332
|
parse_job_info(v)
|
332
333
|
end
|
@@ -345,13 +346,8 @@ module OodCore
|
|
345
346
|
parse_job_info(v)
|
346
347
|
end
|
347
348
|
|
348
|
-
#
|
349
|
-
|
350
|
-
# given job id (if we can't find it, we assume it has completed)
|
351
|
-
info_ary.detect( -> { Info.new(id: id, status: :completed) } ) do |info|
|
352
|
-
# Match the job id or the formatted job & task id "1234_0"
|
353
|
-
info.id == id || info.native[:array_job_task_id] == id
|
354
|
-
end
|
349
|
+
# If no job was found we assume that it has completed
|
350
|
+
info_ary.empty? ? Info.new(id: id, status: :completed) : handle_job_array(info_ary, id)
|
355
351
|
rescue Batch::Error => e
|
356
352
|
# set completed status if can't find job id
|
357
353
|
if /Invalid job id specified/ =~ e.message
|
@@ -500,6 +496,24 @@ module OodCore
|
|
500
496
|
native: v
|
501
497
|
)
|
502
498
|
end
|
499
|
+
|
500
|
+
def handle_job_array(info_ary, id)
|
501
|
+
# If only one job was returned we return it
|
502
|
+
return info_ary.first unless info_ary.length > 1
|
503
|
+
|
504
|
+
parent_task_hash = {:tasks => []}
|
505
|
+
|
506
|
+
info_ary.map do |task_info|
|
507
|
+
parent_task_hash[:tasks] << {:id => task_info.id, :status => task_info.status}
|
508
|
+
|
509
|
+
if task_info.id == id || task_info.native[:array_job_task_id] == id
|
510
|
+
# Merge hashes without clobbering the child tasks
|
511
|
+
parent_task_hash.merge!(task_info.to_h.select{|k, v| k != :tasks})
|
512
|
+
end
|
513
|
+
end
|
514
|
+
|
515
|
+
Info.new(**parent_task_hash)
|
516
|
+
end
|
503
517
|
end
|
504
518
|
end
|
505
519
|
end
|
@@ -113,6 +113,7 @@ module OodCore
|
|
113
113
|
headers.merge!(Execution_Time: script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")) unless script.start_time.nil?
|
114
114
|
headers.merge!(Account_Name: script.accounting_id) unless script.accounting_id.nil?
|
115
115
|
headers.merge!(depend: depend.join(',')) unless depend.empty?
|
116
|
+
headers.merge!(job_array_request: script.job_array_request) unless script.job_array_request.nil?
|
116
117
|
|
117
118
|
# Set resources
|
118
119
|
resources = {}
|
@@ -150,7 +151,7 @@ module OodCore
|
|
150
151
|
args += ["-A", script.accounting_id] unless script.accounting_id.nil?
|
151
152
|
args += ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
|
152
153
|
args += ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
|
153
|
-
|
154
|
+
args += ['-t', script.job_array_request] unless script.job_array_request.nil?
|
154
155
|
# Set environment variables
|
155
156
|
env = script.job_environment.to_h
|
156
157
|
args += ["-v", env.keys.join(",")] unless env.empty?
|
@@ -173,7 +174,7 @@ module OodCore
|
|
173
174
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
174
175
|
# @return [Array<Info>] information describing submitted jobs
|
175
176
|
# @see Adapter#info_all
|
176
|
-
def info_all
|
177
|
+
def info_all(attrs: nil)
|
177
178
|
@pbs.get_jobs.map do |k, v|
|
178
179
|
parse_job_info(k, v)
|
179
180
|
end
|
@@ -186,7 +187,7 @@ module OodCore
|
|
186
187
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
187
188
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
188
189
|
# @return [Array<Info>] information describing submitted jobs
|
189
|
-
def info_where_owner(owner)
|
190
|
+
def info_where_owner(owner, attrs: nil)
|
190
191
|
owner = Array.wrap(owner).map(&:to_s)
|
191
192
|
@pbs.select_jobs(
|
192
193
|
attribs: [
|
@@ -206,7 +207,13 @@ module OodCore
|
|
206
207
|
# @see Adapter#info
|
207
208
|
def info(id)
|
208
209
|
id = id.to_s
|
209
|
-
|
210
|
+
result = @pbs.get_job(id)
|
211
|
+
|
212
|
+
if result.keys.length == 1
|
213
|
+
parse_job_info(*result.flatten)
|
214
|
+
else
|
215
|
+
parse_job_array(id, result)
|
216
|
+
end
|
210
217
|
rescue Torque::FFI::UnkjobidError
|
211
218
|
# set completed status if can't find job id
|
212
219
|
Info.new(
|
@@ -224,8 +231,13 @@ module OodCore
|
|
224
231
|
# @see Adapter#status
|
225
232
|
def status(id)
|
226
233
|
id = id.to_s
|
227
|
-
|
228
|
-
|
234
|
+
@pbs.get_job(id, filters: [:job_state]).values.map {
|
235
|
+
|job_status| OodCore::Job::Status.new(
|
236
|
+
state: STATE_MAP.fetch(
|
237
|
+
job_status[:job_state], :undetermined
|
238
|
+
)
|
239
|
+
)
|
240
|
+
}.max
|
229
241
|
rescue Torque::FFI::UnkjobidError
|
230
242
|
# set completed status if can't find job id
|
231
243
|
Status.new(state: :completed)
|
@@ -300,8 +312,31 @@ module OodCore
|
|
300
312
|
end
|
301
313
|
end
|
302
314
|
|
315
|
+
def parse_job_array(parent_id, result)
|
316
|
+
results = result.to_a
|
317
|
+
|
318
|
+
parse_job_info(
|
319
|
+
parent_id,
|
320
|
+
results.first.last.tap { |info_hash| info_hash[:exec_host] = aggregate_exec_host(results) },
|
321
|
+
tasks: generate_task_list(results)
|
322
|
+
)
|
323
|
+
end
|
324
|
+
|
325
|
+
def aggregate_exec_host(results)
|
326
|
+
results.map { |k,v| v[:exec_host] }.compact.sort.uniq.join("+")
|
327
|
+
end
|
328
|
+
|
329
|
+
def generate_task_list(results)
|
330
|
+
results.map do |k, v|
|
331
|
+
{
|
332
|
+
:id => k,
|
333
|
+
:status => STATE_MAP.fetch(v[:job_state], :undetermined)
|
334
|
+
}
|
335
|
+
end
|
336
|
+
end
|
337
|
+
|
303
338
|
# Parse hash describing PBS job status
|
304
|
-
def parse_job_info(k, v)
|
339
|
+
def parse_job_info(k, v, tasks: [])
|
305
340
|
/^(?<job_owner>[\w-]+)@/ =~ v[:Job_Owner]
|
306
341
|
allocated_nodes = parse_nodes(v[:exec_host] || "")
|
307
342
|
procs = allocated_nodes.inject(0) { |sum, x| sum + x[:procs] }
|
@@ -329,7 +364,8 @@ module OodCore
|
|
329
364
|
cpu_time: duration_in_seconds(v.fetch(:resources_used, {})[:cput]),
|
330
365
|
submission_time: v[:ctime],
|
331
366
|
dispatch_time: v[:start_time],
|
332
|
-
native: v
|
367
|
+
native: v,
|
368
|
+
tasks: tasks
|
333
369
|
)
|
334
370
|
end
|
335
371
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Builds a sorted array of job ids given a job array spec string
|
2
|
+
#
|
3
|
+
# Job array spec strings:
|
4
|
+
# 1 Single id
|
5
|
+
# 1-10 Range
|
6
|
+
# 1-10:2 Range with step
|
7
|
+
# 1-10,13 Compound (range with single id)
|
8
|
+
#
|
9
|
+
# Note that Ranges are expected to be inclusive
|
10
|
+
module OodCore
  module Job
    # Expands a job array spec string (e.g. "1-10:2,13%4") into a sorted
    # array of integer task ids, available through {#ids}.
    class ArrayIds
      # @return [Array<Integer>] sorted task ids described by the spec string
      attr_reader :ids

      # @param spec_string [#to_s, nil] job array spec; nil or an empty
      #   string yields an empty id list
      def initialize(spec_string)
        @ids = []
        parse_spec_string(spec_string)
      end

      protected

      # Expand every comma separated component and store the sorted result.
      # flat_map (rather than reduce(:+)) keeps an empty spec as [] instead of nil.
      def parse_spec_string(spec_string)
        @ids = get_components(spec_string).flat_map { |component|
          process_component(component)
        }.sort
      end

      # Split the spec into its comma separated components, ignoring blank
      # ones so that input like "1,,3" does not produce a bogus id of 0.
      # @return [Array<String>]
      def get_components(spec_string)
        discard_percent_modifier(spec_string).split(',').map(&:strip).reject(&:empty?)
      end

      # A few adapters use percent to define an array's maximum number of
      # simultaneous tasks. The percent is expected to come at the end.
      # @return [String] the spec without the percent modifier ("" if none)
      def discard_percent_modifier(spec_string)
        spec_string.to_s.split('%').first.to_s
      end

      # @return [Array<Integer>] ids described by a single component
      def process_component(component)
        is_range?(component) ? get_range(component) : [ component.to_i ]
      end

      # Expand "START-STOP" or "START-STOP:STEP" into an inclusive list of ids.
      # A missing or zero step is treated as 1.
      # @return [Array<Integer>]
      def get_range(component)
        raw_range, raw_step = component.split(':')
        start, stop = raw_range.split('-').map(&:to_i)
        range = Range.new(start, stop)
        step = raw_step.to_i
        step = 1 if step == 0

        range.step(step).to_a
      end

      # @return [Boolean] whether the component contains a '-' range separator
      def is_range?(component)
        component.include?('-')
      end
    end
  end
end
|
data/lib/ood_core/job/info.rb
CHANGED
@@ -65,6 +65,11 @@ module OodCore
|
|
65
65
|
# @return [Object] native info
|
66
66
|
attr_reader :native
|
67
67
|
|
68
|
+
# List of job array child task statuses
|
69
|
+
# @note only relevant for job arrays
|
70
|
+
# @return [Array<Task>] tasks
|
71
|
+
attr_reader :tasks
|
72
|
+
|
68
73
|
# @param id [#to_s] job id
|
69
74
|
# @param status [#to_sym] job state
|
70
75
|
# @param allocated_nodes [Array<#to_h>] allocated nodes
|
@@ -79,12 +84,14 @@ module OodCore
|
|
79
84
|
# @param cpu_time [#to_i, nil] cpu time
|
80
85
|
# @param submission_time [#to_i, nil] submission time
|
81
86
|
# @param dispatch_time [#to_i, nil] dispatch time
|
87
|
+
# @param tasks [Array<Hash>] tasks e.g. { id: '12345.owens-batch', status: :running }
|
82
88
|
# @param native [Object] native info
|
83
89
|
def initialize(id:, status:, allocated_nodes: [], submit_host: nil,
|
84
90
|
job_name: nil, job_owner: nil, accounting_id: nil,
|
85
91
|
procs: nil, queue_name: nil, wallclock_time: nil,
|
86
92
|
wallclock_limit: nil, cpu_time: nil, submission_time: nil,
|
87
|
-
dispatch_time: nil, native: nil,
|
93
|
+
dispatch_time: nil, native: nil, tasks: [],
|
94
|
+
**_)
|
88
95
|
@id = id.to_s
|
89
96
|
@status = Status.new(state: status.to_sym)
|
90
97
|
@allocated_nodes = allocated_nodes.map { |n| NodeInfo.new(n.to_h) }
|
@@ -99,6 +106,10 @@ module OodCore
|
|
99
106
|
@cpu_time = cpu_time && cpu_time.to_i
|
100
107
|
@submission_time = submission_time && Time.at(submission_time.to_i)
|
101
108
|
@dispatch_time = dispatch_time && Time.at(dispatch_time.to_i)
|
109
|
+
@tasks = tasks.map {|task_status| Task.new(**task_status)}
|
110
|
+
|
111
|
+
@status = job_array_aggregate_status unless @tasks.empty?
|
112
|
+
|
102
113
|
@native = native
|
103
114
|
end
|
104
115
|
|
@@ -120,7 +131,8 @@ module OodCore
|
|
120
131
|
cpu_time: cpu_time,
|
121
132
|
submission_time: submission_time,
|
122
133
|
dispatch_time: dispatch_time,
|
123
|
-
native: native
|
134
|
+
native: native,
|
135
|
+
tasks: tasks
|
124
136
|
}
|
125
137
|
end
|
126
138
|
|
@@ -143,6 +155,14 @@ module OodCore
|
|
143
155
|
def hash
|
144
156
|
[self.class, to_h].hash
|
145
157
|
end
|
158
|
+
|
159
|
+
private
|
160
|
+
|
161
|
+
# Generate an aggregate status from child tasks
|
162
|
+
# @return [OodCore::Job::Status]
|
163
|
+
def job_array_aggregate_status
|
164
|
+
@tasks.map { |task_status| task_status.status }.max
|
165
|
+
end
|
146
166
|
end
|
147
167
|
end
|
148
168
|
end
|
data/lib/ood_core/job/script.rb
CHANGED
@@ -95,6 +95,10 @@ module OodCore
|
|
95
95
|
# @return [String, nil] accounting id
|
96
96
|
attr_reader :accounting_id
|
97
97
|
|
98
|
+
# The job array request, commonly in the format '$START-$STOP'
|
99
|
+
# @return [String, nil] job array request
|
100
|
+
attr_reader :job_array_request
|
101
|
+
|
98
102
|
# Object detailing any native specifications that are implementation specific
|
99
103
|
# @note Should not be used at all costs.
|
100
104
|
# @return [Object, nil] native specifications
|
@@ -128,7 +132,7 @@ module OodCore
|
|
128
132
|
job_name: nil, shell_path: nil, input_path: nil,
|
129
133
|
output_path: nil, error_path: nil, reservation_id: nil,
|
130
134
|
queue_name: nil, priority: nil, start_time: nil,
|
131
|
-
wall_time: nil, accounting_id: nil, native: nil, **_)
|
135
|
+
wall_time: nil, accounting_id: nil, job_array_request: nil, native: nil, **_)
|
132
136
|
@content = content.to_s
|
133
137
|
|
134
138
|
@submit_as_hold = submit_as_hold
|
@@ -136,22 +140,23 @@ module OodCore
|
|
136
140
|
@email_on_started = email_on_started
|
137
141
|
@email_on_terminated = email_on_terminated
|
138
142
|
|
139
|
-
@args
|
140
|
-
@job_environment
|
141
|
-
@workdir
|
142
|
-
@email
|
143
|
-
@job_name
|
144
|
-
@shell_path
|
145
|
-
@input_path
|
146
|
-
@output_path
|
147
|
-
@error_path
|
148
|
-
@reservation_id
|
149
|
-
@queue_name
|
150
|
-
@priority
|
151
|
-
@start_time
|
152
|
-
@wall_time
|
153
|
-
@accounting_id
|
154
|
-
@
|
143
|
+
@args = args && args.map(&:to_s)
|
144
|
+
@job_environment = job_environment && job_environment.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
|
145
|
+
@workdir = workdir && Pathname.new(workdir.to_s)
|
146
|
+
@email = email && Array.wrap(email).map(&:to_s)
|
147
|
+
@job_name = job_name && job_name.to_s
|
148
|
+
@shell_path = shell_path && Pathname.new(shell_path.to_s)
|
149
|
+
@input_path = input_path && Pathname.new(input_path.to_s)
|
150
|
+
@output_path = output_path && Pathname.new(output_path.to_s)
|
151
|
+
@error_path = error_path && Pathname.new(error_path.to_s)
|
152
|
+
@reservation_id = reservation_id && reservation_id.to_s
|
153
|
+
@queue_name = queue_name && queue_name.to_s
|
154
|
+
@priority = priority && priority.to_i
|
155
|
+
@start_time = start_time && Time.at(start_time.to_i)
|
156
|
+
@wall_time = wall_time && wall_time.to_i
|
157
|
+
@accounting_id = accounting_id && accounting_id.to_s
|
158
|
+
@job_array_request = job_array_request && job_array_request.to_s
|
159
|
+
@native = native
|
155
160
|
end
|
156
161
|
|
157
162
|
# Convert object to hash
|
@@ -178,6 +183,7 @@ module OodCore
|
|
178
183
|
start_time: start_time,
|
179
184
|
wall_time: wall_time,
|
180
185
|
accounting_id: accounting_id,
|
186
|
+
job_array_request: job_array_request,
|
181
187
|
native: native
|
182
188
|
}
|
183
189
|
end
|
data/lib/ood_core/job/status.rb
CHANGED
@@ -22,14 +22,16 @@ module OodCore
|
|
22
22
|
#
|
23
23
|
# # Job is completed and not running on an execution host
|
24
24
|
# :completed
|
25
|
+
#
|
26
|
+
# @note that this list's order is meaningful and should not be sorted lexicographically
|
25
27
|
def states
|
26
28
|
%i(
|
27
29
|
undetermined
|
28
|
-
|
30
|
+
completed
|
29
31
|
queued_held
|
32
|
+
queued
|
30
33
|
running
|
31
34
|
suspended
|
32
|
-
completed
|
33
35
|
)
|
34
36
|
end
|
35
37
|
end
|
@@ -113,6 +115,17 @@ module OodCore
|
|
113
115
|
self == state
|
114
116
|
end
|
115
117
|
end
|
118
|
+
|
119
|
+
# Position of this status' state within the ordered list returned by the
# class-level `states` method; a later position means a higher precedence.
#
# @return [Integer, nil] index of the state, or nil if it is not in the list
def precedence
  self.class.states.index(@state)
end

# The comparison operator for sorting values.
#
# @param other [Object] the object to compare against
# @return [Integer, nil] comparison value based on precedence, or nil when
#   `other` does not expose a precedence (the usual Ruby convention for
#   incomparable operands, instead of raising NoMethodError)
def <=>(other)
  return nil unless other.respond_to?(:precedence)

  precedence <=> other.precedence
end
|
116
129
|
end
|
117
130
|
end
|
118
131
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module OodCore
  module Job
    # Describes a single child task of a job array: its id and its status.
    class Task
      # The id of the task exactly as it was passed to the constructor
      # @return [Object] task id
      attr_reader :id

      # @return [OodCore::Job::Status] status of the task
      attr_reader :status

      # @param id [Object] task id
      # @param status [Object] task state, handed to OodCore::Job::Status.new
      def initialize(id:, status:, **_)
        # Must be stored in @id: the attr_reader above reads @id, so any other
        # instance variable name would leave #id (and #to_h[:id]) as nil.
        @id = id
        @status = OodCore::Job::Status.new(state: status)
      end

      # Convert object to hash
      # @return [Hash] the id and status of the task
      def to_h
        {
          :id => id,
          :status => status
        }
      end

      # Two tasks are equal when their hash representations are equal.
      # Objects that cannot be converted with #to_h are never equal.
      # @param other [Object] object to compare against
      # @return [Boolean]
      def ==(other)
        other.respond_to?(:to_h) && to_h == other.to_h
      end
    end
  end
end
|
data/lib/ood_core/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2019-01-
|
13
|
+
date: 2019-01-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -147,6 +147,7 @@ files:
|
|
147
147
|
- lib/ood_core/cluster.rb
|
148
148
|
- lib/ood_core/clusters.rb
|
149
149
|
- lib/ood_core/errors.rb
|
150
|
+
- lib/ood_core/job/._task_status.rb
|
150
151
|
- lib/ood_core/job/adapter.rb
|
151
152
|
- lib/ood_core/job/adapters/drmaa.rb
|
152
153
|
- lib/ood_core/job/adapters/helper.rb
|
@@ -165,11 +166,13 @@ files:
|
|
165
166
|
- lib/ood_core/job/adapters/torque/batch.rb
|
166
167
|
- lib/ood_core/job/adapters/torque/error.rb
|
167
168
|
- lib/ood_core/job/adapters/torque/ffi.rb
|
169
|
+
- lib/ood_core/job/array_ids.rb
|
168
170
|
- lib/ood_core/job/factory.rb
|
169
171
|
- lib/ood_core/job/info.rb
|
170
172
|
- lib/ood_core/job/node_info.rb
|
171
173
|
- lib/ood_core/job/script.rb
|
172
174
|
- lib/ood_core/job/status.rb
|
175
|
+
- lib/ood_core/job/task.rb
|
173
176
|
- lib/ood_core/refinements/array_extensions.rb
|
174
177
|
- lib/ood_core/refinements/drmaa_extensions.rb
|
175
178
|
- lib/ood_core/refinements/hash_extensions.rb
|
@@ -195,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
195
198
|
version: '0'
|
196
199
|
requirements: []
|
197
200
|
rubyforge_project:
|
198
|
-
rubygems_version: 2.
|
201
|
+
rubygems_version: 2.7.3
|
199
202
|
signing_key:
|
200
203
|
specification_version: 4
|
201
204
|
summary: Open OnDemand core library
|