ood_core 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/CHANGELOG.md +11 -1
- data/lib/ood_core.rb +1 -0
- data/lib/ood_core/job/adapter.rb +65 -3
- data/lib/ood_core/job/adapters/lsf.rb +6 -2
- data/lib/ood_core/job/adapters/pbspro.rb +6 -2
- data/lib/ood_core/job/adapters/sge.rb +4 -4
- data/lib/ood_core/job/adapters/sge/batch.rb +18 -7
- data/lib/ood_core/job/adapters/sge/helper.rb +1 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +77 -3
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +10 -0
- data/lib/ood_core/job/adapters/slurm.rb +22 -8
- data/lib/ood_core/job/adapters/torque.rb +44 -8
- data/lib/ood_core/job/array_ids.rb +55 -0
- data/lib/ood_core/job/info.rb +22 -2
- data/lib/ood_core/job/script.rb +23 -17
- data/lib/ood_core/job/status.rb +15 -2
- data/lib/ood_core/job/task.rb +24 -0
- data/lib/ood_core/version.rb +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8a526602e6c6b59b6d943d299dc4e442cfd354a768669b4bc03a9423e12cf418
|
4
|
+
data.tar.gz: 5220c4b20c1de287afdcad2eece623952c58aea735c1786f9956912563277e85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f63f8aff330f033ef8fe0dad0d07629e3704463441f1a910920f37a86d48a4cab059182403b9cb6f1bd6a300213b1cff45315b43354fa0d2a9aaaba2f7bc54c8
|
7
|
+
data.tar.gz: d77d8d5130a3f20ac9e54667b10de5a476322c38491f63abe58e0ab192d23b22fc764481254abaf9def825a4ee6707646f1b3c264ed9a5ec4fa76add0f34295a
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.8.0] - 2019-01-29
|
10
|
+
### Added
|
11
|
+
- info_all_each and info_where_owner_each super class methods
|
12
|
+
- job array support for Torque, Slurm, and SGE (currently missing from LSF and PBSPro)
|
13
|
+
- `OodCore::Job::Status#precedence` for the ability to get an overall status for a group of jobs
|
14
|
+
|
15
|
+
### Fixed
|
16
|
+
- Fix SGE adapter to specify `-u '*'` when calling qstat to get all jobs
|
17
|
+
|
9
18
|
## [0.7.1] - 2019-01-11
|
10
19
|
### Fixed
|
11
20
|
- Fixed crash when libdrmaa is used to query for a job no longer in the queue
|
@@ -156,7 +165,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
156
165
|
### Added
|
157
166
|
- Initial release!
|
158
167
|
|
159
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
168
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.8.0...HEAD
|
169
|
+
[0.8.0]: https://github.com/OSC/ood_core/compare/v0.7.1...v0.8.0
|
160
170
|
[0.7.1]: https://github.com/OSC/ood_core/compare/v0.7.0...v0.7.1
|
161
171
|
[0.7.0]: https://github.com/OSC/ood_core/compare/v0.6.0...v0.7.0
|
162
172
|
[0.6.0]: https://github.com/OSC/ood_core/compare/v0.5.1...v0.6.0
|
data/lib/ood_core.rb
CHANGED
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -36,18 +36,80 @@ module OodCore
|
|
36
36
|
# Retrieve info for all jobs from the resource manager
|
37
37
|
# @abstract Subclass is expected to implement {#info_all}
|
38
38
|
# @raise [NotImplementedError] if subclass did not define {#info_all}
|
39
|
+
# @param attrs [Array<Symbol>] defaults to nil (and all attrs are provided)
|
40
|
+
# This array specifies only attrs you want, in addition to id and status.
|
41
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
42
|
+
# to have a value for any attr besides the ones specified and id and status.
|
43
|
+
#
|
44
|
+
# For certain adapters this may speed up the response since
|
45
|
+
# adapters can get by without populating the entire Info object
|
39
46
|
# @return [Array<Info>] information describing submitted jobs
|
40
|
-
def info_all
|
47
|
+
def info_all(attrs: nil)
|
41
48
|
raise NotImplementedError, "subclass did not define #info_all"
|
42
49
|
end
|
43
50
|
|
44
51
|
# Retrieve info for all jobs for a given owner or owners from the
|
45
52
|
# resource manager
|
46
53
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
54
|
+
# @param attrs [Array<Symbol>] defaults to nil (and all attrs are provided)
|
55
|
+
# This array specifies only attrs you want, in addition to id and status.
|
56
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
57
|
+
# to have a value for any attr besides the ones specified and id and status.
|
58
|
+
#
|
59
|
+
# For certain adapters this may speed up the response since
|
60
|
+
# adapters can get by without populating the entire Info object
|
47
61
|
# @return [Array<Info>] information describing submitted jobs
|
48
|
-
def info_where_owner(owner)
|
62
|
+
def info_where_owner(owner, attrs: nil)
|
49
63
|
owner = Array.wrap(owner).map(&:to_s)
|
50
|
-
|
64
|
+
|
65
|
+
# must at least have job_owner to filter by job_owner
|
66
|
+
attrs = Array.wrap(attrs) | [:job_owner] unless attrs.nil?
|
67
|
+
|
68
|
+
info_all(attrs: attrs).select { |info| owner.include? info.job_owner }
|
69
|
+
end
|
70
|
+
|
71
|
+
# Iterate over each job Info object
|
72
|
+
# @param attrs [Array<Symbol>] defaults to nil (and all attrs are provided)
|
73
|
+
# This array specifies only attrs you want, in addition to id and status.
|
74
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
75
|
+
# to have a value for any attr besides the ones specified and id and status.
|
76
|
+
#
|
77
|
+
# For certain adapters this may speed up the response since
|
78
|
+
# adapters can get by without populating the entire Info object
|
79
|
+
# @yield [Info] of each job to block
|
80
|
+
# @return [Enumerator] if no block given
|
81
|
+
def info_all_each(attrs: nil)
|
82
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
83
|
+
|
84
|
+
info_all(attrs: attrs).each do |job|
|
85
|
+
yield job
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# Iterate over each job Info object
|
90
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
91
|
+
# @param attrs [Array<Symbol>] defaults to nil (and all attrs are provided)
|
92
|
+
# This array specifies only attrs you want, in addition to id and status.
|
93
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
94
|
+
# to have a value for any attr besides the ones specified and id and status.
|
95
|
+
#
|
96
|
+
# For certain adapters this may speed up the response since
|
97
|
+
# adapters can get by without populating the entire Info object
|
98
|
+
# @yield [Info] of each job to block
|
99
|
+
# @return [Enumerator] if no block given
|
100
|
+
def info_where_owner_each(owner, attrs: nil)
|
101
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
102
|
+
|
103
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
104
|
+
yield job
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Whether the adapter supports job arrays
|
109
|
+
# @return [Boolean] - assumes true; but can be overridden by adapters that
|
110
|
+
# explicitly do not
|
111
|
+
def supports_job_arrays?
|
112
|
+
true
|
51
113
|
end
|
52
114
|
|
53
115
|
# Retrieve job info from the resource manager
|
@@ -108,7 +108,7 @@ module OodCore
|
|
108
108
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
109
109
|
# @return [Array<Info>] information describing submitted jobs
|
110
110
|
# @see Adapter#info_all
|
111
|
-
def info_all
|
111
|
+
def info_all(attrs: nil)
|
112
112
|
batch.get_jobs.map { |v| info_for_batch_hash(v) }
|
113
113
|
rescue Batch::Error => e
|
114
114
|
raise JobAdapterError, e.message
|
@@ -118,7 +118,7 @@ module OodCore
|
|
118
118
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
119
119
|
# @return [Array<Info>] information describing submitted jobs
|
120
120
|
# @see Adapter#info_where_owner
|
121
|
-
def info_where_owner(owner)
|
121
|
+
def info_where_owner(owner, attrs: nil)
|
122
122
|
owners = Array.wrap(owner).map(&:to_s)
|
123
123
|
if owners.count > 1
|
124
124
|
super
|
@@ -131,6 +131,10 @@ module OodCore
|
|
131
131
|
raise JobAdapterError, e.message
|
132
132
|
end
|
133
133
|
|
134
|
+
def supports_job_arrays?
|
135
|
+
false
|
136
|
+
end
|
137
|
+
|
134
138
|
# Retrieve job status from resource manager
|
135
139
|
# @param id [#to_s] the id of the job
|
136
140
|
# @raise [JobAdapterError] if something goes wrong getting job status
|
@@ -279,7 +279,7 @@ module OodCore
|
|
279
279
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
280
280
|
# @return [Array<Info>] information describing submitted jobs
|
281
281
|
# @see Adapter#info_all
|
282
|
-
def info_all
|
282
|
+
def info_all(attrs: nil)
|
283
283
|
@pbspro.get_jobs.map do |v|
|
284
284
|
parse_job_info(v)
|
285
285
|
end
|
@@ -292,7 +292,7 @@ module OodCore
|
|
292
292
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
293
293
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
294
294
|
# @return [Array<Info>] information describing submitted jobs
|
295
|
-
def info_where_owner(owner)
|
295
|
+
def info_where_owner(owner, attrs: nil)
|
296
296
|
owner = Array.wrap(owner).map(&:to_s)
|
297
297
|
|
298
298
|
usr_jobs = @pbspro.select_jobs(args: ["-u", owner.join(",")])
|
@@ -307,6 +307,10 @@ module OodCore
|
|
307
307
|
end
|
308
308
|
end
|
309
309
|
|
310
|
+
def supports_job_arrays?
|
311
|
+
false
|
312
|
+
end
|
313
|
+
|
310
314
|
# Retrieve job info from the resource manager
|
311
315
|
# @param id [#to_s] the id of the job
|
312
316
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
@@ -90,8 +90,8 @@ module OodCore
|
|
90
90
|
|
91
91
|
# Retrieve info for all jobs from the resource manager
|
92
92
|
# @return [Array<Info>] information describing submitted jobs
|
93
|
-
def info_all
|
94
|
-
@batch.get_all
|
93
|
+
def info_all(attrs: nil)
|
94
|
+
@batch.get_all(owner: '*')
|
95
95
|
rescue Batch::Error => e
|
96
96
|
raise JobAdapterError, e.message
|
97
97
|
end
|
@@ -101,7 +101,7 @@ module OodCore
|
|
101
101
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
102
102
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
103
103
|
# @return [Array<Info>] information describing submitted jobs
|
104
|
-
def info_where_owner(owner)
|
104
|
+
def info_where_owner(owner, attrs: nil)
|
105
105
|
owner = Array.wrap(owner).map(&:to_s).join(',')
|
106
106
|
@batch.get_all(owner: owner)
|
107
107
|
rescue Batch::Error => e
|
@@ -160,4 +160,4 @@ module OodCore
|
|
160
160
|
end
|
161
161
|
end
|
162
162
|
end
|
163
|
-
end
|
163
|
+
end
|
@@ -95,13 +95,7 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
95
95
|
|
96
96
|
job_hash = listener.parsed_job
|
97
97
|
|
98
|
-
|
99
|
-
begin
|
100
|
-
job_hash[:status] = get_status_from_drmma(job_id)
|
101
|
-
rescue DRMAA::DRMAAInvalidArgumentError => e
|
102
|
-
raise Error, e.message
|
103
|
-
end
|
104
|
-
end
|
98
|
+
update_job_hash_status!(job_hash)
|
105
99
|
|
106
100
|
job_info = OodCore::Job::Info.new(**job_hash)
|
107
101
|
rescue REXML::ParseException => e
|
@@ -117,6 +111,22 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
117
111
|
job_info
|
118
112
|
end
|
119
113
|
|
114
|
+
def update_job_hash_status!(job_hash)
|
115
|
+
if get_status_from_drmaa?(job_hash)
|
116
|
+
begin
|
117
|
+
job_hash[:status] = get_status_from_drmma(job_hash[:id])
|
118
|
+
rescue DRMAA::DRMAAInvalidArgumentError => e
|
119
|
+
raise Error, e.message
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
def get_status_from_drmaa?(job_hash)
|
125
|
+
# DRMAA does not recognize the parent task in job arrays
|
126
|
+
# e.g. 123 is invalid if it is an array job, while 123.4 is valid
|
127
|
+
can_use_drmaa? && job_hash[:tasks].empty?
|
128
|
+
end
|
129
|
+
|
120
130
|
def can_use_drmaa?
|
121
131
|
@can_use_drmaa
|
122
132
|
end
|
@@ -212,6 +222,7 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
212
222
|
end
|
213
223
|
|
214
224
|
job_hash[:status] = translate_sge_state(job_hash[:status])
|
225
|
+
update_job_hash_status!(job_hash)
|
215
226
|
|
216
227
|
job_hash
|
217
228
|
end
|
@@ -41,6 +41,7 @@ class OodCore::Job::Adapters::Sge::Helper
|
|
41
41
|
args += ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
|
42
42
|
args += ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
43
43
|
args += ['-P', script.accounting_id] unless script.accounting_id.nil?
|
44
|
+
args += ['-t', script.job_array_request] unless script.job_array_request.nil?
|
44
45
|
args += Array.wrap(script.native) if script.native
|
45
46
|
|
46
47
|
args
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rexml/document'
|
2
2
|
require 'rexml/streamlistener'
|
3
3
|
require 'date'
|
4
|
+
require 'ood_core'
|
5
|
+
require 'ood_core/job/array_ids'
|
4
6
|
|
5
7
|
# An XML stream listener to build an array of OodCore::Job::Info from qstat output
|
6
8
|
#
|
@@ -13,9 +15,7 @@ require 'date'
|
|
13
15
|
# :queue_name
|
14
16
|
# :status
|
15
17
|
# :wallclock_limit
|
16
|
-
|
17
|
-
|
18
|
-
# :wallclock_time # HOW LONG HAS IT BEEN RUNNING?
|
18
|
+
# :wallclock_time
|
19
19
|
|
20
20
|
class QstatXmlJRListener
|
21
21
|
# [Hash]
|
@@ -25,12 +25,28 @@ class QstatXmlJRListener
|
|
25
25
|
|
26
26
|
def initialize
|
27
27
|
@parsed_job = {
|
28
|
+
:tasks => [],
|
28
29
|
:status => :queued,
|
29
30
|
:procs => 1, # un-knowable from SGE qstat output
|
30
31
|
:native => {} # TODO: improve native attribute reporting
|
31
32
|
}
|
32
33
|
@current_text = nil
|
33
34
|
@current_request = nil
|
35
|
+
|
36
|
+
@processing_job_array_spec = false
|
37
|
+
@job_array_spec = {
|
38
|
+
start: nil,
|
39
|
+
stop: nil,
|
40
|
+
step: 1, # Step can have a default of 1
|
41
|
+
}
|
42
|
+
@running_tasks = []
|
43
|
+
end
|
44
|
+
|
45
|
+
def tag_start(name, attrs)
|
46
|
+
case name
|
47
|
+
when 'task_id_range'
|
48
|
+
toggle_processing_array_spec
|
49
|
+
end
|
34
50
|
end
|
35
51
|
|
36
52
|
def tag_end(name)
|
@@ -57,6 +73,18 @@ class QstatXmlJRListener
|
|
57
73
|
end_CE_stringval
|
58
74
|
when 'QR_name'
|
59
75
|
end_QR_name
|
76
|
+
when 'JAT_task_number'
|
77
|
+
end_JAT_task_number
|
78
|
+
when 'djob_info'
|
79
|
+
finalize_parsed_job
|
80
|
+
when 'RN_min'
|
81
|
+
set_job_array_piece(:start)
|
82
|
+
when 'RN_max'
|
83
|
+
set_job_array_piece(:stop)
|
84
|
+
when 'RN_step'
|
85
|
+
set_job_array_piece(:step)
|
86
|
+
when 'task_id_range'
|
87
|
+
toggle_processing_array_spec
|
60
88
|
end
|
61
89
|
end
|
62
90
|
|
@@ -112,5 +140,51 @@ class QstatXmlJRListener
|
|
112
140
|
def end_QR_name
|
113
141
|
@parsed_job[:queue_name] = @current_text
|
114
142
|
end
|
143
|
+
|
144
|
+
# Used to record a running Job Array task
|
145
|
+
def end_JAT_task_number
|
146
|
+
@running_tasks << @current_text
|
147
|
+
end
|
148
|
+
|
149
|
+
def set_job_array_piece(key)
|
150
|
+
@job_array_spec[key] = @current_text if @processing_job_array_spec
|
151
|
+
end
|
152
|
+
|
153
|
+
def spec_string
|
154
|
+
# If any of the job_array_spec values are nil then return a default spec_string
|
155
|
+
if @job_array_spec.values.any? { |value| value.nil? }
|
156
|
+
'1-1:1'
|
157
|
+
else
|
158
|
+
'%{start}-%{stop}:%{step}' % @job_array_spec
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
# Build the task list for a job array: every task recorded as running,
# followed by every task id from the array spec that is greater than the
# highest running task id (reported as queued).
#
# Ids of running tasks are emitted exactly as they were recorded in
# @running_tasks (presumably the text of the JAT_task_number XML nodes);
# ids of queued tasks are the Integers produced by OodCore::Job::ArrayIds.
#
# @return [Array<Hash>] hashes of the form { :id => ..., :status => ... }
def build_tasks
  # Order the recorded ids numerically. A plain sort compares them as text,
  # which puts "10" before "9" and yields the wrong highest running id.
  running_ids = @running_tasks.sort_by(&:to_i)

  # nil.to_i is 0, so with no running tasks every spec id counts as queued.
  highest_id_running = running_ids.last.to_i

  queued_ids = OodCore::Job::ArrayIds.new(spec_string).ids.select do |task_id|
    task_id > highest_id_running
  end

  running_ids.map { |task_id| { :id => task_id, :status => :running } } +
    queued_ids.map { |task_id| { :id => task_id, :status => :queued } }
end
|
174
|
+
|
175
|
+
# Used to finalize the parsed job
|
176
|
+
def finalize_parsed_job
|
177
|
+
@parsed_job[:tasks] = build_tasks if need_to_build_job_array?
|
178
|
+
end
|
179
|
+
|
180
|
+
# The XML output will always contain nodes for task_id_range, even when the
|
181
|
+
# job is not an array job.
|
182
|
+
def need_to_build_job_array?
|
183
|
+
spec_string != '1-1:1'
|
184
|
+
end
|
185
|
+
|
186
|
+
def toggle_processing_array_spec
|
187
|
+
@processing_job_array_spec = ! @processing_job_array_spec
|
188
|
+
end
|
115
189
|
end
|
116
190
|
|
@@ -23,6 +23,7 @@ class QstatXmlRListener
|
|
23
23
|
def initialize
|
24
24
|
@parsed_jobs = []
|
25
25
|
@current_job = {
|
26
|
+
:tasks => [],
|
26
27
|
:native => {} # TODO: improve native reporting
|
27
28
|
}
|
28
29
|
@current_text = nil
|
@@ -61,6 +62,8 @@ class QstatXmlRListener
|
|
61
62
|
end_JAT_start_time
|
62
63
|
when 'hard_request'
|
63
64
|
end_hard_request
|
65
|
+
when 'tasks'
|
66
|
+
add_child_tasks
|
64
67
|
end
|
65
68
|
end
|
66
69
|
|
@@ -131,8 +134,15 @@ class QstatXmlRListener
|
|
131
134
|
def end_job_list
|
132
135
|
@parsed_jobs << @current_job
|
133
136
|
@current_job = {
|
137
|
+
:tasks => [],
|
134
138
|
:native => {}
|
135
139
|
}
|
136
140
|
end
|
141
|
+
|
142
|
+
def add_child_tasks
|
143
|
+
@current_job[:tasks] = OodCore::Job::ArrayIds.new(@current_text).ids.sort.map{
|
144
|
+
|task_id| { :id => task_id, :status => :queued }
|
145
|
+
}
|
146
|
+
end
|
137
147
|
end
|
138
148
|
|
@@ -292,6 +292,7 @@ module OodCore
|
|
292
292
|
args += ["--begin", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
|
293
293
|
args += ["-A", script.accounting_id] unless script.accounting_id.nil?
|
294
294
|
args += ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
295
|
+
args += ['-a', script.job_array_request] unless script.job_array_request.nil?
|
295
296
|
# ignore nodes, don't know how to do this for slurm
|
296
297
|
|
297
298
|
# Set dependencies
|
@@ -326,7 +327,7 @@ module OodCore
|
|
326
327
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
327
328
|
# @return [Array<Info>] information describing submitted jobs
|
328
329
|
# @see Adapter#info_all
|
329
|
-
def info_all
|
330
|
+
def info_all(attrs: nil)
|
330
331
|
@slurm.get_jobs.map do |v|
|
331
332
|
parse_job_info(v)
|
332
333
|
end
|
@@ -345,13 +346,8 @@ module OodCore
|
|
345
346
|
parse_job_info(v)
|
346
347
|
end
|
347
348
|
|
348
|
-
#
|
349
|
-
|
350
|
-
# given job id (if we can't find it, we assume it has completed)
|
351
|
-
info_ary.detect( -> { Info.new(id: id, status: :completed) } ) do |info|
|
352
|
-
# Match the job id or the formatted job & task id "1234_0"
|
353
|
-
info.id == id || info.native[:array_job_task_id] == id
|
354
|
-
end
|
349
|
+
# If no job was found we assume that it has completed
|
350
|
+
info_ary.empty? ? Info.new(id: id, status: :completed) : handle_job_array(info_ary, id)
|
355
351
|
rescue Batch::Error => e
|
356
352
|
# set completed status if can't find job id
|
357
353
|
if /Invalid job id specified/ =~ e.message
|
@@ -500,6 +496,24 @@ module OodCore
|
|
500
496
|
native: v
|
501
497
|
)
|
502
498
|
end
|
499
|
+
|
500
|
+
def handle_job_array(info_ary, id)
|
501
|
+
# If only one job was returned we return it
|
502
|
+
return info_ary.first unless info_ary.length > 1
|
503
|
+
|
504
|
+
parent_task_hash = {:tasks => []}
|
505
|
+
|
506
|
+
info_ary.map do |task_info|
|
507
|
+
parent_task_hash[:tasks] << {:id => task_info.id, :status => task_info.status}
|
508
|
+
|
509
|
+
if task_info.id == id || task_info.native[:array_job_task_id] == id
|
510
|
+
# Merge hashes without clobbering the child tasks
|
511
|
+
parent_task_hash.merge!(task_info.to_h.select{|k, v| k != :tasks})
|
512
|
+
end
|
513
|
+
end
|
514
|
+
|
515
|
+
Info.new(**parent_task_hash)
|
516
|
+
end
|
503
517
|
end
|
504
518
|
end
|
505
519
|
end
|
@@ -113,6 +113,7 @@ module OodCore
|
|
113
113
|
headers.merge!(Execution_Time: script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")) unless script.start_time.nil?
|
114
114
|
headers.merge!(Account_Name: script.accounting_id) unless script.accounting_id.nil?
|
115
115
|
headers.merge!(depend: depend.join(',')) unless depend.empty?
|
116
|
+
headers.merge!(job_array_request: script.job_array_request) unless script.job_array_request.nil?
|
116
117
|
|
117
118
|
# Set resources
|
118
119
|
resources = {}
|
@@ -150,7 +151,7 @@ module OodCore
|
|
150
151
|
args += ["-A", script.accounting_id] unless script.accounting_id.nil?
|
151
152
|
args += ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
|
152
153
|
args += ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
|
153
|
-
|
154
|
+
args += ['-t', script.job_array_request] unless script.job_array_request.nil?
|
154
155
|
# Set environment variables
|
155
156
|
env = script.job_environment.to_h
|
156
157
|
args += ["-v", env.keys.join(",")] unless env.empty?
|
@@ -173,7 +174,7 @@ module OodCore
|
|
173
174
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
174
175
|
# @return [Array<Info>] information describing submitted jobs
|
175
176
|
# @see Adapter#info_all
|
176
|
-
def info_all
|
177
|
+
def info_all(attrs: nil)
|
177
178
|
@pbs.get_jobs.map do |k, v|
|
178
179
|
parse_job_info(k, v)
|
179
180
|
end
|
@@ -186,7 +187,7 @@ module OodCore
|
|
186
187
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
187
188
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
188
189
|
# @return [Array<Info>] information describing submitted jobs
|
189
|
-
def info_where_owner(owner)
|
190
|
+
def info_where_owner(owner, attrs: nil)
|
190
191
|
owner = Array.wrap(owner).map(&:to_s)
|
191
192
|
@pbs.select_jobs(
|
192
193
|
attribs: [
|
@@ -206,7 +207,13 @@ module OodCore
|
|
206
207
|
# @see Adapter#info
|
207
208
|
def info(id)
|
208
209
|
id = id.to_s
|
209
|
-
|
210
|
+
result = @pbs.get_job(id)
|
211
|
+
|
212
|
+
if result.keys.length == 1
|
213
|
+
parse_job_info(*result.flatten)
|
214
|
+
else
|
215
|
+
parse_job_array(id, result)
|
216
|
+
end
|
210
217
|
rescue Torque::FFI::UnkjobidError
|
211
218
|
# set completed status if can't find job id
|
212
219
|
Info.new(
|
@@ -224,8 +231,13 @@ module OodCore
|
|
224
231
|
# @see Adapter#status
|
225
232
|
def status(id)
|
226
233
|
id = id.to_s
|
227
|
-
|
228
|
-
|
234
|
+
@pbs.get_job(id, filters: [:job_state]).values.map {
|
235
|
+
|job_status| OodCore::Job::Status.new(
|
236
|
+
state: STATE_MAP.fetch(
|
237
|
+
job_status[:job_state], :undetermined
|
238
|
+
)
|
239
|
+
)
|
240
|
+
}.max
|
229
241
|
rescue Torque::FFI::UnkjobidError
|
230
242
|
# set completed status if can't find job id
|
231
243
|
Status.new(state: :completed)
|
@@ -300,8 +312,31 @@ module OodCore
|
|
300
312
|
end
|
301
313
|
end
|
302
314
|
|
315
|
+
def parse_job_array(parent_id, result)
|
316
|
+
results = result.to_a
|
317
|
+
|
318
|
+
parse_job_info(
|
319
|
+
parent_id,
|
320
|
+
results.first.last.tap { |info_hash| info_hash[:exec_host] = aggregate_exec_host(results) },
|
321
|
+
tasks: generate_task_list(results)
|
322
|
+
)
|
323
|
+
end
|
324
|
+
|
325
|
+
def aggregate_exec_host(results)
|
326
|
+
results.map { |k,v| v[:exec_host] }.compact.sort.uniq.join("+")
|
327
|
+
end
|
328
|
+
|
329
|
+
def generate_task_list(results)
|
330
|
+
results.map do |k, v|
|
331
|
+
{
|
332
|
+
:id => k,
|
333
|
+
:status => STATE_MAP.fetch(v[:job_state], :undetermined)
|
334
|
+
}
|
335
|
+
end
|
336
|
+
end
|
337
|
+
|
303
338
|
# Parse hash describing PBS job status
|
304
|
-
def parse_job_info(k, v)
|
339
|
+
def parse_job_info(k, v, tasks: [])
|
305
340
|
/^(?<job_owner>[\w-]+)@/ =~ v[:Job_Owner]
|
306
341
|
allocated_nodes = parse_nodes(v[:exec_host] || "")
|
307
342
|
procs = allocated_nodes.inject(0) { |sum, x| sum + x[:procs] }
|
@@ -329,7 +364,8 @@ module OodCore
|
|
329
364
|
cpu_time: duration_in_seconds(v.fetch(:resources_used, {})[:cput]),
|
330
365
|
submission_time: v[:ctime],
|
331
366
|
dispatch_time: v[:start_time],
|
332
|
-
native: v
|
367
|
+
native: v,
|
368
|
+
tasks: tasks
|
333
369
|
)
|
334
370
|
end
|
335
371
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Builds a sorted array of job ids given a job array spec string
|
2
|
+
#
|
3
|
+
# Job array spec strings:
|
4
|
+
# 1 Single id
|
5
|
+
# 1-10 Range
|
6
|
+
# 1-10:2 Range with step
|
7
|
+
# 1-10,13 Compound (range with single id)
|
8
|
+
#
|
9
|
+
# Note that Ranges are expected to be inclusive
|
10
|
+
module OodCore
  module Job
    # Expands a job array spec string (e.g. "1-10:2,13%4") into the sorted
    # list of Integer task ids it describes.
    class ArrayIds
      # @return [Array<Integer>] task ids in ascending order
      attr_reader :ids

      # @param spec_string [#to_s, nil] job array spec; nil or an empty
      #   string yields an empty id list
      def initialize(spec_string)
        @ids = []
        parse_spec_string(spec_string)
      end

      protected

      # Expand every comma separated component and store the sorted result.
      # flat_map is used so a spec without components produces [] rather
      # than nil.
      def parse_spec_string(spec_string)
        @ids = get_components(spec_string).flat_map { |component|
          process_component(component)
        }.sort
      end

      # Split the spec into its comma separated components, ignoring blank
      # ones so that "1,,2" does not produce a bogus id of 0.
      # @return [Array<String>]
      def get_components(spec_string)
        discard_percent_modifier(spec_string).split(',').map(&:strip).reject(&:empty?)
      end

      # A few adapters use percent to define an array's maximum number of
      # simultaneous tasks. The percent is expected to come at the end.
      # @return [String] the spec without the percent modifier ("" when the
      #   spec is nil or empty)
      def discard_percent_modifier(spec_string)
        spec_string.to_s.split('%').first.to_s
      end

      # @return [Array<Integer>] the ids described by a single component
      def process_component(component)
        is_range?(component) ? get_range(component) : [ component.to_i ]
      end

      # Expand "START-STOP" or "START-STOP:STEP" into an inclusive list of ids.
      # @return [Array<Integer>]
      def get_range(component)
        raw_range, raw_step = component.split(':')
        start, stop = raw_range.split('-').map(&:to_i)
        step = raw_step.to_i
        # A missing or unparsable step converts to 0; fall back to 1
        step = 1 if step.zero?

        Range.new(start, stop).step(step).to_a
      end

      def is_range?(component)
        component.include?('-')
      end
    end
  end
end
|
data/lib/ood_core/job/info.rb
CHANGED
@@ -65,6 +65,11 @@ module OodCore
|
|
65
65
|
# @return [Object] native info
|
66
66
|
attr_reader :native
|
67
67
|
|
68
|
+
# List of job array child task statuses
|
69
|
+
# @note only relevant for job arrays
|
70
|
+
# @return [Array<Task>] tasks
|
71
|
+
attr_reader :tasks
|
72
|
+
|
68
73
|
# @param id [#to_s] job id
|
69
74
|
# @param status [#to_sym] job state
|
70
75
|
# @param allocated_nodes [Array<#to_h>] allocated nodes
|
@@ -79,12 +84,14 @@ module OodCore
|
|
79
84
|
# @param cpu_time [#to_i, nil] cpu time
|
80
85
|
# @param submission_time [#to_i, nil] submission time
|
81
86
|
# @param dispatch_time [#to_i, nil] dispatch time
|
87
|
+
# @param tasks [Array<Hash>] tasks e.g. { id: '12345.owens-batch', status: :running }
|
82
88
|
# @param native [Object] native info
|
83
89
|
def initialize(id:, status:, allocated_nodes: [], submit_host: nil,
|
84
90
|
job_name: nil, job_owner: nil, accounting_id: nil,
|
85
91
|
procs: nil, queue_name: nil, wallclock_time: nil,
|
86
92
|
wallclock_limit: nil, cpu_time: nil, submission_time: nil,
|
87
|
-
dispatch_time: nil, native: nil,
|
93
|
+
dispatch_time: nil, native: nil, tasks: [],
|
94
|
+
**_)
|
88
95
|
@id = id.to_s
|
89
96
|
@status = Status.new(state: status.to_sym)
|
90
97
|
@allocated_nodes = allocated_nodes.map { |n| NodeInfo.new(n.to_h) }
|
@@ -99,6 +106,10 @@ module OodCore
|
|
99
106
|
@cpu_time = cpu_time && cpu_time.to_i
|
100
107
|
@submission_time = submission_time && Time.at(submission_time.to_i)
|
101
108
|
@dispatch_time = dispatch_time && Time.at(dispatch_time.to_i)
|
109
|
+
@tasks = tasks.map {|task_status| Task.new(**task_status)}
|
110
|
+
|
111
|
+
@status = job_array_aggregate_status unless @tasks.empty?
|
112
|
+
|
102
113
|
@native = native
|
103
114
|
end
|
104
115
|
|
@@ -120,7 +131,8 @@ module OodCore
|
|
120
131
|
cpu_time: cpu_time,
|
121
132
|
submission_time: submission_time,
|
122
133
|
dispatch_time: dispatch_time,
|
123
|
-
native: native
|
134
|
+
native: native,
|
135
|
+
tasks: tasks
|
124
136
|
}
|
125
137
|
end
|
126
138
|
|
@@ -143,6 +155,14 @@ module OodCore
|
|
143
155
|
def hash
|
144
156
|
[self.class, to_h].hash
|
145
157
|
end
|
158
|
+
|
159
|
+
private
|
160
|
+
|
161
|
+
# Generate an aggregate status from child tasks
|
162
|
+
# @return [OodCore::Job::Status]
|
163
|
+
def job_array_aggregate_status
|
164
|
+
@tasks.map { |task_status| task_status.status }.max
|
165
|
+
end
|
146
166
|
end
|
147
167
|
end
|
148
168
|
end
|
data/lib/ood_core/job/script.rb
CHANGED
@@ -95,6 +95,10 @@ module OodCore
|
|
95
95
|
# @return [String, nil] accounting id
|
96
96
|
attr_reader :accounting_id
|
97
97
|
|
98
|
+
# The job array request, commonly in the format '$START-$STOP'
|
99
|
+
# @return [String, nil] job array request
|
100
|
+
attr_reader :job_array_request
|
101
|
+
|
98
102
|
# Object detailing any native specifications that are implementation specific
|
99
103
|
# @note Should be avoided at all costs.
|
100
104
|
# @return [Object, nil] native specifications
|
@@ -128,7 +132,7 @@ module OodCore
|
|
128
132
|
job_name: nil, shell_path: nil, input_path: nil,
|
129
133
|
output_path: nil, error_path: nil, reservation_id: nil,
|
130
134
|
queue_name: nil, priority: nil, start_time: nil,
|
131
|
-
wall_time: nil, accounting_id: nil, native: nil, **_)
|
135
|
+
wall_time: nil, accounting_id: nil, job_array_request: nil, native: nil, **_)
|
132
136
|
@content = content.to_s
|
133
137
|
|
134
138
|
@submit_as_hold = submit_as_hold
|
@@ -136,22 +140,23 @@ module OodCore
|
|
136
140
|
@email_on_started = email_on_started
|
137
141
|
@email_on_terminated = email_on_terminated
|
138
142
|
|
139
|
-
@args
|
140
|
-
@job_environment
|
141
|
-
@workdir
|
142
|
-
@email
|
143
|
-
@job_name
|
144
|
-
@shell_path
|
145
|
-
@input_path
|
146
|
-
@output_path
|
147
|
-
@error_path
|
148
|
-
@reservation_id
|
149
|
-
@queue_name
|
150
|
-
@priority
|
151
|
-
@start_time
|
152
|
-
@wall_time
|
153
|
-
@accounting_id
|
154
|
-
@
|
143
|
+
@args = args && args.map(&:to_s)
|
144
|
+
@job_environment = job_environment && job_environment.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
|
145
|
+
@workdir = workdir && Pathname.new(workdir.to_s)
|
146
|
+
@email = email && Array.wrap(email).map(&:to_s)
|
147
|
+
@job_name = job_name && job_name.to_s
|
148
|
+
@shell_path = shell_path && Pathname.new(shell_path.to_s)
|
149
|
+
@input_path = input_path && Pathname.new(input_path.to_s)
|
150
|
+
@output_path = output_path && Pathname.new(output_path.to_s)
|
151
|
+
@error_path = error_path && Pathname.new(error_path.to_s)
|
152
|
+
@reservation_id = reservation_id && reservation_id.to_s
|
153
|
+
@queue_name = queue_name && queue_name.to_s
|
154
|
+
@priority = priority && priority.to_i
|
155
|
+
@start_time = start_time && Time.at(start_time.to_i)
|
156
|
+
@wall_time = wall_time && wall_time.to_i
|
157
|
+
@accounting_id = accounting_id && accounting_id.to_s
|
158
|
+
@job_array_request = job_array_request && job_array_request.to_s
|
159
|
+
@native = native
|
155
160
|
end
|
156
161
|
|
157
162
|
# Convert object to hash
|
@@ -178,6 +183,7 @@ module OodCore
|
|
178
183
|
start_time: start_time,
|
179
184
|
wall_time: wall_time,
|
180
185
|
accounting_id: accounting_id,
|
186
|
+
job_array_request: job_array_request,
|
181
187
|
native: native
|
182
188
|
}
|
183
189
|
end
|
data/lib/ood_core/job/status.rb
CHANGED
@@ -22,14 +22,16 @@ module OodCore
|
|
22
22
|
#
|
23
23
|
# # Job is completed and not running on an execution host
|
24
24
|
# :completed
|
25
|
+
#
|
26
|
+
# @note that this list's order is meaningful and should not be sorted lexicographically
|
25
27
|
def states
|
26
28
|
%i(
|
27
29
|
undetermined
|
28
|
-
|
30
|
+
completed
|
29
31
|
queued_held
|
32
|
+
queued
|
30
33
|
running
|
31
34
|
suspended
|
32
|
-
completed
|
33
35
|
)
|
34
36
|
end
|
35
37
|
end
|
@@ -113,6 +115,17 @@ module OodCore
|
|
113
115
|
self == state
|
114
116
|
end
|
115
117
|
end
|
118
|
+
|
119
|
+
def precedence
|
120
|
+
self.class.states.index(@state)
|
121
|
+
end
|
122
|
+
|
123
|
+
# The comparison operator for sorting values.
|
124
|
+
#
|
125
|
+
# @return [Integer] Comparison value based on precedence
|
126
|
+
def <=>(other)
|
127
|
+
precedence <=> other.precedence
|
128
|
+
end
|
116
129
|
end
|
117
130
|
end
|
118
131
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module OodCore
  module Job
    # A single child task of a job array: its id paired with its status.
    class Task
      # @return [Object] the task id exactly as passed to the constructor
      attr_reader :id

      # @return [OodCore::Job::Status] status of this task
      attr_reader :status

      # @param id [Object] task id
      # @param status [Object] state handed to OodCore::Job::Status.new
      #   (presumably a Symbol such as :running -- confirm against Status)
      def initialize(id:, status:, **_)
        # Must be @id: that is the variable attr_reader :id exposes
        @id = id
        @status = OodCore::Job::Status.new(state: status)
      end

      # Convert object to hash
      # @return [Hash] the id and status of this task
      def to_h
        {
          :id => id,
          :status => status
        }
      end

      # Two tasks are equal when their hash representations are equal
      # @param other [#to_h] object to compare against
      # @return [Boolean]
      def ==(other)
        to_h == other.to_h
      end
    end
  end
end
|
data/lib/ood_core/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2019-01-
|
13
|
+
date: 2019-01-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -147,6 +147,7 @@ files:
|
|
147
147
|
- lib/ood_core/cluster.rb
|
148
148
|
- lib/ood_core/clusters.rb
|
149
149
|
- lib/ood_core/errors.rb
|
150
|
+
- lib/ood_core/job/._task_status.rb
|
150
151
|
- lib/ood_core/job/adapter.rb
|
151
152
|
- lib/ood_core/job/adapters/drmaa.rb
|
152
153
|
- lib/ood_core/job/adapters/helper.rb
|
@@ -165,11 +166,13 @@ files:
|
|
165
166
|
- lib/ood_core/job/adapters/torque/batch.rb
|
166
167
|
- lib/ood_core/job/adapters/torque/error.rb
|
167
168
|
- lib/ood_core/job/adapters/torque/ffi.rb
|
169
|
+
- lib/ood_core/job/array_ids.rb
|
168
170
|
- lib/ood_core/job/factory.rb
|
169
171
|
- lib/ood_core/job/info.rb
|
170
172
|
- lib/ood_core/job/node_info.rb
|
171
173
|
- lib/ood_core/job/script.rb
|
172
174
|
- lib/ood_core/job/status.rb
|
175
|
+
- lib/ood_core/job/task.rb
|
173
176
|
- lib/ood_core/refinements/array_extensions.rb
|
174
177
|
- lib/ood_core/refinements/drmaa_extensions.rb
|
175
178
|
- lib/ood_core/refinements/hash_extensions.rb
|
@@ -195,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
195
198
|
version: '0'
|
196
199
|
requirements: []
|
197
200
|
rubyforge_project:
|
198
|
-
rubygems_version: 2.
|
201
|
+
rubygems_version: 2.7.3
|
199
202
|
signing_key:
|
200
203
|
specification_version: 4
|
201
204
|
summary: Open OnDemand core library
|