ood_core 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 8a526602e6c6b59b6d943d299dc4e442cfd354a768669b4bc03a9423e12cf418
4
- data.tar.gz: 5220c4b20c1de287afdcad2eece623952c58aea735c1786f9956912563277e85
2
+ SHA1:
3
+ metadata.gz: ac5caf10cd563acf0e8ef6a4b7d421b5718dc097
4
+ data.tar.gz: c9e401652e388868a2d583751ef94d50ccb2f22a
5
5
  SHA512:
6
- metadata.gz: f63f8aff330f033ef8fe0dad0d07629e3704463441f1a910920f37a86d48a4cab059182403b9cb6f1bd6a300213b1cff45315b43354fa0d2a9aaaba2f7bc54c8
7
- data.tar.gz: d77d8d5130a3f20ac9e54667b10de5a476322c38491f63abe58e0ab192d23b22fc764481254abaf9def825a4ee6707646f1b3c264ed9a5ec4fa76add0f34295a
6
+ metadata.gz: db745be6e2bcc4a7c4bfcd31d0a47c50bb948be84d24c2bc5f45c7bab6bbf46e22d82ed86120087ce6c0e6d554d323acef53b4adde3ffc7eb801216cc419f986
7
+ data.tar.gz: '096513b3c128b32c81b19784ef56164ef74e158dbd72d37b47d51063cd6b89daac03a00abf632421cf4e3687390930b5bb3c3771b5eb7290c91fb54a2757bf21'
@@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6
6
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7
7
 
8
8
  ## [Unreleased]
9
+ ## [0.9.0] - 2019-05-04
10
+ ### Added
11
+ - Job array support for LSF and PBSPro
12
+ - Slurm adapter uses `squeue` owner filter (`-u`) for `info_where_owner`
13
+
14
+ ### Fixed
15
+ - Grid Engine adapter now starts scripts in the current directory like all other adapters
16
+ - Fixed issue where Slurm comment field might break job info parsing
17
+ - Fixed possible crash when comparing two clusters if the id of one of the clusters is nil
18
+ - Fixed bug with the live system test that impacted non-Torque systems
19
+ - Fixed bug with Slurm adapter when submit time is not available
20
+
9
21
  ## [0.8.0] - 2019-01-29
10
22
  ### Added
11
23
  - info_all_each and info_where_owner_each super class methods
@@ -165,7 +177,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
165
177
  ### Added
166
178
  - Initial release!
167
179
 
168
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.8.0...HEAD
180
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.9.0...HEAD
181
+ [0.9.0]: https://github.com/OSC/ood_core/compare/v0.8.0...v0.9.0
169
182
  [0.8.0]: https://github.com/OSC/ood_core/compare/v0.7.1...v0.8.0
170
183
  [0.7.1]: https://github.com/OSC/ood_core/compare/v0.7.0...v0.7.1
171
184
  [0.7.0]: https://github.com/OSC/ood_core/compare/v0.6.0...v0.7.0
@@ -136,7 +136,7 @@ module OodCore
136
136
  # @param other [#to_sym] object to compare against
137
137
  # @return [Boolean] whether objects are equivalent
138
138
  def ==(other)
139
- id == other.to_sym
139
+ id == other.to_s.to_sym
140
140
  end
141
141
 
142
142
  # Convert object to symbol
@@ -90,16 +90,8 @@ module OodCore
90
90
  # @return [Info] information describing submitted job
91
91
  # @see Adapter#info
92
92
  def info(id)
93
- # TODO: handle job arrays
94
- job = batch.get_job(id: id)
95
- if job
96
- info_for_batch_hash(job)
97
- else
98
- Info.new(
99
- id: id,
100
- status: :completed
101
- )
102
- end
93
+ info_ary = batch.get_job(id: id).map{|v| info_for_batch_hash(v)}
94
+ handle_job_array(info_ary, id)
103
95
  rescue Batch::Error => e
104
96
  raise JobAdapterError, e.message
105
97
  end
@@ -131,19 +123,13 @@ module OodCore
131
123
  raise JobAdapterError, e.message
132
124
  end
133
125
 
134
- def supports_job_arrays?
135
- false
136
- end
137
-
138
126
  # Retrieve job status from resource manager
139
127
  # @param id [#to_s] the id of the job
140
128
  # @raise [JobAdapterError] if something goes wrong getting job status
141
129
  # @return [Status] status of job
142
130
  # @see Adapter#status
143
131
  def status(id)
144
- job = batch.get_job(id: id)
145
- state = job ? get_state(job[:status]) : :completed
146
- Status.new(state: state)
132
+ info(id).status
147
133
  rescue Batch::Error => e
148
134
  raise JobAdapterError, e.message
149
135
  end
@@ -196,8 +182,11 @@ module OodCore
196
182
  dispatch_time = helper.parse_past_time(v[:start_time], ignore_errors: true)
197
183
  finish_time = helper.parse_past_time(v[:finish_time], ignore_errors: true)
198
184
 
185
+ # Detect job array index from name
186
+ array_index = /(\[\d+\])$/.match(v[:name])
187
+
199
188
  Info.new(
200
- id: v[:id],
189
+ id: (array_index) ? "#{v[:id]}#{array_index[1]}" : v[:id],
201
190
  status: get_state(v[:status]),
202
191
  allocated_nodes: nodes,
203
192
  submit_host: v[:from_host],
@@ -214,6 +203,40 @@ module OodCore
214
203
  native: v
215
204
  )
216
205
  end
206
+
207
+ def handle_job_array(info_ary, id)
208
+ return Info.new(id: id, status: :completed) if info_ary.nil? || info_ary.empty?
209
+ return info_ary.first if info_ary.size == 1
210
+
211
+ parent_task_hash = build_proxy_parent(info_ary.first, id)
212
+
213
+ info_ary.map do |task_info|
214
+ parent_task_hash[:tasks] << {:id => task_info.id, :status => task_info.status}
215
+ end
216
+
217
+ parent_task_hash[:status] = parent_task_hash[:tasks].map{|task| task[:status]}.max
218
+
219
+ Info.new(**parent_task_hash)
220
+ end
221
+
222
+ # Proxy the first element as the parent hash, deleting the attributes that are not shared by all tasks
223
+ def build_proxy_parent(info, id)
224
+ info.to_h.merge({
225
+ :tasks => [],
226
+ :id => id
227
+ }).delete_if{
228
+ |key, _| [
229
+ :allocated_nodes, :dispatch_time,
230
+ :cpu_time, :wallclock_time, :status
231
+ ].include?(key)
232
+ }.tap{
233
+ # Remove the child array index from the :job_name
234
+
235
+ # Note that a true representation of the parent should have the
236
+ # full array spec in the name. Worth attempting to reconstruct?
237
+ |h| h[:job_name] = h[:job_name].gsub(/\[[^\]]+\]/, '')
238
+ }
239
+ end
217
240
  end
218
241
  end
219
242
  end
@@ -43,10 +43,10 @@ class OodCore::Job::Adapters::Lsf::Batch
43
43
  # Get hash detailing the specified job
44
44
  # @param id [#to_s] the id of the job to check
45
45
  # @raise [Error] if `bjobs` command exited unsuccessfully
46
- # @return [Hash] details of specified job
46
+ # @return [Array<Hash>] details of specified job
47
47
  def get_job(id:)
48
48
  args = %W( -a -w -W #{id.to_s} )
49
- parse_bjobs_output(call("bjobs", *args)).first
49
+ parse_bjobs_output(call("bjobs", *args))
50
50
  end
51
51
 
52
52
  # status fields available from bjobs
@@ -81,6 +81,8 @@ class OodCore::Job::Adapters::Lsf::Helper
81
81
  args += ["-P", script.accounting_id] unless script.accounting_id.nil?
82
82
  args += ["-cwd", script.workdir.to_s] unless script.workdir.nil?
83
83
  args += ["-J", script.job_name] unless script.job_name.nil?
84
+ args[-1] += "[#{script.job_array_request}]" unless script.job_array_request.nil?
85
+
84
86
  args += ["-q", script.queue_name] unless script.queue_name.nil?
85
87
  args += ["-U", script.reservation_id] unless script.reservation_id.nil?
86
88
  args += ["-sp", script.priority] unless script.priority.nil?
@@ -86,8 +86,7 @@ module OodCore
86
86
  # @raise [Error] if `qstat` command exited unsuccessfully
87
87
  # @return [Array<Hash>] list of details for jobs
88
88
  def get_jobs(id: "")
89
- args = ["-f"] # display all information
90
- args += ["-t"] # list subjobs
89
+ args = ["-f", "-t"] # display all information
91
90
  args += [id.to_s] unless id.to_s.empty?
92
91
  lines = call("qstat", *args).gsub("\n\t", "").split("\n").map(&:strip)
93
92
 
@@ -101,7 +100,8 @@ module OodCore
101
100
  k2 ? ( hsh[k1] ||= {} and hsh[k1][k2] = value ) : ( hsh[k1] = value )
102
101
  end
103
102
  end
104
- jobs.reject { |j| /\[\]/ =~ j[:job_id] } # drop main job array jobs
103
+
104
+ jobs
105
105
  end
106
106
 
107
107
  # Select batch jobs from the batch server
@@ -181,8 +181,8 @@ module OodCore
181
181
  'U' => :suspended, # cycle-harvesting job is suspended due to keyboard activity
182
182
  'E' => :running, # job is exiting after having run
183
183
  'F' => :completed, # job is finished
184
- 'X' => :completed # subjob has completed execution or has been deleted
185
- # ignore B as it signifies a job array
184
+ 'X' => :completed, # subjob has completed execution or has been deleted
185
+ 'B' => :running # job array has at least one child running
186
186
  }
187
187
 
188
188
  # What percentage of jobs a user owns out of all jobs, used to decide
@@ -266,6 +266,8 @@ module OodCore
266
266
  # mimics what the other resource managers do)
267
267
  args += ["-j", "oe"] if script.error_path.nil?
268
268
 
269
+ args += ["-J", script.job_array_request] unless script.job_array_request.nil?
270
+
269
271
  # Set native options
270
272
  args += script.native if script.native
271
273
 
@@ -303,13 +305,21 @@ module OodCore
303
305
  if usr_jobs.size > (qstat_factor * all_jobs.size)
304
306
  super
305
307
  else
306
- usr_jobs.map { |id| info(id) }
307
- end
308
- end
308
+ begin
309
+ user_job_infos = []
310
+ usr_jobs.each do |id|
311
+ job = info(id)
312
+ user_job_infos << job
309
313
 
310
- def supports_job_arrays?
311
- false
314
+ job.tasks.each {|task| user_job_infos << job.build_child_info(task)}
315
+ end
316
+
317
+ user_job_infos
318
+ rescue Batch::Error => e
319
+ raise JobAdapterError, e.message
320
+ end
312
321
  end
322
+ end
313
323
 
314
324
  # Retrieve job info from the resource manager
315
325
  # @param id [#to_s] the id of the job
@@ -318,9 +328,18 @@ module OodCore
318
328
  # @see Adapter#info
319
329
  def info(id)
320
330
  id = id.to_s
321
- @pbspro.get_jobs(id: id).map do |v|
331
+
332
+ job_infos = @pbspro.get_jobs(id: id).map do |v|
322
333
  parse_job_info(v)
323
- end.first || Info.new(id: id, status: :completed)
334
+ end
335
+
336
+ if job_infos.empty?
337
+ Info.new(id: id, status: :completed)
338
+ elsif job_infos.length == 1
339
+ job_infos.first
340
+ else
341
+ process_job_array(id, job_infos)
342
+ end
324
343
  rescue Batch::Error => e
325
344
  # set completed status if can't find job id
326
345
  if /Unknown Job Id/ =~ e.message || /Job has finished/ =~ e.message
@@ -434,6 +453,23 @@ module OodCore
434
453
  native: v
435
454
  )
436
455
  end
456
+
457
+ # Combine the array parent with the states of its children
458
+ def process_job_array(id, jobs)
459
+ parent_job = jobs.select { |j| /\[\]/ =~ j.id }.first
460
+ parent = (parent_job) ? parent_job.to_h : {:id => id, :status => :undetermined}
461
+
462
+ # create task hashes from children
463
+ parent[:tasks] = jobs.reject { |j| /\[\]/ =~ j.id }.map do |j|
464
+ {
465
+ :id => j.id,
466
+ :status => j.status.to_sym,
467
+ :wallclock_time => j.wallclock_time
468
+ }
469
+ end
470
+
471
+ Info.new(**parent)
472
+ end
437
473
  end
438
474
  end
439
475
  end
@@ -95,7 +95,12 @@ class OodCore::Job::Adapters::Sge::Batch
95
95
 
96
96
  job_hash = listener.parsed_job
97
97
 
98
- update_job_hash_status!(job_hash)
98
+ if job_hash[:id]
99
+ update_job_hash_status!(job_hash)
100
+ else
101
+ job_hash[:id] = job_id
102
+ job_hash[:status] = :completed
103
+ end
99
104
 
100
105
  job_info = OodCore::Job::Info.new(**job_hash)
101
106
  rescue REXML::ParseException => e
@@ -115,8 +120,8 @@ class OodCore::Job::Adapters::Sge::Batch
115
120
  if get_status_from_drmaa?(job_hash)
116
121
  begin
117
122
  job_hash[:status] = get_status_from_drmma(job_hash[:id])
118
- rescue DRMAA::DRMAAInvalidArgumentError => e
119
- raise Error, e.message
123
+ rescue DRMAA::DRMAAException => e
124
+ # log DRMAA error?
120
125
  end
121
126
  end
122
127
  end
@@ -156,8 +161,7 @@ class OodCore::Job::Adapters::Sge::Batch
156
161
  # @param job_id [#to_s]
157
162
  # @return job_id [String]
158
163
  def submit(content, args)
159
- cmd = ['qsub'] + args
160
- @helper.parse_job_id_from_qsub(call(*cmd, :stdin => content))
164
+ @helper.parse_job_id_from_qsub(call('qsub', *args, :stdin => content))
161
165
  end
162
166
 
163
167
  # Call a forked SGE command for a given batch server
@@ -20,7 +20,12 @@ class OodCore::Job::Adapters::Sge::Helper
20
20
  args += ['-h'] if script.submit_as_hold
21
21
  args += ['-r', 'yes'] if script.rerunnable
22
22
  script.job_environment.each_pair {|k, v| args += ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
23
- args += ['-wd', script.workdir] unless script.workdir.nil?
23
+
24
+ if script.workdir
25
+ args += ['-wd', script.workdir]
26
+ elsif ! script_contains_wd_directive?(script.content)
27
+ args += ['-cwd']
28
+ end
24
29
 
25
30
  on_event_email = []
26
31
  on_event_email << 'b' if script.email_on_started # beginning
@@ -47,6 +52,38 @@ class OodCore::Job::Adapters::Sge::Helper
47
52
  args
48
53
  end
49
54
 
55
+ # @brief Detect whether script content contains either -cwd or -wd
56
+ #
57
+ # @param content The script content
58
+ #
59
+ # Examples:
60
+ # #$-wd /home/ood/ondemand # should match
61
+ # #$ -wd /home/ood/ondemand # should match
62
+ # #$ -cwd /home/ood/ondemand # should match
63
+ # #$ -j yes -wd /home/ood/ondemand # should match
64
+ # #$ -j yes -o this-wd /home/ood/ondemand # should NOT match
65
+ # #$ -t 1-10:5 -wd /home/ood/ondemand # should match
66
+ #
67
+ # @return [bool]
68
+ #
69
+ def script_contains_wd_directive?(content)
70
+ content.slice(
71
+ # Only search within the script's first 1024 characters in case the user is
72
+ # putting lots of non-line delimited data into their scripts.
73
+ 0, 1024
74
+ ).split(
75
+ "\n"
76
+ ).any? {
77
+ |line|
78
+ # String must start with #$
79
+ # Match may be:
80
+ # Immediate -c?wd
81
+ # Eventual space or tab followed by -c?wd
82
+ # String may end with multiple characters
83
+ /^#\$(?:-c?wd|.*[ \t]+-c?wd).*$/ =~ line
84
+ }
85
+ end
86
+
50
87
  # Raise exceptions when adapter is asked to perform an action that SGE does not support
51
88
  # @raise [Error] when an incompatible action is requested
52
89
  def raise_error_on_unsupported_args(script, after:, afterok:, afternotok:, afterany:)
@@ -1,5 +1,6 @@
1
1
  require "time"
2
2
  require "ood_core/refinements/hash_extensions"
3
+ require "ood_core/refinements/array_extensions"
3
4
  require "ood_core/job/adapters/helper"
4
5
 
5
6
  module OodCore
@@ -29,10 +30,14 @@ module OodCore
29
30
  # resource manager for job management.
30
31
  class Slurm < Adapter
31
32
  using Refinements::HashExtensions
33
+ using Refinements::ArrayExtensions
32
34
 
33
35
  # Object used for simplified communication with a Slurm batch server
34
36
  # @api private
35
37
  class Batch
38
+ UNIT_SEPARATOR = "\x1F"
39
+ RECORD_SEPARATOR = "\x1E"
40
+
36
41
  # The cluster of the Slurm batch server
37
42
  # @example CHPC's kingspeak cluster
38
43
  # my_batch.cluster #=> "kingspeak"
@@ -89,22 +94,66 @@ module OodCore
89
94
  # # ...
90
95
  # #]
91
96
  # @param id [#to_s] the id of the job
92
- # @param filters [Array<Symbol>] list of attributes to filter on
97
+ # @param owner [String] the owner(s) of the job
98
+ # @param attrs [Array<Symbol>, nil] list of attributes requested when calling squeue
93
99
  # @raise [Error] if `squeue` command exited unsuccessfully
94
100
  # @return [Array<Hash>] list of details for jobs
95
- def get_jobs(id: "", filters: [])
96
- delim = "\x1F" # don't use "|" because FEATURES uses this
97
- options = filters.empty? ? fields : fields.slice(*filters)
98
- args = ["--all", "--states=all", "--noconvert"]
99
- args += ["-o", "#{options.values.join(delim)}"]
100
- args += ["-j", id.to_s] unless id.to_s.empty?
101
- lines = call("squeue", *args).split("\n").map(&:strip)
101
+ def get_jobs(id: "", owner: nil, attrs: nil)
102
+ fields = squeue_fields(attrs)
103
+ args = squeue_args(id: id, owner: owner, options: fields.values)
104
+
105
+ #TODO: switch mock of Open3 to be the squeue mock script
106
+ # then you can use that for performance metrics
107
+ StringIO.open(call("squeue", *args)) do |output|
108
+ advance_past_squeue_header!(output)
109
+
110
+ jobs = []
111
+ output.each_line(RECORD_SEPARATOR) do |line|
112
+ # TODO: once you can do performance metrics you can test zip against some other tools
113
+ # or just small optimizations
114
+ # for example, fields is ALREADY A HASH and we are setting the VALUES to
115
+ # "line.strip.split(unit_separator)" array
116
+ #
117
+ # i.e. store keys in an array, do Hash[[keys, values].transpose]
118
+ #
119
+ # or
120
+ #
121
+ # job = {}
122
+ # keys.each_with_index { |key, index| [key] = values[index] }
123
+ # jobs << job
124
+ #
125
+ # assuming keys and values are same length! if not we have an error!
126
+ values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
127
+ jobs << Hash[fields.keys.zip(values)] unless values.empty?
128
+ end
129
+ jobs
130
+ end
131
+ end
102
132
 
103
- lines.drop(cluster ? 2 : 1).map do |line|
104
- Hash[options.keys.zip(line.split(delim))]
133
+ def squeue_fields(attrs)
134
+ if attrs.nil?
135
+ all_squeue_fields
136
+ else
137
+ all_squeue_fields.slice(*squeue_attrs_for_info_attrs(Array.wrap(attrs) + squeue_required_fields))
105
138
  end
106
139
  end
107
140
 
141
+ def squeue_required_fields
142
+ #TODO: does this need to include ::array_job_task_id?
143
+ #TODO: does it matter that order of the output can vary depending on the arguments and if "squeue_required_fields" are included?
144
+ # previously the order was "fields.keys"; i don't think it does
145
+ [:job_id, :state_compact]
146
+ end
147
+
148
+ #TODO: write some barebones test for this? like 2 options and id or no id
149
+ def squeue_args(id: "", owner: nil, options: [])
150
+ args = ["--all", "--states=all", "--noconvert"]
151
+ args += ["-o", "#{RECORD_SEPARATOR}#{options.join(UNIT_SEPARATOR)}"]
152
+ args += ["-u", owner.to_s] unless owner.to_s.empty?
153
+ args += ["-j", id.to_s] unless id.to_s.empty?
154
+ args
155
+ end
156
+
108
157
  # Put a specified job on hold
109
158
  # @example Put job "1234" on hold
110
159
  # my_batch.hold_job("1234")
@@ -147,7 +196,82 @@ module OodCore
147
196
  call("sbatch", *args, env: env, stdin: str.to_s).strip.split(";").first
148
197
  end
149
198
 
199
+ # Fields requested from a formatted `squeue` call
200
+ # Note that the order of these fields is important
201
+ def all_squeue_fields
202
+ {
203
+ account: "%a",
204
+ job_id: "%A",
205
+ exec_host: "%B",
206
+ min_cpus: "%c",
207
+ cpus: "%C",
208
+ min_tmp_disk: "%d",
209
+ nodes: "%D",
210
+ end_time: "%e",
211
+ dependency: "%E",
212
+ features: "%f",
213
+ array_job_id: "%F",
214
+ group_name: "%g",
215
+ group_id: "%G",
216
+ over_subscribe: "%h",
217
+ sockets_per_node: "%H",
218
+ array_job_task_id: "%i",
219
+ cores_per_socket: "%I",
220
+ job_name: "%j",
221
+ threads_per_core: "%J",
222
+ comment: "%k",
223
+ array_task_id: "%K",
224
+ time_limit: "%l",
225
+ time_left: "%L",
226
+ min_memory: "%m",
227
+ time_used: "%M",
228
+ req_node: "%n",
229
+ node_list: "%N",
230
+ command: "%o",
231
+ contiguous: "%O",
232
+ qos: "%q",
233
+ partition: "%P",
234
+ priority: "%Q",
235
+ reason: "%r",
236
+ start_time: "%S",
237
+ state_compact: "%t",
238
+ state: "%T",
239
+ user: "%u",
240
+ user_id: "%U",
241
+ reservation: "%v",
242
+ submit_time: "%V",
243
+ wckey: "%w",
244
+ licenses: "%W",
245
+ excluded_nodes: "%x",
246
+ core_specialization: "%X",
247
+ nice: "%y",
248
+ scheduled_nodes: "%Y",
249
+ sockets_cores_threads: "%z",
250
+ work_dir: "%Z",
251
+ gres: "%b", # must come at the end to fix a bug with Slurm 18
252
+ }
253
+ end
254
+
150
255
  private
256
+ # Modify the StringIO instance by advancing past the squeue header
257
+ #
258
+ # The first two "records" should always be discarded. Consider the
259
+ # following squeue with -M output (invisible characters shown):
260
+ #
261
+ # CLUSTER: slurm_cluster_name\n
262
+ # \x1EJOBID\x1F\x1FSTATE\n
263
+ # \x1E1\x1F\x1FR\n
264
+ # \x1E2\x1F\x1FPD\n
265
+ #
266
+ # Splitting on the record separator first gives the Cluster header,
267
+ # and then the regular header. If -M or --cluster is not specified
268
+ # the effect is the same because the record separator is at the
269
+ # start of the format string, so the first "record" would simply be
270
+ # empty.
271
+ def advance_past_squeue_header!(squeue_output)
272
+ 2.times { squeue_output.gets(RECORD_SEPARATOR) }
273
+ end
274
+
151
275
  # Call a forked Slurm command for a given cluster
152
276
  def call(cmd, *args, env: {}, stdin: "")
153
277
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
@@ -159,60 +283,25 @@ module OodCore
159
283
  s.success? ? o : raise(Error, e)
160
284
  end
161
285
 
162
- # Fields requested from a formatted `squeue` call
163
- # Note that the order of these fields is important
164
- def fields
165
- {
166
- account: "%a",
167
- job_id: "%A",
168
- exec_host: "%B",
169
- min_cpus: "%c",
170
- cpus: "%C",
171
- min_tmp_disk: "%d",
172
- nodes: "%D",
173
- end_time: "%e",
174
- dependency: "%E",
175
- features: "%f",
176
- array_job_id: "%F",
177
- group_name: "%g",
178
- group_id: "%G",
179
- over_subscribe: "%h",
180
- sockets_per_node: "%H",
181
- array_job_task_id: "%i",
182
- cores_per_socket: "%I",
183
- job_name: "%j",
184
- threads_per_core: "%J",
185
- comment: "%k",
186
- array_task_id: "%K",
187
- time_limit: "%l",
188
- time_left: "%L",
189
- min_memory: "%m",
190
- time_used: "%M",
191
- req_node: "%n",
192
- node_list: "%N",
193
- command: "%o",
194
- contiguous: "%O",
195
- qos: "%q",
196
- partition: "%P",
197
- priority: "%Q",
198
- reason: "%r",
199
- start_time: "%S",
200
- state_compact: "%t",
201
- state: "%T",
202
- user: "%u",
203
- user_id: "%U",
204
- reservation: "%v",
205
- submit_time: "%V",
206
- wckey: "%w",
207
- licenses: "%W",
208
- excluded_nodes: "%x",
209
- core_specialization: "%X",
210
- nice: "%y",
211
- scheduled_nodes: "%Y",
212
- sockets_cores_threads: "%z",
213
- work_dir: "%Z",
214
- gres: "%b", # must come at the end to fix a bug with Slurm 18
215
- }
286
+ def squeue_attrs_for_info_attrs(attrs)
287
+ attrs.map { |a|
288
+ {
289
+ id: :job_id,
290
+ status: :state_compact,
291
+ allocated_nodes: [:node_list, :scheduled_nodes],
292
+ # submit_host: nil,
293
+ job_name: :job_name,
294
+ job_owner: :user,
295
+ accounting_id: :account,
296
+ procs: :cpus,
297
+ queue_name: :partition,
298
+ wallclock_time: :time_used,
299
+ wallclock_limit: :time_limit,
300
+ # cpu_time: nil,
301
+ submission_time: :submit_time,
302
+ dispatch_time: :start_time
303
+ }.fetch(a, a)
304
+ }.flatten
216
305
  end
217
306
  end
218
307
 
@@ -328,7 +417,7 @@ module OodCore
328
417
  # @return [Array<Info>] information describing submitted jobs
329
418
  # @see Adapter#info_all
330
419
  def info_all(attrs: nil)
331
- @slurm.get_jobs.map do |v|
420
+ @slurm.get_jobs(attrs: attrs).map do |v|
332
421
  parse_job_info(v)
333
422
  end
334
423
  rescue Batch::Error => e
@@ -360,6 +449,20 @@ module OodCore
360
449
  end
361
450
  end
362
451
 
452
+ # Retrieve info for all jobs for a given owner or owners from the
453
+ # resource manager
454
+ # @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
455
+ # @raise [JobAdapterError] if something goes wrong getting job info
456
+ # @return [Array<Info>] information describing submitted jobs
457
+ def info_where_owner(owner, attrs: nil)
458
+ owner = Array.wrap(owner).map(&:to_s).join(',')
459
+ @slurm.get_jobs(owner: owner).map do |v|
460
+ parse_job_info(v)
461
+ end
462
+ rescue Batch::Error => e
463
+ raise JobAdapterError, e.message
464
+ end
465
+
363
466
  # Retrieve job status from resource manager
364
467
  # @param id [#to_s] the id of the job
365
468
  # @raise [JobAdapterError] if something goes wrong getting job status
@@ -369,7 +472,7 @@ module OodCore
369
472
  id = id.to_s
370
473
  jobs = @slurm.get_jobs(
371
474
  id: id,
372
- filters: [:job_id, :array_job_task_id, :state_compact]
475
+ attrs: [:job_id, :array_job_task_id, :state_compact]
373
476
  )
374
477
  # A job id can return multiple jobs if it corresponds to a job array
375
478
  # id, so we need to find the job that corresponds to the given job id
@@ -478,6 +581,7 @@ module OodCore
478
581
  allocated_nodes = [ { name: nil } ] * v[:nodes].to_i
479
582
  end
480
583
  end
584
+
481
585
  Info.new(
482
586
  id: v[:job_id],
483
587
  status: get_state(v[:state_compact]),
@@ -491,8 +595,8 @@ module OodCore
491
595
  wallclock_time: duration_in_seconds(v[:time_used]),
492
596
  wallclock_limit: duration_in_seconds(v[:time_limit]),
493
597
  cpu_time: nil,
494
- submission_time: Time.parse(v[:submit_time]),
495
- dispatch_time: v[:start_time] == "N/A" ? nil : Time.parse(v[:start_time]),
598
+ submission_time: v[:submit_time] ? Time.parse(v[:submit_time]) : nil,
599
+ dispatch_time: (v[:start_time].nil? || v[:start_time] == "N/A") ? nil : Time.parse(v[:start_time]),
496
600
  native: v
497
601
  )
498
602
  end
@@ -500,7 +604,7 @@ module OodCore
500
604
  def handle_job_array(info_ary, id)
501
605
  # If only one job was returned we return it
502
606
  return info_ary.first unless info_ary.length > 1
503
-
607
+
504
608
  parent_task_hash = {:tasks => []}
505
609
 
506
610
  info_ary.map do |task_info|
@@ -113,6 +113,21 @@ module OodCore
113
113
  @native = native
114
114
  end
115
115
 
116
+ # Create a new Info for a child task
117
+ # @return [Info] merging the parent and the child task
118
+ def build_child_info(task)
119
+ parent_only_keys = [
120
+ :allocated_nodes,
121
+ :procs,
122
+ :cpu_time,
123
+ :dispatch_time,
124
+ :native,
125
+ :tasks
126
+ ]
127
+
128
+ new(**to_h.merge(task.to_h).delete_if{|k, v| parent_only_keys.include?(k)})
129
+ end
130
+
116
131
  # Convert object to hash
117
132
  # @return [Hash] object as hash
118
133
  def to_h
@@ -1,18 +1,19 @@
1
1
  module OodCore
2
2
  module Job
3
3
  class Task
4
- attr_reader :id
5
- attr_reader :status
4
+ attr_reader :id, :status, :wallclock_time
6
5
 
7
- def initialize(id:, status:, **_)
8
- @task_id = id
6
+ def initialize(id:, status:, wallclock_time: nil, **_)
7
+ @id = id.to_s
9
8
  @status = OodCore::Job::Status.new(state: status)
9
+ @wallclock_time = wallclock_time && wallclock_time.to_i
10
10
  end
11
11
 
12
12
  def to_h
13
13
  {
14
14
  :id => id,
15
- :status => status
15
+ :status => status,
16
+ :wallclock_time => wallclock_time
16
17
  }
17
18
  end
18
19
 
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.8.0"
3
+ VERSION = "0.9.0"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2019-01-29 00:00:00.000000000 Z
13
+ date: 2019-05-03 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -147,7 +147,6 @@ files:
147
147
  - lib/ood_core/cluster.rb
148
148
  - lib/ood_core/clusters.rb
149
149
  - lib/ood_core/errors.rb
150
- - lib/ood_core/job/._task_status.rb
151
150
  - lib/ood_core/job/adapter.rb
152
151
  - lib/ood_core/job/adapters/drmaa.rb
153
152
  - lib/ood_core/job/adapters/helper.rb
@@ -198,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
198
197
  version: '0'
199
198
  requirements: []
200
199
  rubyforge_project:
201
- rubygems_version: 2.7.3
200
+ rubygems_version: 2.6.11
202
201
  signing_key:
203
202
  specification_version: 4
204
203
  summary: Open OnDemand core library