ood_core 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 8a526602e6c6b59b6d943d299dc4e442cfd354a768669b4bc03a9423e12cf418
4
- data.tar.gz: 5220c4b20c1de287afdcad2eece623952c58aea735c1786f9956912563277e85
2
+ SHA1:
3
+ metadata.gz: ac5caf10cd563acf0e8ef6a4b7d421b5718dc097
4
+ data.tar.gz: c9e401652e388868a2d583751ef94d50ccb2f22a
5
5
  SHA512:
6
- metadata.gz: f63f8aff330f033ef8fe0dad0d07629e3704463441f1a910920f37a86d48a4cab059182403b9cb6f1bd6a300213b1cff45315b43354fa0d2a9aaaba2f7bc54c8
7
- data.tar.gz: d77d8d5130a3f20ac9e54667b10de5a476322c38491f63abe58e0ab192d23b22fc764481254abaf9def825a4ee6707646f1b3c264ed9a5ec4fa76add0f34295a
6
+ metadata.gz: db745be6e2bcc4a7c4bfcd31d0a47c50bb948be84d24c2bc5f45c7bab6bbf46e22d82ed86120087ce6c0e6d554d323acef53b4adde3ffc7eb801216cc419f986
7
+ data.tar.gz: '096513b3c128b32c81b19784ef56164ef74e158dbd72d37b47d51063cd6b89daac03a00abf632421cf4e3687390930b5bb3c3771b5eb7290c91fb54a2757bf21'
@@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6
6
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7
7
 
8
8
  ## [Unreleased]
9
+ ## [0.9.0] - 2019-05-04
10
+ ### Added
11
+ - Job array support for LSF and PBSPro
12
+ - Slurm adapter uses `squeue` owner filter (`-u`) for `info_where_owner`
13
+
14
+ ### Fixed
15
+ - Grid Engine adapter now starts scripts in the current directory like all other adapters
16
+ - Fixed issue where Slurm comment field might break job info parsing
17
+ - Fixed possible crash when comparing two clusters if the id of one of the clusters is nil
18
+ - Fixed bug with the live system test that impacted non-Torque systems
19
+ - Fixed bug with Slurm adapter when submit time is not available
20
+
9
21
  ## [0.8.0] - 2019-01-29
10
22
  ### Added
11
23
  - info_all_each and info_where_owner_each super class methods
@@ -165,7 +177,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
165
177
  ### Added
166
178
  - Initial release!
167
179
 
168
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.8.0...HEAD
180
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.9.0...HEAD
181
+ [0.9.0]: https://github.com/OSC/ood_core/compare/v0.8.0...v0.9.0
169
182
  [0.8.0]: https://github.com/OSC/ood_core/compare/v0.7.1...v0.8.0
170
183
  [0.7.1]: https://github.com/OSC/ood_core/compare/v0.7.0...v0.7.1
171
184
  [0.7.0]: https://github.com/OSC/ood_core/compare/v0.6.0...v0.7.0
@@ -136,7 +136,7 @@ module OodCore
136
136
  # @param other [#to_sym] object to compare against
137
137
  # @return [Boolean] whether objects are equivalent
138
138
  def ==(other)
139
- id == other.to_sym
139
+ id == other.to_s.to_sym
140
140
  end
141
141
 
142
142
  # Convert object to symbol
@@ -90,16 +90,8 @@ module OodCore
90
90
  # @return [Info] information describing submitted job
91
91
  # @see Adapter#info
92
92
  def info(id)
93
- # TODO: handle job arrays
94
- job = batch.get_job(id: id)
95
- if job
96
- info_for_batch_hash(job)
97
- else
98
- Info.new(
99
- id: id,
100
- status: :completed
101
- )
102
- end
93
+ info_ary = batch.get_job(id: id).map{|v| info_for_batch_hash(v)}
94
+ handle_job_array(info_ary, id)
103
95
  rescue Batch::Error => e
104
96
  raise JobAdapterError, e.message
105
97
  end
@@ -131,19 +123,13 @@ module OodCore
131
123
  raise JobAdapterError, e.message
132
124
  end
133
125
 
134
- def supports_job_arrays?
135
- false
136
- end
137
-
138
126
  # Retrieve job status from resource manager
139
127
  # @param id [#to_s] the id of the job
140
128
  # @raise [JobAdapterError] if something goes wrong getting job status
141
129
  # @return [Status] status of job
142
130
  # @see Adapter#status
143
131
  def status(id)
144
- job = batch.get_job(id: id)
145
- state = job ? get_state(job[:status]) : :completed
146
- Status.new(state: state)
132
+ info(id).status
147
133
  rescue Batch::Error => e
148
134
  raise JobAdapterError, e.message
149
135
  end
@@ -196,8 +182,11 @@ module OodCore
196
182
  dispatch_time = helper.parse_past_time(v[:start_time], ignore_errors: true)
197
183
  finish_time = helper.parse_past_time(v[:finish_time], ignore_errors: true)
198
184
 
185
+ # Detect job array index from name
186
+ array_index = /(\[\d+\])$/.match(v[:name])
187
+
199
188
  Info.new(
200
- id: v[:id],
189
+ id: (array_index) ? "#{v[:id]}#{array_index[1]}" : v[:id],
201
190
  status: get_state(v[:status]),
202
191
  allocated_nodes: nodes,
203
192
  submit_host: v[:from_host],
@@ -214,6 +203,40 @@ module OodCore
214
203
  native: v
215
204
  )
216
205
  end
206
+
207
+ def handle_job_array(info_ary, id)
208
+ return Info.new(id: id, status: :completed) if info_ary.nil? || info_ary.empty?
209
+ return info_ary.first if info_ary.size == 1
210
+
211
+ parent_task_hash = build_proxy_parent(info_ary.first, id)
212
+
213
+ info_ary.map do |task_info|
214
+ parent_task_hash[:tasks] << {:id => task_info.id, :status => task_info.status}
215
+ end
216
+
217
+ parent_task_hash[:status] = parent_task_hash[:tasks].map{|task| task[:status]}.max
218
+
219
+ Info.new(**parent_task_hash)
220
+ end
221
+
222
+ # Proxy the first element as the parent hash; delete non-shared attributes
223
+ def build_proxy_parent(info, id)
224
+ info.to_h.merge({
225
+ :tasks => [],
226
+ :id => id
227
+ }).delete_if{
228
+ |key, _| [
229
+ :allocated_nodes, :dispatch_time,
230
+ :cpu_time, :wallclock_time, :status
231
+ ].include?(key)
232
+ }.tap{
233
+ # Remove the child array index from the :job_name
234
+
235
+ # Note that a true representation of the parent should have the
236
+ # full array spec in the name. Worth attempting to reconstruct?
237
+ |h| h[:job_name] = h[:job_name].gsub(/\[[^\]]+\]/, '')
238
+ }
239
+ end
217
240
  end
218
241
  end
219
242
  end
@@ -43,10 +43,10 @@ class OodCore::Job::Adapters::Lsf::Batch
43
43
  # Get hash detailing the specified job
44
44
  # @param id [#to_s] the id of the job to check
45
45
  # @raise [Error] if `bjobs` command exited unsuccessfully
46
- # @return [Hash] details of specified job
46
+ # @return [Array<Hash>] details of specified job
47
47
  def get_job(id:)
48
48
  args = %W( -a -w -W #{id.to_s} )
49
- parse_bjobs_output(call("bjobs", *args)).first
49
+ parse_bjobs_output(call("bjobs", *args))
50
50
  end
51
51
 
52
52
  # status fields available from bjobs
@@ -81,6 +81,8 @@ class OodCore::Job::Adapters::Lsf::Helper
81
81
  args += ["-P", script.accounting_id] unless script.accounting_id.nil?
82
82
  args += ["-cwd", script.workdir.to_s] unless script.workdir.nil?
83
83
  args += ["-J", script.job_name] unless script.job_name.nil?
84
+ args[-1] += "[#{script.job_array_request}]" unless script.job_array_request.nil?
85
+
84
86
  args += ["-q", script.queue_name] unless script.queue_name.nil?
85
87
  args += ["-U", script.reservation_id] unless script.reservation_id.nil?
86
88
  args += ["-sp", script.priority] unless script.priority.nil?
@@ -86,8 +86,7 @@ module OodCore
86
86
  # @raise [Error] if `qstat` command exited unsuccessfully
87
87
  # @return [Array<Hash>] list of details for jobs
88
88
  def get_jobs(id: "")
89
- args = ["-f"] # display all information
90
- args += ["-t"] # list subjobs
89
+ args = ["-f", "-t"] # display all information
91
90
  args += [id.to_s] unless id.to_s.empty?
92
91
  lines = call("qstat", *args).gsub("\n\t", "").split("\n").map(&:strip)
93
92
 
@@ -101,7 +100,8 @@ module OodCore
101
100
  k2 ? ( hsh[k1] ||= {} and hsh[k1][k2] = value ) : ( hsh[k1] = value )
102
101
  end
103
102
  end
104
- jobs.reject { |j| /\[\]/ =~ j[:job_id] } # drop main job array jobs
103
+
104
+ jobs
105
105
  end
106
106
 
107
107
  # Select batch jobs from the batch server
@@ -181,8 +181,8 @@ module OodCore
181
181
  'U' => :suspended, # cycle-harvesting job is suspended due to keyboard activity
182
182
  'E' => :running, # job is exiting after having run
183
183
  'F' => :completed, # job is finished
184
- 'X' => :completed # subjob has completed execution or has been deleted
185
- # ignore B as it signifies a job array
184
+ 'X' => :completed, # subjob has completed execution or has been deleted
185
+ 'B' => :running # job array has at least one child running
186
186
  }
187
187
 
188
188
  # What percentage of jobs a user owns out of all jobs, used to decide
@@ -266,6 +266,8 @@ module OodCore
266
266
  # mimics what the other resource managers do)
267
267
  args += ["-j", "oe"] if script.error_path.nil?
268
268
 
269
+ args += ["-J", script.job_array_request] unless script.job_array_request.nil?
270
+
269
271
  # Set native options
270
272
  args += script.native if script.native
271
273
 
@@ -303,13 +305,21 @@ module OodCore
303
305
  if usr_jobs.size > (qstat_factor * all_jobs.size)
304
306
  super
305
307
  else
306
- usr_jobs.map { |id| info(id) }
307
- end
308
- end
308
+ begin
309
+ user_job_infos = []
310
+ usr_jobs.each do |id|
311
+ job = info(id)
312
+ user_job_infos << job
309
313
 
310
- def supports_job_arrays?
311
- false
314
+ job.tasks.each {|task| user_job_infos << job.build_child_info(task)}
315
+ end
316
+
317
+ user_job_infos
318
+ rescue Batch::Error => e
319
+ raise JobAdapterError, e.message
320
+ end
312
321
  end
322
+ end
313
323
 
314
324
  # Retrieve job info from the resource manager
315
325
  # @param id [#to_s] the id of the job
@@ -318,9 +328,18 @@ module OodCore
318
328
  # @see Adapter#info
319
329
  def info(id)
320
330
  id = id.to_s
321
- @pbspro.get_jobs(id: id).map do |v|
331
+
332
+ job_infos = @pbspro.get_jobs(id: id).map do |v|
322
333
  parse_job_info(v)
323
- end.first || Info.new(id: id, status: :completed)
334
+ end
335
+
336
+ if job_infos.empty?
337
+ Info.new(id: id, status: :completed)
338
+ elsif job_infos.length == 1
339
+ job_infos.first
340
+ else
341
+ process_job_array(id, job_infos)
342
+ end
324
343
  rescue Batch::Error => e
325
344
  # set completed status if can't find job id
326
345
  if /Unknown Job Id/ =~ e.message || /Job has finished/ =~ e.message
@@ -434,6 +453,23 @@ module OodCore
434
453
  native: v
435
454
  )
436
455
  end
456
+
457
+ # Combine the array parent with the states of its children
458
+ def process_job_array(id, jobs)
459
+ parent_job = jobs.select { |j| /\[\]/ =~ j.id }.first
460
+ parent = (parent_job) ? parent_job.to_h : {:id => id, :status => :undetermined}
461
+
462
+ # create task hashes from children
463
+ parent[:tasks] = jobs.reject { |j| /\[\]/ =~ j.id }.map do |j|
464
+ {
465
+ :id => j.id,
466
+ :status => j.status.to_sym,
467
+ :wallclock_time => j.wallclock_time
468
+ }
469
+ end
470
+
471
+ Info.new(**parent)
472
+ end
437
473
  end
438
474
  end
439
475
  end
@@ -95,7 +95,12 @@ class OodCore::Job::Adapters::Sge::Batch
95
95
 
96
96
  job_hash = listener.parsed_job
97
97
 
98
- update_job_hash_status!(job_hash)
98
+ if job_hash[:id]
99
+ update_job_hash_status!(job_hash)
100
+ else
101
+ job_hash[:id] = job_id
102
+ job_hash[:status] = :completed
103
+ end
99
104
 
100
105
  job_info = OodCore::Job::Info.new(**job_hash)
101
106
  rescue REXML::ParseException => e
@@ -115,8 +120,8 @@ class OodCore::Job::Adapters::Sge::Batch
115
120
  if get_status_from_drmaa?(job_hash)
116
121
  begin
117
122
  job_hash[:status] = get_status_from_drmma(job_hash[:id])
118
- rescue DRMAA::DRMAAInvalidArgumentError => e
119
- raise Error, e.message
123
+ rescue DRMAA::DRMAAException => e
124
+ # log DRMAA error?
120
125
  end
121
126
  end
122
127
  end
@@ -156,8 +161,7 @@ class OodCore::Job::Adapters::Sge::Batch
156
161
  # @param job_id [#to_s]
157
162
  # @return job_id [String]
158
163
  def submit(content, args)
159
- cmd = ['qsub'] + args
160
- @helper.parse_job_id_from_qsub(call(*cmd, :stdin => content))
164
+ @helper.parse_job_id_from_qsub(call('qsub', *args, :stdin => content))
161
165
  end
162
166
 
163
167
  # Call a forked SGE command for a given batch server
@@ -20,7 +20,12 @@ class OodCore::Job::Adapters::Sge::Helper
20
20
  args += ['-h'] if script.submit_as_hold
21
21
  args += ['-r', 'yes'] if script.rerunnable
22
22
  script.job_environment.each_pair {|k, v| args += ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
23
- args += ['-wd', script.workdir] unless script.workdir.nil?
23
+
24
+ if script.workdir
25
+ args += ['-wd', script.workdir]
26
+ elsif ! script_contains_wd_directive?(script.content)
27
+ args += ['-cwd']
28
+ end
24
29
 
25
30
  on_event_email = []
26
31
  on_event_email << 'b' if script.email_on_started # beginning
@@ -47,6 +52,38 @@ class OodCore::Job::Adapters::Sge::Helper
47
52
  args
48
53
  end
49
54
 
55
+ # @brief Detect whether script content contains either -cwd or -wd
56
+ #
57
+ # @param content The script content
58
+ #
59
+ # Examples:
60
+ # #$-wd /home/ood/ondemand # should match
61
+ # #$ -wd /home/ood/ondemand # should match
62
+ # #$ -cwd /home/ood/ondemand # should match
63
+ # #$ -j yes -wd /home/ood/ondemand # should match
64
+ # #$ -j yes -o this-wd /home/ood/ondemand # should NOT match
65
+ # #$ -t 1-10:5 -wd /home/ood/ondemand # should match
66
+ #
67
+ # @return [bool]
68
+ #
69
+ def script_contains_wd_directive?(content)
70
+ content.slice(
71
+ # Only search within the script's first 1024 characters in case the user is
72
+ # putting lots of non-line delimited data into their scripts.
73
+ 0, 1024
74
+ ).split(
75
+ "\n"
76
+ ).any? {
77
+ |line|
78
+ # String must start with #$
79
+ # Match may be:
80
+ # Immediate -c?wd
81
+ # Eventual space or tab followed by -c?wd
82
+ # String may end with multiple characters
83
+ /^#\$(?:-c?wd|.*[ \t]+-c?wd).*$/ =~ line
84
+ }
85
+ end
86
+
50
87
  # Raise exceptions when adapter is asked to perform an action that SGE does not support
51
88
  # @raise [Error] when an incompatible action is requested
52
89
  def raise_error_on_unsupported_args(script, after:, afterok:, afternotok:, afterany:)
@@ -1,5 +1,6 @@
1
1
  require "time"
2
2
  require "ood_core/refinements/hash_extensions"
3
+ require "ood_core/refinements/array_extensions"
3
4
  require "ood_core/job/adapters/helper"
4
5
 
5
6
  module OodCore
@@ -29,10 +30,14 @@ module OodCore
29
30
  # resource manager for job management.
30
31
  class Slurm < Adapter
31
32
  using Refinements::HashExtensions
33
+ using Refinements::ArrayExtensions
32
34
 
33
35
  # Object used for simplified communication with a Slurm batch server
34
36
  # @api private
35
37
  class Batch
38
+ UNIT_SEPARATOR = "\x1F"
39
+ RECORD_SEPARATOR = "\x1E"
40
+
36
41
  # The cluster of the Slurm batch server
37
42
  # @example CHPC's kingspeak cluster
38
43
  # my_batch.cluster #=> "kingspeak"
@@ -89,22 +94,66 @@ module OodCore
89
94
  # # ...
90
95
  # #]
91
96
  # @param id [#to_s] the id of the job
92
- # @param filters [Array<Symbol>] list of attributes to filter on
97
+ # @param owner [String] the owner(s) of the job
98
+ # @param attrs [Array<Symbol>, nil] list of attributes to request when calling squeue
93
99
  # @raise [Error] if `squeue` command exited unsuccessfully
94
100
  # @return [Array<Hash>] list of details for jobs
95
- def get_jobs(id: "", filters: [])
96
- delim = "\x1F" # don't use "|" because FEATURES uses this
97
- options = filters.empty? ? fields : fields.slice(*filters)
98
- args = ["--all", "--states=all", "--noconvert"]
99
- args += ["-o", "#{options.values.join(delim)}"]
100
- args += ["-j", id.to_s] unless id.to_s.empty?
101
- lines = call("squeue", *args).split("\n").map(&:strip)
101
+ def get_jobs(id: "", owner: nil, attrs: nil)
102
+ fields = squeue_fields(attrs)
103
+ args = squeue_args(id: id, owner: owner, options: fields.values)
104
+
105
+ #TODO: switch mock of Open3 to be the squeue mock script
106
+ # then you can use that for performance metrics
107
+ StringIO.open(call("squeue", *args)) do |output|
108
+ advance_past_squeue_header!(output)
109
+
110
+ jobs = []
111
+ output.each_line(RECORD_SEPARATOR) do |line|
112
+ # TODO: once you can do performance metrics you can test zip against some other tools
113
+ # or just small optimizations
114
+ # for example, fields is ALREADY A HASH and we are setting the VALUES to
115
+ # "line.strip.split(unit_separator)" array
116
+ #
117
+ # i.e. store keys in an array, do Hash[[keys, values].transpose]
118
+ #
119
+ # or
120
+ #
121
+ # job = {}
122
+ # keys.each_with_index { |key, index| job[key] = values[index] }
123
+ # jobs << job
124
+ #
125
+ # assuming keys and values are same length! if not we have an error!
126
+ values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
127
+ jobs << Hash[fields.keys.zip(values)] unless values.empty?
128
+ end
129
+ jobs
130
+ end
131
+ end
102
132
 
103
- lines.drop(cluster ? 2 : 1).map do |line|
104
- Hash[options.keys.zip(line.split(delim))]
133
+ def squeue_fields(attrs)
134
+ if attrs.nil?
135
+ all_squeue_fields
136
+ else
137
+ all_squeue_fields.slice(*squeue_attrs_for_info_attrs(Array.wrap(attrs) + squeue_required_fields))
105
138
  end
106
139
  end
107
140
 
141
+ def squeue_required_fields
142
+ #TODO: does this need to include :array_job_task_id?
143
+ #TODO: does it matter that order of the output can vary depending on the arguments and if "squeue_required_fields" are included?
144
+ # previously the order was "fields.keys"; i don't think it does
145
+ [:job_id, :state_compact]
146
+ end
147
+
148
+ #TODO: write some barebones test for this? like 2 options and id or no id
149
+ def squeue_args(id: "", owner: nil, options: [])
150
+ args = ["--all", "--states=all", "--noconvert"]
151
+ args += ["-o", "#{RECORD_SEPARATOR}#{options.join(UNIT_SEPARATOR)}"]
152
+ args += ["-u", owner.to_s] unless owner.to_s.empty?
153
+ args += ["-j", id.to_s] unless id.to_s.empty?
154
+ args
155
+ end
156
+
108
157
  # Put a specified job on hold
109
158
  # @example Put job "1234" on hold
110
159
  # my_batch.hold_job("1234")
@@ -147,7 +196,82 @@ module OodCore
147
196
  call("sbatch", *args, env: env, stdin: str.to_s).strip.split(";").first
148
197
  end
149
198
 
199
+ # Fields requested from a formatted `squeue` call
200
+ # Note that the order of these fields is important
201
+ def all_squeue_fields
202
+ {
203
+ account: "%a",
204
+ job_id: "%A",
205
+ exec_host: "%B",
206
+ min_cpus: "%c",
207
+ cpus: "%C",
208
+ min_tmp_disk: "%d",
209
+ nodes: "%D",
210
+ end_time: "%e",
211
+ dependency: "%E",
212
+ features: "%f",
213
+ array_job_id: "%F",
214
+ group_name: "%g",
215
+ group_id: "%G",
216
+ over_subscribe: "%h",
217
+ sockets_per_node: "%H",
218
+ array_job_task_id: "%i",
219
+ cores_per_socket: "%I",
220
+ job_name: "%j",
221
+ threads_per_core: "%J",
222
+ comment: "%k",
223
+ array_task_id: "%K",
224
+ time_limit: "%l",
225
+ time_left: "%L",
226
+ min_memory: "%m",
227
+ time_used: "%M",
228
+ req_node: "%n",
229
+ node_list: "%N",
230
+ command: "%o",
231
+ contiguous: "%O",
232
+ qos: "%q",
233
+ partition: "%P",
234
+ priority: "%Q",
235
+ reason: "%r",
236
+ start_time: "%S",
237
+ state_compact: "%t",
238
+ state: "%T",
239
+ user: "%u",
240
+ user_id: "%U",
241
+ reservation: "%v",
242
+ submit_time: "%V",
243
+ wckey: "%w",
244
+ licenses: "%W",
245
+ excluded_nodes: "%x",
246
+ core_specialization: "%X",
247
+ nice: "%y",
248
+ scheduled_nodes: "%Y",
249
+ sockets_cores_threads: "%z",
250
+ work_dir: "%Z",
251
+ gres: "%b", # must come at the end to fix a bug with Slurm 18
252
+ }
253
+ end
254
+
150
255
  private
256
+ # Modify the StringIO instance by advancing past the squeue header
257
+ #
258
+ # The first two "records" should always be discarded. Consider the
259
+ # following squeue with -M output (invisible characters shown):
260
+ #
261
+ # CLUSTER: slurm_cluster_name\n
262
+ # \x1EJOBID\x1F\x1FSTATE\n
263
+ # \x1E1\x1F\x1FR\n
264
+ # \x1E2\x1F\x1FPD\n
265
+ #
266
+ # Splitting on the record separator first gives the Cluster header,
267
+ # and then the regular header. If -M or --cluster is not specified
268
+ # the effect is the same because the record separator is at the
269
+ # start of the format string, so the first "record" would simply be
270
+ # empty.
271
+ def advance_past_squeue_header!(squeue_output)
272
+ 2.times { squeue_output.gets(RECORD_SEPARATOR) }
273
+ end
274
+
151
275
  # Call a forked Slurm command for a given cluster
152
276
  def call(cmd, *args, env: {}, stdin: "")
153
277
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
@@ -159,60 +283,25 @@ module OodCore
159
283
  s.success? ? o : raise(Error, e)
160
284
  end
161
285
 
162
- # Fields requested from a formatted `squeue` call
163
- # Note that the order of these fields is important
164
- def fields
165
- {
166
- account: "%a",
167
- job_id: "%A",
168
- exec_host: "%B",
169
- min_cpus: "%c",
170
- cpus: "%C",
171
- min_tmp_disk: "%d",
172
- nodes: "%D",
173
- end_time: "%e",
174
- dependency: "%E",
175
- features: "%f",
176
- array_job_id: "%F",
177
- group_name: "%g",
178
- group_id: "%G",
179
- over_subscribe: "%h",
180
- sockets_per_node: "%H",
181
- array_job_task_id: "%i",
182
- cores_per_socket: "%I",
183
- job_name: "%j",
184
- threads_per_core: "%J",
185
- comment: "%k",
186
- array_task_id: "%K",
187
- time_limit: "%l",
188
- time_left: "%L",
189
- min_memory: "%m",
190
- time_used: "%M",
191
- req_node: "%n",
192
- node_list: "%N",
193
- command: "%o",
194
- contiguous: "%O",
195
- qos: "%q",
196
- partition: "%P",
197
- priority: "%Q",
198
- reason: "%r",
199
- start_time: "%S",
200
- state_compact: "%t",
201
- state: "%T",
202
- user: "%u",
203
- user_id: "%U",
204
- reservation: "%v",
205
- submit_time: "%V",
206
- wckey: "%w",
207
- licenses: "%W",
208
- excluded_nodes: "%x",
209
- core_specialization: "%X",
210
- nice: "%y",
211
- scheduled_nodes: "%Y",
212
- sockets_cores_threads: "%z",
213
- work_dir: "%Z",
214
- gres: "%b", # must come at the end to fix a bug with Slurm 18
215
- }
286
+ def squeue_attrs_for_info_attrs(attrs)
287
+ attrs.map { |a|
288
+ {
289
+ id: :job_id,
290
+ status: :state_compact,
291
+ allocated_nodes: [:node_list, :scheduled_nodes],
292
+ # submit_host: nil,
293
+ job_name: :job_name,
294
+ job_owner: :user,
295
+ accounting_id: :account,
296
+ procs: :cpus,
297
+ queue_name: :partition,
298
+ wallclock_time: :time_used,
299
+ wallclock_limit: :time_limit,
300
+ # cpu_time: nil,
301
+ submission_time: :submit_time,
302
+ dispatch_time: :start_time
303
+ }.fetch(a, a)
304
+ }.flatten
216
305
  end
217
306
  end
218
307
 
@@ -328,7 +417,7 @@ module OodCore
328
417
  # @return [Array<Info>] information describing submitted jobs
329
418
  # @see Adapter#info_all
330
419
  def info_all(attrs: nil)
331
- @slurm.get_jobs.map do |v|
420
+ @slurm.get_jobs(attrs: attrs).map do |v|
332
421
  parse_job_info(v)
333
422
  end
334
423
  rescue Batch::Error => e
@@ -360,6 +449,20 @@ module OodCore
360
449
  end
361
450
  end
362
451
 
452
+ # Retrieve info for all jobs for a given owner or owners from the
453
+ # resource manager
454
+ # @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
455
+ # @raise [JobAdapterError] if something goes wrong getting job info
456
+ # @return [Array<Info>] information describing submitted jobs
457
+ def info_where_owner(owner, attrs: nil)
458
+ owner = Array.wrap(owner).map(&:to_s).join(',')
459
+ @slurm.get_jobs(owner: owner).map do |v|
460
+ parse_job_info(v)
461
+ end
462
+ rescue Batch::Error => e
463
+ raise JobAdapterError, e.message
464
+ end
465
+
363
466
  # Retrieve job status from resource manager
364
467
  # @param id [#to_s] the id of the job
365
468
  # @raise [JobAdapterError] if something goes wrong getting job status
@@ -369,7 +472,7 @@ module OodCore
369
472
  id = id.to_s
370
473
  jobs = @slurm.get_jobs(
371
474
  id: id,
372
- filters: [:job_id, :array_job_task_id, :state_compact]
475
+ attrs: [:job_id, :array_job_task_id, :state_compact]
373
476
  )
374
477
  # A job id can return multiple jobs if it corresponds to a job array
375
478
  # id, so we need to find the job that corresponds to the given job id
@@ -478,6 +581,7 @@ module OodCore
478
581
  allocated_nodes = [ { name: nil } ] * v[:nodes].to_i
479
582
  end
480
583
  end
584
+
481
585
  Info.new(
482
586
  id: v[:job_id],
483
587
  status: get_state(v[:state_compact]),
@@ -491,8 +595,8 @@ module OodCore
491
595
  wallclock_time: duration_in_seconds(v[:time_used]),
492
596
  wallclock_limit: duration_in_seconds(v[:time_limit]),
493
597
  cpu_time: nil,
494
- submission_time: Time.parse(v[:submit_time]),
495
- dispatch_time: v[:start_time] == "N/A" ? nil : Time.parse(v[:start_time]),
598
+ submission_time: v[:submit_time] ? Time.parse(v[:submit_time]) : nil,
599
+ dispatch_time: (v[:start_time].nil? || v[:start_time] == "N/A") ? nil : Time.parse(v[:start_time]),
496
600
  native: v
497
601
  )
498
602
  end
@@ -500,7 +604,7 @@ module OodCore
500
604
  def handle_job_array(info_ary, id)
501
605
  # If only one job was returned we return it
502
606
  return info_ary.first unless info_ary.length > 1
503
-
607
+
504
608
  parent_task_hash = {:tasks => []}
505
609
 
506
610
  info_ary.map do |task_info|
@@ -113,6 +113,21 @@ module OodCore
113
113
  @native = native
114
114
  end
115
115
 
116
+ # Create a new Info for a child task
117
+ # @return [Info] merging the parent and the child task
118
+ def build_child_info(task)
119
+ parent_only_keys = [
120
+ :allocated_nodes,
121
+ :procs,
122
+ :cpu_time,
123
+ :dispatch_time,
124
+ :native,
125
+ :tasks
126
+ ]
127
+
128
+ new(**to_h.merge(task.to_h).delete_if{|k, v| parent_only_keys.include?(k)})
129
+ end
130
+
116
131
  # Convert object to hash
117
132
  # @return [Hash] object as hash
118
133
  def to_h
@@ -1,18 +1,19 @@
1
1
  module OodCore
2
2
  module Job
3
3
  class Task
4
- attr_reader :id
5
- attr_reader :status
4
+ attr_reader :id, :status, :wallclock_time
6
5
 
7
- def initialize(id:, status:, **_)
8
- @task_id = id
6
+ def initialize(id:, status:, wallclock_time: nil, **_)
7
+ @id = id.to_s
9
8
  @status = OodCore::Job::Status.new(state: status)
9
+ @wallclock_time = wallclock_time && wallclock_time.to_i
10
10
  end
11
11
 
12
12
  def to_h
13
13
  {
14
14
  :id => id,
15
- :status => status
15
+ :status => status,
16
+ :wallclock_time => wallclock_time
16
17
  }
17
18
  end
18
19
 
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.8.0"
3
+ VERSION = "0.9.0"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2019-01-29 00:00:00.000000000 Z
13
+ date: 2019-05-03 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -147,7 +147,6 @@ files:
147
147
  - lib/ood_core/cluster.rb
148
148
  - lib/ood_core/clusters.rb
149
149
  - lib/ood_core/errors.rb
150
- - lib/ood_core/job/._task_status.rb
151
150
  - lib/ood_core/job/adapter.rb
152
151
  - lib/ood_core/job/adapters/drmaa.rb
153
152
  - lib/ood_core/job/adapters/helper.rb
@@ -198,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
198
197
  version: '0'
199
198
  requirements: []
200
199
  rubyforge_project:
201
- rubygems_version: 2.7.3
200
+ rubygems_version: 2.6.11
202
201
  signing_key:
203
202
  specification_version: 4
204
203
  summary: Open OnDemand core library