ood_core 0.19.0 → 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6f8d595b211ea1f26d22bac6d441ddc56a1dd4dc46a8439be2d60c4fdb1545b9
4
- data.tar.gz: c04312cea53252aa4cac76a1dbccab6d40f974321630f5ac97931145bb404b0d
3
+ metadata.gz: 6c4dd83d1f69c0bf61d4ddc57b1b7aef23e3309e75988121c5b502dbc35f5208
4
+ data.tar.gz: ec1a80f736557d1648c11b4cb3339c337d7ae9c0183117f14a74ab88dc7cc9c6
5
5
  SHA512:
6
- metadata.gz: c5e4d7da375953bf188f09f9124380e62a7599c43c2d758bb3fa14a7f33d397b7134cb1b18e1f9844347f2b9e375e0d864e73b6635e563cc2d127ab435eaff1c
7
- data.tar.gz: a2f76e1121289d445f3666c6430420d805dede8c49ceb60a79de6b891b9903ded50f0cbca623f2354f63cf3de72d65cfef9b31ae08762b999888368778ca3a37
6
+ metadata.gz: 8df8478fb9fb591e1c69174c620895027c9fd73a4d4a87b4db9bc064b024679f65d3799a23407b9c363962aed529398596a4fdfbbd61f5f06f0a19f3188cd9be
7
+ data.tar.gz: 5eab3b6b13b80fac696ce5417f5f61e05952351572a09e57ee196cdbeec043d280d491a3525ad1e53c940f6cb9ac9f58480c03071dddeb4a373f0dff4ad3d527
data/CHANGELOG.md CHANGED
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.20.0] - 06-03-2022
11
+
12
+ - Adapters can now respond to `cluster_info` in [752](https://github.com/OSC/ood_core/pull/752). This returns information about the cluster, such as how many nodes, processors, and GPUs are active and how many exist in total. Only Slurm is supported in this release.
13
+ - `OodCore::Job::Info` now has a `gpus` attribute in [753](https://github.com/OSC/ood_core/pull/753). Only Slurm is supported in this release.
14
+ - Support Ruby 3 in [759](https://github.com/OSC/ood_core/pull/759)
15
+
10
16
  ## [0.19.0] - 02-03-2022
11
17
 
12
18
  ### Added
@@ -33,6 +33,15 @@ module OodCore
33
33
  raise NotImplementedError, "subclass did not define #submit"
34
34
  end
35
35
 
36
+ # Retrieve the number of active and total cpus, nodes, and gpus
37
+ # @abstract Subclass is expected to implement {#cluster_info}
38
+ # @raise [NotImplementedError] if subclass did not define {#cluster_info}
39
+ # @return [ClusterInfo] Object containing quantified statistics about the
40
+ # cluster's active/total cpus, nodes, and gpus
41
+ def cluster_info
42
+ raise NotImplementedError, "subclass did not define #cluster_stats"
43
+ end
44
+
36
45
  # Retrieve info for all jobs from the resource manager
37
46
  # @abstract Subclass is expected to implement {#info_all}
38
47
  # @raise [NotImplementedError] if subclass did not define {#info_all}
@@ -228,7 +228,7 @@ module OodCore
228
228
  data_hash[:submission_time] = raw['dateSubmitted'].to_i
229
229
  data_hash[:queue_name] = raw['criteriaPriority']
230
230
 
231
- Info.new(data_hash)
231
+ Info.new(**data_hash)
232
232
  end
233
233
 
234
234
  # extended data is just lines of 'key: value' value, so parse
@@ -242,7 +242,7 @@ module OodCore
242
242
 
243
243
  data.to_s.lines.drop(1).each do |line|
244
244
  match_data = ccqstat_regex.match(line)
245
- infos << Info.new(ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
245
+ infos << Info.new(**ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
246
246
  end
247
247
 
248
248
  infos
@@ -93,7 +93,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
93
93
 
94
94
  def info(id)
95
95
  pod_json = safe_call('get', 'pod', id)
96
- return OodCore::Job::Info.new({ id: id, status: 'completed' }) if pod_json.empty?
96
+ return OodCore::Job::Info.new(**{ id: id, status: 'completed' }) if pod_json.empty?
97
97
 
98
98
  service_json = safe_call('get', 'service', service_name(id))
99
99
  secret_json = safe_call('get', 'secret', secret_name(id))
@@ -31,7 +31,7 @@ class OodCore::Job::Adapters::Kubernetes::Helper
31
31
 
32
32
  pod_hash.deep_merge!(service_hash)
33
33
  pod_hash.deep_merge!(secret_hash)
34
- OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(pod_hash)
34
+ OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(**pod_hash)
35
35
  rescue NoMethodError
36
36
  raise K8sDataError, "unable to read data correctly from json"
37
37
  end
@@ -2,8 +2,8 @@
2
2
  class OodCore::Job::Adapters::Kubernetes::K8sJobInfo < OodCore::Job::Info
3
3
  attr_reader :ood_connection_info
4
4
 
5
- def initialize(ood_connection_info: {}, **options)
6
- super(options)
7
- @ood_connection_info = ood_connection_info
5
+ def initialize(options)
6
+ super(**options)
7
+ @ood_connection_info = options[:ood_connection_info]
8
8
  end
9
9
  end
@@ -16,7 +16,7 @@ module OodCore
16
16
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
17
17
  # @option config [#to_s] :submit_host ('') Host to submit commands to
18
18
  def self.build_lsf(config)
19
- batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
19
+ batch = Adapters::Lsf::Batch.new(**config.to_h.symbolize_keys)
20
20
  Adapters::Lsf.new(batch: batch)
21
21
  end
22
22
  end
@@ -36,6 +36,13 @@ module OodCore
36
36
  using Refinements::HashExtensions
37
37
  using Refinements::ArrayExtensions
38
38
 
39
+ # Get integer representing the number of gpus used by a node or job,
40
+ # calculated from gres string
41
+ # @return [Integer] the number of gpus in gres
42
+ def gpus_from_gres(gres)
43
+ gres.to_s.scan(/gpu:[^,]*(\d+)/).flatten.map(&:to_i).sum
44
+ end
45
+
39
46
  # Object used for simplified communication with a Slurm batch server
40
47
  # @api private
41
48
  class Batch
@@ -98,6 +105,22 @@ module OodCore
98
105
  @strict_host_checking = strict_host_checking
99
106
  end
100
107
 
108
+ # Get a ClusterInfo object containing information about the given cluster
109
+ # @return [ClusterInfo] object containing cluster details
110
+ def get_cluster_info
111
+ node_cpu_info = call("sinfo", "-aho %A/%D/%C").strip.split('/')
112
+ gres_length = call("sinfo", "-o %G").lines.map(&:strip).map(&:length).max + 2
113
+ gres_lines = call("sinfo", "-ahNO ,nodehost,gres:#{gres_length},gresused:#{gres_length}")
114
+ .lines.uniq.map(&:split)
115
+ ClusterInfo.new(active_nodes: node_cpu_info[0].to_i,
116
+ total_nodes: node_cpu_info[2].to_i,
117
+ active_processors: node_cpu_info[3].to_i,
118
+ total_processors: node_cpu_info[6].to_i,
119
+ active_gpus: gres_lines.sum { |line| gpus_from_gres(line[2]) },
120
+ total_gpus: gres_lines.sum { |line| gpus_from_gres(line[1]) }
121
+ )
122
+ end
123
+
101
124
  # Get a list of hashes detailing each of the jobs on the batch server
102
125
  # @example Status info for all jobs
103
126
  # my_batch.get_jobs
@@ -454,6 +477,12 @@ module OodCore
454
477
  raise JobAdapterError, e.message
455
478
  end
456
479
 
480
+ # Retrieve info about active and total cpus, gpus, and nodes
481
+ # @return [ClusterInfo] information about cluster usage
482
+ def cluster_info
483
+ @slurm.get_cluster_info
484
+ end
485
+
457
486
  # Retrieve info for all jobs from the resource manager
458
487
  # @raise [JobAdapterError] if something goes wrong getting job info
459
488
  # @return [Array<Info>] information describing submitted jobs
@@ -643,7 +672,8 @@ module OodCore
643
672
  cpu_time: nil,
644
673
  submission_time: v[:submit_time] ? Time.parse(v[:submit_time]) : nil,
645
674
  dispatch_time: (v[:start_time].nil? || v[:start_time] == "N/A") ? nil : Time.parse(v[:start_time]),
646
- native: v
675
+ native: v,
676
+ gpus: gpus_from_gres(v[:gres])
647
677
  )
648
678
  end
649
679
 
@@ -0,0 +1,32 @@
1
+ module OodCore
2
+ module Job
3
+ # An object that contains details about the cluster's active and total nodes, processors, and gpus
4
+ class ClusterInfo
5
+ using Refinements::HashExtensions
6
+
7
+ attr_reader :active_nodes, :total_nodes, :active_processors, :total_processors, :active_gpu_nodes,
8
+ :total_gpu_nodes, :active_gpus, :total_gpus
9
+
10
+ def initialize(opts = {})
11
+ opts = opts.transform_keys(&:to_sym)
12
+ @active_nodes = opts.fetch(:active_nodes, nil).to_i
13
+ @total_nodes = opts.fetch(:total_nodes, nil).to_i
14
+ @active_processors = opts.fetch(:active_processors, nil).to_i
15
+ @total_processors = opts.fetch(:total_processors, nil).to_i
16
+ @active_gpus = opts.fetch(:active_gpus, nil).to_i
17
+ @total_gpus = opts.fetch(:total_gpus, nil).to_i
18
+ end
19
+
20
+ def to_h
21
+ {
22
+ active_nodes: active_nodes,
23
+ total_nodes: total_nodes,
24
+ active_processors: active_processors,
25
+ total_processors: total_processors,
26
+ active_gpus: active_gpus,
27
+ total_gpus: total_gpus
28
+ }
29
+ end
30
+ end
31
+ end
32
+ end
@@ -65,6 +65,10 @@ module OodCore
65
65
  # @return [Object] native info
66
66
  attr_reader :native
67
67
 
68
+ # Number of gpus allocated for job
69
+ # @return [Integer, nil] allocated total number of gpus
70
+ attr_reader :gpus
71
+
68
72
  # List of job array child task statuses
69
73
  # @note only relevant for job arrays
70
74
  # @return [Array<Task>] tasks
@@ -86,15 +90,16 @@ module OodCore
86
90
  # @param dispatch_time [#to_i, nil] dispatch time
87
91
  # @param tasks [Array<Hash>] tasks e.g. { id: '12345.owens-batch', status: :running }
88
92
  # @param native [Object] native info
93
+ # @param gpus [#to_i, nil] allocated total number of gpus (defaults to 0)
89
94
  def initialize(id:, status:, allocated_nodes: [], submit_host: nil,
90
95
  job_name: nil, job_owner: nil, accounting_id: nil,
91
96
  procs: nil, queue_name: nil, wallclock_time: nil,
92
97
  wallclock_limit: nil, cpu_time: nil, submission_time: nil,
93
- dispatch_time: nil, native: nil, tasks: [],
98
+ dispatch_time: nil, native: nil, gpus: 0, tasks: [],
94
99
  **_)
95
100
  @id = id.to_s
96
101
  @status = Status.new(state: status.to_sym)
97
- @allocated_nodes = allocated_nodes.map { |n| NodeInfo.new(n.to_h) }
102
+ @allocated_nodes = allocated_nodes.map { |n| NodeInfo.new(**n.to_h) }
98
103
  @submit_host = submit_host && submit_host.to_s
99
104
  @job_name = job_name && job_name.to_s
100
105
  @job_owner = job_owner && job_owner.to_s
@@ -111,6 +116,7 @@ module OodCore
111
116
  @status = job_array_aggregate_status unless @tasks.empty?
112
117
 
113
118
  @native = native
119
+ @gpus = gpus && gpus.to_i
114
120
  end
115
121
 
116
122
  # Create a new Info for a child task
@@ -147,10 +153,15 @@ module OodCore
147
153
  submission_time: submission_time,
148
154
  dispatch_time: dispatch_time,
149
155
  native: native,
156
+ gpus: gpus,
150
157
  tasks: tasks
151
158
  }
152
159
  end
153
160
 
161
+ def gpu?
162
+ gpus.positive?
163
+ end
164
+
154
165
  # The comparison operator
155
166
  # @param other [#to_h] object to compare against
156
167
  # @return [Boolean] whether objects are equivalent
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.19.0"
3
+ VERSION = "0.20.0"
4
4
  end
data/lib/ood_core.rb CHANGED
@@ -11,6 +11,7 @@ module OodCore
11
11
  require "ood_core/job/node_info"
12
12
  require "ood_core/job/script"
13
13
  require "ood_core/job/info"
14
+ require "ood_core/job/cluster_info"
14
15
  require "ood_core/job/status"
15
16
  require "ood_core/job/adapter"
16
17
  require "ood_core/job/factory"
data/ood_core.gemspec CHANGED
@@ -20,10 +20,11 @@ Gem::Specification.new do |spec|
20
20
  spec.bindir = "exe"
21
21
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  spec.require_paths = ["lib"]
23
- spec.required_ruby_version = ">= 2.2.0"
23
+ spec.required_ruby_version = ">= 2.7.0"
24
24
 
25
25
  spec.add_runtime_dependency "ood_support", "~> 0.0.2"
26
26
  spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
27
+ spec.add_runtime_dependency "rexml", "~> 3.2"
27
28
  spec.add_development_dependency "bundler", "~> 2.1"
28
29
  spec.add_development_dependency "rake", "~> 13.0.1"
29
30
  spec.add_development_dependency "rspec", "~> 3.0"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2022-03-02 00:00:00.000000000 Z
13
+ date: 2022-06-03 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -46,6 +46,20 @@ dependencies:
46
46
  - - ">="
47
47
  - !ruby/object:Gem::Version
48
48
  version: 1.9.6
49
+ - !ruby/object:Gem::Dependency
50
+ name: rexml
51
+ requirement: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '3.2'
56
+ type: :runtime
57
+ prerelease: false
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '3.2'
49
63
  - !ruby/object:Gem::Dependency
50
64
  name: bundler
51
65
  requirement: !ruby/object:Gem::Requirement
@@ -196,6 +210,7 @@ files:
196
210
  - lib/ood_core/job/adapters/torque/error.rb
197
211
  - lib/ood_core/job/adapters/torque/ffi.rb
198
212
  - lib/ood_core/job/array_ids.rb
213
+ - lib/ood_core/job/cluster_info.rb
199
214
  - lib/ood_core/job/factory.rb
200
215
  - lib/ood_core/job/info.rb
201
216
  - lib/ood_core/job/node_info.rb
@@ -219,7 +234,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
219
234
  requirements:
220
235
  - - ">="
221
236
  - !ruby/object:Gem::Version
222
- version: 2.2.0
237
+ version: 2.7.0
223
238
  required_rubygems_version: !ruby/object:Gem::Requirement
224
239
  requirements:
225
240
  - - ">="