ood_core 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6f8d595b211ea1f26d22bac6d441ddc56a1dd4dc46a8439be2d60c4fdb1545b9
4
- data.tar.gz: c04312cea53252aa4cac76a1dbccab6d40f974321630f5ac97931145bb404b0d
3
+ metadata.gz: 6c4dd83d1f69c0bf61d4ddc57b1b7aef23e3309e75988121c5b502dbc35f5208
4
+ data.tar.gz: ec1a80f736557d1648c11b4cb3339c337d7ae9c0183117f14a74ab88dc7cc9c6
5
5
  SHA512:
6
- metadata.gz: c5e4d7da375953bf188f09f9124380e62a7599c43c2d758bb3fa14a7f33d397b7134cb1b18e1f9844347f2b9e375e0d864e73b6635e563cc2d127ab435eaff1c
7
- data.tar.gz: a2f76e1121289d445f3666c6430420d805dede8c49ceb60a79de6b891b9903ded50f0cbca623f2354f63cf3de72d65cfef9b31ae08762b999888368778ca3a37
6
+ metadata.gz: 8df8478fb9fb591e1c69174c620895027c9fd73a4d4a87b4db9bc064b024679f65d3799a23407b9c363962aed529398596a4fdfbbd61f5f06f0a19f3188cd9be
7
+ data.tar.gz: 5eab3b6b13b80fac696ce5417f5f61e05952351572a09e57ee196cdbeec043d280d491a3525ad1e53c940f6cb9ac9f58480c03071dddeb4a373f0dff4ad3d527
data/CHANGELOG.md CHANGED
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.20.0] - 06-03-2022
11
+
12
+ - Adapters can now respond to `cluster_info` in [752](https://github.com/OSC/ood_core/pull/752). It returns cluster-wide statistics: the active and total number of nodes, processors, and gpus. Only Slurm is supported in this release.
13
+ - `OodCore::Job::Info` now has a `gpus` attribute in [753](https://github.com/OSC/ood_core/pull/753). Only Slurm is supported in this release.
14
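+ - Support Ruby 3 in [759](https://github.com/OSC/ood_core/pull/759). This release also raises the minimum required Ruby version to 2.7.0 and adds `rexml` as a runtime dependency.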
15
+
10
16
  ## [0.19.0] - 02-03-2022
11
17
 
12
18
  ### Added
@@ -33,6 +33,15 @@ module OodCore
33
33
  raise NotImplementedError, "subclass did not define #submit"
34
34
  end
35
35
 
36
# Retrieve the number of active and total cpus, nodes, and gpus
# @abstract Subclass is expected to implement {#cluster_info}
# @raise [NotImplementedError] if subclass did not define {#cluster_info}
# @return [ClusterInfo] Object containing quantified statistics about the
#   cluster's active/total cpus, nodes, and gpus
def cluster_info
  # The message names this method so the error points at what to override.
  raise NotImplementedError, "subclass did not define #cluster_info"
end
44
+
36
45
  # Retrieve info for all jobs from the resource manager
37
46
  # @abstract Subclass is expected to implement {#info_all}
38
47
  # @raise [NotImplementedError] if subclass did not define {#info_all}
@@ -228,7 +228,7 @@ module OodCore
228
228
  data_hash[:submission_time] = raw['dateSubmitted'].to_i
229
229
  data_hash[:queue_name] = raw['criteriaPriority']
230
230
 
231
- Info.new(data_hash)
231
+ Info.new(**data_hash)
232
232
  end
233
233
 
234
234
  # extended data is just lines of 'key: value' pairs, so parse
@@ -242,7 +242,7 @@ module OodCore
242
242
 
243
243
  data.to_s.lines.drop(1).each do |line|
244
244
  match_data = ccqstat_regex.match(line)
245
- infos << Info.new(ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
245
+ infos << Info.new(**ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
246
246
  end
247
247
 
248
248
  infos
@@ -93,7 +93,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
93
93
 
94
94
  def info(id)
95
95
  pod_json = safe_call('get', 'pod', id)
96
- return OodCore::Job::Info.new({ id: id, status: 'completed' }) if pod_json.empty?
96
+ return OodCore::Job::Info.new(**{ id: id, status: 'completed' }) if pod_json.empty?
97
97
 
98
98
  service_json = safe_call('get', 'service', service_name(id))
99
99
  secret_json = safe_call('get', 'secret', secret_name(id))
@@ -31,7 +31,7 @@ class OodCore::Job::Adapters::Kubernetes::Helper
31
31
 
32
32
  pod_hash.deep_merge!(service_hash)
33
33
  pod_hash.deep_merge!(secret_hash)
34
- OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(pod_hash)
34
+ OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(**pod_hash)
35
35
  rescue NoMethodError
36
36
  raise K8sDataError, "unable to read data correctly from json"
37
37
  end
@@ -2,8 +2,8 @@
2
2
  class OodCore::Job::Adapters::Kubernetes::K8sJobInfo < OodCore::Job::Info
3
3
  attr_reader :ood_connection_info
4
4
 
5
- def initialize(ood_connection_info: {}, **options)
6
- super(options)
7
- @ood_connection_info = ood_connection_info
5
+ def initialize(options)
6
+ super(**options)
7
+ @ood_connection_info = options[:ood_connection_info]
8
8
  end
9
9
  end
@@ -16,7 +16,7 @@ module OodCore
16
16
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
17
17
  # @option config [#to_s] :submit_host ('') Host to submit commands to
18
18
  def self.build_lsf(config)
19
- batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
19
+ batch = Adapters::Lsf::Batch.new(**config.to_h.symbolize_keys)
20
20
  Adapters::Lsf.new(batch: batch)
21
21
  end
22
22
  end
@@ -36,6 +36,13 @@ module OodCore
36
36
  using Refinements::HashExtensions
37
37
  using Refinements::ArrayExtensions
38
38
 
39
# Get integer representing the number of gpus used by a node or job,
# calculated from gres string
#
# Every `gpu:` entry is read as `gpu:[type:]count`, optionally followed by a
# parenthesised detail, e.g. `gpu:2`, `gpu:v100:12` or `gpu:tesla:4(IDX:0-3)`.
# The counts of all entries are added up. An entry without a numeric count
# field (e.g. `gpu:p4`) contributes nothing.
# @param gres [#to_s, nil] gres string; nil is treated as an empty string
# @return [Integer] the number of gpus in gres
def gpus_from_gres(gres)
  # The optional type segment is consumed explicitly and the lookahead pins
  # the end of the count, so the whole multi-digit number is captured rather
  # than only its last digit, and digits inside a type name are never counted.
  gres.to_s.scan(/gpu:(?:[^:,()]+:)?(\d+)(?=[,(\s]|\z)/).flatten.map(&:to_i).sum
end
45
+
39
46
  # Object used for simplified communication with a Slurm batch server
40
47
  # @api private
41
48
  class Batch
@@ -98,6 +105,22 @@ module OodCore
98
105
  @strict_host_checking = strict_host_checking
99
106
  end
100
107
 
108
# Get a ClusterInfo object containing information about the given cluster
#
# Calls `sinfo` three times: once for the node and cpu counts, once to find
# the widest gres string, and once for the per-node gres / gres-in-use columns.
# Missing fields read as 0 instead of raising.
# @return [ClusterInfo] object containing cluster details
def get_cluster_info
  # "%A/%D/%C" is split on '/' into seven fields. Per the sinfo man page these
  # are allocated/idle nodes (%A), total nodes (%D) and
  # allocated/idle/other/total cpus (%C).
  alloc_nodes, _idle_nodes, all_nodes, alloc_cpus, _idle_cpus, _other_cpus, all_cpus =
    call("sinfo", "-aho %A/%D/%C").strip.split('/')

  # Width for the two gres columns requested below: the longest stripped line
  # of `sinfo -o %G` plus two. `max` is nil when there is no output at all,
  # hence the to_i before adding.
  gres_length = call("sinfo", "-o %G").lines.map { |row| row.strip.length }.max.to_i + 2

  # One whitespace-split row per distinct output line; index 1 is the gres
  # column and index 2 the gresused column, in the order they are requested.
  gres_lines = call("sinfo", "-ahNO ,nodehost,gres:#{gres_length},gresused:#{gres_length}")
               .lines.uniq.map(&:split)

  ClusterInfo.new(active_nodes: alloc_nodes.to_i,
                  total_nodes: all_nodes.to_i,
                  active_processors: alloc_cpus.to_i,
                  total_processors: all_cpus.to_i,
                  active_gpus: gres_lines.sum { |line| gpus_from_gres(line[2]) },
                  total_gpus: gres_lines.sum { |line| gpus_from_gres(line[1]) })
end
123
+
101
124
  # Get a list of hashes detailing each of the jobs on the batch server
102
125
  # @example Status info for all jobs
103
126
  # my_batch.get_jobs
@@ -454,6 +477,12 @@ module OodCore
454
477
  raise JobAdapterError, e.message
455
478
  end
456
479
 
480
+ # Retrieve info about active and total cpus, gpus, and nodes by delegating to @slurm.get_cluster_info
481
+ # @return [ClusterInfo] information about cluster usage
482
+ def cluster_info
483
+ @slurm.get_cluster_info
484
+ end
485
+
457
486
  # Retrieve info for all jobs from the resource manager
458
487
  # @raise [JobAdapterError] if something goes wrong getting job info
459
488
  # @return [Array<Info>] information describing submitted jobs
@@ -643,7 +672,8 @@ module OodCore
643
672
  cpu_time: nil,
644
673
  submission_time: v[:submit_time] ? Time.parse(v[:submit_time]) : nil,
645
674
  dispatch_time: (v[:start_time].nil? || v[:start_time] == "N/A") ? nil : Time.parse(v[:start_time]),
646
- native: v
675
+ native: v,
676
+ gpus: gpus_from_gres(v[:gres])
647
677
  )
648
678
  end
649
679
 
@@ -0,0 +1,32 @@
1
module OodCore
  module Job
    # An object that contains details about the cluster's active and total nodes, processors, and gpus
    #
    # Every count is coerced with #to_i, so an option that is missing or nil
    # reads back as 0.
    class ClusterInfo
      using Refinements::HashExtensions

      # NOTE: active_gpu_nodes and total_gpu_nodes are declared as readers but
      # are never assigned by #initialize, so they always return nil and are
      # not included in #to_h.
      attr_reader :active_nodes, :total_nodes, :active_processors, :total_processors, :active_gpu_nodes,
                  :total_gpu_nodes, :active_gpus, :total_gpus

      # @param opts [#to_h, nil] counts keyed by attribute name; keys may be
      #   anything responding to #to_sym (e.g. Strings or Symbols). nil is
      #   treated like an empty Hash.
      # @option opts [#to_i, nil] :active_nodes
      # @option opts [#to_i, nil] :total_nodes
      # @option opts [#to_i, nil] :active_processors
      # @option opts [#to_i, nil] :total_processors
      # @option opts [#to_i, nil] :active_gpus
      # @option opts [#to_i, nil] :total_gpus
      def initialize(opts = {})
        # to_h lets an explicit nil through (nil.to_h is {}) instead of
        # failing on transform_keys.
        opts = opts.to_h.transform_keys(&:to_sym)
        @active_nodes = opts.fetch(:active_nodes, nil).to_i
        @total_nodes = opts.fetch(:total_nodes, nil).to_i
        @active_processors = opts.fetch(:active_processors, nil).to_i
        @total_processors = opts.fetch(:total_processors, nil).to_i
        @active_gpus = opts.fetch(:active_gpus, nil).to_i
        @total_gpus = opts.fetch(:total_gpus, nil).to_i
      end

      # Convert object to hash
      # @return [Hash{Symbol=>Integer}] the six counts set by #initialize
      def to_h
        {
          active_nodes: active_nodes,
          total_nodes: total_nodes,
          active_processors: active_processors,
          total_processors: total_processors,
          active_gpus: active_gpus,
          total_gpus: total_gpus
        }
      end
    end
  end
end
@@ -65,6 +65,10 @@ module OodCore
65
65
  # @return [Object] native info
66
66
  attr_reader :native
67
67
 
68
+ # Number of gpus allocated for job
69
+ # @return [Integer, nil] allocated total number of gpus
70
+ attr_reader :gpus
71
+
68
72
  # List of job array child task statuses
69
73
  # @note only relevant for job arrays
70
74
  # @return [Array<Task>] tasks
@@ -86,15 +90,16 @@ module OodCore
86
90
  # @param dispatch_time [#to_i, nil] dispatch time
87
91
  # @param tasks [Array<Hash>] tasks e.g. { id: '12345.owens-batch', status: :running }
88
92
  # @param native [Object] native info
93
+ # @param gpus [#to_i, nil] allocated total number of gpus (defaults to 0)
89
94
  def initialize(id:, status:, allocated_nodes: [], submit_host: nil,
90
95
  job_name: nil, job_owner: nil, accounting_id: nil,
91
96
  procs: nil, queue_name: nil, wallclock_time: nil,
92
97
  wallclock_limit: nil, cpu_time: nil, submission_time: nil,
93
- dispatch_time: nil, native: nil, tasks: [],
98
+ dispatch_time: nil, native: nil, gpus: 0, tasks: [],
94
99
  **_)
95
100
  @id = id.to_s
96
101
  @status = Status.new(state: status.to_sym)
97
- @allocated_nodes = allocated_nodes.map { |n| NodeInfo.new(n.to_h) }
102
+ @allocated_nodes = allocated_nodes.map { |n| NodeInfo.new(**n.to_h) }
98
103
  @submit_host = submit_host && submit_host.to_s
99
104
  @job_name = job_name && job_name.to_s
100
105
  @job_owner = job_owner && job_owner.to_s
@@ -111,6 +116,7 @@ module OodCore
111
116
  @status = job_array_aggregate_status unless @tasks.empty?
112
117
 
113
118
  @native = native
119
+ @gpus = gpus && gpus.to_i
114
120
  end
115
121
 
116
122
  # Create a new Info for a child task
@@ -147,10 +153,15 @@ module OodCore
147
153
  submission_time: submission_time,
148
154
  dispatch_time: dispatch_time,
149
155
  native: native,
156
+ gpus: gpus,
150
157
  tasks: tasks
151
158
  }
152
159
  end
153
160
 
161
# Whether any gpus are allocated for this job
# @return [Boolean] true when {#gpus} is greater than zero; false when it is
#   zero or nil
def gpu?
  # gpus may be nil (the constructor stores `gpus && gpus.to_i`), so coerce
  # before comparing instead of calling positive? on nil.
  gpus.to_i.positive?
end
164
+
154
165
  # The comparison operator
155
166
  # @param other [#to_h] object to compare against
156
167
  # @return [Boolean] whether objects are equivalent
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.19.0"
3
+ VERSION = "0.20.0"
4
4
  end
data/lib/ood_core.rb CHANGED
@@ -11,6 +11,7 @@ module OodCore
11
11
  require "ood_core/job/node_info"
12
12
  require "ood_core/job/script"
13
13
  require "ood_core/job/info"
14
+ require "ood_core/job/cluster_info"
14
15
  require "ood_core/job/status"
15
16
  require "ood_core/job/adapter"
16
17
  require "ood_core/job/factory"
data/ood_core.gemspec CHANGED
@@ -20,10 +20,11 @@ Gem::Specification.new do |spec|
20
20
  spec.bindir = "exe"
21
21
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  spec.require_paths = ["lib"]
23
- spec.required_ruby_version = ">= 2.2.0"
23
+ spec.required_ruby_version = ">= 2.7.0"
24
24
 
25
25
  spec.add_runtime_dependency "ood_support", "~> 0.0.2"
26
26
  spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
27
+ spec.add_runtime_dependency "rexml", "~> 3.2"
27
28
  spec.add_development_dependency "bundler", "~> 2.1"
28
29
  spec.add_development_dependency "rake", "~> 13.0.1"
29
30
  spec.add_development_dependency "rspec", "~> 3.0"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2022-03-02 00:00:00.000000000 Z
13
+ date: 2022-06-03 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -46,6 +46,20 @@ dependencies:
46
46
  - - ">="
47
47
  - !ruby/object:Gem::Version
48
48
  version: 1.9.6
49
+ - !ruby/object:Gem::Dependency
50
+ name: rexml
51
+ requirement: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '3.2'
56
+ type: :runtime
57
+ prerelease: false
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '3.2'
49
63
  - !ruby/object:Gem::Dependency
50
64
  name: bundler
51
65
  requirement: !ruby/object:Gem::Requirement
@@ -196,6 +210,7 @@ files:
196
210
  - lib/ood_core/job/adapters/torque/error.rb
197
211
  - lib/ood_core/job/adapters/torque/ffi.rb
198
212
  - lib/ood_core/job/array_ids.rb
213
+ - lib/ood_core/job/cluster_info.rb
199
214
  - lib/ood_core/job/factory.rb
200
215
  - lib/ood_core/job/info.rb
201
216
  - lib/ood_core/job/node_info.rb
@@ -219,7 +234,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
219
234
  requirements:
220
235
  - - ">="
221
236
  - !ruby/object:Gem::Version
222
- version: 2.2.0
237
+ version: 2.7.0
223
238
  required_rubygems_version: !ruby/object:Gem::Requirement
224
239
  requirements:
225
240
  - - ">="