ood_core 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/ood_core/job/adapter.rb +9 -0
- data/lib/ood_core/job/adapters/ccq.rb +2 -2
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +1 -1
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +1 -1
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +3 -3
- data/lib/ood_core/job/adapters/lsf.rb +1 -1
- data/lib/ood_core/job/adapters/slurm.rb +31 -1
- data/lib/ood_core/job/cluster_info.rb +32 -0
- data/lib/ood_core/job/info.rb +13 -2
- data/lib/ood_core/version.rb +1 -1
- data/lib/ood_core.rb +1 -0
- data/ood_core.gemspec +2 -1
- metadata +18 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c4dd83d1f69c0bf61d4ddc57b1b7aef23e3309e75988121c5b502dbc35f5208
|
4
|
+
data.tar.gz: ec1a80f736557d1648c11b4cb3339c337d7ae9c0183117f14a74ab88dc7cc9c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8df8478fb9fb591e1c69174c620895027c9fd73a4d4a87b4db9bc064b024679f65d3799a23407b9c363962aed529398596a4fdfbbd61f5f06f0a19f3188cd9be
|
7
|
+
data.tar.gz: 5eab3b6b13b80fac696ce5417f5f61e05952351572a09e57ee196cdbeec043d280d491a3525ad1e53c940f6cb9ac9f58480c03071dddeb4a373f0dff4ad3d527
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [0.20.0] - 06-03-2022
|
11
|
+
|
12
|
+
- Adapters can now respond to `cluster_info` in [752](https://github.com/OSC/ood_core/pull/752). This returns information about the cluster, such as the number of active and total nodes, processors, and gpus. Only Slurm is supported in this release.
|
13
|
+
- `OodCore::Job::Info` now has a `gpus` attribute in [753](https://github.com/OSC/ood_core/pull/753). Only Slurm support in this release.
|
14
|
+
- Support Ruby 3 in [759](https://github.com/OSC/ood_core/pull/759)
|
15
|
+
|
10
16
|
## [0.19.0] - 02-03-2022
|
11
17
|
|
12
18
|
### Added
|
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -33,6 +33,15 @@ module OodCore
|
|
33
33
|
raise NotImplementedError, "subclass did not define #submit"
|
34
34
|
end
|
35
35
|
|
36
|
+
# Retrieve the number of active and total cpus, nodes, and gpus
|
37
|
+
# @abstract Subclass is expected to implement {#cluster_info}
|
38
|
+
# @raise [NotImplementedError] if subclass did not define {#cluster_info}
|
39
|
+
# @return [ClusterInfo] Object containing quantified statistics about the
|
40
|
+
# cluster's active/total cpus, nodes, and gpus
|
41
|
+
def cluster_info
|
42
|
+
raise NotImplementedError, "subclass did not define #cluster_stats"
|
43
|
+
end
|
44
|
+
|
36
45
|
# Retrieve info for all jobs from the resource manager
|
37
46
|
# @abstract Subclass is expected to implement {#info_all}
|
38
47
|
# @raise [NotImplementedError] if subclass did not define {#info_all}
|
@@ -228,7 +228,7 @@ module OodCore
|
|
228
228
|
data_hash[:submission_time] = raw['dateSubmitted'].to_i
|
229
229
|
data_hash[:queue_name] = raw['criteriaPriority']
|
230
230
|
|
231
|
-
Info.new(data_hash)
|
231
|
+
Info.new(**data_hash)
|
232
232
|
end
|
233
233
|
|
234
234
|
# extended data is just lines of 'key: value' value, so parse
|
@@ -242,7 +242,7 @@ module OodCore
|
|
242
242
|
|
243
243
|
data.to_s.lines.drop(1).each do |line|
|
244
244
|
match_data = ccqstat_regex.match(line)
|
245
|
-
infos << Info.new(ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
|
245
|
+
infos << Info.new(**ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
|
246
246
|
end
|
247
247
|
|
248
248
|
infos
|
@@ -93,7 +93,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
93
93
|
|
94
94
|
def info(id)
|
95
95
|
pod_json = safe_call('get', 'pod', id)
|
96
|
-
return OodCore::Job::Info.new({ id: id, status: 'completed' }) if pod_json.empty?
|
96
|
+
return OodCore::Job::Info.new(**{ id: id, status: 'completed' }) if pod_json.empty?
|
97
97
|
|
98
98
|
service_json = safe_call('get', 'service', service_name(id))
|
99
99
|
secret_json = safe_call('get', 'secret', secret_name(id))
|
@@ -31,7 +31,7 @@ class OodCore::Job::Adapters::Kubernetes::Helper
|
|
31
31
|
|
32
32
|
pod_hash.deep_merge!(service_hash)
|
33
33
|
pod_hash.deep_merge!(secret_hash)
|
34
|
-
OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(pod_hash)
|
34
|
+
OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(**pod_hash)
|
35
35
|
rescue NoMethodError
|
36
36
|
raise K8sDataError, "unable to read data correctly from json"
|
37
37
|
end
|
@@ -2,8 +2,8 @@
|
|
2
2
|
class OodCore::Job::Adapters::Kubernetes::K8sJobInfo < OodCore::Job::Info
|
3
3
|
attr_reader :ood_connection_info
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
super(options)
|
7
|
-
@ood_connection_info = ood_connection_info
|
5
|
+
def initialize(options)
|
6
|
+
super(**options)
|
7
|
+
@ood_connection_info = options[:ood_connection_info]
|
8
8
|
end
|
9
9
|
end
|
@@ -16,7 +16,7 @@ module OodCore
|
|
16
16
|
# @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
|
17
17
|
# @option config [#to_s] :submit_host ('') Host to submit commands to
|
18
18
|
def self.build_lsf(config)
|
19
|
-
batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
|
19
|
+
batch = Adapters::Lsf::Batch.new(**config.to_h.symbolize_keys)
|
20
20
|
Adapters::Lsf.new(batch: batch)
|
21
21
|
end
|
22
22
|
end
|
@@ -36,6 +36,13 @@ module OodCore
|
|
36
36
|
using Refinements::HashExtensions
|
37
37
|
using Refinements::ArrayExtensions
|
38
38
|
|
39
|
+
# Get integer representing the number of gpus used by a node or job,
|
40
|
+
# calculated from gres string
|
41
|
+
# @return [Integer] the number of gpus in gres
|
42
|
+
def gpus_from_gres(gres)
|
43
|
+
gres.to_s.scan(/gpu:[^,]*(\d+)/).flatten.map(&:to_i).sum
|
44
|
+
end
|
45
|
+
|
39
46
|
# Object used for simplified communication with a Slurm batch server
|
40
47
|
# @api private
|
41
48
|
class Batch
|
@@ -98,6 +105,22 @@ module OodCore
|
|
98
105
|
@strict_host_checking = strict_host_checking
|
99
106
|
end
|
100
107
|
|
108
|
+
# Get a ClusterInfo object containing information about the given cluster
|
109
|
+
# @return [ClusterInfo] object containing cluster details
|
110
|
+
def get_cluster_info
|
111
|
+
node_cpu_info = call("sinfo", "-aho %A/%D/%C").strip.split('/')
|
112
|
+
gres_length = call("sinfo", "-o %G").lines.map(&:strip).map(&:length).max + 2
|
113
|
+
gres_lines = call("sinfo", "-ahNO ,nodehost,gres:#{gres_length},gresused:#{gres_length}")
|
114
|
+
.lines.uniq.map(&:split)
|
115
|
+
ClusterInfo.new(active_nodes: node_cpu_info[0].to_i,
|
116
|
+
total_nodes: node_cpu_info[2].to_i,
|
117
|
+
active_processors: node_cpu_info[3].to_i,
|
118
|
+
total_processors: node_cpu_info[6].to_i,
|
119
|
+
active_gpus: gres_lines.sum { |line| gpus_from_gres(line[2]) },
|
120
|
+
total_gpus: gres_lines.sum { |line| gpus_from_gres(line[1]) }
|
121
|
+
)
|
122
|
+
end
|
123
|
+
|
101
124
|
# Get a list of hashes detailing each of the jobs on the batch server
|
102
125
|
# @example Status info for all jobs
|
103
126
|
# my_batch.get_jobs
|
@@ -454,6 +477,12 @@ module OodCore
|
|
454
477
|
raise JobAdapterError, e.message
|
455
478
|
end
|
456
479
|
|
480
|
+
# Retrieve info about active and total cpus, gpus, and nodes
|
481
|
+
# @return [ClusterInfo] information about cluster usage
|
482
|
+
def cluster_info
|
483
|
+
@slurm.get_cluster_info
|
484
|
+
end
|
485
|
+
|
457
486
|
# Retrieve info for all jobs from the resource manager
|
458
487
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
459
488
|
# @return [Array<Info>] information describing submitted jobs
|
@@ -643,7 +672,8 @@ module OodCore
|
|
643
672
|
cpu_time: nil,
|
644
673
|
submission_time: v[:submit_time] ? Time.parse(v[:submit_time]) : nil,
|
645
674
|
dispatch_time: (v[:start_time].nil? || v[:start_time] == "N/A") ? nil : Time.parse(v[:start_time]),
|
646
|
-
native: v
|
675
|
+
native: v,
|
676
|
+
gpus: gpus_from_gres(v[:gres])
|
647
677
|
)
|
648
678
|
end
|
649
679
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module OodCore
|
2
|
+
module Job
|
3
|
+
# An object that contains details about the cluster's active and total nodes, processors, and gpus
|
4
|
+
class ClusterInfo
|
5
|
+
using Refinements::HashExtensions
|
6
|
+
|
7
|
+
attr_reader :active_nodes, :total_nodes, :active_processors, :total_processors, :active_gpu_nodes,
|
8
|
+
:total_gpu_nodes, :active_gpus, :total_gpus
|
9
|
+
|
10
|
+
def initialize(opts = {})
|
11
|
+
opts = opts.transform_keys(&:to_sym)
|
12
|
+
@active_nodes = opts.fetch(:active_nodes, nil).to_i
|
13
|
+
@total_nodes = opts.fetch(:total_nodes, nil).to_i
|
14
|
+
@active_processors = opts.fetch(:active_processors, nil).to_i
|
15
|
+
@total_processors = opts.fetch(:total_processors, nil).to_i
|
16
|
+
@active_gpus = opts.fetch(:active_gpus, nil).to_i
|
17
|
+
@total_gpus = opts.fetch(:total_gpus, nil).to_i
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_h
|
21
|
+
{
|
22
|
+
active_nodes: active_nodes,
|
23
|
+
total_nodes: total_nodes,
|
24
|
+
active_processors: active_processors,
|
25
|
+
total_processors: total_processors,
|
26
|
+
active_gpus: active_gpus,
|
27
|
+
total_gpus: total_gpus
|
28
|
+
}
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/ood_core/job/info.rb
CHANGED
@@ -65,6 +65,10 @@ module OodCore
|
|
65
65
|
# @return [Object] native info
|
66
66
|
attr_reader :native
|
67
67
|
|
68
|
+
# Number of gpus allocated for job
|
69
|
+
# @return [Integer, nil] allocated total number of gpus
|
70
|
+
attr_reader :gpus
|
71
|
+
|
68
72
|
# List of job array child task statuses
|
69
73
|
# @note only relevant for job arrays
|
70
74
|
# @return [Array<Task>] tasks
|
@@ -86,15 +90,16 @@ module OodCore
|
|
86
90
|
# @param dispatch_time [#to_i, nil] dispatch time
|
87
91
|
# @param tasks [Array<Hash>] tasks e.g. { id: '12345.owens-batch', status: :running }
|
88
92
|
# @param native [Object] native info
|
93
|
+
# @param gpus [#to_i, nil] allocated total number of gpus (defaults to 0)
|
89
94
|
def initialize(id:, status:, allocated_nodes: [], submit_host: nil,
|
90
95
|
job_name: nil, job_owner: nil, accounting_id: nil,
|
91
96
|
procs: nil, queue_name: nil, wallclock_time: nil,
|
92
97
|
wallclock_limit: nil, cpu_time: nil, submission_time: nil,
|
93
|
-
dispatch_time: nil, native: nil, tasks: [],
|
98
|
+
dispatch_time: nil, native: nil, gpus: 0, tasks: [],
|
94
99
|
**_)
|
95
100
|
@id = id.to_s
|
96
101
|
@status = Status.new(state: status.to_sym)
|
97
|
-
@allocated_nodes = allocated_nodes.map { |n| NodeInfo.new(n.to_h) }
|
102
|
+
@allocated_nodes = allocated_nodes.map { |n| NodeInfo.new(**n.to_h) }
|
98
103
|
@submit_host = submit_host && submit_host.to_s
|
99
104
|
@job_name = job_name && job_name.to_s
|
100
105
|
@job_owner = job_owner && job_owner.to_s
|
@@ -111,6 +116,7 @@ module OodCore
|
|
111
116
|
@status = job_array_aggregate_status unless @tasks.empty?
|
112
117
|
|
113
118
|
@native = native
|
119
|
+
@gpus = gpus && gpus.to_i
|
114
120
|
end
|
115
121
|
|
116
122
|
# Create a new Info for a child task
|
@@ -147,10 +153,15 @@ module OodCore
|
|
147
153
|
submission_time: submission_time,
|
148
154
|
dispatch_time: dispatch_time,
|
149
155
|
native: native,
|
156
|
+
gpus: gpus,
|
150
157
|
tasks: tasks
|
151
158
|
}
|
152
159
|
end
|
153
160
|
|
161
|
+
def gpu?
|
162
|
+
gpus.positive?
|
163
|
+
end
|
164
|
+
|
154
165
|
# The comparison operator
|
155
166
|
# @param other [#to_h] object to compare against
|
156
167
|
# @return [Boolean] whether objects are equivalent
|
data/lib/ood_core/version.rb
CHANGED
data/lib/ood_core.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -20,10 +20,11 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.bindir = "exe"
|
21
21
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
22
|
spec.require_paths = ["lib"]
|
23
|
-
spec.required_ruby_version = ">= 2.
|
23
|
+
spec.required_ruby_version = ">= 2.7.0"
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "ood_support", "~> 0.0.2"
|
26
26
|
spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
|
27
|
+
spec.add_runtime_dependency "rexml", "~> 3.2"
|
27
28
|
spec.add_development_dependency "bundler", "~> 2.1"
|
28
29
|
spec.add_development_dependency "rake", "~> 13.0.1"
|
29
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.19.0
|
4
|
+
version: 0.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-03
|
13
|
+
date: 2022-06-03 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -46,6 +46,20 @@ dependencies:
|
|
46
46
|
- - ">="
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: 1.9.6
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: rexml
|
51
|
+
requirement: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '3.2'
|
56
|
+
type: :runtime
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '3.2'
|
49
63
|
- !ruby/object:Gem::Dependency
|
50
64
|
name: bundler
|
51
65
|
requirement: !ruby/object:Gem::Requirement
|
@@ -196,6 +210,7 @@ files:
|
|
196
210
|
- lib/ood_core/job/adapters/torque/error.rb
|
197
211
|
- lib/ood_core/job/adapters/torque/ffi.rb
|
198
212
|
- lib/ood_core/job/array_ids.rb
|
213
|
+
- lib/ood_core/job/cluster_info.rb
|
199
214
|
- lib/ood_core/job/factory.rb
|
200
215
|
- lib/ood_core/job/info.rb
|
201
216
|
- lib/ood_core/job/node_info.rb
|
@@ -219,7 +234,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
219
234
|
requirements:
|
220
235
|
- - ">="
|
221
236
|
- !ruby/object:Gem::Version
|
222
|
-
version: 2.
|
237
|
+
version: 2.7.0
|
223
238
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
224
239
|
requirements:
|
225
240
|
- - ">="
|