ood_core 0.25.0 → 0.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ood_core/batch_connect/templates/vnc.rb +6 -4
- data/lib/ood_core/job/adapter.rb +8 -0
- data/lib/ood_core/job/adapters/slurm.rb +33 -3
- data/lib/ood_core/job/adapters/systemd/launcher.rb +1 -1
- data/lib/ood_core/job/node_info.rb +11 -2
- data/lib/ood_core/job/queue_info.rb +8 -0
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -2
- metadata +6 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ef6cd5a8a206393dcaed9d0121e3646dd6fc3d2b0405992c3fcfa5a745e9489
|
4
|
+
data.tar.gz: 16a5cf6f03ed2be4c563dafb7eaa9e2a04d465a4d8a3cea5d3bb14b634d65868
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 563930fc05d105b46fa1f8942294a06b3fd3ffce665ba6aa93fae3a21759613fdf229c9cca4f5ec8bd99c7b57cbb9e7e4348420de47d4d0884a78dfd62c43b5f
|
7
|
+
data.tar.gz: dc88b7bc4a28e96725bcd2caf0e40659bca94a40ab1c50432af0eee2b649f4a48356e858a7f9ceca1a8a18f99c823bf9ffc4c7cecefecb9820d8ce7621c7c51b
|
@@ -134,7 +134,8 @@ module OodCore
|
|
134
134
|
def after_script
|
135
135
|
websockify_cmd = context.fetch(:websockify_cmd, "${WEBSOCKIFY_CMD:-/opt/websockify/run}").to_s
|
136
136
|
websockify_hb = context.fetch(:websockify_heartbeat_seconds, "${WEBSOCKIFY_HEARTBEAT_SECONDS:-30}").to_s
|
137
|
-
|
137
|
+
websockify_timeout_seconds = context.fetch(:websockify_timeout_seconds, '${WEBSOCKIFY_TIMEOUT_SECONDS:-10}').to_s
|
138
|
+
|
138
139
|
<<-EOT.gsub(/^ {14}/, "")
|
139
140
|
#{super}
|
140
141
|
|
@@ -146,6 +147,7 @@ module OodCore
|
|
146
147
|
#{websockify_cmd} $1 --heartbeat=#{websockify_hb} $2 &> $log_file &
|
147
148
|
local ws_pid=$!
|
148
149
|
local counter=0
|
150
|
+
local max_timeout=#{websockify_timeout_seconds}
|
149
151
|
|
150
152
|
# wait till websockify has successfully started
|
151
153
|
echo "[websockify]: pid: $ws_pid (proxying $1 ==> $2)" >&2
|
@@ -156,9 +158,9 @@ module OodCore
|
|
156
158
|
if ! ps $ws_pid > /dev/null; then
|
157
159
|
echo "[websockify]: failed to launch!" >&2
|
158
160
|
return 1
|
159
|
-
elif [ $counter -ge
|
160
|
-
# timeout after
|
161
|
-
echo "[websockify]: timed-out :(!" >&2
|
161
|
+
elif [ $counter -ge $max_timeout ]; then
|
162
|
+
# timeout after max_timeout seconds
|
163
|
+
echo "[websockify]: timed-out after $max_timeout seconds :(!" >&2
|
162
164
|
return 1
|
163
165
|
else
|
164
166
|
sleep 1
|
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -213,6 +213,14 @@ module OodCore
|
|
213
213
|
def queues
|
214
214
|
[]
|
215
215
|
end
|
216
|
+
|
217
|
+
# Return the list of nodes for this scheduler.
|
218
|
+
#
|
219
|
+
# Subclasses that do not implement this will return empty arrays.
|
220
|
+
# @return [Array<NodeInfo>]
|
221
|
+
def nodes
|
222
|
+
[]
|
223
|
+
end
|
216
224
|
end
|
217
225
|
end
|
218
226
|
end
|
@@ -41,7 +41,7 @@ module OodCore
|
|
41
41
|
# calculated from gres string
|
42
42
|
# @return [Integer] the number of gpus in gres
|
43
43
|
def self.gpus_from_gres(gres)
|
44
|
-
gres.to_s.scan(/gpu
|
44
|
+
gres.to_s.scan(/gpu[^(,]*[:=](\d+)/).flatten.map(&:to_i).sum
|
45
45
|
end
|
46
46
|
|
47
47
|
# Object used for simplified communication with a Slurm batch server
|
@@ -169,6 +169,7 @@ module OodCore
|
|
169
169
|
# jobs << job
|
170
170
|
#
|
171
171
|
# assuming keys and values are same length! if not we have an error!
|
172
|
+
line = line.encode('UTF-8', invalid: :replace, undef: :replace)
|
172
173
|
values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
|
173
174
|
jobs << Hash[fields.keys.zip(values)] unless values.empty?
|
174
175
|
end
|
@@ -327,13 +328,37 @@ module OodCore
|
|
327
328
|
|
328
329
|
[].tap do |ret_arr|
|
329
330
|
info_raw.each_line do |line|
|
330
|
-
ret_arr <<
|
331
|
+
ret_arr << str_to_queue_info(line)
|
331
332
|
end
|
332
333
|
end
|
333
334
|
end
|
334
335
|
|
336
|
+
def all_sinfo_node_fields
|
337
|
+
{
|
338
|
+
procs: '%c',
|
339
|
+
name: '%n',
|
340
|
+
features: '%f'
|
341
|
+
}
|
342
|
+
end
|
343
|
+
|
344
|
+
def nodes
|
345
|
+
args = all_sinfo_node_fields.values.join(UNIT_SEPARATOR)
|
346
|
+
output = call('sinfo', '-ho', "#{RECORD_SEPARATOR}#{args}")
|
347
|
+
|
348
|
+
output.each_line(RECORD_SEPARATOR).map do |line|
|
349
|
+
values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
|
350
|
+
|
351
|
+
next if values.empty?
|
352
|
+
|
353
|
+
data = Hash[all_sinfo_node_fields.keys.zip(values)]
|
354
|
+
data[:name] = data[:name].to_s.split(',').first
|
355
|
+
data[:features] = data[:features].to_s.split(',')
|
356
|
+
NodeInfo.new(**data)
|
357
|
+
end.compact
|
358
|
+
end
|
359
|
+
|
335
360
|
private
|
336
|
-
def
|
361
|
+
def str_to_queue_info(line)
|
337
362
|
hsh = line.split(' ').map do |token|
|
338
363
|
m = token.match(/^(?<key>\w+)=(?<value>.+)$/)
|
339
364
|
[m[:key], m[:value]]
|
@@ -349,6 +374,7 @@ module OodCore
|
|
349
374
|
|
350
375
|
|
351
376
|
hsh[:deny_accounts] = hsh[:DenyAccounts].nil? ? [] : hsh[:DenyAccounts].to_s.split(',')
|
377
|
+
hsh[:tres] = hsh[:TRES].nil? ? {} : hsh[:TRES].to_s.split(',').map { |str| str.split('=') }.to_h
|
352
378
|
|
353
379
|
OodCore::Job::QueueInfo.new(**hsh)
|
354
380
|
end
|
@@ -669,6 +695,10 @@ module OodCore
|
|
669
695
|
@slurm.queues
|
670
696
|
end
|
671
697
|
|
698
|
+
def nodes
|
699
|
+
@slurm.nodes
|
700
|
+
end
|
701
|
+
|
672
702
|
private
|
673
703
|
# Convert duration to seconds
|
674
704
|
def duration_in_seconds(time)
|
@@ -204,7 +204,7 @@ class OodCore::Job::Adapters::LinuxSystemd::Launcher
|
|
204
204
|
|
205
205
|
# List all Systemd sessions on destination_host started by this adapter
|
206
206
|
def list_remote_systemd_session(destination_host)
|
207
|
-
cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}
|
207
|
+
cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-\\*"])
|
208
208
|
|
209
209
|
# individual units are separated with an empty line
|
210
210
|
call(*cmd).split("\n\n").map do |oneunit|
|
@@ -10,17 +10,26 @@ module OodCore
|
|
10
10
|
# @return [Integer, nil] number of procs
|
11
11
|
attr_reader :procs
|
12
12
|
|
13
|
+
# The features associated with this node.
|
14
|
+
# @return [Array<String>, []]
|
15
|
+
attr_reader :features
|
16
|
+
|
13
17
|
# @param name [#to_s] node name
|
14
18
|
# @param procs [#to_i, nil] number of procs
|
15
|
-
|
19
|
+
# @param features [#to_a, []] list of features
|
20
|
+
def initialize(name:, procs: nil, features: [], **_)
|
16
21
|
@name = name.to_s
|
17
22
|
@procs = procs && procs.to_i
|
23
|
+
@features = features.to_a
|
18
24
|
end
|
19
25
|
|
20
26
|
# Convert object to hash
|
21
27
|
# @return [Hash] object as hash
|
22
28
|
def to_h
|
23
|
-
|
29
|
+
instance_variables.map do |var|
|
30
|
+
name = var.to_s.gsub('@', '').to_sym
|
31
|
+
[name, send(name)]
|
32
|
+
end.to_h
|
24
33
|
end
|
25
34
|
|
26
35
|
# The comparison operator
|
@@ -20,9 +20,13 @@ class OodCore::Job::QueueInfo
|
|
20
20
|
# The accounts that are not allowed to use this queue.
|
21
21
|
attr_reader :deny_accounts
|
22
22
|
|
23
|
+
# A Hash of Trackable Resources and their values.
|
24
|
+
attr_reader :tres
|
25
|
+
|
23
26
|
def initialize(**opts)
|
24
27
|
@name = opts.fetch(:name, 'unknown')
|
25
28
|
@qos = opts.fetch(:qos, [])
|
29
|
+
@tres = opts.fetch(:tres, {})
|
26
30
|
|
27
31
|
allow_accounts = opts.fetch(:allow_accounts, nil)
|
28
32
|
@allow_accounts = if allow_accounts.nil?
|
@@ -42,4 +46,8 @@ class OodCore::Job::QueueInfo
|
|
42
46
|
[name, send(name)]
|
43
47
|
end.to_h
|
44
48
|
end
|
49
|
+
|
50
|
+
def gpu?
|
51
|
+
tres.keys.any? { |name| name.to_s.match?(%r{^gres/gpu($|:)}i) }
|
52
|
+
end
|
45
53
|
end
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -23,10 +23,10 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.required_ruby_version = ">= 2.5.0"
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "ood_support", "~> 0.0.2"
|
26
|
-
spec.add_runtime_dependency "ffi", "~> 1.
|
26
|
+
spec.add_runtime_dependency "ffi", "~> 1.16.3"
|
27
27
|
spec.add_runtime_dependency "rexml", "~> 3.2"
|
28
28
|
spec.add_development_dependency "bundler", "~> 2.1"
|
29
|
-
spec.add_development_dependency "rake", "~> 13.
|
29
|
+
spec.add_development_dependency "rake", "~> 13.2.0"
|
30
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
31
|
spec.add_development_dependency "pry", "~> 0.10"
|
32
32
|
spec.add_development_dependency "timecop", "~> 0.8"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.26.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2024-
|
13
|
+
date: 2024-07-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -32,20 +32,14 @@ dependencies:
|
|
32
32
|
requirements:
|
33
33
|
- - "~>"
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version:
|
36
|
-
- - ">="
|
37
|
-
- !ruby/object:Gem::Version
|
38
|
-
version: 1.9.6
|
35
|
+
version: 1.16.3
|
39
36
|
type: :runtime
|
40
37
|
prerelease: false
|
41
38
|
version_requirements: !ruby/object:Gem::Requirement
|
42
39
|
requirements:
|
43
40
|
- - "~>"
|
44
41
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
46
|
-
- - ">="
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
version: 1.9.6
|
42
|
+
version: 1.16.3
|
49
43
|
- !ruby/object:Gem::Dependency
|
50
44
|
name: rexml
|
51
45
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,14 +74,14 @@ dependencies:
|
|
80
74
|
requirements:
|
81
75
|
- - "~>"
|
82
76
|
- !ruby/object:Gem::Version
|
83
|
-
version: 13.
|
77
|
+
version: 13.2.0
|
84
78
|
type: :development
|
85
79
|
prerelease: false
|
86
80
|
version_requirements: !ruby/object:Gem::Requirement
|
87
81
|
requirements:
|
88
82
|
- - "~>"
|
89
83
|
- !ruby/object:Gem::Version
|
90
|
-
version: 13.
|
84
|
+
version: 13.2.0
|
91
85
|
- !ruby/object:Gem::Dependency
|
92
86
|
name: rspec
|
93
87
|
requirement: !ruby/object:Gem::Requirement
|