ood_core 0.25.0 → 0.26.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ood_core/batch_connect/templates/vnc.rb +6 -4
- data/lib/ood_core/job/adapter.rb +8 -0
- data/lib/ood_core/job/adapters/slurm.rb +33 -3
- data/lib/ood_core/job/adapters/systemd/launcher.rb +1 -1
- data/lib/ood_core/job/node_info.rb +11 -2
- data/lib/ood_core/job/queue_info.rb +8 -0
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -2
- metadata +6 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ef6cd5a8a206393dcaed9d0121e3646dd6fc3d2b0405992c3fcfa5a745e9489
|
4
|
+
data.tar.gz: 16a5cf6f03ed2be4c563dafb7eaa9e2a04d465a4d8a3cea5d3bb14b634d65868
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 563930fc05d105b46fa1f8942294a06b3fd3ffce665ba6aa93fae3a21759613fdf229c9cca4f5ec8bd99c7b57cbb9e7e4348420de47d4d0884a78dfd62c43b5f
|
7
|
+
data.tar.gz: dc88b7bc4a28e96725bcd2caf0e40659bca94a40ab1c50432af0eee2b649f4a48356e858a7f9ceca1a8a18f99c823bf9ffc4c7cecefecb9820d8ce7621c7c51b
|
@@ -134,7 +134,8 @@ module OodCore
|
|
134
134
|
def after_script
|
135
135
|
websockify_cmd = context.fetch(:websockify_cmd, "${WEBSOCKIFY_CMD:-/opt/websockify/run}").to_s
|
136
136
|
websockify_hb = context.fetch(:websockify_heartbeat_seconds, "${WEBSOCKIFY_HEARTBEAT_SECONDS:-30}").to_s
|
137
|
-
|
137
|
+
websockify_timeout_seconds = context.fetch(:websockify_timeout_seconds, '${WEBSOCKIFY_TIMEOUT_SECONDS:-10}').to_s
|
138
|
+
|
138
139
|
<<-EOT.gsub(/^ {14}/, "")
|
139
140
|
#{super}
|
140
141
|
|
@@ -146,6 +147,7 @@ module OodCore
|
|
146
147
|
#{websockify_cmd} $1 --heartbeat=#{websockify_hb} $2 &> $log_file &
|
147
148
|
local ws_pid=$!
|
148
149
|
local counter=0
|
150
|
+
local max_timeout=#{websockify_timeout_seconds}
|
149
151
|
|
150
152
|
# wait till websockify has successfully started
|
151
153
|
echo "[websockify]: pid: $ws_pid (proxying $1 ==> $2)" >&2
|
@@ -156,9 +158,9 @@ module OodCore
|
|
156
158
|
if ! ps $ws_pid > /dev/null; then
|
157
159
|
echo "[websockify]: failed to launch!" >&2
|
158
160
|
return 1
|
159
|
-
elif [ $counter -ge
|
160
|
-
# timeout after
|
161
|
-
echo "[websockify]: timed-out :(!" >&2
|
161
|
+
elif [ $counter -ge $max_timeout ]; then
|
162
|
+
# timeout after max_timeout seconds
|
163
|
+
echo "[websockify]: timed-out after $max_timeout seconds :(!" >&2
|
162
164
|
return 1
|
163
165
|
else
|
164
166
|
sleep 1
|
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -213,6 +213,14 @@ module OodCore
|
|
213
213
|
def queues
|
214
214
|
[]
|
215
215
|
end
|
216
|
+
|
217
|
+
# Return the list of nodes for this scheduler.
|
218
|
+
#
|
219
|
+
# Subclasses that do not implement this will return an empty array.
|
220
|
+
# @return [Array<NodeInfo>]
|
221
|
+
def nodes
|
222
|
+
[]
|
223
|
+
end
|
216
224
|
end
|
217
225
|
end
|
218
226
|
end
|
@@ -41,7 +41,7 @@ module OodCore
|
|
41
41
|
# calculated from gres string
|
42
42
|
# @return [Integer] the number of gpus in gres
|
43
43
|
def self.gpus_from_gres(gres)
|
44
|
-
gres.to_s.scan(/gpu
|
44
|
+
gres.to_s.scan(/gpu[^(,]*[:=](\d+)/).flatten.map(&:to_i).sum
|
45
45
|
end
|
46
46
|
|
47
47
|
# Object used for simplified communication with a Slurm batch server
|
@@ -169,6 +169,7 @@ module OodCore
|
|
169
169
|
# jobs << job
|
170
170
|
#
|
171
171
|
# assuming keys and values are same length! if not we have an error!
|
172
|
+
line = line.encode('UTF-8', invalid: :replace, undef: :replace)
|
172
173
|
values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
|
173
174
|
jobs << Hash[fields.keys.zip(values)] unless values.empty?
|
174
175
|
end
|
@@ -327,13 +328,37 @@ module OodCore
|
|
327
328
|
|
328
329
|
[].tap do |ret_arr|
|
329
330
|
info_raw.each_line do |line|
|
330
|
-
ret_arr <<
|
331
|
+
ret_arr << str_to_queue_info(line)
|
331
332
|
end
|
332
333
|
end
|
333
334
|
end
|
334
335
|
|
336
|
+
def all_sinfo_node_fields
|
337
|
+
{
|
338
|
+
procs: '%c',
|
339
|
+
name: '%n',
|
340
|
+
features: '%f'
|
341
|
+
}
|
342
|
+
end
|
343
|
+
|
344
|
+
def nodes
|
345
|
+
args = all_sinfo_node_fields.values.join(UNIT_SEPARATOR)
|
346
|
+
output = call('sinfo', '-ho', "#{RECORD_SEPARATOR}#{args}")
|
347
|
+
|
348
|
+
output.each_line(RECORD_SEPARATOR).map do |line|
|
349
|
+
values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
|
350
|
+
|
351
|
+
next if values.empty?
|
352
|
+
|
353
|
+
data = Hash[all_sinfo_node_fields.keys.zip(values)]
|
354
|
+
data[:name] = data[:name].to_s.split(',').first
|
355
|
+
data[:features] = data[:features].to_s.split(',')
|
356
|
+
NodeInfo.new(**data)
|
357
|
+
end.compact
|
358
|
+
end
|
359
|
+
|
335
360
|
private
|
336
|
-
def
|
361
|
+
def str_to_queue_info(line)
|
337
362
|
hsh = line.split(' ').map do |token|
|
338
363
|
m = token.match(/^(?<key>\w+)=(?<value>.+)$/)
|
339
364
|
[m[:key], m[:value]]
|
@@ -349,6 +374,7 @@ module OodCore
|
|
349
374
|
|
350
375
|
|
351
376
|
hsh[:deny_accounts] = hsh[:DenyAccounts].nil? ? [] : hsh[:DenyAccounts].to_s.split(',')
|
377
|
+
hsh[:tres] = hsh[:TRES].nil? ? {} : hsh[:TRES].to_s.split(',').map { |str| str.split('=') }.to_h
|
352
378
|
|
353
379
|
OodCore::Job::QueueInfo.new(**hsh)
|
354
380
|
end
|
@@ -669,6 +695,10 @@ module OodCore
|
|
669
695
|
@slurm.queues
|
670
696
|
end
|
671
697
|
|
698
|
+
def nodes
|
699
|
+
@slurm.nodes
|
700
|
+
end
|
701
|
+
|
672
702
|
private
|
673
703
|
# Convert duration to seconds
|
674
704
|
def duration_in_seconds(time)
|
@@ -204,7 +204,7 @@ class OodCore::Job::Adapters::LinuxSystemd::Launcher
|
|
204
204
|
|
205
205
|
# List all Systemd sessions on destination_host started by this adapter
|
206
206
|
def list_remote_systemd_session(destination_host)
|
207
|
-
cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}
|
207
|
+
cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-\\*"])
|
208
208
|
|
209
209
|
# individual units are separated with an empty line
|
210
210
|
call(*cmd).split("\n\n").map do |oneunit|
|
@@ -10,17 +10,26 @@ module OodCore
|
|
10
10
|
# @return [Integer, nil] number of procs
|
11
11
|
attr_reader :procs
|
12
12
|
|
13
|
+
# The features associated with this node.
|
14
|
+
# @return [Array<String>, []]
|
15
|
+
attr_reader :features
|
16
|
+
|
13
17
|
# @param name [#to_s] node name
|
14
18
|
# @param procs [#to_i, nil] number of procs
|
15
|
-
|
19
|
+
# @param features [#to_a, []] list of features
|
20
|
+
def initialize(name:, procs: nil, features: [], **_)
|
16
21
|
@name = name.to_s
|
17
22
|
@procs = procs && procs.to_i
|
23
|
+
@features = features.to_a
|
18
24
|
end
|
19
25
|
|
20
26
|
# Convert object to hash
|
21
27
|
# @return [Hash] object as hash
|
22
28
|
def to_h
|
23
|
-
|
29
|
+
instance_variables.map do |var|
|
30
|
+
name = var.to_s.gsub('@', '').to_sym
|
31
|
+
[name, send(name)]
|
32
|
+
end.to_h
|
24
33
|
end
|
25
34
|
|
26
35
|
# The comparison operator
|
@@ -20,9 +20,13 @@ class OodCore::Job::QueueInfo
|
|
20
20
|
# The accounts that are not allowed to use this queue.
|
21
21
|
attr_reader :deny_accounts
|
22
22
|
|
23
|
+
# A Hash of Trackable Resources and their values.
|
24
|
+
attr_reader :tres
|
25
|
+
|
23
26
|
def initialize(**opts)
|
24
27
|
@name = opts.fetch(:name, 'unknown')
|
25
28
|
@qos = opts.fetch(:qos, [])
|
29
|
+
@tres = opts.fetch(:tres, {})
|
26
30
|
|
27
31
|
allow_accounts = opts.fetch(:allow_accounts, nil)
|
28
32
|
@allow_accounts = if allow_accounts.nil?
|
@@ -42,4 +46,8 @@ class OodCore::Job::QueueInfo
|
|
42
46
|
[name, send(name)]
|
43
47
|
end.to_h
|
44
48
|
end
|
49
|
+
|
50
|
+
def gpu?
|
51
|
+
tres.keys.any? { |name| name.to_s.match?(%r{^gres/gpu($|:)}i) }
|
52
|
+
end
|
45
53
|
end
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -23,10 +23,10 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.required_ruby_version = ">= 2.5.0"
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "ood_support", "~> 0.0.2"
|
26
|
-
spec.add_runtime_dependency "ffi", "~> 1.
|
26
|
+
spec.add_runtime_dependency "ffi", "~> 1.16.3"
|
27
27
|
spec.add_runtime_dependency "rexml", "~> 3.2"
|
28
28
|
spec.add_development_dependency "bundler", "~> 2.1"
|
29
|
-
spec.add_development_dependency "rake", "~> 13.
|
29
|
+
spec.add_development_dependency "rake", "~> 13.2.0"
|
30
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
31
|
spec.add_development_dependency "pry", "~> 0.10"
|
32
32
|
spec.add_development_dependency "timecop", "~> 0.8"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.26.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2024-
|
13
|
+
date: 2024-07-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -32,20 +32,14 @@ dependencies:
|
|
32
32
|
requirements:
|
33
33
|
- - "~>"
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version:
|
36
|
-
- - ">="
|
37
|
-
- !ruby/object:Gem::Version
|
38
|
-
version: 1.9.6
|
35
|
+
version: 1.16.3
|
39
36
|
type: :runtime
|
40
37
|
prerelease: false
|
41
38
|
version_requirements: !ruby/object:Gem::Requirement
|
42
39
|
requirements:
|
43
40
|
- - "~>"
|
44
41
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
46
|
-
- - ">="
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
version: 1.9.6
|
42
|
+
version: 1.16.3
|
49
43
|
- !ruby/object:Gem::Dependency
|
50
44
|
name: rexml
|
51
45
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,14 +74,14 @@ dependencies:
|
|
80
74
|
requirements:
|
81
75
|
- - "~>"
|
82
76
|
- !ruby/object:Gem::Version
|
83
|
-
version: 13.
|
77
|
+
version: 13.2.0
|
84
78
|
type: :development
|
85
79
|
prerelease: false
|
86
80
|
version_requirements: !ruby/object:Gem::Requirement
|
87
81
|
requirements:
|
88
82
|
- - "~>"
|
89
83
|
- !ruby/object:Gem::Version
|
90
|
-
version: 13.
|
84
|
+
version: 13.2.0
|
91
85
|
- !ruby/object:Gem::Dependency
|
92
86
|
name: rspec
|
93
87
|
requirement: !ruby/object:Gem::Requirement
|