ood_core 0.24.2 → 0.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/lib/ood_core/batch_connect/templates/vnc.rb +10 -4
- data/lib/ood_core/job/adapter.rb +8 -0
- data/lib/ood_core/job/adapters/slurm.rb +33 -3
- data/lib/ood_core/job/adapters/systemd/launcher.rb +1 -1
- data/lib/ood_core/job/node_info.rb +11 -2
- data/lib/ood_core/job/queue_info.rb +8 -0
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -2
- metadata +6 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ef6cd5a8a206393dcaed9d0121e3646dd6fc3d2b0405992c3fcfa5a745e9489
|
4
|
+
data.tar.gz: 16a5cf6f03ed2be4c563dafb7eaa9e2a04d465a4d8a3cea5d3bb14b634d65868
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 563930fc05d105b46fa1f8942294a06b3fd3ffce665ba6aa93fae3a21759613fdf229c9cca4f5ec8bd99c7b57cbb9e7e4348420de47d4d0884a78dfd62c43b5f
|
7
|
+
data.tar.gz: dc88b7bc4a28e96725bcd2caf0e40659bca94a40ab1c50432af0eee2b649f4a48356e858a7f9ceca1a8a18f99c823bf9ffc4c7cecefecb9820d8ce7621c7c51b
|
data/CHANGELOG.md
CHANGED
@@ -7,10 +7,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
# [0.25.0] - 03-27-2024
|
11
|
+
|
12
|
+
- [828](https://github.com/OSC/ood_core/pull/828) and [826](https://github.com/OSC/ood_core/pull/826)
|
13
|
+
add configurable heartbeats to noVNC connections.
|
14
|
+
|
10
15
|
# [0.24.2] - 01-24-2024
|
11
16
|
|
12
17
|
- [823](https://github.com/OSC/ood_core/pull/823) Corrected a mistake in converting duration to seconds.
|
13
|
-
|
18
|
+
- [821](https://github.com/OSC/ood_core/pull/821) add container_start_args to pass options to the start command.
|
14
19
|
|
15
20
|
## [0.24.1] - 11-29-2023
|
16
21
|
|
@@ -24,6 +24,9 @@ module OodCore
|
|
24
24
|
# @option context [#to_s] :websockify_cmd
|
25
25
|
# ("${WEBSOCKIFY_CMD:-/opt/websockify/run}") the path to the
|
26
26
|
# websockify script (assumes you don't modify `:after_script`)
|
27
|
+
# @option context [#to_s] :websockify_heartbeat_seconds
|
28
|
+
# ("${WEBSOCKIFY_HEARTBEAT_SECONDS:-30}") the websockify heartbeat
|
29
|
+
# duration in seconds. (assumes you don't modify `:after_script`)
|
27
30
|
# @option context [#to_s] :vnc_log ("vnc.log") path to vnc server log
|
28
31
|
# file (assumes you don't modify `:before_script` or `:after_script`)
|
29
32
|
# @option context [#to_s] :vnc_passwd ("vnc.passwd") path to the file
|
@@ -130,6 +133,8 @@ module OodCore
|
|
130
133
|
# successful connections so that the password can be reset
|
131
134
|
def after_script
|
132
135
|
websockify_cmd = context.fetch(:websockify_cmd, "${WEBSOCKIFY_CMD:-/opt/websockify/run}").to_s
|
136
|
+
websockify_hb = context.fetch(:websockify_heartbeat_seconds, "${WEBSOCKIFY_HEARTBEAT_SECONDS:-30}").to_s
|
137
|
+
websockify_timeout_seconds = context.fetch(:websockify_timeout_seconds, '${WEBSOCKIFY_TIMEOUT_SECONDS:-10}').to_s
|
133
138
|
|
134
139
|
<<-EOT.gsub(/^ {14}/, "")
|
135
140
|
#{super}
|
@@ -139,9 +144,10 @@ module OodCore
|
|
139
144
|
start_websockify() {
|
140
145
|
local log_file="./websockify.log"
|
141
146
|
# launch websockify in background and redirect all output to a file.
|
142
|
-
#{websockify_cmd} $1 $2 &> $log_file &
|
147
|
+
#{websockify_cmd} $1 --heartbeat=#{websockify_hb} $2 &> $log_file &
|
143
148
|
local ws_pid=$!
|
144
149
|
local counter=0
|
150
|
+
local max_timeout=#{websockify_timeout_seconds}
|
145
151
|
|
146
152
|
# wait till websockify has successfully started
|
147
153
|
echo "[websockify]: pid: $ws_pid (proxying $1 ==> $2)" >&2
|
@@ -152,9 +158,9 @@ module OodCore
|
|
152
158
|
if ! ps $ws_pid > /dev/null; then
|
153
159
|
echo "[websockify]: failed to launch!" >&2
|
154
160
|
return 1
|
155
|
-
elif [ $counter -ge
|
156
|
-
# timeout after
|
157
|
-
echo "[websockify]: timed-out :(!" >&2
|
161
|
+
elif [ $counter -ge $max_timeout ]; then
|
162
|
+
# timeout after max_timeout seconds
|
163
|
+
echo "[websockify]: timed-out after $max_timeout seconds :(!" >&2
|
158
164
|
return 1
|
159
165
|
else
|
160
166
|
sleep 1
|
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -213,6 +213,14 @@ module OodCore
|
|
213
213
|
def queues
|
214
214
|
[]
|
215
215
|
end
|
216
|
+
|
217
|
+
# Return the list of nodes for this scheduler.
|
218
|
+
#
|
219
|
+
# Subclasses that do not implement this will return an empty array.
|
220
|
+
# @return [Array<NodeInfo>]
|
221
|
+
def nodes
|
222
|
+
[]
|
223
|
+
end
|
216
224
|
end
|
217
225
|
end
|
218
226
|
end
|
@@ -41,7 +41,7 @@ module OodCore
|
|
41
41
|
# calculated from gres string
|
42
42
|
# @return [Integer] the number of gpus in gres
|
43
43
|
def self.gpus_from_gres(gres)
|
44
|
-
gres.to_s.scan(/gpu
|
44
|
+
gres.to_s.scan(/gpu[^(,]*[:=](\d+)/).flatten.map(&:to_i).sum
|
45
45
|
end
|
46
46
|
|
47
47
|
# Object used for simplified communication with a Slurm batch server
|
@@ -169,6 +169,7 @@ module OodCore
|
|
169
169
|
# jobs << job
|
170
170
|
#
|
171
171
|
# assuming keys and values are same length! if not we have an error!
|
172
|
+
line = line.encode('UTF-8', invalid: :replace, undef: :replace)
|
172
173
|
values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
|
173
174
|
jobs << Hash[fields.keys.zip(values)] unless values.empty?
|
174
175
|
end
|
@@ -327,13 +328,37 @@ module OodCore
|
|
327
328
|
|
328
329
|
[].tap do |ret_arr|
|
329
330
|
info_raw.each_line do |line|
|
330
|
-
ret_arr <<
|
331
|
+
ret_arr << str_to_queue_info(line)
|
331
332
|
end
|
332
333
|
end
|
333
334
|
end
|
334
335
|
|
336
|
+
def all_sinfo_node_fields
|
337
|
+
{
|
338
|
+
procs: '%c',
|
339
|
+
name: '%n',
|
340
|
+
features: '%f'
|
341
|
+
}
|
342
|
+
end
|
343
|
+
|
344
|
+
def nodes
|
345
|
+
args = all_sinfo_node_fields.values.join(UNIT_SEPARATOR)
|
346
|
+
output = call('sinfo', '-ho', "#{RECORD_SEPARATOR}#{args}")
|
347
|
+
|
348
|
+
output.each_line(RECORD_SEPARATOR).map do |line|
|
349
|
+
values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
|
350
|
+
|
351
|
+
next if values.empty?
|
352
|
+
|
353
|
+
data = Hash[all_sinfo_node_fields.keys.zip(values)]
|
354
|
+
data[:name] = data[:name].to_s.split(',').first
|
355
|
+
data[:features] = data[:features].to_s.split(',')
|
356
|
+
NodeInfo.new(**data)
|
357
|
+
end.compact
|
358
|
+
end
|
359
|
+
|
335
360
|
private
|
336
|
-
def
|
361
|
+
def str_to_queue_info(line)
|
337
362
|
hsh = line.split(' ').map do |token|
|
338
363
|
m = token.match(/^(?<key>\w+)=(?<value>.+)$/)
|
339
364
|
[m[:key], m[:value]]
|
@@ -349,6 +374,7 @@ module OodCore
|
|
349
374
|
|
350
375
|
|
351
376
|
hsh[:deny_accounts] = hsh[:DenyAccounts].nil? ? [] : hsh[:DenyAccounts].to_s.split(',')
|
377
|
+
hsh[:tres] = hsh[:TRES].nil? ? {} : hsh[:TRES].to_s.split(',').map { |str| str.split('=') }.to_h
|
352
378
|
|
353
379
|
OodCore::Job::QueueInfo.new(**hsh)
|
354
380
|
end
|
@@ -669,6 +695,10 @@ module OodCore
|
|
669
695
|
@slurm.queues
|
670
696
|
end
|
671
697
|
|
698
|
+
def nodes
|
699
|
+
@slurm.nodes
|
700
|
+
end
|
701
|
+
|
672
702
|
private
|
673
703
|
# Convert duration to seconds
|
674
704
|
def duration_in_seconds(time)
|
@@ -204,7 +204,7 @@ class OodCore::Job::Adapters::LinuxSystemd::Launcher
|
|
204
204
|
|
205
205
|
# List all Systemd sessions on destination_host started by this adapter
|
206
206
|
def list_remote_systemd_session(destination_host)
|
207
|
-
cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}
|
207
|
+
cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-\\*"])
|
208
208
|
|
209
209
|
# individual units are separated with an empty line
|
210
210
|
call(*cmd).split("\n\n").map do |oneunit|
|
@@ -10,17 +10,26 @@ module OodCore
|
|
10
10
|
# @return [Integer, nil] number of procs
|
11
11
|
attr_reader :procs
|
12
12
|
|
13
|
+
# The features associated with this node.
|
14
|
+
# @return [Array<String>] the features, or an empty array when there are none
|
15
|
+
attr_reader :features
|
16
|
+
|
13
17
|
# @param name [#to_s] node name
|
14
18
|
# @param procs [#to_i, nil] number of procs
|
15
|
-
|
19
|
+
# @param features [#to_a] list of features (defaults to an empty list)
|
20
|
+
def initialize(name:, procs: nil, features: [], **_)
|
16
21
|
@name = name.to_s
|
17
22
|
@procs = procs && procs.to_i
|
23
|
+
@features = features.to_a
|
18
24
|
end
|
19
25
|
|
20
26
|
# Convert object to hash
|
21
27
|
# @return [Hash] object as hash
|
22
28
|
def to_h
|
23
|
-
|
29
|
+
instance_variables.map do |var|
|
30
|
+
name = var.to_s.gsub('@', '').to_sym
|
31
|
+
[name, send(name)]
|
32
|
+
end.to_h
|
24
33
|
end
|
25
34
|
|
26
35
|
# The comparison operator
|
@@ -20,9 +20,13 @@ class OodCore::Job::QueueInfo
|
|
20
20
|
# The accounts that are not allowed to use this queue.
|
21
21
|
attr_reader :deny_accounts
|
22
22
|
|
23
|
+
# A Hash of Trackable Resources (TRES) and their values.
|
24
|
+
attr_reader :tres
|
25
|
+
|
23
26
|
def initialize(**opts)
|
24
27
|
@name = opts.fetch(:name, 'unknown')
|
25
28
|
@qos = opts.fetch(:qos, [])
|
29
|
+
@tres = opts.fetch(:tres, {})
|
26
30
|
|
27
31
|
allow_accounts = opts.fetch(:allow_accounts, nil)
|
28
32
|
@allow_accounts = if allow_accounts.nil?
|
@@ -42,4 +46,8 @@ class OodCore::Job::QueueInfo
|
|
42
46
|
[name, send(name)]
|
43
47
|
end.to_h
|
44
48
|
end
|
49
|
+
|
50
|
+
def gpu?
|
51
|
+
tres.keys.any? { |name| name.to_s.match?(%r{^gres/gpu($|:)}i) }
|
52
|
+
end
|
45
53
|
end
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -23,10 +23,10 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.required_ruby_version = ">= 2.5.0"
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "ood_support", "~> 0.0.2"
|
26
|
-
spec.add_runtime_dependency "ffi", "~> 1.
|
26
|
+
spec.add_runtime_dependency "ffi", "~> 1.16.3"
|
27
27
|
spec.add_runtime_dependency "rexml", "~> 3.2"
|
28
28
|
spec.add_development_dependency "bundler", "~> 2.1"
|
29
|
-
spec.add_development_dependency "rake", "~> 13.
|
29
|
+
spec.add_development_dependency "rake", "~> 13.2.0"
|
30
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
31
|
spec.add_development_dependency "pry", "~> 0.10"
|
32
32
|
spec.add_development_dependency "timecop", "~> 0.8"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.26.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2024-
|
13
|
+
date: 2024-07-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -32,20 +32,14 @@ dependencies:
|
|
32
32
|
requirements:
|
33
33
|
- - "~>"
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version:
|
36
|
-
- - ">="
|
37
|
-
- !ruby/object:Gem::Version
|
38
|
-
version: 1.9.6
|
35
|
+
version: 1.16.3
|
39
36
|
type: :runtime
|
40
37
|
prerelease: false
|
41
38
|
version_requirements: !ruby/object:Gem::Requirement
|
42
39
|
requirements:
|
43
40
|
- - "~>"
|
44
41
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
46
|
-
- - ">="
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
version: 1.9.6
|
42
|
+
version: 1.16.3
|
49
43
|
- !ruby/object:Gem::Dependency
|
50
44
|
name: rexml
|
51
45
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,14 +74,14 @@ dependencies:
|
|
80
74
|
requirements:
|
81
75
|
- - "~>"
|
82
76
|
- !ruby/object:Gem::Version
|
83
|
-
version: 13.
|
77
|
+
version: 13.2.0
|
84
78
|
type: :development
|
85
79
|
prerelease: false
|
86
80
|
version_requirements: !ruby/object:Gem::Requirement
|
87
81
|
requirements:
|
88
82
|
- - "~>"
|
89
83
|
- !ruby/object:Gem::Version
|
90
|
-
version: 13.
|
84
|
+
version: 13.2.0
|
91
85
|
- !ruby/object:Gem::Dependency
|
92
86
|
name: rspec
|
93
87
|
requirement: !ruby/object:Gem::Requirement
|