ood_core 0.21.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -1
- data/CHANGELOG.md +29 -1
- data/docs/.keep +0 -0
- data/lib/ood_core/batch_connect/templates/vnc_container.rb +252 -0
- data/lib/ood_core/job/account_info.rb +36 -0
- data/lib/ood_core/job/adapter.rb +16 -0
- data/lib/ood_core/job/adapters/ccq.rb +1 -0
- data/lib/ood_core/job/adapters/helper.rb +11 -0
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +2 -0
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +10 -4
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +1 -1
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +5 -1
- data/lib/ood_core/job/adapters/kubernetes.rb +2 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +1 -1
- data/lib/ood_core/job/adapters/lsf.rb +2 -0
- data/lib/ood_core/job/adapters/pbspro.rb +1 -1
- data/lib/ood_core/job/adapters/sge/helper.rb +2 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +0 -1
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +0 -1
- data/lib/ood_core/job/adapters/sge.rb +2 -0
- data/lib/ood_core/job/adapters/slurm.rb +64 -1
- data/lib/ood_core/job/adapters/systemd.rb +1 -2
- data/lib/ood_core/job/adapters/torque/attributes.rb +1 -0
- data/lib/ood_core/job/adapters/torque/batch.rb +2 -0
- data/lib/ood_core/job/adapters/torque/error.rb +1 -0
- data/lib/ood_core/job/adapters/torque/ffi.rb +1 -1
- data/lib/ood_core/job/array_ids.rb +9 -9
- data/lib/ood_core/job/cluster_info.rb +1 -1
- data/lib/ood_core/job/info.rb +1 -1
- data/lib/ood_core/job/node_info.rb +1 -1
- data/lib/ood_core/job/queue_info.rb +33 -0
- data/lib/ood_core/job/status.rb +1 -1
- data/lib/ood_core/version.rb +1 -1
- data/lib/ood_core.rb +2 -0
- data/ood_core.gemspec +1 -1
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 379d230153e94bdbeca027f601afc89a525b328d8e200cc1a6a18754b3370b88
+  data.tar.gz: 5544e85c81b6e955a63144a056eee24d6bd5ec062de4d17a535d9df78eb5a817
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 972cabe2a634f50b730e29d31a54e34c2784ea707fd562b867c8f8731e4d612bd39fd8bcacb84b49585a08131d4f9e3770195f2c1c6036fde52ff4e3d4f7124c
+  data.tar.gz: 4a0997de2400017ad8a921358b76341706cd4e58034c881dd8b9dfe63e9d9b7c559c42ed711a13051cd5168dde6703164714d22ee1e8ae2d7980fa33d2da3e4c
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+## [0.23.0] - 01-17-2023
+
+### Added
+
+- [787](https://github.com/OSC/ood_core/pull/787) added the `queues` API to the adapter class with
+  support for Slurm.
+- [783](https://github.com/OSC/ood_core/pull/783) added the `accounts` API to the adapter class with
+  support for Slurm.
+
+### Fixed
+
+- The linux host adapter now supports apptainer in [788](https://github.com/OSC/ood_core/pull/788).
+
+
+## [0.22.0] - 10-31-2022
+
+### Added
+
+- Added the `vnc_container` batch connect template in [774](https://github.com/OSC/ood_core/pull/774).
+- https://osc.github.io/ood_core is now updated on every commit to master in [765](https://github.com/OSC/ood_core/pull/765).
+
+### Fixed
+
+- Kubernetes can now read mulitple secrets in [778](https://github.com/OSC/ood_core/pull/778).
+- PBSPro correctly reads usernames with periods in them in [780](https://github.com/OSC/ood_core/pull/780).
+
 ## [0.21.0] - 08-01-2022
 
 ### Added
@@ -443,7 +469,9 @@ Functionally the same as [0.17.3] but with some CI updates.
 ### Added
 - Initial release!
 
-[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
+[Unreleased]: https://github.com/OSC/ood_core/compare/v0.23.0...HEAD
+[0.23.0]: https://github.com/OSC/ood_core/compare/v0.22.0...v0.23.0
+[0.22.0]: https://github.com/OSC/ood_core/compare/v0.21.0...v0.22.0
 [0.21.0]: https://github.com/OSC/ood_core/compare/v0.20.2...v0.21.0
 [0.20.2]: https://github.com/OSC/ood_core/compare/v0.20.1...v0.20.2
 [0.20.1]: https://github.com/OSC/ood_core/compare/v0.20.0...v0.20.1
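For orientation, a minimal, hypothetical sketch of the two adapter APIs introduced in 0.23.0 follows; the configuration values are placeholders, and only the Slurm adapter implements these calls in this release.

    # Hypothetical usage of the new 0.23.0 adapter APIs; the config values
    # are placeholders, and only Slurm implements accounts/queues here.
    require "ood_core"

    adapter = OodCore::Job::Factory.build({ adapter: "slurm", bin: "/usr/bin" })

    adapter.accounts.each { |acct| puts "#{acct.name} (cluster: #{acct.cluster})" }
    adapter.queues.each   { |q| puts "#{q.name} allows: #{q.allow_accounts.inspect}" }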
data/docs/.keep
ADDED
File without changes
data/lib/ood_core/batch_connect/templates/vnc_container.rb
ADDED
@@ -0,0 +1,252 @@
+require "ood_core/refinements/hash_extensions"
+require "securerandom"
+
+module OodCore
+  module BatchConnect
+    class Factory
+      using Refinements::HashExtensions
+
+      # Build the VNC template from a configuration
+      # @param config [#to_h] the configuration for the batch connect template
+      def self.build_vnc_container(config)
+        context = config.to_h.symbolize_keys.reject { |k, _| k == :template }
+
+        unless context.key?(:container_path)
+          raise JobAdapterError, "You are missing the configuration 'container_path' for a vnc_container template."
+        end
+
+        Templates::VNC_Container.new(context)
+      end
+    end
+
+    module Templates
+      # A batch connect template that starts up a VNC server within a batch job
+      class VNC_Container < Template
+        # @param context [#to_h] the context used to render the template
+        # @option context [#to_sym, Array<#to_sym>] :conn_params ([]) A list of
+        #   connection parameters added to the connection file (`:host`,
+        #   `:port`, `:password`, `:spassword`, `:display` and `:websocket`
+        #   will always exist)
+        # @option context [#to_s] :websockify_cmd
+        #   ("${WEBSOCKIFY_CMD:-/opt/websockify/run}") the path to the
+        #   websockify script (assumes you don't modify `:after_script`)
+        # @option context [#to_s] :vnc_log ("vnc.log") path to vnc server log
+        #   file (assumes you don't modify `:before_script` or `:after_script`)
+        # @option context [#to_s] :vnc_passwd ("vnc.passwd") path to the file
+        #   generated that contains the encrypted vnc password (assumes you
+        #   don't modify `:before_script`)
+        # @option context [#to_s] :vnc_args arguments used when starting up the
+        #   vnc server (overrides any specific vnc argument) (assumes you don't
+        #   modify `:before_script`)
+        # @option context [#to_s] :name ("") name of the vnc server session
+        #   (not set if blank or `:vnc_args` is set) (assumes you don't modify
+        #   `:before_script`)
+        # @option context [#to_s] :geometry ("") resolution of vnc display (not
+        #   set if blank or `:vnc_args` is set) (assumes you don't modify
+        #   `:before_script`)
+        # @option context [#to_s] :dpi ("") dpi of vnc display (not set if
+        #   blank or `:vnc_args` is set) (assumes you don't modify
+        #   `:before_script`)
+        # @option context [#to_s] :fonts ("") command delimited list of fonts
+        #   available in vnc display (not set if blank or `:vnc_args` is set)
+        #   (assumes you don't modify `:before_script`)
+        # @option context [#to_s] :idle ("") timeout vnc server if no
+        #   connection in this amount of time in seconds (not set if blank or
+        #   `:vnc_args` is set) (assumes you don't modify `:before_script`)
+        # @option context [#to_s] :extra_args ("") any extra arguments used
+        #   when initializing the vnc server process (not set if blank or
+        #   `:vnc_args` is set) (assumes you don't modify `:before_script`)
+        # @option context [#to_s] :vnc_clean ("...") script used to clean up
+        #   any active vnc sessions (assumes you don't modify `:before_script`
+        #   or `:clean_script`)
+        # @option context [#to_s] :container_path ("vnc_container.sif") the path
+        #   to the container with VNC
+        # @option context [#to_s] :container_bindpath ("") paths to bind into
+        #   the container with VNC
+        # @option context [#to_s] :container_module ("singularity") the module
+        #   that loads Singularity or Apptainer with Lmod. Supports versions (i.e.
+        #   apptainer/1.10). If Singularity or Apptainer are installed at a
+        #   system level (i.e., no module loaded to activate), set this to an
+        #   empty string.
+        # @option context [#to_s] :container_command ("singularity") the
+        #   singularity or apptainer execution command
+        # @param instance_name (uuid) a name for the instance
+        # @see Template
+
+        def initialize(context = {})
+          @instance_name = SecureRandom.uuid
+          super
+        end
+
+        private
+          # We need to know the VNC and websockify connection information
+          def conn_params
+            (super + [:display, :websocket, :spassword, :instance_name]).uniq
+          end
+
+          # Before running the main script, start up a VNC server and record
+          # the connection information
+          def before_script
+            container_path = context.fetch(:container_path, "vnc_container.sif").to_s
+            container_bindpath = context.fetch(:container_bindpath, "").to_s
+
+            <<-EOT.gsub(/^ {14}/, "")
+
+              # Load #{container_module}
+              echo "Loading #{container_module}..."
+              module load #{container_module}
+              export #{container_command.upcase}_BINDPATH="#{container_bindpath}"
+              export INSTANCE_NAME="#{@instance_name}"
+              export instance_name="#{@instance_name}"
+              echo "Starting instance..."
+              #{container_command} instance start #{container_path} #{@instance_name}
+
+              # Setup one-time use passwords and initialize the VNC password
+              function change_passwd () {
+                echo "Setting VNC password..."
+                password=$(create_passwd "#{password_size}")
+                spassword=${spassword:-$(create_passwd "#{password_size}")}
+                (
+                  umask 077
+                  echo -ne "${password}\\n${spassword}" | #{container_command} exec instance://#{@instance_name} vncpasswd -f > "#{vnc_passwd}"
+                )
+              }
+              change_passwd
+
+
+              # Start up vnc server (if at first you don't succeed, try, try again)
+              echo "Starting VNC server..."
+              for i in $(seq 1 10); do
+                # Clean up any old VNC sessions that weren't cleaned before
+                #{vnc_clean}
+
+                # for turbovnc 3.0 compatability.
+                if timeout 2 #{container_command} exec instance://#{@instance_name} vncserver --help 2>&1 | grep 'nohttpd' >/dev/null 2>&1; then
+                  HTTPD_OPT='-nohttpd'
+                fi
+
+                # Attempt to start VNC server
+                VNC_OUT=$(#{container_command} exec instance://#{@instance_name} vncserver -log "#{vnc_log}" -rfbauth "#{vnc_passwd}" $HTTPD_OPT -noxstartup #{vnc_args} 2>&1)
+                VNC_PID=$(pgrep -s 0 Xvnc) # the script above will daemonize the Xvnc process
+                echo "${VNC_PID}"
+                echo "${VNC_OUT}"
+
+                # Sometimes Xvnc hangs if it fails to find working disaply, we
+                # should kill it and try again
+                kill -0 ${VNC_PID} 2>/dev/null && [[ "${VNC_OUT}" =~ "Fatal server error" ]] && kill -TERM ${VNC_PID}
+
+                # Check that Xvnc process is running, if not assume it died and
+                # wait some random period of time before restarting
+                kill -0 ${VNC_PID} 2>/dev/null || sleep 0.$(random_number 1 9)s
+
+                # If running, then all is well and break out of loop
+                kill -0 ${VNC_PID} 2>/dev/null && break
+              done
+
+              # If we fail to start it after so many tries, then just give up
+              kill -0 ${VNC_PID} 2>/dev/null || clean_up 1
+
+              # Parse output for ports used
+              display=$(echo "${VNC_OUT}" | awk -F':' '/^Desktop/{print $NF}')
+              port=$((5900+display))
+
+              echo "Successfully started VNC server on ${host}:${port}..."
+
+              #{super}
+            EOT
+          end
+
+          # Run the script under the VNC server's display
+          def run_script
+            %(DISPLAY=:${display} #{super})
+          end
+
+          # After startup the main script, scan the VNC server log file for
+          # successful connections so that the password can be reset
+          def after_script
+            websockify_cmd = context.fetch(:websockify_cmd, "${WEBSOCKIFY_CMD:-/opt/websockify/run}").to_s
+
+            <<-EOT.gsub(/^ {14}/, "")
+              #{super}
+
+              # Launch websockify websocket server
+              module load #{container_module}
+              echo "Starting websocket server..."
+              websocket=$(find_port)
+              #{container_command} exec instance://#{@instance_name} #{websockify_cmd} -D ${websocket} localhost:${port}
+
+              # Set up background process that scans the log file for successful
+              # connections by users, and change the password after every
+              # connection
+              echo "Scanning VNC log file for user authentications..."
+              while read -r line; do
+                if [[ ${line} =~ "Full-control authentication enabled for" ]]; then
+                  change_passwd
+                  create_yml
+                fi
+              done < <(tail -f --pid=${SCRIPT_PID} "#{vnc_log}") &
+            EOT
+          end
+
+          # Clean up the running VNC server and any other stale VNC servers
+          def clean_script
+            <<-EOT.gsub(/^ {14}/, "")
+              #{super}
+              module load #{container_module}
+
+              #{vnc_clean}
+              [[ -n ${display} ]] && vncserver -kill :${display}
+              #{container_command} instance stop #{@instance_name}
+            EOT
+          end
+
+          # Log file for VNC server
+          def vnc_log
+            context.fetch(:vnc_log, "vnc.log").to_s
+          end
+
+          # Password file for VNC server
+          def vnc_passwd
+            context.fetch(:vnc_passwd, "vnc.passwd").to_s
+          end
+
+          def container_module
+            context.fetch(:container_module, "singularity").to_s
+          end
+
+          def container_command
+            context.fetch(:container_command, "singularity").to_s
+          end
+
+          # Arguments sent to `vncserver` command
+          def vnc_args
+            context.fetch(:vnc_args) do
+              name = context.fetch(:name, "").to_s
+              geometry = context.fetch(:geometry, "").to_s
+              dpi = context.fetch(:dpi, "").to_s
+              fonts = context.fetch(:fonts, "").to_s
+              idle = context.fetch(:idle, "").to_s
+              extra_args = context.fetch(:extra_args, "").to_s
+
+              args = []
+              args << "-name #{name}" unless name.empty?
+              args << "-geometry #{geometry}" unless geometry.empty?
+              args << "-dpi #{dpi}" unless dpi.empty?
+              args << "-fp #{fonts}" unless fonts.empty?
+              args << "-idletimeout #{idle}" unless idle.empty?
+              args << extra_args
+
+              args.join(" ")
+            end.to_s
+          end
+
+          # Clean up any stale VNC sessions
+          def vnc_clean
+            context.fetch(:vnc_clean) do
+              %(#{container_command} exec instance://#{@instance_name} vncserver -list | awk '/^:/{system("kill -0 "$2" 2>/dev/null || #{container_command} exec instance://#{@instance_name} vncserver -kill "$1)}')
+            end.to_s
+          end
+      end
+    end
+  end
+end
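A minimal usage sketch for the new template follows; the container path is a placeholder, :container_path is the only required key, and the remaining keys fall back to the defaults documented in the comments above.

    # Minimal sketch: build the new template directly with a placeholder path.
    require "ood_core"

    template = OodCore::BatchConnect::Factory.build_vnc_container(
      container_path: "/opt/containers/vnc_container.sif"  # placeholder; required
    )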
data/lib/ood_core/job/account_info.rb
ADDED
@@ -0,0 +1,36 @@
+module OodCore
+  module Job
+
+    class AccountInfo
+
+      # The name of the account.
+      attr_reader :name
+      alias to_s name
+
+      # The QoS values this account can use.
+      attr_reader :qos
+
+      # The cluster this account is associated with.
+      attr_reader :cluster
+
+      # The queue this account can use. nil means there is no queue info
+      # for this account.
+      attr_reader :queue
+
+      def initialize(**opts)
+        orig_name = opts.fetch(:name, 'unknown')
+        @name = OodCore::Job::Adapters::Helper.upcase_accounts? ? orig_name.upcase : orig_name
+        @qos = opts.fetch(:qos, [])
+        @cluster = opts.fetch(:cluster, nil)
+        @queue = opts.fetch(:queue, nil)
+      end
+
+      def to_h
+        instance_variables.map do |var|
+          name = var.to_s.gsub('@', '').to_sym
+          [name, send(name)]
+        end.to_h
+      end
+    end
+  end
+end
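For illustration, a hedged sketch of a single AccountInfo entry; the account and cluster names below are made up.

    # Placeholder values; AccountInfo captures one account/cluster association.
    acct = OodCore::Job::AccountInfo.new(
      name:    "pas1234",          # hypothetical account
      cluster: "owens",            # hypothetical cluster
      queue:   nil,                # nil: no queue info for this association
      qos:     ["normal", "debug"]
    )

    acct.to_s  #=> "pas1234" ("PAS1234" if OOD_UPCASE_ACCOUNTS is enabled)
    acct.to_h  #=> { name: "pas1234", qos: ["normal", "debug"], cluster: "owens", queue: nil }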
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -197,6 +197,22 @@ module OodCore
       def job_name_illegal_chars
         ENV["OOD_JOB_NAME_ILLEGAL_CHARS"].to_s
       end
+
+      # Retrieve the accounts available to use for the current user.
+      #
+      # Subclasses that do not implement this will return empty arrays.
+      # @return [Array<AccountInfo>] the accounts available to the user.
+      def accounts
+        []
+      end
+
+      # Return the list of queues for this scheduler.
+      #
+      # Subclasses that do not implement this will return empty arrays.
+      # @return [Array<QueueInfo>]
+      def queues
+        []
+      end
     end
   end
 end
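A short, hypothetical sketch of how an adapter subclass can override these hooks; the class and queue name below are made up, and adapters without support simply inherit the empty-array defaults.

    # Hypothetical subclass; "batch" is a made-up queue name.
    class MyAdapter < OodCore::Job::Adapter
      def queues
        [OodCore::Job::QueueInfo.new(name: "batch")]
      end
    end

    MyAdapter.new.queues.map(&:to_s)  #=> ["batch"]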
data/lib/ood_core/job/adapters/helper.rb
CHANGED
@@ -31,6 +31,17 @@ module OodCore
 
           return 'ssh', args + [cmd] + cmd_args
         end
+
+        # Determine whether to upcase account strings when returning adapter#accounts
+        def self.upcase_accounts?
+          env_var = ENV['OOD_UPCASE_ACCOUNTS']
+
+          if env_var.nil? || env_var.to_s.downcase == 'false'
+            false
+          else
+            true
+          end
+        end
       end
     end
   end
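Behavior sketch for the new environment flag, mirroring the logic above: an unset variable or any casing of "false" disables upcasing, anything else enables it.

    ENV['OOD_UPCASE_ACCOUNTS'] = 'true'
    OodCore::Job::Adapters::Helper.upcase_accounts?  #=> true

    ENV['OOD_UPCASE_ACCOUNTS'] = 'FALSE'
    OodCore::Job::Adapters::Helper.upcase_accounts?  #=> false

    ENV.delete('OOD_UPCASE_ACCOUNTS')
    OodCore::Job::Adapters::Helper.upcase_accounts?  #=> false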
data/lib/ood_core/job/adapters/kubernetes/helper.rb
CHANGED
@@ -1,3 +1,5 @@
+# Utility class for the Kubernetes adapter to parse
+# json data into Ruby objects.
 class OodCore::Job::Adapters::Kubernetes::Helper
 
   require_relative 'resources'
@@ -193,10 +195,14 @@ class OodCore::Job::Adapters::Kubernetes::Helper
   end
 
   def secret_info_from_json(json_data)
-
-
-
-
+    data = json_data.to_h[:data] || {}
+
+    info = data.symbolize_keys.each_with_object({}) do |data_kv, hash|
+      hash[data_kv[0]] = Base64.decode64(data_kv[1])
+    rescue
+      next
+    end
+    { ood_connection_info: info }
   end
 
   def dispatch_time(json_data)
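For intuition, a hedged sketch of the data this parser now handles; the secret payload below is made up, and values in a Kubernetes secret arrive base64-encoded.

    require 'base64'
    require 'ostruct'

    # Hypothetical secret object as returned by the Kubernetes API in JSON form.
    json_data = OpenStruct.new(data: { "password" => Base64.encode64("hunter2") })

    # helper.secret_info_from_json(json_data) would now return roughly:
    #=> { ood_connection_info: { password: "hunter2" } }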
data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb
CHANGED
@@ -1,4 +1,4 @@
-# An object that describes a submitted kubernetes job with extended information
+# An object that describes a submitted kubernetes job with extended information.
 class OodCore::Job::Adapters::Kubernetes::K8sJobInfo < OodCore::Job::Info
   attr_reader :ood_connection_info
 
data/lib/ood_core/job/adapters/kubernetes/resources.rb
CHANGED
@@ -1,5 +1,5 @@
 module OodCore::Job::Adapters::Kubernetes::Resources
-
+  # Utility class for kubernetes configmap objects.
   class ConfigMap
     attr_accessor :name, :files
 
@@ -20,6 +20,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
     end
   end
 
+  # Utility class for mounting files in kubernetes configmap objects.
   class ConfigMapFile
     attr_accessor :filename, :data, :mount_path, :sub_path, :init_mount_path, :init_sub_path
 
@@ -33,6 +34,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
     end
   end
 
+  # Utility class for kuberenetes probe settings.
   class TCPProbe
     attr_accessor :port, :initial_delay_seconds, :failure_threshold, :period_seconds
 
@@ -54,6 +56,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
     end
   end
 
+  # Utility class for kuberenetes container object.
   class Container
     attr_accessor :name, :image, :command, :port, :env, :working_dir,
                   :memory_limit, :memory_request, :cpu_limit, :cpu_request,
@@ -106,6 +109,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
     end
   end
 
+  # Utility class for kuberenetes podspec object.
   class PodSpec
     attr_accessor :container, :init_containers
     def initialize(container, init_containers: nil)
data/lib/ood_core/job/adapters/linux_host/launcher.rb
CHANGED
@@ -73,7 +73,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
         # Get the tmux pane PID for the target session
         pane_pid=$(tmux list-panes -aF '\#{session_name} \#{pane_pid}' | grep '#{session_name}' | cut -f 2 -d ' ')
         # Find the Singularity sinit PID child of the pane process
-        pane_sinit_pid=$(pstree -p -l "$pane_pid" | egrep -o 'sinit[(][[:digit:]]*|shim-init[(][[:digit:]]
+        pane_sinit_pid=$(pstree -p -l "$pane_pid" | egrep -o 'sinit[(][[:digit:]]*|shim-init[(][[:digit:]]|appinit[(][[:digit:]]' | grep -o '[[:digit:]]*')
         # Kill sinit which stops both Singularity-based processes and the tmux session
         kill "$pane_sinit_pid"
     SCRIPT
data/lib/ood_core/job/adapters/pbspro.rb
CHANGED
@@ -453,7 +453,7 @@ module OodCore
 
         # Parse hash describing PBS Pro job status
         def parse_job_info(v)
-          /^(?<job_owner>[\w
+          /^(?<job_owner>[\w\-.]+)@(?<submit_host>.+)$/ =~ v[:Job_Owner]
           allocated_nodes = parse_nodes(v[:exec_host] || "")
           procs = allocated_nodes.inject(0) { |sum, x| sum + x[:procs] }
           if allocated_nodes.empty? # fill in with requested resources
data/lib/ood_core/job/adapters/slurm.rb
CHANGED
@@ -1,4 +1,5 @@
 require "time"
+require 'etc'
 require "ood_core/refinements/hash_extensions"
 require "ood_core/refinements/array_extensions"
 require "ood_core/job/adapters/helper"
@@ -178,6 +179,27 @@ module OodCore
             return [{ id: id, state: 'undetermined' }]
           end
 
+          def accounts
+            user = Etc.getlogin
+            args = ['-nP', 'show', 'users', 'withassoc', 'format=account,cluster,partition,qos', 'where', "user=#{user}"]
+
+            [].tap do |accts|
+              call('sacctmgr', *args).each_line do |line|
+                acct, cluster, queue, qos = line.split('|')
+                next if acct.nil?
+
+                args = {
+                  name: acct,
+                  qos: qos.to_s.chomp.split(','),
+                  cluster: cluster,
+                  queue: queue.empty? ? nil : queue
+                }
+                info = OodCore::Job::AccountInfo.new(**args) unless acct.nil?
+                accts << info unless acct.nil?
+              end
+            end
+          end
+
           def squeue_fields(attrs)
             if attrs.nil?
               all_squeue_fields
@@ -300,7 +322,37 @@ module OodCore
             }
           end
 
+          def queues
+            info_raw = call('scontrol', 'show', 'part', '-o')
+
+            [].tap do |ret_arr|
+              info_raw.each_line do |line|
+                ret_arr << str_to_acct_info(line)
+              end
+            end
+          end
+
           private
+            def str_to_acct_info(line)
+              hsh = line.split(' ').map do |token|
+                m = token.match(/^(?<key>\w+)=(?<value>.+)$/)
+                [m[:key], m[:value]]
+              end.to_h.symbolize_keys
+
+              hsh[:name] = hsh[:PartitionName]
+              hsh[:qos] = hsh[:QoS].to_s == 'N/A' ? [] : hsh[:QoS].to_s.split(',')
+              hsh[:allow_accounts] = if hsh[:AllowAccounts].nil? || hsh[:AllowAccounts].to_s == 'ALL'
+                                       nil
+                                     else
+                                       hsh[:AllowAccounts].to_s.split(',')
+                                     end
+
+
+              hsh[:deny_accounts] = hsh[:DenyAccounts].nil? ? [] : hsh[:DenyAccounts].to_s.split(',')
+
+              OodCore::Job::QueueInfo.new(**hsh)
+            end
+
             # Modify the StringIO instance by advancing past the squeue header
             #
             # The first two "records" should always be discarded. Consider the
@@ -325,7 +377,7 @@ module OodCore
             cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
 
             args = args.map(&:to_s)
-            args.concat ["-M", cluster] if cluster
+            args.concat ["-M", cluster] if cluster && cmd != 'sacctmgr'
 
             env = env.to_h
             env["SLURM_CONF"] = conf.to_s if conf
@@ -483,6 +535,13 @@ module OodCore
           @slurm.get_cluster_info
         end
 
+        # Retrieve the accounts available to use for the current user.
+        #
+        # @return [Array<String>] the accounts available to the user.
+        def accounts
+          @slurm.accounts
+        end
+
         # Retrieve info for all jobs from the resource manager
         # @raise [JobAdapterError] if something goes wrong getting job info
         # @return [Array<Info>] information describing submitted jobs
@@ -605,6 +664,10 @@ module OodCore
             '#SBATCH'
           end
 
+        def queues
+          @slurm.queues
+        end
+
         private
           # Convert duration to seconds
           def duration_in_seconds(time)
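To make the partition parsing above concrete, here is a hypothetical `scontrol show part -o` line and the fields the new code would derive from it; real output carries many more KEY=VALUE tokens.

    # Hypothetical partition line.
    line = "PartitionName=batch AllowAccounts=ALL DenyAccounts=pzs0001 QoS=N/A State=UP"

    # str_to_acct_info(line) builds a QueueInfo roughly equivalent to:
    #   name:           "batch"
    #   qos:            []           # QoS=N/A is treated as "no QoS list"
    #   allow_accounts: nil          # ALL means every account may use the queue
    #   deny_accounts:  ["pzs0001"]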
data/lib/ood_core/job/adapters/systemd.rb
CHANGED
@@ -37,8 +37,7 @@
     end
 
     module Adapters
-      #
-      # for job management.
+      # The adapter for using systemd timers as the scheduler.
       class LinuxSystemd < Adapter
         using Refinements::ArrayExtensions
 
data/lib/ood_core/job/array_ids.rb
CHANGED
@@ -1,14 +1,14 @@
-# Builds a sorted array of job ids given a job array spec string
-#
-# Job array spec strings:
-# 1 Single id
-# 1-10 Range
-# 1-10:2 Range with step
-# 1-10,13 Compound (range with single id)
-#
-# Note that Ranges are expected to be inclusive
 module OodCore
   module Job
+    # Builds a sorted array of job ids given a job array spec string
+    #
+    # Job array spec strings:
+    # 1 Single id
+    # 1-10 Range
+    # 1-10:2 Range with step
+    # 1-10,13 Compound (range with single id)
+    #
+    # Note that Ranges are expected to be inclusive
     class ArrayIds
       attr_reader :spec_string
 
data/lib/ood_core/job/cluster_info.rb
CHANGED
@@ -1,6 +1,6 @@
 module OodCore
   module Job
-    # An object that contains details about the cluster's active and total nodes, processors
+    # An object that contains details about the cluster's active and total nodes, processors and gpus.
     class ClusterInfo
       using Refinements::HashExtensions
 
data/lib/ood_core/job/info.rb
CHANGED
data/lib/ood_core/job/queue_info.rb
ADDED
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+# QueueInfo is information about a given queue on a scheduler.
+class OodCore::Job::QueueInfo
+  # The name of the queue.
+  attr_reader :name
+  alias to_s name
+
+  # The QoSes associated with this queue
+  attr_reader :qos
+
+  # The accounts that are allowed to use this queue.
+  #
+  # nil means ALL accounts are allowed.
+  attr_reader :allow_accounts
+
+  # The accounts that are not allowed to use this queue.
+  attr_reader :deny_accounts
+
+  def initialize(**opts)
+    @name = opts.fetch(:name, 'unknown')
+    @qos = opts.fetch(:qos, [])
+    @allow_accounts = opts.fetch(:allow_accounts, nil)
+    @deny_accounts = opts.fetch(:deny_accounts, [])
+  end
+
+  def to_h
+    instance_variables.map do |var|
+      name = var.to_s.gsub('@', '').to_sym
+      [name, send(name)]
+    end.to_h
+  end
+end
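A small sketch of the defaults, mirroring the initializer above: with no options the queue is reported as unknown, every account is allowed, and none are denied.

    q = OodCore::Job::QueueInfo.new
    q.to_s            #=> "unknown"
    q.allow_accounts  #=> nil  (nil means ALL accounts are allowed)
    q.deny_accounts   #=> []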
data/lib/ood_core/job/status.rb
CHANGED
data/lib/ood_core/version.rb
CHANGED
data/lib/ood_core.rb
CHANGED
@@ -12,6 +12,8 @@ module OodCore
   require "ood_core/job/script"
   require "ood_core/job/info"
   require "ood_core/job/cluster_info"
+  require "ood_core/job/account_info"
+  require "ood_core/job/queue_info"
   require "ood_core/job/status"
   require "ood_core/job/adapter"
   require "ood_core/job/factory"
data/ood_core.gemspec
CHANGED
@@ -30,5 +30,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rspec", "~> 3.0"
   spec.add_development_dependency "pry", "~> 0.10"
   spec.add_development_dependency "timecop", "~> 0.8"
-  spec.add_development_dependency "climate_control", "~> 1.
+  spec.add_development_dependency "climate_control", "~> 1.2.0"
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ood_core
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.23.0
 platform: ruby
 authors:
 - Eric Franz
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date:
+date: 2023-01-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ood_support
@@ -136,14 +136,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.2.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.2.0
 description: Open OnDemand core library that provides support for an HPC Center to
   globally define HPC services that web applications can then take advantage of.
 email:
@@ -164,6 +164,7 @@ files:
 - Rakefile
 - bin/console
 - bin/setup
+- docs/.keep
 - lib/ood_core.rb
 - lib/ood_core/acl/adapter.rb
 - lib/ood_core/acl/adapters/group.rb
@@ -172,10 +173,12 @@ files:
 - lib/ood_core/batch_connect/template.rb
 - lib/ood_core/batch_connect/templates/basic.rb
 - lib/ood_core/batch_connect/templates/vnc.rb
+- lib/ood_core/batch_connect/templates/vnc_container.rb
 - lib/ood_core/cluster.rb
 - lib/ood_core/clusters.rb
 - lib/ood_core/errors.rb
 - lib/ood_core/invalid_cluster.rb
+- lib/ood_core/job/account_info.rb
 - lib/ood_core/job/adapter.rb
 - lib/ood_core/job/adapters/ccq.rb
 - lib/ood_core/job/adapters/drmaa.rb
@@ -215,6 +218,7 @@ files:
 - lib/ood_core/job/factory.rb
 - lib/ood_core/job/info.rb
 - lib/ood_core/job/node_info.rb
+- lib/ood_core/job/queue_info.rb
 - lib/ood_core/job/script.rb
 - lib/ood_core/job/status.rb
 - lib/ood_core/job/task.rb