ood_core 0.21.0 → 0.23.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -1
  3. data/CHANGELOG.md +29 -1
  4. data/docs/.keep +0 -0
  5. data/lib/ood_core/batch_connect/templates/vnc_container.rb +252 -0
  6. data/lib/ood_core/job/account_info.rb +36 -0
  7. data/lib/ood_core/job/adapter.rb +16 -0
  8. data/lib/ood_core/job/adapters/ccq.rb +1 -0
  9. data/lib/ood_core/job/adapters/helper.rb +11 -0
  10. data/lib/ood_core/job/adapters/kubernetes/batch.rb +2 -0
  11. data/lib/ood_core/job/adapters/kubernetes/helper.rb +10 -4
  12. data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +1 -1
  13. data/lib/ood_core/job/adapters/kubernetes/resources.rb +5 -1
  14. data/lib/ood_core/job/adapters/kubernetes.rb +2 -0
  15. data/lib/ood_core/job/adapters/linux_host/launcher.rb +1 -1
  16. data/lib/ood_core/job/adapters/lsf.rb +2 -0
  17. data/lib/ood_core/job/adapters/pbspro.rb +1 -1
  18. data/lib/ood_core/job/adapters/sge/helper.rb +2 -0
  19. data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +0 -1
  20. data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +0 -1
  21. data/lib/ood_core/job/adapters/sge.rb +2 -0
  22. data/lib/ood_core/job/adapters/slurm.rb +64 -1
  23. data/lib/ood_core/job/adapters/systemd.rb +1 -2
  24. data/lib/ood_core/job/adapters/torque/attributes.rb +1 -0
  25. data/lib/ood_core/job/adapters/torque/batch.rb +2 -0
  26. data/lib/ood_core/job/adapters/torque/error.rb +1 -0
  27. data/lib/ood_core/job/adapters/torque/ffi.rb +1 -1
  28. data/lib/ood_core/job/array_ids.rb +9 -9
  29. data/lib/ood_core/job/cluster_info.rb +1 -1
  30. data/lib/ood_core/job/info.rb +1 -1
  31. data/lib/ood_core/job/node_info.rb +1 -1
  32. data/lib/ood_core/job/queue_info.rb +33 -0
  33. data/lib/ood_core/job/status.rb +1 -1
  34. data/lib/ood_core/version.rb +1 -1
  35. data/lib/ood_core.rb +2 -0
  36. data/ood_core.gemspec +1 -1
  37. metadata +8 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d1d489149a451b24284191ba966ef7c5d85f859c939b050d50b6501fd49a4cb
4
- data.tar.gz: 3d438089095a42b66f4edee0d3a6afe683e1d87ebb865d908120b977733c6169
3
+ metadata.gz: 379d230153e94bdbeca027f601afc89a525b328d8e200cc1a6a18754b3370b88
4
+ data.tar.gz: 5544e85c81b6e955a63144a056eee24d6bd5ec062de4d17a535d9df78eb5a817
5
5
  SHA512:
6
- metadata.gz: 4106f8af4babd7ae5cf59e133d42e5d1ecda3c1436727740f91f8b3e8a21112254ec8d35f724e2614cc18b65043944ada99adf6dfdeb0959f618d5c90e8178c0
7
- data.tar.gz: a3caaaf21cc6ee4bd68fea96817f58b61592c93993813f69333e8acdce95df8b0d8b49b169000a50ddcb7bcd6031d151272882e5dc3cd02e09a0595ef1d31116
6
+ metadata.gz: 972cabe2a634f50b730e29d31a54e34c2784ea707fd562b867c8f8731e4d612bd39fd8bcacb84b49585a08131d4f9e3770195f2c1c6036fde52ff4e3d4f7124c
7
+ data.tar.gz: 4a0997de2400017ad8a921358b76341706cd4e58034c881dd8b9dfe63e9d9b7c559c42ed711a13051cd5168dde6703164714d22ee1e8ae2d7980fa33d2da3e4c
data/.gitignore CHANGED
@@ -50,4 +50,8 @@ Gemfile.lock
50
50
  .rvmrc
51
51
 
52
52
  # SSHFS temp files
53
- ._*
53
+ ._*
54
+
55
+ # docs are only held in the gh-pages branch
56
+ /docs/*
57
+ !/docs/.keep
data/CHANGELOG.md CHANGED
@@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.23.0] - 01-17-2023
11
+
12
+ ### Added
13
+
14
+ - [787](https://github.com/OSC/ood_core/pull/787) added the `queues` API to the adapter class with
15
+ support for Slurm.
16
+ - [783](https://github.com/OSC/ood_core/pull/783) added the `accounts` API to the adapter class with
17
+ support for Slurm.
18
+
19
+ ### Fixed
20
+
21
+ - The linux host adapter now supports apptainer in [788](https://github.com/OSC/ood_core/pull/788).
22
+
23
+
24
+ ## [0.22.0] - 10-31-2022
25
+
26
+ ### Added
27
+
28
+ - Added the `vnc_container` batch connect template in [774](https://github.com/OSC/ood_core/pull/774).
29
+ - https://osc.github.io/ood_core is now updated on every commit to master in [765](https://github.com/OSC/ood_core/pull/765).
30
+
31
+ ### Fixed
32
+
33
+ - Kubernetes can now read multiple secrets in [778](https://github.com/OSC/ood_core/pull/778).
34
+ - PBSPro correctly reads usernames with periods in them in [780](https://github.com/OSC/ood_core/pull/780).
35
+
10
36
  ## [0.21.0] - 08-01-2022
11
37
 
12
38
  ### Added
@@ -443,7 +469,9 @@ Functionally the same as [0.17.3] but with some CI updates.
443
469
  ### Added
444
470
  - Initial release!
445
471
 
446
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.21.0...HEAD
472
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.23.0...HEAD
473
+ [0.23.0]: https://github.com/OSC/ood_core/compare/v0.22.0...v0.23.0
474
+ [0.22.0]: https://github.com/OSC/ood_core/compare/v0.21.0...v0.22.0
447
475
  [0.21.0]: https://github.com/OSC/ood_core/compare/v0.20.2...v0.21.0
448
476
  [0.20.2]: https://github.com/OSC/ood_core/compare/v0.20.1...v0.20.2
449
477
  [0.20.1]: https://github.com/OSC/ood_core/compare/v0.20.0...v0.20.1
data/docs/.keep ADDED
File without changes
@@ -0,0 +1,252 @@
1
+ require "ood_core/refinements/hash_extensions"
2
+ require "securerandom"
3
+
4
+ module OodCore
5
+ module BatchConnect
6
+ class Factory
7
+ using Refinements::HashExtensions
8
+
9
+ # Build the VNC template from a configuration
10
+ # @param config [#to_h] the configuration for the batch connect template
11
+ def self.build_vnc_container(config)
12
+ context = config.to_h.symbolize_keys.reject { |k, _| k == :template }
13
+
14
+ unless context.key?(:container_path)
15
+ raise JobAdapterError, "You are missing the configuration 'container_path' for a vnc_container template."
16
+ end
17
+
18
+ Templates::VNC_Container.new(context)
19
+ end
20
+ end
21
+
22
+ module Templates
23
+ # A batch connect template that starts up a VNC server within a batch job
24
+ class VNC_Container < Template
25
+ # @param context [#to_h] the context used to render the template
26
+ # @option context [#to_sym, Array<#to_sym>] :conn_params ([]) A list of
27
+ # connection parameters added to the connection file (`:host`,
28
+ # `:port`, `:password`, `:spassword`, `:display` and `:websocket`
29
+ # will always exist)
30
+ # @option context [#to_s] :websockify_cmd
31
+ # ("${WEBSOCKIFY_CMD:-/opt/websockify/run}") the path to the
32
+ # websockify script (assumes you don't modify `:after_script`)
33
+ # @option context [#to_s] :vnc_log ("vnc.log") path to vnc server log
34
+ # file (assumes you don't modify `:before_script` or `:after_script`)
35
+ # @option context [#to_s] :vnc_passwd ("vnc.passwd") path to the file
36
+ # generated that contains the encrypted vnc password (assumes you
37
+ # don't modify `:before_script`)
38
+ # @option context [#to_s] :vnc_args arguments used when starting up the
39
+ # vnc server (overrides any specific vnc argument) (assumes you don't
40
+ # modify `:before_script`)
41
+ # @option context [#to_s] :name ("") name of the vnc server session
42
+ # (not set if blank or `:vnc_args` is set) (assumes you don't modify
43
+ # `:before_script`)
44
+ # @option context [#to_s] :geometry ("") resolution of vnc display (not
45
+ # set if blank or `:vnc_args` is set) (assumes you don't modify
46
+ # `:before_script`)
47
+ # @option context [#to_s] :dpi ("") dpi of vnc display (not set if
48
+ # blank or `:vnc_args` is set) (assumes you don't modify
49
+ # `:before_script`)
50
+ # @option context [#to_s] :fonts ("") comma delimited list of fonts
51
+ # available in vnc display (not set if blank or `:vnc_args` is set)
52
+ # (assumes you don't modify `:before_script`)
53
+ # @option context [#to_s] :idle ("") timeout vnc server if no
54
+ # connection in this amount of time in seconds (not set if blank or
55
+ # `:vnc_args` is set) (assumes you don't modify `:before_script`)
56
+ # @option context [#to_s] :extra_args ("") any extra arguments used
57
+ # when initializing the vnc server process (not set if blank or
58
+ # `:vnc_args` is set) (assumes you don't modify `:before_script`)
59
+ # @option context [#to_s] :vnc_clean ("...") script used to clean up
60
+ # any active vnc sessions (assumes you don't modify `:before_script`
61
+ # or `:clean_script`)
62
+ # @option context [#to_s] :container_path ("vnc_container.sif") the path
63
+ # to the container with VNC
64
+ # @option context [#to_s] :container_bindpath ("") paths to bind into
65
+ # the container with VNC
66
+ # @option context [#to_s] :container_module ("singularity") the module
67
+ # that loads Singularity or Apptainer with Lmod. Supports versions (i.e.
68
+ # apptainer/1.10). If Singularity or Apptainer are installed at a
69
+ # system level (i.e., no module loaded to activate), set this to an
70
+ # empty string.
71
+ # @option context [#to_s] :container_command ("singularity") the
72
+ # singularity or apptainer execution command
73
+ # @param instance_name (uuid) a name for the instance
74
+ # @see Template
75
+
76
+ def initialize(context = {})
77
+ @instance_name = SecureRandom.uuid
78
+ super
79
+ end
80
+
81
+ private
82
+ # We need to know the VNC and websockify connection information
83
+ def conn_params
84
+ (super + [:display, :websocket, :spassword, :instance_name]).uniq
85
+ end
86
+
87
+ # Before running the main script, start up a VNC server and record
88
+ # the connection information
89
+ def before_script
90
+ container_path = context.fetch(:container_path, "vnc_container.sif").to_s
91
+ container_bindpath = context.fetch(:container_bindpath, "").to_s
92
+
93
+ <<-EOT.gsub(/^ {14}/, "")
94
+
95
+ # Load #{container_module}
96
+ echo "Loading #{container_module}..."
97
+ module load #{container_module}
98
+ export #{container_command.upcase}_BINDPATH="#{container_bindpath}"
99
+ export INSTANCE_NAME="#{@instance_name}"
100
+ export instance_name="#{@instance_name}"
101
+ echo "Starting instance..."
102
+ #{container_command} instance start #{container_path} #{@instance_name}
103
+
104
+ # Setup one-time use passwords and initialize the VNC password
105
+ function change_passwd () {
106
+ echo "Setting VNC password..."
107
+ password=$(create_passwd "#{password_size}")
108
+ spassword=${spassword:-$(create_passwd "#{password_size}")}
109
+ (
110
+ umask 077
111
+ echo -ne "${password}\\n${spassword}" | #{container_command} exec instance://#{@instance_name} vncpasswd -f > "#{vnc_passwd}"
112
+ )
113
+ }
114
+ change_passwd
115
+
116
+
117
+ # Start up vnc server (if at first you don't succeed, try, try again)
118
+ echo "Starting VNC server..."
119
+ for i in $(seq 1 10); do
120
+ # Clean up any old VNC sessions that weren't cleaned before
121
+ #{vnc_clean}
122
+
123
+ # for turbovnc 3.0 compatability.
124
+ if timeout 2 #{container_command} exec instance://#{@instance_name} vncserver --help 2>&1 | grep 'nohttpd' >/dev/null 2>&1; then
125
+ HTTPD_OPT='-nohttpd'
126
+ fi
127
+
128
+ # Attempt to start VNC server
129
+ VNC_OUT=$(#{container_command} exec instance://#{@instance_name} vncserver -log "#{vnc_log}" -rfbauth "#{vnc_passwd}" $HTTPD_OPT -noxstartup #{vnc_args} 2>&1)
130
+ VNC_PID=$(pgrep -s 0 Xvnc) # the script above will daemonize the Xvnc process
131
+ echo "${VNC_PID}"
132
+ echo "${VNC_OUT}"
133
+
134
+ # Sometimes Xvnc hangs if it fails to find working disaply, we
135
+ # should kill it and try again
136
+ kill -0 ${VNC_PID} 2>/dev/null && [[ "${VNC_OUT}" =~ "Fatal server error" ]] && kill -TERM ${VNC_PID}
137
+
138
+ # Check that Xvnc process is running, if not assume it died and
139
+ # wait some random period of time before restarting
140
+ kill -0 ${VNC_PID} 2>/dev/null || sleep 0.$(random_number 1 9)s
141
+
142
+ # If running, then all is well and break out of loop
143
+ kill -0 ${VNC_PID} 2>/dev/null && break
144
+ done
145
+
146
+ # If we fail to start it after so many tries, then just give up
147
+ kill -0 ${VNC_PID} 2>/dev/null || clean_up 1
148
+
149
+ # Parse output for ports used
150
+ display=$(echo "${VNC_OUT}" | awk -F':' '/^Desktop/{print $NF}')
151
+ port=$((5900+display))
152
+
153
+ echo "Successfully started VNC server on ${host}:${port}..."
154
+
155
+ #{super}
156
+ EOT
157
+ end
158
+
159
+ # Run the script under the VNC server's display
160
+ def run_script
161
+ %(DISPLAY=:${display} #{super})
162
+ end
163
+
164
+ # After startup the main script, scan the VNC server log file for
165
+ # successful connections so that the password can be reset
166
+ def after_script
167
+ websockify_cmd = context.fetch(:websockify_cmd, "${WEBSOCKIFY_CMD:-/opt/websockify/run}").to_s
168
+
169
+ <<-EOT.gsub(/^ {14}/, "")
170
+ #{super}
171
+
172
+ # Launch websockify websocket server
173
+ module load #{container_module}
174
+ echo "Starting websocket server..."
175
+ websocket=$(find_port)
176
+ #{container_command} exec instance://#{@instance_name} #{websockify_cmd} -D ${websocket} localhost:${port}
177
+
178
+ # Set up background process that scans the log file for successful
179
+ # connections by users, and change the password after every
180
+ # connection
181
+ echo "Scanning VNC log file for user authentications..."
182
+ while read -r line; do
183
+ if [[ ${line} =~ "Full-control authentication enabled for" ]]; then
184
+ change_passwd
185
+ create_yml
186
+ fi
187
+ done < <(tail -f --pid=${SCRIPT_PID} "#{vnc_log}") &
188
+ EOT
189
+ end
190
+
191
+ # Clean up the running VNC server and any other stale VNC servers
192
+ def clean_script
193
+ <<-EOT.gsub(/^ {14}/, "")
194
+ #{super}
195
+ module load #{container_module}
196
+
197
+ #{vnc_clean}
198
+ [[ -n ${display} ]] && vncserver -kill :${display}
199
+ #{container_command} instance stop #{@instance_name}
200
+ EOT
201
+ end
202
+
203
+ # Log file for VNC server
204
+ def vnc_log
205
+ context.fetch(:vnc_log, "vnc.log").to_s
206
+ end
207
+
208
+ # Password file for VNC server
209
+ def vnc_passwd
210
+ context.fetch(:vnc_passwd, "vnc.passwd").to_s
211
+ end
212
+
213
+ def container_module
214
+ context.fetch(:container_module, "singularity").to_s
215
+ end
216
+
217
+ def container_command
218
+ context.fetch(:container_command, "singularity").to_s
219
+ end
220
+
221
+ # Arguments sent to `vncserver` command
222
+ def vnc_args
223
+ context.fetch(:vnc_args) do
224
+ name = context.fetch(:name, "").to_s
225
+ geometry = context.fetch(:geometry, "").to_s
226
+ dpi = context.fetch(:dpi, "").to_s
227
+ fonts = context.fetch(:fonts, "").to_s
228
+ idle = context.fetch(:idle, "").to_s
229
+ extra_args = context.fetch(:extra_args, "").to_s
230
+
231
+ args = []
232
+ args << "-name #{name}" unless name.empty?
233
+ args << "-geometry #{geometry}" unless geometry.empty?
234
+ args << "-dpi #{dpi}" unless dpi.empty?
235
+ args << "-fp #{fonts}" unless fonts.empty?
236
+ args << "-idletimeout #{idle}" unless idle.empty?
237
+ args << extra_args
238
+
239
+ args.join(" ")
240
+ end.to_s
241
+ end
242
+
243
+ # Clean up any stale VNC sessions
244
+ def vnc_clean
245
+ context.fetch(:vnc_clean) do
246
+ %(#{container_command} exec instance://#{@instance_name} vncserver -list | awk '/^:/{system("kill -0 "$2" 2>/dev/null || #{container_command} exec instance://#{@instance_name} vncserver -kill "$1)}')
247
+ end.to_s
248
+ end
249
+ end
250
+ end
251
+ end
252
+ end
@@ -0,0 +1,36 @@
1
+ module OodCore
2
+ module Job
3
+
4
+ class AccountInfo
5
+
6
+ # The name of the account.
7
+ attr_reader :name
8
+ alias to_s name
9
+
10
+ # The QoS values this account can use.
11
+ attr_reader :qos
12
+
13
+ # The cluster this account is associated with.
14
+ attr_reader :cluster
15
+
16
+ # The queue this account can use. nil means there is no queue info
17
+ # for this account.
18
+ attr_reader :queue
19
+
20
+ def initialize(**opts)
21
+ orig_name = opts.fetch(:name, 'unknown')
22
+ @name = OodCore::Job::Adapters::Helper.upcase_accounts? ? orig_name.upcase : orig_name
23
+ @qos = opts.fetch(:qos, [])
24
+ @cluster = opts.fetch(:cluster, nil)
25
+ @queue = opts.fetch(:queue, nil)
26
+ end
27
+
28
+ def to_h
29
+ instance_variables.map do |var|
30
+ name = var.to_s.gsub('@', '').to_sym
31
+ [name, send(name)]
32
+ end.to_h
33
+ end
34
+ end
35
+ end
36
+ end
@@ -197,6 +197,22 @@ module OodCore
197
197
  def job_name_illegal_chars
198
198
  ENV["OOD_JOB_NAME_ILLEGAL_CHARS"].to_s
199
199
  end
200
+
201
+ # Retrieve the accounts available to use for the current user.
202
+ #
203
+ # Subclasses that do not implement this will return empty arrays.
204
+ # @return [Array<AccountInfo>] the accounts available to the user.
205
+ def accounts
206
+ []
207
+ end
208
+
209
+ # Return the list of queues for this scheduler.
210
+ #
211
+ # Subclasses that do not implement this will return empty arrays.
212
+ # @return [Array<QueueInfo>]
213
+ def queues
214
+ []
215
+ end
200
216
  end
201
217
  end
202
218
  end
@@ -23,6 +23,7 @@ module OodCore
23
23
 
24
24
  class PromptError < StandardError; end
25
25
 
26
+ # The adapter class for the Cloudy Cluster product CCQ.
26
27
  class CCQ < Adapter
27
28
  using Refinements::ArrayExtensions
28
29
 
@@ -31,6 +31,17 @@ module OodCore
31
31
 
32
32
  return 'ssh', args + [cmd] + cmd_args
33
33
  end
34
+
35
+ # Determine whether to upcase account strings when returning adapter#accounts
36
+ def self.upcase_accounts?
37
+ env_var = ENV['OOD_UPCASE_ACCOUNTS']
38
+
39
+ if env_var.nil? || env_var.to_s.downcase == 'false'
40
+ false
41
+ else
42
+ true
43
+ end
44
+ end
34
45
  end
35
46
  end
36
47
  end
@@ -1,6 +1,8 @@
1
1
  require "ood_core/refinements/hash_extensions"
2
2
  require "json"
3
3
 
4
+ # Utility class for the Kubernetes adapter to interact
5
+ # with the Kubernetes APIs.
4
6
  class OodCore::Job::Adapters::Kubernetes::Batch
5
7
 
6
8
  require_relative "helper"
@@ -1,3 +1,5 @@
1
+ # Utility class for the Kubernetes adapter to parse
2
+ # json data into Ruby objects.
1
3
  class OodCore::Job::Adapters::Kubernetes::Helper
2
4
 
3
5
  require_relative 'resources'
@@ -193,10 +195,14 @@ class OodCore::Job::Adapters::Kubernetes::Helper
193
195
  end
194
196
 
195
197
  def secret_info_from_json(json_data)
196
- raw = json_data.dig(:data, :password)
197
- { ood_connection_info: { password: Base64.decode64(raw) } }
198
- rescue
199
- {}
198
+ data = json_data.to_h[:data] || {}
199
+
200
+ info = data.symbolize_keys.each_with_object({}) do |data_kv, hash|
201
+ hash[data_kv[0]] = Base64.decode64(data_kv[1])
202
+ rescue
203
+ next
204
+ end
205
+ { ood_connection_info: info }
200
206
  end
201
207
 
202
208
  def dispatch_time(json_data)
@@ -1,4 +1,4 @@
1
- # An object that describes a submitted kubernetes job with extended information
1
+ # An object that describes a submitted kubernetes job with extended information.
2
2
  class OodCore::Job::Adapters::Kubernetes::K8sJobInfo < OodCore::Job::Info
3
3
  attr_reader :ood_connection_info
4
4
 
@@ -1,5 +1,5 @@
1
1
  module OodCore::Job::Adapters::Kubernetes::Resources
2
-
2
+ # Utility class for kubernetes configmap objects.
3
3
  class ConfigMap
4
4
  attr_accessor :name, :files
5
5
 
@@ -20,6 +20,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
20
20
  end
21
21
  end
22
22
 
23
+ # Utility class for mounting files in kubernetes configmap objects.
23
24
  class ConfigMapFile
24
25
  attr_accessor :filename, :data, :mount_path, :sub_path, :init_mount_path, :init_sub_path
25
26
 
@@ -33,6 +34,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
33
34
  end
34
35
  end
35
36
 
37
+ # Utility class for kubernetes probe settings.
36
38
  class TCPProbe
37
39
  attr_accessor :port, :initial_delay_seconds, :failure_threshold, :period_seconds
38
40
 
@@ -54,6 +56,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
54
56
  end
55
57
  end
56
58
 
59
+ # Utility class for kubernetes container object.
57
60
  class Container
58
61
  attr_accessor :name, :image, :command, :port, :env, :working_dir,
59
62
  :memory_limit, :memory_request, :cpu_limit, :cpu_request,
@@ -106,6 +109,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
106
109
  end
107
110
  end
108
111
 
112
+ # Utility class for kubernetes podspec object.
109
113
  class PodSpec
110
114
  attr_accessor :container, :init_containers
111
115
  def initialize(container, init_containers: nil)
@@ -13,6 +13,8 @@ module OodCore
13
13
  end
14
14
 
15
15
  module Adapters
16
+
17
+ # The adapter class for Kubernetes.
16
18
  class Kubernetes < Adapter
17
19
 
18
20
  using Refinements::ArrayExtensions
@@ -73,7 +73,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
73
73
  # Get the tmux pane PID for the target session
74
74
  pane_pid=$(tmux list-panes -aF '\#{session_name} \#{pane_pid}' | grep '#{session_name}' | cut -f 2 -d ' ')
75
75
  # Find the Singularity sinit PID child of the pane process
76
- pane_sinit_pid=$(pstree -p -l "$pane_pid" | egrep -o 'sinit[(][[:digit:]]*|shim-init[(][[:digit:]]*' | grep -o '[[:digit:]]*')
76
+ pane_sinit_pid=$(pstree -p -l "$pane_pid" | egrep -o 'sinit[(][[:digit:]]*|shim-init[(][[:digit:]]|appinit[(][[:digit:]]' | grep -o '[[:digit:]]*')
77
77
  # Kill sinit which stops both Singularity-based processes and the tmux session
78
78
  kill "$pane_sinit_pid"
79
79
  SCRIPT
@@ -22,6 +22,8 @@ module OodCore
22
22
  end
23
23
 
24
24
  module Adapters
25
+
26
+ # The adapter class for the LSF scheduler.
25
27
  class Lsf < Adapter
26
28
  using Refinements::ArrayExtensions
27
29
 
@@ -453,7 +453,7 @@ module OodCore
453
453
 
454
454
  # Parse hash describing PBS Pro job status
455
455
  def parse_job_info(v)
456
- /^(?<job_owner>[\w-]+)@(?<submit_host>.+)$/ =~ v[:Job_Owner]
456
+ /^(?<job_owner>[\w\-.]+)@(?<submit_host>.+)$/ =~ v[:Job_Owner]
457
457
  allocated_nodes = parse_nodes(v[:exec_host] || "")
458
458
  procs = allocated_nodes.inject(0) { |sum, x| sum + x[:procs] }
459
459
  if allocated_nodes.empty? # fill in with requested resources
@@ -1,3 +1,5 @@
1
+ # Utility helper class for the SGE adapter to parse
2
+ # input and generate submission arguments.
1
3
  class OodCore::Job::Adapters::Sge::Helper
2
4
  require 'ood_core/job/adapters/sge'
3
5
 
@@ -16,7 +16,6 @@ require 'ood_core/job/array_ids'
16
16
  # :status
17
17
  # :wallclock_limit
18
18
  # :wallclock_time
19
-
20
19
  class QstatXmlJRListener
21
20
  # [Hash]
22
21
  attr_reader :parsed_job
@@ -13,7 +13,6 @@ require 'date'
13
13
  # :queue_name
14
14
  # :status
15
15
  # :wallclock_limit
16
-
17
16
  class QstatXmlRListener
18
17
  # [Array<Hash>]
19
18
  attr_reader :parsed_jobs
@@ -22,6 +22,8 @@ module OodCore
22
22
  end
23
23
 
24
24
  module Adapters
25
+
26
+ # The adapter class for Grid Engine (GE) flavors like Sun Grid Engine.
25
27
  class Sge < Adapter
26
28
  using Refinements::HashExtensions
27
29
  using Refinements::ArrayExtensions
@@ -1,4 +1,5 @@
1
1
  require "time"
2
+ require 'etc'
2
3
  require "ood_core/refinements/hash_extensions"
3
4
  require "ood_core/refinements/array_extensions"
4
5
  require "ood_core/job/adapters/helper"
@@ -178,6 +179,27 @@ module OodCore
178
179
  return [{ id: id, state: 'undetermined' }]
179
180
  end
180
181
 
182
+ def accounts
183
+ user = Etc.getlogin
184
+ args = ['-nP', 'show', 'users', 'withassoc', 'format=account,cluster,partition,qos', 'where', "user=#{user}"]
185
+
186
+ [].tap do |accts|
187
+ call('sacctmgr', *args).each_line do |line|
188
+ acct, cluster, queue, qos = line.split('|')
189
+ next if acct.nil?
190
+
191
+ args = {
192
+ name: acct,
193
+ qos: qos.to_s.chomp.split(','),
194
+ cluster: cluster,
195
+ queue: queue.empty? ? nil : queue
196
+ }
197
+ info = OodCore::Job::AccountInfo.new(**args) unless acct.nil?
198
+ accts << info unless acct.nil?
199
+ end
200
+ end
201
+ end
202
+
181
203
  def squeue_fields(attrs)
182
204
  if attrs.nil?
183
205
  all_squeue_fields
@@ -300,7 +322,37 @@ module OodCore
300
322
  }
301
323
  end
302
324
 
325
+ def queues
326
+ info_raw = call('scontrol', 'show', 'part', '-o')
327
+
328
+ [].tap do |ret_arr|
329
+ info_raw.each_line do |line|
330
+ ret_arr << str_to_acct_info(line)
331
+ end
332
+ end
333
+ end
334
+
303
335
  private
336
+ def str_to_acct_info(line)
337
+ hsh = line.split(' ').map do |token|
338
+ m = token.match(/^(?<key>\w+)=(?<value>.+)$/)
339
+ [m[:key], m[:value]]
340
+ end.to_h.symbolize_keys
341
+
342
+ hsh[:name] = hsh[:PartitionName]
343
+ hsh[:qos] = hsh[:QoS].to_s == 'N/A' ? [] : hsh[:QoS].to_s.split(',')
344
+ hsh[:allow_accounts] = if hsh[:AllowAccounts].nil? || hsh[:AllowAccounts].to_s == 'ALL'
345
+ nil
346
+ else
347
+ hsh[:AllowAccounts].to_s.split(',')
348
+ end
349
+
350
+
351
+ hsh[:deny_accounts] = hsh[:DenyAccounts].nil? ? [] : hsh[:DenyAccounts].to_s.split(',')
352
+
353
+ OodCore::Job::QueueInfo.new(**hsh)
354
+ end
355
+
304
356
  # Modify the StringIO instance by advancing past the squeue header
305
357
  #
306
358
  # The first two "records" should always be discarded. Consider the
@@ -325,7 +377,7 @@ module OodCore
325
377
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
326
378
 
327
379
  args = args.map(&:to_s)
328
- args.concat ["-M", cluster] if cluster
380
+ args.concat ["-M", cluster] if cluster && cmd != 'sacctmgr'
329
381
 
330
382
  env = env.to_h
331
383
  env["SLURM_CONF"] = conf.to_s if conf
@@ -483,6 +535,13 @@ module OodCore
483
535
  @slurm.get_cluster_info
484
536
  end
485
537
 
538
+ # Retrieve the accounts available to use for the current user.
539
+ #
540
+ # @return [Array<AccountInfo>] the accounts available to the user.
541
+ def accounts
542
+ @slurm.accounts
543
+ end
544
+
486
545
  # Retrieve info for all jobs from the resource manager
487
546
  # @raise [JobAdapterError] if something goes wrong getting job info
488
547
  # @return [Array<Info>] information describing submitted jobs
@@ -605,6 +664,10 @@ module OodCore
605
664
  '#SBATCH'
606
665
  end
607
666
 
667
+ def queues
668
+ @slurm.queues
669
+ end
670
+
608
671
  private
609
672
  # Convert duration to seconds
610
673
  def duration_in_seconds(time)
@@ -37,8 +37,7 @@ module OodCore
37
37
  end
38
38
 
39
39
  module Adapters
40
- # An adapter object that describes the communication with a remote host
41
- # for job management.
40
+ # The adapter for using systemd timers as the scheduler.
42
41
  class LinuxSystemd < Adapter
43
42
  using Refinements::ArrayExtensions
44
43
 
@@ -1,3 +1,4 @@
1
+ # Utility class to maintain all the Torque attributes available.
1
2
  class OodCore::Job::Adapters::Torque
2
3
  # Maintains a constant Hash of defined PBS attribute types
3
4
  # Includes:
@@ -1,5 +1,7 @@
1
1
  require 'open3'
2
2
 
3
+ # Utility class for the Torque adapter to communicate with the
4
+ # Torque scheduler.
3
5
  class OodCore::Job::Adapters::Torque
4
6
  # Object used for simplified communication with a batch server
5
7
  class Batch
@@ -1,3 +1,4 @@
1
+ # FFI errors for the Torque adapter.
1
2
  class OodCore::Job::Adapters::Torque::FFI
2
3
  # The root exception class that all PBS-specific exceptions inherit from
3
4
  class Error < StandardError; end
@@ -1,6 +1,6 @@
1
1
  require 'ffi'
2
2
 
3
- # An interface to the C-library of Torque
3
+ # An interface to the C-library of Torque.
4
4
  class OodCore::Job::Adapters::Torque::FFI
5
5
 
6
6
  extend ::FFI::Library
@@ -1,14 +1,14 @@
1
- # Builds a sorted array of job ids given a job array spec string
2
- #
3
- # Job array spec strings:
4
- # 1 Single id
5
- # 1-10 Range
6
- # 1-10:2 Range with step
7
- # 1-10,13 Compound (range with single id)
8
- #
9
- # Note that Ranges are expected to be inclusive
10
1
  module OodCore
11
2
  module Job
3
+ # Builds a sorted array of job ids given a job array spec string
4
+ #
5
+ # Job array spec strings:
6
+ # 1 Single id
7
+ # 1-10 Range
8
+ # 1-10:2 Range with step
9
+ # 1-10,13 Compound (range with single id)
10
+ #
11
+ # Note that Ranges are expected to be inclusive
12
12
  class ArrayIds
13
13
  attr_reader :spec_string
14
14
 
@@ -1,6 +1,6 @@
1
1
  module OodCore
2
2
  module Job
3
- # An object that contains details about the cluster's active and total nodes, processors, and gpus
3
+ # An object that contains details about the cluster's active and total nodes, processors and gpus.
4
4
  class ClusterInfo
5
5
  using Refinements::HashExtensions
6
6
 
@@ -2,7 +2,7 @@ require 'time'
2
2
 
3
3
  module OodCore
4
4
  module Job
5
- # An object that describes a submitted job
5
+ # An object that describes a submitted job.
6
6
  class Info
7
7
  # The identifier of the job
8
8
  # @return [String] job id
@@ -1,6 +1,6 @@
1
1
  module OodCore
2
2
  module Job
3
- # An object that describes the resources used on a specific node
3
+ # An object that describes the resources used on a specific node.
4
4
  class NodeInfo
5
5
  # The name of the host machine
6
6
  # @return [String] node name
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # QueueInfo is information about a given queue on a scheduler.
4
+ class OodCore::Job::QueueInfo
5
+ # The name of the queue.
6
+ attr_reader :name
7
+ alias to_s name
8
+
9
+ # The QoSes associated with this queue
10
+ attr_reader :qos
11
+
12
+ # The accounts that are allowed to use this queue.
13
+ #
14
+ # nil means ALL accounts are allowed.
15
+ attr_reader :allow_accounts
16
+
17
+ # The accounts that are not allowed to use this queue.
18
+ attr_reader :deny_accounts
19
+
20
+ def initialize(**opts)
21
+ @name = opts.fetch(:name, 'unknown')
22
+ @qos = opts.fetch(:qos, [])
23
+ @allow_accounts = opts.fetch(:allow_accounts, nil)
24
+ @deny_accounts = opts.fetch(:deny_accounts, [])
25
+ end
26
+
27
+ def to_h
28
+ instance_variables.map do |var|
29
+ name = var.to_s.gsub('@', '').to_sym
30
+ [name, send(name)]
31
+ end.to_h
32
+ end
33
+ end
@@ -1,6 +1,6 @@
1
1
  module OodCore
2
2
  module Job
3
- # An object that describes the current state of a submitted job
3
+ # An object that describes the current state of a submitted job.
4
4
  class Status
5
5
  class << self
6
6
  # Possible states a submitted job can be in:
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.21.0"
3
+ VERSION = "0.23.0"
4
4
  end
data/lib/ood_core.rb CHANGED
@@ -12,6 +12,8 @@ module OodCore
12
12
  require "ood_core/job/script"
13
13
  require "ood_core/job/info"
14
14
  require "ood_core/job/cluster_info"
15
+ require "ood_core/job/account_info"
16
+ require "ood_core/job/queue_info"
15
17
  require "ood_core/job/status"
16
18
  require "ood_core/job/adapter"
17
19
  require "ood_core/job/factory"
data/ood_core.gemspec CHANGED
@@ -30,5 +30,5 @@ Gem::Specification.new do |spec|
30
30
  spec.add_development_dependency "rspec", "~> 3.0"
31
31
  spec.add_development_dependency "pry", "~> 0.10"
32
32
  spec.add_development_dependency "timecop", "~> 0.8"
33
- spec.add_development_dependency "climate_control", "~> 1.1.1"
33
+ spec.add_development_dependency "climate_control", "~> 1.2.0"
34
34
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.21.0
4
+ version: 0.23.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2022-08-01 00:00:00.000000000 Z
13
+ date: 2023-01-18 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -136,14 +136,14 @@ dependencies:
136
136
  requirements:
137
137
  - - "~>"
138
138
  - !ruby/object:Gem::Version
139
- version: 1.1.1
139
+ version: 1.2.0
140
140
  type: :development
141
141
  prerelease: false
142
142
  version_requirements: !ruby/object:Gem::Requirement
143
143
  requirements:
144
144
  - - "~>"
145
145
  - !ruby/object:Gem::Version
146
- version: 1.1.1
146
+ version: 1.2.0
147
147
  description: Open OnDemand core library that provides support for an HPC Center to
148
148
  globally define HPC services that web applications can then take advantage of.
149
149
  email:
@@ -164,6 +164,7 @@ files:
164
164
  - Rakefile
165
165
  - bin/console
166
166
  - bin/setup
167
+ - docs/.keep
167
168
  - lib/ood_core.rb
168
169
  - lib/ood_core/acl/adapter.rb
169
170
  - lib/ood_core/acl/adapters/group.rb
@@ -172,10 +173,12 @@ files:
172
173
  - lib/ood_core/batch_connect/template.rb
173
174
  - lib/ood_core/batch_connect/templates/basic.rb
174
175
  - lib/ood_core/batch_connect/templates/vnc.rb
176
+ - lib/ood_core/batch_connect/templates/vnc_container.rb
175
177
  - lib/ood_core/cluster.rb
176
178
  - lib/ood_core/clusters.rb
177
179
  - lib/ood_core/errors.rb
178
180
  - lib/ood_core/invalid_cluster.rb
181
+ - lib/ood_core/job/account_info.rb
179
182
  - lib/ood_core/job/adapter.rb
180
183
  - lib/ood_core/job/adapters/ccq.rb
181
184
  - lib/ood_core/job/adapters/drmaa.rb
@@ -215,6 +218,7 @@ files:
215
218
  - lib/ood_core/job/factory.rb
216
219
  - lib/ood_core/job/info.rb
217
220
  - lib/ood_core/job/node_info.rb
221
+ - lib/ood_core/job/queue_info.rb
218
222
  - lib/ood_core/job/script.rb
219
223
  - lib/ood_core/job/status.rb
220
224
  - lib/ood_core/job/task.rb