ood_core 0.20.2 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c4e013f80e987d4d1cefbc78cc76bcff52e4083e0b84192b42807ae46806946
4
- data.tar.gz: c4a1607904baccc1b063916ecf8e5a9692a9c0102a0d8cda3a9edf0ae760191f
3
+ metadata.gz: b8700bb802df78b66c3bb58ec5464a7f5e54a4dc3ebdc978f9816b8b5e5f7373
4
+ data.tar.gz: 1e2b4ca05369c8072afe8d1069679a8f2744cdcb3ed766924a778e78af65afa2
5
5
  SHA512:
6
- metadata.gz: ab3333366fc7802d59a15dead3b21e863d0017385053eea629a109a076c6e768ed1575378a34e68bb6c163b050be87a9cf323f087d02e4e2be4d349550bf5531
7
- data.tar.gz: 234c13fbbc428717532bd93ba4e977cfd825e480c69daf66c737165ed7c5d8a951c329ced0312d525efc0b70cb4d11234c016c6216c2bb7f74573de854340889
6
+ metadata.gz: a40fc234d2be728b697b9b68884b2286ac68e63ed36eeaa4a48487af99c262fdc1f9b3010554f9e369555734c6d2c0693ca54bc2308c5817f424ec4032759563
7
+ data.tar.gz: 5215f5c924002bfdc40576560898cae49b9a9750ff8d11c87d1e43cd43d0e86327a511ee97eea765a20f8ce6d6903acfaece7b26ad8e7733fd1d3f985b8a97e2
data/.gitignore CHANGED
@@ -50,4 +50,8 @@ Gemfile.lock
50
50
  .rvmrc
51
51
 
52
52
  # SSHFS temp files
53
- ._*
53
+ ._*
54
+
55
+ # docs are only held in the gh-pages branch
56
+ /docs/*
57
+ !/docs/.keep
data/CHANGELOG.md CHANGED
@@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.22.0] - 10-31-2022
11
+
12
+ ### Added
13
+
14
+ - Added the `vnc_container` batch connect template in [774](https://github.com/OSC/ood_core/pull/774).
15
+ - https://osc.github.io/ood_core is now updated on every commit to master in [765](https://github.com/OSC/ood_core/pull/765).
16
+
17
+ ### Fixed
18
+
19
+ - Kubernetes can now read multiple secrets in [778](https://github.com/OSC/ood_core/pull/778).
20
+ - PBSPro correctly reads usernames with periods in them in [780](https://github.com/OSC/ood_core/pull/780).
21
+
22
+ ## [0.21.0] - 08-01-2022
23
+
24
+ ### Added
25
+
26
+ - Added the `fujitsu_tcs` adapter in [766](https://github.com/OSC/ood_core/pull/766).
27
+
10
28
  ## [0.20.2] - 07-28-2022
11
29
 
12
30
  - Fixed an issue with Slurm's `cluster_info` in [762](https://github.com/OSC/ood_core/pull/762).
@@ -437,7 +455,9 @@ Functionally the same as [0.17.3] but with some CI updates.
437
455
  ### Added
438
456
  - Initial release!
439
457
 
440
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.20.2...HEAD
458
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.22.0...HEAD
459
+ [0.22.0]: https://github.com/OSC/ood_core/compare/v0.21.0...v0.22.0
460
+ [0.21.0]: https://github.com/OSC/ood_core/compare/v0.20.2...v0.21.0
441
461
  [0.20.2]: https://github.com/OSC/ood_core/compare/v0.20.1...v0.20.2
442
462
  [0.20.1]: https://github.com/OSC/ood_core/compare/v0.20.0...v0.20.1
443
463
  [0.20.0]: https://github.com/OSC/ood_core/compare/v0.19.0...v0.20.0
data/docs/.keep ADDED
File without changes
@@ -0,0 +1,252 @@
1
+ require "ood_core/refinements/hash_extensions"
2
+ require "securerandom"
3
+
4
+ module OodCore
5
+ module BatchConnect
6
+ class Factory
7
+ using Refinements::HashExtensions
8
+
9
+ # Build the VNC template from a configuration
10
+ # @param config [#to_h] the configuration for the batch connect template
11
+ def self.build_vnc_container(config)
12
+ context = config.to_h.symbolize_keys.reject { |k, _| k == :template }
13
+
14
+ unless context.key?(:container_path)
15
+ raise JobAdapterError, "You are missing the configuration 'container_path' for a vnc_container template."
16
+ end
17
+
18
+ Templates::VNC_Container.new(context)
19
+ end
20
+ end
21
+
22
+ module Templates
23
+ # A batch connect template that starts up a VNC server within a batch job
24
+ class VNC_Container < Template
25
+ # @param context [#to_h] the context used to render the template
26
+ # @option context [#to_sym, Array<#to_sym>] :conn_params ([]) A list of
27
+ # connection parameters added to the connection file (`:host`,
28
+ # `:port`, `:password`, `:spassword`, `:display` and `:websocket`
29
+ # will always exist)
30
+ # @option context [#to_s] :websockify_cmd
31
+ # ("${WEBSOCKIFY_CMD:-/opt/websockify/run}") the path to the
32
+ # websockify script (assumes you don't modify `:after_script`)
33
+ # @option context [#to_s] :vnc_log ("vnc.log") path to vnc server log
34
+ # file (assumes you don't modify `:before_script` or `:after_script`)
35
+ # @option context [#to_s] :vnc_passwd ("vnc.passwd") path to the file
36
+ # generated that contains the encrypted vnc password (assumes you
37
+ # don't modify `:before_script`)
38
+ # @option context [#to_s] :vnc_args arguments used when starting up the
39
+ # vnc server (overrides any specific vnc argument) (assumes you don't
40
+ # modify `:before_script`)
41
+ # @option context [#to_s] :name ("") name of the vnc server session
42
+ # (not set if blank or `:vnc_args` is set) (assumes you don't modify
43
+ # `:before_script`)
44
+ # @option context [#to_s] :geometry ("") resolution of vnc display (not
45
+ # set if blank or `:vnc_args` is set) (assumes you don't modify
46
+ # `:before_script`)
47
+ # @option context [#to_s] :dpi ("") dpi of vnc display (not set if
48
+ # blank or `:vnc_args` is set) (assumes you don't modify
49
+ # `:before_script`)
50
+ # @option context [#to_s] :fonts ("") comma delimited list of fonts
51
+ # available in vnc display (not set if blank or `:vnc_args` is set)
52
+ # (assumes you don't modify `:before_script`)
53
+ # @option context [#to_s] :idle ("") timeout vnc server if no
54
+ # connection in this amount of time in seconds (not set if blank or
55
+ # `:vnc_args` is set) (assumes you don't modify `:before_script`)
56
+ # @option context [#to_s] :extra_args ("") any extra arguments used
57
+ # when initializing the vnc server process (not set if blank or
58
+ # `:vnc_args` is set) (assumes you don't modify `:before_script`)
59
+ # @option context [#to_s] :vnc_clean ("...") script used to clean up
60
+ # any active vnc sessions (assumes you don't modify `:before_script`
61
+ # or `:clean_script`)
62
+ # @option context [#to_s] :container_path ("vnc_container.sif") the path
63
+ # to the container with VNC
64
+ # @option context [#to_s] :container_bindpath ("") paths to bind into
65
+ # the container with VNC
66
+ # @option context [#to_s] :container_module ("singularity") the module
67
+ # that loads Singularity or Apptainer with Lmod. Supports versions (i.e.
68
+ # apptainer/1.10). If Singularity or Apptainer are installed at a
69
+ # system level (i.e., no module loaded to activate), set this to an
70
+ # empty string.
71
+ # @option context [#to_s] :container_command ("singularity") the
72
+ # singularity or apptainer execution command
73
+ # @note the container instance name is a UUID generated in #initialize
74
+ # @see Template
75
+
76
+ def initialize(context = {})
77
+ @instance_name = SecureRandom.uuid
78
+ super
79
+ end
80
+
81
+ private
82
+ # We need to know the VNC and websockify connection information
83
+ def conn_params
84
+ (super + [:display, :websocket, :spassword, :instance_name]).uniq
85
+ end
86
+
87
+ # Before running the main script, start up a VNC server and record
88
+ # the connection information
89
+ def before_script
90
+ container_path = context.fetch(:container_path, "vnc_container.sif").to_s
91
+ container_bindpath = context.fetch(:container_bindpath, "").to_s
92
+
93
+ <<-EOT.gsub(/^ {14}/, "")
94
+
95
+ # Load #{container_module}
96
+ echo "Loading #{container_module}..."
97
+ module load #{container_module}
98
+ export #{container_command.upcase}_BINDPATH="#{container_bindpath}"
99
+ export INSTANCE_NAME="#{@instance_name}"
100
+ export instance_name="#{@instance_name}"
101
+ echo "Starting instance..."
102
+ #{container_command} instance start #{container_path} #{@instance_name}
103
+
104
+ # Setup one-time use passwords and initialize the VNC password
105
+ function change_passwd () {
106
+ echo "Setting VNC password..."
107
+ password=$(create_passwd "#{password_size}")
108
+ spassword=${spassword:-$(create_passwd "#{password_size}")}
109
+ (
110
+ umask 077
111
+ echo -ne "${password}\\n${spassword}" | #{container_command} exec instance://#{@instance_name} vncpasswd -f > "#{vnc_passwd}"
112
+ )
113
+ }
114
+ change_passwd
115
+
116
+
117
+ # Start up vnc server (if at first you don't succeed, try, try again)
118
+ echo "Starting VNC server..."
119
+ for i in $(seq 1 10); do
120
+ # Clean up any old VNC sessions that weren't cleaned before
121
+ #{vnc_clean}
122
+
123
+ # for turbovnc 3.0 compatability.
124
+ if timeout 2 #{container_command} exec instance://#{@instance_name} vncserver --help 2>&1 | grep 'nohttpd' >/dev/null 2>&1; then
125
+ HTTPD_OPT='-nohttpd'
126
+ fi
127
+
128
+ # Attempt to start VNC server
129
+ VNC_OUT=$(#{container_command} exec instance://#{@instance_name} vncserver -log "#{vnc_log}" -rfbauth "#{vnc_passwd}" $HTTPD_OPT -noxstartup #{vnc_args} 2>&1)
130
+ VNC_PID=$(pgrep -s 0 Xvnc) # the script above will daemonize the Xvnc process
131
+ echo "${VNC_PID}"
132
+ echo "${VNC_OUT}"
133
+
134
+ # Sometimes Xvnc hangs if it fails to find working disaply, we
135
+ # should kill it and try again
136
+ kill -0 ${VNC_PID} 2>/dev/null && [[ "${VNC_OUT}" =~ "Fatal server error" ]] && kill -TERM ${VNC_PID}
137
+
138
+ # Check that Xvnc process is running, if not assume it died and
139
+ # wait some random period of time before restarting
140
+ kill -0 ${VNC_PID} 2>/dev/null || sleep 0.$(random_number 1 9)s
141
+
142
+ # If running, then all is well and break out of loop
143
+ kill -0 ${VNC_PID} 2>/dev/null && break
144
+ done
145
+
146
+ # If we fail to start it after so many tries, then just give up
147
+ kill -0 ${VNC_PID} 2>/dev/null || clean_up 1
148
+
149
+ # Parse output for ports used
150
+ display=$(echo "${VNC_OUT}" | awk -F':' '/^Desktop/{print $NF}')
151
+ port=$((5900+display))
152
+
153
+ echo "Successfully started VNC server on ${host}:${port}..."
154
+
155
+ #{super}
156
+ EOT
157
+ end
158
+
159
+ # Run the script under the VNC server's display
160
+ def run_script
161
+ %(DISPLAY=:${display} #{super})
162
+ end
163
+
164
+ # After startup the main script, scan the VNC server log file for
165
+ # successful connections so that the password can be reset
166
+ def after_script
167
+ websockify_cmd = context.fetch(:websockify_cmd, "${WEBSOCKIFY_CMD:-/opt/websockify/run}").to_s
168
+
169
+ <<-EOT.gsub(/^ {14}/, "")
170
+ #{super}
171
+
172
+ # Launch websockify websocket server
173
+ module load #{container_module}
174
+ echo "Starting websocket server..."
175
+ websocket=$(find_port)
176
+ #{container_command} exec instance://#{@instance_name} #{websockify_cmd} -D ${websocket} localhost:${port}
177
+
178
+ # Set up background process that scans the log file for successful
179
+ # connections by users, and change the password after every
180
+ # connection
181
+ echo "Scanning VNC log file for user authentications..."
182
+ while read -r line; do
183
+ if [[ ${line} =~ "Full-control authentication enabled for" ]]; then
184
+ change_passwd
185
+ create_yml
186
+ fi
187
+ done < <(tail -f --pid=${SCRIPT_PID} "#{vnc_log}") &
188
+ EOT
189
+ end
190
+
191
+ # Clean up the running VNC server and any other stale VNC servers
192
+ def clean_script
193
+ <<-EOT.gsub(/^ {14}/, "")
194
+ #{super}
195
+ module load #{container_module}
196
+
197
+ #{vnc_clean}
198
+ [[ -n ${display} ]] && vncserver -kill :${display}
199
+ #{container_command} instance stop #{@instance_name}
200
+ EOT
201
+ end
202
+
203
+ # Log file for VNC server
204
+ def vnc_log
205
+ context.fetch(:vnc_log, "vnc.log").to_s
206
+ end
207
+
208
+ # Password file for VNC server
209
+ def vnc_passwd
210
+ context.fetch(:vnc_passwd, "vnc.passwd").to_s
211
+ end
212
+
213
+ def container_module
214
+ context.fetch(:container_module, "singularity").to_s
215
+ end
216
+
217
+ def container_command
218
+ context.fetch(:container_command, "singularity").to_s
219
+ end
220
+
221
+ # Arguments sent to `vncserver` command
222
+ def vnc_args
223
+ context.fetch(:vnc_args) do
224
+ name = context.fetch(:name, "").to_s
225
+ geometry = context.fetch(:geometry, "").to_s
226
+ dpi = context.fetch(:dpi, "").to_s
227
+ fonts = context.fetch(:fonts, "").to_s
228
+ idle = context.fetch(:idle, "").to_s
229
+ extra_args = context.fetch(:extra_args, "").to_s
230
+
231
+ args = []
232
+ args << "-name #{name}" unless name.empty?
233
+ args << "-geometry #{geometry}" unless geometry.empty?
234
+ args << "-dpi #{dpi}" unless dpi.empty?
235
+ args << "-fp #{fonts}" unless fonts.empty?
236
+ args << "-idletimeout #{idle}" unless idle.empty?
237
+ args << extra_args
238
+
239
+ args.join(" ")
240
+ end.to_s
241
+ end
242
+
243
+ # Clean up any stale VNC sessions
244
+ def vnc_clean
245
+ context.fetch(:vnc_clean) do
246
+ %(#{container_command} exec instance://#{@instance_name} vncserver -list | awk '/^:/{system("kill -0 "$2" 2>/dev/null || #{container_command} exec instance://#{@instance_name} vncserver -kill "$1)}')
247
+ end.to_s
248
+ end
249
+ end
250
+ end
251
+ end
252
+ end
@@ -23,6 +23,7 @@ module OodCore
23
23
 
24
24
  class PromptError < StandardError; end
25
25
 
26
+ # The adapter class for the Cloudy Cluster product CCQ.
26
27
  class CCQ < Adapter
27
28
  using Refinements::ArrayExtensions
28
29
 
@@ -0,0 +1,403 @@
1
+ require "time"
2
+ require "ood_core/refinements/hash_extensions"
3
+ require "ood_core/refinements/array_extensions"
4
+ require "ood_core/job/adapters/helper"
5
+
6
+ module OodCore
7
+ module Job
8
+ class Factory
9
+ using Refinements::HashExtensions
10
+
11
+ # Build the Fujitsu TCS (Technical Computing Suite) adapter from a configuration
12
+ # @param config [#to_h] the configuration for job adapter
13
+ # @option config [Object] :bin (nil) Path to Fujitsu TCS resource manager binaries
14
+ # @option config [#to_h] :bin_overrides ({}) Optional overrides to Fujitsu TCS resource manager executables
15
+ def self.build_fujitsu_tcs(config)
16
+ c = config.to_h.symbolize_keys
17
+ bin = c.fetch(:bin, nil)
18
+ bin_overrides = c.fetch(:bin_overrides, {})
19
+ fujitsu_tcs = Adapters::Fujitsu_TCS::Batch.new(bin: bin, bin_overrides: bin_overrides)
20
+ Adapters::Fujitsu_TCS.new(fujitsu_tcs: fujitsu_tcs)
21
+ end
22
+ end
23
+
24
+ module Adapters
25
+ # An adapter object that describes the communication with a Fujitsu TCS
26
+ # resource manager for job management.
27
+ class Fujitsu_TCS < Adapter
28
+ using Refinements::HashExtensions
29
+ using Refinements::ArrayExtensions
30
+
31
+ # Object used for simplified communication with a Fujitsu TCS batch server
32
+ # @api private
33
+ class Batch
34
+ # The path to the Fujitsu TCS binaries
35
+ # @example
36
+ # my_batch.bin.to_s #=> "/usr/local/fujitsu_tcs/10.0.0/bin"
37
+ # @return [Pathname] path to Fujitsu TCS binaries
38
+ attr_reader :bin
39
+
40
+ # Optional overrides for Fujitsu TCS executables
41
+ # @example
42
+ # {'pjsub' => '/usr/local/bin/pjsub'}
43
+ # @return [Hash<String, String>]
44
+ attr_reader :bin_overrides
45
+
46
+ # The root exception class that all Fujitsu TCS specific exceptions inherit
47
+ # from
48
+ class Error < StandardError; end
49
+
50
+ # An error indicating the Fujitsu TCS command timed out
51
+ class Fujitsu_TCS_TimeoutError < Error; end
52
+
53
+ # @param bin [#to_s] path to Fujitsu TCS installation binaries
54
+ # @param bin_overrides [#to_h] a hash of bin overrides to be used in job
55
+ def initialize(bin: nil, bin_overrides: {})
56
+ @bin = Pathname.new(bin.to_s)
57
+ @bin_overrides = bin_overrides
58
+ end
59
+
60
+ # Get a list of hashes detailing each of the jobs on the batch server
61
+ # @example Status info for all jobs
62
+ # my_batch.get_jobs
63
+ # #=>
64
+ # #[
65
+ # # {
66
+ # # :JOB_ID => "123",
67
+ # # :JOB_NAME => "my_job",
68
+ # # ...
69
+ # # },
70
+ # # {
71
+ # # :JOB_ID => "125",
72
+ # # :JOB_NAME => "my_other_job",
73
+ # # ...
74
+ # # },
75
+ # # ...
76
+ # #]
77
+ # @param id [#to_s] the id of the job
78
+ # @param owner [String] the owner(s) of the job
79
+ # @raise [Error] if `pjstat` command exited unsuccessfully
80
+ # @return [Array<Hash>] list of details for jobs
81
+ def get_jobs(id: "", owner: nil)
82
+ args = ["-s", "--data", "--choose=jid,jnam,rscg,st,std,stde,adt,sdt,nnumr,usr,elpl,elp"]
83
+ args.concat ["--filter jid=" + id.to_s] unless id.to_s.empty?
84
+ args.concat ["--filter usr=" + owner.to_s] unless owner.to_s.empty?
85
+
86
+ StringIO.open(call("pjstat", *args)) do |output|
87
+ output.gets() # Skip header
88
+ jobs = []
89
+ output.each_line do |line|
90
+ l = line.split(",")
91
+ jobs << {:JOB_ID => l[1], :JOB_NAME => l[2], :RSC_GRP => l[3].split(" ")[0],
92
+ :ST => l[4], :STD => l[5], :STDE => l[6],
93
+ :ACCEPT => l[7], :START_DATE => l[8], :NODES => l[9].split(":")[0],
94
+ :USER => l[10], :ELAPSE_LIM => l[11], :ELAPSE_TIM => l[12].split(" ")[0] }
95
+ end
96
+ jobs
97
+ end
98
+ rescue Fujitsu_TCS_TimeoutError
99
+ return [{ JOB_ID: id, ST: 'undetermined' }]
100
+ end
101
+
102
+ # Put a specified job on hold
103
+ # @example Put job "1234" on hold
104
+ # my_batch.hold_job("1234")
105
+ # @param id [#to_s] the id of the job
106
+ # @raise [Error] if `pjhold` command exited unsuccessfully
107
+ # @return [void]
108
+ def hold_job(id)
109
+ call("pjhold", id.to_s)
110
+ end
111
+
112
+ # Release a specified job that is on hold
113
+ # @example Release job "1234" from on hold
114
+ # my_batch.release_job("1234")
115
+ # @param id [#to_s] the id of the job
116
+ # @raise [Error] if `pjrls` command exited unsuccessfully
117
+ # @return [void]
118
+ def release_job(id)
119
+ call("pjrls", id.to_s)
120
+ end
121
+
122
+ # Delete a specified job from batch server
123
+ # @example Delete job "1234"
124
+ # my_batch.delete_job("1234")
125
+ # @param id [#to_s] the id of the job
126
+ # @raise [Error] if `pjdel` command exited unsuccessfully
127
+ # @return [void]
128
+ def delete_job(id)
129
+ call("pjdel", id.to_s)
130
+ end
131
+
132
+ # Submit a script expanded as a string to the batch server
133
+ # @param str [#to_s] script as a string
134
+ # @param args [Array<#to_s>] arguments passed to `pjsub` command
135
+ # @raise [Error] if `pjsub` command exited unsuccessfully
136
+ # @return [String] the id of the job that was created
137
+ def submit_string(str, args: [])
138
+ args = args.map(&:to_s)
139
+ call("pjsub", *args, stdin: str.to_s).split(" ")[5]
140
+ end
141
+
142
+ private
143
+ # Call a forked Fujitsu TCS command
144
+ def call(cmd, *args, stdin: "")
145
+ cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
146
+ args = args.map(&:to_s)
147
+ o, e, s = Open3.capture3(cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
148
+ s.success? ? o : raise(Error, e)
149
+ end
150
+ end
151
+
152
+ # Mapping of state codes for Fujitsu TCS resource manager
153
+ STATE_MAP = {
154
+ 'ACC' => :queued, # Accepted job submission
155
+ 'RJT' => :completed, # Rejected job submission
156
+ 'QUE' => :queued, # Waiting for job execution
157
+ 'RNA' => :queued, # Acquiring resources required for job execution
158
+ 'RNP' => :running, # Executing prologue
159
+ 'RUN' => :running, # Executing job
160
+ 'RNE' => :running, # Executing epilogue
161
+ 'RNO' => :running, # Waiting for completion of job termination processing
162
+ 'SPP' => :suspended, # Suspend in progress
163
+ 'SPD' => :suspended, # Suspended
164
+ 'RSM' => :running, # Resume in progress
165
+ 'EXT' => :completed, # Exited job end execution
166
+ 'CCL' => :completed, # Exited job execution by interruption
167
+ 'HLD' => :suspended, # In fixed state due to users
168
+ 'ERR' => :completed, # In fixed state due to an error
169
+ }
170
+
171
+ # @api private
172
+ # @param opts [#to_h] the options defining this adapter
173
+ # @option opts [Batch] :fujitsu_tcs The Fujitsu TCS batch object
174
+ # @see Factory.build_fujitsu_tcs
175
+ def initialize(opts = {})
176
+ o = opts.to_h.symbolize_keys
177
+
178
+ @fujitsu_tcs = o.fetch(:fujitsu_tcs) { raise ArgumentError, "No Fujitsu TCS object specified. Missing argument: fujitsu_tcs" }
179
+ end
180
+
181
+ # Submit a job with the attributes defined in the job template instance
182
+ # @param script [Script] script object that describes the script and
183
+ # attributes for the submitted job
184
+ # @param after [#to_s, Array<#to_s>] this job may be scheduled for
185
+ # execution at any point after dependent jobs have started execution
186
+ # @param afterok [#to_s, Array<#to_s>] this job may be scheduled for
187
+ # execution only after dependent jobs have terminated with no errors
188
+ # @param afternotok [#to_s, Array<#to_s>] this job may be scheduled for
189
+ # execution only after dependent jobs have terminated with errors
190
+ # @param afterany [#to_s, Array<#to_s>] this job may be scheduled for
191
+ # execution after dependent jobs have terminated
192
+ # @raise [JobAdapterError] if something goes wrong submitting a job
193
+ # @return [String] the job id returned after successfully submitting a
194
+ # job
195
+ # @see Adapter#submit
196
+ def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
197
+ #after = Array(after).map(&:to_s)
198
+ #afterok = Array(afterok).map(&:to_s)
199
+ #afternotok = Array(afternotok).map(&:to_s)
200
+ #afterany = Array(afterany).map(&:to_s)
201
+ if !after.empty? || !afterok.empty? || !afternotok.empty? || !afterany.empty?
202
+ raise JobAdapterError, "Dependency between jobs has not implemented yet."
203
+ end
204
+
205
+ # Set pjsub options
206
+ args = []
207
+ args.concat (script.rerunnable ? ["--restart"] : ["--norestart"]) unless script.rerunnable.nil?
208
+ args.concat ["--mail-list", script.email.join(",")] unless script.email.nil?
209
+ if script.email_on_started && script.email_on_terminated
210
+ args.concat ["-m", "b,e"]
211
+ elsif script.email_on_started
212
+ args.concat ["-m", "b"]
213
+ elsif script.email_on_terminated
214
+ args.concat ["-m", "e"]
215
+ end
216
+
217
+ args.concat ["-N", script.job_name] unless script.job_name.nil?
218
+ args.concat ["-o", script.output_path] unless script.output_path.nil?
219
+ if script.error_path.nil?
220
+ args.concat ["-j"]
221
+ else
222
+ args.concat ["-e", script.error_path]
223
+ end
224
+ args.concat ["--rscgrp", script.queue_name] unless script.queue_name.nil?
225
+ args.concat ["-p", script.priority] unless script.priority.nil?
226
+ args.concat ["--at", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
227
+ args.concat ["-L \"elapse=" + seconds_to_duration(script.wall_time) + "\""] unless script.wall_time.nil?
228
+ args.concat ["--bulk", "--sparam", script.job_array_request] unless script.job_array_request.nil?
229
+
230
+ # Set environment variables
231
+ envvars = script.job_environment.to_h
232
+ args.concat ["-x", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
233
+ args.concat ["-X"] if script.copy_environment?
234
+
235
+ # Set native options
236
+ args.concat script.native if script.native
237
+
238
+ # Set content
239
+ content = if script.shell_path.nil?
240
+ script.content
241
+ else
242
+ "#!#{script.shell_path}\n#{script.content}"
243
+ end
244
+
245
+ # Submit job
246
+ @fujitsu_tcs.submit_string(content, args: args)
247
+ rescue Batch::Error => e
248
+ raise JobAdapterError, e.message
249
+ end
250
+
251
+ # Retrieve info for all jobs from the resource manager
252
+ # @raise [JobAdapterError] if something goes wrong getting job info
253
+ # @return [Array<Info>] information describing submitted jobs
254
+ # @see Adapter#info_all
255
+ def info_all(attrs: nil)
256
+ @fujitsu_tcs.get_jobs().map do |v|
257
+ parse_job_info(v)
258
+ end
259
+ rescue Batch::Error => e
260
+ raise JobAdapterError, e.message
261
+ end
262
+
263
+ # Retrieve job info from the resource manager
264
+ # @param id [#to_s] the id of the job
265
+ # @raise [JobAdapterError] if something goes wrong getting job info
266
+ # @return [Info] information describing submitted job
267
+ # @see Adapter#info
268
+ def info(id)
269
+ id = id.to_s
270
+ info_ary = @fujitsu_tcs.get_jobs(id: id).map do |v|
271
+ parse_job_info(v)
272
+ end
273
+
274
+ # If no job was found we assume that it has completed
275
+ info_ary.empty? ? Info.new(id: id, status: :completed) : info_ary.first # @fujitsu_tcs.get_jobs() must return only one element.
276
+ rescue Batch::Error => e
277
+ # set completed status if can't find job id
278
+ if /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
279
+ Info.new(
280
+ id: id,
281
+ status: :completed
282
+ )
283
+ else
284
+ raise JobAdapterError, e.message
285
+ end
286
+ end
287
+
288
+ # Retrieve info for all jobs for a given owner or owners from the
289
+ # resource manager
290
+ # @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
291
+ # @raise [JobAdapterError] if something goes wrong getting job info
292
+ # @return [Array<Info>] information describing submitted jobs
293
+ def info_where_owner(owner, attrs: nil)
294
+ owner = Array.wrap(owner).map(&:to_s).join('+')
295
+ @fujitsu_tcs.get_jobs(owner: owner).map do |v|
296
+ parse_job_info(v)
297
+ end
298
+ rescue Batch::Error => e
299
+ raise JobAdapterError, e.message
300
+ end
301
+
302
+ # Retrieve job status from resource manager
303
+ # @param id [#to_s] the id of the job
304
+ # @raise [JobAdapterError] if something goes wrong getting job status
305
+ # @return [Status] status of job
306
+ # @see Adapter#status
307
+ def status(id)
308
+ id = id.to_s
309
+ jobs = @fujitsu_tcs.get_jobs(id: id)
310
+
311
+ if job = jobs.detect { |j| j[:JOB_ID] == id }
312
+ Status.new(state: get_state(job[:ST]))
313
+ else
314
+ # set completed status if can't find job id
315
+ Status.new(state: :completed)
316
+ end
317
+ rescue Batch::Error => e
318
+ # set completed status if can't find job id
319
+ if /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
320
+ Status.new(state: :completed)
321
+ else
322
+ raise JobAdapterError, e.message
323
+ end
324
+ end
325
+
326
+ # Put the submitted job on hold
327
+ # @param id [#to_s] the id of the job
328
+ # @raise [JobAdapterError] if something goes wrong holding a job
329
+ # @return [void]
330
+ # @see Adapter#hold
331
+ def hold(id)
332
+ @fujitsu_tcs.hold_job(id.to_s)
333
+ rescue Batch::Error => e
334
+ # assume successful job hold if can't find job id
335
+ raise JobAdapterError, e.message unless /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
336
+ end
337
+
338
+ # Release the job that is on hold
339
+ # @param id [#to_s] the id of the job
340
+ # @raise [JobAdapterError] if something goes wrong releasing a job
341
+ # @return [void]
342
+ # @see Adapter#release
343
+ def release(id)
344
+ @fujitsu_tcs.release_job(id.to_s)
345
+ rescue Batch::Error => e
346
+ # assume successful job release if can't find job id
347
+ raise JobAdapterError, e.message unless /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
348
+ end
349
+
350
+ # Delete the submitted job
351
+ # @param id [#to_s] the id of the job
352
+ # @raise [JobAdapterError] if something goes wrong deleting a job
353
+ # @return [void]
354
+ # @see Adapter#delete
355
+ def delete(id)
356
+ @fujitsu_tcs.delete_job(id.to_s)
357
+ rescue Batch::Error => e
358
+ # assume successful job deletion if can't find job id
359
+ raise JobAdapterError, e.message unless /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
360
+ end
361
+
362
+ def directive_prefix
363
+ '#PJM'
364
+ end
365
+
366
+ private
367
+ # Convert duration to seconds
368
+ def duration_in_seconds(time)
369
+ return 0 if time.nil?
370
+ time, days = time.split("-").reverse
371
+ days.to_i * 24 * 3600 +
372
+ time.split(':').map { |v| v.to_i }.inject(0) { |total, v| total * 60 + v }
373
+ end
374
+
375
+ # Convert seconds to duration
376
+ def seconds_to_duration(time)
377
+ "%02d:%02d:%02d" % [time/3600, time/60%60, time%60]
378
+ end
379
+
380
+ # Determine state from Fujitsu TCS state code
381
+ def get_state(st)
382
+ STATE_MAP.fetch(st, :undetermined)
383
+ end
384
+
385
+ # Parse hash describing Fujitsu TCS job status
386
+ def parse_job_info(v)
387
+ Info.new(
388
+ id: v[:JOB_ID],
389
+ job_name: v[:JOB_NAME],
390
+ status: get_state(v[:ST]),
391
+ job_owner: v[:USER],
392
+ dispatch_time: v[:START_DATE],
393
+ wallclock_time: duration_in_seconds(v[:ELAPSE_TIM]),
394
+ wallclock_limit: duration_in_seconds(v[:ELAPSE_LIM]),
395
+ submission_time: v[:ACCEPT],
396
+ queue_name: v[:RSC_GRP],
397
+ native: v
398
+ )
399
+ end
400
+ end
401
+ end
402
+ end
403
+ end
@@ -1,6 +1,8 @@
1
1
  require "ood_core/refinements/hash_extensions"
2
2
  require "json"
3
3
 
4
+ # Utility class for the Kubernetes adapter to interact
5
+ # with the Kubernetes APIs.
4
6
  class OodCore::Job::Adapters::Kubernetes::Batch
5
7
 
6
8
  require_relative "helper"
@@ -1,3 +1,5 @@
1
+ # Utility class for the Kubernetes adapter to parse
2
+ # json data into Ruby objects.
1
3
  class OodCore::Job::Adapters::Kubernetes::Helper
2
4
 
3
5
  require_relative 'resources'
@@ -193,10 +195,14 @@ class OodCore::Job::Adapters::Kubernetes::Helper
193
195
  end
194
196
 
195
197
  def secret_info_from_json(json_data)
196
- raw = json_data.dig(:data, :password)
197
- { ood_connection_info: { password: Base64.decode64(raw) } }
198
- rescue
199
- {}
198
+ data = json_data.to_h[:data] || {}
199
+
200
+ info = data.symbolize_keys.each_with_object({}) do |data_kv, hash|
201
+ hash[data_kv[0]] = Base64.decode64(data_kv[1])
202
+ rescue
203
+ next
204
+ end
205
+ { ood_connection_info: info }
200
206
  end
201
207
 
202
208
  def dispatch_time(json_data)
@@ -1,4 +1,4 @@
1
- # An object that describes a submitted kubernetes job with extended information
1
+ # An object that describes a submitted kubernetes job with extended information.
2
2
  class OodCore::Job::Adapters::Kubernetes::K8sJobInfo < OodCore::Job::Info
3
3
  attr_reader :ood_connection_info
4
4
 
@@ -1,5 +1,5 @@
1
1
  module OodCore::Job::Adapters::Kubernetes::Resources
2
-
2
+ # Utility class for kubernetes configmap objects.
3
3
  class ConfigMap
4
4
  attr_accessor :name, :files
5
5
 
@@ -20,6 +20,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
20
20
  end
21
21
  end
22
22
 
23
+ # Utility class for mounting files in kubernetes configmap objects.
23
24
  class ConfigMapFile
24
25
  attr_accessor :filename, :data, :mount_path, :sub_path, :init_mount_path, :init_sub_path
25
26
 
@@ -33,6 +34,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
33
34
  end
34
35
  end
35
36
 
37
+ # Utility class for kubernetes probe settings.
36
38
  class TCPProbe
37
39
  attr_accessor :port, :initial_delay_seconds, :failure_threshold, :period_seconds
38
40
 
@@ -54,6 +56,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
54
56
  end
55
57
  end
56
58
 
59
+ # Utility class for kubernetes container object.
57
60
  class Container
58
61
  attr_accessor :name, :image, :command, :port, :env, :working_dir,
59
62
  :memory_limit, :memory_request, :cpu_limit, :cpu_request,
@@ -106,6 +109,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
106
109
  end
107
110
  end
108
111
 
112
+ # Utility class for kubernetes podspec object.
109
113
  class PodSpec
110
114
  attr_accessor :container, :init_containers
111
115
  def initialize(container, init_containers: nil)
@@ -13,6 +13,8 @@ module OodCore
13
13
  end
14
14
 
15
15
  module Adapters
16
+
17
+ # The adapter class for Kubernetes.
16
18
  class Kubernetes < Adapter
17
19
 
18
20
  using Refinements::ArrayExtensions
@@ -22,6 +22,8 @@ module OodCore
22
22
  end
23
23
 
24
24
  module Adapters
25
+
26
+ # The adapter class for the LSF scheduler.
25
27
  class Lsf < Adapter
26
28
  using Refinements::ArrayExtensions
27
29
 
@@ -453,7 +453,7 @@ module OodCore
453
453
 
454
454
  # Parse hash describing PBS Pro job status
455
455
  def parse_job_info(v)
456
- /^(?<job_owner>[\w-]+)@(?<submit_host>.+)$/ =~ v[:Job_Owner]
456
+ /^(?<job_owner>[\w\-.]+)@(?<submit_host>.+)$/ =~ v[:Job_Owner]
457
457
  allocated_nodes = parse_nodes(v[:exec_host] || "")
458
458
  procs = allocated_nodes.inject(0) { |sum, x| sum + x[:procs] }
459
459
  if allocated_nodes.empty? # fill in with requested resources
@@ -1,3 +1,5 @@
1
+ # Utility helper class for the SGE adapter to parse
2
+ # input and generate submission arguments.
1
3
  class OodCore::Job::Adapters::Sge::Helper
2
4
  require 'ood_core/job/adapters/sge'
3
5
 
@@ -16,7 +16,6 @@ require 'ood_core/job/array_ids'
16
16
  # :status
17
17
  # :wallclock_limit
18
18
  # :wallclock_time
19
-
20
19
  class QstatXmlJRListener
21
20
  # [Hash]
22
21
  attr_reader :parsed_job
@@ -13,7 +13,6 @@ require 'date'
13
13
  # :queue_name
14
14
  # :status
15
15
  # :wallclock_limit
16
-
17
16
  class QstatXmlRListener
18
17
  # [Array<Hash>]
19
18
  attr_reader :parsed_jobs
@@ -22,6 +22,8 @@ module OodCore
22
22
  end
23
23
 
24
24
  module Adapters
25
+
26
+ # The adapter class for Grid Engine (GE) flavors like Sun Grid Engine.
25
27
  class Sge < Adapter
26
28
  using Refinements::HashExtensions
27
29
  using Refinements::ArrayExtensions
@@ -37,8 +37,7 @@ module OodCore
37
37
  end
38
38
 
39
39
  module Adapters
40
- # An adapter object that describes the communication with a remote host
41
- # for job management.
40
+ # The adapter for using systemd timers as the scheduler.
42
41
  class LinuxSystemd < Adapter
43
42
  using Refinements::ArrayExtensions
44
43
 
@@ -1,3 +1,4 @@
1
+ # Utility class to maintain all the Torque attributes available.
1
2
  class OodCore::Job::Adapters::Torque
2
3
  # Maintains a constant Hash of defined PBS attribute types
3
4
  # Includes:
@@ -1,5 +1,7 @@
1
1
  require 'open3'
2
2
 
3
+ # Utility class for the Torque adapter to communicate with the
4
+ # Torque scheduler.
3
5
  class OodCore::Job::Adapters::Torque
4
6
  # Object used for simplified communication with a batch server
5
7
  class Batch
@@ -1,3 +1,4 @@
1
+ # FFI errors for the Torque adapter.
1
2
  class OodCore::Job::Adapters::Torque::FFI
2
3
  # The root exception class that all PBS-specific exceptions inherit from
3
4
  class Error < StandardError; end
@@ -1,6 +1,6 @@
1
1
  require 'ffi'
2
2
 
3
- # An interface to the C-library of Torque
3
+ # An interface to the C-library of Torque.
4
4
  class OodCore::Job::Adapters::Torque::FFI
5
5
 
6
6
  extend ::FFI::Library
@@ -1,14 +1,14 @@
1
- # Builds a sorted array of job ids given a job array spec string
2
- #
3
- # Job array spec strings:
4
- # 1 Single id
5
- # 1-10 Range
6
- # 1-10:2 Range with step
7
- # 1-10,13 Compound (range with single id)
8
- #
9
- # Note that Ranges are expected to be inclusive
10
1
  module OodCore
11
2
  module Job
3
+ # Builds a sorted array of job ids given a job array spec string
4
+ #
5
+ # Job array spec strings:
6
+ # 1 Single id
7
+ # 1-10 Range
8
+ # 1-10:2 Range with step
9
+ # 1-10,13 Compound (range with single id)
10
+ #
11
+ # Note that Ranges are expected to be inclusive
12
12
  class ArrayIds
13
13
  attr_reader :spec_string
14
14
 
@@ -1,6 +1,6 @@
1
1
  module OodCore
2
2
  module Job
3
- # An object that contains details about the cluster's active and total nodes, processors, and gpus
3
+ # An object that contains details about the cluster's active and total nodes, processors and gpus.
4
4
  class ClusterInfo
5
5
  using Refinements::HashExtensions
6
6
 
@@ -2,7 +2,7 @@ require 'time'
2
2
 
3
3
  module OodCore
4
4
  module Job
5
- # An object that describes a submitted job
5
+ # An object that describes a submitted job.
6
6
  class Info
7
7
  # The identifier of the job
8
8
  # @return [String] job id
@@ -1,6 +1,6 @@
1
1
  module OodCore
2
2
  module Job
3
- # An object that describes the resources used on a specific node
3
+ # An object that describes the resources used on a specific node.
4
4
  class NodeInfo
5
5
  # The name of the host machine
6
6
  # @return [String] node name
@@ -1,6 +1,6 @@
1
1
  module OodCore
2
2
  module Job
3
- # An object that describes the current state of a submitted job
3
+ # An object that describes the current state of a submitted job.
4
4
  class Status
5
5
  class << self
6
6
  # Possible states a submitted job can be in:
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.20.2"
3
+ VERSION = "0.22.0"
4
4
  end
data/ood_core.gemspec CHANGED
@@ -30,5 +30,5 @@ Gem::Specification.new do |spec|
30
30
  spec.add_development_dependency "rspec", "~> 3.0"
31
31
  spec.add_development_dependency "pry", "~> 0.10"
32
32
  spec.add_development_dependency "timecop", "~> 0.8"
33
- spec.add_development_dependency "climate_control", "~> 1.1.1"
33
+ spec.add_development_dependency "climate_control", "~> 1.2.0"
34
34
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.2
4
+ version: 0.22.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2022-07-28 00:00:00.000000000 Z
13
+ date: 2022-10-31 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -136,14 +136,14 @@ dependencies:
136
136
  requirements:
137
137
  - - "~>"
138
138
  - !ruby/object:Gem::Version
139
- version: 1.1.1
139
+ version: 1.2.0
140
140
  type: :development
141
141
  prerelease: false
142
142
  version_requirements: !ruby/object:Gem::Requirement
143
143
  requirements:
144
144
  - - "~>"
145
145
  - !ruby/object:Gem::Version
146
- version: 1.1.1
146
+ version: 1.2.0
147
147
  description: Open OnDemand core library that provides support for an HPC Center to
148
148
  globally define HPC services that web applications can then take advantage of.
149
149
  email:
@@ -164,6 +164,7 @@ files:
164
164
  - Rakefile
165
165
  - bin/console
166
166
  - bin/setup
167
+ - docs/.keep
167
168
  - lib/ood_core.rb
168
169
  - lib/ood_core/acl/adapter.rb
169
170
  - lib/ood_core/acl/adapters/group.rb
@@ -172,6 +173,7 @@ files:
172
173
  - lib/ood_core/batch_connect/template.rb
173
174
  - lib/ood_core/batch_connect/templates/basic.rb
174
175
  - lib/ood_core/batch_connect/templates/vnc.rb
176
+ - lib/ood_core/batch_connect/templates/vnc_container.rb
175
177
  - lib/ood_core/cluster.rb
176
178
  - lib/ood_core/clusters.rb
177
179
  - lib/ood_core/errors.rb
@@ -179,6 +181,7 @@ files:
179
181
  - lib/ood_core/job/adapter.rb
180
182
  - lib/ood_core/job/adapters/ccq.rb
181
183
  - lib/ood_core/job/adapters/drmaa.rb
184
+ - lib/ood_core/job/adapters/fujitsu_tcs.rb
182
185
  - lib/ood_core/job/adapters/helper.rb
183
186
  - lib/ood_core/job/adapters/kubernetes.rb
184
187
  - lib/ood_core/job/adapters/kubernetes/batch.rb