ood_core 0.23.5 → 0.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dd70ee666e6339110f07224c339fb666d7e130c44854784d818ef6a87172e610
4
- data.tar.gz: a39550f3b74ea8b50aa28c35398d96dbf36d38e6768a37afa65c815cd5bd9cc2
3
+ metadata.gz: 2d7ae635ec6299414feac4de3589bea126a97cd427507bd99242f0bcb79553b4
4
+ data.tar.gz: 29855a0d0573e1d51fe6ebf7a0679c37993c608f77e45f804f6851d84160620c
5
5
  SHA512:
6
- metadata.gz: d5e9f4ab2800182ebc7f8e7d1c975ba49a4d9e079731856f05cb85ae30e610a8cd9c51db8354aae4a4a6b99bfdd512a544dabc2d518e2fb089f96bd86be82976
7
- data.tar.gz: ce37e66c311f9b1ddf7d2bd24b9213be4b1a676913bcc6e4242d4b9e84dcf175a5d4ea6e4fa4400ad3bf42d0bbf10e3829f1c145330bad916b581731e871aa30
6
+ metadata.gz: 6b93ec8179ffca892538e5bdd8bcc1eae6adceac3a2d898079c7f8f3a60d5cd04ac64569e71ba0af2fb9ab9833a9b7f6d0c1f845cde1487eaf3d9c8f90b382d8
7
+ data.tar.gz: 2919276a9ade663afce93339602304f0fc727fa392d6f711f0166b82003c51e8765520603c93047919e4f1d1b81104723fd5ac05c760e891e90ab3a13a5d1c8e
data/CHANGELOG.md CHANGED
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.24.1] - 11-29-2023
11
+
12
+ [820](https://github.com/OSC/ood_core/pull/820) Reverts [818](https://github.com/OSC/ood_core/pull/818)
13
+
14
+ ## [0.24.0] - 11-28-2023
15
+
16
+ - Code cleanup and separate arguments with whitespace in Fujitsu TCS adapter by @mnakao in https://github.com/OSC/ood_core/pull/808
17
+ - Add OUT_OF_MEMORY state for Slurm by @robinkar in https://github.com/OSC/ood_core/pull/809
18
+ - find_port: avoid infinite loop by @utkarshayachit in https://github.com/OSC/ood_core/pull/811
19
+ - handle find_port error codes by @utkarshayachit in https://github.com/OSC/ood_core/pull/812
20
+ - vnc: run websockify as background process by @utkarshayachit in https://github.com/OSC/ood_core/pull/813
21
+ - Add working_dir option for Fujitsu TCS job scheduler by @mnakao in https://github.com/OSC/ood_core/pull/816
22
+ - Minor fix for Fujitsu TCS by @mnakao in https://github.com/OSC/ood_core/pull/817
23
+ - Update rake requirement from ~> 13.0.1 to ~> 13.1.0 by @dependabot in https://github.com/OSC/ood_core/pull/814
24
+ - Changes default return value for cluster.batch_connect_ssh_allow? by @HazelGrant in https://github.com/OSC/ood_core/pull/818
25
+
10
26
  ## [0.23.5] - 04-10-2023
11
27
 
12
28
  ### Fixed
@@ -162,14 +162,28 @@ module OodCore
162
162
  export -f port_used
163
163
 
164
164
  # Find available port in range [$2..$3] for host $1
165
- # Default: [#{min_port}..#{max_port}]
165
+ # Default host: localhost
166
+ # Default port range: [#{min_port}..#{max_port}]
167
+ # returns error code (0: success, 1: failed)
168
+ # On success, the chosen port is echoed on stdout.
166
169
  find_port () {
167
170
  local host="${1:-localhost}"
168
- local port=$(random_number "${2:-#{min_port}}" "${3:-#{max_port}}")
169
- while port_used "${host}:${port}"; do
170
- port=$(random_number "${2:-#{min_port}}" "${3:-#{max_port}}")
171
+ local min_port=${2:-#{min_port}}
172
+ local max_port=${3:-#{max_port}}
173
+ local port_range=($(shuf -i ${min_port}-${max_port}))
174
+ local retries=1 # number of retries over the port range if first attempt fails
175
+ for ((attempt=0; attempt<=$retries; attempt++)); do
176
+ for port in "${port_range[@]}"; do
177
+ if port_used "${host}:${port}"; then
178
+ continue
179
+ fi
180
+ echo "${port}"
181
+ return 0 # success
182
+ done
171
183
  done
172
- echo "${port}"
184
+
185
+ echo "error: failed to find available port in range ${min_port}..${max_port}" >&2
186
+ return 1 # failure
173
187
  }
174
188
  export -f find_port
175
189
 
@@ -134,10 +134,45 @@ module OodCore
134
134
  <<-EOT.gsub(/^ {14}/, "")
135
135
  #{super}
136
136
 
137
+ # launches websockify in the background; waiting until the process
138
+ # has started proxying successfully.
139
+ start_websockify() {
140
+ local log_file="./websockify.log"
141
+ # launch websockify in background and redirect all output to a file.
142
+ #{websockify_cmd} $1 $2 &> $log_file &
143
+ local ws_pid=$!
144
+ local counter=0
145
+
146
+ # wait till websockify has successfully started
147
+ echo "[websockify]: pid: $ws_pid (proxying $1 ==> $2)" >&2
148
+ echo "[websockify]: log file: $log_file" >&2
149
+ echo "[websockify]: waiting ..." >&2
150
+ until grep -q -i "proxying from :$1" $log_file
151
+ do
152
+ if ! ps $ws_pid > /dev/null; then
153
+ echo "[websockify]: failed to launch!" >&2
154
+ return 1
155
+ elif [ $counter -ge 5 ]; then
156
+ # timeout after ~5 seconds
157
+ echo "[websockify]: timed-out :(!" >&2
158
+ return 1
159
+ else
160
+ sleep 1
161
+ ((counter=counter+1))
162
+ fi
163
+ done
164
+ echo "[websockify]: started successfully (proxying $1 ==> $2)" >&2
165
+ echo $ws_pid
166
+ return 0
167
+ }
168
+
137
169
  # Launch websockify websocket server
138
170
  echo "Starting websocket server..."
139
171
  websocket=$(find_port)
140
- #{websockify_cmd} -D ${websocket} localhost:${port}
172
+ [ $? -eq 0 ] || clean_up 1 # give up if port not found
173
+
174
+ ws_pid=$(start_websockify ${websocket} localhost:${port})
175
+ [ $? -eq 0 ] || clean_up 1 # give up if websockify launch failed
141
176
 
142
177
  # Set up background process that scans the log file for successful
143
178
  # connections by users, and change the password after every
@@ -173,6 +173,7 @@ module OodCore
173
173
  module load #{container_module}
174
174
  echo "Starting websocket server..."
175
175
  websocket=$(find_port)
176
+ [ $? -eq 0 ] || clean_up 1 # give up if port not found
176
177
  #{container_command} exec instance://#{@instance_name} #{websockify_cmd} -D ${websocket} localhost:${port}
177
178
 
178
179
  # Set up background process that scans the log file for successful
@@ -12,11 +12,13 @@ module OodCore
12
12
  # @param config [#to_h] the configuration for job adapter
13
13
  # @option config [Object] :bin (nil) Path to Fujitsu TCS resource manager binaries
14
14
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to Fujitsu TCS resource manager executables
15
+ # @option config [Object] :working_dir (nil) Working directory for submitting a batch script
15
16
  def self.build_fujitsu_tcs(config)
16
17
  c = config.to_h.symbolize_keys
17
18
  bin = c.fetch(:bin, nil)
18
19
  bin_overrides = c.fetch(:bin_overrides, {})
19
- fujitsu_tcs = Adapters::Fujitsu_TCS::Batch.new(bin: bin, bin_overrides: bin_overrides)
20
+ working_dir = c.fetch(:working_dir, nil)
21
+ fujitsu_tcs = Adapters::Fujitsu_TCS::Batch.new(bin: bin, bin_overrides: bin_overrides, working_dir: working_dir)
20
22
  Adapters::Fujitsu_TCS.new(fujitsu_tcs: fujitsu_tcs)
21
23
  end
22
24
  end
@@ -43,6 +45,11 @@ module OodCore
43
45
  # @return Hash<String, String>
44
46
  attr_reader :bin_overrides
45
47
 
48
+ # Working directory for submitting a batch script
49
+ # @example
50
+ # my_batch.working_dir #=> Dir.home or Dir.pwd
51
+ attr_reader :working_dir
52
+
46
53
  # The root exception class that all Fujitsu TCS specific exceptions inherit
47
54
  # from
48
55
  class Error < StandardError; end
@@ -52,9 +59,17 @@ module OodCore
52
59
 
53
60
  # @param bin [#to_s] path to Fujitsu TCS installation binaries
54
61
  # @param bin_overrides [#to_h] a hash of bin overrides to be used in job
55
- def initialize(bin: nil, bin_overrides: {})
62
+ # @param working_dir [String, nil] Working directory for submitting a batch script: nil (current directory) or "HOME" (home directory)
63
+ def initialize(bin: nil, bin_overrides: {}, working_dir: nil)
56
64
  @bin = Pathname.new(bin.to_s)
57
65
  @bin_overrides = bin_overrides
66
+ if working_dir == nil
67
+ @working_dir = Dir.pwd
68
+ elsif working_dir == "HOME"
69
+ @working_dir = Dir.home
70
+ else
71
+ raise(StandardError, "Unknown working_dir")
72
+ end
58
73
  end
59
74
 
60
75
  # Get a list of hashes detailing each of the jobs on the batch server
@@ -79,19 +94,19 @@ module OodCore
79
94
  # @raise [Error] if `pjstat` command exited unsuccessfully
80
95
  # @return [Array<Hash>] list of details for jobs
81
96
  def get_jobs(id: "", owner: nil)
82
- args = ["-s", "--data", "--choose=jid,jnam,rscg,st,std,stde,adt,sdt,nnumr,usr,elpl,elp"]
83
- args.concat ["--filter jid=" + id.to_s] unless id.to_s.empty?
84
- args.concat ["--filter usr=" + owner.to_s] unless owner.to_s.empty?
97
+ args = ["-A", "-s", "--data", "--choose=jid,jnam,rscg,st,std,stde,adt,sdt,nnumr,usr,elpl,elp"]
98
+ args.concat ["--filter", "jid=" + id.to_s] unless id.to_s.empty?
99
+ args.concat ["--filter", "usr=" + owner.to_s] unless owner.to_s.empty?
85
100
 
86
101
  StringIO.open(call("pjstat", *args)) do |output|
87
102
  output.gets() # Skip header
88
103
  jobs = []
89
104
  output.each_line do |line|
90
105
  l = line.split(",")
91
- jobs << {:JOB_ID => l[1], :JOB_NAME => l[2], :RSC_GRP => l[3].split(" ")[0],
106
+ jobs << {:JOB_ID => l[1], :JOB_NAME => l[2], :RSC_GRP => l[3].split[0],
92
107
  :ST => l[4], :STD => l[5], :STDE => l[6],
93
108
  :ACCEPT => l[7], :START_DATE => l[8], :NODES => l[9].split(":")[0],
94
- :USER => l[10], :ELAPSE_LIM => l[11], :ELAPSE_TIM => l[12].split(" ")[0] }
109
+ :USER => l[10], :ELAPSE_LIM => l[11], :ELAPSE_TIM => l[12].split[0] }
95
110
  end
96
111
  jobs
97
112
  end
@@ -136,16 +151,18 @@ module OodCore
136
151
  # @return [String] the id of the job that was created
137
152
  def submit_string(str, args: [])
138
153
  args = args.map(&:to_s)
139
- call("pjsub", *args, stdin: str.to_s).split(" ")[5]
154
+ call("pjsub", *args, stdin: str.to_s).split[5]
140
155
  end
141
156
 
142
157
  private
143
158
  # Call a forked Fujitsu TCS command
144
159
  def call(cmd, *args, stdin: "")
145
160
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
146
- args = args.map(&:to_s)
147
- o, e, s = Open3.capture3(cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
148
- s.success? ? o : raise(Error, e)
161
+ args = args.map(&:to_s)
162
+ Dir.chdir(working_dir) do
163
+ o, e, s = Open3.capture3(cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
164
+ s.success? ? o : raise(Error, e)
165
+ end
149
166
  end
150
167
  end
151
168
 
@@ -221,12 +238,12 @@ module OodCore
221
238
  else
222
239
  args.concat ["-e", script.error_path]
223
240
  end
224
- args.concat ["-L rscgrp=" + script.queue_name] unless script.queue_name.nil?
241
+ args.concat ["-L", "rscgrp=" + script.queue_name] unless script.queue_name.nil?
225
242
  args.concat ["-p", script.priority] unless script.priority.nil?
226
243
 
227
244
  # start_time: <%= Time.local(2023,11,22,13,4).to_i %> in form.yml.erb
228
245
  args.concat ["--at", script.start_time.localtime.strftime("%C%y%m%d%H%M")] unless script.start_time.nil?
229
- args.concat ["-L elapse=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
246
+ args.concat ["-L", "elapse=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
230
247
  args.concat ["--bulk", "--sparam", script.job_array_request] unless script.job_array_request.nil?
231
248
 
232
249
  # Set environment variables
@@ -368,7 +385,7 @@ module OodCore
368
385
  private
369
386
  # Convert duration to seconds
370
387
  def duration_in_seconds(time)
371
- return 0 if time.nil?
388
+ return 0 if time.nil? or time == "-"
372
389
  time, days = time.split("-").reverse
373
390
  days.to_i * 24 * 3600 +
374
391
  time.split(':').map { |v| v.to_i }.inject(0) { |total, v| total * 60 + v }
@@ -436,7 +436,8 @@ module OodCore
436
436
  'SE' => :completed, # SPECIAL_EXIT
437
437
  'ST' => :running, # STOPPED
438
438
  'S' => :suspended, # SUSPENDED
439
- 'TO' => :completed # TIMEOUT
439
+ 'TO' => :completed, # TIMEOUT
440
+ 'OOM' => :completed # OUT_OF_MEMORY
440
441
  }
441
442
 
442
443
  # @api private
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.23.5"
3
+ VERSION = "0.24.1"
4
4
  end
data/ood_core.gemspec CHANGED
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
27
27
  spec.add_runtime_dependency "rexml", "~> 3.2"
28
28
  spec.add_development_dependency "bundler", "~> 2.1"
29
- spec.add_development_dependency "rake", "~> 13.0.1"
29
+ spec.add_development_dependency "rake", "~> 13.1.0"
30
30
  spec.add_development_dependency "rspec", "~> 3.0"
31
31
  spec.add_development_dependency "pry", "~> 0.10"
32
32
  spec.add_development_dependency "timecop", "~> 0.8"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.5
4
+ version: 0.24.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2023-04-10 00:00:00.000000000 Z
13
+ date: 2023-11-29 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -80,14 +80,14 @@ dependencies:
80
80
  requirements:
81
81
  - - "~>"
82
82
  - !ruby/object:Gem::Version
83
- version: 13.0.1
83
+ version: 13.1.0
84
84
  type: :development
85
85
  prerelease: false
86
86
  version_requirements: !ruby/object:Gem::Requirement
87
87
  requirements:
88
88
  - - "~>"
89
89
  - !ruby/object:Gem::Version
90
- version: 13.0.1
90
+ version: 13.1.0
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: rspec
93
93
  requirement: !ruby/object:Gem::Requirement