ood_core 0.24.2 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '08f64f135dbbf6ea8058870d4339c1669586deed5be368d54dd0ff3ae476d8b0'
4
- data.tar.gz: 3f04d4f98590cbaa3ce502fca901e23e67638442b2daa6621bc5fb5c640a91ae
3
+ metadata.gz: 3ef6cd5a8a206393dcaed9d0121e3646dd6fc3d2b0405992c3fcfa5a745e9489
4
+ data.tar.gz: 16a5cf6f03ed2be4c563dafb7eaa9e2a04d465a4d8a3cea5d3bb14b634d65868
5
5
  SHA512:
6
- metadata.gz: 0ca0bdf738b58a61f45c030cc202b3a035d0f33782d7a125d5367fcd63e3fe5f83b70c60a61b772fd3ec5872742632af571c7d9be154845b4836e1fe8659d96c
7
- data.tar.gz: 2e7c8f01385fb2553e0ac5774442e3d4b1fca9cee72b1c8fd80b9a4436bcbf297ebce7a0a3e802a7e40fffab1a611c8d02ff74cf19d2c4a627ee5e755ed63581
6
+ metadata.gz: 563930fc05d105b46fa1f8942294a06b3fd3ffce665ba6aa93fae3a21759613fdf229c9cca4f5ec8bd99c7b57cbb9e7e4348420de47d4d0884a78dfd62c43b5f
7
+ data.tar.gz: dc88b7bc4a28e96725bcd2caf0e40659bca94a40ab1c50432af0eee2b649f4a48356e858a7f9ceca1a8a18f99c823bf9ffc4c7cecefecb9820d8ce7621c7c51b
data/CHANGELOG.md CHANGED
@@ -7,10 +7,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ # [0.25.0] - 03-27-2024
11
+
12
+ - [828](https://github.com/OSC/ood_core/pull/828) and [826](https://github.com/OSC/ood_core/pull/826)
13
+ add configurable heartbeats to noVNC connections.
14
+
10
15
  # [0.24.2] - 01-24-2024
11
16
 
12
17
  - [823](https://github.com/OSC/ood_core/pull/823) Corrected a mistake in converting duration to seconds.
13
- * [821](https://github.com/OSC/ood_core/pull/821) add container_start_args to pass options to the start command.
18
+ - [821](https://github.com/OSC/ood_core/pull/821) add container_start_args to pass options to the start command.
14
19
 
15
20
  ## [0.24.1] - 11-29-2023
16
21
 
@@ -24,6 +24,9 @@ module OodCore
24
24
  # @option context [#to_s] :websockify_cmd
25
25
  # ("${WEBSOCKIFY_CMD:-/opt/websockify/run}") the path to the
26
26
  # websockify script (assumes you don't modify `:after_script`)
27
+ # @option context [#to_s] :websockify_heartbeat_seconds
28
+ # ("${WEBSOCKIFY_HEARTBEAT_SECONDS:-30}") the websockify heartbeat
29
+ # duration in seconds. (assumes you don't modify `:after_script`)
27
30
  # @option context [#to_s] :vnc_log ("vnc.log") path to vnc server log
28
31
  # file (assumes you don't modify `:before_script` or `:after_script`)
29
32
  # @option context [#to_s] :vnc_passwd ("vnc.passwd") path to the file
@@ -130,6 +133,8 @@ module OodCore
130
133
  # successful connections so that the password can be reset
131
134
  def after_script
132
135
  websockify_cmd = context.fetch(:websockify_cmd, "${WEBSOCKIFY_CMD:-/opt/websockify/run}").to_s
136
+ websockify_hb = context.fetch(:websockify_heartbeat_seconds, "${WEBSOCKIFY_HEARTBEAT_SECONDS:-30}").to_s
137
+ websockify_timeout_seconds = context.fetch(:websockify_timeout_seconds, '${WEBSOCKIFY_TIMEOUT_SECONDS:-10}').to_s
133
138
 
134
139
  <<-EOT.gsub(/^ {14}/, "")
135
140
  #{super}
@@ -139,9 +144,10 @@ module OodCore
139
144
  start_websockify() {
140
145
  local log_file="./websockify.log"
141
146
  # launch websockify in background and redirect all output to a file.
142
- #{websockify_cmd} $1 $2 &> $log_file &
147
+ #{websockify_cmd} $1 --heartbeat=#{websockify_hb} $2 &> $log_file &
143
148
  local ws_pid=$!
144
149
  local counter=0
150
+ local max_timeout=#{websockify_timeout_seconds}
145
151
 
146
152
  # wait till websockify has successfully started
147
153
  echo "[websockify]: pid: $ws_pid (proxying $1 ==> $2)" >&2
@@ -152,9 +158,9 @@ module OodCore
152
158
  if ! ps $ws_pid > /dev/null; then
153
159
  echo "[websockify]: failed to launch!" >&2
154
160
  return 1
155
- elif [ $counter -ge 5 ]; then
156
- # timeout after ~5 seconds
157
- echo "[websockify]: timed-out :(!" >&2
161
+ elif [ $counter -ge $max_timeout ]; then
162
+ # timeout after max_timeout seconds
163
+ echo "[websockify]: timed-out after $max_timeout seconds :(!" >&2
158
164
  return 1
159
165
  else
160
166
  sleep 1
@@ -213,6 +213,14 @@ module OodCore
213
213
  def queues
214
214
  []
215
215
  end
216
+
217
+ # Return the list of nodes for this scheduler.
218
+ #
219
+ # Subclasses that do not override this return an empty array.
220
+ # @return [Array<NodeInfo>]
221
+ def nodes
222
+ []
223
+ end
216
224
  end
217
225
  end
218
226
  end
@@ -41,7 +41,7 @@ module OodCore
41
41
  # calculated from gres string
42
42
  # @return [Integer] the number of gpus in gres
43
43
  def self.gpus_from_gres(gres)
44
- gres.to_s.scan(/gpu:[^,]*(\d+)/).flatten.map(&:to_i).sum
44
+ gres.to_s.scan(/gpu[^(,]*[:=](\d+)/).flatten.map(&:to_i).sum
45
45
  end
46
46
 
47
47
  # Object used for simplified communication with a Slurm batch server
@@ -169,6 +169,7 @@ module OodCore
169
169
  # jobs << job
170
170
  #
171
171
  # assuming keys and values are same length! if not we have an error!
172
+ line = line.encode('UTF-8', invalid: :replace, undef: :replace)
172
173
  values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
173
174
  jobs << Hash[fields.keys.zip(values)] unless values.empty?
174
175
  end
@@ -327,13 +328,37 @@ module OodCore
327
328
 
328
329
  [].tap do |ret_arr|
329
330
  info_raw.each_line do |line|
330
- ret_arr << str_to_acct_info(line)
331
+ ret_arr << str_to_queue_info(line)
331
332
  end
332
333
  end
333
334
  end
334
335
 
336
+ def all_sinfo_node_fields
337
+ {
338
+ procs: '%c',
339
+ name: '%n',
340
+ features: '%f'
341
+ }
342
+ end
343
+
344
+ def nodes
345
+ args = all_sinfo_node_fields.values.join(UNIT_SEPARATOR)
346
+ output = call('sinfo', '-ho', "#{RECORD_SEPARATOR}#{args}")
347
+
348
+ output.each_line(RECORD_SEPARATOR).map do |line|
349
+ values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
350
+
351
+ next if values.empty?
352
+
353
+ data = Hash[all_sinfo_node_fields.keys.zip(values)]
354
+ data[:name] = data[:name].to_s.split(',').first
355
+ data[:features] = data[:features].to_s.split(',')
356
+ NodeInfo.new(**data)
357
+ end.compact
358
+ end
359
+
335
360
  private
336
- def str_to_acct_info(line)
361
+ def str_to_queue_info(line)
337
362
  hsh = line.split(' ').map do |token|
338
363
  m = token.match(/^(?<key>\w+)=(?<value>.+)$/)
339
364
  [m[:key], m[:value]]
@@ -349,6 +374,7 @@ module OodCore
349
374
 
350
375
 
351
376
  hsh[:deny_accounts] = hsh[:DenyAccounts].nil? ? [] : hsh[:DenyAccounts].to_s.split(',')
377
+ hsh[:tres] = hsh[:TRES].nil? ? {} : hsh[:TRES].to_s.split(',').map { |str| str.split('=') }.to_h
352
378
 
353
379
  OodCore::Job::QueueInfo.new(**hsh)
354
380
  end
@@ -669,6 +695,10 @@ module OodCore
669
695
  @slurm.queues
670
696
  end
671
697
 
698
+ def nodes
699
+ @slurm.nodes
700
+ end
701
+
672
702
  private
673
703
  # Convert duration to seconds
674
704
  def duration_in_seconds(time)
@@ -204,7 +204,7 @@ class OodCore::Job::Adapters::LinuxSystemd::Launcher
204
204
 
205
205
  # List all Systemd sessions on destination_host started by this adapter
206
206
  def list_remote_systemd_session(destination_host)
207
- cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-*"])
207
+ cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-\\*"])
208
208
 
209
209
  # individual units are separated with an empty line
210
210
  call(*cmd).split("\n\n").map do |oneunit|
@@ -10,17 +10,26 @@ module OodCore
10
10
  # @return [Integer, nil] number of procs
11
11
  attr_reader :procs
12
12
 
13
+ # The features associated with this node.
14
+ # @return [Array<String>] the node's features (empty when none were given)
15
+ attr_reader :features
16
+
13
17
  # @param name [#to_s] node name
14
18
  # @param procs [#to_i, nil] number of procs
15
- def initialize(name:, procs: nil, **_)
19
+ # @param features [#to_a, []] list of features
20
+ def initialize(name:, procs: nil, features: [], **_)
16
21
  @name = name.to_s
17
22
  @procs = procs && procs.to_i
23
+ @features = features.to_a
18
24
  end
19
25
 
20
26
  # Convert object to hash
21
27
  # @return [Hash] object as hash
22
28
  def to_h
23
- { name: name, procs: procs }
29
+ instance_variables.map do |var|
30
+ name = var.to_s.gsub('@', '').to_sym
31
+ [name, send(name)]
32
+ end.to_h
24
33
  end
25
34
 
26
35
  # The comparison operator
@@ -20,9 +20,13 @@ class OodCore::Job::QueueInfo
20
20
  # The accounts that are not allowed to use this queue.
21
21
  attr_reader :deny_accounts
22
22
 
23
+ # A Hash of Trackable Resources and their values.
24
+ attr_reader :tres
25
+
23
26
  def initialize(**opts)
24
27
  @name = opts.fetch(:name, 'unknown')
25
28
  @qos = opts.fetch(:qos, [])
29
+ @tres = opts.fetch(:tres, {})
26
30
 
27
31
  allow_accounts = opts.fetch(:allow_accounts, nil)
28
32
  @allow_accounts = if allow_accounts.nil?
@@ -42,4 +46,8 @@ class OodCore::Job::QueueInfo
42
46
  [name, send(name)]
43
47
  end.to_h
44
48
  end
49
+
50
+ def gpu?
51
+ tres.keys.any? { |name| name.to_s.match?(%r{^gres/gpu($|:)}i) }
52
+ end
45
53
  end
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.24.2"
3
+ VERSION = "0.26.1"
4
4
  end
data/ood_core.gemspec CHANGED
@@ -23,10 +23,10 @@ Gem::Specification.new do |spec|
23
23
  spec.required_ruby_version = ">= 2.5.0"
24
24
 
25
25
  spec.add_runtime_dependency "ood_support", "~> 0.0.2"
26
- spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
26
+ spec.add_runtime_dependency "ffi", "~> 1.16.3"
27
27
  spec.add_runtime_dependency "rexml", "~> 3.2"
28
28
  spec.add_development_dependency "bundler", "~> 2.1"
29
- spec.add_development_dependency "rake", "~> 13.1.0"
29
+ spec.add_development_dependency "rake", "~> 13.2.0"
30
30
  spec.add_development_dependency "rspec", "~> 3.0"
31
31
  spec.add_development_dependency "pry", "~> 0.10"
32
32
  spec.add_development_dependency "timecop", "~> 0.8"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.24.2
4
+ version: 0.26.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2024-01-31 00:00:00.000000000 Z
13
+ date: 2024-07-31 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -32,20 +32,14 @@ dependencies:
32
32
  requirements:
33
33
  - - "~>"
34
34
  - !ruby/object:Gem::Version
35
- version: '1.9'
36
- - - ">="
37
- - !ruby/object:Gem::Version
38
- version: 1.9.6
35
+ version: 1.16.3
39
36
  type: :runtime
40
37
  prerelease: false
41
38
  version_requirements: !ruby/object:Gem::Requirement
42
39
  requirements:
43
40
  - - "~>"
44
41
  - !ruby/object:Gem::Version
45
- version: '1.9'
46
- - - ">="
47
- - !ruby/object:Gem::Version
48
- version: 1.9.6
42
+ version: 1.16.3
49
43
  - !ruby/object:Gem::Dependency
50
44
  name: rexml
51
45
  requirement: !ruby/object:Gem::Requirement
@@ -80,14 +74,14 @@ dependencies:
80
74
  requirements:
81
75
  - - "~>"
82
76
  - !ruby/object:Gem::Version
83
- version: 13.1.0
77
+ version: 13.2.0
84
78
  type: :development
85
79
  prerelease: false
86
80
  version_requirements: !ruby/object:Gem::Requirement
87
81
  requirements:
88
82
  - - "~>"
89
83
  - !ruby/object:Gem::Version
90
- version: 13.1.0
84
+ version: 13.2.0
91
85
  - !ruby/object:Gem::Dependency
92
86
  name: rspec
93
87
  requirement: !ruby/object:Gem::Requirement