ood_core 0.25.0 → 0.26.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad05a0d8cf307f0237329f0e33e8a66f809781e5b164ddf20b9f03d65078451d
4
- data.tar.gz: 396dde55b4b5aaa321ccaf6fbc3cb049d01239335533c35c39ee9a11a1aa5d9b
3
+ metadata.gz: 3ef6cd5a8a206393dcaed9d0121e3646dd6fc3d2b0405992c3fcfa5a745e9489
4
+ data.tar.gz: 16a5cf6f03ed2be4c563dafb7eaa9e2a04d465a4d8a3cea5d3bb14b634d65868
5
5
  SHA512:
6
- metadata.gz: 4a1d7cb53117266c577eaa7792d867508f3e94ef78f2f9af7817882ad1f6165d5d43e8179e17163a83dd438627215700501ea75281204ee6b7d900649c77de9f
7
- data.tar.gz: 4aaf01f40d43e7b496a053e2ef85548769beb169c78e954cd055659ecc08f2e8ef7221561f94a184eb986b9f13d52c2efda30ffdd07faeb697fab4d9d4411ce7
6
+ metadata.gz: 563930fc05d105b46fa1f8942294a06b3fd3ffce665ba6aa93fae3a21759613fdf229c9cca4f5ec8bd99c7b57cbb9e7e4348420de47d4d0884a78dfd62c43b5f
7
+ data.tar.gz: dc88b7bc4a28e96725bcd2caf0e40659bca94a40ab1c50432af0eee2b649f4a48356e858a7f9ceca1a8a18f99c823bf9ffc4c7cecefecb9820d8ce7621c7c51b
@@ -134,7 +134,8 @@ module OodCore
134
134
  def after_script
135
135
  websockify_cmd = context.fetch(:websockify_cmd, "${WEBSOCKIFY_CMD:-/opt/websockify/run}").to_s
136
136
  websockify_hb = context.fetch(:websockify_heartbeat_seconds, "${WEBSOCKIFY_HEARTBEAT_SECONDS:-30}").to_s
137
-
137
+ websockify_timeout_seconds = context.fetch(:websockify_timeout_seconds, '${WEBSOCKIFY_TIMEOUT_SECONDS:-10}').to_s
138
+
138
139
  <<-EOT.gsub(/^ {14}/, "")
139
140
  #{super}
140
141
 
@@ -146,6 +147,7 @@ module OodCore
146
147
  #{websockify_cmd} $1 --heartbeat=#{websockify_hb} $2 &> $log_file &
147
148
  local ws_pid=$!
148
149
  local counter=0
150
+ local max_timeout=#{websockify_timeout_seconds}
149
151
 
150
152
  # wait till websockify has successfully started
151
153
  echo "[websockify]: pid: $ws_pid (proxying $1 ==> $2)" >&2
@@ -156,9 +158,9 @@ module OodCore
156
158
  if ! ps $ws_pid > /dev/null; then
157
159
  echo "[websockify]: failed to launch!" >&2
158
160
  return 1
159
- elif [ $counter -ge 5 ]; then
160
- # timeout after ~5 seconds
161
- echo "[websockify]: timed-out :(!" >&2
161
+ elif [ $counter -ge $max_timeout ]; then
162
+ # timeout after max_timeout seconds
163
+ echo "[websockify]: timed-out after $max_timeout seconds :(!" >&2
162
164
  return 1
163
165
  else
164
166
  sleep 1
@@ -213,6 +213,14 @@ module OodCore
213
213
  def queues
214
214
  []
215
215
  end
216
+
217
+ # Return the list of nodes for this scheduler.
218
+ #
219
+ # Subclasses that do not implement this will return an empty array.
220
+ # @return [Array<NodeInfo>]
221
+ def nodes
222
+ []
223
+ end
216
224
  end
217
225
  end
218
226
  end
@@ -41,7 +41,7 @@ module OodCore
41
41
  # calculated from gres string
42
42
  # @return [Integer] the number of gpus in gres
43
43
  def self.gpus_from_gres(gres)
44
- gres.to_s.scan(/gpu:[^,]*(\d+)/).flatten.map(&:to_i).sum
44
+ gres.to_s.scan(/gpu[^(,]*[:=](\d+)/).flatten.map(&:to_i).sum
45
45
  end
46
46
 
47
47
  # Object used for simplified communication with a Slurm batch server
@@ -169,6 +169,7 @@ module OodCore
169
169
  # jobs << job
170
170
  #
171
171
  # assuming keys and values are same length! if not we have an error!
172
+ line = line.encode('UTF-8', invalid: :replace, undef: :replace)
172
173
  values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
173
174
  jobs << Hash[fields.keys.zip(values)] unless values.empty?
174
175
  end
@@ -327,13 +328,37 @@ module OodCore
327
328
 
328
329
  [].tap do |ret_arr|
329
330
  info_raw.each_line do |line|
330
- ret_arr << str_to_acct_info(line)
331
+ ret_arr << str_to_queue_info(line)
331
332
  end
332
333
  end
333
334
  end
334
335
 
336
+ def all_sinfo_node_fields
337
+ {
338
+ procs: '%c',
339
+ name: '%n',
340
+ features: '%f'
341
+ }
342
+ end
343
+
344
+ def nodes
345
+ args = all_sinfo_node_fields.values.join(UNIT_SEPARATOR)
346
+ output = call('sinfo', '-ho', "#{RECORD_SEPARATOR}#{args}")
347
+
348
+ output.each_line(RECORD_SEPARATOR).map do |line|
349
+ values = line.chomp(RECORD_SEPARATOR).strip.split(UNIT_SEPARATOR)
350
+
351
+ next if values.empty?
352
+
353
+ data = Hash[all_sinfo_node_fields.keys.zip(values)]
354
+ data[:name] = data[:name].to_s.split(',').first
355
+ data[:features] = data[:features].to_s.split(',')
356
+ NodeInfo.new(**data)
357
+ end.compact
358
+ end
359
+
335
360
  private
336
- def str_to_acct_info(line)
361
+ def str_to_queue_info(line)
337
362
  hsh = line.split(' ').map do |token|
338
363
  m = token.match(/^(?<key>\w+)=(?<value>.+)$/)
339
364
  [m[:key], m[:value]]
@@ -349,6 +374,7 @@ module OodCore
349
374
 
350
375
 
351
376
  hsh[:deny_accounts] = hsh[:DenyAccounts].nil? ? [] : hsh[:DenyAccounts].to_s.split(',')
377
+ hsh[:tres] = hsh[:TRES].nil? ? {} : hsh[:TRES].to_s.split(',').map { |str| str.split('=') }.to_h
352
378
 
353
379
  OodCore::Job::QueueInfo.new(**hsh)
354
380
  end
@@ -669,6 +695,10 @@ module OodCore
669
695
  @slurm.queues
670
696
  end
671
697
 
698
+ def nodes
699
+ @slurm.nodes
700
+ end
701
+
672
702
  private
673
703
  # Convert duration to seconds
674
704
  def duration_in_seconds(time)
@@ -204,7 +204,7 @@ class OodCore::Job::Adapters::LinuxSystemd::Launcher
204
204
 
205
205
  # List all Systemd sessions on destination_host started by this adapter
206
206
  def list_remote_systemd_session(destination_host)
207
- cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-*"])
207
+ cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-\\*"])
208
208
 
209
209
  # individual units are separated with an empty line
210
210
  call(*cmd).split("\n\n").map do |oneunit|
@@ -10,17 +10,26 @@ module OodCore
10
10
  # @return [Integer, nil] number of procs
11
11
  attr_reader :procs
12
12
 
13
+ # The features associated with this node.
14
+ # @return [Array<String>, []]
15
+ attr_reader :features
16
+
13
17
  # @param name [#to_s] node name
14
18
  # @param procs [#to_i, nil] number of procs
15
- def initialize(name:, procs: nil, **_)
19
+ # @param features [#to_a, []] list of features
20
+ def initialize(name:, procs: nil, features: [], **_)
16
21
  @name = name.to_s
17
22
  @procs = procs && procs.to_i
23
+ @features = features.to_a
18
24
  end
19
25
 
20
26
  # Convert object to hash
21
27
  # @return [Hash] object as hash
22
28
  def to_h
23
- { name: name, procs: procs }
29
+ instance_variables.map do |var|
30
+ name = var.to_s.gsub('@', '').to_sym
31
+ [name, send(name)]
32
+ end.to_h
24
33
  end
25
34
 
26
35
  # The comparison operator
@@ -20,9 +20,13 @@ class OodCore::Job::QueueInfo
20
20
  # The accounts that are not allowed to use this queue.
21
21
  attr_reader :deny_accounts
22
22
 
23
+ # A Hash of Trackable Resources and their values.
24
+ attr_reader :tres
25
+
23
26
  def initialize(**opts)
24
27
  @name = opts.fetch(:name, 'unknown')
25
28
  @qos = opts.fetch(:qos, [])
29
+ @tres = opts.fetch(:tres, {})
26
30
 
27
31
  allow_accounts = opts.fetch(:allow_accounts, nil)
28
32
  @allow_accounts = if allow_accounts.nil?
@@ -42,4 +46,8 @@ class OodCore::Job::QueueInfo
42
46
  [name, send(name)]
43
47
  end.to_h
44
48
  end
49
+
50
+ def gpu?
51
+ tres.keys.any? { |name| name.to_s.match?(%r{^gres/gpu($|:)}i) }
52
+ end
45
53
  end
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.25.0"
3
+ VERSION = "0.26.1"
4
4
  end
data/ood_core.gemspec CHANGED
@@ -23,10 +23,10 @@ Gem::Specification.new do |spec|
23
23
  spec.required_ruby_version = ">= 2.5.0"
24
24
 
25
25
  spec.add_runtime_dependency "ood_support", "~> 0.0.2"
26
- spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
26
+ spec.add_runtime_dependency "ffi", "~> 1.16.3"
27
27
  spec.add_runtime_dependency "rexml", "~> 3.2"
28
28
  spec.add_development_dependency "bundler", "~> 2.1"
29
- spec.add_development_dependency "rake", "~> 13.1.0"
29
+ spec.add_development_dependency "rake", "~> 13.2.0"
30
30
  spec.add_development_dependency "rspec", "~> 3.0"
31
31
  spec.add_development_dependency "pry", "~> 0.10"
32
32
  spec.add_development_dependency "timecop", "~> 0.8"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.25.0
4
+ version: 0.26.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2024-03-27 00:00:00.000000000 Z
13
+ date: 2024-07-31 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -32,20 +32,14 @@ dependencies:
32
32
  requirements:
33
33
  - - "~>"
34
34
  - !ruby/object:Gem::Version
35
- version: '1.9'
36
- - - ">="
37
- - !ruby/object:Gem::Version
38
- version: 1.9.6
35
+ version: 1.16.3
39
36
  type: :runtime
40
37
  prerelease: false
41
38
  version_requirements: !ruby/object:Gem::Requirement
42
39
  requirements:
43
40
  - - "~>"
44
41
  - !ruby/object:Gem::Version
45
- version: '1.9'
46
- - - ">="
47
- - !ruby/object:Gem::Version
48
- version: 1.9.6
42
+ version: 1.16.3
49
43
  - !ruby/object:Gem::Dependency
50
44
  name: rexml
51
45
  requirement: !ruby/object:Gem::Requirement
@@ -80,14 +74,14 @@ dependencies:
80
74
  requirements:
81
75
  - - "~>"
82
76
  - !ruby/object:Gem::Version
83
- version: 13.1.0
77
+ version: 13.2.0
84
78
  type: :development
85
79
  prerelease: false
86
80
  version_requirements: !ruby/object:Gem::Requirement
87
81
  requirements:
88
82
  - - "~>"
89
83
  - !ruby/object:Gem::Version
90
- version: 13.1.0
84
+ version: 13.2.0
91
85
  - !ruby/object:Gem::Dependency
92
86
  name: rspec
93
87
  requirement: !ruby/object:Gem::Requirement