ood_core 0.30.1 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ood_core/helpers/openstack.rb +107 -0
- data/lib/ood_core/job/adapters/coder/openstack_credentials.rb +2 -1
- data/lib/ood_core/job/adapters/coder.rb +2 -5
- data/lib/ood_core/job/adapters/pbspro.rb +56 -0
- data/lib/ood_core/job/adapters/slurm.rb +24 -15
- data/lib/ood_core/version.rb +1 -1
- data/lib/ood_core.rb +5 -0
- data/ood_core.gemspec +2 -2
- metadata +7 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d1ad113f64d7ee39779802e7386bf753a2323969302963c58456b51a05f132b5
|
|
4
|
+
data.tar.gz: 472bcdea0fbd7096171a8338507e0e9c6554d6a4ceea3fb12f46a4a59f48cf7e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6436bdfb152bd592627f6fb710d44fc08e10d240a398199081ffd0ba1e3fd3a6f4fbaf8fb56adb88656bb58557eb6630702baea2e07ab1e5072f9180e02da967
|
|
7
|
+
data.tar.gz: 12eb878a31d953d7fb4c8bb6a0e67febf42b5c04aa397aa9fb17329d9aae33601e2d7358c3799ede4be5ef8af84fc9cb8d44e022847a4d42f3162788e5db828d
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
require "fog/openstack"
|
|
2
|
+
require "json"
|
|
3
|
+
require "etc"
|
|
4
|
+
|
|
5
|
+
module OodCore
|
|
6
|
+
class OpenStackHelper
|
|
7
|
+
attr_reader :auth_url, :openstack_instance
|
|
8
|
+
|
|
9
|
+
def initialize(token_file:, openstack_instance:)
|
|
10
|
+
@token_file = token_file
|
|
11
|
+
@openstack_instance = openstack_instance
|
|
12
|
+
@auth_url = "https://identity.#{openstack_instance}/v3"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Load token data from the token file
|
|
16
|
+
# @return [Hash] Parsed token JSON or nil if file does not exist
|
|
17
|
+
def load_token_data
|
|
18
|
+
return nil unless File.exist?(@token_file)
|
|
19
|
+
JSON.parse(File.read(@token_file))
|
|
20
|
+
rescue Errno::ENOENT => e
|
|
21
|
+
puts "Error loading token: #{e}"
|
|
22
|
+
nil
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Get access token from loaded credentials
|
|
26
|
+
# @return [String] The token ID
|
|
27
|
+
def access_token
|
|
28
|
+
load_token_data&.[]("id")
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Get user ID from loaded credentials
|
|
32
|
+
# @return [String] The user ID
|
|
33
|
+
def user_id
|
|
34
|
+
load_token_data&.[]("user_id")
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Fetch all projects for the authenticated user
|
|
38
|
+
# @return [Array<Hash>] Array of project hashes with id and name
|
|
39
|
+
def fetch_user_projects
|
|
40
|
+
connection_params = {
|
|
41
|
+
openstack_auth_url: auth_url,
|
|
42
|
+
openstack_management_url: auth_url,
|
|
43
|
+
openstack_auth_token: access_token,
|
|
44
|
+
}
|
|
45
|
+
identity = Fog::OpenStack::Identity.new(connection_params)
|
|
46
|
+
identity.list_user_projects(user_id).body["projects"]
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Fetch all flavors across all projects for a user
|
|
50
|
+
# @return [Array<Array>] Sorted array of [display_string, flavor_name, project_id]
|
|
51
|
+
def fetch_all_flavors
|
|
52
|
+
flavors = []
|
|
53
|
+
|
|
54
|
+
fetch_user_projects.each do |project|
|
|
55
|
+
scoped_token = scope_token_to_project(access_token, project['id'])
|
|
56
|
+
|
|
57
|
+
compute_connection_params = {
|
|
58
|
+
openstack_auth_url: auth_url,
|
|
59
|
+
openstack_project_name: project['name'],
|
|
60
|
+
openstack_management_url: "https://compute.#{openstack_instance}/v2.1/#{project['id']}",
|
|
61
|
+
openstack_auth_token: scoped_token,
|
|
62
|
+
}
|
|
63
|
+
compute = Fog::OpenStack::Compute.new(compute_connection_params)
|
|
64
|
+
|
|
65
|
+
compute.flavors.each do |flavor|
|
|
66
|
+
flavors << [
|
|
67
|
+
"#{flavor.name} - #{flavor.vcpus}VCPUS, #{flavor.ram/1024}GB RAM, #{flavor.disk}GB disk",
|
|
68
|
+
flavor.name,
|
|
69
|
+
project['id']
|
|
70
|
+
]
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
flavors.sort
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Convenience method that returns both projects and flavors
|
|
78
|
+
# @return [Array] Array containing [projects, flavors]
|
|
79
|
+
def load_projects_and_flavors
|
|
80
|
+
[fetch_user_projects, fetch_all_flavors]
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Scope token to a specific project
|
|
84
|
+
# @param access_token [String] The unscoped token ID
|
|
85
|
+
# @param project_id [String] The project ID to scope to
|
|
86
|
+
# @return [String] The scoped token ID
|
|
87
|
+
def scope_token_to_project(access_token, project_id)
|
|
88
|
+
auth = {
|
|
89
|
+
"auth": {
|
|
90
|
+
"identity": {
|
|
91
|
+
"methods": ["token"],
|
|
92
|
+
"token": { "id": access_token }
|
|
93
|
+
},
|
|
94
|
+
"scope": { "project": { "id": project_id } }
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
connection_params = {
|
|
99
|
+
openstack_auth_url: auth_url,
|
|
100
|
+
openstack_management_url: auth_url,
|
|
101
|
+
openstack_auth_token: access_token,
|
|
102
|
+
}
|
|
103
|
+
identity = Fog::OpenStack::Identity.new(connection_params)
|
|
104
|
+
identity.tokens.authenticate(auth)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
@@ -97,6 +97,7 @@ class OpenStackCredentials < CredentialsInterface
|
|
|
97
97
|
credentials_to_destroy.destroy
|
|
98
98
|
rescue Excon::Error::Forbidden => e
|
|
99
99
|
puts "Error destroying application credentials with id #{os_app_credentials['id']} #{e}"
|
|
100
|
+
raise JobAdapterError, e.message
|
|
100
101
|
end
|
|
101
102
|
end
|
|
102
103
|
|
|
@@ -115,4 +116,4 @@ class OpenStackCredentials < CredentialsInterface
|
|
|
115
116
|
def find_os_application_credentials(connection, os_app_credentials)
|
|
116
117
|
connection.application_credentials.find_by_id(os_app_credentials['id'], os_app_credentials['user_id'])
|
|
117
118
|
end
|
|
118
|
-
end
|
|
119
|
+
end
|
|
@@ -79,10 +79,7 @@ module OodCore
|
|
|
79
79
|
# adapters can get by without populating the entire Info object
|
|
80
80
|
# @return [Array<Info>] information describing submitted jobs
|
|
81
81
|
def info_all(attrs: nil)
|
|
82
|
-
|
|
83
|
-
batch.method_missing(attrs: attrs)
|
|
84
|
-
rescue Batch::Error => e
|
|
85
|
-
raise JobAdapterError, e.message
|
|
82
|
+
raise NotImplementedError, 'subclass did not define #info_all'
|
|
86
83
|
end
|
|
87
84
|
|
|
88
85
|
# Whether the adapter supports job arrays
|
|
@@ -125,4 +122,4 @@ module OodCore
|
|
|
125
122
|
end
|
|
126
123
|
end
|
|
127
124
|
end
|
|
128
|
-
end
|
|
125
|
+
end
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require "time"
|
|
2
|
+
require "json"
|
|
2
3
|
require "ood_core/refinements/hash_extensions"
|
|
3
4
|
require "ood_core/job/adapters/helper"
|
|
4
5
|
|
|
@@ -85,6 +86,50 @@ module OodCore
|
|
|
85
86
|
@bin_overrides = bin_overrides
|
|
86
87
|
end
|
|
87
88
|
|
|
89
|
+
# Get a ClusterInfo object containing information about the given cluster
|
|
90
|
+
# @return [ClusterInfo] object containing cluster details
|
|
91
|
+
def get_cluster_info
|
|
92
|
+
args = ["-a", "-F", "json"]
|
|
93
|
+
stdout = call("pbsnodes", *args)
|
|
94
|
+
node_info = JSON.parse(stdout)
|
|
95
|
+
|
|
96
|
+
# Initialize cluster info values
|
|
97
|
+
total_nodes = 0
|
|
98
|
+
allocated_nodes = 0
|
|
99
|
+
total_cpus = 0
|
|
100
|
+
allocated_cpus = 0
|
|
101
|
+
total_gpus = 0
|
|
102
|
+
allocated_gpus = 0
|
|
103
|
+
|
|
104
|
+
nodes = node_info.fetch('nodes', {})
|
|
105
|
+
|
|
106
|
+
nodes.each do |_node_name, node|
|
|
107
|
+
total_nodes += 1
|
|
108
|
+
resources_avail = node.fetch('resources_available', {})
|
|
109
|
+
total_cpus += get_node_resource(resources_avail, 'ncpus')
|
|
110
|
+
total_gpus += get_node_resource(resources_avail, 'ngpus')
|
|
111
|
+
|
|
112
|
+
# Resources assigned (currently allocated to jobs)
|
|
113
|
+
resources_assigned = node.fetch('resources_assigned', {})
|
|
114
|
+
ncpus_assigned = get_node_resource(resources_assigned, 'ncpus')
|
|
115
|
+
ngpus_assigned = get_node_resource(resources_assigned, 'ngpus')
|
|
116
|
+
|
|
117
|
+
allocated_cpus += ncpus_assigned
|
|
118
|
+
allocated_gpus += ngpus_assigned
|
|
119
|
+
|
|
120
|
+
# A node is allocated if at least one CPU has been assigned to a job
|
|
121
|
+
allocated_nodes += 1 if ncpus_assigned > 0
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
ClusterInfo.new(active_nodes: allocated_nodes,
|
|
125
|
+
total_nodes: total_nodes,
|
|
126
|
+
active_processors: allocated_cpus,
|
|
127
|
+
total_processors: total_cpus,
|
|
128
|
+
active_gpus: allocated_gpus,
|
|
129
|
+
total_gpus: total_gpus
|
|
130
|
+
)
|
|
131
|
+
end
|
|
132
|
+
|
|
88
133
|
# Get a list of hashes detailing each of the jobs on the batch server
|
|
89
134
|
# @example Status info for all jobs
|
|
90
135
|
# my_batch.get_jobs
|
|
@@ -174,6 +219,13 @@ module OodCore
|
|
|
174
219
|
end
|
|
175
220
|
|
|
176
221
|
private
|
|
222
|
+
# Get a resource value from a node's resources hash, returning 0 if the
|
|
223
|
+
# resource is not present
|
|
224
|
+
def get_node_resource(resources, key)
|
|
225
|
+
val = resources.fetch(key, 0)
|
|
226
|
+
val.to_i
|
|
227
|
+
end
|
|
228
|
+
|
|
177
229
|
# Call a forked PBS Pro command for a given batch server
|
|
178
230
|
def call(cmd, *args, env: {}, stdin: "", chdir: nil)
|
|
179
231
|
cmd = cmd.to_s
|
|
@@ -299,6 +351,10 @@ module OodCore
|
|
|
299
351
|
raise JobAdapterError, e.message
|
|
300
352
|
end
|
|
301
353
|
|
|
354
|
+
def cluster_info
|
|
355
|
+
@pbspro.get_cluster_info
|
|
356
|
+
end
|
|
357
|
+
|
|
302
358
|
# Retrieve info for all jobs from the resource manager
|
|
303
359
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
|
304
360
|
# @return [Array<Info>] information describing submitted jobs
|
|
@@ -47,7 +47,7 @@ module OodCore
|
|
|
47
47
|
# calculated from gres string
|
|
48
48
|
# @return [Integer] the number of gpus in gres
|
|
49
49
|
def self.gpus_from_gres(gres)
|
|
50
|
-
gres.to_s.scan(/gpu[
|
|
50
|
+
gres.to_s.scan(/gpu[s:]*[\w()-]*[=:]?(\d+)(?:[(,]|$)/).flatten.map(&:to_i).sum
|
|
51
51
|
end
|
|
52
52
|
|
|
53
53
|
# Object used for simplified communication with a Slurm batch server
|
|
@@ -121,19 +121,28 @@ module OodCore
|
|
|
121
121
|
# Get a ClusterInfo object containing information about the given cluster
|
|
122
122
|
# @return [ClusterInfo] object containing cluster details
|
|
123
123
|
def get_cluster_info
|
|
124
|
-
node_cpu_info = call("sinfo", "-aho %
|
|
124
|
+
node_cpu_info = call("sinfo", "-aho %F/%C").strip.split('/').map(&:to_i)
|
|
125
125
|
gres_length = call("sinfo", "-o %G").lines.map(&:strip).map(&:length).max + 2
|
|
126
|
-
gres_lines = call("sinfo", "-ahNO
|
|
127
|
-
.lines.uniq.map(&:split)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
126
|
+
gres_lines = call("sinfo", "-ahNO", "nodehost,gres:#{gres_length},gresused:#{gres_length},statelong")
|
|
127
|
+
.lines.uniq.reject { |line| line.match?(/maint|drain|down/i) }.map(&:split)
|
|
128
|
+
|
|
129
|
+
node_info = sinfo_headers.zip(node_cpu_info).to_h
|
|
130
|
+
ClusterInfo.new(active_nodes: node_info['nodes_allocated'],
|
|
131
|
+
total_nodes: node_info['nodes_total'],
|
|
132
|
+
active_processors: node_info['cpus_allocated'],
|
|
133
|
+
total_processors: node_info['cpus_total'],
|
|
132
134
|
active_gpus: gres_lines.sum { |line| Slurm.gpus_from_gres(line[2]) },
|
|
133
135
|
total_gpus: gres_lines.sum { |line| Slurm.gpus_from_gres(line[1]) }
|
|
134
136
|
)
|
|
135
137
|
end
|
|
136
138
|
|
|
139
|
+
def sinfo_headers
|
|
140
|
+
[
|
|
141
|
+
'nodes_allocated', 'nodes_idle', 'nodes_other', 'nodes_total',
|
|
142
|
+
'cpus_allocated', 'cpus_idle', 'cpus_other', 'cpus_total'
|
|
143
|
+
]
|
|
144
|
+
end
|
|
145
|
+
|
|
137
146
|
# Get a list of hashes detailing each of the jobs on the batch server
|
|
138
147
|
# @example Status info for all jobs
|
|
139
148
|
# my_batch.get_jobs
|
|
@@ -568,7 +577,7 @@ module OodCore
|
|
|
568
577
|
'OOM' => :completed, # OUT_OF_MEMORY
|
|
569
578
|
|
|
570
579
|
'BOOT_FAIL' => :completed,
|
|
571
|
-
'
|
|
580
|
+
'CANCELLED' => :completed,
|
|
572
581
|
'COMPLETED' => :completed,
|
|
573
582
|
'DEADLINE' => :completed,
|
|
574
583
|
'FAILED' => :completed,
|
|
@@ -870,11 +879,11 @@ module OodCore
|
|
|
870
879
|
"%02d:%02d:%02d" % [time/3600, time/60%60, time%60]
|
|
871
880
|
end
|
|
872
881
|
|
|
873
|
-
#
|
|
882
|
+
# safely parse date time string, return nil when there are errors.
|
|
874
883
|
def parse_time(date_time)
|
|
875
|
-
return nil if date_time.empty? || %w[N/A NONE UNKNOWN].include?(date_time.to_s.upcase)
|
|
876
|
-
|
|
877
884
|
Time.parse(date_time)
|
|
885
|
+
rescue ArgumentError
|
|
886
|
+
nil
|
|
878
887
|
end
|
|
879
888
|
|
|
880
889
|
# Convert host list string to individual nodes
|
|
@@ -901,7 +910,7 @@ module OodCore
|
|
|
901
910
|
|
|
902
911
|
# Determine state from Slurm state code
|
|
903
912
|
def get_state(st)
|
|
904
|
-
STATE_MAP.fetch(st, :undetermined)
|
|
913
|
+
STATE_MAP.fetch(st.split.first, :undetermined)
|
|
905
914
|
end
|
|
906
915
|
|
|
907
916
|
# Parse hash describing Slurm job status
|
|
@@ -928,8 +937,8 @@ module OodCore
|
|
|
928
937
|
wallclock_time: duration_in_seconds(v[:time_used]),
|
|
929
938
|
wallclock_limit: duration_in_seconds(v[:time_limit]),
|
|
930
939
|
cpu_time: nil,
|
|
931
|
-
submission_time:
|
|
932
|
-
dispatch_time: (v[:start_time]
|
|
940
|
+
submission_time: parse_time(v[:submit_time]),
|
|
941
|
+
dispatch_time: parse_time(v[:start_time]),
|
|
933
942
|
native: v,
|
|
934
943
|
gpus: self.class.gpus_from_gres(v[:gres])
|
|
935
944
|
)
|
data/lib/ood_core/version.rb
CHANGED
data/lib/ood_core.rb
CHANGED
|
@@ -4,6 +4,7 @@ require "ood_core/cluster"
|
|
|
4
4
|
require "ood_core/clusters"
|
|
5
5
|
require "ood_core/invalid_cluster"
|
|
6
6
|
require "ood_core/data_formatter"
|
|
7
|
+
require "ood_core/helpers/openstack"
|
|
7
8
|
|
|
8
9
|
# The main namespace for ood_core
|
|
9
10
|
module OodCore
|
|
@@ -42,4 +43,8 @@ module OodCore
|
|
|
42
43
|
require "ood_core/batch_connect/template"
|
|
43
44
|
require "ood_core/batch_connect/factory"
|
|
44
45
|
end
|
|
46
|
+
|
|
47
|
+
module Helpers
|
|
48
|
+
require "ood_core/helpers/openstack"
|
|
49
|
+
end
|
|
45
50
|
end
|
data/ood_core.gemspec
CHANGED
|
@@ -27,11 +27,11 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_runtime_dependency "fog-openstack", "~> 1.1.5"
|
|
28
28
|
spec.add_runtime_dependency "rexml", "~> 3.2"
|
|
29
29
|
spec.add_development_dependency "bundler", "~> 2.1"
|
|
30
|
-
spec.add_development_dependency "rake", "~> 13.
|
|
30
|
+
spec.add_development_dependency "rake", "~> 13.4.1"
|
|
31
31
|
spec.add_development_dependency "rspec", "~> 3.0"
|
|
32
32
|
spec.add_development_dependency "pry", "~> 0.10"
|
|
33
33
|
spec.add_development_dependency "timecop", "~> 0.8"
|
|
34
34
|
spec.add_development_dependency "climate_control", "~> 1.2.0"
|
|
35
35
|
spec.add_development_dependency "minitest", "~> 5"
|
|
36
|
-
spec.add_development_dependency "mocha", "~>
|
|
36
|
+
spec.add_development_dependency "mocha", "~> 3.0"
|
|
37
37
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ood_core
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.30.1
|
|
4
|
+
version: 0.31.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Eric Franz
|
|
@@ -10,7 +10,7 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: exe
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date:
|
|
13
|
+
date: 2026-04-16 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: ood_support
|
|
@@ -88,14 +88,14 @@ dependencies:
|
|
|
88
88
|
requirements:
|
|
89
89
|
- - "~>"
|
|
90
90
|
- !ruby/object:Gem::Version
|
|
91
|
-
version: 13.
|
|
91
|
+
version: 13.4.1
|
|
92
92
|
type: :development
|
|
93
93
|
prerelease: false
|
|
94
94
|
version_requirements: !ruby/object:Gem::Requirement
|
|
95
95
|
requirements:
|
|
96
96
|
- - "~>"
|
|
97
97
|
- !ruby/object:Gem::Version
|
|
98
|
-
version: 13.
|
|
98
|
+
version: 13.4.1
|
|
99
99
|
- !ruby/object:Gem::Dependency
|
|
100
100
|
name: rspec
|
|
101
101
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -172,14 +172,14 @@ dependencies:
|
|
|
172
172
|
requirements:
|
|
173
173
|
- - "~>"
|
|
174
174
|
- !ruby/object:Gem::Version
|
|
175
|
-
version: '
|
|
175
|
+
version: '3.0'
|
|
176
176
|
type: :development
|
|
177
177
|
prerelease: false
|
|
178
178
|
version_requirements: !ruby/object:Gem::Requirement
|
|
179
179
|
requirements:
|
|
180
180
|
- - "~>"
|
|
181
181
|
- !ruby/object:Gem::Version
|
|
182
|
-
version: '
|
|
182
|
+
version: '3.0'
|
|
183
183
|
description: Open OnDemand core library that provides support for an HPC Center to
|
|
184
184
|
globally define HPC services that web applications can then take advantage of.
|
|
185
185
|
email:
|
|
@@ -214,6 +214,7 @@ files:
|
|
|
214
214
|
- lib/ood_core/clusters.rb
|
|
215
215
|
- lib/ood_core/data_formatter.rb
|
|
216
216
|
- lib/ood_core/errors.rb
|
|
217
|
+
- lib/ood_core/helpers/openstack.rb
|
|
217
218
|
- lib/ood_core/invalid_cluster.rb
|
|
218
219
|
- lib/ood_core/job/account_info.rb
|
|
219
220
|
- lib/ood_core/job/adapter.rb
|