ood_core 0.30.2 → 0.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ood_core/helpers/openstack.rb +107 -0
- data/lib/ood_core/job/adapters/coder/batch.rb +82 -49
- data/lib/ood_core/job/adapters/coder/openstack_credentials.rb +28 -12
- data/lib/ood_core/job/adapters/coder.rb +3 -3
- data/lib/ood_core/job/adapters/pbspro.rb +56 -0
- data/lib/ood_core/job/adapters/slurm.rb +9 -9
- data/lib/ood_core/version.rb +1 -1
- data/lib/ood_core.rb +5 -0
- data/ood_core.gemspec +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a2dab30b28f29c33b19e8cce3b5464805c09ba374336ac68fa27da684826fca6
|
|
4
|
+
data.tar.gz: f94e04cb03fa816f4fa54f72e43acdfac2e5a3ee301bd0de59624ed4367669f7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 627ea1414e51fe18601977b50ad1b6472e58904a4ec6571620bc4a7dfa382c952e6aee65e04243352e4ed7ef842365a559134c2155d27c47a77fda819385afe0
|
|
7
|
+
data.tar.gz: 535c8466bf1e18192b850ab6c0c116c7b36173923c918c9bccc23d4e751acc9178c7f16a189efa518e103ec57be2b72e6d68d8ab2d11ec55a5874a525fac0ac5
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
require "fog/openstack"
|
|
2
|
+
require "json"
|
|
3
|
+
require "etc"
|
|
4
|
+
|
|
5
|
+
module OodCore
|
|
6
|
+
class OpenStackHelper
|
|
7
|
+
attr_reader :auth_url, :openstack_instance
|
|
8
|
+
|
|
9
|
+
def initialize(token_file:, openstack_instance:)
|
|
10
|
+
@token_file = token_file
|
|
11
|
+
@openstack_instance = openstack_instance
|
|
12
|
+
@auth_url = "https://identity.#{openstack_instance}/v3"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Load token data from the token file
|
|
16
|
+
# @return [Hash] Parsed token JSON or nil if file does not exist
|
|
17
|
+
def load_token_data
|
|
18
|
+
return nil unless File.exist?(@token_file)
|
|
19
|
+
JSON.parse(File.read(@token_file))
|
|
20
|
+
rescue Errno::ENOENT => e
|
|
21
|
+
puts "Error loading token: #{e}"
|
|
22
|
+
nil
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Get access token from loaded credentials
|
|
26
|
+
# @return [String] The token ID
|
|
27
|
+
def access_token
|
|
28
|
+
load_token_data&.[]("id")
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Get user ID from loaded credentials
|
|
32
|
+
# @return [String] The user ID
|
|
33
|
+
def user_id
|
|
34
|
+
load_token_data&.[]("user_id")
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Fetch all projects for the authenticated user
|
|
38
|
+
# @return [Array<Hash>] Array of project hashes with id and name
|
|
39
|
+
def fetch_user_projects
|
|
40
|
+
connection_params = {
|
|
41
|
+
openstack_auth_url: auth_url,
|
|
42
|
+
openstack_management_url: auth_url,
|
|
43
|
+
openstack_auth_token: access_token,
|
|
44
|
+
}
|
|
45
|
+
identity = Fog::OpenStack::Identity.new(connection_params)
|
|
46
|
+
identity.list_user_projects(user_id).body["projects"]
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Fetch all flavors across all projects for a user
|
|
50
|
+
# @return [Array<Array>] Sorted array of [display_string, flavor_name, project_id]
|
|
51
|
+
def fetch_all_flavors
|
|
52
|
+
flavors = []
|
|
53
|
+
|
|
54
|
+
fetch_user_projects.each do |project|
|
|
55
|
+
scoped_token = scope_token_to_project(access_token, project['id'])
|
|
56
|
+
|
|
57
|
+
compute_connection_params = {
|
|
58
|
+
openstack_auth_url: auth_url,
|
|
59
|
+
openstack_project_name: project['name'],
|
|
60
|
+
openstack_management_url: "https://compute.#{openstack_instance}/v2.1/#{project['id']}",
|
|
61
|
+
openstack_auth_token: scoped_token,
|
|
62
|
+
}
|
|
63
|
+
compute = Fog::OpenStack::Compute.new(compute_connection_params)
|
|
64
|
+
|
|
65
|
+
compute.flavors.each do |flavor|
|
|
66
|
+
flavors << [
|
|
67
|
+
"#{flavor.name} - #{flavor.vcpus}VCPUS, #{flavor.ram/1024}GB RAM, #{flavor.disk}GB disk",
|
|
68
|
+
flavor.name,
|
|
69
|
+
project['id']
|
|
70
|
+
]
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
flavors.sort
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Convenience method that returns both projects and flavors
|
|
78
|
+
# @return [Array] Array containing [projects, flavors]
|
|
79
|
+
def load_projects_and_flavors
|
|
80
|
+
[fetch_user_projects, fetch_all_flavors]
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Scope token to a specific project
|
|
84
|
+
# @param access_token [String] The unscoped token ID
|
|
85
|
+
# @param project_id [String] The project ID to scope to
|
|
86
|
+
# @return [String] The scoped token ID
|
|
87
|
+
def scope_token_to_project(access_token, project_id)
|
|
88
|
+
auth = {
|
|
89
|
+
"auth": {
|
|
90
|
+
"identity": {
|
|
91
|
+
"methods": ["token"],
|
|
92
|
+
"token": { "id": access_token }
|
|
93
|
+
},
|
|
94
|
+
"scope": { "project": { "id": project_id } }
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
connection_params = {
|
|
99
|
+
openstack_auth_url: auth_url,
|
|
100
|
+
openstack_management_url: auth_url,
|
|
101
|
+
openstack_auth_token: access_token,
|
|
102
|
+
}
|
|
103
|
+
identity = Fog::OpenStack::Identity.new(connection_params)
|
|
104
|
+
identity.tokens.authenticate(auth)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
require "ood_core/refinements/hash_extensions"
|
|
2
2
|
require "json"
|
|
3
|
-
|
|
4
3
|
|
|
5
4
|
# Utility class for the Coder adapter to interact with the Coders API.
|
|
6
5
|
class OodCore::Job::Adapters::Coder::Batch
|
|
@@ -10,8 +9,8 @@ class OodCore::Job::Adapters::Coder::Batch
|
|
|
10
9
|
@host = config[:host]
|
|
11
10
|
@token = config[:token]
|
|
12
11
|
@service_user = config[:service_user]
|
|
13
|
-
@
|
|
14
|
-
@
|
|
12
|
+
@deletion_max_attempts = config[:deletion_max_attempts] || 5
|
|
13
|
+
@deletion_timeout_interval_seconds = config[:deletion_timeout_interval] || 10
|
|
15
14
|
@credentials = credentials
|
|
16
15
|
end
|
|
17
16
|
|
|
@@ -38,63 +37,115 @@ class OodCore::Job::Adapters::Coder::Batch
|
|
|
38
37
|
}
|
|
39
38
|
end
|
|
40
39
|
|
|
40
|
+
def generate_coder_workspace_name(submitted_name)
|
|
41
|
+
"#{username}-#{submitted_name}-#{rand(2_821_109_907_456).to_s(36)}"
|
|
42
|
+
end
|
|
41
43
|
def submit(script)
|
|
42
|
-
org_id = script.native[:org_id]
|
|
43
44
|
project_id = script.native[:project_id]
|
|
44
|
-
|
|
45
|
+
app_credentials = @credentials.generate_credentials(project_id)
|
|
46
|
+
workspace_name = generate_coder_workspace_name(script.native[:workspace_name])
|
|
47
|
+
|
|
48
|
+
create_coder_workspace(
|
|
49
|
+
script.native[:org_id],
|
|
50
|
+
project_id,
|
|
51
|
+
script.native[:template_version_id],
|
|
52
|
+
script.native[:coder_parameters],
|
|
53
|
+
app_credentials,
|
|
54
|
+
workspace_name)
|
|
55
|
+
|
|
56
|
+
@credentials.save_credentials(workspace_name, app_credentials)
|
|
57
|
+
workspace_name
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def create_coder_workspace(org_id, project_id, template_version_id, coder_parameters, app_credentials, name)
|
|
45
61
|
endpoint = "#{@host}/api/v2/organizations/#{org_id}/members/#{@service_user}/workspaces"
|
|
46
|
-
app_credentials = @credentials.generate_credentials(project_id, username)
|
|
47
62
|
headers = get_headers(@token)
|
|
48
|
-
workspace_name = "#{username}-#{script.native[:workspace_name]}-#{rand(2_821_109_907_456).to_s(36)}"
|
|
49
63
|
body = {
|
|
50
|
-
template_version_id:
|
|
51
|
-
name:
|
|
64
|
+
template_version_id: template_version_id,
|
|
65
|
+
name: name,
|
|
52
66
|
rich_parameter_values: get_rich_parameters(coder_parameters, project_id, app_credentials),
|
|
53
67
|
}
|
|
68
|
+
api_call('post', endpoint, headers, body)
|
|
69
|
+
end
|
|
54
70
|
|
|
55
|
-
resp = api_call('post', endpoint, headers, body)
|
|
56
|
-
@credentials.save_credentials(resp["id"], username, app_credentials)
|
|
57
|
-
resp["id"]
|
|
58
71
|
|
|
59
|
-
|
|
72
|
+
def delete_coder_workspace(id)
|
|
73
|
+
build_id = get_workspace_info(id)["id"]
|
|
60
74
|
|
|
61
|
-
|
|
62
|
-
endpoint = "#{@host}/api/v2/workspaces/#{id}/builds"
|
|
75
|
+
endpoint = "#{@host}/api/v2/workspaces/#{build_id}/builds"
|
|
63
76
|
headers = get_headers(@token)
|
|
64
77
|
body = {
|
|
65
78
|
'orphan' => false,
|
|
66
79
|
'transition' => 'delete'
|
|
67
80
|
}
|
|
68
81
|
api_call('post', endpoint, headers, body)
|
|
82
|
+
end
|
|
69
83
|
|
|
70
|
-
|
|
71
|
-
|
|
84
|
+
def delete(id)
|
|
85
|
+
delete_coder_workspace(id)
|
|
86
|
+
|
|
87
|
+
credentials = @credentials.load_credentials(id)
|
|
88
|
+
puts "credentials loaded #{credentials["id"]}"
|
|
72
89
|
wait_for_workspace_deletion(id) do |attempt|
|
|
73
|
-
puts "#{Time.now.inspect} Deleting workspace (attempt #{attempt
|
|
90
|
+
puts "#{Time.now.inspect} Deleting workspace (attempt #{attempt}/#{5})"
|
|
74
91
|
end
|
|
75
|
-
|
|
76
|
-
@credentials.destroy_credentials(credentials,
|
|
92
|
+
workspace_info = get_workspace_info(id)
|
|
93
|
+
@credentials.destroy_credentials(credentials, workspace_status(workspace_info), id)
|
|
77
94
|
end
|
|
78
95
|
|
|
79
96
|
def wait_for_workspace_deletion(id)
|
|
80
|
-
max_attempts = @
|
|
81
|
-
timeout_interval = @
|
|
97
|
+
max_attempts = @deletion_max_attempts
|
|
98
|
+
timeout_interval = @deletion_timeout_interval_seconds
|
|
82
99
|
|
|
83
100
|
max_attempts.times do |attempt|
|
|
84
|
-
|
|
101
|
+
workspace_info = get_workspace_info(id)
|
|
102
|
+
break unless workspace_info && workspace_status(workspace_info) == "deleting"
|
|
85
103
|
yield(attempt + 1)
|
|
86
104
|
sleep(timeout_interval)
|
|
87
105
|
end
|
|
88
106
|
end
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
107
|
+
def workspace_status(workspace_info)
|
|
108
|
+
workspace_info.dig("latest_build", "status")
|
|
109
|
+
end
|
|
110
|
+
def parse_error_logs(logs_array)
|
|
111
|
+
logs_array
|
|
112
|
+
.reject { |n| n["output"].to_s.empty?}
|
|
113
|
+
.map { |n| n["output"].scan(/"message":\s*"([^"]+)"/)[0] }
|
|
114
|
+
.reject {|n| n.nil?}
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def get_workspace_info(id)
|
|
118
|
+
endpoint = "#{@host}/api/v2/users/#{@service_user}/workspace/#{id}?include_deleted=true"
|
|
92
119
|
headers = get_headers(@token)
|
|
93
120
|
api_call('get', endpoint, headers)
|
|
94
121
|
end
|
|
95
122
|
|
|
123
|
+
def read_coder_output(latest_build)
|
|
124
|
+
coder_output_metadata = latest_build.dig("resources")
|
|
125
|
+
&.find { |resource| resource["name"] == "coder_output" }
|
|
126
|
+
&.dig("metadata")
|
|
127
|
+
coder_output_metadata&.map { |meta| [meta["key"].to_sym, meta["value"]] }&.to_h || {}
|
|
128
|
+
end
|
|
129
|
+
|
|
96
130
|
def info(id)
|
|
97
|
-
|
|
131
|
+
workspace_info = get_workspace_info(id)
|
|
132
|
+
latest_build = workspace_info.dig("latest_build")
|
|
133
|
+
coder_status = workspace_status(workspace_info) || latest_build.dig("job", "status")
|
|
134
|
+
ood_status = coder_state_to_ood_status(coder_status)
|
|
135
|
+
coder_output_hash = read_coder_output(latest_build)
|
|
136
|
+
build_logs = get_build_logs(latest_build.dig("id"))
|
|
137
|
+
error_logs = parse_error_logs(build_logs)
|
|
138
|
+
OodCore::Job::Adapters::Coder::CoderJobInfo.new(**{
|
|
139
|
+
id: workspace_info["id"],
|
|
140
|
+
job_name: workspace_info["workspace_name"],
|
|
141
|
+
status: OodCore::Job::Status.new(state: ood_status),
|
|
142
|
+
job_owner: workspace_info["workspace_owner_name"],
|
|
143
|
+
submission_time: workspace_info["created_at"],
|
|
144
|
+
dispatch_time: workspace_info.dig("updated_at"),
|
|
145
|
+
wallclock_time: wallclock_time(workspace_info, ood_status),
|
|
146
|
+
ood_connection_info: { host: coder_output_hash[:floating_ip], port: 80, error_logs: error_logs},
|
|
147
|
+
native: coder_output_hash
|
|
148
|
+
})
|
|
98
149
|
end
|
|
99
150
|
|
|
100
151
|
def coder_state_to_ood_status(coder_state)
|
|
@@ -114,22 +165,10 @@ class OodCore::Job::Adapters::Coder::Batch
|
|
|
114
165
|
end
|
|
115
166
|
end
|
|
116
167
|
|
|
117
|
-
def
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
coder_output_hash = coder_output_metadata&.map { |meta| [meta["key"].to_sym, meta["value"]] }&.to_h || {}
|
|
122
|
-
OodCore::Job::Adapters::Coder::CoderJobInfo.new(**{
|
|
123
|
-
id: json_data["id"],
|
|
124
|
-
job_name: json_data["workspace_name"],
|
|
125
|
-
status: OodCore::Job::Status.new(state: status),
|
|
126
|
-
job_owner: json_data["workspace_owner_name"],
|
|
127
|
-
submission_time: json_data["created_at"],
|
|
128
|
-
dispatch_time: json_data.dig("updated_at"),
|
|
129
|
-
wallclock_time: wallclock_time(json_data, status),
|
|
130
|
-
ood_connection_info: { host: coder_output_hash[:floating_ip], port: 80 },
|
|
131
|
-
native: coder_output_hash
|
|
132
|
-
})
|
|
168
|
+
def get_build_logs(build_id)
|
|
169
|
+
endpoint = "#{@host}/api/v2/workspacebuilds/#{build_id}/logs"
|
|
170
|
+
headers = get_headers(@token)
|
|
171
|
+
api_call('get', endpoint, headers)
|
|
133
172
|
end
|
|
134
173
|
|
|
135
174
|
def wallclock_time(json_data, status)
|
|
@@ -153,12 +192,6 @@ class OodCore::Job::Adapters::Coder::Batch
|
|
|
153
192
|
et
|
|
154
193
|
end
|
|
155
194
|
|
|
156
|
-
def workspace_info_from_json(json_data)
|
|
157
|
-
state = json_data.dig("latest_build", "status") || json_data.dig("latest_build", "job", "status")
|
|
158
|
-
status = coder_state_to_ood_status(state)
|
|
159
|
-
build_coder_job_info(json_data, status)
|
|
160
|
-
end
|
|
161
|
-
|
|
162
195
|
def api_call(method, endpoint, headers, body = nil)
|
|
163
196
|
uri = URI(endpoint)
|
|
164
197
|
case method.downcase
|
|
@@ -1,22 +1,28 @@
|
|
|
1
1
|
require "fog/openstack"
|
|
2
2
|
require "json"
|
|
3
3
|
require "ood_core/job/adapters/coder/credentials"
|
|
4
|
+
require "tempfile"
|
|
5
|
+
require 'excon'
|
|
4
6
|
|
|
5
7
|
class OpenStackCredentials < CredentialsInterface
|
|
6
|
-
def initialize(auth_url)
|
|
8
|
+
def initialize(auth_url, dir)
|
|
7
9
|
@auth_url = auth_url
|
|
10
|
+
@dir = dir
|
|
8
11
|
end
|
|
9
12
|
|
|
10
|
-
def load_credentials(id
|
|
11
|
-
file_path
|
|
12
|
-
JSON.parse(File.read(file_path))
|
|
13
|
+
def load_credentials(id)
|
|
14
|
+
JSON.parse(File.read(file_path(id)))
|
|
13
15
|
rescue Errno::ENOENT => e
|
|
14
16
|
puts "Error loading credentials: #{e}"
|
|
15
17
|
nil
|
|
16
18
|
end
|
|
17
19
|
|
|
18
|
-
def
|
|
19
|
-
|
|
20
|
+
def file_path(id)
|
|
21
|
+
return "#{@dir}/#{username}-#{id}-os-credentials.json"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def generate_credentials(project_id)
|
|
25
|
+
token_json = JSON.parse(File.read("#{@dir}/#{username}-os-token.json"))
|
|
20
26
|
access_token = token_json["id"]
|
|
21
27
|
user_id = token_json["user_id"]
|
|
22
28
|
connection = Fog::OpenStack::Identity.new({
|
|
@@ -74,13 +80,17 @@ class OpenStackCredentials < CredentialsInterface
|
|
|
74
80
|
|
|
75
81
|
end
|
|
76
82
|
|
|
77
|
-
def save_credentials(id,
|
|
78
|
-
|
|
79
|
-
|
|
83
|
+
def save_credentials(id, app_credentials)
|
|
84
|
+
Tempfile.open(["temp", ".json"], "/tmp") do |temp_file|
|
|
85
|
+
temp_file.write(JSON.generate(app_credentials))
|
|
86
|
+
temp_file.chmod(0600)
|
|
87
|
+
temp_file.close
|
|
88
|
+
FileUtils.mv(temp_file.path, file_path(id))
|
|
89
|
+
end
|
|
80
90
|
end
|
|
81
91
|
|
|
82
92
|
|
|
83
|
-
def destroy_credentials(os_app_credentials, deletion_status, id
|
|
93
|
+
def destroy_credentials(os_app_credentials, deletion_status, id)
|
|
84
94
|
return if os_app_credentials.nil?
|
|
85
95
|
|
|
86
96
|
|
|
@@ -88,17 +98,19 @@ class OpenStackCredentials < CredentialsInterface
|
|
|
88
98
|
credentials_to_destroy = find_os_application_credentials(connection, os_app_credentials)
|
|
89
99
|
|
|
90
100
|
if deletion_status != "deleted"
|
|
91
|
-
File.delete(
|
|
101
|
+
File.delete(file_path(id))
|
|
92
102
|
puts "Workspace deletion timed out, credentials with id #{os_app_credentials['id']} of user #{os_app_credentials['user_id']} were not destroyed"
|
|
93
103
|
return
|
|
94
104
|
end
|
|
95
105
|
|
|
96
106
|
begin
|
|
107
|
+
puts "Destroying application credentials with id #{os_app_credentials['id']} and session #{id}}"
|
|
97
108
|
credentials_to_destroy.destroy
|
|
98
|
-
rescue Excon::Error
|
|
109
|
+
rescue Excon::Error => e
|
|
99
110
|
puts "Error destroying application credentials with id #{os_app_credentials['id']} #{e}"
|
|
100
111
|
raise JobAdapterError, e.message
|
|
101
112
|
end
|
|
113
|
+
File.delete(file_path(id))
|
|
102
114
|
end
|
|
103
115
|
|
|
104
116
|
|
|
@@ -116,4 +128,8 @@ class OpenStackCredentials < CredentialsInterface
|
|
|
116
128
|
def find_os_application_credentials(connection, os_app_credentials)
|
|
117
129
|
connection.application_credentials.find_by_id(os_app_credentials['id'], os_app_credentials['user_id'])
|
|
118
130
|
end
|
|
131
|
+
def username
|
|
132
|
+
@username ||= Etc.getlogin
|
|
133
|
+
end
|
|
119
134
|
end
|
|
135
|
+
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
require "ood_core/refinements/hash_extensions"
|
|
2
2
|
require "ood_core/refinements/array_extensions"
|
|
3
3
|
require 'net/http'
|
|
4
|
-
require 'json'
|
|
5
4
|
require 'etc'
|
|
6
5
|
|
|
6
|
+
|
|
7
7
|
module OodCore
|
|
8
8
|
module Job
|
|
9
9
|
class Factory
|
|
@@ -14,7 +14,7 @@ module OodCore
|
|
|
14
14
|
def self.build_coder(config)
|
|
15
15
|
config = config.to_h.symbolize_keys
|
|
16
16
|
if config[:auth]["cloud"] == "openstack"
|
|
17
|
-
credentials = OpenStackCredentials.new(config[:auth]["url"])
|
|
17
|
+
credentials = OpenStackCredentials.new(config[:auth]["url"], config[:auth]["credentials_dir"])
|
|
18
18
|
else
|
|
19
19
|
raise ArgumentError, "Unsupported credentials for cloud type: #{config[:auth]['cloud']}"
|
|
20
20
|
end
|
|
@@ -79,7 +79,7 @@ module OodCore
|
|
|
79
79
|
# adapters can get by without populating the entire Info object
|
|
80
80
|
# @return [Array<Info>] information describing submitted jobs
|
|
81
81
|
def info_all(attrs: nil)
|
|
82
|
-
|
|
82
|
+
[]
|
|
83
83
|
end
|
|
84
84
|
|
|
85
85
|
# Whether the adapter supports job arrays
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require "time"
|
|
2
|
+
require "json"
|
|
2
3
|
require "ood_core/refinements/hash_extensions"
|
|
3
4
|
require "ood_core/job/adapters/helper"
|
|
4
5
|
|
|
@@ -85,6 +86,50 @@ module OodCore
|
|
|
85
86
|
@bin_overrides = bin_overrides
|
|
86
87
|
end
|
|
87
88
|
|
|
89
|
+
# Get a ClusterInfo object containing information about the given cluster
|
|
90
|
+
# @return [ClusterInfo] object containing cluster details
|
|
91
|
+
def get_cluster_info
|
|
92
|
+
args = ["-a", "-F", "json"]
|
|
93
|
+
stdout = call("pbsnodes", *args)
|
|
94
|
+
node_info = JSON.parse(stdout)
|
|
95
|
+
|
|
96
|
+
# Initialize cluster info values
|
|
97
|
+
total_nodes = 0
|
|
98
|
+
allocated_nodes = 0
|
|
99
|
+
total_cpus = 0
|
|
100
|
+
allocated_cpus = 0
|
|
101
|
+
total_gpus = 0
|
|
102
|
+
allocated_gpus = 0
|
|
103
|
+
|
|
104
|
+
nodes = node_info.fetch('nodes', {})
|
|
105
|
+
|
|
106
|
+
nodes.each do |_node_name, node|
|
|
107
|
+
total_nodes += 1
|
|
108
|
+
resources_avail = node.fetch('resources_available', {})
|
|
109
|
+
total_cpus += get_node_resource(resources_avail, 'ncpus')
|
|
110
|
+
total_gpus += get_node_resource(resources_avail, 'ngpus')
|
|
111
|
+
|
|
112
|
+
# Resources assigned (currently allocated to jobs)
|
|
113
|
+
resources_assigned = node.fetch('resources_assigned', {})
|
|
114
|
+
ncpus_assigned = get_node_resource(resources_assigned, 'ncpus')
|
|
115
|
+
ngpus_assigned = get_node_resource(resources_assigned, 'ngpus')
|
|
116
|
+
|
|
117
|
+
allocated_cpus += ncpus_assigned
|
|
118
|
+
allocated_gpus += ngpus_assigned
|
|
119
|
+
|
|
120
|
+
# A node is allocated if at least one CPU has been assigned to a job
|
|
121
|
+
allocated_nodes += 1 if ncpus_assigned > 0
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
ClusterInfo.new(active_nodes: allocated_nodes,
|
|
125
|
+
total_nodes: total_nodes,
|
|
126
|
+
active_processors: allocated_cpus,
|
|
127
|
+
total_processors: total_cpus,
|
|
128
|
+
active_gpus: allocated_gpus,
|
|
129
|
+
total_gpus: total_gpus
|
|
130
|
+
)
|
|
131
|
+
end
|
|
132
|
+
|
|
88
133
|
# Get a list of hashes detailing each of the jobs on the batch server
|
|
89
134
|
# @example Status info for all jobs
|
|
90
135
|
# my_batch.get_jobs
|
|
@@ -174,6 +219,13 @@ module OodCore
|
|
|
174
219
|
end
|
|
175
220
|
|
|
176
221
|
private
|
|
222
|
+
# Get a resource value from a node's resources hash, returning 0 if the
|
|
223
|
+
# resource is not present
|
|
224
|
+
def get_node_resource(resources, key)
|
|
225
|
+
val = resources.fetch(key, 0)
|
|
226
|
+
val.to_i
|
|
227
|
+
end
|
|
228
|
+
|
|
177
229
|
# Call a forked PBS Pro command for a given batch server
|
|
178
230
|
def call(cmd, *args, env: {}, stdin: "", chdir: nil)
|
|
179
231
|
cmd = cmd.to_s
|
|
@@ -299,6 +351,10 @@ module OodCore
|
|
|
299
351
|
raise JobAdapterError, e.message
|
|
300
352
|
end
|
|
301
353
|
|
|
354
|
+
def cluster_info
|
|
355
|
+
@pbspro.get_cluster_info
|
|
356
|
+
end
|
|
357
|
+
|
|
302
358
|
# Retrieve info for all jobs from the resource manager
|
|
303
359
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
|
304
360
|
# @return [Array<Info>] information describing submitted jobs
|
|
@@ -47,7 +47,7 @@ module OodCore
|
|
|
47
47
|
# calculated from gres string
|
|
48
48
|
# @return [Integer] the number of gpus in gres
|
|
49
49
|
def self.gpus_from_gres(gres)
|
|
50
|
-
gres.to_s.scan(/gpu[
|
|
50
|
+
gres.to_s.scan(/gpu[s:]*[\w()-]*[=:]?(\d+)(?:[(,]|$)/).flatten.map(&:to_i).sum
|
|
51
51
|
end
|
|
52
52
|
|
|
53
53
|
# Object used for simplified communication with a Slurm batch server
|
|
@@ -577,7 +577,7 @@ module OodCore
|
|
|
577
577
|
'OOM' => :completed, # OUT_OF_MEMORY
|
|
578
578
|
|
|
579
579
|
'BOOT_FAIL' => :completed,
|
|
580
|
-
'
|
|
580
|
+
'CANCELLED' => :completed,
|
|
581
581
|
'COMPLETED' => :completed,
|
|
582
582
|
'DEADLINE' => :completed,
|
|
583
583
|
'FAILED' => :completed,
|
|
@@ -879,11 +879,11 @@ module OodCore
|
|
|
879
879
|
"%02d:%02d:%02d" % [time/3600, time/60%60, time%60]
|
|
880
880
|
end
|
|
881
881
|
|
|
882
|
-
#
|
|
882
|
+
# safely parse date time string, return nil when there are errors.
|
|
883
883
|
def parse_time(date_time)
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
884
|
+
Time.parse(date_time.to_s)
|
|
885
|
+
rescue StandardError
|
|
886
|
+
nil
|
|
887
887
|
end
|
|
888
888
|
|
|
889
889
|
# Convert host list string to individual nodes
|
|
@@ -910,7 +910,7 @@ module OodCore
|
|
|
910
910
|
|
|
911
911
|
# Determine state from Slurm state code
|
|
912
912
|
def get_state(st)
|
|
913
|
-
STATE_MAP.fetch(st, :undetermined)
|
|
913
|
+
STATE_MAP.fetch(st.split.first, :undetermined)
|
|
914
914
|
end
|
|
915
915
|
|
|
916
916
|
# Parse hash describing Slurm job status
|
|
@@ -937,8 +937,8 @@ module OodCore
|
|
|
937
937
|
wallclock_time: duration_in_seconds(v[:time_used]),
|
|
938
938
|
wallclock_limit: duration_in_seconds(v[:time_limit]),
|
|
939
939
|
cpu_time: nil,
|
|
940
|
-
submission_time:
|
|
941
|
-
dispatch_time: (v[:start_time]
|
|
940
|
+
submission_time: parse_time(v[:submit_time]),
|
|
941
|
+
dispatch_time: parse_time(v[:start_time]),
|
|
942
942
|
native: v,
|
|
943
943
|
gpus: self.class.gpus_from_gres(v[:gres])
|
|
944
944
|
)
|
data/lib/ood_core/version.rb
CHANGED
data/lib/ood_core.rb
CHANGED
|
@@ -4,6 +4,7 @@ require "ood_core/cluster"
|
|
|
4
4
|
require "ood_core/clusters"
|
|
5
5
|
require "ood_core/invalid_cluster"
|
|
6
6
|
require "ood_core/data_formatter"
|
|
7
|
+
require "ood_core/helpers/openstack"
|
|
7
8
|
|
|
8
9
|
# The main namespace for ood_core
|
|
9
10
|
module OodCore
|
|
@@ -42,4 +43,8 @@ module OodCore
|
|
|
42
43
|
require "ood_core/batch_connect/template"
|
|
43
44
|
require "ood_core/batch_connect/factory"
|
|
44
45
|
end
|
|
46
|
+
|
|
47
|
+
module Helpers
|
|
48
|
+
require "ood_core/helpers/openstack"
|
|
49
|
+
end
|
|
45
50
|
end
|
data/ood_core.gemspec
CHANGED
|
@@ -27,7 +27,7 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_runtime_dependency "fog-openstack", "~> 1.1.5"
|
|
28
28
|
spec.add_runtime_dependency "rexml", "~> 3.2"
|
|
29
29
|
spec.add_development_dependency "bundler", "~> 2.1"
|
|
30
|
-
spec.add_development_dependency "rake", "~> 13.
|
|
30
|
+
spec.add_development_dependency "rake", "~> 13.4.1"
|
|
31
31
|
spec.add_development_dependency "rspec", "~> 3.0"
|
|
32
32
|
spec.add_development_dependency "pry", "~> 0.10"
|
|
33
33
|
spec.add_development_dependency "timecop", "~> 0.8"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ood_core
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.31.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Eric Franz
|
|
@@ -10,7 +10,7 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: exe
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date: 2026-
|
|
13
|
+
date: 2026-04-20 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: ood_support
|
|
@@ -88,14 +88,14 @@ dependencies:
|
|
|
88
88
|
requirements:
|
|
89
89
|
- - "~>"
|
|
90
90
|
- !ruby/object:Gem::Version
|
|
91
|
-
version: 13.
|
|
91
|
+
version: 13.4.1
|
|
92
92
|
type: :development
|
|
93
93
|
prerelease: false
|
|
94
94
|
version_requirements: !ruby/object:Gem::Requirement
|
|
95
95
|
requirements:
|
|
96
96
|
- - "~>"
|
|
97
97
|
- !ruby/object:Gem::Version
|
|
98
|
-
version: 13.
|
|
98
|
+
version: 13.4.1
|
|
99
99
|
- !ruby/object:Gem::Dependency
|
|
100
100
|
name: rspec
|
|
101
101
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -214,6 +214,7 @@ files:
|
|
|
214
214
|
- lib/ood_core/clusters.rb
|
|
215
215
|
- lib/ood_core/data_formatter.rb
|
|
216
216
|
- lib/ood_core/errors.rb
|
|
217
|
+
- lib/ood_core/helpers/openstack.rb
|
|
217
218
|
- lib/ood_core/invalid_cluster.rb
|
|
218
219
|
- lib/ood_core/job/account_info.rb
|
|
219
220
|
- lib/ood_core/job/adapter.rb
|