ood_core 0.27.1 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ood_core/batch_connect/template.rb +3 -2
- data/lib/ood_core/batch_connect/templates/vnc.rb +1 -1
- data/lib/ood_core/batch_connect/templates/vnc_container.rb +1 -1
- data/lib/ood_core/job/adapter.rb +12 -0
- data/lib/ood_core/job/adapters/coder/batch.rb +170 -0
- data/lib/ood_core/job/adapters/coder/coder_job_info.rb +8 -0
- data/lib/ood_core/job/adapters/coder.rb +120 -0
- data/lib/ood_core/job/adapters/htcondor.rb +549 -0
- data/lib/ood_core/job/adapters/psij/delete.py +18 -0
- data/lib/ood_core/job/adapters/psij/get_info.py +55 -0
- data/lib/ood_core/job/adapters/psij/hold.py +18 -0
- data/lib/ood_core/job/adapters/psij/release.py +18 -0
- data/lib/ood_core/job/adapters/psij/submit.py +28 -0
- data/lib/ood_core/job/adapters/psij.rb +410 -0
- data/lib/ood_core/job/adapters/slurm.rb +133 -3
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +1 -1
- metadata +14 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53043c13d393367627c85321c8c7ef9d69d7a6cbab687ea32a82ab2077484024
|
4
|
+
data.tar.gz: e6c1f60a01e714e5ac02090a9c01ce7b6e96bde96d1568b31c60ab630cecc3c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a5ee3699f5737abf3158e00341fd7e236b6320893f9a3640f7e5b61c854796512e03277c699db100ebeef402f735a58e82596b50dce152a381b01f43cb93ce3b
|
7
|
+
data.tar.gz: 5ff0fc3ed64f3154394aeedc5caed25fd0779304963dbbecf0b6b00083185459abfa40a3787da43f0da21eb475c30bb726c26514e7395ece23b193081e4f2cb1
|
@@ -209,9 +209,10 @@ module OodCore
|
|
209
209
|
export -f wait_until_port_used
|
210
210
|
|
211
211
|
# Generate random alphanumeric password with $1 (default: #{password_size}) characters
|
212
|
-
create_passwd ()
|
212
|
+
create_passwd () (
|
213
|
+
set +o pipefail # ensure pipefail disabled, `head` closing stdin causes SIGPIPE
|
213
214
|
tr -cd 'a-zA-Z0-9' < /dev/urandom 2> /dev/null | head -c${1:-#{password_size}}
|
214
|
-
|
215
|
+
)
|
215
216
|
export -f create_passwd
|
216
217
|
}
|
217
218
|
export -f source_helpers
|
@@ -185,7 +185,7 @@ module OodCore
|
|
185
185
|
# connection
|
186
186
|
echo "Scanning VNC log file for user authentications..."
|
187
187
|
while read -r line; do
|
188
|
-
if [[ ${line} =~ "Full-control authentication enabled
|
188
|
+
if [[ ${line} =~ "Full-control authentication enabled" ]]; then
|
189
189
|
change_passwd
|
190
190
|
create_yml
|
191
191
|
fi
|
@@ -183,7 +183,7 @@ module OodCore
|
|
183
183
|
# connection
|
184
184
|
echo "Scanning VNC log file for user authentications..."
|
185
185
|
while read -r line; do
|
186
|
-
if [[ ${line} =~ "Full-control authentication enabled
|
186
|
+
if [[ ${line} =~ "Full-control authentication enabled" ]]; then
|
187
187
|
change_passwd
|
188
188
|
create_yml
|
189
189
|
fi
|
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -57,6 +57,18 @@ module OodCore
|
|
57
57
|
raise NotImplementedError, "subclass did not define #info_all"
|
58
58
|
end
|
59
59
|
|
60
|
+
# Retrieve historic info for all completed jobs from the resource manager.
|
61
|
+
# This depends on the data retention configuration of the resource manager.
|
62
|
+
# @abstract Subclass is expected to implement {#info_historic}
|
63
|
+
# @raise [NotImplementedError] if subclass did not define {#info_historic}
|
64
|
+
#
|
65
|
+
# @param opts [#to_h] options to filter jobs in the resource manager.
|
66
|
+
#
|
67
|
+
# @return [Array<Info>] information describing the jobs
|
68
|
+
def info_historic(opts: {})
  # Abstract hook: this base implementation only signals that the concrete
  # adapter did not provide its own version.
  message = "subclass did not define #info_historic"
  raise NotImplementedError, message
end
|
71
|
+
|
60
72
|
# Retrieve info for all jobs for a given owner or owners from the
|
61
73
|
# resource manager
|
62
74
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
@@ -0,0 +1,170 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "json"
|
3
|
+
|
4
|
+
# Utility class for the Coder adapter to interact with the Coder API.
|
5
|
+
class OodCore::Job::Adapters::Coder::Batch
|
6
|
+
require_relative "coder_job_info"
|
7
|
+
class Error < StandardError; end
|
8
|
+
# Build the API helper from adapter configuration.
#
# @param config [#[]] settings; the :host and :token entries are read
def initialize(config)
  @host, @token = config[:host], config[:token]
end
|
12
|
+
|
13
|
+
# Look up the application credential belonging to a project.
#
# Reads /home/<username>/application_credentials.json, parses it as JSON and
# returns the first entry whose "project_id" equals the requested project.
#
# @param username [#to_s] user whose credentials file is read
# @param project_id [Object] project to select the credential for
# @return [Hash] the matching credential entry
# @raise [Error] if the file cannot be read or parsed, or no entry matches
def get_os_app_credentials(username, project_id)
  path = "/home/#{username}/application_credentials.json"
  entries = JSON.parse(File.read(path))
  match = entries.find { |entry| entry["project_id"] == project_id }
  # Fail here with a clear message; a nil result would otherwise surface
  # later as a NoMethodError when the credential fields are read.
  raise Error, "No application credentials for project #{project_id} in #{path}" unless match

  match
rescue SystemCallError, JSON::ParserError => e
  # Re-raise as this class's Error so callers that rescue it see one error type.
  raise Error, "Unable to load application credentials from #{path}: #{e.message}"
end
|
18
|
+
|
19
|
+
# Assemble the rich parameter list sent with a workspace creation request.
#
# @param coder_parameters [Hash, nil] extra template parameters; each value
#   is converted with #to_s
# @param project_id [Object] value of the "project_id" parameter
# @param os_app_credentials [Hash] credential with "name", "id" and "secret"
# @return [Array<Hash>] entries of the form { name: ..., value: ... }
def get_rich_parameters(coder_parameters, project_id, os_app_credentials)
  credential_params = %w[name id secret].map do |field|
    { name: "application_credential_#{field}", value: os_app_credentials[field] }
  end
  extra_params = (coder_parameters || {}).map do |key, value|
    { name: key, value: value.to_s }
  end
  credential_params + [{ name: "project_id", value: project_id }] + extra_params
end
|
33
|
+
|
34
|
+
# Build the HTTP headers used for Coder API requests.
#
# @param coder_token [String] value sent as the Coder-Session-Token header
# @return [Hash{String => Object}] JSON content headers plus the session token
def get_headers(coder_token)
  { 'Content-Type' => 'application/json', 'Accept' => 'application/json' }
    .merge('Coder-Session-Token' => coder_token)
end
|
41
|
+
|
42
|
+
# Create a workspace through the API from the script's native options.
#
# Reads :org_id, :project_id, :coder_parameters, :template_id,
# :template_version_name and :workspace_name from script.native.
#
# @param script [#native] job script carrying the native options
# @return [Object] the "id" entry of the API response
def submit(script)
  native = script.native
  project_id = native[:project_id]
  endpoint = "https://#{@host}/api/v2/organizations/#{native[:org_id]}/members/#{username}/workspaces"
  credentials = get_os_app_credentials(username, project_id)
  request_headers = get_headers(@token)
  # 2_821_109_907_456 == 36**8, so the random base-36 suffix has at most 8 characters.
  suffix = rand(2_821_109_907_456).to_s(36)
  payload = {
    template_id: native[:template_id],
    template_version_name: native[:template_version_name],
    name: "#{username}-#{native[:workspace_name]}-#{suffix}",
    rich_parameter_values: get_rich_parameters(native[:coder_parameters], project_id, credentials)
  }

  api_call('post', endpoint, request_headers, payload)["id"]
end
|
59
|
+
|
60
|
+
# Request deletion of a workspace by posting a "delete" build transition.
#
# @param id [#to_s] workspace id, interpolated into the request URL
# @return [Object] the parsed API response
def delete(id)
  payload = { 'orphan' => false, 'transition' => 'delete' }
  builds_url = "https://#{@host}/api/v2/workspaces/#{id}/builds"
  api_call('post', builds_url, get_headers(@token), payload)
end
|
69
|
+
|
70
|
+
# Fetch a workspace (deleted ones included) and convert it to job info.
#
# @param id [#to_s] workspace id, interpolated into the request URL
# @return [Object] whatever workspace_info_from_json builds from the response
def info(id)
  workspace_url = "https://#{@host}/api/v2/workspaces/#{id}?include_deleted=true"
  request_headers = get_headers(@token)
  workspace_json = api_call('get', workspace_url, request_headers)
  workspace_info_from_json(workspace_json)
end
|
75
|
+
|
76
|
+
# Translate a Coder workspace state into the status name used by this adapter.
#
# @param coder_state [String, nil] state reported by the API
# @return [String] mapped status, or "undetermined" for any other value
def coder_state_to_ood_status(coder_state)
  {
    "starting" => "queued",
    "failed" => "suspended",
    "running" => "running",
    "deleted" => "completed",
    "stopped" => "completed"
  }.fetch(coder_state, "undetermined")
end
|
92
|
+
|
93
|
+
# Convert a workspace JSON document into a CoderJobInfo.
#
# The metadata of the resource named "coder_output" in the latest build is
# turned into a symbol-keyed hash. It is exposed as the native data, and its
# :floating_ip entry becomes the connection host.
#
# @param json_data [Hash] parsed workspace JSON
# @param status [String] state handed to OodCore::Job::Status
# @return [OodCore::Job::Adapters::Coder::CoderJobInfo]
def build_coder_job_info(json_data, status)
  # dig tolerates a missing "latest_build" or "resources" entry, matching the
  # nil-safe handling of the rest of this chain.
  output_resource = json_data.dig("latest_build", "resources")
                             &.find { |resource| resource["name"] == "coder_output" }
  output_metadata = output_resource&.dig("metadata") || []
  coder_output = output_metadata.map { |meta| [meta["key"].to_sym, meta["value"]] }.to_h

  OodCore::Job::Adapters::Coder::CoderJobInfo.new(
    id: json_data["id"],
    job_name: json_data["workspace_name"],
    status: OodCore::Job::Status.new(state: status),
    job_owner: json_data["workspace_owner_name"],
    submission_time: json_data["created_at"],
    dispatch_time: json_data["updated_at"],
    wallclock_time: wallclock_time(json_data, status),
    ood_connection_info: { host: coder_output[:floating_ip], port: 80 },
    native: coder_output
  )
end
|
110
|
+
|
111
|
+
# Elapsed seconds between the workspace's start and end times.
#
# @param json_data [Hash] parsed workspace JSON
# @param status [String] status forwarded to end_time
# @return [Integer] end_time minus start_time
def wallclock_time(json_data, status)
  began = start_time(json_data)
  finished = end_time(json_data, status)
  finished - began
end
|
116
|
+
|
117
|
+
# Parse the workspace's "updated_at" timestamp.
#
# @param json_data [Hash] parsed workspace JSON
# @return [Integer] the timestamp as Unix epoch seconds
def start_time(json_data)
  updated_at = json_data["updated_at"]
  DateTime.parse(updated_at).to_time.to_i
end
|
121
|
+
|
122
|
+
# Determine when the workspace stopped accumulating wallclock time.
#
# The status passed in by workspace_info_from_json is the value produced by
# coder_state_to_ood_status, which reports deleted and stopped workspaces as
# "completed". The previous check against "deleted" alone therefore never
# matched, so finished workspaces always used the current time. "deleted" is
# still accepted for callers that pass the raw Coder state.
#
# @param json_data [Hash] parsed workspace JSON
# @param status [String] mapped status (or raw Coder state) of the workspace
# @return [Integer] Unix epoch seconds of the latest build's "updated_at" for
#   finished workspaces, otherwise the current time
def end_time(json_data, status)
  finished_at = json_data.dig("latest_build", "updated_at")
  if %w[completed deleted].include?(status) && finished_at
    DateTime.parse(finished_at).to_time.to_i
  else
    # Still active, or no build timestamp available: measure up to now.
    Time.now.to_i
  end
end
|
131
|
+
|
132
|
+
# Build job info from a workspace JSON document.
#
# The state is taken from the latest build's "status", falling back to the
# status of that build's "job" entry.
#
# @param json_data [Hash] parsed workspace JSON
# @return [Object] result of build_coder_job_info
def workspace_info_from_json(json_data)
  coder_state = json_data.dig("latest_build", "status")
  coder_state ||= json_data.dig("latest_build", "job", "status")
  build_coder_job_info(json_data, coder_state_to_ood_status(coder_state))
end
|
137
|
+
|
138
|
+
# Perform an HTTP request against the API and parse the JSON response.
#
# @param method [String] 'get', 'post' or 'delete' (case-insensitive)
# @param endpoint [String] full request URL; SSL is used for https URLs
# @param headers [Hash] request headers
# @param body [#to_json, nil] optional request payload, sent as JSON
# @return [Object] the parsed JSON response body
# @raise [ArgumentError] if method is not one of the supported verbs
# @raise [Error] if the response is not a success (2xx) response
def api_call(method, endpoint, headers, body = nil)
  uri = URI(endpoint)

  request_class =
    case method.downcase
    when 'get' then Net::HTTP::Get
    when 'post' then Net::HTTP::Post
    when 'delete' then Net::HTTP::Delete
    else raise ArgumentError, "Invalid HTTP method: #{method}"
    end

  request = request_class.new(uri, headers)
  request.body = body.to_json if body

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(request)
  end

  unless response.is_a?(Net::HTTPSuccess)
    # The request body is deliberately left out of the message: workspace
    # creation requests carry the application credential secret, and this
    # message is passed on to callers.
    raise Error, "HTTP Error: #{response.code} #{response.message} for request #{endpoint}"
  end

  JSON.parse(response.body)
end
|
165
|
+
|
166
|
+
# Login name reported by Etc.getlogin, cached in @username once it is truthy.
#
# @return [String, nil]
def username
  @username = Etc.getlogin unless @username
  @username
end
|
169
|
+
|
170
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/refinements/array_extensions"
|
3
|
+
require 'net/http'
|
4
|
+
require 'json'
|
5
|
+
require 'etc'
|
6
|
+
|
7
|
+
module OodCore
|
8
|
+
module Job
|
9
|
+
class Factory
|
10
|
+
using Refinements::HashExtensions
|
11
|
+
|
12
|
+
# Build the Coder adapter from a configuration object.
#
# @param config [#to_h] adapter configuration; keys are symbolized before
#   being handed to the batch helper
# @return [Adapters::Coder] adapter wrapping a newly built batch helper
def self.build_coder(config)
  settings = config.to_h.symbolize_keys
  Adapters::Coder.new(Adapters::Coder::Batch.new(settings))
end
|
16
|
+
end
|
17
|
+
|
18
|
+
module Adapters
|
19
|
+
attr_reader :host, :token
|
20
|
+
|
21
|
+
# The adapter class for Coder.
|
22
|
+
class Coder < Adapter
|
23
|
+
|
24
|
+
using Refinements::ArrayExtensions
|
25
|
+
using Refinements::HashExtensions
|
26
|
+
|
27
|
+
require "ood_core/job/adapters/coder/batch"
|
28
|
+
|
29
|
+
attr_reader :batch
|
30
|
+
# @param batch [Object] helper object that every adapter call is delegated to
def initialize(batch)
  @batch = batch
end
|
33
|
+
|
34
|
+
# Submit a job with the attributes defined in the job template instance
|
35
|
+
# @example Submit job template to cluster
|
36
|
+
# solver_id = job_adapter.submit(solver_script)
|
37
|
+
# #=> "1234.server"
|
38
|
+
# @example Submit job that depends on previous job
|
39
|
+
# post_id = job_adapter.submit(
|
40
|
+
# post_script,
|
41
|
+
# afterok: solver_id
|
42
|
+
# )
|
43
|
+
# #=> "1235.server"
|
44
|
+
# @param script [Script] script object that describes the
|
45
|
+
# script and attributes for the submitted job
|
46
|
+
# @param after [#to_s, Array<#to_s>] this job may be scheduled for execution
|
47
|
+
# at any point after dependent jobs have started execution
|
48
|
+
# @param afterok [#to_s, Array<#to_s>] this job may be scheduled for
|
49
|
+
# execution only after dependent jobs have terminated with no errors
|
50
|
+
# @param afternotok [#to_s, Array<#to_s>] this job may be scheduled for
|
51
|
+
# execution only after dependent jobs have terminated with errors
|
52
|
+
# @param afterany [#to_s, Array<#to_s>] this job may be scheduled for
|
53
|
+
# execution after dependent jobs have terminated
|
54
|
+
# @return [String] the job id returned after successfully submitting a job
|
55
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
  # NOTE: after/afterok/afternotok/afterany are accepted but never read here.
  if script.nil?
    raise ArgumentError, 'Must specify the script'
  end

  batch.submit(script)
rescue Batch::Error => batch_error
  # Translate helper failures into the adapter-level error type.
  raise JobAdapterError, batch_error.message
end
|
61
|
+
|
62
|
+
# Retrieve info for all jobs from the resource manager
|
63
|
+
# @abstract Subclass is expected to implement {#info_all}
|
64
|
+
# @raise [NotImplementedError] if subclass did not define {#info_all}
|
65
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
66
|
+
# This array specifies only attrs you want, in addition to id and status.
|
67
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
68
|
+
# to have a value for any attr besides the ones specified and id and status.
|
69
|
+
#
|
70
|
+
# For certain adapters this may speed up the response since
|
71
|
+
# adapters can get by without populating the entire Info object
|
72
|
+
# @return [Array<Info>] information describing submitted jobs
|
73
|
+
def info_all(attrs: nil)
  # TODO - implement info all for namespaces?
  #
  # Listing all jobs is not implemented for this adapter. The previous body
  # called `batch.method_missing(attrs: attrs)`; method_missing is private, so
  # that call always raised NoMethodError, which the Batch::Error rescue did
  # not translate. Raise the adapter-level error explicitly instead.
  raise JobAdapterError, "The Coder adapter does not support #info_all"
end
|
79
|
+
|
80
|
+
# Whether the adapter supports job arrays
|
81
|
+
# @return [Boolean] always false; the Coder adapter does not support
|
82
|
+
# job arrays
|
83
|
+
def supports_job_arrays?
  # This adapter always reports job arrays as unsupported.
  false
end
|
86
|
+
|
87
|
+
# Retrieve job info from the resource manager
|
88
|
+
# @abstract Subclass is expected to implement {#info}
|
89
|
+
# @raise [NotImplementedError] if subclass did not define {#info}
|
90
|
+
# @param id [#to_s] the id of the job
|
91
|
+
# @return [Info] information describing submitted job
|
92
|
+
def info(id)
  job_id = id.to_s
  batch.info(job_id)
rescue Batch::Error => batch_error
  # Translate helper failures into the adapter-level error type.
  raise JobAdapterError, batch_error.message
end
|
97
|
+
|
98
|
+
# Retrieve job status from resource manager
|
99
|
+
# @note Optimized slightly over retrieving complete job information from server
|
100
|
+
# @abstract Subclass is expected to implement {#status}
|
101
|
+
# @raise [NotImplementedError] if subclass did not define {#status}
|
102
|
+
# @param id [#to_s] the id of the job
|
103
|
+
# @return [Status] status of job
|
104
|
+
def status(id)
  # #info returns a job info object, not a Hash, so the former
  # `info(id)["job"]["status"]` lookup could not work; read its status
  # attribute instead.
  info(id).status
end
|
107
|
+
|
108
|
+
# Delete the submitted job.
|
109
|
+
#
|
110
|
+
# @param id [#to_s] the id of the job
|
111
|
+
# @return [void]
|
112
|
+
def delete(id)
  batch.delete(id)
rescue Batch::Error => batch_error
  # Translate helper failures into the adapter-level error type.
  raise JobAdapterError, batch_error.message
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|