ood_core 0.27.1 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ood_core/batch_connect/template.rb +3 -2
- data/lib/ood_core/batch_connect/templates/vnc.rb +1 -1
- data/lib/ood_core/batch_connect/templates/vnc_container.rb +1 -1
- data/lib/ood_core/job/adapter.rb +12 -0
- data/lib/ood_core/job/adapters/coder/batch.rb +170 -0
- data/lib/ood_core/job/adapters/coder/coder_job_info.rb +8 -0
- data/lib/ood_core/job/adapters/coder.rb +120 -0
- data/lib/ood_core/job/adapters/slurm.rb +133 -3
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cc94feaae3ffa1016b59b053179a18322a7e9c4ad1f937776864a2c2b19fc0a1
|
4
|
+
data.tar.gz: e41085ddcbbfff4a36723da314f41f1010e4db2fb28209ad781d5c2a6516cd46
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85daa1b26fe2b973ecdbb3b9d2f72b37b142795f637d2db0a73611de0b4081c052b732c56e71ea7d37830bcf3211edc6c01109a2d2514b1a2500b38ac9ac6b98
|
7
|
+
data.tar.gz: 3a2f47afde0ac909f4d3ea6a7ccf2fcf48b8e13de94ad4e550ee1b2492cf28c7dcc72df179755b332b358f49accb72e8c229a1b506de6614148db14233e6c902
|
@@ -209,9 +209,10 @@ module OodCore
|
|
209
209
|
export -f wait_until_port_used
|
210
210
|
|
211
211
|
# Generate random alphanumeric password with $1 (default: #{password_size}) characters
|
212
|
-
create_passwd ()
|
212
|
+
create_passwd () (
|
213
|
+
set +o pipefail # ensure pipefail disabled, `head` closing stdin causes SIGPIPE
|
213
214
|
tr -cd 'a-zA-Z0-9' < /dev/urandom 2> /dev/null | head -c${1:-#{password_size}}
|
214
|
-
|
215
|
+
)
|
215
216
|
export -f create_passwd
|
216
217
|
}
|
217
218
|
export -f source_helpers
|
@@ -185,7 +185,7 @@ module OodCore
|
|
185
185
|
# connection
|
186
186
|
echo "Scanning VNC log file for user authentications..."
|
187
187
|
while read -r line; do
|
188
|
-
if [[ ${line} =~ "Full-control authentication enabled
|
188
|
+
if [[ ${line} =~ "Full-control authentication enabled" ]]; then
|
189
189
|
change_passwd
|
190
190
|
create_yml
|
191
191
|
fi
|
@@ -183,7 +183,7 @@ module OodCore
|
|
183
183
|
# connection
|
184
184
|
echo "Scanning VNC log file for user authentications..."
|
185
185
|
while read -r line; do
|
186
|
-
if [[ ${line} =~ "Full-control authentication enabled
|
186
|
+
if [[ ${line} =~ "Full-control authentication enabled" ]]; then
|
187
187
|
change_passwd
|
188
188
|
create_yml
|
189
189
|
fi
|
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -57,6 +57,18 @@ module OodCore
|
|
57
57
|
raise NotImplementedError, "subclass did not define #info_all"
|
58
58
|
end
|
59
59
|
|
60
|
+
# Retrieve historic info for all completed jobs from the resource manager.
# What can be returned depends on the data retention configuration of the
# resource manager.
# @abstract Subclass is expected to implement {#info_historic}
# @raise [NotImplementedError] if subclass did not define {#info_historic}
#
# @param opts [#to_h] options to filter jobs in the resource manager.
#
# @return [Array<Info>] information describing the jobs
def info_historic(opts: {})
  raise NotImplementedError, "subclass did not define #info_historic"
end
|
71
|
+
|
60
72
|
# Retrieve info for all jobs for a given owner or owners from the
|
61
73
|
# resource manager
|
62
74
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
@@ -0,0 +1,170 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "json"
|
3
|
+
|
4
|
+
# Utility class for the Coder adapter to interact with the Coder API.
|
5
|
+
class OodCore::Job::Adapters::Coder::Batch
|
6
|
+
require_relative "coder_job_info"
|
7
|
+
class Error < StandardError; end
|
8
|
+
# Remember the connection settings used to build every API request.
#
# @param config [Hash] adapter configuration; the `:host` and `:token`
#   entries (symbol keys) are read
def initialize(config)
  @host = config[:host]
  @token = config[:token]
end
|
12
|
+
|
13
|
+
# Look up the application credential entry for a project.
#
# Reads `/home/<username>/application_credentials.json`, parses it as JSON
# and returns the first entry whose "project_id" equals the given id.
#
# @param username [#to_s] user whose credentials file is read
# @param project_id [String] project id to match against each entry
# @return [Hash, nil] the matching entry, or nil when no entry matches
def get_os_app_credentials(username, project_id)
  credentials = JSON.parse(File.read("/home/#{username}/application_credentials.json"))
  credentials.find { |cred| cred["project_id"] == project_id }
end
|
18
|
+
|
19
|
+
# Build the `rich_parameter_values` list sent when creating a workspace.
#
# The list always starts with the application credential name, id and
# secret followed by the project id; every entry of `coder_parameters` is
# appended afterwards with its value converted to a String.
#
# @param coder_parameters [Hash, nil] extra template parameters (optional)
# @param project_id [Object] value of the "project_id" parameter
# @param os_app_credentials [Hash] entry providing "name", "id" and "secret"
# @return [Array<Hash>] list of `{ name:, value: }` hashes
def get_rich_parameters(coder_parameters, project_id, os_app_credentials)
  credential_values = [
    { name: "application_credential_name", value: os_app_credentials["name"] },
    { name: "application_credential_id", value: os_app_credentials["id"] },
    { name: "application_credential_secret", value: os_app_credentials["secret"] },
    { name: "project_id", value: project_id }
  ]
  extra_values = (coder_parameters || {}).map { |key, value| { name: key, value: value.to_s } }
  credential_values + extra_values
end
|
33
|
+
|
34
|
+
# Build the HTTP headers sent with every API request.
#
# @param coder_token [String] value of the "Coder-Session-Token" header
# @return [Hash{String=>String}] JSON content negotiation headers plus the
#   session token
def get_headers(coder_token)
  {
    'Content-Type' => 'application/json',
    'Accept' => 'application/json',
    'Coder-Session-Token' => coder_token
  }
end
|
41
|
+
|
42
|
+
# Create a workspace for the current user from the script's native options.
#
# Reads `:org_id`, `:project_id`, `:coder_parameters`, `:template_id`,
# `:template_version_name` and `:workspace_name` from `script.native`.
# The workspace name is "<username>-<workspace_name>-<random base36 suffix>".
#
# @param script [#native] object whose `native` hash holds the options above
# @return [Object] the "id" field of the API response
def submit(script)
  native = script.native
  org_id = native[:org_id]
  project_id = native[:project_id]
  coder_parameters = native[:coder_parameters]
  endpoint = "https://#{@host}/api/v2/organizations/#{org_id}/members/#{username}/workspaces"
  os_app_credentials = get_os_app_credentials(username, project_id)
  headers = get_headers(@token)
  body = {
    template_id: native[:template_id],
    template_version_name: native[:template_version_name],
    # random suffix keeps repeated submissions of the same workspace name apart
    name: "#{username}-#{native[:workspace_name]}-#{rand(2_821_109_907_456).to_s(36)}",
    rich_parameter_values: get_rich_parameters(coder_parameters, project_id, os_app_credentials)
  }

  api_call('post', endpoint, headers, body)["id"]
end
|
59
|
+
|
60
|
+
# Request deletion of a workspace by posting a build with the "delete"
# transition (and `orphan` set to false).
#
# @param id [#to_s] the workspace id
# @return [Object] the parsed API response
def delete(id)
  endpoint = "https://#{@host}/api/v2/workspaces/#{id}/builds"
  body = { 'orphan' => false, 'transition' => 'delete' }
  api_call('post', endpoint, get_headers(@token), body)
end
|
69
|
+
|
70
|
+
# Fetch a workspace and convert the response into a job info object.
# The request passes `include_deleted=true`.
#
# @param id [#to_s] the workspace id
# @return [Object] result of {#workspace_info_from_json} for the response
def info(id)
  endpoint = "https://#{@host}/api/v2/workspaces/#{id}?include_deleted=true"
  workspace_info_from_json(api_call('get', endpoint, get_headers(@token)))
end
|
75
|
+
|
76
|
+
# Translate a Coder build state into the state name used for the job status.
#
# @param coder_state [String, nil] state reported by the API
# @return [String] "queued", "suspended", "running", "completed", or
#   "undetermined" for any state not listed below
def coder_state_to_ood_status(coder_state)
  case coder_state
  when "starting" then "queued"
  when "failed" then "suspended"
  when "running" then "running"
  when "deleted", "stopped" then "completed"
  else "undetermined"
  end
end
|
92
|
+
|
93
|
+
# Build the job info object for a workspace API response.
#
# The metadata of the build resource named "coder_output" is flattened into
# a `{ key.to_sym => value }` hash; it becomes the `native` attribute and
# supplies `:floating_ip` as the connection host (port is fixed to 80).
#
# @param json_data [Hash] parsed workspace JSON
# @param status [String] state name used to build the job status
# @return [OodCore::Job::Adapters::Coder::CoderJobInfo]
def build_coder_job_info(json_data, status)
  # `dig` tolerates a response without "latest_build" or "resources"
  coder_output = json_data.dig("latest_build", "resources")
                          &.find { |resource| resource["name"] == "coder_output" }
  metadata = coder_output&.dig("metadata") || []
  coder_output_hash = metadata.map { |meta| [meta["key"].to_sym, meta["value"]] }.to_h

  OodCore::Job::Adapters::Coder::CoderJobInfo.new(
    id: json_data["id"],
    job_name: json_data["workspace_name"],
    status: OodCore::Job::Status.new(state: status),
    job_owner: json_data["workspace_owner_name"],
    submission_time: json_data["created_at"],
    dispatch_time: json_data["updated_at"],
    wallclock_time: wallclock_time(json_data, status),
    ood_connection_info: { host: coder_output_hash[:floating_ip], port: 80 },
    native: coder_output_hash
  )
end
|
110
|
+
|
111
|
+
# Seconds between the workspace's start and end timestamps.
#
# @param json_data [Hash] parsed workspace JSON
# @param status [String] state name forwarded to {#end_time}
# @return [Integer] `end_time - start_time` in seconds
def wallclock_time(json_data, status)
  started_at = start_time(json_data)
  finished_at = end_time(json_data, status)
  finished_at - started_at
end
|
116
|
+
|
117
|
+
# Epoch seconds parsed from the workspace's top-level "updated_at" field.
# NOTE(review): "updated_at" is treated as the start timestamp here —
# confirm this is the intended field.
#
# @param json_data [Hash] parsed workspace JSON
# @return [Integer] seconds since the epoch
def start_time(json_data)
  DateTime.parse(json_data["updated_at"]).to_time.to_i
end
|
121
|
+
|
122
|
+
# Epoch seconds at which the workspace finished, or the current time while
# it has not finished.
#
# Callers pass the status produced by {#coder_state_to_ood_status}, which
# reports finished workspaces as "completed". The raw "deleted" state is
# still accepted so direct callers keep working. For a finished workspace
# the timestamp is the latest build's "updated_at"; if that field is absent
# the current time is returned.
#
# @param json_data [Hash] parsed workspace JSON
# @param status [#to_s] job state name
# @return [Integer] seconds since the epoch
def end_time(json_data, status)
  finished_at = json_data.dig("latest_build", "updated_at")
  finished = %w[completed deleted].include?(status.to_s)
  return Time.now.to_i unless finished && finished_at

  DateTime.parse(finished_at).to_time.to_i
end
|
131
|
+
|
132
|
+
# Convert a workspace API response into a job info object.
#
# The state is taken from `latest_build.status`, falling back to
# `latest_build.job.status`, and mapped with {#coder_state_to_ood_status}.
#
# @param json_data [Hash] parsed workspace JSON
# @return [Object] result of {#build_coder_job_info}
def workspace_info_from_json(json_data)
  coder_state = json_data.dig("latest_build", "status") ||
                json_data.dig("latest_build", "job", "status")
  build_coder_job_info(json_data, coder_state_to_ood_status(coder_state))
end
|
137
|
+
|
138
|
+
# Perform an HTTP request and return the parsed JSON response.
#
# @param method [String] 'get', 'post' or 'delete' (case-insensitive)
# @param endpoint [String] full request URL; TLS is used for https URLs
# @param headers [Hash] request headers
# @param body [#to_json, nil] optional payload, sent as JSON
# @raise [ArgumentError] if the HTTP method is not supported
# @raise [Error] if the response is not a success (2xx) response
# @return [Object] the parsed JSON response body
def api_call(method, endpoint, headers, body = nil)
  uri = URI(endpoint)

  request_class = case method.downcase
                  when 'get' then Net::HTTP::Get
                  when 'post' then Net::HTTP::Post
                  when 'delete' then Net::HTTP::Delete
                  else raise ArgumentError, "Invalid HTTP method: #{method}"
                  end
  request = request_class.new(uri, headers)
  request.body = body.to_json if body

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(request)
  end

  unless response.is_a?(Net::HTTPSuccess)
    # The request body is deliberately left out of the message: workspace
    # creation requests carry the application credential secret.
    raise Error, "HTTP Error: #{response.code} #{response.message} for request #{endpoint}"
  end

  JSON.parse(response.body)
end
|
165
|
+
|
166
|
+
# Login name of the current user, looked up once via `Etc.getlogin` and
# cached in `@username` (a nil result is not cached and is looked up again).
#
# @return [String, nil] the login name reported by `Etc.getlogin`
def username
  @username ||= Etc.getlogin
end
|
169
|
+
|
170
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/refinements/array_extensions"
|
3
|
+
require 'net/http'
|
4
|
+
require 'json'
|
5
|
+
require 'etc'
|
6
|
+
|
7
|
+
module OodCore
|
8
|
+
module Job
|
9
|
+
class Factory
|
10
|
+
using Refinements::HashExtensions
|
11
|
+
|
12
|
+
# Build the Coder adapter from a configuration.
#
# @param config [#to_h] the configuration for the job adapter; its keys are
#   symbolized before being handed to the batch object
# @return [Adapters::Coder] adapter wrapping a new batch object
def self.build_coder(config)
  settings = config.to_h.symbolize_keys
  Adapters::Coder.new(Adapters::Coder::Batch.new(settings))
end
|
16
|
+
end
|
17
|
+
|
18
|
+
module Adapters
|
19
|
+
attr_reader :host, :token
|
20
|
+
|
21
|
+
# The adapter class for Coder.
|
22
|
+
class Coder < Adapter
|
23
|
+
|
24
|
+
using Refinements::ArrayExtensions
|
25
|
+
using Refinements::HashExtensions
|
26
|
+
|
27
|
+
require "ood_core/job/adapters/coder/batch"
|
28
|
+
|
29
|
+
attr_reader :batch
|
30
|
+
# @param batch [Object] object that performs the API calls; it receives
#   `submit`, `info` and `delete` from this adapter
def initialize(batch)
  @batch = batch
end
|
33
|
+
|
34
|
+
# Submit a job with the attributes defined in the job template instance.
# The script is handed unchanged to the batch object.
# @example Submit job template
#   workspace_id = job_adapter.submit(script)
# @param script [Script] script object that describes the
#   script and attributes for the submitted job
# @param after [#to_s, Array<#to_s>] accepted for interface compatibility;
#   not used by this adapter
# @param afterok [#to_s, Array<#to_s>] accepted for interface compatibility;
#   not used by this adapter
# @param afternotok [#to_s, Array<#to_s>] accepted for interface
#   compatibility; not used by this adapter
# @param afterany [#to_s, Array<#to_s>] accepted for interface compatibility;
#   not used by this adapter
# @raise [ArgumentError] if no script is given
# @raise [JobAdapterError] if the batch object reports an error
# @return [Object] the job id returned by the batch object
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
  raise ArgumentError, 'Must specify the script' if script.nil?

  batch.submit(script)
rescue Batch::Error => e
  raise JobAdapterError, e.message
end
|
61
|
+
|
62
|
+
# Retrieve info for all jobs from the resource manager.
#
# Not implemented for this adapter: the batch object offers no way to list
# jobs, so the call always fails with an adapter error instead of the
# NoMethodError that invoking `batch.method_missing` directly produced.
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
#   This array specifies only attrs you want, in addition to id and status.
#   If an array, the Info object that is returned to you is not guaranteed
#   to have a value for any attr besides the ones specified and id and status.
# @raise [JobAdapterError] always
# @return [Array<Info>] information describing submitted jobs
def info_all(attrs: nil)
  # TODO - implement info all for namespaces?
  raise JobAdapterError, "info_all is not implemented for the Coder adapter"
end
|
79
|
+
|
80
|
+
# Whether the adapter supports job arrays
# @return [Boolean] always false for this adapter
def supports_job_arrays?
  false
end
|
86
|
+
|
87
|
+
# Retrieve job info from the resource manager
# @param id [#to_s] the id of the job; converted to a String before lookup
# @raise [JobAdapterError] if the batch object reports an error
# @return [Info] information describing submitted job
def info(id)
  batch.info(id.to_s)
rescue Batch::Error => e
  raise JobAdapterError, e.message
end
|
97
|
+
|
98
|
+
# Retrieve job status from resource manager
# @param id [#to_s] the id of the job
# @raise [JobAdapterError] if retrieving the job info fails
# @return [Status] status of job
def status(id)
  # {#info} returns a job info object, not a Hash, so read its status
  # attribute instead of indexing it.
  # NOTE(review): assumes the info object exposes `status` — confirm against
  # CoderJobInfo.
  info(id).status
end
|
107
|
+
|
108
|
+
# Delete the submitted job.
#
# @param id [#to_s] the id of the job
# @raise [JobAdapterError] if the batch object reports an error
# @return [Object] whatever the batch object returns for the deletion
def delete(id)
  batch.delete(id)
rescue Batch::Error => e
  raise JobAdapterError, e.message
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -323,6 +323,46 @@ module OodCore
|
|
323
323
|
}
|
324
324
|
end
|
325
325
|
|
326
|
+
# Job info fields requested from a formatted `sacct` call.
#
# @return [Hash{Symbol=>String}] attribute name => `sacct` field name, in
#   the order the fields are requested
def sacct_info_fields
  {
    # The user name of the user who ran the job.
    user: 'User',
    # The group name of the user who ran the job.
    group_name: 'Group',
    # Job Id for reference
    job_id: 'JobId',
    # The name of the job or job step
    job_name: 'JobName',
    # The job's elapsed time.
    elapsed: 'Elapsed',
    # Minimum required memory for the job
    req_mem: 'ReqMem',
    # Count of allocated CPUs
    alloc_cpus: 'AllocCPUS',
    # Number of requested CPUs.
    req_cpus: 'ReqCPUS',
    # What the timelimit was/is for the job
    time_limit: 'Timelimit',
    # Displays the job status, or state
    state: 'State',
    # The sum of the SystemCPU and UserCPU time used by the job or job step
    total_cpu: 'TotalCPU',
    # Maximum resident set size of all tasks in job.
    max_rss: 'MaxRSS',
    # Identifies the partition on which the job ran.
    partition: 'Partition',
    # The time the job was submitted. In the same format as End.
    submit_time: 'Submit',
    # Initiation time of the job. In the same format as End.
    start_time: 'Start',
    # Termination time of the job.
    end: 'End',
    # Trackable resources. These are the minimum resource counts requested by the job/step at submission time.
    gres: 'ReqTRES'
  }
end
|
365
|
+
|
326
366
|
def queues
|
327
367
|
info_raw = call('scontrol', 'show', 'part', '-o')
|
328
368
|
|
@@ -357,6 +397,31 @@ module OodCore
|
|
357
397
|
end.compact
|
358
398
|
end
|
359
399
|
|
400
|
+
# Query `sacct` and return one hash per output line.
#
# @param job_ids [Array<#to_s>] job ids to filter by (no filter when empty)
# @param states [Array<#to_s>] job states to filter by (no filter when empty)
# @param from [#to_s, nil] optional start of the time window (`-S`)
# @param to [#to_s, nil] optional end of the time window (`-E`)
# @param show_steps [Boolean] when false, `--allocations` is passed so only
#   job-level records are shown
# @return [Array<Hash>] hashes keyed by the {#sacct_info_fields} keys; blank
#   values are returned as nil
def sacct_info(job_ids, states, from, to, show_steps)
  # https://slurm.schedmd.com/sacct.html
  fields = sacct_info_fields
  args = ['-P', '--delimiter', UNIT_SEPARATOR] # Output will be delimited
  args.push('-n')                               # No header
  args.push('--units', 'G')                     # Memory units in GB
  args.push('--allocations') unless show_steps  # Job statistics only, no steps
  args.push('-o', fields.values.join(','))      # Required data
  args.push('--state', states.join(',')) unless states.empty?
  args.push('-j', job_ids.join(',')) unless job_ids.empty?
  args.push('-S', from) if from
  args.push('-E', to) if to

  call('sacct', *args).each_line.map do |line|
    # Replace blank values with nil
    values = line.strip.split(UNIT_SEPARATOR).map { |value| value.to_s.empty? ? nil : value }
    Hash[fields.keys.zip(values)] unless values.empty?
  end.compact
end
|
424
|
+
|
360
425
|
private
|
361
426
|
def str_to_queue_info(line)
|
362
427
|
hsh = line.split(' ').map do |token|
|
@@ -372,7 +437,11 @@ module OodCore
|
|
372
437
|
hsh[:AllowAccounts].to_s.split(',')
|
373
438
|
end
|
374
439
|
|
375
|
-
hsh[:deny_accounts] = hsh[:
|
440
|
+
hsh[:deny_accounts] = if !hsh[:allow_accounts].nil?
|
441
|
+
[] # manpage says AllowAccounts negates DenyAccounts
|
442
|
+
else
|
443
|
+
hsh[:DenyAccounts].nil? ? [] : hsh[:DenyAccounts].to_s.split(',')
|
444
|
+
end
|
376
445
|
|
377
446
|
hsh[:tres] = case hsh[:TRES]
|
378
447
|
when nil, '(null)', ''
|
@@ -466,8 +535,23 @@ module OodCore
|
|
466
535
|
'SE' => :completed, # SPECIAL_EXIT
|
467
536
|
'ST' => :running, # STOPPED
|
468
537
|
'S' => :suspended, # SUSPENDED
|
469
|
-
'TO' => :completed,
|
470
|
-
'OOM' => :completed
|
538
|
+
'TO' => :completed, # TIMEOUT
|
539
|
+
'OOM' => :completed, # OUT_OF_MEMORY
|
540
|
+
|
541
|
+
'BOOT_FAIL' => :completed,
|
542
|
+
'CANCELED' => :completed,
|
543
|
+
'COMPLETED' => :completed,
|
544
|
+
'DEADLINE' => :completed,
|
545
|
+
'FAILED' => :completed,
|
546
|
+
'NODE_FAIL' => :completed,
|
547
|
+
'OUT_OF_MEMORY' => :completed,
|
548
|
+
'PENDING' => :queued,
|
549
|
+
'PREEMPTED' => :completed,
|
550
|
+
'RUNNING' => :running,
|
551
|
+
'REQUEUED' => :queued,
|
552
|
+
'REVOKED' => :completed,
|
553
|
+
'SUSPENDED' => :suspended,
|
554
|
+
'TIMEOUT' => :completed,
|
471
555
|
}
|
472
556
|
|
473
557
|
# @api private
|
@@ -586,6 +670,45 @@ module OodCore
|
|
586
670
|
raise JobAdapterError, e.message
|
587
671
|
end
|
588
672
|
|
673
|
+
# Retrieve historic info for all completed jobs from the resource manager.
#
# Known options:
#   job_ids [Array<#to_s>] optional list of job ids to filter the results.
#   states [Array<#to_s>] optional list of job state codes.
#     Selects jobs based on their state during the time period given.
#   from [#to_s] optional date string to filter jobs in any state after the specified time.
#     If states are provided, filter jobs in these states after this period
#   to [#to_s] optional date string to filter jobs in any state before the specified time.
#     If states are provided, filter jobs in these states before this period.
#   show_steps [#Boolean] optional boolean to include job steps in the results.
#
# @param opts [#to_h] options to filter jobs, see above
# @return [Array<Info>] information describing submitted jobs
# @see Adapter#info_historic
def info_historic(opts: {})
  opts = opts.to_h
  job_ids = opts.fetch(:job_ids, [])
  states = opts.fetch(:states, [])
  from = opts.fetch(:from, nil)
  to = opts.fetch(:to, nil)
  show_steps = opts.fetch(:show_steps, false)

  @slurm.sacct_info(job_ids, states, from, to, show_steps).map do |job|
    Info.new(
      id: job[:job_id],
      status: get_state(job[:state]),
      job_name: job[:job_name],
      job_owner: job[:user],
      procs: job[:alloc_cpus],
      queue_name: job[:partition],
      wallclock_time: duration_in_seconds(job[:elapsed]),
      wallclock_limit: duration_in_seconds(job[:time_limit]),
      cpu_time: duration_in_seconds(job[:total_cpu]),
      submission_time: parse_time(job[:submit_time]),
      dispatch_time: parse_time(job[:start_time]),
      native: job,
      gpus: self.class.gpus_from_gres(job[:gres])
    )
  end
end
|
711
|
+
|
589
712
|
# Retrieve job info from the resource manager
|
590
713
|
# @param id [#to_s] the id of the job
|
591
714
|
# @raise [JobAdapterError] if something goes wrong getting job info
|
@@ -718,6 +841,13 @@ module OodCore
|
|
718
841
|
"%02d:%02d:%02d" % [time/3600, time/60%60, time%60]
|
719
842
|
end
|
720
843
|
|
844
|
+
# Parse date time string ignoring unknown values returned by Slurm.
#
# @param date_time [#to_s, nil] timestamp text; nil, blank, "N/A", "NONE"
#   and "UNKNOWN" (any letter case) are treated as "no time"
# @return [Time, nil] the parsed time, or nil when there is no usable value
def parse_time(date_time)
  # nil must be tolerated: blank sacct fields arrive here as nil
  text = date_time.to_s.strip
  return nil if text.empty? || %w[N/A NONE UNKNOWN].include?(text.upcase)

  Time.parse(text)
end
|
850
|
+
|
721
851
|
# Convert host list string to individual nodes
|
722
852
|
# "em082"
|
723
853
|
# "em[014,055-056,161]"
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_runtime_dependency "ffi", "~> 1.16.3"
|
27
27
|
spec.add_runtime_dependency "rexml", "~> 3.2"
|
28
28
|
spec.add_development_dependency "bundler", "~> 2.1"
|
29
|
-
spec.add_development_dependency "rake", "~> 13.
|
29
|
+
spec.add_development_dependency "rake", "~> 13.3.0"
|
30
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
31
|
spec.add_development_dependency "pry", "~> 0.10"
|
32
32
|
spec.add_development_dependency "timecop", "~> 0.8"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.28.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2025-
|
13
|
+
date: 2025-07-16 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -74,14 +74,14 @@ dependencies:
|
|
74
74
|
requirements:
|
75
75
|
- - "~>"
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: 13.
|
77
|
+
version: 13.3.0
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
80
|
version_requirements: !ruby/object:Gem::Requirement
|
81
81
|
requirements:
|
82
82
|
- - "~>"
|
83
83
|
- !ruby/object:Gem::Version
|
84
|
-
version: 13.
|
84
|
+
version: 13.3.0
|
85
85
|
- !ruby/object:Gem::Dependency
|
86
86
|
name: rspec
|
87
87
|
requirement: !ruby/object:Gem::Requirement
|
@@ -204,6 +204,9 @@ files:
|
|
204
204
|
- lib/ood_core/job/account_info.rb
|
205
205
|
- lib/ood_core/job/adapter.rb
|
206
206
|
- lib/ood_core/job/adapters/ccq.rb
|
207
|
+
- lib/ood_core/job/adapters/coder.rb
|
208
|
+
- lib/ood_core/job/adapters/coder/batch.rb
|
209
|
+
- lib/ood_core/job/adapters/coder/coder_job_info.rb
|
207
210
|
- lib/ood_core/job/adapters/drmaa.rb
|
208
211
|
- lib/ood_core/job/adapters/fujitsu_tcs.rb
|
209
212
|
- lib/ood_core/job/adapters/helper.rb
|