ood_core 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +50 -0
- data/.rspec +2 -0
- data/.travis.yml +9 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +60 -0
- data/Rakefile +6 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/lib/ood_core.rb +34 -0
- data/lib/ood_core/acl/adapter.rb +17 -0
- data/lib/ood_core/acl/adapters/group.rb +59 -0
- data/lib/ood_core/acl/factory.rb +41 -0
- data/lib/ood_core/cluster.rb +143 -0
- data/lib/ood_core/clusters.rb +114 -0
- data/lib/ood_core/errors.rb +19 -0
- data/lib/ood_core/job/adapter.rb +89 -0
- data/lib/ood_core/job/adapters/lsf.rb +193 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +160 -0
- data/lib/ood_core/job/adapters/lsf/helper.rb +26 -0
- data/lib/ood_core/job/adapters/slurm.rb +470 -0
- data/lib/ood_core/job/adapters/torque.rb +274 -0
- data/lib/ood_core/job/factory.rb +41 -0
- data/lib/ood_core/job/info.rb +141 -0
- data/lib/ood_core/job/node_info.rb +47 -0
- data/lib/ood_core/job/node_request.rb +51 -0
- data/lib/ood_core/job/script.rb +235 -0
- data/lib/ood_core/job/status.rb +128 -0
- data/lib/ood_core/refinements/array_extensions.rb +22 -0
- data/lib/ood_core/refinements/hash_extensions.rb +25 -0
- data/lib/ood_core/version.rb +4 -0
- data/ood_core.gemspec +32 -0
- metadata +182 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
module OodCore
  module Job
    # An object that describes the resources used on a specific node
    class NodeInfo
      # The name of the host machine
      # @return [String] node name
      attr_reader :name

      # The number of procs reserved on the given machine
      # @return [Integer, nil] number of procs
      attr_reader :procs

      # Unrecognized keyword arguments are accepted and ignored.
      # @param name [#to_s] node name
      # @param procs [#to_i, nil] number of procs
      def initialize(name:, procs: nil, **_)
        @name  = name.to_s
        @procs = procs && procs.to_i
      end

      # Convert object to hash
      # @return [Hash] object as hash
      def to_h
        { name: name, procs: procs }
      end

      # The comparison operator. Two objects are equivalent when their hash
      # representations match; an object that cannot be converted with `to_h`
      # is never equivalent (rather than raising NoMethodError).
      # @param other [#to_h] object to compare against
      # @return [Boolean] whether objects are equivalent
      def ==(other)
        other.respond_to?(:to_h) && to_h == other.to_h
      end

      # Whether objects are identical to each other (same class and equivalent)
      # @param other [#to_h] object to compare against
      # @return [Boolean] whether objects are identical
      def eql?(other)
        self.class == other.class && self == other
      end

      # Generate a hash value for this object, consistent with {#eql?}
      # @return [Integer] hash value of object
      def hash
        [self.class, to_h].hash
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'ood_core/refinements/array_extensions'
|
2
|
+
|
3
|
+
module OodCore
  module Job
    # An object that describes a request for a node when submitting a job
    class NodeRequest
      using Refinements::ArrayExtensions

      # Number of processors usable by job
      # @return [Integer, nil] number of procs
      attr_reader :procs

      # List of properties required by job
      # @return [Array<String>, nil] list of properties
      attr_reader :properties

      # Unrecognized keyword arguments are accepted and ignored.
      # @param procs [#to_i, nil] number of procs
      # @param properties [#to_s, Array<#to_s>, nil] list of properties
      def initialize(procs: nil, properties: nil, **_)
        @procs      = procs && procs.to_i
        # A single property is wrapped so the reader always yields an Array
        @properties = properties && Array.wrap(properties).map(&:to_s)
      end

      # Convert object to hash
      # @return [Hash] object as hash
      def to_h
        { procs: procs, properties: properties }
      end

      # The comparison operator. Two objects are equivalent when their hash
      # representations match; an object that cannot be converted with `to_h`
      # (e.g. a node name String) is never equivalent, rather than raising
      # NoMethodError.
      # @param other [#to_h] object to compare against
      # @return [Boolean] whether objects are equivalent
      def ==(other)
        other.respond_to?(:to_h) && to_h == other.to_h
      end

      # Whether objects are identical to each other (same class and equivalent)
      # @param other [#to_h] object to compare against
      # @return [Boolean] whether objects are identical
      def eql?(other)
        self.class == other.class && self == other
      end

      # Generate a hash value for this object, consistent with {#eql?}
      # @return [Integer] hash value of object
      def hash
        [self.class, to_h].hash
      end
    end
  end
end
|
@@ -0,0 +1,235 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'time'
|
3
|
+
require 'ood_core/refinements/array_extensions'
|
4
|
+
|
5
|
+
module OodCore
  module Job
    # An object that describes a batch job before it is submitted. This includes
    # the resources this batch job will require of the resource manager.
    class Script
      using Refinements::ArrayExtensions

      # Content of the script to be executed on the remote host
      # @return [String] the script content
      attr_reader :content

      # Arguments supplied to script to be executed
      # @return [Array<String>, nil] arguments supplied to script
      attr_reader :args

      # Whether job is held after submitted
      # @return [Boolean, nil] whether job is held after submit
      attr_reader :submit_as_hold

      # Whether job can safely be restarted by the resource manager, for example on
      # node failure or some other re-scheduling event
      # @note This SHOULD NOT be used to let the application denote the
      #   checkpointability of a job
      # @return [Boolean, nil] whether job can be restarted
      attr_reader :rerunnable

      # Environment variables to be set on remote host when running job
      # @note These will override the remote host environment settings
      # @return [Hash{String=>String}, nil] environment variables
      attr_reader :job_environment

      # Directory where the job is executed from
      # @return [Pathname, nil] working directory
      attr_reader :workdir

      # List of email addresses that should be used when resource manager sends
      # status notifications
      # @return [Array<String>, nil] list of emails
      attr_reader :email

      # Whether given email addresses should be notified when job starts
      # @return [Boolean, nil] whether email when job starts
      attr_reader :email_on_started

      # Whether given email addresses should be notified when job ends
      # @return [Boolean, nil] whether email when job ends
      attr_reader :email_on_terminated

      # The name of the job
      # @return [String, nil] name of job
      attr_reader :job_name

      # Path to file specifying the input stream of the job
      # @return [Pathname, nil] file path specifying input stream
      attr_reader :input_path

      # Path to file specifying the output stream of the job
      # @return [Pathname, nil] file path specifying output stream
      attr_reader :output_path

      # Path to file specifying the error stream of the job
      # @return [Pathname, nil] file path specifying error stream
      attr_reader :error_path

      # Whether the error stream should be intermixed with the output stream
      # @return [Boolean, nil] whether error stream intermixed with output stream
      attr_reader :join_files

      # Identifier of existing reservation to be associated with the job
      # @return [String, nil] reservation id
      attr_reader :reservation_id

      # Name of the queue the job should be submitted to
      # @return [String, nil] queue name
      attr_reader :queue_name

      # The scheduling priority for the job
      # @return [Integer, nil] scheduling priority
      attr_reader :priority

      # The minimum amount of physical memory in kilobytes that should be
      # available for the job
      # @return [Integer, nil] minimum physical memory
      attr_reader :min_phys_memory

      # The earliest time when the job may be eligible to run
      # @return [Time, nil] eligible start time
      attr_reader :start_time

      # The maximum amount of real time during which the job can be running in
      # seconds
      # @return [Integer, nil] max real time
      attr_reader :wall_time

      # The attribute used for job accounting purposes
      # @return [String, nil] accounting id
      attr_reader :accounting_id

      # Node or list of nodes detailing the specifications the job should run on
      # @example Job to run on a list of defined nodes
      #   my_script.nodes
      #   #=> ["n0001", "n0002", "n0003"]
      # @example Job to run on 2 nodes with 12 procs per node
      #   my_script.nodes
      #   #=> [
      #   #     #<OodCore::Job::NodeRequest procs=12, properties=nil>,
      #   #     #<OodCore::Job::NodeRequest procs=12, properties=nil>
      #   #   ]
      # @example Create job script that will run on 100 nodes with 20 procs per node
      #   OodCore::Job::Script.new(
      #     content: "#!/bin/bash\necho 'hello'",
      #     nodes: [OodCore::Job::NodeRequest.new(procs: 20)] * 100
      #   )
      # @return [Array<String, NodeRequest>, nil] list of nodes
      attr_reader :nodes

      # Object detailing any native specifications that are implementation specific
      # @note Should be avoided at all costs.
      # @return [Object, nil] native specifications
      attr_reader :native

      # Every optional attribute left as `nil` stays `nil`; supplied values are
      # coerced to the types documented on the corresponding readers.
      # Unrecognized keyword arguments are accepted and ignored.
      # @param content [#to_s] the script content
      # @param args [Array<#to_s>, nil] arguments supplied to script
      # @param submit_as_hold [Boolean, nil] whether job is held after submit
      # @param rerunnable [Boolean, nil] whether job can be restarted
      # @param job_environment [Hash{#to_s => #to_s}, nil] environment variables
      # @param workdir [#to_s, nil] working directory
      # @param email [#to_s, Array<#to_s>, nil] list of emails
      # @param email_on_started [Boolean, nil] whether email when job starts
      # @param email_on_terminated [Boolean, nil] whether email when job ends
      # @param job_name [#to_s, nil] name of job
      # @param input_path [#to_s, nil] file path specifying input stream
      # @param output_path [#to_s, nil] file path specifying output stream
      # @param error_path [#to_s, nil] file path specifying error stream
      # @param join_files [Boolean, nil] whether error stream intermixed with output stream
      # @param reservation_id [#to_s, nil] reservation id
      # @param queue_name [#to_s, nil] queue name
      # @param priority [#to_i, nil] scheduling priority
      # @param min_phys_memory [#to_i, nil] minimum physical memory
      # @param start_time [#to_i, nil] eligible start time (passed to `Time.at`
      #   after `to_i`, i.e. interpreted as seconds since the Epoch)
      # @param wall_time [#to_i, nil] max real time
      # @param accounting_id [#to_s, nil] accounting id
      # @param nodes [#to_h, #to_s, Array<#to_h, #to_s>, nil] list of nodes
      # @param native [Object, nil] native specifications
      def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil,
                     job_environment: nil, workdir: nil, email: nil,
                     email_on_started: nil, email_on_terminated: nil, job_name: nil,
                     input_path: nil, output_path: nil, error_path: nil,
                     join_files: nil, reservation_id: nil, queue_name: nil,
                     priority: nil, min_phys_memory: nil, start_time: nil,
                     wall_time: nil, accounting_id: nil, nodes: nil, native: nil,
                     **_)
        @content = content.to_s

        # Flags are stored untouched so that `nil` ("unspecified") is preserved
        @submit_as_hold      = submit_as_hold
        @rerunnable          = rerunnable
        @email_on_started    = email_on_started
        @email_on_terminated = email_on_terminated
        @join_files          = join_files

        @args            = args && args.map(&:to_s)
        @job_environment = job_environment && job_environment.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
        @workdir         = workdir && Pathname.new(workdir.to_s)
        @email           = email && Array.wrap(email).map(&:to_s)
        @job_name        = job_name && job_name.to_s
        @input_path      = input_path && Pathname.new(input_path.to_s)
        @output_path     = output_path && Pathname.new(output_path.to_s)
        @error_path      = error_path && Pathname.new(error_path.to_s)
        @reservation_id  = reservation_id && reservation_id.to_s
        @queue_name      = queue_name && queue_name.to_s
        @priority        = priority && priority.to_i
        @min_phys_memory = min_phys_memory && min_phys_memory.to_i
        @start_time      = start_time && Time.at(start_time.to_i)
        @wall_time       = wall_time && wall_time.to_i
        @accounting_id   = accounting_id && accounting_id.to_s
        # Hash-like entries become NodeRequest objects, everything else is
        # treated as a node name. The hash must be splatted (`**`): the
        # NodeRequest initializer only takes keyword arguments, and Ruby 3+
        # no longer converts a positional Hash into keywords.
        @nodes           = nodes && Array.wrap(nodes).map { |n| n.respond_to?(:to_h) ? NodeRequest.new(**n.to_h) : n.to_s }
        @native          = native
      end

      # Convert object to hash
      # @return [Hash] object as hash
      def to_h
        {
          content:             content,
          args:                args,
          submit_as_hold:      submit_as_hold,
          rerunnable:          rerunnable,
          job_environment:     job_environment,
          workdir:             workdir,
          email:               email,
          email_on_started:    email_on_started,
          email_on_terminated: email_on_terminated,
          job_name:            job_name,
          input_path:          input_path,
          output_path:         output_path,
          error_path:          error_path,
          join_files:          join_files,
          reservation_id:      reservation_id,
          queue_name:          queue_name,
          priority:            priority,
          min_phys_memory:     min_phys_memory,
          start_time:          start_time,
          wall_time:           wall_time,
          accounting_id:       accounting_id,
          nodes:               nodes,
          native:              native
        }
      end

      # The comparison operator. Two objects are equivalent when their hash
      # representations match; an object that cannot be converted with `to_h`
      # is never equivalent (rather than raising NoMethodError).
      # @param other [#to_h] object to compare against
      # @return [Boolean] whether objects are equivalent
      def ==(other)
        other.respond_to?(:to_h) && to_h == other.to_h
      end

      # Whether objects are identical to each other (same class and equivalent)
      # @param other [#to_h] object to compare against
      # @return [Boolean] whether objects are identical
      def eql?(other)
        self.class == other.class && self == other
      end

      # Generate a hash value for this object, consistent with {#eql?}
      # @return [Integer] hash value of object
      def hash
        [self.class, to_h].hash
      end
    end
  end
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
module OodCore
  module Job
    # An object that describes the current state of a submitted job
    class Status
      class << self
        # Possible states a submitted job can be in:
        #     # Job status cannot be determined
        #     :undetermined
        #
        #     # Job is queued for being scheduled and executed
        #     :queued
        #
        #     # Job has been placed on hold by the system, the administrator, or
        #     # submitting user
        #     :queued_held
        #
        #     # Job is running on an execution host
        #     :running
        #
        #     # Job has been suspended by the user, the system, or the administrator
        #     :suspended
        #
        #     # Job is completed and not running on an execution host
        #     :completed
        # @return [Array<Symbol>] list of valid states
        def states
          %i(
            undetermined
            queued
            queued_held
            running
            suspended
            completed
          )
        end
      end

      # Current status of submitted job
      # @return [Symbol] status of job
      attr_reader :state

      # Unrecognized keyword arguments are accepted and ignored.
      # @param state [#to_sym] status of job
      # @raise [UnknownStateAttribute] if supplied state does not exist
      def initialize(state:, **_)
        @state = state.to_sym
        raise UnknownStateAttribute, "arguments specify unknown '#{@state}' state" unless self.class.states.include?(@state)
      end

      # Convert object to symbol
      # @return [Symbol] object as symbol
      def to_sym
        state
      end

      # Convert object to string
      # @return [String] object as string
      def to_s
        state.to_s
      end

      # The comparison operator. An object that cannot be converted with
      # `to_sym` (e.g. `nil` or an Integer) is never equivalent, rather than
      # raising NoMethodError.
      # @param other [#to_sym] object to compare against
      # @return [Boolean] whether objects are equivalent
      def ==(other)
        other.respond_to?(:to_sym) && to_sym == other.to_sym
      end

      # Whether objects are identical to each other (same class and equivalent)
      # @param other [#to_sym] object to compare against
      # @return [Boolean] whether objects are identical
      def eql?(other)
        self.class == other.class && self == other
      end

      # Generate a hash value for this object, consistent with {#eql?}
      # @return [Integer] hash value of object
      def hash
        [self.class, to_sym].hash
      end

      # @!method undetermined?
      #   Whether the status is undetermined
      #   @return [Boolean] whether undetermined
      #
      # @!method queued?
      #   Whether the status is queued
      #   @return [Boolean] whether queued
      #
      # @!method queued_held?
      #   Whether the status is queued_held
      #   @return [Boolean] whether queued_held
      #
      # @!method running?
      #   Whether the status is running
      #   @return [Boolean] whether running
      #
      # @!method suspended?
      #   Whether the status is suspended
      #   @return [Boolean] whether suspended
      #
      # @!method completed?
      #   Whether the status is completed
      #   @return [Boolean] whether completed
      #
      # Determine whether this method corresponds to a status check for a valid
      # state. If so, then check whether this object is in that valid state.
      # @param method_name the method name called
      # @param arguments the arguments to the call
      # @param block an optional block for the call
      # @raise [NoMethodError] if method name doesn't pass checks
      # @return [Boolean] whether it is in this state
      def method_missing(method_name, *arguments, &block)
        queried = queried_state(method_name)
        if queried
          self == queried
        else
          super
        end
      end

      # Determines whether this method corresponds to a status check for a valid
      # state
      # @param method_name the method name called
      # @param include_private whether private methods are considered; only
      #   forwarded to the default implementation
      # @return [Boolean]
      def respond_to_missing?(method_name, include_private = false)
        queried_state(method_name) ? true : super
      end

      private

      # Extract the valid state referred to by a predicate-style method name
      # such as `:running?`. The whole name is anchored with \A/\z so that only
      # an exact "<state>?" matches, and the candidate is compared as a string
      # so arbitrary method names are not interned as symbols.
      # @param method_name [#to_s] the method name called
      # @return [Symbol, nil] the matching state, or nil if there is none
      def queried_state(method_name)
        candidate = method_name.to_s[/\A(.+)\?\z/, 1]
        candidate && self.class.states.find { |known| known.to_s == candidate }
      end
    end
  end
end
|