ood_core 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/lib/ood_core/batch_connect/template.rb +17 -6
- data/lib/ood_core/batch_connect/templates/vnc.rb +2 -2
- data/lib/ood_core/job/adapters/drmaa.rb +1002 -0
- data/lib/ood_core/job/adapters/helper.rb +18 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +4 -3
- data/lib/ood_core/job/adapters/lsf.rb +4 -2
- data/lib/ood_core/job/adapters/pbspro.rb +19 -8
- data/lib/ood_core/job/adapters/sge/batch.rb +203 -0
- data/lib/ood_core/job/adapters/sge/helper.rb +65 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +116 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +138 -0
- data/lib/ood_core/job/adapters/sge.rb +163 -0
- data/lib/ood_core/job/adapters/slurm.rb +16 -5
- data/lib/ood_core/job/adapters/torque/attributes.rb +109 -0
- data/lib/ood_core/job/adapters/torque/batch.rb +470 -0
- data/lib/ood_core/job/adapters/torque/error.rb +403 -0
- data/lib/ood_core/job/adapters/torque/ffi.rb +430 -0
- data/lib/ood_core/job/adapters/torque.rb +23 -18
- data/lib/ood_core/job/status.rb +3 -13
- data/lib/ood_core/refinements/drmaa_extensions.rb +21 -0
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +3 -3
- metadata +23 -9
@@ -0,0 +1,430 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
|
3
|
+
class OodCore::Job::Adapters::Torque::FFI
|
4
|
+
# An interface to the C-library of Torque
|
5
|
+
extend ::FFI::Library
|
6
|
+
|
7
|
+
# @!attribute [rw] self.pbs_errno
|
8
|
+
# The internal PBS error number
|
9
|
+
# int pbs_errno
|
10
|
+
# @return [Fixnum] pbs error number
|
11
|
+
|
12
|
+
# @!attribute [r] self.pbs_server
|
13
|
+
# The PBS server name
|
14
|
+
# char *pbs_server
|
15
|
+
# @return [String] pbs server name
|
16
|
+
|
17
|
+
# @!method self.pbs_strerror(errno)
|
18
|
+
# Generates PBS error string from given error number
|
19
|
+
# char *pbs_strerror(int errno)
|
20
|
+
# @param errno [Fixnum] pbs error number
|
21
|
+
# @return [String] pbs error string
|
22
|
+
|
23
|
+
# @!method self.pbs_default
|
24
|
+
# Default PBS server name
|
25
|
+
# char *pbs_default(void)
|
26
|
+
# @see http://linux.die.net/man/3/pbs_default
|
27
|
+
# @return [String] default pbs server name
|
28
|
+
|
29
|
+
# @!method self.pbs_connect(server)
|
30
|
+
# Connect to PBS batch server
|
31
|
+
# int pbs_connect(char *server)
|
32
|
+
# @see http://linux.die.net/man/3/pbs_connect
|
33
|
+
# @param server [String] name of pbs server
|
34
|
+
# @return [Fixnum] connection identifier
|
35
|
+
|
36
|
+
# @!method self.pbs_disconnect(connect)
|
37
|
+
# Disconnect from a PBS batch server
|
38
|
+
# int pbs_disconnect(int connect)
|
39
|
+
# @see http://linux.die.net/man/3/pbs_disconnect
|
40
|
+
# @param connect [Fixnum] connection identifier
|
41
|
+
# @return [Fixnum] exit status code
|
42
|
+
|
43
|
+
# @!method self.pbs_deljob(connect, job_id, extend)
|
44
|
+
# Delete a PBS batch job
|
45
|
+
# int pbs_deljob(int connect, char *job_id, char *extend)
|
46
|
+
# @see http://linux.die.net/man/3/pbs_deljob
|
47
|
+
# @param connect [Fixnum] connection identifier
|
48
|
+
# @param job_id [String] the job id
|
49
|
+
# @param extend [String] implementation defined extensions
|
50
|
+
# @return [Fixnum] exit status code
|
51
|
+
|
52
|
+
# @!method self.pbs_holdjob(connect, job_id, hold_type, extend)
|
53
|
+
# Place a hold on a PBS batch job
|
54
|
+
# int pbs_holdjob(int connect, char *job_id, char *hold_type, char *extend)
|
55
|
+
# @see http://linux.die.net/man/3/pbs_holdjob
|
56
|
+
# @param connect [Fixnum] connection identifier
|
57
|
+
# @param job_id [String] the job id
|
58
|
+
# @param hold_type [String] type of hold to be applied
|
59
|
+
# @param extend [String] implementation defined extensions
|
60
|
+
# @return [Fixnum] exit status code
|
61
|
+
|
62
|
+
# @!method self.pbs_rlsjob(connect, job_id, hold_type, extend)
|
63
|
+
# Release a hold on a PBS batch job
|
64
|
+
# int pbs_rlsjob(int connect, char *job_id, char *hold_type, char *extend)
|
65
|
+
# @see http://linux.die.net/man/3/pbs_rlsjob
|
66
|
+
# @param connect [Fixnum] connection identifier
|
67
|
+
# @param job_id [String] the job id
|
68
|
+
# @param hold_type [String] type of hold to be released
|
69
|
+
# @param extend [String] implementation defined extensions
|
70
|
+
# @return [Fixnum] exit status code
|
71
|
+
|
72
|
+
# @!method self.pbs_statfree(stat)
|
73
|
+
# Free the memory allocated for a {BatchStatus} object
|
74
|
+
# void pbs_statfree(struct batch_status *stat)
|
75
|
+
# @param stat [BatchStatus] the batch status object
|
76
|
+
# @return [void]
|
77
|
+
|
78
|
+
# @!method self.pbs_statjob(connect, id, attrib, extend)
|
79
|
+
# Obtain status of PBS batch jobs
|
80
|
+
# batch_status * pbs_statjob(int connect, char *id, struct attrl *attrib, char *extend)
|
81
|
+
# @see http://linux.die.net/man/3/pbs_statjob
|
82
|
+
# @param connect [Fixnum] connection identifier
|
83
|
+
# @param id [String] job or destination identifier
|
84
|
+
# @param attrib [Attrl] the attribute c-linked list object
|
85
|
+
# @param extend [String] implementation defined extensions
|
86
|
+
# @return [BatchStatus] c-linked list of batch status objects
|
87
|
+
# @note It is up to the user to free the space of the batch status objects
|
88
|
+
|
89
|
+
# @!method self.pbs_statnode(connect, id, attrib, extend)
|
90
|
+
# Obtain status of PBS nodes
|
91
|
+
# batch_status * pbs_statnode(int connect, char *id, struct attrl *attrib, char *extend)
|
92
|
+
# @see http://linux.die.net/man/3/pbs_statnode
|
93
|
+
# @param connect [Fixnum] connection identifier
|
94
|
+
# @param id [String] name of a node or null string
|
95
|
+
# @param attrib [Attrl] the attribute c-linked list object
|
96
|
+
# @param extend [String] implementation defined extensions
|
97
|
+
# @return [BatchStatus] c-linked list of batch status objects
|
98
|
+
# @note It is up to the user to free the space of the batch status objects
|
99
|
+
|
100
|
+
# @!method self.pbs_statque(connect, id, attrib, extend)
|
101
|
+
# Obtain status of PBS batch queues
|
102
|
+
# batch_status * pbs_statque(int connect, char *id, struct attrl *attrib, char *extend)
|
103
|
+
# @see http://linux.die.net/man/3/pbs_statque
|
104
|
+
# @param connect [Fixnum] connection identifier
|
105
|
+
# @param id [String] name of a queue or null string
|
106
|
+
# @param attrib [Attrl] the attribute c-linked list object
|
107
|
+
# @param extend [String] implementation defined extensions
|
108
|
+
# @return [BatchStatus] c-linked list of batch status objects
|
109
|
+
# @note It is up to the user to free the space of the batch status objects
|
110
|
+
|
111
|
+
# @!method self.pbs_statserver(connect, attrib, extend)
|
112
|
+
# Obtain status of a PBS batch server
|
113
|
+
# batch_status * pbs_statserver(int connect, struct attrl *attrib, char *extend)
|
114
|
+
# @see http://linux.die.net/man/3/pbs_statserver
|
115
|
+
# @param connect [Fixnum] connection identifier
|
116
|
+
# @param attrib [Attrl] the attribute c-linked list object
|
117
|
+
# @param extend [String] implementation defined extensions
|
118
|
+
# @return [BatchStatus] c-linked list of batch status objects
|
119
|
+
# @note It is up to the user to free the space of the batch status objects
|
120
|
+
|
121
|
+
# @!method self.pbs_selstat(connect, attrib, extend)
|
122
|
+
# Obtain status of selected PBS batch jobs
|
123
|
+
# batch_status * pbs_selstat(int connect, struct attropl *sel_list, char *extend)
|
124
|
+
# @see http://linux.die.net/man/3/pbs_selstat
|
125
|
+
# @param connect [Fixnum] connection identifier
|
126
|
+
# @param attrib [Attropl] the attribute operation c-linked list object
|
127
|
+
# @param extend [String] implementation defined extensions
|
128
|
+
# @return [BatchStatus] c-linked list of batch status objects
|
129
|
+
# @note It is up to the user to free the space of the batch status objects
|
130
|
+
|
131
|
+
# @!method self.pbs_submit(connect, attrib, script, destination, extend)
|
132
|
+
# Submit a PBS batch job
|
133
|
+
# char *pbs_submit(int connect, struct attropl *attrib, char *script, char *destination, char *extend)
|
134
|
+
# @see http://linux.die.net/man/3/pbs_submit
|
135
|
+
# @param connect [Fixnum] connection identifier
|
136
|
+
# @param attrib [Attropl] the attribute operation c-linked list object
|
137
|
+
# @param script [String] the path to the script
|
138
|
+
# @param destination [String] the queue to send job to
|
139
|
+
# @param extend [String] implementation defined extensions
|
140
|
+
# @return [String] the job id
|
141
|
+
|
142
|
+
# The path to the torque library file
|
143
|
+
# @return [String] path to torque library
|
144
|
+
def self.lib
|
145
|
+
@lib
|
146
|
+
end
|
147
|
+
|
148
|
+
# Define torque methods using a supplied library
|
149
|
+
# @param lib [#to_s, nil] path to library file
|
150
|
+
# @return [void]
|
151
|
+
def self.lib=(lib)
|
152
|
+
@lib = lib ? lib.to_s : 'torque'
|
153
|
+
|
154
|
+
# Set up FFI to use this library
|
155
|
+
ffi_lib @lib
|
156
|
+
|
157
|
+
attach_variable :pbs_errno, :int
|
158
|
+
attach_variable :pbs_server, :string
|
159
|
+
attach_function :pbs_strerror, [ :int ], :string
|
160
|
+
attach_function :pbs_default, [], :string
|
161
|
+
attach_function :pbs_connect, [ :string ], :int
|
162
|
+
attach_function :pbs_disconnect, [ :int ], :int
|
163
|
+
attach_function :pbs_deljob, [ :int, :string, :string ], :int
|
164
|
+
attach_function :pbs_holdjob, [ :int, :string, :string, :string ], :int
|
165
|
+
attach_function :pbs_rlsjob, [ :int, :string, :string, :string ], :int
|
166
|
+
attach_function :pbs_statfree, [ BatchStatus.ptr ], :void
|
167
|
+
attach_function :pbs_statjob, [ :int, :string, Attrl.ptr, :string ], BatchStatus.ptr
|
168
|
+
attach_function :pbs_statnode, [ :int, :string, Attrl.ptr, :string ], BatchStatus.ptr
|
169
|
+
attach_function :pbs_statque, [ :int, :string, Attrl.ptr, :string ], BatchStatus.ptr
|
170
|
+
attach_function :pbs_statserver, [ :int, Attrl.ptr, :string ], BatchStatus.ptr
|
171
|
+
attach_function :pbs_selstat, [ :int, Attropl.ptr, :string ], BatchStatus.ptr
|
172
|
+
|
173
|
+
# FIXME: The space for the job_identifier string is allocated by
|
174
|
+
# pbs_submit() and should be released via a call to free() when no longer
|
175
|
+
# needed
|
176
|
+
attach_function :pbs_submit, [ :int, Attropl.ptr, :string, :string, :string ], :string
|
177
|
+
end
|
178
|
+
|
179
|
+
# Check pbs_errno for a pending error, reset it to 0, and raise if one was set
|
180
|
+
# @return [void]
|
181
|
+
def self.check_for_error
|
182
|
+
errno = pbs_errno
|
183
|
+
self.pbs_errno = 0 # reset error number
|
184
|
+
raise_error(errno) if errno > 0
|
185
|
+
end
|
186
|
+
|
187
|
+
# For a given errno, raise the corresponding error with error message
|
188
|
+
# @param errno [Fixnum] the error number
|
189
|
+
# @raise [Error] always; the error class is looked up from errno in ERROR_CODES
|
190
|
+
# @return [void]
|
191
|
+
def self.raise_error(errno)
|
192
|
+
raise (ERROR_CODES[errno] || PBS::Error), "#{pbs_strerror(errno)}"
|
193
|
+
end
|
194
|
+
|
195
|
+
#
|
196
|
+
# Data structures defined in pbs_ifl.h
|
197
|
+
#
|
198
|
+
|
199
|
+
# Enum for Batch Operation
|
200
|
+
BatchOp = enum(:set, :unset, :incr, :decr, :eq, :ne, :ge, :gt, :le, :lt, :dflt, :merge, :incr_old)
|
201
|
+
|
202
|
+
# Struct for Attribute C-linked list
|
203
|
+
class Attrl < ::FFI::Struct
|
204
|
+
layout :next, Attrl.ptr, # pointer to next Attrl object
|
205
|
+
:name, :pointer, # string for name of attribute
|
206
|
+
:resource, :pointer, # string for resource if this attribute is a resource
|
207
|
+
:value, :pointer, # string for value of attribute
|
208
|
+
:op, BatchOp # not used in an Attrl object
|
209
|
+
|
210
|
+
# Given an array of attribute names convert it to {Attrl} C-linked list
|
211
|
+
# @param list [Array<Symbol>] list of attribute names
|
212
|
+
# @return [Attrl] generated attribute c-linked list object
|
213
|
+
def self.from_list(list)
|
214
|
+
attrl = nil
|
215
|
+
prev = Attrl.new(::FFI::Pointer::NULL)
|
216
|
+
list.each do |key|
|
217
|
+
attrl = Attrl.new
|
218
|
+
attrl[:name] = ::FFI::MemoryPointer.from_string(key.to_s)
|
219
|
+
attrl[:next] = prev
|
220
|
+
prev = attrl
|
221
|
+
end
|
222
|
+
attrl
|
223
|
+
end
|
224
|
+
|
225
|
+
# Convert to hash describing this linked list
|
226
|
+
# @return [Hash] hash describing linked list
|
227
|
+
def to_h
|
228
|
+
attrl = self
|
229
|
+
hash = {}
|
230
|
+
until attrl.to_ptr.null?
|
231
|
+
n = attrl[:name].read_string
|
232
|
+
v = attrl[:value].read_string
|
233
|
+
r = attrl[:resource].null? ? nil : attrl[:resource].read_string
|
234
|
+
r ? (hash[n.to_sym] ||= {} and hash[n.to_sym][r.to_sym] = v) : hash[n.to_sym] = v
|
235
|
+
attrl = attrl[:next]
|
236
|
+
end
|
237
|
+
hash
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
# Struct for Attribute Operation C-linked list
|
242
|
+
class Attropl < ::FFI::Struct
|
243
|
+
layout :next, Attropl.ptr, # pointer to next Attropl object
|
244
|
+
:name, :pointer, # string for name of attribute
|
245
|
+
:resource, :pointer, # string for resource if this attribute is a resource
|
246
|
+
:value, :pointer, # string for value of attribute
|
247
|
+
:op, BatchOp # operation to perform for this attribute
|
248
|
+
|
249
|
+
# Convert to C-linked list of structs from list of hashes
|
250
|
+
# @param list [Array<#to_h>] list of hashes describing attribute
|
251
|
+
# @return [Attropl] generated attribute operation c-linked list object
|
252
|
+
def self.from_list(list)
|
253
|
+
list = list.map(&:to_h)
|
254
|
+
attropl = nil
|
255
|
+
prev = Attropl.new(::FFI::Pointer::NULL)
|
256
|
+
list.each do |attrib|
|
257
|
+
attropl = Attropl.new
|
258
|
+
attropl[:name] = ::FFI::MemoryPointer.from_string attrib[:name].to_s
|
259
|
+
attropl[:value] = ::FFI::MemoryPointer.from_string attrib[:value].to_s
|
260
|
+
attropl[:resource] = ::FFI::MemoryPointer.from_string attrib[:resource].to_s
|
261
|
+
attropl[:op] = (attrib[:op] || :eq).to_sym
|
262
|
+
attropl[:next] = prev
|
263
|
+
prev = attropl
|
264
|
+
end
|
265
|
+
attropl
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
# Struct for PBS batch server status responses
|
270
|
+
class BatchStatus < ::FFI::ManagedStruct
|
271
|
+
layout :next, BatchStatus.ptr, # pointer to next BatchStatus object
|
272
|
+
:name, :string, # string for name of this status
|
273
|
+
:attribs, Attrl.ptr, # pointer to beginning of C-linked list of an Attrl object
|
274
|
+
:text, :string # string containing unknown text
|
275
|
+
|
276
|
+
# Free memory for allocated {BatchStatus} C-linked list
|
277
|
+
def self.release(ptr)
|
278
|
+
pbs_statfree(ptr)
|
279
|
+
end
|
280
|
+
|
281
|
+
# Convert to hash describing this linked list
|
282
|
+
# @return [Hash] hash describing linked list
|
283
|
+
def to_h
|
284
|
+
batch = self
|
285
|
+
hash = {}
|
286
|
+
until batch.to_ptr.null?
|
287
|
+
hash[batch[:name]] = batch[:attribs].to_h
|
288
|
+
batch = batch[:next]
|
289
|
+
end
|
290
|
+
hash
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
# Defined error codes, valid as of Torque >=4.2.10
|
295
|
+
ERROR_CODES = {
|
296
|
+
15001 => UnkjobidError,
|
297
|
+
15002 => NoattrError,
|
298
|
+
15003 => AttrroError,
|
299
|
+
15004 => IvalreqError,
|
300
|
+
15005 => UnkreqError,
|
301
|
+
15006 => ToomanyError,
|
302
|
+
15007 => PermError,
|
303
|
+
15008 => IffNotFoundError,
|
304
|
+
15009 => MungeNotFoundError,
|
305
|
+
15010 => BadhostError,
|
306
|
+
15011 => JobexistError,
|
307
|
+
15012 => SystemError,
|
308
|
+
15013 => InternalError,
|
309
|
+
15014 => RegrouteError,
|
310
|
+
15015 => UnksigError,
|
311
|
+
15016 => BadatvalError,
|
312
|
+
15017 => ModatrrunError,
|
313
|
+
15018 => BadstateError,
|
314
|
+
15020 => UnkqueError,
|
315
|
+
15021 => BadcredError,
|
316
|
+
15022 => ExpiredError,
|
317
|
+
15023 => QunoenbError,
|
318
|
+
15024 => QacessError,
|
319
|
+
15025 => BaduserError,
|
320
|
+
15026 => HopcountError,
|
321
|
+
15027 => QueexistError,
|
322
|
+
15028 => AttrtypeError,
|
323
|
+
15029 => QuebusyError,
|
324
|
+
15030 => QuenbigError,
|
325
|
+
15031 => NosupError,
|
326
|
+
15032 => QuenoenError,
|
327
|
+
15033 => ProtocolError,
|
328
|
+
15034 => BadatlstError,
|
329
|
+
15035 => NoconnectsError,
|
330
|
+
15036 => NoserverError,
|
331
|
+
15037 => UnkrescError,
|
332
|
+
15038 => ExcqrescError,
|
333
|
+
15039 => QuenodfltError,
|
334
|
+
15040 => NorerunError,
|
335
|
+
15041 => RouterejError,
|
336
|
+
15042 => RouteexpdError,
|
337
|
+
15043 => MomrejectError,
|
338
|
+
15044 => BadscriptError,
|
339
|
+
15045 => StageinError,
|
340
|
+
15046 => RescunavError,
|
341
|
+
15047 => BadgrpError,
|
342
|
+
15048 => MaxquedError,
|
343
|
+
15049 => CkpbsyError,
|
344
|
+
15050 => ExlimitError,
|
345
|
+
15051 => BadacctError,
|
346
|
+
15052 => AlrdyexitError,
|
347
|
+
15053 => NocopyfileError,
|
348
|
+
15054 => CleanedoutError,
|
349
|
+
15055 => NosyncmstrError,
|
350
|
+
15056 => BaddependError,
|
351
|
+
15057 => DuplistError,
|
352
|
+
15058 => DisprotoError,
|
353
|
+
15059 => ExecthereError,
|
354
|
+
15060 => SisrejectError,
|
355
|
+
15061 => SiscommError,
|
356
|
+
15062 => SvrdownError,
|
357
|
+
15063 => CkpshortError,
|
358
|
+
15064 => UnknodeError,
|
359
|
+
15065 => UnknodeatrError,
|
360
|
+
15066 => NonodesError,
|
361
|
+
15067 => NodenbigError,
|
362
|
+
15068 => NodeexistError,
|
363
|
+
15069 => BadndatvalError,
|
364
|
+
15070 => MutualexError,
|
365
|
+
15071 => GmoderrError,
|
366
|
+
15072 => NorelymomError,
|
367
|
+
15073 => NotsnodeError,
|
368
|
+
15074 => JobtypeError,
|
369
|
+
15075 => BadaclhostError,
|
370
|
+
15076 => MaxuserquedError,
|
371
|
+
15077 => BaddisallowtypeError,
|
372
|
+
15078 => NointeractiveError,
|
373
|
+
15079 => NobatchError,
|
374
|
+
15080 => NorerunableError,
|
375
|
+
15081 => NononrerunableError,
|
376
|
+
15082 => UnkarrayidError,
|
377
|
+
15083 => BadArrayReqError,
|
378
|
+
15084 => BadArrayDataError,
|
379
|
+
15085 => TimeoutError,
|
380
|
+
15086 => JobnotfoundError,
|
381
|
+
15087 => NofaulttolerantError,
|
382
|
+
15088 => NofaultintolerantError,
|
383
|
+
15089 => NojobarraysError,
|
384
|
+
15090 => RelayedToMomError,
|
385
|
+
15091 => MemMallocError,
|
386
|
+
15092 => MutexError,
|
387
|
+
15093 => ThreadattrError,
|
388
|
+
15094 => ThreadError,
|
389
|
+
15095 => SelectError,
|
390
|
+
15096 => SocketFaultError,
|
391
|
+
15097 => SocketWriteError,
|
392
|
+
15098 => SocketReadError,
|
393
|
+
15099 => SocketCloseError,
|
394
|
+
15100 => SocketListenError,
|
395
|
+
15101 => AuthInvalidError,
|
396
|
+
15102 => NotImplementedError,
|
397
|
+
15103 => QuenotavailableError,
|
398
|
+
15104 => TmpdiffownerError,
|
399
|
+
15105 => TmpnotdirError,
|
400
|
+
15106 => TmpnonameError,
|
401
|
+
15107 => CantopensocketError,
|
402
|
+
15108 => CantcontactsistersError,
|
403
|
+
15109 => CantcreatetmpdirError,
|
404
|
+
15110 => BadmomstateError,
|
405
|
+
15111 => SocketInformationError,
|
406
|
+
15112 => SocketDataError,
|
407
|
+
15113 => ClientInvalidError,
|
408
|
+
15114 => PrematureEofError,
|
409
|
+
15115 => CanNotSaveFileError,
|
410
|
+
15116 => CanNotOpenFileError,
|
411
|
+
15117 => CanNotWriteFileError,
|
412
|
+
15118 => JobFileCorruptError,
|
413
|
+
15119 => JobRerunError,
|
414
|
+
15120 => ConnectError,
|
415
|
+
15121 => JobworkdelayError,
|
416
|
+
15122 => BadParameterError,
|
417
|
+
15123 => ContinueError,
|
418
|
+
15124 => JobsubstateError,
|
419
|
+
15125 => CanNotMoveFileError,
|
420
|
+
15126 => JobRecycledError,
|
421
|
+
15127 => JobAlreadyInQueueError,
|
422
|
+
15128 => InvalidMutexError,
|
423
|
+
15129 => MutexAlreadyLockedError,
|
424
|
+
15130 => MutexAlreadyUnlockedError,
|
425
|
+
15131 => InvalidSyntaxError,
|
426
|
+
15132 => NodeDownError,
|
427
|
+
15133 => ServerNotFoundError,
|
428
|
+
15134 => ServerBusyError,
|
429
|
+
}
|
430
|
+
end
|
@@ -1,7 +1,5 @@
|
|
1
1
|
require "ood_core/refinements/hash_extensions"
|
2
|
-
|
3
|
-
gem "pbs", "~> 2.1"
|
4
|
-
require "pbs"
|
2
|
+
require "ood_core/job/adapters/helper"
|
5
3
|
|
6
4
|
module OodCore
|
7
5
|
module Job
|
@@ -13,12 +11,14 @@ module OodCore
|
|
13
11
|
# @option config [#to_s] :host The batch server host
|
14
12
|
# @option config [#to_s] :lib ('') Path to torque client libraries
|
15
13
|
# @option config [#to_s] :bin ('') Path to torque client binaries
|
14
|
+
# @option config [#to_h] :custom_bin ({}) Optional overrides to Torque client executables
|
16
15
|
def self.build_torque(config)
|
17
16
|
c = config.to_h.symbolize_keys
|
18
17
|
host = c.fetch(:host) { raise ArgumentError, "No host specified. Missing argument: host" }.to_s
|
19
18
|
lib = c.fetch(:lib, "").to_s
|
20
19
|
bin = c.fetch(:bin, "").to_s
|
21
|
-
|
20
|
+
custom_bin = c.fetch(:custom_bin, {})
|
21
|
+
pbs = Adapters::Torque::Batch.new(host: host, lib: lib, bin: bin, custom_bin: custom_bin)
|
22
22
|
Adapters::Torque.new(pbs: pbs)
|
23
23
|
end
|
24
24
|
end
|
@@ -30,6 +30,11 @@ module OodCore
|
|
30
30
|
using Refinements::ArrayExtensions
|
31
31
|
using Refinements::HashExtensions
|
32
32
|
|
33
|
+
require "ood_core/job/adapters/torque/error"
|
34
|
+
require "ood_core/job/adapters/torque/attributes"
|
35
|
+
require "ood_core/job/adapters/torque/ffi"
|
36
|
+
require "ood_core/job/adapters/torque/batch"
|
37
|
+
|
33
38
|
# Mapping of state characters for PBS
|
34
39
|
STATE_MAP = {
|
35
40
|
'Q' => :queued,
|
@@ -44,7 +49,7 @@ module OodCore
|
|
44
49
|
|
45
50
|
# @api private
|
46
51
|
# @param opts [#to_h] the options defining this adapter
|
47
|
-
# @option opts [
|
52
|
+
# @option opts [Torque::Batch] :pbs The PBS batch object
|
48
53
|
# @see Factory.build_torque
|
49
54
|
def initialize(opts = {})
|
50
55
|
o = opts.to_h.symbolize_keys
|
@@ -160,7 +165,7 @@ module OodCore
|
|
160
165
|
# Submit job
|
161
166
|
@pbs.submit(script.content, args: args, env: env, chdir: script.workdir)
|
162
167
|
end
|
163
|
-
rescue
|
168
|
+
rescue Torque::Batch::Error => e
|
164
169
|
raise JobAdapterError, e.message
|
165
170
|
end
|
166
171
|
|
@@ -172,7 +177,7 @@ module OodCore
|
|
172
177
|
@pbs.get_jobs.map do |k, v|
|
173
178
|
parse_job_info(k, v)
|
174
179
|
end
|
175
|
-
rescue
|
180
|
+
rescue Torque::Batch::Error => e
|
176
181
|
raise JobAdapterError, e.message
|
177
182
|
end
|
178
183
|
|
@@ -190,7 +195,7 @@ module OodCore
|
|
190
195
|
).map do |k, v|
|
191
196
|
parse_job_info(k, v)
|
192
197
|
end
|
193
|
-
rescue
|
198
|
+
rescue Torque::Batch::Error => e
|
194
199
|
raise JobAdapterError, e.message
|
195
200
|
end
|
196
201
|
|
@@ -202,13 +207,13 @@ module OodCore
|
|
202
207
|
def info(id)
|
203
208
|
id = id.to_s
|
204
209
|
parse_job_info(*@pbs.get_job(id).flatten)
|
205
|
-
rescue
|
210
|
+
rescue Torque::FFI::UnkjobidError
|
206
211
|
# set completed status if can't find job id
|
207
212
|
Info.new(
|
208
213
|
id: id,
|
209
214
|
status: :completed
|
210
215
|
)
|
211
|
-
rescue
|
216
|
+
rescue Torque::Batch::Error => e
|
212
217
|
raise JobAdapterError, e.message
|
213
218
|
end
|
214
219
|
|
@@ -221,10 +226,10 @@ module OodCore
|
|
221
226
|
id = id.to_s
|
222
227
|
char = @pbs.get_job(id, filters: [:job_state])[id][:job_state]
|
223
228
|
Status.new(state: STATE_MAP.fetch(char, :undetermined))
|
224
|
-
rescue
|
229
|
+
rescue Torque::FFI::UnkjobidError
|
225
230
|
# set completed status if can't find job id
|
226
231
|
Status.new(state: :completed)
|
227
|
-
rescue
|
232
|
+
rescue Torque::Batch::Error => e
|
228
233
|
raise JobAdapterError, e.message
|
229
234
|
end
|
230
235
|
|
@@ -235,10 +240,10 @@ module OodCore
|
|
235
240
|
# @see Adapter#hold
|
236
241
|
def hold(id)
|
237
242
|
@pbs.hold_job(id.to_s)
|
238
|
-
rescue
|
243
|
+
rescue Torque::FFI::UnkjobidError
|
239
244
|
# assume successful job hold if can't find job id
|
240
245
|
nil
|
241
|
-
rescue
|
246
|
+
rescue Torque::Batch::Error => e
|
242
247
|
raise JobAdapterError, e.message
|
243
248
|
end
|
244
249
|
|
@@ -249,10 +254,10 @@ module OodCore
|
|
249
254
|
# @see Adapter#release
|
250
255
|
def release(id)
|
251
256
|
@pbs.release_job(id.to_s)
|
252
|
-
rescue
|
257
|
+
rescue Torque::FFI::UnkjobidError
|
253
258
|
# assume successful job release if can't find job id
|
254
259
|
nil
|
255
|
-
rescue
|
260
|
+
rescue Torque::Batch::Error => e
|
256
261
|
raise JobAdapterError, e.message
|
257
262
|
end
|
258
263
|
|
@@ -263,11 +268,11 @@ module OodCore
|
|
263
268
|
# @see Adapter#delete
|
264
269
|
def delete(id)
|
265
270
|
@pbs.delete_job(id.to_s)
|
266
|
-
rescue
|
271
|
+
rescue Torque::FFI::UnkjobidError, Torque::FFI::BadstateError
|
267
272
|
# assume successful job deletion if can't find job id
|
268
273
|
# assume successful job deletion if job is exiting or completed
|
269
274
|
nil
|
270
|
-
rescue
|
275
|
+
rescue Torque::Batch::Error => e
|
271
276
|
raise JobAdapterError, e.message
|
272
277
|
end
|
273
278
|
|
data/lib/ood_core/job/status.rb
CHANGED
@@ -108,21 +108,11 @@ module OodCore
|
|
108
108
|
# @param block an optional block for the call
|
109
109
|
# @raise [NoMethodError] if method name doesn't pass checks
|
110
110
|
# @return [Boolean] whether it is in this state
|
111
|
-
|
112
|
-
|
113
|
-
self ==
|
114
|
-
else
|
115
|
-
super
|
111
|
+
states.each do |state|
|
112
|
+
define_method("#{state}?") do
|
113
|
+
self == state
|
116
114
|
end
|
117
115
|
end
|
118
|
-
|
119
|
-
# Determines whether this method corresponds to a status check for a valid
|
120
|
-
# state
|
121
|
-
# @param method_name the method name called
|
122
|
-
# @return [Boolean]
|
123
|
-
def respond_to_missing?(method_name, include_private = false)
|
124
|
-
/^(?<other_state>.+)\?$/ =~ method_name && self.class.states.include?(other_state.to_sym) || super
|
125
|
-
end
|
126
116
|
end
|
127
117
|
end
|
128
118
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
module DRMAA
|
3
|
+
# The one and only connection with DRMAA
|
4
|
+
# Attempting to instantiate a DRMAA::Session more than once causes it to crash
|
5
|
+
class SessionSingleton < DRMAA::Session
|
6
|
+
include Singleton
|
7
|
+
end
|
8
|
+
DRMMA_TO_OOD_STATE_MAP = {
|
9
|
+
DRMAA::STATE_UNDETERMINED => :undetermined,
|
10
|
+
DRMAA::STATE_QUEUED_ACTIVE => :queued,
|
11
|
+
DRMAA::STATE_SYSTEM_ON_HOLD => :queued_held,
|
12
|
+
DRMAA::STATE_USER_ON_HOLD => :queued_held,
|
13
|
+
DRMAA::STATE_USER_SYSTEM_ON_HOLD => :queued_held,
|
14
|
+
DRMAA::STATE_RUNNING => :running,
|
15
|
+
DRMAA::STATE_SYSTEM_SUSPENDED => :suspended,
|
16
|
+
DRMAA::STATE_USER_SUSPENDED => :suspended,
|
17
|
+
DRMAA::STATE_USER_SYSTEM_SUSPENDED => :suspended,
|
18
|
+
DRMAA::STATE_DONE => :completed,
|
19
|
+
DRMAA::STATE_FAILED => :completed
|
20
|
+
}
|
21
|
+
end
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -6,8 +6,8 @@ require 'ood_core/version'
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "ood_core"
|
8
8
|
spec.version = OodCore::VERSION
|
9
|
-
spec.authors = ["Jeremy Nicklas"]
|
10
|
-
spec.email = ["jnicklas@osc.edu"]
|
9
|
+
spec.authors = ["Jeremy Nicklas", "Morgan Rodgers"]
|
10
|
+
spec.email = ["jnicklas@osc.edu", "mrodgers@osc.edu"]
|
11
11
|
|
12
12
|
spec.summary = %q{Open OnDemand core library}
|
13
13
|
spec.description = %q{Open OnDemand core library that provides support for an HPC Center to globally define HPC services that web applications can then take advantage of.}
|
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.required_ruby_version = ">= 2.2.0"
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "ood_support", "~> 0.0.2"
|
26
|
-
spec.
|
26
|
+
spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
|
27
27
|
spec.add_development_dependency "bundler", "~> 1.7"
|
28
28
|
spec.add_development_dependency "rake", "~> 10.0"
|
29
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|