osc-machete 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/CHANGELOG.md +87 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +194 -0
- data/Rakefile +23 -0
- data/lib/osc/machete.rb +18 -0
- data/lib/osc/machete/job.rb +239 -0
- data/lib/osc/machete/job_dir.rb +56 -0
- data/lib/osc/machete/location.rb +91 -0
- data/lib/osc/machete/process.rb +32 -0
- data/lib/osc/machete/status.rb +190 -0
- data/lib/osc/machete/torque_helper.rb +190 -0
- data/lib/osc/machete/user.rb +72 -0
- data/lib/osc/machete/version.rb +6 -0
- data/osc-machete.gemspec +30 -0
- data/test/fixtures/app-params.yml +8 -0
- data/test/fixtures/app-template-rendered/job.sh +40 -0
- data/test/fixtures/app-template-rendered/params.yml +8 -0
- data/test/fixtures/app-template-rendered/test/job.sh +40 -0
- data/test/fixtures/app-template/job.sh.mustache +40 -0
- data/test/fixtures/app-template/params.yml.mustache +8 -0
- data/test/fixtures/app-template/test/job.sh.mustache +40 -0
- data/test/fixtures/oakley.sh +14 -0
- data/test/fixtures/quick.sh +14 -0
- data/test/fixtures/ruby.sh +14 -0
- data/test/test_job.rb +179 -0
- data/test/test_job_dir.rb +39 -0
- data/test/test_location.rb +97 -0
- data/test/test_status.rb +99 -0
- data/test/test_torque_helper.rb +209 -0
- data/test/test_torque_helper_live.rb +174 -0
- metadata +177 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c1517fce8efdd8af65a55f139e0b4b38f1c3481c
|
4
|
+
data.tar.gz: e4fbae834bb6adf6b369cb655b23c7a550f0a0fd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 88a1af6cddefc4380f7622b9bd70efa3e364f930b3f14fc9a4f5f9b80e097f8372bffc36ee9eeab2bcdcff13b26d6aa2ef76eba0dd5e4dfed5d5f6fe6aad8f84
|
7
|
+
data.tar.gz: 434158ca87d980ab77125c43bed516febd1d8678c4d897b4edf7e6b7969307adcb4c0d019083bd5235052a3aa825ae8c1de0da15388f6c29c5059cd169bf807e
|
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# Change Log
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
This project adheres to [Semantic Versioning](http://semver.org/).
|
5
|
+
|
6
|
+
## [Unreleased]
|
7
|
+
|
8
|
+
## [1.1.1] - 2016-02-24
|
9
|
+
|
10
|
+
### Fixed
|
11
|
+
|
12
|
+
- Omit account string when submitting a job if using default account string that is an invalid project i.e. `appl`
|
13
|
+
|
14
|
+
## [1.1.0] - 2016-02-18
|
15
|
+
|
16
|
+
### Changed
|
17
|
+
|
18
|
+
- Account string by default is specified as being the primary group name of the
|
19
|
+
process running the app. This corresponds to OSC's convention that the primary
|
20
|
+
group is the project of the user.
|
21
|
+
- OSC::Machete::Job is updated to change the default account string used for all
|
22
|
+
instances via setting OSC::Machete::Job.default_account_string
|
23
|
+
- OSC::Machete::Job is updated to accept account_string as an argument to the
|
24
|
+
initializer to use for that instance.
|
25
|
+
|
26
|
+
## [1.0.1] - 2016-02-16
|
27
|
+
|
28
|
+
### Fixed
|
29
|
+
|
30
|
+
- use latest version of pbs gem and its custom Error classes to catch the common cases for qdel and qstat when the pbsid is unknown
|
31
|
+
|
32
|
+
## [1.0.0] - 2016-02-03
|
33
|
+
|
34
|
+
### Fixed
|
35
|
+
|
36
|
+
- qstat would return nil if the job completed or if an error occurred with qstat; now qstat throws exception in error cases and returns a valid Status value otherwise
|
37
|
+
- using qstat with a Ruby job would fail because Ruby pbsids don't include the host; fixed by adding a host arg and, if that's omitted, inspecting the script first or else assuming it's Ruby if the host is omitted from the job id
|
38
|
+
|
39
|
+
### Added
|
40
|
+
|
41
|
+
- lib/osc/machete/status.rb: Status value object
|
42
|
+
- lib/osc/machete/process.rb: Provides helper methods wrapping Etc and Process modules to inspect user info from the currently running process.
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
### Changed
|
47
|
+
|
48
|
+
lib/osc/machete/user.rb
|
49
|
+
|
50
|
+
- uses Etc instead of the environment variables to determine the current user by default (but any username can be passed in)
|
51
|
+
- provides information about the specified user from Etc and inspecting the system's groups file
|
52
|
+
- new methods include User#groups, User#member_of_group?, and a factory method to get an instance from the uid: User.from_uid
|
53
|
+
|
54
|
+
lib/osc/machete/job.rb
|
55
|
+
|
56
|
+
- host can be passed in as an argument to the initializer; if this is not provided, torque_helper internally will try to determine what OSC system the PBSID corresponds to, or try inspecting the script for PBS headers
|
57
|
+
- Job#submit now throws ScriptMissingError or PBS::Error
|
58
|
+
- Job#status now returns an OSC::Machete::Status object instead of a character
|
59
|
+
- Job#delete now throws PBS::Error
|
60
|
+
|
61
|
+
lib/osc/machete/torque_helper.rb _(still an internal class right now, not meant to be used directly)_
|
62
|
+
|
63
|
+
- try to determine what OSC system the PBSID corresponds to, or try inspecting the script for PBS headers
|
64
|
+
- returns OSC::Machete::Status for qsub, qstat, qdel
|
65
|
+
- uses pbs gem instead of shelling out for qsub, qstat, qdel
|
66
|
+
- throws PBS::Error for qsub, qstat, qdel in erroneous cases
|
67
|
+
- handles mapping between Torque specific status values and the generic OSC::Machete::Status
|
68
|
+
- if host not provided, tries to determine host from pbsid and job script
|
69
|
+
|
70
|
+
### Removed
|
71
|
+
|
72
|
+
- lib/osc/machete/simple_job.rb - module is now an alias for OscMacheteRails in the osc_machete_rails gem; but including SimpleJob no longer results in including Statusable and Submittable
|
73
|
+
- lib/osc/machete/simple_job/statusable.rb - moved to osc_machete_rails
|
74
|
+
- lib/osc/machete/simple_job/workflow.rb - moved to osc_machete_rails
|
75
|
+
- lib/osc/machete/simple_job/submittable.rb - removed! use has_workflow_of instead
|
76
|
+
- lib/osc/machete/staging.rb - removed!
|
77
|
+
|
78
|
+
## 0.6.3 - 2015-11-23
|
79
|
+
|
80
|
+
Previous release of osc-machete
|
81
|
+
|
82
|
+
[Unreleased]: https://github.com/AweSim-OSC/osc-machete/compare/v1.1.1...master
|
83
|
+
[1.1.1]: https://github.com/AweSim-OSC/osc-machete/compare/v1.1.0...v1.1.1
|
84
|
+
[1.1.0]: https://github.com/AweSim-OSC/osc-machete/compare/v1.0.1...v1.1.0
|
85
|
+
[1.0.1]: https://github.com/AweSim-OSC/osc-machete/compare/v1.0.0...v1.0.1
|
86
|
+
[1.0.0]: https://github.com/AweSim-OSC/osc-machete/compare/v0.6.3...v1.0.0
|
87
|
+
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013-2016 Ohio Supercomputer Center
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
# OSC::Machete
|
2
|
+
|
3
|
+
Ruby code to help with staging and checking the status of batch jobs.
|
4
|
+
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
To use, add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
gem 'osc-machete'
|
11
|
+
|
12
|
+
And then execute:
|
13
|
+
|
14
|
+
$ bundle install
|
15
|
+
|
16
|
+
|
17
|
+
## Usage
|
18
|
+
|
19
|
+
Three main classes are provided: Job, Process, and User.
|
20
|
+
The others are support classes for these three.
|
21
|
+
|
22
|
+
|
23
|
+
### OSC::Machete::Job
|
24
|
+
|
25
|
+
This is the main class and is a utility class for managing batch simulations. It
|
26
|
+
uses pbs-ruby to submit jobs, check the status of jobs, and stop running jobs.
|
27
|
+
|
28
|
+
|
29
|
+
Check the status of a job:
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
s = OSC::Machete::Job.new(pbsid: "117711759.opt-batch.osc.edu").status
|
33
|
+
#=> #<OSC::Machete::Status:0x002ba824829e50 @char="R">
|
34
|
+
puts s #=> "Running"
|
35
|
+
```
|
36
|
+
* status returns an `OSC::Machete::Status` value object
|
37
|
+
|
38
|
+
Setup dependencies, submit, and delete a job:
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
solve_job = OSC::Machete::Job.new(script: path_to_solve_script)
|
42
|
+
post_job = OSC::Machete::Job.new(script: path_to_post_script)
|
43
|
+
|
44
|
+
# ensure that post_job doesn't start till solve_job ends (with any exit status)
|
45
|
+
post_job.afterany(solve_job)
|
46
|
+
|
47
|
+
# submit both jobs (can do it in any order, dependencies will be managed for you)
|
48
|
+
post_job.submit
|
49
|
+
solve_job.submit
|
50
|
+
|
51
|
+
# if you want to qdel both jobs:
|
52
|
+
solve_job.delete
|
53
|
+
post_job.delete
|
54
|
+
```
|
55
|
+
|
56
|
+
* when submitting a job, if a shell script is not found, OSC::Machete::Job::ScriptMissingError error is raised
|
57
|
+
* `Job#submit`, `Job#status`, `Job#delete` all raise a `PBS::Error` if something
|
58
|
+
goes wrong with interacting with Torque.
|
59
|
+
|
60
|
+
#### Account String for submitting jobs
|
61
|
+
|
62
|
+
By default, the account string will be set as a command line argument to qsub
|
63
|
+
using the `-A` flag, which means setting this in a PBS header in the shell
|
64
|
+
scripts will not work. The default account_string is the primary group of the
|
65
|
+
process, which in our case happens to be the project of the user.
|
66
|
+
|
67
|
+
If you need to change the default account_string, you can do so by providing an
|
68
|
+
extra argument to the OSC::Machete::Job initializer:
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
j = OSC::Machete::Job.new(script: path_to_script)
|
72
|
+
j.account_string # nil - so when the job submits the primary group will be used
|
73
|
+
|
74
|
+
j = OSC::Machete::Job.new(script: path_to_script, account_string: "PZS0530")
|
75
|
+
j.account_string # "PZS0530" - so when the job submits "PZS0530" will be used
|
76
|
+
```
|
77
|
+
|
78
|
+
You can also set a class-level attribute on the Job class so that all future job
|
79
|
+
objects are instantiated using the specified account string:
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
OSC::Machete::Job.default_account_string = "PZS0530"
|
83
|
+
|
84
|
+
j = OSC::Machete::Job.new(script: path_to_script)
|
85
|
+
j.account_string # "PZS0530" - so when the job submits "PZS0530" will be used
|
86
|
+
```
|
87
|
+
|
88
|
+
### OSC::Machete::Status
|
89
|
+
|
90
|
+
See [Martin Fowler on value objects](http://martinfowler.com/bliki/ValueObject.html)
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
s = OSC::Machete::Job.new(pbsid: "117711759.opt-batch.osc.edu").status
|
94
|
+
#=> #<OSC::Machete::Status:0x002ba824829e50 @char="R">
|
95
|
+
puts s #=> "Running"
|
96
|
+
|
97
|
+
s.passed? #=> true
|
98
|
+
s.completed? #=> true
|
99
|
+
s.failed? #=> false
|
100
|
+
s.submitted? #=> true
|
101
|
+
|
102
|
+
f = OSC::Machete::Status.failed #=> #<OSC::Machete::Status:0x002ba8274334d8 @char="F">
|
103
|
+
f.failed? #=> true
|
104
|
+
f.completed? #=> true
|
105
|
+
```
|
106
|
+
|
107
|
+
To get an array of all the possible values:
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
irb(main):001:0> OSC::Machete::Status.values
|
111
|
+
=> [#<OSC::Machete::Status:0x002ba201079918 @char="U">, #<OSC::Machete::Status:0x002ba2010798a0 @char=nil>, #<OSC::Machete::Status:0x002ba201079710 @char="C">, #<OSC::Machete::Status:0x002ba201079620 @char="F">, #<OSC::Machete::Status:0x002ba201079558 @char="H">, #<OSC::Machete::Status:0x002ba2010794e0 @char="Q">, #<OSC::Machete::Status:0x002ba2010793c8 @char="R">, #<OSC::Machete::Status:0x002ba201079328 @char="S">]
|
112
|
+
irb(main):002:0> OSC::Machete::Status.values.map(&:to_s)
|
113
|
+
=> ["Undetermined", "Not Submitted", "Passed", "Failed", "Held", "Queued", "Running", "Suspended"]
|
114
|
+
irb(main):003:0>
|
115
|
+
```
|
116
|
+
|
117
|
+
### OSC::Machete::Process
|
118
|
+
|
119
|
+
Gives information about the running process. Uses Ruby's Process library when it
|
120
|
+
makes sense.
|
121
|
+
|
122
|
+
Examples using pry:
|
123
|
+
|
124
|
+
```
|
125
|
+
[14] pry(main)> OSC::Machete::Process.new.groupname
|
126
|
+
=> "PZS0562"
|
127
|
+
[15] pry(main)> OSC::Machete::Process.new.username
|
128
|
+
=> "efranz"
|
129
|
+
[16] pry(main)> OSC::Machete::Process.new.home
|
130
|
+
=> "/nfs/17/efranz"
|
131
|
+
[17] pry(main)> OSC::Machete::Process.new.group_membership_changed?
|
132
|
+
=> false
|
133
|
+
```
|
134
|
+
|
135
|
+
### OSC::Machete::User
|
136
|
+
|
137
|
+
Gives information about the specified user, by using Ruby's Etc library and
|
138
|
+
inspecting the group membership file.
|
139
|
+
|
140
|
+
Example using pry:
|
141
|
+
|
142
|
+
```
|
143
|
+
[18] pry(main)> OSC::Machete::User.new.member_of_group?("awsmdev")
|
144
|
+
=> true
|
145
|
+
[19] pry(main)> OSC::Machete::User.new.home
|
146
|
+
=> "/nfs/17/efranz"
|
147
|
+
[20] pry(main)> OSC::Machete::Process.new.home
|
148
|
+
=> "/nfs/17/efranz"
|
149
|
+
[21] pry(main)> OSC::Machete::User.new.groups
|
150
|
+
=> [2959,
|
151
|
+
3140,
|
152
|
+
3141,
|
153
|
+
3179,
|
154
|
+
3285,
|
155
|
+
3528,
|
156
|
+
3572,
|
157
|
+
4391,
|
158
|
+
4497,
|
159
|
+
4498,
|
160
|
+
4511,
|
161
|
+
4514,
|
162
|
+
4517,
|
163
|
+
4580,
|
164
|
+
4807,
|
165
|
+
4808]
|
166
|
+
```
|
167
|
+
|
168
|
+
### Example of using Machete directly via irb
|
169
|
+
|
170
|
+
```sh
|
171
|
+
-bash-3.2$ pry -rosc/machete
|
172
|
+
[24] pry(main)> puts OSC::Machete::Job.new(pbsid: "17711768.opt-batch.osc.edu").status
|
173
|
+
Running
|
174
|
+
=> nil
|
175
|
+
```
|
176
|
+
|
177
|
+
Or you could write your own ruby script that does something using the gem:
|
178
|
+
|
179
|
+
```sh
|
180
|
+
-bash-3.2$ cat test.rb
|
181
|
+
require 'osc/machete'
|
182
|
+
|
183
|
+
pbsid = "17711768.opt-batch.osc.edu"
|
184
|
+
j = OSC::Machete::Job.new pbsid: pbsid
|
185
|
+
puts j.status
|
186
|
+
```
|
187
|
+
|
188
|
+
And then run it like this:
|
189
|
+
|
190
|
+
```sh
|
191
|
+
-bash-3.2$ ruby test.rb
|
192
|
+
Running
|
193
|
+
-bash-3.2$
|
194
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
3
|
+
|
4
|
+
# Defines the :test task; the test/ directory is added to the load path.
Rake::TestTask.new(:test) { |t| t.libs << 'test' }

# Default task: print a hint about the LIVETEST environment variable
# (only when it is not set), then run the :test task.
desc "Run tests"
task :default do
  unless ENV['LIVETEST']
    puts "\nIf you want to run tests that submit simple jobs to batch system, " \
         "set the environment variable LIVETEST.\n\n"
  end

  Rake::Task['test'].invoke
end

# Opens an IRB session with the gem loaded. ARGV is cleared before IRB.start
# so IRB does not see rake's own command-line arguments.
task :console do
  %w[irb irb/completion osc/machete].each { |lib| require lib }
  ARGV.clear
  IRB.start
end
|
data/lib/osc/machete.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# declares the module
|
2
|
+
require "osc/machete/version"
|
3
|
+
|
4
|
+
require "osc/machete/status"
|
5
|
+
require "osc/machete/job"
|
6
|
+
require "osc/machete/job_dir"
|
7
|
+
require "osc/machete/location"
|
8
|
+
require "osc/machete/torque_helper"
|
9
|
+
require "osc/machete/user"
|
10
|
+
require "osc/machete/process"
|
11
|
+
|
12
|
+
# The OSC namespace module.
|
13
|
+
module OSC
|
14
|
+
# The main osc-machete module; acts as the namespace for the library's classes (e.g. OSC::Machete::Job).
|
15
|
+
module Machete
|
16
|
+
# Intentionally empty: this file only declares the namespace, nothing else is defined here.
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,239 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
|
4
|
+
# Core object for working with batch jobs, including:
|
5
|
+
#
|
6
|
+
# * submitting jobs
|
7
|
+
# * checking job status
|
8
|
+
# * setting dependencies between jobs via a directed acyclic graph
|
9
|
+
#
|
10
|
+
# Create a new Job from a script:
|
11
|
+
#
|
12
|
+
# job = Job.new(script: '/nfs/17/efranz/jobs/1/script.sh')
|
13
|
+
# job.submitted? #=> false
|
14
|
+
# job.path #=> '/nfs/17/efranz/jobs/1'
|
15
|
+
# job.script_name #=> 'script.sh'
|
16
|
+
# job.status #=> OSC::Machete::Status.not_submitted
|
17
|
+
# job.pbsid #=> nil
|
18
|
+
#
|
19
|
+
# # PBS_O_WORKDIR will be set to the directory containing the script
|
20
|
+
# job.submit
|
21
|
+
#
|
22
|
+
# job.submitted? #=> true
|
23
|
+
# job.status.to_s #=> "Queued"
|
24
|
+
# job.pbsid #=> "3422735.oak-batch"
|
25
|
+
#
|
26
|
+
# # if you know the pbs id you can instantiate a
|
27
|
+
# # Job object to ask for the status of it
|
28
|
+
# job2 = Job.new(pbsid: "3422735.oak-batch")
|
29
|
+
# job2.status.to_s #=> "Queued"
|
30
|
+
#
|
31
|
+
# # because the object was created with only the pbsid passed in,
|
32
|
+
# # path and script_name and dependency information is not available
|
33
|
+
# job2.path #=> nil
|
34
|
+
# job2.script_name #=> nil
|
35
|
+
#
|
36
|
+
# # but an unknown pbsid results in status nil
|
37
|
+
# job3 = Job.new(pbsid: "12345.oak-batch")
|
38
|
+
# job3.status #=> nil
|
39
|
+
#
|
40
|
+
# Create two Job instances and form a dependency between them:
|
41
|
+
#
|
42
|
+
# job1 = Job.new(script: '/nfs/17/efranz/jobs/1/script.sh')
|
43
|
+
# job2 = Job.new(script: '/nfs/17/efranz/jobs/1/post.sh')
|
44
|
+
#
|
45
|
+
# job2.afterany(job1) # job2 runs after job1 completes with any exit status
|
46
|
+
#
|
47
|
+
# job1.submit
|
48
|
+
# job2.submit
|
49
|
+
#
|
50
|
+
# job1.status.to_s #=> "Queued"
|
51
|
+
# job2.status.to_s #=> "Held"
|
52
|
+
#
|
53
|
+
# @!attribute [r] pbsid
|
54
|
+
# @return [String, nil] the PBS job id, or nil if not set
|
55
|
+
# @!attribute [r] script_path
|
56
|
+
# @return [Pathname, nil] path of the job script, or nil if not set
|
57
|
+
#
|
58
|
+
class OSC::Machete::Job
  # pbsid          - PBS job id, or nil if the job has not been submitted
  # script_path    - Pathname of the job script (cleaned), or nil if none given
  # account_string - account string handed to qsub, or nil if none was set
  attr_reader :pbsid, :script_path, :account_string

  class << self
    # set this to change the billable account that is used by default
    attr_accessor :default_account_string
  end

  # Error class thrown when script is not available.
  class ScriptMissingError < StandardError; end

  # Create Job instance to represent an unsubmitted batch job from the specified
  # script, or an existing, already submitted batch job from the specified pbsid
  #
  # Takes params in options hash as single argument:
  #
  #     Job.new(script: '/path/to/job/dir/go.sh')
  #
  # or
  #
  #     opts = { script: '/path/to/job/dir/go.sh' }
  #     Job.new(opts)
  #
  # Job class makes assumption that a job's PBS_O_WORKDIR will be
  # in the directory containing the shell script that is run.
  #
  # @param [Hash] args the arguments to create the job
  # @option args [String] :script  full path to script (optional)
  # @option args [String, nil] :pbsid  pbsid of a job already submitted (optional)
  # @option args [String, nil] :host  host passed through to the torque helper
  #                                   on qsub/qstat/qdel (optional)
  # @option args [String, nil] :account_string  account string for this instance;
  #                                   falls back to Job.default_account_string (optional)
  # @option args [TorqueHelper, nil] :torque_helper  override default torque helper (optional)
  #                                   NOTE: used for testing purposes
  #                                   we could use it also if we had different
  #                                   torque_helper classes for different systems
  def initialize(args)
    @script_path = Pathname.new(args[:script]).cleanpath unless args[:script].nil?
    # @script_path = @script_path.expand_path would change this to absolute path

    @pbsid = args[:pbsid]
    @host = args[:host]
    @torque = args[:torque_helper] || OSC::Machete::TorqueHelper.default
    @account_string = args[:account_string] || self.class.default_account_string

    @dependencies = {} # {:afterany => [Job, Job], :afterok => [Job]}
  end

  # @return [String, nil] script name or nil if instance wasn't initialized with a script
  def script_name
    # @script_path is already a Pathname (see #initialize)
    @script_path.basename.to_s if @script_path
  end

  # @return [Pathname, nil] job directory or nil if instance wasn't initialized with a script
  def path
    @script_path.dirname if @script_path
  end

  # Submit any dependent jobs that haven't been submitted
  # then submit this job, specifying dependencies as required by Torque.
  # Submitting includes cd-ing into the script's directory and qsub-ing from
  # that location, ensuring that environment variable PBS_O_WORKDIR is
  # set to the directory containing the script.
  #
  # Does nothing if the job has already been submitted (pbsid is set).
  #
  # @raise [ScriptMissingError] Raised when no script was given, or when the
  #   path to the script does not exist or cannot be read.
  def submit
    return if submitted?

    # A Job built without :script has a nil script_path; report that as a
    # missing script instead of failing with NoMethodError on nil.
    raise ScriptMissingError, "no script was specified for this job" if script_path.nil?

    unless script_path.file? && script_path.readable?
      raise ScriptMissingError, "#{script_path} does not exist or cannot be read"
    end

    # submit any dependent jobs that have not yet been submitted
    submit_dependencies

    # cd into directory, submit job from there
    # so that PBS_O_WORKDIR is set to location
    # where job is run
    #
    #TODO: you can set PBS_O_WORKDIR via qsub args, is this necessary? there is
    # another env var besides PBS_O_WORKDIR that is affected by the path of the
    # current directory when the job is submitted
    #
    #TODO: what if you want to submit via piping to qsub i.e. without creating a file?
    Dir.chdir(path.to_s) do
      @pbsid = @torque.qsub script_name, depends_on: dependency_ids, host: @host, account_string: account_string
    end
  end

  # Check whether the job has been submitted.
  #
  # @return [Boolean] true if @pbsid is set
  def submitted?
    !@pbsid.nil?
  end

  # Return the status of the job: the "not submitted" status when there is no
  # pbsid, otherwise whatever the torque helper's qstat reports for the pbsid.
  #
  # @return [Status] value object representing status of a job
  def status
    return OSC::Machete::Status.not_submitted if @pbsid.nil?

    @torque.qstat @pbsid, host: @host
  end

  # Ensure Job starts only after the specified Job(s) complete
  #
  # @param [Job, Array<Job>] jobs  Job(s) that this Job should depend on (wait for)
  # @return [self] self so you can chain method calls
  def afterany(jobs)
    add_dependencies(:afterany, jobs)
  end

  # Ensure Job starts only after the specified Job(s) complete with successful
  # return value.
  #
  # @param (see #afterany)
  # @return (see #afterany)
  def afterok(jobs)
    add_dependencies(:afterok, jobs)
  end

  # Ensure Job starts only after the specified Job(s) start.
  #
  # @param (see #afterany)
  # @return (see #afterany)
  def after(jobs)
    add_dependencies(:after, jobs)
  end

  # Ensure Job starts only after the specified Job(s) complete with error
  # return value.
  #
  # @param (see #afterany)
  # @return (see #afterany)
  def afternotok(jobs)
    add_dependencies(:afternotok, jobs)
  end

  # Kill the currently running batch job. Does nothing when no pbsid is set.
  #
  # @param [Boolean] rmdir (false) if true, recursively remove the containing directory
  #                                of the job script if killing the job succeeded
  #
  # @return [nil]
  def delete(rmdir: false)
    # FIXME: rethink this interface... should qdel be idempotent?
    # After first call, no errors thrown after?
    return unless pbsid

    @torque.qdel(pbsid, host: @host)

    # FIXME: removing a directory is always a dangerous action.
    # I wonder if we can add more tests to make sure we don't delete
    # something if the script name is munged

    # recursively delete the directory after killing the job
    # (Pathname#exist? replaces File.exists?, which was removed in Ruby 3.2)
    job_dir = path
    job_dir.rmtree if rmdir && job_dir && job_dir.exist?

    nil
  end

  private

  # Submit every job this job depends on.
  # assumes each dependency is a Job object
  def submit_dependencies
    @dependencies.values.flatten.each(&:submit)
  end

  # build a dictionary of ids for each dependency type,
  # leaving out types for which no dependency has a pbsid
  def dependency_ids
    @dependencies.each_with_object({}) do |(type, jobs), ids|
      pbsids = jobs.map(&:pbsid).compact
      ids[type] = pbsids unless pbsids.empty?
    end
  end

  # Record one or more jobs as dependencies of the given type
  # (:afterany, :afterok, :after, :afternotok).
  #
  # @return [self] so the public after* methods can be chained
  def add_dependencies(type, jobs)
    (@dependencies[type] ||= []).concat(Array(jobs))

    self
  end
end
|