once-only 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +16 -0
- data/LICENSE.txt +20 -0
- data/README.md +199 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/bin/once-only +208 -0
- data/features/once-only.feature +22 -0
- data/features/step_definitions/once-only_steps.rb +20 -0
- data/features/support/env.rb +13 -0
- data/lib/once-only.rb +4 -0
- data/lib/once-only/check.rb +100 -0
- data/lib/once-only/once-only.rb +3 -0
- data/lib/once-only/sha1.rb +91 -0
- data/spec/bio-once-only_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- metadata +114 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.travis.yml
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- jruby-19mode # JRuby in 1.9 mode
|
6
|
+
|
7
|
+
# - rbx-19mode
|
8
|
+
# - 1.8.7
|
9
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
10
|
+
# - rbx-18mode
|
11
|
+
|
12
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
13
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Fetch gems over TLS; the plain http:// endpoint allows tampering in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.8.0"
  # gem "rdoc", "~> 3.12"
  gem "cucumber", ">= 0"
  gem "jeweler", "~> 1.8.4", :git => "https://github.com/technicalpickles/jeweler.git"
  gem "bundler", ">= 1.0.21"
  # gem "bio", ">= 1.4.2"
  # gem "rdoc", "~> 3.12"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Pjotr Prins
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
# once-only
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/pjotrp/once-only.png)](http://travis-ci.org/pjotrp/once-only)
|
4
|
+
|
5
|
+
Relax with PBS!
|
6
|
+
|
7
|
+
* Computations only happen once
|
8
|
+
* A completed job does not get submitted again to PBS
|
9
|
+
* A job already in the queue does not get submitted again to PBS
|
10
|
+
|
11
|
+
Once-only makes a program or script run only *once*, provided the inputs don't
|
12
|
+
change (in a functional style!). This is very useful when running a range of
|
13
|
+
jobs on a compute cluster or GRID. It may even be useful in the context of
|
14
|
+
webservices. Once-only makes it relaxed to run many jobs on compute clusters!
|
15
|
+
A mistake, interruption, or even a parameter tweak, does not mean everything
|
16
|
+
has to be run again.
|
17
|
+
|
18
|
+
Instead of running a tool or script directly, such as
|
19
|
+
|
20
|
+
```sh
|
21
|
+
bowtie -t e_coli reads/e_coli_1000.fq e_coli.map
|
22
|
+
```
|
23
|
+
|
24
|
+
Prepend once-only
|
25
|
+
|
26
|
+
```sh
|
27
|
+
once-only bowtie -t e_coli reads/e_coli_1000.fq e_coli.map
|
28
|
+
```
|
29
|
+
|
30
|
+
and once-only will parse the command line for existing files and run a checksum
|
31
|
+
on them (here the binary executable 'bowtie' and data files
|
32
|
+
reads/e_coli_1000.fq and e_coli.map). This checksum, in fact an MD5
|
33
|
+
cryptographic hash, or optionally [pfff](https://github.com/pfff/pfff) for
|
34
|
+
large files, is a unique identifier (aka fingerprint) and saved in a file in the running
|
35
|
+
directory. When the checksum file does not exist in the directory the command
|
36
|
+
'bowtie -t e_coli reads/e_coli_1000.fq e_coli.map' is executed.
|
37
|
+
|
38
|
+
When the file already exists execution is skipped. In other words, the checksum
|
39
|
+
file guarantees the program is only run once with the same inputs. Really
|
40
|
+
simple!
|
41
|
+
|
42
|
+
In combination with PBS this could be
|
43
|
+
|
44
|
+
```sh
|
45
|
+
echo "once-only bowtie -t e_coli reads/e_coli_1000.fq e_coli.map" |qsub -k oe -d path
|
46
|
+
```
|
47
|
+
|
48
|
+
Interestingly once-only also comes with PBS support, which won't add a job to
|
49
|
+
the queue if it is already in the queue, or if it has been executed
|
50
|
+
successfully:
|
51
|
+
|
52
|
+
```sh
|
53
|
+
once-only --pbs '-k oe' bowtie -t e_coli reads/e_coli_1000.fq e_coli.map
|
54
|
+
```
|
55
|
+
|
56
|
+
The PBS job will be named and identified according to the checksum value. This
|
57
|
+
can be used to query PBS and clean up based on queued jobs.
|
58
|
+
|
59
|
+
The file once-only writes contains a list of the input files with
|
60
|
+
their individual checksum values. E.g. on
|
61
|
+
|
62
|
+
```sh
|
63
|
+
./bin/once-only -v ../bioruby-table/bin/bio-table ../bioruby-table/test/data/input/table1.csv
|
64
|
+
|
65
|
+
cat bio-table-25e51f9297b43b5dacf687b4158f0b79e69c6817.txt
|
66
|
+
|
67
|
+
MD5 53bcceee564c47cebff8160ab734313f ../bioruby-table/bin/bio-table
|
68
|
+
MD5 9868b63e3624023a176c29bb80eb54f5 ../bioruby-table/test/data/input/table1.csv
|
69
|
+
SHA1 46ae0f4af8c2566185954bb07d4eeb18c1867077 ../bioruby-table/bin/bio-table ../bioruby-table/test/data/input/table1.csv
|
70
|
+
```
|
71
|
+
|
72
|
+
This list can also be used to distinguish between input and output files after
|
73
|
+
completion of the program. To check the validity of input files you could run
|
74
|
+
md5sum on the once-only hash file, for example
|
75
|
+
|
76
|
+
```sh
|
77
|
+
grep MD5 bio-table-ce4ceee0d2ee08ef235662c35b8238ad47fed030.txt |awk 'BEGIN { FS = "[ \t\n]+" }{ print $2,"",$3 }'|md5sum -c
|
78
|
+
```
|
79
|
+
|
80
|
+
Once-only is inspired by the Lisp once-only function, which wraps another
|
81
|
+
function and calculates a result only once, based on the same inputs. It is
|
82
|
+
also inspired by the NixOS software deployment system, which guarantees
|
83
|
+
packages are uniquely deployed, based on the source code inputs and the
|
84
|
+
configuration at compile time.
|
85
|
+
|
86
|
+
## Installation
|
87
|
+
|
88
|
+
Note: once-only is written in Ruby, but you don't need to understand
|
89
|
+
Ruby programming to use it!
|
90
|
+
|
91
|
+
With Ruby 1.9 or later on your system you can run
|
92
|
+
|
93
|
+
```sh
|
94
|
+
gem install once-only
|
95
|
+
```
|
96
|
+
|
97
|
+
### Dependencies
|
98
|
+
|
99
|
+
'md5sum' is used for calculating MD5 hash values.
|
100
|
+
|
101
|
+
'pfff' is optional and used for calculating pfff hash values on very large files.
|
102
|
+
|
103
|
+
When you are using PBS, once-only requires the 'qsub' and 'qstat' commands.
|
104
|
+
|
105
|
+
## Usage (command line)
|
106
|
+
|
107
|
+
To get a full list of command options
|
108
|
+
|
109
|
+
```sh
|
110
|
+
once-only --help
|
111
|
+
```
|
112
|
+
|
113
|
+
Useful switches can be -v (verbose) and -q (quiet).
|
114
|
+
|
115
|
+
If you want to skip scanning the executable file (useful in heterogeneous environments,
|
116
|
+
such as the GRID) use the --skip-exe switch:
|
117
|
+
|
118
|
+
```sh
|
119
|
+
once-only --skip-exe muscle -in aa.fa -out out-alignment.fa -tree1 first.ph -tree2 tree.ph
|
120
|
+
```
|
121
|
+
|
122
|
+
where only aa.fa is the scanned input file in the first round. To prevent the second run
|
123
|
+
of once-only from including the output files (out-alignment.fa, first.ph and tree.ph) you
|
124
|
+
can specify them in the first round on the command line as
|
125
|
+
|
126
|
+
```sh
|
127
|
+
once-only --skip out-alignment.fa --skip first.ph --skip tree.ph muscle -in aa.fa -out out-alignment.fa -tree1 first.ph -tree2 tree.ph
|
128
|
+
```
|
129
|
+
|
130
|
+
A regular expression on output filenames may be the nicer option:
|
131
|
+
|
132
|
+
```sh
|
133
|
+
once-only --skip-exe --skip-regex 'out|\.ph$' muscle -in aa.fa -out out-alignment.fa -tree1 first.ph -tree2 tree.ph
|
134
|
+
```
|
135
|
+
|
136
|
+
or if you are more comfortable with shell style pattern matching use
|
137
|
+
|
138
|
+
```sh
|
139
|
+
once-only --skip-exe --skip-glob 'out*' --skip-glob '*.ph' muscle -in aa.fa -out out-alignment.fa -tree1 first.ph -tree2 tree.ph
|
140
|
+
```
|
141
|
+
|
142
|
+
For a full range of glob patterns, see this [page](http://ruby.about.com/od/beginningruby/a/dir2.htm).
|
143
|
+
|
144
|
+
Sometimes you want to include input files that are not on the command line for generating the hash. Maybe some default input file name is being picked up, or it is defined in a
|
145
|
+
configuration file. In that case use the --include/--in options.
|
146
|
+
|
147
|
+
Another once-only command line option is to change directory before executing the script
|
148
|
+
|
149
|
+
```sh
|
150
|
+
once-only -d run001 --skip-regex 'out|\.ph$' muscle -in aa.fa -out out-alignment.fa -tree1 first.ph -tree2 tree.ph
|
151
|
+
```
|
152
|
+
|
153
|
+
which is useful with PBS and in scripted environments.
|
154
|
+
|
155
|
+
### PBS
|
156
|
+
|
157
|
+
Once-only has PBS support built-in. It only uses the 'qsub' and 'qstat' commands.
|
158
|
+
|
159
|
+
Basically use the --pbs option:
|
160
|
+
|
161
|
+
```sh
|
162
|
+
once-only --pbs /bin/cat ~/.bashrc
|
163
|
+
```
|
164
|
+
|
165
|
+
This will submit 'cat ~/.bashrc' to the queue. If the job is already in the queue it
|
166
|
+
won't be submitted, thanks to the unique job ID once-only generates.
|
167
|
+
|
168
|
+
In fact, this is an interesting example, because
|
169
|
+
both /bin/cat and ~/.bashrc files may differ on the submission machine and the cluster
|
170
|
+
nodes. Only when both are the same you can expect once-only to run properly. In
|
171
|
+
this case it is wise to add at least
|
172
|
+
|
173
|
+
```sh
|
174
|
+
once-only --pbs --skip-exe /bin/cat ~/.bashrc
|
175
|
+
```
|
176
|
+
|
177
|
+
so once-only won't check the file /bin/cat.
|
178
|
+
|
179
|
+
## Project home page
|
180
|
+
|
181
|
+
For information on the source tree, documentation, examples, issues and
|
182
|
+
how to contribute, see
|
183
|
+
|
184
|
+
http://github.com/pjotrp/once-only
|
185
|
+
|
186
|
+
## Cite
|
187
|
+
|
188
|
+
If you use this software, please cite
|
189
|
+
|
190
|
+
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
191
|
+
|
192
|
+
## Biogems.info
|
193
|
+
|
194
|
+
This Biogem is published at http://biogems.info/
|
195
|
+
|
196
|
+
## Copyright
|
197
|
+
|
198
|
+
Copyright (c) 2013 Pjotr Prins. See LICENSE.txt for further details.
|
199
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "once-only"
|
18
|
+
gem.homepage = "http://github.com/pjotrp/once-only"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Run commands once only if inputs do not change}
|
21
|
+
gem.description = %Q{Run programs and scripts once only. Especially
|
22
|
+
useful for PBS and GRID computing}
|
23
|
+
gem.email = "pjotr.public01@thebird.nl"
|
24
|
+
gem.authors = ["Pjotr Prins"]
|
25
|
+
# dependencies defined in Gemfile
|
26
|
+
end
|
27
|
+
Jeweler::RubygemsDotOrgTasks.new
|
28
|
+
|
29
|
+
require 'rspec/core'
|
30
|
+
require 'rspec/core/rake_task'
|
31
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
32
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
33
|
+
end
|
34
|
+
|
35
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
36
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
37
|
+
spec.rcov = true
|
38
|
+
end
|
39
|
+
|
40
|
+
require 'cucumber/rake/task'
|
41
|
+
Cucumber::Rake::Task.new do |features|
|
42
|
+
end
|
43
|
+
|
44
|
+
task :default => [:cucumber]
|
45
|
+
|
46
|
+
require 'rdoc/task'
|
47
|
+
Rake::RDocTask.new do |rdoc|
|
48
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
49
|
+
|
50
|
+
rdoc.rdoc_dir = 'rdoc'
|
51
|
+
rdoc.title = "once-only #{version}"
|
52
|
+
rdoc.rdoc_files.include('README*')
|
53
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
54
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/bin/once-only
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Once-only Run applications once with the same inputs
|
4
|
+
# Author:: Pjotr Prins
|
5
|
+
# Copyright:: 2013
|
6
|
+
|
7
|
+
# Help text printed for -h/--help and when once-only is invoked without
# any arguments.
USAGE =<<EOB

  once-only runs a command once only when inputs don't change!

  Usage:

    -d path              change to directory before executing
    --pbs [opts]         convert to PBS command with optional options
    --skip|--out file    skip making a checksum of the named file (multiple allowed)
    --skip-exe           skip making a checksum of the executable command/script
    --skip-cli           skip making a checksum of full command line
    --skip-regex regex   skip making checksums of filenames that match the regex (multiple allowed)
    --skip-glob glob     skip making checksums of filenames that match the glob (multiple allowed)
    --include|--in file  include input filename for making the checksums
    -v                   increase verbosity
    -q                   run quietly
    --debug              give debug information
    --dry-run            do not execute command
    --force              force execute command

  See the README for examples

EOB
|
30
|
+
|
31
|
+
# Keep the invocation verbatim; it is echoed in the "no need to rerun" message.
original_commands = ARGV.join(' ')

# Make the gem's own lib directory loadable relative to this script.
gempath = File.dirname(File.dirname(__FILE__))
$: << File.join(gempath,'lib')

VERSION_FILENAME=File.join(gempath,'VERSION')
# File.read closes the file again; File.new(...).read left the handle open.
version = File.read(VERSION_FILENAME).chomp

$stderr.print "once-only #{version} (using Ruby #{RUBY_VERSION}) by Pjotr Prins 2013\n"

require 'once-only'

# Without any arguments there is nothing to run: show the help text.
if ARGV.size == 0
  print USAGE
  exit 1
end
|
47
|
+
|
48
|
+
# Report +errval+ on stderr and terminate the process, using +errval+ as the
# exit status (1 when omitted).
def exit_error(errval = 1)
  message = "\nonce-only returned error #{errval}\n"
  $stderr.print(message)
  exit(errval)
end
|
52
|
+
|
53
|
+
# Split the once-only command line into once-only's own switches and the
# wrapped command.
#
# Switches are consumed from the front of +args+ until an argument names an
# existing file; that argument is taken to be the executable and everything
# from there on is the wrapped command.
#
# Returns a two element array [command_args, options], where options is a
# hash holding the parsed switches (:skip, :skip_regex, :skip_glob and
# :include are always present as arrays).
#
# Prints a message and calls exit_error(1) when an argument is not
# recognised, when a switch is missing its value, or when no command is
# left after the switches (these cases used to crash with a TypeError or
# NoMethodError on nil). -h/--help prints USAGE and exits with status 1.
def parse_args(args)
  options = { :skip => [], :skip_regex => [], :skip_glob => [], :include => [] }

  # switches without a value
  flags = {
    '--skip-exe' => :skip_exe, '--skip-cli' => :skip_cli, '--debug' => :debug,
    '-v' => :verbose, '-q' => :quiet, '--dry-run' => :dry_run, '--force' => :force
  }
  # switches that collect one value per occurrence
  lists = {
    '--skip' => :skip, '--out' => :skip, '--skip-regex' => :skip_regex,
    '--skip-glob' => :skip_glob, '--include' => :include, '--in' => :include
  }
  fail_parse = lambda { |remaining|
    $stderr.print "Can not parse arguments ",remaining
    exit_error(1)
  }

  rest = args.dup
  until rest.empty?
    head = rest.first
    return rest,options if File.exist?(head) # reached the executable command
    if flags.key?(head)
      options[flags[head]] = true
      rest = rest.drop(1)
    elsif lists.key?(head)
      fail_parse.call(rest) if rest.size < 2
      options[lists[head]] << rest[1]
      rest = rest.drop(2)
    elsif head == '-d'
      fail_parse.call(rest) if rest.size < 2
      options[:dir] = File.expand_path(rest[1])
      rest = rest.drop(2)
    elsif head == '--pbs'
      if rest[1] =~ /\s+/ # optional argument, recognised by embedded whitespace
        options[:pbs] = rest[1]
        rest = rest.drop(2)
      else
        options[:pbs] = "''"
        rest = rest.drop(1)
      end
    elsif head == '-h' || head == '--help'
      print USAGE
      exit 1
    else
      fail_parse.call(rest)
    end
  end

  # Only switches were given; there is no command to run.
  fail_parse.call(rest)
end
|
114
|
+
|
115
|
+
# --- Main script: split the command line, fingerprint the inputs and run
#     (or submit to PBS) the command unless a matching check file exists.
args,options = parse_args(ARGV)
# The once-only switches are whatever preceded the wrapped command. Array#take
# gives an empty prefix when there are no switches; the earlier slice
# ARGV[0..ARGV.size-args.size-1] degenerated to ARGV[0..-1] (all of ARGV).
once_only_args = OnceOnly::Check.requote([ __FILE__ ] + ARGV.take(ARGV.size - args.size))

if options[:debug]
  print "Full: "
  p ARGV
  print "Prefix: "
  p once_only_args
  print "Postfix: "
  p args
  print "Options: "
  p options
end

# The command re-submitted through PBS must not carry --pbs or -d itself.
once_only_args = OnceOnly::Check.drop_pbs_option(once_only_args)
once_only_args = OnceOnly::Check.drop_dir_option(once_only_args)
once_only_command = once_only_args.join(' ')

command = args.join(' ')
# NOTE(review): the arguments are sorted before hashing, so two commands that
# differ only in argument order share one fingerprint - confirm this is intended.
command_sorted = args.sort.join(' ')
command_sha1 = OnceOnly::Check::calc_checksum(command_sorted)

# change dir
if options[:dir]
  $stderr.print "Changing dir to ",options[:dir],"\n" if !options[:quiet]
  Dir.chdir options[:dir]
end

executable = args[0]
args = args[1..-1] if options[:skip_exe]

# Collect the input files: existing files named on the command line, minus
# everything excluded by regex, glob or name, plus explicit includes.
file_list = OnceOnly::Check::get_file_list(args)
options[:skip_regex].each { |regex|
  file_list = OnceOnly::Check::filter_file_list(file_list,regex)
}
options[:skip_glob].each { |glob|
  file_list = OnceOnly::Check::filter_file_list_glob(file_list,glob)
}
file_list -= options[:skip]

OnceOnly::Check::check_files_exist(options[:include])
file_list += options[:include]

checksums = OnceOnly::Check::calc_file_checksums(file_list)
checksums.push ['SHA1',command_sha1,command_sorted] if not options[:skip_cli]

once_only_filename = OnceOnly::Check::make_once_filename(checksums,File.basename(executable))
$stderr.print "Check file name ",once_only_filename,"\n" if options[:verbose]
error_filename = once_only_filename + '.err'
$stderr.print "Job file exists ",once_only_filename,"!\n" if options[:debug] and File.exist?(once_only_filename)

dirname = File.basename(Dir.pwd).rjust(8,"-") # make sure it is long enough

# Job name: tail of the directory name plus the leading characters of each
# dash separated part of the check file name, capped at 16 characters.
job_name = (dirname[-5..-1] + once_only_filename.split(/-/).map{|s|s[0..5]}.join).gsub(/[_-]/,'')[0..15]
$stderr.print "Job name ",job_name,"\n" if options[:verbose]

if options[:force] or not File.exist?(once_only_filename)
  $stderr.print "Running #{command}\n" if not options[:quiet]
  if options[:pbs]
    # --- Check if job is already queued in PBS
    qstat = `/usr/bin/qstat`
    # Plain substring test: the job name may contain regex metacharacters
    # (e.g. a '.' taken from the executable name).
    if qstat.include?(job_name)
      $stderr.print "Job #{job_name} already in queue!\n"
      exit 0
    end
    # --- Submit PBS job
    pbs_command = 'echo "' + once_only_command + ' ' + command + "\"|qsub -N #{job_name} "+options[:pbs]+' '
    pbs_command += '-d ' + (options[:dir] ? options[:dir] : Dir.pwd)

    $stderr.print(pbs_command,"\n") if options[:verbose]
    if !options[:dry_run]
      if not system(pbs_command)
        OnceOnly::Check::write_file(error_filename,checksums)
        # exitstatus is nil when the child was killed by a signal
        exit_error($?.exitstatus || 1)
      end
    end
  else
    # --- Run on command line
    if !options[:dry_run]
      if not system(command)
        OnceOnly::Check::write_file(error_filename,checksums)
        # exitstatus is nil when the child was killed by a signal
        exit_error($?.exitstatus || 1)
      else
        # --- Success!
        File.unlink(error_filename) if File.exist?(error_filename)
        OnceOnly::Check::write_file(once_only_filename,checksums)
      end
    end
  end
else
  $stderr.print "Inputs unchanged. No need to rerun '#{original_commands}'!\n" if not options[:quiet]
end

exit 0 # success!
|
@@ -0,0 +1,22 @@
|
|
1
|
+
Feature: Execute command line
|
2
|
+
|
3
|
+
Once-only asserts a command is only run once, based on the inputs on the
|
4
|
+
command line.
|
5
|
+
|
6
|
+
Scenario: Test first run and skip execute of second run
|
7
|
+
Given a command '/bin/cat LICENSE.txt'
|
8
|
+
When I run the command the first time
|
9
|
+
Then once-only should create a checksum
|
10
|
+
When I run the command the second time with the same inputs
|
11
|
+
Then once-only should not recreate the checksum and skip the run
|
12
|
+
|
13
|
+
Scenario: Run gives an error
|
14
|
+
Given a command '/bin/cat LICENSE.txt.none'
|
15
|
+
When I run the command the first time
|
16
|
+
Then once-only should pass back an error
|
17
|
+
Then once-only should have created an error file
|
18
|
+
|
19
|
+
Scenario: Executable does not exist
|
20
|
+
Given a command '/binxx/cat LICENSE.txt'
|
21
|
+
When I run the non-existing command
|
22
|
+
Then once-only should pass back a wrong command error
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Given(/^a command '\/bin\/cat LICENSE.txt'$/) do
|
2
|
+
@cmd = '/bin/cat LICENSE.txt'
|
3
|
+
end
|
4
|
+
|
5
|
+
When(/^I run the command the first time$/) do
|
6
|
+
pending # express the regexp above with the code you wish you had
|
7
|
+
end
|
8
|
+
|
9
|
+
Then(/^once\-only should create a checksum$/) do
|
10
|
+
pending # express the regexp above with the code you wish you had
|
11
|
+
end
|
12
|
+
|
13
|
+
When(/^I run the command the second time with the same inputs$/) do
|
14
|
+
pending # express the regexp above with the code you wish you had
|
15
|
+
end
|
16
|
+
|
17
|
+
Then(/^once\-only should not recreate the checksum and skip the run$/) do
|
18
|
+
pending # express the regexp above with the code you wish you had
|
19
|
+
end
|
20
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
begin
|
3
|
+
Bundler.setup(:default, :development)
|
4
|
+
rescue Bundler::BundlerError => e
|
5
|
+
$stderr.puts e.message
|
6
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
+
exit e.status_code
|
8
|
+
end
|
9
|
+
|
10
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
|
11
|
+
require 'once-only'
|
12
|
+
|
13
|
+
require 'rspec/expectations'
|
data/lib/once-only.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
begin
|
2
|
+
require "digest"
|
3
|
+
Digest::SHA1.hexdigest('test')
|
4
|
+
rescue LoadError
|
5
|
+
$stderr.print "Using native Ruby SHA1 (slow)\n"
|
6
|
+
$ruby_sha1 = true
|
7
|
+
end
|
8
|
+
|
9
|
+
module OnceOnly
|
10
|
+
|
11
|
+
module Check
|
12
|
+
# filter out all arguments that reflect existing files
|
13
|
+
def Check::get_file_list list
|
14
|
+
list.map { |arg| get_existing_filename(arg) }.compact
|
15
|
+
end
|
16
|
+
|
17
|
+
def Check::check_files_exist list
|
18
|
+
list.each { |fn|
|
19
|
+
raise "File #{fn} does not exist!" if not File.exist?(fn)
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
# filter out all names accoding to filters
|
24
|
+
def Check::filter_file_list list, regex
|
25
|
+
list.map { |name| ( name =~ /#{regex}/ ? nil : name ) }.compact
|
26
|
+
end
|
27
|
+
|
28
|
+
# filter out all names accoding to glob (this is not an efficient
|
29
|
+
# implementation, as the glob runs for every listed file!)
|
30
|
+
def Check::filter_file_list_glob list, glob
|
31
|
+
list.map { |name| ( Dir.glob(glob).index(name) ? nil : name ) }.compact
|
32
|
+
end
|
33
|
+
|
34
|
+
# Calculate the checksums for each file in the list
|
35
|
+
def Check::calc_file_checksums list
|
36
|
+
list.map { |fn|
|
37
|
+
['MD5'] + `/usr/bin/md5sum #{fn}`.split
|
38
|
+
}
|
39
|
+
end
|
40
|
+
|
41
|
+
def Check::calc_checksum(buf)
|
42
|
+
if $ruby_sha1
|
43
|
+
Sha1::sha1(buf)
|
44
|
+
else
|
45
|
+
Digest::SHA1.hexdigest(buf)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Create a file name out of the content of checksums
|
50
|
+
def Check::make_once_filename checksums, prefix = 'once-only'
|
51
|
+
buf = checksums.map { |entry| entry }.join("\n")
|
52
|
+
prefix + '-' + calc_checksum(buf) + '.txt'
|
53
|
+
end
|
54
|
+
|
55
|
+
def Check::write_file fn, checksums
|
56
|
+
File.open(fn,'w') { |f|
|
57
|
+
checksums.each { |items| f.print items[0],"\t",items[1],"\t",items[2],"\n" }
|
58
|
+
}
|
59
|
+
end
|
60
|
+
|
61
|
+
# Put quotes around regexs and globs
|
62
|
+
def Check::requote list
|
63
|
+
a = [ list[0] ]
|
64
|
+
list.each_cons(2) { |pair| a << (['--skip-glob','--skip-regex'].index(pair[0]) ? "'#{pair[1]}'" : pair[1]) }
|
65
|
+
a
|
66
|
+
end
|
67
|
+
|
68
|
+
# Drop --pbs and optional argument from list
|
69
|
+
def Check::drop_pbs_option(list)
|
70
|
+
is_part_of_pbs_arg = lambda { |p1, p2|
|
71
|
+
(p1 == '--pbs' and p2 =~ /\s+/) or p2 == '--pbs'
|
72
|
+
}
|
73
|
+
a = [ list[0] ]
|
74
|
+
list.each_cons(2) { |pair| a << pair[1] if not is_part_of_pbs_arg.call(pair[0],pair[1])}
|
75
|
+
a
|
76
|
+
end
|
77
|
+
|
78
|
+
# Drop -d argument from list
|
79
|
+
def Check::drop_dir_option(list)
|
80
|
+
is_part_of_arg = lambda { |p1, p2|
|
81
|
+
(p1 == '-d' or p2 == '-d')
|
82
|
+
}
|
83
|
+
a = [ list[0] ]
|
84
|
+
list.each_cons(2) { |pair| a << pair[1] if not is_part_of_arg.call(pair[0],pair[1])}
|
85
|
+
a
|
86
|
+
end
|
87
|
+
|
88
|
+
|
89
|
+
protected
|
90
|
+
|
91
|
+
def Check::get_existing_filename arg
|
92
|
+
return arg if File.exist?(arg)
|
93
|
+
# sometimes arguments are formed as -in=file
|
94
|
+
(option,filename) = arg.split(/=/)
|
95
|
+
return filename if filename and File.exist?(filename)
|
96
|
+
nil
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
module OnceOnly
|
4
|
+
|
5
|
+
module Sha1
|
6
|
+
|
7
|
+
# Calculates SHA-1 message digest of _string_. Returns binary digest.
|
8
|
+
# For hexadecimal digest, use +*sha1(string).unpack('H*')+.
|
9
|
+
#--
|
10
|
+
# This is a simple, pure-Ruby implementation of SHA-1, following
|
11
|
+
# the algorithm in FIPS 180-1.
|
12
|
+
#
|
13
|
+
# (lifted from Rosetta code http://rosettacode.org)
|
14
|
+
#++
|
15
|
+
def Sha1::sha1(string)
|
16
|
+
# functions and constants
|
17
|
+
mask = (1 << 32) - 1
|
18
|
+
s = proc{|n, x| ((x << n) & mask) | (x >> (32 - n))}
|
19
|
+
f = [
|
20
|
+
proc {|b, c, d| (b & c) | (b.^(mask) & d)},
|
21
|
+
proc {|b, c, d| b ^ c ^ d},
|
22
|
+
proc {|b, c, d| (b & c) | (b & d) | (c & d)},
|
23
|
+
proc {|b, c, d| b ^ c ^ d},
|
24
|
+
].freeze
|
25
|
+
k = [0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6].freeze
|
26
|
+
|
27
|
+
# initial hash
|
28
|
+
h = [0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0]
|
29
|
+
|
30
|
+
io = StringIO.new(string)
|
31
|
+
block = ""
|
32
|
+
term = false # appended "\x80" in second-last block?
|
33
|
+
last = false # last block?
|
34
|
+
until last
|
35
|
+
# Read next block of 16 words (64 bytes, 512 bits).
|
36
|
+
io.read(64, block) or (
|
37
|
+
# Work around a bug in Rubinius 1.2.4. At eof,
|
38
|
+
# MRI and JRuby already replace block with "".
|
39
|
+
block.replace("")
|
40
|
+
)
|
41
|
+
|
42
|
+
# Unpack block into 32-bit words "N".
|
43
|
+
case len = block.length
|
44
|
+
when 64
|
45
|
+
# Unpack 16 words.
|
46
|
+
w = block.unpack("N16")
|
47
|
+
when 56..63
|
48
|
+
# Second-last block: append padding, unpack 16 words.
|
49
|
+
block.concat("\x80"); term = true
|
50
|
+
block.concat("\0" * (63 - len))
|
51
|
+
w = block.unpack("N16")
|
52
|
+
when 0..55
|
53
|
+
# Last block: append padding, unpack 14 words.
|
54
|
+
block.concat(term ? "\0" : "\x80")
|
55
|
+
block.concat("\0" * (55 - len))
|
56
|
+
w = block.unpack("N14")
|
57
|
+
|
58
|
+
# Append bit length, 2 words.
|
59
|
+
bit_len = string.length << 3
|
60
|
+
w.push(bit_len >> 32, bit_len & mask)
|
61
|
+
last = true
|
62
|
+
else
|
63
|
+
fail "impossible"
|
64
|
+
end
|
65
|
+
|
66
|
+
# Process block.
|
67
|
+
(16..79).each {|t|
|
68
|
+
w[t] = s[1, w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16]]}
|
69
|
+
|
70
|
+
a, b, c, d, e = h[0..4]
|
71
|
+
t = 0
|
72
|
+
(0..3).each {|i|
|
73
|
+
20.times {
|
74
|
+
temp = (s[5, a] + f[i][b, c, d] + e + w[t] + k[i]) & mask
|
75
|
+
e = d; d = c; c = s[30, b]; b = a; a = temp
|
76
|
+
t += 1}}
|
77
|
+
|
78
|
+
h[0] = (h[0] + a) & mask
|
79
|
+
h[1] = (h[1] + b) & mask
|
80
|
+
h[2] = (h[2] + c) & mask
|
81
|
+
h[3] = (h[3] + d) & mask
|
82
|
+
h[4] = (h[4] + e) & mask
|
83
|
+
end
|
84
|
+
|
85
|
+
# h.join('').hex.to_s
|
86
|
+
# p h p sprintf("%08X","10") -> 0000000A
|
87
|
+
h.map { |n| sprintf("%08x",n) }.join('')
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
require 'once-only'
|
5
|
+
|
6
|
+
# Requires supporting files with custom matchers and macros, etc,
|
7
|
+
# in ./support/ and its subdirectories.
|
8
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
9
|
+
|
10
|
+
RSpec.configure do |config|
|
11
|
+
|
12
|
+
end
|
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: once-only
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Pjotr Prins
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-05-12 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &17265660 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.8.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *17265660
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: cucumber
|
27
|
+
requirement: &17264920 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *17264920
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: jeweler
|
38
|
+
requirement: &17264040 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.8.4
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *17264040
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: bundler
|
49
|
+
requirement: &17263420 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.21
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *17263420
|
58
|
+
description: ! "Run programs and scripts once only. Especially\n useful for PBS and
|
59
|
+
GRID computing"
|
60
|
+
email: pjotr.public01@thebird.nl
|
61
|
+
executables:
|
62
|
+
- once-only
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files:
|
65
|
+
- LICENSE.txt
|
66
|
+
- README.md
|
67
|
+
files:
|
68
|
+
- .document
|
69
|
+
- .rspec
|
70
|
+
- .travis.yml
|
71
|
+
- Gemfile
|
72
|
+
- LICENSE.txt
|
73
|
+
- README.md
|
74
|
+
- Rakefile
|
75
|
+
- VERSION
|
76
|
+
- bin/once-only
|
77
|
+
- features/once-only.feature
|
78
|
+
- features/step_definitions/once-only_steps.rb
|
79
|
+
- features/support/env.rb
|
80
|
+
- lib/once-only.rb
|
81
|
+
- lib/once-only/check.rb
|
82
|
+
- lib/once-only/once-only.rb
|
83
|
+
- lib/once-only/sha1.rb
|
84
|
+
- spec/bio-once-only_spec.rb
|
85
|
+
- spec/spec_helper.rb
|
86
|
+
homepage: http://github.com/pjotrp/once-only
|
87
|
+
licenses:
|
88
|
+
- MIT
|
89
|
+
post_install_message:
|
90
|
+
rdoc_options: []
|
91
|
+
require_paths:
|
92
|
+
- lib
|
93
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
hash: -995396904619724027
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 1.8.10
|
111
|
+
signing_key:
|
112
|
+
specification_version: 3
|
113
|
+
summary: Run commands once only if inputs do not change
|
114
|
+
test_files: []
|