scaffolder-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,23 @@
1
+ source "http://rubygems.org"
2
+
3
+ group :default do
4
+ gem "configliere", "~> 0.1"
5
+ gem "bio", "~> 1.4"
6
+ gem "scaffolder", "~> 0.4"
7
+ gem "ronn", "~> 0.7"
8
+ end
9
+
10
+ group :development do
11
+ gem "bundler", "~> 1.0"
12
+ gem "jeweler", "~> 1.5"
13
+ gem "gherkin", "~> 2.3.3"
14
+ gem "rspec", "~> 2.4"
15
+ gem "cucumber", "~> 0.10"
16
+ gem "fakefs", "~> 0.2"
17
+ gem "aruba", "~> 0.2"
18
+ gem "mocha", "~> 0.9"
19
+ gem "hashie", "~> 0.4"
20
+ gem "yard", "~> 0.6"
21
+
22
+ gem "scaffolder-test-helpers", "~> 0.1"
23
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Michael Barton
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,38 @@
1
+ == Synopsis
2
+
3
+ A command line tool for processing genome scaffolds defined by the genome
4
+ scaffolder API.
5
+
6
+ == Feature List
7
+
8
+ * Output scaffold to fasta format.
9
+ * Validate that inserts do not overlap in the scaffold.
10
+
11
+ == Installing
12
+
13
+ Ruby and RubyGems are required to use scaffolder. Scaffolder tools is
14
+ installed on the command line using:
15
+
16
+ $ gem install scaffolder-tools
17
+
18
+ == Documentation
19
+
20
+ Command line usage can be found by running the following:
21
+
22
+ $ scaffolder help
23
+
24
+ A unix man page is available for each scaffolder command by typing:
25
+
26
+ $ scaffolder help COMMAND
27
+
28
+ == Contact
29
+
30
+ Scaffolder was developed by Michael Barton (http://www.michaelbarton.me.uk).
31
+ Pull requests, patches and bug reports are welcome. The source code is
32
+ available on github[http://github.com/michaelbarton/scaffolder]. Bug reports
33
+ and feature requests should be made on the GitHub project page.
34
+
35
+ == Copyright
36
+
37
+ Scaffolder © 2010 by Michael Barton. Scaffolder is licensed under the MIT
38
+ license. Please see the LICENSE.txt file for more information.
data/Rakefile ADDED
@@ -0,0 +1,41 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin # Set up the :default and :development gem groups; on a Bundler error, print it and exit with its status code.
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+ # Jeweler tasks; the gem's name, summary, contact details and license are declared in the block below.
12
+ require 'jeweler'
13
+ Jeweler::Tasks.new do |gem|
14
+ gem.name = "scaffolder-tools"
15
+ gem.summary = "Tools for manipulating genome scaffolds"
16
+ gem.description = "Binary to use with scaffolder genome scaffolds"
17
+ gem.email = "mail@michaelbarton.me.uk"
18
+ gem.homepage = "http://github.com/michaelbarton/scaffolder-tools"
19
+ gem.authors = ["Michael Barton"]
20
+ gem.license = "MIT"
21
+ end
22
+ Jeweler::RubygemsDotOrgTasks.new
23
+ # RSpec task :spec, matching spec/**/*_spec.rb.
24
+ require 'rspec/core'
25
+ require 'rspec/core/rake_task'
26
+ RSpec::Core::RakeTask.new(:spec) do |spec|
27
+ spec.pattern = FileList['spec/**/*_spec.rb']
28
+ end
29
+ # RSpec task :rcov, using the same file pattern with spec.rcov switched on.
30
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
31
+ spec.pattern = 'spec/**/*_spec.rb'
32
+ spec.rcov = true
33
+ end
34
+ # Cucumber task named :features.
35
+ require 'cucumber/rake/task'
36
+ Cucumber::Rake::Task.new(:features)
37
+ # The default task depends on :spec.
38
+ task :default => :spec
39
+ # YARD documentation task with its default settings.
40
+ require 'yard'
41
+ YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bin/scaffolder ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ # The line above puts the lib directory next to bin/ at the front of the load path.
4
+ require 'rubygems'
5
+ require 'configliere'
6
+ require 'scaffolder/binary_helper'
7
+ # Extend the top-level object so determine_tool can be called directly below.
8
+ extend Scaffolder::BinaryHelper
9
+ # Configliere settings; presumably this reads the command line arguments - confirm against Configliere docs.
10
+ Settings.use :commandline
11
+ Settings.resolve!
12
+ # determine_tool returns the tool class and the settings; build the tool with them and run it.
13
+ tool, arguments = determine_tool(Settings)
14
+ tool.new(arguments).run
@@ -0,0 +1,79 @@
1
+ Feature: Command line help for scaffolder
2
+ In order to understand how the scaffolder tools work
3
+ A user can use scaffolder help at the command line
4
+ to review its documentation
5
+
6
+ Scenario: Running scaffolder without any arguments
7
+ When I call "scaffolder" with arguments ""
8
+ Then the exit status should be 0
9
+ And the stdout should contain exactly:
10
+ """
11
+ usage: scaffolder [--version] COMMAND scaffold-file sequence-file
12
+ [options]
13
+
14
+ Commands:
15
+ help Help information for scaffolder commands
16
+ sequence Generate the fasta output for the scaffold
17
+ validate Validate scaffold for overlapping inserts
18
+
19
+ """
20
+
21
+ Scenario: Running scaffolder with the version argument
22
+ When I call "scaffolder" with arguments "--version"
23
+ Then the exit status should be 0
24
+ And the stdout should contain exactly:
25
+ """
26
+ scaffolder tool version 0.1.0
27
+
28
+ """
29
+
30
+ Scenario: Running scaffolder with an incorrect command
31
+ When I call "scaffolder" with arguments "unknown-command"
32
+ Then the exit status should be 1
33
+ And the stderr should contain exactly:
34
+ """
35
+ Error. Unknown command 'unknown-command'.
36
+ See 'scaffolder help'.
37
+
38
+ """
39
+
40
+ Scenario: Running scaffolder with just the help argument
41
+ When I call "scaffolder" with arguments "help"
42
+ Then the exit status should be 0
43
+ And the stdout should contain exactly:
44
+ """
45
+ usage: scaffolder [--version] COMMAND scaffold-file sequence-file
46
+ [options]
47
+
48
+ Commands:
49
+ help Help information for scaffolder commands
50
+ sequence Generate the fasta output for the scaffold
51
+ validate Validate scaffold for overlapping inserts
52
+
53
+ """
54
+
55
+ Scenario: Fetching the man page for sequence
56
+ When I call "scaffolder" with arguments "help sequence"
57
+ Then the exit status should be 0
58
+ And the stdout should contain "SCAFFOLDER-SEQUENCE(1)"
59
+
60
+ Scenario: Fetching the man page for validate
61
+ When I call "scaffolder" with arguments "help validate"
62
+ Then the exit status should be 0
63
+ And the stdout should contain "SCAFFOLDER-VALIDATE(1)"
64
+
65
+ Scenario: Fetching the man page for help
66
+ When I call "scaffolder" with arguments "help help"
67
+ Then the exit status should be 0
68
+ And the stdout should contain "SCAFFOLDER-HELP(1)"
69
+
70
+ Scenario: Fetching the man page for an incorrect command
71
+ When I call "scaffolder" with arguments "help unknown-command"
72
+ Then the exit status should be 1
73
+ And the stderr should contain exactly:
74
+ """
75
+ Error. Unknown command 'unknown-command'.
76
+ See 'scaffolder help'.
77
+
78
+ """
79
+
@@ -0,0 +1,157 @@
1
+ Feature: The scaffolder-sequence binary
2
+ In order to generate a fasta sequence of a genome scaffold
3
+ A user can use the scaffolder binary with argument sequence
4
+ to generate a fasta sequence from a scaffold and sequence file
5
+
6
+ Scenario: Generating fasta sequence for a simple scaffold
7
+ Given a file named "sequence.fna" with:
8
+ """
9
+ >seq
10
+ ATGGC
11
+ """
12
+ Given a file named "scaffold.yml" with:
13
+ """
14
+ ---
15
+ -
16
+ sequence:
17
+ source: "seq"
18
+ """
19
+ When I call "scaffolder" with arguments "sequence scaffold.yml sequence.fna"
20
+ Then the exit status should be 0
21
+ And the stdout should contain "ATGGC"
22
+
23
+ Scenario: The sequence file specified does not exist
24
+ Given a file named "scaffold.yml" with:
25
+ """
26
+ ---
27
+ -
28
+ sequence:
29
+ source: "seq"
30
+ """
31
+ When I call "scaffolder" with arguments "sequence scaffold.yml missing_file"
32
+ Then the exit status should be 1
33
+ And the stderr should contain "Error. Sequence file not found:"
34
+
35
+ Scenario: The sequence file doesn't contain anything
36
+ Given an empty file named "sequence.fna"
37
+ Given a file named "scaffold.yml" with:
38
+ """
39
+ ---
40
+ -
41
+ sequence:
42
+ source: "seq1"
43
+ """
44
+ When I call "scaffolder" with arguments "sequence scaffold.yml sequence.fna"
45
+ Then the exit status should be 1
46
+ And the stderr should contain "Error. Sequence file is empty"
47
+
48
+ Scenario: The scaffold file specified does not exist
49
+ Given a file named "sequence.fna" with:
50
+ """
51
+ >seq
52
+ ATGGC
53
+ """
54
+ When I call "scaffolder" with arguments "sequence missing_file sequence.fna"
55
+ Then the exit status should be 1
56
+ And the stderr should contain "Error. Scaffold file not found:"
57
+
58
+ Scenario: The scaffold file doesn't contain anything
59
+ Given an empty file named "scaffold.yml"
60
+ Given a file named "sequence.fna" with:
61
+ """
62
+ >seq
63
+ ATGGC
64
+ """
65
+ When I call "scaffolder" with arguments "sequence scaffold.yml sequence.fna"
66
+ Then the exit status should be 1
67
+ And the stderr should contain "Error. Scaffold file is empty"
68
+
69
+ Scenario: One of the sequences specified in the scaffold is missing
70
+ Given a file named "sequence.fna" with:
71
+ """
72
+ >seq1
73
+ ATGGC
74
+ """
75
+ Given a file named "scaffold.yml" with:
76
+ """
77
+ ---
78
+ -
79
+ sequence:
80
+ source: "seq1"
81
+ -
82
+ sequence:
83
+ source: "seq2"
84
+ """
85
+ When I call "scaffolder" with arguments "sequence scaffold.yml sequence.fna"
86
+ Then the exit status should be 1
87
+ And the stderr should contain "Error. Unknown sequence: seq2"
88
+
89
+ Scenario: Using the definition argument before the file arguments
90
+ Given a file named "sequence.fna" with:
91
+ """
92
+ >seq
93
+ ATGGC
94
+ """
95
+ Given a file named "scaffold.yml" with:
96
+ """
97
+ ---
98
+ -
99
+ sequence:
100
+ source: "seq"
101
+ """
102
+ When I call "scaffolder" with arguments "sequence --definition='name' scaffold.yml sequence.fna"
103
+ Then the exit status should be 0
104
+ And the stdout should contain "ATGGC"
105
+ And the stdout should contain ">name"
106
+
107
+ Scenario: Using the definition argument after the file arguments
108
+ Given a file named "sequence.fna" with:
109
+ """
110
+ >seq
111
+ ATGGC
112
+ """
113
+ Given a file named "scaffold.yml" with:
114
+ """
115
+ ---
116
+ -
117
+ sequence:
118
+ source: "seq"
119
+ """
120
+ When I call "scaffolder" with arguments "sequence scaffold.yml sequence.fna --definition='name'"
121
+ Then the exit status should be 0
122
+ And the stdout should contain "ATGGC"
123
+ And the stdout should contain ">name"
124
+
125
+ Scenario: Using the argument --no-sequence-hash
126
+ Given a file named "sequence.fna" with:
127
+ """
128
+ >seq
129
+ ATGGC
130
+ """
131
+ Given a file named "scaffold.yml" with:
132
+ """
133
+ ---
134
+ -
135
+ sequence:
136
+ source: "seq"
137
+ """
138
+ When I call "scaffolder" with arguments "sequence --no-sequence-hash scaffold.yml sequence.fna"
139
+ Then the exit status should be 0
140
+ And the stdout should contain ">\nATGGC"
141
+
142
+ Scenario: Using the arguments --no-sequence-hash and --definition
143
+ Given a file named "sequence.fna" with:
144
+ """
145
+ >seq
146
+ ATGGC
147
+ """
148
+ Given a file named "scaffold.yml" with:
149
+ """
150
+ ---
151
+ -
152
+ sequence:
153
+ source: "seq"
154
+ """
155
+ When I call "scaffolder" with arguments "sequence scaffold.yml sequence.fna --no-sequence-hash --definition='name'"
156
+ Then the exit status should be 0
157
+ And the stdout should contain ">name \nATGGC"
@@ -0,0 +1,4 @@
1
+ When /^I call "([^"]*)" with arguments "([^"]*)"$/ do |command,args|
2
+ bin = File.join(File.dirname(__FILE__),'..','..','bin',command) # path to the named file in bin/, two directories above this file
3
+ When "I run \"#{bin} #{args}\""
4
+ end # NOTE(review): delegates to an "I run" step defined elsewhere, presumably by Aruba - confirm
@@ -0,0 +1,15 @@
1
+ require 'bundler'
2
+ begin # Set up the :default and :development gem groups; on a Bundler error, print it and exit with its status code.
3
+ Bundler.setup(:default, :development)
4
+ rescue Bundler::BundlerError => e
5
+ $stderr.puts e.message
6
+ $stderr.puts "Run `bundle install` to install missing gems"
7
+ exit e.status_code
8
+ end
9
+ # Libraries used by the feature steps: RSpec expectations and Aruba's Cucumber integration.
10
+ require 'rspec/expectations'
11
+ require 'aruba/cucumber'
12
+ # Before hook: sets @dirs to a single /tmp entry; presumably Aruba's working directory - TODO confirm.
13
+ Before do
14
+ @dirs = ["/tmp"]
15
+ end
@@ -0,0 +1,245 @@
1
+ Feature: The scaffolder-validate binary
2
+ In order to test inserts are being correctly added to a scaffold
3
+ A user can use the scaffolder binary with the argument validate
4
+ to test that inserts are correctly inserted
5
+
6
+ Scenario: The sequence file specified does not exist
7
+ Given a file named "scaffold.yml" with:
8
+ """
9
+ ---
10
+ -
11
+ sequence:
12
+ source: "seq"
13
+ """
14
+ When I call "scaffolder" with arguments "validate scaffold.yml missing_file"
15
+ Then the exit status should be 1
16
+ And the stderr should contain "Error. Sequence file not found:"
17
+
18
+ Scenario: The sequence file doesn't contain anything
19
+ Given an empty file named "sequence.fna"
20
+ Given a file named "scaffold.yml" with:
21
+ """
22
+ ---
23
+ -
24
+ sequence:
25
+ source: "seq1"
26
+ """
27
+ When I call "scaffolder" with arguments "validate scaffold.yml sequence.fna"
28
+ Then the exit status should be 1
29
+ And the stderr should contain "Error. Sequence file is empty"
30
+
31
+ Scenario: The scaffold file specified does not exist
32
+ Given a file named "sequence.fna" with:
33
+ """
34
+ >seq
35
+ ATGGC
36
+ """
37
+ When I call "scaffolder" with arguments "validate missing_file sequence.fna"
38
+ Then the exit status should be 1
39
+ And the stderr should contain "Error. Scaffold file not found:"
40
+
41
+ Scenario: The scaffold file doesn't contain anything
42
+ Given an empty file named "scaffold.yml"
43
+ Given a file named "sequence.fna" with:
44
+ """
45
+ >seq
46
+ ATGGC
47
+ """
48
+ When I call "scaffolder" with arguments "validate scaffold.yml sequence.fna"
49
+ Then the exit status should be 1
50
+ And the stderr should contain "Error. Scaffold file is empty"
51
+
52
+ Scenario: One of the sequences specified in the scaffold is missing
53
+ Given a file named "sequence.fna" with:
54
+ """
55
+ >seq1
56
+ ATGGC
57
+ """
58
+ Given a file named "scaffold.yml" with:
59
+ """
60
+ ---
61
+ -
62
+ sequence:
63
+ source: "seq1"
64
+ -
65
+ sequence:
66
+ source: "seq2"
67
+ """
68
+ When I call "scaffolder" with arguments "validate scaffold.yml sequence.fna"
69
+ Then the exit status should be 1
70
+ And the stderr should contain "Error. Unknown sequence: seq2"
71
+
72
+ Scenario: Validating a scaffold with no overlapping inserts
73
+ Given a file named "sequence.fna" with:
74
+ """
75
+ >seq
76
+ ATGGC
77
+ """
78
+ Given a file named "scaffold.yml" with:
79
+ """
80
+ ---
81
+ -
82
+ sequence:
83
+ source: "seq"
84
+ """
85
+ When I call "scaffolder" with arguments "validate scaffold.yml sequence.fna"
86
+ Then the exit status should be 0
87
+ And the stdout should contain exactly:
88
+ """
89
+ """
90
+
91
+ Scenario: Validating a scaffold with no inserts
92
+ Given a file named "sequence.fna" with:
93
+ """
94
+ >seq
95
+ ATGGC
96
+ """
97
+ Given a file named "scaffold.yml" with:
98
+ """
99
+ ---
100
+ -
101
+ sequence:
102
+ source: "seq"
103
+ """
104
+ When I call "scaffolder" with arguments "validate scaffold.yml sequence.fna"
105
+ Then the exit status should be 0
106
+ And the stdout should contain exactly:
107
+ """
108
+ """
109
+
110
+ Scenario: Validating a scaffold with two non-overlapping inserts
111
+ Given a file named "sequence.fna" with:
112
+ """
113
+ >seq
114
+ ATGGCG
115
+ >ins1
116
+ ATGGCG
117
+ >ins2
118
+ ATGGCG
119
+ """
120
+ Given a file named "scaffold.yml" with:
121
+ """
122
+ ---
123
+ -
124
+ sequence:
125
+ source: "seq"
126
+ inserts:
127
+ -
128
+ open: 2
129
+ close: 3
130
+ source: ins1
131
+ -
132
+ open: 4
133
+ close: 5
134
+ source: ins2
135
+
136
+ """
137
+ When I call "scaffolder" with arguments "validate scaffold.yml sequence.fna"
138
+ Then the exit status should be 0
139
+ And the stdout should contain exactly:
140
+ """
141
+ """
142
+
143
+ Scenario: Validating a scaffold with two overlapping inserts
144
+ Given a file named "sequence.fna" with:
145
+ """
146
+ >seq
147
+ ATGGCG
148
+ >ins1
149
+ ATGGCG
150
+ >ins2
151
+ ATGGCG
152
+ """
153
+ Given a file named "scaffold.yml" with:
154
+ """
155
+ ---
156
+ -
157
+ sequence:
158
+ source: "seq"
159
+ inserts:
160
+ -
161
+ open: 2
162
+ close: 4
163
+ source: ins1
164
+ -
165
+ open: 3
166
+ close: 5
167
+ source: ins2
168
+
169
+ """
170
+ When I call "scaffolder" with arguments "validate scaffold.yml sequence.fna"
171
+ Then the exit status should be 0
172
+ And the stdout should contain exactly:
173
+ """
174
+ ---
175
+ - sequence-insert-overlap:
176
+ inserts:
177
+ - open: 2
178
+ close: 4
179
+ source: ins1
180
+ - open: 3
181
+ close: 5
182
+ source: ins2
183
+ source: seq
184
+
185
+ """
186
+
187
+ Scenario: Validating a scaffold with two sets of overlapping inserts
188
+ Given a file named "sequence.fna" with:
189
+ """
190
+ >seq
191
+ ATGGCGGCTGA
192
+ >ins1
193
+ ATGGCG
194
+ >ins2
195
+ ATGGCG
196
+ """
197
+ Given a file named "scaffold.yml" with:
198
+ """
199
+ ---
200
+ -
201
+ sequence:
202
+ source: seq
203
+ inserts:
204
+ -
205
+ open: 2
206
+ close: 4
207
+ source: ins1
208
+ -
209
+ open: 3
210
+ close: 5
211
+ source: ins2
212
+ -
213
+ open: 6
214
+ close: 8
215
+ source: ins1
216
+ -
217
+ open: 7
218
+ close: 9
219
+ source: ins2
220
+ """
221
+ When I call "scaffolder" with arguments "validate scaffold.yml sequence.fna"
222
+ Then the exit status should be 0
223
+ And the stdout should contain exactly:
224
+ """
225
+ ---
226
+ - sequence-insert-overlap:
227
+ inserts:
228
+ - open: 2
229
+ close: 4
230
+ source: ins1
231
+ - open: 3
232
+ close: 5
233
+ source: ins2
234
+ source: seq
235
+ - sequence-insert-overlap:
236
+ inserts:
237
+ - open: 6
238
+ close: 8
239
+ source: ins1
240
+ - open: 7
241
+ close: 9
242
+ source: ins2
243
+ source: seq
244
+
245
+ """
@@ -0,0 +1,30 @@
1
+ require 'scaffolder/tool_index'
2
+
3
+ module Scaffolder::BinaryHelper
4
+ include Scaffolder::ToolIndex
5
+
6
+ DEFAULT_TOOL = Scaffolder::Tool::Help
7
+
8
+ def select_tool(name)
9
+ tool_exists?(name) ? get_tool(name) : DEFAULT_TOOL
10
+ end
11
+
12
+ def remove_first_argument(settings)
13
+ name = settings.rest.shift
14
+ end
15
+
16
+ def determine_tool(settings)
17
+ name = remove_first_argument(settings)
18
+
19
+ tool_class = select_tool(name)
20
+
21
+ if name.nil?
22
+ settings[:empty_args] = true
23
+ elsif not tool_exists?(name)
24
+ settings[:unknown_tool] = name
25
+ end
26
+
27
+ [tool_class,settings]
28
+ end
29
+
30
+ end