scaffolder-annotation-locator 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,17 @@
1
# Fetch gems over TLS rather than plain HTTP so downloads cannot be
# tampered with in transit.
source "https://rubygems.org"

# Runtime dependency of the library itself.
group :default do
  gem "scaffolder", "~> 0.4"
end

# Tooling used only to build, test and document the gem.
group :development do
  gem "bundler", "~> 1.0"
  gem "jeweler", "~> 1.5"

  gem "rspec", "~> 2.4"
  gem "scaffolder-test-helpers", "0.2.2"
  gem "cucumber", "~> 0.9"
  gem "aruba", "~> 0.2"

  gem "yard", "~> 0.6"
end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Michael Barton
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = scaffolder-annotation-locator
2
+
3
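+ Updates the locations of gff3 annotations made on contigs so that they match the contig positions in a scaffold built from a scaffolder template.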
4
+
5
+ == Contributing to scaffolder-annotation-locator
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 Michael Barton. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,42 @@
1
# Build, test and documentation tasks for the scaffolder-annotation-locator gem.
require 'rubygems'
require 'bundler'

# Stop early, using Bundler's own exit status, when the bundle is incomplete.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Gem packaging. These values are the source for the generated gemspec, so
# wording fixes belong here rather than in the gemspec itself.
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "scaffolder-annotation-locator"
  gem.homepage = "http://next.gs"
  gem.license = "MIT"
  gem.summary = %Q{Update locations of gff3 annotations from a scaffolder template}
  gem.description = %Q{Build a genome scaffold using scaffolder and a set of annotated contigs. This tool updates the locations of the contig annotations using the scaffolder template as a base.}
  gem.email = "mail@michaelbarton.me.uk"
  gem.authors = ["Michael Barton"]
end
Jeweler::RubygemsDotOrgTasks.new

# Unit specs.
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# The same specs, run with rcov coverage enabled.
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# Cucumber acceptance features.
require 'cucumber/rake/task'
Cucumber::Rake::Task.new(:features)

task :default => :spec

# API documentation.
require 'yard'
YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,317 @@
1
+ Feature: Locating gff3 annotations on a scaffold
2
+ In order to add gff3 annotations to a scaffold
3
+ A user can use scaffolder-annotation-locator
4
+ to return the updated coordinates of scaffold annotations
5
+
6
+ Scenario: One annotation on a contig
7
+ Given a file named "scaf.yml" with:
8
+ """
9
+ ---
10
+ - sequence:
11
+ source: contig1
12
+ """
13
+ Given a file named "seq.fna" with:
14
+ """
15
+ > contig1
16
+ AAAAAGGGGGCCCCCTTTTT
17
+ """
18
+ Given a file named "anno.gff" with:
19
+ """
20
+ ##gff-version 3
21
+ contig1 . CDS 4 13 . + 1 ID=gene1
22
+ """
23
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
24
+ Then the result should be:
25
+ """
26
+ ##gff-version 3
27
+ scaffold . CDS 4 13 . + 1 ID=gene1
28
+ """
29
+
30
+ Scenario: One annotation on a trimmed contig
31
+ Given a file named "scaf.yml" with:
32
+ """
33
+ ---
34
+ - sequence:
35
+ source: contig1
36
+ start: 4
37
+ """
38
+ Given a file named "seq.fna" with:
39
+ """
40
+ > contig1
41
+ AAAAAGGGGGCCCCCTTTTT
42
+ """
43
+ Given a file named "anno.gff" with:
44
+ """
45
+ ##gff-version 3
46
+ contig1 . CDS 4 13 . + 1 ID=gene1
47
+ """
48
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
49
+ Then the result should be:
50
+ """
51
+ ##gff-version 3
52
+ scaffold . CDS 1 10 . + 1 ID=gene1
53
+ """
54
+
55
+ Scenario: One annotation on a reversed contig
56
+ Given a file named "scaf.yml" with:
57
+ """
58
+ ---
59
+ - sequence:
60
+ source: contig1
61
+ reverse: true
62
+ """
63
+ Given a file named "seq.fna" with:
64
+ """
65
+ > contig1
66
+ AAAAAGGGGGCCCCCTTTTT
67
+ """
68
+ Given a file named "anno.gff" with:
69
+ """
70
+ ##gff-version 3
71
+ contig1 . CDS 1 6 . + 1 ID=gene1
72
+ """
73
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
74
+ Then the result should be:
75
+ """
76
+ ##gff-version 3
77
+ scaffold . CDS 15 20 . - 1 ID=gene1
78
+ """
79
+
80
+ Scenario: Three annotations on three contigs
81
+ Given a file named "scaf.yml" with:
82
+ """
83
+ ---
84
+ - sequence:
85
+ source: contig1
86
+ - sequence:
87
+ source: contig2
88
+ - sequence:
89
+ source: contig3
90
+ """
91
+ Given a file named "seq.fna" with:
92
+ """
93
+ > contig1
94
+ AAAAAGGGGGCCCCCTTTTT
95
+ > contig2
96
+ AAAAAGGGGGCCCCCTTTTT
97
+ > contig3
98
+ AAAAAGGGGGCCCCCTTTTT
99
+ """
100
+ Given a file named "anno.gff" with:
101
+ """
102
+ ##gff-version 3
103
+ contig1 . CDS 1 10 . + 1 ID=gene1
104
+ contig2 . CDS 1 6 . + 1 ID=gene2
105
+ contig3 . CDS 1 6 . + 1 ID=gene2
106
+ """
107
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
108
+ Then the result should be:
109
+ """
110
+ ##gff-version 3
111
+ scaffold . CDS 1 10 . + 1 ID=gene1
112
+ scaffold . CDS 21 26 . + 1 ID=gene2
113
+ scaffold . CDS 41 46 . + 1 ID=gene2
114
+ """
115
+
116
+ Scenario: Unordered Annotations on multiple contigs
117
+ Given a file named "scaf.yml" with:
118
+ """
119
+ ---
120
+ - sequence:
121
+ source: contig1
122
+ - sequence:
123
+ source: contig2
124
+ """
125
+ Given a file named "seq.fna" with:
126
+ """
127
+ > contig1
128
+ AAAAAGGGGGCCCCCTTTTT
129
+ > contig2
130
+ AAAAAGGGGGCCCCCTTTTT
131
+ """
132
+ Given a file named "anno.gff" with:
133
+ """
134
+ ##gff-version 3
135
+ contig2 . CDS 1 6 . + 1 ID=gene2
136
+ contig1 . CDS 1 10 . + 1 ID=gene1
137
+ """
138
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
139
+ Then the result should be:
140
+ """
141
+ ##gff-version 3
142
+ scaffold . CDS 1 10 . + 1 ID=gene1
143
+ scaffold . CDS 21 26 . + 1 ID=gene2
144
+ """
145
+
146
+ Scenario: Annotations on trimmed contigs
147
+ Given a file named "scaf.yml" with:
148
+ """
149
+ ---
150
+ - sequence:
151
+ source: contig1
152
+ stop: 17
153
+ - sequence:
154
+ source: contig2
155
+ start: 4
156
+ stop: 9
157
+ - sequence:
158
+ source: contig3
159
+ """
160
+ Given a file named "seq.fna" with:
161
+ """
162
+ > contig1
163
+ AAAAAGGGGGCCCCCTTTTT
164
+ > contig2
165
+ AAAAAGGGGGCCCCCTTTTT
166
+ > contig3
167
+ AAAAAGGGGGCCCCCTTTTT
168
+ """
169
+ Given a file named "anno.gff" with:
170
+ """
171
+ ##gff-version 3
172
+ contig1 . CDS 1 10 . + 1 ID=gene1
173
+ contig2 . CDS 4 6 . + 1 ID=gene2
174
+ contig3 . CDS 1 6 . + 1 ID=gene3
175
+ """
176
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
177
+ Then the result should be:
178
+ """
179
+ ##gff-version 3
180
+ scaffold . CDS 1 10 . + 1 ID=gene1
181
+ scaffold . CDS 18 20 . + 1 ID=gene2
182
+ scaffold . CDS 24 29 . + 1 ID=gene3
183
+ """
184
+
185
+ Scenario: Annotations on reversed and trimmed contigs
186
+ Given a file named "scaf.yml" with:
187
+ """
188
+ ---
189
+ - sequence:
190
+ source: contig1
191
+ stop: 17
192
+ - sequence:
193
+ source: contig2
194
+ start: 4
195
+ stop: 9
196
+ reverse: true
197
+ - sequence:
198
+ source: contig3
199
+ reverse: true
200
+ """
201
+ Given a file named "seq.fna" with:
202
+ """
203
+ > contig1
204
+ AAAAAGGGGGCCCCCTTTTT
205
+ > contig2
206
+ AAAAAGGGGGCCCCCTTTTT
207
+ > contig3
208
+ AAAAAGGGGGCCCCCTTTTT
209
+ """
210
+ Given a file named "anno.gff" with:
211
+ """
212
+ ##gff-version 3
213
+ contig1 . CDS 1 10 . + 1 ID=gene1
214
+ contig2 . CDS 4 6 . + 1 ID=gene2
215
+ contig3 . CDS 1 6 . + 1 ID=gene3
216
+ """
217
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
218
+ Then the result should be:
219
+ """
220
+ ##gff-version 3
221
+ scaffold . CDS 1 10 . + 1 ID=gene1
222
+ scaffold . CDS 21 23 . - 1 ID=gene2
223
+ scaffold . CDS 38 43 . - 1 ID=gene3
224
+ """
225
+
226
+ Scenario: Annotations on two contigs separated by an unannotated contig
227
+ Given a file named "scaf.yml" with:
228
+ """
229
+ ---
230
+ - sequence:
231
+ source: contig1
232
+ - sequence:
233
+ source: contig2
234
+ - sequence:
235
+ source: contig3
236
+ """
237
+ Given a file named "seq.fna" with:
238
+ """
239
+ > contig1
240
+ AAAAAGGGGGCCCCCTTTTT
241
+ > contig2
242
+ AAAAAGGGGGCCCCCTTTTT
243
+ > contig3
244
+ AAAAAGGGGGCCCCCTTTTT
245
+ """
246
+ Given a file named "anno.gff" with:
247
+ """
248
+ ##gff-version 3
249
+ contig1 . CDS 1 6 . + 1 ID=gene1
250
+ contig3 . CDS 1 6 . + 1 ID=gene2
251
+ """
252
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
253
+ Then the result should be:
254
+ """
255
+ ##gff-version 3
256
+ scaffold . CDS 1 6 . + 1 ID=gene1
257
+ scaffold . CDS 41 46 . + 1 ID=gene2
258
+ """
259
+
260
+ Scenario: Annotations on two contigs separated by an unresolved region
261
+ Given a file named "scaf.yml" with:
262
+ """
263
+ ---
264
+ - sequence:
265
+ source: contig1
266
+ - unresolved:
267
+ length: 10
268
+ - sequence:
269
+ source: contig2
270
+ """
271
+ Given a file named "seq.fna" with:
272
+ """
273
+ > contig1
274
+ AAAAAGGGGGCCCCCTTTTT
275
+ > contig2
276
+ AAAAAGGGGGCCCCCTTTTT
277
+ """
278
+ Given a file named "anno.gff" with:
279
+ """
280
+ ##gff-version 3
281
+ contig1 . CDS 1 6 . + 1 ID=gene1
282
+ contig2 . CDS 1 6 . + 1 ID=gene2
283
+ """
284
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
285
+ Then the result should be:
286
+ """
287
+ ##gff-version 3
288
+ scaffold . CDS 1 6 . + 1 ID=gene1
289
+ scaffold . CDS 31 36 . + 1 ID=gene2
290
+ """
291
+
292
+ Scenario: Annotations on a single duplicated contig
293
+ Given a file named "scaf.yml" with:
294
+ """
295
+ ---
296
+ - sequence:
297
+ source: contig1
298
+ - sequence:
299
+ source: contig1
300
+ """
301
+ Given a file named "seq.fna" with:
302
+ """
303
+ > contig1
304
+ AAAAAGGGGGCCCCCTTTTT
305
+ """
306
+ Given a file named "anno.gff" with:
307
+ """
308
+ ##gff-version 3
309
+ contig1 . CDS 1 6 . + 1 ID=gene1
310
+ """
311
+ When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
312
+ Then the result should be:
313
+ """
314
+ ##gff-version 3
315
+ scaffold . CDS 1 6 . + 1 ID=gene1
316
+ scaffold . CDS 21 26 . + 1 ID=gene1
317
+ """
@@ -0,0 +1,13 @@
1
# Runs the locator over three fixture files and keeps the rendered GFF3 text
# for the following assertion step. The file names are resolved against the
# hard-coded "tmp/aruba/" directory (presumably where Aruba's
# "a file named" steps write their files - confirm against the Aruba version).
When /^I relocate the annotations using "([^"]*)", "([^"]*)" and "([^"]*)"$/ do |scaffold, sequence, annotations|
  scaffold_path, sequence_path, annotation_path =
    [scaffold, sequence, annotations].map { |name| "tmp/aruba/#{name}" }

  relocated = Bio::GFF::GFF3.new
  relocated.records = Scaffolder::AnnotationLocator.new(
    scaffold_path, sequence_path, annotation_path)

  @result = relocated.to_s.strip
end

# Compares the stripped GFF3 output of the previous step with the doc string.
Then /^the result should be:$/ do |result|
  @result.should == result
end
@@ -0,0 +1,14 @@
1
# Cucumber support environment: activates the bundle, puts the gem's lib
# directory on the load path and loads the expectation and Aruba helpers.
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  # Report the problem and exit with the status Bundler associates with it.
  $stderr.puts error.message, "Run `bundle install` to install missing gems"
  exit error.status_code
end

# lib/ sits two directories above this file.
lib_dir = File.join(File.dirname(__FILE__), '..', '..', 'lib')
$LOAD_PATH.unshift(lib_dir)
require 'scaffolder/annotation_locator'

require 'rspec/expectations'
require 'aruba/cucumber'
@@ -0,0 +1,62 @@
1
+ require 'delegate'
2
+ require 'scaffolder'
3
+ require 'bio'
4
+
5
# Re-maps GFF3 annotations made against individual contigs onto the
# coordinates of the scaffold assembled from those contigs.
#
# An instance delegates to an Array holding the relocated records, in the
# order their contigs appear in the scaffold.
class Scaffolder::AnnotationLocator < DelegateClass(Array)

  # @param scaffold_file [String] path to the scaffolder YAML template
  # @param sequence_file [String] path to the sequence file passed to Scaffolder
  # @param gff_file [String] path to the GFF3 annotations of the contigs
  def initialize(scaffold_file,sequence_file,gff_file)
    @scaffold_file = scaffold_file
    @sequence_file = sequence_file
    @gff_file      = gff_file

    updated_records = []

    # The memo is the length of scaffold laid down before the current entry.
    scaffold.inject(0) do |length,entry|
      if entry.entry_type == :sequence
        # #records is called afresh for every entry on purpose: update_record
        # mutates the records it is given, so a contig that appears more than
        # once in the scaffold needs newly parsed records each time.
        relocated = records[entry.source].map do |record|
          update_record(record,entry,length)
        end
        updated_records.concat(relocated)
      end

      length + entry.sequence.length
    end

    super updated_records
  end

  # Moves one record from contig coordinates to scaffold coordinates.
  # The record is modified in place and also returned.
  #
  # @param record the GFF3 record to relocate
  # @param scaffold_entry the scaffold entry for the record's contig
  # @param prior_length [Integer] scaffold length preceding this entry
  # @return the same record, now named "scaffold"
  def update_record(record,scaffold_entry,prior_length)
    # Account for any bases trimmed from the start of the contig.
    trimmed = scaffold_entry.start - 1
    record.start -= trimmed
    record.end   -= trimmed

    if scaffold_entry.reverse
      # Mirror both ends about the entry's sequence; the old end becomes the
      # new start so that start <= end still holds.
      entry_length = scaffold_entry.sequence.length
      record.start, record.end =
        entry_length - (record.end - 1), entry_length - (record.start - 1)
      record.strand = self.class.flip_strand(record.strand)
    end

    record.start += prior_length
    record.end   += prior_length

    record.seqname = "scaffold"
    record
  end

  # Builds the scaffold from the template and sequence files.
  # Both files are re-read on every call.
  def scaffold
    # NOTE(review): YAML.load on a user-supplied template; consider a safe
    # loader if templates can come from untrusted sources.
    Scaffolder.new(YAML.load(File.read(@scaffold_file)),@sequence_file)
  end

  # Parses the GFF3 file and groups its records by sequence name.
  # Unknown sequence names map to an empty Array. The file is re-parsed on
  # every call, so each call returns fresh record objects.
  #
  # @return [Hash] sequence name => Array of records
  def records
    gff3 = Bio::GFF::GFF3.new(File.read(@gff_file)).records
    gff3.inject(Hash.new{|h,k| h[k] = [] }) do |hash,record|
      hash[record.seqname] << record
      hash
    end
  end

  # Returns the opposite strand for '+' and '-'.
  # Any other value (for example an unstranded or unknown strand) has no
  # opposite and is returned unchanged rather than being forced to '+'.
  #
  # @param strand [String, nil]
  # @return [String, nil]
  def self.flip_strand(strand)
    case strand
    when '+' then '-'
    when '-' then '+'
    else strand
    end
  end

end
@@ -0,0 +1,80 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{scaffolder-annotation-locator}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Michael Barton"]
12
+ s.date = %q{2011-04-05}
13
+ s.description = %q{Build a genome scaffold using scaffolder and a set of annotated contigs. This tool updates the locations of the contig annotations using the scaffolder tempalte as a base.}
14
+ s.email = %q{mail@michaelbarton.me.uk}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ "Gemfile",
22
+ "LICENSE.txt",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "features/gff3.feature",
27
+ "features/step_definitions/scaffolder-annotation-locator_steps.rb",
28
+ "features/support/env.rb",
29
+ "lib/scaffolder/annotation_locator.rb",
30
+ "scaffolder-annotation-locator.gemspec",
31
+ "spec/scaffolder/annotation_locator_spec.rb",
32
+ "spec/spec_helper.rb",
33
+ "spec/support/gff_attribute_matcher.rb"
34
+ ]
35
+ s.homepage = %q{http://next.gs}
36
+ s.licenses = ["MIT"]
37
+ s.require_paths = ["lib"]
38
+ s.rubygems_version = %q{1.3.7}
39
+ s.summary = %q{Update locations of gff3 annotations from a scaffolder template}
40
+ s.test_files = [
41
+ "spec/scaffolder/annotation_locator_spec.rb",
42
+ "spec/spec_helper.rb",
43
+ "spec/support/gff_attribute_matcher.rb"
44
+ ]
45
+
46
+ if s.respond_to? :specification_version then
47
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
48
+ s.specification_version = 3
49
+
50
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
51
+ s.add_runtime_dependency(%q<scaffolder>, ["~> 0.4"])
52
+ s.add_development_dependency(%q<bundler>, ["~> 1.0"])
53
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5"])
54
+ s.add_development_dependency(%q<rspec>, ["~> 2.4"])
55
+ s.add_development_dependency(%q<scaffolder-test-helpers>, ["= 0.2.2"])
56
+ s.add_development_dependency(%q<cucumber>, ["~> 0.9"])
57
+ s.add_development_dependency(%q<aruba>, ["~> 0.2"])
58
+ s.add_development_dependency(%q<yard>, ["~> 0.6"])
59
+ else
60
+ s.add_dependency(%q<scaffolder>, ["~> 0.4"])
61
+ s.add_dependency(%q<bundler>, ["~> 1.0"])
62
+ s.add_dependency(%q<jeweler>, ["~> 1.5"])
63
+ s.add_dependency(%q<rspec>, ["~> 2.4"])
64
+ s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.2.2"])
65
+ s.add_dependency(%q<cucumber>, ["~> 0.9"])
66
+ s.add_dependency(%q<aruba>, ["~> 0.2"])
67
+ s.add_dependency(%q<yard>, ["~> 0.6"])
68
+ end
69
+ else
70
+ s.add_dependency(%q<scaffolder>, ["~> 0.4"])
71
+ s.add_dependency(%q<bundler>, ["~> 1.0"])
72
+ s.add_dependency(%q<jeweler>, ["~> 1.5"])
73
+ s.add_dependency(%q<rspec>, ["~> 2.4"])
74
+ s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.2.2"])
75
+ s.add_dependency(%q<cucumber>, ["~> 0.9"])
76
+ s.add_dependency(%q<aruba>, ["~> 0.2"])
77
+ s.add_dependency(%q<yard>, ["~> 0.6"])
78
+ end
79
+ end
80
+
@@ -0,0 +1,218 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
2
+
3
# Specs for relocating contig annotations onto scaffold coordinates.
# The fixture contig is six bases long, so a second contig starts at 7.
describe Scaffolder::AnnotationLocator do

  # Writes the scaffold entries and annotation hashes to temporary files and
  # returns the locator built from them.
  def relocate(scaffold,records)
    @scaffold_file, @sequence_file = generate_scaffold_files(scaffold)
    described_class.new(@scaffold_file.path, @sequence_file.path,
                        generate_gff3_file(records))
  end

  before do
    @contig = Sequence.new(:name => 'c1',:sequence => 'ATGCCC')
    @record = {:seqname => 'c1',
               :start => 4, :end => 6, :strand => '+',:phase => 1}
  end

  describe "relocating a single contig" do

    describe "with no annotations" do

      subject do
        relocate([@contig],[])
      end

      it "should return an empty annotation array" do
        subject.should be_empty
      end

    end

    describe "with a single annotation" do

      subject do
        relocate([@contig],[@record])
      end

      it{ should set_the_attribute(:seqname => 'scaffold') }
      it{ should set_the_attribute(:phase   => 1) }
      it{ should set_the_attribute(:strand  => '+') }

      it{ should set_the_attribute(:start => 4).only_for_the(:first) }
      it{ should set_the_attribute(:end   => 6).only_for_the(:first) }

    end

    describe "reversed with a single annotation" do

      subject do
        relocate([@contig.clone.reverse(true)],[@record])
      end

      it{ should set_the_attribute(:seqname => 'scaffold') }
      it{ should set_the_attribute(:phase   => 1) }
      it{ should set_the_attribute(:strand  => '-') }

      it{ should set_the_attribute(:start => 1).only_for_the(:first) }
      it{ should set_the_attribute(:end   => 3).only_for_the(:first) }

    end

    describe "start trimmed with a single annotation" do

      subject do
        relocate([@contig.clone.start(4)],[@record])
      end

      it{ should set_the_attribute(:seqname => 'scaffold') }
      it{ should set_the_attribute(:phase   => 1) }
      it{ should set_the_attribute(:strand  => '+') }

      it{ should set_the_attribute(:start => 1).only_for_the(:first) }
      it{ should set_the_attribute(:end   => 3).only_for_the(:first) }

    end

  end

  describe "relocating two contigs" do

    describe "with an annotation on each contig" do

      subject do
        second = @record.clone
        second[:seqname] = 'c2'
        relocate([@contig, @contig.clone.name('c2')],[@record,second])
      end

      it{ should set_the_attribute(:seqname => 'scaffold') }
      it{ should set_the_attribute(:phase   => 1) }
      it{ should set_the_attribute(:strand  => '+') }

      it{ should set_the_attribute(:start => 4).only_for_the(:first) }
      it{ should set_the_attribute(:end   => 6).only_for_the(:first) }

      it{ should set_the_attribute(:start => 10).only_for_the(:second) }
      it{ should set_the_attribute(:end   => 12).only_for_the(:second) }

    end

    describe "where the two annotations are unordered annotations" do

      subject do
        second = @record.merge({:seqname => 'c2', :strand => '-'})
        relocate([@contig, @contig.clone.name('c2')],[second,@record])
      end

      it{ should set_the_attribute(:seqname => 'scaffold') }
      it{ should set_the_attribute(:phase   => 1) }

      it{ should set_the_attribute(:start  => 4).only_for_the(:first) }
      it{ should set_the_attribute(:end    => 6).only_for_the(:first) }
      it{ should set_the_attribute(:strand => '+').only_for_the(:first) }

      it{ should set_the_attribute(:start  => 10).only_for_the(:second) }
      it{ should set_the_attribute(:end    => 12).only_for_the(:second) }
      it{ should set_the_attribute(:strand => '-').only_for_the(:second) }

    end

    describe "where the first of the two contigs is start trimmed" do

      subject do
        second = @record.clone
        second[:seqname] = 'c2'

        relocate([@contig.clone.start(4),@contig.clone.name('c2')],[@record,second])
      end

      it{ should set_the_attribute(:seqname => 'scaffold') }
      it{ should set_the_attribute(:phase   => 1) }
      it{ should set_the_attribute(:strand  => '+') }

      it{ should set_the_attribute(:start => 1).only_for_the(:first) }
      it{ should set_the_attribute(:end   => 3).only_for_the(:first) }

      it{ should set_the_attribute(:start => 7).only_for_the(:second) }
      it{ should set_the_attribute(:end   => 9).only_for_the(:second) }

    end

    describe "where the first of two contigs is stop trimmed" do

      subject do
        first = @record.clone
        first[:start] = 1
        first[:end]   = 3

        second = @record.clone
        second[:seqname] = 'c2'

        relocate([@contig.clone.stop(3),@contig.clone.name('c2')],[first,second])
      end

      it{ should set_the_attribute(:seqname => 'scaffold') }
      it{ should set_the_attribute(:phase   => 1) }
      it{ should set_the_attribute(:strand  => '+') }

      it{ should set_the_attribute(:start => 1).only_for_the(:first) }
      it{ should set_the_attribute(:end   => 3).only_for_the(:first) }

      it{ should set_the_attribute(:start => 7).only_for_the(:second) }
      it{ should set_the_attribute(:end   => 9).only_for_the(:second) }

    end

    describe "separated by an unresolved region" do

      subject do
        second = @record.clone
        second[:seqname] = 'c2'

        unresolved = Unresolved.new(:length => 10)
        relocate([@contig,unresolved,@contig.clone.name('c2')],[@record,second])
      end

      it{ should set_the_attribute(:seqname => 'scaffold') }
      it{ should set_the_attribute(:phase   => 1) }
      it{ should set_the_attribute(:strand  => '+') }

      it{ should set_the_attribute(:start => 4).only_for_the(:first) }
      it{ should set_the_attribute(:end   => 6).only_for_the(:first) }

      it{ should set_the_attribute(:start => 20).only_for_the(:second) }
      it{ should set_the_attribute(:end   => 22).only_for_the(:second) }

    end

  end

  describe "#records" do

    subject do
      second = @record.clone
      second[:seqname] = 'c2'

      relocate([@contig,@contig.clone.name('c2')],[@record,second]).records
    end

    it "should return the gff records grouped by sequence" do
      subject['c1'].length.should == 1
      subject['c2'].length.should == 1
    end

  end

  describe "#flip_strand" do

    # Each description names the input that the example actually passes.
    it "should return '-' when passed '+'" do
      described_class.flip_strand('+').should == '-'
    end

    it "should return '+' when passed '-'" do
      described_class.flip_strand('-').should == '+'
    end

  end

end
@@ -0,0 +1,30 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+
4
+ require 'tempfile'
5
+
6
+ require 'rspec'
7
+ require 'scaffolder/test/helpers'
8
+ require 'scaffolder/annotation_locator'
9
+
10
+ # Requires supporting files with custom matchers and macros, etc,
11
+ # in ./support/ and its subdirectories.
12
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
13
+
14
RSpec.configure do |config|
  # These are plain `include` calls on the top-level object, not
  # `config.include`, so the helper modules are mixed in globally.
  include Scaffolder::Test
  include Scaffolder::Test::Helpers

  # Writes the given annotations to a temporary GFF3 file as CDS features.
  #
  # @param annotations [Array<Hash>] one hash per record; the keys read are
  #   :seqname, :source, :start, :end, :strand and :phase
  # @return [String] path of the file that was written
  def generate_gff3_file(annotations)
    gff = Bio::GFF::GFF3.new
    gff.records = annotations.map do |a|
      Bio::GFF::GFF3::Record.new(a[:seqname], a[:source], 'CDS', a[:start],
                                 a[:end], nil, a[:strand], a[:phase])
    end

    # Keep a reference to the Tempfile. Once a Tempfile is garbage collected
    # its file is removed, which could happen while the caller is still
    # reading from the returned path.
    tmp = Tempfile.new("gff")
    (@gff3_tempfiles ||= []) << tmp

    tmp.print(gff)
    tmp.close # flushes to disk; the file itself is kept
    tmp.path
  end

end
+ end
@@ -0,0 +1,34 @@
1
# Ordinal names accepted by `only_for_the`; the position in this list is the
# index of the annotation that is checked.
ORDINALS = [:first,:second,:third,:fourth,:fifth].freeze

# Matches when the annotations carry the expected attribute value, e.g.
#
#   should set_the_attribute(:start => 4).only_for_the(:first)
#
# Without `only_for_the` every annotation must have the value; with it only
# the annotation at that ordinal position is checked.
RSpec::Matchers.define :set_the_attribute do |expected|
  match do |annotations|
    @attribute = expected.keys.first
    @value     = expected.values.first

    if @ordinal
      index = ORDINALS.index(@ordinal)
      if index.nil?
        raise ArgumentError, "unknown ordinal #{@ordinal.inspect}, expected one of #{ORDINALS.inspect}"
      end

      annotation = annotations[index]
      # Stored under a matcher-specific name so that no instance variable
      # RSpec itself may keep on the matcher (such as @actual) is overwritten.
      @found = annotation.nil? ? nil : annotation.send(@attribute)

      # A missing annotation is an ordinary mismatch, not an error.
      !annotation.nil? && @found == @value
    else
      annotations.all?{|a| a.send(@attribute) == @value }
    end
  end

  # Restricts the check to a single annotation, named by ordinal.
  chain :only_for_the do |ordinal|
    @ordinal = ordinal
  end

  description do
    string = "set the annotation #{@attribute} attribute to \"#{@value}\""
    string << " for the #{@ordinal} annotation" if @ordinal
    string
  end

  failure_message_for_should do |annotations|
    message = "expected \"#{@attribute}\" to be \"#{@value}\" "
    message + if @ordinal
      "but was \"#{@found}\" for the #{@ordinal} annotation"
    else
      "for all annotations"
    end
  end

end
metadata ADDED
@@ -0,0 +1,203 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scaffolder-annotation-locator
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Michael Barton
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-05 00:00:00 -04:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 0
30
+ - 4
31
+ version: "0.4"
32
+ type: :runtime
33
+ name: scaffolder
34
+ prerelease: false
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ hash: 15
43
+ segments:
44
+ - 1
45
+ - 0
46
+ version: "1.0"
47
+ type: :development
48
+ name: bundler
49
+ prerelease: false
50
+ version_requirements: *id002
51
+ - !ruby/object:Gem::Dependency
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ~>
56
+ - !ruby/object:Gem::Version
57
+ hash: 5
58
+ segments:
59
+ - 1
60
+ - 5
61
+ version: "1.5"
62
+ type: :development
63
+ name: jeweler
64
+ prerelease: false
65
+ version_requirements: *id003
66
+ - !ruby/object:Gem::Dependency
67
+ requirement: &id004 !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ~>
71
+ - !ruby/object:Gem::Version
72
+ hash: 11
73
+ segments:
74
+ - 2
75
+ - 4
76
+ version: "2.4"
77
+ type: :development
78
+ name: rspec
79
+ prerelease: false
80
+ version_requirements: *id004
81
+ - !ruby/object:Gem::Dependency
82
+ requirement: &id005 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - "="
86
+ - !ruby/object:Gem::Version
87
+ hash: 19
88
+ segments:
89
+ - 0
90
+ - 2
91
+ - 2
92
+ version: 0.2.2
93
+ type: :development
94
+ name: scaffolder-test-helpers
95
+ prerelease: false
96
+ version_requirements: *id005
97
+ - !ruby/object:Gem::Dependency
98
+ requirement: &id006 !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
103
+ hash: 25
104
+ segments:
105
+ - 0
106
+ - 9
107
+ version: "0.9"
108
+ type: :development
109
+ name: cucumber
110
+ prerelease: false
111
+ version_requirements: *id006
112
+ - !ruby/object:Gem::Dependency
113
+ requirement: &id007 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ~>
117
+ - !ruby/object:Gem::Version
118
+ hash: 15
119
+ segments:
120
+ - 0
121
+ - 2
122
+ version: "0.2"
123
+ type: :development
124
+ name: aruba
125
+ prerelease: false
126
+ version_requirements: *id007
127
+ - !ruby/object:Gem::Dependency
128
+ requirement: &id008 !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ hash: 7
134
+ segments:
135
+ - 0
136
+ - 6
137
+ version: "0.6"
138
+ type: :development
139
+ name: yard
140
+ prerelease: false
141
+ version_requirements: *id008
142
+ description: Build a genome scaffold using scaffolder and a set of annotated contigs. This tool updates the locations of the contig annotations using the scaffolder tempalte as a base.
143
+ email: mail@michaelbarton.me.uk
144
+ executables: []
145
+
146
+ extensions: []
147
+
148
+ extra_rdoc_files:
149
+ - LICENSE.txt
150
+ - README.rdoc
151
+ files:
152
+ - .document
153
+ - Gemfile
154
+ - LICENSE.txt
155
+ - README.rdoc
156
+ - Rakefile
157
+ - VERSION
158
+ - features/gff3.feature
159
+ - features/step_definitions/scaffolder-annotation-locator_steps.rb
160
+ - features/support/env.rb
161
+ - lib/scaffolder/annotation_locator.rb
162
+ - scaffolder-annotation-locator.gemspec
163
+ - spec/scaffolder/annotation_locator_spec.rb
164
+ - spec/spec_helper.rb
165
+ - spec/support/gff_attribute_matcher.rb
166
+ has_rdoc: true
167
+ homepage: http://next.gs
168
+ licenses:
169
+ - MIT
170
+ post_install_message:
171
+ rdoc_options: []
172
+
173
+ require_paths:
174
+ - lib
175
+ required_ruby_version: !ruby/object:Gem::Requirement
176
+ none: false
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ hash: 3
181
+ segments:
182
+ - 0
183
+ version: "0"
184
+ required_rubygems_version: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ">="
188
+ - !ruby/object:Gem::Version
189
+ hash: 3
190
+ segments:
191
+ - 0
192
+ version: "0"
193
+ requirements: []
194
+
195
+ rubyforge_project:
196
+ rubygems_version: 1.3.7
197
+ signing_key:
198
+ specification_version: 3
199
+ summary: Update locations of gff3 annotations from a scaffolder template
200
+ test_files:
201
+ - spec/scaffolder/annotation_locator_spec.rb
202
+ - spec/spec_helper.rb
203
+ - spec/support/gff_attribute_matcher.rb