scaffolder-annotation-locator 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -2
- data/VERSION +1 -1
- data/features/inserts.feature +128 -0
- data/features/{gff3.feature → multiple-contigs.feature} +36 -91
- data/features/single-contig.feature +158 -0
- data/features/unresolved.feature +37 -0
- data/lib/scaffolder/annotation_locator.rb +29 -26
- data/lib/scaffolder/extensions.rb +3 -0
- data/lib/scaffolder/gff_record_helper.rb +26 -0
- data/scaffolder-annotation-locator.gemspec +15 -14
- data/spec/scaffolder/annotation_locator_spec.rb +110 -13
- data/spec/scaffolder/gff_record_helper_spec.rb +86 -0
- data/spec/spec_helper.rb +2 -2
- metadata +20 -15
data/Gemfile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
3
|
group :default do
|
4
|
-
gem "scaffolder", "
|
4
|
+
gem "scaffolder", "0.4.3"
|
5
5
|
end
|
6
6
|
|
7
7
|
group :development do
|
@@ -9,7 +9,7 @@ group :development do
|
|
9
9
|
gem "jeweler", "~> 1.5"
|
10
10
|
|
11
11
|
gem "rspec", "~> 2.4"
|
12
|
-
gem "scaffolder-test-helpers", "0.
|
12
|
+
gem "scaffolder-test-helpers", "0.3.0"
|
13
13
|
gem "cucumber", "~> 0.9"
|
14
14
|
gem "aruba", "~> 0.2"
|
15
15
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
@@ -0,0 +1,128 @@
|
|
1
|
+
Feature: Parsing contigs with inserts
|
2
|
+
In order to include inserts in a scaffold
|
3
|
+
A user can use scaffold-annotation-locator
|
4
|
+
to update annotation coordinates with respect to contigs with inserts
|
5
|
+
|
6
|
+
Scenario: An annotation before an insert in a contig
|
7
|
+
Given a file named "scaf.yml" with:
|
8
|
+
"""
|
9
|
+
---
|
10
|
+
- sequence:
|
11
|
+
source: contig1
|
12
|
+
inserts:
|
13
|
+
-
|
14
|
+
source: insert1
|
15
|
+
open: 14
|
16
|
+
close: 15
|
17
|
+
"""
|
18
|
+
Given a file named "seq.fna" with:
|
19
|
+
"""
|
20
|
+
> contig1
|
21
|
+
AAAAAGGGGGCCCCCTTTTT
|
22
|
+
> insert1
|
23
|
+
TTTT
|
24
|
+
"""
|
25
|
+
Given a file named "anno.gff" with:
|
26
|
+
"""
|
27
|
+
##gff-version 3
|
28
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
29
|
+
"""
|
30
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
31
|
+
Then the result should be:
|
32
|
+
"""
|
33
|
+
##gff-version 3
|
34
|
+
scaffold . CDS 4 13 . + 1 ID=gene1
|
35
|
+
"""
|
36
|
+
|
37
|
+
Scenario: An annotation after an insert in a contig
|
38
|
+
Given a file named "scaf.yml" with:
|
39
|
+
"""
|
40
|
+
---
|
41
|
+
- sequence:
|
42
|
+
source: contig1
|
43
|
+
inserts:
|
44
|
+
-
|
45
|
+
source: insert1
|
46
|
+
open: 1
|
47
|
+
close: 3
|
48
|
+
"""
|
49
|
+
Given a file named "seq.fna" with:
|
50
|
+
"""
|
51
|
+
> contig1
|
52
|
+
AAAAAGGGGGCCCCCTTTTT
|
53
|
+
> insert1
|
54
|
+
TTTT
|
55
|
+
"""
|
56
|
+
Given a file named "anno.gff" with:
|
57
|
+
"""
|
58
|
+
##gff-version 3
|
59
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
60
|
+
"""
|
61
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
62
|
+
Then the result should be:
|
63
|
+
"""
|
64
|
+
##gff-version 3
|
65
|
+
scaffold . CDS 5 14 . + 1 ID=gene1
|
66
|
+
"""
|
67
|
+
|
68
|
+
Scenario: An annotation before an insert in a reversed contig
|
69
|
+
Given a file named "scaf.yml" with:
|
70
|
+
"""
|
71
|
+
---
|
72
|
+
- sequence:
|
73
|
+
source: contig1
|
74
|
+
reverse: true
|
75
|
+
inserts:
|
76
|
+
-
|
77
|
+
source: insert1
|
78
|
+
open: 1
|
79
|
+
close: 3
|
80
|
+
"""
|
81
|
+
Given a file named "seq.fna" with:
|
82
|
+
"""
|
83
|
+
> contig1
|
84
|
+
AAAAAGGGGGCCCCCTTTTT
|
85
|
+
> insert1
|
86
|
+
TTTT
|
87
|
+
"""
|
88
|
+
Given a file named "anno.gff" with:
|
89
|
+
"""
|
90
|
+
##gff-version 3
|
91
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
92
|
+
"""
|
93
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
94
|
+
Then the result should be:
|
95
|
+
"""
|
96
|
+
##gff-version 3
|
97
|
+
scaffold . CDS 8 17 . - 1 ID=gene1
|
98
|
+
"""
|
99
|
+
|
100
|
+
Scenario: An annotation overlapping with an insert location
|
101
|
+
Given a file named "scaf.yml" with:
|
102
|
+
"""
|
103
|
+
---
|
104
|
+
- sequence:
|
105
|
+
source: contig1
|
106
|
+
inserts:
|
107
|
+
-
|
108
|
+
source: insert1
|
109
|
+
open: 1
|
110
|
+
close: 4
|
111
|
+
"""
|
112
|
+
Given a file named "seq.fna" with:
|
113
|
+
"""
|
114
|
+
> contig1
|
115
|
+
AAAAAGGGGGCCCCCTTTTT
|
116
|
+
> insert1
|
117
|
+
TTTT
|
118
|
+
"""
|
119
|
+
Given a file named "anno.gff" with:
|
120
|
+
"""
|
121
|
+
##gff-version 3
|
122
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
123
|
+
"""
|
124
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
125
|
+
Then the result should be:
|
126
|
+
"""
|
127
|
+
##gff-version 3
|
128
|
+
"""
|
@@ -1,81 +1,7 @@
|
|
1
|
-
Feature: Locating
|
2
|
-
In order to
|
1
|
+
Feature: Locating annotations on a multiple contig scaffold
|
2
|
+
In order to build a genome from multiple contigs
|
3
3
|
A user can use scaffold-annotation-locator
|
4
|
-
to
|
5
|
-
|
6
|
-
Scenario: One annotation on a contig
|
7
|
-
Given a file named "scaf.yml" with:
|
8
|
-
"""
|
9
|
-
---
|
10
|
-
- sequence:
|
11
|
-
source: contig1
|
12
|
-
"""
|
13
|
-
Given a file named "seq.fna" with:
|
14
|
-
"""
|
15
|
-
> contig1
|
16
|
-
AAAAAGGGGGCCCCCTTTTT
|
17
|
-
"""
|
18
|
-
Given a file named "anno.gff" with:
|
19
|
-
"""
|
20
|
-
##gff-version 3
|
21
|
-
contig1 . CDS 4 13 . + 1 ID=gene1
|
22
|
-
"""
|
23
|
-
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
24
|
-
Then the result should be:
|
25
|
-
"""
|
26
|
-
##gff-version 3
|
27
|
-
scaffold . CDS 4 13 . + 1 ID=gene1
|
28
|
-
"""
|
29
|
-
|
30
|
-
Scenario: One annotation on a trimmed contig
|
31
|
-
Given a file named "scaf.yml" with:
|
32
|
-
"""
|
33
|
-
---
|
34
|
-
- sequence:
|
35
|
-
source: contig1
|
36
|
-
start: 4
|
37
|
-
"""
|
38
|
-
Given a file named "seq.fna" with:
|
39
|
-
"""
|
40
|
-
> contig1
|
41
|
-
AAAAAGGGGGCCCCCTTTTT
|
42
|
-
"""
|
43
|
-
Given a file named "anno.gff" with:
|
44
|
-
"""
|
45
|
-
##gff-version 3
|
46
|
-
contig1 . CDS 4 13 . + 1 ID=gene1
|
47
|
-
"""
|
48
|
-
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
49
|
-
Then the result should be:
|
50
|
-
"""
|
51
|
-
##gff-version 3
|
52
|
-
scaffold . CDS 1 10 . + 1 ID=gene1
|
53
|
-
"""
|
54
|
-
|
55
|
-
Scenario: One annotation on a reversed contig
|
56
|
-
Given a file named "scaf.yml" with:
|
57
|
-
"""
|
58
|
-
---
|
59
|
-
- sequence:
|
60
|
-
source: contig1
|
61
|
-
reverse: true
|
62
|
-
"""
|
63
|
-
Given a file named "seq.fna" with:
|
64
|
-
"""
|
65
|
-
> contig1
|
66
|
-
AAAAAGGGGGCCCCCTTTTT
|
67
|
-
"""
|
68
|
-
Given a file named "anno.gff" with:
|
69
|
-
"""
|
70
|
-
##gff-version 3
|
71
|
-
contig1 . CDS 1 6 . + 1 ID=gene1
|
72
|
-
"""
|
73
|
-
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
74
|
-
Then the result should be:
|
75
|
-
"""
|
76
|
-
##gff-version 3
|
77
|
-
scaffold . CDS 15 20 . - 1 ID=gene1
|
78
|
-
"""
|
4
|
+
to update annotation coordinates with respect to multiple contigs
|
79
5
|
|
80
6
|
Scenario: Three annotations on three contigs
|
81
7
|
Given a file named "scaf.yml" with:
|
@@ -257,61 +183,80 @@ Feature: Locating gff3 annotations on a scaffold
|
|
257
183
|
scaffold . CDS 41 46 . + 1 ID=gene2
|
258
184
|
"""
|
259
185
|
|
260
|
-
Scenario: Annotations on
|
186
|
+
Scenario: Annotations on a single duplicated contig
|
261
187
|
Given a file named "scaf.yml" with:
|
262
188
|
"""
|
263
189
|
---
|
264
190
|
- sequence:
|
265
191
|
source: contig1
|
266
|
-
- unresolved:
|
267
|
-
length: 10
|
268
192
|
- sequence:
|
269
|
-
source:
|
193
|
+
source: contig1
|
270
194
|
"""
|
271
195
|
Given a file named "seq.fna" with:
|
272
196
|
"""
|
273
197
|
> contig1
|
274
198
|
AAAAAGGGGGCCCCCTTTTT
|
275
|
-
> contig2
|
276
|
-
AAAAAGGGGGCCCCCTTTTT
|
277
199
|
"""
|
278
200
|
Given a file named "anno.gff" with:
|
279
201
|
"""
|
280
202
|
##gff-version 3
|
281
203
|
contig1 . CDS 1 6 . + 1 ID=gene1
|
282
|
-
contig2 . CDS 1 6 . + 1 ID=gene2
|
283
204
|
"""
|
284
205
|
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
285
206
|
Then the result should be:
|
286
207
|
"""
|
287
208
|
##gff-version 3
|
288
209
|
scaffold . CDS 1 6 . + 1 ID=gene1
|
289
|
-
scaffold . CDS
|
210
|
+
scaffold . CDS 21 26 . + 1 ID=gene1
|
290
211
|
"""
|
291
212
|
|
292
|
-
Scenario: Annotations on
|
213
|
+
Scenario: Annotations on reversed and trimmed contigs with inserts
|
293
214
|
Given a file named "scaf.yml" with:
|
294
215
|
"""
|
295
216
|
---
|
296
217
|
- sequence:
|
297
218
|
source: contig1
|
219
|
+
stop: 6
|
298
220
|
- sequence:
|
299
|
-
source:
|
221
|
+
source: contig2
|
222
|
+
reverse: true
|
223
|
+
inserts:
|
224
|
+
-
|
225
|
+
source: insert1
|
226
|
+
open: 6
|
227
|
+
close: 7
|
228
|
+
- sequence:
|
229
|
+
source: contig3
|
230
|
+
start: 3
|
231
|
+
|
300
232
|
"""
|
301
233
|
Given a file named "seq.fna" with:
|
302
234
|
"""
|
303
235
|
> contig1
|
304
|
-
|
236
|
+
AAAAAGGG
|
237
|
+
> contig2
|
238
|
+
AAAAAGGGGGC
|
239
|
+
> contig3
|
240
|
+
AAAAAGGG
|
241
|
+
> insert1
|
242
|
+
TTT
|
305
243
|
"""
|
306
244
|
Given a file named "anno.gff" with:
|
307
245
|
"""
|
308
246
|
##gff-version 3
|
309
|
-
contig1 . CDS 1
|
247
|
+
contig1 . CDS 1 4 . + 1 ID=gene1
|
248
|
+
contig1 . CDS 5 8 . + 1 ID=gene2
|
249
|
+
contig2 . CDS 1 4 . + 1 ID=gene3
|
250
|
+
contig2 . CDS 8 11 . + 1 ID=gene4
|
251
|
+
contig3 . CDS 1 3 . + 1 ID=gene5
|
252
|
+
contig3 . CDS 4 8 . + 1 ID=gene6
|
310
253
|
"""
|
311
254
|
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
312
255
|
Then the result should be:
|
313
256
|
"""
|
314
257
|
##gff-version 3
|
315
|
-
scaffold . CDS 1
|
316
|
-
scaffold . CDS
|
258
|
+
scaffold . CDS 1 4 . + 1 ID=gene1
|
259
|
+
scaffold . CDS 15 18 . - 1 ID=gene3
|
260
|
+
scaffold . CDS 7 10 . - 1 ID=gene4
|
261
|
+
scaffold . CDS 20 24 . + 1 ID=gene6
|
317
262
|
"""
|
@@ -0,0 +1,158 @@
|
|
1
|
+
Feature: Locating annotations on single contig scaffold
|
2
|
+
In order to add gff3 annotations to a scaffold
|
3
|
+
A user can use scaffold-annotation-locator
|
4
|
+
to return the updated coordinates of scaffold annotations
|
5
|
+
|
6
|
+
Scenario: One annotation on a contig
|
7
|
+
Given a file named "scaf.yml" with:
|
8
|
+
"""
|
9
|
+
---
|
10
|
+
- sequence:
|
11
|
+
source: contig1
|
12
|
+
"""
|
13
|
+
Given a file named "seq.fna" with:
|
14
|
+
"""
|
15
|
+
> contig1
|
16
|
+
AAAAAGGGGGCCCCCTTTTT
|
17
|
+
"""
|
18
|
+
Given a file named "anno.gff" with:
|
19
|
+
"""
|
20
|
+
##gff-version 3
|
21
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
22
|
+
"""
|
23
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
24
|
+
Then the result should be:
|
25
|
+
"""
|
26
|
+
##gff-version 3
|
27
|
+
scaffold . CDS 4 13 . + 1 ID=gene1
|
28
|
+
"""
|
29
|
+
|
30
|
+
Scenario: One annotation on a reversed contig
|
31
|
+
Given a file named "scaf.yml" with:
|
32
|
+
"""
|
33
|
+
---
|
34
|
+
- sequence:
|
35
|
+
source: contig1
|
36
|
+
reverse: true
|
37
|
+
"""
|
38
|
+
Given a file named "seq.fna" with:
|
39
|
+
"""
|
40
|
+
> contig1
|
41
|
+
AAAAAGGGGGCCCCCTTTTT
|
42
|
+
"""
|
43
|
+
Given a file named "anno.gff" with:
|
44
|
+
"""
|
45
|
+
##gff-version 3
|
46
|
+
contig1 . CDS 1 6 . + 1 ID=gene1
|
47
|
+
"""
|
48
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
49
|
+
Then the result should be:
|
50
|
+
"""
|
51
|
+
##gff-version 3
|
52
|
+
scaffold . CDS 15 20 . - 1 ID=gene1
|
53
|
+
"""
|
54
|
+
|
55
|
+
Scenario: An annotation in a start trimmed region of the sequence
|
56
|
+
Given a file named "scaf.yml" with:
|
57
|
+
"""
|
58
|
+
---
|
59
|
+
- sequence:
|
60
|
+
source: contig1
|
61
|
+
start: 5
|
62
|
+
"""
|
63
|
+
Given a file named "seq.fna" with:
|
64
|
+
"""
|
65
|
+
> contig1
|
66
|
+
AAAAAGGGGGCCCCCTTTTT
|
67
|
+
> insert1
|
68
|
+
TTTT
|
69
|
+
"""
|
70
|
+
Given a file named "anno.gff" with:
|
71
|
+
"""
|
72
|
+
##gff-version 3
|
73
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
74
|
+
"""
|
75
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
76
|
+
Then the result should be:
|
77
|
+
"""
|
78
|
+
##gff-version 3
|
79
|
+
"""
|
80
|
+
|
81
|
+
Scenario: An annotation inside a stop trimmed region of the sequence
|
82
|
+
Given a file named "scaf.yml" with:
|
83
|
+
"""
|
84
|
+
---
|
85
|
+
- sequence:
|
86
|
+
source: contig1
|
87
|
+
stop: 12
|
88
|
+
"""
|
89
|
+
Given a file named "seq.fna" with:
|
90
|
+
"""
|
91
|
+
> contig1
|
92
|
+
AAAAAGGGGGCCCCCTTTTT
|
93
|
+
> insert1
|
94
|
+
TTTT
|
95
|
+
"""
|
96
|
+
Given a file named "anno.gff" with:
|
97
|
+
"""
|
98
|
+
##gff-version 3
|
99
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
100
|
+
"""
|
101
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
102
|
+
Then the result should be:
|
103
|
+
"""
|
104
|
+
##gff-version 3
|
105
|
+
"""
|
106
|
+
|
107
|
+
Scenario: An annotation bordering a stop trimmed region of the sequence
|
108
|
+
Given a file named "scaf.yml" with:
|
109
|
+
"""
|
110
|
+
---
|
111
|
+
- sequence:
|
112
|
+
source: contig1
|
113
|
+
stop: 13
|
114
|
+
"""
|
115
|
+
Given a file named "seq.fna" with:
|
116
|
+
"""
|
117
|
+
> contig1
|
118
|
+
AAAAAGGGGGCCCCCTTTTT
|
119
|
+
> insert1
|
120
|
+
TTTT
|
121
|
+
"""
|
122
|
+
Given a file named "anno.gff" with:
|
123
|
+
"""
|
124
|
+
##gff-version 3
|
125
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
126
|
+
"""
|
127
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
128
|
+
Then the result should be:
|
129
|
+
"""
|
130
|
+
##gff-version 3
|
131
|
+
scaffold . CDS 4 13 . + 1 ID=gene1
|
132
|
+
"""
|
133
|
+
|
134
|
+
Scenario: An annotation bordering a start trimmed region of the sequence
|
135
|
+
Given a file named "scaf.yml" with:
|
136
|
+
"""
|
137
|
+
---
|
138
|
+
- sequence:
|
139
|
+
source: contig1
|
140
|
+
start: 4
|
141
|
+
"""
|
142
|
+
Given a file named "seq.fna" with:
|
143
|
+
"""
|
144
|
+
> contig1
|
145
|
+
AAAAAGGGGGCCCCCTTTTT
|
146
|
+
"""
|
147
|
+
Given a file named "anno.gff" with:
|
148
|
+
"""
|
149
|
+
##gff-version 3
|
150
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
151
|
+
"""
|
152
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
153
|
+
Then the result should be:
|
154
|
+
"""
|
155
|
+
##gff-version 3
|
156
|
+
scaffold . CDS 1 10 . + 1 ID=gene1
|
157
|
+
"""
|
158
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
Feature: Parsing unresolved regions
|
2
|
+
In order to include unresolved regions in a scaffold
|
3
|
+
A user can use scaffold-annotation-locator
|
4
|
+
to update annotation coordinates with respect to unresolved regions
|
5
|
+
|
6
|
+
Scenario: Annotations on two contigs separated by an unresolved region
|
7
|
+
Given a file named "scaf.yml" with:
|
8
|
+
"""
|
9
|
+
---
|
10
|
+
- sequence:
|
11
|
+
source: contig1
|
12
|
+
- unresolved:
|
13
|
+
length: 10
|
14
|
+
- sequence:
|
15
|
+
source: contig2
|
16
|
+
"""
|
17
|
+
Given a file named "seq.fna" with:
|
18
|
+
"""
|
19
|
+
> contig1
|
20
|
+
AAAAAGGGGGCCCCCTTTTT
|
21
|
+
> contig2
|
22
|
+
AAAAAGGGGGCCCCCTTTTT
|
23
|
+
"""
|
24
|
+
Given a file named "anno.gff" with:
|
25
|
+
"""
|
26
|
+
##gff-version 3
|
27
|
+
contig1 . CDS 1 6 . + 1 ID=gene1
|
28
|
+
contig2 . CDS 1 6 . + 1 ID=gene2
|
29
|
+
"""
|
30
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
31
|
+
Then the result should be:
|
32
|
+
"""
|
33
|
+
##gff-version 3
|
34
|
+
scaffold . CDS 1 6 . + 1 ID=gene1
|
35
|
+
scaffold . CDS 31 36 . + 1 ID=gene2
|
36
|
+
"""
|
37
|
+
|
@@ -2,6 +2,8 @@ require 'delegate'
|
|
2
2
|
require 'scaffolder'
|
3
3
|
require 'bio'
|
4
4
|
|
5
|
+
require 'scaffolder/extensions'
|
6
|
+
|
5
7
|
class Scaffolder::AnnotationLocator < DelegateClass(Array)
|
6
8
|
|
7
9
|
def initialize(scaffold_file,sequence_file,gff_file)
|
@@ -10,37 +12,42 @@ class Scaffolder::AnnotationLocator < DelegateClass(Array)
|
|
10
12
|
@gff_file = gff_file
|
11
13
|
|
12
14
|
updated_records = Array.new
|
13
|
-
scaffold.inject(0) do |
|
15
|
+
scaffold.inject(0) do |prior_length,entry|
|
14
16
|
|
15
17
|
if entry.entry_type == :sequence
|
16
|
-
|
17
|
-
update_record(record,entry,length)
|
18
|
-
end
|
19
|
-
end
|
18
|
+
records[entry.source].each do |record|
|
20
19
|
|
21
|
-
|
22
|
-
|
20
|
+
# Don't include this record if it overlaps with an insert
|
21
|
+
next if record.overlap?(entry.inserts.map{|i| (i.open..i.close)})
|
23
22
|
|
24
|
-
|
25
|
-
|
23
|
+
# Skip this record if it lies in the start or stop trimmed regions
|
24
|
+
next if record.start < entry.start
|
25
|
+
next if record.end > entry.stop
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
27
|
+
# Update record location by size differences of prior inserts
|
28
|
+
entry.inserts.select {|i| i.close < record.start }.each do |insert|
|
29
|
+
record.change_position_by insert.size_diff
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
record.start = scaffold_entry.sequence.length - (record.start - 1)
|
32
|
+
# Decrease record position by distance contig is trimmed at start
|
33
|
+
record.change_position_by(1 - entry.start)
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
35
|
+
# Reverse complement record positions if contig is reversed
|
36
|
+
record.reverse_complement_by entry.sequence.length if entry.reverse
|
37
|
+
|
38
|
+
# Increase record position by length of prior contigs
|
39
|
+
record.change_position_by prior_length
|
38
40
|
|
39
|
-
|
40
|
-
record.end += prior_length
|
41
|
+
record.seqname = "scaffold"
|
41
42
|
|
42
|
-
|
43
|
-
|
43
|
+
updated_records << record
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
prior_length + entry.sequence.length
|
48
|
+
end
|
49
|
+
|
50
|
+
super updated_records
|
44
51
|
end
|
45
52
|
|
46
53
|
def scaffold
|
@@ -55,8 +62,4 @@ class Scaffolder::AnnotationLocator < DelegateClass(Array)
|
|
55
62
|
end
|
56
63
|
end
|
57
64
|
|
58
|
-
def self.flip_strand(strand)
|
59
|
-
strand == '+' ? '-' : '+'
|
60
|
-
end
|
61
|
-
|
62
65
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Scaffolder::GffRecordHelper
|
2
|
+
|
3
|
+
def flip_strand
|
4
|
+
self.strand = (self.strand == '+' ? '-' : '+')
|
5
|
+
end
|
6
|
+
|
7
|
+
def change_position_by(distance)
|
8
|
+
self.start += distance
|
9
|
+
self.end += distance
|
10
|
+
end
|
11
|
+
|
12
|
+
def reverse_complement_by(distance)
|
13
|
+
self.end = distance - (self.end - 1)
|
14
|
+
self.start = distance - (self.start - 1)
|
15
|
+
|
16
|
+
self.end, self.start = self.start, self.end
|
17
|
+
self.flip_strand
|
18
|
+
end
|
19
|
+
|
20
|
+
def overlap?(*ranges)
|
21
|
+
ranges.flatten.any? do |range|
|
22
|
+
range.include?(self.start) || range.include?(self.end)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{scaffolder-annotation-locator}
|
8
|
-
s.version = "0.0
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Michael Barton"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-06-10}
|
13
13
|
s.description = %q{Build a genome scaffold using scaffolder and a set of annotated contigs. This tool updates the locations of the contig annotations using the scaffolder template as a base.}
|
14
14
|
s.email = %q{mail@michaelbarton.me.uk}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -23,12 +23,18 @@ Gem::Specification.new do |s|
|
|
23
23
|
"README.rdoc",
|
24
24
|
"Rakefile",
|
25
25
|
"VERSION",
|
26
|
-
"features/
|
26
|
+
"features/inserts.feature",
|
27
|
+
"features/multiple-contigs.feature",
|
28
|
+
"features/single-contig.feature",
|
27
29
|
"features/step_definitions/scaffolder-annotation-locator_steps.rb",
|
28
30
|
"features/support/env.rb",
|
31
|
+
"features/unresolved.feature",
|
29
32
|
"lib/scaffolder/annotation_locator.rb",
|
33
|
+
"lib/scaffolder/extensions.rb",
|
34
|
+
"lib/scaffolder/gff_record_helper.rb",
|
30
35
|
"scaffolder-annotation-locator.gemspec",
|
31
36
|
"spec/scaffolder/annotation_locator_spec.rb",
|
37
|
+
"spec/scaffolder/gff_record_helper_spec.rb",
|
32
38
|
"spec/spec_helper.rb",
|
33
39
|
"spec/support/gff_attribute_matcher.rb"
|
34
40
|
]
|
@@ -37,41 +43,36 @@ Gem::Specification.new do |s|
|
|
37
43
|
s.require_paths = ["lib"]
|
38
44
|
s.rubygems_version = %q{1.3.7}
|
39
45
|
s.summary = %q{Update locations of gff3 annotations from a scaffolder template}
|
40
|
-
s.test_files = [
|
41
|
-
"spec/scaffolder/annotation_locator_spec.rb",
|
42
|
-
"spec/spec_helper.rb",
|
43
|
-
"spec/support/gff_attribute_matcher.rb"
|
44
|
-
]
|
45
46
|
|
46
47
|
if s.respond_to? :specification_version then
|
47
48
|
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
48
49
|
s.specification_version = 3
|
49
50
|
|
50
51
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
51
|
-
s.add_runtime_dependency(%q<scaffolder>, ["
|
52
|
+
s.add_runtime_dependency(%q<scaffolder>, ["= 0.4.3"])
|
52
53
|
s.add_development_dependency(%q<bundler>, ["~> 1.0"])
|
53
54
|
s.add_development_dependency(%q<jeweler>, ["~> 1.5"])
|
54
55
|
s.add_development_dependency(%q<rspec>, ["~> 2.4"])
|
55
|
-
s.add_development_dependency(%q<scaffolder-test-helpers>, ["= 0.
|
56
|
+
s.add_development_dependency(%q<scaffolder-test-helpers>, ["= 0.3.0"])
|
56
57
|
s.add_development_dependency(%q<cucumber>, ["~> 0.9"])
|
57
58
|
s.add_development_dependency(%q<aruba>, ["~> 0.2"])
|
58
59
|
s.add_development_dependency(%q<yard>, ["~> 0.6"])
|
59
60
|
else
|
60
|
-
s.add_dependency(%q<scaffolder>, ["
|
61
|
+
s.add_dependency(%q<scaffolder>, ["= 0.4.3"])
|
61
62
|
s.add_dependency(%q<bundler>, ["~> 1.0"])
|
62
63
|
s.add_dependency(%q<jeweler>, ["~> 1.5"])
|
63
64
|
s.add_dependency(%q<rspec>, ["~> 2.4"])
|
64
|
-
s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.
|
65
|
+
s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.3.0"])
|
65
66
|
s.add_dependency(%q<cucumber>, ["~> 0.9"])
|
66
67
|
s.add_dependency(%q<aruba>, ["~> 0.2"])
|
67
68
|
s.add_dependency(%q<yard>, ["~> 0.6"])
|
68
69
|
end
|
69
70
|
else
|
70
|
-
s.add_dependency(%q<scaffolder>, ["
|
71
|
+
s.add_dependency(%q<scaffolder>, ["= 0.4.3"])
|
71
72
|
s.add_dependency(%q<bundler>, ["~> 1.0"])
|
72
73
|
s.add_dependency(%q<jeweler>, ["~> 1.5"])
|
73
74
|
s.add_dependency(%q<rspec>, ["~> 2.4"])
|
74
|
-
s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.
|
75
|
+
s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.3.0"])
|
75
76
|
s.add_dependency(%q<cucumber>, ["~> 0.9"])
|
76
77
|
s.add_dependency(%q<aruba>, ["~> 0.2"])
|
77
78
|
s.add_dependency(%q<yard>, ["~> 0.6"])
|
@@ -5,7 +5,7 @@ describe Scaffolder::AnnotationLocator do
|
|
5
5
|
def relocate(scaffold,records)
|
6
6
|
@scaffold_file, @sequence_file = generate_scaffold_files(scaffold)
|
7
7
|
described_class.new(@scaffold_file.path, @sequence_file.path,
|
8
|
-
generate_gff3_file(records))
|
8
|
+
generate_gff3_file(records).path)
|
9
9
|
end
|
10
10
|
|
11
11
|
before do
|
@@ -73,6 +73,115 @@ describe Scaffolder::AnnotationLocator do
|
|
73
73
|
|
74
74
|
end
|
75
75
|
|
76
|
+
describe "with an insert before an annotation" do
|
77
|
+
|
78
|
+
subject do
|
79
|
+
relocate([@contig.clone.inserts(:open => 1, :close => 2, :sequence => 'TTT')],
|
80
|
+
[@record])
|
81
|
+
end
|
82
|
+
|
83
|
+
it{ should set_the_attribute(:seqname => 'scaffold') }
|
84
|
+
it{ should set_the_attribute(:phase => 1) }
|
85
|
+
it{ should set_the_attribute(:strand => '+') }
|
86
|
+
|
87
|
+
it{ should set_the_attribute(:start => 5).only_for_the(:first) }
|
88
|
+
it{ should set_the_attribute(:end => 7).only_for_the(:first) }
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
describe "with an insert after an annotation" do
|
93
|
+
|
94
|
+
subject do
|
95
|
+
relocate([@contig.clone.
|
96
|
+
inserts(:open => 7, :close => 8, :sequence => 'TTT').
|
97
|
+
sequence('ATGTTTCCC')],
|
98
|
+
[@record])
|
99
|
+
end
|
100
|
+
|
101
|
+
it{ should set_the_attribute(:seqname => 'scaffold') }
|
102
|
+
it{ should set_the_attribute(:phase => 1) }
|
103
|
+
it{ should set_the_attribute(:strand => '+') }
|
104
|
+
|
105
|
+
it{ should set_the_attribute(:start => 4).only_for_the(:first) }
|
106
|
+
it{ should set_the_attribute(:end => 6).only_for_the(:first) }
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "with an insert before and after an annotation" do
|
111
|
+
|
112
|
+
subject do
|
113
|
+
relocate([@contig.clone.
|
114
|
+
inserts(:open => 1, :close => 2, :sequence => 'TTT').
|
115
|
+
inserts(:open => 7, :close => 8, :sequence => 'TTT').
|
116
|
+
sequence('ATGTTTCCC')],
|
117
|
+
[@record])
|
118
|
+
end
|
119
|
+
|
120
|
+
it{ should set_the_attribute(:seqname => 'scaffold') }
|
121
|
+
it{ should set_the_attribute(:phase => 1) }
|
122
|
+
it{ should set_the_attribute(:strand => '+') }
|
123
|
+
|
124
|
+
it{ should set_the_attribute(:start => 5).only_for_the(:first) }
|
125
|
+
it{ should set_the_attribute(:end => 7).only_for_the(:first) }
|
126
|
+
|
127
|
+
end
|
128
|
+
|
129
|
+
describe "reversed with an insert before an annotation" do
|
130
|
+
|
131
|
+
subject do
|
132
|
+
contig = @contig.clone.
|
133
|
+
reverse(true).
|
134
|
+
inserts(:open => 1, :close => 2, :sequence => 'TTT')
|
135
|
+
relocate([contig],[@record])
|
136
|
+
end
|
137
|
+
|
138
|
+
it{ should set_the_attribute(:seqname => 'scaffold') }
|
139
|
+
it{ should set_the_attribute(:phase => 1) }
|
140
|
+
it{ should set_the_attribute(:strand => '-') }
|
141
|
+
|
142
|
+
it{ should set_the_attribute(:start => 1).only_for_the(:first) }
|
143
|
+
it{ should set_the_attribute(:end => 3).only_for_the(:first) }
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "with an insert overlapping with an annotation" do
|
148
|
+
|
149
|
+
subject do
|
150
|
+
relocate([@contig.clone.
|
151
|
+
inserts(:open => 3, :close => 5, :sequence => 'TTT')],
|
152
|
+
[@record])
|
153
|
+
end
|
154
|
+
|
155
|
+
it "should not include this annotation" do
|
156
|
+
subject.should be_empty
|
157
|
+
end
|
158
|
+
|
159
|
+
end
|
160
|
+
|
161
|
+
describe "with an annotation in a start trimmed region" do
|
162
|
+
|
163
|
+
subject do
|
164
|
+
relocate([@contig.clone.start(5)],[@record])
|
165
|
+
end
|
166
|
+
|
167
|
+
it "should not include this annotation" do
|
168
|
+
subject.should be_empty
|
169
|
+
end
|
170
|
+
|
171
|
+
end
|
172
|
+
|
173
|
+
describe "with an annotation in a stop trimmed region" do
|
174
|
+
|
175
|
+
subject do
|
176
|
+
relocate([@contig.clone.stop(5)],[@record])
|
177
|
+
end
|
178
|
+
|
179
|
+
it "should not include this annotation" do
|
180
|
+
subject.should be_empty
|
181
|
+
end
|
182
|
+
|
183
|
+
end
|
184
|
+
|
76
185
|
end
|
77
186
|
|
78
187
|
describe "relocating two contigs" do
|
@@ -203,16 +312,4 @@ describe Scaffolder::AnnotationLocator do
|
|
203
312
|
|
204
313
|
end
|
205
314
|
|
206
|
-
describe "#flip_strand" do
|
207
|
-
|
208
|
-
it "should return '+' when passed '-'" do
|
209
|
-
described_class.flip_strand('+').should == '-'
|
210
|
-
end
|
211
|
-
|
212
|
-
it "should return '-' when passed '+'" do
|
213
|
-
described_class.flip_strand('-').should == '+'
|
214
|
-
end
|
215
|
-
|
216
|
-
end
|
217
|
-
|
218
315
|
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
|
2
|
+
|
3
|
+
describe Scaffolder::GffRecordHelper do
|
4
|
+
|
5
|
+
it "should be included in Bio::GFF::GFF3::Record" do
|
6
|
+
Bio::GFF::GFF3::Record.ancestors.should include(described_class)
|
7
|
+
end
|
8
|
+
|
9
|
+
subject do
|
10
|
+
Bio::GFF::GFF3::Record.new(nil,nil,'CDS',1,3,nil,'+')
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#flip_strand" do
|
14
|
+
|
15
|
+
it "should change strand to '-' when flipped from '+'" do
|
16
|
+
subject.flip_strand
|
17
|
+
subject.strand.should == '-'
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should change strand to '+' when flipped from '-'" do
|
21
|
+
subject.strand = '-'
|
22
|
+
subject.flip_strand
|
23
|
+
subject.strand.should == '+'
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "#change_position_by" do
|
29
|
+
|
30
|
+
before do
|
31
|
+
subject.change_position_by 3
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should increase start position" do
|
35
|
+
subject.start.should == 4
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should increase end position" do
|
39
|
+
subject.end.should == 6
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
describe "#reverse_complement_by" do
|
45
|
+
|
46
|
+
before do
|
47
|
+
subject.reverse_complement_by 7
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should increase start position" do
|
51
|
+
subject.start.should == 5
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should increase end position" do
|
55
|
+
subject.end.should == 7
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should flip the strand" do
|
59
|
+
subject.strand.should == '-'
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
describe "#overlap?" do
|
65
|
+
|
66
|
+
it "should return false when no overlap with a single insert" do
|
67
|
+
subject.overlap?(4..6).should be_false
|
68
|
+
end
|
69
|
+
|
70
|
+
it "should return true when overlapping with a single insert" do
|
71
|
+
subject.overlap?(0..1).should be_true
|
72
|
+
subject.overlap?(3..4).should be_true
|
73
|
+
subject.overlap?(2..4).should be_true
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should return false when no overlap with multiple inserts" do
|
77
|
+
subject.overlap?([4..6,7..9]).should be_false
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should return true when overlapping with one of two inserts" do
|
81
|
+
subject.overlap?([0..1,4..6]).should be_true
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scaffolder-annotation-locator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 0
|
9
8
|
- 1
|
10
|
-
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Michael Barton
|
@@ -15,20 +15,21 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-06-10 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
requirement: &id001 !ruby/object:Gem::Requirement
|
23
23
|
none: false
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - "="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
hash:
|
27
|
+
hash: 9
|
28
28
|
segments:
|
29
29
|
- 0
|
30
30
|
- 4
|
31
|
-
|
31
|
+
- 3
|
32
|
+
version: 0.4.3
|
32
33
|
type: :runtime
|
33
34
|
name: scaffolder
|
34
35
|
prerelease: false
|
@@ -87,9 +88,9 @@ dependencies:
|
|
87
88
|
hash: 19
|
88
89
|
segments:
|
89
90
|
- 0
|
90
|
-
-
|
91
|
-
-
|
92
|
-
version: 0.
|
91
|
+
- 3
|
92
|
+
- 0
|
93
|
+
version: 0.3.0
|
93
94
|
type: :development
|
94
95
|
name: scaffolder-test-helpers
|
95
96
|
prerelease: false
|
@@ -155,12 +156,18 @@ files:
|
|
155
156
|
- README.rdoc
|
156
157
|
- Rakefile
|
157
158
|
- VERSION
|
158
|
-
- features/
|
159
|
+
- features/inserts.feature
|
160
|
+
- features/multiple-contigs.feature
|
161
|
+
- features/single-contig.feature
|
159
162
|
- features/step_definitions/scaffolder-annotation-locator_steps.rb
|
160
163
|
- features/support/env.rb
|
164
|
+
- features/unresolved.feature
|
161
165
|
- lib/scaffolder/annotation_locator.rb
|
166
|
+
- lib/scaffolder/extensions.rb
|
167
|
+
- lib/scaffolder/gff_record_helper.rb
|
162
168
|
- scaffolder-annotation-locator.gemspec
|
163
169
|
- spec/scaffolder/annotation_locator_spec.rb
|
170
|
+
- spec/scaffolder/gff_record_helper_spec.rb
|
164
171
|
- spec/spec_helper.rb
|
165
172
|
- spec/support/gff_attribute_matcher.rb
|
166
173
|
has_rdoc: true
|
@@ -197,7 +204,5 @@ rubygems_version: 1.3.7
|
|
197
204
|
signing_key:
|
198
205
|
specification_version: 3
|
199
206
|
summary: Update locations of gff3 annotations from a scaffolder template
|
200
|
-
test_files:
|
201
|
-
|
202
|
-
- spec/spec_helper.rb
|
203
|
-
- spec/support/gff_attribute_matcher.rb
|
207
|
+
test_files: []
|
208
|
+
|