scaffolder-annotation-locator 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -2
- data/VERSION +1 -1
- data/features/inserts.feature +128 -0
- data/features/{gff3.feature → multiple-contigs.feature} +36 -91
- data/features/single-contig.feature +158 -0
- data/features/unresolved.feature +37 -0
- data/lib/scaffolder/annotation_locator.rb +29 -26
- data/lib/scaffolder/extensions.rb +3 -0
- data/lib/scaffolder/gff_record_helper.rb +26 -0
- data/scaffolder-annotation-locator.gemspec +15 -14
- data/spec/scaffolder/annotation_locator_spec.rb +110 -13
- data/spec/scaffolder/gff_record_helper_spec.rb +86 -0
- data/spec/spec_helper.rb +2 -2
- metadata +20 -15
data/Gemfile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
3
|
group :default do
|
4
|
-
gem "scaffolder", "
|
4
|
+
gem "scaffolder", "0.4.3"
|
5
5
|
end
|
6
6
|
|
7
7
|
group :development do
|
@@ -9,7 +9,7 @@ group :development do
|
|
9
9
|
gem "jeweler", "~> 1.5"
|
10
10
|
|
11
11
|
gem "rspec", "~> 2.4"
|
12
|
-
gem "scaffolder-test-helpers", "0.
|
12
|
+
gem "scaffolder-test-helpers", "0.3.0"
|
13
13
|
gem "cucumber", "~> 0.9"
|
14
14
|
gem "aruba", "~> 0.2"
|
15
15
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
@@ -0,0 +1,128 @@
|
|
1
|
+
Feature: Parsing contigs with inserts
|
2
|
+
In order to include inserts in a scaffold
|
3
|
+
A user can use scaffold-annotation-locator
|
4
|
+
to update annotation coordinates with respect to contigs with inserts
|
5
|
+
|
6
|
+
Scenario: An annotation before an insert in a contig
|
7
|
+
Given a file named "scaf.yml" with:
|
8
|
+
"""
|
9
|
+
---
|
10
|
+
- sequence:
|
11
|
+
source: contig1
|
12
|
+
inserts:
|
13
|
+
-
|
14
|
+
source: insert1
|
15
|
+
open: 14
|
16
|
+
close: 15
|
17
|
+
"""
|
18
|
+
Given a file named "seq.fna" with:
|
19
|
+
"""
|
20
|
+
> contig1
|
21
|
+
AAAAAGGGGGCCCCCTTTTT
|
22
|
+
> insert1
|
23
|
+
TTTT
|
24
|
+
"""
|
25
|
+
Given a file named "anno.gff" with:
|
26
|
+
"""
|
27
|
+
##gff-version 3
|
28
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
29
|
+
"""
|
30
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
31
|
+
Then the result should be:
|
32
|
+
"""
|
33
|
+
##gff-version 3
|
34
|
+
scaffold . CDS 4 13 . + 1 ID=gene1
|
35
|
+
"""
|
36
|
+
|
37
|
+
Scenario: An annotation after an insert in a contig
|
38
|
+
Given a file named "scaf.yml" with:
|
39
|
+
"""
|
40
|
+
---
|
41
|
+
- sequence:
|
42
|
+
source: contig1
|
43
|
+
inserts:
|
44
|
+
-
|
45
|
+
source: insert1
|
46
|
+
open: 1
|
47
|
+
close: 3
|
48
|
+
"""
|
49
|
+
Given a file named "seq.fna" with:
|
50
|
+
"""
|
51
|
+
> contig1
|
52
|
+
AAAAAGGGGGCCCCCTTTTT
|
53
|
+
> insert1
|
54
|
+
TTTT
|
55
|
+
"""
|
56
|
+
Given a file named "anno.gff" with:
|
57
|
+
"""
|
58
|
+
##gff-version 3
|
59
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
60
|
+
"""
|
61
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
62
|
+
Then the result should be:
|
63
|
+
"""
|
64
|
+
##gff-version 3
|
65
|
+
scaffold . CDS 5 14 . + 1 ID=gene1
|
66
|
+
"""
|
67
|
+
|
68
|
+
Scenario: An annotation before an insert in a reversed contig
|
69
|
+
Given a file named "scaf.yml" with:
|
70
|
+
"""
|
71
|
+
---
|
72
|
+
- sequence:
|
73
|
+
source: contig1
|
74
|
+
reverse: true
|
75
|
+
inserts:
|
76
|
+
-
|
77
|
+
source: insert1
|
78
|
+
open: 1
|
79
|
+
close: 3
|
80
|
+
"""
|
81
|
+
Given a file named "seq.fna" with:
|
82
|
+
"""
|
83
|
+
> contig1
|
84
|
+
AAAAAGGGGGCCCCCTTTTT
|
85
|
+
> insert1
|
86
|
+
TTTT
|
87
|
+
"""
|
88
|
+
Given a file named "anno.gff" with:
|
89
|
+
"""
|
90
|
+
##gff-version 3
|
91
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
92
|
+
"""
|
93
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
94
|
+
Then the result should be:
|
95
|
+
"""
|
96
|
+
##gff-version 3
|
97
|
+
scaffold . CDS 8 17 . - 1 ID=gene1
|
98
|
+
"""
|
99
|
+
|
100
|
+
Scenario: An annotation overlapping with an insert location
|
101
|
+
Given a file named "scaf.yml" with:
|
102
|
+
"""
|
103
|
+
---
|
104
|
+
- sequence:
|
105
|
+
source: contig1
|
106
|
+
inserts:
|
107
|
+
-
|
108
|
+
source: insert1
|
109
|
+
open: 1
|
110
|
+
close: 4
|
111
|
+
"""
|
112
|
+
Given a file named "seq.fna" with:
|
113
|
+
"""
|
114
|
+
> contig1
|
115
|
+
AAAAAGGGGGCCCCCTTTTT
|
116
|
+
> insert1
|
117
|
+
TTTT
|
118
|
+
"""
|
119
|
+
Given a file named "anno.gff" with:
|
120
|
+
"""
|
121
|
+
##gff-version 3
|
122
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
123
|
+
"""
|
124
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
125
|
+
Then the result should be:
|
126
|
+
"""
|
127
|
+
##gff-version 3
|
128
|
+
"""
|
@@ -1,81 +1,7 @@
|
|
1
|
-
Feature: Locating
|
2
|
-
In order to
|
1
|
+
Feature: Locating annotations on a multiple contig scaffold
|
2
|
+
In order to build a genome from multiple contigs
|
3
3
|
A user can use scaffold-annotation-locator
|
4
|
-
to
|
5
|
-
|
6
|
-
Scenario: One annotation on a contig
|
7
|
-
Given a file named "scaf.yml" with:
|
8
|
-
"""
|
9
|
-
---
|
10
|
-
- sequence:
|
11
|
-
source: contig1
|
12
|
-
"""
|
13
|
-
Given a file named "seq.fna" with:
|
14
|
-
"""
|
15
|
-
> contig1
|
16
|
-
AAAAAGGGGGCCCCCTTTTT
|
17
|
-
"""
|
18
|
-
Given a file named "anno.gff" with:
|
19
|
-
"""
|
20
|
-
##gff-version 3
|
21
|
-
contig1 . CDS 4 13 . + 1 ID=gene1
|
22
|
-
"""
|
23
|
-
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
24
|
-
Then the result should be:
|
25
|
-
"""
|
26
|
-
##gff-version 3
|
27
|
-
scaffold . CDS 4 13 . + 1 ID=gene1
|
28
|
-
"""
|
29
|
-
|
30
|
-
Scenario: One annotation on a trimmed contig
|
31
|
-
Given a file named "scaf.yml" with:
|
32
|
-
"""
|
33
|
-
---
|
34
|
-
- sequence:
|
35
|
-
source: contig1
|
36
|
-
start: 4
|
37
|
-
"""
|
38
|
-
Given a file named "seq.fna" with:
|
39
|
-
"""
|
40
|
-
> contig1
|
41
|
-
AAAAAGGGGGCCCCCTTTTT
|
42
|
-
"""
|
43
|
-
Given a file named "anno.gff" with:
|
44
|
-
"""
|
45
|
-
##gff-version 3
|
46
|
-
contig1 . CDS 4 13 . + 1 ID=gene1
|
47
|
-
"""
|
48
|
-
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
49
|
-
Then the result should be:
|
50
|
-
"""
|
51
|
-
##gff-version 3
|
52
|
-
scaffold . CDS 1 10 . + 1 ID=gene1
|
53
|
-
"""
|
54
|
-
|
55
|
-
Scenario: One annotation on a reversed contig
|
56
|
-
Given a file named "scaf.yml" with:
|
57
|
-
"""
|
58
|
-
---
|
59
|
-
- sequence:
|
60
|
-
source: contig1
|
61
|
-
reverse: true
|
62
|
-
"""
|
63
|
-
Given a file named "seq.fna" with:
|
64
|
-
"""
|
65
|
-
> contig1
|
66
|
-
AAAAAGGGGGCCCCCTTTTT
|
67
|
-
"""
|
68
|
-
Given a file named "anno.gff" with:
|
69
|
-
"""
|
70
|
-
##gff-version 3
|
71
|
-
contig1 . CDS 1 6 . + 1 ID=gene1
|
72
|
-
"""
|
73
|
-
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
74
|
-
Then the result should be:
|
75
|
-
"""
|
76
|
-
##gff-version 3
|
77
|
-
scaffold . CDS 15 20 . - 1 ID=gene1
|
78
|
-
"""
|
4
|
+
to update annotation coordinates with respect to multiple contigs
|
79
5
|
|
80
6
|
Scenario: Three annotations on three contigs
|
81
7
|
Given a file named "scaf.yml" with:
|
@@ -257,61 +183,80 @@ Feature: Locating gff3 annotations on a scaffold
|
|
257
183
|
scaffold . CDS 41 46 . + 1 ID=gene2
|
258
184
|
"""
|
259
185
|
|
260
|
-
Scenario: Annotations on
|
186
|
+
Scenario: Annotations on a single duplicated contig
|
261
187
|
Given a file named "scaf.yml" with:
|
262
188
|
"""
|
263
189
|
---
|
264
190
|
- sequence:
|
265
191
|
source: contig1
|
266
|
-
- unresolved:
|
267
|
-
length: 10
|
268
192
|
- sequence:
|
269
|
-
source:
|
193
|
+
source: contig1
|
270
194
|
"""
|
271
195
|
Given a file named "seq.fna" with:
|
272
196
|
"""
|
273
197
|
> contig1
|
274
198
|
AAAAAGGGGGCCCCCTTTTT
|
275
|
-
> contig2
|
276
|
-
AAAAAGGGGGCCCCCTTTTT
|
277
199
|
"""
|
278
200
|
Given a file named "anno.gff" with:
|
279
201
|
"""
|
280
202
|
##gff-version 3
|
281
203
|
contig1 . CDS 1 6 . + 1 ID=gene1
|
282
|
-
contig2 . CDS 1 6 . + 1 ID=gene2
|
283
204
|
"""
|
284
205
|
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
285
206
|
Then the result should be:
|
286
207
|
"""
|
287
208
|
##gff-version 3
|
288
209
|
scaffold . CDS 1 6 . + 1 ID=gene1
|
289
|
-
scaffold . CDS
|
210
|
+
scaffold . CDS 21 26 . + 1 ID=gene1
|
290
211
|
"""
|
291
212
|
|
292
|
-
Scenario: Annotations on
|
213
|
+
Scenario: Annotations on reversed and trimmed contigs with inserts
|
293
214
|
Given a file named "scaf.yml" with:
|
294
215
|
"""
|
295
216
|
---
|
296
217
|
- sequence:
|
297
218
|
source: contig1
|
219
|
+
stop: 6
|
298
220
|
- sequence:
|
299
|
-
source:
|
221
|
+
source: contig2
|
222
|
+
reverse: true
|
223
|
+
inserts:
|
224
|
+
-
|
225
|
+
source: insert1
|
226
|
+
open: 6
|
227
|
+
close: 7
|
228
|
+
- sequence:
|
229
|
+
source: contig3
|
230
|
+
start: 3
|
231
|
+
|
300
232
|
"""
|
301
233
|
Given a file named "seq.fna" with:
|
302
234
|
"""
|
303
235
|
> contig1
|
304
|
-
|
236
|
+
AAAAAGGG
|
237
|
+
> contig2
|
238
|
+
AAAAAGGGGGC
|
239
|
+
> contig3
|
240
|
+
AAAAAGGG
|
241
|
+
> insert1
|
242
|
+
TTT
|
305
243
|
"""
|
306
244
|
Given a file named "anno.gff" with:
|
307
245
|
"""
|
308
246
|
##gff-version 3
|
309
|
-
contig1 . CDS 1
|
247
|
+
contig1 . CDS 1 4 . + 1 ID=gene1
|
248
|
+
contig1 . CDS 5 8 . + 1 ID=gene2
|
249
|
+
contig2 . CDS 1 4 . + 1 ID=gene3
|
250
|
+
contig2 . CDS 8 11 . + 1 ID=gene4
|
251
|
+
contig3 . CDS 1 3 . + 1 ID=gene5
|
252
|
+
contig3 . CDS 4 8 . + 1 ID=gene6
|
310
253
|
"""
|
311
254
|
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
312
255
|
Then the result should be:
|
313
256
|
"""
|
314
257
|
##gff-version 3
|
315
|
-
scaffold . CDS 1
|
316
|
-
scaffold . CDS
|
258
|
+
scaffold . CDS 1 4 . + 1 ID=gene1
|
259
|
+
scaffold . CDS 15 18 . - 1 ID=gene3
|
260
|
+
scaffold . CDS 7 10 . - 1 ID=gene4
|
261
|
+
scaffold . CDS 20 24 . + 1 ID=gene6
|
317
262
|
"""
|
@@ -0,0 +1,158 @@
|
|
1
|
+
Feature: Locating annotations on single contig scaffold
|
2
|
+
In order to add gff3 annotations to a scaffold
|
3
|
+
A user can use scaffold-annotation-locator
|
4
|
+
to return the updated coordinates of scaffold annotations
|
5
|
+
|
6
|
+
Scenario: One annotation on a contig
|
7
|
+
Given a file named "scaf.yml" with:
|
8
|
+
"""
|
9
|
+
---
|
10
|
+
- sequence:
|
11
|
+
source: contig1
|
12
|
+
"""
|
13
|
+
Given a file named "seq.fna" with:
|
14
|
+
"""
|
15
|
+
> contig1
|
16
|
+
AAAAAGGGGGCCCCCTTTTT
|
17
|
+
"""
|
18
|
+
Given a file named "anno.gff" with:
|
19
|
+
"""
|
20
|
+
##gff-version 3
|
21
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
22
|
+
"""
|
23
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
24
|
+
Then the result should be:
|
25
|
+
"""
|
26
|
+
##gff-version 3
|
27
|
+
scaffold . CDS 4 13 . + 1 ID=gene1
|
28
|
+
"""
|
29
|
+
|
30
|
+
Scenario: One annotation on a reversed contig
|
31
|
+
Given a file named "scaf.yml" with:
|
32
|
+
"""
|
33
|
+
---
|
34
|
+
- sequence:
|
35
|
+
source: contig1
|
36
|
+
reverse: true
|
37
|
+
"""
|
38
|
+
Given a file named "seq.fna" with:
|
39
|
+
"""
|
40
|
+
> contig1
|
41
|
+
AAAAAGGGGGCCCCCTTTTT
|
42
|
+
"""
|
43
|
+
Given a file named "anno.gff" with:
|
44
|
+
"""
|
45
|
+
##gff-version 3
|
46
|
+
contig1 . CDS 1 6 . + 1 ID=gene1
|
47
|
+
"""
|
48
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
49
|
+
Then the result should be:
|
50
|
+
"""
|
51
|
+
##gff-version 3
|
52
|
+
scaffold . CDS 15 20 . - 1 ID=gene1
|
53
|
+
"""
|
54
|
+
|
55
|
+
Scenario: An annotation in a start trimmed region of the sequence
|
56
|
+
Given a file named "scaf.yml" with:
|
57
|
+
"""
|
58
|
+
---
|
59
|
+
- sequence:
|
60
|
+
source: contig1
|
61
|
+
start: 5
|
62
|
+
"""
|
63
|
+
Given a file named "seq.fna" with:
|
64
|
+
"""
|
65
|
+
> contig1
|
66
|
+
AAAAAGGGGGCCCCCTTTTT
|
67
|
+
> insert1
|
68
|
+
TTTT
|
69
|
+
"""
|
70
|
+
Given a file named "anno.gff" with:
|
71
|
+
"""
|
72
|
+
##gff-version 3
|
73
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
74
|
+
"""
|
75
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
76
|
+
Then the result should be:
|
77
|
+
"""
|
78
|
+
##gff-version 3
|
79
|
+
"""
|
80
|
+
|
81
|
+
Scenario: An annotation inside a stop trimmed region of the sequence
|
82
|
+
Given a file named "scaf.yml" with:
|
83
|
+
"""
|
84
|
+
---
|
85
|
+
- sequence:
|
86
|
+
source: contig1
|
87
|
+
stop: 12
|
88
|
+
"""
|
89
|
+
Given a file named "seq.fna" with:
|
90
|
+
"""
|
91
|
+
> contig1
|
92
|
+
AAAAAGGGGGCCCCCTTTTT
|
93
|
+
> insert1
|
94
|
+
TTTT
|
95
|
+
"""
|
96
|
+
Given a file named "anno.gff" with:
|
97
|
+
"""
|
98
|
+
##gff-version 3
|
99
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
100
|
+
"""
|
101
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
102
|
+
Then the result should be:
|
103
|
+
"""
|
104
|
+
##gff-version 3
|
105
|
+
"""
|
106
|
+
|
107
|
+
Scenario: An annotation bordering a stop trimmed region of the sequence
|
108
|
+
Given a file named "scaf.yml" with:
|
109
|
+
"""
|
110
|
+
---
|
111
|
+
- sequence:
|
112
|
+
source: contig1
|
113
|
+
stop: 13
|
114
|
+
"""
|
115
|
+
Given a file named "seq.fna" with:
|
116
|
+
"""
|
117
|
+
> contig1
|
118
|
+
AAAAAGGGGGCCCCCTTTTT
|
119
|
+
> insert1
|
120
|
+
TTTT
|
121
|
+
"""
|
122
|
+
Given a file named "anno.gff" with:
|
123
|
+
"""
|
124
|
+
##gff-version 3
|
125
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
126
|
+
"""
|
127
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
128
|
+
Then the result should be:
|
129
|
+
"""
|
130
|
+
##gff-version 3
|
131
|
+
scaffold . CDS 4 13 . + 1 ID=gene1
|
132
|
+
"""
|
133
|
+
|
134
|
+
Scenario: An annotation bordering a start trimmed region of the sequence
|
135
|
+
Given a file named "scaf.yml" with:
|
136
|
+
"""
|
137
|
+
---
|
138
|
+
- sequence:
|
139
|
+
source: contig1
|
140
|
+
start: 4
|
141
|
+
"""
|
142
|
+
Given a file named "seq.fna" with:
|
143
|
+
"""
|
144
|
+
> contig1
|
145
|
+
AAAAAGGGGGCCCCCTTTTT
|
146
|
+
"""
|
147
|
+
Given a file named "anno.gff" with:
|
148
|
+
"""
|
149
|
+
##gff-version 3
|
150
|
+
contig1 . CDS 4 13 . + 1 ID=gene1
|
151
|
+
"""
|
152
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
153
|
+
Then the result should be:
|
154
|
+
"""
|
155
|
+
##gff-version 3
|
156
|
+
scaffold . CDS 1 10 . + 1 ID=gene1
|
157
|
+
"""
|
158
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
Feature: Parsing unresolved regions
|
2
|
+
In order to include unresolved regions in a scaffold
|
3
|
+
A user can use scaffold-annotation-locator
|
4
|
+
to update annotation coordinates with respect to unresolved regions
|
5
|
+
|
6
|
+
Scenario: Annotations on two contigs separated by an unresolved region
|
7
|
+
Given a file named "scaf.yml" with:
|
8
|
+
"""
|
9
|
+
---
|
10
|
+
- sequence:
|
11
|
+
source: contig1
|
12
|
+
- unresolved:
|
13
|
+
length: 10
|
14
|
+
- sequence:
|
15
|
+
source: contig2
|
16
|
+
"""
|
17
|
+
Given a file named "seq.fna" with:
|
18
|
+
"""
|
19
|
+
> contig1
|
20
|
+
AAAAAGGGGGCCCCCTTTTT
|
21
|
+
> contig2
|
22
|
+
AAAAAGGGGGCCCCCTTTTT
|
23
|
+
"""
|
24
|
+
Given a file named "anno.gff" with:
|
25
|
+
"""
|
26
|
+
##gff-version 3
|
27
|
+
contig1 . CDS 1 6 . + 1 ID=gene1
|
28
|
+
contig2 . CDS 1 6 . + 1 ID=gene2
|
29
|
+
"""
|
30
|
+
When I relocate the annotations using "scaf.yml", "seq.fna" and "anno.gff"
|
31
|
+
Then the result should be:
|
32
|
+
"""
|
33
|
+
##gff-version 3
|
34
|
+
scaffold . CDS 1 6 . + 1 ID=gene1
|
35
|
+
scaffold . CDS 31 36 . + 1 ID=gene2
|
36
|
+
"""
|
37
|
+
|
@@ -2,6 +2,8 @@ require 'delegate'
|
|
2
2
|
require 'scaffolder'
|
3
3
|
require 'bio'
|
4
4
|
|
5
|
+
require 'scaffolder/extensions'
|
6
|
+
|
5
7
|
class Scaffolder::AnnotationLocator < DelegateClass(Array)
|
6
8
|
|
7
9
|
def initialize(scaffold_file,sequence_file,gff_file)
|
@@ -10,37 +12,42 @@ class Scaffolder::AnnotationLocator < DelegateClass(Array)
|
|
10
12
|
@gff_file = gff_file
|
11
13
|
|
12
14
|
updated_records = Array.new
|
13
|
-
scaffold.inject(0) do |
|
15
|
+
scaffold.inject(0) do |prior_length,entry|
|
14
16
|
|
15
17
|
if entry.entry_type == :sequence
|
16
|
-
|
17
|
-
update_record(record,entry,length)
|
18
|
-
end
|
19
|
-
end
|
18
|
+
records[entry.source].each do |record|
|
20
19
|
|
21
|
-
|
22
|
-
|
20
|
+
# Don't include this record if it overlaps with an insert
|
21
|
+
next if record.overlap?(entry.inserts.map{|i| (i.open..i.close)})
|
23
22
|
|
24
|
-
|
25
|
-
|
23
|
+
# Skip this record if it lies in the start or stop trimmed regions
|
24
|
+
next if record.start < entry.start
|
25
|
+
next if record.end > entry.stop
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
27
|
+
# Update record location by size differences of prior inserts
|
28
|
+
entry.inserts.select {|i| i.close < record.start }.each do |insert|
|
29
|
+
record.change_position_by insert.size_diff
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
record.start = scaffold_entry.sequence.length - (record.start - 1)
|
32
|
+
# Decrease record position by distance contig is trimmed at start
|
33
|
+
record.change_position_by(1 - entry.start)
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
35
|
+
# Reverse complement record positions if contig is reversed
|
36
|
+
record.reverse_complement_by entry.sequence.length if entry.reverse
|
37
|
+
|
38
|
+
# Increase record position by length of prior contigs
|
39
|
+
record.change_position_by prior_length
|
38
40
|
|
39
|
-
|
40
|
-
record.end += prior_length
|
41
|
+
record.seqname = "scaffold"
|
41
42
|
|
42
|
-
|
43
|
-
|
43
|
+
updated_records << record
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
prior_length + entry.sequence.length
|
48
|
+
end
|
49
|
+
|
50
|
+
super updated_records
|
44
51
|
end
|
45
52
|
|
46
53
|
def scaffold
|
@@ -55,8 +62,4 @@ class Scaffolder::AnnotationLocator < DelegateClass(Array)
|
|
55
62
|
end
|
56
63
|
end
|
57
64
|
|
58
|
-
def self.flip_strand(strand)
|
59
|
-
strand == '+' ? '-' : '+'
|
60
|
-
end
|
61
|
-
|
62
65
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Scaffolder::GffRecordHelper
|
2
|
+
|
3
|
+
def flip_strand
|
4
|
+
self.strand = (self.strand == '+' ? '-' : '+')
|
5
|
+
end
|
6
|
+
|
7
|
+
def change_position_by(distance)
|
8
|
+
self.start += distance
|
9
|
+
self.end += distance
|
10
|
+
end
|
11
|
+
|
12
|
+
def reverse_complement_by(distance)
|
13
|
+
self.end = distance - (self.end - 1)
|
14
|
+
self.start = distance - (self.start - 1)
|
15
|
+
|
16
|
+
self.end, self.start = self.start, self.end
|
17
|
+
self.flip_strand
|
18
|
+
end
|
19
|
+
|
20
|
+
def overlap?(*ranges)
|
21
|
+
ranges.flatten.any? do |range|
|
22
|
+
range.include?(self.start) || range.include?(self.end)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{scaffolder-annotation-locator}
|
8
|
-
s.version = "0.0
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Michael Barton"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-06-10}
|
13
13
|
s.description = %q{Build a genome scaffold using scaffolder and a set of annotated contigs. This tool updates the locations of the contig annotations using the scaffolder template as a base.}
|
14
14
|
s.email = %q{mail@michaelbarton.me.uk}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -23,12 +23,18 @@ Gem::Specification.new do |s|
|
|
23
23
|
"README.rdoc",
|
24
24
|
"Rakefile",
|
25
25
|
"VERSION",
|
26
|
-
"features/
|
26
|
+
"features/inserts.feature",
|
27
|
+
"features/multiple-contigs.feature",
|
28
|
+
"features/single-contig.feature",
|
27
29
|
"features/step_definitions/scaffolder-annotation-locator_steps.rb",
|
28
30
|
"features/support/env.rb",
|
31
|
+
"features/unresolved.feature",
|
29
32
|
"lib/scaffolder/annotation_locator.rb",
|
33
|
+
"lib/scaffolder/extensions.rb",
|
34
|
+
"lib/scaffolder/gff_record_helper.rb",
|
30
35
|
"scaffolder-annotation-locator.gemspec",
|
31
36
|
"spec/scaffolder/annotation_locator_spec.rb",
|
37
|
+
"spec/scaffolder/gff_record_helper_spec.rb",
|
32
38
|
"spec/spec_helper.rb",
|
33
39
|
"spec/support/gff_attribute_matcher.rb"
|
34
40
|
]
|
@@ -37,41 +43,36 @@ Gem::Specification.new do |s|
|
|
37
43
|
s.require_paths = ["lib"]
|
38
44
|
s.rubygems_version = %q{1.3.7}
|
39
45
|
s.summary = %q{Update locations of gff3 annotations from a scaffolder template}
|
40
|
-
s.test_files = [
|
41
|
-
"spec/scaffolder/annotation_locator_spec.rb",
|
42
|
-
"spec/spec_helper.rb",
|
43
|
-
"spec/support/gff_attribute_matcher.rb"
|
44
|
-
]
|
45
46
|
|
46
47
|
if s.respond_to? :specification_version then
|
47
48
|
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
48
49
|
s.specification_version = 3
|
49
50
|
|
50
51
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
51
|
-
s.add_runtime_dependency(%q<scaffolder>, ["
|
52
|
+
s.add_runtime_dependency(%q<scaffolder>, ["= 0.4.3"])
|
52
53
|
s.add_development_dependency(%q<bundler>, ["~> 1.0"])
|
53
54
|
s.add_development_dependency(%q<jeweler>, ["~> 1.5"])
|
54
55
|
s.add_development_dependency(%q<rspec>, ["~> 2.4"])
|
55
|
-
s.add_development_dependency(%q<scaffolder-test-helpers>, ["= 0.
|
56
|
+
s.add_development_dependency(%q<scaffolder-test-helpers>, ["= 0.3.0"])
|
56
57
|
s.add_development_dependency(%q<cucumber>, ["~> 0.9"])
|
57
58
|
s.add_development_dependency(%q<aruba>, ["~> 0.2"])
|
58
59
|
s.add_development_dependency(%q<yard>, ["~> 0.6"])
|
59
60
|
else
|
60
|
-
s.add_dependency(%q<scaffolder>, ["
|
61
|
+
s.add_dependency(%q<scaffolder>, ["= 0.4.3"])
|
61
62
|
s.add_dependency(%q<bundler>, ["~> 1.0"])
|
62
63
|
s.add_dependency(%q<jeweler>, ["~> 1.5"])
|
63
64
|
s.add_dependency(%q<rspec>, ["~> 2.4"])
|
64
|
-
s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.
|
65
|
+
s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.3.0"])
|
65
66
|
s.add_dependency(%q<cucumber>, ["~> 0.9"])
|
66
67
|
s.add_dependency(%q<aruba>, ["~> 0.2"])
|
67
68
|
s.add_dependency(%q<yard>, ["~> 0.6"])
|
68
69
|
end
|
69
70
|
else
|
70
|
-
s.add_dependency(%q<scaffolder>, ["
|
71
|
+
s.add_dependency(%q<scaffolder>, ["= 0.4.3"])
|
71
72
|
s.add_dependency(%q<bundler>, ["~> 1.0"])
|
72
73
|
s.add_dependency(%q<jeweler>, ["~> 1.5"])
|
73
74
|
s.add_dependency(%q<rspec>, ["~> 2.4"])
|
74
|
-
s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.
|
75
|
+
s.add_dependency(%q<scaffolder-test-helpers>, ["= 0.3.0"])
|
75
76
|
s.add_dependency(%q<cucumber>, ["~> 0.9"])
|
76
77
|
s.add_dependency(%q<aruba>, ["~> 0.2"])
|
77
78
|
s.add_dependency(%q<yard>, ["~> 0.6"])
|
@@ -5,7 +5,7 @@ describe Scaffolder::AnnotationLocator do
|
|
5
5
|
def relocate(scaffold,records)
|
6
6
|
@scaffold_file, @sequence_file = generate_scaffold_files(scaffold)
|
7
7
|
described_class.new(@scaffold_file.path, @sequence_file.path,
|
8
|
-
generate_gff3_file(records))
|
8
|
+
generate_gff3_file(records).path)
|
9
9
|
end
|
10
10
|
|
11
11
|
before do
|
@@ -73,6 +73,115 @@ describe Scaffolder::AnnotationLocator do
|
|
73
73
|
|
74
74
|
end
|
75
75
|
|
76
|
+
describe "with an insert before an annotation" do
|
77
|
+
|
78
|
+
subject do
|
79
|
+
relocate([@contig.clone.inserts(:open => 1, :close => 2, :sequence => 'TTT')],
|
80
|
+
[@record])
|
81
|
+
end
|
82
|
+
|
83
|
+
it{ should set_the_attribute(:seqname => 'scaffold') }
|
84
|
+
it{ should set_the_attribute(:phase => 1) }
|
85
|
+
it{ should set_the_attribute(:strand => '+') }
|
86
|
+
|
87
|
+
it{ should set_the_attribute(:start => 5).only_for_the(:first) }
|
88
|
+
it{ should set_the_attribute(:end => 7).only_for_the(:first) }
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
describe "with an insert after an annotation" do
|
93
|
+
|
94
|
+
subject do
|
95
|
+
relocate([@contig.clone.
|
96
|
+
inserts(:open => 7, :close => 8, :sequence => 'TTT').
|
97
|
+
sequence('ATGTTTCCC')],
|
98
|
+
[@record])
|
99
|
+
end
|
100
|
+
|
101
|
+
it{ should set_the_attribute(:seqname => 'scaffold') }
|
102
|
+
it{ should set_the_attribute(:phase => 1) }
|
103
|
+
it{ should set_the_attribute(:strand => '+') }
|
104
|
+
|
105
|
+
it{ should set_the_attribute(:start => 4).only_for_the(:first) }
|
106
|
+
it{ should set_the_attribute(:end => 6).only_for_the(:first) }
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "with an insert before and after an annotation" do
|
111
|
+
|
112
|
+
subject do
|
113
|
+
relocate([@contig.clone.
|
114
|
+
inserts(:open => 1, :close => 2, :sequence => 'TTT').
|
115
|
+
inserts(:open => 7, :close => 8, :sequence => 'TTT').
|
116
|
+
sequence('ATGTTTCCC')],
|
117
|
+
[@record])
|
118
|
+
end
|
119
|
+
|
120
|
+
it{ should set_the_attribute(:seqname => 'scaffold') }
|
121
|
+
it{ should set_the_attribute(:phase => 1) }
|
122
|
+
it{ should set_the_attribute(:strand => '+') }
|
123
|
+
|
124
|
+
it{ should set_the_attribute(:start => 5).only_for_the(:first) }
|
125
|
+
it{ should set_the_attribute(:end => 7).only_for_the(:first) }
|
126
|
+
|
127
|
+
end
|
128
|
+
|
129
|
+
describe "reversed with an insert before an annotation" do
|
130
|
+
|
131
|
+
subject do
|
132
|
+
contig = @contig.clone.
|
133
|
+
reverse(true).
|
134
|
+
inserts(:open => 1, :close => 2, :sequence => 'TTT')
|
135
|
+
relocate([contig],[@record])
|
136
|
+
end
|
137
|
+
|
138
|
+
it{ should set_the_attribute(:seqname => 'scaffold') }
|
139
|
+
it{ should set_the_attribute(:phase => 1) }
|
140
|
+
it{ should set_the_attribute(:strand => '-') }
|
141
|
+
|
142
|
+
it{ should set_the_attribute(:start => 1).only_for_the(:first) }
|
143
|
+
it{ should set_the_attribute(:end => 3).only_for_the(:first) }
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "with an insert overlapping with an annotation" do
|
148
|
+
|
149
|
+
subject do
|
150
|
+
relocate([@contig.clone.
|
151
|
+
inserts(:open => 3, :close => 5, :sequence => 'TTT')],
|
152
|
+
[@record])
|
153
|
+
end
|
154
|
+
|
155
|
+
it "should not include this annotation" do
|
156
|
+
subject.should be_empty
|
157
|
+
end
|
158
|
+
|
159
|
+
end
|
160
|
+
|
161
|
+
describe "with an annotation in a start trimmed region" do
|
162
|
+
|
163
|
+
subject do
|
164
|
+
relocate([@contig.clone.start(5)],[@record])
|
165
|
+
end
|
166
|
+
|
167
|
+
it "should not include this annotation" do
|
168
|
+
subject.should be_empty
|
169
|
+
end
|
170
|
+
|
171
|
+
end
|
172
|
+
|
173
|
+
describe "with an annotation in a stop trimmed region" do
|
174
|
+
|
175
|
+
subject do
|
176
|
+
relocate([@contig.clone.stop(5)],[@record])
|
177
|
+
end
|
178
|
+
|
179
|
+
it "should not include this annotation" do
|
180
|
+
subject.should be_empty
|
181
|
+
end
|
182
|
+
|
183
|
+
end
|
184
|
+
|
76
185
|
end
|
77
186
|
|
78
187
|
describe "relocating two contigs" do
|
@@ -203,16 +312,4 @@ describe Scaffolder::AnnotationLocator do
|
|
203
312
|
|
204
313
|
end
|
205
314
|
|
206
|
-
describe "#flip_strand" do
|
207
|
-
|
208
|
-
it "should return '+' when passed '-'" do
|
209
|
-
described_class.flip_strand('+').should == '-'
|
210
|
-
end
|
211
|
-
|
212
|
-
it "should return '-' when passed '+'" do
|
213
|
-
described_class.flip_strand('-').should == '+'
|
214
|
-
end
|
215
|
-
|
216
|
-
end
|
217
|
-
|
218
315
|
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
|
2
|
+
|
3
|
+
describe Scaffolder::GffRecordHelper do
|
4
|
+
|
5
|
+
it "should be included in Bio::GFF::GFF3::Record" do
|
6
|
+
Bio::GFF::GFF3::Record.ancestors.should include(described_class)
|
7
|
+
end
|
8
|
+
|
9
|
+
subject do
|
10
|
+
Bio::GFF::GFF3::Record.new(nil,nil,'CDS',1,3,nil,'+')
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#flip_strand" do
|
14
|
+
|
15
|
+
it "should change strand to '-' when flipped from '+'" do
|
16
|
+
subject.flip_strand
|
17
|
+
subject.strand.should == '-'
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should change strand to '+' when flipped from '-'" do
|
21
|
+
subject.strand = '-'
|
22
|
+
subject.flip_strand
|
23
|
+
subject.strand.should == '+'
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "#change_position_by" do
|
29
|
+
|
30
|
+
before do
|
31
|
+
subject.change_position_by 3
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should increase start position" do
|
35
|
+
subject.start.should == 4
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should increase end position" do
|
39
|
+
subject.end.should == 6
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
describe "#reverse_complement_by" do
|
45
|
+
|
46
|
+
before do
|
47
|
+
subject.reverse_complement_by 7
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should increase start position" do
|
51
|
+
subject.start.should == 5
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should increase end position" do
|
55
|
+
subject.end.should == 7
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should flip the strand" do
|
59
|
+
subject.strand.should == '-'
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
describe "#overlap?" do
|
65
|
+
|
66
|
+
it "should return false when no overlap with a single insert" do
|
67
|
+
subject.overlap?(4..6).should be_false
|
68
|
+
end
|
69
|
+
|
70
|
+
it "should return true when overlapping with a single insert" do
|
71
|
+
subject.overlap?(0..1).should be_true
|
72
|
+
subject.overlap?(3..4).should be_true
|
73
|
+
subject.overlap?(2..4).should be_true
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should return false when no overlap with multiple inserts" do
|
77
|
+
subject.overlap?([4..6,7..9]).should be_false
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should return true when overlapping with one of two inserts" do
|
81
|
+
subject.overlap?([0..1,4..6]).should be_true
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scaffolder-annotation-locator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 0
|
9
8
|
- 1
|
10
|
-
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Michael Barton
|
@@ -15,20 +15,21 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-06-10 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
requirement: &id001 !ruby/object:Gem::Requirement
|
23
23
|
none: false
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - "="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
hash:
|
27
|
+
hash: 9
|
28
28
|
segments:
|
29
29
|
- 0
|
30
30
|
- 4
|
31
|
-
|
31
|
+
- 3
|
32
|
+
version: 0.4.3
|
32
33
|
type: :runtime
|
33
34
|
name: scaffolder
|
34
35
|
prerelease: false
|
@@ -87,9 +88,9 @@ dependencies:
|
|
87
88
|
hash: 19
|
88
89
|
segments:
|
89
90
|
- 0
|
90
|
-
-
|
91
|
-
-
|
92
|
-
version: 0.
|
91
|
+
- 3
|
92
|
+
- 0
|
93
|
+
version: 0.3.0
|
93
94
|
type: :development
|
94
95
|
name: scaffolder-test-helpers
|
95
96
|
prerelease: false
|
@@ -155,12 +156,18 @@ files:
|
|
155
156
|
- README.rdoc
|
156
157
|
- Rakefile
|
157
158
|
- VERSION
|
158
|
-
- features/
|
159
|
+
- features/inserts.feature
|
160
|
+
- features/multiple-contigs.feature
|
161
|
+
- features/single-contig.feature
|
159
162
|
- features/step_definitions/scaffolder-annotation-locator_steps.rb
|
160
163
|
- features/support/env.rb
|
164
|
+
- features/unresolved.feature
|
161
165
|
- lib/scaffolder/annotation_locator.rb
|
166
|
+
- lib/scaffolder/extensions.rb
|
167
|
+
- lib/scaffolder/gff_record_helper.rb
|
162
168
|
- scaffolder-annotation-locator.gemspec
|
163
169
|
- spec/scaffolder/annotation_locator_spec.rb
|
170
|
+
- spec/scaffolder/gff_record_helper_spec.rb
|
164
171
|
- spec/spec_helper.rb
|
165
172
|
- spec/support/gff_attribute_matcher.rb
|
166
173
|
has_rdoc: true
|
@@ -197,7 +204,5 @@ rubygems_version: 1.3.7
|
|
197
204
|
signing_key:
|
198
205
|
specification_version: 3
|
199
206
|
summary: Update locations of gff3 annotations from a scaffolder template
|
200
|
-
test_files:
|
201
|
-
|
202
|
-
- spec/spec_helper.rb
|
203
|
-
- spec/support/gff_attribute_matcher.rb
|
207
|
+
test_files: []
|
208
|
+
|