bio-assembly 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/LICENSE.txt +1 -1
- data/README.rdoc +43 -3
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bio-assembly.gemspec +13 -5
- data/data/example.caf +2755 -0
- data/lib/bio-assembly.rb +4 -14
- data/lib/bio-assembly/ace.rb +11 -19
- data/lib/bio-assembly/caf.rb +124 -0
- data/lib/bio-assembly/contig.rb +5 -1
- data/lib/bio-assembly/maf.rb +15 -0
- data/lib/bio-assembly/read.rb +4 -0
- data/test/{test_bio-assembly.rb → test_bio-assembly-ace.rb} +15 -7
- data/test/test_bio-assembly-caf.rb +54 -0
- metadata +47 -25
data/lib/bio-assembly.rb
CHANGED
@@ -5,11 +5,10 @@ require 'bio-assembly/read'
|
|
5
5
|
module Bio
|
6
6
|
|
7
7
|
class Assembly
|
8
|
-
|
9
|
-
|
8
|
+
|
10
9
|
@@formats = { }
|
11
10
|
|
12
|
-
def self.
|
11
|
+
def self.open(path, format)
|
13
12
|
streamer = @@formats[format]
|
14
13
|
if streamer
|
15
14
|
streamer.new(path)
|
@@ -22,21 +21,11 @@ module Bio
|
|
22
21
|
@@formats[name] = self
|
23
22
|
end
|
24
23
|
|
25
|
-
def contigs
|
26
|
-
# use each_contig to stream large files
|
27
|
-
parse_whole_file if @contigs.empty?
|
28
|
-
@contigs
|
29
|
-
end
|
30
|
-
|
31
24
|
def each_contig
|
32
25
|
# implemented by each format subclass
|
33
26
|
end
|
34
27
|
|
35
28
|
private
|
36
|
-
|
37
|
-
def num_contigs
|
38
|
-
contigs.size
|
39
|
-
end
|
40
29
|
|
41
30
|
def num_reads
|
42
31
|
read_num = 0
|
@@ -52,4 +41,5 @@ module Bio
|
|
52
41
|
|
53
42
|
end
|
54
43
|
|
55
|
-
require 'bio-assembly/ace'
|
44
|
+
require 'bio-assembly/ace'
|
45
|
+
require 'bio-assembly/caf'
|
data/lib/bio-assembly/ace.rb
CHANGED
@@ -9,22 +9,15 @@ class Assembly
|
|
9
9
|
|
10
10
|
def initialize(path)
|
11
11
|
@file = File.new(path, 'r')
|
12
|
-
@contigs = Array.new
|
13
12
|
parse_as
|
14
13
|
end
|
15
14
|
|
16
15
|
def each_contig
|
17
|
-
# check if file is already parsed
|
18
|
-
if @total_num_contigs.to_i == @contigs.size
|
19
|
-
@contigs.each{ |contig| yield contig }
|
20
|
-
else
|
21
16
|
each_identifier do |identifier, attrs|
|
22
17
|
next unless identifier == 'CO'
|
23
18
|
contig = parse_contig(attrs)
|
24
|
-
@contigs.push contig
|
25
19
|
yield(contig)
|
26
20
|
end
|
27
|
-
end
|
28
21
|
end
|
29
22
|
|
30
23
|
def to_ace
|
@@ -35,8 +28,9 @@ class Assembly
|
|
35
28
|
end
|
36
29
|
|
37
30
|
private
|
31
|
+
|
38
32
|
def parse_contig(attrs)
|
39
|
-
contig =
|
33
|
+
contig = Contig.new
|
40
34
|
contig.name, base_num, @num_reads, base_segments_num, contig.orientation = attrs.split(" ")
|
41
35
|
# keep track of the number of RD identifiers parsed
|
42
36
|
@num_rds_parsed = 0
|
@@ -83,7 +77,7 @@ class Assembly
|
|
83
77
|
|
84
78
|
# parse read meta data
|
85
79
|
def parse_af(contig, attrs)
|
86
|
-
read =
|
80
|
+
read = Read.new
|
87
81
|
read.name , read.orientation, read.from = attrs.split(" ")
|
88
82
|
contig.add_read read
|
89
83
|
end
|
@@ -139,11 +133,9 @@ class Assembly
|
|
139
133
|
# parse run meta data - ignored
|
140
134
|
def parse_ct(contig, attrs)
|
141
135
|
end
|
142
|
-
|
143
|
-
end # => end class Ace
|
144
136
|
|
145
|
-
#
|
146
|
-
|
137
|
+
# extend contig class and write ace specific methods for contig objects
|
138
|
+
class Contig < Bio::Assembly::Contig
|
147
139
|
|
148
140
|
def to_ace
|
149
141
|
ace = ""
|
@@ -176,10 +168,10 @@ class Assembly
|
|
176
168
|
ace
|
177
169
|
end
|
178
170
|
|
179
|
-
|
171
|
+
end # => end Contig class
|
180
172
|
|
181
|
-
|
182
|
-
|
173
|
+
# extend Read class to add ace specific methods for read objects
|
174
|
+
class Read < Bio::Assembly::Read
|
183
175
|
|
184
176
|
attr_accessor :base_sequences
|
185
177
|
|
@@ -238,7 +230,7 @@ class Assembly
|
|
238
230
|
end
|
239
231
|
|
240
232
|
def <=>(other)
|
241
|
-
unless other.kind_of?(Bio::Assembly::Read::BaseSequence)
|
233
|
+
unless other.kind_of?(Bio::Assembly::Ace::Read::BaseSequence)
|
242
234
|
raise "[Error] markers are not comparable"
|
243
235
|
end
|
244
236
|
if self.from == other.from
|
@@ -251,8 +243,8 @@ class Assembly
|
|
251
243
|
|
252
244
|
end # => end BaseSequence Class
|
253
245
|
|
254
|
-
|
255
|
-
|
246
|
+
end # => end Read Class
|
247
|
+
end # => end class Ace
|
256
248
|
|
257
249
|
end # => end class Assembly
|
258
250
|
end # => end module Bio
|
@@ -0,0 +1,124 @@
|
|
1
|
+
module Bio
|
2
|
+
class Assembly
|
3
|
+
class Caf < Bio::Assembly
|
4
|
+
|
5
|
+
# register parser with superclass
|
6
|
+
register_parser :caf
|
7
|
+
|
8
|
+
def initialize(path)
|
9
|
+
@file = File.new(path, 'r')
|
10
|
+
end
|
11
|
+
# iterator that return one contig at a time
|
12
|
+
def each_contig
|
13
|
+
contig = Contig.new
|
14
|
+
feature = Hash.new
|
15
|
+
@file.each do |line|
|
16
|
+
feature = parse_blocks(line,feature) # search the file for CAF blocks like DNA and Sequence
|
17
|
+
if feature[:type] == :read and feature[:parsed]
|
18
|
+
read = convert_to_read(feature)
|
19
|
+
contig.add_read(read)
|
20
|
+
feature = Hash.new
|
21
|
+
elsif feature[:type] == :contig and feature[:parsed]
|
22
|
+
contig = convert_to_contig(contig,feature)
|
23
|
+
yield contig
|
24
|
+
contig = Contig.new
|
25
|
+
feature = Hash.new
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
class Contig < Bio::Assembly::Contig
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
class Read < Bio::Assembly::Read
|
35
|
+
attr_accessor :quality
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def parse_blocks(line,feat)
|
41
|
+
keywords = line.split("\s")
|
42
|
+
case keywords[0]
|
43
|
+
when "DNA" then parse_dna(feat)
|
44
|
+
when "Sequence" then parse_seq(feat,line)
|
45
|
+
end
|
46
|
+
return feat
|
47
|
+
end
|
48
|
+
|
49
|
+
# parse DNA sequence and BaseQuality
|
50
|
+
def parse_dna(feat)
|
51
|
+
feat[:seq] = @file.gets("\n\n").tr("\n","")
|
52
|
+
newline = @file.gets
|
53
|
+
keywords = newline.split("\s")
|
54
|
+
feat[:qual] = @file.gets("\n\n").tr("\n"," ").rstrip if keywords[0] == "BaseQuality"
|
55
|
+
feat[:parsed] = true if feat[:type] == :contig
|
56
|
+
end
|
57
|
+
|
58
|
+
# parse Sequence information like Name, Clipping, Strand and Type
|
59
|
+
def parse_seq(feat,line)
|
60
|
+
feat[:name] = line.split(":")[1].tr("\s|\n","")
|
61
|
+
sequence_block = @file.gets("\n\n")
|
62
|
+
sequence_block.split("\n").each do |l|
|
63
|
+
keywords = l.split("\s")
|
64
|
+
case keywords[0]
|
65
|
+
when "Clipping" then parse_clipping(feat,l)
|
66
|
+
when "Strand" then parse_strand(feat,l)
|
67
|
+
when "Assembled_from" then parse_af(feat,l)
|
68
|
+
when "Is_read" then feat[:type] = :read
|
69
|
+
when "Is_contig" then feat[:type] = :contig
|
70
|
+
end
|
71
|
+
end
|
72
|
+
feat[:parsed] = true if feat[:type] == :read
|
73
|
+
end
|
74
|
+
|
75
|
+
# parse read coordinates for quality clipping
|
76
|
+
def parse_clipping(feat,line)
|
77
|
+
val = line.chomp.split("\s")
|
78
|
+
feat[:clipping_start] = val[-2]
|
79
|
+
feat[:clipping_end] = val[-1]
|
80
|
+
end
|
81
|
+
|
82
|
+
# parse sequence strand information
|
83
|
+
def parse_strand(feat,line)
|
84
|
+
feat[:orientation] = line.split("\s")[1].tr("\n","")
|
85
|
+
end
|
86
|
+
|
87
|
+
# parse Assembled_from lines in Contig. These lines also include read alignment positions within the contig
|
88
|
+
def parse_af(feat,line)
|
89
|
+
if feat[:af].nil?
|
90
|
+
feat[:af] = [line]
|
91
|
+
else
|
92
|
+
feat[:af] << line
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# convert a generic feature into a Caf::Read object
|
97
|
+
def convert_to_read(feature)
|
98
|
+
read = Read.new
|
99
|
+
read.name = feature[:name]
|
100
|
+
read.seq = feature[:seq]
|
101
|
+
read.quality = feature[:qual]
|
102
|
+
read.clear_range_from = feature[:clipping_start]
|
103
|
+
read.clear_range_to = feature[:clipping_end]
|
104
|
+
read.orientation = feature[:orientation]
|
105
|
+
return read
|
106
|
+
end
|
107
|
+
|
108
|
+
# convert a generic feature into a Caf::Contig object
|
109
|
+
def convert_to_contig(contig,feature)
|
110
|
+
contig.name = feature[:name]
|
111
|
+
contig.seq = feature[:seq]
|
112
|
+
contig.quality = feature[:qual]
|
113
|
+
# assign reads ranges using Assembled_from lines in Contig
|
114
|
+
feature[:af].each do |af|
|
115
|
+
val = af.split("\s")
|
116
|
+
contig.reads[val[-5]].from = val[-4]
|
117
|
+
contig.reads[val[-5]].to = val[-3]
|
118
|
+
end
|
119
|
+
return contig
|
120
|
+
end
|
121
|
+
|
122
|
+
end # end Caf
|
123
|
+
end # end Assembly
|
124
|
+
end # end Bio
|
data/lib/bio-assembly/contig.rb
CHANGED
@@ -10,7 +10,7 @@ module Bio
|
|
10
10
|
def initialize(str="")
|
11
11
|
@reads = Hash.new
|
12
12
|
@seq = Bio::Sequence::NA.new(str)
|
13
|
-
# counter for
|
13
|
+
# counter for Reads
|
14
14
|
@rds_parsed = 0
|
15
15
|
end
|
16
16
|
|
@@ -54,6 +54,10 @@ module Bio
|
|
54
54
|
seq.length
|
55
55
|
end
|
56
56
|
|
57
|
+
def seq=(str)
|
58
|
+
@seq = Bio::Sequence::NA.new(str)
|
59
|
+
end
|
60
|
+
|
57
61
|
def num_base_segments
|
58
62
|
num_base_sequences = 0
|
59
63
|
each_read do |read|
|
data/lib/bio-assembly/read.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
|
-
class
|
3
|
+
class TestBioAssemblyAce < Test::Unit::TestCase
|
4
4
|
|
5
5
|
def setup
|
6
6
|
ace_filename = File.join('data', 'example1.ace')
|
7
|
-
@obj = Bio::Assembly.
|
7
|
+
@obj = Bio::Assembly.open(ace_filename, :ace)
|
8
8
|
|
9
9
|
# pick a contig to do in depth tests on
|
10
10
|
@contig = nil
|
11
|
-
@
|
11
|
+
@tot_contig = []
|
12
|
+
@obj.each_contig do |c|
|
13
|
+
@contig = c if c.name.to_i == 5
|
14
|
+
@tot_contig << c
|
15
|
+
end
|
12
16
|
|
13
17
|
# pick a read to do in depth tests on
|
14
18
|
@read = nil
|
@@ -17,13 +21,13 @@ class TestBioAssembly < Test::Unit::TestCase
|
|
17
21
|
|
18
22
|
def test_num_contigs_parsed
|
19
23
|
contigs_parsed = 13
|
20
|
-
assert_equal(contigs_parsed, @
|
24
|
+
assert_equal(contigs_parsed, @tot_contig.size)
|
21
25
|
end
|
22
26
|
|
23
27
|
def test_num_reads_parsed
|
24
28
|
reads_parsed_known = 1760
|
25
29
|
reads_parsed = 0
|
26
|
-
@
|
30
|
+
@tot_contig.each { |c| reads_parsed += c.reads.size }
|
27
31
|
assert_equal(reads_parsed_known, reads_parsed)
|
28
32
|
end
|
29
33
|
|
@@ -34,12 +38,16 @@ class TestBioAssembly < Test::Unit::TestCase
|
|
34
38
|
|
35
39
|
def test_contig_seq
|
36
40
|
seq = "TTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTA*TTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTTAATTTCAAGATGATTCACAGGTTTGTTGCCTCAAAAGAAAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGACAAGTTGACTCAACTTTTCAC*TTTATGTTATATTGTAAGGATGTGACTTTGTTTTGGAAAATTATATTTAATTTGATAATTAACCAATATAAAAAAGATAAACCAAAAGCTATAAGTCGTAAATAAGGACATTGGAAACAAGAAATATTCTCTCCTGAACATTATTTTAAATTATGCGCAATATGCAAATTTATAAGTGTTAAGTTAAAAAGATTGTTAATGGTTCTGTTTATTACCCAAAGACTTTTTTAAAGTTTAAGTCGTTGCTAAGAGTGCAGCGTTTAGACAAATAAAAATGCAATAATCTTCTCGCTCGGGAGCTATGTCCCTCGCATAATATTCTTCAAAGTGTACAGTAAATATTCTAGAAAAGTGAAGTGTGAAAAAGATATATTGCTTGTTTTTATATTTTGTTAATACAACAAAACTTCAAAAACCTGCGGTGGGGGGGGGGGGATAGTCACTTCCGTCACCTTCACCCCTCTCGTTCACTATACTCCCTCGCCCTGGCGTAATGATGGGGGGATTGGGGGTAGTTGCCCCTTAATAAAGTTCAAACTTGATTTATTTCTAACTCGATACCAGTGATTTACAAATGTTTCTGAAATGGCATGGTTTTCCCTAATAAATGCCTAAAAACCCTGAGCTGAGCCCACGCCAATT"
|
37
|
-
assert_equal(seq, @contig.seq.to_s)
|
41
|
+
assert_equal(seq, @contig.seq.to_s.upcase)
|
42
|
+
assert_instance_of(Bio::Sequence::NA,@contig.seq)
|
43
|
+
assert_instance_of(Bio::Assembly::Ace::Contig,@contig)
|
38
44
|
end
|
39
45
|
|
40
46
|
def test_read_seq
|
41
47
|
read_seq = 'GAAAAAAAAAGGCAGAAGTTTAATCAAAACGGATTTTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTATTTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTT*ATTTC*AGATGATTCACAGGTTTGTTGCCTCAAAAG*AAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGGACAAGTTGACTTCACCT'
|
42
|
-
assert_equal(read_seq, @read.seq.to_s)
|
48
|
+
assert_equal(read_seq, @read.seq.to_s.upcase)
|
49
|
+
assert_instance_of(Bio::Sequence::NA,@read.seq)
|
50
|
+
assert_instance_of(Bio::Assembly::Ace::Read,@read)
|
43
51
|
end
|
44
52
|
|
45
53
|
def test_read_range
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestBioAssemblyCaf < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
path = File.join('data','example.caf')
|
7
|
+
@caf = Bio::Assembly.open(path,:caf)
|
8
|
+
@contigs = []
|
9
|
+
@caf.each_contig {|c| @contigs << c}
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_contigs
|
13
|
+
assert_equal(2,@contigs.size)
|
14
|
+
assert_equal('Contig1',@contigs[0].name)
|
15
|
+
assert_equal('Contig2',@contigs[1].name)
|
16
|
+
assert_instance_of(Bio::Assembly::Caf::Contig,@contigs[0])
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_read_per_contig
|
20
|
+
assert_equal(21,@contigs[0].reads.size)
|
21
|
+
assert_equal(21,@contigs[0].num_reads)
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_contig_seq
|
25
|
+
seq = "TTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTATANACTGTGCGTGCGCCACCATGCCTGGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTCGAGCTCCTGACCTAGGATTACAGGCCTAAGCCACCGCACCCGGCATGATGGGTCTTTATTCTTCAAAGCAGGAGGAAGGGATCCTAGAAAAACAGAGACAAGGCCAAACATGGTAGCTCACACCTGTAATNNNNNCACTTTGGGAGGCCAGTGCGGGTGAATCACGANGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAANCACCGTCTCTACTAAAATAAAAAGAAATTAGCTGGGTGTCGTGGCAGGTGCNTGTAATCCCAGCCACTTGGGAAGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGTGGAGGTTGCAGTGAGCCGAGATCACGCGACTGCACTCCAGCCCAACCAATAGTGTGAGACTCTGTCTCGAAAAAAAAAAAGCAGAGACAAGACNACTAGTACAGTACTTACAGGGTTATTATGATGATTAAATGAGAGAATAGCTGTGAGGTGATTGATATAGTGCTGTGCTTAATACAAACTATCATTTTATTATACGGGTTGAGTGTNTCTAATCTGAAAATCCAAAATTAGAAATGCTCTACAGTCTGAAACTTTTTTGAGCACCGACCTAATGTTCAAAGGAAGTGCTTATTGGAGCATTATGGGTTGTTAGATTTTTGGGTTGGGAATATTCAACCAGTAAGTACTATAAAATGCAAATATTCCAAAAAAAATCTGAAATCTGAAACATTTCTGGTCCTAAGCAAGCATTTTGCAAAGGGATACGCAACCTGTAGTACGTTCTTTATCATTGTTTTAAGTAGTTAATATATTGTGGTACAGATTCTGAGGTGGTATAGCAAATTCGATTGTATTATTAAAAAGCATATTTATATTTTGAGAGCTTGCTTAGGATTATTGGAGAGAATAAAACAGTGAAGCTTTGGTGTTATGAGGGAATTTTAGATAGAAAAGTGCAGTTTTTCAGTTCATGCTCTTTCATTTTTTACTCCCTCAGGTTAAAGCTNGAAGCTCAACAAAGATATAGTGATCTCTGTGGGCATTTATAATCTGGTCCAGAAGGCTCTNNANNCNNNTCCNNNNNNNCTNNANNNNNNNACAAATGAACCAGTGAAAACCAAGACCCGGACCTTTAATACAAGTACAGGCGGTTTGCTTCTGCCTAGCGATACCAAGAGGTCTCAGGTAGGTAGAGATGCCTTTTGTTGTTGTTGTTTTTGAGACAGGGTCTCATTGTGTCGCCCAGGCTGGAGTGCAGTGGGGCGAACATGACTCGCTACAGCCTTGACCTCCTGGACTCAAGCGATCCTTCTGTCTCAGCCTCCCAAGTAGCTGGGATCACAGGCATGTGACATCACACCCAGCTAATTTATTTATTTATTTATTTTTTAAGAGACTGGATCGACTGGGCACAGTGGCTCATGCCTGTAATCCCANCACTTTGGGAGGCCGAGGCAGGTGGATTACCGAGGTCAGGAGTTCAAGACCAGCCTGACCAACATGGAGAAACCCCATCTCTACTAAAAATACAAAATGAGCTGGGCATGGTGGTGCATGCCTGTAATCC"
|
26
|
+
assert_equal(seq,@contigs[0].seq.to_s.upcase)
|
27
|
+
assert_instance_of(Bio::Sequence::NA,@contigs[0].seq)
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_contig_qual
|
31
|
+
qual = "4 4 8 4 6 10 13 21 24 25 33 33 33 30 27 21 15 27 19 30 30 33 33 30 21 21 10 9 17 11 27 27 37 38 33 35 35 35 35 44 45 45 38 37 37 45 45 45 43 43 43 45 45 45 45 45 45 45 21 21 23 30 30 34 37 37 38 45 32 45 37 41 37 45 30 45 45 34 34 34 32 29 22 32 32 45 45 45 33 28 28 37 37 34 34 34 35 34 34 34 34 34 34 37 37 40 41 37 34 37 37 37 32 24 22 27 29 25 27 20 21 21 21 27 45 41 40 40 40 42 34 34 37 41 45 51 45 45 37 30 37 41 41 37 36 28 30 30 30 22 33 33 35 33 33 41 51 51 45 39 39 39 30 28 33 34 34 39 34 41 34 34 41 33 33 39 39 39 33 33 33 30 33 33 33 30 29 19 19 24 25 32 33 45 36 36 36 36 41 41 30 30 30 37 37 43 51 51 51 51 45 37 37 30 37 37 37 51 51 51 51 51 37 37 28 28 10 10 10 13 10 10 10 9 9 9 21 21 28 37 37 37 33 33 33 33 33 33 34 34 34 33 33 33 33 21 25 25 22 22 29 29 33 33 33 33 31 31 27 28 17 23 23 28 26 24 32 10 10 10 15 15 26 32 32 37 45 45 32 45 33 26 24 32 37 37 35 34 37 35 37 34 37 37 40 37 37 45 45 38 38 45 45 40 37 37 37 34 30 30 28 28 28 30 37 37 34 34 34 34 34 33 31 31 25 21 19 22 21 25 30 32 32 30 22 22 25 28 24 25 26 17 17 23 27 27 33 31 31 34 38 38 38 38 34 26 17 15 8 8 11 16 25 31 32 33 29 26 32 29 33 34 34 34 36 33 34 34 31 27 31 34 38 45 45 36 37 37 37 36 36 38 34 32 28 25 22 22 21 17 18 29 29 23 23 22 27 25 20 18 11 11 15 15 16 23 21 17 19 24 24 24 31 31 31 33 33 33 31 33 33 29 29 29 29 29 31 31 31 23 24 19 12 10 16 10 10 20 10 10 13 15 30 23 29 23 28 18 10 10 16 21 18 19 19 24 24 12 11 9 9 10 21 23 31 31 33 28 28 14 17 17 28 21 30 24 30 28 22 26 23 19 10 10 12 23 22 23 19 12 10 9 9 10 19 24 29 30 34 34 34 34 34 29 25 25 32 26 31 31 32 20 17 15 12 12 4 4 4 17 23 33 30 24 18 13 18 16 12 16 8 9 9 19 15 15 4 4 4 7 11 6 6 6 7 8 9 17 12 13 19 23 25 13 13 13 19 21 29 32 30 26 20 20 12 9 9 8 9 8 8 15 24 17 15 8 8 9 17 16 4 4 4 8 8 13 15 23 14 9 8 8 10 10 17 23 21 17 12 13 22 20 19 15 11 11 9 8 8 8 8 8 8 10 9 9 7 8 8 10 12 10 10 12 4 4 4 4 4 4 10 10 10 10 10 9 8 8 7 8 9 11 9 9 4"
|
32
|
+
assert_equal(qual,@contigs[1].quality)
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_read
|
36
|
+
read = nil
|
37
|
+
assert_nothing_raised do
|
38
|
+
read = @contigs[0].find_read_by_name("22ak93c2.r1t")
|
39
|
+
end
|
40
|
+
seq = "GTCGCNCATAAGATTACGAGATCTCGAGCTCGGTACCCTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTATAGACTGTGCGTGCGCCACCATGCCTGGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTCGAGCTCCTGACCTAGGATTACAGGCCTAAGCCACCGCACCCGGCATGATGGGTCTTTATTCTTCAAAGCAGGAGGAAGGGATCCTAGAAAAACAGAGACAAGGCCAAACATGGTAGCTCACACCTGTAATCCCANCACTTTGGGAGGCCAGTGCGGGTGAATCACGAAGTCAGGAGTTCAAGACCACCCTGGCCAACATGGTGTAACACCTGCTCTCCTAAAATTAAACAAAATTTCATGGTTTGCGTGGGCCGTCTTGTCTCATCACTTCACTCCTGAGGGCCGGCGCCGGAAAGATATCTTGATCTGCGGCGCTCCGACCGTTTTCTTTAAACCTTACAACTCCCGACCTCCTCGCCTATCCTCCCTAAATCCTCGCCAGGCTCGCCTGCTTCAGCCACTCTTTCCTTCGCACCCTCCCCTCTCTTCAATATACTTCACCCGCCCATCCTTCACGCCGGCACGTATCCAATCTCTTCTTATCTTTCCGTATCCAANTCCCTTCTCCCTCTGCCGCGACCTTCGCCATCCCTCTGCGCGTCCTCTTCC"
|
41
|
+
assert_equal(seq,read.seq.to_s.upcase)
|
42
|
+
assert_instance_of(Bio::Sequence::NA,read.seq)
|
43
|
+
qual = "4 4 8 4 4 4 4 4 4 4 4 4 6 8 17 21 14 7 6 6 6 7 7 6 8 14 16 21 15 20 20 24 26 21 18 18 14 14 19 23 10 8 8 15 20 16 29 26 34 29 39 29 31 29 31 34 32 27 27 25 19 19 24 31 33 36 34 34 34 26 27 22 32 32 36 28 28 15 15 15 28 28 34 30 12 12 22 27 31 31 31 31 31 23 24 27 21 24 24 29 27 27 27 34 34 36 38 38 38 36 36 40 36 37 38 45 45 36 34 33 31 31 34 34 33 33 28 28 27 23 24 11 11 10 10 18 25 21 20 17 17 17 20 15 24 18 24 26 23 23 18 20 25 23 30 30 30 33 33 37 37 32 37 37 32 45 35 37 37 37 40 36 49 49 36 36 34 33 20 15 9 9 8 7 12 22 21 28 28 30 33 36 36 36 34 31 31 25 31 28 26 26 24 20 17 9 11 8 9 10 23 23 31 23 23 15 9 9 15 33 26 33 33 31 25 25 22 31 24 23 12 10 12 11 9 8 9 7 7 8 8 9 18 12 9 9 18 20 26 31 21 21 9 8 8 11 13 21 21 23 15 15 15 15 15 17 17 9 7 9 19 20 21 21 25 25 25 25 25 23 23 9 9 9 21 16 24 24 24 24 26 33 33 33 31 31 27 15 17 7 4 4 4 16 20 27 33 34 34 23 15 14 8 9 6 6 9 1 14 16 8 11 15 23 25 34 36 31 33 16 16 6 6 6 9 8 14 9 11 11 9 13 13 10 8 10 9 9 7 8 20 20 20 14 14 10 10 10 16 8 8 6 6 8 9 10 7 8 8 8 8 6 8 6 6 8 12 9 8 7 13 10 8 8 9 8 8 8 9 6 6 6 6 8 6 6 6 6 6 10 11 10 12 12 10 7 6 6 7 6 6 7 8 7 6 6 7 6 8 6 6 8 8 8 8 6 6 6 8 6 6 8 6 6 11 15 9 9 9 9 9 9 9 9 9 9 8 9 9 6 6 6 7 9 6 6 6 7 7 7 8 9 7 7 9 9 11 8 11 11 11 9 10 9 8 8 8 10 6 6 9 8 6 6 6 6 8 8 9 11 9 8 8 8 8 8 8 8 9 9 9 10 10 8 6 6 8 6 6 10 11 10 6 6 8 6 6 6 6 8 11 8 9 8 8 8 8 8 9 6 6 9 8 8 8 6 6 13 15 12 12 8 8 6 6 8 9 8 8 9 8 8 6 6 6 8 8 6 6 6 8 8 8 6 6 8 6 6 8 6 6 9 8 8 6 6 8 8 6 6 8 6 7 8 9 10 11 11 10 9 9 8 10 9 10 10 8 9 13 8 8 8 13 13 11 7 7 7 11 8 8 10 11 10 9 9 14 7 7 10 10 10 8 8 8 8 6 7 8 8 6 6 6 6 6 6 6 6 6 8 9 10 8 8 6 6 8 8 8 8 8 8 8 8 8 8 6 6 8 8 9 10 10 4 4 4 6 7 6 6 6 6 10 8 8 9 15 11 6 6 8 8 8 8 8 8 9 9 8 9 7 7 8 6 6 8 13 10 8 8 7 6 8 6 6 8 6 7 8 8 8 8 8 8 8 4"
|
44
|
+
assert_equal(qual,read.quality)
|
45
|
+
assert_equal(39,read.clear_range_from)
|
46
|
+
assert_equal(331,read.clear_range_to)
|
47
|
+
assert_equal(1,read.from)
|
48
|
+
assert_equal(293,read.to)
|
49
|
+
assert_equal("Reverse",read.orientation)
|
50
|
+
assert_instance_of(Bio::Assembly::Caf::Read,read)
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
end
|
metadata
CHANGED
@@ -5,24 +5,38 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
+
- 1
|
8
9
|
- 0
|
9
|
-
|
10
|
-
version: 0.0.2
|
10
|
+
version: 0.1.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Chase Miller
|
14
|
+
- Francesco Strozzi
|
14
15
|
autorequire:
|
15
16
|
bindir: bin
|
16
17
|
cert_chain: []
|
17
18
|
|
18
|
-
date:
|
19
|
+
date: 2011-01-12 00:00:00 -05:00
|
19
20
|
default_executable:
|
20
21
|
dependencies:
|
21
22
|
- !ruby/object:Gem::Dependency
|
22
|
-
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 5
|
29
|
+
segments:
|
30
|
+
- 1
|
31
|
+
- 4
|
32
|
+
- 1
|
33
|
+
version: 1.4.1
|
34
|
+
type: :runtime
|
35
|
+
name: bio
|
36
|
+
version_requirements: *id001
|
23
37
|
prerelease: false
|
24
|
-
|
25
|
-
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
26
40
|
none: false
|
27
41
|
requirements:
|
28
42
|
- - ">="
|
@@ -31,12 +45,12 @@ dependencies:
|
|
31
45
|
segments:
|
32
46
|
- 0
|
33
47
|
version: "0"
|
34
|
-
requirement: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
48
|
type: :development
|
49
|
+
name: shoulda
|
50
|
+
version_requirements: *id002
|
37
51
|
prerelease: false
|
38
|
-
|
39
|
-
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
40
54
|
none: false
|
41
55
|
requirements:
|
42
56
|
- - ~>
|
@@ -47,12 +61,12 @@ dependencies:
|
|
47
61
|
- 0
|
48
62
|
- 0
|
49
63
|
version: 1.0.0
|
50
|
-
requirement: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
64
|
type: :development
|
65
|
+
name: bundler
|
66
|
+
version_requirements: *id003
|
53
67
|
prerelease: false
|
54
|
-
|
55
|
-
|
68
|
+
- !ruby/object:Gem::Dependency
|
69
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
56
70
|
none: false
|
57
71
|
requirements:
|
58
72
|
- - ~>
|
@@ -63,12 +77,12 @@ dependencies:
|
|
63
77
|
- 5
|
64
78
|
- 2
|
65
79
|
version: 1.5.2
|
66
|
-
requirement: *id003
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
80
|
type: :development
|
81
|
+
name: jeweler
|
82
|
+
version_requirements: *id004
|
69
83
|
prerelease: false
|
70
|
-
|
71
|
-
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
72
86
|
none: false
|
73
87
|
requirements:
|
74
88
|
- - ">="
|
@@ -77,12 +91,12 @@ dependencies:
|
|
77
91
|
segments:
|
78
92
|
- 0
|
79
93
|
version: "0"
|
80
|
-
requirement: *id004
|
81
|
-
- !ruby/object:Gem::Dependency
|
82
94
|
type: :development
|
95
|
+
name: rcov
|
96
|
+
version_requirements: *id005
|
83
97
|
prerelease: false
|
84
|
-
|
85
|
-
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
86
100
|
none: false
|
87
101
|
requirements:
|
88
102
|
- - ">="
|
@@ -93,7 +107,10 @@ dependencies:
|
|
93
107
|
- 4
|
94
108
|
- 1
|
95
109
|
version: 1.4.1
|
96
|
-
|
110
|
+
type: :development
|
111
|
+
name: bio
|
112
|
+
version_requirements: *id006
|
113
|
+
prerelease: false
|
97
114
|
description: bioruby plugin to parse, write, and manipulate assembly data
|
98
115
|
email: chmille4@gmail.com
|
99
116
|
executables: []
|
@@ -112,13 +129,17 @@ files:
|
|
112
129
|
- Rakefile
|
113
130
|
- VERSION
|
114
131
|
- bio-assembly.gemspec
|
132
|
+
- data/example.caf
|
115
133
|
- data/example1.ace
|
116
134
|
- lib/bio-assembly.rb
|
117
135
|
- lib/bio-assembly/ace.rb
|
136
|
+
- lib/bio-assembly/caf.rb
|
118
137
|
- lib/bio-assembly/contig.rb
|
138
|
+
- lib/bio-assembly/maf.rb
|
119
139
|
- lib/bio-assembly/read.rb
|
120
140
|
- test/helper.rb
|
121
|
-
- test/test_bio-assembly.rb
|
141
|
+
- test/test_bio-assembly-ace.rb
|
142
|
+
- test/test_bio-assembly-caf.rb
|
122
143
|
has_rdoc: true
|
123
144
|
homepage: http://github.com/chmille4/bioruby-assembly
|
124
145
|
licenses:
|
@@ -155,4 +176,5 @@ specification_version: 3
|
|
155
176
|
summary: BioRuby Assembly plugin
|
156
177
|
test_files:
|
157
178
|
- test/helper.rb
|
158
|
-
- test/test_bio-assembly.rb
|
179
|
+
- test/test_bio-assembly-ace.rb
|
180
|
+
- test/test_bio-assembly-caf.rb
|