bio-assembly 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/LICENSE.txt +1 -1
- data/README.rdoc +43 -3
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bio-assembly.gemspec +13 -5
- data/data/example.caf +2755 -0
- data/lib/bio-assembly.rb +4 -14
- data/lib/bio-assembly/ace.rb +11 -19
- data/lib/bio-assembly/caf.rb +124 -0
- data/lib/bio-assembly/contig.rb +5 -1
- data/lib/bio-assembly/maf.rb +15 -0
- data/lib/bio-assembly/read.rb +4 -0
- data/test/{test_bio-assembly.rb → test_bio-assembly-ace.rb} +15 -7
- data/test/test_bio-assembly-caf.rb +54 -0
- metadata +47 -25
data/lib/bio-assembly.rb
CHANGED
@@ -5,11 +5,10 @@ require 'bio-assembly/read'
|
|
5
5
|
module Bio
|
6
6
|
|
7
7
|
class Assembly
|
8
|
-
|
9
|
-
|
8
|
+
|
10
9
|
@@formats = { }
|
11
10
|
|
12
|
-
def self.
|
11
|
+
def self.open(path, format)
|
13
12
|
streamer = @@formats[format]
|
14
13
|
if streamer
|
15
14
|
streamer.new(path)
|
@@ -22,21 +21,11 @@ module Bio
|
|
22
21
|
@@formats[name] = self
|
23
22
|
end
|
24
23
|
|
25
|
-
def contigs
|
26
|
-
# use each_contig to stream large files
|
27
|
-
parse_whole_file if @contigs.empty?
|
28
|
-
@contigs
|
29
|
-
end
|
30
|
-
|
31
24
|
def each_contig
|
32
25
|
# implemented by each format subclass
|
33
26
|
end
|
34
27
|
|
35
28
|
private
|
36
|
-
|
37
|
-
def num_contigs
|
38
|
-
contigs.size
|
39
|
-
end
|
40
29
|
|
41
30
|
def num_reads
|
42
31
|
read_num = 0
|
@@ -52,4 +41,5 @@ module Bio
|
|
52
41
|
|
53
42
|
end
|
54
43
|
|
55
|
-
require 'bio-assembly/ace'
|
44
|
+
require 'bio-assembly/ace'
|
45
|
+
require 'bio-assembly/caf'
|
data/lib/bio-assembly/ace.rb
CHANGED
@@ -9,22 +9,15 @@ class Assembly
|
|
9
9
|
|
10
10
|
def initialize(path)
|
11
11
|
@file = File.new(path, 'r')
|
12
|
-
@contigs = Array.new
|
13
12
|
parse_as
|
14
13
|
end
|
15
14
|
|
16
15
|
def each_contig
|
17
|
-
# check if file is already parsed
|
18
|
-
if @total_num_contigs.to_i == @contigs.size
|
19
|
-
@contigs.each{ |contig| yield contig }
|
20
|
-
else
|
21
16
|
each_identifier do |identifier, attrs|
|
22
17
|
next unless identifier == 'CO'
|
23
18
|
contig = parse_contig(attrs)
|
24
|
-
@contigs.push contig
|
25
19
|
yield(contig)
|
26
20
|
end
|
27
|
-
end
|
28
21
|
end
|
29
22
|
|
30
23
|
def to_ace
|
@@ -35,8 +28,9 @@ class Assembly
|
|
35
28
|
end
|
36
29
|
|
37
30
|
private
|
31
|
+
|
38
32
|
def parse_contig(attrs)
|
39
|
-
contig =
|
33
|
+
contig = Contig.new
|
40
34
|
contig.name, base_num, @num_reads, base_segments_num, contig.orientation = attrs.split(" ")
|
41
35
|
# keep track of the number of RD identifiers parsed
|
42
36
|
@num_rds_parsed = 0
|
@@ -83,7 +77,7 @@ class Assembly
|
|
83
77
|
|
84
78
|
# parse read meta data
|
85
79
|
def parse_af(contig, attrs)
|
86
|
-
read =
|
80
|
+
read = Read.new
|
87
81
|
read.name , read.orientation, read.from = attrs.split(" ")
|
88
82
|
contig.add_read read
|
89
83
|
end
|
@@ -139,11 +133,9 @@ class Assembly
|
|
139
133
|
# parse run meta data - ignored
|
140
134
|
def parse_ct(contig, attrs)
|
141
135
|
end
|
142
|
-
|
143
|
-
end # => end class Ace
|
144
136
|
|
145
|
-
#
|
146
|
-
|
137
|
+
# extend contig class and write ace specific methods for contig objects
|
138
|
+
class Contig < Bio::Assembly::Contig
|
147
139
|
|
148
140
|
def to_ace
|
149
141
|
ace = ""
|
@@ -176,10 +168,10 @@ class Assembly
|
|
176
168
|
ace
|
177
169
|
end
|
178
170
|
|
179
|
-
|
171
|
+
end # => end Contig class
|
180
172
|
|
181
|
-
|
182
|
-
|
173
|
+
# extend Read class to add ace specific methods for read objects
|
174
|
+
class Read < Bio::Assembly::Read
|
183
175
|
|
184
176
|
attr_accessor :base_sequences
|
185
177
|
|
@@ -238,7 +230,7 @@ class Assembly
|
|
238
230
|
end
|
239
231
|
|
240
232
|
def <=>(other)
|
241
|
-
unless other.kind_of?(Bio::Assembly::Read::BaseSequence)
|
233
|
+
unless other.kind_of?(Bio::Assembly::Ace::Read::BaseSequence)
|
242
234
|
raise "[Error] markers are not comparable"
|
243
235
|
end
|
244
236
|
if self.from == other.from
|
@@ -251,8 +243,8 @@ class Assembly
|
|
251
243
|
|
252
244
|
end # => end BaseSequence Class
|
253
245
|
|
254
|
-
|
255
|
-
|
246
|
+
end # => end Read Class
|
247
|
+
end # => end class Ace
|
256
248
|
|
257
249
|
end # => end class Assembly
|
258
250
|
end # => end module Bio
|
data/lib/bio-assembly/caf.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
module Bio
|
2
|
+
class Assembly
|
3
|
+
class Caf < Bio::Assembly
|
4
|
+
|
5
|
+
# register parser with superclass
|
6
|
+
register_parser :caf
|
7
|
+
|
8
|
+
def initialize(path)
|
9
|
+
@file = File.new(path, 'r')
|
10
|
+
end
|
11
|
+
# iterator that return one contig at a time
|
12
|
+
def each_contig
|
13
|
+
contig = Contig.new
|
14
|
+
feature = Hash.new
|
15
|
+
@file.each do |line|
|
16
|
+
feature = parse_blocks(line,feature) # search the file for CAF blocks like DNA and Sequence
|
17
|
+
if feature[:type] == :read and feature[:parsed]
|
18
|
+
read = convert_to_read(feature)
|
19
|
+
contig.add_read(read)
|
20
|
+
feature = Hash.new
|
21
|
+
elsif feature[:type] == :contig and feature[:parsed]
|
22
|
+
contig = convert_to_contig(contig,feature)
|
23
|
+
yield contig
|
24
|
+
contig = Contig.new
|
25
|
+
feature = Hash.new
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
class Contig < Bio::Assembly::Contig
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
class Read < Bio::Assembly::Read
|
35
|
+
attr_accessor :quality
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def parse_blocks(line,feat)
|
41
|
+
keywords = line.split("\s")
|
42
|
+
case keywords[0]
|
43
|
+
when "DNA" then parse_dna(feat)
|
44
|
+
when "Sequence" then parse_seq(feat,line)
|
45
|
+
end
|
46
|
+
return feat
|
47
|
+
end
|
48
|
+
|
49
|
+
# parse DNA sequence and BaseQuality
|
50
|
+
def parse_dna(feat)
|
51
|
+
feat[:seq] = @file.gets("\n\n").tr("\n","")
|
52
|
+
newline = @file.gets
|
53
|
+
keywords = newline.split("\s")
|
54
|
+
feat[:qual] = @file.gets("\n\n").tr("\n"," ").rstrip if keywords[0] == "BaseQuality"
|
55
|
+
feat[:parsed] = true if feat[:type] == :contig
|
56
|
+
end
|
57
|
+
|
58
|
+
# parse Sequence information like Name, Clipping, Strand and Type
|
59
|
+
def parse_seq(feat,line)
|
60
|
+
feat[:name] = line.split(":")[1].tr("\s|\n","")
|
61
|
+
sequence_block = @file.gets("\n\n")
|
62
|
+
sequence_block.split("\n").each do |l|
|
63
|
+
keywords = l.split("\s")
|
64
|
+
case keywords[0]
|
65
|
+
when "Clipping" then parse_clipping(feat,l)
|
66
|
+
when "Strand" then parse_strand(feat,l)
|
67
|
+
when "Assembled_from" then parse_af(feat,l)
|
68
|
+
when "Is_read" then feat[:type] = :read
|
69
|
+
when "Is_contig" then feat[:type] = :contig
|
70
|
+
end
|
71
|
+
end
|
72
|
+
feat[:parsed] = true if feat[:type] == :read
|
73
|
+
end
|
74
|
+
|
75
|
+
# parse read coordinates for quality clipping
|
76
|
+
def parse_clipping(feat,line)
|
77
|
+
val = line.chomp.split("\s")
|
78
|
+
feat[:clipping_start] = val[-2]
|
79
|
+
feat[:clipping_end] = val[-1]
|
80
|
+
end
|
81
|
+
|
82
|
+
# parse sequence strand information
|
83
|
+
def parse_strand(feat,line)
|
84
|
+
feat[:orientation] = line.split("\s")[1].tr("\n","")
|
85
|
+
end
|
86
|
+
|
87
|
+
# parse Assembled_from lines in Contig. These lines also include read alignment positions within the contig
|
88
|
+
def parse_af(feat,line)
|
89
|
+
if feat[:af].nil?
|
90
|
+
feat[:af] = [line]
|
91
|
+
else
|
92
|
+
feat[:af] << line
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# convert a generic feature into a Caf::Read object
|
97
|
+
def convert_to_read(feature)
|
98
|
+
read = Read.new
|
99
|
+
read.name = feature[:name]
|
100
|
+
read.seq = feature[:seq]
|
101
|
+
read.quality = feature[:qual]
|
102
|
+
read.clear_range_from = feature[:clipping_start]
|
103
|
+
read.clear_range_to = feature[:clipping_end]
|
104
|
+
read.orientation = feature[:orientation]
|
105
|
+
return read
|
106
|
+
end
|
107
|
+
|
108
|
+
# convert a generic feature into a Caf::Contig object
|
109
|
+
def convert_to_contig(contig,feature)
|
110
|
+
contig.name = feature[:name]
|
111
|
+
contig.seq = feature[:seq]
|
112
|
+
contig.quality = feature[:qual]
|
113
|
+
# assign reads ranges using Assembled_from lines in Contig
|
114
|
+
feature[:af].each do |af|
|
115
|
+
val = af.split("\s")
|
116
|
+
contig.reads[val[-5]].from = val[-4]
|
117
|
+
contig.reads[val[-5]].to = val[-3]
|
118
|
+
end
|
119
|
+
return contig
|
120
|
+
end
|
121
|
+
|
122
|
+
end # end Caf
|
123
|
+
end # end Assembly
|
124
|
+
end # end Bio
|
data/lib/bio-assembly/contig.rb
CHANGED
@@ -10,7 +10,7 @@ module Bio
|
|
10
10
|
def initialize(str="")
|
11
11
|
@reads = Hash.new
|
12
12
|
@seq = Bio::Sequence::NA.new(str)
|
13
|
-
# counter for
|
13
|
+
# counter for Reads
|
14
14
|
@rds_parsed = 0
|
15
15
|
end
|
16
16
|
|
@@ -54,6 +54,10 @@ module Bio
|
|
54
54
|
seq.length
|
55
55
|
end
|
56
56
|
|
57
|
+
def seq=(str)
|
58
|
+
@seq = Bio::Sequence::NA.new(str)
|
59
|
+
end
|
60
|
+
|
57
61
|
def num_base_segments
|
58
62
|
num_base_sequences = 0
|
59
63
|
each_read do |read|
|
data/lib/bio-assembly/read.rb
CHANGED
data/test/{test_bio-assembly.rb → test_bio-assembly-ace.rb}
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
|
-
class
|
3
|
+
class TestBioAssemblyAce < Test::Unit::TestCase
|
4
4
|
|
5
5
|
def setup
|
6
6
|
ace_filename = File.join('data', 'example1.ace')
|
7
|
-
@obj = Bio::Assembly.
|
7
|
+
@obj = Bio::Assembly.open(ace_filename, :ace)
|
8
8
|
|
9
9
|
# pick a contig to do in depth tests on
|
10
10
|
@contig = nil
|
11
|
-
@
|
11
|
+
@tot_contig = []
|
12
|
+
@obj.each_contig do |c|
|
13
|
+
@contig = c if c.name.to_i == 5
|
14
|
+
@tot_contig << c
|
15
|
+
end
|
12
16
|
|
13
17
|
# pick a read to do in depth tests on
|
14
18
|
@read = nil
|
@@ -17,13 +21,13 @@ class TestBioAssembly < Test::Unit::TestCase
|
|
17
21
|
|
18
22
|
def test_num_contigs_parsed
|
19
23
|
contigs_parsed = 13
|
20
|
-
assert_equal(contigs_parsed, @
|
24
|
+
assert_equal(contigs_parsed, @tot_contig.size)
|
21
25
|
end
|
22
26
|
|
23
27
|
def test_num_reads_parsed
|
24
28
|
reads_parsed_known = 1760
|
25
29
|
reads_parsed = 0
|
26
|
-
@
|
30
|
+
@tot_contig.each { |c| reads_parsed += c.reads.size }
|
27
31
|
assert_equal(reads_parsed_known, reads_parsed)
|
28
32
|
end
|
29
33
|
|
@@ -34,12 +38,16 @@ class TestBioAssembly < Test::Unit::TestCase
|
|
34
38
|
|
35
39
|
def test_contig_seq
|
36
40
|
seq = "TTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTA*TTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTTAATTTCAAGATGATTCACAGGTTTGTTGCCTCAAAAGAAAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGACAAGTTGACTCAACTTTTCAC*TTTATGTTATATTGTAAGGATGTGACTTTGTTTTGGAAAATTATATTTAATTTGATAATTAACCAATATAAAAAAGATAAACCAAAAGCTATAAGTCGTAAATAAGGACATTGGAAACAAGAAATATTCTCTCCTGAACATTATTTTAAATTATGCGCAATATGCAAATTTATAAGTGTTAAGTTAAAAAGATTGTTAATGGTTCTGTTTATTACCCAAAGACTTTTTTAAAGTTTAAGTCGTTGCTAAGAGTGCAGCGTTTAGACAAATAAAAATGCAATAATCTTCTCGCTCGGGAGCTATGTCCCTCGCATAATATTCTTCAAAGTGTACAGTAAATATTCTAGAAAAGTGAAGTGTGAAAAAGATATATTGCTTGTTTTTATATTTTGTTAATACAACAAAACTTCAAAAACCTGCGGTGGGGGGGGGGGGATAGTCACTTCCGTCACCTTCACCCCTCTCGTTCACTATACTCCCTCGCCCTGGCGTAATGATGGGGGGATTGGGGGTAGTTGCCCCTTAATAAAGTTCAAACTTGATTTATTTCTAACTCGATACCAGTGATTTACAAATGTTTCTGAAATGGCATGGTTTTCCCTAATAAATGCCTAAAAACCCTGAGCTGAGCCCACGCCAATT"
|
37
|
-
assert_equal(seq, @contig.seq.to_s)
|
41
|
+
assert_equal(seq, @contig.seq.to_s.upcase)
|
42
|
+
assert_instance_of(Bio::Sequence::NA,@contig.seq)
|
43
|
+
assert_instance_of(Bio::Assembly::Ace::Contig,@contig)
|
38
44
|
end
|
39
45
|
|
40
46
|
def test_read_seq
|
41
47
|
read_seq = 'GAAAAAAAAAGGCAGAAGTTTAATCAAAACGGATTTTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTATTTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTT*ATTTC*AGATGATTCACAGGTTTGTTGCCTCAAAAG*AAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGGACAAGTTGACTTCACCT'
|
42
|
-
assert_equal(read_seq, @read.seq.to_s)
|
48
|
+
assert_equal(read_seq, @read.seq.to_s.upcase)
|
49
|
+
assert_instance_of(Bio::Sequence::NA,@read.seq)
|
50
|
+
assert_instance_of(Bio::Assembly::Ace::Read,@read)
|
43
51
|
end
|
44
52
|
|
45
53
|
def test_read_range
|
data/test/test_bio-assembly-caf.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestBioAssemblyCaf < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
path = File.join('data','example.caf')
|
7
|
+
@caf = Bio::Assembly.open(path,:caf)
|
8
|
+
@contigs = []
|
9
|
+
@caf.each_contig {|c| @contigs << c}
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_contigs
|
13
|
+
assert_equal(2,@contigs.size)
|
14
|
+
assert_equal('Contig1',@contigs[0].name)
|
15
|
+
assert_equal('Contig2',@contigs[1].name)
|
16
|
+
assert_instance_of(Bio::Assembly::Caf::Contig,@contigs[0])
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_read_per_contig
|
20
|
+
assert_equal(21,@contigs[0].reads.size)
|
21
|
+
assert_equal(21,@contigs[0].num_reads)
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_contig_seq
|
25
|
+
seq = "TTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTATANACTGTGCGTGCGCCACCATGCCTGGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTCGAGCTCCTGACCTAGGATTACAGGCCTAAGCCACCGCACCCGGCATGATGGGTCTTTATTCTTCAAAGCAGGAGGAAGGGATCCTAGAAAAACAGAGACAAGGCCAAACATGGTAGCTCACACCTGTAATNNNNNCACTTTGGGAGGCCAGTGCGGGTGAATCACGANGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAANCACCGTCTCTACTAAAATAAAAAGAAATTAGCTGGGTGTCGTGGCAGGTGCNTGTAATCCCAGCCACTTGGGAAGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGTGGAGGTTGCAGTGAGCCGAGATCACGCGACTGCACTCCAGCCCAACCAATAGTGTGAGACTCTGTCTCGAAAAAAAAAAAGCAGAGACAAGACNACTAGTACAGTACTTACAGGGTTATTATGATGATTAAATGAGAGAATAGCTGTGAGGTGATTGATATAGTGCTGTGCTTAATACAAACTATCATTTTATTATACGGGTTGAGTGTNTCTAATCTGAAAATCCAAAATTAGAAATGCTCTACAGTCTGAAACTTTTTTGAGCACCGACCTAATGTTCAAAGGAAGTGCTTATTGGAGCATTATGGGTTGTTAGATTTTTGGGTTGGGAATATTCAACCAGTAAGTACTATAAAATGCAAATATTCCAAAAAAAATCTGAAATCTGAAACATTTCTGGTCCTAAGCAAGCATTTTGCAAAGGGATACGCAACCTGTAGTACGTTCTTTATCATTGTTTTAAGTAGTTAATATATTGTGGTACAGATTCTGAGGTGGTATAGCAAATTCGATTGTATTATTAAAAAGCATATTTATATTTTGAGAGCTTGCTTAGGATTATTGGAGAGAATAAAACAGTGAAGCTTTGGTGTTATGAGGGAATTTTAGATAGAAAAGTGCAGTTTTTCAGTTCATGCTCTTTCATTTTTTACTCCCTCAGGTTAAAGCTNGAAGCTCAACAAAGATATAGTGATCTCTGTGGGCATTTATAATCTGGTCCAGAAGGCTCTNNANNCNNNTCCNNNNNNNCTNNANNNNNNNACAAATGAACCAGTGAAAACCAAGACCCGGACCTTTAATACAAGTACAGGCGGTTTGCTTCTGCCTAGCGATACCAAGAGGTCTCAGGTAGGTAGAGATGCCTTTTGTTGTTGTTGTTTTTGAGACAGGGTCTCATTGTGTCGCCCAGGCTGGAGTGCAGTGGGGCGAACATGACTCGCTACAGCCTTGACCTCCTGGACTCAAGCGATCCTTCTGTCTCAGCCTCCCAAGTAGCTGGGATCACAGGCATGTGACATCACACCCAGCTAATTTATTTATTTATTTATTTTTTAAGAGACTGGATCGACTGGGCACAGTGGCTCATGCCTGTAATCCCANCACTTTGGGAGGCCGAGGCAGGTGGATTACCGAGGTCAGGAGTTCAAGACCAGCCTGACCAACATGGAGAAACCCCATCTCTACTAAAAATACAAAATGAGCTGGGCATGGTGGTGCATGCCTGTAATCC"
|
26
|
+
assert_equal(seq,@contigs[0].seq.to_s.upcase)
|
27
|
+
assert_instance_of(Bio::Sequence::NA,@contigs[0].seq)
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_contig_qual
|
31
|
+
qual = "4 4 8 4 6 10 13 21 24 25 33 33 33 30 27 21 15 27 19 30 30 33 33 30 21 21 10 9 17 11 27 27 37 38 33 35 35 35 35 44 45 45 38 37 37 45 45 45 43 43 43 45 45 45 45 45 45 45 21 21 23 30 30 34 37 37 38 45 32 45 37 41 37 45 30 45 45 34 34 34 32 29 22 32 32 45 45 45 33 28 28 37 37 34 34 34 35 34 34 34 34 34 34 37 37 40 41 37 34 37 37 37 32 24 22 27 29 25 27 20 21 21 21 27 45 41 40 40 40 42 34 34 37 41 45 51 45 45 37 30 37 41 41 37 36 28 30 30 30 22 33 33 35 33 33 41 51 51 45 39 39 39 30 28 33 34 34 39 34 41 34 34 41 33 33 39 39 39 33 33 33 30 33 33 33 30 29 19 19 24 25 32 33 45 36 36 36 36 41 41 30 30 30 37 37 43 51 51 51 51 45 37 37 30 37 37 37 51 51 51 51 51 37 37 28 28 10 10 10 13 10 10 10 9 9 9 21 21 28 37 37 37 33 33 33 33 33 33 34 34 34 33 33 33 33 21 25 25 22 22 29 29 33 33 33 33 31 31 27 28 17 23 23 28 26 24 32 10 10 10 15 15 26 32 32 37 45 45 32 45 33 26 24 32 37 37 35 34 37 35 37 34 37 37 40 37 37 45 45 38 38 45 45 40 37 37 37 34 30 30 28 28 28 30 37 37 34 34 34 34 34 33 31 31 25 21 19 22 21 25 30 32 32 30 22 22 25 28 24 25 26 17 17 23 27 27 33 31 31 34 38 38 38 38 34 26 17 15 8 8 11 16 25 31 32 33 29 26 32 29 33 34 34 34 36 33 34 34 31 27 31 34 38 45 45 36 37 37 37 36 36 38 34 32 28 25 22 22 21 17 18 29 29 23 23 22 27 25 20 18 11 11 15 15 16 23 21 17 19 24 24 24 31 31 31 33 33 33 31 33 33 29 29 29 29 29 31 31 31 23 24 19 12 10 16 10 10 20 10 10 13 15 30 23 29 23 28 18 10 10 16 21 18 19 19 24 24 12 11 9 9 10 21 23 31 31 33 28 28 14 17 17 28 21 30 24 30 28 22 26 23 19 10 10 12 23 22 23 19 12 10 9 9 10 19 24 29 30 34 34 34 34 34 29 25 25 32 26 31 31 32 20 17 15 12 12 4 4 4 17 23 33 30 24 18 13 18 16 12 16 8 9 9 19 15 15 4 4 4 7 11 6 6 6 7 8 9 17 12 13 19 23 25 13 13 13 19 21 29 32 30 26 20 20 12 9 9 8 9 8 8 15 24 17 15 8 8 9 17 16 4 4 4 8 8 13 15 23 14 9 8 8 10 10 17 23 21 17 12 13 22 20 19 15 11 11 9 8 8 8 8 8 8 10 9 9 7 8 8 10 12 10 10 12 4 4 4 4 4 4 10 10 10 10 10 9 8 8 7 8 9 11 9 9 4"
|
32
|
+
assert_equal(qual,@contigs[1].quality)
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_read
|
36
|
+
read = nil
|
37
|
+
assert_nothing_raised do
|
38
|
+
read = @contigs[0].find_read_by_name("22ak93c2.r1t")
|
39
|
+
end
|
40
|
+
seq = "GTCGCNCATAAGATTACGAGATCTCGAGCTCGGTACCCTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTATAGACTGTGCGTGCGCCACCATGCCTGGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTCGAGCTCCTGACCTAGGATTACAGGCCTAAGCCACCGCACCCGGCATGATGGGTCTTTATTCTTCAAAGCAGGAGGAAGGGATCCTAGAAAAACAGAGACAAGGCCAAACATGGTAGCTCACACCTGTAATCCCANCACTTTGGGAGGCCAGTGCGGGTGAATCACGAAGTCAGGAGTTCAAGACCACCCTGGCCAACATGGTGTAACACCTGCTCTCCTAAAATTAAACAAAATTTCATGGTTTGCGTGGGCCGTCTTGTCTCATCACTTCACTCCTGAGGGCCGGCGCCGGAAAGATATCTTGATCTGCGGCGCTCCGACCGTTTTCTTTAAACCTTACAACTCCCGACCTCCTCGCCTATCCTCCCTAAATCCTCGCCAGGCTCGCCTGCTTCAGCCACTCTTTCCTTCGCACCCTCCCCTCTCTTCAATATACTTCACCCGCCCATCCTTCACGCCGGCACGTATCCAATCTCTTCTTATCTTTCCGTATCCAANTCCCTTCTCCCTCTGCCGCGACCTTCGCCATCCCTCTGCGCGTCCTCTTCC"
|
41
|
+
assert_equal(seq,read.seq.to_s.upcase)
|
42
|
+
assert_instance_of(Bio::Sequence::NA,read.seq)
|
43
|
+
qual = "4 4 8 4 4 4 4 4 4 4 4 4 6 8 17 21 14 7 6 6 6 7 7 6 8 14 16 21 15 20 20 24 26 21 18 18 14 14 19 23 10 8 8 15 20 16 29 26 34 29 39 29 31 29 31 34 32 27 27 25 19 19 24 31 33 36 34 34 34 26 27 22 32 32 36 28 28 15 15 15 28 28 34 30 12 12 22 27 31 31 31 31 31 23 24 27 21 24 24 29 27 27 27 34 34 36 38 38 38 36 36 40 36 37 38 45 45 36 34 33 31 31 34 34 33 33 28 28 27 23 24 11 11 10 10 18 25 21 20 17 17 17 20 15 24 18 24 26 23 23 18 20 25 23 30 30 30 33 33 37 37 32 37 37 32 45 35 37 37 37 40 36 49 49 36 36 34 33 20 15 9 9 8 7 12 22 21 28 28 30 33 36 36 36 34 31 31 25 31 28 26 26 24 20 17 9 11 8 9 10 23 23 31 23 23 15 9 9 15 33 26 33 33 31 25 25 22 31 24 23 12 10 12 11 9 8 9 7 7 8 8 9 18 12 9 9 18 20 26 31 21 21 9 8 8 11 13 21 21 23 15 15 15 15 15 17 17 9 7 9 19 20 21 21 25 25 25 25 25 23 23 9 9 9 21 16 24 24 24 24 26 33 33 33 31 31 27 15 17 7 4 4 4 16 20 27 33 34 34 23 15 14 8 9 6 6 9 1 14 16 8 11 15 23 25 34 36 31 33 16 16 6 6 6 9 8 14 9 11 11 9 13 13 10 8 10 9 9 7 8 20 20 20 14 14 10 10 10 16 8 8 6 6 8 9 10 7 8 8 8 8 6 8 6 6 8 12 9 8 7 13 10 8 8 9 8 8 8 9 6 6 6 6 8 6 6 6 6 6 10 11 10 12 12 10 7 6 6 7 6 6 7 8 7 6 6 7 6 8 6 6 8 8 8 8 6 6 6 8 6 6 8 6 6 11 15 9 9 9 9 9 9 9 9 9 9 8 9 9 6 6 6 7 9 6 6 6 7 7 7 8 9 7 7 9 9 11 8 11 11 11 9 10 9 8 8 8 10 6 6 9 8 6 6 6 6 8 8 9 11 9 8 8 8 8 8 8 8 9 9 9 10 10 8 6 6 8 6 6 10 11 10 6 6 8 6 6 6 6 8 11 8 9 8 8 8 8 8 9 6 6 9 8 8 8 6 6 13 15 12 12 8 8 6 6 8 9 8 8 9 8 8 6 6 6 8 8 6 6 6 8 8 8 6 6 8 6 6 8 6 6 9 8 8 6 6 8 8 6 6 8 6 7 8 9 10 11 11 10 9 9 8 10 9 10 10 8 9 13 8 8 8 13 13 11 7 7 7 11 8 8 10 11 10 9 9 14 7 7 10 10 10 8 8 8 8 6 7 8 8 6 6 6 6 6 6 6 6 6 8 9 10 8 8 6 6 8 8 8 8 8 8 8 8 8 8 6 6 8 8 9 10 10 4 4 4 6 7 6 6 6 6 10 8 8 9 15 11 6 6 8 8 8 8 8 8 9 9 8 9 7 7 8 6 6 8 13 10 8 8 7 6 8 6 6 8 6 7 8 8 8 8 8 8 8 4"
|
44
|
+
assert_equal(qual,read.quality)
|
45
|
+
assert_equal(39,read.clear_range_from)
|
46
|
+
assert_equal(331,read.clear_range_to)
|
47
|
+
assert_equal(1,read.from)
|
48
|
+
assert_equal(293,read.to)
|
49
|
+
assert_equal("Reverse",read.orientation)
|
50
|
+
assert_instance_of(Bio::Assembly::Caf::Read,read)
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
end
|
metadata
CHANGED
@@ -5,24 +5,38 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
+
- 1
|
8
9
|
- 0
|
9
|
-
|
10
|
-
version: 0.0.2
|
10
|
+
version: 0.1.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Chase Miller
|
14
|
+
- Francesco Strozzi
|
14
15
|
autorequire:
|
15
16
|
bindir: bin
|
16
17
|
cert_chain: []
|
17
18
|
|
18
|
-
date:
|
19
|
+
date: 2011-01-12 00:00:00 -05:00
|
19
20
|
default_executable:
|
20
21
|
dependencies:
|
21
22
|
- !ruby/object:Gem::Dependency
|
22
|
-
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 5
|
29
|
+
segments:
|
30
|
+
- 1
|
31
|
+
- 4
|
32
|
+
- 1
|
33
|
+
version: 1.4.1
|
34
|
+
type: :runtime
|
35
|
+
name: bio
|
36
|
+
version_requirements: *id001
|
23
37
|
prerelease: false
|
24
|
-
|
25
|
-
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
26
40
|
none: false
|
27
41
|
requirements:
|
28
42
|
- - ">="
|
@@ -31,12 +45,12 @@ dependencies:
|
|
31
45
|
segments:
|
32
46
|
- 0
|
33
47
|
version: "0"
|
34
|
-
requirement: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
48
|
type: :development
|
49
|
+
name: shoulda
|
50
|
+
version_requirements: *id002
|
37
51
|
prerelease: false
|
38
|
-
|
39
|
-
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
40
54
|
none: false
|
41
55
|
requirements:
|
42
56
|
- - ~>
|
@@ -47,12 +61,12 @@ dependencies:
|
|
47
61
|
- 0
|
48
62
|
- 0
|
49
63
|
version: 1.0.0
|
50
|
-
requirement: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
64
|
type: :development
|
65
|
+
name: bundler
|
66
|
+
version_requirements: *id003
|
53
67
|
prerelease: false
|
54
|
-
|
55
|
-
|
68
|
+
- !ruby/object:Gem::Dependency
|
69
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
56
70
|
none: false
|
57
71
|
requirements:
|
58
72
|
- - ~>
|
@@ -63,12 +77,12 @@ dependencies:
|
|
63
77
|
- 5
|
64
78
|
- 2
|
65
79
|
version: 1.5.2
|
66
|
-
requirement: *id003
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
80
|
type: :development
|
81
|
+
name: jeweler
|
82
|
+
version_requirements: *id004
|
69
83
|
prerelease: false
|
70
|
-
|
71
|
-
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
72
86
|
none: false
|
73
87
|
requirements:
|
74
88
|
- - ">="
|
@@ -77,12 +91,12 @@ dependencies:
|
|
77
91
|
segments:
|
78
92
|
- 0
|
79
93
|
version: "0"
|
80
|
-
requirement: *id004
|
81
|
-
- !ruby/object:Gem::Dependency
|
82
94
|
type: :development
|
95
|
+
name: rcov
|
96
|
+
version_requirements: *id005
|
83
97
|
prerelease: false
|
84
|
-
|
85
|
-
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
86
100
|
none: false
|
87
101
|
requirements:
|
88
102
|
- - ">="
|
@@ -93,7 +107,10 @@ dependencies:
|
|
93
107
|
- 4
|
94
108
|
- 1
|
95
109
|
version: 1.4.1
|
96
|
-
|
110
|
+
type: :development
|
111
|
+
name: bio
|
112
|
+
version_requirements: *id006
|
113
|
+
prerelease: false
|
97
114
|
description: bioruby plugin to parse, write, and manipulate assembly data
|
98
115
|
email: chmille4@gmail.com
|
99
116
|
executables: []
|
@@ -112,13 +129,17 @@ files:
|
|
112
129
|
- Rakefile
|
113
130
|
- VERSION
|
114
131
|
- bio-assembly.gemspec
|
132
|
+
- data/example.caf
|
115
133
|
- data/example1.ace
|
116
134
|
- lib/bio-assembly.rb
|
117
135
|
- lib/bio-assembly/ace.rb
|
136
|
+
- lib/bio-assembly/caf.rb
|
118
137
|
- lib/bio-assembly/contig.rb
|
138
|
+
- lib/bio-assembly/maf.rb
|
119
139
|
- lib/bio-assembly/read.rb
|
120
140
|
- test/helper.rb
|
121
|
-
- test/test_bio-assembly.rb
|
141
|
+
- test/test_bio-assembly-ace.rb
|
142
|
+
- test/test_bio-assembly-caf.rb
|
122
143
|
has_rdoc: true
|
123
144
|
homepage: http://github.com/chmille4/bioruby-assembly
|
124
145
|
licenses:
|
@@ -155,4 +176,5 @@ specification_version: 3
|
|
155
176
|
summary: BioRuby Assembly plugin
|
156
177
|
test_files:
|
157
178
|
- test/helper.rb
|
158
|
-
- test/test_bio-assembly.rb
|
179
|
+
- test/test_bio-assembly-ace.rb
|
180
|
+
- test/test_bio-assembly-caf.rb
|