bio-assembly 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,11 +5,10 @@ require 'bio-assembly/read'
5
5
  module Bio
6
6
 
7
7
  class Assembly
8
- attr_accessor :contigs
9
-
8
+
10
9
  @@formats = { }
11
10
 
12
- def self.create(path, format)
11
+ def self.open(path, format)
13
12
  streamer = @@formats[format]
14
13
  if streamer
15
14
  streamer.new(path)
@@ -22,21 +21,11 @@ module Bio
22
21
  @@formats[name] = self
23
22
  end
24
23
 
25
- def contigs
26
- # use each_contig to stream large files
27
- parse_whole_file if @contigs.empty?
28
- @contigs
29
- end
30
-
31
24
  def each_contig
32
25
  # implemented by each format subclass
33
26
  end
34
27
 
35
28
  private
36
-
37
- def num_contigs
38
- contigs.size
39
- end
40
29
 
41
30
  def num_reads
42
31
  read_num = 0
@@ -52,4 +41,5 @@ module Bio
52
41
 
53
42
  end
54
43
 
55
- require 'bio-assembly/ace'
44
+ require 'bio-assembly/ace'
45
+ require 'bio-assembly/caf'
@@ -9,22 +9,15 @@ class Assembly
9
9
 
10
10
  def initialize(path)
11
11
  @file = File.new(path, 'r')
12
- @contigs = Array.new
13
12
  parse_as
14
13
  end
15
14
 
16
15
  def each_contig
17
- # check if file is already parsed
18
- if @total_num_contigs.to_i == @contigs.size
19
- @contigs.each{ |contig| yield contig }
20
- else
21
16
  each_identifier do |identifier, attrs|
22
17
  next unless identifier == 'CO'
23
18
  contig = parse_contig(attrs)
24
- @contigs.push contig
25
19
  yield(contig)
26
20
  end
27
- end
28
21
  end
29
22
 
30
23
  def to_ace
@@ -35,8 +28,9 @@ class Assembly
35
28
  end
36
29
 
37
30
  private
31
+
38
32
  def parse_contig(attrs)
39
- contig = Bio::Assembly::Contig.new
33
+ contig = Contig.new
40
34
  contig.name, base_num, @num_reads, base_segments_num, contig.orientation = attrs.split(" ")
41
35
  # keep track of the number of RD identifiers parsed
42
36
  @num_rds_parsed = 0
@@ -83,7 +77,7 @@ class Assembly
83
77
 
84
78
  # parse read meta data
85
79
  def parse_af(contig, attrs)
86
- read = Bio::Assembly::Read.new
80
+ read = Read.new
87
81
  read.name , read.orientation, read.from = attrs.split(" ")
88
82
  contig.add_read read
89
83
  end
@@ -139,11 +133,9 @@ class Assembly
139
133
  # parse run meta data - ignored
140
134
  def parse_ct(contig, attrs)
141
135
  end
142
-
143
- end # => end class Ace
144
136
 
145
- # open contig class and write ace specific methods for contig objects
146
- class Contig
137
+ # extend contig class and write ace specific methods for contig objects
138
+ class Contig < Bio::Assembly::Contig
147
139
 
148
140
  def to_ace
149
141
  ace = ""
@@ -176,10 +168,10 @@ class Assembly
176
168
  ace
177
169
  end
178
170
 
179
- end # => end Contig class
171
+ end # => end Contig class
180
172
 
181
- # open Read class to add ace specific methods for read objects
182
- class Read
173
+ # extend Read class to add ace specific methods for read objects
174
+ class Read < Bio::Assembly::Read
183
175
 
184
176
  attr_accessor :base_sequences
185
177
 
@@ -238,7 +230,7 @@ class Assembly
238
230
  end
239
231
 
240
232
  def <=>(other)
241
- unless other.kind_of?(Bio::Assembly::Read::BaseSequence)
233
+ unless other.kind_of?(Bio::Assembly::Ace::Read::BaseSequence)
242
234
  raise "[Error] markers are not comparable"
243
235
  end
244
236
  if self.from == other.from
@@ -251,8 +243,8 @@ class Assembly
251
243
 
252
244
  end # => end BaseSequence Class
253
245
 
254
- end # => end Read Class
255
-
246
+ end # => end Read Class
247
+ end # => end class Ace
256
248
 
257
249
  end # => end class Assembly
258
250
  end # => end module Bio
@@ -0,0 +1,124 @@
1
+ module Bio
2
+ class Assembly
3
+ class Caf < Bio::Assembly
4
+
5
+ # register parser with superclass
6
+ register_parser :caf
7
+
8
+ def initialize(path)
9
+ @file = File.new(path, 'r')
10
+ end
11
+ # iterator that yields one contig at a time
12
+ def each_contig
13
+ contig = Contig.new
14
+ feature = Hash.new
15
+ @file.each do |line|
16
+ feature = parse_blocks(line,feature) # search the file for CAF blocks like DNA and Sequence
17
+ if feature[:type] == :read and feature[:parsed]
18
+ read = convert_to_read(feature)
19
+ contig.add_read(read)
20
+ feature = Hash.new
21
+ elsif feature[:type] == :contig and feature[:parsed]
22
+ contig = convert_to_contig(contig,feature)
23
+ yield contig
24
+ contig = Contig.new
25
+ feature = Hash.new
26
+ end
27
+ end
28
+ end
29
+
30
+ class Contig < Bio::Assembly::Contig
31
+ end
32
+
33
+
34
+ class Read < Bio::Assembly::Read
35
+ attr_accessor :quality
36
+ end
37
+
38
+ private
39
+
40
+ def parse_blocks(line,feat)
41
+ keywords = line.split("\s")
42
+ case keywords[0]
43
+ when "DNA" then parse_dna(feat)
44
+ when "Sequence" then parse_seq(feat,line)
45
+ end
46
+ return feat
47
+ end
48
+
49
+ # parse DNA sequence and BaseQuality
50
+ def parse_dna(feat)
51
+ feat[:seq] = @file.gets("\n\n").tr("\n","")
52
+ newline = @file.gets
53
+ keywords = newline.split("\s")
54
+ feat[:qual] = @file.gets("\n\n").tr("\n"," ").rstrip if keywords[0] == "BaseQuality"
55
+ feat[:parsed] = true if feat[:type] == :contig
56
+ end
57
+
58
+ # parse Sequence information like Name, Clipping, Strand and Type
59
+ def parse_seq(feat,line)
60
+ feat[:name] = line.split(":")[1].tr("\s|\n","")
61
+ sequence_block = @file.gets("\n\n")
62
+ sequence_block.split("\n").each do |l|
63
+ keywords = l.split("\s")
64
+ case keywords[0]
65
+ when "Clipping" then parse_clipping(feat,l)
66
+ when "Strand" then parse_strand(feat,l)
67
+ when "Assembled_from" then parse_af(feat,l)
68
+ when "Is_read" then feat[:type] = :read
69
+ when "Is_contig" then feat[:type] = :contig
70
+ end
71
+ end
72
+ feat[:parsed] = true if feat[:type] == :read
73
+ end
74
+
75
+ # parse read coordinates for quality clipping
76
+ def parse_clipping(feat,line)
77
+ val = line.chomp.split("\s")
78
+ feat[:clipping_start] = val[-2]
79
+ feat[:clipping_end] = val[-1]
80
+ end
81
+
82
+ # parse sequence strand information
83
+ def parse_strand(feat,line)
84
+ feat[:orientation] = line.split("\s")[1].tr("\n","")
85
+ end
86
+
87
+ # parse Assembled_from lines in Contig. These lines also include read alignment positions within the contig
88
+ def parse_af(feat,line)
89
+ if feat[:af].nil?
90
+ feat[:af] = [line]
91
+ else
92
+ feat[:af] << line
93
+ end
94
+ end
95
+
96
+ # convert a generic feature into a Caf::Read object
97
+ def convert_to_read(feature)
98
+ read = Read.new
99
+ read.name = feature[:name]
100
+ read.seq = feature[:seq]
101
+ read.quality = feature[:qual]
102
+ read.clear_range_from = feature[:clipping_start]
103
+ read.clear_range_to = feature[:clipping_end]
104
+ read.orientation = feature[:orientation]
105
+ return read
106
+ end
107
+
108
+ # convert a generic feature into a Caf::Contig object
109
+ def convert_to_contig(contig,feature)
110
+ contig.name = feature[:name]
111
+ contig.seq = feature[:seq]
112
+ contig.quality = feature[:qual]
113
+ # assign read ranges using the Assembled_from lines of the contig
114
+ feature[:af].each do |af|
115
+ val = af.split("\s")
116
+ contig.reads[val[-5]].from = val[-4]
117
+ contig.reads[val[-5]].to = val[-3]
118
+ end
119
+ return contig
120
+ end
121
+
122
+ end # end Caf
123
+ end # end Assembly
124
+ end # end Bio
@@ -10,7 +10,7 @@ module Bio
10
10
  def initialize(str="")
11
11
  @reads = Hash.new
12
12
  @seq = Bio::Sequence::NA.new(str)
13
- # counter for RD identifier
13
+ # counter for Reads
14
14
  @rds_parsed = 0
15
15
  end
16
16
 
@@ -54,6 +54,10 @@ module Bio
54
54
  seq.length
55
55
  end
56
56
 
57
+ def seq=(str)
58
+ @seq = Bio::Sequence::NA.new(str)
59
+ end
60
+
57
61
  def num_base_segments
58
62
  num_base_sequences = 0
59
63
  each_read do |read|
@@ -0,0 +1,15 @@
1
+ module Bio
2
+ class Assembly
3
+ class Maf < Bio::Assembly
4
+
5
+ # register parser with superclass
6
+ register_parser :maf
7
+
8
+ def initialize(path)
9
+ @file = File.new(path, 'r')
10
+ # TODO: MAF parsing is not implemented yet; only the file handle is opened
11
+ end
12
+
13
+ end # end Maf
14
+ end # end Assembly
15
+ end # end Bio
@@ -26,6 +26,10 @@ module Bio
26
26
  @to = new_to.to_i
27
27
  end
28
28
 
29
+ def seq=(str)
30
+ @seq = Bio::Sequence::NA.new(str)
31
+ end
32
+
29
33
  def clear_range_from=(new_clear_range_from)
30
34
  @clear_range_from = new_clear_range_from.to_i
31
35
  end
@@ -1,14 +1,18 @@
1
1
  require 'helper'
2
2
 
3
- class TestBioAssembly < Test::Unit::TestCase
3
+ class TestBioAssemblyAce < Test::Unit::TestCase
4
4
 
5
5
  def setup
6
6
  ace_filename = File.join('data', 'example1.ace')
7
- @obj = Bio::Assembly.create(ace_filename, :ace)
7
+ @obj = Bio::Assembly.open(ace_filename, :ace)
8
8
 
9
9
  # pick a contig to do in depth tests on
10
10
  @contig = nil
11
- @obj.each_contig { |c| @contig = c if c.name.to_i == 5 }
11
+ @tot_contig = []
12
+ @obj.each_contig do |c|
13
+ @contig = c if c.name.to_i == 5
14
+ @tot_contig << c
15
+ end
12
16
 
13
17
  # pick a read to do in depth tests on
14
18
  @read = nil
@@ -17,13 +21,13 @@ class TestBioAssembly < Test::Unit::TestCase
17
21
 
18
22
  def test_num_contigs_parsed
19
23
  contigs_parsed = 13
20
- assert_equal(contigs_parsed, @obj.contigs.size)
24
+ assert_equal(contigs_parsed, @tot_contig.size)
21
25
  end
22
26
 
23
27
  def test_num_reads_parsed
24
28
  reads_parsed_known = 1760
25
29
  reads_parsed = 0
26
- @obj.contigs.each { |c| reads_parsed += c.reads.size }
30
+ @tot_contig.each { |c| reads_parsed += c.reads.size }
27
31
  assert_equal(reads_parsed_known, reads_parsed)
28
32
  end
29
33
 
@@ -34,12 +38,16 @@ class TestBioAssembly < Test::Unit::TestCase
34
38
 
35
39
  def test_contig_seq
36
40
  seq = "TTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTA*TTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTTAATTTCAAGATGATTCACAGGTTTGTTGCCTCAAAAGAAAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGACAAGTTGACTCAACTTTTCAC*TTTATGTTATATTGTAAGGATGTGACTTTGTTTTGGAAAATTATATTTAATTTGATAATTAACCAATATAAAAAAGATAAACCAAAAGCTATAAGTCGTAAATAAGGACATTGGAAACAAGAAATATTCTCTCCTGAACATTATTTTAAATTATGCGCAATATGCAAATTTATAAGTGTTAAGTTAAAAAGATTGTTAATGGTTCTGTTTATTACCCAAAGACTTTTTTAAAGTTTAAGTCGTTGCTAAGAGTGCAGCGTTTAGACAAATAAAAATGCAATAATCTTCTCGCTCGGGAGCTATGTCCCTCGCATAATATTCTTCAAAGTGTACAGTAAATATTCTAGAAAAGTGAAGTGTGAAAAAGATATATTGCTTGTTTTTATATTTTGTTAATACAACAAAACTTCAAAAACCTGCGGTGGGGGGGGGGGGATAGTCACTTCCGTCACCTTCACCCCTCTCGTTCACTATACTCCCTCGCCCTGGCGTAATGATGGGGGGATTGGGGGTAGTTGCCCCTTAATAAAGTTCAAACTTGATTTATTTCTAACTCGATACCAGTGATTTACAAATGTTTCTGAAATGGCATGGTTTTCCCTAATAAATGCCTAAAAACCCTGAGCTGAGCCCACGCCAATT"
37
- assert_equal(seq, @contig.seq.to_s)
41
+ assert_equal(seq, @contig.seq.to_s.upcase)
42
+ assert_instance_of(Bio::Sequence::NA,@contig.seq)
43
+ assert_instance_of(Bio::Assembly::Ace::Contig,@contig)
38
44
  end
39
45
 
40
46
  def test_read_seq
41
47
  read_seq = 'GAAAAAAAAAGGCAGAAGTTTAATCAAAACGGATTTTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTATTTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTT*ATTTC*AGATGATTCACAGGTTTGTTGCCTCAAAAG*AAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGGACAAGTTGACTTCACCT'
42
- assert_equal(read_seq, @read.seq.to_s)
48
+ assert_equal(read_seq, @read.seq.to_s.upcase)
49
+ assert_instance_of(Bio::Sequence::NA,@read.seq)
50
+ assert_instance_of(Bio::Assembly::Ace::Read,@read)
43
51
  end
44
52
 
45
53
  def test_read_range
@@ -0,0 +1,54 @@
1
+ require 'helper'
2
+
3
+ class TestBioAssemblyCaf < Test::Unit::TestCase
4
+
5
+ def setup
6
+ path = File.join('data','example.caf')
7
+ @caf = Bio::Assembly.open(path,:caf)
8
+ @contigs = []
9
+ @caf.each_contig {|c| @contigs << c}
10
+ end
11
+
12
+ def test_contigs
13
+ assert_equal(2,@contigs.size)
14
+ assert_equal('Contig1',@contigs[0].name)
15
+ assert_equal('Contig2',@contigs[1].name)
16
+ assert_instance_of(Bio::Assembly::Caf::Contig,@contigs[0])
17
+ end
18
+
19
+ def test_read_per_contig
20
+ assert_equal(21,@contigs[0].reads.size)
21
+ assert_equal(21,@contigs[0].num_reads)
22
+ end
23
+
24
+ def test_contig_seq
25
+ seq = "TTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTATANACTGTGCGTGCGCCACCATGCCTGGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTCGAGCTCCTGACCTAGGATTACAGGCCTAAGCCACCGCACCCGGCATGATGGGTCTTTATTCTTCAAAGCAGGAGGAAGGGATCCTAGAAAAACAGAGACAAGGCCAAACATGGTAGCTCACACCTGTAATNNNNNCACTTTGGGAGGCCAGTGCGGGTGAATCACGANGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAANCACCGTCTCTACTAAAATAAAAAGAAATTAGCTGGGTGTCGTGGCAGGTGCNTGTAATCCCAGCCACTTGGGAAGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGTGGAGGTTGCAGTGAGCCGAGATCACGCGACTGCACTCCAGCCCAACCAATAGTGTGAGACTCTGTCTCGAAAAAAAAAAAGCAGAGACAAGACNACTAGTACAGTACTTACAGGGTTATTATGATGATTAAATGAGAGAATAGCTGTGAGGTGATTGATATAGTGCTGTGCTTAATACAAACTATCATTTTATTATACGGGTTGAGTGTNTCTAATCTGAAAATCCAAAATTAGAAATGCTCTACAGTCTGAAACTTTTTTGAGCACCGACCTAATGTTCAAAGGAAGTGCTTATTGGAGCATTATGGGTTGTTAGATTTTTGGGTTGGGAATATTCAACCAGTAAGTACTATAAAATGCAAATATTCCAAAAAAAATCTGAAATCTGAAACATTTCTGGTCCTAAGCAAGCATTTTGCAAAGGGATACGCAACCTGTAGTACGTTCTTTATCATTGTTTTAAGTAGTTAATATATTGTGGTACAGATTCTGAGGTGGTATAGCAAATTCGATTGTATTATTAAAAAGCATATTTATATTTTGAGAGCTTGCTTAGGATTATTGGAGAGAATAAAACAGTGAAGCTTTGGTGTTATGAGGGAATTTTAGATAGAAAAGTGCAGTTTTTCAGTTCATGCTCTTTCATTTTTTACTCCCTCAGGTTAAAGCTNGAAGCTCAACAAAGATATAGTGATCTCTGTGGGCATTTATAATCTGGTCCAGAAGGCTCTNNANNCNNNTCCNNNNNNNCTNNANNNNNNNACAAATGAACCAGTGAAAACCAAGACCCGGACCTTTAATACAAGTACAGGCGGTTTGCTTCTGCCTAGCGATACCAAGAGGTCTCAGGTAGGTAGAGATGCCTTTTGTTGTTGTTGTTTTTGAGACAGGGTCTCATTGTGTCGCCCAGGCTGGAGTGCAGTGGGGCGAACATGACTCGCTACAGCCTTGACCTCCTGGACTCAAGCGATCCTTCTGTCTCAGCCTCCCAAGTAGCTGGGATCACAGGCATGTGACATCACACCCAGCTAATTTATTTATTTATTTATTTTTTAAGAGACTGGATCGACTGGGCACAGTGGCTCATGCCTGTAATCCCANCACTTTGGGAGGCCGAGGCAGGTGGATTACCGAGGTCAGGAGTTCAAGACCAGCCTGACCAACATGGAGAAACCCCATCTCTACTAAAAATACAAAATGAGCTGGGCATGGTGGTGCATGCCTGTAATCC"
26
+ assert_equal(seq,@contigs[0].seq.to_s.upcase)
27
+ assert_instance_of(Bio::Sequence::NA,@contigs[0].seq)
28
+ end
29
+
30
+ def test_contig_qual
31
+ qual = "4 4 8 4 6 10 13 21 24 25 33 33 33 30 27 21 15 27 19 30 30 33 33 30 21 21 10 9 17 11 27 27 37 38 33 35 35 35 35 44 45 45 38 37 37 45 45 45 43 43 43 45 45 45 45 45 45 45 21 21 23 30 30 34 37 37 38 45 32 45 37 41 37 45 30 45 45 34 34 34 32 29 22 32 32 45 45 45 33 28 28 37 37 34 34 34 35 34 34 34 34 34 34 37 37 40 41 37 34 37 37 37 32 24 22 27 29 25 27 20 21 21 21 27 45 41 40 40 40 42 34 34 37 41 45 51 45 45 37 30 37 41 41 37 36 28 30 30 30 22 33 33 35 33 33 41 51 51 45 39 39 39 30 28 33 34 34 39 34 41 34 34 41 33 33 39 39 39 33 33 33 30 33 33 33 30 29 19 19 24 25 32 33 45 36 36 36 36 41 41 30 30 30 37 37 43 51 51 51 51 45 37 37 30 37 37 37 51 51 51 51 51 37 37 28 28 10 10 10 13 10 10 10 9 9 9 21 21 28 37 37 37 33 33 33 33 33 33 34 34 34 33 33 33 33 21 25 25 22 22 29 29 33 33 33 33 31 31 27 28 17 23 23 28 26 24 32 10 10 10 15 15 26 32 32 37 45 45 32 45 33 26 24 32 37 37 35 34 37 35 37 34 37 37 40 37 37 45 45 38 38 45 45 40 37 37 37 34 30 30 28 28 28 30 37 37 34 34 34 34 34 33 31 31 25 21 19 22 21 25 30 32 32 30 22 22 25 28 24 25 26 17 17 23 27 27 33 31 31 34 38 38 38 38 34 26 17 15 8 8 11 16 25 31 32 33 29 26 32 29 33 34 34 34 36 33 34 34 31 27 31 34 38 45 45 36 37 37 37 36 36 38 34 32 28 25 22 22 21 17 18 29 29 23 23 22 27 25 20 18 11 11 15 15 16 23 21 17 19 24 24 24 31 31 31 33 33 33 31 33 33 29 29 29 29 29 31 31 31 23 24 19 12 10 16 10 10 20 10 10 13 15 30 23 29 23 28 18 10 10 16 21 18 19 19 24 24 12 11 9 9 10 21 23 31 31 33 28 28 14 17 17 28 21 30 24 30 28 22 26 23 19 10 10 12 23 22 23 19 12 10 9 9 10 19 24 29 30 34 34 34 34 34 29 25 25 32 26 31 31 32 20 17 15 12 12 4 4 4 17 23 33 30 24 18 13 18 16 12 16 8 9 9 19 15 15 4 4 4 7 11 6 6 6 7 8 9 17 12 13 19 23 25 13 13 13 19 21 29 32 30 26 20 20 12 9 9 8 9 8 8 15 24 17 15 8 8 9 17 16 4 4 4 8 8 13 15 23 14 9 8 8 10 10 17 23 21 17 12 13 22 20 19 15 11 11 9 8 8 8 8 8 8 10 9 9 7 8 8 10 12 10 10 12 4 4 4 4 4 4 10 10 10 10 10 9 8 8 7 8 9 11 9 9 4"
32
+ assert_equal(qual,@contigs[1].quality)
33
+ end
34
+
35
+ def test_read
36
+ read = nil
37
+ assert_nothing_raised do
38
+ read = @contigs[0].find_read_by_name("22ak93c2.r1t")
39
+ end
40
+ seq = "GTCGCNCATAAGATTACGAGATCTCGAGCTCGGTACCCTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTATAGACTGTGCGTGCGCCACCATGCCTGGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTCGAGCTCCTGACCTAGGATTACAGGCCTAAGCCACCGCACCCGGCATGATGGGTCTTTATTCTTCAAAGCAGGAGGAAGGGATCCTAGAAAAACAGAGACAAGGCCAAACATGGTAGCTCACACCTGTAATCCCANCACTTTGGGAGGCCAGTGCGGGTGAATCACGAAGTCAGGAGTTCAAGACCACCCTGGCCAACATGGTGTAACACCTGCTCTCCTAAAATTAAACAAAATTTCATGGTTTGCGTGGGCCGTCTTGTCTCATCACTTCACTCCTGAGGGCCGGCGCCGGAAAGATATCTTGATCTGCGGCGCTCCGACCGTTTTCTTTAAACCTTACAACTCCCGACCTCCTCGCCTATCCTCCCTAAATCCTCGCCAGGCTCGCCTGCTTCAGCCACTCTTTCCTTCGCACCCTCCCCTCTCTTCAATATACTTCACCCGCCCATCCTTCACGCCGGCACGTATCCAATCTCTTCTTATCTTTCCGTATCCAANTCCCTTCTCCCTCTGCCGCGACCTTCGCCATCCCTCTGCGCGTCCTCTTCC"
41
+ assert_equal(seq,read.seq.to_s.upcase)
42
+ assert_instance_of(Bio::Sequence::NA,read.seq)
43
+ qual = "4 4 8 4 4 4 4 4 4 4 4 4 6 8 17 21 14 7 6 6 6 7 7 6 8 14 16 21 15 20 20 24 26 21 18 18 14 14 19 23 10 8 8 15 20 16 29 26 34 29 39 29 31 29 31 34 32 27 27 25 19 19 24 31 33 36 34 34 34 26 27 22 32 32 36 28 28 15 15 15 28 28 34 30 12 12 22 27 31 31 31 31 31 23 24 27 21 24 24 29 27 27 27 34 34 36 38 38 38 36 36 40 36 37 38 45 45 36 34 33 31 31 34 34 33 33 28 28 27 23 24 11 11 10 10 18 25 21 20 17 17 17 20 15 24 18 24 26 23 23 18 20 25 23 30 30 30 33 33 37 37 32 37 37 32 45 35 37 37 37 40 36 49 49 36 36 34 33 20 15 9 9 8 7 12 22 21 28 28 30 33 36 36 36 34 31 31 25 31 28 26 26 24 20 17 9 11 8 9 10 23 23 31 23 23 15 9 9 15 33 26 33 33 31 25 25 22 31 24 23 12 10 12 11 9 8 9 7 7 8 8 9 18 12 9 9 18 20 26 31 21 21 9 8 8 11 13 21 21 23 15 15 15 15 15 17 17 9 7 9 19 20 21 21 25 25 25 25 25 23 23 9 9 9 21 16 24 24 24 24 26 33 33 33 31 31 27 15 17 7 4 4 4 16 20 27 33 34 34 23 15 14 8 9 6 6 9 1 14 16 8 11 15 23 25 34 36 31 33 16 16 6 6 6 9 8 14 9 11 11 9 13 13 10 8 10 9 9 7 8 20 20 20 14 14 10 10 10 16 8 8 6 6 8 9 10 7 8 8 8 8 6 8 6 6 8 12 9 8 7 13 10 8 8 9 8 8 8 9 6 6 6 6 8 6 6 6 6 6 10 11 10 12 12 10 7 6 6 7 6 6 7 8 7 6 6 7 6 8 6 6 8 8 8 8 6 6 6 8 6 6 8 6 6 11 15 9 9 9 9 9 9 9 9 9 9 8 9 9 6 6 6 7 9 6 6 6 7 7 7 8 9 7 7 9 9 11 8 11 11 11 9 10 9 8 8 8 10 6 6 9 8 6 6 6 6 8 8 9 11 9 8 8 8 8 8 8 8 9 9 9 10 10 8 6 6 8 6 6 10 11 10 6 6 8 6 6 6 6 8 11 8 9 8 8 8 8 8 9 6 6 9 8 8 8 6 6 13 15 12 12 8 8 6 6 8 9 8 8 9 8 8 6 6 6 8 8 6 6 6 8 8 8 6 6 8 6 6 8 6 6 9 8 8 6 6 8 8 6 6 8 6 7 8 9 10 11 11 10 9 9 8 10 9 10 10 8 9 13 8 8 8 13 13 11 7 7 7 11 8 8 10 11 10 9 9 14 7 7 10 10 10 8 8 8 8 6 7 8 8 6 6 6 6 6 6 6 6 6 8 9 10 8 8 6 6 8 8 8 8 8 8 8 8 8 8 6 6 8 8 9 10 10 4 4 4 6 7 6 6 6 6 10 8 8 9 15 11 6 6 8 8 8 8 8 8 9 9 8 9 7 7 8 6 6 8 13 10 8 8 7 6 8 6 6 8 6 7 8 8 8 8 8 8 8 4"
44
+ assert_equal(qual,read.quality)
45
+ assert_equal(39,read.clear_range_from)
46
+ assert_equal(331,read.clear_range_to)
47
+ assert_equal(1,read.from)
48
+ assert_equal(293,read.to)
49
+ assert_equal("Reverse",read.orientation)
50
+ assert_instance_of(Bio::Assembly::Caf::Read,read)
51
+ end
52
+
53
+
54
+ end
metadata CHANGED
@@ -5,24 +5,38 @@ version: !ruby/object:Gem::Version
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 2
10
- version: 0.0.2
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Chase Miller
14
+ - Francesco Strozzi
14
15
  autorequire:
15
16
  bindir: bin
16
17
  cert_chain: []
17
18
 
18
- date: 2010-12-27 00:00:00 -05:00
19
+ date: 2011-01-12 00:00:00 -05:00
19
20
  default_executable:
20
21
  dependencies:
21
22
  - !ruby/object:Gem::Dependency
22
- type: :development
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 5
29
+ segments:
30
+ - 1
31
+ - 4
32
+ - 1
33
+ version: 1.4.1
34
+ type: :runtime
35
+ name: bio
36
+ version_requirements: *id001
23
37
  prerelease: false
24
- name: shoulda
25
- version_requirements: &id001 !ruby/object:Gem::Requirement
38
+ - !ruby/object:Gem::Dependency
39
+ requirement: &id002 !ruby/object:Gem::Requirement
26
40
  none: false
27
41
  requirements:
28
42
  - - ">="
@@ -31,12 +45,12 @@ dependencies:
31
45
  segments:
32
46
  - 0
33
47
  version: "0"
34
- requirement: *id001
35
- - !ruby/object:Gem::Dependency
36
48
  type: :development
49
+ name: shoulda
50
+ version_requirements: *id002
37
51
  prerelease: false
38
- name: bundler
39
- version_requirements: &id002 !ruby/object:Gem::Requirement
52
+ - !ruby/object:Gem::Dependency
53
+ requirement: &id003 !ruby/object:Gem::Requirement
40
54
  none: false
41
55
  requirements:
42
56
  - - ~>
@@ -47,12 +61,12 @@ dependencies:
47
61
  - 0
48
62
  - 0
49
63
  version: 1.0.0
50
- requirement: *id002
51
- - !ruby/object:Gem::Dependency
52
64
  type: :development
65
+ name: bundler
66
+ version_requirements: *id003
53
67
  prerelease: false
54
- name: jeweler
55
- version_requirements: &id003 !ruby/object:Gem::Requirement
68
+ - !ruby/object:Gem::Dependency
69
+ requirement: &id004 !ruby/object:Gem::Requirement
56
70
  none: false
57
71
  requirements:
58
72
  - - ~>
@@ -63,12 +77,12 @@ dependencies:
63
77
  - 5
64
78
  - 2
65
79
  version: 1.5.2
66
- requirement: *id003
67
- - !ruby/object:Gem::Dependency
68
80
  type: :development
81
+ name: jeweler
82
+ version_requirements: *id004
69
83
  prerelease: false
70
- name: rcov
71
- version_requirements: &id004 !ruby/object:Gem::Requirement
84
+ - !ruby/object:Gem::Dependency
85
+ requirement: &id005 !ruby/object:Gem::Requirement
72
86
  none: false
73
87
  requirements:
74
88
  - - ">="
@@ -77,12 +91,12 @@ dependencies:
77
91
  segments:
78
92
  - 0
79
93
  version: "0"
80
- requirement: *id004
81
- - !ruby/object:Gem::Dependency
82
94
  type: :development
95
+ name: rcov
96
+ version_requirements: *id005
83
97
  prerelease: false
84
- name: bio
85
- version_requirements: &id005 !ruby/object:Gem::Requirement
98
+ - !ruby/object:Gem::Dependency
99
+ requirement: &id006 !ruby/object:Gem::Requirement
86
100
  none: false
87
101
  requirements:
88
102
  - - ">="
@@ -93,7 +107,10 @@ dependencies:
93
107
  - 4
94
108
  - 1
95
109
  version: 1.4.1
96
- requirement: *id005
110
+ type: :development
111
+ name: bio
112
+ version_requirements: *id006
113
+ prerelease: false
97
114
  description: bioruby plugin to parse, write, and manipulate assembly data
98
115
  email: chmille4@gmail.com
99
116
  executables: []
@@ -112,13 +129,17 @@ files:
112
129
  - Rakefile
113
130
  - VERSION
114
131
  - bio-assembly.gemspec
132
+ - data/example.caf
115
133
  - data/example1.ace
116
134
  - lib/bio-assembly.rb
117
135
  - lib/bio-assembly/ace.rb
136
+ - lib/bio-assembly/caf.rb
118
137
  - lib/bio-assembly/contig.rb
138
+ - lib/bio-assembly/maf.rb
119
139
  - lib/bio-assembly/read.rb
120
140
  - test/helper.rb
121
- - test/test_bio-assembly.rb
141
+ - test/test_bio-assembly-ace.rb
142
+ - test/test_bio-assembly-caf.rb
122
143
  has_rdoc: true
123
144
  homepage: http://github.com/chmille4/bioruby-assembly
124
145
  licenses:
@@ -155,4 +176,5 @@ specification_version: 3
155
176
  summary: BioRuby Assembly plugin
156
177
  test_files:
157
178
  - test/helper.rb
158
- - test/test_bio-assembly.rb
179
+ - test/test_bio-assembly-ace.rb
180
+ - test/test_bio-assembly-caf.rb