bio-assembly 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,10 @@ require 'bio-assembly/read'
5
5
  module Bio
6
6
 
7
7
  class Assembly
8
- attr_accessor :contigs
9
-
8
+
10
9
  @@formats = { }
11
10
 
12
- def self.create(path, format)
11
+ def self.open(path, format)
13
12
  streamer = @@formats[format]
14
13
  if streamer
15
14
  streamer.new(path)
@@ -22,21 +21,11 @@ module Bio
22
21
  @@formats[name] = self
23
22
  end
24
23
 
25
- def contigs
26
- # use each_contig to stream large files
27
- parse_whole_file if @contigs.empty?
28
- @contigs
29
- end
30
-
31
24
  def each_contig
32
25
  # implemented by each format subclass
33
26
  end
34
27
 
35
28
  private
36
-
37
- def num_contigs
38
- contigs.size
39
- end
40
29
 
41
30
  def num_reads
42
31
  read_num = 0
@@ -52,4 +41,5 @@ module Bio
52
41
 
53
42
  end
54
43
 
55
- require 'bio-assembly/ace'
44
+ require 'bio-assembly/ace'
45
+ require 'bio-assembly/caf'
@@ -9,22 +9,15 @@ class Assembly
9
9
 
10
10
  def initialize(path)
11
11
  @file = File.new(path, 'r')
12
- @contigs = Array.new
13
12
  parse_as
14
13
  end
15
14
 
16
15
  def each_contig
17
- # check if file is already parsed
18
- if @total_num_contigs.to_i == @contigs.size
19
- @contigs.each{ |contig| yield contig }
20
- else
21
16
  each_identifier do |identifier, attrs|
22
17
  next unless identifier == 'CO'
23
18
  contig = parse_contig(attrs)
24
- @contigs.push contig
25
19
  yield(contig)
26
20
  end
27
- end
28
21
  end
29
22
 
30
23
  def to_ace
@@ -35,8 +28,9 @@ class Assembly
35
28
  end
36
29
 
37
30
  private
31
+
38
32
  def parse_contig(attrs)
39
- contig = Bio::Assembly::Contig.new
33
+ contig = Contig.new
40
34
  contig.name, base_num, @num_reads, base_segments_num, contig.orientation = attrs.split(" ")
41
35
  # keep track of the number of RD identifiers parsed
42
36
  @num_rds_parsed = 0
@@ -83,7 +77,7 @@ class Assembly
83
77
 
84
78
  # parse read meta data
85
79
  def parse_af(contig, attrs)
86
- read = Bio::Assembly::Read.new
80
+ read = Read.new
87
81
  read.name , read.orientation, read.from = attrs.split(" ")
88
82
  contig.add_read read
89
83
  end
@@ -139,11 +133,9 @@ class Assembly
139
133
  # parse run meta data - ignored
140
134
  def parse_ct(contig, attrs)
141
135
  end
142
-
143
- end # => end class Ace
144
136
 
145
- # open contig class and write ace specific methods for contig objects
146
- class Contig
137
+ # extend contig class and write ace specific methods for contig objects
138
+ class Contig < Bio::Assembly::Contig
147
139
 
148
140
  def to_ace
149
141
  ace = ""
@@ -176,10 +168,10 @@ class Assembly
176
168
  ace
177
169
  end
178
170
 
179
- end # => end Contig class
171
+ end # => end Contig class
180
172
 
181
- # open Read class to add ace specific methods for read objects
182
- class Read
173
+ # extend Read class to add ace specific methods for read objects
174
+ class Read < Bio::Assembly::Read
183
175
 
184
176
  attr_accessor :base_sequences
185
177
 
@@ -238,7 +230,7 @@ class Assembly
238
230
  end
239
231
 
240
232
  def <=>(other)
241
- unless other.kind_of?(Bio::Assembly::Read::BaseSequence)
233
+ unless other.kind_of?(Bio::Assembly::Ace::Read::BaseSequence)
242
234
  raise "[Error] markers are not comparable"
243
235
  end
244
236
  if self.from == other.from
@@ -251,8 +243,8 @@ class Assembly
251
243
 
252
244
  end # => end BaseSequence Class
253
245
 
254
- end # => end Read Class
255
-
246
+ end # => end Read Class
247
+ end # => end class Ace
256
248
 
257
249
  end # => end class Assembly
258
250
  end # => end module Bio
@@ -0,0 +1,124 @@
1
+ module Bio
2
+ class Assembly
3
+ class Caf < Bio::Assembly
4
+
5
+ # register parser with superclass
6
+ register_parser :caf
7
+
8
+ def initialize(path)
9
+ @file = File.new(path, 'r')
10
+ end
11
+ # iterator that yields one contig at a time
12
+ def each_contig
13
+ contig = Contig.new
14
+ feature = Hash.new
15
+ @file.each do |line|
16
+ feature = parse_blocks(line,feature) # search the file for CAF blocks like DNA and Sequence
17
+ if feature[:type] == :read and feature[:parsed]
18
+ read = convert_to_read(feature)
19
+ contig.add_read(read)
20
+ feature = Hash.new
21
+ elsif feature[:type] == :contig and feature[:parsed]
22
+ contig = convert_to_contig(contig,feature)
23
+ yield contig
24
+ contig = Contig.new
25
+ feature = Hash.new
26
+ end
27
+ end
28
+ end
29
+
30
+ class Contig < Bio::Assembly::Contig
31
+ end
32
+
33
+
34
+ class Read < Bio::Assembly::Read
35
+ attr_accessor :quality
36
+ end
37
+
38
+ private
39
+
40
+ def parse_blocks(line,feat)
41
+ keywords = line.split("\s")
42
+ case keywords[0]
43
+ when "DNA" then parse_dna(feat)
44
+ when "Sequence" then parse_seq(feat,line)
45
+ end
46
+ return feat
47
+ end
48
+
49
+ # parse DNA sequence and BaseQuality
50
+ def parse_dna(feat)
51
+ feat[:seq] = @file.gets("\n\n").tr("\n","")
52
+ newline = @file.gets
53
+ keywords = newline.split("\s")
54
+ feat[:qual] = @file.gets("\n\n").tr("\n"," ").rstrip if keywords[0] == "BaseQuality"
55
+ feat[:parsed] = true if feat[:type] == :contig
56
+ end
57
+
58
+ # parse Sequence information like Name, Clipping, Strand and Type
59
+ def parse_seq(feat,line)
60
+ feat[:name] = line.split(":")[1].tr("\s|\n","")
61
+ sequence_block = @file.gets("\n\n")
62
+ sequence_block.split("\n").each do |l|
63
+ keywords = l.split("\s")
64
+ case keywords[0]
65
+ when "Clipping" then parse_clipping(feat,l)
66
+ when "Strand" then parse_strand(feat,l)
67
+ when "Assembled_from" then parse_af(feat,l)
68
+ when "Is_read" then feat[:type] = :read
69
+ when "Is_contig" then feat[:type] = :contig
70
+ end
71
+ end
72
+ feat[:parsed] = true if feat[:type] == :read
73
+ end
74
+
75
+ # parse read coordinates for quality clipping
76
+ def parse_clipping(feat,line)
77
+ val = line.chomp.split("\s")
78
+ feat[:clipping_start] = val[-2]
79
+ feat[:clipping_end] = val[-1]
80
+ end
81
+
82
+ # parse sequence strand information
83
+ def parse_strand(feat,line)
84
+ feat[:orientation] = line.split("\s")[1].tr("\n","")
85
+ end
86
+
87
+ # parse Assembled_from lines in Contig. These lines also include read alignment positions within the contig
88
+ def parse_af(feat,line)
89
+ if feat[:af].nil?
90
+ feat[:af] = [line]
91
+ else
92
+ feat[:af] << line
93
+ end
94
+ end
95
+
96
+ # convert a generic feature into a Caf::Read object
97
+ def convert_to_read(feature)
98
+ read = Read.new
99
+ read.name = feature[:name]
100
+ read.seq = feature[:seq]
101
+ read.quality = feature[:qual]
102
+ read.clear_range_from = feature[:clipping_start]
103
+ read.clear_range_to = feature[:clipping_end]
104
+ read.orientation = feature[:orientation]
105
+ return read
106
+ end
107
+
108
+ # convert a generic feature into a Caf::Contig object
109
+ def convert_to_contig(contig,feature)
110
+ contig.name = feature[:name]
111
+ contig.seq = feature[:seq]
112
+ contig.quality = feature[:qual]
113
+ # assign read ranges (from/to) using the Assembled_from lines of the contig
114
+ feature[:af].each do |af|
115
+ val = af.split("\s")
116
+ contig.reads[val[-5]].from = val[-4]
117
+ contig.reads[val[-5]].to = val[-3]
118
+ end
119
+ return contig
120
+ end
121
+
122
+ end # end Caf
123
+ end # end Assembly
124
+ end # end Bio
@@ -10,7 +10,7 @@ module Bio
10
10
  def initialize(str="")
11
11
  @reads = Hash.new
12
12
  @seq = Bio::Sequence::NA.new(str)
13
- # counter for RD identifier
13
+ # counter for Reads
14
14
  @rds_parsed = 0
15
15
  end
16
16
 
@@ -54,6 +54,10 @@ module Bio
54
54
  seq.length
55
55
  end
56
56
 
57
+ def seq=(str)
58
+ @seq = Bio::Sequence::NA.new(str)
59
+ end
60
+
57
61
  def num_base_segments
58
62
  num_base_sequences = 0
59
63
  each_read do |read|
@@ -0,0 +1,15 @@
1
+ module Bio
2
+ class Assembly
3
+ class Maf < Bio::Assembly
4
+
5
+ # register parser with superclass
6
+ register_parser :maf
7
+
8
+ def initialize(path)
9
+ @file = File.new(path, 'r')
10
+ # TODO: MAF parsing is not implemented yet (each_contig is not defined for this format)
11
+ end
12
+
13
+ end # end Maf
14
+ end # end Assembly
15
+ end # end Bio
@@ -26,6 +26,10 @@ module Bio
26
26
  @to = new_to.to_i
27
27
  end
28
28
 
29
+ def seq=(str)
30
+ @seq = Bio::Sequence::NA.new(str)
31
+ end
32
+
29
33
  def clear_range_from=(new_clear_range_from)
30
34
  @clear_range_from = new_clear_range_from.to_i
31
35
  end
@@ -1,14 +1,18 @@
1
1
  require 'helper'
2
2
 
3
- class TestBioAssembly < Test::Unit::TestCase
3
+ class TestBioAssemblyAce < Test::Unit::TestCase
4
4
 
5
5
  def setup
6
6
  ace_filename = File.join('data', 'example1.ace')
7
- @obj = Bio::Assembly.create(ace_filename, :ace)
7
+ @obj = Bio::Assembly.open(ace_filename, :ace)
8
8
 
9
9
  # pick a contig to do in depth tests on
10
10
  @contig = nil
11
- @obj.each_contig { |c| @contig = c if c.name.to_i == 5 }
11
+ @tot_contig = []
12
+ @obj.each_contig do |c|
13
+ @contig = c if c.name.to_i == 5
14
+ @tot_contig << c
15
+ end
12
16
 
13
17
  # pick a read to do in depth tests on
14
18
  @read = nil
@@ -17,13 +21,13 @@ class TestBioAssembly < Test::Unit::TestCase
17
21
 
18
22
  def test_num_contigs_parsed
19
23
  contigs_parsed = 13
20
- assert_equal(contigs_parsed, @obj.contigs.size)
24
+ assert_equal(contigs_parsed, @tot_contig.size)
21
25
  end
22
26
 
23
27
  def test_num_reads_parsed
24
28
  reads_parsed_known = 1760
25
29
  reads_parsed = 0
26
- @obj.contigs.each { |c| reads_parsed += c.reads.size }
30
+ @tot_contig.each { |c| reads_parsed += c.reads.size }
27
31
  assert_equal(reads_parsed_known, reads_parsed)
28
32
  end
29
33
 
@@ -34,12 +38,16 @@ class TestBioAssembly < Test::Unit::TestCase
34
38
 
35
39
  def test_contig_seq
36
40
  seq = "TTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTA*TTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTTAATTTCAAGATGATTCACAGGTTTGTTGCCTCAAAAGAAAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGACAAGTTGACTCAACTTTTCAC*TTTATGTTATATTGTAAGGATGTGACTTTGTTTTGGAAAATTATATTTAATTTGATAATTAACCAATATAAAAAAGATAAACCAAAAGCTATAAGTCGTAAATAAGGACATTGGAAACAAGAAATATTCTCTCCTGAACATTATTTTAAATTATGCGCAATATGCAAATTTATAAGTGTTAAGTTAAAAAGATTGTTAATGGTTCTGTTTATTACCCAAAGACTTTTTTAAAGTTTAAGTCGTTGCTAAGAGTGCAGCGTTTAGACAAATAAAAATGCAATAATCTTCTCGCTCGGGAGCTATGTCCCTCGCATAATATTCTTCAAAGTGTACAGTAAATATTCTAGAAAAGTGAAGTGTGAAAAAGATATATTGCTTGTTTTTATATTTTGTTAATACAACAAAACTTCAAAAACCTGCGGTGGGGGGGGGGGGATAGTCACTTCCGTCACCTTCACCCCTCTCGTTCACTATACTCCCTCGCCCTGGCGTAATGATGGGGGGATTGGGGGTAGTTGCCCCTTAATAAAGTTCAAACTTGATTTATTTCTAACTCGATACCAGTGATTTACAAATGTTTCTGAAATGGCATGGTTTTCCCTAATAAATGCCTAAAAACCCTGAGCTGAGCCCACGCCAATT"
37
- assert_equal(seq, @contig.seq.to_s)
41
+ assert_equal(seq, @contig.seq.to_s.upcase)
42
+ assert_instance_of(Bio::Sequence::NA,@contig.seq)
43
+ assert_instance_of(Bio::Assembly::Ace::Contig,@contig)
38
44
  end
39
45
 
40
46
  def test_read_seq
41
47
  read_seq = 'GAAAAAAAAAGGCAGAAGTTTAATCAAAACGGATTTTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTATTTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTT*ATTTC*AGATGATTCACAGGTTTGTTGCCTCAAAAG*AAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGGACAAGTTGACTTCACCT'
42
- assert_equal(read_seq, @read.seq.to_s)
48
+ assert_equal(read_seq, @read.seq.to_s.upcase)
49
+ assert_instance_of(Bio::Sequence::NA,@read.seq)
50
+ assert_instance_of(Bio::Assembly::Ace::Read,@read)
43
51
  end
44
52
 
45
53
  def test_read_range
@@ -0,0 +1,54 @@
1
+ require 'helper'
2
+
3
+ class TestBioAssemblyCaf < Test::Unit::TestCase
4
+
5
+ def setup
6
+ path = File.join('data','example.caf')
7
+ @caf = Bio::Assembly.open(path,:caf)
8
+ @contigs = []
9
+ @caf.each_contig {|c| @contigs << c}
10
+ end
11
+
12
+ def test_contigs
13
+ assert_equal(2,@contigs.size)
14
+ assert_equal('Contig1',@contigs[0].name)
15
+ assert_equal('Contig2',@contigs[1].name)
16
+ assert_instance_of(Bio::Assembly::Caf::Contig,@contigs[0])
17
+ end
18
+
19
+ def test_read_per_contig
20
+ assert_equal(21,@contigs[0].reads.size)
21
+ assert_equal(21,@contigs[0].num_reads)
22
+ end
23
+
24
+ def test_contig_seq
25
+ seq = "TTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTATANACTGTGCGTGCGCCACCATGCCTGGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTCGAGCTCCTGACCTAGGATTACAGGCCTAAGCCACCGCACCCGGCATGATGGGTCTTTATTCTTCAAAGCAGGAGGAAGGGATCCTAGAAAAACAGAGACAAGGCCAAACATGGTAGCTCACACCTGTAATNNNNNCACTTTGGGAGGCCAGTGCGGGTGAATCACGANGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAANCACCGTCTCTACTAAAATAAAAAGAAATTAGCTGGGTGTCGTGGCAGGTGCNTGTAATCCCAGCCACTTGGGAAGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGTGGAGGTTGCAGTGAGCCGAGATCACGCGACTGCACTCCAGCCCAACCAATAGTGTGAGACTCTGTCTCGAAAAAAAAAAAGCAGAGACAAGACNACTAGTACAGTACTTACAGGGTTATTATGATGATTAAATGAGAGAATAGCTGTGAGGTGATTGATATAGTGCTGTGCTTAATACAAACTATCATTTTATTATACGGGTTGAGTGTNTCTAATCTGAAAATCCAAAATTAGAAATGCTCTACAGTCTGAAACTTTTTTGAGCACCGACCTAATGTTCAAAGGAAGTGCTTATTGGAGCATTATGGGTTGTTAGATTTTTGGGTTGGGAATATTCAACCAGTAAGTACTATAAAATGCAAATATTCCAAAAAAAATCTGAAATCTGAAACATTTCTGGTCCTAAGCAAGCATTTTGCAAAGGGATACGCAACCTGTAGTACGTTCTTTATCATTGTTTTAAGTAGTTAATATATTGTGGTACAGATTCTGAGGTGGTATAGCAAATTCGATTGTATTATTAAAAAGCATATTTATATTTTGAGAGCTTGCTTAGGATTATTGGAGAGAATAAAACAGTGAAGCTTTGGTGTTATGAGGGAATTTTAGATAGAAAAGTGCAGTTTTTCAGTTCATGCTCTTTCATTTTTTACTCCCTCAGGTTAAAGCTNGAAGCTCAACAAAGATATAGTGATCTCTGTGGGCATTTATAATCTGGTCCAGAAGGCTCTNNANNCNNNTCCNNNNNNNCTNNANNNNNNNACAAATGAACCAGTGAAAACCAAGACCCGGACCTTTAATACAAGTACAGGCGGTTTGCTTCTGCCTAGCGATACCAAGAGGTCTCAGGTAGGTAGAGATGCCTTTTGTTGTTGTTGTTTTTGAGACAGGGTCTCATTGTGTCGCCCAGGCTGGAGTGCAGTGGGGCGAACATGACTCGCTACAGCCTTGACCTCCTGGACTCAAGCGATCCTTCTGTCTCAGCCTCCCAAGTAGCTGGGATCACAGGCATGTGACATCACACCCAGCTAATTTATTTATTTATTTATTTTTTAAGAGACTGGATCGACTGGGCACAGTGGCTCATGCCTGTAATCCCANCACTTTGGGAGGCCGAGGCAGGTGGATTACCGAGGTCAGGAGTTCAAGACCAGCCTGACCAACATGGAGAAACCCCATCTCTACTAAAAATACAAAATGAGCTGGGCATGGTGGTGCATGCCTGTAATCC"
26
+ assert_equal(seq,@contigs[0].seq.to_s.upcase)
27
+ assert_instance_of(Bio::Sequence::NA,@contigs[0].seq)
28
+ end
29
+
30
+ def test_contig_qual
31
+ qual = "4 4 8 4 6 10 13 21 24 25 33 33 33 30 27 21 15 27 19 30 30 33 33 30 21 21 10 9 17 11 27 27 37 38 33 35 35 35 35 44 45 45 38 37 37 45 45 45 43 43 43 45 45 45 45 45 45 45 21 21 23 30 30 34 37 37 38 45 32 45 37 41 37 45 30 45 45 34 34 34 32 29 22 32 32 45 45 45 33 28 28 37 37 34 34 34 35 34 34 34 34 34 34 37 37 40 41 37 34 37 37 37 32 24 22 27 29 25 27 20 21 21 21 27 45 41 40 40 40 42 34 34 37 41 45 51 45 45 37 30 37 41 41 37 36 28 30 30 30 22 33 33 35 33 33 41 51 51 45 39 39 39 30 28 33 34 34 39 34 41 34 34 41 33 33 39 39 39 33 33 33 30 33 33 33 30 29 19 19 24 25 32 33 45 36 36 36 36 41 41 30 30 30 37 37 43 51 51 51 51 45 37 37 30 37 37 37 51 51 51 51 51 37 37 28 28 10 10 10 13 10 10 10 9 9 9 21 21 28 37 37 37 33 33 33 33 33 33 34 34 34 33 33 33 33 21 25 25 22 22 29 29 33 33 33 33 31 31 27 28 17 23 23 28 26 24 32 10 10 10 15 15 26 32 32 37 45 45 32 45 33 26 24 32 37 37 35 34 37 35 37 34 37 37 40 37 37 45 45 38 38 45 45 40 37 37 37 34 30 30 28 28 28 30 37 37 34 34 34 34 34 33 31 31 25 21 19 22 21 25 30 32 32 30 22 22 25 28 24 25 26 17 17 23 27 27 33 31 31 34 38 38 38 38 34 26 17 15 8 8 11 16 25 31 32 33 29 26 32 29 33 34 34 34 36 33 34 34 31 27 31 34 38 45 45 36 37 37 37 36 36 38 34 32 28 25 22 22 21 17 18 29 29 23 23 22 27 25 20 18 11 11 15 15 16 23 21 17 19 24 24 24 31 31 31 33 33 33 31 33 33 29 29 29 29 29 31 31 31 23 24 19 12 10 16 10 10 20 10 10 13 15 30 23 29 23 28 18 10 10 16 21 18 19 19 24 24 12 11 9 9 10 21 23 31 31 33 28 28 14 17 17 28 21 30 24 30 28 22 26 23 19 10 10 12 23 22 23 19 12 10 9 9 10 19 24 29 30 34 34 34 34 34 29 25 25 32 26 31 31 32 20 17 15 12 12 4 4 4 17 23 33 30 24 18 13 18 16 12 16 8 9 9 19 15 15 4 4 4 7 11 6 6 6 7 8 9 17 12 13 19 23 25 13 13 13 19 21 29 32 30 26 20 20 12 9 9 8 9 8 8 15 24 17 15 8 8 9 17 16 4 4 4 8 8 13 15 23 14 9 8 8 10 10 17 23 21 17 12 13 22 20 19 15 11 11 9 8 8 8 8 8 8 10 9 9 7 8 8 10 12 10 10 12 4 4 4 4 4 4 10 10 10 10 10 9 8 8 7 8 9 11 9 9 4"
32
+ assert_equal(qual,@contigs[1].quality)
33
+ end
34
+
35
+ def test_read
36
+ read = nil
37
+ assert_nothing_raised do
38
+ read = @contigs[0].find_read_by_name("22ak93c2.r1t")
39
+ end
40
+ seq = "GTCGCNCATAAGATTACGAGATCTCGAGCTCGGTACCCTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTATAGACTGTGCGTGCGCCACCATGCCTGGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTCGAGCTCCTGACCTAGGATTACAGGCCTAAGCCACCGCACCCGGCATGATGGGTCTTTATTCTTCAAAGCAGGAGGAAGGGATCCTAGAAAAACAGAGACAAGGCCAAACATGGTAGCTCACACCTGTAATCCCANCACTTTGGGAGGCCAGTGCGGGTGAATCACGAAGTCAGGAGTTCAAGACCACCCTGGCCAACATGGTGTAACACCTGCTCTCCTAAAATTAAACAAAATTTCATGGTTTGCGTGGGCCGTCTTGTCTCATCACTTCACTCCTGAGGGCCGGCGCCGGAAAGATATCTTGATCTGCGGCGCTCCGACCGTTTTCTTTAAACCTTACAACTCCCGACCTCCTCGCCTATCCTCCCTAAATCCTCGCCAGGCTCGCCTGCTTCAGCCACTCTTTCCTTCGCACCCTCCCCTCTCTTCAATATACTTCACCCGCCCATCCTTCACGCCGGCACGTATCCAATCTCTTCTTATCTTTCCGTATCCAANTCCCTTCTCCCTCTGCCGCGACCTTCGCCATCCCTCTGCGCGTCCTCTTCC"
41
+ assert_equal(seq,read.seq.to_s.upcase)
42
+ assert_instance_of(Bio::Sequence::NA,read.seq)
43
+ qual = "4 4 8 4 4 4 4 4 4 4 4 4 6 8 17 21 14 7 6 6 6 7 7 6 8 14 16 21 15 20 20 24 26 21 18 18 14 14 19 23 10 8 8 15 20 16 29 26 34 29 39 29 31 29 31 34 32 27 27 25 19 19 24 31 33 36 34 34 34 26 27 22 32 32 36 28 28 15 15 15 28 28 34 30 12 12 22 27 31 31 31 31 31 23 24 27 21 24 24 29 27 27 27 34 34 36 38 38 38 36 36 40 36 37 38 45 45 36 34 33 31 31 34 34 33 33 28 28 27 23 24 11 11 10 10 18 25 21 20 17 17 17 20 15 24 18 24 26 23 23 18 20 25 23 30 30 30 33 33 37 37 32 37 37 32 45 35 37 37 37 40 36 49 49 36 36 34 33 20 15 9 9 8 7 12 22 21 28 28 30 33 36 36 36 34 31 31 25 31 28 26 26 24 20 17 9 11 8 9 10 23 23 31 23 23 15 9 9 15 33 26 33 33 31 25 25 22 31 24 23 12 10 12 11 9 8 9 7 7 8 8 9 18 12 9 9 18 20 26 31 21 21 9 8 8 11 13 21 21 23 15 15 15 15 15 17 17 9 7 9 19 20 21 21 25 25 25 25 25 23 23 9 9 9 21 16 24 24 24 24 26 33 33 33 31 31 27 15 17 7 4 4 4 16 20 27 33 34 34 23 15 14 8 9 6 6 9 1 14 16 8 11 15 23 25 34 36 31 33 16 16 6 6 6 9 8 14 9 11 11 9 13 13 10 8 10 9 9 7 8 20 20 20 14 14 10 10 10 16 8 8 6 6 8 9 10 7 8 8 8 8 6 8 6 6 8 12 9 8 7 13 10 8 8 9 8 8 8 9 6 6 6 6 8 6 6 6 6 6 10 11 10 12 12 10 7 6 6 7 6 6 7 8 7 6 6 7 6 8 6 6 8 8 8 8 6 6 6 8 6 6 8 6 6 11 15 9 9 9 9 9 9 9 9 9 9 8 9 9 6 6 6 7 9 6 6 6 7 7 7 8 9 7 7 9 9 11 8 11 11 11 9 10 9 8 8 8 10 6 6 9 8 6 6 6 6 8 8 9 11 9 8 8 8 8 8 8 8 9 9 9 10 10 8 6 6 8 6 6 10 11 10 6 6 8 6 6 6 6 8 11 8 9 8 8 8 8 8 9 6 6 9 8 8 8 6 6 13 15 12 12 8 8 6 6 8 9 8 8 9 8 8 6 6 6 8 8 6 6 6 8 8 8 6 6 8 6 6 8 6 6 9 8 8 6 6 8 8 6 6 8 6 7 8 9 10 11 11 10 9 9 8 10 9 10 10 8 9 13 8 8 8 13 13 11 7 7 7 11 8 8 10 11 10 9 9 14 7 7 10 10 10 8 8 8 8 6 7 8 8 6 6 6 6 6 6 6 6 6 8 9 10 8 8 6 6 8 8 8 8 8 8 8 8 8 8 6 6 8 8 9 10 10 4 4 4 6 7 6 6 6 6 10 8 8 9 15 11 6 6 8 8 8 8 8 8 9 9 8 9 7 7 8 6 6 8 13 10 8 8 7 6 8 6 6 8 6 7 8 8 8 8 8 8 8 4"
44
+ assert_equal(qual,read.quality)
45
+ assert_equal(39,read.clear_range_from)
46
+ assert_equal(331,read.clear_range_to)
47
+ assert_equal(1,read.from)
48
+ assert_equal(293,read.to)
49
+ assert_equal("Reverse",read.orientation)
50
+ assert_instance_of(Bio::Assembly::Caf::Read,read)
51
+ end
52
+
53
+
54
+ end
metadata CHANGED
@@ -5,24 +5,38 @@ version: !ruby/object:Gem::Version
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 2
10
- version: 0.0.2
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Chase Miller
14
+ - Francesco Strozzi
14
15
  autorequire:
15
16
  bindir: bin
16
17
  cert_chain: []
17
18
 
18
- date: 2010-12-27 00:00:00 -05:00
19
+ date: 2011-01-12 00:00:00 -05:00
19
20
  default_executable:
20
21
  dependencies:
21
22
  - !ruby/object:Gem::Dependency
22
- type: :development
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 5
29
+ segments:
30
+ - 1
31
+ - 4
32
+ - 1
33
+ version: 1.4.1
34
+ type: :runtime
35
+ name: bio
36
+ version_requirements: *id001
23
37
  prerelease: false
24
- name: shoulda
25
- version_requirements: &id001 !ruby/object:Gem::Requirement
38
+ - !ruby/object:Gem::Dependency
39
+ requirement: &id002 !ruby/object:Gem::Requirement
26
40
  none: false
27
41
  requirements:
28
42
  - - ">="
@@ -31,12 +45,12 @@ dependencies:
31
45
  segments:
32
46
  - 0
33
47
  version: "0"
34
- requirement: *id001
35
- - !ruby/object:Gem::Dependency
36
48
  type: :development
49
+ name: shoulda
50
+ version_requirements: *id002
37
51
  prerelease: false
38
- name: bundler
39
- version_requirements: &id002 !ruby/object:Gem::Requirement
52
+ - !ruby/object:Gem::Dependency
53
+ requirement: &id003 !ruby/object:Gem::Requirement
40
54
  none: false
41
55
  requirements:
42
56
  - - ~>
@@ -47,12 +61,12 @@ dependencies:
47
61
  - 0
48
62
  - 0
49
63
  version: 1.0.0
50
- requirement: *id002
51
- - !ruby/object:Gem::Dependency
52
64
  type: :development
65
+ name: bundler
66
+ version_requirements: *id003
53
67
  prerelease: false
54
- name: jeweler
55
- version_requirements: &id003 !ruby/object:Gem::Requirement
68
+ - !ruby/object:Gem::Dependency
69
+ requirement: &id004 !ruby/object:Gem::Requirement
56
70
  none: false
57
71
  requirements:
58
72
  - - ~>
@@ -63,12 +77,12 @@ dependencies:
63
77
  - 5
64
78
  - 2
65
79
  version: 1.5.2
66
- requirement: *id003
67
- - !ruby/object:Gem::Dependency
68
80
  type: :development
81
+ name: jeweler
82
+ version_requirements: *id004
69
83
  prerelease: false
70
- name: rcov
71
- version_requirements: &id004 !ruby/object:Gem::Requirement
84
+ - !ruby/object:Gem::Dependency
85
+ requirement: &id005 !ruby/object:Gem::Requirement
72
86
  none: false
73
87
  requirements:
74
88
  - - ">="
@@ -77,12 +91,12 @@ dependencies:
77
91
  segments:
78
92
  - 0
79
93
  version: "0"
80
- requirement: *id004
81
- - !ruby/object:Gem::Dependency
82
94
  type: :development
95
+ name: rcov
96
+ version_requirements: *id005
83
97
  prerelease: false
84
- name: bio
85
- version_requirements: &id005 !ruby/object:Gem::Requirement
98
+ - !ruby/object:Gem::Dependency
99
+ requirement: &id006 !ruby/object:Gem::Requirement
86
100
  none: false
87
101
  requirements:
88
102
  - - ">="
@@ -93,7 +107,10 @@ dependencies:
93
107
  - 4
94
108
  - 1
95
109
  version: 1.4.1
96
- requirement: *id005
110
+ type: :development
111
+ name: bio
112
+ version_requirements: *id006
113
+ prerelease: false
97
114
  description: bioruby plugin to parse, write, and manipulate assembly data
98
115
  email: chmille4@gmail.com
99
116
  executables: []
@@ -112,13 +129,17 @@ files:
112
129
  - Rakefile
113
130
  - VERSION
114
131
  - bio-assembly.gemspec
132
+ - data/example.caf
115
133
  - data/example1.ace
116
134
  - lib/bio-assembly.rb
117
135
  - lib/bio-assembly/ace.rb
136
+ - lib/bio-assembly/caf.rb
118
137
  - lib/bio-assembly/contig.rb
138
+ - lib/bio-assembly/maf.rb
119
139
  - lib/bio-assembly/read.rb
120
140
  - test/helper.rb
121
- - test/test_bio-assembly.rb
141
+ - test/test_bio-assembly-ace.rb
142
+ - test/test_bio-assembly-caf.rb
122
143
  has_rdoc: true
123
144
  homepage: http://github.com/chmille4/bioruby-assembly
124
145
  licenses:
@@ -155,4 +176,5 @@ specification_version: 3
155
176
  summary: BioRuby Assembly plugin
156
177
  test_files:
157
178
  - test/helper.rb
158
- - test/test_bio-assembly.rb
179
+ - test/test_bio-assembly-ace.rb
180
+ - test/test_bio-assembly-caf.rb