bio-assembly 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +22 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-assembly.gemspec +71 -0
- data/data/example1.ace +44901 -0
- data/lib/bio-assembly.rb +169 -0
- data/lib/bio-assembly/contig.rb +97 -0
- data/lib/bio-assembly/read.rb +93 -0
- data/lib/bio-assembly/read/ace.rb +39 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-assembly.rb +78 -0
- metadata +158 -0
data/lib/bio-assembly.rb
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
|
2
|
+
require 'bio/sequence'
|
3
|
+
require 'bio-assembly/contig'
|
4
|
+
require 'bio-assembly/read'
|
5
|
+
|
6
|
+
module Bio
|
7
|
+
|
8
|
+
class Assembly
|
9
|
+
attr_accessor :contigs
|
10
|
+
|
11
|
+
# Opens the file at +path+ for reading and consumes its first line via
# parse_as. No contig is parsed here; contigs are read lazily by
# each_contig / contigs.
#
# path - location of the assembly file to read.
def initialize(path)
  @contigs = []
  @file = File.open(path, 'r')
  parse_as
end
|
16
|
+
|
17
|
+
# Returns the array of all contigs, parsing whatever part of the file has
# not been read yet. Use each_contig instead to stream large files.
#
# The cache is considered complete only when it holds as many contigs as
# the header line announced; checking for emptiness alone would return a
# partial list after an each_contig pass that stopped early.
def contigs
  parse_whole_file unless @contigs.size == @total_num_contigs.to_i
  @contigs
end
|
22
|
+
|
23
|
+
# Yields every contig of the assembly, one at a time.
#
# Contigs that were already parsed (by this or an earlier call) are
# replayed from the cache first; parsing then resumes at the current file
# position, caching and yielding each further contig as its CO line is
# found. Replaying first matters when an earlier pass was interrupted:
# without it the cached contigs would be skipped on the next call.
def each_contig
  @contigs.each { |contig| yield contig }

  # everything announced by the header is cached: nothing left to read
  return if @total_num_contigs.to_i == @contigs.size

  each_identifier do |identifier, attrs|
    next unless identifier == 'CO'
    contig = parse_contig(attrs)
    @contigs.push contig
    yield(contig)
  end
end
|
36
|
+
|
37
|
+
# Serialises the assembly as a string: an "AS <contigs> <reads>" line, a
# blank line, then the to_ace output of every contig, each followed by a
# newline.
def to_ace
  out = "AS #{num_contigs} #{num_reads}\n\n"
  each_contig do |contig|
    out << contig.to_ace << "\n"
  end
  out
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
# Builds a Contig from the attributes of a CO line and everything that
# follows it in the file up to the contig's last RD record.
#
# attrs - the CO line without its identifier; the 1st field becomes the
#         contig name, the 3rd the expected number of reads and the 5th
#         the orientation. The 2nd and 4th fields are not used.
#
# Returns the populated Contig.
def parse_contig(attrs)
  contig = Bio::Assembly::Contig.new
  contig.name, _base_num, @num_reads, _base_segments_num, contig.orientation = attrs.split(" ")
  # keep track of the number of RD identifiers parsed
  @num_rds_parsed = 0

  # the sequence is the paragraph after the CO line; to_s guards against
  # a file that ends right after the header
  contig.seq = @file.gets("\n\n").to_s.tr(" \r\n", "")

  # loop through identifiers (e.g. AF, RD, etc.)
  each_identifier do |identifier, tag_attrs|
    case identifier
    when "BQ" then parse_bq(contig)
    when "AF" then parse_af(contig, tag_attrs)
    when "BS" then parse_bs(contig, tag_attrs)
    when "RD"
      parse_rd(contig, tag_attrs)
      # stop once every read announced on the CO line has been seen
      break if @num_rds_parsed == @num_reads.to_i
    when "CT" then parse_ct(contig, tag_attrs)
    when "WA" then parse_wa(contig, tag_attrs)
    # WR and RT lines are skipped: this class defines no handler for
    # them, and dispatching to one would raise NoMethodError
    end
  end

  contig
end
|
72
|
+
|
73
|
+
# Finds the next_identifier
|
74
|
+
# Reads the file forward from its current position and yields every line
# that begins with a recognised two-letter identifier followed by
# whitespace. Other lines are skipped.
#
# Yields the two identifier characters and the remainder of the line from
# the fourth character on (trailing newline included).
def each_identifier
  @file.each do |line|
    if line =~ /^[ABCDQRW][ADFOQRST][\s\n].*/
      yield(line[0, 2], line[3..-1])
    end
  end
end
|
80
|
+
|
81
|
+
# parse assembly meta data
|
82
|
+
# Reads the first line of the file and stores its second field as
# @total_num_contigs (kept as a string; callers convert with to_i).
# The identifier and the read total on that line are not stored.
#
# An empty file leaves @total_num_contigs nil instead of raising, so an
# assembly built from it simply has no contigs.
def parse_as
  header = @file.gets.to_s
  @total_num_contigs = header.split(" ")[1]
end
|
86
|
+
|
87
|
+
# parse contig sequence quality data
|
88
|
+
# Reads the paragraph following a BQ line and stores its whitespace-
# separated values (as strings) in contig.quality.
#
# The paragraph is split on any whitespace, so values are kept apart
# whether or not each line starts with a space. Deleting the newlines
# first would fuse the last value of one line with the first of the next.
def parse_bq(contig)
  contig.quality = @file.gets("\n\n").to_s.split
end
|
91
|
+
|
92
|
+
# parse read meta data
|
93
|
+
# Creates a Read from the attributes of an AF line and registers it with
# the contig.
#
# attrs - the AF line without its identifier; its first three fields are
#         taken as read name, orientation and start position.
def parse_af(contig, attrs)
  name, orientation, from = attrs.split(" ")

  read = Bio::Assembly::Read.new
  read.name = name
  read.orientation = orientation
  read.from = from

  contig.add_read(read)
end
|
98
|
+
|
99
|
+
# parse base sequence data
|
100
|
+
# Attaches a base segment described by a BS line to the read it names.
#
# attrs - the BS line without its identifier: start, end and read name.
#         The read is looked up in the contig by that name and must
#         already be registered there.
def parse_bs(contig, attrs)
  fields = attrs.split(" ")
  segment_from = fields[0]
  segment_to = fields[1]
  read_name = fields[2]

  contig.find_read_by_name(read_name).add_base_sequence(segment_from, segment_to, read_name)
end
|
105
|
+
|
106
|
+
# parse read sequence and position data
|
107
|
+
# Handles an RD line: stores the read's sequence and end position, then
# consumes the QA and DS lines that follow it.
#
# attrs - the RD line without its identifier; only the first field (the
#         read name) is used. The read must already be registered in the
#         contig.
def parse_rd(contig, attrs)
  # count this RD so parse_contig knows when the contig is complete
  @num_rds_parsed += 1

  read_name = attrs.split(" ").first
  # the sequence is the paragraph after the RD line, blanks removed
  bases = @file.gets("\n\n").tr(" \r\n", "")

  read = contig.find_read_by_name(read_name)
  read.seq = bases

  # end position = start + length, capped at the contig length when the
  # read runs off the contig
  read.to = read.from.to_i + read.seq.length
  read.to = contig.seq.length if read.to > contig.seq.length

  # scan forward for this read's QA and DS lines; DS ends the scan
  each_identifier do |identifier, tag_attrs|
    if identifier == "QA"
      parse_qa(read, tag_attrs)
    elsif identifier == "DS"
      parse_ds(read, tag_attrs)
      break
    end
  end
end
|
131
|
+
|
132
|
+
# parse a read's clear ranges (the part of the read that contributes to the contig)
|
133
|
+
# Stores a read's clear range from a QA line.
#
# attrs - the QA line without its identifier; the 3rd and 4th fields are
#         assigned to read.clear_range_from / read.clear_range_to. The
#         first two fields are ignored.
def parse_qa(read, attrs)
  fields = attrs.split(" ")
  read.clear_range_from = fields[2]
  read.clear_range_to = fields[3]
end
|
138
|
+
|
139
|
+
# parse file data - ignored
|
140
|
+
# DS lines are recognised but their content is deliberately ignored.
def parse_ds(read, attrs)
  nil
end
|
142
|
+
|
143
|
+
# parse run meta data - ignored
|
144
|
+
# WA lines are recognised but their content is deliberately ignored.
def parse_wa(contig, attrs)
  nil
end
|
146
|
+
|
147
|
+
# parse run meta data - ignored
|
148
|
+
# CT lines are recognised but their content is deliberately ignored.
def parse_ct(contig, attrs)
  nil
end
|
150
|
+
|
151
|
+
# Number of contigs in the assembly (parses the file if necessary,
# because it goes through #contigs).
def num_contigs
  contigs.length
end
|
154
|
+
|
155
|
+
# Total number of reads, summed over every contig yielded by each_contig.
def num_reads
  total = 0
  each_contig do |contig|
    total = total + contig.num_reads
  end
  total
end
|
160
|
+
|
161
|
+
# Runs each_contig with a do-nothing block, purely for its side effect of
# filling the contig cache.
def parse_whole_file
  each_contig { |_contig| }
end
|
164
|
+
|
165
|
+
end
|
166
|
+
|
167
|
+
end
|
168
|
+
|
169
|
+
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module Bio
|
2
|
+
class Assembly
|
3
|
+
|
4
|
+
class Contig
|
5
|
+
attr_accessor :seq, :orientation, :quality, :to, :from, :name, :reads
|
6
|
+
alias consensus_seq seq
|
7
|
+
|
8
|
+
# Creates a contig with no reads.
#
# str - initial consensus sequence, wrapped in a Bio::Sequence::NA
#       (defaults to the empty string).
def initialize(str="")
  @seq = Bio::Sequence::NA.new(str)
  @reads = {}
  # counter for RD identifier
  @rds_parsed = 0
end
|
14
|
+
|
15
|
+
# Looks a read up by name in the contig's name-keyed read table.
# Returns whatever the table holds for +name+ (nil for an unknown name
# when the table is the plain Hash created by the constructor).
def find_read_by_name(name)
  read = @reads[name]
  read
end
|
18
|
+
|
19
|
+
# Collects the reads whose clear range overlaps the open interval
# (clear_range_from, clear_range_to). All comparisons are strict, so a
# read that merely touches a boundary is not selected.
#
# A read is selected when
#   * its clear start (from + clear_range_from) lies inside the interval,
#   * or its clear end (to + clear_range_to) lies inside the interval,
#   * or it starts before and ends after the interval.
#
# NOTE(review): the clear end is computed as read.to + read.clear_range_to
# while the clear start uses read.from; confirm that this is the intended
# coordinate arithmetic.
#
# Returns an Array of reads in iteration order.
def find_reads_in_range(clear_range_from, clear_range_to)
  hits = []

  each_read do |read|
    clear_start = read.from + read.clear_range_from

    # read starts in region
    if clear_start > clear_range_from && clear_start < clear_range_to
      hits << read
      next
    end

    clear_stop = read.to + read.clear_range_to
    ends_inside = clear_stop < clear_range_to && clear_stop > clear_range_from
    spans_region = clear_start < clear_range_from && clear_stop > clear_range_to

    hits << read if ends_inside || spans_region
  end

  hits
end
|
37
|
+
|
38
|
+
# Registers +read+ in the contig's read table under read.name. A read
# already stored under the same name is replaced.
def add_read(read)
  # TODO do some checks for pos location
  @reads.store(read.name, read)
end
|
42
|
+
|
43
|
+
# Yields every read stored in the contig, in the read table's own order.
def each_read
  @reads.each_pair do |_name, read|
    yield read
  end
end
|
46
|
+
|
47
|
+
# Number of reads currently stored in the contig.
def num_reads
  @reads.length
end
|
50
|
+
|
51
|
+
# Length of the contig's sequence.
def num_bases
  seq.size
end
|
54
|
+
|
55
|
+
# Total number of base segments over all reads of the contig. Reads
# whose base_sequences is nil contribute nothing.
def num_base_segments
  total = 0
  each_read do |read|
    segments = read.base_sequences
    next if segments.nil?
    total += segments.size
  end
  total
end
|
62
|
+
|
63
|
+
# Serialises the contig as a string, in this order:
#   * a CO line (name, base count, read count, segment count, orientation),
#   * the sequence in lines of at most 50 characters, then a blank line,
#   * a BQ line followed by the quality values, 50 per line, each line
#     starting with a space, then a blank line,
#   * the AF lines of all reads (reads sorted with their <=>),
#   * the BS lines of all reads, then a blank line,
#   * the remaining per-read output (to_ace_rest) of all reads.
#
# Quality values are written with each_slice so that a count that is an
# exact multiple of 50 does not produce a trailing whitespace-only line.
# When there are no quality values at all (nil or empty) a single " "
# line is still written, so the BQ paragraph is never empty.
def to_ace
  ace = ""
  ace << ['CO', name, num_bases, num_reads, num_base_segments, orientation].join(' ') << "\n"
  ace << seq.to_s.gsub(/.{1,50}/, "\\0\n") << "\n"

  ace << "BQ\n"
  scores = quality || []
  if scores.empty?
    ace << " \n"
  else
    scores.each_slice(50) { |row| ace << ' ' << row.join(' ') << "\n" }
  end
  ace << "\n"

  # holds BS data for reads
  bs_str = ""
  # holds RD, QA, and DS data for reads
  rest_str = ""
  @reads.values.sort.each do |read|
    ace << read.to_ace_af
    bs_str << read.to_ace_bs
    rest_str << read.to_ace_rest
  end

  # compile data in correct order
  ace << bs_str << "\n" << rest_str
  ace
end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
|
2
|
+
require 'bio-assembly/read/ace'
|
3
|
+
|
4
|
+
module Bio
|
5
|
+
class Assembly
|
6
|
+
class Read
|
7
|
+
include Bio::Assembly::Read::Ace
|
8
|
+
|
9
|
+
attr_accessor :seq, :name, :orientation, :from, :to, :clear_range_from, :clear_range_to
|
10
|
+
# Creates a read.
#
# str - initial sequence, wrapped in a Bio::Sequence::NA (defaults to the
#       empty string).
def initialize(str = "")
  @seq = Bio::Sequence::NA.new(str)
end
|
13
|
+
|
14
|
+
# Two reads are equal when they have the same name.
#
# Comparing with an object that has no #name (nil, a String, ...) returns
# false instead of raising NoMethodError.
def ==(other_read)
  other_read.respond_to?(:name) && name == other_read.name
end
|
17
|
+
|
18
|
+
# Length of the read's sequence.
def num_bases
  seq.size
end
|
21
|
+
|
22
|
+
# Sets the read's start position, converting the value with to_i (the
# parser passes it in as a string).
def from=(new_from)
  position = new_from.to_i
  @from = position
end
|
25
|
+
|
26
|
+
# Sets the read's end position, converting the value with to_i.
def to=(new_to)
  position = new_to.to_i
  @to = position
end
|
29
|
+
|
30
|
+
# Sets the start of the read's clear range, converting the value with
# to_i.
def clear_range_from=(new_clear_range_from)
  position = new_clear_range_from.to_i
  @clear_range_from = position
end
|
33
|
+
|
34
|
+
# Sets the end of the read's clear range, converting the value with to_i.
def clear_range_to=(new_clear_range_to)
  position = new_clear_range_to.to_i
  @clear_range_to = position
end
|
37
|
+
|
38
|
+
# Serialises this read on its own: its AF line, its BS lines, a blank
# line, then the rest of its output (to_ace_rest).
#
# The accumulator is initialised with "=": using "+=" on a not yet
# assigned local evaluates nil + "" and raises NoMethodError.
def to_ace
  ace = ""
  ace << to_ace_af
  ace << to_ace_bs
  # compile data in correct order
  ace << "\n"
  ace << to_ace_rest
  ace
end
|
54
|
+
|
55
|
+
# Orders reads by start position, then by end position when the start
# positions are identical.
#
# Raises RuntimeError when +other+ is not a Bio::Assembly::Read.
def <=>(other)
  raise "[Error] markers are not comparable" unless other.kind_of?(Bio::Assembly::Read)

  # sort by to if froms are identical
  return to <=> other.to if from == other.from

  from <=> other.from
end
|
66
|
+
|
67
|
+
# Returns one "BS <from> <to> <read_name>" line per base segment of the
# read, or the empty string when base_sequences is nil.
def to_ace_bs
  segments = base_sequences
  return "" if segments.nil?

  segments.map { |bs| "BS #{bs.from} #{bs.to} #{bs.read_name}\n" }.join
end
|
76
|
+
|
77
|
+
# Returns the read's "AF <name> <orientation> <from>" line.
def to_ace_af
  "AF #{name} #{orientation} #{from}\n"
end
|
80
|
+
|
81
|
+
# Returns the read's RD, sequence, QA and DS output:
#   * "RD <name> <num_bases> 0 0",
#   * the sequence in lines of at most 50 characters, then a blank line,
#   * a QA line that writes the clear range twice,
#   * a DS line built from the read name and the current time (so the
#     output differs between calls).
def to_ace_rest
  rd_line = "RD #{name} #{num_bases} 0 0\n"
  bases = seq.to_s.gsub(/.{1,50}/, "\\0\n") + "\n"
  qa_line = "QA #{clear_range_from} #{clear_range_to} #{clear_range_from} #{clear_range_to}\n"
  ds_line = "DS CHROMAT_FILE: #{name} PHD_FILE: #{name}.phd.1 TIME: #{Time.now}\n"

  rd_line + bases + qa_line + ds_line
end
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Bio
|
2
|
+
class Assembly
|
3
|
+
class Read
|
4
|
+
|
5
|
+
module Ace
|
6
|
+
attr_accessor :base_sequences
|
7
|
+
|
8
|
+
# Appends a BaseSequence built from the given values to the read's
# base_sequences list, creating the list on first use.
def add_base_sequence(from, to, read_name)
  @base_sequences = [] if @base_sequences.nil?
  @base_sequences << BaseSequence.new(from, to, read_name)
end
|
12
|
+
|
13
|
+
# A base segment of a read: a start, an end and the name of the read it
# belongs to. The values are stored exactly as given.
class BaseSequence
  attr_accessor :from, :to, :read_name

  def initialize(from, to, read_name)
    @from = from
    @to = to
    @read_name = read_name
  end

  # Orders segments by start position, then by end position when the
  # starts are identical.
  #
  # Positions are compared numerically (via to_i): the parser hands them
  # over as strings, and comparing those directly would sort "10" before
  # "9". The class is referenced by its own name, so the check does not
  # depend on where the enclosing module has been included.
  #
  # Raises RuntimeError when +other+ is not a BaseSequence.
  def <=>(other)
    unless other.kind_of?(BaseSequence)
      raise "[Error] markers are not comparable"
    end
    [from.to_i, to.to_i] <=> [other.from.to_i, other.to.to_i]
  end

end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-assembly'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestBioAssembly < Test::Unit::TestCase
|
4
|
+
|
5
|
+
# Parses the example assembly and picks one contig and one read for the
# in-depth tests.
#
# The fixture path is resolved relative to this test file (the same way
# helper.rb locates lib/), so the tests do not depend on the directory
# they are started from.
def setup
  ace_filename = File.expand_path(File.join('..', 'data', 'example1.ace'), File.dirname(__FILE__))
  @obj = Bio::Assembly.new(ace_filename)

  # pick a contig to do in depth tests on
  @contig = nil
  @obj.each_contig { |c| @contig = c if c.name.to_i == 5 }

  # pick a read to do in depth tests on
  @read = nil
  @contig.each_read { |r| @read = r if r.name == '235283518' }
end
|
17
|
+
|
18
|
+
# The example assembly is expected to contain 13 contigs.
def test_num_contigs_parsed
  assert_equal(13, @obj.contigs.size)
end
|
22
|
+
|
23
|
+
# The reads of all contigs of the example assembly are expected to add up
# to 1760.
def test_num_reads_parsed
  reads_parsed = @obj.contigs.inject(0) { |sum, contig| sum + contig.reads.size }
  assert_equal(1760, reads_parsed)
end
|
29
|
+
|
30
|
+
# The contig selected in setup is expected to hold 15 reads.
def test_contig_num_reads
  assert_equal(15, @contig.reads.size)
end
|
34
|
+
|
35
|
+
def test_contig_seq
|
36
|
+
seq = "TTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTA*TTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTTAATTTCAAGATGATTCACAGGTTTGTTGCCTCAAAAGAAAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGACAAGTTGACTCAACTTTTCAC*TTTATGTTATATTGTAAGGATGTGACTTTGTTTTGGAAAATTATATTTAATTTGATAATTAACCAATATAAAAAAGATAAACCAAAAGCTATAAGTCGTAAATAAGGACATTGGAAACAAGAAATATTCTCTCCTGAACATTATTTTAAATTATGCGCAATATGCAAATTTATAAGTGTTAAGTTAAAAAGATTGTTAATGGTTCTGTTTATTACCCAAAGACTTTTTTAAAGTTTAAGTCGTTGCTAAGAGTGCAGCGTTTAGACAAATAAAAATGCAATAATCTTCTCGCTCGGGAGCTATGTCCCTCGCATAATATTCTTCAAAGTGTACAGTAAATATTCTAGAAAAGTGAAGTGTGAAAAAGATATATTGCTTGTTTTTATATTTTGTTAATACAACAAAACTTCAAAAACCTGCGGTGGGGGGGGGGGGATAGTCACTTCCGTCACCTTCACCCCTCTCGTTCACTATACTCCCTCGCCCTGGCGTAATGATGGGGGGATTGGGGGTAGTTGCCCCTTAATAAAGTTCAAACTTGATTTATTTCTAACTCGATACCAGTGATTTACAAATGTTTCTGAAATGGCATGGTTTTCCCTAATAAATGCCTAAAAACCCTGAGCTGAGCCCACGCCAATT"
|
37
|
+
assert_equal(seq, @contig.seq.to_s)
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_read_seq
|
41
|
+
read_seq = 'GAAAAAAAAAGGCAGAAGTTTAATCAAAACGGATTTTTCCGTCAGATGTAAAGGTTGCAGAACCGGACCATTCTTGCGTCTGATCTTTCAGGATCGGATCGTTGGCGTCGAACTTATCGCTGTCTTTAAAGACACGGCCCGCGTTTTTCCAGCTGTCGATTGAGTTGTCGCCGACCTTTTGATAAAACATGTAGATTGATGTGTCATCAGCGTCTTTCGGGCTTCCCGCAAGAGCAAACACAACGTGATAGCCGTTGTATTCAGCTACTGTTCCGTCAGCGTTTTGCAGCGGCCAGCTGTCCCACACATCAAGTCCTTTTGCAGACTCAATATTTTTAATCGTTGATTGATCGAATTGAGGCACTTGGTATTTTTCGTTTTGCTGCTGTTTAGGGATCTGCAGCATATCATGGCGTGTAATATGAGAGACGCCGTACGTTTCTTTGTATGCTTTTTGGTTATTTTCTTTCGCGAAGGCTTGAGTCGCTCCTCCTGCCAGAAGTGCAGTCGTAAAAGTCAGAACTGTGGCTTGTTTTACAATTTTTTTGATGTTCATGTTCATGTCTCCTTCTGTATGTACTGTTTTTTGCGATCTGCCGTTTCGATCCTCCCGAATTGACTAGTGGGTAGGCCTGGCGGCCGCCTGGCCGTCGACATTTAGGTGACACTATAGAAGGATCCGCGGAATTCCTTTTTAGATTGAGATAATGACTTTGTTTGGAAGGATGTATTTTTCATTTAATTAAAGCAAATTCGTAATAAT*AAAGTTAAACAATTT*ATTTC*AGATGATTCACAGGTTTGTTGCCTCAAAAG*AAACTTATATTAATGGCAAGTTGTGAATAATTTATGCAACTCTTGTGGGACAAGTTGACTTCACCT'
|
42
|
+
assert_equal(read_seq, @read.seq.to_s)
|
43
|
+
end
|
44
|
+
|
45
|
+
# The read selected in setup is expected to end at 849 and start at -34.
def test_read_range
  assert_equal(849, @read.to)
  assert_equal(-34, @read.from)
end
|
51
|
+
|
52
|
+
# The read selected in setup is expected to have the clear range 36..862.
def test_read_clear_range
  assert_equal(36, @read.clear_range_from)
  assert_equal(862, @read.clear_range_to)
end
|
58
|
+
|
59
|
+
# The read selected in setup is expected to have orientation 'U'.
def test_read_orientation
  assert_equal('U', @read.orientation)
end
|
63
|
+
|
64
|
+
# The reads found in the range 295..424 of the selected contig are
# expected to be exactly the five known reads.
#
# The names of the returned reads are compared with the expected names
# (order-insensitive), so both unexpected and missing reads fail the
# test. Checking the size of the expected list itself would always pass.
def test_find_reads_in_range
  known_reads = [ '235283518', '235288260', '235293813', '235288255', '235283548' ]
  reads_in_range = @contig.find_reads_in_range(295, 424)
  names_in_range = reads_in_range.map { |read| read.name }
  assert_equal(known_reads.sort, names_in_range.sort)
end
|
75
|
+
|
76
|
+
end
|
77
|
+
|
78
|
+
|