bio-assembly 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +10 -1
- data/VERSION +1 -1
- data/bio-assembly.gemspec +2 -2
- data/lib/bio-assembly.rb +41 -155
- data/lib/bio-assembly/ace.rb +258 -0
- data/lib/bio-assembly/contig.rb +4 -32
- data/lib/bio-assembly/read.rb +2 -43
- data/test/test_bio-assembly.rb +1 -1
- metadata +4 -4
- data/lib/bio-assembly/read/ace.rb +0 -39
data/README.rdoc
CHANGED
@@ -1,6 +1,15 @@
|
|
1
1
|
= bio-assembly
|
2
2
|
|
3
|
-
|
3
|
+
BioRuby plugin for parsing, writing, and manipulating assembly data
|
4
|
+
|
5
|
+
== Install
|
6
|
+
|
7
|
+
gem install bio-assembly
|
8
|
+
|
9
|
+
== Usage
|
10
|
+
|
11
|
+
Examples on my blog:
|
12
|
+
http://chasemiller4.blogspot.com/2010/10/bioruby-ace-parser-example.html
|
4
13
|
|
5
14
|
== Contributing to bio-assembly
|
6
15
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.0
|
1
|
+
0.0.1
|
data/bio-assembly.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bio-assembly}
|
8
|
-
s.version = "0.0.0"
|
8
|
+
s.version = "0.0.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Chase Miller"]
|
@@ -27,9 +27,9 @@ Gem::Specification.new do |s|
|
|
27
27
|
"bio-assembly.gemspec",
|
28
28
|
"data/example1.ace",
|
29
29
|
"lib/bio-assembly.rb",
|
30
|
+
"lib/bio-assembly/ace.rb",
|
30
31
|
"lib/bio-assembly/contig.rb",
|
31
32
|
"lib/bio-assembly/read.rb",
|
32
|
-
"lib/bio-assembly/read/ace.rb",
|
33
33
|
"test/helper.rb",
|
34
34
|
"test/test_bio-assembly.rb"
|
35
35
|
]
|
data/lib/bio-assembly.rb
CHANGED
@@ -1,169 +1,55 @@
|
|
1
|
-
|
2
|
-
require 'bio/sequence'
|
1
|
+
require 'bio/sequence'
|
3
2
|
require 'bio-assembly/contig'
|
4
3
|
require 'bio-assembly/read'
|
5
4
|
|
6
5
|
module Bio
|
7
6
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
@contigs
|
21
|
-
end
|
22
|
-
|
23
|
-
def each_contig
|
24
|
-
# check if file is already parsed
|
25
|
-
if @total_num_contigs.to_i == @contigs.size
|
26
|
-
@contigs.each{ |contig| yield contig }
|
27
|
-
else
|
28
|
-
each_identifier do |identifier, attrs|
|
29
|
-
next unless identifier == 'CO'
|
30
|
-
contig = parse_contig(attrs)
|
31
|
-
@contigs.push contig
|
32
|
-
yield(contig)
|
33
|
-
end
|
7
|
+
class Assembly
|
8
|
+
attr_accessor :contigs
|
9
|
+
|
10
|
+
@@formats = { }
|
11
|
+
|
12
|
+
def self.create(path, format)
|
13
|
+
streamer = @@formats[format]
|
14
|
+
if streamer
|
15
|
+
streamer.new(path)
|
16
|
+
else
|
17
|
+
raise "Format type '#{format}' is not supported"
|
18
|
+
end
|
34
19
|
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def to_ace
|
38
|
-
ace = ""
|
39
|
-
ace += "AS " + num_contigs.to_s + " " + num_reads.to_s + "\n\n"
|
40
|
-
each_contig { |contig| ace += contig.to_ace + "\n" }
|
41
|
-
ace
|
42
|
-
end
|
43
|
-
|
44
|
-
private
|
45
|
-
|
46
|
-
def parse_contig(attrs)
|
47
|
-
contig = Bio::Assembly::Contig.new
|
48
|
-
contig.name, base_num, @num_reads, base_segments_num, contig.orientation = attrs.split(" ")
|
49
|
-
# keep track of the number of RD identifiers parsed
|
50
|
-
@num_rds_parsed = 0
|
51
20
|
|
52
|
-
|
53
|
-
|
54
|
-
contig.seq = seq
|
55
|
-
|
56
|
-
# loop through identifiers (e.g AF, RD, etc)
|
57
|
-
each_identifier do |identifier, attrs|
|
58
|
-
case identifier
|
59
|
-
when "BQ" then parse_bq(contig)
|
60
|
-
when "AF" then parse_af(contig, attrs)
|
61
|
-
when "BS" then parse_bs(contig, attrs)
|
62
|
-
when "RD" then parse_rd(contig, attrs); break if @num_rds_parsed == @num_reads.to_i
|
63
|
-
when "WR" then parse_wr(contig, attrs)
|
64
|
-
when "RT" then parse_rt(contig, attrs)
|
65
|
-
when "CT" then parse_ct(contig, attrs)
|
66
|
-
when "WA" then parse_wa(contig, attrs)
|
67
|
-
end
|
21
|
+
def self.register_parser name
|
22
|
+
@@formats[name] = self
|
68
23
|
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
def each_identifier
|
75
|
-
@file.each do |line|
|
76
|
-
next if line !~ /^[ABCDQRW][ADFOQRST][\s\n].*/
|
77
|
-
yield(line[0..1], line[3..-1])
|
24
|
+
|
25
|
+
def contigs
|
26
|
+
# use each_contig to stream large files
|
27
|
+
parse_whole_file if @contigs.empty?
|
28
|
+
@contigs
|
78
29
|
end
|
79
|
-
end
|
80
|
-
|
81
|
-
# parse assembly meta data
|
82
|
-
def parse_as
|
83
|
-
line = @file.gets
|
84
|
-
identifier, @total_num_contigs, total_num_reads = line.split(" ")
|
85
|
-
end
|
86
|
-
|
87
|
-
# parse contig sequence quality data
|
88
|
-
def parse_bq(contig)
|
89
|
-
contig.quality = @file.gets("\n\n").tr("\r\n", "").gsub(/^\s/, "").split(' ')
|
90
|
-
end
|
91
|
-
|
92
|
-
# parse read meta data
|
93
|
-
def parse_af(contig, attrs)
|
94
|
-
read = Bio::Assembly::Read.new
|
95
|
-
read.name , read.orientation, read.from = attrs.split(" ")
|
96
|
-
contig.add_read read
|
97
|
-
end
|
98
|
-
|
99
|
-
# parse base sequence data
|
100
|
-
def parse_bs(contig, attrs)
|
101
|
-
from, to, read_name = attrs.split(" ")
|
102
|
-
read = contig.find_read_by_name( read_name )
|
103
|
-
read.add_base_sequence(from, to, read_name)
|
104
|
-
end
|
105
|
-
|
106
|
-
# parse read sequence and position data
|
107
|
-
def parse_rd(contig, attrs)
|
108
|
-
# increment counter
|
109
|
-
@num_rds_parsed += 1
|
110
30
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
# set read.to to contig length if read runs off contig
|
120
|
-
read.to = contig.seq.length if read.to > contig.seq.length
|
121
|
-
|
122
|
-
# if present parse QA and DS associated with this read
|
123
|
-
each_identifier do |identifier, attrs|
|
124
|
-
case identifier
|
125
|
-
when "QA" then parse_qa(read, attrs)
|
126
|
-
when "DS" then parse_ds(read, attrs); break
|
127
|
-
end
|
31
|
+
def each_contig
|
32
|
+
# implemented by each format subclass
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def num_contigs
|
38
|
+
contigs.size
|
128
39
|
end
|
129
|
-
|
130
|
-
end
|
131
|
-
|
132
|
-
# parse a read's clear ranges (the part of the read that contributes to the contig)
|
133
|
-
def parse_qa(read, attrs)
|
134
|
-
start, stop, clear_range_from, clear_range_to = attrs.split(" ")
|
135
|
-
read.clear_range_from = clear_range_from
|
136
|
-
read.clear_range_to = clear_range_to
|
137
|
-
end
|
138
|
-
|
139
|
-
# parse file data - ignored
|
140
|
-
def parse_ds(read, attrs)
|
141
|
-
end
|
142
|
-
|
143
|
-
# parse run meta data - ignored
|
144
|
-
def parse_wa(contig, attrs)
|
145
|
-
end
|
146
|
-
|
147
|
-
# parse run meta data - ignored
|
148
|
-
def parse_ct(contig, attrs)
|
149
|
-
end
|
150
|
-
|
151
|
-
def num_contigs
|
152
|
-
contigs.size
|
153
|
-
end
|
154
|
-
|
155
|
-
def num_reads
|
156
|
-
read_num = 0
|
157
|
-
each_contig { |contig| read_num += contig.num_reads }
|
158
|
-
read_num
|
159
|
-
end
|
160
|
-
|
161
|
-
def parse_whole_file
|
162
|
-
each_contig { |x| 1 }
|
163
|
-
end
|
164
|
-
|
165
|
-
end
|
166
40
|
|
167
|
-
|
41
|
+
def num_reads
|
42
|
+
read_num = 0
|
43
|
+
each_contig { |contig| read_num += contig.num_reads }
|
44
|
+
read_num
|
45
|
+
end
|
168
46
|
|
47
|
+
def parse_whole_file
|
48
|
+
each_contig { |x| 1 }
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
169
54
|
|
55
|
+
require 'bio-assembly/ace'
|
@@ -0,0 +1,258 @@
|
|
1
|
+
|
2
|
+
module Bio
|
3
|
+
class Assembly
|
4
|
+
|
5
|
+
class Ace < Bio::Assembly
|
6
|
+
|
7
|
+
# register parser with superclass
|
8
|
+
register_parser :ace
|
9
|
+
|
10
|
+
def initialize(path)
|
11
|
+
@file = File.new(path, 'r')
|
12
|
+
@contigs = Array.new
|
13
|
+
parse_as
|
14
|
+
end
|
15
|
+
|
16
|
+
def each_contig
|
17
|
+
# check if file is already parsed
|
18
|
+
if @total_num_contigs.to_i == @contigs.size
|
19
|
+
@contigs.each{ |contig| yield contig }
|
20
|
+
else
|
21
|
+
each_identifier do |identifier, attrs|
|
22
|
+
next unless identifier == 'CO'
|
23
|
+
contig = parse_contig(attrs)
|
24
|
+
@contigs.push contig
|
25
|
+
yield(contig)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_ace
|
31
|
+
ace = ""
|
32
|
+
ace += "AS " + num_contigs.to_s + " " + num_reads.to_s + "\n\n"
|
33
|
+
each_contig { |contig| ace += contig.to_ace + "\n" }
|
34
|
+
ace
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
def parse_contig(attrs)
|
39
|
+
contig = Bio::Assembly::Contig.new
|
40
|
+
contig.name, base_num, @num_reads, base_segments_num, contig.orientation = attrs.split(" ")
|
41
|
+
# keep track of the number of RD identifiers parsed
|
42
|
+
@num_rds_parsed = 0
|
43
|
+
|
44
|
+
# get sequence
|
45
|
+
seq = @file.gets("\n\n").tr(" \r\n", "")
|
46
|
+
contig.seq = seq
|
47
|
+
|
48
|
+
# loop through identifiers (e.g AF, RD, etc)
|
49
|
+
each_identifier do |identifier, attrs|
|
50
|
+
case identifier
|
51
|
+
when "BQ" then parse_bq(contig)
|
52
|
+
when "AF" then parse_af(contig, attrs)
|
53
|
+
when "BS" then parse_bs(contig, attrs)
|
54
|
+
when "RD" then parse_rd(contig, attrs); break if @num_rds_parsed == @num_reads.to_i
|
55
|
+
when "WR" then parse_wr(contig, attrs)
|
56
|
+
when "RT" then parse_rt(contig, attrs)
|
57
|
+
when "CT" then parse_ct(contig, attrs)
|
58
|
+
when "WA" then parse_wa(contig, attrs)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
contig
|
63
|
+
end
|
64
|
+
|
65
|
+
# Finds the next_identifier
|
66
|
+
def each_identifier
|
67
|
+
@file.each do |line|
|
68
|
+
next if line !~ /^[ABCDQRW][ADFOQRST][\s\n].*/
|
69
|
+
yield(line[0..1], line[3..-1])
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# parse assembly meta data
|
74
|
+
def parse_as
|
75
|
+
line = @file.gets
|
76
|
+
identifier, @total_num_contigs, total_num_reads = line.split(" ")
|
77
|
+
end
|
78
|
+
|
79
|
+
# parse contig sequence quality data
|
80
|
+
def parse_bq(contig)
|
81
|
+
contig.quality = @file.gets("\n\n").tr("\r\n", "").gsub(/^\s/, "").split(' ')
|
82
|
+
end
|
83
|
+
|
84
|
+
# parse read meta data
|
85
|
+
def parse_af(contig, attrs)
|
86
|
+
read = Bio::Assembly::Read.new
|
87
|
+
read.name , read.orientation, read.from = attrs.split(" ")
|
88
|
+
contig.add_read read
|
89
|
+
end
|
90
|
+
|
91
|
+
# parse base sequence data
|
92
|
+
def parse_bs(contig, attrs)
|
93
|
+
from, to, read_name = attrs.split(" ")
|
94
|
+
read = contig.find_read_by_name( read_name )
|
95
|
+
read.add_base_sequence(from, to, read_name)
|
96
|
+
end
|
97
|
+
|
98
|
+
# parse read sequence and position data
|
99
|
+
def parse_rd(contig, attrs)
|
100
|
+
# increment counter
|
101
|
+
@num_rds_parsed += 1
|
102
|
+
|
103
|
+
# parse read
|
104
|
+
read_name, num_padded_bases, num_read_infos, num_read_tags = attrs.split(" ")
|
105
|
+
seq = @file.gets("\n\n").tr( " \r\n", "")
|
106
|
+
|
107
|
+
# get read with matching name
|
108
|
+
read = contig.find_read_by_name( read_name )
|
109
|
+
read.seq = seq
|
110
|
+
read.to = read.from.to_i + read.seq.length
|
111
|
+
# set read.to to contig length if read runs off contig
|
112
|
+
read.to = contig.seq.length if read.to > contig.seq.length
|
113
|
+
|
114
|
+
# if present parse QA and DS associated with this read
|
115
|
+
each_identifier do |identifier, attrs|
|
116
|
+
case identifier
|
117
|
+
when "QA" then parse_qa(read, attrs)
|
118
|
+
when "DS" then parse_ds(read, attrs); break
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
# parse a read's clear ranges (the part of the read that contributes to the contig)
|
125
|
+
def parse_qa(read, attrs)
|
126
|
+
start, stop, clear_range_from, clear_range_to = attrs.split(" ")
|
127
|
+
read.clear_range_from = clear_range_from
|
128
|
+
read.clear_range_to = clear_range_to
|
129
|
+
end
|
130
|
+
|
131
|
+
# parse file data - ignored
|
132
|
+
def parse_ds(read, attrs)
|
133
|
+
end
|
134
|
+
|
135
|
+
# parse run meta data - ignored
|
136
|
+
def parse_wa(contig, attrs)
|
137
|
+
end
|
138
|
+
|
139
|
+
# parse run meta data - ignored
|
140
|
+
def parse_ct(contig, attrs)
|
141
|
+
end
|
142
|
+
|
143
|
+
end # => end class Ace
|
144
|
+
|
145
|
+
# open contig class and write ace specific methods for contig objects
|
146
|
+
class Contig
|
147
|
+
|
148
|
+
def to_ace
|
149
|
+
ace = ""
|
150
|
+
ace += ['CO', name, num_bases, num_reads, num_base_segments, orientation].join(' ') + "\n"
|
151
|
+
ace += seq.to_s.gsub(Regexp.new(".{1,50}"), "\\0\n") + "\n"
|
152
|
+
ace += "BQ\n"
|
153
|
+
last_stop = quality.size - 1
|
154
|
+
(quality.size/50+1).times do |i|
|
155
|
+
start = i * 50
|
156
|
+
stop = (i+1) * 50 - 1
|
157
|
+
stop = last_stop if stop > last_stop
|
158
|
+
ace += ' ' + quality[start..stop].join(' ') + "\n"
|
159
|
+
end
|
160
|
+
ace += "\n"
|
161
|
+
|
162
|
+
# holds BS data for reads
|
163
|
+
bs_str = ""
|
164
|
+
# holds RD, QA, and DS data for reads
|
165
|
+
rest_str = ""
|
166
|
+
@reads.values.sort.each do |read|
|
167
|
+
ace += read.to_ace_af
|
168
|
+
bs_str += read.to_ace_bs
|
169
|
+
rest_str += read.to_ace_rest
|
170
|
+
end
|
171
|
+
|
172
|
+
# compile data in correct order
|
173
|
+
ace += bs_str
|
174
|
+
ace += "\n"
|
175
|
+
ace += rest_str
|
176
|
+
ace
|
177
|
+
end
|
178
|
+
|
179
|
+
end # => end Contig class
|
180
|
+
|
181
|
+
# open Read class to add ace specific methods for read objects
|
182
|
+
class Read
|
183
|
+
|
184
|
+
attr_accessor :base_sequences
|
185
|
+
|
186
|
+
def to_ace
|
187
|
+
ace += ""
|
188
|
+
# holds BS data for reads
|
189
|
+
bs_str = ""
|
190
|
+
# holds RD, QA, and DS data for reads
|
191
|
+
rest_str = ""
|
192
|
+
ace += to_ace_af
|
193
|
+
bs_str += to_ace_bs
|
194
|
+
rest_str = to_ace_rest
|
195
|
+
|
196
|
+
# compile data in correct order
|
197
|
+
ace += bs_str
|
198
|
+
ace += "\n"
|
199
|
+
ace += rest_str
|
200
|
+
ace
|
201
|
+
end
|
202
|
+
|
203
|
+
def to_ace_bs
|
204
|
+
bs_str = ""
|
205
|
+
unless base_sequences.nil?
|
206
|
+
base_sequences.each do |bs|
|
207
|
+
bs_str += ['BS', bs.from, bs.to, bs.read_name].join(' ') + "\n"
|
208
|
+
end
|
209
|
+
end
|
210
|
+
bs_str
|
211
|
+
end
|
212
|
+
|
213
|
+
def to_ace_af
|
214
|
+
['AF', name, orientation, from].join(' ') + "\n"
|
215
|
+
end
|
216
|
+
|
217
|
+
def to_ace_rest
|
218
|
+
rest_str = ""
|
219
|
+
rest_str += ['RD', name, num_bases, 0, 0].join(' ') + "\n"
|
220
|
+
rest_str += seq.to_s.gsub(Regexp.new(".{1,50}"), "\\0\n") + "\n"
|
221
|
+
rest_str += ['QA', clear_range_from, clear_range_to, clear_range_from, clear_range_to].join(' ') + "\n"
|
222
|
+
rest_str += ['DS', 'CHROMAT_FILE:', name, 'PHD_FILE:', "#{name}.phd.1", 'TIME:', Time.now].join(' ') + "\n"
|
223
|
+
rest_str
|
224
|
+
end
|
225
|
+
|
226
|
+
def add_base_sequence(from, to, read_name)
|
227
|
+
@base_sequences = Array.new if @base_sequences.nil?
|
228
|
+
@base_sequences.push BaseSequence.new(from, to, read_name)
|
229
|
+
end
|
230
|
+
|
231
|
+
class BaseSequence
|
232
|
+
attr_accessor :from, :to, :read_name
|
233
|
+
|
234
|
+
def initialize(from, to, read_name)
|
235
|
+
@from = from
|
236
|
+
@to = to
|
237
|
+
@read_name = read_name
|
238
|
+
end
|
239
|
+
|
240
|
+
def <=>(other)
|
241
|
+
unless other.kind_of?(Bio::Assembly::Read::BaseSequence)
|
242
|
+
raise "[Error] markers are not comparable"
|
243
|
+
end
|
244
|
+
if self.from == other.from
|
245
|
+
# sort by to if froms are identical
|
246
|
+
return self.to.<=>(other.to)
|
247
|
+
else
|
248
|
+
return self.from.<=>(other.from)
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
end # => end BaseSequence Class
|
253
|
+
|
254
|
+
end # => end Read Class
|
255
|
+
|
256
|
+
|
257
|
+
end # => end class Assembly
|
258
|
+
end # => end module Bio
|
data/lib/bio-assembly/contig.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
#require 'bio-assembly/contig/ace'
|
2
|
+
|
1
3
|
module Bio
|
2
4
|
class Assembly
|
3
5
|
|
@@ -59,39 +61,9 @@ module Bio
|
|
59
61
|
end
|
60
62
|
num_base_sequences
|
61
63
|
end
|
62
|
-
|
63
|
-
def to_ace
|
64
|
-
ace = ""
|
65
|
-
ace += ['CO', name, num_bases, num_reads, num_base_segments, orientation].join(' ') + "\n"
|
66
|
-
ace += seq.to_s.gsub(Regexp.new(".{1,50}"), "\\0\n") + "\n"
|
67
|
-
ace += "BQ\n"
|
68
|
-
last_stop = quality.size - 1
|
69
|
-
(quality.size/50+1).times do |i|
|
70
|
-
start = i * 50
|
71
|
-
stop = (i+1) * 50 - 1
|
72
|
-
stop = last_stop if stop > last_stop
|
73
|
-
ace += ' ' + quality[start..stop].join(' ') + "\n"
|
74
|
-
end
|
75
|
-
ace += "\n"
|
76
|
-
|
77
|
-
# holds BS data for reads
|
78
|
-
bs_str = ""
|
79
|
-
# holds RD, QA, and DS data for reads
|
80
|
-
rest_str = ""
|
81
|
-
@reads.values.sort.each do |read|
|
82
|
-
ace += read.to_ace_af
|
83
|
-
bs_str += read.to_ace_bs
|
84
|
-
rest_str += read.to_ace_rest
|
85
|
-
end
|
86
|
-
|
87
|
-
# compile data in correct order
|
88
|
-
ace += bs_str
|
89
|
-
ace += "\n"
|
90
|
-
ace += rest_str
|
91
|
-
ace
|
92
|
-
end
|
93
64
|
|
94
65
|
end
|
95
66
|
|
96
67
|
end
|
97
|
-
end
|
68
|
+
end
|
69
|
+
|
data/lib/bio-assembly/read.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
|
2
|
-
require 'bio-assembly/read/ace'
|
2
|
+
#require 'bio-assembly/read/ace'
|
3
3
|
|
4
4
|
module Bio
|
5
5
|
class Assembly
|
6
6
|
class Read
|
7
|
-
include Bio::Assembly::Read::Ace
|
8
7
|
|
9
8
|
attr_accessor :seq, :name, :orientation, :from, :to, :clear_range_from, :clear_range_to
|
10
9
|
def initialize(str="")
|
@@ -34,24 +33,7 @@ module Bio
|
|
34
33
|
def clear_range_to=(new_clear_range_to)
|
35
34
|
@clear_range_to = new_clear_range_to.to_i
|
36
35
|
end
|
37
|
-
|
38
|
-
def to_ace
|
39
|
-
ace += ""
|
40
|
-
# holds BS data for reads
|
41
|
-
bs_str = ""
|
42
|
-
# holds RD, QA, and DS data for reads
|
43
|
-
rest_str = ""
|
44
|
-
ace += to_ace_af
|
45
|
-
bs_str += to_ace_bs
|
46
|
-
rest_str = to_ace_rest
|
47
|
-
|
48
|
-
# compile data in correct order
|
49
|
-
ace += bs_str
|
50
|
-
ace += "\n"
|
51
|
-
ace += rest_str
|
52
|
-
ace
|
53
|
-
end
|
54
|
-
|
36
|
+
|
55
37
|
def <=>(other)
|
56
38
|
unless other.kind_of?(Bio::Assembly::Read)
|
57
39
|
raise "[Error] markers are not comparable"
|
@@ -64,29 +46,6 @@ module Bio
|
|
64
46
|
end
|
65
47
|
end
|
66
48
|
|
67
|
-
def to_ace_bs
|
68
|
-
bs_str = ""
|
69
|
-
unless base_sequences.nil?
|
70
|
-
base_sequences.each do |bs|
|
71
|
-
bs_str += ['BS', bs.from, bs.to, bs.read_name].join(' ') + "\n"
|
72
|
-
end
|
73
|
-
end
|
74
|
-
bs_str
|
75
|
-
end
|
76
|
-
|
77
|
-
def to_ace_af
|
78
|
-
['AF', name, orientation, from].join(' ') + "\n"
|
79
|
-
end
|
80
|
-
|
81
|
-
def to_ace_rest
|
82
|
-
rest_str = ""
|
83
|
-
rest_str += ['RD', name, num_bases, 0, 0].join(' ') + "\n"
|
84
|
-
rest_str += seq.to_s.gsub(Regexp.new(".{1,50}"), "\\0\n") + "\n"
|
85
|
-
rest_str += ['QA', clear_range_from, clear_range_to, clear_range_from, clear_range_to].join(' ') + "\n"
|
86
|
-
rest_str += ['DS', 'CHROMAT_FILE:', name, 'PHD_FILE:', "#{name}.phd.1", 'TIME:', Time.now].join(' ') + "\n"
|
87
|
-
rest_str
|
88
|
-
end
|
89
|
-
|
90
49
|
end
|
91
50
|
|
92
51
|
end
|
data/test/test_bio-assembly.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-assembly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 0
|
10
|
-
version: 0.0.0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Chase Miller
|
@@ -114,9 +114,9 @@ files:
|
|
114
114
|
- bio-assembly.gemspec
|
115
115
|
- data/example1.ace
|
116
116
|
- lib/bio-assembly.rb
|
117
|
+
- lib/bio-assembly/ace.rb
|
117
118
|
- lib/bio-assembly/contig.rb
|
118
119
|
- lib/bio-assembly/read.rb
|
119
|
-
- lib/bio-assembly/read/ace.rb
|
120
120
|
- test/helper.rb
|
121
121
|
- test/test_bio-assembly.rb
|
122
122
|
has_rdoc: true
|
@@ -1,39 +0,0 @@
|
|
1
|
-
module Bio
|
2
|
-
class Assembly
|
3
|
-
class Read
|
4
|
-
|
5
|
-
module Ace
|
6
|
-
attr_accessor :base_sequences
|
7
|
-
|
8
|
-
def add_base_sequence(from, to, read_name)
|
9
|
-
@base_sequences = Array.new if @base_sequences.nil?
|
10
|
-
@base_sequences.push BaseSequence.new(from, to, read_name)
|
11
|
-
end
|
12
|
-
|
13
|
-
class BaseSequence
|
14
|
-
attr_accessor :from, :to, :read_name
|
15
|
-
|
16
|
-
def initialize(from, to, read_name)
|
17
|
-
@from = from
|
18
|
-
@to = to
|
19
|
-
@read_name = read_name
|
20
|
-
end
|
21
|
-
|
22
|
-
def <=>(other)
|
23
|
-
unless other.kind_of?(Bio::Assembly::Read::BaseSequence)
|
24
|
-
raise "[Error] markers are not comparable"
|
25
|
-
end
|
26
|
-
if self.from == other.from
|
27
|
-
# sort by to if froms are identical
|
28
|
-
return self.to.<=>(other.to)
|
29
|
-
else
|
30
|
-
return self.from.<=>(other.from)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|