bio-assembly 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +10 -1
- data/VERSION +1 -1
- data/bio-assembly.gemspec +2 -2
- data/lib/bio-assembly.rb +41 -155
- data/lib/bio-assembly/ace.rb +258 -0
- data/lib/bio-assembly/contig.rb +4 -32
- data/lib/bio-assembly/read.rb +2 -43
- data/test/test_bio-assembly.rb +1 -1
- metadata +4 -4
- data/lib/bio-assembly/read/ace.rb +0 -39
data/README.rdoc
CHANGED
@@ -1,6 +1,15 @@
|
|
1
1
|
= bio-assembly
|
2
2
|
|
3
|
-
|
3
|
+
BioRuby plugin for parsing, writing, and maniuplating assembly data
|
4
|
+
|
5
|
+
== Install
|
6
|
+
|
7
|
+
gem install bio-assembly
|
8
|
+
|
9
|
+
== Usage
|
10
|
+
|
11
|
+
Examples on my blog:
|
12
|
+
http://chasemiller4.blogspot.com/2010/10/bioruby-ace-parser-example.html
|
4
13
|
|
5
14
|
== Contributing to bio-assembly
|
6
15
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.0
|
1
|
+
0.0.1
|
data/bio-assembly.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bio-assembly}
|
8
|
-
s.version = "0.0.0"
|
8
|
+
s.version = "0.0.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Chase Miller"]
|
@@ -27,9 +27,9 @@ Gem::Specification.new do |s|
|
|
27
27
|
"bio-assembly.gemspec",
|
28
28
|
"data/example1.ace",
|
29
29
|
"lib/bio-assembly.rb",
|
30
|
+
"lib/bio-assembly/ace.rb",
|
30
31
|
"lib/bio-assembly/contig.rb",
|
31
32
|
"lib/bio-assembly/read.rb",
|
32
|
-
"lib/bio-assembly/read/ace.rb",
|
33
33
|
"test/helper.rb",
|
34
34
|
"test/test_bio-assembly.rb"
|
35
35
|
]
|
data/lib/bio-assembly.rb
CHANGED
@@ -1,169 +1,55 @@
|
|
1
|
-
|
2
|
-
require 'bio/sequence'
|
1
|
+
require 'bio/sequence'
|
3
2
|
require 'bio-assembly/contig'
|
4
3
|
require 'bio-assembly/read'
|
5
4
|
|
6
5
|
module Bio
|
7
6
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
@contigs
|
21
|
-
end
|
22
|
-
|
23
|
-
def each_contig
|
24
|
-
# check if file is already parsed
|
25
|
-
if @total_num_contigs.to_i == @contigs.size
|
26
|
-
@contigs.each{ |contig| yield contig }
|
27
|
-
else
|
28
|
-
each_identifier do |identifier, attrs|
|
29
|
-
next unless identifier == 'CO'
|
30
|
-
contig = parse_contig(attrs)
|
31
|
-
@contigs.push contig
|
32
|
-
yield(contig)
|
33
|
-
end
|
7
|
+
class Assembly
|
8
|
+
attr_accessor :contigs
|
9
|
+
|
10
|
+
@@formats = { }
|
11
|
+
|
12
|
+
def self.create(path, format)
|
13
|
+
streamer = @@formats[format]
|
14
|
+
if streamer
|
15
|
+
streamer.new(path)
|
16
|
+
else
|
17
|
+
raise "Format type '#{format}' is not supported"
|
18
|
+
end
|
34
19
|
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def to_ace
|
38
|
-
ace = ""
|
39
|
-
ace += "AS " + num_contigs.to_s + " " + num_reads.to_s + "\n\n"
|
40
|
-
each_contig { |contig| ace += contig.to_ace + "\n" }
|
41
|
-
ace
|
42
|
-
end
|
43
|
-
|
44
|
-
private
|
45
|
-
|
46
|
-
def parse_contig(attrs)
|
47
|
-
contig = Bio::Assembly::Contig.new
|
48
|
-
contig.name, base_num, @num_reads, base_segments_num, contig.orientation = attrs.split(" ")
|
49
|
-
# keep track of the number of RD identifiers parsed
|
50
|
-
@num_rds_parsed = 0
|
51
20
|
|
52
|
-
|
53
|
-
|
54
|
-
contig.seq = seq
|
55
|
-
|
56
|
-
# loop through identifiers (e.g AF, RD, etc)
|
57
|
-
each_identifier do |identifier, attrs|
|
58
|
-
case identifier
|
59
|
-
when "BQ" then parse_bq(contig)
|
60
|
-
when "AF" then parse_af(contig, attrs)
|
61
|
-
when "BS" then parse_bs(contig, attrs)
|
62
|
-
when "RD" then parse_rd(contig, attrs); break if @num_rds_parsed == @num_reads.to_i
|
63
|
-
when "WR" then parse_wr(contig, attrs)
|
64
|
-
when "RT" then parse_rt(contig, attrs)
|
65
|
-
when "CT" then parse_ct(contig, attrs)
|
66
|
-
when "WA" then parse_wa(contig, attrs)
|
67
|
-
end
|
21
|
+
def self.register_parser name
|
22
|
+
@@formats[name] = self
|
68
23
|
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
def each_identifier
|
75
|
-
@file.each do |line|
|
76
|
-
next if line !~ /^[ABCDQRW][ADFOQRST][\s\n].*/
|
77
|
-
yield(line[0..1], line[3..-1])
|
24
|
+
|
25
|
+
def contigs
|
26
|
+
# use each_contig to stream large files
|
27
|
+
parse_whole_file if @contigs.empty?
|
28
|
+
@contigs
|
78
29
|
end
|
79
|
-
end
|
80
|
-
|
81
|
-
# parse assembly meta data
|
82
|
-
def parse_as
|
83
|
-
line = @file.gets
|
84
|
-
identifier, @total_num_contigs, total_num_reads = line.split(" ")
|
85
|
-
end
|
86
|
-
|
87
|
-
# parse contig sequence quality data
|
88
|
-
def parse_bq(contig)
|
89
|
-
contig.quality = @file.gets("\n\n").tr("\r\n", "").gsub(/^\s/, "").split(' ')
|
90
|
-
end
|
91
|
-
|
92
|
-
# parse read meta data
|
93
|
-
def parse_af(contig, attrs)
|
94
|
-
read = Bio::Assembly::Read.new
|
95
|
-
read.name , read.orientation, read.from = attrs.split(" ")
|
96
|
-
contig.add_read read
|
97
|
-
end
|
98
|
-
|
99
|
-
# parse base sequence data
|
100
|
-
def parse_bs(contig, attrs)
|
101
|
-
from, to, read_name = attrs.split(" ")
|
102
|
-
read = contig.find_read_by_name( read_name )
|
103
|
-
read.add_base_sequence(from, to, read_name)
|
104
|
-
end
|
105
|
-
|
106
|
-
# parse read sequence and position data
|
107
|
-
def parse_rd(contig, attrs)
|
108
|
-
# increment counter
|
109
|
-
@num_rds_parsed += 1
|
110
30
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
# set read.to to contig length if read runs off contig
|
120
|
-
read.to = contig.seq.length if read.to > contig.seq.length
|
121
|
-
|
122
|
-
# if present parse QA and DS associated with this read
|
123
|
-
each_identifier do |identifier, attrs|
|
124
|
-
case identifier
|
125
|
-
when "QA" then parse_qa(read, attrs)
|
126
|
-
when "DS" then parse_ds(read, attrs); break
|
127
|
-
end
|
31
|
+
def each_contig
|
32
|
+
# implemented by each format subclass
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def num_contigs
|
38
|
+
contigs.size
|
128
39
|
end
|
129
|
-
|
130
|
-
end
|
131
|
-
|
132
|
-
# parse a read's clear ranges (the part of the read that contributes to the contig)
|
133
|
-
def parse_qa(read, attrs)
|
134
|
-
start, stop, clear_range_from, clear_range_to = attrs.split(" ")
|
135
|
-
read.clear_range_from = clear_range_from
|
136
|
-
read.clear_range_to = clear_range_to
|
137
|
-
end
|
138
|
-
|
139
|
-
# parse file data - ignored
|
140
|
-
def parse_ds(read, attrs)
|
141
|
-
end
|
142
|
-
|
143
|
-
# parse run meta data - ignored
|
144
|
-
def parse_wa(contig, attrs)
|
145
|
-
end
|
146
|
-
|
147
|
-
# parse run meta data - ignored
|
148
|
-
def parse_ct(contig, attrs)
|
149
|
-
end
|
150
|
-
|
151
|
-
def num_contigs
|
152
|
-
contigs.size
|
153
|
-
end
|
154
|
-
|
155
|
-
def num_reads
|
156
|
-
read_num = 0
|
157
|
-
each_contig { |contig| read_num += contig.num_reads }
|
158
|
-
read_num
|
159
|
-
end
|
160
|
-
|
161
|
-
def parse_whole_file
|
162
|
-
each_contig { |x| 1 }
|
163
|
-
end
|
164
|
-
|
165
|
-
end
|
166
40
|
|
167
|
-
|
41
|
+
def num_reads
|
42
|
+
read_num = 0
|
43
|
+
each_contig { |contig| read_num += contig.num_reads }
|
44
|
+
read_num
|
45
|
+
end
|
168
46
|
|
47
|
+
def parse_whole_file
|
48
|
+
each_contig { |x| 1 }
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
169
54
|
|
55
|
+
require 'bio-assembly/ace'
|
data/lib/bio-assembly/ace.rb
ADDED
@@ -0,0 +1,258 @@
|
|
1
|
+
|
2
|
+
module Bio
|
3
|
+
class Assembly
|
4
|
+
|
5
|
+
class Ace < Bio::Assembly
|
6
|
+
|
7
|
+
# register parser with superclass
|
8
|
+
register_parser :ace
|
9
|
+
|
10
|
+
def initialize(path)
|
11
|
+
@file = File.new(path, 'r')
|
12
|
+
@contigs = Array.new
|
13
|
+
parse_as
|
14
|
+
end
|
15
|
+
|
16
|
+
def each_contig
|
17
|
+
# check if file is already parsed
|
18
|
+
if @total_num_contigs.to_i == @contigs.size
|
19
|
+
@contigs.each{ |contig| yield contig }
|
20
|
+
else
|
21
|
+
each_identifier do |identifier, attrs|
|
22
|
+
next unless identifier == 'CO'
|
23
|
+
contig = parse_contig(attrs)
|
24
|
+
@contigs.push contig
|
25
|
+
yield(contig)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_ace
|
31
|
+
ace = ""
|
32
|
+
ace += "AS " + num_contigs.to_s + " " + num_reads.to_s + "\n\n"
|
33
|
+
each_contig { |contig| ace += contig.to_ace + "\n" }
|
34
|
+
ace
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
def parse_contig(attrs)
|
39
|
+
contig = Bio::Assembly::Contig.new
|
40
|
+
contig.name, base_num, @num_reads, base_segments_num, contig.orientation = attrs.split(" ")
|
41
|
+
# keep track of the number of RD identifiers parsed
|
42
|
+
@num_rds_parsed = 0
|
43
|
+
|
44
|
+
# get sequence
|
45
|
+
seq = @file.gets("\n\n").tr(" \r\n", "")
|
46
|
+
contig.seq = seq
|
47
|
+
|
48
|
+
# loop through identifiers (e.g AF, RD, etc)
|
49
|
+
each_identifier do |identifier, attrs|
|
50
|
+
case identifier
|
51
|
+
when "BQ" then parse_bq(contig)
|
52
|
+
when "AF" then parse_af(contig, attrs)
|
53
|
+
when "BS" then parse_bs(contig, attrs)
|
54
|
+
when "RD" then parse_rd(contig, attrs); break if @num_rds_parsed == @num_reads.to_i
|
55
|
+
when "WR" then parse_wr(contig, attrs)
|
56
|
+
when "RT" then parse_rt(contig, attrs)
|
57
|
+
when "CT" then parse_ct(contig, attrs)
|
58
|
+
when "WA" then parse_wa(contig, attrs)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
contig
|
63
|
+
end
|
64
|
+
|
65
|
+
# Finds the next_identifier
|
66
|
+
def each_identifier
|
67
|
+
@file.each do |line|
|
68
|
+
next if line !~ /^[ABCDQRW][ADFOQRST][\s\n].*/
|
69
|
+
yield(line[0..1], line[3..-1])
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# parse assembly meta data
|
74
|
+
def parse_as
|
75
|
+
line = @file.gets
|
76
|
+
identifier, @total_num_contigs, total_num_reads = line.split(" ")
|
77
|
+
end
|
78
|
+
|
79
|
+
# parse contig sequence quality data
|
80
|
+
def parse_bq(contig)
|
81
|
+
contig.quality = @file.gets("\n\n").tr("\r\n", "").gsub(/^\s/, "").split(' ')
|
82
|
+
end
|
83
|
+
|
84
|
+
# parse read meta data
|
85
|
+
def parse_af(contig, attrs)
|
86
|
+
read = Bio::Assembly::Read.new
|
87
|
+
read.name , read.orientation, read.from = attrs.split(" ")
|
88
|
+
contig.add_read read
|
89
|
+
end
|
90
|
+
|
91
|
+
# parse base sequence data
|
92
|
+
def parse_bs(contig, attrs)
|
93
|
+
from, to, read_name = attrs.split(" ")
|
94
|
+
read = contig.find_read_by_name( read_name )
|
95
|
+
read.add_base_sequence(from, to, read_name)
|
96
|
+
end
|
97
|
+
|
98
|
+
# parse read sequence and position data
|
99
|
+
def parse_rd(contig, attrs)
|
100
|
+
# increment counter
|
101
|
+
@num_rds_parsed += 1
|
102
|
+
|
103
|
+
# parse read
|
104
|
+
read_name, num_padded_bases, num_read_infos, num_read_tags = attrs.split(" ")
|
105
|
+
seq = @file.gets("\n\n").tr( " \r\n", "")
|
106
|
+
|
107
|
+
# get read with matching name
|
108
|
+
read = contig.find_read_by_name( read_name )
|
109
|
+
read.seq = seq
|
110
|
+
read.to = read.from.to_i + read.seq.length
|
111
|
+
# set read.to to contig length if read runs off contig
|
112
|
+
read.to = contig.seq.length if read.to > contig.seq.length
|
113
|
+
|
114
|
+
# if present parse QA and DS associated with this read
|
115
|
+
each_identifier do |identifier, attrs|
|
116
|
+
case identifier
|
117
|
+
when "QA" then parse_qa(read, attrs)
|
118
|
+
when "DS" then parse_ds(read, attrs); break
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
# parse a read's clear ranges (the part of the read that contributes to the contig)
|
125
|
+
def parse_qa(read, attrs)
|
126
|
+
start, stop, clear_range_from, clear_range_to = attrs.split(" ")
|
127
|
+
read.clear_range_from = clear_range_from
|
128
|
+
read.clear_range_to = clear_range_to
|
129
|
+
end
|
130
|
+
|
131
|
+
# parse file data - ignored
|
132
|
+
def parse_ds(read, attrs)
|
133
|
+
end
|
134
|
+
|
135
|
+
# parse run meta data - ignored
|
136
|
+
def parse_wa(contig, attrs)
|
137
|
+
end
|
138
|
+
|
139
|
+
# parse run meta data - ignored
|
140
|
+
def parse_ct(contig, attrs)
|
141
|
+
end
|
142
|
+
|
143
|
+
end # => end class Ace
|
144
|
+
|
145
|
+
# open contig class and write ace specific methods for contig objects
|
146
|
+
class Contig
|
147
|
+
|
148
|
+
def to_ace
|
149
|
+
ace = ""
|
150
|
+
ace += ['CO', name, num_bases, num_reads, num_base_segments, orientation].join(' ') + "\n"
|
151
|
+
ace += seq.to_s.gsub(Regexp.new(".{1,50}"), "\\0\n") + "\n"
|
152
|
+
ace += "BQ\n"
|
153
|
+
last_stop = quality.size - 1
|
154
|
+
(quality.size/50+1).times do |i|
|
155
|
+
start = i * 50
|
156
|
+
stop = (i+1) * 50 - 1
|
157
|
+
stop = last_stop if stop > last_stop
|
158
|
+
ace += ' ' + quality[start..stop].join(' ') + "\n"
|
159
|
+
end
|
160
|
+
ace += "\n"
|
161
|
+
|
162
|
+
# holds BS data for reads
|
163
|
+
bs_str = ""
|
164
|
+
# holds RD, QA, and DS data for reads
|
165
|
+
rest_str = ""
|
166
|
+
@reads.values.sort.each do |read|
|
167
|
+
ace += read.to_ace_af
|
168
|
+
bs_str += read.to_ace_bs
|
169
|
+
rest_str += read.to_ace_rest
|
170
|
+
end
|
171
|
+
|
172
|
+
# compile data in correct order
|
173
|
+
ace += bs_str
|
174
|
+
ace += "\n"
|
175
|
+
ace += rest_str
|
176
|
+
ace
|
177
|
+
end
|
178
|
+
|
179
|
+
end # => end Contig class
|
180
|
+
|
181
|
+
# open Read class to add ace specific methods for read objects
|
182
|
+
class Read
|
183
|
+
|
184
|
+
attr_accessor :base_sequences
|
185
|
+
|
186
|
+
def to_ace
|
187
|
+
ace += ""
|
188
|
+
# holds BS data for reads
|
189
|
+
bs_str = ""
|
190
|
+
# holds RD, QA, and DS data for reads
|
191
|
+
rest_str = ""
|
192
|
+
ace += to_ace_af
|
193
|
+
bs_str += to_ace_bs
|
194
|
+
rest_str = to_ace_rest
|
195
|
+
|
196
|
+
# compile data in correct order
|
197
|
+
ace += bs_str
|
198
|
+
ace += "\n"
|
199
|
+
ace += rest_str
|
200
|
+
ace
|
201
|
+
end
|
202
|
+
|
203
|
+
def to_ace_bs
|
204
|
+
bs_str = ""
|
205
|
+
unless base_sequences.nil?
|
206
|
+
base_sequences.each do |bs|
|
207
|
+
bs_str += ['BS', bs.from, bs.to, bs.read_name].join(' ') + "\n"
|
208
|
+
end
|
209
|
+
end
|
210
|
+
bs_str
|
211
|
+
end
|
212
|
+
|
213
|
+
def to_ace_af
|
214
|
+
['AF', name, orientation, from].join(' ') + "\n"
|
215
|
+
end
|
216
|
+
|
217
|
+
def to_ace_rest
|
218
|
+
rest_str = ""
|
219
|
+
rest_str += ['RD', name, num_bases, 0, 0].join(' ') + "\n"
|
220
|
+
rest_str += seq.to_s.gsub(Regexp.new(".{1,50}"), "\\0\n") + "\n"
|
221
|
+
rest_str += ['QA', clear_range_from, clear_range_to, clear_range_from, clear_range_to].join(' ') + "\n"
|
222
|
+
rest_str += ['DS', 'CHROMAT_FILE:', name, 'PHD_FILE:', "#{name}.phd.1", 'TIME:', Time.now].join(' ') + "\n"
|
223
|
+
rest_str
|
224
|
+
end
|
225
|
+
|
226
|
+
def add_base_sequence(from, to, read_name)
|
227
|
+
@base_sequences = Array.new if @base_sequences.nil?
|
228
|
+
@base_sequences.push BaseSequence.new(from, to, read_name)
|
229
|
+
end
|
230
|
+
|
231
|
+
class BaseSequence
|
232
|
+
attr_accessor :from, :to, :read_name
|
233
|
+
|
234
|
+
def initialize(from, to, read_name)
|
235
|
+
@from = from
|
236
|
+
@to = to
|
237
|
+
@read_name = read_name
|
238
|
+
end
|
239
|
+
|
240
|
+
def <=>(other)
|
241
|
+
unless other.kind_of?(Bio::Assembly::Read::BaseSequence)
|
242
|
+
raise "[Error] markers are not comparable"
|
243
|
+
end
|
244
|
+
if self.from == other.from
|
245
|
+
# sort by to if froms are identical
|
246
|
+
return self.to.<=>(other.to)
|
247
|
+
else
|
248
|
+
return self.from.<=>(other.from)
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
end # => end BaseSequence Class
|
253
|
+
|
254
|
+
end # => end Read Class
|
255
|
+
|
256
|
+
|
257
|
+
end # => end class Assembly
|
258
|
+
end # => end module Bio
|
data/lib/bio-assembly/contig.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
#require 'bio-assembly/contig/ace'
|
2
|
+
|
1
3
|
module Bio
|
2
4
|
class Assembly
|
3
5
|
|
@@ -59,39 +61,9 @@ module Bio
|
|
59
61
|
end
|
60
62
|
num_base_sequences
|
61
63
|
end
|
62
|
-
|
63
|
-
def to_ace
|
64
|
-
ace = ""
|
65
|
-
ace += ['CO', name, num_bases, num_reads, num_base_segments, orientation].join(' ') + "\n"
|
66
|
-
ace += seq.to_s.gsub(Regexp.new(".{1,50}"), "\\0\n") + "\n"
|
67
|
-
ace += "BQ\n"
|
68
|
-
last_stop = quality.size - 1
|
69
|
-
(quality.size/50+1).times do |i|
|
70
|
-
start = i * 50
|
71
|
-
stop = (i+1) * 50 - 1
|
72
|
-
stop = last_stop if stop > last_stop
|
73
|
-
ace += ' ' + quality[start..stop].join(' ') + "\n"
|
74
|
-
end
|
75
|
-
ace += "\n"
|
76
|
-
|
77
|
-
# holds BS data for reads
|
78
|
-
bs_str = ""
|
79
|
-
# holds RD, QA, and DS data for reads
|
80
|
-
rest_str = ""
|
81
|
-
@reads.values.sort.each do |read|
|
82
|
-
ace += read.to_ace_af
|
83
|
-
bs_str += read.to_ace_bs
|
84
|
-
rest_str += read.to_ace_rest
|
85
|
-
end
|
86
|
-
|
87
|
-
# compile data in correct order
|
88
|
-
ace += bs_str
|
89
|
-
ace += "\n"
|
90
|
-
ace += rest_str
|
91
|
-
ace
|
92
|
-
end
|
93
64
|
|
94
65
|
end
|
95
66
|
|
96
67
|
end
|
97
|
-
end
|
68
|
+
end
|
69
|
+
|
data/lib/bio-assembly/read.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
|
2
|
-
require 'bio-assembly/read/ace'
|
2
|
+
#require 'bio-assembly/read/ace'
|
3
3
|
|
4
4
|
module Bio
|
5
5
|
class Assembly
|
6
6
|
class Read
|
7
|
-
include Bio::Assembly::Read::Ace
|
8
7
|
|
9
8
|
attr_accessor :seq, :name, :orientation, :from, :to, :clear_range_from, :clear_range_to
|
10
9
|
def initialize(str="")
|
@@ -34,24 +33,7 @@ module Bio
|
|
34
33
|
def clear_range_to=(new_clear_range_to)
|
35
34
|
@clear_range_to = new_clear_range_to.to_i
|
36
35
|
end
|
37
|
-
|
38
|
-
def to_ace
|
39
|
-
ace += ""
|
40
|
-
# holds BS data for reads
|
41
|
-
bs_str = ""
|
42
|
-
# holds RD, QA, and DS data for reads
|
43
|
-
rest_str = ""
|
44
|
-
ace += to_ace_af
|
45
|
-
bs_str += to_ace_bs
|
46
|
-
rest_str = to_ace_rest
|
47
|
-
|
48
|
-
# compile data in correct order
|
49
|
-
ace += bs_str
|
50
|
-
ace += "\n"
|
51
|
-
ace += rest_str
|
52
|
-
ace
|
53
|
-
end
|
54
|
-
|
36
|
+
|
55
37
|
def <=>(other)
|
56
38
|
unless other.kind_of?(Bio::Assembly::Read)
|
57
39
|
raise "[Error] markers are not comparable"
|
@@ -64,29 +46,6 @@ module Bio
|
|
64
46
|
end
|
65
47
|
end
|
66
48
|
|
67
|
-
def to_ace_bs
|
68
|
-
bs_str = ""
|
69
|
-
unless base_sequences.nil?
|
70
|
-
base_sequences.each do |bs|
|
71
|
-
bs_str += ['BS', bs.from, bs.to, bs.read_name].join(' ') + "\n"
|
72
|
-
end
|
73
|
-
end
|
74
|
-
bs_str
|
75
|
-
end
|
76
|
-
|
77
|
-
def to_ace_af
|
78
|
-
['AF', name, orientation, from].join(' ') + "\n"
|
79
|
-
end
|
80
|
-
|
81
|
-
def to_ace_rest
|
82
|
-
rest_str = ""
|
83
|
-
rest_str += ['RD', name, num_bases, 0, 0].join(' ') + "\n"
|
84
|
-
rest_str += seq.to_s.gsub(Regexp.new(".{1,50}"), "\\0\n") + "\n"
|
85
|
-
rest_str += ['QA', clear_range_from, clear_range_to, clear_range_from, clear_range_to].join(' ') + "\n"
|
86
|
-
rest_str += ['DS', 'CHROMAT_FILE:', name, 'PHD_FILE:', "#{name}.phd.1", 'TIME:', Time.now].join(' ') + "\n"
|
87
|
-
rest_str
|
88
|
-
end
|
89
|
-
|
90
49
|
end
|
91
50
|
|
92
51
|
end
|
data/test/test_bio-assembly.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-assembly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 0
|
10
|
-
version: 0.0.0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Chase Miller
|
@@ -114,9 +114,9 @@ files:
|
|
114
114
|
- bio-assembly.gemspec
|
115
115
|
- data/example1.ace
|
116
116
|
- lib/bio-assembly.rb
|
117
|
+
- lib/bio-assembly/ace.rb
|
117
118
|
- lib/bio-assembly/contig.rb
|
118
119
|
- lib/bio-assembly/read.rb
|
119
|
-
- lib/bio-assembly/read/ace.rb
|
120
120
|
- test/helper.rb
|
121
121
|
- test/test_bio-assembly.rb
|
122
122
|
has_rdoc: true
|
data/lib/bio-assembly/read/ace.rb
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
module Bio
|
2
|
-
class Assembly
|
3
|
-
class Read
|
4
|
-
|
5
|
-
module Ace
|
6
|
-
attr_accessor :base_sequences
|
7
|
-
|
8
|
-
def add_base_sequence(from, to, read_name)
|
9
|
-
@base_sequences = Array.new if @base_sequences.nil?
|
10
|
-
@base_sequences.push BaseSequence.new(from, to, read_name)
|
11
|
-
end
|
12
|
-
|
13
|
-
class BaseSequence
|
14
|
-
attr_accessor :from, :to, :read_name
|
15
|
-
|
16
|
-
def initialize(from, to, read_name)
|
17
|
-
@from = from
|
18
|
-
@to = to
|
19
|
-
@read_name = read_name
|
20
|
-
end
|
21
|
-
|
22
|
-
def <=>(other)
|
23
|
-
unless other.kind_of?(Bio::Assembly::Read::BaseSequence)
|
24
|
-
raise "[Error] markers are not comparable"
|
25
|
-
end
|
26
|
-
if self.from == other.from
|
27
|
-
# sort by to if froms are identical
|
28
|
-
return self.to.<=>(other.to)
|
29
|
-
else
|
30
|
-
return self.from.<=>(other.from)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|