scbi_fastq 0.0.14 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Rakefile +1 -1
- data/lib/scbi_fastq/fastq_file.rb +158 -143
- data/lib/scbi_fastq.rb +1 -1
- data/test/test_scbi_fastq.rb +47 -1
- metadata +2 -2
data/History.txt
CHANGED
data/Rakefile
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
|
2
2
|
# Backport of String#ord for Ruby versions that lack it (1.8.6 and older).
#
# The guard uses method_defined? instead of instance_methods.include?(:ord):
# Ruby 1.8 reports instance methods as Strings, so a Symbol lookup never
# matches there and the native 1.8.7 implementation would be overwritten.
# On Ruby >= 1.9 the fallback must never be installed, because String#[]
# returns a one-character String there rather than a byte value.
unless String.method_defined?(:ord)
  class String
    # Numeric value of the first byte (Ruby 1.8 semantics of self[0]).
    def ord
      self[0]
    end
  end
end
|
12
|
-
|
12
|
+
|
13
13
|
|
14
14
|
|
15
15
|
class FastqFile
|
@@ -21,31 +21,31 @@ class FastqFile
|
|
21
21
|
#------------------------------------
|
22
22
|
# Opens a FASTQ file and configures how quality characters are decoded.
#
# fasta_file_name - path of the FASTQ file. In read mode an already opened
#                   IO object is also accepted and used as is.
# mode            - string containing 'w' (create/truncate for writing) or
#                   'a' (append to an existing file); anything else means
#                   read-only. Matched case-insensitively.
# fastq_type      - :sanger (default), :ilumina / :illumina, or :solexa.
# qual_to_array   - stored in @qual_to_array; when decoding to phred, false
#                   makes the reader join the values with spaces.
# qual_to_phred   - stored in @qual_to_phred; true makes the reader convert
#                   quality characters to integer phred scores.
#
# Raises RuntimeError when the file has to exist (append / read) and doesn't.
def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)
  if mode.upcase.index('W')
    @fastq_file = File.open(fasta_file_name, 'w')
  elsif mode.upcase.index('A')
    if !File.exist?(fasta_file_name)
      raise "File #{fasta_file_name} doesn't exists"
    end

    @fastq_file = File.open(fasta_file_name, 'a')
  else # read only
    if !File.exist?(fasta_file_name)
      raise "File #{fasta_file_name} doesn't exists"
    end

    if fasta_file_name.is_a?(IO)
      @fastq_file = fasta_file_name
    else
      @fastq_file = File.open(fasta_file_name, 'r')
    end
  end

  @mode = mode
  @num_seqs = 0
  @fastq_type = fastq_type

  # Quality encodings:
  #   S - Sanger        Phred+33,  raw reads typically (0, 40)
  #   X - Solexa        Solexa+64, raw reads typically (-5, 40)
  #   I - Illumina 1.3+ Phred+64,  raw reads typically (0, 40)
  #
  # Solexa scores are not phred scores; they are converted with
  #   phred = 10 * log10(10 ** (solexa / 10.0) + 1)
  # e.g. solexa -20 -> phred 0.043213737826425784

  # sanger by default
  @to_phred = lambda { |q| q - 33 }
  # Output is always produced with the Sanger offset; the per-format writers
  # were already disabled (commented out) in the original code.
  @from_phred = lambda { |q| (q + 33).chr }

  # :ilumina is the historical (misspelled) symbol; keep accepting it.
  if @fastq_type == :ilumina || @fastq_type == :illumina
    @to_phred = lambda { |q| q - 64 }
  elsif @fastq_type == :solexa
    # q is the raw character code, so the +64 offset has to be removed
    # before the solexa -> phred formula is applied.
    @to_phred = lambda { |q| (10 * Math.log(10**((q - 64) / 10.0) + 1, 10)).round }
  end

  @qual_to_array = qual_to_array
  @qual_to_phred = qual_to_phred
end
|
104
|
-
|
104
|
+
|
105
105
|
# Closes the underlying file handle.
# Safe to call more than once: an already closed handle is left alone
# (closing twice raises IOError on older Ruby versions).
def close
  @fastq_file.close unless @fastq_file.closed?
end
|
108
|
-
|
109
|
-
|
108
|
+
|
109
|
+
|
110
110
|
#------------------------------------
|
111
111
|
# Iterate over all sequences
|
112
112
|
#------------------------------------
|
113
113
|
# Iterates over every sequence in the file, from the beginning.
#
# Yields name, fasta, qual and comments for each record (the four values
# returned by next_seq). The file is rewound before and after the iteration.
# Without a block an Enumerator is returned instead of failing on yield.
def each
  return enum_for(:each) unless block_given?

  rewind

  n, f, q, c = next_seq

  # next_seq signals the end of the file with a nil name
  until n.nil?
    yield(n, f, q, c)
    n, f, q, c = next_seq
  end

  rewind
end
|
127
127
|
|
128
128
|
# goto first position in file
|
129
129
|
# Goes back to the first position in the file and resets the sequence counter.
# IO#rewind is used so the handle's line counter is reset together with
# the position.
def rewind
  @num_seqs = 0
  @fastq_file.rewind
end
|
135
135
|
|
136
136
|
#------------------------------------
|
@@ -139,114 +139,129 @@ class FastqFile
|
|
139
139
|
# Reads the next record from the file.
# Returns whatever read_fastq returns: [name, fasta, qual, comments], with
# all four values nil once the end of the file has been reached.
def next_seq
  read_fastq
end
|
144
|
-
|
144
|
+
|
145
145
|
# write sequence to file in sanger format
|
146
146
|
# Writes one record to the file in sanger format.
#
# seq_name  - sequence name, written after '@' and '+'.
# seq_fasta - the sequence itself.
# seq_qual  - phred scores, either as an Array of integers or as a String of
#             whitespace separated integers. When nil or empty, a default
#             quality of 'D' per base is written (same default as to_fastq),
#             so the record never ends with a blank quality line.
# comments  - optional text appended to the name lines.
def write_seq(seq_name,seq_fasta,seq_qual,comments='')
  @fastq_file.puts("@#{seq_name} #{comments}")
  @fastq_file.puts(seq_fasta)
  @fastq_file.puts("+#{seq_name} #{comments}")

  if seq_qual.nil? || seq_qual.empty?
    # no qual provided, use a default value
    @fastq_file.puts('D' * seq_fasta.length)
  elsif seq_qual.is_a?(Array)
    @fastq_file.puts(seq_qual.map { |e| @from_phred.call(e) }.join)
  else
    @fastq_file.puts(seq_qual.split(/\s+/).map { |e| @from_phred.call(e.to_i) }.join)
  end
end
|
160
160
|
|
161
|
-
|
161
|
+
|
162
162
|
# creates fastq otuput in sanger format
|
163
163
|
# Creates fastq output in sanger format.
#
# seq_name  - sequence name, used after '@' and '+'.
# seq_fasta - the sequence itself.
# seq_qual  - an Array of integer phred scores (encoded with offset 33), an
#             already encoded quality String (used untouched), or nil / empty
#             to get a default quality of 'D' for every base.
# comments  - optional text appended to the name lines.
#
# Returns an Array with the four lines of the record (no line terminators).
#
# This is a class method, so no per-instance setting such as @qual_to_phred
# is available here; the decision to encode depends only on the type of
# seq_qual.
def self.to_fastq(seq_name,seq_fasta,seq_qual,comments='')
  res = []

  res << "@#{seq_name} #{comments}"
  res << seq_fasta
  res << "+#{seq_name} #{comments}"

  if seq_qual.nil? || seq_qual.empty?
    # no qual provided, use a default value
    res << ('D' * seq_fasta.length)
  elsif seq_qual.is_a?(Array)
    res << seq_qual.map { |e| (e + 33).chr }.join
  else
    res << seq_qual
  end

  res
end
|
181
|
-
|
190
|
+
|
182
191
|
# Tells whether sequences read through this class come with quality values.
# Unconditionally true.
def with_qual?
  true
end
|
185
|
-
|
186
|
-
|
187
|
-
private
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
194
|
+
|
195
|
+
|
196
|
+
private
|
197
|
+
|
198
|
+
#------------------------------------
|
199
|
+
# Read one sequence in fastq
|
200
|
+
#------------------------------------
|
201
|
+
# @GEM-108-D02
|
202
|
+
# AAAAGCTGG
|
203
|
+
# +
|
204
|
+
# :::::::::
|
205
|
+
|
206
|
+
# Reads one four-line record (name, sequence, '+' line, quality).
#
# Returns [seq_name, seq_fasta, seq_qual, comments]; all four are nil when
# the file is already at its end.
#
# seq_qual is the raw quality string unless @qual_to_phred is set; then each
# character is converted with @to_phred, giving an Array of integers, or a
# space separated String when @qual_to_array is false.
#
# Raises RuntimeError when the name line is not a valid '@name' header or
# when the file ends in the middle of a record.
def read_fastq
  seq_name = nil
  seq_fasta = nil
  seq_qual = nil
  comments = nil

  unless @fastq_file.eof?
    begin
      # read four lines
      name_line = @fastq_file.readline.chomp
      seq_fasta = @fastq_file.readline.chomp
      @fastq_file.readline # '+' line, its content is not used
      seq_qual = @fastq_file.readline.chomp

      # Strip first, so a default quality gets exactly one character per base.
      seq_fasta.strip!

      # if there is no qual, but there is a fasta
      if seq_qual.empty? && !seq_fasta.empty?
        seq_qual = 'D' * seq_fasta.length
      end

      # parse name: first token is the name, the rest are comments
      header = /^@\s*([^\s]+)\s*(.*)$/.match(name_line)
      raise "Invalid sequence name in #{name_line}" if header.nil?

      seq_name = header[1]
      comments = header[2]

      # records without any quality are neither counted nor converted
      unless seq_qual.empty?
        @num_seqs += 1

        if @qual_to_phred
          seq_qual = seq_qual.each_char.map { |e| @to_phred.call(e.ord) }
          seq_qual = seq_qual.join(' ') unless @qual_to_array
        end
      end
    rescue EOFError
      raise "Bad format in FastQ file"
    end
  end

  [seq_name, seq_fasta, seq_qual, comments]
end
|
247
|
-
|
248
|
-
|
249
|
-
end
|
250
|
-
|
251
|
-
|
265
|
+
|
266
|
+
|
252
267
|
end
|
data/lib/scbi_fastq.rb
CHANGED
data/test/test_scbi_fastq.rb
CHANGED
@@ -29,6 +29,24 @@ class TestScbiFastq < Test::Unit::TestCase
|
|
29
29
|
f.close
|
30
30
|
end
|
31
31
|
|
32
|
+
# Fills @test_file with n records that have no quality values.
# Record i (1-based) is named @seq_name + i and holds @seq_fasta repeated
# i times, with the comment 'comments'.
#
# offset is not used here; presumably kept so the signature matches
# fill_file (TODO confirm).
def fill_file_no_qual(n,offset=33)
  f = FastqFile.new(@test_file, 'w')

  begin
    1.upto(n) do |i|
      f.write_seq(@seq_name + i.to_s, @seq_fasta * i, '', 'comments')
    end
  ensure
    # close the handle even if a write fails
    f.close
  end
end
|
48
|
+
|
49
|
+
|
32
50
|
def test_each
|
33
51
|
|
34
52
|
# make new file and fill with data
|
@@ -77,7 +95,7 @@ class TestScbiFastq < Test::Unit::TestCase
|
|
77
95
|
def test_next_seq_comments
|
78
96
|
|
79
97
|
# make new file and fill with data
|
80
|
-
fill_file(100)
|
98
|
+
fill_file(100)
|
81
99
|
|
82
100
|
|
83
101
|
fqr=FastqFile.new(@test_file)
|
@@ -97,8 +115,36 @@ class TestScbiFastq < Test::Unit::TestCase
|
|
97
115
|
end
|
98
116
|
end until n.nil?
|
99
117
|
|
118
|
+
fqr.close
|
119
|
+
end
|
120
|
+
|
121
|
+
# FastqFile.to_fastq without quality values must return the four record
# lines, using 'D' as default quality for every base.
def test_to_fastq
  fasta = @seq_fasta * 10

  lines = FastqFile.to_fastq(@seq_name, fasta, '', '')

  assert_equal(4, lines.size)
  # empty comments still leave the separating space after the name
  assert_equal("@#{@seq_name} ", lines[0])
  assert_equal(fasta, lines[1])
  assert_equal("+#{@seq_name} ", lines[2])
  assert_equal('D' * fasta.length, lines[3])
end
|
125
|
+
|
126
|
+
# Records written without quality must be read back with a default quality
# of 'D' per base when the raw quality string is requested.
def test_each_no_qual
  # make new file and fill with data
  fill_file_no_qual(100)

  # qual_to_array = false, qual_to_phred = false: quality comes back raw
  fqr = FastqFile.new(@test_file, 'r', :sanger, false, false)

  begin
    i = 1

    fqr.each do |n, s, q|
      assert_equal(@seq_name + i.to_s, n)
      assert_equal(@seq_fasta * i, s)
      assert_equal('D' * s.length, q)

      i += 1
    end

    # every record written by fill_file_no_qual has been visited
    assert_equal(100, i - 1)
  ensure
    fqr.close
  end
end
|
147
|
+
|
102
148
|
|
103
149
|
|
104
150
|
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: scbi_fastq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.
|
5
|
+
version: 0.0.15
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date:
|
13
|
+
date: 2012-05-24 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: hoe
|