scbi_fastq 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 0.0.15 2012-05-24
2
+
3
+ Uses a default quality string ('D' for every base) when a sequence has no quality values
4
+
1
5
  === 0.0.14 2011-05-31
2
6
 
3
7
  release to rubygems
data/Rakefile CHANGED
@@ -23,4 +23,4 @@ Dir['tasks/**/*.rake'].each { |t| load t }
23
23
 
24
24
  # TODO - want other tests/tasks run by default? Add them to the list
25
25
  # remove_task :default
26
- # task :default => [:spec, :features]
26
+ task :default => [:spec, :features, :redocs]
@@ -1,15 +1,15 @@
1
1
 
2
2
  # add ord method to ruby 1.8
3
3
  if !String.instance_methods.include?(:ord)
4
- class String
5
-
6
- def ord
7
- return self[0]
8
- end
9
-
10
- end
4
+ class String
5
+
6
+ def ord
7
+ return self[0]
8
+ end
9
+
10
+ end
11
11
  end
12
-
12
+
13
13
 
14
14
 
15
15
  class FastqFile
@@ -21,31 +21,31 @@ class FastqFile
21
21
  #------------------------------------
22
22
  def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)
23
23
 
24
-
24
+
25
25
  if mode.upcase.index('W')
26
26
  @fastq_file = File.open(fasta_file_name,'w')
27
27
  elsif mode.upcase.index('A')
28
28
  if !File.exist?(fasta_file_name)
29
- raise "File #{fasta_file_name} doesn't exists"
29
+ raise "File #{fasta_file_name} doesn't exists"
30
30
  end
31
-
31
+
32
32
  @fastq_file = File.open(fasta_file_name,'a')
33
33
  else #read only
34
34
  if !File.exist?(fasta_file_name)
35
- raise "File #{fasta_file_name} doesn't exists"
35
+ raise "File #{fasta_file_name} doesn't exists"
36
36
  end
37
-
37
+
38
38
  if fasta_file_name.is_a?(IO)
39
- @fastq_file = fasta_file_name
39
+ @fastq_file = fasta_file_name
40
40
  else
41
41
  @fastq_file = File.open(fasta_file_name,'r')
42
42
  end
43
43
  end
44
-
44
+
45
45
  @mode = mode
46
46
  @num_seqs = 0
47
47
  @fastq_type=fastq_type
48
-
48
+
49
49
  # S - Sanger Phred+33, raw reads typically (0, 40)
50
50
  # X - Solexa Solexa+64, raw reads typically (-5, 40)
51
51
  # I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)
@@ -63,7 +63,7 @@ class FastqFile
63
63
  # > -5.8682532438011537
64
64
  # > >>> solexa_quality_from_phred(0.1)
65
65
  # > -16.32774717238372
66
- # >
66
+ # >
67
67
  # > >>> def phred_quality_from_solexa(solexa_quality) :
68
68
  # > ... return 10*log(10**(solexa_quality/10.0) + 1, 10)
69
69
  # > ...
@@ -75,62 +75,62 @@ class FastqFile
75
75
  # > 3.0102999566398116
76
76
  # > >>> phred_quality_from_solexa(-20)
77
77
  # > 0.043213737826425784
78
-
79
-
78
+
79
+
80
80
  #sanger by default
81
81
  @to_phred = lambda{|q| q - 33}
82
82
  @from_phred = lambda{|q| (q+33).chr}
83
-
83
+
84
84
  if @fastq_type == :ilumina
85
- @to_phred = lambda{|q| q - 64}
86
- # @from_phred = lambda{|q| (q+64).chr}
87
-
85
+ @to_phred = lambda{|q| q - 64}
86
+ # @from_phred = lambda{|q| (q+64).chr}
87
+
88
88
  elsif @fastq_type == :solexa
89
- #
90
- # solexa to phred quals
91
-
92
- @to_phred = lambda{|q| (10*Math.log(10**(q/10.0)+1,10)).round}
93
- # @from_phred = lambda{|q| (10*Math.log(10**(q/10.0)-1,10)).round.chr}
94
-
95
- #phred to solexa quals
96
-
89
+ #
90
+ # solexa to phred quals
91
+
92
+ @to_phred = lambda{|q| (10*Math.log(10**(q/10.0)+1,10)).round}
93
+ # @from_phred = lambda{|q| (10*Math.log(10**(q/10.0)-1,10)).round.chr}
94
+
95
+ #phred to solexa quals
96
+
97
97
  end
98
-
98
+
99
99
  @qual_to_array = qual_to_array
100
-
100
+
101
101
  @qual_to_phred = qual_to_phred
102
-
102
+
103
103
  end
104
-
104
+
105
105
  def close
106
- @fastq_file.close
106
+ @fastq_file.close
107
107
  end
108
-
109
-
108
+
109
+
110
110
  #------------------------------------
111
111
  # Iterate over all sequences
112
112
  #------------------------------------
113
113
  def each
114
-
114
+
115
115
  rewind
116
116
 
117
- n,f,q,c=next_seq
118
-
117
+ n,f,q,c=next_seq
118
+
119
119
  while (!n.nil?)
120
- yield(n,f,q,c)
121
- n,f,q,c=next_seq
120
+ yield(n,f,q,c)
121
+ n,f,q,c=next_seq
122
122
  end
123
123
 
124
- rewind
125
-
124
+ rewind
125
+
126
126
  end
127
127
 
128
128
  # goto first position in file
129
129
  def rewind
130
-
131
- @num_seqs = 0 ;
132
- @fastq_file.pos=0
133
-
130
+
131
+ @num_seqs = 0 ;
132
+ @fastq_file.pos=0
133
+
134
134
  end
135
135
 
136
136
  #------------------------------------
@@ -139,114 +139,129 @@ class FastqFile
139
139
  def next_seq
140
140
  #init variables
141
141
  res = read_fastq
142
- return res
142
+ return res
143
143
  end
144
-
144
+
145
145
  # write sequence to file in sanger format
146
146
  def write_seq(seq_name,seq_fasta,seq_qual,comments='')
147
- name = ""
148
-
149
- @fastq_file.puts("@#{seq_name} #{comments}")
150
- @fastq_file.puts(seq_fasta)
151
- @fastq_file.puts("+#{seq_name} #{comments}")
152
-
153
- if seq_qual.is_a?(Array)
154
- @fastq_file.puts(seq_qual.map{|e| @from_phred.call(e)}.join)
155
- else
156
- @fastq_file.puts(seq_qual.split(/\s+/).map{|e| @from_phred.call(e.to_i)}.join)
157
- end
158
-
147
+ name = ""
148
+
149
+ @fastq_file.puts("@#{seq_name} #{comments}")
150
+ @fastq_file.puts(seq_fasta)
151
+ @fastq_file.puts("+#{seq_name} #{comments}")
152
+
153
+ if seq_qual.is_a?(Array)
154
+ @fastq_file.puts(seq_qual.map{|e| @from_phred.call(e)}.join)
155
+ else
156
+ @fastq_file.puts(seq_qual.split(/\s+/).map{|e| @from_phred.call(e.to_i)}.join)
157
+ end
158
+
159
159
  end
160
160
 
161
-
161
+
162
162
  # creates fastq output in sanger format
163
163
  def self.to_fastq(seq_name,seq_fasta,seq_qual,comments='')
164
-
164
+
165
165
  res=[]
166
-
167
- name = ""
168
-
169
- res << ("@#{seq_name} #{comments}")
170
- res << (seq_fasta)
171
- res << ("+#{seq_name} #{comments}")
172
-
173
- if seq_qual.is_a?(Array)
174
- res<<(seq_qual.map{|e| (e+33).chr}.join)
175
- else
176
- res<<(seq_qual.split(/\s+/).map{|e| (e.to_i+33).chr}.join)
177
- end
178
-
166
+
167
+ name = ""
168
+
169
+ res << ("@#{seq_name} #{comments}")
170
+ res << (seq_fasta)
171
+ res << ("+#{seq_name} #{comments}")
172
+
173
+ if !seq_qual.empty?
174
+ if @qual_to_phred
175
+ if seq_qual.is_a?(Array)
176
+ res<<(seq_qual.map{|e| (e+33).chr}.join)
177
+ else
178
+ res<<(seq_qual.split(/\s+/).map{|e| (e.to_i+33).chr}.join)
179
+ end
180
+ else
181
+ res << seq_qual
182
+ end
183
+ else # no qual provided, use a default value
184
+ q='D'*seq_fasta.length;
185
+ res << q
186
+ end
187
+
179
188
  return res
180
189
  end
181
-
190
+
182
191
  def with_qual?
183
192
  true
184
193
  end
185
-
186
-
187
- private
188
-
189
- #------------------------------------
190
- # Read one sequence in fastq
191
- #------------------------------------
192
- # @GEM-108-D02
193
- # AAAAGCTGG
194
- # +
195
- # :::::::::
196
-
197
- def read_fastq
198
-
199
- seq_name = nil
200
- seq_fasta = nil
201
- seq_qual = nil
202
- comments = nil
203
-
204
- reading = :fasta
205
-
206
- if !@fastq_file.eof
207
-
208
- begin
209
- #read four lines
210
- name_line = @fastq_file.readline.chomp
211
- seq_fasta = @fastq_file.readline.chomp
212
- name2_line = @fastq_file.readline.chomp
213
- seq_qual = @fastq_file.readline.chomp
214
-
215
-
216
- # parse name
217
- if name_line =~ /^@\s*([^\s]+)\s*(.*)$/
218
- # remove comments
219
- seq_name = $1
220
- comments=$2
221
- else
222
- raise "Invalid sequence name in #{name_line}"
223
- end
224
-
225
- # parse fasta
226
- seq_fasta.strip! if !seq_fasta.empty?
227
-
228
- # parse qual_name
229
-
230
- if !seq_name.nil? && !seq_qual.empty?
231
-
232
- @num_seqs += 1
233
-
234
- if @qual_to_phred
235
- seq_qual=seq_qual.each_char.map{|e| (@to_phred.call(e.ord))}
236
-
237
- if !@qual_to_array
238
- seq_qual=seq_qual.join(' ')
239
- end
240
- end
241
-
194
+
195
+
196
+ private
197
+
198
+ #------------------------------------
199
+ # Read one sequence in fastq
200
+ #------------------------------------
201
+ # @GEM-108-D02
202
+ # AAAAGCTGG
203
+ # +
204
+ # :::::::::
205
+
206
+ def read_fastq
207
+
208
+ seq_name = nil
209
+ seq_fasta = nil
210
+ seq_qual = nil
211
+ comments = nil
212
+
213
+ reading = :fasta
214
+
215
+ if !@fastq_file.eof
216
+
217
+ begin
218
+ #read four lines
219
+ name_line = @fastq_file.readline.chomp
220
+ seq_fasta = @fastq_file.readline.chomp
221
+ name2_line = @fastq_file.readline.chomp
222
+ seq_qual = @fastq_file.readline.chomp
223
+
224
+
225
+ # if there is no qual, but there is a fasta
226
+ if seq_qual.empty? && !seq_fasta.empty?
227
+ seq_qual = 'D'*seq_fasta.length
228
+ end
229
+
230
+
231
+ # parse name
232
+ if name_line =~ /^@\s*([^\s]+)\s*(.*)$/
233
+ # remove comments
234
+ seq_name = $1
235
+ comments=$2
236
+ else
237
+ raise "Invalid sequence name in #{name_line}"
238
+ end
239
+
240
+ # parse fasta
241
+ seq_fasta.strip! if !seq_fasta.empty?
242
+
243
+ # parse qual_name
244
+
245
+ if !seq_name.nil? && !seq_qual.empty?
246
+
247
+ @num_seqs += 1
248
+
249
+ if @qual_to_phred
250
+ seq_qual=seq_qual.each_char.map{|e| (@to_phred.call(e.ord))}
251
+
252
+ if !@qual_to_array
253
+ seq_qual=seq_qual.join(' ')
254
+ end
255
+ end
256
+
257
+ end
258
+ rescue EOFError
259
+ raise "Bad format in FastQ file"
242
260
  end
243
- rescue EOFError
244
- raise "Bad format in FastQ file"
245
261
  end
262
+
263
+ return [seq_name,seq_fasta,seq_qual,comments]
246
264
  end
247
-
248
- return [seq_name,seq_fasta,seq_qual,comments]
249
- end
250
-
251
-
265
+
266
+
252
267
  end
data/lib/scbi_fastq.rb CHANGED
@@ -3,5 +3,5 @@ $:.unshift(File.dirname(__FILE__)) unless
3
3
 
4
4
  require 'scbi_fastq/fastq_file'
5
5
  module ScbiFastq
6
- VERSION = '0.0.14'
6
+ VERSION = '0.0.15'
7
7
  end
@@ -29,6 +29,24 @@ class TestScbiFastq < Test::Unit::TestCase
29
29
  f.close
30
30
  end
31
31
 
32
+ def fill_file_no_qual(n,offset=33)
33
+ f=FastqFile.new(@test_file,'w')
34
+
35
+ n.times do |c|
36
+ i = c+1
37
+
38
+ name = "#{@seq_name+i.to_s}"
39
+ f.write_seq(name,@seq_fasta*i,'','comments')
40
+ # f.puts('@'+name)
41
+ # f.puts(@seq_fasta*i)
42
+ # f.puts('+'+name)
43
+ # f.puts((@seq_qual*i*@seq_fasta.length).map{|e| (e+offset).chr}.join)
44
+ end
45
+
46
+ f.close
47
+ end
48
+
49
+
32
50
  def test_each
33
51
 
34
52
  # make new file and fill with data
@@ -77,7 +95,7 @@ class TestScbiFastq < Test::Unit::TestCase
77
95
  def test_next_seq_comments
78
96
 
79
97
  # make new file and fill with data
80
- fill_file(100)
98
+ fill_file(100)
81
99
 
82
100
 
83
101
  fqr=FastqFile.new(@test_file)
@@ -97,8 +115,36 @@ class TestScbiFastq < Test::Unit::TestCase
97
115
  end
98
116
  end until n.nil?
99
117
 
118
+ fqr.close
119
+ end
120
+
121
+ def test_to_fastq
122
+ puts FastqFile.to_fastq(@seq_name,@seq_fasta*10,'','')
123
+
124
+ end
125
+
126
+ def test_each_no_qual
127
+
128
+ # make new file and fill with data
129
+ fill_file_no_qual(100)
130
+
131
+
132
+ fqr=FastqFile.new(@test_file,'r',:sanger, false,false)
133
+
134
+ i=1
135
+
136
+ fqr.each do |n,s,q|
137
+ puts n,s,q
138
+ assert_equal(@seq_name+i.to_s,n)
139
+ assert_equal(@seq_fasta*i,s)
140
+ # assert_equal((@seq_qual*i*@seq_fasta.length),q)
141
+
142
+ i+=1
143
+ end
144
+
100
145
  fqr.close
101
146
  end
147
+
102
148
 
103
149
 
104
150
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: scbi_fastq
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.14
5
+ version: 0.0.15
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-05-31 00:00:00 Z
13
+ date: 2012-05-24 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: hoe