scbi_fastq 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 0.0.15 2012-05-24
2
+
3
+ Uses default qual when no qual is present
4
+
1
5
  === 0.0.14 2011-05-31
2
6
 
3
7
  release to rubygems
data/Rakefile CHANGED
@@ -23,4 +23,4 @@ Dir['tasks/**/*.rake'].each { |t| load t }
23
23
 
24
24
  # TODO - want other tests/tasks run by default? Add them to the list
25
25
  # remove_task :default
26
- # task :default => [:spec, :features]
26
+ task :default => [:spec, :features, :redocs]
@@ -1,15 +1,15 @@
1
1
 
2
2
  # add ord method to ruby 1.8
3
3
  if !String.instance_methods.include?(:ord)
4
- class String
5
-
6
- def ord
7
- return self[0]
8
- end
9
-
10
- end
4
+ class String
5
+
6
+ def ord
7
+ return self[0]
8
+ end
9
+
10
+ end
11
11
  end
12
-
12
+
13
13
 
14
14
 
15
15
  class FastqFile
@@ -21,31 +21,31 @@ class FastqFile
21
21
  #------------------------------------
22
22
  def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)
23
23
 
24
-
24
+
25
25
  if mode.upcase.index('W')
26
26
  @fastq_file = File.open(fasta_file_name,'w')
27
27
  elsif mode.upcase.index('A')
28
28
  if !File.exist?(fasta_file_name)
29
- raise "File #{fasta_file_name} doesn't exists"
29
+ raise "File #{fasta_file_name} doesn't exists"
30
30
  end
31
-
31
+
32
32
  @fastq_file = File.open(fasta_file_name,'a')
33
33
  else #read only
34
34
  if !File.exist?(fasta_file_name)
35
- raise "File #{fasta_file_name} doesn't exists"
35
+ raise "File #{fasta_file_name} doesn't exists"
36
36
  end
37
-
37
+
38
38
  if fasta_file_name.is_a?(IO)
39
- @fastq_file = fasta_file_name
39
+ @fastq_file = fasta_file_name
40
40
  else
41
41
  @fastq_file = File.open(fasta_file_name,'r')
42
42
  end
43
43
  end
44
-
44
+
45
45
  @mode = mode
46
46
  @num_seqs = 0
47
47
  @fastq_type=fastq_type
48
-
48
+
49
49
  # S - Sanger Phred+33, raw reads typically (0, 40)
50
50
  # X - Solexa Solexa+64, raw reads typically (-5, 40)
51
51
  # I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)
@@ -63,7 +63,7 @@ class FastqFile
63
63
  # > -5.8682532438011537
64
64
  # > >>> solexa_quality_from_phred(0.1)
65
65
  # > -16.32774717238372
66
- # >
66
+ # >
67
67
  # > >>> def phred_quality_from_solexa(solexa_quality) :
68
68
  # > ... return 10*log(10**(solexa_quality/10.0) + 1, 10)
69
69
  # > ...
@@ -75,62 +75,62 @@ class FastqFile
75
75
  # > 3.0102999566398116
76
76
  # > >>> phred_quality_from_solexa(-20)
77
77
  # > 0.043213737826425784
78
-
79
-
78
+
79
+
80
80
  #sanger by default
81
81
  @to_phred = lambda{|q| q - 33}
82
82
  @from_phred = lambda{|q| (q+33).chr}
83
-
83
+
84
84
  if @fastq_type == :ilumina
85
- @to_phred = lambda{|q| q - 64}
86
- # @from_phred = lambda{|q| (q+64).chr}
87
-
85
+ @to_phred = lambda{|q| q - 64}
86
+ # @from_phred = lambda{|q| (q+64).chr}
87
+
88
88
  elsif @fastq_type == :solexa
89
- #
90
- # solexa to phred quals
91
-
92
- @to_phred = lambda{|q| (10*Math.log(10**(q/10.0)+1,10)).round}
93
- # @from_phred = lambda{|q| (10*Math.log(10**(q/10.0)-1,10)).round.chr}
94
-
95
- #phred to solexa quals
96
-
89
+ #
90
+ # solexa to phred quals
91
+
92
+ @to_phred = lambda{|q| (10*Math.log(10**(q/10.0)+1,10)).round}
93
+ # @from_phred = lambda{|q| (10*Math.log(10**(q/10.0)-1,10)).round.chr}
94
+
95
+ #phred to solexa quals
96
+
97
97
  end
98
-
98
+
99
99
  @qual_to_array = qual_to_array
100
-
100
+
101
101
  @qual_to_phred = qual_to_phred
102
-
102
+
103
103
  end
104
-
104
+
105
105
  def close
106
- @fastq_file.close
106
+ @fastq_file.close
107
107
  end
108
-
109
-
108
+
109
+
110
110
  #------------------------------------
111
111
  # Iterate over all sequences
112
112
  #------------------------------------
113
113
  def each
114
-
114
+
115
115
  rewind
116
116
 
117
- n,f,q,c=next_seq
118
-
117
+ n,f,q,c=next_seq
118
+
119
119
  while (!n.nil?)
120
- yield(n,f,q,c)
121
- n,f,q,c=next_seq
120
+ yield(n,f,q,c)
121
+ n,f,q,c=next_seq
122
122
  end
123
123
 
124
- rewind
125
-
124
+ rewind
125
+
126
126
  end
127
127
 
128
128
  # goto first position in file
129
129
  def rewind
130
-
131
- @num_seqs = 0 ;
132
- @fastq_file.pos=0
133
-
130
+
131
+ @num_seqs = 0 ;
132
+ @fastq_file.pos=0
133
+
134
134
  end
135
135
 
136
136
  #------------------------------------
@@ -139,114 +139,129 @@ class FastqFile
139
139
  def next_seq
140
140
  #init variables
141
141
  res = read_fastq
142
- return res
142
+ return res
143
143
  end
144
-
144
+
145
145
  # write sequence to file in sanger format
146
146
  def write_seq(seq_name,seq_fasta,seq_qual,comments='')
147
- name = ""
148
-
149
- @fastq_file.puts("@#{seq_name} #{comments}")
150
- @fastq_file.puts(seq_fasta)
151
- @fastq_file.puts("+#{seq_name} #{comments}")
152
-
153
- if seq_qual.is_a?(Array)
154
- @fastq_file.puts(seq_qual.map{|e| @from_phred.call(e)}.join)
155
- else
156
- @fastq_file.puts(seq_qual.split(/\s+/).map{|e| @from_phred.call(e.to_i)}.join)
157
- end
158
-
147
+ name = ""
148
+
149
+ @fastq_file.puts("@#{seq_name} #{comments}")
150
+ @fastq_file.puts(seq_fasta)
151
+ @fastq_file.puts("+#{seq_name} #{comments}")
152
+
153
+ if seq_qual.is_a?(Array)
154
+ @fastq_file.puts(seq_qual.map{|e| @from_phred.call(e)}.join)
155
+ else
156
+ @fastq_file.puts(seq_qual.split(/\s+/).map{|e| @from_phred.call(e.to_i)}.join)
157
+ end
158
+
159
159
  end
160
160
 
161
-
161
+
162
162
  # creates fastq otuput in sanger format
163
163
  def self.to_fastq(seq_name,seq_fasta,seq_qual,comments='')
164
-
164
+
165
165
  res=[]
166
-
167
- name = ""
168
-
169
- res << ("@#{seq_name} #{comments}")
170
- res << (seq_fasta)
171
- res << ("+#{seq_name} #{comments}")
172
-
173
- if seq_qual.is_a?(Array)
174
- res<<(seq_qual.map{|e| (e+33).chr}.join)
175
- else
176
- res<<(seq_qual.split(/\s+/).map{|e| (e.to_i+33).chr}.join)
177
- end
178
-
166
+
167
+ name = ""
168
+
169
+ res << ("@#{seq_name} #{comments}")
170
+ res << (seq_fasta)
171
+ res << ("+#{seq_name} #{comments}")
172
+
173
+ if !seq_qual.empty?
174
+ if @qual_to_phred
175
+ if seq_qual.is_a?(Array)
176
+ res<<(seq_qual.map{|e| (e+33).chr}.join)
177
+ else
178
+ res<<(seq_qual.split(/\s+/).map{|e| (e.to_i+33).chr}.join)
179
+ end
180
+ else
181
+ res << seq_qual
182
+ end
183
+ else # no qual provided, use a default value
184
+ q='D'*seq_fasta.length;
185
+ res << q
186
+ end
187
+
179
188
  return res
180
189
  end
181
-
190
+
182
191
  def with_qual?
183
192
  true
184
193
  end
185
-
186
-
187
- private
188
-
189
- #------------------------------------
190
- # Read one sequence in fastq
191
- #------------------------------------
192
- # @GEM-108-D02
193
- # AAAAGCTGG
194
- # +
195
- # :::::::::
196
-
197
- def read_fastq
198
-
199
- seq_name = nil
200
- seq_fasta = nil
201
- seq_qual = nil
202
- comments = nil
203
-
204
- reading = :fasta
205
-
206
- if !@fastq_file.eof
207
-
208
- begin
209
- #read four lines
210
- name_line = @fastq_file.readline.chomp
211
- seq_fasta = @fastq_file.readline.chomp
212
- name2_line = @fastq_file.readline.chomp
213
- seq_qual = @fastq_file.readline.chomp
214
-
215
-
216
- # parse name
217
- if name_line =~ /^@\s*([^\s]+)\s*(.*)$/
218
- # remove comments
219
- seq_name = $1
220
- comments=$2
221
- else
222
- raise "Invalid sequence name in #{name_line}"
223
- end
224
-
225
- # parse fasta
226
- seq_fasta.strip! if !seq_fasta.empty?
227
-
228
- # parse qual_name
229
-
230
- if !seq_name.nil? && !seq_qual.empty?
231
-
232
- @num_seqs += 1
233
-
234
- if @qual_to_phred
235
- seq_qual=seq_qual.each_char.map{|e| (@to_phred.call(e.ord))}
236
-
237
- if !@qual_to_array
238
- seq_qual=seq_qual.join(' ')
239
- end
240
- end
241
-
194
+
195
+
196
+ private
197
+
198
#------------------------------------
# Read one sequence in fastq
#------------------------------------
# @GEM-108-D02
# AAAAGCTGG
# +
# :::::::::
#
# Returns [seq_name, seq_fasta, seq_qual, comments]; all four are nil when
# the file is already at its end.
# Raises RuntimeError when the name line is invalid or the file ends in
# the middle of a record.
def read_fastq
  return [nil, nil, nil, nil] if @fastq_file.eof

  begin
    #read four lines
    name_line = @fastq_file.readline.chomp
    seq_fasta = @fastq_file.readline.chomp
    @fastq_file.readline # second name line ('+...'), content not used
    seq_qual = @fastq_file.readline.chomp
  rescue EOFError
    raise "Bad format in FastQ file"
  end

  # parse name, separating it from the comments
  name_match = /^@\s*([^\s]+)\s*(.*)$/.match(name_line)
  raise "Invalid sequence name in #{name_line}" if name_match.nil?

  seq_name = name_match[1]
  comments = name_match[2]

  # parse fasta
  seq_fasta.strip!

  # if there is no qual, but there is a fasta, use a default quality;
  # computed after stripping so it is exactly as long as the sequence
  if seq_qual.empty? && !seq_fasta.empty?
    seq_qual = 'D' * seq_fasta.length
  end

  unless seq_qual.empty?
    @num_seqs += 1

    if @qual_to_phred
      seq_qual = seq_qual.each_char.map { |e| @to_phred.call(e.ord) }
      seq_qual = seq_qual.join(' ') unless @qual_to_array
    end
  end

  [seq_name, seq_fasta, seq_qual, comments]
end
247
-
248
- return [seq_name,seq_fasta,seq_qual,comments]
249
- end
250
-
251
-
265
+
266
+
252
267
  end
data/lib/scbi_fastq.rb CHANGED
@@ -3,5 +3,5 @@ $:.unshift(File.dirname(__FILE__)) unless
3
3
 
4
4
  require 'scbi_fastq/fastq_file'
5
5
  module ScbiFastq
6
- VERSION = '0.0.14'
6
+ VERSION = '0.0.15'
7
7
  end
@@ -29,6 +29,24 @@ class TestScbiFastq < Test::Unit::TestCase
29
29
  f.close
30
30
  end
31
31
 
32
# Fills @test_file with n records whose quality is empty.
# Record i (1-based) is named @seq_name followed by i and contains
# @seq_fasta repeated i times.
# offset is not used; it is kept so existing calls keep working.
def fill_file_no_qual(n,offset=33)
  f=FastqFile.new(@test_file,'w')

  begin
    n.times do |c|
      i = c+1

      f.write_seq(@seq_name+i.to_s,@seq_fasta*i,'','comments')
    end
  ensure
    # close the file even when a write fails
    f.close
  end
end
48
+
49
+
32
50
  def test_each
33
51
 
34
52
  # make new file and fill with data
@@ -77,7 +95,7 @@ class TestScbiFastq < Test::Unit::TestCase
77
95
  def test_next_seq_comments
78
96
 
79
97
  # make new file and fill with data
80
- fill_file(100)
98
+ fill_file(100)
81
99
 
82
100
 
83
101
  fqr=FastqFile.new(@test_file)
@@ -97,8 +115,36 @@ class TestScbiFastq < Test::Unit::TestCase
97
115
  end
98
116
  end until n.nil?
99
117
 
118
+ fqr.close
119
+ end
120
+
121
# to_fastq called with an empty quality must return the four record lines
# with a default quality of 'D' for every base.
def test_to_fastq
  fasta = @seq_fasta*10

  res = FastqFile.to_fastq(@seq_name,fasta,'','')

  assert_equal(4, res.length)
  assert_equal("@#{@seq_name} ", res[0])
  assert_equal(fasta, res[1])
  assert_equal("+#{@seq_name} ", res[2])
  assert_equal('D'*fasta.length, res[3])
end
125
+
126
# Records written without quality must be read back with a default quality
# of 'D' for every base (the reader is opened with qual_to_array=false and
# qual_to_phred=false, so the quality is returned as a plain String).
def test_each_no_qual

  # make new file and fill with data
  fill_file_no_qual(100)

  fqr=FastqFile.new(@test_file,'r',:sanger, false,false)

  i=1

  begin
    fqr.each do |n,s,q|
      assert_equal(@seq_name+i.to_s,n)
      assert_equal(@seq_fasta*i,s)
      assert_equal('D'*s.length,q)

      i+=1
    end
  ensure
    # close the reader even when an assertion fails
    fqr.close
  end

  # all 100 records must have been visited
  assert_equal(101,i)
end
147
+
102
148
 
103
149
 
104
150
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: scbi_fastq
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.14
5
+ version: 0.0.15
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-05-31 00:00:00 Z
13
+ date: 2012-05-24 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: hoe