scbi_fastq 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,7 @@
1
+ === 0.0.14 2011-05-31
2
+
3
+ release to rubygems
4
+
5
+ === 0.0.1 2010-11-29
6
+
7
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,12 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ lib/scbi_fastq.rb
7
+ lib/scbi_fastq/fastq_file.rb
8
+ script/console
9
+ script/destroy
10
+ script/generate
11
+ test/test_helper.rb
12
+ test/test_scbi_fastq.rb
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+
2
+ For more information on scbi_fastq, see http://scbi_fastq.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
data/README.rdoc ADDED
@@ -0,0 +1,113 @@
1
+ = scbi_fastq
2
+
3
+ * http://www.scbi.uma.es/downloads
4
+
5
+ == DESCRIPTION:
6
+
7
+ scbi_fastq is a Ruby gem to read/write FASTQ files (DNA/RNA sequences) with qualities in a variety of formats (Sanger, Solexa, Illumina).
8
+
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ * Read FASTQ files in Sanger, Solexa and Illumina formats, making the appropriate quality value conversions
13
+ * Quality values can be automatically split into an array
14
+ * Write FASTQ files in Sanger format
15
+ * Iteration over large files without extra memory usage
16
+
17
+
18
+ == SYNOPSIS:
19
+
20
+ === Reading a FASTQ with iterator:
21
+
22
+ require 'scbi_fastq'
23
+
24
+ # open file in sanger mode
25
+ fqr=FastqFile.new('file1.fastq')
26
+
27
+
28
+ fqr.each do |name,seq_fasta,qual,comments|
29
+
30
+ puts name
31
+ puts seq_fasta
32
+ puts qual
33
+ puts comments
34
+ end
35
+
36
+ fqr.close
37
+
38
+ === Reading a FASTQ one sequence at a time:
39
+
40
+ require 'scbi_fastq'
41
+
42
+ # open file in sanger mode
43
+ fqr=FastqFile.new('file1.fastq')
44
+
45
+
46
+ begin
47
+
48
+ # read one sequence
49
+ name,seq_fasta,qual,comments=fqr.next_seq
50
+
51
+ # name will be nil when there are no more sequences available
52
+ if !name.nil?
53
+ puts name
54
+ puts seq_fasta
55
+ puts qual
56
+ puts comments
57
+ end
58
+ end until name.nil?
59
+
60
+ fqr.close
61
+
62
+
63
+ === Writing a FASTQ:
64
+
65
+ require 'scbi_fastq'
66
+
67
+ # open new file
68
+ f=FastqFile.new('file.fastq','w')
69
+
70
+ # prepare sample data
71
+ name = 'seq1'
72
+ seq_fasta= 'acgtacgtacact'
73
+ seq_qual= [40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40]
74
+
75
+ # write sequence to disk
76
+ f.write_seq(name,seq_fasta,seq_qual,'comments')
77
+
78
+
79
+ # close file
80
+ f.close
81
+
82
+ == REQUIREMENTS:
83
+
84
+ * This is a standalone gem.
85
+
86
+ == INSTALL:
87
+
88
+ * gem install scbi_fastq
89
+
90
+ == LICENSE:
91
+
92
+ (The MIT License)
93
+
94
+ Copyright (c) 2010 Dario Guerrero
95
+
96
+ Permission is hereby granted, free of charge, to any person obtaining
97
+ a copy of this software and associated documentation files (the
98
+ 'Software'), to deal in the Software without restriction, including
99
+ without limitation the rights to use, copy, modify, merge, publish,
100
+ distribute, sublicense, and/or sell copies of the Software, and to
101
+ permit persons to whom the Software is furnished to do so, subject to
102
+ the following conditions:
103
+
104
+ The above copyright notice and this permission notice shall be
105
+ included in all copies or substantial portions of the Software.
106
+
107
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
108
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
109
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
110
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
111
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
112
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
113
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
+ require 'rubygems'
2
+ gem 'hoe', '>= 2.1.0'
3
+ require 'hoe'
4
+ require 'fileutils'
5
+ require './lib/scbi_fastq'
6
+
7
Hoe.plugin(:newgem)
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Declare the gem through Hoe, which generates all the Rake tasks.
# Run 'rake -T' from the gem root directory to list them.
$hoe = Hoe.spec('scbi_fastq') do
  developer('Dario Guerrero', 'dariogf@scbi.uma.es')
  self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
  self.rubyforge_name = self.name # TODO this is default value
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'

# Pull in any project-specific task files.
Dir['tasks/**/*.rake'].each do |task_file|
  load task_file
end

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
@@ -0,0 +1,252 @@
1
+
2
# Ruby 1.8 compatibility shim: provide String#ord when the interpreter lacks it.
#
# The check uses Module#method_defined? because Module#instance_methods
# returns Strings on Ruby 1.8 and Symbols on 1.9+, so an include?(:ord) test
# cannot detect an existing method on 1.8.
unless String.method_defined?(:ord)
  class String
    # Returns the integer code of the first character. On Ruby 1.8,
    # String#[] with an Integer index yields that code directly.
    def ord
      self[0]
    end
  end
end
12
+
13
+
14
+
15
# Reader/writer for FASTQ files (four-line records: "@name comments",
# sequence, "+..." separator, quality string).
#
# Quality encodings understood when reading:
#   :sanger (default)       Phred+33
#   :ilumina / :illumina    Phred+64 (Illumina 1.3+); the misspelt symbol is
#                           kept for backward compatibility
#   :solexa                 Solexa+64, converted to Phred with
#                           10 * log10(10 ** (solexa / 10.0) + 1)
#
# Records are always written in Sanger (Phred+33) encoding.
class FastqFile

  # Number of records with a non-empty quality line read since the file was
  # opened or last rewound.
  attr_accessor :num_seqs

  #------------------------------------
  # Initialize instance
  #------------------------------------
  # fasta_file_name:: path of the FASTQ file; in read mode an already open
  #                   stream (anything responding to +readline+) is also accepted
  # mode::            a string containing 'w' (create/truncate), 'a' (append to
  #                   an existing file) or anything else for read-only
  # fastq_type::      quality encoding of the input (see class comment)
  # qual_to_array::   when false, decoded qualities are joined with spaces
  #                   into a single string instead of being returned as an array
  # qual_to_phred::   when false, the raw quality string is returned untouched
  #
  # Raises RuntimeError when the file must exist (read/append) and does not.
  def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)
    @fastq_file = open_stream(fasta_file_name, mode)

    @mode = mode
    @num_seqs = 0
    @fastq_type = fastq_type

    @to_phred = phred_decoder(fastq_type)

    @qual_to_array = qual_to_array
    @qual_to_phred = qual_to_phred
  end

  # Closes the underlying file or stream.
  def close
    @fastq_file.close
  end

  #------------------------------------
  # Iterate over all sequences
  #------------------------------------
  # Yields name, sequence, qualities and comments for every record, starting
  # from the beginning of the file, and rewinds again when done. Without a
  # block an Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?

    rewind

    name, fasta, qual, comments = next_seq
    until name.nil?
      yield(name, fasta, qual, comments)
      name, fasta, qual, comments = next_seq
    end

    rewind
  end

  # Goes back to the first position in the file and resets the record counter.
  def rewind
    @num_seqs = 0
    @fastq_file.pos = 0
  end

  #------------------------------------
  # Get next sequence
  #------------------------------------
  # Returns [name, sequence, qualities, comments]; every element is nil once
  # the end of the file has been reached.
  def next_seq
    read_fastq
  end

  # Writes one record to the file in Sanger format. +seq_qual+ is either an
  # array of Phred scores or a whitespace-separated string of scores.
  def write_seq(seq_name,seq_fasta,seq_qual,comments='')
    self.class.to_fastq(seq_name, seq_fasta, seq_qual, comments).each do |line|
      @fastq_file.puts(line)
    end
  end

  # Builds the four lines of a FASTQ record in Sanger format and returns them
  # as an array of strings (without line terminators). +seq_qual+ is either an
  # array of Phred scores or a whitespace-separated string of scores.
  def self.to_fastq(seq_name,seq_fasta,seq_qual,comments='')
    scores = seq_qual.is_a?(Array) ? seq_qual : seq_qual.split(/\s+/).map { |e| e.to_i }
    header = "#{seq_name} #{comments}"

    ["@#{header}", seq_fasta, "+#{header}", scores.map { |score| (score + 33).chr }.join]
  end

  # FASTQ records always carry qualities.
  def with_qual?
    true
  end

  private

  # Opens (or adopts) the stream that backs this instance.
  def open_stream(source, mode)
    flags = mode.upcase

    return File.open(source, 'w') if flags.index('W')

    appending = !flags.index('A').nil?

    # In read mode an already open stream is used as is. This must be decided
    # before touching the file system, because File.exist? rejects objects
    # such as StringIO.
    return source if !appending && source.respond_to?(:readline)

    raise "File #{source} doesn't exists" unless File.exist?(source)

    File.open(source, appending ? 'a' : 'r')
  end

  # Returns a lambda that maps the character code of one quality symbol to a
  # Phred score for the given encoding.
  def phred_decoder(fastq_type)
    case fastq_type
    when :ilumina, :illumina
      lambda { |q| q - 64 }
    when :solexa
      # Remove the +64 offset first, then convert the Solexa score to Phred.
      lambda { |q| (10 * Math.log10(10**((q - 64) / 10.0) + 1)).round }
    else
      # sanger by default
      lambda { |q| q - 33 }
    end
  end

  #------------------------------------
  # Read one sequence in fastq
  #------------------------------------
  # @GEM-108-D02
  # AAAAGCTGG
  # +
  # :::::::::
  #
  # Raises RuntimeError when the record is truncated or its name line does not
  # start with '@' followed by a name.
  def read_fastq
    return [nil, nil, nil, nil] if @fastq_file.eof?

    begin
      # read four lines
      name_line = @fastq_file.readline.chomp
      seq_fasta = @fastq_file.readline.chomp
      @fastq_file.readline # '+' separator line, its content is not used
      seq_qual = @fastq_file.readline.chomp
    rescue EOFError
      raise "Bad format in FastQ file"
    end

    # parse name: first token is the name, the rest of the line the comments
    parsed = /^@\s*([^\s]+)\s*(.*)$/.match(name_line)
    raise "Invalid sequence name in #{name_line}" if parsed.nil?

    seq_name = parsed[1]
    comments = parsed[2]

    seq_fasta.strip!

    unless seq_qual.empty?
      @num_seqs += 1

      if @qual_to_phred
        seq_qual = seq_qual.each_char.map { |symbol| @to_phred.call(symbol.ord) }
        seq_qual = seq_qual.join(' ') unless @qual_to_array
      end
    end

    [seq_name, seq_fasta, seq_qual, comments]
  end

end
data/lib/scbi_fastq.rb ADDED
@@ -0,0 +1,7 @@
1
# Put this directory at the front of the load path unless it is already
# there, either as given or in its expanded form.
lib_dir = File.dirname(__FILE__)
unless $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
  $:.unshift(lib_dir)
end
3
+
4
+ require 'scbi_fastq/fastq_file'
5
# Namespace that carries the gem's version number.
module ScbiFastq
  # Version of the scbi_fastq gem.
  VERSION = '0.0.14'
end
data/script/console ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+ # File: script/console
3
# Pick the irb launcher that matches the platform.
irb_command = if RUBY_PLATFORM =~ /(:?mswin|mingw)/
                'irb.bat'
              else
                'irb'
              end

# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
gem_entry = File.expand_path(File.dirname(__FILE__) + '/../lib/scbi_fastq.rb')
required_libs = " -r irb/completion" + " -r #{gem_entry}"

puts "Loading scbi_fastq gem"
# Replace this process with an irb session that has the gem preloaded.
exec "#{irb_command} #{required_libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
# Root directory of the gem (parent of script/).
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))

# Load rubigen directly when possible; fall back to loading it through rubygems.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Drop a leading help flag before handing the arguments to the destroy script.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
# Root directory of the gem (parent of script/).
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))

# Load rubigen directly when possible; fall back to loading it through rubygems.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Drop a leading help flag before handing the arguments to the generate script.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ require File.dirname(__FILE__) + '/../lib/scbi_fastq'
@@ -0,0 +1,117 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
# Round-trip tests for FastqFile: records are written with write_seq and then
# read back through each and next_seq.
class TestScbiFastq < Test::Unit::TestCase

  # Fixture values shared by every test.
  def setup
    @test_file = '/tmp/sanger.fastq'

    @seq_fasta = 'ACTG'
    @seq_qual  = [31]
    @seq_name  = 'SEQ'
  end

  # Writes n records to @test_file. Record i (1-based) is named SEQ<i>, holds
  # the base sequence repeated i times and one quality value of 31 per base.
  # NOTE(review): offset is accepted but not used by this helper.
  def fill_file(n,offset=33)
    writer = FastqFile.new(@test_file, 'w')

    1.upto(n) do |i|
      quals = @seq_qual * i * @seq_fasta.length
      writer.write_seq("#{@seq_name}#{i}", @seq_fasta * i, quals, 'comments')
    end

    writer.close
  end

  # each yields name, sequence and qualities in file order.
  def test_each
    fill_file(100)

    reader = FastqFile.new(@test_file)
    index = 1

    reader.each do |name, fasta, qual|
      assert_equal("#{@seq_name}#{index}", name)
      assert_equal(@seq_fasta * index, fasta)
      assert_equal(@seq_qual * index * @seq_fasta.length, qual)

      index += 1
    end

    reader.close
  end

  # each also yields the comments as a fourth value.
  def test_each_comments
    fill_file(100)

    reader = FastqFile.new(@test_file)
    index = 1

    reader.each do |name, fasta, qual, comments|
      assert_equal("#{@seq_name}#{index}", name)
      assert_equal(@seq_fasta * index, fasta)
      assert_equal(@seq_qual * index * @seq_fasta.length, qual)
      assert_equal('comments', comments)

      index += 1
    end

    reader.close
  end

  # next_seq returns one record per call and a nil name at the end of the file.
  def test_next_seq_comments
    fill_file(100)

    reader = FastqFile.new(@test_file)
    index = 1

    loop do
      name, fasta, qual, comments = reader.next_seq
      break if name.nil?

      assert_equal("#{@seq_name}#{index}", name)
      assert_equal(@seq_fasta * index, fasta)
      assert_equal(@seq_qual * index * @seq_fasta.length, qual)
      assert_equal('comments', comments)

      index += 1
    end

    reader.close
  end

  # def test_open_file
  #   fill_file(100)
  #   fq=FastqFile.new('test/sanger.fastq')
  #
  #   fq.each do |n,f,q|
  #     puts n,f,q
  #     puts fq.num_seqs
  #   end
  #
  #   fq.close
  #
  # end
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scbi_fastq
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.14
6
+ platform: ruby
7
+ authors:
8
+ - Dario Guerrero
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-05-31 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 2.8.0
24
+ type: :development
25
+ version_requirements: *id001
26
+ description: scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qualities in a variety of formats (Sanger, Solexa, Ilumina).
27
+ email:
28
+ - dariogf@scbi.uma.es
29
+ executables: []
30
+
31
+ extensions: []
32
+
33
+ extra_rdoc_files:
34
+ - History.txt
35
+ - Manifest.txt
36
+ - PostInstall.txt
37
+ files:
38
+ - History.txt
39
+ - Manifest.txt
40
+ - PostInstall.txt
41
+ - README.rdoc
42
+ - Rakefile
43
+ - lib/scbi_fastq.rb
44
+ - lib/scbi_fastq/fastq_file.rb
45
+ - script/console
46
+ - script/destroy
47
+ - script/generate
48
+ - test/test_helper.rb
49
+ - test/test_scbi_fastq.rb
50
+ homepage: http://www.scbi.uma.es/downloads
51
+ licenses: []
52
+
53
+ post_install_message: PostInstall.txt
54
+ rdoc_options:
55
+ - --main
56
+ - README.rdoc
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ requirements: []
72
+
73
+ rubyforge_project: scbi_fastq
74
+ rubygems_version: 1.7.2
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qualities in a variety of formats (Sanger, Solexa, Ilumina).
78
+ test_files:
79
+ - test/test_helper.rb
80
+ - test/test_scbi_fastq.rb