scbi_fastq 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,7 @@
1
+ === 0.0.14 2011-05-31
2
+
3
+ release to rubygems
4
+
5
+ === 0.0.1 2010-11-29
6
+
7
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,12 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ lib/scbi_fastq.rb
7
+ lib/scbi_fastq/fastq_file.rb
8
+ script/console
9
+ script/destroy
10
+ script/generate
11
+ test/test_helper.rb
12
+ test/test_scbi_fastq.rb
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+
2
+ For more information on scbi_fastq, see http://scbi_fastq.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
data/README.rdoc ADDED
@@ -0,0 +1,113 @@
1
+ = scbi_fastq
2
+
3
+ * http://www.scbi.uma.es/downloads
4
+
5
+ == DESCRIPTION:
6
+
7
+ scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qualities in a variety of formats (Sanger, Solexa, Illumina).
8
+
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ * Read FASTQ files in Sanger, Solexa and Illumina formats, making the appropriate quality value conversions
13
+ * Quality values can be automatically split into an array
14
+ * Write FASTQ files in Sanger format
15
+ * Iteration over large files without extra memory usage
16
+
17
+
18
+ == SYNOPSIS:
19
+
20
+ === Reading a FASTQ with iterator:
21
+
22
+ require 'scbi_fastq'
23
+
24
+ # open file in sanger mode
25
+ fqr=FastqFile.new('file1.fastq')
26
+
27
+
28
+ fqr.each do |name,seq_fasta,qual,comments|
29
+
30
+ puts name
31
+ puts seq_fasta
32
+ puts qual
33
+ puts comments
34
+ end
35
+
36
+ fqr.close
37
+
38
+ === Reading a FASTQ one sequence at a time:
39
+
40
+ require 'scbi_fastq'
41
+
42
+ # open file in sanger mode
43
+ fqr=FastqFile.new('file1.fastq')
44
+
45
+
46
+ begin
47
+
48
+ # read one sequence
49
+ name,seq_fasta,qual,comments=fqr.next_seq
50
+
51
+ # name will be nil when there are no more sequences available
52
+ if !name.nil?
53
+ puts name
54
+ puts seq_fasta
55
+ puts qual
56
+ puts comments
57
+ end
58
+ end until name.nil?
59
+
60
+ fqr.close
61
+
62
+
63
+ === Writing a FASTQ:
64
+
65
+ require 'scbi_fastq'
66
+
67
+ # open new file
68
+ f=FastqFile.new('file.fastq','w')
69
+
70
+ # prepare sample data
71
+ name = 'seq1'
72
+ seq_fasta= 'acgtacgtacact'
73
+ seq_qual= [40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40]
74
+
75
+ # write sequence to disk
76
+ f.write_seq(name,seq_fasta,seq_qual,'comments')
77
+
78
+
79
+ # close file
80
+ f.close
81
+
82
+ == REQUIREMENTS:
83
+
84
+ * This is a standalone gem.
85
+
86
+ == INSTALL:
87
+
88
+ * gem install scbi_fastq
89
+
90
+ == LICENSE:
91
+
92
+ (The MIT License)
93
+
94
+ Copyright (c) 2010 Dario Guerrero
95
+
96
+ Permission is hereby granted, free of charge, to any person obtaining
97
+ a copy of this software and associated documentation files (the
98
+ 'Software'), to deal in the Software without restriction, including
99
+ without limitation the rights to use, copy, modify, merge, publish,
100
+ distribute, sublicense, and/or sell copies of the Software, and to
101
+ permit persons to whom the Software is furnished to do so, subject to
102
+ the following conditions:
103
+
104
+ The above copyright notice and this permission notice shall be
105
+ included in all copies or substantial portions of the Software.
106
+
107
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
108
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
109
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
110
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
111
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
112
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
113
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
# Rakefile for the scbi_fastq gem: declares the gem through Hoe (with the
# newgem plugin) and loads any extra rake tasks found under tasks/.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/scbi_fastq'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec('scbi_fastq') do
  developer('Dario Guerrero', 'dariogf@scbi.uma.es')
  # NOTE(review): this assigns the literal string 'PostInstall.txt', not the
  # contents of that file - confirm which one is intended.
  self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
  self.rubyforge_name = name # TODO this is default value
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'
Dir.glob('tasks/**/*.rake').each do |task_file|
  load task_file
end

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
@@ -0,0 +1,252 @@
1
+
2
# Backport of String#ord for Ruby versions that lack it (Ruby 1.8).
#
# The guard uses Module#method_defined?, which gives the same answer whether
# the interpreter lists instance methods as strings or as symbols.
unless String.method_defined?(:ord)
  class String

    # Returns self[0].
    # NOTE(review): on Ruby 1.8 String#[] with an integer index is expected to
    # return the numeric code of the first byte - confirm on the target runtime.
    def ord
      self[0]
    end

  end
end
12
+
13
+
14
+
15
# Sequential reader/writer for FASTQ files.
#
# A record is handled as +[name, sequence, qualities, comments]+. When
# reading, quality characters can be decoded from one of three encodings:
#
#   :sanger                 Sanger,        Phred+33
#   :ilumina / :illumina    Illumina 1.3+, Phred+64
#   :solexa                 Solexa,        Solexa+64 (converted to Phred)
#
# Solexa scores are converted with
#   phred = 10 * log10(10 ** (solexa / 10.0) + 1)
# e.g. solexa 0 -> phred 3, solexa 10 -> phred 10, solexa 40 -> phred 40.
#
# Writing always produces Sanger (Phred+33) encoded qualities.
class FastqFile

  # ASCII offset of Sanger quality characters (Phred+33).
  SANGER_OFFSET = 33

  # ASCII offset of Solexa and Illumina quality characters (+64).
  SOLEXA_OFFSET = 64

  # Number of records with a non-empty quality line read since the file was
  # opened or last rewound.
  attr_accessor :num_seqs

  #------------------------------------
  # Initialize instance
  #------------------------------------
  # fasta_file_name:: path of the FASTQ file. In read mode an already open
  #                   stream that responds to +readline+ (IO, StringIO, ...)
  #                   is also accepted and used as is.
  # mode::            a mode containing 'w' creates/truncates the file, one
  #                   containing 'a' appends to an existing file, anything
  #                   else opens for reading (case-insensitive).
  # fastq_type::      quality encoding used when reading (see class comment);
  #                   unknown values are treated as :sanger.
  # qual_to_array::   when false, decoded qualities are returned as a single
  #                   space-separated string instead of an array.
  # qual_to_phred::   when false, the quality line is returned untouched.
  #
  # Raises RuntimeError when a file to read or append to does not exist.
  def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)
    @fastq_file = open_stream(fasta_file_name, mode)

    @mode = mode
    @num_seqs = 0
    @fastq_type = fastq_type
    @to_phred = phred_decoder(fastq_type)

    @qual_to_array = qual_to_array
    @qual_to_phred = qual_to_phred
  end

  # Closes the underlying file or stream.
  def close
    @fastq_file.close
  end

  #------------------------------------
  # Iterate over all sequences
  #------------------------------------
  # Rewinds, yields name, sequence, qualities and comments for every record,
  # then rewinds again (so num_seqs is 0 afterwards). Without a block an
  # Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?

    rewind

    n, f, q, c = next_seq
    until n.nil?
      yield(n, f, q, c)
      n, f, q, c = next_seq
    end

    rewind
  end

  # Goes back to the first position in the file and resets num_seqs.
  def rewind
    @num_seqs = 0
    @fastq_file.pos = 0
  end

  #------------------------------------
  # Get next sequence
  #------------------------------------
  # Returns [name, sequence, qualities, comments]; every element is nil when
  # there are no more records.
  def next_seq
    read_fastq
  end

  # Writes one record to the file in Sanger format.
  #
  # seq_qual is either an array of integer Phred scores or a string of
  # whitespace-separated scores.
  def write_seq(seq_name,seq_fasta,seq_qual,comments='')
    # IO#puts with an array writes each element on its own line.
    @fastq_file.puts(FastqFile.to_fastq(seq_name, seq_fasta, seq_qual, comments))
  end

  # Builds the four lines of a FASTQ record in Sanger format and returns them
  # as an array of strings (without line terminators).
  #
  # seq_qual is either an array of integer Phred scores or a string of
  # whitespace-separated scores.
  def self.to_fastq(seq_name,seq_fasta,seq_qual,comments='')
    scores = seq_qual.is_a?(Array) ? seq_qual : seq_qual.split(/\s+/).map { |e| e.to_i }
    header = "#{seq_name} #{comments}"

    [
      "@#{header}",
      seq_fasta,
      "+#{header}",
      scores.map { |score| (score + SANGER_OFFSET).chr }.join
    ]
  end

  # Always true for this class.
  def with_qual?
    true
  end

  private

  # Opens the stream matching +mode+ (see #initialize).
  def open_stream(source, mode)
    flags = mode.upcase
    return File.open(source, 'w') if flags.include?('W')

    appending = flags.include?('A')

    # An already open stream is used directly. This must be decided before
    # File.exist?, which raises TypeError for non-path objects such as StringIO.
    return source if !appending && source.respond_to?(:readline)

    raise "File #{source} doesn't exists" unless File.exist?(source)

    File.open(source, appending ? 'a' : 'r')
  end

  # Returns a lambda that turns the character code of one quality character
  # into a Phred score for the given encoding.
  def phred_decoder(fastq_type)
    case fastq_type
    when :ilumina, :illumina
      lambda { |code| code - SOLEXA_OFFSET }
    when :solexa
      # Remove the +64 offset first, then convert the Solexa score to Phred.
      lambda { |code| (10 * Math.log10(10**((code - SOLEXA_OFFSET) / 10.0) + 1)).round }
    else
      # sanger by default
      lambda { |code| code - SANGER_OFFSET }
    end
  end

  # Returns the next line (without line terminator) that is not blank, or nil
  # at end of file. Lets blank lines before a record or at the end of the
  # file be ignored.
  def next_non_blank_line
    until @fastq_file.eof?
      line = @fastq_file.readline.chomp
      return line unless line.strip.empty?
    end
    nil
  end

  #------------------------------------
  # Read one sequence in fastq
  #------------------------------------
  # @GEM-108-D02
  # AAAAGCTGG
  # +
  # :::::::::
  #
  # Returns [name, sequence, qualities, comments], all nil at end of file.
  # Raises RuntimeError when the record is truncated or its header line does
  # not start with '@' followed by a name.
  def read_fastq
    name_line = next_non_blank_line
    return [nil, nil, nil, nil] if name_line.nil?

    begin
      seq_fasta = @fastq_file.readline.chomp
      @fastq_file.readline # '+' separator line, its content is not used
      seq_qual = @fastq_file.readline.chomp
    rescue EOFError
      raise "Bad format in FastQ file"
    end

    # Header: '@', the name up to the first whitespace, then optional comments.
    unless name_line =~ /^@\s*([^\s]+)\s*(.*)$/
      raise "Invalid sequence name in #{name_line}"
    end
    seq_name = $1
    comments = $2

    seq_fasta.strip!

    unless seq_qual.empty?
      @num_seqs += 1

      if @qual_to_phred
        seq_qual = seq_qual.each_char.map { |ch| @to_phred.call(ch.ord) }
        seq_qual = seq_qual.join(' ') unless @qual_to_array
      end
    end

    [seq_name, seq_fasta, seq_qual, comments]
  end

end
data/lib/scbi_fastq.rb ADDED
@@ -0,0 +1,7 @@
1
# Entry point of the gem: puts this directory at the front of the load path
# (unless it is already listed, either as given or as an absolute path) and
# then loads the FASTQ reader/writer.
lib_dir = File.dirname(__FILE__)
already_listed = $LOAD_PATH.include?(lib_dir) || $LOAD_PATH.include?(File.expand_path(lib_dir))
$LOAD_PATH.unshift(lib_dir) unless already_listed

require 'scbi_fastq/fastq_file'

# Namespace holding the gem version.
module ScbiFastq
  VERSION = '0.0.14'
end
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
#
# Replaces the current process with an irb session that has irb/completion
# and the scbi_fastq library preloaded.

# Use irb.bat when RUBY_PLATFORM reports mswin or mingw, plain irb otherwise.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.expand_path(File.dirname(__FILE__) + '/../lib/scbi_fastq.rb')}"
puts "Loading scbi_fastq gem"
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen destroy script with the command line arguments, after
# dropping a leading --help / -h flag.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

begin
  require 'rubigen'
rescue LoadError
  # Retry once rubygems has been loaded.
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

ARGV.shift if %w[--help -h].include?(ARGV.first)
RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen generate script with the command line arguments, after
# dropping a leading --help / -h flag.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

begin
  require 'rubigen'
rescue LoadError
  # Retry once rubygems has been loaded.
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

ARGV.shift if %w[--help -h].include?(ARGV.first)
RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ require File.dirname(__FILE__) + '/../lib/scbi_fastq'
@@ -0,0 +1,117 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
# Round-trip tests for FastqFile: a file is written with write_seq and read
# back with each / next_seq.
#
# Record number i (1-based) has name "SEQ<i>", the sequence 'ACTG' repeated
# i times and one quality value of 31 per base.
class TestScbiFastq < Test::Unit::TestCase

  # Number of records written and expected back in every test.
  RECORDS = 100

  def setup
    @test_file='/tmp/sanger.fastq'

    @seq_fasta='ACTG'
    @seq_qual=[31]
    @seq_name='SEQ'
  end

  # Removes the file written by the test so runs do not depend on each other.
  def teardown
    File.delete(@test_file) if File.exist?(@test_file)
  end

  # Writes n records to the test file. The offset parameter is unused and is
  # kept only so existing callers keep working.
  def fill_file(n,offset=33)
    f=FastqFile.new(@test_file,'w')

    begin
      n.times do |c|
        i = c+1
        f.write_seq(expected_name(i), expected_fasta(i), expected_qual(i), 'comments')
      end
    ensure
      f.close
    end
  end

  def test_each
    fill_file(RECORDS)

    fqr=FastqFile.new(@test_file)

    begin
      i=0

      fqr.each do |n,s,q|
        i+=1
        assert_record(i,n,s,q)
      end

      # Without this check the test would pass even if nothing was read.
      assert_equal(RECORDS,i)
    ensure
      fqr.close
    end
  end

  def test_each_comments
    fill_file(RECORDS)

    fqr=FastqFile.new(@test_file)

    begin
      i=0

      fqr.each do |n,s,q,c|
        i+=1
        assert_record(i,n,s,q)
        assert_equal('comments',c)
      end

      assert_equal(RECORDS,i)
    ensure
      fqr.close
    end
  end

  def test_next_seq_comments
    fill_file(RECORDS)

    fqr=FastqFile.new(@test_file)

    begin
      i=0

      loop do
        n,s,q,c = fqr.next_seq
        # name is nil once every record has been read
        break if n.nil?

        i+=1
        assert_record(i,n,s,q)
        assert_equal('comments',c)
      end

      assert_equal(RECORDS,i)
    ensure
      fqr.close
    end
  end

  private

  # Name of record number i.
  def expected_name(i)
    @seq_name+i.to_s
  end

  # Sequence of record number i.
  def expected_fasta(i)
    @seq_fasta*i
  end

  # Quality values of record number i, one per base.
  def expected_qual(i)
    @seq_qual*i*@seq_fasta.length
  end

  # Asserts that name, sequence and qualities match record number i.
  def assert_record(i,name,fasta,qual)
    assert_equal(expected_name(i),name)
    assert_equal(expected_fasta(i),fasta)
    assert_equal(expected_qual(i),qual)
  end

end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scbi_fastq
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.14
6
+ platform: ruby
7
+ authors:
8
+ - Dario Guerrero
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-05-31 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 2.8.0
24
+ type: :development
25
+ version_requirements: *id001
26
+ description: scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qualities in a variety of formats (Sanger, Solexa, Ilumina).
27
+ email:
28
+ - dariogf@scbi.uma.es
29
+ executables: []
30
+
31
+ extensions: []
32
+
33
+ extra_rdoc_files:
34
+ - History.txt
35
+ - Manifest.txt
36
+ - PostInstall.txt
37
+ files:
38
+ - History.txt
39
+ - Manifest.txt
40
+ - PostInstall.txt
41
+ - README.rdoc
42
+ - Rakefile
43
+ - lib/scbi_fastq.rb
44
+ - lib/scbi_fastq/fastq_file.rb
45
+ - script/console
46
+ - script/destroy
47
+ - script/generate
48
+ - test/test_helper.rb
49
+ - test/test_scbi_fastq.rb
50
+ homepage: http://www.scbi.uma.es/downloads
51
+ licenses: []
52
+
53
+ post_install_message: PostInstall.txt
54
+ rdoc_options:
55
+ - --main
56
+ - README.rdoc
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ requirements: []
72
+
73
+ rubyforge_project: scbi_fastq
74
+ rubygems_version: 1.7.2
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qualities in a variety of formats (Sanger, Solexa, Ilumina).
78
+ test_files:
79
+ - test/test_helper.rb
80
+ - test/test_scbi_fastq.rb