scbi_fastq 0.0.18 → 0.0.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- ZGZmZTIzNjczMTQ4OWIzM2Q2Njc0NTliYTRlYzkxMTg0MGFhYjE3Mg==
5
- data.tar.gz: !binary |-
6
- ODEzZTE0MzgzNzIyMWEyYWY4NzU5ZjlmYmVjMTQ1ZmUzYjJkYmZmNQ==
2
+ SHA1:
3
+ metadata.gz: c72341f5b45161a2bb31b82e772624c38ff9cfcd
4
+ data.tar.gz: 2ae8b1289a1b80610dac9e517dbcb98403f542a5
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- NWExOGZhYTNmMzFlMmEyOTdlZDA3YTkxMmQ3YTJkZGMwZWNmMzg4NGIwMWY5
10
- YjRkOTI4ZmIyYWQxOWQyOWI2ZjVlM2VmZDg1YjJkMzVlMDQ5OTQ2ZWI0MTFk
11
- MmMxNzZkOTI4ZGQyOWE1Yjg0ODI0ZGM1YTg3MTEzZTliOWQ3N2I=
12
- data.tar.gz: !binary |-
13
- MzFmNzRhOTlmOWZkYmQyNmRmZGVkZmRjMWZjZTA0YWJlNGU2MWIzYTQ0YjQ0
14
- YmQ4ZjFmZWI1NmQxNWNhZmJkNmVkNDI3ZTMxOWE0YWNmNzdmNGY5YWUzYTE1
15
- NGNlMTg0YWExOWU0Y2ZjZWVhMDdhZTczZDkzZTEwYWMwOTg1YjU=
6
+ metadata.gz: e9c7bb8330578cfc1fce1f3369a14e388d7ad17f6edeefd34e21a0d352b1e049f6fc4b9cabb52d56fdc1f29468bfe0a13eb6315c9451b85e123a1ec719f90584
7
+ data.tar.gz: 05db1afc8fa7963329e44fb03d6b33a4ccb89684b078adb3487a0e9d18a0101f4e5eae688f0914a6d7a49be22de08824ba1ad72837c973f78eeb23128353ad4f
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in scbi_fastq.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 dariogf
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -105,27 +105,11 @@ scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qual
105
105
 
106
106
  * gem install scbi_fastq
107
107
 
108
- == LICENSE:
109
-
110
- (The MIT License)
111
-
112
- Copyright (c) 2010 Dario Guerrero
113
-
114
- Permission is hereby granted, free of charge, to any person obtaining
115
- a copy of this software and associated documentation files (the
116
- 'Software'), to deal in the Software without restriction, including
117
- without limitation the rights to use, copy, modify, merge, publish,
118
- distribute, sublicense, and/or sell copies of the Software, and to
119
- permit persons to whom the Software is furnished to do so, subject to
120
- the following conditions:
121
-
122
- The above copyright notice and this permission notice shall be
123
- included in all copies or substantial portions of the Software.
124
-
125
- THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
126
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
127
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
128
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
129
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
130
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
131
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
108
+ ## Contributing
109
+
110
+ 1. Fork it ( https://github.com/[my-github-username]/scbi_fastq/fork )
111
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
112
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
113
+ 4. Push to the branch (`git push origin my-new-feature`)
114
+ 5. Create a new Pull Request
115
+
data/Rakefile CHANGED
@@ -1,26 +1,8 @@
1
- require 'rubygems'
2
- gem 'hoe', '>= 2.1.0'
3
- require 'hoe'
4
- require 'fileutils'
5
- require './lib/scbi_fastq'
6
-
7
- Hoe.plugin :newgem
8
- # Hoe.plugin :website
9
- # Hoe.plugin :cucumberfeatures
10
-
11
- # Generate all the Rake tasks
12
- # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
- $hoe = Hoe.spec 'scbi_fastq' do
14
- self.developer 'Dario Guerrero', 'dariogf@scbi.uma.es'
15
- self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
16
- self.rubyforge_name = self.name # TODO this is default value
17
- # self.extra_deps = [['activesupport','>= 2.0.2']]
18
-
19
- end
20
-
21
- require 'newgem/tasks'
22
- Dir['tasks/**/*.rake'].each { |t| load t }
23
-
24
- # TODO - want other tests/tasks run by default? Add them to the list
25
- # remove_task :default
26
- task :default => [:spec, :features, :redocs]
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << 'test'
7
+ t.pattern = "test/*_test.rb"
8
+ end
data/lib/scbi_fastq.rb CHANGED
@@ -1,7 +1,6 @@
1
- $:.unshift(File.dirname(__FILE__)) unless
2
- $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
-
1
+ require "scbi_fastq/version"
4
2
  require 'scbi_fastq/fastq_file'
3
+
5
4
  module ScbiFastq
6
- VERSION = '0.0.18'
5
+ # Your code goes here...
7
6
  end
@@ -1,4 +1,5 @@
1
1
  require 'zlib'
2
+ require 'scbi_multi_gz_reader'
2
3
 
3
4
  # add ord method to ruby 1.8
4
5
  if !String.instance_methods.include?(:ord)
@@ -22,7 +23,6 @@ class FastqFile
22
23
  #------------------------------------
23
24
  def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)
24
25
 
25
-
26
26
 
27
27
  if mode.upcase.index('W.GZ')
28
28
  @fastq_file = Zlib::GzipWriter.open(fasta_file_name)
@@ -43,9 +43,10 @@ class FastqFile
43
43
  @fastq_file = fasta_file_name
44
44
  else
45
45
  begin
46
- @fastq_file = Zlib::GzipReader.open(fasta_file_name)
46
+ #@fastq_file = Zlib::GzipReader.open(fasta_file_name)
47
+ @fastq_file = MultiGzReader.new(fasta_file_name)
47
48
  # puts "GZIP file detected"
48
- rescue
49
+ rescue => e
49
50
  @fastq_file = File.open(fasta_file_name,'r')
50
51
  # puts "NORMAL file detected"
51
52
  end
@@ -158,7 +159,8 @@ class FastqFile
158
159
 
159
160
  @fastq_file.puts("@#{seq_name} #{comments}")
160
161
  @fastq_file.puts(seq_fasta)
161
- @fastq_file.puts("+#{seq_name} #{comments}")
162
+ @fastq_file.puts("+")
163
+ #@fastq_file.puts("+#{seq_name} #{comments}")
162
164
 
163
165
  if seq_qual.is_a?(Array)
164
166
  @fastq_file.puts(seq_qual.map{|e| @from_phred.call(e)}.join)
@@ -178,7 +180,8 @@ class FastqFile
178
180
 
179
181
  res << ("@#{seq_name} #{comments}")
180
182
  res << (seq_fasta)
181
- res << ("+#{seq_name} #{comments}")
183
+ res << ("+")
184
+ #res << ("+#{seq_name} #{comments}")
182
185
 
183
186
  if !seq_qual.empty?
184
187
  # if @qual_to_phred
@@ -0,0 +1,3 @@
1
+ module ScbiFastq
2
+ VERSION = "0.0.19"
3
+ end
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scbi_fastq/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "scbi_fastq"
8
+ spec.version = ScbiFastq::VERSION
9
+ spec.authors = ["dariogf"]
10
+ spec.email = ["dariogf@scbi.uma.es"]
11
+ spec.summary = %q{read/write FASTQ files}
12
+ spec.description = %q{scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qualities in a variety of formats (Sanger, Solexa, Ilumina).}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency "scbi_multi_gz_reader"
22
+ spec.add_development_dependency "bundler", "~> 1.7"
23
+ spec.add_development_dependency "rake", "~> 10.0"
24
+ end
Binary file
@@ -0,0 +1,194 @@
1
+ #!/usr/local/bin/perl -w
2
+
3
+ # Author: lh3
4
+
5
+ use strict;
6
+ use warnings;
7
+ use Getopt::Std;
8
+
9
+ my $usage = qq(
10
+ Usage: fq_all2std.pl <command> <in.txt>
11
+
12
+ Command: scarf2std Convert SCARF format to the standard/Sanger FASTQ
13
+ fqint2std Convert FASTQ-int format to the standard/Sanger FASTQ
14
+ sol2std Convert Solexa/Illumina FASTQ to the standard FASTQ
15
+ fa2std Convert FASTA to the standard FASTQ
16
+ fq2fa Convert various FASTQ-like format to FASTA
17
+ instruction Explanation to different format
18
+ example Show examples of various formats
19
+
20
+ Note: Read/quality sequences MUST be presented in one line.
21
+ \n);
22
+
23
+ die($usage) if (@ARGV < 1);
24
+
25
+ # Solexa->Sanger quality conversion table
26
+ my @conv_table;
27
+ for (-64..64) {
28
+ $conv_table[$_+64] = chr(int(33 + 10*log(1+10**($_/10.0))/log(10)+.499));
29
+ }
30
+
31
+ # parsing command line
32
+ my $cmd = shift;
33
+ my %cmd_hash = (scarf2std=>\&scarf2std, fqint2std=>\&fqint2std, sol2std=>\&sol2std, fa2std=>\&fa2std,
34
+ fq2fa=>\&fq2fa, example=>\&example, instruction=>\&instruction);
35
+ if (defined($cmd_hash{$cmd})) {
36
+ &{$cmd_hash{$cmd}};
37
+ } else {
38
+ die("** Unrecognized command $cmd");
39
+ }
40
+
41
+ sub fa2std {
42
+ my %opts = (q=>25);
43
+ getopts('q:', \%opts);
44
+ my $q = chr($opts{q} + 33);
45
+ warn("-- The default quality is set to $opts{q}. Use '-q' at the command line to change the default.\n");
46
+ while (<>) {
47
+ if (/^>(\S+)/) {
48
+ print "\@$1\n";
49
+ $_ = <>;
50
+ print "$_+\n", $q x (length($_)-1), "\n";
51
+ }
52
+ }
53
+ }
54
+
55
+ sub fq2fa {
56
+ while (<>) {
57
+ if (/^@(\S+)/) {
58
+ print ">$1\n";
59
+ $_ = <>; print;
60
+ <>; <>;
61
+ }
62
+ }
63
+ }
64
+
65
+ sub scarf2std {
66
+ while (<>) {
67
+ my @t = split(':', $_);
68
+ my $name = join('_', @t[0..4]);
69
+ print "\@$name\n$t[5]\n+\n";
70
+ my $qual = '';
71
+ @t = split(/\s/, $t[6]);
72
+ $qual .= $conv_table[$_+64] for (@t);
73
+ print "$qual\n";
74
+ }
75
+ }
76
+
77
+ sub fqint2std {
78
+ while (<>) {
79
+ if (/^@/) {
80
+ print;
81
+ $_ = <>; print; $_ = <>; $_ = <>;
82
+ my @t = split;
83
+ my $qual = '';
84
+ $qual .= $conv_table[$_+64] for (@t);
85
+ print "+\n$qual\n";
86
+ }
87
+ }
88
+ }
89
+
90
+ sub sol2std {
91
+ my $max = 0;
92
+ while (<>) {
93
+ if (/^@/) {
94
+ print;
95
+ $_ = <>; print; $_ = <>; $_ = <>;
96
+ my @t = split('', $_);
97
+ my $qual = '';
98
+ $qual .= $conv_table[ord($_)] for (@t);
99
+ print "+\n$qual\n";
100
+ }
101
+ }
102
+ }
103
+
104
+ sub instruction {
105
+
106
+ print "
107
+ FASTQ format is first used in the Sanger Institute, and therefore
108
+ we take the Sanger specification as the standard FASTQ. Although
109
+ Solexa/Illumina reads file looks pretty much like the standard
110
+ FASTQ, they are different in that the qualities are scaled
111
+ differently. In the quality string, if you can see a character
112
+ with its ASCII code higher than 90, probably your file is in the
113
+ Solexa/Illumina format.
114
+
115
+ Sometimes we also use an integer, instead of a single character,
116
+ to explicitly show the qualities. In that case, negative
117
+ qualities indicates that Solexa/Illumina qualities are used.
118
+
119
+ ";
120
+
121
+ }
122
+
123
+ sub example {
124
+ my $exam_scarf = '
125
+ USI-EAS50_1:4:2:710:120:GTCAAAGTAATAATAGGAGATTTGAGCTATTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 19 23 23 23 18 23 23 23
126
+ USI-EAS50_1:4:2:690:87:GTTTTTTTTTTTCTTTCCATTAATTTCCCTTT:23 23 23 23 23 23 23 23 23 23 23 23 12 23 23 23 23 23 16 23 23 9 18 23 23 23 12 23 18 23 23 23
127
+ USI-EAS50_1:4:2:709:32:GAGAAGTCAAACCTGTGTTAGAAATTTTATAC:23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 23 12 23 18 23 23 23 23 23 23 23 23
128
+ USI-EAS50_1:4:2:886:890:GCTTATTTAAAAATTTACTTGGGGTTGTCTTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
129
+ USI-EAS50_1:4:2:682:91:GGGTTTCTAGACTAAAGGGATTTAACAAGTTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 18 23 23 23 23
130
+ USI-EAS50_1:4:2:663:928:GAATTTGTTTGAAGAGTGTCATGGTCAGATCT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
131
+ ';
132
+
133
+ my $exam_fqint = '
134
+ @4_1_912_360
135
+ AAGGGGCTAGAGAAACACGTAATGAAGGGAGGACTC
136
+ +4_1_912_360
137
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 21 40 40 40 40 40 40 40 40 40 26 40 40 14 39 40 40
138
+ @4_1_54_483
139
+ TAATAAATGTGCTTCCTTGATGCATGTGCTATGATT
140
+ +4_1_54_483
141
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 40 40 40 28 40 40 40 40 40 40 16 40 40 5 40 40
142
+ @4_1_537_334
143
+ ATTGATGATGCTGTGCACCTAGCAAGAAGTTGCATA
144
+ +4_1_537_334
145
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 21 29 40 40 33 40 40 33 40 40 33 31 40 40 40 40 18 26 40 -2
146
+ @4_1_920_361
147
+ AACGGCACAATCCAGGTTGATGCCTACGGCGGGTAC
148
+ +4_1_920_361
149
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 40 40 40 40 40 40 40 40 31 40 40 40 40 40 40 15 5 -1 3
150
+ @4_1_784_155
151
+ AATGCATGCTTCGAATGGCATTCTCTTCAATCACGA
152
+ +4_1_784_155
153
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 31 40 40 40 40 40
154
+ @4_1_595_150
155
+ AAAGACGTGGCCAGATGGGTGGCCAAGTGCCCGACT
156
+ +4_1_595_150
157
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 40 40 40 40 40 40 40 20 40 40 40 40 40 14 40 40
158
+ ';
159
+
160
+ my $exam_sol = '
161
+ @SLXA-B3_649_FC8437_R1_1_1_610_79
162
+ GATGTGCAATACCTTTGTAGAGGAA
163
+ +SLXA-B3_649_FC8437_R1_1_1_610_79
164
+ YYYYYYYYYYYYYYYYYYWYWYYSU
165
+ @SLXA-B3_649_FC8437_R1_1_1_397_389
166
+ GGTTTGAGAAAGAGAAATGAGATAA
167
+ +SLXA-B3_649_FC8437_R1_1_1_397_389
168
+ YYYYYYYYYWYYYYWWYYYWYWYWW
169
+ @SLXA-B3_649_FC8437_R1_1_1_850_123
170
+ GAGGGTGTTGATCATGATGATGGCG
171
+ +SLXA-B3_649_FC8437_R1_1_1_850_123
172
+ YYYYYYYYYYYYYWYYWYYSYYYSY
173
+ @SLXA-B3_649_FC8437_R1_1_1_362_549
174
+ GGAAACAAAGTTTTTCTCAACATAG
175
+ +SLXA-B3_649_FC8437_R1_1_1_362_549
176
+ YYYYYYYYYYYYYYYYYYWWWWYWY
177
+ @SLXA-B3_649_FC8437_R1_1_1_183_714
178
+ GTATTATTTAATGGCATACACTCAA
179
+ +SLXA-B3_649_FC8437_R1_1_1_183_714
180
+ YYYYYYYYYYWYYYYWYWWUWWWQQ
181
+ ';
182
+
183
+ print qq(
184
+ solexa
185
+ ======
186
+ $exam_sol
187
+ scarf
188
+ =====
189
+ $exam_scarf
190
+ fqint
191
+ =====
192
+ $exam_fqint
193
+ );
194
+ }
data/test/gz.rb ADDED
@@ -0,0 +1,11 @@
1
+ require '../lib/scbi_fastq/multi_gz_reader'
2
+
3
+ file=MultiGzReader.new(File.join(File.dirname(__FILE__),'minitest.fastq.gz'))
4
+
5
+ loop do
6
+ res=file.readline
7
+ puts "LINE: #{res}"
8
+ break if res.nil?
9
+ end
10
+
11
+ file.close
data/test/h1.fastq.gz ADDED
Binary file
data/test/h2.fastq.gz ADDED
Binary file
Binary file
data/test/prueba.rb ADDED
@@ -0,0 +1,8 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ fqr=FastqFile.new('./sanger.fastq.gz')
4
+
5
+ fqr.each do |n,f,q|
6
+ puts n
7
+ end
8
+ fqr.close