scbi_fastq 0.0.18 → 0.0.19

This diff shows the changes between two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- ZGZmZTIzNjczMTQ4OWIzM2Q2Njc0NTliYTRlYzkxMTg0MGFhYjE3Mg==
5
- data.tar.gz: !binary |-
6
- ODEzZTE0MzgzNzIyMWEyYWY4NzU5ZjlmYmVjMTQ1ZmUzYjJkYmZmNQ==
2
+ SHA1:
3
+ metadata.gz: c72341f5b45161a2bb31b82e772624c38ff9cfcd
4
+ data.tar.gz: 2ae8b1289a1b80610dac9e517dbcb98403f542a5
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- NWExOGZhYTNmMzFlMmEyOTdlZDA3YTkxMmQ3YTJkZGMwZWNmMzg4NGIwMWY5
10
- YjRkOTI4ZmIyYWQxOWQyOWI2ZjVlM2VmZDg1YjJkMzVlMDQ5OTQ2ZWI0MTFk
11
- MmMxNzZkOTI4ZGQyOWE1Yjg0ODI0ZGM1YTg3MTEzZTliOWQ3N2I=
12
- data.tar.gz: !binary |-
13
- MzFmNzRhOTlmOWZkYmQyNmRmZGVkZmRjMWZjZTA0YWJlNGU2MWIzYTQ0YjQ0
14
- YmQ4ZjFmZWI1NmQxNWNhZmJkNmVkNDI3ZTMxOWE0YWNmNzdmNGY5YWUzYTE1
15
- NGNlMTg0YWExOWU0Y2ZjZWVhMDdhZTczZDkzZTEwYWMwOTg1YjU=
6
+ metadata.gz: e9c7bb8330578cfc1fce1f3369a14e388d7ad17f6edeefd34e21a0d352b1e049f6fc4b9cabb52d56fdc1f29468bfe0a13eb6315c9451b85e123a1ec719f90584
7
+ data.tar.gz: 05db1afc8fa7963329e44fb03d6b33a4ccb89684b078adb3487a0e9d18a0101f4e5eae688f0914a6d7a49be22de08824ba1ad72837c973f78eeb23128353ad4f
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in scbi_fastq.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 dariogf
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -105,27 +105,11 @@ scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qual
105
105
 
106
106
  * gem install scbi_fastq
107
107
 
108
- == LICENSE:
109
-
110
- (The MIT License)
111
-
112
- Copyright (c) 2010 Dario Guerrero
113
-
114
- Permission is hereby granted, free of charge, to any person obtaining
115
- a copy of this software and associated documentation files (the
116
- 'Software'), to deal in the Software without restriction, including
117
- without limitation the rights to use, copy, modify, merge, publish,
118
- distribute, sublicense, and/or sell copies of the Software, and to
119
- permit persons to whom the Software is furnished to do so, subject to
120
- the following conditions:
121
-
122
- The above copyright notice and this permission notice shall be
123
- included in all copies or substantial portions of the Software.
124
-
125
- THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
126
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
127
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
128
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
129
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
130
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
131
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
108
+ ## Contributing
109
+
110
+ 1. Fork it ( https://github.com/[my-github-username]/scbi_fastq/fork )
111
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
112
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
113
+ 4. Push to the branch (`git push origin my-new-feature`)
114
+ 5. Create a new Pull Request
115
+
data/Rakefile CHANGED
@@ -1,26 +1,8 @@
1
- require 'rubygems'
2
- gem 'hoe', '>= 2.1.0'
3
- require 'hoe'
4
- require 'fileutils'
5
- require './lib/scbi_fastq'
6
-
7
- Hoe.plugin :newgem
8
- # Hoe.plugin :website
9
- # Hoe.plugin :cucumberfeatures
10
-
11
- # Generate all the Rake tasks
12
- # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
- $hoe = Hoe.spec 'scbi_fastq' do
14
- self.developer 'Dario Guerrero', 'dariogf@scbi.uma.es'
15
- self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
16
- self.rubyforge_name = self.name # TODO this is default value
17
- # self.extra_deps = [['activesupport','>= 2.0.2']]
18
-
19
- end
20
-
21
- require 'newgem/tasks'
22
- Dir['tasks/**/*.rake'].each { |t| load t }
23
-
24
- # TODO - want other tests/tasks run by default? Add them to the list
25
- # remove_task :default
26
- task :default => [:spec, :features, :redocs]
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << 'test'
7
+ t.pattern = "test/*_test.rb"
8
+ end
data/lib/scbi_fastq.rb CHANGED
@@ -1,7 +1,6 @@
1
- $:.unshift(File.dirname(__FILE__)) unless
2
- $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
-
1
+ require "scbi_fastq/version"
4
2
  require 'scbi_fastq/fastq_file'
3
+
5
4
  module ScbiFastq
6
- VERSION = '0.0.18'
5
+ # Your code goes here...
7
6
  end
data/lib/scbi_fastq/fastq_file.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'zlib'
2
+ require 'scbi_multi_gz_reader'
2
3
 
3
4
  # add ord method to ruby 1.8
4
5
  if !String.instance_methods.include?(:ord)
@@ -22,7 +23,6 @@ class FastqFile
22
23
  #------------------------------------
23
24
  def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)
24
25
 
25
-
26
26
 
27
27
  if mode.upcase.index('W.GZ')
28
28
  @fastq_file = Zlib::GzipWriter.open(fasta_file_name)
@@ -43,9 +43,10 @@ class FastqFile
43
43
  @fastq_file = fasta_file_name
44
44
  else
45
45
  begin
46
- @fastq_file = Zlib::GzipReader.open(fasta_file_name)
46
+ #@fastq_file = Zlib::GzipReader.open(fasta_file_name)
47
+ @fastq_file = MultiGzReader.new(fasta_file_name)
47
48
  # puts "GZIP file detected"
48
- rescue
49
+ rescue => e
49
50
  @fastq_file = File.open(fasta_file_name,'r')
50
51
  # puts "NORMAL file detected"
51
52
  end
@@ -158,7 +159,8 @@ class FastqFile
158
159
 
159
160
  @fastq_file.puts("@#{seq_name} #{comments}")
160
161
  @fastq_file.puts(seq_fasta)
161
- @fastq_file.puts("+#{seq_name} #{comments}")
162
+ @fastq_file.puts("+")
163
+ #@fastq_file.puts("+#{seq_name} #{comments}")
162
164
 
163
165
  if seq_qual.is_a?(Array)
164
166
  @fastq_file.puts(seq_qual.map{|e| @from_phred.call(e)}.join)
@@ -178,7 +180,8 @@ class FastqFile
178
180
 
179
181
  res << ("@#{seq_name} #{comments}")
180
182
  res << (seq_fasta)
181
- res << ("+#{seq_name} #{comments}")
183
+ res << ("+")
184
+ #res << ("+#{seq_name} #{comments}")
182
185
 
183
186
  if !seq_qual.empty?
184
187
  # if @qual_to_phred
data/lib/scbi_fastq/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module ScbiFastq
2
+ VERSION = "0.0.19"
3
+ end
data/scbi_fastq.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scbi_fastq/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "scbi_fastq"
8
+ spec.version = ScbiFastq::VERSION
9
+ spec.authors = ["dariogf"]
10
+ spec.email = ["dariogf@scbi.uma.es"]
11
+ spec.summary = %q{read/write FASTQ files}
12
+ spec.description = %q{scbi_fastq is a ruby gem to read/write FASTQ files (DNA/RNA sequences) with qualities in a variety of formats (Sanger, Solexa, Ilumina).}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency "scbi_multi_gz_reader"
22
+ spec.add_development_dependency "bundler", "~> 1.7"
23
+ spec.add_development_dependency "rake", "~> 10.0"
24
+ end
Binary file
@@ -0,0 +1,194 @@
1
+ #!/usr/local/bin/perl -w
2
+
3
+ # Author: lh3
4
+
5
+ use strict;
6
+ use warnings;
7
+ use Getopt::Std;
8
+
9
+ my $usage = qq(
10
+ Usage: fq_all2std.pl <command> <in.txt>
11
+
12
+ Command: scarf2std Convert SCARF format to the standard/Sanger FASTQ
13
+ fqint2std Convert FASTQ-int format to the standard/Sanger FASTQ
14
+ sol2std Convert Solexa/Illumina FASTQ to the standard FASTQ
15
+ fa2std Convert FASTA to the standard FASTQ
16
+ fq2fa Convert various FASTQ-like format to FASTA
17
+ instruction Explanation to different format
18
+ example Show examples of various formats
19
+
20
+ Note: Read/quality sequences MUST be presented in one line.
21
+ \n);
22
+
23
+ die($usage) if (@ARGV < 1);
24
+
25
+ # Solexa->Sanger quality conversion table
26
+ my @conv_table;
27
+ for (-64..64) {
28
+ $conv_table[$_+64] = chr(int(33 + 10*log(1+10**($_/10.0))/log(10)+.499));
29
+ }
30
+
31
+ # parsing command line
32
+ my $cmd = shift;
33
+ my %cmd_hash = (scarf2std=>\&scarf2std, fqint2std=>\&fqint2std, sol2std=>\&sol2std, fa2std=>\&fa2std,
34
+ fq2fa=>\&fq2fa, example=>\&example, instruction=>\&instruction);
35
+ if (defined($cmd_hash{$cmd})) {
36
+ &{$cmd_hash{$cmd}};
37
+ } else {
38
+ die("** Unrecognized command $cmd");
39
+ }
40
+
41
+ sub fa2std {
42
+ my %opts = (q=>25);
43
+ getopts('q:', \%opts);
44
+ my $q = chr($opts{q} + 33);
45
+ warn("-- The default quality is set to $opts{q}. Use '-q' at the command line to change the default.\n");
46
+ while (<>) {
47
+ if (/^>(\S+)/) {
48
+ print "\@$1\n";
49
+ $_ = <>;
50
+ print "$_+\n", $q x (length($_)-1), "\n";
51
+ }
52
+ }
53
+ }
54
+
55
+ sub fq2fa {
56
+ while (<>) {
57
+ if (/^@(\S+)/) {
58
+ print ">$1\n";
59
+ $_ = <>; print;
60
+ <>; <>;
61
+ }
62
+ }
63
+ }
64
+
65
+ sub scarf2std {
66
+ while (<>) {
67
+ my @t = split(':', $_);
68
+ my $name = join('_', @t[0..4]);
69
+ print "\@$name\n$t[5]\n+\n";
70
+ my $qual = '';
71
+ @t = split(/\s/, $t[6]);
72
+ $qual .= $conv_table[$_+64] for (@t);
73
+ print "$qual\n";
74
+ }
75
+ }
76
+
77
+ sub fqint2std {
78
+ while (<>) {
79
+ if (/^@/) {
80
+ print;
81
+ $_ = <>; print; $_ = <>; $_ = <>;
82
+ my @t = split;
83
+ my $qual = '';
84
+ $qual .= $conv_table[$_+64] for (@t);
85
+ print "+\n$qual\n";
86
+ }
87
+ }
88
+ }
89
+
90
+ sub sol2std {
91
+ my $max = 0;
92
+ while (<>) {
93
+ if (/^@/) {
94
+ print;
95
+ $_ = <>; print; $_ = <>; $_ = <>;
96
+ my @t = split('', $_);
97
+ my $qual = '';
98
+ $qual .= $conv_table[ord($_)] for (@t);
99
+ print "+\n$qual\n";
100
+ }
101
+ }
102
+ }
103
+
104
+ sub instruction {
105
+
106
+ print "
107
+ FASTQ format is first used in the Sanger Institute, and therefore
108
+ we take the Sanger specification as the standard FASTQ. Although
109
+ Solexa/Illumina reads file looks pretty much like the standard
110
+ FASTQ, they are different in that the qualities are scaled
111
+ differently. In the quality string, if you can see a character
112
+ with its ASCII code higher than 90, probably your file is in the
113
+ Solexa/Illumina format.
114
+
115
+ Sometimes we also use an integer, instead of a single character,
116
+ to explicitly show the qualities. In that case, negative
117
+ qualities indicates that Solexa/Illumina qualities are used.
118
+
119
+ ";
120
+
121
+ }
122
+
123
+ sub example {
124
+ my $exam_scarf = '
125
+ USI-EAS50_1:4:2:710:120:GTCAAAGTAATAATAGGAGATTTGAGCTATTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 19 23 23 23 18 23 23 23
126
+ USI-EAS50_1:4:2:690:87:GTTTTTTTTTTTCTTTCCATTAATTTCCCTTT:23 23 23 23 23 23 23 23 23 23 23 23 12 23 23 23 23 23 16 23 23 9 18 23 23 23 12 23 18 23 23 23
127
+ USI-EAS50_1:4:2:709:32:GAGAAGTCAAACCTGTGTTAGAAATTTTATAC:23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 23 12 23 18 23 23 23 23 23 23 23 23
128
+ USI-EAS50_1:4:2:886:890:GCTTATTTAAAAATTTACTTGGGGTTGTCTTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
129
+ USI-EAS50_1:4:2:682:91:GGGTTTCTAGACTAAAGGGATTTAACAAGTTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 18 23 23 23 23
130
+ USI-EAS50_1:4:2:663:928:GAATTTGTTTGAAGAGTGTCATGGTCAGATCT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
131
+ ';
132
+
133
+ my $exam_fqint = '
134
+ @4_1_912_360
135
+ AAGGGGCTAGAGAAACACGTAATGAAGGGAGGACTC
136
+ +4_1_912_360
137
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 21 40 40 40 40 40 40 40 40 40 26 40 40 14 39 40 40
138
+ @4_1_54_483
139
+ TAATAAATGTGCTTCCTTGATGCATGTGCTATGATT
140
+ +4_1_54_483
141
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 40 40 40 28 40 40 40 40 40 40 16 40 40 5 40 40
142
+ @4_1_537_334
143
+ ATTGATGATGCTGTGCACCTAGCAAGAAGTTGCATA
144
+ +4_1_537_334
145
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 21 29 40 40 33 40 40 33 40 40 33 31 40 40 40 40 18 26 40 -2
146
+ @4_1_920_361
147
+ AACGGCACAATCCAGGTTGATGCCTACGGCGGGTAC
148
+ +4_1_920_361
149
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 40 40 40 40 40 40 40 40 31 40 40 40 40 40 40 15 5 -1 3
150
+ @4_1_784_155
151
+ AATGCATGCTTCGAATGGCATTCTCTTCAATCACGA
152
+ +4_1_784_155
153
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 31 40 40 40 40 40
154
+ @4_1_595_150
155
+ AAAGACGTGGCCAGATGGGTGGCCAAGTGCCCGACT
156
+ +4_1_595_150
157
+ 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 40 40 40 40 40 40 40 20 40 40 40 40 40 14 40 40
158
+ ';
159
+
160
+ my $exam_sol = '
161
+ @SLXA-B3_649_FC8437_R1_1_1_610_79
162
+ GATGTGCAATACCTTTGTAGAGGAA
163
+ +SLXA-B3_649_FC8437_R1_1_1_610_79
164
+ YYYYYYYYYYYYYYYYYYWYWYYSU
165
+ @SLXA-B3_649_FC8437_R1_1_1_397_389
166
+ GGTTTGAGAAAGAGAAATGAGATAA
167
+ +SLXA-B3_649_FC8437_R1_1_1_397_389
168
+ YYYYYYYYYWYYYYWWYYYWYWYWW
169
+ @SLXA-B3_649_FC8437_R1_1_1_850_123
170
+ GAGGGTGTTGATCATGATGATGGCG
171
+ +SLXA-B3_649_FC8437_R1_1_1_850_123
172
+ YYYYYYYYYYYYYWYYWYYSYYYSY
173
+ @SLXA-B3_649_FC8437_R1_1_1_362_549
174
+ GGAAACAAAGTTTTTCTCAACATAG
175
+ +SLXA-B3_649_FC8437_R1_1_1_362_549
176
+ YYYYYYYYYYYYYYYYYYWWWWYWY
177
+ @SLXA-B3_649_FC8437_R1_1_1_183_714
178
+ GTATTATTTAATGGCATACACTCAA
179
+ +SLXA-B3_649_FC8437_R1_1_1_183_714
180
+ YYYYYYYYYYWYYYYWYWWUWWWQQ
181
+ ';
182
+
183
+ print qq(
184
+ solexa
185
+ ======
186
+ $exam_sol
187
+ scarf
188
+ =====
189
+ $exam_scarf
190
+ fqint
191
+ =====
192
+ $exam_fqint
193
+ );
194
+ }
data/test/gz.rb ADDED
@@ -0,0 +1,11 @@
1
+ require '../lib/scbi_fastq/multi_gz_reader'
2
+
3
+ file=MultiGzReader.new(File.join(File.dirname(__FILE__),'minitest.fastq.gz'))
4
+
5
+ loop do
6
+ res=file.readline
7
+ puts "LINE: #{res}"
8
+ break if res.nil?
9
+ end
10
+
11
+ file.close
data/test/h1.fastq.gz ADDED
Binary file
data/test/h2.fastq.gz ADDED
Binary file
Binary file
data/test/prueba.rb ADDED
@@ -0,0 +1,8 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ fqr=FastqFile.new('./sanger.fastq.gz')
4
+
5
+ fqr.each do |n,f,q|
6
+ puts n
7
+ end
8
+ fqr.close