scbi_multi_gz_reader 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +58 -0
- data/Rakefile +9 -0
- data/lib/scbi_multi_gz_reader/multi_gz_reader.rb +51 -0
- data/lib/scbi_multi_gz_reader/version.rb +3 -0
- data/lib/scbi_multi_gz_reader.rb +6 -0
- data/scbi_multi_gz_reader.gemspec +23 -0
- data/test/empty.fastq.gz +0 -0
- data/test/fq_all2std.pl +194 -0
- data/test/gz.rb +11 -0
- data/test/h1.fastq.gz +0 -0
- data/test/h2.fastq.gz +0 -0
- data/test/minitest.fastq.gz +0 -0
- data/test/minitest_full.fastq.gz +0 -0
- data/test/scbi_multi_gz_reader_test.rb +113 -0
- data/test/test_helper.rb +3 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bff6f483b2c9eb929aecd04e2054cadbfcd38407
|
4
|
+
data.tar.gz: f4822e17db2570421cabd7a1859eb28c8d9217cd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8b64117d3461220161b04fb5fbfbccaafeb24719fa7861753447992eff3c077f6a390b55406f89dbed13dfe856cbbaf2167eb74969dec12732fe680ed444969a
|
7
|
+
data.tar.gz: 2b4b01f1d4bda02b04bc943aa84668d79217fb7b7a88372b53bf4c9cbbccf21d24546bc6e844b890c39c0614c6e333b56d9991f424f191bdd5fd59a158956054
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 dariogf
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# ScbiMultiGzReader
|
2
|
+
|
3
|
+
Zlib::GzipReader cannot read gz/gzip files made of multiple streams: it reads only the first stream and then closes the file without any warning, so you get only part of the file when you expected the whole of it.
|
4
|
+
|
5
|
+
This is a wrapper to read gz/gzip files composed of multiple streams (e.g. made by using `cat` to join several gz files into a bigger one).
|
6
|
+
|
7
|
+
For now it only provides the `readline` and `eof?` instance methods (plus `close`), the ones needed by scbi_fastq to read fastq files.
|
8
|
+
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
gem 'scbi_multi_gz_reader'
|
16
|
+
```
|
17
|
+
|
18
|
+
And then execute:
|
19
|
+
|
20
|
+
$ bundle
|
21
|
+
|
22
|
+
Or install it yourself as:
|
23
|
+
|
24
|
+
$ gem install scbi_multi_gz_reader
|
25
|
+
|
26
|
+
## Usage
|
27
|
+
|
28
|
+
# read checking for eof
|
29
|
+
|
30
|
+
file=MultiGzReader.new('file.gz')
|
31
|
+
|
32
|
+
while !file.eof? do
|
33
|
+
puts file.readline
|
34
|
+
end
|
35
|
+
|
36
|
+
file.close
|
37
|
+
|
38
|
+
------
|
39
|
+
|
40
|
+
# read checking for res.nil?
|
41
|
+
file=MultiGzReader.new('file.gz')
|
42
|
+
|
43
|
+
loop do
|
44
|
+
res=file.readline
|
45
|
+
break if res.nil?
|
46
|
+
puts res
|
47
|
+
end
|
48
|
+
|
49
|
+
file.close
|
50
|
+
|
51
|
+
|
52
|
+
## Contributing
|
53
|
+
|
54
|
+
1. Fork it ( https://github.com/[my-github-username]/scbi_multi_gz_reader/fork )
|
55
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
56
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
57
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
58
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
# Line reader for gz/gzip files made of several concatenated gzip streams
# (for example the result of joining some .gz files with cat).
#
# One Zlib::GzipReader is attached to one stream at a time. When that stream
# is exhausted, a new reader is attached to whatever follows it in the file,
# so the caller sees a single continuous sequence of lines.
class MultiGzReader

  # Opens +file_name+ and attaches a gzip reader to its first stream.
  #
  # file_name:: path of the gz/gzip file to read.
  #
  # Errors from File.open and Zlib::GzipReader.new are propagated; in the
  # second case the file handle is closed first so it does not leak.
  def initialize(file_name)
    # Binary mode: the compressed bytes must reach zlib untranslated.
    @file = File.open(file_name, 'rb')
    begin
      @io = Zlib::GzipReader.new(@file)
    rescue StandardError
      @file.close
      raise
    end
  end

  # Returns the next line, moving on to the next gzip stream when the current
  # one is exhausted. Returns nil once every stream has been read, and keeps
  # returning nil on further calls.
  def readline
    # The reader is left finished (closed) only after the last stream, so
    # this loop ends either with a line or at the real end of the file.
    until @io.closed?
      begin
        return @io.readline
      rescue EOFError
        # End of the current stream: look for another one behind it.
        next_stream
      end
    end

    nil
  end

  # True when there is nothing more to read: the current gzip reader has no
  # unread trailing bytes and is finished or at its end, and the underlying
  # file is closed or at its end.
  def eof?
    leftover = @io.unused
    (leftover.nil? || leftover.empty?) &&
      (@io.closed? || @io.eof?) &&
      (@file.closed? || @file.eof?)
  end

  # Closes the underlying file. Calling it more than once is harmless.
  def close
    @file.close unless @file.closed?
  end

  private

  # Finishes the exhausted gzip reader and, when the file holds more data,
  # attaches a new reader to it. At the end of the file @io is left finished.
  def next_stream
    # Bytes the reader pulled from the file but that lie past the end of its
    # own stream; they belong to whatever comes next.
    leftover = @io.unused
    @io.finish

    # Give those bytes back to the file before looking for the next stream.
    @file.pos -= leftover.bytesize if leftover

    # Decide from the file position, not from +leftover+: a stream can end
    # exactly where the last raw read ended, leaving nothing unused even
    # though more streams follow.
    @io = Zlib::GzipReader.new(@file) unless @file.eof?
  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for scbi_multi_gz_reader.

# Make the gem's own lib/ directory loadable so the version file can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'scbi_multi_gz_reader/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name          = 'scbi_multi_gz_reader'
  gem.version       = ScbiMultiGzReader::VERSION
  gem.authors       = ['dariogf']
  gem.email         = ['dariogf@scbi.uma.es']
  gem.summary       = 'Reads gz/gzip files with multiple streams in ruby'
  gem.description   = 'Wrapper to read gz/gzip files composed of multiple gz streams (eg, made by doing a cat to join some gz files in a bigger one.)'
  gem.homepage      = ''
  gem.license       = 'MIT'

  # Packaged files: everything tracked by git (NUL-separated listing).
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only dependencies
  gem.add_development_dependency 'bundler', '~> 1.7'
  gem.add_development_dependency 'rake', '~> 10.0'
end
|
data/test/empty.fastq.gz
ADDED
Binary file
|
data/test/fq_all2std.pl
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
#!/usr/local/bin/perl -w

# Author: lh3

use strict;
use warnings;
use Getopt::Std;

# Help text printed when the script is started without a command.
my $usage = qq(
Usage: fq_all2std.pl <command> <in.txt>

Command: scarf2std Convert SCARF format to the standard/Sanger FASTQ
fqint2std Convert FASTQ-int format to the standard/Sanger FASTQ
sol2std Convert Solexa/Illumina FASTQ to the standard FASTQ
fa2std Convert FASTA to the standard FASTQ
fq2fa Convert various FASTQ-like format to FASTA
instruction Explanation to different format
example Show examples of various formats

Note: Read/quality sequences MUST be presented in one line.
\n);

die($usage) if (@ARGV < 1);

# Solexa->Sanger quality conversion table.
# Entry i holds the Sanger quality character for the Solexa quality i-64,
# for Solexa qualities -64..64.
my @conv_table = map {
  chr(int(33 + 10*log(1+10**($_/10.0))/log(10)+.499))
} (-64..64);

# Command line parsing: the first argument names the conversion to run,
# the remaining arguments are left in \@ARGV for the chosen routine.
my $cmd = shift;
my %cmd_hash = (
  scarf2std   => \&scarf2std,
  fqint2std   => \&fqint2std,
  sol2std     => \&sol2std,
  fa2std      => \&fa2std,
  fq2fa       => \&fq2fa,
  example     => \&example,
  instruction => \&instruction,
);
my $handler = $cmd_hash{$cmd};
die("** Unrecognized command $cmd") unless (defined($handler));
$handler->();
|
40
|
+
|
41
|
+
# Convert FASTA records read from <> (files named in @ARGV, or STDIN) to
# FASTQ on STDOUT, giving every base the same quality.
# Option "-q N" sets that quality (default 25); it is written as chr(N + 33).
# The sequence of a record must be on the single line after its ">" header.
sub fa2std {
  my %opts = (q=>25);
  getopts('q:', \%opts);
  my $q = chr($opts{q} + 33);
  warn("-- The default quality is set to $opts{q}. Use '-q' at the command line to change the default.\n");
  while (<>) {
	if (/^>(\S+)/) {
	  my $name = $1;
	  my $seq = <>;
	  # A header on the very last line has no sequence: stop instead of
	  # printing a broken record.
	  last unless defined($seq);
	  # Strip the newline so the quality string is exactly as long as the
	  # sequence even when the final line of the input has no newline.
	  chomp($seq);
	  print "\@$name\n$seq\n+\n", $q x length($seq), "\n";
	}
  }
}
|
54
|
+
|
55
|
+
# Convert FASTQ-like records read from <> to FASTA on STDOUT: the "@name"
# header becomes ">name", the next line (the sequence) is copied as is, and
# the two lines after it are read and dropped.
sub fq2fa {
  while (my $header = <>) {
	next unless ($header =~ /^@(\S+)/);
	print ">$1\n";
	my $seq = <>;
	print $seq;
	# Skip the two remaining lines of the record.
	my $separator = <>;
	my $quality = <>;
  }
}
|
64
|
+
|
65
|
+
# Convert SCARF records (one colon-separated record per line) read from <>
# to FASTQ on STDOUT. Fields 0..4 joined with "_" form the read name,
# field 5 is the sequence and field 6 holds whitespace-separated integer
# qualities, each translated through @conv_table.
sub scarf2std {
  while (my $record = <>) {
	my @fields = split(':', $record);
	my $name = join('_', @fields[0..4]);
	print "\@$name\n$fields[5]\n+\n";
	my @scores = split(/\s/, $fields[6]);
	my $qual = join('', map { $conv_table[$_+64] } @scores);
	print "$qual\n";
  }
}
|
76
|
+
|
77
|
+
# Convert FASTQ-int records read from <> to FASTQ on STDOUT. The header and
# sequence lines are copied unchanged, the third line is dropped, and the
# whitespace-separated integer qualities of the fourth line are translated
# through @conv_table.
sub fqint2std {
  while (my $header = <>) {
	next unless ($header =~ /^@/);
	print $header;
	my $seq = <>;
	print $seq;
	my $separator = <>;   # read and discarded
	my $scores = <>;
	my $qual = join('', map { $conv_table[$_+64] } split(' ', $scores));
	print "+\n$qual\n";
  }
}
|
89
|
+
|
90
|
+
# Convert Solexa/Illumina FASTQ records read from <> to FASTQ on STDOUT.
# The header and sequence lines are copied unchanged, the third line is
# replaced by a bare "+", and every character of the quality line is
# translated through @conv_table, indexed by the character's ASCII code.
sub sol2std {
  while (my $header = <>) {
	next unless ($header =~ /^@/);
	my $seq = <>;
	my $separator = <>;   # read and discarded; a bare "+" is printed instead
	my $scores = <>;
	# Incomplete record at the end of the input: nothing sensible to print.
	last unless defined($scores);
	# Remove the line terminator first; otherwise the newline itself is
	# looked up in the table and adds a spurious character to the output.
	chomp($scores);
	my $qual = join('', map { $conv_table[ord($_)] } split('', $scores));
	print $header, $seq, "+\n$qual\n";
  }
}
|
103
|
+
|
104
|
+
# Print a short explanation of the FASTQ variants to STDOUT.
sub instruction {
  # The text starts with an empty line and ends with an empty line.
  print <<'END_OF_TEXT';

FASTQ format is first used in the Sanger Institute, and therefore
we take the Sanger specification as the standard FASTQ. Although
Solexa/Illumina reads file looks pretty much like the standard
FASTQ, they are different in that the qualities are scaled
differently. In the quality string, if you can see a character
with its ASCII code higher than 90, probably your file is in the
Solexa/Illumina format.

Sometimes we also use an integer, instead of a single character,
to explicitly show the qualities. In that case, negative
qualities indicates that Solexa/Illumina qualities are used.

END_OF_TEXT
}
|
122
|
+
|
123
|
+
# Print sample records in the solexa, scarf and fqint formats to STDOUT,
# each under an underlined heading.
sub example {
  # SCARF sample: one colon-separated record per line.
  my $scarf_sample = q{
USI-EAS50_1:4:2:710:120:GTCAAAGTAATAATAGGAGATTTGAGCTATTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 19 23 23 23 18 23 23 23
USI-EAS50_1:4:2:690:87:GTTTTTTTTTTTCTTTCCATTAATTTCCCTTT:23 23 23 23 23 23 23 23 23 23 23 23 12 23 23 23 23 23 16 23 23 9 18 23 23 23 12 23 18 23 23 23
USI-EAS50_1:4:2:709:32:GAGAAGTCAAACCTGTGTTAGAAATTTTATAC:23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 23 12 23 18 23 23 23 23 23 23 23 23
USI-EAS50_1:4:2:886:890:GCTTATTTAAAAATTTACTTGGGGTTGTCTTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
USI-EAS50_1:4:2:682:91:GGGTTTCTAGACTAAAGGGATTTAACAAGTTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 18 23 23 23 23
USI-EAS50_1:4:2:663:928:GAATTTGTTTGAAGAGTGTCATGGTCAGATCT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
};

  # FASTQ-int sample: qualities written as integers.
  my $fqint_sample = q{
@4_1_912_360
AAGGGGCTAGAGAAACACGTAATGAAGGGAGGACTC
+4_1_912_360
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 21 40 40 40 40 40 40 40 40 40 26 40 40 14 39 40 40
@4_1_54_483
TAATAAATGTGCTTCCTTGATGCATGTGCTATGATT
+4_1_54_483
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 40 40 40 28 40 40 40 40 40 40 16 40 40 5 40 40
@4_1_537_334
ATTGATGATGCTGTGCACCTAGCAAGAAGTTGCATA
+4_1_537_334
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 21 29 40 40 33 40 40 33 40 40 33 31 40 40 40 40 18 26 40 -2
@4_1_920_361
AACGGCACAATCCAGGTTGATGCCTACGGCGGGTAC
+4_1_920_361
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 40 40 40 40 40 40 40 40 31 40 40 40 40 40 40 15 5 -1 3
@4_1_784_155
AATGCATGCTTCGAATGGCATTCTCTTCAATCACGA
+4_1_784_155
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 31 40 40 40 40 40
@4_1_595_150
AAAGACGTGGCCAGATGGGTGGCCAAGTGCCCGACT
+4_1_595_150
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 40 40 40 40 40 40 40 20 40 40 40 40 40 14 40 40
};

  # Solexa sample: qualities written as single characters.
  my $sol_sample = q{
@SLXA-B3_649_FC8437_R1_1_1_610_79
GATGTGCAATACCTTTGTAGAGGAA
+SLXA-B3_649_FC8437_R1_1_1_610_79
YYYYYYYYYYYYYYYYYYWYWYYSU
@SLXA-B3_649_FC8437_R1_1_1_397_389
GGTTTGAGAAAGAGAAATGAGATAA
+SLXA-B3_649_FC8437_R1_1_1_397_389
YYYYYYYYYWYYYYWWYYYWYWYWW
@SLXA-B3_649_FC8437_R1_1_1_850_123
GAGGGTGTTGATCATGATGATGGCG
+SLXA-B3_649_FC8437_R1_1_1_850_123
YYYYYYYYYYYYYWYYWYYSYYYSY
@SLXA-B3_649_FC8437_R1_1_1_362_549
GGAAACAAAGTTTTTCTCAACATAG
+SLXA-B3_649_FC8437_R1_1_1_362_549
YYYYYYYYYYYYYYYYYYWWWWYWY
@SLXA-B3_649_FC8437_R1_1_1_183_714
GTATTATTTAATGGCATACACTCAA
+SLXA-B3_649_FC8437_R1_1_1_183_714
YYYYYYYYYYWYYYYWYWWUWWWQQ
};

  # Each section is a heading, an underline, then the sample text.
  print "\nsolexa\n======\n", $sol_sample,
        "\nscarf\n=====\n", $scarf_sample,
        "\nfqint\n=====\n", $fqint_sample,
        "\n";
}
|
data/test/gz.rb
ADDED
data/test/h1.fastq.gz
ADDED
Binary file
|
data/test/h2.fastq.gz
ADDED
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Tests for MultiGzReader, run against the gzip fixtures stored next to this
# file.
class ScbiMultiGzReaderTest < Test::Unit::TestCase

  # Multi-stream file, read until readline returns nil.
  def test_mini_multi_gz_reader
    assert_equal(28, count_lines_until_nil('minitest.fastq.gz'))
  end

  # Multi-stream file, read with a do-while loop driven by eof?.
  def test_mini_multi_gz_reader_EOF_while
    with_reader('minitest.fastq.gz') do |file|
      i = 0

      begin
        file.readline
        i += 1
      end while !file.eof?

      assert_equal(28, i)
    end
  end

  # Multi-stream file, read with a do-until loop driven by eof?.
  def test_mini_multi_gz_reader_EOF_until
    with_reader('minitest.fastq.gz') do |file|
      i = 0

      begin
        file.readline
        i += 1
      end until file.eof?

      assert_equal(28, i)
    end
  end

  # empty.fastq.gz fixture: eof? is expected to be true before the first
  # read, so the loop body never runs.
  def test_empty_multi_gz_reader_EOF_until
    with_reader('empty.fastq.gz') do |file|
      i = 0

      while !file.eof? do
        file.readline
        i += 1
      end

      assert_equal(0, i)
    end
  end

  # File with a single gz stream, read until readline returns nil.
  def test_mini_complete_gz
    # assert_equal takes the expected value first, then the actual one.
    assert_equal(28, count_lines_until_nil('minitest_full.fastq.gz'))
  end

  private

  # Opens the fixture +name+ (a file in this directory), yields the reader,
  # and closes it even when the block raises or an assertion fails.
  def with_reader(name)
    reader = MultiGzReader.new(File.join(File.dirname(__FILE__), name))
    begin
      yield reader
    ensure
      reader.close
    end
  end

  # Number of lines readline returns for fixture +name+ before it returns nil.
  def count_lines_until_nil(name)
    with_reader(name) do |reader|
      count = 0
      count += 1 until reader.readline.nil?
      count
    end
  end

end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scbi_multi_gz_reader
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- dariogf
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description: Wrapper to read gz/gzip files composed of multiple gz streams (eg, made
|
42
|
+
by doing a cat to join some gz files in a bigger one.)
|
43
|
+
email:
|
44
|
+
- dariogf@scbi.uma.es
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ".gitignore"
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- lib/scbi_multi_gz_reader.rb
|
55
|
+
- lib/scbi_multi_gz_reader/multi_gz_reader.rb
|
56
|
+
- lib/scbi_multi_gz_reader/version.rb
|
57
|
+
- scbi_multi_gz_reader.gemspec
|
58
|
+
- test/empty.fastq.gz
|
59
|
+
- test/fq_all2std.pl
|
60
|
+
- test/gz.rb
|
61
|
+
- test/h1.fastq.gz
|
62
|
+
- test/h2.fastq.gz
|
63
|
+
- test/minitest.fastq.gz
|
64
|
+
- test/minitest_full.fastq.gz
|
65
|
+
- test/scbi_multi_gz_reader_test.rb
|
66
|
+
- test/test_helper.rb
|
67
|
+
homepage: ''
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 2.2.2
|
88
|
+
signing_key:
|
89
|
+
specification_version: 4
|
90
|
+
summary: Reads gz/gzip files with multiple streams in ruby
|
91
|
+
test_files:
|
92
|
+
- test/empty.fastq.gz
|
93
|
+
- test/fq_all2std.pl
|
94
|
+
- test/gz.rb
|
95
|
+
- test/h1.fastq.gz
|
96
|
+
- test/h2.fastq.gz
|
97
|
+
- test/minitest.fastq.gz
|
98
|
+
- test/minitest_full.fastq.gz
|
99
|
+
- test/scbi_multi_gz_reader_test.rb
|
100
|
+
- test/test_helper.rb
|