scbi_multi_gz_reader 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +58 -0
- data/Rakefile +9 -0
- data/lib/scbi_multi_gz_reader/multi_gz_reader.rb +51 -0
- data/lib/scbi_multi_gz_reader/version.rb +3 -0
- data/lib/scbi_multi_gz_reader.rb +6 -0
- data/scbi_multi_gz_reader.gemspec +23 -0
- data/test/empty.fastq.gz +0 -0
- data/test/fq_all2std.pl +194 -0
- data/test/gz.rb +11 -0
- data/test/h1.fastq.gz +0 -0
- data/test/h2.fastq.gz +0 -0
- data/test/minitest.fastq.gz +0 -0
- data/test/minitest_full.fastq.gz +0 -0
- data/test/scbi_multi_gz_reader_test.rb +113 -0
- data/test/test_helper.rb +3 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bff6f483b2c9eb929aecd04e2054cadbfcd38407
|
4
|
+
data.tar.gz: f4822e17db2570421cabd7a1859eb28c8d9217cd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8b64117d3461220161b04fb5fbfbccaafeb24719fa7861753447992eff3c077f6a390b55406f89dbed13dfe856cbbaf2167eb74969dec12732fe680ed444969a
|
7
|
+
data.tar.gz: 2b4b01f1d4bda02b04bc943aa84668d79217fb7b7a88372b53bf4c9cbbccf21d24546bc6e844b890c39c0614c6e333b56d9991f424f191bdd5fd59a158956054
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 dariogf
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# ScbiMultiGzReader
|
2
|
+
|
3
|
+
Zlib::GzipReader cannot read gz/gzip files that contain multiple streams: it reads only the first stream and then stops without any warning, so you get only part of the file when you expected the whole of it.
|
4
|
+
|
5
|
+
This is a wrapper to read gz/gzip files composed of multiple streams (e.g., made by using `cat` to join several gz files into a bigger one).
|
6
|
+
|
7
|
+
For now it only has the `readline` and `eof?` instance methods, the ones needed by scbi_fastq to read fastq files.
|
8
|
+
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
gem 'scbi_multi_gz_reader'
|
16
|
+
```
|
17
|
+
|
18
|
+
And then execute:
|
19
|
+
|
20
|
+
$ bundle
|
21
|
+
|
22
|
+
Or install it yourself as:
|
23
|
+
|
24
|
+
$ gem install scbi_multi_gz_reader
|
25
|
+
|
26
|
+
## Usage
|
27
|
+
|
28
|
+
# read checking for eof
|
29
|
+
|
30
|
+
file=MultiGzReader.new('file.gz')
|
31
|
+
|
32
|
+
while !file.eof? do
|
33
|
+
puts file.readline
|
34
|
+
end
|
35
|
+
|
36
|
+
file.close
|
37
|
+
|
38
|
+
------
|
39
|
+
|
40
|
+
# read checking for res.nil?
|
41
|
+
file=MultiGzReader.new('file.gz')
|
42
|
+
|
43
|
+
loop do
|
44
|
+
res=file.readline
|
45
|
+
break if res.nil?
|
46
|
+
puts res
|
47
|
+
end
|
48
|
+
|
49
|
+
file.close
|
50
|
+
|
51
|
+
|
52
|
+
## Contributing
|
53
|
+
|
54
|
+
1. Fork it ( https://github.com/[my-github-username]/scbi_multi_gz_reader/fork )
|
55
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
56
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
57
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
58
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
class MultiGzReader
|
4
|
+
|
5
|
+
def initialize(file_name)
|
6
|
+
@file = File.open(file_name)
|
7
|
+
@io = Zlib::GzipReader.new @file
|
8
|
+
end
|
9
|
+
|
10
|
+
def readline
|
11
|
+
|
12
|
+
res=nil
|
13
|
+
|
14
|
+
begin
|
15
|
+
res=@io.readline
|
16
|
+
rescue EOFError => e
|
17
|
+
|
18
|
+
#reached END, check if there is more data to read
|
19
|
+
unused = @io.unused
|
20
|
+
|
21
|
+
@io.finish
|
22
|
+
|
23
|
+
# there is something left to read, open another stream
|
24
|
+
if !unused.nil?
|
25
|
+
#puts "FIN1, fpos: #{@file.pos}, unused: #{unused.length}, io_eof: #{@io.eof}, eof: #{@file.eof}"
|
26
|
+
|
27
|
+
@file.pos -= unused.length
|
28
|
+
@io = Zlib::GzipReader.new @file
|
29
|
+
#repeat the read so there is no eof error
|
30
|
+
res=readline
|
31
|
+
|
32
|
+
else
|
33
|
+
#no more data to read, return nil
|
34
|
+
res=nil
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
return res
|
39
|
+
end
|
40
|
+
|
41
|
+
def eof?
|
42
|
+
#nothing more to read
|
43
|
+
@io.unused.nil? && (@io.closed? || @io.eof?) && (@file.closed? || @file.eof?)
|
44
|
+
end
|
45
|
+
|
46
|
+
def close
|
47
|
+
#@io.finish
|
48
|
+
@file.close
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'scbi_multi_gz_reader/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "scbi_multi_gz_reader"
|
8
|
+
spec.version = ScbiMultiGzReader::VERSION
|
9
|
+
spec.authors = ["dariogf"]
|
10
|
+
spec.email = ["dariogf@scbi.uma.es"]
|
11
|
+
spec.summary = %q{Reads gz/gzip files with multiple streams in ruby}
|
12
|
+
spec.description = %q{Wrapper to read gz/gzip files composed of multiple gz streams (eg, made by doing a cat to join some gz files in a bigger one.)}
|
13
|
+
spec.homepage = ""
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
22
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
23
|
+
end
|
data/test/empty.fastq.gz
ADDED
Binary file
|
data/test/fq_all2std.pl
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
#!/usr/local/bin/perl -w
|
2
|
+
|
3
|
+
# Author: lh3
|
4
|
+
|
5
|
+
use strict;
|
6
|
+
use warnings;
|
7
|
+
use Getopt::Std;
|
8
|
+
|
9
|
+
my $usage = qq(
|
10
|
+
Usage: fq_all2std.pl <command> <in.txt>
|
11
|
+
|
12
|
+
Command: scarf2std Convert SCARF format to the standard/Sanger FASTQ
|
13
|
+
fqint2std Convert FASTQ-int format to the standard/Sanger FASTQ
|
14
|
+
sol2std Convert Solexa/Illumina FASTQ to the standard FASTQ
|
15
|
+
fa2std Convert FASTA to the standard FASTQ
|
16
|
+
fq2fa Convert various FASTQ-like format to FASTA
|
17
|
+
instruction Explanation to different format
|
18
|
+
example Show examples of various formats
|
19
|
+
|
20
|
+
Note: Read/quality sequences MUST be presented in one line.
|
21
|
+
\n);
|
22
|
+
|
23
|
+
die($usage) if (@ARGV < 1);
|
24
|
+
|
25
|
+
# Solexa->Sanger quality conversion table
|
26
|
+
my @conv_table;
|
27
|
+
for (-64..64) {
|
28
|
+
$conv_table[$_+64] = chr(int(33 + 10*log(1+10**($_/10.0))/log(10)+.499));
|
29
|
+
}
|
30
|
+
|
31
|
+
# parsing command line
|
32
|
+
my $cmd = shift;
|
33
|
+
my %cmd_hash = (scarf2std=>\&scarf2std, fqint2std=>\&fqint2std, sol2std=>\&sol2std, fa2std=>\&fa2std,
|
34
|
+
fq2fa=>\&fq2fa, example=>\&example, instruction=>\&instruction);
|
35
|
+
if (defined($cmd_hash{$cmd})) {
|
36
|
+
&{$cmd_hash{$cmd}};
|
37
|
+
} else {
|
38
|
+
die("** Unrecognized command $cmd");
|
39
|
+
}
|
40
|
+
|
41
|
+
sub fa2std {
|
42
|
+
my %opts = (q=>25);
|
43
|
+
getopts('q:', \%opts);
|
44
|
+
my $q = chr($opts{q} + 33);
|
45
|
+
warn("-- The default quality is set to $opts{q}. Use '-q' at the command line to change the default.\n");
|
46
|
+
while (<>) {
|
47
|
+
if (/^>(\S+)/) {
|
48
|
+
print "\@$1\n";
|
49
|
+
$_ = <>;
|
50
|
+
print "$_+\n", $q x (length($_)-1), "\n";
|
51
|
+
}
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
sub fq2fa {
|
56
|
+
while (<>) {
|
57
|
+
if (/^@(\S+)/) {
|
58
|
+
print ">$1\n";
|
59
|
+
$_ = <>; print;
|
60
|
+
<>; <>;
|
61
|
+
}
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
sub scarf2std {
|
66
|
+
while (<>) {
|
67
|
+
my @t = split(':', $_);
|
68
|
+
my $name = join('_', @t[0..4]);
|
69
|
+
print "\@$name\n$t[5]\n+\n";
|
70
|
+
my $qual = '';
|
71
|
+
@t = split(/\s/, $t[6]);
|
72
|
+
$qual .= $conv_table[$_+64] for (@t);
|
73
|
+
print "$qual\n";
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
sub fqint2std {
|
78
|
+
while (<>) {
|
79
|
+
if (/^@/) {
|
80
|
+
print;
|
81
|
+
$_ = <>; print; $_ = <>; $_ = <>;
|
82
|
+
my @t = split;
|
83
|
+
my $qual = '';
|
84
|
+
$qual .= $conv_table[$_+64] for (@t);
|
85
|
+
print "+\n$qual\n";
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
sub sol2std {
|
91
|
+
my $max = 0;
|
92
|
+
while (<>) {
|
93
|
+
if (/^@/) {
|
94
|
+
print;
|
95
|
+
$_ = <>; print; $_ = <>; $_ = <>;
|
96
|
+
my @t = split('', $_);
|
97
|
+
my $qual = '';
|
98
|
+
$qual .= $conv_table[ord($_)] for (@t);
|
99
|
+
print "+\n$qual\n";
|
100
|
+
}
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
sub instruction {
|
105
|
+
|
106
|
+
print "
|
107
|
+
FASTQ format is first used in the Sanger Institute, and therefore
|
108
|
+
we take the Sanger specification as the standard FASTQ. Although
|
109
|
+
Solexa/Illumina reads file looks pretty much like the standard
|
110
|
+
FASTQ, they are different in that the qualities are scaled
|
111
|
+
differently. In the quality string, if you can see a character
|
112
|
+
with its ASCII code higher than 90, probably your file is in the
|
113
|
+
Solexa/Illumina format.
|
114
|
+
|
115
|
+
Sometimes we also use an integer, instead of a single character,
|
116
|
+
to explicitly show the qualities. In that case, negative
|
117
|
+
qualities indicates that Solexa/Illumina qualities are used.
|
118
|
+
|
119
|
+
";
|
120
|
+
|
121
|
+
}
|
122
|
+
|
123
|
+
sub example {
|
124
|
+
my $exam_scarf = '
|
125
|
+
USI-EAS50_1:4:2:710:120:GTCAAAGTAATAATAGGAGATTTGAGCTATTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 19 23 23 23 18 23 23 23
|
126
|
+
USI-EAS50_1:4:2:690:87:GTTTTTTTTTTTCTTTCCATTAATTTCCCTTT:23 23 23 23 23 23 23 23 23 23 23 23 12 23 23 23 23 23 16 23 23 9 18 23 23 23 12 23 18 23 23 23
|
127
|
+
USI-EAS50_1:4:2:709:32:GAGAAGTCAAACCTGTGTTAGAAATTTTATAC:23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 23 12 23 18 23 23 23 23 23 23 23 23
|
128
|
+
USI-EAS50_1:4:2:886:890:GCTTATTTAAAAATTTACTTGGGGTTGTCTTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
|
129
|
+
USI-EAS50_1:4:2:682:91:GGGTTTCTAGACTAAAGGGATTTAACAAGTTT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 20 23 23 23 23 23 23 23 23 23 23 23 18 23 23 23 23
|
130
|
+
USI-EAS50_1:4:2:663:928:GAATTTGTTTGAAGAGTGTCATGGTCAGATCT:23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23
|
131
|
+
';
|
132
|
+
|
133
|
+
my $exam_fqint = '
|
134
|
+
@4_1_912_360
|
135
|
+
AAGGGGCTAGAGAAACACGTAATGAAGGGAGGACTC
|
136
|
+
+4_1_912_360
|
137
|
+
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 21 40 40 40 40 40 40 40 40 40 26 40 40 14 39 40 40
|
138
|
+
@4_1_54_483
|
139
|
+
TAATAAATGTGCTTCCTTGATGCATGTGCTATGATT
|
140
|
+
+4_1_54_483
|
141
|
+
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 40 40 40 28 40 40 40 40 40 40 16 40 40 5 40 40
|
142
|
+
@4_1_537_334
|
143
|
+
ATTGATGATGCTGTGCACCTAGCAAGAAGTTGCATA
|
144
|
+
+4_1_537_334
|
145
|
+
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 21 29 40 40 33 40 40 33 40 40 33 31 40 40 40 40 18 26 40 -2
|
146
|
+
@4_1_920_361
|
147
|
+
AACGGCACAATCCAGGTTGATGCCTACGGCGGGTAC
|
148
|
+
+4_1_920_361
|
149
|
+
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 40 40 40 40 40 40 40 40 31 40 40 40 40 40 40 15 5 -1 3
|
150
|
+
@4_1_784_155
|
151
|
+
AATGCATGCTTCGAATGGCATTCTCTTCAATCACGA
|
152
|
+
+4_1_784_155
|
153
|
+
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 31 40 40 40 40 40
|
154
|
+
@4_1_595_150
|
155
|
+
AAAGACGTGGCCAGATGGGTGGCCAAGTGCCCGACT
|
156
|
+
+4_1_595_150
|
157
|
+
40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 40 40 40 40 40 40 40 20 40 40 40 40 40 14 40 40
|
158
|
+
';
|
159
|
+
|
160
|
+
my $exam_sol = '
|
161
|
+
@SLXA-B3_649_FC8437_R1_1_1_610_79
|
162
|
+
GATGTGCAATACCTTTGTAGAGGAA
|
163
|
+
+SLXA-B3_649_FC8437_R1_1_1_610_79
|
164
|
+
YYYYYYYYYYYYYYYYYYWYWYYSU
|
165
|
+
@SLXA-B3_649_FC8437_R1_1_1_397_389
|
166
|
+
GGTTTGAGAAAGAGAAATGAGATAA
|
167
|
+
+SLXA-B3_649_FC8437_R1_1_1_397_389
|
168
|
+
YYYYYYYYYWYYYYWWYYYWYWYWW
|
169
|
+
@SLXA-B3_649_FC8437_R1_1_1_850_123
|
170
|
+
GAGGGTGTTGATCATGATGATGGCG
|
171
|
+
+SLXA-B3_649_FC8437_R1_1_1_850_123
|
172
|
+
YYYYYYYYYYYYYWYYWYYSYYYSY
|
173
|
+
@SLXA-B3_649_FC8437_R1_1_1_362_549
|
174
|
+
GGAAACAAAGTTTTTCTCAACATAG
|
175
|
+
+SLXA-B3_649_FC8437_R1_1_1_362_549
|
176
|
+
YYYYYYYYYYYYYYYYYYWWWWYWY
|
177
|
+
@SLXA-B3_649_FC8437_R1_1_1_183_714
|
178
|
+
GTATTATTTAATGGCATACACTCAA
|
179
|
+
+SLXA-B3_649_FC8437_R1_1_1_183_714
|
180
|
+
YYYYYYYYYYWYYYYWYWWUWWWQQ
|
181
|
+
';
|
182
|
+
|
183
|
+
print qq(
|
184
|
+
solexa
|
185
|
+
======
|
186
|
+
$exam_sol
|
187
|
+
scarf
|
188
|
+
=====
|
189
|
+
$exam_scarf
|
190
|
+
fqint
|
191
|
+
=====
|
192
|
+
$exam_fqint
|
193
|
+
);
|
194
|
+
}
|
data/test/gz.rb
ADDED
data/test/h1.fastq.gz
ADDED
Binary file
|
data/test/h2.fastq.gz
ADDED
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
class ScbiMultiGzReaderTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_mini_multi_gz_reader
|
10
|
+
|
11
|
+
# test a file with multiple gz streams
|
12
|
+
|
13
|
+
file=MultiGzReader.new(File.join(File.dirname(__FILE__),'minitest.fastq.gz'))
|
14
|
+
|
15
|
+
i=0
|
16
|
+
|
17
|
+
loop do
|
18
|
+
res=file.readline
|
19
|
+
#puts res
|
20
|
+
break if res.nil?
|
21
|
+
i = i+1
|
22
|
+
end
|
23
|
+
|
24
|
+
file.close
|
25
|
+
|
26
|
+
assert_equal(28,i)
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
def test_mini_multi_gz_reader_EOF_while
|
32
|
+
|
33
|
+
# test a file with multiple gz streams
|
34
|
+
|
35
|
+
file=MultiGzReader.new(File.join(File.dirname(__FILE__),'minitest.fastq.gz'))
|
36
|
+
|
37
|
+
i=0
|
38
|
+
|
39
|
+
begin
|
40
|
+
res=file.readline
|
41
|
+
#puts res
|
42
|
+
i = i+1
|
43
|
+
end while !file.eof?
|
44
|
+
|
45
|
+
file.close
|
46
|
+
|
47
|
+
assert_equal(28,i)
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_mini_multi_gz_reader_EOF_until
|
52
|
+
|
53
|
+
# test a file with multiple gz streams
|
54
|
+
|
55
|
+
file=MultiGzReader.new(File.join(File.dirname(__FILE__),'minitest.fastq.gz'))
|
56
|
+
|
57
|
+
i=0
|
58
|
+
|
59
|
+
begin
|
60
|
+
res=file.readline
|
61
|
+
#puts res
|
62
|
+
i = i+1
|
63
|
+
end until file.eof?
|
64
|
+
|
65
|
+
file.close
|
66
|
+
|
67
|
+
assert_equal(28,i)
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
def test_empty_multi_gz_reader_EOF_until
|
72
|
+
|
73
|
+
# test a file with multiple gz streams
|
74
|
+
|
75
|
+
file=MultiGzReader.new(File.join(File.dirname(__FILE__),'empty.fastq.gz'))
|
76
|
+
|
77
|
+
i=0
|
78
|
+
|
79
|
+
while !file.eof? do
|
80
|
+
res=file.readline
|
81
|
+
#puts res
|
82
|
+
i = i+1
|
83
|
+
end
|
84
|
+
|
85
|
+
file.close
|
86
|
+
|
87
|
+
assert_equal(0,i)
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
def test_mini_complete_gz
|
93
|
+
#test only one gz stream in file
|
94
|
+
|
95
|
+
file=MultiGzReader.new(File.join(File.dirname(__FILE__),'minitest_full.fastq.gz'))
|
96
|
+
|
97
|
+
i=0
|
98
|
+
|
99
|
+
loop do
|
100
|
+
res=file.readline
|
101
|
+
#puts res
|
102
|
+
break if res.nil?
|
103
|
+
i = i+1
|
104
|
+
end
|
105
|
+
|
106
|
+
file.close
|
107
|
+
|
108
|
+
assert_equal(i,28)
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
|
113
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scbi_multi_gz_reader
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- dariogf
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description: Wrapper to read gz/gzip files composed of multiple gz streams (eg, made
|
42
|
+
by doing a cat to join some gz files in a bigger one.)
|
43
|
+
email:
|
44
|
+
- dariogf@scbi.uma.es
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ".gitignore"
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- lib/scbi_multi_gz_reader.rb
|
55
|
+
- lib/scbi_multi_gz_reader/multi_gz_reader.rb
|
56
|
+
- lib/scbi_multi_gz_reader/version.rb
|
57
|
+
- scbi_multi_gz_reader.gemspec
|
58
|
+
- test/empty.fastq.gz
|
59
|
+
- test/fq_all2std.pl
|
60
|
+
- test/gz.rb
|
61
|
+
- test/h1.fastq.gz
|
62
|
+
- test/h2.fastq.gz
|
63
|
+
- test/minitest.fastq.gz
|
64
|
+
- test/minitest_full.fastq.gz
|
65
|
+
- test/scbi_multi_gz_reader_test.rb
|
66
|
+
- test/test_helper.rb
|
67
|
+
homepage: ''
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 2.2.2
|
88
|
+
signing_key:
|
89
|
+
specification_version: 4
|
90
|
+
summary: Reads gz/gzip files with multiple streams in ruby
|
91
|
+
test_files:
|
92
|
+
- test/empty.fastq.gz
|
93
|
+
- test/fq_all2std.pl
|
94
|
+
- test/gz.rb
|
95
|
+
- test/h1.fastq.gz
|
96
|
+
- test/h2.fastq.gz
|
97
|
+
- test/minitest.fastq.gz
|
98
|
+
- test/minitest_full.fastq.gz
|
99
|
+
- test/scbi_multi_gz_reader_test.rb
|
100
|
+
- test/test_helper.rb
|