bio-faster 0.4.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.4
1
+ 0.4.5
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-faster"
8
- s.version = "0.4.4"
8
+ s.version = "0.4.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Francesco Strozzi"]
12
- s.date = "2012-04-27"
12
+ s.date = "2012-06-13"
13
13
  s.description = "A fast parser for FastQ files"
14
14
  s.email = "francesco.strozzi@gmail.com"
15
15
  s.extensions = ["ext/mkrf_conf.rb"]
@@ -54,6 +54,7 @@ Gem::Specification.new do |s|
54
54
  "test/data/formats/illumina_full_range_as_sanger.fastq",
55
55
  "test/data/formats/illumina_full_range_as_solexa.fastq",
56
56
  "test/data/formats/illumina_full_range_original_illumina.fastq",
57
+ "test/data/formats/issue_2.fastq",
57
58
  "test/data/formats/longreads_as_illumina.fastq",
58
59
  "test/data/formats/longreads_as_sanger.fastq",
59
60
  "test/data/formats/longreads_as_solexa.fastq",
@@ -81,7 +82,7 @@ Gem::Specification.new do |s|
81
82
  s.homepage = "http://github.com/fstrozzi/bioruby-faster"
82
83
  s.licenses = ["MIT"]
83
84
  s.require_paths = ["lib"]
84
- s.rubygems_version = "1.8.15"
85
+ s.rubygems_version = "1.8.24"
85
86
  s.summary = "A fast parser for FastQ files"
86
87
 
87
88
  if s.respond_to? :specification_version then
@@ -95,23 +95,23 @@ int fastQ_iterator(FastQRecord *seq, int scale_factor) {
95
95
  if (!check_header(header,seq->line)) return -1; // check if the header format is correct
96
96
  // removing the @
97
97
  seq->id = alloc_and_copy(seq->id, seq->line+1);
98
-
99
98
  }
100
99
  else {
101
- if (check_bad_chars(seq->bad_chars,seq->line)) return -1; // check if quality or sequence includes bad characters
100
+ if ((i==1 || i==3) && (check_bad_chars(seq->bad_chars,seq->line))) return -1; // check if quality or sequence includes bad characters
102
101
  if (i==1) seq->seq = alloc_and_copy(seq->seq, seq->line);
103
102
  if (i==3) {
104
103
  seq->raw_quality = alloc_and_copy(seq->raw_quality, seq->line);
105
104
  int quality_length = strlen(seq->raw_quality);
106
105
  if(strlen(seq->seq) != strlen(seq->raw_quality)) return -2; // if sequence and quality are of different length the record is truncated
107
- int c = 0;
108
- seq->quality = initialize_int(seq->quality);
109
- seq->quality = malloc(sizeof (int)* quality_length);
110
- while(c < quality_length) {
111
- seq->quality[c] = *(seq->line + c) - scale_factor; // quality conversion
112
- c++;
113
- }
114
-
106
+ if (scale_factor != 0) {
107
+ int c = 0;
108
+ seq->quality = initialize_int(seq->quality);
109
+ seq->quality = malloc(sizeof (int)* quality_length);
110
+ while(c < quality_length) {
111
+ seq->quality[c] = *(seq->line + c) - scale_factor; // quality conversion
112
+ c++;
113
+ }
114
+ }
115
115
  }
116
116
 
117
117
  }
@@ -13,9 +13,8 @@ module Bio
13
13
 
14
14
  attr_accessor :file
15
15
  attr_accessor :encoding
16
- def initialize(file, encoding = :sanger)
16
+ def initialize(file)
17
17
  self.file = file
18
- self.encoding = encoding
19
18
  end
20
19
 
21
20
  class FastQRecord < FFI::Struct
@@ -32,23 +31,28 @@ module Bio
32
31
 
33
32
  attach_function :fastQ_iterator, [FastQRecord, :int], :int
34
33
 
35
- def each_record
34
+ def each_record(args = {:quality => :sanger}, &block)
36
35
  if self.file == :stdin
37
36
  self.file = "stdin"
38
37
  elsif !File.exists? self.file
39
38
  raise ArgumentError, "File #{self.file} does not exist"
40
39
  end
41
40
  record = FastQRecord.new
42
- scale_factor = nil
43
- case self.encoding
44
- when :sanger then scale_factor = 33
45
- when :solexa then scale_factor = 64
46
- end
47
41
  record[:filename] = FFI::MemoryPointer.from_string self.file
48
- while (result = Bio::Faster.fastQ_iterator(record,scale_factor)) == 1
49
- yield [record[:id].read_string,record[:seq].read_string,record[:quality].read_array_of_int(record[:raw_quality].read_string.length)]
50
- end
51
- case result
42
+ result = nil
43
+ case args[:quality]
44
+ when :sanger
45
+ scale_factor = 33
46
+ result = parse_fastq_with_quality_conversion(record, scale_factor, &block)
47
+ when :solexa
48
+ scale_factor = 64
49
+ result = parse_fastq_with_quality_conversion(record, scale_factor, &block)
50
+ when :raw
51
+ scale_factor = 0
52
+ result = parse_fastq(record, scale_factor, &block)
53
+ end
54
+
55
+ case result
52
56
  when -1 then raise RuntimeError, "Bad formatted FastQ file!"
53
57
  when -2 then raise RuntimeError, "Sequence or quality is truncated!"
54
58
  end
@@ -56,6 +60,21 @@ module Bio
56
60
 
57
61
  end
58
62
 
63
+ private
64
+
65
+ def parse_fastq_with_quality_conversion(record, scale_factor, &block)
66
+ while (result = Bio::Faster.fastQ_iterator(record,scale_factor)) == 1
67
+ yield [record[:id].read_string,record[:seq].read_string,record[:quality].read_array_of_int(record[:raw_quality].read_string.length)]
68
+ end
69
+ result
70
+ end
71
+
72
+ def parse_fastq(record, scale_factor, &block)
73
+ while (result = Bio::Faster.fastQ_iterator(record,scale_factor)) == 1
74
+ yield [record[:id].read_string,record[:seq].read_string,record[:raw_quality].read_string]
75
+ end
76
+ result
77
+ end
59
78
 
60
79
  end
61
- end
80
+ end
@@ -18,14 +18,14 @@ describe Bio::Faster do
18
18
  bioruby_data << [seq.entry_id,seq.seq,seq.qualities]
19
19
  end
20
20
  faster_data = []
21
- Bio::Faster.new(file, :solexa).each_record do |seq|
21
+ Bio::Faster.new(file).each_record(:quality => :solexa) do |seq|
22
22
  seq[0] = seq[0].split(" ").first
23
23
  faster_data << seq
24
24
  end
25
25
  faster_data.should == bioruby_data
26
26
  end
27
27
 
28
- it "should read different FastQ formats" do
28
+ it "should read different FastQ formats and convert quality scores" do
29
29
  files = Dir.glob(TEST_DATA+"/formats/*.fastq")
30
30
  files.each do |file|
31
31
  bioruby_data = []
@@ -42,6 +42,24 @@ describe Bio::Faster do
42
42
 
43
43
  end
44
44
 
45
+ it "should read different FastQ formats without converting quality scores" do
46
+
47
+ files = Dir.glob(TEST_DATA+"/formats/*.fastq")
48
+ files.each do |file|
49
+ bioruby_data = []
50
+ Bio::FlatFile.open(Bio::Fastq,File.open(file)).each_entry do |seq|
51
+ bioruby_data << [seq.entry_id,seq.seq,seq.quality_string]
52
+ end
53
+ faster_data = []
54
+ Bio::Faster.new(file).each_record(:quality => :raw) do |seq|
55
+ seq[0] = seq[0].split(" ").first
56
+ faster_data << seq
57
+ end
58
+ faster_data.should == bioruby_data
59
+ end
60
+
61
+ end
62
+
45
63
 
46
64
  it "can read from the standard input" do
47
65
  require 'digest/md5'
@@ -63,4 +81,4 @@ describe Bio::Faster do
63
81
  end
64
82
 
65
83
 
66
- end
84
+ end
@@ -0,0 +1,4 @@
1
+ @SRR098419.1 GDCLLRY15JDIA1
2
+ TCAGACGAGTGCGTATTACCGCGGCTGC
3
+ +SRR098419.1 GDCLLRY15JDIA1
4
+ IIIIIIIIIIIIIIIIIIIIIIIIIIII
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-faster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-27 00:00:00.000000000 Z
12
+ date: 2012-06-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ffi
16
- requirement: &2154695320 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2154695320
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: shoulda
27
- requirement: &2154694220 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ! '>='
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: '0'
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2154694220
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: bundler
38
- requirement: &2154693120 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ~>
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: 1.0.0
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2154693120
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.0.0
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: jeweler
49
- requirement: &2154691040 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ~>
@@ -54,10 +69,15 @@ dependencies:
54
69
  version: 1.6.4
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *2154691040
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 1.6.4
58
78
  - !ruby/object:Gem::Dependency
59
79
  name: rcov
60
- requirement: &2154704820 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
61
81
  none: false
62
82
  requirements:
63
83
  - - ! '>='
@@ -65,10 +85,15 @@ dependencies:
65
85
  version: '0'
66
86
  type: :development
67
87
  prerelease: false
68
- version_requirements: *2154704820
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
69
94
  - !ruby/object:Gem::Dependency
70
95
  name: bio
71
- requirement: &2154702860 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
72
97
  none: false
73
98
  requirements:
74
99
  - - ! '>='
@@ -76,10 +101,15 @@ dependencies:
76
101
  version: 1.4.2
77
102
  type: :development
78
103
  prerelease: false
79
- version_requirements: *2154702860
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: 1.4.2
80
110
  - !ruby/object:Gem::Dependency
81
111
  name: rspec
82
- requirement: &2154701200 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
83
113
  none: false
84
114
  requirements:
85
115
  - - ! '>='
@@ -87,10 +117,15 @@ dependencies:
87
117
  version: '0'
88
118
  type: :development
89
119
  prerelease: false
90
- version_requirements: *2154701200
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
91
126
  - !ruby/object:Gem::Dependency
92
127
  name: ffi
93
- requirement: &2154699060 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
94
129
  none: false
95
130
  requirements:
96
131
  - - ! '>='
@@ -98,7 +133,12 @@ dependencies:
98
133
  version: '0'
99
134
  type: :development
100
135
  prerelease: false
101
- version_requirements: *2154699060
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
102
142
  description: A fast parser for FastQ files
103
143
  email: francesco.strozzi@gmail.com
104
144
  executables: []
@@ -144,6 +184,7 @@ files:
144
184
  - test/data/formats/illumina_full_range_as_sanger.fastq
145
185
  - test/data/formats/illumina_full_range_as_solexa.fastq
146
186
  - test/data/formats/illumina_full_range_original_illumina.fastq
187
+ - test/data/formats/issue_2.fastq
147
188
  - test/data/formats/longreads_as_illumina.fastq
148
189
  - test/data/formats/longreads_as_sanger.fastq
149
190
  - test/data/formats/longreads_as_solexa.fastq
@@ -182,7 +223,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
182
223
  version: '0'
183
224
  segments:
184
225
  - 0
185
- hash: -2874746173653094102
226
+ hash: -1805779141213914087
186
227
  required_rubygems_version: !ruby/object:Gem::Requirement
187
228
  none: false
188
229
  requirements:
@@ -191,7 +232,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
232
  version: '0'
192
233
  requirements: []
193
234
  rubyforge_project:
194
- rubygems_version: 1.8.15
235
+ rubygems_version: 1.8.24
195
236
  signing_key:
196
237
  specification_version: 3
197
238
  summary: A fast parser for FastQ files