bio-faster 0.4.4 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.4
1
+ 0.4.5
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-faster"
8
- s.version = "0.4.4"
8
+ s.version = "0.4.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Francesco Strozzi"]
12
- s.date = "2012-04-27"
12
+ s.date = "2012-06-13"
13
13
  s.description = "A fast parser for FastQ files"
14
14
  s.email = "francesco.strozzi@gmail.com"
15
15
  s.extensions = ["ext/mkrf_conf.rb"]
@@ -54,6 +54,7 @@ Gem::Specification.new do |s|
54
54
  "test/data/formats/illumina_full_range_as_sanger.fastq",
55
55
  "test/data/formats/illumina_full_range_as_solexa.fastq",
56
56
  "test/data/formats/illumina_full_range_original_illumina.fastq",
57
+ "test/data/formats/issue_2.fastq",
57
58
  "test/data/formats/longreads_as_illumina.fastq",
58
59
  "test/data/formats/longreads_as_sanger.fastq",
59
60
  "test/data/formats/longreads_as_solexa.fastq",
@@ -81,7 +82,7 @@ Gem::Specification.new do |s|
81
82
  s.homepage = "http://github.com/fstrozzi/bioruby-faster"
82
83
  s.licenses = ["MIT"]
83
84
  s.require_paths = ["lib"]
84
- s.rubygems_version = "1.8.15"
85
+ s.rubygems_version = "1.8.24"
85
86
  s.summary = "A fast parser for FastQ files"
86
87
 
87
88
  if s.respond_to? :specification_version then
@@ -95,23 +95,23 @@ int fastQ_iterator(FastQRecord *seq, int scale_factor) {
95
95
  if (!check_header(header,seq->line)) return -1; // check if the header format is correct
96
96
  // removing the @
97
97
  seq->id = alloc_and_copy(seq->id, seq->line+1);
98
-
99
98
  }
100
99
  else {
101
- if (check_bad_chars(seq->bad_chars,seq->line)) return -1; // check if quality or sequence includes bad characters
100
+ if ((i==1 || i==3) && (check_bad_chars(seq->bad_chars,seq->line))) return -1; // check if quality or sequence includes bad characters
102
101
  if (i==1) seq->seq = alloc_and_copy(seq->seq, seq->line);
103
102
  if (i==3) {
104
103
  seq->raw_quality = alloc_and_copy(seq->raw_quality, seq->line);
105
104
  int quality_length = strlen(seq->raw_quality);
106
105
  if(strlen(seq->seq) != strlen(seq->raw_quality)) return -2; // if sequence and quality are of different length the record is truncated
107
- int c = 0;
108
- seq->quality = initialize_int(seq->quality);
109
- seq->quality = malloc(sizeof (int)* quality_length);
110
- while(c < quality_length) {
111
- seq->quality[c] = *(seq->line + c) - scale_factor; // quality conversion
112
- c++;
113
- }
114
-
106
+ if (scale_factor != 0) {
107
+ int c = 0;
108
+ seq->quality = initialize_int(seq->quality);
109
+ seq->quality = malloc(sizeof (int)* quality_length);
110
+ while(c < quality_length) {
111
+ seq->quality[c] = *(seq->line + c) - scale_factor; // quality conversion
112
+ c++;
113
+ }
114
+ }
115
115
  }
116
116
 
117
117
  }
@@ -13,9 +13,8 @@ module Bio
13
13
 
14
14
  attr_accessor :file
15
15
  attr_accessor :encoding
16
- def initialize(file, encoding = :sanger)
16
+ def initialize(file)
17
17
  self.file = file
18
- self.encoding = encoding
19
18
  end
20
19
 
21
20
  class FastQRecord < FFI::Struct
@@ -32,23 +31,28 @@ module Bio
32
31
 
33
32
  attach_function :fastQ_iterator, [FastQRecord, :int], :int
34
33
 
35
- def each_record
34
+ def each_record(args = {:quality => :sanger}, &block)
36
35
  if self.file == :stdin
37
36
  self.file = "stdin"
38
37
  elsif !File.exists? self.file
39
38
  raise ArgumentError, "File #{self.file} does not exist"
40
39
  end
41
40
  record = FastQRecord.new
42
- scale_factor = nil
43
- case self.encoding
44
- when :sanger then scale_factor = 33
45
- when :solexa then scale_factor = 64
46
- end
47
41
  record[:filename] = FFI::MemoryPointer.from_string self.file
48
- while (result = Bio::Faster.fastQ_iterator(record,scale_factor)) == 1
49
- yield [record[:id].read_string,record[:seq].read_string,record[:quality].read_array_of_int(record[:raw_quality].read_string.length)]
50
- end
51
- case result
42
+ result = nil
43
+ case args[:quality]
44
+ when :sanger
45
+ scale_factor = 33
46
+ result = parse_fastq_with_quality_conversion(record, scale_factor, &block)
47
+ when :solexa
48
+ scale_factor = 64
49
+ result = parse_fastq_with_quality_conversion(record, scale_factor, &block)
50
+ when :raw
51
+ scale_factor = 0
52
+ result = parse_fastq(record, scale_factor, &block)
53
+ end
54
+
55
+ case result
52
56
  when -1 then raise RuntimeError, "Bad formatted FastQ file!"
53
57
  when -2 then raise RuntimeError, "Sequence or quality is truncated!"
54
58
  end
@@ -56,6 +60,21 @@ module Bio
56
60
 
57
61
  end
58
62
 
63
+ private
64
+
65
+ def parse_fastq_with_quality_conversion(record, scale_factor, &block)
66
+ while (result = Bio::Faster.fastQ_iterator(record,scale_factor)) == 1
67
+ yield [record[:id].read_string,record[:seq].read_string,record[:quality].read_array_of_int(record[:raw_quality].read_string.length)]
68
+ end
69
+ result
70
+ end
71
+
72
+ def parse_fastq(record, scale_factor, &block)
73
+ while (result = Bio::Faster.fastQ_iterator(record,scale_factor)) == 1
74
+ yield [record[:id].read_string,record[:seq].read_string,record[:raw_quality].read_string]
75
+ end
76
+ result
77
+ end
59
78
 
60
79
  end
61
- end
80
+ end
@@ -18,14 +18,14 @@ describe Bio::Faster do
18
18
  bioruby_data << [seq.entry_id,seq.seq,seq.qualities]
19
19
  end
20
20
  faster_data = []
21
- Bio::Faster.new(file, :solexa).each_record do |seq|
21
+ Bio::Faster.new(file).each_record(:quality => :solexa) do |seq|
22
22
  seq[0] = seq[0].split(" ").first
23
23
  faster_data << seq
24
24
  end
25
25
  faster_data.should == bioruby_data
26
26
  end
27
27
 
28
- it "should read different FastQ formats" do
28
+ it "should read different FastQ formats and convert quality scores" do
29
29
  files = Dir.glob(TEST_DATA+"/formats/*.fastq")
30
30
  files.each do |file|
31
31
  bioruby_data = []
@@ -42,6 +42,24 @@ describe Bio::Faster do
42
42
 
43
43
  end
44
44
 
45
+ it "should read different FastQ formats without converting quality scores" do
46
+
47
+ files = Dir.glob(TEST_DATA+"/formats/*.fastq")
48
+ files.each do |file|
49
+ bioruby_data = []
50
+ Bio::FlatFile.open(Bio::Fastq,File.open(file)).each_entry do |seq|
51
+ bioruby_data << [seq.entry_id,seq.seq,seq.quality_string]
52
+ end
53
+ faster_data = []
54
+ Bio::Faster.new(file).each_record(:quality => :raw) do |seq|
55
+ seq[0] = seq[0].split(" ").first
56
+ faster_data << seq
57
+ end
58
+ faster_data.should == bioruby_data
59
+ end
60
+
61
+ end
62
+
45
63
 
46
64
  it "can read from the standard input" do
47
65
  require 'digest/md5'
@@ -63,4 +81,4 @@ describe Bio::Faster do
63
81
  end
64
82
 
65
83
 
66
- end
84
+ end
@@ -0,0 +1,4 @@
1
+ @SRR098419.1 GDCLLRY15JDIA1
2
+ TCAGACGAGTGCGTATTACCGCGGCTGC
3
+ +SRR098419.1 GDCLLRY15JDIA1
4
+ IIIIIIIIIIIIIIIIIIIIIIIIIIII
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-faster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-27 00:00:00.000000000 Z
12
+ date: 2012-06-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ffi
16
- requirement: &2154695320 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2154695320
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: shoulda
27
- requirement: &2154694220 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ! '>='
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: '0'
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2154694220
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: bundler
38
- requirement: &2154693120 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ~>
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: 1.0.0
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2154693120
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.0.0
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: jeweler
49
- requirement: &2154691040 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ~>
@@ -54,10 +69,15 @@ dependencies:
54
69
  version: 1.6.4
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *2154691040
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 1.6.4
58
78
  - !ruby/object:Gem::Dependency
59
79
  name: rcov
60
- requirement: &2154704820 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
61
81
  none: false
62
82
  requirements:
63
83
  - - ! '>='
@@ -65,10 +85,15 @@ dependencies:
65
85
  version: '0'
66
86
  type: :development
67
87
  prerelease: false
68
- version_requirements: *2154704820
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
69
94
  - !ruby/object:Gem::Dependency
70
95
  name: bio
71
- requirement: &2154702860 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
72
97
  none: false
73
98
  requirements:
74
99
  - - ! '>='
@@ -76,10 +101,15 @@ dependencies:
76
101
  version: 1.4.2
77
102
  type: :development
78
103
  prerelease: false
79
- version_requirements: *2154702860
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: 1.4.2
80
110
  - !ruby/object:Gem::Dependency
81
111
  name: rspec
82
- requirement: &2154701200 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
83
113
  none: false
84
114
  requirements:
85
115
  - - ! '>='
@@ -87,10 +117,15 @@ dependencies:
87
117
  version: '0'
88
118
  type: :development
89
119
  prerelease: false
90
- version_requirements: *2154701200
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
91
126
  - !ruby/object:Gem::Dependency
92
127
  name: ffi
93
- requirement: &2154699060 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
94
129
  none: false
95
130
  requirements:
96
131
  - - ! '>='
@@ -98,7 +133,12 @@ dependencies:
98
133
  version: '0'
99
134
  type: :development
100
135
  prerelease: false
101
- version_requirements: *2154699060
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
102
142
  description: A fast parser for FastQ files
103
143
  email: francesco.strozzi@gmail.com
104
144
  executables: []
@@ -144,6 +184,7 @@ files:
144
184
  - test/data/formats/illumina_full_range_as_sanger.fastq
145
185
  - test/data/formats/illumina_full_range_as_solexa.fastq
146
186
  - test/data/formats/illumina_full_range_original_illumina.fastq
187
+ - test/data/formats/issue_2.fastq
147
188
  - test/data/formats/longreads_as_illumina.fastq
148
189
  - test/data/formats/longreads_as_sanger.fastq
149
190
  - test/data/formats/longreads_as_solexa.fastq
@@ -182,7 +223,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
182
223
  version: '0'
183
224
  segments:
184
225
  - 0
185
- hash: -2874746173653094102
226
+ hash: -1805779141213914087
186
227
  required_rubygems_version: !ruby/object:Gem::Requirement
187
228
  none: false
188
229
  requirements:
@@ -191,7 +232,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
232
  version: '0'
192
233
  requirements: []
193
234
  rubyforge_project:
194
- rubygems_version: 1.8.15
235
+ rubygems_version: 1.8.24
195
236
  signing_key:
196
237
  specification_version: 3
197
238
  summary: A fast parser for FastQ files