dna 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +1 -1
- data/Gemfile +2 -2
- data/dna.gemspec +8 -6
- data/lib/dna/dna.rb +0 -1
- data/lib/dna/phred.rb +50 -0
- data/lib/dna/record.rb +2 -0
- data/lib/dna/version.rb +2 -2
- data/spec/phred_spec.rb +75 -0
- metadata +39 -49
data/.rspec
CHANGED
@@ -1 +1 @@
|
|
1
|
-
--color
|
1
|
+
--color -f d
|
data/Gemfile
CHANGED
data/dna.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "dna"
|
8
|
-
s.version = "0.1.2"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-12-14"
|
13
13
|
s.description = "Simple FASTA/FASTQ/QSEQ parser library for Ruby."
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["dna"]
|
@@ -26,6 +26,7 @@ Gem::Specification.new do |s|
|
|
26
26
|
"lib/dna/parsers/fasta.rb",
|
27
27
|
"lib/dna/parsers/fastq.rb",
|
28
28
|
"lib/dna/parsers/qseq.rb",
|
29
|
+
"lib/dna/phred.rb",
|
29
30
|
"lib/dna/record.rb",
|
30
31
|
"lib/dna/version.rb",
|
31
32
|
"readme.md",
|
@@ -35,25 +36,26 @@ Gem::Specification.new do |s|
|
|
35
36
|
"spec/data/test.fastq",
|
36
37
|
"spec/data/test.qseq",
|
37
38
|
"spec/dna_spec.rb",
|
39
|
+
"spec/phred_spec.rb",
|
38
40
|
"spec/record_spec.rb",
|
39
41
|
"spec/spec_helper.rb"
|
40
42
|
]
|
41
43
|
s.homepage = "http://audy.github.com/dna"
|
42
44
|
s.licenses = ["MIT"]
|
43
45
|
s.require_paths = ["lib"]
|
44
|
-
s.rubygems_version = "1.8.
|
46
|
+
s.rubygems_version = "1.8.23"
|
45
47
|
s.summary = "Simple FASTA/FASTQ/QSEQ parser library for Ruby"
|
46
48
|
|
47
49
|
if s.respond_to? :specification_version then
|
48
50
|
s.specification_version = 3
|
49
51
|
|
50
52
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
51
|
-
s.add_development_dependency(%q<jeweler>, ["
|
53
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
52
54
|
else
|
53
|
-
s.add_dependency(%q<jeweler>, ["
|
55
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
54
56
|
end
|
55
57
|
else
|
56
|
-
s.add_dependency(%q<jeweler>, ["
|
58
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
57
59
|
end
|
58
60
|
end
|
59
61
|
|
data/lib/dna/dna.rb
CHANGED
data/lib/dna/phred.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#
|
2
|
+
# Phred - intended to be used as a mixin to add phred-score
|
3
|
+
# conversions to integers with byte-offset taken into account
|
4
|
+
# and also for conversion from byte to probability.
|
5
|
+
#
|
6
|
+
# http://en.wikipedia.org/wiki/FASTQ_format
|
7
|
+
#
|
8
|
+
module Phred
|
9
|
+
|
10
|
+
formats = {
|
11
|
+
:illumina => { :offset => 33, :prob_func => Proc.new { |q| 10**(q/-10.0) } },
|
12
|
+
:sanger => { :offset => 33, :prob_func => Proc.new { |q| 10**(q/-10.0) } },
|
13
|
+
:solexa => { :offset => 64, :prob_func => Proc.new { |q| (10**(q/-10.0))/(1 + (10**(q/-10.0))) } },
|
14
|
+
}
|
15
|
+
|
16
|
+
#
|
17
|
+
# use metaprogramming to create methods for converting
|
18
|
+
# raw ascii quality string to quality scores and probabilities
|
19
|
+
#
|
20
|
+
# Do this without using method_missing so that the methods
|
21
|
+
# are visible.
|
22
|
+
#
|
23
|
+
formats.each do |type, props|
|
24
|
+
offset = props[:offset]
|
25
|
+
prob_func = props[:prob_func]
|
26
|
+
|
27
|
+
define_method "#{type}_scores" do
|
28
|
+
qualities_with_offset(offset)
|
29
|
+
end
|
30
|
+
|
31
|
+
define_method "#{type}_probabilities" do
|
32
|
+
probabilities_with_offset(offset, prob_func)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
extend self
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def qualities_with_offset(offset)
|
41
|
+
@qualities.each_byte.map { |q| q - offset }
|
42
|
+
end
|
43
|
+
|
44
|
+
def probabilities_with_offset(offset, func)
|
45
|
+
qualities_with_offset(offset).map do |q|
|
46
|
+
func.call(q)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
data/lib/dna/record.rb
CHANGED
data/lib/dna/version.rb
CHANGED
data/spec/phred_spec.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
class TestRecord
|
2
|
+
include Phred
|
3
|
+
attr_accessor :qualities
|
4
|
+
end
|
5
|
+
|
6
|
+
##
|
7
|
+
# Definition of different FASTQ formats for Phred scores
|
8
|
+
# as defined in the wikipedia article for the FASTQ format.
|
9
|
+
#
|
10
|
+
# Each format definition needs a :string, a :scores and a :probabilities
|
11
|
+
# Conversion between these properties is automatically tested by the
|
12
|
+
# test suite.
|
13
|
+
#
|
14
|
+
## Solexa and Illumina <= 1.3 (we just call it Solexa here)
|
15
|
+
#
|
16
|
+
# - 0-62 with ascii offset 64
|
17
|
+
# - Probability of error:
|
18
|
+
# `Q = -10log10(P/(1-P))`
|
19
|
+
# Solve for P yourself :P
|
20
|
+
#
|
21
|
+
## Sanger
|
22
|
+
#
|
23
|
+
# - 0-93 with ascii offset 33
|
24
|
+
# - Probability of error given by p = 10**(-Q/10)
|
25
|
+
# - Quality = -10log10(P)
|
26
|
+
#
|
27
|
+
## Illumina 1.3+
|
28
|
+
#
|
29
|
+
# Illumina-formatted quality scores are the same as sanger except:
|
30
|
+
# - 0 and 1 are not used
|
31
|
+
# - 2 is reserved for B which just means 'really bad, do not use'
|
32
|
+
#
|
33
|
+
## 454
|
34
|
+
#
|
35
|
+
# - No one uses 454.
|
36
|
+
#
|
37
|
+
#
|
38
|
+
format_definitions = {
|
39
|
+
:solexa => {
|
40
|
+
:string => (0..62).map { |q| (q + 64).chr }.join,
|
41
|
+
:scores => (0..62).to_a,
|
42
|
+
:probabilities => (0..62).map { |q| (10**((q)/-10.0))/(1 + 10**(q/-10.0)) }
|
43
|
+
},
|
44
|
+
|
45
|
+
:illumina => {
|
46
|
+
:string => (3..93).map { |q| (q + 33).chr }.join + 'B'*4,
|
47
|
+
:scores => (3..93).to_a + [33, 33, 33, 33],
|
48
|
+
:probabilities => ((3..93).to_a + [33, 33, 33, 33]).map { |q| 10**((q)/-10.0) }
|
49
|
+
},
|
50
|
+
|
51
|
+
:sanger => {
|
52
|
+
:string => (0..93).map { |q| (q + 33).chr }.join,
|
53
|
+
:scores => (0..93).to_a,
|
54
|
+
:probabilities => (0..93).map { |q| 10**((q)/-10.0) }
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
describe Phred do
|
59
|
+
|
60
|
+
let (:test_record) { TestRecord.new }
|
61
|
+
|
62
|
+
format_definitions.each_pair do |type, properties|
|
63
|
+
|
64
|
+
it "should properly convert #{type} raw ascii PHRED scores to integers" do
|
65
|
+
test_record.qualities = properties[:string]
|
66
|
+
test_record.send(:"#{type}_scores").should == properties[:scores]
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should properly convert #{type.to_s} raw ascii PHRED scores to probabilities" do
|
70
|
+
test_record.qualities = properties[:string]
|
71
|
+
test_record.send(:"#{type}_probabilities").should == properties[:probabilities]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
metadata
CHANGED
@@ -1,45 +1,39 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: dna
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 2
|
10
|
-
version: 0.1.2
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Austin G. Davis-Richardson
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
type: :development
|
22
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
-
none: false
|
24
|
-
requirements:
|
25
|
-
- - ">="
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
hash: 3
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
version: "0"
|
31
|
-
version_requirements: *id001
|
12
|
+
date: 2012-12-14 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
32
15
|
name: jeweler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.8.4
|
22
|
+
type: :development
|
33
23
|
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.8.4
|
34
30
|
description: Simple FASTA/FASTQ/QSEQ parser library for Ruby.
|
35
31
|
email: harekrishna@gmail.com
|
36
|
-
executables:
|
32
|
+
executables:
|
37
33
|
- dna
|
38
34
|
extensions: []
|
39
|
-
|
40
35
|
extra_rdoc_files: []
|
41
|
-
|
42
|
-
files:
|
36
|
+
files:
|
43
37
|
- .rspec
|
44
38
|
- .rvmrc
|
45
39
|
- .travis.yml
|
@@ -52,6 +46,7 @@ files:
|
|
52
46
|
- lib/dna/parsers/fasta.rb
|
53
47
|
- lib/dna/parsers/fastq.rb
|
54
48
|
- lib/dna/parsers/qseq.rb
|
49
|
+
- lib/dna/phred.rb
|
55
50
|
- lib/dna/record.rb
|
56
51
|
- lib/dna/version.rb
|
57
52
|
- readme.md
|
@@ -61,40 +56,35 @@ files:
|
|
61
56
|
- spec/data/test.fastq
|
62
57
|
- spec/data/test.qseq
|
63
58
|
- spec/dna_spec.rb
|
59
|
+
- spec/phred_spec.rb
|
64
60
|
- spec/record_spec.rb
|
65
61
|
- spec/spec_helper.rb
|
66
62
|
homepage: http://audy.github.com/dna
|
67
|
-
licenses:
|
63
|
+
licenses:
|
68
64
|
- MIT
|
69
65
|
post_install_message:
|
70
66
|
rdoc_options: []
|
71
|
-
|
72
|
-
require_paths:
|
67
|
+
require_paths:
|
73
68
|
- lib
|
74
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
70
|
none: false
|
76
|
-
requirements:
|
77
|
-
- -
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
|
80
|
-
segments:
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
segments:
|
81
76
|
- 0
|
82
|
-
|
83
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
hash: -74029819579817896
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
79
|
none: false
|
85
|
-
requirements:
|
86
|
-
- -
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
|
89
|
-
segments:
|
90
|
-
- 0
|
91
|
-
version: "0"
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
92
84
|
requirements: []
|
93
|
-
|
94
85
|
rubyforge_project:
|
95
|
-
rubygems_version: 1.8.
|
86
|
+
rubygems_version: 1.8.23
|
96
87
|
signing_key:
|
97
88
|
specification_version: 3
|
98
89
|
summary: Simple FASTA/FASTQ/QSEQ parser library for Ruby
|
99
90
|
test_files: []
|
100
|
-
|