dna 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +1 -1
- data/Gemfile +2 -2
- data/dna.gemspec +8 -6
- data/lib/dna/dna.rb +0 -1
- data/lib/dna/phred.rb +50 -0
- data/lib/dna/record.rb +2 -0
- data/lib/dna/version.rb +2 -2
- data/spec/phred_spec.rb +75 -0
- metadata +39 -49
data/.rspec
CHANGED
@@ -1 +1 @@
|
|
1
|
-
--color
|
1
|
+
--color -f d
|
data/Gemfile
CHANGED
data/dna.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "dna"
|
8
|
-
s.version = "0.1.2"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-12-14"
|
13
13
|
s.description = "Simple FASTA/FASTQ/QSEQ parser library for Ruby."
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["dna"]
|
@@ -26,6 +26,7 @@ Gem::Specification.new do |s|
|
|
26
26
|
"lib/dna/parsers/fasta.rb",
|
27
27
|
"lib/dna/parsers/fastq.rb",
|
28
28
|
"lib/dna/parsers/qseq.rb",
|
29
|
+
"lib/dna/phred.rb",
|
29
30
|
"lib/dna/record.rb",
|
30
31
|
"lib/dna/version.rb",
|
31
32
|
"readme.md",
|
@@ -35,25 +36,26 @@ Gem::Specification.new do |s|
|
|
35
36
|
"spec/data/test.fastq",
|
36
37
|
"spec/data/test.qseq",
|
37
38
|
"spec/dna_spec.rb",
|
39
|
+
"spec/phred_spec.rb",
|
38
40
|
"spec/record_spec.rb",
|
39
41
|
"spec/spec_helper.rb"
|
40
42
|
]
|
41
43
|
s.homepage = "http://audy.github.com/dna"
|
42
44
|
s.licenses = ["MIT"]
|
43
45
|
s.require_paths = ["lib"]
|
44
|
-
s.rubygems_version = "1.8.
|
46
|
+
s.rubygems_version = "1.8.23"
|
45
47
|
s.summary = "Simple FASTA/FASTQ/QSEQ parser library for Ruby"
|
46
48
|
|
47
49
|
if s.respond_to? :specification_version then
|
48
50
|
s.specification_version = 3
|
49
51
|
|
50
52
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
51
|
-
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
53
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
52
54
|
else
|
53
|
-
s.add_dependency(%q<jeweler>, [">= 0"])
|
55
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
54
56
|
end
|
55
57
|
else
|
56
|
-
s.add_dependency(%q<jeweler>, [">= 0"])
|
58
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
57
59
|
end
|
58
60
|
end
|
59
61
|
|
data/lib/dna/dna.rb
CHANGED
data/lib/dna/phred.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#
|
2
|
+
# Phred - intended to be used as a mixin to add phred-score
|
3
|
+
# conversions to integers with byte-offset taken into account
|
4
|
+
# and also for conversion from byte to probability.
|
5
|
+
#
|
6
|
+
# http://en.wikipedia.org/wiki/FASTQ_format
|
7
|
+
#
|
8
|
+
module Phred
|
9
|
+
|
10
|
+
formats = {
|
11
|
+
:illumina => { :offset => 33, :prob_func => Proc.new { |q| 10**(q/-10.0) } },
|
12
|
+
:sanger => { :offset => 33, :prob_func => Proc.new { |q| 10**(q/-10.0) } },
|
13
|
+
:solexa => { :offset => 64, :prob_func => Proc.new { |q| (10**(q/-10.0))/(1 + (10**(q/-10.0))) } },
|
14
|
+
}
|
15
|
+
|
16
|
+
#
|
17
|
+
# use metaprogramming to create methods for converting
|
18
|
+
# raw ascii quality string to quality scores and probabilities
|
19
|
+
#
|
20
|
+
# Do this without using method_missing so that the methods
|
21
|
+
# are visible.
|
22
|
+
#
|
23
|
+
formats.each do |type, props|
|
24
|
+
offset = props[:offset]
|
25
|
+
prob_func = props[:prob_func]
|
26
|
+
|
27
|
+
define_method "#{type}_scores" do
|
28
|
+
qualities_with_offset(offset)
|
29
|
+
end
|
30
|
+
|
31
|
+
define_method "#{type}_probabilities" do
|
32
|
+
probabilities_with_offset(offset, prob_func)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
extend self
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def qualities_with_offset(offset)
|
41
|
+
@qualities.each_byte.map { |q| q - offset }
|
42
|
+
end
|
43
|
+
|
44
|
+
def probabilities_with_offset(offset, func)
|
45
|
+
qualities_with_offset(offset).map do |q|
|
46
|
+
func.call(q)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
data/lib/dna/record.rb
CHANGED
data/lib/dna/version.rb
CHANGED
data/spec/phred_spec.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
class TestRecord
|
2
|
+
include Phred
|
3
|
+
attr_accessor :qualities
|
4
|
+
end
|
5
|
+
|
6
|
+
##
|
7
|
+
# Definition of different FASTQ formats for Phred scores
|
8
|
+
# as defined in the wikipedia article for the FASTQ format.
|
9
|
+
#
|
10
|
+
# Each format definition needs a :string, a :scores and a :probabilities
|
11
|
+
# Conversion between these properties is automatically tested by the
|
12
|
+
# test suite.
|
13
|
+
#
|
14
|
+
## Solexa and Illumina <= 1.3 (we just call it Solexa here)
|
15
|
+
#
|
16
|
+
# - 0-62 with ascii offset 64
|
17
|
+
# - Probability of error:
|
18
|
+
# `Q = -10log10(P/(1-P))`
|
19
|
+
# Solve for P yourself :P
|
20
|
+
#
|
21
|
+
## Sanger
|
22
|
+
#
|
23
|
+
# - 0-93 with ascii offset 33
|
24
|
+
# - Probability of error given by p = 10**(-Q/10)
|
25
|
+
# - Quality = -10log10(P)
|
26
|
+
#
|
27
|
+
## Illumina 1.3+
|
28
|
+
#
|
29
|
+
# Illumina-formatted quality scores are the same as sanger except:
|
30
|
+
# - 0 and 1 are not used
|
31
|
+
# - 2 is reserved for B which just means 'really bad, do not use'
|
32
|
+
#
|
33
|
+
## 454
|
34
|
+
#
|
35
|
+
# - No one uses 454.
|
36
|
+
#
|
37
|
+
#
|
38
|
+
format_definitions = {
|
39
|
+
:solexa => {
|
40
|
+
:string => (0..62).map { |q| (q + 64).chr }.join,
|
41
|
+
:scores => (0..62).to_a,
|
42
|
+
:probabilities => (0..62).map { |q| (10**((q)/-10.0))/(1 + 10**(q/-10.0)) }
|
43
|
+
},
|
44
|
+
|
45
|
+
:illumina => {
|
46
|
+
:string => (3..93).map { |q| (q + 33).chr }.join + 'B'*4,
|
47
|
+
:scores => (3..93).to_a + [33, 33, 33, 33],
|
48
|
+
:probabilities => ((3..93).to_a + [33, 33, 33, 33]).map { |q| 10**((q)/-10.0) }
|
49
|
+
},
|
50
|
+
|
51
|
+
:sanger => {
|
52
|
+
:string => (0..93).map { |q| (q + 33).chr }.join,
|
53
|
+
:scores => (0..93).to_a,
|
54
|
+
:probabilities => (0..93).map { |q| 10**((q)/-10.0) }
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
describe Phred do
|
59
|
+
|
60
|
+
let (:test_record) { TestRecord.new }
|
61
|
+
|
62
|
+
format_definitions.each_pair do |type, properties|
|
63
|
+
|
64
|
+
it "should properly convert #{type} raw ascii PHRED scores to integers" do
|
65
|
+
test_record.qualities = properties[:string]
|
66
|
+
test_record.send(:"#{type}_scores").should == properties[:scores]
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should properly convert #{type.to_s} raw ascii PHRED scores to probabilities" do
|
70
|
+
test_record.qualities = properties[:string]
|
71
|
+
test_record.send(:"#{type}_probabilities").should == properties[:probabilities]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
metadata
CHANGED
@@ -1,45 +1,39 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: dna
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 2
|
10
|
-
version: 0.1.2
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Austin G. Davis-Richardson
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
type: :development
|
22
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
-
none: false
|
24
|
-
requirements:
|
25
|
-
- - ">="
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
hash: 3
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
version: "0"
|
31
|
-
version_requirements: *id001
|
12
|
+
date: 2012-12-14 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
32
15
|
name: jeweler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.8.4
|
22
|
+
type: :development
|
33
23
|
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.8.4
|
34
30
|
description: Simple FASTA/FASTQ/QSEQ parser library for Ruby.
|
35
31
|
email: harekrishna@gmail.com
|
36
|
-
executables:
|
32
|
+
executables:
|
37
33
|
- dna
|
38
34
|
extensions: []
|
39
|
-
|
40
35
|
extra_rdoc_files: []
|
41
|
-
|
42
|
-
files:
|
36
|
+
files:
|
43
37
|
- .rspec
|
44
38
|
- .rvmrc
|
45
39
|
- .travis.yml
|
@@ -52,6 +46,7 @@ files:
|
|
52
46
|
- lib/dna/parsers/fasta.rb
|
53
47
|
- lib/dna/parsers/fastq.rb
|
54
48
|
- lib/dna/parsers/qseq.rb
|
49
|
+
- lib/dna/phred.rb
|
55
50
|
- lib/dna/record.rb
|
56
51
|
- lib/dna/version.rb
|
57
52
|
- readme.md
|
@@ -61,40 +56,35 @@ files:
|
|
61
56
|
- spec/data/test.fastq
|
62
57
|
- spec/data/test.qseq
|
63
58
|
- spec/dna_spec.rb
|
59
|
+
- spec/phred_spec.rb
|
64
60
|
- spec/record_spec.rb
|
65
61
|
- spec/spec_helper.rb
|
66
62
|
homepage: http://audy.github.com/dna
|
67
|
-
licenses:
|
63
|
+
licenses:
|
68
64
|
- MIT
|
69
65
|
post_install_message:
|
70
66
|
rdoc_options: []
|
71
|
-
|
72
|
-
require_paths:
|
67
|
+
require_paths:
|
73
68
|
- lib
|
74
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
70
|
none: false
|
76
|
-
requirements:
|
77
|
-
- -
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
|
80
|
-
segments:
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
segments:
|
81
76
|
- 0
|
82
|
-
|
83
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
hash: -74029819579817896
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
79
|
none: false
|
85
|
-
requirements:
|
86
|
-
- -
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
|
89
|
-
segments:
|
90
|
-
- 0
|
91
|
-
version: "0"
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
92
84
|
requirements: []
|
93
|
-
|
94
85
|
rubyforge_project:
|
95
|
-
rubygems_version: 1.8.
|
86
|
+
rubygems_version: 1.8.23
|
96
87
|
signing_key:
|
97
88
|
specification_version: 3
|
98
89
|
summary: Simple FASTA/FASTQ/QSEQ parser library for Ruby
|
99
90
|
test_files: []
|
100
|
-
|