dna 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.rspec CHANGED
@@ -1 +1 @@
1
- --color
1
+ --color -f d
data/Gemfile CHANGED
@@ -1,9 +1,9 @@
1
1
  source :rubygems
2
2
 
3
3
  group :test do
4
- gem 'rspec'
4
+ gem 'rspec', '~> 2.11.0'
5
5
  end
6
6
 
7
7
  group :development do
8
- gem 'jeweler'
8
+ gem 'jeweler', '~> 1.8.4'
9
9
  end
data/dna.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "dna"
8
- s.version = "0.1.2"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-17"
12
+ s.date = "2012-12-14"
13
13
  s.description = "Simple FASTA/FASTQ/QSEQ parser library for Ruby."
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["dna"]
@@ -26,6 +26,7 @@ Gem::Specification.new do |s|
26
26
  "lib/dna/parsers/fasta.rb",
27
27
  "lib/dna/parsers/fastq.rb",
28
28
  "lib/dna/parsers/qseq.rb",
29
+ "lib/dna/phred.rb",
29
30
  "lib/dna/record.rb",
30
31
  "lib/dna/version.rb",
31
32
  "readme.md",
@@ -35,25 +36,26 @@ Gem::Specification.new do |s|
35
36
  "spec/data/test.fastq",
36
37
  "spec/data/test.qseq",
37
38
  "spec/dna_spec.rb",
39
+ "spec/phred_spec.rb",
38
40
  "spec/record_spec.rb",
39
41
  "spec/spec_helper.rb"
40
42
  ]
41
43
  s.homepage = "http://audy.github.com/dna"
42
44
  s.licenses = ["MIT"]
43
45
  s.require_paths = ["lib"]
44
- s.rubygems_version = "1.8.24"
46
+ s.rubygems_version = "1.8.23"
45
47
  s.summary = "Simple FASTA/FASTQ/QSEQ parser library for Ruby"
46
48
 
47
49
  if s.respond_to? :specification_version then
48
50
  s.specification_version = 3
49
51
 
50
52
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
51
- s.add_development_dependency(%q<jeweler>, [">= 0"])
53
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
52
54
  else
53
- s.add_dependency(%q<jeweler>, [">= 0"])
55
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
54
56
  end
55
57
  else
56
- s.add_dependency(%q<jeweler>, [">= 0"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
57
59
  end
58
60
  end
59
61
 
data/lib/dna/dna.rb CHANGED
@@ -1,4 +1,3 @@
1
-
2
1
  ##
3
2
  # Dna
4
3
  #
data/lib/dna/phred.rb ADDED
@@ -0,0 +1,50 @@
1
+ #
2
+ # Phred - intended to be used as a mixin to add phred-score
3
+ # conversions to integers with byte-offset taken into account
4
+ # and also for conversion from byte to probability.
5
+ #
6
+ # http://en.wikipedia.org/wiki/FASTQ_format
7
+ #
8
+ module Phred
9
+
10
+ formats = {
11
+ :illumina => { :offset => 33, :prob_func => Proc.new { |q| 10**(q/-10.0) } },
12
+ :sanger => { :offset => 33, :prob_func => Proc.new { |q| 10**(q/-10.0) } },
13
+ :solexa => { :offset => 64, :prob_func => Proc.new { |q| (10**(q/-10.0))/(1 + (10**(q/-10.0))) } },
14
+ }
15
+
16
+ #
17
+ # use metaprogramming to create methods for converting
18
+ # raw ascii quality string to quality scores and probabilities
19
+ #
20
+ # Do this without using method_missing so that the methods
21
+ # are visible.
22
+ #
23
+ formats.each do |type, props|
24
+ offset = props[:offset]
25
+ prob_func = props[:prob_func]
26
+
27
+ define_method "#{type}_scores" do
28
+ qualities_with_offset(offset)
29
+ end
30
+
31
+ define_method "#{type}_probabilities" do
32
+ probabilities_with_offset(offset, prob_func)
33
+ end
34
+ end
35
+
36
+ extend self
37
+
38
+ private
39
+
40
+ def qualities_with_offset(offset)
41
+ @qualities.each_byte.map { |q| q - offset }
42
+ end
43
+
44
+ def probabilities_with_offset(offset, func)
45
+ qualities_with_offset(offset).map do |q|
46
+ func.call(q)
47
+ end
48
+ end
49
+
50
+ end
data/lib/dna/record.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  class Record # nucleotide record
2
+ include Phred
3
+
2
4
  def length
3
5
  @sequence.length
4
6
  end
data/lib/dna/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  class Dna
2
2
  class Version
3
3
  MAJOR = 0
4
- MINOR = 1
5
- PATCH = 2
4
+ MINOR = 2
5
+ PATCH = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
@@ -0,0 +1,75 @@
1
+ class TestRecord
2
+ include Phred
3
+ attr_accessor :qualities
4
+ end
5
+
6
+ ##
7
+ # Definition of different FASTQ formats for Phred scores
8
+ # as defined in the wikipedia article for the FASTQ format.
9
+ #
10
+ # Each format definition needs a :string, a :scores and a :probabilities
11
+ # Conversion between these properties is automatically tested by the
12
+ # test suite.
13
+ #
14
+ ## Solexa and Illumina <= 1.3 (we just call it Solexa here)
15
+ #
16
+ # - 0-62 with ascii offset 64
17
+ # - Probability of error:
18
+ # `Q = -10log10(P/(1-P))`
19
+ # Solve for P yourself :P
20
+ #
21
+ ## Sanger
22
+ #
23
+ # - 0-93 with ascii offset 33
24
+ # - Probability of error given by p = 10**(-Q/10)
25
+ # - Quality = -10log10(P)
26
+ #
27
+ ## Illumina 1.3+
28
+ #
29
+ # Illumina-formatted quality scores are the same as sanger except:
30
+ # - 0 and 1 are not used
31
+ # - 2 is reserved for B which just means 'really bad, do not use'
32
+ #
33
+ ## 454
34
+ #
35
+ # - No one uses 454.
36
+ #
37
+ #
38
+ format_definitions = {
39
+ :solexa => {
40
+ :string => (0..62).map { |q| (q + 64).chr }.join,
41
+ :scores => (0..62).to_a,
42
+ :probabilities => (0..62).map { |q| (10**((q)/-10.0))/(1 + 10**(q/-10.0)) }
43
+ },
44
+
45
+ :illumina => {
46
+ :string => (3..93).map { |q| (q + 33).chr }.join + 'B'*4,
47
+ :scores => (3..93).to_a + [33, 33, 33, 33],
48
+ :probabilities => ((3..93).to_a + [33, 33, 33, 33]).map { |q| 10**((q)/-10.0) }
49
+ },
50
+
51
+ :sanger => {
52
+ :string => (0..93).map { |q| (q + 33).chr }.join,
53
+ :scores => (0..93).to_a,
54
+ :probabilities => (0..93).map { |q| 10**((q)/-10.0) }
55
+ }
56
+ }
57
+
58
+ describe Phred do
59
+
60
+ let (:test_record) { TestRecord.new }
61
+
62
+ format_definitions.each_pair do |type, properties|
63
+
64
+ it "should properly convert #{type} raw ascii PHRED scores to integers" do
65
+ test_record.qualities = properties[:string]
66
+ test_record.send(:"#{type}_scores").should == properties[:scores]
67
+ end
68
+
69
+ it "should properly convert #{type.to_s} raw ascii PHRED scores to probabilities" do
70
+ test_record.qualities = properties[:string]
71
+ test_record.send(:"#{type}_probabilities").should == properties[:probabilities]
72
+ end
73
+ end
74
+
75
+ end
metadata CHANGED
@@ -1,45 +1,39 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: dna
3
- version: !ruby/object:Gem::Version
4
- hash: 31
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 2
10
- version: 0.1.2
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Austin G. Davis-Richardson
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-08-17 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- type: :development
22
- requirement: &id001 !ruby/object:Gem::Requirement
23
- none: false
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- hash: 3
28
- segments:
29
- - 0
30
- version: "0"
31
- version_requirements: *id001
12
+ date: 2012-12-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
32
15
  name: jeweler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.8.4
22
+ type: :development
33
23
  prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 1.8.4
34
30
  description: Simple FASTA/FASTQ/QSEQ parser library for Ruby.
35
31
  email: harekrishna@gmail.com
36
- executables:
32
+ executables:
37
33
  - dna
38
34
  extensions: []
39
-
40
35
  extra_rdoc_files: []
41
-
42
- files:
36
+ files:
43
37
  - .rspec
44
38
  - .rvmrc
45
39
  - .travis.yml
@@ -52,6 +46,7 @@ files:
52
46
  - lib/dna/parsers/fasta.rb
53
47
  - lib/dna/parsers/fastq.rb
54
48
  - lib/dna/parsers/qseq.rb
49
+ - lib/dna/phred.rb
55
50
  - lib/dna/record.rb
56
51
  - lib/dna/version.rb
57
52
  - readme.md
@@ -61,40 +56,35 @@ files:
61
56
  - spec/data/test.fastq
62
57
  - spec/data/test.qseq
63
58
  - spec/dna_spec.rb
59
+ - spec/phred_spec.rb
64
60
  - spec/record_spec.rb
65
61
  - spec/spec_helper.rb
66
62
  homepage: http://audy.github.com/dna
67
- licenses:
63
+ licenses:
68
64
  - MIT
69
65
  post_install_message:
70
66
  rdoc_options: []
71
-
72
- require_paths:
67
+ require_paths:
73
68
  - lib
74
- required_ruby_version: !ruby/object:Gem::Requirement
69
+ required_ruby_version: !ruby/object:Gem::Requirement
75
70
  none: false
76
- requirements:
77
- - - ">="
78
- - !ruby/object:Gem::Version
79
- hash: 3
80
- segments:
71
+ requirements:
72
+ - - ! '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ segments:
81
76
  - 0
82
- version: "0"
83
- required_rubygems_version: !ruby/object:Gem::Requirement
77
+ hash: -74029819579817896
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
79
  none: false
85
- requirements:
86
- - - ">="
87
- - !ruby/object:Gem::Version
88
- hash: 3
89
- segments:
90
- - 0
91
- version: "0"
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
92
84
  requirements: []
93
-
94
85
  rubyforge_project:
95
- rubygems_version: 1.8.24
86
+ rubygems_version: 1.8.23
96
87
  signing_key:
97
88
  specification_version: 3
98
89
  summary: Simple FASTA/FASTQ/QSEQ parser library for Ruby
99
90
  test_files: []
100
-