parse_fasta 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ae03847d56379d572d6118a176876a1e11a21a49
4
- data.tar.gz: ffb84a464f8f7057f3a363b2b872f4b60fcca9c2
3
+ metadata.gz: 689bddd57bc87d882ad8e8711acc84f0f1c33825
4
+ data.tar.gz: 85d056f1581a23cc25c1a60224fd2c177266ab23
5
5
  SHA512:
6
- metadata.gz: 0b6b3694de307b868df3b1d5b38b08e539e545827d8d5f47a8b41a550f60d6354164b0e563b6eacf39ab63bd844c2ab68578fc0bfbc5fd5a1f3a63b31a09cfc9
7
- data.tar.gz: 719f6fb5c112b06ecf969662617de3bdc02156a09fcb542ff0a9553de55f2d7d6400fc2d6384be27c780517b1ab95887991a8a22c9d05ab3070e7d0e2b05962f
6
+ metadata.gz: f68c1541107d1881177606398c63d09b7da2b04d7e0f30b3165ebd51cc7310caddbcba7788f72db71f21c778c5c1f022382a17e4b450cd172946534665f360a1
7
+ data.tar.gz: eb8a9bf47345d0ee4b164ef053da5ddb4cb6785d0c0a91424b2e4549d9231d2f55ab79f9355e0df8f924d51be7be738a95f5887b6333aa99c4a5a945941f34f3
data/README.md CHANGED
@@ -29,7 +29,7 @@ and over.
29
29
  ## Documentation ##
30
30
 
31
31
  Check out
32
- [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.3.0/frames)
32
+ [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.4.0/frames)
33
33
  to see the full documentation.
34
34
 
35
35
  ## Usage ##
@@ -58,6 +58,13 @@ Now we can parse fastq files as well!
58
58
 
59
59
  ## Versions ##
60
60
 
61
+ ### 1.4.0 ###
62
+
63
+ Added methods:
64
+
65
+ Sequence.base_counts
66
+ Sequence.base_frequencies
67
+
61
68
  ### 1.3.0 ###
62
69
 
63
70
  Add additional functionality to `each_record` method.
@@ -25,7 +25,8 @@ class Sequence < String
25
25
  # Calculates GC content by dividing the count of G + C by the count
26
26
  # of G + C + T + A + U. If there are both T's and U's in the
27
27
  # Sequence, things will get weird, but then again, that wouldn't
28
- # happen, now would it!
28
+ # happen, now would it! Ambiguous bases are ignored, similar to
29
+ # BioRuby.
29
30
  #
30
31
  # @example Get GC of a Sequence
31
32
  # Sequence.new('ACTg').gc #=> 0.5
@@ -46,7 +47,77 @@ class Sequence < String
46
47
  u = s.count('u')
47
48
 
48
49
  return 0 if c + g + t + a + u == 0
49
- return (c + g).quo(c + g + t + a + u).to_f
50
+ return (c + g) / (c + g + t + a + u).to_f
50
51
  end
51
52
 
53
+ # Returns a map of base counts
54
+ #
55
+ # This method will check if the sequence is DNA or RNA and return a
56
+ # count map appropriate for each. If a truthy argument is given, the
57
+ # count of ambiguous bases will be returned as well.
58
+ #
59
+ # If a sequence has both T and U present, will warn the user and
60
+ # keep going. Will return a map with counts of both, however.
61
+ #
62
+ # @example Get base counts of DNA sequence without ambiguous bases
63
+ # Sequence.new('AcTGn').base_counts
64
+ # #=> { a: 1, c: 1, t: 1, g: 1 }
65
+ # @example Get base counts of DNA sequence with ambiguous bases
66
+ # Sequence.new('AcTGn').base_counts(true)
67
+ # #=> { a: 1, c: 1, t: 1, g: 1, n: 1 }
68
+ # @example Get base counts of RNA sequence without ambiguous bases
69
+ # Sequence.new('AcUGn').base_counts
70
+ # #=> { a: 1, c: 1, u: 1, g: 1 }
71
+ # @example Get base counts of RNA sequence with ambiguous bases
72
+ # Sequence.new('AcUGn').base_counts(true)
73
+ # #=> { a: 1, c: 1, u: 1, g: 1, n: 1 }
74
+ #
75
+ # @return [Hash] A hash with base as key, count as value
76
+ def base_counts(count_ambiguous_bases=nil)
77
+ s = self.downcase
78
+ t = s.count('t')
79
+ u = s.count('u')
80
+ counts = { a: s.count('a'), c: s.count('c'), g: s.count('g') }
81
+
82
+ if t > 0 && u == 0
83
+ counts[:t] = t
84
+ elsif t == 0 && u > 0
85
+ counts[:u] = u
86
+ elsif t > 0 && u > 0
87
+ warn('ERROR: A sequence contains both T and U')
88
+ counts[:t], counts[:u] = t, u
89
+ end
90
+
91
+ counts[:n] = s.count('n') if count_ambiguous_bases
92
+
93
+ counts
94
+ end
95
+
96
+ # Returns a map of base frequencies
97
+ #
98
+ # Counts bases with the `base_counts` method, then divides each
99
+ # count by the total bases counted to give frequency for each
100
+ # base. If a truthy argument is given, ambiguous bases will be
101
+ # included in the total and their frequency reported. Can discern
102
+ # between DNA and RNA.
103
+ #
104
+ # If default or falsy argument is given, ambiguous bases will not be
105
+ # counted in the total base count and their frequency will not be
106
+ # given.
107
+ #
108
+ # @example Get base frequencies of DNA sequence without ambiguous bases
109
+ # Sequence.new('AcTGn').base_frequencies
110
+ # #=> { a: 0.25, c: 0.25, t: 0.25, g: 0.25 }
111
+ # @example Get base frequencies of DNA sequence with ambiguous bases
112
+ # Sequence.new('AcTGn').base_frequencies(true)
113
+ # #=> { a: 0.2, c: 0.2, t: 0.2, g: 0.2, n: 0.2 }
114
+ #
115
+ # @return [Hash] A hash with base as key, frequency as value
116
+ def base_frequencies(count_ambiguous_bases=nil)
117
+ base_counts = self.base_counts(count_ambiguous_bases)
118
+ total_bases = base_counts.values.reduce(:+).to_f
119
+ base_freqs =
120
+ base_counts.map { |base, count| [base, count/total_bases] }.flatten
121
+ Hash[*base_freqs]
122
+ end
52
123
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.3.0"
20
+ VERSION = "1.4.0"
21
21
  end
data/parse_fasta.gemspec CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Ryan Moore"]
10
10
  spec.email = ["moorer@udel.edu"]
11
11
  spec.summary = %q{Easy-peasy parsing of fasta files}
12
- spec.description = %q{So you want to parse a fasta file...}
12
+ spec.description = %q{So you want to parse a fasta or fastq file...}
13
13
  spec.homepage = "https://github.com/mooreryan/parse_fasta"
14
14
  spec.license = "GPLv3: http://www.gnu.org/licenses/gpl.txt"
15
15
 
@@ -53,4 +53,71 @@ describe Sequence do
53
53
  end
54
54
  end
55
55
  end
56
+
57
+ describe "#base_counts" do
58
+ context "for a DNA sequence with default or falsy argument" do
59
+ it "returns a map of A, C, T, and G counts" do
60
+ s = Sequence.new('ACTGactg')
61
+ expect(s.base_counts).to eq({ a: 2, c: 2, t: 2, g: 2 })
62
+ end
63
+ end
64
+
65
+ context "for a DNA sequence with truthy argument" do
66
+ it "returns a map of A, C, T, G and N counts" do
67
+ s = Sequence.new('ACTGNactgn')
68
+ expect(s.base_counts(1)).to eq({ a: 2, c: 2, t: 2, g: 2, n: 2 })
69
+ end
70
+ end
71
+
72
+ context "for an RNA sequence with falsy or default argument" do
73
+ it "returns a map of A, C, U, G counts" do
74
+ s = Sequence.new('ACUGacug')
75
+ expect(s.base_counts).to eq({ a: 2, c: 2, u: 2, g: 2 })
76
+ end
77
+ end
78
+
79
+ context "for an RNA sequence with truthy argument" do
80
+ it "returns a map of A, C, U, G and N counts" do
81
+ s = Sequence.new('ACUGNacugn')
82
+ expect(s.base_counts(1)).to eq({ a: 2, c: 2, u: 2, g: 2, n: 2 })
83
+ end
84
+ end
85
+
86
+ context "for a sequence with both U and T present" do
87
+ s = Sequence.new('AaCcTtGgNnUu')
88
+ err_message = 'ERROR: A sequence contains both T and U'
89
+
90
+ it "warns the user about having both U and T present" do
91
+ expect(s).to receive(:warn).with(err_message)
92
+ s.base_counts
93
+ end
94
+
95
+ it "returns a map that counts both U's and T's" do
96
+ expect(s.base_counts).to eq({ a: 2, c: 2, t: 2, u: 2, g: 2 })
97
+ end
98
+
99
+ it "returns a map with T, U and N if truthy argument given" do
100
+ base_counts = { a: 2, c: 2, t: 2, u: 2, g: 2, n: 2 }
101
+ expect(s.base_counts(1)).to eq(base_counts)
102
+ end
103
+ end
104
+ end
105
+
106
+ describe "#base_frequencies" do
107
+ context "with falsy argument" do
108
+ it "doesn't count ambiguous bases in total bases" do
109
+ s = Sequence.new('ACTTn')
110
+ base_freqs = { a: 0.25, c: 0.25, t: 0.5, g: 0.0 }
111
+ expect(s.base_frequencies).to eq(base_freqs)
112
+ end
113
+ end
114
+
115
+ context "when counting ambiguous bases" do
116
+ it "does count ambiguous bases in total bases" do
117
+ s = Sequence.new('ACTTn')
118
+ base_freqs = { a: 0.2, c: 0.2, t: 0.4, g: 0.0, n: 0.2 }
119
+ expect(s.base_frequencies(1)).to eq(base_freqs)
120
+ end
121
+ end
122
+ end
56
123
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-24 00:00:00.000000000 Z
11
+ date: 2014-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,7 +80,7 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.8'
83
- description: So you want to parse a fasta file...
83
+ description: So you want to parse a fasta or fastq file...
84
84
  email:
85
85
  - moorer@udel.edu
86
86
  executables: []