parse_fasta 1.7.1 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YzcxODgzZjkxYThjOWEzODQwMDZkYzM5OTkyNmM1MjE2MzJmNzI3Mg==
4
+ NzY3MWVlYjVmYzM4NzhjZjIzNDUyYjFlNWZhMjMxMGE4NWQ3YTI4Yw==
5
5
  data.tar.gz: !binary |-
6
- MTYxNmVjOTZjMGI5ZWIxNDZjY2ZjZTVkNjc5NTZhZjNiZGQ3OTJjMQ==
6
+ OTVjMThlYzg0NzUwZTdmMzIyZmJlYjc5NjBiNDUzMmE2NjJjMGVkOA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- OWMxZDZhNTEyMjcxMzQ1NGZhYzNlMjM0MWQyMWQwN2NlOGRhYmExNGIyZDA0
10
- ZGEwZjAyM2IzZmNjMGM1OGQ5ZWNkZTMwZmJlN2ExMGU1ZTg3ZWY5Yjc5MDYz
11
- MDk3OGEwNDJkZTY3N2ZkYjVhMjg0ZWE4NjI5MWUyZTJiY2MxYzk=
9
+ ZTVkOTQ4MGMzMTQ4ZDg5MzczMDllMzgwNDY3Y2NmNzU4NDc5MzMwOGIwYzA3
10
+ Y2FjMzQ1YzA4NDhiMjY0OWMyYjMyMWFhOWM5MTI5OGFiMTJiMDFhYmJlNTI3
11
+ NjBkZDU5MzMzOTJlOGY0NzdiZWRiYWI5ZWE2YTBiZjJkYTk1ZTU=
12
12
  data.tar.gz: !binary |-
13
- YzllMTA1ZTYxNzViMjk5ZjAzYWQ2OTIwMjNiYjFkNmU1Mzg1NDY4YTNkMjk5
14
- ZTIxMDIzOGIzYWQwN2FhYWQ0YjhkNDE4NjU3OGMwNjg1ZjJjYTdjNmQ0OTA4
15
- NDJjNzU1NmU1ZjY0MGJmNDk3N2I1M2I3ZDk1ZDRjMDhlMTQ1NDQ=
13
+ MWJkZjY5ZWY5MmYyNTJjMjIyODE0ZmQ4YWU4OTZlMTY5MzIyNGNlZGJkNjU2
14
+ YjBjNmI5YTIyYTdmMGJhZmVlNTExZWJkN2U4OWRlMDE0NzM3MGI4ZGE4M2Rj
15
+ MzNjODU3YzYzMDFmZDg1MjU1YzQ2MDgxODIwNmQ3ZDJkZDU3MWQ=
data/README.md CHANGED
@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
27
27
  ## Documentation ##
28
28
 
29
29
  Check out
30
- [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.0/frames)
30
+ [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.2/frames)
31
31
  for the full api documentation.
32
32
 
33
33
  ## Usage ##
@@ -70,6 +70,16 @@ Read fasta file into a hash.
70
70
 
71
71
  Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
72
72
 
73
+ #### 1.7.2 ####
74
+
75
+ Strip spaces (not all whitespace) from `Sequence` and `Quality` strings.
76
+
77
+ Some alignment fastas have spaces for easier reading. Strip these
78
+ out. For consistency, also strips spaces from `Quality` strings. If
79
+ there are spaces that don't match in the quality and sequence in a
80
+ fastQ file, then things will get messed up in the FastQ file. FastQ
81
+ shouldn't have spaces though.
82
+
73
83
  ### 1.6 ###
74
84
 
75
85
  Added `SeqFile` class, which accepts either fastA or fastQ files. It
@@ -19,6 +19,17 @@
19
19
  # Provide some methods for dealing with common tasks regarding
20
20
  # quality strings.
21
21
  class Quality < String
22
+
23
+ # Strips spaces (not all whitespace) from the str argument before calling super
24
+ #
25
+ # @return [Quality] A Quality string
26
+ #
27
+ # @example Removes spaces
28
+ # Quality.new "I I 2 ! " #=> "II2!"
29
+ def initialize(str)
30
+ super(str.gsub(/ +/, ""))
31
+ end
32
+
22
33
  # Returns the mean quality for the record. This will be a good deal
23
34
  # faster than getting the average with `qual_scores` and reduce.
24
35
  #
@@ -20,6 +20,16 @@
20
20
  # nucleotide sequences.
21
21
  class Sequence < String
22
22
 
23
+ # Strips spaces (not all whitespace) from the str argument before calling super
24
+ #
25
+ # @return [Sequence] A Sequence string
26
+ #
27
+ # @example Removes spaces
28
+ # Sequence.new "AA CC TT" #=> "AACCTT"
29
+ def initialize(str)
30
+ super(str.gsub(/ +/, ""))
31
+ end
32
+
23
33
  # Calculates GC content
24
34
  #
25
35
  # Calculates GC content by dividing count of G + C divided by count
@@ -45,7 +55,7 @@ class Sequence < String
45
55
  t = s.count('t')
46
56
  a = s.count('a')
47
57
  u = s.count('u')
48
-
58
+
49
59
  return 0 if c + g + t + a + u == 0
50
60
  return (c + g) / (c + g + t + a + u).to_f
51
61
  end
@@ -87,9 +97,9 @@ class Sequence < String
87
97
  warn('ERROR: A sequence contains both T and U')
88
98
  counts[:t], counts[:u] = t, u
89
99
  end
90
-
100
+
91
101
  counts[:n] = s.count('n') if count_ambiguous_bases
92
-
102
+
93
103
  counts
94
104
  end
95
105
 
@@ -116,7 +126,7 @@ class Sequence < String
116
126
  def base_frequencies(count_ambiguous_bases=nil)
117
127
  base_counts = self.base_counts(count_ambiguous_bases)
118
128
  total_bases = base_counts.values.reduce(:+).to_f
119
- base_freqs =
129
+ base_freqs =
120
130
  base_counts.map { |base, count| [base, count/total_bases] }.flatten
121
131
  Hash[*base_freqs]
122
132
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.7.1"
20
+ VERSION = "1.7.2"
21
21
  end
@@ -21,10 +21,18 @@ require 'bio'
21
21
 
22
22
  describe Quality do
23
23
  let(:qual_string) { qual_string = Quality.new('ab%63:K') }
24
- let(:bioruby_qual_scores) do
24
+ let(:bioruby_qual_scores) do
25
25
  Bio::Fastq.new("@seq1\nACTGACT\n+\n#{qual_string}").quality_scores
26
26
  end
27
27
 
28
+ describe "::new" do
29
+ it "removes any spaces in the quality string" do
30
+ q = " ab # :m, ! "
31
+ q_no_spaces = "ab#:m,!"
32
+ expect(Quality.new(q)).to eq q_no_spaces
33
+ end
34
+ end
35
+
28
36
  describe "#qual_scores" do
29
37
  context "with illumina style quality scores" do
30
38
  it "returns an array of quality scores" do
@@ -39,5 +47,5 @@ describe Quality do
39
47
  mean_quality = qual_string.qual_scores.reduce(:+) / len
40
48
  expect(qual_string.mean_qual).to eq mean_quality
41
49
  end
42
- end
50
+ end
43
51
  end
@@ -25,6 +25,14 @@ describe Sequence do
25
25
  expect(Sequence.new('ACTG')).to be_a String
26
26
  end
27
27
 
28
+ describe "::new" do
29
+ it "removes any spaces in the sequence" do
30
+ s = "ACT ACT ACT GCT "
31
+ s_no_spaces = "ACTACTACTGCT"
32
+ expect(Sequence.new(s)).to eq s_no_spaces
33
+ end
34
+ end
35
+
28
36
  describe "#gc" do
29
37
  it "gives the same answer as BioRuby" do
30
38
  s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
@@ -75,7 +83,7 @@ describe Sequence do
75
83
  expect(s.base_counts).to eq({ a: 2, c: 2, u: 2, g: 2 })
76
84
  end
77
85
  end
78
-
86
+
79
87
  context "for an RNA sequence with truthy argument" do
80
88
  it "returns a map of A, C, U, G and N counts" do
81
89
  s = Sequence.new('ACUGNacugn')
data/test_files/test.fa CHANGED
@@ -1,6 +1,6 @@
1
1
  > empty seq at beginning
2
2
  >seq1 is fun
3
- AACTGGNNN
3
+ AAC TGG NN N
4
4
 
5
5
 
6
6
  >seq2
@@ -15,4 +15,4 @@ yyyyyyyyyy
15
15
 
16
16
  yyyyy
17
17
  NNN
18
- >empty seq at end
18
+ >empty seq at end
Binary file
data/test_files/test.fq CHANGED
@@ -1,8 +1,8 @@
1
1
  @seq1
2
- AACCTTGG
2
+ AA CC TT GG
3
3
  +
4
- )#3gTqN8
4
+ )# 3g Tq N8
5
5
  @seq2 apples
6
6
  ACTG
7
7
  +seq2 apples
8
- *ujM
8
+ *ujM
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.1
4
+ version: 1.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-14 00:00:00.000000000 Z
11
+ date: 2015-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler