parse_fasta 1.7.1 → 1.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YzcxODgzZjkxYThjOWEzODQwMDZkYzM5OTkyNmM1MjE2MzJmNzI3Mg==
4
+ NzY3MWVlYjVmYzM4NzhjZjIzNDUyYjFlNWZhMjMxMGE4NWQ3YTI4Yw==
5
5
  data.tar.gz: !binary |-
6
- MTYxNmVjOTZjMGI5ZWIxNDZjY2ZjZTVkNjc5NTZhZjNiZGQ3OTJjMQ==
6
+ OTVjMThlYzg0NzUwZTdmMzIyZmJlYjc5NjBiNDUzMmE2NjJjMGVkOA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- OWMxZDZhNTEyMjcxMzQ1NGZhYzNlMjM0MWQyMWQwN2NlOGRhYmExNGIyZDA0
10
- ZGEwZjAyM2IzZmNjMGM1OGQ5ZWNkZTMwZmJlN2ExMGU1ZTg3ZWY5Yjc5MDYz
11
- MDk3OGEwNDJkZTY3N2ZkYjVhMjg0ZWE4NjI5MWUyZTJiY2MxYzk=
9
+ ZTVkOTQ4MGMzMTQ4ZDg5MzczMDllMzgwNDY3Y2NmNzU4NDc5MzMwOGIwYzA3
10
+ Y2FjMzQ1YzA4NDhiMjY0OWMyYjMyMWFhOWM5MTI5OGFiMTJiMDFhYmJlNTI3
11
+ NjBkZDU5MzMzOTJlOGY0NzdiZWRiYWI5ZWE2YTBiZjJkYTk1ZTU=
12
12
  data.tar.gz: !binary |-
13
- YzllMTA1ZTYxNzViMjk5ZjAzYWQ2OTIwMjNiYjFkNmU1Mzg1NDY4YTNkMjk5
14
- ZTIxMDIzOGIzYWQwN2FhYWQ0YjhkNDE4NjU3OGMwNjg1ZjJjYTdjNmQ0OTA4
15
- NDJjNzU1NmU1ZjY0MGJmNDk3N2I1M2I3ZDk1ZDRjMDhlMTQ1NDQ=
13
+ MWJkZjY5ZWY5MmYyNTJjMjIyODE0ZmQ4YWU4OTZlMTY5MzIyNGNlZGJkNjU2
14
+ YjBjNmI5YTIyYTdmMGJhZmVlNTExZWJkN2U4OWRlMDE0NzM3MGI4ZGE4M2Rj
15
+ MzNjODU3YzYzMDFmZDg1MjU1YzQ2MDgxODIwNmQ3ZDJkZDU3MWQ=
data/README.md CHANGED
@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
27
27
  ## Documentation ##
28
28
 
29
29
  Check out
30
- [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.0/frames)
30
+ [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.2/frames)
31
31
  for the full API documentation.
32
32
 
33
33
  ## Usage ##
@@ -70,6 +70,16 @@ Read fasta file into a hash.
70
70
 
71
71
  Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
72
72
 
73
+ #### 1.7.2 ####
74
+
75
+ Strip spaces (not all whitespace) from `Sequence` and `Quality` strings.
76
+
77
+ Some alignment fastas have spaces for easier reading. Strip these
78
+ out. For consistency, also strips spaces from `Quality` strings. If
79
+ there are spaces that don't match in the quality and sequence in a
80
+ fastQ file, then things will get messed up in the FastQ file. FastQ
81
+ shouldn't have spaces though.
82
+
73
83
  ### 1.6 ###
74
84
 
75
85
  Added `SeqFile` class, which accepts either fastA or fastQ files. It
@@ -19,6 +19,17 @@
19
19
  # Provide some methods for dealing with common tasks regarding
20
20
  # quality strings.
21
21
  class Quality < String
22
+
23
+ # Strips spaces (not all whitespace) from the str argument before calling super
24
+ #
25
+ # @return [Quality] A Quality string
26
+ #
27
+ # @example Removes spaces
28
+ # Quality.new "I I 2 ! " #=> "II2!"
29
+ def initialize(str)
30
+ super(str.gsub(/ +/, ""))
31
+ end
32
+
22
33
  # Returns the mean quality for the record. This will be a good deal
23
34
  # faster than getting the average with `qual_scores` and reduce.
24
35
  #
@@ -20,6 +20,16 @@
20
20
  # nucleotide sequences.
21
21
  class Sequence < String
22
22
 
23
+ # Strips spaces (not all whitespace) from the str argument before calling super
24
+ #
25
+ # @return [Sequence] A Sequence string
26
+ #
27
+ # @example Removes spaces
28
+ # Sequence.new "AA CC TT" #=> "AACCTT"
29
+ def initialize(str)
30
+ super(str.gsub(/ +/, ""))
31
+ end
32
+
23
33
  # Calculates GC content
24
34
  #
25
35
  # Calculates GC content by dividing count of G + C divided by count
@@ -45,7 +55,7 @@ class Sequence < String
45
55
  t = s.count('t')
46
56
  a = s.count('a')
47
57
  u = s.count('u')
48
-
58
+
49
59
  return 0 if c + g + t + a + u == 0
50
60
  return (c + g) / (c + g + t + a + u).to_f
51
61
  end
@@ -87,9 +97,9 @@ class Sequence < String
87
97
  warn('ERROR: A sequence contains both T and U')
88
98
  counts[:t], counts[:u] = t, u
89
99
  end
90
-
100
+
91
101
  counts[:n] = s.count('n') if count_ambiguous_bases
92
-
102
+
93
103
  counts
94
104
  end
95
105
 
@@ -116,7 +126,7 @@ class Sequence < String
116
126
  def base_frequencies(count_ambiguous_bases=nil)
117
127
  base_counts = self.base_counts(count_ambiguous_bases)
118
128
  total_bases = base_counts.values.reduce(:+).to_f
119
- base_freqs =
129
+ base_freqs =
120
130
  base_counts.map { |base, count| [base, count/total_bases] }.flatten
121
131
  Hash[*base_freqs]
122
132
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.7.1"
20
+ VERSION = "1.7.2"
21
21
  end
@@ -21,10 +21,18 @@ require 'bio'
21
21
 
22
22
  describe Quality do
23
23
  let(:qual_string) { qual_string = Quality.new('ab%63:K') }
24
- let(:bioruby_qual_scores) do
24
+ let(:bioruby_qual_scores) do
25
25
  Bio::Fastq.new("@seq1\nACTGACT\n+\n#{qual_string}").quality_scores
26
26
  end
27
27
 
28
+ describe "::new" do
29
+ it "removes any spaces in the quality string" do
30
+ q = " ab # :m, ! "
31
+ q_no_spaces = "ab#:m,!"
32
+ expect(Quality.new(q)).to eq q_no_spaces
33
+ end
34
+ end
35
+
28
36
  describe "#qual_scores" do
29
37
  context "with illumina style quality scores" do
30
38
  it "returns an array of quality scores" do
@@ -39,5 +47,5 @@ describe Quality do
39
47
  mean_quality = qual_string.qual_scores.reduce(:+) / len
40
48
  expect(qual_string.mean_qual).to eq mean_quality
41
49
  end
42
- end
50
+ end
43
51
  end
@@ -25,6 +25,14 @@ describe Sequence do
25
25
  expect(Sequence.new('ACTG')).to be_a String
26
26
  end
27
27
 
28
+ describe "::new" do
29
+ it "removes any spaces in the sequence" do
30
+ s = "ACT ACT ACT GCT "
31
+ s_no_spaces = "ACTACTACTGCT"
32
+ expect(Sequence.new(s)).to eq s_no_spaces
33
+ end
34
+ end
35
+
28
36
  describe "#gc" do
29
37
  it "gives the same answer as BioRuby" do
30
38
  s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
@@ -75,7 +83,7 @@ describe Sequence do
75
83
  expect(s.base_counts).to eq({ a: 2, c: 2, u: 2, g: 2 })
76
84
  end
77
85
  end
78
-
86
+
79
87
  context "for an RNA sequence with truthy argument" do
80
88
  it "returns a map of A, C, U, G and N counts" do
81
89
  s = Sequence.new('ACUGNacugn')
data/test_files/test.fa CHANGED
@@ -1,6 +1,6 @@
1
1
  > empty seq at beginning
2
2
  >seq1 is fun
3
- AACTGGNNN
3
+ AAC TGG NN N
4
4
 
5
5
 
6
6
  >seq2
@@ -15,4 +15,4 @@ yyyyyyyyyy
15
15
 
16
16
  yyyyy
17
17
  NNN
18
- >empty seq at end
18
+ >empty seq at end
Binary file
data/test_files/test.fq CHANGED
@@ -1,8 +1,8 @@
1
1
  @seq1
2
- AACCTTGG
2
+ AA CC TT GG
3
3
  +
4
- )#3gTqN8
4
+ )# 3g Tq N8
5
5
  @seq2 apples
6
6
  ACTG
7
7
  +seq2 apples
8
- *ujM
8
+ *ujM
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.1
4
+ version: 1.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-14 00:00:00.000000000 Z
11
+ date: 2015-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler