RubyGems - parse_fasta - Versions diffs - 1.7.1 → 1.7.2 - Mend

parse_fasta 1.7.1 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml CHANGED Viewed

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    YzcxODgzZjkxYThjOWEzODQwMDZkYzM5OTkyNmM1MjE2MzJmNzI3Mg==
+    NzY3MWVlYjVmYzM4NzhjZjIzNDUyYjFlNWZhMjMxMGE4NWQ3YTI4Yw==
   data.tar.gz: !binary |-
-    MTYxNmVjOTZjMGI5ZWIxNDZjY2ZjZTVkNjc5NTZhZjNiZGQ3OTJjMQ==
+    OTVjMThlYzg0NzUwZTdmMzIyZmJlYjc5NjBiNDUzMmE2NjJjMGVkOA==
 SHA512:
   metadata.gz: !binary |-
-    OWMxZDZhNTEyMjcxMzQ1NGZhYzNlMjM0MWQyMWQwN2NlOGRhYmExNGIyZDA0
-    ZGEwZjAyM2IzZmNjMGM1OGQ5ZWNkZTMwZmJlN2ExMGU1ZTg3ZWY5Yjc5MDYz
-    MDk3OGEwNDJkZTY3N2ZkYjVhMjg0ZWE4NjI5MWUyZTJiY2MxYzk=
+    ZTVkOTQ4MGMzMTQ4ZDg5MzczMDllMzgwNDY3Y2NmNzU4NDc5MzMwOGIwYzA3
+    Y2FjMzQ1YzA4NDhiMjY0OWMyYjMyMWFhOWM5MTI5OGFiMTJiMDFhYmJlNTI3
+    NjBkZDU5MzMzOTJlOGY0NzdiZWRiYWI5ZWE2YTBiZjJkYTk1ZTU=
   data.tar.gz: !binary |-
-    YzllMTA1ZTYxNzViMjk5ZjAzYWQ2OTIwMjNiYjFkNmU1Mzg1NDY4YTNkMjk5
-    ZTIxMDIzOGIzYWQwN2FhYWQ0YjhkNDE4NjU3OGMwNjg1ZjJjYTdjNmQ0OTA4
-    NDJjNzU1NmU1ZjY0MGJmNDk3N2I1M2I3ZDk1ZDRjMDhlMTQ1NDQ=
+    MWJkZjY5ZWY5MmYyNTJjMjIyODE0ZmQ4YWU4OTZlMTY5MzIyNGNlZGJkNjU2
+    YjBjNmI5YTIyYTdmMGJhZmVlNTExZWJkN2U4OWRlMDE0NzM3MGI4ZGE4M2Rj
+    MzNjODU3YzYzMDFmZDg1MjU1YzQ2MDgxODIwNmQ3ZDJkZDU3MWQ=

data/README.md CHANGED Viewed

@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
 ## Documentation ##
 Check out
-[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.0/frames)
+[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.2/frames)
 for the full api documentation.
 ## Usage ##
@@ -70,6 +70,16 @@ Read fasta file into a hash.
 Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
+#### 1.7.2 ####
+Strip spaces (not all whitespace) from `Sequence` and `Quality` strings.
+Some alignment fastA files have spaces for easier reading; these are
+stripped out. For consistency, spaces are also stripped from `Quality`
+strings. If the spaces in the quality string and the sequence of a
+fastQ record don't match, then things will get messed up for that
+record. FastQ files shouldn't have spaces though.
 ### 1.6 ###
 Added `SeqFile` class, which accepts either fastA or fastQ files. It

data/lib/parse_fasta/quality.rb CHANGED Viewed

@@ -19,6 +19,17 @@
 # Provide some methods for dealing with common tasks regarding
 # quality strings.
 class Quality < String
+  # Strips spaces (not all whitespace) from the str argument before calling super
+  #
+  # @return [Quality] A Quality string
+  #
+  # @example Removes spaces
+  #   Quality.new "I I 2 ! " #=> "II2!"
+  def initialize(str)
+    super(str.gsub(/ +/, ""))
+  end
   # Returns the mean quality for the record. This will be a good deal
   # faster than getting the average with `qual_scores` and reduce.
   #

data/lib/parse_fasta/sequence.rb CHANGED Viewed

@@ -20,6 +20,16 @@
 # nucleotide sequences.
 class Sequence < String
+  # Strips spaces (not all whitespace) from the str argument before calling super
+  #
+  # @return [Sequence] A Sequence string
+  #
+  # @example Removes spaces
+  #   Sequence.new "AA CC TT" #=> "AACCTT"
+  def initialize(str)
+    super(str.gsub(/ +/, ""))
+  end
   # Calculates GC content
   #
   # Calculates GC content by dividing count of G + C divided by count
@@ -45,7 +55,7 @@ class Sequence < String
     t = s.count('t')
     a = s.count('a')
     u = s.count('u')
     return 0 if c + g + t + a + u == 0
     return (c + g) / (c + g + t + a + u).to_f
   end
@@ -87,9 +97,9 @@ class Sequence < String
       warn('ERROR: A sequence contains both T and U')
       counts[:t], counts[:u] = t, u
     end
     counts[:n] = s.count('n') if count_ambiguous_bases
     counts
   end
@@ -116,7 +126,7 @@ class Sequence < String
   def base_frequencies(count_ambiguous_bases=nil)
     base_counts = self.base_counts(count_ambiguous_bases)
     total_bases = base_counts.values.reduce(:+).to_f
-    base_freqs =
+    base_freqs =
       base_counts.map { |base, count| [base, count/total_bases] }.flatten
     Hash[*base_freqs]
   end

data/lib/parse_fasta/version.rb CHANGED Viewed

@@ -17,5 +17,5 @@
 # along with parse_fasta.  If not, see <http://www.gnu.org/licenses/>.
 module ParseFasta
-  VERSION = "1.7.1"
+  VERSION = "1.7.2"
 end

data/spec/lib/quality_spec.rb CHANGED Viewed

@@ -21,10 +21,18 @@ require 'bio'
 describe Quality do
   let(:qual_string) { qual_string = Quality.new('ab%63:K') }
-  let(:bioruby_qual_scores) do
+  let(:bioruby_qual_scores) do
     Bio::Fastq.new("@seq1\nACTGACT\n+\n#{qual_string}").quality_scores
   end
+  describe "::new" do
+    it "removes any spaces in the quality string" do
+      q = "   ab #   :m, !    "
+      q_no_spaces = "ab#:m,!"
+      expect(Quality.new(q)).to eq q_no_spaces
+    end
+  end
   describe "#qual_scores" do
     context "with illumina style quality scores" do
       it "returns an array of quality scores" do
@@ -39,5 +47,5 @@ describe Quality do
       mean_quality = qual_string.qual_scores.reduce(:+) / len
       expect(qual_string.mean_qual).to eq mean_quality
     end
-  end
+  end
 end

data/spec/lib/sequence_spec.rb CHANGED Viewed

@@ -25,6 +25,14 @@ describe Sequence do
     expect(Sequence.new('ACTG')).to be_a String
   end
+  describe "::new" do
+    it "removes any spaces in the sequence" do
+      s = "ACT ACT ACT    GCT  "
+      s_no_spaces = "ACTACTACTGCT"
+      expect(Sequence.new(s)).to eq s_no_spaces
+    end
+  end
   describe "#gc" do
     it "gives the same answer as BioRuby" do
       s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
@@ -75,7 +83,7 @@ describe Sequence do
         expect(s.base_counts).to eq({ a: 2, c: 2, u: 2, g: 2 })
       end
     end
     context "for an RNA sequence with truthy argument" do
       it "returns a map of A, C, U, G and N counts" do
         s = Sequence.new('ACUGNacugn')

data/test_files/test.fa CHANGED Viewed

@@ -1,6 +1,6 @@
 > empty seq at beginning
 >seq1 is fun
-AACTGGNNN
+AAC TGG NN N
 >seq2
@@ -15,4 +15,4 @@ yyyyyyyyyy
 yyyyy
 NNN
->empty seq at end
+>empty seq at end

data/test_files/test.fa.gz CHANGED Viewed

Binary file

data/test_files/test.fq CHANGED Viewed

@@ -1,8 +1,8 @@
 @seq1
-AACCTTGG
+AA CC TT GG
 +
-)#3gTqN8
+)# 3g Tq N8
 @seq2 apples
 ACTG
 +seq2 apples
-*ujM
+*ujM

data/test_files/test.fq.gz CHANGED Viewed

Binary file

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: parse_fasta
 version: !ruby/object:Gem::Version
-  version: 1.7.1
+  version: 1.7.2
 platform: ruby
 authors:
 - Ryan Moore
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-07-14 00:00:00.000000000 Z
+date: 2015-10-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler