parse_fasta 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2f6ccbf7f40297b5bc4604e15c7ef47ffb573d44
4
- data.tar.gz: f95cffcff55c9789eabb563095319154ca21ec99
3
+ metadata.gz: 889ac551f3260a660a2035614b252f7ddd511551
4
+ data.tar.gz: 963cebe7b51e0caaf2327713acc5a0a6d7a34bb4
5
5
  SHA512:
6
- metadata.gz: 1cdd0c880390a6666bafa57190eb37672188abfe64e24048277e759cdfd48eedca35c0e6a2251426bfe071aedc9c0bddb00b220ba140f87fdf1de1f8ae0b67b5
7
- data.tar.gz: 92734f57cc2da98f1a705c5e60b568c67c47b152efd6a114e523c51fc982c55f1f8aa4edf6df6b143fc11a3a910ec0b448158f4010438a7b292c25bf0154fafa
6
+ metadata.gz: d5f26789225ebc0131680974502f54127240d2db1c9bffb3c577808bf75cf83e09eb7dc1b66fd86be18733b2ea49f4bffd8f51a8111cffeda37ba3e874bc72db
7
+ data.tar.gz: cad0979ede863a7e8205b684853533be11e85b8522429e93348eca20ad3c41895148ae42fc854dbd6d87f49f20dfbea5a95c346161790d5c038d1f0366e82a4b
data/README.md CHANGED
@@ -71,6 +71,11 @@ want your scripts to accept either fastA or fastQ files.
71
71
  If you need the description and quality string, you should use
72
72
  FastqFile instead.
73
73
 
74
+ #### 1.6.1 ####
75
+
76
+ Better internal handling of empty sequences -- instead of raising
77
+ errors, pass empty sequences.
78
+
74
79
  ### 1.5 ###
75
80
 
76
81
  Now accepts gzipped files. Huzzah!
@@ -33,13 +33,13 @@ class FastaFile < File
33
33
  # FastaFile.open('reads.fna.gz').each_record do |header, sequence|
34
34
  # puts [header, sequence.gc].join("\t")
35
35
  # end
36
- #
36
+ #
37
37
  # @example Parsing a fasta file (with truthy value param)
38
38
  # FastaFile.open('reads.fna').each_record(1) do |header, sequence|
39
39
  # # header => 'sequence_1'
40
40
  # # sequence => ['AACTG', 'AGTCGT', ... ]
41
41
  # end
42
- #
42
+ #
43
43
  # @yield The header and sequence for each record in the fasta
44
44
  # file to the block
45
45
  #
@@ -55,7 +55,7 @@ class FastaFile < File
55
55
  f = Zlib::GzipReader.open(self)
56
56
  rescue Zlib::GzipFile::Error => e
57
57
  f = self
58
- end
58
+ end
59
59
 
60
60
  if separate_lines
61
61
  f.each("\n>") do |line|
@@ -63,9 +63,9 @@ class FastaFile < File
63
63
  yield(header.strip, sequence)
64
64
  end
65
65
  else
66
- f.each("\n>") do |line|
66
+ f.each("\n>") do |line|
67
67
  header, sequence = parse_line(line)
68
- yield(header.strip, Sequence.new(sequence))
68
+ yield(header.strip, Sequence.new(sequence || ""))
69
69
  end
70
70
  end
71
71
 
@@ -74,16 +74,22 @@ class FastaFile < File
74
74
  end
75
75
 
76
76
  private
77
+
77
78
  def parse_line(line)
78
- line.chomp.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
79
+ line.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
79
80
  end
80
81
 
81
82
  def parse_line_separately(line)
82
- header, sequence =
83
- line.chomp.split("\n", 2).map { |s| s.gsub(/>/, '') }
84
- sequences = sequence.split("\n")
85
- .reject { |s| s.empty? }
86
- .map { |s| Sequence.new(s) }
83
+ header, sequence =
84
+ line.split("\n", 2).map { |s| s.gsub(/>/, '') }
85
+
86
+ if sequence.nil?
87
+ sequences = []
88
+ else
89
+ sequences = sequence.split("\n")
90
+ .reject { |s| s.empty? }
91
+ .map { |s| Sequence.new(s) }
92
+ end
87
93
 
88
94
  [header, sequences]
89
95
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.6.0"
20
+ VERSION = "1.6.1"
21
21
  end
@@ -20,17 +20,9 @@ require 'spec_helper'
20
20
 
21
21
  describe FastaFile do
22
22
  describe "#each_record" do
23
- let(:records) {
24
- [["seq1 is fun", "AACTGGNNN"],
25
- ["seq2", "AATCCTGNNN"],
26
- ["seq3", "yyyyyyyyyyyyyyyNNN"]]
27
- }
28
-
29
- let(:truthy_records) {
30
- [["seq1 is fun", ["AACTGGNNN"]],
31
- ["seq2", ["AAT", "CCTGNNN"]],
32
- ["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]]]
33
- }
23
+ let(:records) { Helpers::RECORDS }
24
+
25
+ let(:truthy_records) { Helpers::TRUTHY_RECORDS }
34
26
  let(:f_handle) { FastaFile.open(@fname).each_record { |s| } }
35
27
 
36
28
  shared_examples_for "any FastaFile" do
@@ -61,7 +53,7 @@ describe FastaFile do
61
53
  expect(all_Sequences).to be true
62
54
  end
63
55
  end
64
-
56
+
65
57
  end
66
58
  end
67
59
 
@@ -96,6 +88,5 @@ describe FastaFile do
96
88
  expect(f_handle).to be_an_instance_of FastaFile
97
89
  end
98
90
  end
99
- end
91
+ end
100
92
  end
101
-
@@ -20,15 +20,12 @@ require 'spec_helper'
20
20
 
21
21
  describe SeqFile do
22
22
  describe "#each_record" do
23
- let(:records) {
24
- [["seq1 is fun", "AACTGGNNN"],
25
- ["seq2", "AATCCTGNNN"],
26
- ["seq3", "yyyyyyyyyyyyyyyNNN"]]
27
- }
28
23
 
29
24
  context "when input is a fasta file" do
25
+ let(:records) { Helpers::RECORDS }
26
+
30
27
  let(:f_handle) { SeqFile.open(@fname).each_record { |s| } }
31
-
28
+
32
29
  shared_examples_for "parsing a fasta file" do
33
30
  it "yields proper header and sequence for each record" do
34
31
  expect { |b|
@@ -96,7 +93,7 @@ describe SeqFile do
96
93
  end
97
94
  end
98
95
  end
99
-
96
+
100
97
  context "with a 4 line per record fastq file" do
101
98
  describe "#each_record" do
102
99
  context "with a gzipped file" do
data/spec/spec_helper.rb CHANGED
@@ -21,3 +21,22 @@ Coveralls.wear!
21
21
 
22
22
  require 'parse_fasta'
23
23
 
24
+ module Helpers
25
+
26
+ RECORDS = [["empty seq at beginning", ""],
27
+ ["seq1 is fun", "AACTGGNNN"],
28
+ ["seq2", "AATCCTGNNN"],
29
+ ["empty seq 1", ""],
30
+ ["empty seq 2", ""],
31
+ ["seq3", "yyyyyyyyyyyyyyyNNN"],
32
+ ["empty seq at end", ""]]
33
+
34
+ TRUTHY_RECORDS = [["empty seq at beginning", []],
35
+ ["seq1 is fun", ["AACTGGNNN"]],
36
+ ["seq2", ["AAT", "CCTGNNN"]],
37
+ ["empty seq 1", []],
38
+ ["empty seq 2", []],
39
+ ["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]],
40
+ ["empty seq at end", []]]
41
+
42
+ end
data/test_files/test.fa CHANGED
@@ -1,12 +1,18 @@
1
+ > empty seq at beginning
1
2
  >seq1 is fun
2
3
  AACTGGNNN
3
4
 
5
+
4
6
  >seq2
5
7
  AAT
6
8
  CCTGNNN
9
+ > empty seq 1
10
+ > empty seq 2
11
+
7
12
 
8
13
  >seq3
9
14
  yyyyyyyyyy
10
15
 
11
16
  yyyyy
12
17
  NNN
18
+ >empty seq at end
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-22 00:00:00.000000000 Z
11
+ date: 2015-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler