parse_fasta 1.6.0 → 1.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2f6ccbf7f40297b5bc4604e15c7ef47ffb573d44
4
- data.tar.gz: f95cffcff55c9789eabb563095319154ca21ec99
3
+ metadata.gz: 889ac551f3260a660a2035614b252f7ddd511551
4
+ data.tar.gz: 963cebe7b51e0caaf2327713acc5a0a6d7a34bb4
5
5
  SHA512:
6
- metadata.gz: 1cdd0c880390a6666bafa57190eb37672188abfe64e24048277e759cdfd48eedca35c0e6a2251426bfe071aedc9c0bddb00b220ba140f87fdf1de1f8ae0b67b5
7
- data.tar.gz: 92734f57cc2da98f1a705c5e60b568c67c47b152efd6a114e523c51fc982c55f1f8aa4edf6df6b143fc11a3a910ec0b448158f4010438a7b292c25bf0154fafa
6
+ metadata.gz: d5f26789225ebc0131680974502f54127240d2db1c9bffb3c577808bf75cf83e09eb7dc1b66fd86be18733b2ea49f4bffd8f51a8111cffeda37ba3e874bc72db
7
+ data.tar.gz: cad0979ede863a7e8205b684853533be11e85b8522429e93348eca20ad3c41895148ae42fc854dbd6d87f49f20dfbea5a95c346161790d5c038d1f0366e82a4b
data/README.md CHANGED
@@ -71,6 +71,11 @@ want your scripts to accept either fastA or fastQ files.
71
71
  If you need the description and quality string, you should use
72
72
  FastqFile instead.
73
73
 
74
+ #### 1.6.1 ####
75
+
76
+ Better internal handling of empty sequences -- instead of raising
77
+ errors, pass empty sequences.
78
+
74
79
  ### 1.5 ###
75
80
 
76
81
  Now accepts gzipped files. Huzzah!
@@ -33,13 +33,13 @@ class FastaFile < File
33
33
  # FastaFile.open('reads.fna.gz').each_record do |header, sequence|
34
34
  # puts [header, sequence.gc].join("\t")
35
35
  # end
36
- #
36
+ #
37
37
  # @example Parsing a fasta file (with truthy value param)
38
38
  # FastaFile.open('reads.fna').each_record(1) do |header, sequence|
39
39
  # # header => 'sequence_1'
40
40
  # # sequence => ['AACTG', 'AGTCGT', ... ]
41
41
  # end
42
- #
42
+ #
43
43
  # @yield The header and sequence for each record in the fasta
44
44
  # file to the block
45
45
  #
@@ -55,7 +55,7 @@ class FastaFile < File
55
55
  f = Zlib::GzipReader.open(self)
56
56
  rescue Zlib::GzipFile::Error => e
57
57
  f = self
58
- end
58
+ end
59
59
 
60
60
  if separate_lines
61
61
  f.each("\n>") do |line|
@@ -63,9 +63,9 @@ class FastaFile < File
63
63
  yield(header.strip, sequence)
64
64
  end
65
65
  else
66
- f.each("\n>") do |line|
66
+ f.each("\n>") do |line|
67
67
  header, sequence = parse_line(line)
68
- yield(header.strip, Sequence.new(sequence))
68
+ yield(header.strip, Sequence.new(sequence || ""))
69
69
  end
70
70
  end
71
71
 
@@ -74,16 +74,22 @@ class FastaFile < File
74
74
  end
75
75
 
76
76
  private
77
+
77
78
  def parse_line(line)
78
- line.chomp.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
79
+ line.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
79
80
  end
80
81
 
81
82
  def parse_line_separately(line)
82
- header, sequence =
83
- line.chomp.split("\n", 2).map { |s| s.gsub(/>/, '') }
84
- sequences = sequence.split("\n")
85
- .reject { |s| s.empty? }
86
- .map { |s| Sequence.new(s) }
83
+ header, sequence =
84
+ line.split("\n", 2).map { |s| s.gsub(/>/, '') }
85
+
86
+ if sequence.nil?
87
+ sequences = []
88
+ else
89
+ sequences = sequence.split("\n")
90
+ .reject { |s| s.empty? }
91
+ .map { |s| Sequence.new(s) }
92
+ end
87
93
 
88
94
  [header, sequences]
89
95
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.6.0"
20
+ VERSION = "1.6.1"
21
21
  end
@@ -20,17 +20,9 @@ require 'spec_helper'
20
20
 
21
21
  describe FastaFile do
22
22
  describe "#each_record" do
23
- let(:records) {
24
- [["seq1 is fun", "AACTGGNNN"],
25
- ["seq2", "AATCCTGNNN"],
26
- ["seq3", "yyyyyyyyyyyyyyyNNN"]]
27
- }
28
-
29
- let(:truthy_records) {
30
- [["seq1 is fun", ["AACTGGNNN"]],
31
- ["seq2", ["AAT", "CCTGNNN"]],
32
- ["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]]]
33
- }
23
+ let(:records) { Helpers::RECORDS }
24
+
25
+ let(:truthy_records) { Helpers::TRUTHY_RECORDS }
34
26
  let(:f_handle) { FastaFile.open(@fname).each_record { |s| } }
35
27
 
36
28
  shared_examples_for "any FastaFile" do
@@ -61,7 +53,7 @@ describe FastaFile do
61
53
  expect(all_Sequences).to be true
62
54
  end
63
55
  end
64
-
56
+
65
57
  end
66
58
  end
67
59
 
@@ -96,6 +88,5 @@ describe FastaFile do
96
88
  expect(f_handle).to be_an_instance_of FastaFile
97
89
  end
98
90
  end
99
- end
91
+ end
100
92
  end
101
-
@@ -20,15 +20,12 @@ require 'spec_helper'
20
20
 
21
21
  describe SeqFile do
22
22
  describe "#each_record" do
23
- let(:records) {
24
- [["seq1 is fun", "AACTGGNNN"],
25
- ["seq2", "AATCCTGNNN"],
26
- ["seq3", "yyyyyyyyyyyyyyyNNN"]]
27
- }
28
23
 
29
24
  context "when input is a fasta file" do
25
+ let(:records) { Helpers::RECORDS }
26
+
30
27
  let(:f_handle) { SeqFile.open(@fname).each_record { |s| } }
31
-
28
+
32
29
  shared_examples_for "parsing a fasta file" do
33
30
  it "yields proper header and sequence for each record" do
34
31
  expect { |b|
@@ -96,7 +93,7 @@ describe SeqFile do
96
93
  end
97
94
  end
98
95
  end
99
-
96
+
100
97
  context "with a 4 line per record fastq file" do
101
98
  describe "#each_record" do
102
99
  context "with a gzipped file" do
data/spec/spec_helper.rb CHANGED
@@ -21,3 +21,22 @@ Coveralls.wear!
21
21
 
22
22
  require 'parse_fasta'
23
23
 
24
+ module Helpers
25
+
26
+ RECORDS = [["empty seq at beginning", ""],
27
+ ["seq1 is fun", "AACTGGNNN"],
28
+ ["seq2", "AATCCTGNNN"],
29
+ ["empty seq 1", ""],
30
+ ["empty seq 2", ""],
31
+ ["seq3", "yyyyyyyyyyyyyyyNNN"],
32
+ ["empty seq at end", ""]]
33
+
34
+ TRUTHY_RECORDS = [["empty seq at beginning", []],
35
+ ["seq1 is fun", ["AACTGGNNN"]],
36
+ ["seq2", ["AAT", "CCTGNNN"]],
37
+ ["empty seq 1", []],
38
+ ["empty seq 2", []],
39
+ ["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]],
40
+ ["empty seq at end", []]]
41
+
42
+ end
data/test_files/test.fa CHANGED
@@ -1,12 +1,18 @@
1
+ > empty seq at beginning
1
2
  >seq1 is fun
2
3
  AACTGGNNN
3
4
 
5
+
4
6
  >seq2
5
7
  AAT
6
8
  CCTGNNN
9
+ > empty seq 1
10
+ > empty seq 2
11
+
7
12
 
8
13
  >seq3
9
14
  yyyyyyyyyy
10
15
 
11
16
  yyyyy
12
17
  NNN
18
+ >empty seq at end
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-22 00:00:00.000000000 Z
11
+ date: 2015-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler