parse_fasta 1.6.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/lib/parse_fasta/fasta_file.rb +17 -11
- data/lib/parse_fasta/version.rb +1 -1
- data/spec/lib/fasta_file_spec.rb +5 -14
- data/spec/lib/seq_file_spec.rb +4 -7
- data/spec/spec_helper.rb +19 -0
- data/test_files/test.fa +6 -0
- data/test_files/test.fa.gz +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 889ac551f3260a660a2035614b252f7ddd511551
|
4
|
+
data.tar.gz: 963cebe7b51e0caaf2327713acc5a0a6d7a34bb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d5f26789225ebc0131680974502f54127240d2db1c9bffb3c577808bf75cf83e09eb7dc1b66fd86be18733b2ea49f4bffd8f51a8111cffeda37ba3e874bc72db
|
7
|
+
data.tar.gz: cad0979ede863a7e8205b684853533be11e85b8522429e93348eca20ad3c41895148ae42fc854dbd6d87f49f20dfbea5a95c346161790d5c038d1f0366e82a4b
|
data/README.md
CHANGED
@@ -71,6 +71,11 @@ want your scripts to accept either fastA or fastQ files.
|
|
71
71
|
If you need the description and quality string, you should use
|
72
72
|
FastqFile instead.
|
73
73
|
|
74
|
+
#### 1.6.1 ####
|
75
|
+
|
76
|
+
Better internal handling of empty sequences -- instead of raising
|
77
|
+
errors, pass empty sequences.
|
78
|
+
|
74
79
|
### 1.5 ###
|
75
80
|
|
76
81
|
Now accepts gzipped files. Huzzah!
|
data/lib/parse_fasta/fasta_file.rb
CHANGED
@@ -33,13 +33,13 @@ class FastaFile < File
|
|
33
33
|
# FastaFile.open('reads.fna.gz').each_record do |header, sequence|
|
34
34
|
# puts [header, sequence.gc].join("\t")
|
35
35
|
# end
|
36
|
-
#
|
36
|
+
#
|
37
37
|
# @example Parsing a fasta file (with truthy value param)
|
38
38
|
# FastaFile.open('reads.fna').each_record(1) do |header, sequence|
|
39
39
|
# # header => 'sequence_1'
|
40
40
|
# # sequence => ['AACTG', 'AGTCGT', ... ]
|
41
41
|
# end
|
42
|
-
#
|
42
|
+
#
|
43
43
|
# @yield The header and sequence for each record in the fasta
|
44
44
|
# file to the block
|
45
45
|
#
|
@@ -55,7 +55,7 @@ class FastaFile < File
|
|
55
55
|
f = Zlib::GzipReader.open(self)
|
56
56
|
rescue Zlib::GzipFile::Error => e
|
57
57
|
f = self
|
58
|
-
end
|
58
|
+
end
|
59
59
|
|
60
60
|
if separate_lines
|
61
61
|
f.each("\n>") do |line|
|
@@ -63,9 +63,9 @@ class FastaFile < File
|
|
63
63
|
yield(header.strip, sequence)
|
64
64
|
end
|
65
65
|
else
|
66
|
-
f.each("\n>") do |line|
|
66
|
+
f.each("\n>") do |line|
|
67
67
|
header, sequence = parse_line(line)
|
68
|
-
yield(header.strip, Sequence.new(sequence))
|
68
|
+
yield(header.strip, Sequence.new(sequence || ""))
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
@@ -74,16 +74,22 @@ class FastaFile < File
|
|
74
74
|
end
|
75
75
|
|
76
76
|
private
|
77
|
+
|
77
78
|
def parse_line(line)
|
78
|
-
line.
|
79
|
+
line.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
|
79
80
|
end
|
80
81
|
|
81
82
|
def parse_line_separately(line)
|
82
|
-
header, sequence =
|
83
|
-
line.
|
84
|
-
|
85
|
-
|
86
|
-
|
83
|
+
header, sequence =
|
84
|
+
line.split("\n", 2).map { |s| s.gsub(/>/, '') }
|
85
|
+
|
86
|
+
if sequence.nil?
|
87
|
+
sequences = []
|
88
|
+
else
|
89
|
+
sequences = sequence.split("\n")
|
90
|
+
.reject { |s| s.empty? }
|
91
|
+
.map { |s| Sequence.new(s) }
|
92
|
+
end
|
87
93
|
|
88
94
|
[header, sequences]
|
89
95
|
end
|
data/lib/parse_fasta/version.rb
CHANGED
data/spec/lib/fasta_file_spec.rb
CHANGED
@@ -20,17 +20,9 @@ require 'spec_helper'
|
|
20
20
|
|
21
21
|
describe FastaFile do
|
22
22
|
describe "#each_record" do
|
23
|
-
let(:records) {
|
24
|
-
|
25
|
-
|
26
|
-
["seq3", "yyyyyyyyyyyyyyyNNN"]]
|
27
|
-
}
|
28
|
-
|
29
|
-
let(:truthy_records) {
|
30
|
-
[["seq1 is fun", ["AACTGGNNN"]],
|
31
|
-
["seq2", ["AAT", "CCTGNNN"]],
|
32
|
-
["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]]]
|
33
|
-
}
|
23
|
+
let(:records) { Helpers::RECORDS }
|
24
|
+
|
25
|
+
let(:truthy_records) { Helpers::TRUTHY_RECORDS }
|
34
26
|
let(:f_handle) { FastaFile.open(@fname).each_record { |s| } }
|
35
27
|
|
36
28
|
shared_examples_for "any FastaFile" do
|
@@ -61,7 +53,7 @@ describe FastaFile do
|
|
61
53
|
expect(all_Sequences).to be true
|
62
54
|
end
|
63
55
|
end
|
64
|
-
|
56
|
+
|
65
57
|
end
|
66
58
|
end
|
67
59
|
|
@@ -96,6 +88,5 @@ describe FastaFile do
|
|
96
88
|
expect(f_handle).to be_an_instance_of FastaFile
|
97
89
|
end
|
98
90
|
end
|
99
|
-
end
|
91
|
+
end
|
100
92
|
end
|
101
|
-
|
data/spec/lib/seq_file_spec.rb
CHANGED
@@ -20,15 +20,12 @@ require 'spec_helper'
|
|
20
20
|
|
21
21
|
describe SeqFile do
|
22
22
|
describe "#each_record" do
|
23
|
-
let(:records) {
|
24
|
-
[["seq1 is fun", "AACTGGNNN"],
|
25
|
-
["seq2", "AATCCTGNNN"],
|
26
|
-
["seq3", "yyyyyyyyyyyyyyyNNN"]]
|
27
|
-
}
|
28
23
|
|
29
24
|
context "when input is a fasta file" do
|
25
|
+
let(:records) { Helpers::RECORDS }
|
26
|
+
|
30
27
|
let(:f_handle) { SeqFile.open(@fname).each_record { |s| } }
|
31
|
-
|
28
|
+
|
32
29
|
shared_examples_for "parsing a fasta file" do
|
33
30
|
it "yields proper header and sequence for each record" do
|
34
31
|
expect { |b|
|
@@ -96,7 +93,7 @@ describe SeqFile do
|
|
96
93
|
end
|
97
94
|
end
|
98
95
|
end
|
99
|
-
|
96
|
+
|
100
97
|
context "with a 4 line per record fastq file" do
|
101
98
|
describe "#each_record" do
|
102
99
|
context "with a gzipped file" do
|
data/spec/spec_helper.rb
CHANGED
@@ -21,3 +21,22 @@ Coveralls.wear!
|
|
21
21
|
|
22
22
|
require 'parse_fasta'
|
23
23
|
|
24
|
+
module Helpers
|
25
|
+
|
26
|
+
RECORDS = [["empty seq at beginning", ""],
|
27
|
+
["seq1 is fun", "AACTGGNNN"],
|
28
|
+
["seq2", "AATCCTGNNN"],
|
29
|
+
["empty seq 1", ""],
|
30
|
+
["empty seq 2", ""],
|
31
|
+
["seq3", "yyyyyyyyyyyyyyyNNN"],
|
32
|
+
["empty seq at end", ""]]
|
33
|
+
|
34
|
+
TRUTHY_RECORDS = [["empty seq at beginning", []],
|
35
|
+
["seq1 is fun", ["AACTGGNNN"]],
|
36
|
+
["seq2", ["AAT", "CCTGNNN"]],
|
37
|
+
["empty seq 1", []],
|
38
|
+
["empty seq 2", []],
|
39
|
+
["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]],
|
40
|
+
["empty seq at end", []]]
|
41
|
+
|
42
|
+
end
|
data/test_files/test.fa
CHANGED
data/test_files/test.fa.gz
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.0
|
4
|
+
version: 1.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|