parse_fasta 1.6.0 → 1.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/lib/parse_fasta/fasta_file.rb +17 -11
- data/lib/parse_fasta/version.rb +1 -1
- data/spec/lib/fasta_file_spec.rb +5 -14
- data/spec/lib/seq_file_spec.rb +4 -7
- data/spec/spec_helper.rb +19 -0
- data/test_files/test.fa +6 -0
- data/test_files/test.fa.gz +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 889ac551f3260a660a2035614b252f7ddd511551
|
4
|
+
data.tar.gz: 963cebe7b51e0caaf2327713acc5a0a6d7a34bb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d5f26789225ebc0131680974502f54127240d2db1c9bffb3c577808bf75cf83e09eb7dc1b66fd86be18733b2ea49f4bffd8f51a8111cffeda37ba3e874bc72db
|
7
|
+
data.tar.gz: cad0979ede863a7e8205b684853533be11e85b8522429e93348eca20ad3c41895148ae42fc854dbd6d87f49f20dfbea5a95c346161790d5c038d1f0366e82a4b
|
data/README.md
CHANGED
@@ -71,6 +71,11 @@ want your scripts to accept either fastA or fastQ files.
|
|
71
71
|
If you need the description and quality string, you should use
|
72
72
|
FastqFile instead.
|
73
73
|
|
74
|
+
#### 1.6.1 ####
|
75
|
+
|
76
|
+
Better internal handling of empty sequences -- instead of raising
|
77
|
+
errors, pass empty sequences.
|
78
|
+
|
74
79
|
### 1.5 ###
|
75
80
|
|
76
81
|
Now accepts gzipped files. Huzzah!
|
@@ -33,13 +33,13 @@ class FastaFile < File
|
|
33
33
|
# FastaFile.open('reads.fna.gz').each_record do |header, sequence|
|
34
34
|
# puts [header, sequence.gc].join("\t")
|
35
35
|
# end
|
36
|
-
#
|
36
|
+
#
|
37
37
|
# @example Parsing a fasta file (with truthy value param)
|
38
38
|
# FastaFile.open('reads.fna').each_record(1) do |header, sequence|
|
39
39
|
# # header => 'sequence_1'
|
40
40
|
# # sequence => ['AACTG', 'AGTCGT', ... ]
|
41
41
|
# end
|
42
|
-
#
|
42
|
+
#
|
43
43
|
# @yield The header and sequence for each record in the fasta
|
44
44
|
# file to the block
|
45
45
|
#
|
@@ -55,7 +55,7 @@ class FastaFile < File
|
|
55
55
|
f = Zlib::GzipReader.open(self)
|
56
56
|
rescue Zlib::GzipFile::Error => e
|
57
57
|
f = self
|
58
|
-
end
|
58
|
+
end
|
59
59
|
|
60
60
|
if separate_lines
|
61
61
|
f.each("\n>") do |line|
|
@@ -63,9 +63,9 @@ class FastaFile < File
|
|
63
63
|
yield(header.strip, sequence)
|
64
64
|
end
|
65
65
|
else
|
66
|
-
f.each("\n>") do |line|
|
66
|
+
f.each("\n>") do |line|
|
67
67
|
header, sequence = parse_line(line)
|
68
|
-
yield(header.strip, Sequence.new(sequence))
|
68
|
+
yield(header.strip, Sequence.new(sequence || ""))
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
@@ -74,16 +74,22 @@ class FastaFile < File
|
|
74
74
|
end
|
75
75
|
|
76
76
|
private
|
77
|
+
|
77
78
|
def parse_line(line)
|
78
|
-
line.
|
79
|
+
line.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
|
79
80
|
end
|
80
81
|
|
81
82
|
def parse_line_separately(line)
|
82
|
-
header, sequence =
|
83
|
-
line.
|
84
|
-
|
85
|
-
|
86
|
-
|
83
|
+
header, sequence =
|
84
|
+
line.split("\n", 2).map { |s| s.gsub(/>/, '') }
|
85
|
+
|
86
|
+
if sequence.nil?
|
87
|
+
sequences = []
|
88
|
+
else
|
89
|
+
sequences = sequence.split("\n")
|
90
|
+
.reject { |s| s.empty? }
|
91
|
+
.map { |s| Sequence.new(s) }
|
92
|
+
end
|
87
93
|
|
88
94
|
[header, sequences]
|
89
95
|
end
|
data/lib/parse_fasta/version.rb
CHANGED
data/spec/lib/fasta_file_spec.rb
CHANGED
@@ -20,17 +20,9 @@ require 'spec_helper'
|
|
20
20
|
|
21
21
|
describe FastaFile do
|
22
22
|
describe "#each_record" do
|
23
|
-
let(:records) {
|
24
|
-
|
25
|
-
|
26
|
-
["seq3", "yyyyyyyyyyyyyyyNNN"]]
|
27
|
-
}
|
28
|
-
|
29
|
-
let(:truthy_records) {
|
30
|
-
[["seq1 is fun", ["AACTGGNNN"]],
|
31
|
-
["seq2", ["AAT", "CCTGNNN"]],
|
32
|
-
["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]]]
|
33
|
-
}
|
23
|
+
let(:records) { Helpers::RECORDS }
|
24
|
+
|
25
|
+
let(:truthy_records) { Helpers::TRUTHY_RECORDS }
|
34
26
|
let(:f_handle) { FastaFile.open(@fname).each_record { |s| } }
|
35
27
|
|
36
28
|
shared_examples_for "any FastaFile" do
|
@@ -61,7 +53,7 @@ describe FastaFile do
|
|
61
53
|
expect(all_Sequences).to be true
|
62
54
|
end
|
63
55
|
end
|
64
|
-
|
56
|
+
|
65
57
|
end
|
66
58
|
end
|
67
59
|
|
@@ -96,6 +88,5 @@ describe FastaFile do
|
|
96
88
|
expect(f_handle).to be_an_instance_of FastaFile
|
97
89
|
end
|
98
90
|
end
|
99
|
-
end
|
91
|
+
end
|
100
92
|
end
|
101
|
-
|
data/spec/lib/seq_file_spec.rb
CHANGED
@@ -20,15 +20,12 @@ require 'spec_helper'
|
|
20
20
|
|
21
21
|
describe SeqFile do
|
22
22
|
describe "#each_record" do
|
23
|
-
let(:records) {
|
24
|
-
[["seq1 is fun", "AACTGGNNN"],
|
25
|
-
["seq2", "AATCCTGNNN"],
|
26
|
-
["seq3", "yyyyyyyyyyyyyyyNNN"]]
|
27
|
-
}
|
28
23
|
|
29
24
|
context "when input is a fasta file" do
|
25
|
+
let(:records) { Helpers::RECORDS }
|
26
|
+
|
30
27
|
let(:f_handle) { SeqFile.open(@fname).each_record { |s| } }
|
31
|
-
|
28
|
+
|
32
29
|
shared_examples_for "parsing a fasta file" do
|
33
30
|
it "yields proper header and sequence for each record" do
|
34
31
|
expect { |b|
|
@@ -96,7 +93,7 @@ describe SeqFile do
|
|
96
93
|
end
|
97
94
|
end
|
98
95
|
end
|
99
|
-
|
96
|
+
|
100
97
|
context "with a 4 line per record fastq file" do
|
101
98
|
describe "#each_record" do
|
102
99
|
context "with a gzipped file" do
|
data/spec/spec_helper.rb
CHANGED
@@ -21,3 +21,22 @@ Coveralls.wear!
|
|
21
21
|
|
22
22
|
require 'parse_fasta'
|
23
23
|
|
24
|
+
module Helpers
|
25
|
+
|
26
|
+
RECORDS = [["empty seq at beginning", ""],
|
27
|
+
["seq1 is fun", "AACTGGNNN"],
|
28
|
+
["seq2", "AATCCTGNNN"],
|
29
|
+
["empty seq 1", ""],
|
30
|
+
["empty seq 2", ""],
|
31
|
+
["seq3", "yyyyyyyyyyyyyyyNNN"],
|
32
|
+
["empty seq at end", ""]]
|
33
|
+
|
34
|
+
TRUTHY_RECORDS = [["empty seq at beginning", []],
|
35
|
+
["seq1 is fun", ["AACTGGNNN"]],
|
36
|
+
["seq2", ["AAT", "CCTGNNN"]],
|
37
|
+
["empty seq 1", []],
|
38
|
+
["empty seq 2", []],
|
39
|
+
["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]],
|
40
|
+
["empty seq at end", []]]
|
41
|
+
|
42
|
+
end
|
data/test_files/test.fa
CHANGED
data/test_files/test.fa.gz
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.0
|
4
|
+
version: 1.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|