parse_fasta 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +42 -1
- data/lib/parse_fasta/fasta_file.rb +37 -6
- data/lib/parse_fasta/version.rb +1 -1
- data/spec/lib/fasta_file_spec.rb +29 -15
- data/spec/lib/sequence_spec.rb +5 -1
- data/test_files/test.fa +4 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae03847d56379d572d6118a176876a1e11a21a49
|
4
|
+
data.tar.gz: ffb84a464f8f7057f3a363b2b872f4b60fcca9c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b6b3694de307b868df3b1d5b38b08e539e545827d8d5f47a8b41a550f60d6354164b0e563b6eacf39ab63bd844c2ab68578fc0bfbc5fd5a1f3a63b31a09cfc9
|
7
|
+
data.tar.gz: 719f6fb5c112b06ecf969662617de3bdc02156a09fcb542ff0a9553de55f2d7d6400fc2d6384be27c780517b1ab95887991a8a22c9d05ab3070e7d0e2b05962f
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# parse_fasta #
|
2
2
|
|
3
|
+
[Gem Version](http://badge.fury.io/rb/parse_fasta)
|
4
|
+
|
3
5
|
So you want to parse a fasta file...
|
4
6
|
|
5
7
|
## Installation ##
|
@@ -27,7 +29,7 @@ and over.
|
|
27
29
|
## Documentation ##
|
28
30
|
|
29
31
|
Check out
|
30
|
-
[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.
|
32
|
+
[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.3.0/frames)
|
31
33
|
to see the full documentation.
|
32
34
|
|
33
35
|
## Usage ##
|
@@ -56,6 +58,45 @@ Now we can parse fastq files as well!
|
|
56
58
|
|
57
59
|
## Versions ##
|
58
60
|
|
61
|
+
### 1.3.0 ###
|
62
|
+
|
63
|
+
Add additional functionality to `each_record` method.
|
64
|
+
|
65
|
+
#### Info ####
|
66
|
+
|
67
|
+
I often like to use the fasta format for other things like so
|
68
|
+
|
69
|
+
>fruits
|
70
|
+
pineapple
|
71
|
+
pear
|
72
|
+
peach
|
73
|
+
>veggies
|
74
|
+
peppers
|
75
|
+
parsnip
|
76
|
+
peas
|
77
|
+
|
78
|
+
rather than having this in a two column file like this
|
79
|
+
|
80
|
+
fruit,pineapple
|
81
|
+
fruit,pear
|
82
|
+
fruit,peach
|
83
|
+
veggie,peppers
|
84
|
+
veggie,parsnip
|
85
|
+
veggie,peas
|
86
|
+
|
87
|
+
So I added functionality to `each_record` to keep each line of a record
|
88
|
+
separate in an array. Here's an example using the above file.
|
89
|
+
|
90
|
+
info = []
|
91
|
+
FastaFile.open(f, 'r').each_record(1) do |header, lines|
|
92
|
+
info << [header, lines]
|
93
|
+
end
|
94
|
+
|
95
|
+
Then info will contain the following arrays
|
96
|
+
|
97
|
+
['fruits', ['pineapple', 'pear', 'peach']],
|
98
|
+
['veggies', ['peppers', 'parsnip', 'peas']]
|
99
|
+
|
59
100
|
### 1.2.0 ###
|
60
101
|
|
61
102
|
Added `mean_qual` method to the `Quality` class.
|
@@ -22,20 +22,42 @@ class FastaFile < File
|
|
22
22
|
# Analogous to File#each_line, #each_record is used to go through a
|
23
23
|
# fasta file record by record.
|
24
24
|
#
|
25
|
-
# @
|
25
|
+
# @param separate_lines [Object] If truthy, separate lines of record
|
26
|
+
# into an array, but if falsy, yield a Sequence object for the
|
27
|
+
# sequence instead.
|
28
|
+
#
|
29
|
+
# @example Parsing a fasta file (default behavior)
|
26
30
|
# FastaFile.open('reads.fna', 'r').each_record do |header, sequence|
|
27
31
|
# puts [header, sequence.gc].join("\t")
|
28
32
|
# end
|
29
33
|
#
|
34
|
+
# @example Parsing a fasta file (with truthy value param)
|
35
|
+
# FastaFile.open('reads.fna','r').each_record(1) do |header, sequence|
|
36
|
+
# # header => 'sequence_1'
|
37
|
+
# # sequence => ['AACTG', 'AGTCGT', ... ]
|
38
|
+
# end
|
39
|
+
#
|
30
40
|
# @yield The header and sequence for each record in the fasta
|
31
41
|
# file to the block
|
42
|
+
#
|
32
43
|
# @yieldparam header [String] The header of the fasta record without
|
33
44
|
# the leading '>'
|
34
|
-
#
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
45
|
+
#
|
46
|
+
# @yieldparam sequence [Sequence, Array<String>] The sequence of the
|
47
|
+
# fasta record. If `separate_lines` is falsy (the default
|
48
|
+
# behavior), will be Sequence, but if truthy will be
|
49
|
+
# Array<String>.
|
50
|
+
def each_record(separate_lines=nil)
|
51
|
+
if separate_lines
|
52
|
+
self.each("\n>") do |line|
|
53
|
+
header, sequence = parse_line_separately(line)
|
54
|
+
yield(header.strip, sequence)
|
55
|
+
end
|
56
|
+
else
|
57
|
+
self.each("\n>") do |line|
|
58
|
+
header, sequence = parse_line(line)
|
59
|
+
yield(header.strip, Sequence.new(sequence))
|
60
|
+
end
|
39
61
|
end
|
40
62
|
end
|
41
63
|
|
@@ -43,4 +65,13 @@ class FastaFile < File
|
|
43
65
|
def parse_line(line)
|
44
66
|
line.chomp.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
|
45
67
|
end
|
68
|
+
|
69
|
+
def parse_line_separately(line)
|
70
|
+
#line.chomp.split("\n", 2).map { |s| s.gsub(/>/, '') }
|
71
|
+
header, sequence =
|
72
|
+
line.chomp.split("\n", 2).map { |s| s.gsub(/>/, '') }
|
73
|
+
sequences = sequence.split("\n").reject { |s| s.empty? }
|
74
|
+
|
75
|
+
[header, sequences]
|
76
|
+
end
|
46
77
|
end
|
data/lib/parse_fasta/version.rb
CHANGED
data/spec/lib/fasta_file_spec.rb
CHANGED
@@ -22,25 +22,39 @@ describe FastaFile do
|
|
22
22
|
describe "#each_record" do
|
23
23
|
|
24
24
|
let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
seqs
|
25
|
+
|
26
|
+
context "with no arguments" do
|
27
|
+
it "yields header and sequence for each record in a fasta file" do
|
28
|
+
seqs = []
|
29
|
+
FastaFile.open(fname, 'r').each_record do |header, sequence|
|
30
|
+
seqs << [header, sequence]
|
31
|
+
end
|
32
|
+
|
33
|
+
expect(seqs).to eq([["seq1 is fun", "AACTGGNNN"],
|
34
|
+
["seq2", "AATCCTGNNN"],
|
35
|
+
["seq3", "yyyyyyyyyyyyyyyNNN"]])
|
36
|
+
|
29
37
|
end
|
30
|
-
|
31
|
-
expect(seqs).to eq([["seq1 is fun", "AACTGGend"],
|
32
|
-
["seq2", "AATCCTGend"],
|
33
|
-
["seq3", "yyyyyyyyyyyyyyyend"]])
|
34
38
|
|
39
|
+
it "yields sequence of type Sequence as second parameter" do
|
40
|
+
FastaFile.open(fname, 'r').each_record do |header, sequence|
|
41
|
+
expect(sequence).to be_an_instance_of Sequence
|
42
|
+
break
|
43
|
+
end
|
44
|
+
end
|
35
45
|
end
|
36
46
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
47
|
+
context "with a truthy argument" do
|
48
|
+
it "yields header and array of lines for each record" do
|
49
|
+
seqs = []
|
50
|
+
FastaFile.open(fname, 'r').each_record(1) do |header, sequence|
|
51
|
+
seqs << [header, sequence]
|
52
|
+
end
|
53
|
+
|
54
|
+
expect(seqs).to eq([["seq1 is fun", ["AACTGGNNN"]],
|
55
|
+
["seq2", ["AAT", "CCTGNNN"]],
|
56
|
+
["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]]])
|
42
57
|
end
|
43
|
-
|
44
|
-
end
|
58
|
+
end
|
45
59
|
end
|
46
60
|
end
|
data/spec/lib/sequence_spec.rb
CHANGED
@@ -20,8 +20,12 @@ require 'spec_helper'
|
|
20
20
|
require 'bio'
|
21
21
|
|
22
22
|
describe Sequence do
|
23
|
-
describe "#gc" do
|
24
23
|
|
24
|
+
it "inherits from String" do
|
25
|
+
expect(Sequence.new('ACTG')).to be_a String
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "#gc" do
|
25
29
|
it "gives the same answer as BioRuby" do
|
26
30
|
s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
|
27
31
|
bioruby_gc = Bio::Sequence::NA.new(s).gc_content
|
data/test_files/test.fa
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|