parse_fasta 0.0.5 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/COPYING +674 -0
- data/README.md +56 -6
- data/Rakefile +6 -0
- data/doc/FastaFile.html +292 -0
- data/doc/File.html +212 -0
- data/doc/ParseFasta.html +144 -0
- data/doc/Sequence.html +361 -0
- data/doc/_index.html +136 -0
- data/doc/class_list.html +54 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +57 -0
- data/doc/css/style.css +339 -0
- data/doc/file.README.html +161 -0
- data/doc/file_list.html +56 -0
- data/doc/frames.html +26 -0
- data/doc/index.html +161 -0
- data/doc/js/app.js +219 -0
- data/doc/js/full_list.js +178 -0
- data/doc/js/jquery.js +4 -0
- data/doc/method_list.html +71 -0
- data/doc/top-level-namespace.html +114 -0
- data/lib/parse_fasta/fasta_file.rb +46 -0
- data/lib/parse_fasta/sequence.rb +55 -0
- data/lib/parse_fasta/version.rb +19 -1
- data/lib/parse_fasta.rb +11 -21
- data/parse_fasta.gemspec +3 -0
- data/spec/lib/fasta_file_spec.rb +46 -0
- data/spec/lib/sequence_spec.rb +52 -0
- data/spec/spec_helper.rb +19 -0
- data/test_files/benchmark.rb +70 -0
- data/test_files/test.fa +9 -0
- metadata +75 -5
- data/LICENSE.txt +0 -22
- data/benchmark.rb +0 -22
data/spec/lib/fasta_file_spec.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Copyright 2014 Ryan Moore
|
2
|
+
# Contact: moorer@udel.edu
|
3
|
+
#
|
4
|
+
# This file is part of parse_fasta.
|
5
|
+
#
|
6
|
+
# parse_fasta is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# parse_fasta is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require 'spec_helper'
|
20
|
+
|
21
|
+
describe FastaFile do
|
22
|
+
describe "#each_record" do
|
23
|
+
|
24
|
+
let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }
|
25
|
+
it "yields a block with header and sequence for each record in a fasta file" do
|
26
|
+
seqs = []
|
27
|
+
FastaFile.open(fname, 'r').each_record do |header, sequence|
|
28
|
+
seqs << [header, sequence]
|
29
|
+
end
|
30
|
+
|
31
|
+
expect(seqs).to eq([["seq1 is fun", "AACTGGend"],
|
32
|
+
["seq2", "AATCCTGend"],
|
33
|
+
["seq3", "yyyyyyyyyyyyyyyend"]])
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
it "passes header of type string as first parameter" do
|
38
|
+
sequence_class = nil
|
39
|
+
FastaFile.open(fname, 'r').each_record do |header, sequence|
|
40
|
+
sequence_class = sequence.class
|
41
|
+
break
|
42
|
+
end
|
43
|
+
expect(sequence_class).to be Sequence
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/spec/lib/sequence_spec.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Copyright 2014 Ryan Moore
|
2
|
+
# Contact: moorer@udel.edu
|
3
|
+
#
|
4
|
+
# This file is part of parse_fasta.
|
5
|
+
#
|
6
|
+
# parse_fasta is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# parse_fasta is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require 'spec_helper'
|
20
|
+
require 'bio'
|
21
|
+
|
22
|
+
describe Sequence do
|
23
|
+
describe "#gc" do
|
24
|
+
|
25
|
+
it "gives the same answer as BioRuby" do
|
26
|
+
s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
|
27
|
+
bioruby_gc = Bio::Sequence::NA.new(s).gc_content
|
28
|
+
expect(Sequence.new(s).gc).to eq bioruby_gc
|
29
|
+
end
|
30
|
+
|
31
|
+
context "when sequence isn't empty" do
|
32
|
+
it "calculates gc" do
|
33
|
+
s = Sequence.new('ActGnu')
|
34
|
+
expect(s.gc).to eq(2 / 5.to_f)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
context "when sequence is empty" do
|
39
|
+
it "returns 0" do
|
40
|
+
s = Sequence.new('')
|
41
|
+
expect(s.gc).to eq 0
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
context "there are no A, C, T, G or U (ie only N)" do
|
46
|
+
it "returns 0" do
|
47
|
+
s = Sequence.new('NNNNNnn')
|
48
|
+
expect(s.gc).to eq 0
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Copyright 2014 Ryan Moore
|
2
|
+
# Contact: moorer@udel.edu
|
3
|
+
#
|
4
|
+
# This file is part of parse_fasta.
|
5
|
+
#
|
6
|
+
# parse_fasta is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# parse_fasta is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require 'parse_fasta'
|
data/test_files/benchmark.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Copyright 2014 Ryan Moore
|
4
|
+
# Contact: moorer@udel.edu
|
5
|
+
|
6
|
+
# This file is part of parse_fasta.
|
7
|
+
|
8
|
+
# parse_fasta is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
|
13
|
+
# parse_fasta is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
|
21
|
+
require 'parse_fasta'
|
22
|
+
require 'bio'
|
23
|
+
require 'benchmark'
|
24
|
+
|
25
|
+
def this_parse_fasta fname
|
26
|
+
FastaFile.open(fname, 'r').each_record do |header, sequence|
|
27
|
+
[header, sequence.length].join("\t")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def bioruby_parse_fasta fname
|
32
|
+
Bio::FastaFormat.open(fname).each do |entry|
|
33
|
+
[entry.definition, entry.seq.length].join("\t")
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Benchmark.bmbm do |x|
|
38
|
+
# x.report('parse_fasta') { this_parse_fasta(ARGV.first) }
|
39
|
+
# x.report('bioruby') { bioruby_parse_fasta(ARGV.first) }
|
40
|
+
# end
|
41
|
+
|
42
|
+
####
|
43
|
+
|
44
|
+
def this_gc(str)
|
45
|
+
Sequence.new(str).gc
|
46
|
+
end
|
47
|
+
|
48
|
+
def bioruby_gc(str)
|
49
|
+
Bio::Sequence::NA.new(str).gc_content
|
50
|
+
end
|
51
|
+
|
52
|
+
# make a random sequence of given length
|
53
|
+
def make_seq(num)
|
54
|
+
num.times.reduce('') { |str, n| str << %w[A a C c T t G g N n].sample }
|
55
|
+
end
|
56
|
+
|
57
|
+
s1 = make_seq(2000000)
|
58
|
+
s2 = make_seq(4000000)
|
59
|
+
s3 = make_seq(8000000)
|
60
|
+
|
61
|
+
Benchmark.bmbm do |x|
|
62
|
+
x.report('this_gc 1') { this_gc(s1) }
|
63
|
+
x.report('bioruby_gc 1') { bioruby_gc(s1) }
|
64
|
+
|
65
|
+
x.report('this_gc 2') { this_gc(s2) }
|
66
|
+
x.report('bioruby_gc 2') { bioruby_gc(s2) }
|
67
|
+
|
68
|
+
x.report('this_gc 3') { this_gc(s3) }
|
69
|
+
x.report('bioruby_gc 3') { bioruby_gc(s3) }
|
70
|
+
end
|
data/test_files/test.fa
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,6 +38,48 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bio
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: yard
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
41
83
|
description: So you want to parse a fasta file...
|
42
84
|
email:
|
43
85
|
- moorer@udel.edu
|
@@ -46,14 +88,38 @@ extensions: []
|
|
46
88
|
extra_rdoc_files: []
|
47
89
|
files:
|
48
90
|
- ".gitignore"
|
91
|
+
- COPYING
|
49
92
|
- Gemfile
|
50
|
-
- LICENSE.txt
|
51
93
|
- README.md
|
52
94
|
- Rakefile
|
53
|
-
- benchmark.rb
|
95
|
+
- doc/FastaFile.html
|
96
|
+
- doc/File.html
|
97
|
+
- doc/ParseFasta.html
|
98
|
+
- doc/Sequence.html
|
99
|
+
- doc/_index.html
|
100
|
+
- doc/class_list.html
|
101
|
+
- doc/css/common.css
|
102
|
+
- doc/css/full_list.css
|
103
|
+
- doc/css/style.css
|
104
|
+
- doc/file.README.html
|
105
|
+
- doc/file_list.html
|
106
|
+
- doc/frames.html
|
107
|
+
- doc/index.html
|
108
|
+
- doc/js/app.js
|
109
|
+
- doc/js/full_list.js
|
110
|
+
- doc/js/jquery.js
|
111
|
+
- doc/method_list.html
|
112
|
+
- doc/top-level-namespace.html
|
54
113
|
- lib/parse_fasta.rb
|
114
|
+
- lib/parse_fasta/fasta_file.rb
|
115
|
+
- lib/parse_fasta/sequence.rb
|
55
116
|
- lib/parse_fasta/version.rb
|
56
117
|
- parse_fasta.gemspec
|
118
|
+
- spec/lib/fasta_file_spec.rb
|
119
|
+
- spec/lib/sequence_spec.rb
|
120
|
+
- spec/spec_helper.rb
|
121
|
+
- test_files/benchmark.rb
|
122
|
+
- test_files/test.fa
|
57
123
|
homepage: https://github.com/mooreryan/parse_fasta
|
58
124
|
licenses:
|
59
125
|
- 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
|
@@ -78,4 +144,8 @@ rubygems_version: 2.2.2
|
|
78
144
|
signing_key:
|
79
145
|
specification_version: 4
|
80
146
|
summary: Easy-peasy parsing of fasta files
|
81
|
-
test_files: []
|
147
|
+
test_files:
|
148
|
+
- spec/lib/fasta_file_spec.rb
|
149
|
+
- spec/lib/sequence_spec.rb
|
150
|
+
- spec/spec_helper.rb
|
151
|
+
has_rdoc:
|
data/LICENSE.txt
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
Copyright (c) 2014 Ryan Moore
|
2
|
-
|
3
|
-
MIT License
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
-
a copy of this software and associated documentation files (the
|
7
|
-
"Software"), to deal in the Software without restriction, including
|
8
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
the following conditions:
|
12
|
-
|
13
|
-
The above copyright notice and this permission notice shall be
|
14
|
-
included in all copies or substantial portions of the Software.
|
15
|
-
|
16
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/benchmark.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'parse_fasta'
|
4
|
-
require 'bio'
|
5
|
-
require 'benchmark'
|
6
|
-
|
7
|
-
def parse_fasta fname
|
8
|
-
File.open(fname, 'r').each_record do |header, sequence|
|
9
|
-
[header, sequence.length].join("\t")
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def bioruby fname
|
14
|
-
Bio::FastaFormat.open(fname).each do |entry|
|
15
|
-
[entry.definition, entry.seq.length].join("\t")
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
Benchmark.bmbm do |x|
|
20
|
-
x.report('parse_fasta') { parse_fasta(ARGV.first) }
|
21
|
-
x.report('bioruby') { bioruby(ARGV.first) }
|
22
|
-
end
|