parse_fasta 0.0.5 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,46 @@
1
+ # Copyright 2014 Ryan Moore
2
+ # Contact: moorer@udel.edu
3
+ #
4
+ # This file is part of parse_fasta.
5
+ #
6
+ # parse_fasta is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # parse_fasta is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'spec_helper'
20
+
21
+ describe FastaFile do
22
+ describe "#each_record" do
23
+
24
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }
25
+ it "yields a block with header and sequence for each record in a fasta file" do
26
+ seqs = []
27
+ FastaFile.open(fname, 'r').each_record do |header, sequence|
28
+ seqs << [header, sequence]
29
+ end
30
+
31
+ expect(seqs).to eq([["seq1 is fun", "AACTGGend"],
32
+ ["seq2", "AATCCTGend"],
33
+ ["seq3", "yyyyyyyyyyyyyyyend"]])
34
+
35
+ end
36
+
37
+ it "passes sequence of type Sequence as second parameter" do
38
+ sequence_class = nil
39
+ FastaFile.open(fname, 'r').each_record do |header, sequence|
40
+ sequence_class = sequence.class
41
+ break
42
+ end
43
+ expect(sequence_class).to be Sequence
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,52 @@
1
+ # Copyright 2014 Ryan Moore
2
+ # Contact: moorer@udel.edu
3
+ #
4
+ # This file is part of parse_fasta.
5
+ #
6
+ # parse_fasta is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # parse_fasta is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'spec_helper'
20
+ require 'bio'
21
+
22
+ describe Sequence do
23
+ describe "#gc" do
24
+
25
+ it "gives the same answer as BioRuby" do
26
+ s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
27
+ bioruby_gc = Bio::Sequence::NA.new(s).gc_content
28
+ expect(Sequence.new(s).gc).to eq bioruby_gc
29
+ end
30
+
31
+ context "when sequence isn't empty" do
32
+ it "calculates gc" do
33
+ s = Sequence.new('ActGnu')
34
+ expect(s.gc).to eq(2 / 5.to_f)
35
+ end
36
+ end
37
+
38
+ context "when sequence is empty" do
39
+ it "returns 0" do
40
+ s = Sequence.new('')
41
+ expect(s.gc).to eq 0
42
+ end
43
+ end
44
+
45
+ context "when there are no A, C, T, G or U (i.e. only N)" do
46
+ it "returns 0" do
47
+ s = Sequence.new('NNNNNnn')
48
+ expect(s.gc).to eq 0
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,19 @@
1
+ # Copyright 2014 Ryan Moore
2
+ # Contact: moorer@udel.edu
3
+ #
4
+ # This file is part of parse_fasta.
5
+ #
6
+ # parse_fasta is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # parse_fasta is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'parse_fasta'
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Copyright 2014 Ryan Moore
4
+ # Contact: moorer@udel.edu
5
+
6
+ # This file is part of parse_fasta.
7
+
8
+ # parse_fasta is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+
13
+ # parse_fasta is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
20
+
21
+ require 'parse_fasta'
22
+ require 'bio'
23
+ require 'benchmark'
24
+
25
+ def this_parse_fasta fname
26
+ FastaFile.open(fname, 'r').each_record do |header, sequence|
27
+ [header, sequence.length].join("\t")
28
+ end
29
+ end
30
+
31
+ def bioruby_parse_fasta fname
32
+ Bio::FastaFormat.open(fname).each do |entry|
33
+ [entry.definition, entry.seq.length].join("\t")
34
+ end
35
+ end
36
+
37
+ # Benchmark.bmbm do |x|
38
+ # x.report('parse_fasta') { this_parse_fasta(ARGV.first) }
39
+ # x.report('bioruby') { bioruby_parse_fasta(ARGV.first) }
40
+ # end
41
+
42
+ ####
43
+
44
+ def this_gc(str)
45
+ Sequence.new(str).gc
46
+ end
47
+
48
+ def bioruby_gc(str)
49
+ Bio::Sequence::NA.new(str).gc_content
50
+ end
51
+
52
+ # make a random sequence of given length
53
+ def make_seq(num)
54
+ num.times.reduce('') { |str, n| str << %w[A a C c T t G g N n].sample }
55
+ end
56
+
57
+ s1 = make_seq(2000000)
58
+ s2 = make_seq(4000000)
59
+ s3 = make_seq(8000000)
60
+
61
+ Benchmark.bmbm do |x|
62
+ x.report('this_gc 1') { this_gc(s1) }
63
+ x.report('bioruby_gc 1') { bioruby_gc(s1) }
64
+
65
+ x.report('this_gc 2') { this_gc(s2) }
66
+ x.report('bioruby_gc 2') { bioruby_gc(s2) }
67
+
68
+ x.report('this_gc 3') { this_gc(s3) }
69
+ x.report('bioruby_gc 3') { bioruby_gc(s3) }
70
+ end
@@ -0,0 +1,9 @@
1
+ >seq1 is fun
2
+ AACTGGend
3
+ >seq2
4
+ AAT
5
+ CCTGend
6
+ >seq3
7
+ yyyyyyyyyy
8
+ yyyyy
9
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-31 00:00:00.000000000 Z
11
+ date: 2014-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,6 +38,48 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bio
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: yard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
41
83
  description: So you want to parse a fasta file...
42
84
  email:
43
85
  - moorer@udel.edu
@@ -46,14 +88,38 @@ extensions: []
46
88
  extra_rdoc_files: []
47
89
  files:
48
90
  - ".gitignore"
91
+ - COPYING
49
92
  - Gemfile
50
- - LICENSE.txt
51
93
  - README.md
52
94
  - Rakefile
53
- - benchmark.rb
95
+ - doc/FastaFile.html
96
+ - doc/File.html
97
+ - doc/ParseFasta.html
98
+ - doc/Sequence.html
99
+ - doc/_index.html
100
+ - doc/class_list.html
101
+ - doc/css/common.css
102
+ - doc/css/full_list.css
103
+ - doc/css/style.css
104
+ - doc/file.README.html
105
+ - doc/file_list.html
106
+ - doc/frames.html
107
+ - doc/index.html
108
+ - doc/js/app.js
109
+ - doc/js/full_list.js
110
+ - doc/js/jquery.js
111
+ - doc/method_list.html
112
+ - doc/top-level-namespace.html
54
113
  - lib/parse_fasta.rb
114
+ - lib/parse_fasta/fasta_file.rb
115
+ - lib/parse_fasta/sequence.rb
55
116
  - lib/parse_fasta/version.rb
56
117
  - parse_fasta.gemspec
118
+ - spec/lib/fasta_file_spec.rb
119
+ - spec/lib/sequence_spec.rb
120
+ - spec/spec_helper.rb
121
+ - test_files/benchmark.rb
122
+ - test_files/test.fa
57
123
  homepage: https://github.com/mooreryan/parse_fasta
58
124
  licenses:
59
125
  - 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
@@ -78,4 +144,8 @@ rubygems_version: 2.2.2
78
144
  signing_key:
79
145
  specification_version: 4
80
146
  summary: Easy-peasy parsing of fasta files
81
- test_files: []
147
+ test_files:
148
+ - spec/lib/fasta_file_spec.rb
149
+ - spec/lib/sequence_spec.rb
150
+ - spec/spec_helper.rb
151
+ has_rdoc:
data/LICENSE.txt DELETED
@@ -1,22 +0,0 @@
1
- Copyright (c) 2014 Ryan Moore
2
-
3
- MIT License
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/benchmark.rb DELETED
@@ -1,22 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'parse_fasta'
4
- require 'bio'
5
- require 'benchmark'
6
-
7
- def parse_fasta fname
8
- File.open(fname, 'r').each_record do |header, sequence|
9
- [header, sequence.length].join("\t")
10
- end
11
- end
12
-
13
- def bioruby fname
14
- Bio::FastaFormat.open(fname).each do |entry|
15
- [entry.definition, entry.seq.length].join("\t")
16
- end
17
- end
18
-
19
- Benchmark.bmbm do |x|
20
- x.report('parse_fasta') { parse_fasta(ARGV.first) }
21
- x.report('bioruby') { bioruby(ARGV.first) }
22
- end