parse_fasta 0.0.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ # Copyright 2014 Ryan Moore
2
+ # Contact: moorer@udel.edu
3
+ #
4
+ # This file is part of parse_fasta.
5
+ #
6
+ # parse_fasta is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # parse_fasta is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'spec_helper'
20
+
21
# Specs for FastaFile#each_record, exercised against the bundled
# test_files/test.fa fixture.
describe FastaFile do
  describe "#each_record" do

    # Fixture path, resolved relative to this spec file.
    let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }

    it "yields a block with header and sequence for each record in a fasta file" do
      seqs = []
      FastaFile.open(fname, 'r').each_record do |header, sequence|
        seqs << [header, sequence]
      end

      # Records whose sequence spans several lines in the fixture are
      # expected back as a single joined string.
      expect(seqs).to eq([["seq1 is fun", "AACTGGend"],
                          ["seq2", "AATCCTGend"],
                          ["seq3", "yyyyyyyyyyyyyyyend"]])
    end

    # The example title now states what is actually asserted: the class of
    # the second yielded value (the sequence), not the type of the header.
    it "passes sequence of type Sequence as second parameter" do
      sequence_class = nil
      FastaFile.open(fname, 'r').each_record do |_header, sequence|
        sequence_class = sequence.class
        break # inspecting the first record is enough
      end
      expect(sequence_class).to be Sequence
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # Copyright 2014 Ryan Moore
2
+ # Contact: moorer@udel.edu
3
+ #
4
+ # This file is part of parse_fasta.
5
+ #
6
+ # parse_fasta is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # parse_fasta is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'spec_helper'
20
+ require 'bio'
21
+
22
# Specs for Sequence#gc.
describe Sequence do
  describe "#gc" do

    it "gives the same answer as BioRuby" do
      # Mixed-case input that also contains n/N and u/U characters.
      bases = 'ACtgcGAtcgCgAaTtGgCcnNuU'
      expected = Bio::Sequence::NA.new(bases).gc_content
      actual = Sequence.new(bases).gc
      expect(actual).to eq expected
    end

    context "when sequence isn't empty" do
      it "calculates gc" do
        # Expected ratio is 2/5 for this input (c and G among A, c, t, G, u).
        seq = Sequence.new('ActGnu')
        expect(seq.gc).to eq(2 / 5.to_f)
      end
    end

    context "when sequence is empty" do
      it "returns 0" do
        expect(Sequence.new('').gc).to eq 0
      end
    end

    context "there are no A, C, T, G or U (ie only N)" do
      it "returns 0" do
        only_n = Sequence.new('NNNNNnn')
        expect(only_n.gc).to eq 0
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # Copyright 2014 Ryan Moore
2
+ # Contact: moorer@udel.edu
3
+ #
4
+ # This file is part of parse_fasta.
5
+ #
6
+ # parse_fasta is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # parse_fasta is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'parse_fasta'
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Copyright 2014 Ryan Moore
4
+ # Contact: moorer@udel.edu
5
+
6
+ # This file is part of parse_fasta.
7
+
8
+ # parse_fasta is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+
13
+ # parse_fasta is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
20
+
21
+ require 'parse_fasta'
22
+ require 'bio'
23
+ require 'benchmark'
24
+
25
# Benchmark workload for parse_fasta: walks every record of the fasta file
# at +fname+ and builds (then discards) a "header<TAB>length" string for it.
def this_parse_fasta(fname)
  fasta = FastaFile.open(fname, 'r')
  fasta.each_record do |head, seq|
    "#{head}\t#{seq.length}"
  end
end
30
+
31
# Benchmark workload for BioRuby: walks every entry of the fasta file at
# +fname+ and builds (then discards) a "definition<TAB>length" string for it.
def bioruby_parse_fasta(fname)
  reader = Bio::FastaFormat.open(fname)
  reader.each do |rec|
    "#{rec.definition}\t#{rec.seq.length}"
  end
end
36
+
37
+ # Benchmark.bmbm do |x|
38
+ # x.report('parse_fasta') { this_parse_fasta(ARGV.first) }
39
+ # x.report('bioruby') { bioruby_parse_fasta(ARGV.first) }
40
+ # end
41
+
42
+ ####
43
+
44
# GC value of +str+ as computed by parse_fasta's Sequence#gc.
def this_gc(str)
  seq = Sequence.new(str)
  seq.gc
end
47
+
48
# GC value of +str+ as computed by BioRuby's Bio::Sequence::NA#gc_content.
def bioruby_gc(str)
  na = Bio::Sequence::NA.new(str)
  na.gc_content
end
51
+
52
# Make a random sequence of the given length.
#
# Each character is drawn independently from upper- and lower-case
# A, C, T, G and N.
#
# @param num [Integer] number of characters to generate
# @return [String] a new string of +num+ characters ('' when +num+ is 0)
def make_seq(num)
  # Build the alphabet once instead of re-allocating it on every iteration.
  alphabet = %w[A a C c T t G g N n]

  # Append into a single buffer; each_with_object returns that buffer.
  num.times.each_with_object('') { |_, seq| seq << alphabet.sample }
end
56
+
57
# Random inputs of 2M, 4M and 8M characters, generated before timing starts.
seqs = [2_000_000, 4_000_000, 8_000_000].map { |len| make_seq(len) }

# For each input, time parse_fasta's gc and then BioRuby's gc_content;
# report labels are numbered 1..3 in input order.
Benchmark.bmbm do |x|
  seqs.each.with_index(1) do |seq, i|
    x.report("this_gc #{i}") { this_gc(seq) }
    x.report("bioruby_gc #{i}") { bioruby_gc(seq) }
  end
end
@@ -0,0 +1,9 @@
1
+ >seq1 is fun
2
+ AACTGGend
3
+ >seq2
4
+ AAT
5
+ CCTGend
6
+ >seq3
7
+ yyyyyyyyyy
8
+ yyyyy
9
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-31 00:00:00.000000000 Z
11
+ date: 2014-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,6 +38,48 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bio
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: yard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
41
83
  description: So you want to parse a fasta file...
42
84
  email:
43
85
  - moorer@udel.edu
@@ -46,14 +88,38 @@ extensions: []
46
88
  extra_rdoc_files: []
47
89
  files:
48
90
  - ".gitignore"
91
+ - COPYING
49
92
  - Gemfile
50
- - LICENSE.txt
51
93
  - README.md
52
94
  - Rakefile
53
- - benchmark.rb
95
+ - doc/FastaFile.html
96
+ - doc/File.html
97
+ - doc/ParseFasta.html
98
+ - doc/Sequence.html
99
+ - doc/_index.html
100
+ - doc/class_list.html
101
+ - doc/css/common.css
102
+ - doc/css/full_list.css
103
+ - doc/css/style.css
104
+ - doc/file.README.html
105
+ - doc/file_list.html
106
+ - doc/frames.html
107
+ - doc/index.html
108
+ - doc/js/app.js
109
+ - doc/js/full_list.js
110
+ - doc/js/jquery.js
111
+ - doc/method_list.html
112
+ - doc/top-level-namespace.html
54
113
  - lib/parse_fasta.rb
114
+ - lib/parse_fasta/fasta_file.rb
115
+ - lib/parse_fasta/sequence.rb
55
116
  - lib/parse_fasta/version.rb
56
117
  - parse_fasta.gemspec
118
+ - spec/lib/fasta_file_spec.rb
119
+ - spec/lib/sequence_spec.rb
120
+ - spec/spec_helper.rb
121
+ - test_files/benchmark.rb
122
+ - test_files/test.fa
57
123
  homepage: https://github.com/mooreryan/parse_fasta
58
124
  licenses:
59
125
  - 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
@@ -78,4 +144,8 @@ rubygems_version: 2.2.2
78
144
  signing_key:
79
145
  specification_version: 4
80
146
  summary: Easy-peasy parsing of fasta files
81
- test_files: []
147
+ test_files:
148
+ - spec/lib/fasta_file_spec.rb
149
+ - spec/lib/sequence_spec.rb
150
+ - spec/spec_helper.rb
151
+ has_rdoc:
data/LICENSE.txt DELETED
@@ -1,22 +0,0 @@
1
- Copyright (c) 2014 Ryan Moore
2
-
3
- MIT License
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/benchmark.rb DELETED
@@ -1,22 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'parse_fasta'
4
- require 'bio'
5
- require 'benchmark'
6
-
7
- def parse_fasta fname
8
- File.open(fname, 'r').each_record do |header, sequence|
9
- [header, sequence.length].join("\t")
10
- end
11
- end
12
-
13
- def bioruby fname
14
- Bio::FastaFormat.open(fname).each do |entry|
15
- [entry.definition, entry.seq.length].join("\t")
16
- end
17
- end
18
-
19
- Benchmark.bmbm do |x|
20
- x.report('parse_fasta') { parse_fasta(ARGV.first) }
21
- x.report('bioruby') { bioruby(ARGV.first) }
22
- end