parse_fasta 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +58 -0
- data/Rakefile +2 -0
- data/benchmark.rb +22 -0
- data/lib/parse_fasta/version.rb +3 -0
- data/lib/parse_fasta.rb +3 -0
- data/parse_fasta.gemspec +23 -0
- metadata +44 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2d0570f761a150d332eb2cdae3d1557b13c2e5ee
|
4
|
+
data.tar.gz: 6c7704803d913f27ed40b99ef645d81b34de2f6e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe9d3b8b4b56b821b1d1936b9c9cfe1cccc6b7991bb5cddb0bf030db2a74b168999cea17369781b78e0f81720af50cb27f7d26e4d048fad22926f1eacf28edf2
|
7
|
+
data.tar.gz: bbd86ee5d54b24e7b759496cbd636239621ebac23f8616fdf06703a500b9d78b4353fc4b2e72f4b6d01adf462af48b29b3ca319e0ad8f8cd1b0ee9aaa7ffa9ee
|
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.bundle
|
19
|
+
*.so
|
20
|
+
*.o
|
21
|
+
*.a
|
22
|
+
mkmf.log
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Ryan Moore
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# parse_fasta #
|
2
|
+
|
3
|
+
So you want to parse a fasta file...
|
4
|
+
|
5
|
+
## Installation ##
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'parse_fasta'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install parse_fasta
|
18
|
+
|
19
|
+
## Overview ##
|
20
|
+
|
21
|
+
Provides the method `#each_record` for the `File` class.
|
22
|
+
|
23
|
+
each_record { |header, sequence| block }
|
24
|
+
|
25
|
+
The whole file is not loaded into memory, so have no fear of giant
|
26
|
+
fasta files!
|
27
|
+
|
28
|
+
## Usage ##
|
29
|
+
|
30
|
+
An example that lists the length for each sequence.
|
31
|
+
|
32
|
+
require 'parse_fasta'
|
33
|
+
|
34
|
+
File.open(ARGV.first, 'r').each_record do |header, sequence|
|
35
|
+
puts [header, sequence.length].join("\t")
|
36
|
+
end
|
37
|
+
|
38
|
+
## Benchmark ##
|
39
|
+
|
40
|
+
Just for fun, I wanted to compare the execution time to that of
|
41
|
+
BioRuby. I calculated sequence length for each fasta record with both
|
42
|
+
the `each_record` method from this gem and using the `FastaFormat`
|
43
|
+
class from BioRuby. You can see the test script in `benchmark.rb`.
|
44
|
+
|
45
|
+
The test file contained 2,009,897 Illumina reads and the file size
|
46
|
+
was 1.1 gigabytes. Here are the results from Ruby's `Benchmark` class:
|
47
|
+
|
48
|
+
user system total real
|
49
|
+
parse_fasta 64.530000 1.740000 66.270000 ( 67.081502)
|
50
|
+
bioruby 116.250000 2.260000 118.510000 (120.223710)
|
51
|
+
|
52
|
+
I just wanted a nice, clean way to parse fasta files, but being nearly
|
53
|
+
twice as fasta as BioRuby doesn't hurt either!
|
54
|
+
|
55
|
+
## Notes ##
|
56
|
+
|
57
|
+
Currently it doesn't check whether your file is actually a fasta file
|
58
|
+
or anything, so watch out.
|
data/Rakefile
ADDED
data/benchmark.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'parse_fasta'
|
4
|
+
require 'bio'
|
5
|
+
require 'benchmark'
|
6
|
+
|
7
|
+
def parse_fasta fname
|
8
|
+
File.open(fname, 'r').each_record do |header, sequence|
|
9
|
+
[header, sequence.length].join("\t")
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def bioruby fname
|
14
|
+
Bio::FastaFormat.open(fname).each do |entry|
|
15
|
+
[entry.definition, entry.seq.length].join("\t")
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
Benchmark.bmbm do |x|
|
20
|
+
x.report('parse_fasta') { parse_fasta(ARGV.first) }
|
21
|
+
x.report('bioruby') { bioruby(ARGV.first) }
|
22
|
+
end
|
data/lib/parse_fasta.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# Copyright 2014 Ryan Moore
|
2
|
+
# Contact: moorer@udel.edu
|
2
3
|
|
3
4
|
# This program is free software: you can redistribute it and/or modify
|
4
5
|
# it under the terms of the GNU General Public License as published by
|
@@ -13,6 +14,8 @@
|
|
13
14
|
# You should have received a copy of the GNU General Public License
|
14
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
16
|
|
17
|
+
require "parse_fasta/version"
|
18
|
+
|
16
19
|
class File
|
17
20
|
def each_record
|
18
21
|
self.each("\n>") do |line|
|
data/parse_fasta.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'parse_fasta/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "parse_fasta"
|
8
|
+
spec.version = ParseFasta::VERSION
|
9
|
+
spec.authors = ["Ryan Moore"]
|
10
|
+
spec.email = ["moorer@udel.edu"]
|
11
|
+
spec.summary = %q{Easy-peasy parsing of fasta files}
|
12
|
+
spec.description = %q{So you want to parse a fasta file...}
|
13
|
+
spec.homepage = "https://github.com/mooreryan/parse_fasta"
|
14
|
+
spec.license = "GPLv3: http://www.gnu.org/licenses/gpl.txt"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.6"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
end
|
metadata
CHANGED
@@ -1,25 +1,62 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
12
|
-
dependencies:
|
13
|
-
|
14
|
-
|
11
|
+
date: 2014-05-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: So you want to parse a fasta file...
|
42
|
+
email:
|
43
|
+
- moorer@udel.edu
|
15
44
|
executables: []
|
16
45
|
extensions: []
|
17
46
|
extra_rdoc_files: []
|
18
47
|
files:
|
48
|
+
- ".gitignore"
|
49
|
+
- Gemfile
|
50
|
+
- LICENSE.txt
|
51
|
+
- README.md
|
52
|
+
- Rakefile
|
53
|
+
- benchmark.rb
|
19
54
|
- lib/parse_fasta.rb
|
55
|
+
- lib/parse_fasta/version.rb
|
56
|
+
- parse_fasta.gemspec
|
20
57
|
homepage: https://github.com/mooreryan/parse_fasta
|
21
58
|
licenses:
|
22
|
-
- http://www.gnu.org/licenses/gpl.txt
|
59
|
+
- 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
|
23
60
|
metadata: {}
|
24
61
|
post_install_message:
|
25
62
|
rdoc_options: []
|
@@ -40,5 +77,5 @@ rubyforge_project:
|
|
40
77
|
rubygems_version: 2.2.2
|
41
78
|
signing_key:
|
42
79
|
specification_version: 4
|
43
|
-
summary:
|
80
|
+
summary: Easy-peasy parsing of fasta files
|
44
81
|
test_files: []
|