fasta_util 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "shoulda", ">= 0"
10
+ gem "bundler", "~> 1.0.0"
11
+ gem "jeweler", "~> 1.5.2"
12
+ gem "rcov", ">= 0"
13
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,20 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ git (1.2.5)
5
+ jeweler (1.5.2)
6
+ bundler (~> 1.0.0)
7
+ git (>= 1.2.5)
8
+ rake
9
+ rake (0.8.7)
10
+ rcov (0.9.9)
11
+ shoulda (2.11.3)
12
+
13
+ PLATFORMS
14
+ ruby
15
+
16
+ DEPENDENCIES
17
+ bundler (~> 1.0.0)
18
+ jeweler (~> 1.5.2)
19
+ rcov
20
+ shoulda
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 robsyme
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = fasta_util
2
+
3
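+ A simple tool for performing common tasks with fasta files: filtering, wrapping, calculating common statistics, sorting etc.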
4
+
5
+ == Contributing to fasta_util
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or a change to them is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2011 robsyme. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,54 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+
12
+ require 'jeweler'
13
+ Jeweler::Tasks.new do |gem|
14
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
+ gem.name = "fasta_util"
16
+ gem.homepage = "http://github.com/robsyme/fasta_util"
17
+ gem.license = "MIT"
18
+ gem.summary = %Q{A simple tool for performing common tasks with fasta files.}
19
+ gem.description = %Q{Easy fasta filtering, wrapping, calculating common statistics, sorting etc. Based on the fasta_tool script that I think was written by Jason Stajich.}
20
+ gem.email = "rob.syme@gmail.com"
21
+ gem.authors = ["robsyme"]
22
+ # Include your dependencies below. Runtime dependencies are required when using your gem,
23
+ # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
24
+ gem.add_runtime_dependency 'bio', '>= 1.4.1'
25
+ gem.add_runtime_dependency 'thor', '>= 0.14.6'
26
+ gem.add_development_dependency 'rspec', '>= 1.2.3'
27
+ end
28
+ Jeweler::RubygemsDotOrgTasks.new
29
+
30
+ require 'rake/testtask'
31
+ Rake::TestTask.new(:test) do |test|
32
+ test.libs << 'lib' << 'test'
33
+ test.pattern = 'test/**/test_*.rb'
34
+ test.verbose = true
35
+ end
36
+
37
+ require 'rcov/rcovtask'
38
+ Rcov::RcovTask.new do |test|
39
+ test.libs << 'test'
40
+ test.pattern = 'test/**/test_*.rb'
41
+ test.verbose = true
42
+ end
43
+
44
+ task :default => :test
45
+
46
+ require 'rake/rdoctask'
47
+ Rake::RDocTask.new do |rdoc|
48
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
49
+
50
+ rdoc.rdoc_dir = 'rdoc'
51
+ rdoc.title = "fasta_util #{version}"
52
+ rdoc.rdoc_files.include('README*')
53
+ rdoc.rdoc_files.include('lib/**/*.rb')
54
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.0
data/bin/fasta_util ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require 'fasta_util'
3
+
4
+ FastaUtility.start
@@ -0,0 +1,75 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{fasta_util}
8
+ s.version = "0.2.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["robsyme"]
12
+ s.date = %q{2011-02-17}
13
+ s.default_executable = %q{fasta_util}
14
+ s.description = %q{Easy fasta filtering, wrapping, calculating common statistics, sorting etc. Based on the fasta_tool script that I think was written by Jason Stajich.}
15
+ s.email = %q{rob.syme@gmail.com}
16
+ s.executables = ["fasta_util"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE.txt",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ "Gemfile",
24
+ "Gemfile.lock",
25
+ "LICENSE.txt",
26
+ "README.rdoc",
27
+ "Rakefile",
28
+ "VERSION",
29
+ "bin/fasta_util",
30
+ "fasta_util.gemspec",
31
+ "lib/fasta_util.rb",
32
+ "test/helper.rb",
33
+ "test/test_fasta_util.rb"
34
+ ]
35
+ s.homepage = %q{http://github.com/robsyme/fasta_util}
36
+ s.licenses = ["MIT"]
37
+ s.require_paths = ["lib"]
38
+ s.rubygems_version = %q{1.5.0}
39
+ s.summary = %q{A simple tool for performing common tasks with fasta files.}
40
+ s.test_files = [
41
+ "test/helper.rb",
42
+ "test/test_fasta_util.rb"
43
+ ]
44
+
45
+ if s.respond_to? :specification_version then
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
50
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
51
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
52
+ s.add_development_dependency(%q<rcov>, [">= 0"])
53
+ s.add_runtime_dependency(%q<bio>, [">= 1.4.1"])
54
+ s.add_runtime_dependency(%q<thor>, [">= 0.14.6"])
55
+ s.add_development_dependency(%q<rspec>, [">= 1.2.3"])
56
+ else
57
+ s.add_dependency(%q<shoulda>, [">= 0"])
58
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
59
+ s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
60
+ s.add_dependency(%q<rcov>, [">= 0"])
61
+ s.add_dependency(%q<bio>, [">= 1.4.1"])
62
+ s.add_dependency(%q<thor>, [">= 0.14.6"])
63
+ s.add_dependency(%q<rspec>, [">= 1.2.3"])
64
+ end
65
+ else
66
+ s.add_dependency(%q<shoulda>, [">= 0"])
67
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
68
+ s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
69
+ s.add_dependency(%q<rcov>, [">= 0"])
70
+ s.add_dependency(%q<bio>, [">= 1.4.1"])
71
+ s.add_dependency(%q<thor>, [">= 0.14.6"])
72
+ s.add_dependency(%q<rspec>, [">= 1.2.3"])
73
+ end
74
+ end
75
+
data/lib/fasta_util.rb ADDED
@@ -0,0 +1,86 @@
1
+ require 'thor'
2
+ require 'bio'
3
+
4
# Thor-based command-line utility offering a handful of tasks for fasta
# files: length statistics, filtering, re-wrapping and sorting by length.
class FastaUtility < Thor
  include Thor::Actions

  # Value object returned by #stats. Struct.new with a String name registers
  # it as the constant Struct::Stats.
  Struct.new("Stats", :sum, :l50, :n50, :count, :mean, :median)

  # Helper methods; wrapped in no_tasks so Thor does not expose them as
  # command-line tasks.
  no_tasks do
    # Computes summary statistics for a list of sequence lengths.
    #
    # lengths - Array of non-negative Integer lengths. May be empty.
    #
    # Returns a Struct::Stats with:
    #   sum    - total of all lengths
    #   l50    - walking the lengths from longest to shortest, the length at
    #            which the running total first exceeds half of sum
    #   n50    - how many lengths are >= l50
    #   count  - number of lengths
    #   mean   - arithmetic mean (Float)
    #   median - middle length; the Float average of the two middle lengths
    #            when count is even
    # For an empty list every field is zero, so the result can still be
    # passed to #format.
    def stats(lengths)
      sorted = lengths.sort { |a, b| b <=> a }

      # Without this guard inject(:+) returns nil and the arithmetic below
      # raises NoMethodError (e.g. when a cutoff filters out every entry).
      # Argument order follows the member order: sum, l50, n50, count, mean, median.
      return Struct::Stats.new(0, 0, 0, 0, 0.0, 0.0) if sorted.empty?

      result = Struct::Stats.new
      total = sorted.inject(:+)
      half = total / 2.0
      running = 0

      result[:sum] = total
      # find returns nil only when every length is 0 (the running total never
      # exceeds half); fall back to 0 so the comparison below cannot hit nil.
      result[:l50] = sorted.find { |length| (running += length) > half } || 0
      result[:n50] = sorted.count { |length| length >= result[:l50] }
      result[:mean] = total.to_f / sorted.length

      middle = sorted.length / 2
      result[:median] =
        if sorted.length.even?
          (sorted[middle - 1] + sorted[middle]) / 2.0
        else
          sorted[middle]
        end

      result[:count] = sorted.length
      result
    end

    # Renders a stats struct as one "Label: value" line per member.
    #
    # stats - a Struct (such as Struct::Stats) whose values are numeric.
    #
    # Labels are capitalized and left-justified to the width of the longest
    # member name. Float values are printed with %f, everything else with %d.
    #
    # Returns the lines joined with newlines as a single String.
    #
    # Note: this shadows Kernel#format inside the class, so the String#%
    # operator is used for the actual formatting.
    def format(stats)
      label_width = stats.members.map { |key| key.to_s.length }.max
      rows = []
      stats.each_pair do |key, value|
        numtype = value.is_a?(Float) ? "f" : "d"
        rows << " %-#{label_width}s: %#{numtype}" % [key.to_s.capitalize, value]
      end
      rows.join("\n")
    end
  end

  desc "filecheck", "Checks to see if a given file exists. Used internally, don't worry about it too much", :hide => true
  # Verifies that +filename+ exists. If it does not, prints a message in red
  # and exits with status 1 so the calling task does not go on to fail with
  # a raw Errno::ENOENT backtrace.
  def filecheck(filename)
    # File.exist? rather than File.exists?, which was removed in Ruby 3.2.
    return if File.exist?(filename)

    say "The file '#{filename}' doesn't seem to exist!", :red
    exit 1
  end

  desc "lengths", "Print a set of summary statistics for the given fasta file, including L50, N50, sum and count."
  method_options [:cutoff, '-c'] => 0
  # Prints summary statistics for every entry in +filename+ and, when the
  # cutoff option is greater than zero, again for only those entries whose
  # length is >= cutoff. A single trailing "*" on a sequence is not counted
  # towards its length.
  def lengths(filename)
    invoke :filecheck

    seq_lengths = []
    # Block form so the file handle is closed once reading is finished.
    Bio::FlatFile.open(filename) do |fasta|
      fasta.each do |entry|
        seq_lengths << ((entry.seq[-1, 1] == "*") ? entry.length - 1 : entry.length)
      end
    end

    say "All entries", :green
    puts format(stats(seq_lengths))
    if options.cutoff > 0
      say "Entries with length >= #{options.cutoff}", :green
      puts format(stats(seq_lengths.select { |l| l >= options.cutoff }))
    end
  end

  desc "filter FILENAME [options]", "Impose a filter or set of filters on entries in a fasta file."
  long_desc "Impose a filter or set of filters on entries in a fasta file where each sequence in the file has to pass all of the filters to be printed."
  method_option :length_cutoff, :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Only entries with length >= cutoff will be returned.'
  method_option :inverse_match, :aliases => '-v', :type => :boolean, :desc => "Return the inverse of the match after all the other filters have been applied."
  method_option :defline_grep, :aliases => '-d', :type => :string, :default => '', :desc => "A regular expression, used to search the entry's definition line."
  # Prints each entry of +filename+ that passes both the length_cutoff and
  # the defline_grep filters; with inverse_match set, prints exactly the
  # entries that fail instead.
  def filter(filename)
    invoke :filecheck

    # Compile the pattern once instead of once per entry.
    defline_pattern = Regexp.new(options.defline_grep)

    Bio::FlatFile.open(filename) do |fasta|
      fasta.each do |entry|
        passed = (entry.length >= options.length_cutoff) &&
                 entry.definition.match(defline_pattern)
        passed = !passed if options.inverse_match
        puts entry if passed
      end
    end
  end

  desc "clean FILENAME [options]", "Clean up a fasta file"
  method_option :wrap_width, :aliases => '-w', :type => :numeric, :desc => 'Wrap the fasta to N columns'
  # Re-emits every entry of +filename+ in fasta format, keeping its
  # definition line as the header and passing the wrap_width option through
  # as the output width.
  def clean(filename)
    invoke :filecheck

    Bio::FlatFile.open(filename) do |fasta|
      fasta.each do |entry|
        puts entry.to_biosequence.output(:fasta, :header => entry.definition, :width => options.wrap_width)
      end
    end
  end

  desc "sort FILENAME [options]", "Sorts a fasta file according to criteria"
  # Prints the entries of +filename+ ordered from longest to shortest.
  def sort(filename)
    invoke :filecheck

    Bio::FlatFile.open(filename) do |fasta|
      longest_first = fasta.to_a.sort { |a, b| b.length <=> a.length }
      longest_first.each { |entry| puts entry }
    end
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'fasta_util'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
+ class TestFastaUtil < Test::Unit::TestCase
4
+ should "probably rename this file and start testing for real" do
5
+ flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fasta_util
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.2.0
6
+ platform: ruby
7
+ authors:
8
+ - robsyme
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-02-17 00:00:00 +08:00
14
+ default_executable: fasta_util
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: shoulda
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :development
25
+ prerelease: false
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ~>
33
+ - !ruby/object:Gem::Version
34
+ version: 1.0.0
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: jeweler
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.5.2
46
+ type: :development
47
+ prerelease: false
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: rcov
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ type: :development
58
+ prerelease: false
59
+ version_requirements: *id004
60
+ - !ruby/object:Gem::Dependency
61
+ name: bio
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 1.4.1
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: *id005
71
+ - !ruby/object:Gem::Dependency
72
+ name: thor
73
+ requirement: &id006 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: 0.14.6
79
+ type: :runtime
80
+ prerelease: false
81
+ version_requirements: *id006
82
+ - !ruby/object:Gem::Dependency
83
+ name: rspec
84
+ requirement: &id007 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: 1.2.3
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: *id007
93
+ description: Easy fasta filtering, wrapping, calculating common statistics, sorting etc. Based on the fasta_tool script that I think was written by Jason Stajich.
94
+ email: rob.syme@gmail.com
95
+ executables:
96
+ - fasta_util
97
+ extensions: []
98
+
99
+ extra_rdoc_files:
100
+ - LICENSE.txt
101
+ - README.rdoc
102
+ files:
103
+ - .document
104
+ - Gemfile
105
+ - Gemfile.lock
106
+ - LICENSE.txt
107
+ - README.rdoc
108
+ - Rakefile
109
+ - VERSION
110
+ - bin/fasta_util
111
+ - fasta_util.gemspec
112
+ - lib/fasta_util.rb
113
+ - test/helper.rb
114
+ - test/test_fasta_util.rb
115
+ has_rdoc: true
116
+ homepage: http://github.com/robsyme/fasta_util
117
+ licenses:
118
+ - MIT
119
+ post_install_message:
120
+ rdoc_options: []
121
+
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ none: false
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ hash: -4274301518574144620
130
+ segments:
131
+ - 0
132
+ version: "0"
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ none: false
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: "0"
139
+ requirements: []
140
+
141
+ rubyforge_project:
142
+ rubygems_version: 1.5.0
143
+ signing_key:
144
+ specification_version: 3
145
+ summary: A simple tool for performing common tasks with fasta files.
146
+ test_files:
147
+ - test/helper.rb
148
+ - test/test_fasta_util.rb