fasta_util 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/.document CHANGED
File without changes
data/Gemfile CHANGED
File without changes
data/Gemfile.lock CHANGED
File without changes
data/LICENSE.txt CHANGED
File without changes
data/README.rdoc CHANGED
File without changes
data/Rakefile CHANGED
File without changes
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
data/bin/fasta_util CHANGED
@@ -1,4 +1,88 @@
1
1
  #!/usr/bin/env ruby
2
- require 'fasta_util'
2
+ require 'thor'
3
+ require 'bio'
3
4
 
5
+ class FastaUtility < Thor
6
+ include Thor::Actions
7
+ Struct.new("Stats", :sum, :l50, :n50, :count, :mean, :median)
8
+
9
+ no_tasks do
10
# Summarise a collection of sequence lengths.
#
# lengths - Array of numeric lengths; may be empty.
#
# Returns a Struct::Stats whose members are:
#   sum    - total of all lengths
#   l50    - walking the lengths from longest to shortest, the length of the
#            entry at which the running total first exceeds half of sum
#   n50    - number of entries whose length is >= l50
#   count  - number of entries
#   mean   - sum / count, as a Float
#   median - middle length (Float average of the two middle values when
#            count is even, otherwise the middle value itself)
# For an empty list every member is zero, so the result never contains nil.
#
# NOTE(review): many assembly tools use these two names the other way round
# (N50 = a length, L50 = a count) - confirm which convention is intended
# before renaming anything; the values computed here are unchanged.
def stats(lengths)
  descending = lengths.sort.reverse
  size = descending.length
  # Seeding with 0 keeps the sum defined (0) when there are no lengths.
  total = descending.inject(0, :+)

  summary = Struct::Stats.new
  summary[:sum] = total
  summary[:count] = size

  if size.zero?
    summary[:l50] = 0
    summary[:n50] = 0
    summary[:mean] = 0.0
    summary[:median] = 0.0
    return summary
  end

  half = total / 2.0
  running = 0
  # find returns nil only when the running total never exceeds half, i.e.
  # when the total is zero; fall back to the shortest length in that case.
  threshold = descending.find { |len| (running += len) > half } || descending.last
  summary[:l50] = threshold
  summary[:n50] = descending.count { |len| len >= threshold }
  summary[:mean] = total.to_f / size

  middle = size / 2
  summary[:median] =
    if size.even?
      (descending[middle - 1] + descending[middle]) / 2.0
    else
      descending[middle]
    end

  summary
end
23
+
24
# Render a stats struct as aligned "Label: value" lines.
#
# stats - an object responding to members and each_pair, such as the struct
#         returned by stats.
#
# Each line is one leading space, the capitalised member name left-justified
# to the width of the longest member name, ": ", then the value. Floats are
# printed with %f, nil values as "n/a", and everything else with %d.
#
# Returns the lines joined with "\n" (no trailing newline).
def format(stats)
  # to_s so the width works whether members are Symbols or Strings.
  label_width = stats.members.map { |name| name.to_s.length }.max

  rows = stats.each_pair.map do |name, value|
    directive, shown =
      case value
      when nil   then ["s", "n/a"] # an unset member would make %d raise
      when Float then ["f", value]
      else            ["d", value]
      end
    " %-#{label_width}s: %#{directive}" % [name.to_s.capitalize, shown]
  end

  rows.join("\n")
end
33
+ end
34
+
35
+ desc "filecheck", "Checks to see if a given file exists. Used internally, don't worry about it too much", :hide => true
36
# Print a red warning when +filename+ does not exist.
#
# This only reports the problem: it neither raises nor exits, so the calling
# command continues after the warning.
def filecheck(filename)
  # File.exist? rather than the deprecated File.exists? alias, which newer
  # Ruby versions no longer provide.
  return if File.exist?(filename)

  say "The file '#{filename}' doesn't seem to exist!", :red
end
39
+
40
+ desc "lengths", "Print a set of summary statistics for the given fasta file, including L50, N50, sum and count."
41
+ method_options [:cutoff, '-c'] => 0
42
# Print summary statistics for the entries of a FASTA file.
#
# Statistics are printed once for all entries and, when the cutoff option is
# greater than zero, a second time for only the entries whose length is at
# least the cutoff.
#
# filename - path of the file to read.
def lengths(filename)
  invoke :filecheck

  entry_lengths = []
  # Block form so the underlying file is closed once reading is finished.
  Bio::FlatFile.open(filename) do |flatfile|
    flatfile.each do |entry|
      # A trailing "*" is not counted towards the entry's length
      # (presumably a translation stop marker - confirm).
      ends_with_star = entry.seq[-1, 1] == "*"
      entry_lengths << (ends_with_star ? entry.length - 1 : entry.length)
    end
  end

  say "All entries", :green
  puts format(stats(entry_lengths))

  cutoff = options.cutoff
  return unless cutoff > 0

  say "Entries with length >= #{cutoff}", :green
  puts format(stats(entry_lengths.select { |len| len >= cutoff }))
end
53
+
54
+ desc "filter FILENAME [options]", "Impose a filter or set of filters on entries in a fasta file."
55
+ long_desc "Impose a filter or set of filters on entries in a fasta file where each sequence in the file has to pass all of the filters to be printed."
56
+ method_option :length_cutoff, :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Only entries with length >= cutoff will be returned.'
57
+ method_option :inverse_match, :aliases => '-v', :type => :boolean, :desc => "Return the inverse of the match after all the other filters have been applied."
58
+ method_option :defline_grep, :aliases => '-d', :type => :string, :default => '', :desc => "A regular expression, used to search the entry's definition line."
59
# Print the entries of a FASTA file that pass every active filter.
#
# An entry passes when its length is at least the length_cutoff option and
# its definition line matches the defline_grep regular expression. When the
# inverse_match option is set the combined result is negated, so only the
# entries that failed are printed.
#
# filename - path of the file to read.
def filter(filename)
  invoke :filecheck

  # Read the options and compile the pattern once, not once per entry.
  min_length = options.length_cutoff
  defline_pattern = Regexp.new(options.defline_grep)
  invert = options.inverse_match

  # Block form so the underlying file is closed once reading is finished.
  Bio::FlatFile.open(filename) do |flatfile|
    flatfile.each do |entry|
      passed = entry.length >= min_length && entry.definition.match(defline_pattern)
      passed = !passed if invert
      puts entry if passed
    end
  end
end
69
+
70
+ desc "clean FILENAME [options]", "Clean up a fasta file"
71
+ method_option :wrap_width, :aliases => '-w', :type => :numeric, :desc => 'Wrap the fasta to N columns'
72
# Re-emit every entry of a FASTA file through BioRuby's FASTA formatter.
#
# Each entry keeps its definition line as the header; the wrap_width option
# is passed through as the formatter's :width.
#
# filename - path of the file to read.
def clean(filename)
  invoke :filecheck

  width = options.wrap_width
  # Block form so the underlying file is closed once reading is finished.
  Bio::FlatFile.open(filename) do |flatfile|
    flatfile.each do |entry|
      puts entry.to_biosequence.output(:fasta, :header => entry.definition, :width => width)
    end
  end
end
78
+
79
+
80
+ desc "sort FILENAME [options]", "Sorts a fasta file according to criteria"
81
# Print the entries of a FASTA file ordered from longest to shortest.
#
# All entries are read into memory first, because they have to be sorted
# before anything can be printed.
#
# filename - path of the file to read.
def sort(filename)
  invoke :filecheck

  entries = []
  # Block form so the underlying file is closed once reading is finished.
  Bio::FlatFile.open(filename) { |flatfile| entries = flatfile.to_a }

  entries.sort { |a, b| b.length <=> a.length }.each { |entry| puts entry }
end
87
+ end
4
88
  FastaUtility.start
data/fasta_util.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{fasta_util}
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["robsyme"]
12
- s.date = %q{2011-02-17}
12
+ s.date = %q{2011-02-18}
13
13
  s.default_executable = %q{fasta_util}
14
14
  s.description = %q{Easy fasta filtering, wrapping, calculating common statistics, sorting etc. Based on the fasta_tool script that I think was written by Jason Stajich.}
15
15
  s.email = %q{rob.syme@gmail.com}
@@ -28,7 +28,6 @@ Gem::Specification.new do |s|
28
28
  "VERSION",
29
29
  "bin/fasta_util",
30
30
  "fasta_util.gemspec",
31
- "lib/fasta_util.rb",
32
31
  "test/helper.rb",
33
32
  "test/test_fasta_util.rb"
34
33
  ]
data/test/helper.rb CHANGED
File without changes
File without changes
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: fasta_util
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.2.1
5
+ version: 0.2.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - robsyme
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-02-17 00:00:00 +08:00
13
+ date: 2011-02-18 00:00:00 +08:00
14
14
  default_executable: fasta_util
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -109,7 +109,6 @@ files:
109
109
  - VERSION
110
110
  - bin/fasta_util
111
111
  - fasta_util.gemspec
112
- - lib/fasta_util.rb
113
112
  - test/helper.rb
114
113
  - test/test_fasta_util.rb
115
114
  has_rdoc: true
@@ -126,7 +125,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
126
125
  requirements:
127
126
  - - ">="
128
127
  - !ruby/object:Gem::Version
129
- hash: -728010957941595934
128
+ hash: -443172315906729972
130
129
  segments:
131
130
  - 0
132
131
  version: "0"
data/lib/fasta_util.rb DELETED
@@ -1,86 +0,0 @@
1
- require 'thor'
2
- require 'bio'
3
-
4
- class FastaUtility < Thor
5
- include Thor::Actions
6
- Struct.new("Stats", :sum, :l50, :n50, :count, :mean, :median)
7
-
8
- no_tasks do
9
- def stats(lengths)
10
- lengths = lengths.sort{|a, b| b <=> a}
11
- stats = Struct::Stats.new
12
-
13
- temp_sum = 0
14
- stats[:sum] = lengths.inject(:+)
15
- stats[:l50] = lengths.find{|length| (temp_sum += length) > stats[:sum]/2.0}
16
- stats[:n50] = lengths.count{|length| length >= stats[:l50]}
17
- stats[:mean] = stats[:sum].to_f/lengths.length
18
- stats[:median] = (lengths.length % 2 == 0) ? (lengths[lengths.length/2-1] + lengths[lengths.length/2])/2.0 : lengths[lengths.length/2]
19
- stats[:count] = lengths.count
20
- return stats
21
- end
22
-
23
- def format(stats)
24
- output = []
25
- buffer_length = stats.members.map{|key| key.length}.max
26
- stats.each_pair do |key, value|
27
- numtype = value.is_a?(Float) ? "f" : "d"
28
- output << " %-#{buffer_length}s: %#{numtype}" % [key.to_s.capitalize, value]
29
- end
30
- output.join("\n")
31
- end
32
- end
33
-
34
- desc "filecheck", "Checks to see if a given file exists. Used internally, don't worry about it too much", :hide => true
35
- def filecheck(filename)
36
- say "The file '#{filename}' doesn't seem to exist!", :red unless File.exists?(filename)
37
- end
38
-
39
- desc "lengths", "Print a set of summary statistics for the given fasta file, including L50, N50, sum and count."
40
- method_options [:cutoff, '-c'] => 0
41
- def lengths(filename)
42
- invoke :filecheck
43
- lengths = Bio::FlatFile.open(filename).map{|entry| (entry.seq[-1,1] == "*") ? entry.length - 1 : entry.length}
44
-
45
- say "All entries", :green
46
- puts format(stats(lengths))
47
- if options.cutoff > 0
48
- say "Entries with length >= #{options.cutoff}", :green
49
- puts format(stats(lengths.find_all{|l| l >= options.cutoff}))
50
- end
51
- end
52
-
53
- desc "filter FILENAME [options]", "Impose a filter or set of filters on entries in a fasta file."
54
- long_desc "Impose a filter or set of filters on entries in a fasta file where each sequence in the file has to pass all of the filters to be printed."
55
- method_option :length_cutoff, :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Only entries with length >= cutoff will be returned.'
56
- method_option :inverse_match, :aliases => '-v', :type => :boolean, :desc => "Return the inverse of the match after all the other filters have been applied."
57
- method_option :defline_grep, :aliases => '-d', :type => :string, :default => '', :desc => "A regular expression, used to search the entry's definition line."
58
- def filter(filename)
59
- invoke :filecheck
60
- Bio::FlatFile.open(filename).each do |entry|
61
- passed = true
62
- passed &&= (entry.length >= options.length_cutoff)
63
- passed &&= (entry.definition.match(Regexp.new(options.defline_grep)))
64
- passed = !passed if options.inverse_match
65
- puts entry if passed
66
- end
67
- end
68
-
69
- desc "clean FILENAME [options]", "Clean up a fasta file"
70
- method_option :wrap_width, :aliases => '-w', :type => :numeric, :desc => 'Wrap the fasta to N columns'
71
- def clean(filename)
72
- invoke :filecheck
73
- Bio::FlatFile.open(filename).each do |entry|
74
- puts entry.to_biosequence.output(:fasta, :header => entry.definition, :width => options.wrap_width)
75
- end
76
- end
77
-
78
-
79
- desc "sort FILENAME [options]", "Sorts a fasta file according to criteria"
80
- def sort(filename)
81
- invoke :filecheck
82
- Bio::FlatFile.open(filename).to_a.sort{|a,b| b.length <=> a.length}.each do |entry|
83
- puts entry
84
- end
85
- end
86
- end