fasta_util 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document CHANGED
File without changes
data/Gemfile CHANGED
File without changes
data/Gemfile.lock CHANGED
File without changes
data/LICENSE.txt CHANGED
File without changes
data/README.rdoc CHANGED
File without changes
data/Rakefile CHANGED
File without changes
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
data/bin/fasta_util CHANGED
@@ -1,4 +1,88 @@
1
1
  #!/usr/bin/env ruby
2
- require 'fasta_util'
2
+ require 'thor'
3
+ require 'bio'
3
4
 
5
+ class FastaUtility < Thor
6
+ include Thor::Actions
7
+ Struct.new("Stats", :sum, :l50, :n50, :count, :mean, :median)
8
+
9
+ no_tasks do
10
+ def stats(lengths)
11
+ lengths = lengths.sort{|a, b| b <=> a}
12
+ stats = Struct::Stats.new
13
+
14
+ temp_sum = 0
15
+ stats[:sum] = lengths.inject(:+)
16
+ stats[:l50] = lengths.find{|length| (temp_sum += length) > stats[:sum]/2.0}
17
+ stats[:n50] = lengths.count{|length| length >= stats[:l50]}
18
+ stats[:mean] = stats[:sum].to_f/lengths.length
19
+ stats[:median] = (lengths.length % 2 == 0) ? (lengths[lengths.length/2-1] + lengths[lengths.length/2])/2.0 : lengths[lengths.length/2]
20
+ stats[:count] = lengths.count
21
+ return stats
22
+ end
23
+
24
+ def format(stats)
25
+ output = []
26
+ buffer_length = stats.members.map{|key| key.length}.max
27
+ stats.each_pair do |key, value|
28
+ numtype = value.is_a?(Float) ? "f" : "d"
29
+ output << " %-#{buffer_length}s: %#{numtype}" % [key.to_s.capitalize, value]
30
+ end
31
+ output.join("\n")
32
+ end
33
+ end
34
+
35
+ desc "filecheck", "Checks to see if a given file exists. Used internally, don't worry about it too much", :hide => true
36
+ def filecheck(filename)
37
+ say "The file '#{filename}' doesn't seem to exist!", :red unless File.exists?(filename)
38
+ end
39
+
40
+ desc "lengths", "Print a set of summary statistics for the given fasta file, including L50, N50, sum and count."
41
+ method_options [:cutoff, '-c'] => 0
42
+ def lengths(filename)
43
+ invoke :filecheck
44
+ lengths = Bio::FlatFile.open(filename).map{|entry| (entry.seq[-1,1] == "*") ? entry.length - 1 : entry.length}
45
+
46
+ say "All entries", :green
47
+ puts format(stats(lengths))
48
+ if options.cutoff > 0
49
+ say "Entries with length >= #{options.cutoff}", :green
50
+ puts format(stats(lengths.find_all{|l| l >= options.cutoff}))
51
+ end
52
+ end
53
+
54
+ desc "filter FILENAME [options]", "Impose a filter or set of filters on entries in a fasta file."
55
+ long_desc "Impose a filter or set of filters on entries in a fasta file where each sequence in the file has to pass all of the filters to be printed."
56
+ method_option :length_cutoff, :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Only entries with length >= cutoff will be returned.'
57
+ method_option :inverse_match, :aliases => '-v', :type => :boolean, :desc => "Return the inverse of the match after all the other filters have been applied."
58
+ method_option :defline_grep, :aliases => '-d', :type => :string, :default => '', :desc => "A regular expression, used to search the entry's definition line."
59
+ def filter(filename)
60
+ invoke :filecheck
61
+ Bio::FlatFile.open(filename).each do |entry|
62
+ passed = true
63
+ passed &&= (entry.length >= options.length_cutoff)
64
+ passed &&= (entry.definition.match(Regexp.new(options.defline_grep)))
65
+ passed = !passed if options.inverse_match
66
+ puts entry if passed
67
+ end
68
+ end
69
+
70
+ desc "clean FILENAME [options]", "Clean up a fasta file"
71
+ method_option :wrap_width, :aliases => '-w', :type => :numeric, :desc => 'Wrap the fasta to N columns'
72
+ def clean(filename)
73
+ invoke :filecheck
74
+ Bio::FlatFile.open(filename).each do |entry|
75
+ puts entry.to_biosequence.output(:fasta, :header => entry.definition, :width => options.wrap_width)
76
+ end
77
+ end
78
+
79
+
80
+ desc "sort FILENAME [options]", "Sorts a fasta file according to criteria"
81
+ def sort(filename)
82
+ invoke :filecheck
83
+ Bio::FlatFile.open(filename).to_a.sort{|a,b| b.length <=> a.length}.each do |entry|
84
+ puts entry
85
+ end
86
+ end
87
+ end
4
88
  FastaUtility.start
data/fasta_util.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{fasta_util}
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["robsyme"]
12
- s.date = %q{2011-02-17}
12
+ s.date = %q{2011-02-18}
13
13
  s.default_executable = %q{fasta_util}
14
14
  s.description = %q{Easy fasta filtering, wrapping, calculating common statistics, sorting etc. Based on the fasta_tool script that I think was written by Jason Stajich.}
15
15
  s.email = %q{rob.syme@gmail.com}
@@ -28,7 +28,6 @@ Gem::Specification.new do |s|
28
28
  "VERSION",
29
29
  "bin/fasta_util",
30
30
  "fasta_util.gemspec",
31
- "lib/fasta_util.rb",
32
31
  "test/helper.rb",
33
32
  "test/test_fasta_util.rb"
34
33
  ]
data/test/helper.rb CHANGED
File without changes
data/test/test_fasta_util.rb CHANGED
File without changes
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: fasta_util
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.2.1
5
+ version: 0.2.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - robsyme
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-02-17 00:00:00 +08:00
13
+ date: 2011-02-18 00:00:00 +08:00
14
14
  default_executable: fasta_util
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -109,7 +109,6 @@ files:
109
109
  - VERSION
110
110
  - bin/fasta_util
111
111
  - fasta_util.gemspec
112
- - lib/fasta_util.rb
113
112
  - test/helper.rb
114
113
  - test/test_fasta_util.rb
115
114
  has_rdoc: true
@@ -126,7 +125,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
126
125
  requirements:
127
126
  - - ">="
128
127
  - !ruby/object:Gem::Version
129
- hash: -728010957941595934
128
+ hash: -443172315906729972
130
129
  segments:
131
130
  - 0
132
131
  version: "0"
data/lib/fasta_util.rb DELETED
@@ -1,86 +0,0 @@
1
- require 'thor'
2
- require 'bio'
3
-
4
- class FastaUtility < Thor
5
- include Thor::Actions
6
- Struct.new("Stats", :sum, :l50, :n50, :count, :mean, :median)
7
-
8
- no_tasks do
9
- def stats(lengths)
10
- lengths = lengths.sort{|a, b| b <=> a}
11
- stats = Struct::Stats.new
12
-
13
- temp_sum = 0
14
- stats[:sum] = lengths.inject(:+)
15
- stats[:l50] = lengths.find{|length| (temp_sum += length) > stats[:sum]/2.0}
16
- stats[:n50] = lengths.count{|length| length >= stats[:l50]}
17
- stats[:mean] = stats[:sum].to_f/lengths.length
18
- stats[:median] = (lengths.length % 2 == 0) ? (lengths[lengths.length/2-1] + lengths[lengths.length/2])/2.0 : lengths[lengths.length/2]
19
- stats[:count] = lengths.count
20
- return stats
21
- end
22
-
23
- def format(stats)
24
- output = []
25
- buffer_length = stats.members.map{|key| key.length}.max
26
- stats.each_pair do |key, value|
27
- numtype = value.is_a?(Float) ? "f" : "d"
28
- output << " %-#{buffer_length}s: %#{numtype}" % [key.to_s.capitalize, value]
29
- end
30
- output.join("\n")
31
- end
32
- end
33
-
34
- desc "filecheck", "Checks to see if a given file exists. Used internally, don't worry about it too much", :hide => true
35
- def filecheck(filename)
36
- say "The file '#{filename}' doesn't seem to exist!", :red unless File.exists?(filename)
37
- end
38
-
39
- desc "lengths", "Print a set of summary statistics for the given fasta file, including L50, N50, sum and count."
40
- method_options [:cutoff, '-c'] => 0
41
- def lengths(filename)
42
- invoke :filecheck
43
- lengths = Bio::FlatFile.open(filename).map{|entry| (entry.seq[-1,1] == "*") ? entry.length - 1 : entry.length}
44
-
45
- say "All entries", :green
46
- puts format(stats(lengths))
47
- if options.cutoff > 0
48
- say "Entries with length >= #{options.cutoff}", :green
49
- puts format(stats(lengths.find_all{|l| l >= options.cutoff}))
50
- end
51
- end
52
-
53
- desc "filter FILENAME [options]", "Impose a filter or set of filters on entries in a fasta file."
54
- long_desc "Impose a filter or set of filters on entries in a fasta file where each sequence in the file has to pass all of the filters to be printed."
55
- method_option :length_cutoff, :aliases => '-l', :type => :numeric, :default => 0, :desc => 'Only entries with length >= cutoff will be returned.'
56
- method_option :inverse_match, :aliases => '-v', :type => :boolean, :desc => "Return the inverse of the match after all the other filters have been applied."
57
- method_option :defline_grep, :aliases => '-d', :type => :string, :default => '', :desc => "A regular expression, used to search the entry's definition line."
58
- def filter(filename)
59
- invoke :filecheck
60
- Bio::FlatFile.open(filename).each do |entry|
61
- passed = true
62
- passed &&= (entry.length >= options.length_cutoff)
63
- passed &&= (entry.definition.match(Regexp.new(options.defline_grep)))
64
- passed = !passed if options.inverse_match
65
- puts entry if passed
66
- end
67
- end
68
-
69
- desc "clean FILENAME [options]", "Clean up a fasta file"
70
- method_option :wrap_width, :aliases => '-w', :type => :numeric, :desc => 'Wrap the fasta to N columns'
71
- def clean(filename)
72
- invoke :filecheck
73
- Bio::FlatFile.open(filename).each do |entry|
74
- puts entry.to_biosequence.output(:fasta, :header => entry.definition, :width => options.wrap_width)
75
- end
76
- end
77
-
78
-
79
- desc "sort FILENAME [options]", "Sorts a fasta file according to criteria"
80
- def sort(filename)
81
- invoke :filecheck
82
- Bio::FlatFile.open(filename).to_a.sort{|a,b| b.length <=> a.length}.each do |entry|
83
- puts entry
84
- end
85
- end
86
- end