lederhosen 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lederhosen.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "1.0.2"
8
+ s.version = "1.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
@@ -8,24 +8,31 @@ module Lederhosen
8
8
  class CLI
9
9
 
10
10
  desc "otu_table",
11
- "create an OTU abundance matrix from USEARCH output"
11
+ "create an OTU abundance matrix from USEARCH prefix"
12
12
 
13
13
  method_option :files, :type => :string, :required => true
14
- method_option :output, :type => :string, :required => true
15
- method_option :level, :type => :string, :required => true, :banner => 'valid options: domain, kingdom, phylum, class, order, genus, or species'
14
+
15
+ method_option :prefix, :type => :string, :required => true,
16
+ :banner => 'prefix prefix'
17
+
18
+ method_option :levels, :type => :array, :required => true,
19
+ :banner => 'valid options: domain, kingdom, phylum, class, order, genus, or species (or all of them at once)'
16
20
 
17
21
  def otu_table
18
22
  input = Dir[options[:files]]
19
- output = options[:output]
20
- level = options[:level].downcase
23
+ prefix = options[:prefix]
24
+ levels = options[:levels].map(&:downcase)
21
25
 
22
- ohai "generating #{level} table from #{input.size} file(s) and saving to #{output}."
26
+ ohai "generating #{levels.join(', ')} table(s) from #{input.size} file(s) and saving to prefix #{prefix}."
23
27
 
24
- fail "bad level: #{level}" unless %w{domain phylum class order family genus species kingdom}.include? level
28
+ # sanity check
29
+ levels.each do |level|
30
+ fail "bad level: #{level}" unless %w{domain phylum class order family genus species kingdom}.include? level
31
+ end
25
32
 
26
- sample_cluster_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } }
33
+ level_sample_cluster_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } } }
27
34
 
28
- all_names = Set.new
35
+ all_names = Hash.new { |h, k| h[k] = Set.new }
29
36
  pbar = ProgressBar.new "loading", input.size
30
37
 
31
38
  # Load cluster table
@@ -35,36 +42,41 @@ module Lederhosen
35
42
  handle.each do |line|
36
43
  dat = parse_usearch_line(line.strip)
37
44
  next if dat.nil?
38
- name = dat[level] rescue ohai(dat.inspect)
39
45
 
40
- all_names << name
41
- sample_cluster_count[input_file][name] += 1
46
+ levels.each do |level|
47
+ name = dat[level] rescue nil
48
+ all_names[level] << name
49
+ level_sample_cluster_count[level][input_file][name] += 1
50
+ end
51
+
42
52
  end
43
53
  end
44
54
  end
45
55
 
46
56
  pbar.finish
47
57
 
48
- ohai "found #{all_names.size} unique taxa at #{level} level"
58
+ # save to csv(s)
59
+ levels.each do |level|
60
+
61
+ ohai "saving #{level} table"
49
62
 
50
- # save to csv
51
- File.open(output, 'w') do |handle|
52
- header = all_names.to_a.compact.sort
53
- handle.puts "#{level.capitalize},#{header.join(',')}"
54
- samples = sample_cluster_count.keys.sort
63
+ File.open("#{prefix}.#{level}.csv", 'w') do |handle|
64
+ header = all_names[level].to_a.compact.sort
65
+ handle.puts "#{level.capitalize},#{header.join(',')}"
55
66
 
56
- samples.each do |sample|
57
- handle.print "#{sample}"
58
- header.each do |name|
59
- handle.print ",#{sample_cluster_count[sample][name]}"
67
+ input.each do |sample|
68
+ handle.print "#{sample}"
69
+ header.each do |name|
70
+ handle.print ",#{level_sample_cluster_count[level][sample][name]}"
71
+ end
72
+ handle.print "\n"
60
73
  end
61
- handle.print "\n"
62
74
  end
63
75
  end
64
76
  end
65
77
 
66
78
  no_tasks do
67
- # parse a line of usearch output
79
+ # parse a line of usearch prefix
68
80
  # return a hash in the form:
69
81
  # { :taxonomy => '', :identity => 0.00, ... }
70
82
  # unless the line is not a "hit" in which case
@@ -1,9 +1,9 @@
1
1
  module Lederhosen
2
2
  module Version
3
3
  MAJOR = 1
4
- CODENAME = 'Hefeweizen'
5
- MINOR = 0
6
- PATCH = 2
4
+ MINOR = 1
5
+ CODENAME = 'Apfelstrudel' # changes for minor versions
6
+ PATCH = 0
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH].join('.')
9
9
  end
data/readme.md CHANGED
@@ -7,7 +7,7 @@ Cluster raw Illumina 16S rRNA amplicon data to generate OTUs.
7
7
  - Lederhosen is a project born out of the Triplett Lab at the University of Florida.
8
8
  - Lederhosen is designed to be a fast and simple method of clustering 16S rRNA amplicons sequenced
9
9
  using paired and non-paired end short reads such as those produced by Illumina (GAIIx, HiSeq and MiSeq).
10
- - Lederhosen uses Semantic Versioning.
10
+ - Lederhosen uses [Semantic Versioning](http://semver.org/).
11
11
  - Lederhosen is free and open source under the [MIT open source license](http://opensource.org/licenses/mit-license.php/).
12
12
  - Except for USEARCH which requires a license, Lederhosen is available for commercial use.
13
13
 
data/spec/cli_spec.rb CHANGED
@@ -26,15 +26,14 @@ describe Lederhosen::CLI do
26
26
  `./bin/lederhosen cluster --input #{$test_dir}/trimmed/ILT_L_9_B_001.fasta --database #{$test_dir}/test_db.udb --identity 0.95 --output #{$test_dir}/clusters.uc`
27
27
  end
28
28
 
29
- %w{domain phylum class ORDER Family genus species}.each do |level|
30
- it "should build #{level} abundance matrix" do
31
- `./bin/lederhosen otu_table --files=spec/data/test.uc --output=#{$test_dir}/otu_table.csv --level=#{level}`
32
- $?.success?.should be_true
33
- end
29
+ it 'should build abundance matrices for each level' do
30
+ levels = "domain phylum class order FAMILY genus Species"
31
+ `./bin/lederhosen otu_table --files=spec/data/test.uc --prefix=#{$test_dir}/otu_table --levels=#{levels}`
32
+ $?.success?.should be_true
34
33
  end
35
34
 
36
35
  it 'should filter OTU abundance matrices' do
37
- `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
36
+ `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.species.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
38
37
  $?.success?.should be_true
39
38
  end
40
39
 
metadata CHANGED
@@ -5,9 +5,9 @@ version: !ruby/object:Gem::Version
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
+ - 1
8
9
  - 0
9
- - 2
10
- version: 1.0.2
10
+ version: 1.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson