lederhosen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "1.1.0"
8
+ s.version = "1.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
27
27
  "lib/lederhosen.rb",
28
28
  "lib/lederhosen/cli.rb",
29
29
  "lib/lederhosen/tasks/cluster.rb",
30
+ "lib/lederhosen/tasks/join_otu_tables.rb",
30
31
  "lib/lederhosen/tasks/make_udb.rb",
31
32
  "lib/lederhosen/tasks/otu_filter.rb",
32
33
  "lib/lederhosen/tasks/otu_table.rb",
@@ -0,0 +1,59 @@
1
+ require 'set'
2
+
3
+ module Lederhosen
4
+
5
+ class CLI
6
+
7
+ desc 'join_otu_tables', 'combine multiple otu tables'
8
+
9
+ method_option :input, :type => :string, :required => true
10
+ method_option :output, :type => :string, :required => true
11
+
12
# Combine several OTU abundance tables (CSV) into a single table.
#
# Reads every file matched by the glob in options[:input]. The first line
# of each file is the header: its first field is ignored and the remaining
# fields are OTU names. Each following line contributes read counts, paired
# positionally with that file's OTU names.
#
# The merged table is written to options[:output]: a "-" corner cell, the
# union of all OTU names (sorted) as columns, one row per sample (sorted),
# and 0 wherever a sample has no count for an OTU.
#
# Empty input files and blank lines are skipped instead of raising.
def join_otu_tables
  input  = Dir[options[:input]]
  output = options[:output]

  ohai "combining #{input.size} file(s) and saving to #{output}"

  all_otu_names = Set.new
  all_samples   = Set.new

  # sample => { otu name => count }; pairs never seen read back as 0
  sample_name_count = Hash.new { |by_sample, sample| by_sample[sample] = Hash.new(0) }

  # read all of the csv files
  input.each do |input_file|
    # NOTE(review): rows are keyed by the file's basename, not by the first
    # column of each row, so a file holding several rows keeps only the
    # counts of its last row -- confirm this is intended.
    sample = File.basename(input_file)

    File.open(input_file) do |handle|
      header_line = handle.gets
      next if header_line.nil? # empty file: nothing to merge

      # a blank header yields no OTU names at all
      otu_names = header_line.strip.split(',')[1..-1] || []
      all_otu_names.merge(otu_names)

      handle.each do |line|
        fields = line.strip.split(',')
        next if fields.empty? # blank line (e.g. trailing newline)

        all_samples << sample
        otu_names.zip(fields[1..-1]) do |name, count|
          # rows shorter than the header count as 0 for the missing OTUs
          sample_name_count[sample][name] = count || 0
        end
      end
    end
  end

  # save to csv
  File.open(output, 'w') do |handle|
    header = all_otu_names.to_a.sort
    handle.puts "-,#{header.join(',')}"

    all_samples.to_a.sort.each do |sample|
      counts = header.map { |name| sample_name_count[sample][name] }
      handle.puts [sample, *counts].join(',')
    end
  end
end
58
+ end
59
+ end
@@ -33,13 +33,19 @@ module Lederhosen
33
33
  level_sample_cluster_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } } }
34
34
 
35
35
  all_names = Hash.new { |h, k| h[k] = Set.new }
36
- pbar = ProgressBar.new "loading", input.size
36
+
37
+ # create a progress bar with the total number of bytes of
38
+ # the files we're slurping up
39
+ pbar = ProgressBar.new "loading", input.map{ |x| File.size(x) }.reduce(&:+)
37
40
 
38
41
  # Load cluster table
39
42
  input.each do |input_file|
40
- pbar.inc
41
43
  File.open(input_file) do |handle|
42
44
  handle.each do |line|
45
+
46
+ # increase progressbar by the number of bytes in each line
47
+ pbar.inc line.unpack('*C').size
48
+
43
49
  dat = parse_usearch_line(line.strip)
44
50
  next if dat.nil?
45
51
 
@@ -3,7 +3,7 @@ module Lederhosen
3
3
  MAJOR = 1
4
4
  MINOR = 1
5
5
  CODENAME = 'Apfelstrudel' # changes for minor versions
6
- PATCH = 0
6
+ PATCH = 1
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH].join('.')
9
9
  end
data/readme.md CHANGED
@@ -54,10 +54,12 @@ Cluster reads using USEARCH. Output is a uc file.
54
54
 
55
55
  lederhosen cluster --input=trimmed/*.fasta --identity=0.95 --output=clusters_95.uc --database=taxcollector.udb
56
56
 
57
- ### Generate OTU tables
57
+ ### Generate OTU table(s)
58
58
 
59
59
  Create an OTU abundance table where rows are samples and columns are clusters. The entries are the number of reads for that cluster in a sample.
60
60
 
61
- lederhosen otu_table --clusters=clusters_95.uc --output=genus.csv --level=genus
61
+ lederhosen otu_table --clusters=clusters_95.uc --prefix=otu_table --level=domain phylum class order family genus species
62
62
 
63
- Level can be Kingdom, Domain, Phylum, Class, Order, Family or Genus.
63
+ This will create the files:
64
+
65
+ otu_table.domain.csv, ..., otu_table.species.csv
data/spec/cli_spec.rb CHANGED
@@ -37,6 +37,11 @@ describe Lederhosen::CLI do
37
37
  $?.success?.should be_true
38
38
  end
39
39
 
40
+ it 'should combine OTU abundance matrices' do
41
+ `./bin/lederhosen join_otu_tables --input=#{$test_dir}/otu_table*.csv --output=#{$test_dir}/merged.csv`
42
+ $?.success?.should be_true
43
+ end
44
+
40
45
  it 'should split a fasta file into smaller fasta files (optionally gzipped)' do
41
46
  `./bin/lederhosen split_fasta --input=#{$test_dir}/trimmed/ILT_L_9_B_001.fasta --out-dir=#{$test_dir}/split/ --gzip true -n 100`
42
47
  $?.success?.should be_true
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 1
9
- - 0
10
- version: 1.1.0
9
+ - 1
10
+ version: 1.1.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -123,6 +123,7 @@ files:
123
123
  - lib/lederhosen.rb
124
124
  - lib/lederhosen/cli.rb
125
125
  - lib/lederhosen/tasks/cluster.rb
126
+ - lib/lederhosen/tasks/join_otu_tables.rb
126
127
  - lib/lederhosen/tasks/make_udb.rb
127
128
  - lib/lederhosen/tasks/otu_filter.rb
128
129
  - lib/lederhosen/tasks/otu_table.rb