lederhosen 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lederhosen.gemspec +2 -1
- data/lib/lederhosen/tasks/join_otu_tables.rb +59 -0
- data/lib/lederhosen/tasks/otu_table.rb +8 -2
- data/lib/lederhosen/version.rb +1 -1
- data/readme.md +5 -3
- data/spec/cli_spec.rb +5 -0
- metadata +4 -3
data/lederhosen.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "1.1.0"
|
8
|
+
s.version = "1.1.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
|
|
27
27
|
"lib/lederhosen.rb",
|
28
28
|
"lib/lederhosen/cli.rb",
|
29
29
|
"lib/lederhosen/tasks/cluster.rb",
|
30
|
+
"lib/lederhosen/tasks/join_otu_tables.rb",
|
30
31
|
"lib/lederhosen/tasks/make_udb.rb",
|
31
32
|
"lib/lederhosen/tasks/otu_filter.rb",
|
32
33
|
"lib/lederhosen/tasks/otu_table.rb",
|
data/lib/lederhosen/tasks/join_otu_tables.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Lederhosen
|
4
|
+
|
5
|
+
class CLI
|
6
|
+
|
7
|
+
desc 'join_otu_tables', 'combine multiple otu tables'
|
8
|
+
|
9
|
+
method_option :input, :type => :string, :required => true
|
10
|
+
method_option :output, :type => :string, :required => true
|
11
|
+
|
12
|
+
def join_otu_tables
|
13
|
+
|
14
|
+
input = Dir[options[:input]]
|
15
|
+
output = options[:output]
|
16
|
+
|
17
|
+
ohai "combining #{input.size} file(s) and saving to #{output}"
|
18
|
+
|
19
|
+
all_otu_names = Set.new
|
20
|
+
all_samples = Set.new
|
21
|
+
|
22
|
+
sample_name_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } }
|
23
|
+
|
24
|
+
# read all of the csv files
|
25
|
+
input.each do |input_file|
|
26
|
+
File.open(input_file) do |handle|
|
27
|
+
otu_names = handle.gets.strip.split(',')[1..-1]
|
28
|
+
all_otu_names += otu_names.to_set
|
29
|
+
|
30
|
+
handle.each do |line|
|
31
|
+
line = line.strip.split(',')
|
32
|
+
sample = File.basename(input_file)
|
33
|
+
all_samples << sample
|
34
|
+
read_counts = line[1..-1]
|
35
|
+
otu_names.zip(read_counts) do |name, count|
|
36
|
+
sample_name_count[sample][name] = count
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# save to csv
|
43
|
+
File.open(output, 'w') do |handle|
|
44
|
+
header = all_otu_names.to_a.sort
|
45
|
+
handle.puts "-,#{header.join(',')}"
|
46
|
+
|
47
|
+
all_samples.to_a.sort.each do |sample|
|
48
|
+
handle.print "#{sample}"
|
49
|
+
header.each do |name|
|
50
|
+
handle.print ",#{sample_name_count[sample][name]}"
|
51
|
+
end
|
52
|
+
handle.print "\n"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
data/lib/lederhosen/tasks/otu_table.rb
CHANGED
@@ -33,13 +33,19 @@ module Lederhosen
|
|
33
33
|
level_sample_cluster_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } } }
|
34
34
|
|
35
35
|
all_names = Hash.new { |h, k| h[k] = Set.new }
|
36
|
-
|
36
|
+
|
37
|
+
# create a progress bar with the total number of bytes of
|
38
|
+
# the files we're slurping up
|
39
|
+
pbar = ProgressBar.new "loading", input.map{ |x| File.size(x) }.reduce(&:+)
|
37
40
|
|
38
41
|
# Load cluster table
|
39
42
|
input.each do |input_file|
|
40
|
-
pbar.inc
|
41
43
|
File.open(input_file) do |handle|
|
42
44
|
handle.each do |line|
|
45
|
+
|
46
|
+
# increase progressbar by the number of bytes in each line
|
47
|
+
pbar.inc line.unpack('*C').size
|
48
|
+
|
43
49
|
dat = parse_usearch_line(line.strip)
|
44
50
|
next if dat.nil?
|
45
51
|
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
@@ -54,10 +54,12 @@ Cluster reads using USEARCH. Output is a uc file.
|
|
54
54
|
|
55
55
|
lederhosen cluster --input=trimmed/*.fasta --identity=0.95 --output=clusters_95.uc --database=taxcollector.udb
|
56
56
|
|
57
|
-
### Generate OTU
|
57
|
+
### Generate OTU table(s)
|
58
58
|
|
59
59
|
Create an OTU abundance table where rows are samples and columns are clusters. The entries are the number of reads for that cluster in a sample.
|
60
60
|
|
61
|
-
lederhosen otu_table --clusters=clusters_95.uc --
|
61
|
+
lederhosen otu_table --clusters=clusters_95.uc --prefix=otu_table --level=domain phylum class order family genus species
|
62
62
|
|
63
|
-
|
63
|
+
This will create the files:
|
64
|
+
|
65
|
+
otu_table.domain.csv, ..., otu_table.species.csv
|
data/spec/cli_spec.rb
CHANGED
@@ -37,6 +37,11 @@ describe Lederhosen::CLI do
|
|
37
37
|
$?.success?.should be_true
|
38
38
|
end
|
39
39
|
|
40
|
+
it 'should combine OTU abundance matrices' do
|
41
|
+
`./bin/lederhosen join_otu_tables --input=#{$test_dir}/otu_table*.csv --output=#{$test_dir}/merged.csv`
|
42
|
+
$?.success?.should be_true
|
43
|
+
end
|
44
|
+
|
40
45
|
it 'should split a fasta file into smaller fasta files (optionally gzipped)' do
|
41
46
|
`./bin/lederhosen split_fasta --input=#{$test_dir}/trimmed/ILT_L_9_B_001.fasta --out-dir=#{$test_dir}/split/ --gzip true -n 100`
|
42
47
|
$?.success?.should be_true
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 1
|
9
|
-
- 0
|
10
|
-
version: 1.1.0
|
9
|
+
- 1
|
10
|
+
version: 1.1.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -123,6 +123,7 @@ files:
|
|
123
123
|
- lib/lederhosen.rb
|
124
124
|
- lib/lederhosen/cli.rb
|
125
125
|
- lib/lederhosen/tasks/cluster.rb
|
126
|
+
- lib/lederhosen/tasks/join_otu_tables.rb
|
126
127
|
- lib/lederhosen/tasks/make_udb.rb
|
127
128
|
- lib/lederhosen/tasks/otu_filter.rb
|
128
129
|
- lib/lederhosen/tasks/otu_table.rb
|