lederhosen 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lederhosen.gemspec +1 -1
- data/lib/lederhosen/tasks/otu_table.rb +36 -24
- data/lib/lederhosen/version.rb +3 -3
- data/readme.md +1 -1
- data/spec/cli_spec.rb +5 -6
- metadata +2 -2
data/lederhosen.gemspec
CHANGED
data/lib/lederhosen/tasks/otu_table.rb
CHANGED
@@ -8,24 +8,31 @@ module Lederhosen
|
|
8
8
|
class CLI
|
9
9
|
|
10
10
|
desc "otu_table",
|
11
|
-
"create an OTU abundance matrix from USEARCH
|
11
|
+
"create an OTU abundance matrix from USEARCH prefix"
|
12
12
|
|
13
13
|
method_option :files, :type => :string, :required => true
|
14
|
-
|
15
|
-
method_option :
|
14
|
+
|
15
|
+
method_option :prefix, :type => :string, :required => true,
|
16
|
+
:banner => 'prefix prefix'
|
17
|
+
|
18
|
+
method_option :levels, :type => :array, :required => true,
|
19
|
+
:banner => 'valid options: domain, kingdom, phylum, class, order, genus, or species (or all of them at once)'
|
16
20
|
|
17
21
|
def otu_table
|
18
22
|
input = Dir[options[:files]]
|
19
|
-
|
20
|
-
|
23
|
+
prefix = options[:prefix]
|
24
|
+
levels = options[:levels].map(&:downcase)
|
21
25
|
|
22
|
-
ohai "generating #{
|
26
|
+
ohai "generating #{levels.join(', ')} table(s) from #{input.size} file(s) and saving to prefix #{prefix}."
|
23
27
|
|
24
|
-
|
28
|
+
# sanity check
|
29
|
+
levels.each do |level|
|
30
|
+
fail "bad level: #{level}" unless %w{domain phylum class order family genus species kingdom}.include? level
|
31
|
+
end
|
25
32
|
|
26
|
-
|
33
|
+
level_sample_cluster_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } } }
|
27
34
|
|
28
|
-
all_names = Set.new
|
35
|
+
all_names = Hash.new { |h, k| h[k] = Set.new }
|
29
36
|
pbar = ProgressBar.new "loading", input.size
|
30
37
|
|
31
38
|
# Load cluster table
|
@@ -35,36 +42,41 @@ module Lederhosen
|
|
35
42
|
handle.each do |line|
|
36
43
|
dat = parse_usearch_line(line.strip)
|
37
44
|
next if dat.nil?
|
38
|
-
name = dat[level] rescue ohai(dat.inspect)
|
39
45
|
|
40
|
-
|
41
|
-
|
46
|
+
levels.each do |level|
|
47
|
+
name = dat[level] rescue nil
|
48
|
+
all_names[level] << name
|
49
|
+
level_sample_cluster_count[level][input_file][name] += 1
|
50
|
+
end
|
51
|
+
|
42
52
|
end
|
43
53
|
end
|
44
54
|
end
|
45
55
|
|
46
56
|
pbar.finish
|
47
57
|
|
48
|
-
|
58
|
+
# save to csv(s)
|
59
|
+
levels.each do |level|
|
60
|
+
|
61
|
+
ohai "saving #{level} table"
|
49
62
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
handle.puts "#{level.capitalize},#{header.join(',')}"
|
54
|
-
samples = sample_cluster_count.keys.sort
|
63
|
+
File.open("#{prefix}.#{level}.csv", 'w') do |handle|
|
64
|
+
header = all_names[level].to_a.compact.sort
|
65
|
+
handle.puts "#{level.capitalize},#{header.join(',')}"
|
55
66
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
67
|
+
input.each do |sample|
|
68
|
+
handle.print "#{sample}"
|
69
|
+
header.each do |name|
|
70
|
+
handle.print ",#{level_sample_cluster_count[level][sample][name]}"
|
71
|
+
end
|
72
|
+
handle.print "\n"
|
60
73
|
end
|
61
|
-
handle.print "\n"
|
62
74
|
end
|
63
75
|
end
|
64
76
|
end
|
65
77
|
|
66
78
|
no_tasks do
|
67
|
-
# parse a line of usearch
|
79
|
+
# parse a line of usearch prefix
|
68
80
|
# return a hash in the form:
|
69
81
|
# { :taxonomy => '', :identity => 0.00, ... }
|
70
82
|
# unless the line is not a "hit" in which case
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
@@ -7,7 +7,7 @@ Cluster raw Illumina 16S rRNA amplicon data to generate OTUs.
|
|
7
7
|
- Lederhosen is a project born out of the Triplett Lab at the University of Florida.
|
8
8
|
- Lederhosen is designed to be a fast and simple method of clustering 16S rRNA amplicons sequenced
|
9
9
|
using paired and non-paired end short reads such as those produced by Illumina (GAIIx, HiSeq and MiSeq).
|
10
|
-
- Lederhosen uses Semantic Versioning.
|
10
|
+
- Lederhosen uses [Semantic Versioning](http://semver.org/).
|
11
11
|
- Lederhosen is free and open source under the [MIT open source license](http://opensource.org/licenses/mit-license.php/).
|
12
12
|
- Except for USEARCH which requires a license, Lederhosen is available for commercial use.
|
13
13
|
|
data/spec/cli_spec.rb
CHANGED
@@ -26,15 +26,14 @@ describe Lederhosen::CLI do
|
|
26
26
|
`./bin/lederhosen cluster --input #{$test_dir}/trimmed/ILT_L_9_B_001.fasta --database #{$test_dir}/test_db.udb --identity 0.95 --output #{$test_dir}/clusters.uc`
|
27
27
|
end
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
end
|
29
|
+
it 'should build abundance matrices for each level' do
|
30
|
+
levels = "domain phylum class order FAMILY genus Species"
|
31
|
+
`./bin/lederhosen otu_table --files=spec/data/test.uc --prefix=#{$test_dir}/otu_table --levels=#{levels}`
|
32
|
+
$?.success?.should be_true
|
34
33
|
end
|
35
34
|
|
36
35
|
it 'should filter OTU abundance matrices' do
|
37
|
-
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
|
36
|
+
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.species.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
|
38
37
|
$?.success?.should be_true
|
39
38
|
end
|
40
39
|
|