lederhosen 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/lederhosen/tasks/add_names.rb +1 -1
- data/lib/lederhosen/tasks/otu_filter.rb +45 -0
- data/lib/lederhosen/tasks/otu_table.rb +15 -16
- data/lib/lederhosen/tasks/squish.rb +5 -5
- data/lib/version.rb +1 -1
- data/spec/pipeline_spec.rb +3 -1
- metadata +5 -4
@@ -6,7 +6,7 @@ module Lederhosen
|
|
6
6
|
class CLI
|
7
7
|
|
8
8
|
desc "add_names",
|
9
|
-
"add names to otu abundance matrix using blat output
|
9
|
+
"add names to otu abundance matrix using blat output"
|
10
10
|
|
11
11
|
method_option :blat, :type => :string, :required => true
|
12
12
|
method_option :table, :type => :string, :required => true
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Lederhosen
|
2
|
+
class CLI
|
3
|
+
|
4
|
+
desc 'otu_filter', 'works like uc_filter but uses an OTU table as input'
|
5
|
+
|
6
|
+
method_option :input, :type => :string, :required => true
|
7
|
+
method_option :output, :type => :string, :required => true
|
8
|
+
method_option :reads, :type => :numeric, :required => true
|
9
|
+
method_option :samples, :type => :numeric, :required => true
|
10
|
+
|
11
|
+
def otu_filter
|
12
|
+
input = options[:input]
|
13
|
+
output = options[:output]
|
14
|
+
reads = options[:reads]
|
15
|
+
samples = options[:samples]
|
16
|
+
|
17
|
+
##
|
18
|
+
# Iterate over otu table line by line.
|
19
|
+
# Only print if cluster meets criteria
|
20
|
+
#
|
21
|
+
kept = 0
|
22
|
+
File.open(input) do |handle|
|
23
|
+
header = handle.gets.strip
|
24
|
+
header = header.split(',')
|
25
|
+
samples = header[1..-1]
|
26
|
+
|
27
|
+
puts header.join(',')
|
28
|
+
|
29
|
+
handle.each do |line|
|
30
|
+
line = line.strip.split(',')
|
31
|
+
cluster_no = line[0]
|
32
|
+
counts = line[1..-1].collect { |x| x.to_i }
|
33
|
+
|
34
|
+
# should be the same as uc_filter
|
35
|
+
if counts.reject { |x| x < reads }.length > samples
|
36
|
+
puts line.join(',')
|
37
|
+
kept += 1
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
ohai "kept #{kept} clusters."
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
@@ -18,32 +18,31 @@ module Lederhosen
|
|
18
18
|
output = options[:output]
|
19
19
|
joined_reads = options[:joined]
|
20
20
|
|
21
|
+
# Load cluster table
|
21
22
|
|
22
|
-
# Load cluster table!
|
23
23
|
clstr_info = Helpers.load_uc_file input
|
24
24
|
clstr_counts = clstr_info[:clstr_counts] # clstr_counts[:clstr][sample.to_i] = reads
|
25
25
|
clstrnr_to_seed = clstr_info[:clstrnr_to_seed]
|
26
26
|
samples = clstr_info[:samples]
|
27
27
|
|
28
|
-
# print OTU
|
29
|
-
|
28
|
+
# print OTU abundance matrix
|
29
|
+
# clusters as columns
|
30
|
+
# samples as rows
|
31
|
+
|
30
32
|
File.open("#{output}.csv", 'w') do |h|
|
31
33
|
samples = samples.sort
|
32
34
|
clusters = clstr_counts.keys
|
33
35
|
|
34
|
-
# print header
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
h.print "\n"
|
45
|
-
end
|
46
|
-
|
36
|
+
# print header (cluster names)
|
37
|
+
h.puts '-' + SEP + clusters.map { |x| "cluster-#{x}" }.join(SEP)
|
38
|
+
|
39
|
+
samples.each do |sample|
|
40
|
+
h.print sample
|
41
|
+
clusters.each do |cluster|
|
42
|
+
h.print "#{SEP}#{clstr_counts[cluster][sample]}"
|
43
|
+
end
|
44
|
+
h.print "\n"
|
45
|
+
end
|
47
46
|
end
|
48
47
|
end
|
49
48
|
|
@@ -29,17 +29,17 @@ module Lederhosen
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
output = File.open(output) rescue $stdout
|
32
|
+
output = File.open(output, 'w') rescue $stdout
|
33
33
|
|
34
34
|
# print the new, squished csv file
|
35
35
|
column_names.uniq!.sort!
|
36
|
-
puts "-,#{column_names.join(',')}"
|
36
|
+
output.puts "-,#{column_names.join(',')}"
|
37
37
|
total_by_sample_by_column.each_pair do |sample_id, row|
|
38
|
-
print "#{sample_id}"
|
38
|
+
output.print "#{sample_id}"
|
39
39
|
column_names.each do |column_name|
|
40
|
-
print ",#{row[column_name]}"
|
40
|
+
output.print ",#{row[column_name]}"
|
41
41
|
end
|
42
|
-
print "\n"
|
42
|
+
output.print "\n"
|
43
43
|
end
|
44
44
|
|
45
45
|
output.close
|
data/lib/version.rb
CHANGED
data/spec/pipeline_spec.rb
CHANGED
@@ -2,6 +2,8 @@ test_dir = "/tmp/lederhosen_test_#{(0...8).map{65.+(rand(25)).chr}.join}/"
|
|
2
2
|
|
3
3
|
`mkdir -p #{test_dir}`
|
4
4
|
|
5
|
+
$stderr.puts "test dir: #{test_dir}"; sleep 1
|
6
|
+
|
5
7
|
describe 'the pipeline' do
|
6
8
|
|
7
9
|
it 'should exist' do
|
@@ -47,7 +49,7 @@ describe 'the pipeline' do
|
|
47
49
|
end
|
48
50
|
|
49
51
|
it 'should squish otu abundance matrix by same name' do
|
50
|
-
`./bin/lederhosen squish --csv-file=#{test_dir}/named_otus.csv`
|
52
|
+
`./bin/lederhosen squish --csv-file=#{test_dir}/named_otus.csv --output=#{test_dir}/squished.csv"`
|
51
53
|
$?.success?.should be_true
|
52
54
|
end
|
53
55
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 5
|
10
|
-
version: 0.1.5
|
9
|
+
- 6
|
10
|
+
version: 0.1.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-07-
|
18
|
+
date: 2012-07-16 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: dna
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- lib/lederhosen/tasks/join.rb
|
141
141
|
- lib/lederhosen/tasks/k_filter.rb
|
142
142
|
- lib/lederhosen/tasks/name.rb
|
143
|
+
- lib/lederhosen/tasks/otu_filter.rb
|
143
144
|
- lib/lederhosen/tasks/otu_table.rb
|
144
145
|
- lib/lederhosen/tasks/rep_reads.rb
|
145
146
|
- lib/lederhosen/tasks/sort.rb
|