lederhosen 0.0.10 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +3 -1
- data/lib/lederhosen/tasks/join.rb +1 -1
- data/lib/lederhosen/tasks/{filter.rb → k_filter.rb} +5 -5
- data/lib/lederhosen/tasks/otu_table.rb +0 -28
- data/lib/lederhosen/tasks/uc_filter.rb +58 -0
- data/lib/lederhosen.rb +3 -4
- data/lib/version.rb +1 -1
- data/pipeline.sh +37 -0
- metadata +7 -5
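data/Gemfile
CHANGED
data/lib/lederhosen/tasks/join.rb
CHANGED
data/lib/lederhosen/tasks/{filter.rb → k_filter.rb}
CHANGED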
@@ -5,7 +5,7 @@
|
|
5
5
|
module Lederhosen
|
6
6
|
class CLI
|
7
7
|
|
8
|
-
desc "
|
8
|
+
desc "k_filter khmer filtering",
|
9
9
|
"--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"
|
10
10
|
|
11
11
|
method_option :input, :type => :string, :required => true
|
@@ -13,7 +13,7 @@ module Lederhosen
|
|
13
13
|
method_option :k, :type => :numeric, :required => true
|
14
14
|
method_option :cutoff, :type => :numeric, :required => true
|
15
15
|
|
16
|
-
def filter
|
16
|
+
def k_filter
|
17
17
|
input = options[:input]
|
18
18
|
output = options[:output]
|
19
19
|
k_len = options[:k].to_i
|
@@ -39,7 +39,7 @@ module Lederhosen
|
|
39
39
|
|
40
40
|
kept = 0
|
41
41
|
total_reads = total_reads.to_f
|
42
|
-
|
42
|
+
|
43
43
|
pbar = ProgressBar.new "saving", total_reads.to_i
|
44
44
|
output = File.open(output, 'w')
|
45
45
|
File.open(input) do |handle|
|
@@ -60,7 +60,7 @@ module Lederhosen
|
|
60
60
|
break
|
61
61
|
end
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
if keep
|
65
65
|
kept += 1
|
66
66
|
output.puts r
|
@@ -70,7 +70,7 @@ module Lederhosen
|
|
70
70
|
end
|
71
71
|
|
72
72
|
pbar.finish
|
73
|
-
|
73
|
+
|
74
74
|
ohai "survivors = #{kept} (#{kept/total_reads.to_f})"
|
75
75
|
output.close
|
76
76
|
end
|
data/lib/lederhosen/tasks/otu_table.rb
CHANGED
@@ -45,34 +45,6 @@ module Lederhosen
|
|
45
45
|
end
|
46
46
|
|
47
47
|
end
|
48
|
-
|
49
|
-
# # Get representative sequences!
|
50
|
-
# reads_total = 0
|
51
|
-
# representatives = {}
|
52
|
-
# clusters[:count_data].each{ |k, x| representatives[x[:seed]] = k }
|
53
|
-
#
|
54
|
-
# out_handle = File.open("#{output}.fasta", 'w')
|
55
|
-
#
|
56
|
-
# File.open(joined_reads) do |handle|
|
57
|
-
# records = Dna.new handle
|
58
|
-
# records.each do |dna|
|
59
|
-
# reads_total += 1
|
60
|
-
# if !representatives[dna.name].nil?
|
61
|
-
# dna.name = "#{dna.name}:cluster_#{representatives[dna.name]}"
|
62
|
-
# out_handle.puts dna
|
63
|
-
# end
|
64
|
-
# end
|
65
|
-
# end
|
66
|
-
#
|
67
|
-
# out_handle.close
|
68
|
-
#
|
69
|
-
# # Print some statistics
|
70
|
-
# ohai "reads in clusters: #{clusters_total}"
|
71
|
-
# ohai "number of reads: #{reads_total}"
|
72
|
-
# ohai "unique clusters: #{clusters.keys.length}"
|
73
|
-
|
74
|
-
|
75
|
-
|
76
48
|
end
|
77
49
|
|
78
50
|
end
|
data/lib/lederhosen/tasks/uc_filter.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
##
|
2
|
+
# FILTER UC FILE BY MIN SAMPLES
|
3
|
+
#
|
4
|
+
|
5
|
+
module Lederhosen
|
6
|
+
class CLI
|
7
|
+
|
8
|
+
desc "uc_filter filter uc file by min samples",
|
9
|
+
"--input=clusters.uc --output=clusters.uc.filtered --reads=50 --samples=10"
|
10
|
+
|
11
|
+
method_option :input, :type => :string, :required => true
|
12
|
+
method_option :output, :type => :string, :required => true
|
13
|
+
method_option :reads, :type => :numeric, :required => true
|
14
|
+
method_option :samples, :type => :numeric, :required => true
|
15
|
+
|
16
|
+
def uc_filter
|
17
|
+
input = options[:input]
|
18
|
+
output = options[:output]
|
19
|
+
reads = options[:reads].to_i
|
20
|
+
samples = options[:samples].to_i
|
21
|
+
|
22
|
+
# load UC file
|
23
|
+
clstr_info = Helpers.load_uc_file input
|
24
|
+
clstr_counts = clstr_info[:clstr_counts] # clstr_counts[:clstr][sample.to_i] = reads
|
25
|
+
|
26
|
+
# filter
|
27
|
+
survivors = clstr_counts.reject do |a, b|
|
28
|
+
b.reject{ |i, j| j < reads }.length < samples
|
29
|
+
end
|
30
|
+
|
31
|
+
surviving_clusters = survivors.keys
|
32
|
+
|
33
|
+
# print filtered uc file
|
34
|
+
out = File.open(output, 'w')
|
35
|
+
kept, total = 0, 0
|
36
|
+
File.open(input) do |handle|
|
37
|
+
handle.each do |line|
|
38
|
+
if line =~ /^#/
|
39
|
+
out.print line
|
40
|
+
next
|
41
|
+
end
|
42
|
+
|
43
|
+
total += 1
|
44
|
+
|
45
|
+
if surviving_clusters.include? line.split[1].to_i
|
46
|
+
out.print line
|
47
|
+
kept += 1
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
out.close
|
52
|
+
ohai "Survivors"
|
53
|
+
ohai "clusters: #{surviving_clusters.length}/#{clstr_counts.keys.length} = #{100*surviving_clusters.length/clstr_counts.keys.length.to_f}%"
|
54
|
+
ohai "reads: #{kept}/#{total} = #{100*kept/total.to_f}%"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
data/lib/lederhosen.rb
CHANGED
data/lib/version.rb
CHANGED
data/pipeline.sh
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
set +e
|
4
|
+
|
5
|
+
# An example OTU clustering pipeline
|
6
|
+
# Austin G. Davis-Richardson
|
7
|
+
# <harekrishna at gmail dot com>
|
8
|
+
|
9
|
+
raw_reads='raw_reads/*.txt'
|
10
|
+
identities='0.975'
|
11
|
+
out_dir='pipeline'
|
12
|
+
|
13
|
+
# trim reads
|
14
|
+
bin/lederhosen trim --reads-dir=$raw_reads --out-dir=$out_dir/trimmed
|
15
|
+
|
16
|
+
# join reads
|
17
|
+
bin/lederhosen join --trimmed=$out_dir/trimmed/*.fasta --output=$out_dir/joined.fasta
|
18
|
+
|
19
|
+
# filter reads
|
20
|
+
bin/lederhosen filter --input=$out_dir/joined.fasta --output=$out_dir/filtered.fasta -k=10 --cutoff=50
|
21
|
+
|
22
|
+
# sort
|
23
|
+
bin/lederhosen sort --input=$out_dir/filtered.fasta --output=$out_dir/sorted.fasta
|
24
|
+
|
25
|
+
# cluster
|
26
|
+
for i in $identities
|
27
|
+
do
|
28
|
+
bin/lederhosen cluster --input=$out_dir/sorted.fasta --output=$out_dir/clusters_"$i"_.uc --identity=$i
|
29
|
+
done
|
30
|
+
|
31
|
+
# generate otu tables
|
32
|
+
for i in $identities
|
33
|
+
do
|
34
|
+
bin/lederhosen otu_table --clusters=$out_dir/clusters_"$i"_.uc --output=$out_dir/otus_"$i"
|
35
|
+
done
|
36
|
+
|
37
|
+
echo "complete!"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 9
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 10
|
10
|
-
version: 0.0.10
|
9
|
+
- 11
|
10
|
+
version: 0.0.11
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-05-
|
18
|
+
date: 2012-05-22 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: dna
|
@@ -121,15 +121,17 @@ files:
|
|
121
121
|
- lib/lederhosen/cli.rb
|
122
122
|
- lib/lederhosen/helpers.rb
|
123
123
|
- lib/lederhosen/tasks/cluster.rb
|
124
|
-
- lib/lederhosen/tasks/filter.rb
|
125
124
|
- lib/lederhosen/tasks/join.rb
|
125
|
+
- lib/lederhosen/tasks/k_filter.rb
|
126
126
|
- lib/lederhosen/tasks/name.rb
|
127
127
|
- lib/lederhosen/tasks/otu_table.rb
|
128
128
|
- lib/lederhosen/tasks/rep_reads.rb
|
129
129
|
- lib/lederhosen/tasks/sort.rb
|
130
130
|
- lib/lederhosen/tasks/split.rb
|
131
131
|
- lib/lederhosen/tasks/trim.rb
|
132
|
+
- lib/lederhosen/tasks/uc_filter.rb
|
132
133
|
- lib/version.rb
|
134
|
+
- pipeline.sh
|
133
135
|
- readme.md
|
134
136
|
- spec/data/ILT_L_9_B_001_1.txt
|
135
137
|
- spec/data/ILT_L_9_B_001_3.txt
|