lederhosen 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -1
- data/lib/lederhosen/tasks/join.rb +1 -1
- data/lib/lederhosen/tasks/{filter.rb → k_filter.rb} +5 -5
- data/lib/lederhosen/tasks/otu_table.rb +0 -28
- data/lib/lederhosen/tasks/uc_filter.rb +58 -0
- data/lib/lederhosen.rb +3 -4
- data/lib/version.rb +1 -1
- data/pipeline.sh +37 -0
- metadata +7 -5
data/Gemfile
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
module Lederhosen
|
6
6
|
class CLI
|
7
7
|
|
8
|
-
desc "
|
8
|
+
desc "k_filter khmer filtering",
|
9
9
|
"--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"
|
10
10
|
|
11
11
|
method_option :input, :type => :string, :required => true
|
@@ -13,7 +13,7 @@ module Lederhosen
|
|
13
13
|
method_option :k, :type => :numeric, :required => true
|
14
14
|
method_option :cutoff, :type => :numeric, :required => true
|
15
15
|
|
16
|
-
def filter
|
16
|
+
def k_filter
|
17
17
|
input = options[:input]
|
18
18
|
output = options[:output]
|
19
19
|
k_len = options[:k].to_i
|
@@ -39,7 +39,7 @@ module Lederhosen
|
|
39
39
|
|
40
40
|
kept = 0
|
41
41
|
total_reads = total_reads.to_f
|
42
|
-
|
42
|
+
|
43
43
|
pbar = ProgressBar.new "saving", total_reads.to_i
|
44
44
|
output = File.open(output, 'w')
|
45
45
|
File.open(input) do |handle|
|
@@ -60,7 +60,7 @@ module Lederhosen
|
|
60
60
|
break
|
61
61
|
end
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
if keep
|
65
65
|
kept += 1
|
66
66
|
output.puts r
|
@@ -70,7 +70,7 @@ module Lederhosen
|
|
70
70
|
end
|
71
71
|
|
72
72
|
pbar.finish
|
73
|
-
|
73
|
+
|
74
74
|
ohai "survivors = #{kept} (#{kept/total_reads.to_f})"
|
75
75
|
output.close
|
76
76
|
end
|
@@ -45,34 +45,6 @@ module Lederhosen
|
|
45
45
|
end
|
46
46
|
|
47
47
|
end
|
48
|
-
|
49
|
-
# # Get representative sequences!
|
50
|
-
# reads_total = 0
|
51
|
-
# representatives = {}
|
52
|
-
# clusters[:count_data].each{ |k, x| representatives[x[:seed]] = k }
|
53
|
-
#
|
54
|
-
# out_handle = File.open("#{output}.fasta", 'w')
|
55
|
-
#
|
56
|
-
# File.open(joined_reads) do |handle|
|
57
|
-
# records = Dna.new handle
|
58
|
-
# records.each do |dna|
|
59
|
-
# reads_total += 1
|
60
|
-
# if !representatives[dna.name].nil?
|
61
|
-
# dna.name = "#{dna.name}:cluster_#{representatives[dna.name]}"
|
62
|
-
# out_handle.puts dna
|
63
|
-
# end
|
64
|
-
# end
|
65
|
-
# end
|
66
|
-
#
|
67
|
-
# out_handle.close
|
68
|
-
#
|
69
|
-
# # Print some statistics
|
70
|
-
# ohai "reads in clusters: #{clusters_total}"
|
71
|
-
# ohai "number of reads: #{reads_total}"
|
72
|
-
# ohai "unique clusters: #{clusters.keys.length}"
|
73
|
-
|
74
|
-
|
75
|
-
|
76
48
|
end
|
77
49
|
|
78
50
|
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
##
|
2
|
+
# FILTER UC FILE BY MIN SAMPLES
|
3
|
+
#
|
4
|
+
|
5
|
+
module Lederhosen
  class CLI

    desc "uc_filter filter uc file by min samples",
         "--input=clusters.uc --output=clusters.uc.filtered --reads=50 --samples=10"

    method_option :input,   :type => :string,  :required => true
    method_option :output,  :type => :string,  :required => true
    method_option :reads,   :type => :numeric, :required => true
    method_option :samples, :type => :numeric, :required => true

    ##
    # Copy a UC file, keeping only the records of clusters that have at
    # least --reads reads in at least --samples samples.
    #
    # Options (all required):
    #   input   - path of the UC file to read
    #   output  - path of the filtered UC file to write
    #   reads   - minimum read count for a sample to count towards a cluster
    #   samples - minimum number of such samples for a cluster to survive
    #
    # Lines starting with '#' are copied through unchanged and are not
    # counted in the totals. For every other line, the second
    # whitespace-separated field (converted with to_i) is used as the
    # cluster id.
    #
    # Prints the surviving cluster and read counts via ohai.
    #
    def uc_filter
      input   = options[:input]
      output  = options[:output]
      reads   = options[:reads].to_i
      samples = options[:samples].to_i

      # load UC file
      clstr_info   = Helpers.load_uc_file input
      clstr_counts = clstr_info[:clstr_counts] # clstr_counts[:clstr][sample.to_i] = reads

      # filter: drop clusters seen with >= `reads` reads in fewer than
      # `samples` samples
      survivors = clstr_counts.reject do |_cluster, sample_counts|
        sample_counts.count { |_sample, n| n >= reads } < samples
      end

      # print filtered uc file
      kept, total = 0, 0

      # Block form closes the output file even if reading the input raises.
      File.open(output, 'w') do |out|
        File.open(input) do |handle|
          handle.each do |line|
            if line =~ /^#/
              out.print line
              next
            end

            total += 1

            # Hash lookup instead of scanning an Array of keys per line.
            # NOTE(review): assumes cluster ids are stored as Integer keys,
            # matching the to_i conversion used here - confirm against
            # Helpers.load_uc_file.
            if survivors.key? line.split[1].to_i
              out.print line
              kept += 1
            end
          end
        end
      end

      ohai "Survivors"
      ohai "clusters: #{survivors.length}/#{clstr_counts.length} = #{100*survivors.length/clstr_counts.length.to_f}%"
      ohai "reads: #{kept}/#{total} = #{100*kept/total.to_f}%"
    end
  end

end
|
data/lib/lederhosen.rb
CHANGED
data/lib/version.rb
CHANGED
data/pipeline.sh
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/bin/bash

# An example OTU clustering pipeline
# Austin G. Davis-Richardson
# <harekrishna at gmail dot com>
#
# Steps: trim -> join -> k_filter -> sort -> cluster -> otu_table.
# Each step reads the file(s) written by the previous one.

# Stop at the first failing step; every later step depends on the output
# of the one before it, so continuing would only report "complete!" on
# missing or partial data.
set -e

raw_reads='raw_reads/*.txt'
identities='0.975'   # whitespace-separated list; one clustering run per value
out_dir='pipeline'

# trim reads
# The argument is quoted so the glob pattern is passed to lederhosen
# literally, independent of shell options such as nullglob/failglob.
bin/lederhosen trim --reads-dir="$raw_reads" --out-dir="$out_dir/trimmed"

# join reads
bin/lederhosen join "--trimmed=$out_dir/trimmed/*.fasta" --output="$out_dir/joined.fasta"

# filter reads (the task is named k_filter; there is no plain "filter" task)
bin/lederhosen k_filter --input="$out_dir/joined.fasta" --output="$out_dir/filtered.fasta" -k=10 --cutoff=50

# sort
bin/lederhosen sort --input="$out_dir/filtered.fasta" --output="$out_dir/sorted.fasta"

# cluster
# $identities is deliberately unquoted so it splits into one word per value.
for i in $identities
do
    bin/lederhosen cluster --input="$out_dir/sorted.fasta" --output="$out_dir/clusters_${i}_.uc" --identity="$i"
done

# generate otu tables
for i in $identities
do
    bin/lederhosen otu_table --clusters="$out_dir/clusters_${i}_.uc" --output="$out_dir/otus_${i}"
done

echo "complete!"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 9
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 10
|
10
|
-
version: 0.0.10
|
9
|
+
- 11
|
10
|
+
version: 0.0.11
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-05-
|
18
|
+
date: 2012-05-22 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: dna
|
@@ -121,15 +121,17 @@ files:
|
|
121
121
|
- lib/lederhosen/cli.rb
|
122
122
|
- lib/lederhosen/helpers.rb
|
123
123
|
- lib/lederhosen/tasks/cluster.rb
|
124
|
-
- lib/lederhosen/tasks/filter.rb
|
125
124
|
- lib/lederhosen/tasks/join.rb
|
125
|
+
- lib/lederhosen/tasks/k_filter.rb
|
126
126
|
- lib/lederhosen/tasks/name.rb
|
127
127
|
- lib/lederhosen/tasks/otu_table.rb
|
128
128
|
- lib/lederhosen/tasks/rep_reads.rb
|
129
129
|
- lib/lederhosen/tasks/sort.rb
|
130
130
|
- lib/lederhosen/tasks/split.rb
|
131
131
|
- lib/lederhosen/tasks/trim.rb
|
132
|
+
- lib/lederhosen/tasks/uc_filter.rb
|
132
133
|
- lib/version.rb
|
134
|
+
- pipeline.sh
|
133
135
|
- readme.md
|
134
136
|
- spec/data/ILT_L_9_B_001_1.txt
|
135
137
|
- spec/data/ILT_L_9_B_001_3.txt
|