lederhosen 0.0.10 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,5 +1,7 @@
1
1
  source :rubygems
2
+
2
3
  gem 'thor'
3
4
  gem 'rspec'
4
5
  gem 'dna'
5
- gem 'progressbar'
6
+ gem 'progressbar'
7
+ gem 'awesome_print'
@@ -31,7 +31,7 @@ module Lederhosen
31
31
  next
32
32
  end
33
33
 
34
- records.each_slice(2) do |r, l|
34
+ records.each_slice(2) do |l, r|
35
35
  output.puts ">#{r.name}:#{File.basename(fasta_file, '.fasta')}\n#{r.sequence.reverse+l.sequence}"
36
36
  end
37
37
  end
@@ -5,7 +5,7 @@
5
5
  module Lederhosen
6
6
  class CLI
7
7
 
8
- desc "filter fasta file",
8
+ desc "k_filter khmer filtering",
9
9
  "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"
10
10
 
11
11
  method_option :input, :type => :string, :required => true
@@ -13,7 +13,7 @@ module Lederhosen
13
13
  method_option :k, :type => :numeric, :required => true
14
14
  method_option :cutoff, :type => :numeric, :required => true
15
15
 
16
- def filter
16
+ def k_filter
17
17
  input = options[:input]
18
18
  output = options[:output]
19
19
  k_len = options[:k].to_i
@@ -39,7 +39,7 @@ module Lederhosen
39
39
 
40
40
  kept = 0
41
41
  total_reads = total_reads.to_f
42
-
42
+
43
43
  pbar = ProgressBar.new "saving", total_reads.to_i
44
44
  output = File.open(output, 'w')
45
45
  File.open(input) do |handle|
@@ -60,7 +60,7 @@ module Lederhosen
60
60
  break
61
61
  end
62
62
  end
63
-
63
+
64
64
  if keep
65
65
  kept += 1
66
66
  output.puts r
@@ -70,7 +70,7 @@ module Lederhosen
70
70
  end
71
71
 
72
72
  pbar.finish
73
-
73
+
74
74
  ohai "survivors = #{kept} (#{kept/total_reads.to_f})"
75
75
  output.close
76
76
  end
@@ -45,34 +45,6 @@ module Lederhosen
45
45
  end
46
46
 
47
47
  end
48
-
49
- # # Get representative sequences!
50
- # reads_total = 0
51
- # representatives = {}
52
- # clusters[:count_data].each{ |k, x| representatives[x[:seed]] = k }
53
- #
54
- # out_handle = File.open("#{output}.fasta", 'w')
55
- #
56
- # File.open(joined_reads) do |handle|
57
- # records = Dna.new handle
58
- # records.each do |dna|
59
- # reads_total += 1
60
- # if !representatives[dna.name].nil?
61
- # dna.name = "#{dna.name}:cluster_#{representatives[dna.name]}"
62
- # out_handle.puts dna
63
- # end
64
- # end
65
- # end
66
- #
67
- # out_handle.close
68
- #
69
- # # Print some statistics
70
- # ohai "reads in clusters: #{clusters_total}"
71
- # ohai "number of reads: #{reads_total}"
72
- # ohai "unique clusters: #{clusters.keys.length}"
73
-
74
-
75
-
76
48
  end
77
49
 
78
50
  end
@@ -0,0 +1,58 @@
1
+ ##
2
+ # FILTER UC FILE BY MIN SAMPLES
3
+ #
4
+
5
+ module Lederhosen
6
+ class CLI
7
+
8
+ desc "uc_filter filter uc file by min samples",
9
+ "--input=clusters.uc --output=clusters.uc.filtered --reads=50 --samples=10"
10
+
11
+ method_option :input, :type => :string, :required => true
12
+ method_option :output, :type => :string, :required => true
13
+ method_option :reads, :type => :numeric, :required => true
14
+ method_option :samples, :type => :numeric, :required => true
15
+
16
+ def uc_filter
17
+ input = options[:input]
18
+ output = options[:output]
19
+ reads = options[:reads].to_i
20
+ samples = options[:samples].to_i
21
+
22
+ # load UC file
23
+ clstr_info = Helpers.load_uc_file input
24
+ clstr_counts = clstr_info[:clstr_counts] # clstr_counts[:clstr][sample.to_i] = reads
25
+
26
+ # filter
27
+ survivors = clstr_counts.reject do |a, b|
28
+ b.reject{ |i, j| j < reads }.length < samples
29
+ end
30
+
31
+ surviving_clusters = survivors.keys
32
+
33
+ # print filtered uc file
34
+ out = File.open(output, 'w')
35
+ kept, total = 0, 0
36
+ File.open(input) do |handle|
37
+ handle.each do |line|
38
+ if line =~ /^#/
39
+ out.print line
40
+ next
41
+ end
42
+
43
+ total += 1
44
+
45
+ if surviving_clusters.include? line.split[1].to_i
46
+ out.print line
47
+ kept += 1
48
+ end
49
+ end
50
+ end
51
+ out.close
52
+ ohai "Survivors"
53
+ ohai "clusters: #{surviving_clusters.length}/#{clstr_counts.keys.length} = #{100*surviving_clusters.length/clstr_counts.keys.length.to_f}%"
54
+ ohai "reads: #{kept}/#{total} = #{100*kept/total.to_f}%"
55
+ end
56
+ end
57
+
58
+ end
data/lib/lederhosen.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  require 'rubygems'
2
- require 'thor'
3
- require 'dna'
4
- require 'set'
5
- require 'progressbar'
2
+ require 'bundler'
3
+
4
+ Bundler.require
6
5
 
7
6
  Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }
8
7
 
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.0.10'
2
+ VERSION = '0.0.11'
3
3
  end
data/pipeline.sh ADDED
@@ -0,0 +1,37 @@
1
+ #!/bin/bash
2
+
3
+ set +e
4
+
5
+ # An example OTU clustering pipeline
6
+ # Austin G. Davis-Richardson
7
+ # <harekrishna at gmail dot com>
8
+
9
+ raw_reads='raw_reads/*.txt'
10
+ identities='0.975'
11
+ out_dir='pipeline'
12
+
13
+ # trim reads
14
+ bin/lederhosen trim --reads-dir=$raw_reads --out-dir=$out_dir/trimmed
15
+
16
+ # join reads
17
+ bin/lederhosen join --trimmed=$out_dir/trimmed/*.fasta --output=$out_dir/joined.fasta
18
+
19
+ # filter reads
20
+ bin/lederhosen filter --input=$out_dir/joined.fasta --output=$out_dir/filtered.fasta -k=10 --cutoff=50
21
+
22
+ # sort
23
+ bin/lederhosen sort --input=$out_dir/filtered.fasta --output=$out_dir/sorted.fasta
24
+
25
+ # cluster
26
+ for i in $identities
27
+ do
28
+ bin/lederhosen cluster --input=$out_dir/sorted.fasta --output=$out_dir/clusters_"$i"_.uc --identity=$i
29
+ done
30
+
31
+ # generate otu tables
32
+ for i in $identities
33
+ do
34
+ bin/lederhosen otu_table --clusters=$out_dir/clusters_"$i"_.uc --output=$out_dir/otus_"$i"
35
+ done
36
+
37
+ echo "complete!"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
4
+ hash: 9
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 10
10
- version: 0.0.10
9
+ - 11
10
+ version: 0.0.11
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-05-14 00:00:00 Z
18
+ date: 2012-05-22 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: dna
@@ -121,15 +121,17 @@ files:
121
121
  - lib/lederhosen/cli.rb
122
122
  - lib/lederhosen/helpers.rb
123
123
  - lib/lederhosen/tasks/cluster.rb
124
- - lib/lederhosen/tasks/filter.rb
125
124
  - lib/lederhosen/tasks/join.rb
125
+ - lib/lederhosen/tasks/k_filter.rb
126
126
  - lib/lederhosen/tasks/name.rb
127
127
  - lib/lederhosen/tasks/otu_table.rb
128
128
  - lib/lederhosen/tasks/rep_reads.rb
129
129
  - lib/lederhosen/tasks/sort.rb
130
130
  - lib/lederhosen/tasks/split.rb
131
131
  - lib/lederhosen/tasks/trim.rb
132
+ - lib/lederhosen/tasks/uc_filter.rb
132
133
  - lib/version.rb
134
+ - pipeline.sh
133
135
  - readme.md
134
136
  - spec/data/ILT_L_9_B_001_1.txt
135
137
  - spec/data/ILT_L_9_B_001_3.txt