lederhosen 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,5 +1,7 @@
1
1
  source :rubygems
2
+
2
3
  gem 'thor'
3
4
  gem 'rspec'
4
5
  gem 'dna'
5
- gem 'progressbar'
6
+ gem 'progressbar'
7
+ gem 'awesome_print'
@@ -31,7 +31,7 @@ module Lederhosen
31
31
  next
32
32
  end
33
33
 
34
- records.each_slice(2) do |r, l|
34
+ records.each_slice(2) do |l, r|
35
35
  output.puts ">#{r.name}:#{File.basename(fasta_file, '.fasta')}\n#{r.sequence.reverse+l.sequence}"
36
36
  end
37
37
  end
@@ -5,7 +5,7 @@
5
5
  module Lederhosen
6
6
  class CLI
7
7
 
8
- desc "filter fasta file",
8
+ desc "k_filter khmer filtering",
9
9
  "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"
10
10
 
11
11
  method_option :input, :type => :string, :required => true
@@ -13,7 +13,7 @@ module Lederhosen
13
13
  method_option :k, :type => :numeric, :required => true
14
14
  method_option :cutoff, :type => :numeric, :required => true
15
15
 
16
- def filter
16
+ def k_filter
17
17
  input = options[:input]
18
18
  output = options[:output]
19
19
  k_len = options[:k].to_i
@@ -39,7 +39,7 @@ module Lederhosen
39
39
 
40
40
  kept = 0
41
41
  total_reads = total_reads.to_f
42
-
42
+
43
43
  pbar = ProgressBar.new "saving", total_reads.to_i
44
44
  output = File.open(output, 'w')
45
45
  File.open(input) do |handle|
@@ -60,7 +60,7 @@ module Lederhosen
60
60
  break
61
61
  end
62
62
  end
63
-
63
+
64
64
  if keep
65
65
  kept += 1
66
66
  output.puts r
@@ -70,7 +70,7 @@ module Lederhosen
70
70
  end
71
71
 
72
72
  pbar.finish
73
-
73
+
74
74
  ohai "survivors = #{kept} (#{kept/total_reads.to_f})"
75
75
  output.close
76
76
  end
@@ -45,34 +45,6 @@ module Lederhosen
45
45
  end
46
46
 
47
47
  end
48
-
49
- # # Get representative sequences!
50
- # reads_total = 0
51
- # representatives = {}
52
- # clusters[:count_data].each{ |k, x| representatives[x[:seed]] = k }
53
- #
54
- # out_handle = File.open("#{output}.fasta", 'w')
55
- #
56
- # File.open(joined_reads) do |handle|
57
- # records = Dna.new handle
58
- # records.each do |dna|
59
- # reads_total += 1
60
- # if !representatives[dna.name].nil?
61
- # dna.name = "#{dna.name}:cluster_#{representatives[dna.name]}"
62
- # out_handle.puts dna
63
- # end
64
- # end
65
- # end
66
- #
67
- # out_handle.close
68
- #
69
- # # Print some statistics
70
- # ohai "reads in clusters: #{clusters_total}"
71
- # ohai "number of reads: #{reads_total}"
72
- # ohai "unique clusters: #{clusters.keys.length}"
73
-
74
-
75
-
76
48
  end
77
49
 
78
50
  end
@@ -0,0 +1,58 @@
1
+ ##
2
+ # FILTER UC FILE BY MIN SAMPLES
3
+ #
4
+
5
+ module Lederhosen
6
+ class CLI
7
+
8
+ desc "uc_filter filter uc file by min samples",
9
+ "--input=clusters.uc --output=clusters.uc.filtered --reads=50 --samples=10"
10
+
11
+ method_option :input, :type => :string, :required => true
12
+ method_option :output, :type => :string, :required => true
13
+ method_option :reads, :type => :numeric, :required => true
14
+ method_option :samples, :type => :numeric, :required => true
15
+
16
+ def uc_filter
17
+ input = options[:input]
18
+ output = options[:output]
19
+ reads = options[:reads].to_i
20
+ samples = options[:samples].to_i
21
+
22
+ # load UC file
23
+ clstr_info = Helpers.load_uc_file input
24
+ clstr_counts = clstr_info[:clstr_counts] # clstr_counts[:clstr][sample.to_i] = reads
25
+
26
+ # filter
27
+ survivors = clstr_counts.reject do |a, b|
28
+ b.reject{ |i, j| j < reads }.length < samples
29
+ end
30
+
31
+ surviving_clusters = survivors.keys
32
+
33
+ # print filtered uc file
34
+ out = File.open(output, 'w')
35
+ kept, total = 0, 0
36
+ File.open(input) do |handle|
37
+ handle.each do |line|
38
+ if line =~ /^#/
39
+ out.print line
40
+ next
41
+ end
42
+
43
+ total += 1
44
+
45
+ if surviving_clusters.include? line.split[1].to_i
46
+ out.print line
47
+ kept += 1
48
+ end
49
+ end
50
+ end
51
+ out.close
52
+ ohai "Survivors"
53
+ ohai "clusters: #{surviving_clusters.length}/#{clstr_counts.keys.length} = #{100*surviving_clusters.length/clstr_counts.keys.length.to_f}%"
54
+ ohai "reads: #{kept}/#{total} = #{100*kept/total.to_f}%"
55
+ end
56
+ end
57
+
58
+ end
data/lib/lederhosen.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  require 'rubygems'
2
- require 'thor'
3
- require 'dna'
4
- require 'set'
5
- require 'progressbar'
2
+ require 'bundler'
3
+
4
+ Bundler.require
6
5
 
7
6
  Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }
8
7
 
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.0.10'
2
+ VERSION = '0.0.11'
3
3
  end
data/pipeline.sh ADDED
@@ -0,0 +1,37 @@
1
+ #!/bin/bash
2
+
3
+ set +e
4
+
5
+ # An example OTU clustering pipeline
6
+ # Austin G. Davis-Richardson
7
+ # <harekrishna at gmail dot com>
8
+
9
+ raw_reads='raw_reads/*.txt'
10
+ identities='0.975'
11
+ out_dir='pipeline'
12
+
13
+ # trim reads
14
+ bin/lederhosen trim --reads-dir=$raw_reads --out-dir=$out_dir/trimmed
15
+
16
+ # join reads
17
+ bin/lederhosen join --trimmed=$out_dir/trimmed/*.fasta --output=$out_dir/joined.fasta
18
+
19
+ # filter reads
20
+ bin/lederhosen filter --input=$out_dir/joined.fasta --output=$out_dir/filtered.fasta -k=10 --cutoff=50
21
+
22
+ # sort
23
+ bin/lederhosen sort --input=$out_dir/filtered.fasta --output=$out_dir/sorted.fasta
24
+
25
+ # cluster
26
+ for i in $identities
27
+ do
28
+ bin/lederhosen cluster --input=$out_dir/sorted.fasta --output=$out_dir/clusters_"$i"_.uc --identity=$i
29
+ done
30
+
31
+ # generate otu tables
32
+ for i in $identities
33
+ do
34
+ bin/lederhosen otu_table --clusters=$out_dir/clusters_"$i"_.uc --output=$out_dir/otus_"$i"
35
+ done
36
+
37
+ echo "complete!"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
4
+ hash: 9
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 10
10
- version: 0.0.10
9
+ - 11
10
+ version: 0.0.11
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-05-14 00:00:00 Z
18
+ date: 2012-05-22 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: dna
@@ -121,15 +121,17 @@ files:
121
121
  - lib/lederhosen/cli.rb
122
122
  - lib/lederhosen/helpers.rb
123
123
  - lib/lederhosen/tasks/cluster.rb
124
- - lib/lederhosen/tasks/filter.rb
125
124
  - lib/lederhosen/tasks/join.rb
125
+ - lib/lederhosen/tasks/k_filter.rb
126
126
  - lib/lederhosen/tasks/name.rb
127
127
  - lib/lederhosen/tasks/otu_table.rb
128
128
  - lib/lederhosen/tasks/rep_reads.rb
129
129
  - lib/lederhosen/tasks/sort.rb
130
130
  - lib/lederhosen/tasks/split.rb
131
131
  - lib/lederhosen/tasks/trim.rb
132
+ - lib/lederhosen/tasks/uc_filter.rb
132
133
  - lib/version.rb
134
+ - pipeline.sh
133
135
  - readme.md
134
136
  - spec/data/ILT_L_9_B_001_1.txt
135
137
  - spec/data/ILT_L_9_B_001_3.txt