lederhosen 0.2.9 → 0.2.10
Sign up to get free protection for your applications and to get access to all the features.
- data/lederhosen.gemspec +1 -1
- data/lib/lederhosen/cli.rb +5 -1
- data/lib/lederhosen/tasks/add_names.rb +6 -4
- data/lib/lederhosen/tasks/cluster.rb +6 -0
- data/lib/lederhosen/tasks/join.rb +4 -3
- data/lib/lederhosen/tasks/k_filter.rb +3 -2
- data/lib/lederhosen/tasks/name.rb +2 -0
- data/lib/lederhosen/tasks/otu_filter.rb +2 -0
- data/lib/lederhosen/tasks/otu_table.rb +2 -1
- data/lib/lederhosen/tasks/rep_reads.rb +1 -0
- data/lib/lederhosen/tasks/sort.rb +4 -1
- data/lib/lederhosen/tasks/split.rb +3 -0
- data/lib/lederhosen/tasks/squish.rb +3 -0
- data/lib/lederhosen/tasks/trim.rb +5 -4
- data/lib/lederhosen/tasks/uc_filter.rb +2 -0
- data/lib/lederhosen/tasks/uc_stats.rb +2 -0
- data/lib/lederhosen/tasks/uniquify.rb +9 -1
- data/lib/lederhosen/version.rb +1 -1
- metadata +3 -3
data/lederhosen.gemspec
CHANGED
data/lib/lederhosen/cli.rb
CHANGED
@@ -3,10 +3,14 @@ module Lederhosen
|
|
3
3
|
include Thor::Actions
|
4
4
|
|
5
5
|
no_tasks do
|
6
|
-
# just print string to STDERR
|
7
6
|
def ohai(s)
|
8
7
|
@shell.say_status('okay', s, 'green')
|
9
8
|
end
|
9
|
+
|
10
|
+
def ohno(s)
|
11
|
+
@shell.say_status('fail', s, 'red')
|
12
|
+
exit(-1)
|
13
|
+
end
|
10
14
|
end
|
11
15
|
|
12
16
|
@shell = Thor::Shell::Basic.new
|
@@ -14,9 +14,9 @@ module Lederhosen
|
|
14
14
|
method_option :output, :type => :string, :required => false
|
15
15
|
|
16
16
|
def add_names
|
17
|
-
blat
|
18
|
-
table
|
19
|
-
level
|
17
|
+
blat = options[:blat]
|
18
|
+
table = options[:table]
|
19
|
+
level = options[:level]
|
20
20
|
output = options[:output] || $stdout
|
21
21
|
|
22
22
|
levels = { 'kingdom' => 0,
|
@@ -28,7 +28,9 @@ module Lederhosen
|
|
28
28
|
'genus' => 5,
|
29
29
|
'species' => 6 }
|
30
30
|
|
31
|
-
|
31
|
+
ohno "unknown level #{level}. try #{levels.keys.join(', ')}" unless levels.include? level
|
32
|
+
|
33
|
+
ohai "adding names to #{table} using #{blat} @ #{levels[level]} (#{level}). Saving to #{output}"
|
32
34
|
|
33
35
|
# Corresponds with the numbers used in the TaxCollector database
|
34
36
|
# taxonomic descriptions
|
@@ -11,11 +11,12 @@ module Lederhosen
|
|
11
11
|
method_option :output, :type => :string, :required => true
|
12
12
|
|
13
13
|
def join
|
14
|
-
|
15
14
|
trimmed = Dir[options[:trimmed]]
|
16
|
-
output
|
15
|
+
output = options[:output]
|
16
|
+
|
17
|
+
ohai "joining #{File.dirname(trimmed.first)} saving to #{output}"
|
17
18
|
|
18
|
-
|
19
|
+
ohno "no reads in #{trimmed}" if trimmed.length == 0
|
19
20
|
|
20
21
|
output = File.open(output, 'w')
|
21
22
|
|
@@ -19,12 +19,13 @@ module Lederhosen
|
|
19
19
|
k_len = options[:k].to_i
|
20
20
|
cutoff = options[:cutoff]
|
21
21
|
|
22
|
+
ohai "kmer filtering #{input} (k = #{k_len}, cutoff = #{cutoff})"
|
23
|
+
|
22
24
|
counting_table = Hash.new { |h, k| h[k] = 0 }
|
23
25
|
total_reads = 0
|
24
26
|
|
25
|
-
ohai "counting kmers"
|
26
27
|
total_reads = `grep -c '^>' #{input}`.strip.split.first.to_i
|
27
|
-
pbar = ProgressBar.new '
|
28
|
+
pbar = ProgressBar.new 'counting', total_reads.to_i
|
28
29
|
File.open(input) do |handle|
|
29
30
|
records = Dna.new handle
|
30
31
|
records.each do |r|
|
@@ -14,6 +14,8 @@ module Lederhosen
|
|
14
14
|
reads = options[:reads]
|
15
15
|
samples = options[:samples]
|
16
16
|
|
17
|
+
ohai "filtering otu file #{input} (reads = #{reads}, samples = #{samples}), saving to #{output}"
|
18
|
+
|
17
19
|
##
|
18
20
|
# Iterate over otu table line by line.
|
19
21
|
# Only print if cluster meets criteria
|
@@ -12,8 +12,11 @@ module Lederhosen
|
|
12
12
|
method_option :output, :type => :string, :required => true
|
13
13
|
|
14
14
|
def sort
|
15
|
-
input
|
15
|
+
input = options[:input]
|
16
16
|
output = options[:output]
|
17
|
+
|
18
|
+
ohai "sorting #{input}"
|
19
|
+
|
17
20
|
@shell.mute {
|
18
21
|
run "uclust --mergesort #{input} --output #{output}"
|
19
22
|
}
|
@@ -22,6 +22,9 @@ module Lederhosen
|
|
22
22
|
min_clst_size = options[:min_clst_size]
|
23
23
|
finalize_every = 100_000
|
24
24
|
|
25
|
+
ohai "spltting #{reads} by #{clusters} and saving to #{out_dir}"
|
26
|
+
ohai "minimum cluster size = #{min_clst_size}"
|
27
|
+
|
25
28
|
run "mkdir -p #{out_dir}/"
|
26
29
|
|
27
30
|
ohai "loading #{clusters}"
|
@@ -14,9 +14,12 @@ module Lederhosen
|
|
14
14
|
csv_file = options[:csv_file]
|
15
15
|
output = options[:output] || $stdout
|
16
16
|
|
17
|
+
ohai "squishing #{csv_file} to #{output}"
|
18
|
+
|
17
19
|
# sample_name -> column name -> total number of reads
|
18
20
|
total_by_sample_by_column = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } }
|
19
21
|
column_names = '' # scope
|
22
|
+
|
20
23
|
# Load CSV file, merge counts in columns with the same name
|
21
24
|
File.open(csv_file) do |handle|
|
22
25
|
column_names = handle.gets.strip.split(',')[1..-1]
|
@@ -12,25 +12,26 @@ module Lederhosen
|
|
12
12
|
method_option :out_dir, :type => :string, :required => true
|
13
13
|
|
14
14
|
def trim
|
15
|
-
|
16
15
|
raw_reads = options[:reads_dir]
|
17
|
-
out_dir
|
16
|
+
out_dir = options[:out_dir]
|
17
|
+
|
18
|
+
ohai "trimming #{File.dirname(raw_reads)} and saving to #{out_dir}"
|
18
19
|
|
19
20
|
run "mkdir -p #{out_dir}"
|
20
21
|
|
21
22
|
raw_reads = Helpers.get_grouped_qseq_files raw_reads
|
23
|
+
|
22
24
|
ohai "found #{raw_reads.length} pairs of reads"
|
23
25
|
|
24
26
|
pbar = ProgressBar.new "trimming", raw_reads.length
|
25
|
-
|
26
27
|
raw_reads.each do |a|
|
27
28
|
pbar.inc
|
28
29
|
out = File.join(out_dir, "#{File.basename(a[0])}.fasta")
|
29
30
|
# TODO get total and trimmed
|
30
31
|
total, trimmed = Helpers.trim_pairs a[1][0], a[1][1], out, :min_length => 70
|
31
32
|
end
|
32
|
-
|
33
33
|
pbar.finish
|
34
|
+
|
34
35
|
end
|
35
36
|
end
|
36
37
|
end
|
@@ -20,6 +20,8 @@ module Lederhosen
|
|
20
20
|
reads = options[:reads].to_i
|
21
21
|
samples = options[:samples].to_i
|
22
22
|
|
23
|
+
ohai "filtering #{input} to #{output}, reads = #{reads} & samples = #{samples}"
|
24
|
+
|
23
25
|
# load UC file
|
24
26
|
clstr_info = Helpers.load_uc_file input
|
25
27
|
clstr_counts = clstr_info[:clstr_counts] # clstr_counts[:clstr][sample.to_i] = reads
|
@@ -18,12 +18,17 @@ module Lederhosen
|
|
18
18
|
output = options[:output]
|
19
19
|
table_out = options[:table_out]
|
20
20
|
|
21
|
+
ohai "uniquifying #{input} to #{output} w/ table #{table_out}"
|
22
|
+
|
21
23
|
sequence_counts = Hash.new { |h, k| h[k] = 0 }
|
22
24
|
|
23
25
|
out = File.open(output, 'w')
|
24
26
|
|
27
|
+
no_records = `grep -c #{input} '^>'`.split.first.to_i
|
28
|
+
pbar = ProgressBar.new 'loading', no_records
|
25
29
|
File.open(input) do |handle|
|
26
30
|
Dna.new(handle).each do |record|
|
31
|
+
pbar.inc
|
27
32
|
unless sequence_counts.has_key? record.sequence
|
28
33
|
out.puts record
|
29
34
|
end
|
@@ -31,16 +36,19 @@ module Lederhosen
|
|
31
36
|
end
|
32
37
|
end
|
33
38
|
|
39
|
+
pbar.finish
|
34
40
|
out.close
|
35
41
|
|
36
42
|
# write table
|
43
|
+
pbar = ProgressBar.new 'table', no_records
|
37
44
|
File.open(table_out, 'w') do |out|
|
38
45
|
sequence_counts.each_pair do |sequence, count|
|
46
|
+
pbar.inc
|
39
47
|
digest = Digest::MD5.hexdigest(sequence)
|
40
48
|
out.puts "#{digest},#{count}"
|
41
49
|
end
|
42
50
|
end
|
43
|
-
|
51
|
+
pbar.finish
|
44
52
|
kept = sequence_counts.keys.size
|
45
53
|
total = sequence_counts.values.inject(:+)
|
46
54
|
ohai "kept #{kept} out of #{total} reads (#{100*kept/total.to_f})"
|
data/lib/lederhosen/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 9
|
10
|
-
version: 0.2.9
|
9
|
+
- 10
|
10
|
+
version: 0.2.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|