lederhosen 0.2.9 → 0.2.10

Sign up to get free protection for your applications and to get access to all the features.
data/lederhosen.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.2.9"
8
+ s.version = "0.2.10"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
@@ -3,10 +3,14 @@ module Lederhosen
3
3
  include Thor::Actions
4
4
 
5
5
  no_tasks do
6
- # just print string to STDERR
7
6
  def ohai(s)
8
7
  @shell.say_status('okay', s, 'green')
9
8
  end
9
+
10
+ def ohno(s)
11
+ @shell.say_status('fail', s, 'red')
12
+ exit(-1)
13
+ end
10
14
  end
11
15
 
12
16
  @shell = Thor::Shell::Basic.new
@@ -14,9 +14,9 @@ module Lederhosen
14
14
  method_option :output, :type => :string, :required => false
15
15
 
16
16
  def add_names
17
- blat = options[:blat]
18
- table = options[:table]
19
- level = options[:level]
17
+ blat = options[:blat]
18
+ table = options[:table]
19
+ level = options[:level]
20
20
  output = options[:output] || $stdout
21
21
 
22
22
  levels = { 'kingdom' => 0,
@@ -28,7 +28,9 @@ module Lederhosen
28
28
  'genus' => 5,
29
29
  'species' => 6 }
30
30
 
31
- fail "unknown level. try #{levels.keys.join(', ')}" unless levels.include? level
31
+ ohno "unknown level #{level}. try #{levels.keys.join(', ')}" unless levels.include? level
32
+
33
+ ohai "adding names to #{table} using #{blat} @ #{levels[level]} (#{level}). Saving to #{output}"
32
34
 
33
35
  # Corresponds with the numbers used in the TaxCollector database
34
36
  # taxonomic descriptions
@@ -27,6 +27,12 @@ module Lederhosen
27
27
  wordlen = options[:wordlen]
28
28
  lib = options[:lib]
29
29
 
30
+ ohai "clustering #{input}, saving to #{output}"
31
+
32
+ options.each_pair do |key, value|
33
+ ohai "#{key} = #{value}"
34
+ end
35
+
30
36
  cmd = [
31
37
  'uclust',
32
38
  "--input #{input}",
@@ -11,11 +11,12 @@ module Lederhosen
11
11
  method_option :output, :type => :string, :required => true
12
12
 
13
13
  def join
14
-
15
14
  trimmed = Dir[options[:trimmed]]
16
- output = options[:output]
15
+ output = options[:output]
16
+
17
+ ohai "joining #{File.dirname(trimmed.first)} saving to #{output}"
17
18
 
18
- fail "no reads in #{trimmed}" if trimmed.length == 0
19
+ ohno "no reads in #{trimmed}" if trimmed.length == 0
19
20
 
20
21
  output = File.open(output, 'w')
21
22
 
@@ -19,12 +19,13 @@ module Lederhosen
19
19
  k_len = options[:k].to_i
20
20
  cutoff = options[:cutoff]
21
21
 
22
+ ohai "kmer filtering #{input} (k = #{k_len}, cutoff = #{cutoff})"
23
+
22
24
  counting_table = Hash.new { |h, k| h[k] = 0 }
23
25
  total_reads = 0
24
26
 
25
- ohai "counting kmers"
26
27
  total_reads = `grep -c '^>' #{input}`.strip.split.first.to_i
27
- pbar = ProgressBar.new 'count kmers', total_reads.to_i
28
+ pbar = ProgressBar.new 'counting', total_reads.to_i
28
29
  File.open(input) do |handle|
29
30
  records = Dna.new handle
30
31
  records.each do |r|
@@ -17,6 +17,8 @@ module Lederhosen
17
17
  database = options[:database]
18
18
  output = options[:output]
19
19
 
20
+ ohai "identifying #{reps} in #{database} and saving to #{output}"
21
+
20
22
  # run blat/blast
21
23
  cmd = [
22
24
  'blat',
@@ -14,6 +14,8 @@ module Lederhosen
14
14
  reads = options[:reads]
15
15
  samples = options[:samples]
16
16
 
17
+ ohai "filtering otu file #{input} (reads = #{reads}, samples = #{samples}), saving to #{output}"
18
+
17
19
  ##
18
20
  # Iterate over otu table line by line.
19
21
  # Only print if cluster meets criteria
@@ -16,7 +16,8 @@ module Lederhosen
16
16
  def otu_table
17
17
  input = options[:clusters]
18
18
  output = options[:output]
19
- joined_reads = options[:joined]
19
+
20
+ ohai "generating otu table from #{input}, saving to #{output}"
20
21
 
21
22
  # Load cluster table
22
23
 
@@ -17,6 +17,7 @@ module Lederhosen
17
17
  output = options[:output]
18
18
  joined_reads = options[:joined]
19
19
 
20
+ ohai "getting represntative reads for #{input} w/ reads #{joined_reads} and saving to #{output}"
20
21
 
21
22
  # Load cluster table!
22
23
  clstr_info = Helpers.load_uc_file input
@@ -12,8 +12,11 @@ module Lederhosen
12
12
  method_option :output, :type => :string, :required => true
13
13
 
14
14
  def sort
15
- input = options[:input]
15
+ input = options[:input]
16
16
  output = options[:output]
17
+
18
+ ohai "sorting #{input}"
19
+
17
20
  @shell.mute {
18
21
  run "uclust --mergesort #{input} --output #{output}"
19
22
  }
@@ -22,6 +22,9 @@ module Lederhosen
22
22
  min_clst_size = options[:min_clst_size]
23
23
  finalize_every = 100_000
24
24
 
25
+ ohai "spltting #{reads} by #{clusters} and saving to #{out_dir}"
26
+ ohai "minimum cluster size = #{min_clst_size}"
27
+
25
28
  run "mkdir -p #{out_dir}/"
26
29
 
27
30
  ohai "loading #{clusters}"
@@ -14,9 +14,12 @@ module Lederhosen
14
14
  csv_file = options[:csv_file]
15
15
  output = options[:output] || $stdout
16
16
 
17
+ ohai "squishing #{csv_file} to #{output}"
18
+
17
19
  # sample_name -> column name -> total number of reads
18
20
  total_by_sample_by_column = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } }
19
21
  column_names = '' # scope
22
+
20
23
  # Load CSV file, merge counts in columns with the same name
21
24
  File.open(csv_file) do |handle|
22
25
  column_names = handle.gets.strip.split(',')[1..-1]
@@ -12,25 +12,26 @@ module Lederhosen
12
12
  method_option :out_dir, :type => :string, :required => true
13
13
 
14
14
  def trim
15
-
16
15
  raw_reads = options[:reads_dir]
17
- out_dir = options[:out_dir]
16
+ out_dir = options[:out_dir]
17
+
18
+ ohai "trimming #{File.dirname(raw_reads)} and saving to #{out_dir}"
18
19
 
19
20
  run "mkdir -p #{out_dir}"
20
21
 
21
22
  raw_reads = Helpers.get_grouped_qseq_files raw_reads
23
+
22
24
  ohai "found #{raw_reads.length} pairs of reads"
23
25
 
24
26
  pbar = ProgressBar.new "trimming", raw_reads.length
25
-
26
27
  raw_reads.each do |a|
27
28
  pbar.inc
28
29
  out = File.join(out_dir, "#{File.basename(a[0])}.fasta")
29
30
  # TODO get total and trimmed
30
31
  total, trimmed = Helpers.trim_pairs a[1][0], a[1][1], out, :min_length => 70
31
32
  end
32
-
33
33
  pbar.finish
34
+
34
35
  end
35
36
  end
36
37
  end
@@ -20,6 +20,8 @@ module Lederhosen
20
20
  reads = options[:reads].to_i
21
21
  samples = options[:samples].to_i
22
22
 
23
+ ohai "filtering #{input} to #{output}, reads = #{reads} & samples = #{samples}"
24
+
23
25
  # load UC file
24
26
  clstr_info = Helpers.load_uc_file input
25
27
  clstr_counts = clstr_info[:clstr_counts] # clstr_counts[:clstr][sample.to_i] = reads
@@ -12,6 +12,8 @@ module Lederhosen
12
12
  def uc_stats
13
13
  input = options[:input]
14
14
 
15
+ ohai "calculating statistics for #{input}"
16
+
15
17
  # TODO add more stats
16
18
  cluster_stats = Hash.new { |h, k|
17
19
  h[k] = {
@@ -18,12 +18,17 @@ module Lederhosen
18
18
  output = options[:output]
19
19
  table_out = options[:table_out]
20
20
 
21
+ ohai "uniquifying #{input} to #{output} w/ table #{table_out}"
22
+
21
23
  sequence_counts = Hash.new { |h, k| h[k] = 0 }
22
24
 
23
25
  out = File.open(output, 'w')
24
26
 
27
+ no_records = `grep -c #{input} '^>'`.split.first.to_i
28
+ pbar = ProgressBar.new 'loading', no_records
25
29
  File.open(input) do |handle|
26
30
  Dna.new(handle).each do |record|
31
+ pbar.inc
27
32
  unless sequence_counts.has_key? record.sequence
28
33
  out.puts record
29
34
  end
@@ -31,16 +36,19 @@ module Lederhosen
31
36
  end
32
37
  end
33
38
 
39
+ pbar.finish
34
40
  out.close
35
41
 
36
42
  # write table
43
+ pbar = ProgressBar.new 'table', no_records
37
44
  File.open(table_out, 'w') do |out|
38
45
  sequence_counts.each_pair do |sequence, count|
46
+ pbar.inc
39
47
  digest = Digest::MD5.hexdigest(sequence)
40
48
  out.puts "#{digest},#{count}"
41
49
  end
42
50
  end
43
-
51
+ pbar.finish
44
52
  kept = sequence_counts.keys.size
45
53
  total = sequence_counts.values.inject(:+)
46
54
  ohai "kept #{kept} out of #{total} reads (#{100*kept/total.to_f})"
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 2
5
- PATCH = 9
5
+ PATCH = 10
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 5
4
+ hash: 3
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 9
10
- version: 0.2.9
9
+ - 10
10
+ version: 0.2.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson