lederhosen 0.3.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.3.9"
8
+ s.version = "0.4.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-23"
12
+ s.date = "2012-08-27"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -5,14 +5,16 @@ module Lederhosen
5
5
  # PAIRED-END READ WORK-AROUND (JOIN THEM)
6
6
  #
7
7
  desc "join",
8
- "join trimmed reads into a single file"
8
+ "join paired or unpaired reads into a single file. Paired reads are joined end-to-end"
9
9
 
10
10
  method_option :trimmed, :type => :string, :required => true
11
11
  method_option :output, :type => :string, :required => true
12
+ method_option :paired, :type => :boolean, :default => true
12
13
 
13
14
  def join
14
15
  trimmed = Dir[options[:trimmed]]
15
16
  output = options[:output]
17
+ paired = options[:paired]
16
18
 
17
19
  ohai "joining #{File.dirname(trimmed.first)} saving to #{output}"
18
20
 
@@ -24,20 +26,43 @@ module Lederhosen
24
26
 
25
27
  trimmed.each do |fasta_file|
26
28
  pbar.inc
29
+ records =
30
+ begin
31
+ Dna.new File.open(fasta_file)
32
+ rescue
33
+ ohai "skipping #{fasta_file} (empty?)"
34
+ next
35
+ end
27
36
 
28
- begin
29
- records = Dna.new File.open(fasta_file)
30
- rescue
31
- ohai "skipping #{fasta_file} (empty?)"
32
- next
37
+ if paired
38
+ output_paired_reads(records, output, fasta_file)
39
+ else
40
+ output_unpaired_reads(records, output, fasta_file)
33
41
  end
42
+ end
43
+ pbar.finish
44
+ end
34
45
 
46
+ no_tasks do
47
+ ##
48
+ # Output paired reads joined together
49
+ #
50
+ def output_paired_reads(records, output, fasta_file)
35
51
  records.each_slice(2) do |l, r|
36
- output.puts ">#{r.name}:split=#{r.sequence.size}:sample=#{File.basename(fasta_file, '.fasta')}\n#{r.sequence.reverse+l.sequence}"
52
+ output.puts ">#{r.name}:split=#{r.sequence.size}:sample=#{File.basename(fasta_file, '.fasta')}"
53
+ output.puts "#{r.sequence.reverse+l.sequence}"
37
54
  end
38
55
  end
39
- pbar.finish
40
- end
41
56
 
57
+ ##
58
+ # Output unpaired reads
59
+ #
60
+ def output_unpaired_reads(records, output, fasta_file)
61
+ records.each do |r|
62
+ output.puts ">#{r.name}:split=na:sample=#{File.basename(fasta_file, '.fasta')}"
63
+ output.puts r.sequence
64
+ end
65
+ end
66
+ end
42
67
  end
43
68
  end
@@ -16,10 +16,12 @@ module Lederhosen
16
16
  reads = options[:reads]
17
17
  min_samples = options[:samples]
18
18
 
19
- ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples}), saving to #{output}"
19
+ ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples})"
20
20
 
21
21
  cluster_sample_count = Hash.new { |h, k| h[k] = Hash.new }
22
22
 
23
+ ohai "loading csv file #{input}"
24
+
23
25
  # slurp up CSV file
24
26
  File.open input do |handle|
25
27
  header = handle.gets.strip.split(',')
@@ -34,9 +36,13 @@ module Lederhosen
34
36
  end
35
37
  end
36
38
 
39
+ ohai "filtering"
40
+
37
41
  # filter sample_cluster_count
38
42
  filtered = cluster_sample_count.reject { |k, v| v.reject { |k, v| v < reads }.size < min_samples }
39
43
 
44
+ ohai "saving to #{output}"
45
+
40
46
  # save the table
41
47
  out = File.open(output, 'w')
42
48
  samples = filtered.values.map(&:keys).flatten.uniq
@@ -44,13 +44,14 @@ module Lederhosen
44
44
  pbar = ProgressBar.new 'saving', lines
45
45
  kept, total = 1, 0
46
46
 
47
+ # output lederhosen filtering information because I often
48
+ # forget to write this down :)
49
+ out.puts "# filtered: #{input}"
50
+ out.puts "# #{reads} reads in at least #{samples} samples"
51
+
47
52
  File.open(input) do |handle|
48
53
  pbar = ProgressBar.new 'saving', File.size(input)
49
54
  handle.each do |line|
50
- # output lederhosen filtering information because I often
51
- # forget to write this down :)
52
- out.puts "# filtered: #{input}"
53
- out.puts "# #{reads} reads in at least #{samples} samples"
54
55
 
55
56
  pbar.inc handle.pos
56
57
  if line =~ /^#/
@@ -1,8 +1,8 @@
1
1
  module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 3
5
- PATCH = 9
4
+ MINOR = 4
5
+ PATCH = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 1
4
+ hash: 15
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 3
9
- - 9
10
- version: 0.3.9
8
+ - 4
9
+ - 0
10
+ version: 0.4.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-23 00:00:00 Z
18
+ date: 2012-08-27 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime