lederhosen 0.3.9 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.3.9"
8
+ s.version = "0.4.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-23"
12
+ s.date = "2012-08-27"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -5,14 +5,16 @@ module Lederhosen
5
5
  # PAIRED-END READ WORK-AROUND (JOIN THEM)
6
6
  #
7
7
  desc "join",
8
- "join trimmed reads into a single file"
8
+ "join paired or unpaired reads into a single file. Paired reads are joined end-to-end"
9
9
 
10
10
  method_option :trimmed, :type => :string, :required => true
11
11
  method_option :output, :type => :string, :required => true
12
+ method_option :paired, :type => :boolean, :default => true
12
13
 
13
14
  def join
14
15
  trimmed = Dir[options[:trimmed]]
15
16
  output = options[:output]
17
+ paired = options[:paired]
16
18
 
17
19
  ohai "joining #{File.dirname(trimmed.first)} saving to #{output}"
18
20
 
@@ -24,20 +26,43 @@ module Lederhosen
24
26
 
25
27
  trimmed.each do |fasta_file|
26
28
  pbar.inc
29
+ records =
30
+ begin
31
+ Dna.new File.open(fasta_file)
32
+ rescue
33
+ ohai "skipping #{fasta_file} (empty?)"
34
+ next
35
+ end
27
36
 
28
- begin
29
- records = Dna.new File.open(fasta_file)
30
- rescue
31
- ohai "skipping #{fasta_file} (empty?)"
32
- next
37
+ if paired
38
+ output_paired_reads(records, output, fasta_file)
39
+ else
40
+ output_unpaired_reads(records, output, fasta_file)
33
41
  end
42
+ end
43
+ pbar.finish
44
+ end
34
45
 
46
+ no_tasks do
47
+ ##
48
+ # Output paired reads joined together
49
+ #
50
+ def output_paired_reads(records, output, fasta_file)
35
51
  records.each_slice(2) do |l, r|
36
- output.puts ">#{r.name}:split=#{r.sequence.size}:sample=#{File.basename(fasta_file, '.fasta')}\n#{r.sequence.reverse+l.sequence}"
52
+ output.puts ">#{r.name}:split=#{r.sequence.size}:sample=#{File.basename(fasta_file, '.fasta')}"
53
+ output.puts "#{r.sequence.reverse+l.sequence}"
37
54
  end
38
55
  end
39
- pbar.finish
40
- end
41
56
 
57
+ ##
58
+ # Output unpaired reads
59
+ #
60
+ def output_unpaired_reads(records, output, fasta_file)
61
+ records.each do |r|
62
+ output.puts ">#{r.name}:split=na:sample=#{File.basename(fasta_file, '.fasta')}"
63
+ output.puts r.sequence
64
+ end
65
+ end
66
+ end
42
67
  end
43
68
  end
@@ -16,10 +16,12 @@ module Lederhosen
16
16
  reads = options[:reads]
17
17
  min_samples = options[:samples]
18
18
 
19
- ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples}), saving to #{output}"
19
+ ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples})"
20
20
 
21
21
  cluster_sample_count = Hash.new { |h, k| h[k] = Hash.new }
22
22
 
23
+ ohai "loading csv file #{input}"
24
+
23
25
  # slurp up CSV file
24
26
  File.open input do |handle|
25
27
  header = handle.gets.strip.split(',')
@@ -34,9 +36,13 @@ module Lederhosen
34
36
  end
35
37
  end
36
38
 
39
+ ohai "filtering"
40
+
37
41
  # filter sample_cluster_count
38
42
  filtered = cluster_sample_count.reject { |k, v| v.reject { |k, v| v < reads }.size < min_samples }
39
43
 
44
+ ohai "saving to #{output}"
45
+
40
46
  # save the table
41
47
  out = File.open(output, 'w')
42
48
  samples = filtered.values.map(&:keys).flatten.uniq
@@ -44,13 +44,14 @@ module Lederhosen
44
44
  pbar = ProgressBar.new 'saving', lines
45
45
  kept, total = 1, 0
46
46
 
47
+ # output lederhosen filtering information because I often
48
+ # forget to write this down :)
49
+ out.puts "# filtered: #{input}"
50
+ out.puts "# #{reads} reads in at least #{samples} samples"
51
+
47
52
  File.open(input) do |handle|
48
53
  pbar = ProgressBar.new 'saving', File.size(input)
49
54
  handle.each do |line|
50
- # output lederhosen filtering information because I often
51
- # forget to write this down :)
52
- out.puts "# filtered: #{input}"
53
- out.puts "# #{reads} reads in at least #{samples} samples"
54
55
 
55
56
  pbar.inc handle.pos
56
57
  if line =~ /^#/
@@ -1,8 +1,8 @@
1
1
  module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 3
5
- PATCH = 9
4
+ MINOR = 4
5
+ PATCH = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 1
4
+ hash: 15
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 3
9
- - 9
10
- version: 0.3.9
8
+ - 4
9
+ - 0
10
+ version: 0.4.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-23 00:00:00 Z
18
+ date: 2012-08-27 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime