lederhosen 0.3.9 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "0.3.9"
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-27"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -5,14 +5,16 @@ module Lederhosen
|
|
5
5
|
# PAIRED-END READ WORK-AROUND (JOIN THEM)
|
6
6
|
#
|
7
7
|
desc "join",
|
8
|
-
|
8
|
+
"join paired or unpaired reads into a single file. Paired reads are joined end-to-end"
|
9
9
|
|
10
10
|
method_option :trimmed, :type => :string, :required => true
|
11
11
|
method_option :output, :type => :string, :required => true
|
12
|
+
method_option :paired, :type => :boolean, :default => true
|
12
13
|
|
13
14
|
def join
|
14
15
|
trimmed = Dir[options[:trimmed]]
|
15
16
|
output = options[:output]
|
17
|
+
paired = options[:paired]
|
16
18
|
|
17
19
|
ohai "joining #{File.dirname(trimmed.first)} saving to #{output}"
|
18
20
|
|
@@ -24,20 +26,43 @@ module Lederhosen
|
|
24
26
|
|
25
27
|
trimmed.each do |fasta_file|
|
26
28
|
pbar.inc
|
29
|
+
records =
|
30
|
+
begin
|
31
|
+
Dna.new File.open(fasta_file)
|
32
|
+
rescue
|
33
|
+
ohai "skipping #{fasta_file} (empty?)"
|
34
|
+
next
|
35
|
+
end
|
27
36
|
|
28
|
-
|
29
|
-
records
|
30
|
-
|
31
|
-
|
32
|
-
next
|
37
|
+
if paired
|
38
|
+
output_paired_reads(records, output, fasta_file)
|
39
|
+
else
|
40
|
+
output_unpaired_reads(records, output, fasta_file)
|
33
41
|
end
|
42
|
+
end
|
43
|
+
pbar.finish
|
44
|
+
end
|
34
45
|
|
46
|
+
no_tasks do
|
47
|
+
##
|
48
|
+
# Output paired reads joined together
|
49
|
+
#
|
50
|
+
def output_paired_reads(records, output, fasta_file)
|
35
51
|
records.each_slice(2) do |l, r|
|
36
|
-
output.puts ">#{r.name}:split=#{r.sequence.size}:sample=#{File.basename(fasta_file, '.fasta')}
|
52
|
+
output.puts ">#{r.name}:split=#{r.sequence.size}:sample=#{File.basename(fasta_file, '.fasta')}"
|
53
|
+
output.puts "#{r.sequence.reverse+l.sequence}"
|
37
54
|
end
|
38
55
|
end
|
39
|
-
pbar.finish
|
40
|
-
end
|
41
56
|
|
57
|
+
##
|
58
|
+
# Output unpaired reads
|
59
|
+
#
|
60
|
+
def output_unpaired_reads(records, output, fasta_file)
|
61
|
+
records.each do |r|
|
62
|
+
output.puts ">#{r.name}:split=na:sample=#{File.basename(fasta_file, '.fasta')}"
|
63
|
+
output.puts r.sequence
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
42
67
|
end
|
43
68
|
end
|
@@ -16,10 +16,12 @@ module Lederhosen
|
|
16
16
|
reads = options[:reads]
|
17
17
|
min_samples = options[:samples]
|
18
18
|
|
19
|
-
ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples})
|
19
|
+
ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples})"
|
20
20
|
|
21
21
|
cluster_sample_count = Hash.new { |h, k| h[k] = Hash.new }
|
22
22
|
|
23
|
+
ohai "loading csv file #{input}"
|
24
|
+
|
23
25
|
# slurp up CSV file
|
24
26
|
File.open input do |handle|
|
25
27
|
header = handle.gets.strip.split(',')
|
@@ -34,9 +36,13 @@ module Lederhosen
|
|
34
36
|
end
|
35
37
|
end
|
36
38
|
|
39
|
+
ohai "filtering"
|
40
|
+
|
37
41
|
# filter sample_cluster_count
|
38
42
|
filtered = cluster_sample_count.reject { |k, v| v.reject { |k, v| v < reads }.size < min_samples }
|
39
43
|
|
44
|
+
ohai "saving to #{output}"
|
45
|
+
|
40
46
|
# save the table
|
41
47
|
out = File.open(output, 'w')
|
42
48
|
samples = filtered.values.map(&:keys).flatten.uniq
|
@@ -44,13 +44,14 @@ module Lederhosen
|
|
44
44
|
pbar = ProgressBar.new 'saving', lines
|
45
45
|
kept, total = 1, 0
|
46
46
|
|
47
|
+
# output lederhosen filtering information because I often
|
48
|
+
# forget to write this down :)
|
49
|
+
out.puts "# filtered: #{input}"
|
50
|
+
out.puts "# #{reads} reads in at least #{samples} samples"
|
51
|
+
|
47
52
|
File.open(input) do |handle|
|
48
53
|
pbar = ProgressBar.new 'saving', File.size(input)
|
49
54
|
handle.each do |line|
|
50
|
-
# output lederhosen filtering information because I often
|
51
|
-
# forget to write this down :)
|
52
|
-
out.puts "# filtered: #{input}"
|
53
|
-
out.puts "# #{reads} reads in at least #{samples} samples"
|
54
55
|
|
55
56
|
pbar.inc handle.pos
|
56
57
|
if line =~ /^#/
|
data/lib/lederhosen/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 3
|
9
|
-
- 9
|
10
|
-
version: 0.3.9
|
8
|
+
- 4
|
9
|
+
- 0
|
10
|
+
version: 0.4.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-08-
|
18
|
+
date: 2012-08-27 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|