lederhosen 0.3.7 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lederhosen.gemspec +2 -2
- data/lib/lederhosen/tasks/uniquify.rb +12 -8
- data/lib/lederhosen/version.rb +1 -1
- data/spec/cli_spec.rb +19 -18
- metadata +4 -4
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "0.3.7"
|
8
|
+
s.version = "0.3.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-20"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
data/lib/lederhosen/tasks/uniquify.rb
CHANGED
@@ -1,16 +1,14 @@
|
|
1
1
|
##
|
2
|
-
# uniquify - uniquify a fasta file, also output table with
|
2
|
+
# uniquify - uniquify a fasta file, also output table with sequence_id -> number of reads
|
3
3
|
#
|
4
4
|
|
5
|
-
require 'digest/md5'
|
6
|
-
|
7
5
|
module Lederhosen
|
8
6
|
class CLI
|
9
7
|
desc 'uniquify',
|
10
|
-
'uniquify a fasta file and generate a table with
|
8
|
+
'uniquify a fasta file and generate a table with sequence_id -> abundance'
|
11
9
|
|
12
|
-
method_option :input,
|
13
|
-
method_option :output,
|
10
|
+
method_option :input, :type => :string, :required => true
|
11
|
+
method_option :output, :type => :string, :required => true
|
14
12
|
method_option :table_out, :type => :string, :required => true
|
15
13
|
|
16
14
|
def uniquify
|
@@ -21,15 +19,21 @@ module Lederhosen
|
|
21
19
|
ohai "uniquifying #{input} to #{output} w/ table #{table_out}"
|
22
20
|
|
23
21
|
sequence_counts = Hash.new { |h, k| h[k] = 0 }
|
22
|
+
sequence_to_id = Hash.new
|
24
23
|
|
25
24
|
out = File.open(output, 'w')
|
26
25
|
|
27
26
|
no_records = `grep -c '^>' #{input}`.split.first.to_i
|
28
27
|
pbar = ProgressBar.new 'loading', no_records
|
28
|
+
|
29
29
|
File.open(input) do |handle|
|
30
30
|
Dna.new(handle).each do |record|
|
31
31
|
pbar.inc
|
32
32
|
unless sequence_counts.has_key? record.sequence
|
33
|
+
# store the sequence and id so we can have ids in the
|
34
|
+
# table. If the file is sorted by length then this
|
35
|
+
# should also be a seed sequence.
|
36
|
+
sequence_to_id[record.sequence] = record.name
|
33
37
|
out.puts record
|
34
38
|
end
|
35
39
|
sequence_counts[record.sequence] += 1
|
@@ -44,8 +48,8 @@ module Lederhosen
|
|
44
48
|
File.open(table_out, 'w') do |out|
|
45
49
|
sequence_counts.each_pair do |sequence, count|
|
46
50
|
pbar.inc
|
47
|
-
|
48
|
-
out.puts "#{
|
51
|
+
id = sequence_to_id[sequence]
|
52
|
+
out.puts "#{id}\t#{count}"
|
49
53
|
end
|
50
54
|
end
|
51
55
|
pbar.finish
|
data/lib/lederhosen/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -8,49 +8,54 @@ describe Lederhosen::CLI do
|
|
8
8
|
end
|
9
9
|
|
10
10
|
it 'should have a version command' do
|
11
|
-
`./bin/lederhosen version`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
|
11
|
+
`./bin/lederhosen version 2>/dev/null`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
|
12
12
|
end
|
13
13
|
|
14
14
|
it 'should trim reads' do
|
15
|
-
`./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed`
|
15
|
+
`./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed 2>/dev/null`
|
16
16
|
$?.success?.should be_true
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'should join reads' do
|
20
|
-
`./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta`
|
20
|
+
`./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta 2>/dev/null`
|
21
21
|
$?.success?.should be_true
|
22
22
|
end
|
23
23
|
|
24
24
|
it 'should sort reads' do
|
25
|
-
`./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta`
|
25
|
+
`./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta 2>/dev/null`
|
26
26
|
$?.success?.should be_true
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'should k_filter reads' do
|
30
|
-
`./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1`
|
30
|
+
`./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1 2>/dev/null`
|
31
31
|
$?.success?.should be_true
|
32
32
|
end
|
33
33
|
|
34
34
|
it 'should cluster reads' do
|
35
|
-
`./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc`
|
35
|
+
`./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc 2>/dev/null`
|
36
36
|
$?.success?.should be_true
|
37
37
|
end
|
38
38
|
|
39
39
|
it 'should build OTU abundance matrices' do
|
40
|
-
`./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv`
|
40
|
+
`./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv 2>/dev/null`
|
41
41
|
$?.success?.should be_true
|
42
42
|
end
|
43
43
|
|
44
44
|
it 'should filter OTU abundance matrices' do
|
45
|
-
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
|
45
|
+
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1 2>/dev/null`
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'should uniquify reads' do
|
49
|
+
`./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt 2>/dev/null`
|
50
|
+
$?.success?.should be_true
|
46
51
|
end
|
47
52
|
|
48
53
|
it 'should split joined.fasta into reads for each cluster' do
|
49
|
-
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
|
54
|
+
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1 2>/dev/null`
|
50
55
|
end
|
51
56
|
|
52
57
|
it 'should create a fasta file containing representative reads for each cluster' do
|
53
|
-
`./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta`
|
58
|
+
`./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta 2>/dev/null`
|
54
59
|
$?.success?.should be_true
|
55
60
|
end
|
56
61
|
|
@@ -59,18 +64,14 @@ describe Lederhosen::CLI do
|
|
59
64
|
|
60
65
|
it 'should add names to otu abundance matrix given blat output' do
|
61
66
|
levels = %w{kingdom domain phylum class order genus speces}
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
rescue
|
66
|
-
levels.choice # 1.8 and lower
|
67
|
-
end
|
68
|
-
`./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv`
|
67
|
+
# Ruby 1.9 vs Ruby 1.8
|
68
|
+
level = levels.sample rescue levels.choice
|
69
|
+
`./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv 2>/dev/null`
|
69
70
|
$?.success?.should be_true
|
70
71
|
end
|
71
72
|
|
72
73
|
it 'should squish otu abundance matrix by same name' do
|
73
|
-
`./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv`
|
74
|
+
`./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv 2>/dev/null`
|
74
75
|
$?.success?.should be_true
|
75
76
|
end
|
76
77
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 7
|
10
|
-
version: 0.3.7
|
9
|
+
- 8
|
10
|
+
version: 0.3.8
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-08-
|
18
|
+
date: 2012-08-20 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|