lederhosen 0.3.7 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lederhosen.gemspec +2 -2
- data/lib/lederhosen/tasks/uniquify.rb +12 -8
- data/lib/lederhosen/version.rb +1 -1
- data/spec/cli_spec.rb +19 -18
- metadata +4 -4
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "0.3.7"
|
8
|
+
s.version = "0.3.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-20"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -1,16 +1,14 @@
|
|
1
1
|
##
|
2
|
-
# uniquify - uniquify a fasta file, also output table with
|
2
|
+
# uniquify - uniquify a fasta file, also output table with sequence_id -> number of reads
|
3
3
|
#
|
4
4
|
|
5
|
-
require 'digest/md5'
|
6
|
-
|
7
5
|
module Lederhosen
|
8
6
|
class CLI
|
9
7
|
desc 'uniquify',
|
10
|
-
'uniquify a fasta file and generate a table with
|
8
|
+
'uniquify a fasta file and generate a table with sequence_id -> abundance'
|
11
9
|
|
12
|
-
method_option :input,
|
13
|
-
method_option :output,
|
10
|
+
method_option :input, :type => :string, :required => true
|
11
|
+
method_option :output, :type => :string, :required => true
|
14
12
|
method_option :table_out, :type => :string, :required => true
|
15
13
|
|
16
14
|
def uniquify
|
@@ -21,15 +19,21 @@ module Lederhosen
|
|
21
19
|
ohai "uniquifying #{input} to #{output} w/ table #{table_out}"
|
22
20
|
|
23
21
|
sequence_counts = Hash.new { |h, k| h[k] = 0 }
|
22
|
+
sequence_to_id = Hash.new
|
24
23
|
|
25
24
|
out = File.open(output, 'w')
|
26
25
|
|
27
26
|
no_records = `grep -c '^>' #{input}`.split.first.to_i
|
28
27
|
pbar = ProgressBar.new 'loading', no_records
|
28
|
+
|
29
29
|
File.open(input) do |handle|
|
30
30
|
Dna.new(handle).each do |record|
|
31
31
|
pbar.inc
|
32
32
|
unless sequence_counts.has_key? record.sequence
|
33
|
+
# store the sequence and id so we can have ids in the
|
34
|
+
# table. If the file is sorted by length then this
|
35
|
+
# should also be a seed sequence.
|
36
|
+
sequence_to_id[record.sequence] = record.name
|
33
37
|
out.puts record
|
34
38
|
end
|
35
39
|
sequence_counts[record.sequence] += 1
|
@@ -44,8 +48,8 @@ module Lederhosen
|
|
44
48
|
File.open(table_out, 'w') do |out|
|
45
49
|
sequence_counts.each_pair do |sequence, count|
|
46
50
|
pbar.inc
|
47
|
-
|
48
|
-
out.puts "#{
|
51
|
+
id = sequence_to_id[sequence]
|
52
|
+
out.puts "#{id}\t#{count}"
|
49
53
|
end
|
50
54
|
end
|
51
55
|
pbar.finish
|
data/lib/lederhosen/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -8,49 +8,54 @@ describe Lederhosen::CLI do
|
|
8
8
|
end
|
9
9
|
|
10
10
|
it 'should have a version command' do
|
11
|
-
`./bin/lederhosen version`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
|
11
|
+
`./bin/lederhosen version 2>/dev/null`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
|
12
12
|
end
|
13
13
|
|
14
14
|
it 'should trim reads' do
|
15
|
-
`./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed`
|
15
|
+
`./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed 2>/dev/null`
|
16
16
|
$?.success?.should be_true
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'should join reads' do
|
20
|
-
`./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta`
|
20
|
+
`./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta 2>/dev/null`
|
21
21
|
$?.success?.should be_true
|
22
22
|
end
|
23
23
|
|
24
24
|
it 'should sort reads' do
|
25
|
-
`./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta`
|
25
|
+
`./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta 2>/dev/null`
|
26
26
|
$?.success?.should be_true
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'should k_filter reads' do
|
30
|
-
`./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1`
|
30
|
+
`./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1 2>/dev/null`
|
31
31
|
$?.success?.should be_true
|
32
32
|
end
|
33
33
|
|
34
34
|
it 'should cluster reads' do
|
35
|
-
`./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc`
|
35
|
+
`./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc 2>/dev/null`
|
36
36
|
$?.success?.should be_true
|
37
37
|
end
|
38
38
|
|
39
39
|
it 'should build OTU abundance matrices' do
|
40
|
-
`./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv`
|
40
|
+
`./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv 2>/dev/null`
|
41
41
|
$?.success?.should be_true
|
42
42
|
end
|
43
43
|
|
44
44
|
it 'should filter OTU abundance matrices' do
|
45
|
-
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
|
45
|
+
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1 2>/dev/null`
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'should uniquify reads' do
|
49
|
+
`./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt 2>/dev/null`
|
50
|
+
$?.success?.should be_true
|
46
51
|
end
|
47
52
|
|
48
53
|
it 'should split joined.fasta into reads for each cluster' do
|
49
|
-
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
|
54
|
+
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1 2>/dev/null`
|
50
55
|
end
|
51
56
|
|
52
57
|
it 'should create a fasta file containing representative reads for each cluster' do
|
53
|
-
`./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta`
|
58
|
+
`./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta 2>/dev/null`
|
54
59
|
$?.success?.should be_true
|
55
60
|
end
|
56
61
|
|
@@ -59,18 +64,14 @@ describe Lederhosen::CLI do
|
|
59
64
|
|
60
65
|
it 'should add names to otu abundance matrix given blat output' do
|
61
66
|
levels = %w{kingdom domain phylum class order genus speces}
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
rescue
|
66
|
-
levels.choice # 1.8 and lower
|
67
|
-
end
|
68
|
-
`./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv`
|
67
|
+
# Ruby 1.9 vs Ruby 1.8
|
68
|
+
level = levels.sample rescue levels.choice
|
69
|
+
`./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv 2>/dev/null`
|
69
70
|
$?.success?.should be_true
|
70
71
|
end
|
71
72
|
|
72
73
|
it 'should squish otu abundance matrix by same name' do
|
73
|
-
`./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv`
|
74
|
+
`./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv 2>/dev/null`
|
74
75
|
$?.success?.should be_true
|
75
76
|
end
|
76
77
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 7
|
10
|
-
version: 0.3.7
|
9
|
+
- 8
|
10
|
+
version: 0.3.8
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-08-
|
18
|
+
date: 2012-08-20 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|