lederhosen 0.3.8 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +1 -1
- data/lederhosen.gemspec +2 -2
- data/lib/lederhosen/tasks/k_filter.rb +3 -5
- data/lib/lederhosen/tasks/split_fasta.rb +3 -0
- data/lib/lederhosen/tasks/uc_filter.rb +3 -2
- data/lib/lederhosen/tasks/uniquify.rb +8 -8
- data/lib/lederhosen/version.rb +1 -1
- data/readme.md +1 -1
- data/spec/cli_spec.rb +13 -13
- metadata +4 -4
data/.rspec
CHANGED
@@ -1 +1 @@
|
|
1
|
-
-c --fail-fast
|
1
|
+
-c --fail-fast -f d
|
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "0.3.8"
|
8
|
+
s.version = "0.3.9"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-23"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -24,18 +24,17 @@ module Lederhosen
|
|
24
24
|
counting_table = Hash.new { |h, k| h[k] = 0 }
|
25
25
|
total_reads = 0
|
26
26
|
|
27
|
-
total_reads = `grep -c '^>' #{input}`.strip.split.first.to_i
|
28
|
-
pbar = ProgressBar.new 'counting', total_reads.to_i
|
29
27
|
File.open(input) do |handle|
|
28
|
+
pbar = ProgressBar.new 'counting', File.size(input)
|
30
29
|
records = Dna.new handle
|
31
30
|
records.each do |r|
|
32
|
-
pbar.inc
|
31
|
+
pbar.inc(handle.pos)
|
33
32
|
total_reads += 1
|
34
33
|
kmers = r.sequence.to_kmers(k_len)
|
35
34
|
kmers.each { |x| counting_table[x] += 1 }
|
36
35
|
end
|
36
|
+
pbar.finish
|
37
37
|
end
|
38
|
-
pbar.finish
|
39
38
|
|
40
39
|
sum_of_kmers = counting_table.values.inject(:+)
|
41
40
|
|
@@ -49,7 +48,6 @@ module Lederhosen
|
|
49
48
|
output = File.open(output, 'w')
|
50
49
|
File.open(input) do |handle|
|
51
50
|
records = Dna.new handle
|
52
|
-
|
53
51
|
records.each do |r|
|
54
52
|
kmers = r.sequence.to_kmers(k_len)
|
55
53
|
|
@@ -22,10 +22,13 @@ module Lederhosen
|
|
22
22
|
`mkdir -p #{out_dir}`
|
23
23
|
|
24
24
|
File.open input do |handle|
|
25
|
+
pbar = ProgressBar.new 'splitting', File.size(handle)
|
25
26
|
Dna.new(handle).each_with_index do |record, i|
|
27
|
+
pbar.inc handle.pos
|
26
28
|
@out = File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w') if i%n == 0
|
27
29
|
@out.puts record
|
28
30
|
end
|
31
|
+
pbar.finish
|
29
32
|
end
|
30
33
|
|
31
34
|
end
|
@@ -45,13 +45,14 @@ module Lederhosen
|
|
45
45
|
kept, total = 1, 0
|
46
46
|
|
47
47
|
File.open(input) do |handle|
|
48
|
+
pbar = ProgressBar.new 'saving', File.size(input)
|
48
49
|
handle.each do |line|
|
49
50
|
# output lederhosen filtering information because I often
|
50
51
|
# forget to write this down :)
|
51
52
|
out.puts "# filtered: #{input}"
|
52
53
|
out.puts "# #{reads} reads in at least #{samples} samples"
|
53
54
|
|
54
|
-
pbar.inc
|
55
|
+
pbar.inc handle.pos
|
55
56
|
if line =~ /^#/
|
56
57
|
out.print line
|
57
58
|
next
|
@@ -65,9 +66,9 @@ module Lederhosen
|
|
65
66
|
end
|
66
67
|
|
67
68
|
end
|
69
|
+
pbar.finish
|
68
70
|
end
|
69
71
|
|
70
|
-
pbar.finish
|
71
72
|
out.close
|
72
73
|
|
73
74
|
ohai "clusters: #{surviving_clusters.length}/#{clstr_counts.keys.length} = #{100*surviving_clusters.length/clstr_counts.keys.length.to_f}%"
|
@@ -1,11 +1,13 @@
|
|
1
1
|
##
|
2
|
-
# uniquify - uniquify a fasta file
|
2
|
+
# uniquify - uniquify a fasta file generating a fasta file of only unique sequences
|
3
|
+
# also output table with sequence_id -> number of reads
|
3
4
|
#
|
4
5
|
|
5
6
|
module Lederhosen
|
6
7
|
class CLI
|
7
8
|
desc 'uniquify',
|
8
|
-
'uniquify a fasta file'
|
9
|
+
'uniquify a fasta file generating a fasta file of only unique sequences.' +\
|
10
|
+
'also generate a table with sequence_id -> abundance'
|
9
11
|
|
10
12
|
method_option :input, :type => :string, :required => true
|
11
13
|
method_option :output, :type => :string, :required => true
|
@@ -23,12 +25,10 @@ module Lederhosen
|
|
23
25
|
|
24
26
|
out = File.open(output, 'w')
|
25
27
|
|
26
|
-
no_records = `grep -c '^>' #{input}`.split.first.to_i
|
27
|
-
pbar = ProgressBar.new 'loading', no_records
|
28
|
-
|
29
28
|
File.open(input) do |handle|
|
29
|
+
pbar = ProgressBar.new 'loading', File.size(input)
|
30
30
|
Dna.new(handle).each do |record|
|
31
|
-
pbar.inc
|
31
|
+
pbar.inc handle.pos
|
32
32
|
unless sequence_counts.has_key? record.sequence
|
33
33
|
# store the sequence and id so we can have ids in the
|
34
34
|
# table. If the file is sorted by length then this
|
@@ -38,13 +38,13 @@ module Lederhosen
|
|
38
38
|
end
|
39
39
|
sequence_counts[record.sequence] += 1
|
40
40
|
end
|
41
|
+
pbar.finish
|
41
42
|
end
|
42
43
|
|
43
|
-
pbar.finish
|
44
44
|
out.close
|
45
45
|
|
46
46
|
# write table
|
47
|
-
pbar = ProgressBar.new 'table',
|
47
|
+
pbar = ProgressBar.new 'table', sequence_counts.size
|
48
48
|
File.open(table_out, 'w') do |out|
|
49
49
|
sequence_counts.each_pair do |sequence, count|
|
50
50
|
pbar.inc
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
@@ -8,7 +8,7 @@ Lederhosen is free and open source under the [MIT open source license](http://op
|
|
8
8
|
|
9
9
|
## How do I get Lederhosen?
|
10
10
|
|
11
|
-
0. Obtain & Install [UCLUST](http://www.drive5.com/)
|
11
|
+
0. Obtain & Install [UCLUST](http://www.drive5.com/)
|
12
12
|
1. Obtain & Install [BLAT](http://genome.ucsc.edu/FAQ/FAQblat.html#blat3)
|
13
13
|
2. Get a copy of [TaxCollector](http://github.com/audy/taxcollector)
|
14
14
|
3. Install Lederhosen by typing:
|
data/spec/cli_spec.rb
CHANGED
@@ -8,54 +8,54 @@ describe Lederhosen::CLI do
|
|
8
8
|
end
|
9
9
|
|
10
10
|
it 'should have a version command' do
|
11
|
-
`./bin/lederhosen version
|
11
|
+
`./bin/lederhosen version `.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
|
12
12
|
end
|
13
13
|
|
14
14
|
it 'should trim reads' do
|
15
|
-
`./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed
|
15
|
+
`./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed`
|
16
16
|
$?.success?.should be_true
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'should join reads' do
|
20
|
-
`./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta
|
20
|
+
`./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta`
|
21
21
|
$?.success?.should be_true
|
22
22
|
end
|
23
23
|
|
24
24
|
it 'should sort reads' do
|
25
|
-
`./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta
|
25
|
+
`./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta`
|
26
26
|
$?.success?.should be_true
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'should k_filter reads' do
|
30
|
-
`./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1
|
30
|
+
`./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1`
|
31
31
|
$?.success?.should be_true
|
32
32
|
end
|
33
33
|
|
34
34
|
it 'should cluster reads' do
|
35
|
-
`./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc
|
35
|
+
`./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc`
|
36
36
|
$?.success?.should be_true
|
37
37
|
end
|
38
38
|
|
39
39
|
it 'should build OTU abundance matrices' do
|
40
|
-
`./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv
|
40
|
+
`./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv`
|
41
41
|
$?.success?.should be_true
|
42
42
|
end
|
43
43
|
|
44
44
|
it 'should filter OTU abundance matrices' do
|
45
|
-
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1
|
45
|
+
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
|
46
46
|
end
|
47
47
|
|
48
48
|
it 'should uniquify reads' do
|
49
|
-
`./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt
|
49
|
+
`./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt`
|
50
50
|
$?.success?.should be_true
|
51
51
|
end
|
52
52
|
|
53
53
|
it 'should split joined.fasta into reads for each cluster' do
|
54
|
-
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1
|
54
|
+
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'should create a fasta file containing representative reads for each cluster' do
|
58
|
-
`./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta
|
58
|
+
`./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta`
|
59
59
|
$?.success?.should be_true
|
60
60
|
end
|
61
61
|
|
@@ -66,12 +66,12 @@ describe Lederhosen::CLI do
|
|
66
66
|
levels = %w{kingdom domain phylum class order genus speces}
|
67
67
|
# Ruby 1.9 vs Ruby 1.8
|
68
68
|
level = levels.sample rescue levels.choice
|
69
|
-
`./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv
|
69
|
+
`./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv`
|
70
70
|
$?.success?.should be_true
|
71
71
|
end
|
72
72
|
|
73
73
|
it 'should squish otu abundance matrix by same name' do
|
74
|
-
`./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv
|
74
|
+
`./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv`
|
75
75
|
$?.success?.should be_true
|
76
76
|
end
|
77
77
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 1
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 8
|
10
|
-
version: 0.3.8
|
9
|
+
- 9
|
10
|
+
version: 0.3.9
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-08-
|
18
|
+
date: 2012-08-23 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|