lederhosen 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +1 -1
- data/lederhosen.gemspec +2 -2
- data/lib/lederhosen/tasks/k_filter.rb +3 -5
- data/lib/lederhosen/tasks/split_fasta.rb +3 -0
- data/lib/lederhosen/tasks/uc_filter.rb +3 -2
- data/lib/lederhosen/tasks/uniquify.rb +8 -8
- data/lib/lederhosen/version.rb +1 -1
- data/readme.md +1 -1
- data/spec/cli_spec.rb +13 -13
- metadata +4 -4
data/.rspec
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
-c --fail-fast
|
|
1
|
+
-c --fail-fast -f d
|
data/lederhosen.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = "lederhosen"
|
|
8
|
-
s.version = "0.3.8"
|
|
8
|
+
s.version = "0.3.9"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
|
12
|
-
s.date = "2012-08-
|
|
12
|
+
s.date = "2012-08-23"
|
|
13
13
|
s.description = "Various tools for OTU clustering"
|
|
14
14
|
s.email = "harekrishna@gmail.com"
|
|
15
15
|
s.executables = ["lederhosen"]
|
|
@@ -24,18 +24,17 @@ module Lederhosen
|
|
|
24
24
|
counting_table = Hash.new { |h, k| h[k] = 0 }
|
|
25
25
|
total_reads = 0
|
|
26
26
|
|
|
27
|
-
total_reads = `grep -c '^>' #{input}`.strip.split.first.to_i
|
|
28
|
-
pbar = ProgressBar.new 'counting', total_reads.to_i
|
|
29
27
|
File.open(input) do |handle|
|
|
28
|
+
pbar = ProgressBar.new 'counting', File.size(input)
|
|
30
29
|
records = Dna.new handle
|
|
31
30
|
records.each do |r|
|
|
32
|
-
pbar.inc
|
|
31
|
+
pbar.inc(handle.pos)
|
|
33
32
|
total_reads += 1
|
|
34
33
|
kmers = r.sequence.to_kmers(k_len)
|
|
35
34
|
kmers.each { |x| counting_table[x] += 1 }
|
|
36
35
|
end
|
|
36
|
+
pbar.finish
|
|
37
37
|
end
|
|
38
|
-
pbar.finish
|
|
39
38
|
|
|
40
39
|
sum_of_kmers = counting_table.values.inject(:+)
|
|
41
40
|
|
|
@@ -49,7 +48,6 @@ module Lederhosen
|
|
|
49
48
|
output = File.open(output, 'w')
|
|
50
49
|
File.open(input) do |handle|
|
|
51
50
|
records = Dna.new handle
|
|
52
|
-
|
|
53
51
|
records.each do |r|
|
|
54
52
|
kmers = r.sequence.to_kmers(k_len)
|
|
55
53
|
|
|
@@ -22,10 +22,13 @@ module Lederhosen
|
|
|
22
22
|
`mkdir -p #{out_dir}`
|
|
23
23
|
|
|
24
24
|
File.open input do |handle|
|
|
25
|
+
pbar = ProgressBar.new 'splitting', File.size(handle)
|
|
25
26
|
Dna.new(handle).each_with_index do |record, i|
|
|
27
|
+
pbar.inc handle.pos
|
|
26
28
|
@out = File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w') if i%n == 0
|
|
27
29
|
@out.puts record
|
|
28
30
|
end
|
|
31
|
+
pbar.finish
|
|
29
32
|
end
|
|
30
33
|
|
|
31
34
|
end
|
|
@@ -45,13 +45,14 @@ module Lederhosen
|
|
|
45
45
|
kept, total = 1, 0
|
|
46
46
|
|
|
47
47
|
File.open(input) do |handle|
|
|
48
|
+
pbar = ProgressBar.new 'saving', File.size(input)
|
|
48
49
|
handle.each do |line|
|
|
49
50
|
# output lederhosen filtering information because I often
|
|
50
51
|
# forget to write this down :)
|
|
51
52
|
out.puts "# filtered: #{input}"
|
|
52
53
|
out.puts "# #{reads} reads in at least #{samples} samples"
|
|
53
54
|
|
|
54
|
-
pbar.inc
|
|
55
|
+
pbar.inc handle.pos
|
|
55
56
|
if line =~ /^#/
|
|
56
57
|
out.print line
|
|
57
58
|
next
|
|
@@ -65,9 +66,9 @@ module Lederhosen
|
|
|
65
66
|
end
|
|
66
67
|
|
|
67
68
|
end
|
|
69
|
+
pbar.finish
|
|
68
70
|
end
|
|
69
71
|
|
|
70
|
-
pbar.finish
|
|
71
72
|
out.close
|
|
72
73
|
|
|
73
74
|
ohai "clusters: #{surviving_clusters.length}/#{clstr_counts.keys.length} = #{100*surviving_clusters.length/clstr_counts.keys.length.to_f}%"
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
##
|
|
2
|
-
# uniquify - uniquify a fasta file
|
|
2
|
+
# uniquify - uniquify a fasta file generating a fasta file of only unique sequences
|
|
3
|
+
# also output table with sequence_id -> number of reads
|
|
3
4
|
#
|
|
4
5
|
|
|
5
6
|
module Lederhosen
|
|
6
7
|
class CLI
|
|
7
8
|
desc 'uniquify',
|
|
8
|
-
'uniquify a fasta file
|
|
9
|
+
'uniquify a fasta file generating a fasta file of only unique sequences.' +\
|
|
10
|
+
'also generate a table with sequence_id -> abundance'
|
|
9
11
|
|
|
10
12
|
method_option :input, :type => :string, :required => true
|
|
11
13
|
method_option :output, :type => :string, :required => true
|
|
@@ -23,12 +25,10 @@ module Lederhosen
|
|
|
23
25
|
|
|
24
26
|
out = File.open(output, 'w')
|
|
25
27
|
|
|
26
|
-
no_records = `grep -c '^>' #{input}`.split.first.to_i
|
|
27
|
-
pbar = ProgressBar.new 'loading', no_records
|
|
28
|
-
|
|
29
28
|
File.open(input) do |handle|
|
|
29
|
+
pbar = ProgressBar.new 'loading', File.size(input)
|
|
30
30
|
Dna.new(handle).each do |record|
|
|
31
|
-
pbar.inc
|
|
31
|
+
pbar.inc handle.pos
|
|
32
32
|
unless sequence_counts.has_key? record.sequence
|
|
33
33
|
# store the sequence and id so we can have ids in the
|
|
34
34
|
# table. If the file is sorted by length then this
|
|
@@ -38,13 +38,13 @@ module Lederhosen
|
|
|
38
38
|
end
|
|
39
39
|
sequence_counts[record.sequence] += 1
|
|
40
40
|
end
|
|
41
|
+
pbar.finish
|
|
41
42
|
end
|
|
42
43
|
|
|
43
|
-
pbar.finish
|
|
44
44
|
out.close
|
|
45
45
|
|
|
46
46
|
# write table
|
|
47
|
-
pbar = ProgressBar.new 'table',
|
|
47
|
+
pbar = ProgressBar.new 'table', sequence_counts.size
|
|
48
48
|
File.open(table_out, 'w') do |out|
|
|
49
49
|
sequence_counts.each_pair do |sequence, count|
|
|
50
50
|
pbar.inc
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
|
@@ -8,7 +8,7 @@ Lederhosen is free and open source under the [MIT open source license](http://op
|
|
|
8
8
|
|
|
9
9
|
## How do I get Lederhosen?
|
|
10
10
|
|
|
11
|
-
0. Obtain & Install [UCLUST](http://www.drive5.com/)
|
|
11
|
+
0. Obtain & Install [UCLUST](http://www.drive5.com/)
|
|
12
12
|
1. Obtain & Install [BLAT](http://genome.ucsc.edu/FAQ/FAQblat.html#blat3)
|
|
13
13
|
2. Get a copy of [TaxCollector](http://github.com/audy/taxcollector)
|
|
14
14
|
3. Install Lederhosen by typing:
|
data/spec/cli_spec.rb
CHANGED
|
@@ -8,54 +8,54 @@ describe Lederhosen::CLI do
|
|
|
8
8
|
end
|
|
9
9
|
|
|
10
10
|
it 'should have a version command' do
|
|
11
|
-
`./bin/lederhosen version
|
|
11
|
+
`./bin/lederhosen version `.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
it 'should trim reads' do
|
|
15
|
-
`./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed
|
|
15
|
+
`./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed`
|
|
16
16
|
$?.success?.should be_true
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
it 'should join reads' do
|
|
20
|
-
`./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta
|
|
20
|
+
`./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta`
|
|
21
21
|
$?.success?.should be_true
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
it 'should sort reads' do
|
|
25
|
-
`./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta
|
|
25
|
+
`./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta`
|
|
26
26
|
$?.success?.should be_true
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
it 'should k_filter reads' do
|
|
30
|
-
`./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1
|
|
30
|
+
`./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1`
|
|
31
31
|
$?.success?.should be_true
|
|
32
32
|
end
|
|
33
33
|
|
|
34
34
|
it 'should cluster reads' do
|
|
35
|
-
`./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc
|
|
35
|
+
`./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc`
|
|
36
36
|
$?.success?.should be_true
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
it 'should build OTU abundance matrices' do
|
|
40
|
-
`./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv
|
|
40
|
+
`./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv`
|
|
41
41
|
$?.success?.should be_true
|
|
42
42
|
end
|
|
43
43
|
|
|
44
44
|
it 'should filter OTU abundance matrices' do
|
|
45
|
-
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1
|
|
45
|
+
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
it 'should uniquify reads' do
|
|
49
|
-
`./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt
|
|
49
|
+
`./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt`
|
|
50
50
|
$?.success?.should be_true
|
|
51
51
|
end
|
|
52
52
|
|
|
53
53
|
it 'should split joined.fasta into reads for each cluster' do
|
|
54
|
-
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1
|
|
54
|
+
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
|
|
55
55
|
end
|
|
56
56
|
|
|
57
57
|
it 'should create a fasta file containing representative reads for each cluster' do
|
|
58
|
-
`./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta
|
|
58
|
+
`./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta`
|
|
59
59
|
$?.success?.should be_true
|
|
60
60
|
end
|
|
61
61
|
|
|
@@ -66,12 +66,12 @@ describe Lederhosen::CLI do
|
|
|
66
66
|
levels = %w{kingdom domain phylum class order genus speces}
|
|
67
67
|
# Ruby 1.9 vs Ruby 1.8
|
|
68
68
|
level = levels.sample rescue levels.choice
|
|
69
|
-
`./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv
|
|
69
|
+
`./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv`
|
|
70
70
|
$?.success?.should be_true
|
|
71
71
|
end
|
|
72
72
|
|
|
73
73
|
it 'should squish otu abundance matrix by same name' do
|
|
74
|
-
`./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv
|
|
74
|
+
`./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv`
|
|
75
75
|
$?.success?.should be_true
|
|
76
76
|
end
|
|
77
77
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lederhosen
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 1
|
|
5
5
|
prerelease:
|
|
6
6
|
segments:
|
|
7
7
|
- 0
|
|
8
8
|
- 3
|
|
9
|
-
- 8
|
|
10
|
-
version: 0.3.8
|
|
9
|
+
- 9
|
|
10
|
+
version: 0.3.9
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- Austin G. Davis-Richardson
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2012-08-
|
|
18
|
+
date: 2012-08-23 00:00:00 Z
|
|
19
19
|
dependencies:
|
|
20
20
|
- !ruby/object:Gem::Dependency
|
|
21
21
|
type: :runtime
|