lederhosen 0.3.8 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
data/.rspec CHANGED
@@ -1 +1 @@
1
- -c --fail-fast
1
+ -c --fail-fast -f d
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.3.8"
8
+ s.version = "0.3.9"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-20"
12
+ s.date = "2012-08-23"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -24,18 +24,17 @@ module Lederhosen
24
24
  counting_table = Hash.new { |h, k| h[k] = 0 }
25
25
  total_reads = 0
26
26
 
27
- total_reads = `grep -c '^>' #{input}`.strip.split.first.to_i
28
- pbar = ProgressBar.new 'counting', total_reads.to_i
29
27
  File.open(input) do |handle|
28
+ pbar = ProgressBar.new 'counting', File.size(input)
30
29
  records = Dna.new handle
31
30
  records.each do |r|
32
- pbar.inc
31
+ pbar.inc(handle.pos)
33
32
  total_reads += 1
34
33
  kmers = r.sequence.to_kmers(k_len)
35
34
  kmers.each { |x| counting_table[x] += 1 }
36
35
  end
36
+ pbar.finish
37
37
  end
38
- pbar.finish
39
38
 
40
39
  sum_of_kmers = counting_table.values.inject(:+)
41
40
 
@@ -49,7 +48,6 @@ module Lederhosen
49
48
  output = File.open(output, 'w')
50
49
  File.open(input) do |handle|
51
50
  records = Dna.new handle
52
-
53
51
  records.each do |r|
54
52
  kmers = r.sequence.to_kmers(k_len)
55
53
 
@@ -22,10 +22,13 @@ module Lederhosen
22
22
  `mkdir -p #{out_dir}`
23
23
 
24
24
  File.open input do |handle|
25
+ pbar = ProgressBar.new 'splitting', File.size(handle)
25
26
  Dna.new(handle).each_with_index do |record, i|
27
+ pbar.inc handle.pos
26
28
  @out = File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w') if i%n == 0
27
29
  @out.puts record
28
30
  end
31
+ pbar.finish
29
32
  end
30
33
 
31
34
  end
@@ -45,13 +45,14 @@ module Lederhosen
45
45
  kept, total = 1, 0
46
46
 
47
47
  File.open(input) do |handle|
48
+ pbar = ProgressBar.new 'saving', File.size(input)
48
49
  handle.each do |line|
49
50
  # output lederhosen filtering information because I often
50
51
  # forget to write this down :)
51
52
  out.puts "# filtered: #{input}"
52
53
  out.puts "# #{reads} reads in at least #{samples} samples"
53
54
 
54
- pbar.inc
55
+ pbar.inc handle.pos
55
56
  if line =~ /^#/
56
57
  out.print line
57
58
  next
@@ -65,9 +66,9 @@ module Lederhosen
65
66
  end
66
67
 
67
68
  end
69
+ pbar.finish
68
70
  end
69
71
 
70
- pbar.finish
71
72
  out.close
72
73
 
73
74
  ohai "clusters: #{surviving_clusters.length}/#{clstr_counts.keys.length} = #{100*surviving_clusters.length/clstr_counts.keys.length.to_f}%"
@@ -1,11 +1,13 @@
1
1
  ##
2
- # uniquify - uniquify a fasta file, also output table with sequence_id -> number of reads
2
+ # uniquify - uniquify a fasta file generating a fasta file of only unique sequences
3
+ # also output table with sequence_id -> number of reads
3
4
  #
4
5
 
5
6
  module Lederhosen
6
7
  class CLI
7
8
  desc 'uniquify',
8
- 'uniquify a fasta file and generate a table with sequence_id -> abundance'
9
+ 'uniquify a fasta file generating a fasta file of only unique sequences.' +\
10
+ 'also generate a table with sequence_id -> abundance'
9
11
 
10
12
  method_option :input, :type => :string, :required => true
11
13
  method_option :output, :type => :string, :required => true
@@ -23,12 +25,10 @@ module Lederhosen
23
25
 
24
26
  out = File.open(output, 'w')
25
27
 
26
- no_records = `grep -c '^>' #{input}`.split.first.to_i
27
- pbar = ProgressBar.new 'loading', no_records
28
-
29
28
  File.open(input) do |handle|
29
+ pbar = ProgressBar.new 'loading', File.size(input)
30
30
  Dna.new(handle).each do |record|
31
- pbar.inc
31
+ pbar.inc handle.pos
32
32
  unless sequence_counts.has_key? record.sequence
33
33
  # store the sequence and id so we can have ids in the
34
34
  # table. If the file is sorted by length then this
@@ -38,13 +38,13 @@ module Lederhosen
38
38
  end
39
39
  sequence_counts[record.sequence] += 1
40
40
  end
41
+ pbar.finish
41
42
  end
42
43
 
43
- pbar.finish
44
44
  out.close
45
45
 
46
46
  # write table
47
- pbar = ProgressBar.new 'table', no_records
47
+ pbar = ProgressBar.new 'table', sequence_counts.size
48
48
  File.open(table_out, 'w') do |out|
49
49
  sequence_counts.each_pair do |sequence, count|
50
50
  pbar.inc
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 3
5
- PATCH = 8
5
+ PATCH = 9
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
data/readme.md CHANGED
@@ -8,7 +8,7 @@ Lederhosen is free and open source under the [MIT open source license](http://op
8
8
 
9
9
  ## How do I get Lederhosen?
10
10
 
11
- 0. Obtain & Install [UCLUST](http://www.drive5.com/) (64-bit)
11
+ 0. Obtain & Install [UCLUST](http://www.drive5.com/)
12
12
  1. Obtain & Install [BLAT](http://genome.ucsc.edu/FAQ/FAQblat.html#blat3)
13
13
  2. Get a copy of [TaxCollector](http://github.com/audy/taxcollector)
14
14
  3. Install Lederhosen by typing:
data/spec/cli_spec.rb CHANGED
@@ -8,54 +8,54 @@ describe Lederhosen::CLI do
8
8
  end
9
9
 
10
10
  it 'should have a version command' do
11
- `./bin/lederhosen version 2>/dev/null`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
11
+ `./bin/lederhosen version `.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
12
12
  end
13
13
 
14
14
  it 'should trim reads' do
15
- `./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed 2>/dev/null`
15
+ `./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed`
16
16
  $?.success?.should be_true
17
17
  end
18
18
 
19
19
  it 'should join reads' do
20
- `./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta 2>/dev/null`
20
+ `./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta`
21
21
  $?.success?.should be_true
22
22
  end
23
23
 
24
24
  it 'should sort reads' do
25
- `./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta 2>/dev/null`
25
+ `./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta`
26
26
  $?.success?.should be_true
27
27
  end
28
28
 
29
29
  it 'should k_filter reads' do
30
- `./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1 2>/dev/null`
30
+ `./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1`
31
31
  $?.success?.should be_true
32
32
  end
33
33
 
34
34
  it 'should cluster reads' do
35
- `./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc 2>/dev/null`
35
+ `./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc`
36
36
  $?.success?.should be_true
37
37
  end
38
38
 
39
39
  it 'should build OTU abundance matrices' do
40
- `./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv 2>/dev/null`
40
+ `./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv`
41
41
  $?.success?.should be_true
42
42
  end
43
43
 
44
44
  it 'should filter OTU abundance matrices' do
45
- `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1 2>/dev/null`
45
+ `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
46
46
  end
47
47
 
48
48
  it 'should uniquify reads' do
49
- `./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt 2>/dev/null`
49
+ `./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt`
50
50
  $?.success?.should be_true
51
51
  end
52
52
 
53
53
  it 'should split joined.fasta into reads for each cluster' do
54
- `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1 2>/dev/null`
54
+ `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
55
55
  end
56
56
 
57
57
  it 'should create a fasta file containing representative reads for each cluster' do
58
- `./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta 2>/dev/null`
58
+ `./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta`
59
59
  $?.success?.should be_true
60
60
  end
61
61
 
@@ -66,12 +66,12 @@ describe Lederhosen::CLI do
66
66
  levels = %w{kingdom domain phylum class order genus speces}
67
67
  # Ruby 1.9 vs Ruby 1.8
68
68
  level = levels.sample rescue levels.choice
69
- `./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv 2>/dev/null`
69
+ `./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv`
70
70
  $?.success?.should be_true
71
71
  end
72
72
 
73
73
  it 'should squish otu abundance matrix by same name' do
74
- `./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv 2>/dev/null`
74
+ `./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv`
75
75
  $?.success?.should be_true
76
76
  end
77
77
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 1
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 8
10
- version: 0.3.8
9
+ - 9
10
+ version: 0.3.9
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-20 00:00:00 Z
18
+ date: 2012-08-23 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime