lederhosen 0.3.8 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.rspec CHANGED
@@ -1 +1 @@
1
- -c --fail-fast
1
+ -c --fail-fast -f d
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.3.8"
8
+ s.version = "0.3.9"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-20"
12
+ s.date = "2012-08-23"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -24,18 +24,17 @@ module Lederhosen
24
24
  counting_table = Hash.new { |h, k| h[k] = 0 }
25
25
  total_reads = 0
26
26
 
27
- total_reads = `grep -c '^>' #{input}`.strip.split.first.to_i
28
- pbar = ProgressBar.new 'counting', total_reads.to_i
29
27
  File.open(input) do |handle|
28
+ pbar = ProgressBar.new 'counting', File.size(input)
30
29
  records = Dna.new handle
31
30
  records.each do |r|
32
- pbar.inc
31
+ pbar.inc(handle.pos)
33
32
  total_reads += 1
34
33
  kmers = r.sequence.to_kmers(k_len)
35
34
  kmers.each { |x| counting_table[x] += 1 }
36
35
  end
36
+ pbar.finish
37
37
  end
38
- pbar.finish
39
38
 
40
39
  sum_of_kmers = counting_table.values.inject(:+)
41
40
 
@@ -49,7 +48,6 @@ module Lederhosen
49
48
  output = File.open(output, 'w')
50
49
  File.open(input) do |handle|
51
50
  records = Dna.new handle
52
-
53
51
  records.each do |r|
54
52
  kmers = r.sequence.to_kmers(k_len)
55
53
 
@@ -22,10 +22,13 @@ module Lederhosen
22
22
  `mkdir -p #{out_dir}`
23
23
 
24
24
  File.open input do |handle|
25
+ pbar = ProgressBar.new 'splitting', File.size(handle)
25
26
  Dna.new(handle).each_with_index do |record, i|
27
+ pbar.inc handle.pos
26
28
  @out = File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w') if i%n == 0
27
29
  @out.puts record
28
30
  end
31
+ pbar.finish
29
32
  end
30
33
 
31
34
  end
@@ -45,13 +45,14 @@ module Lederhosen
45
45
  kept, total = 1, 0
46
46
 
47
47
  File.open(input) do |handle|
48
+ pbar = ProgressBar.new 'saving', File.size(input)
48
49
  handle.each do |line|
49
50
  # output lederhosen filtering information because I often
50
51
  # forget to write this down :)
51
52
  out.puts "# filtered: #{input}"
52
53
  out.puts "# #{reads} reads in at least #{samples} samples"
53
54
 
54
- pbar.inc
55
+ pbar.inc handle.pos
55
56
  if line =~ /^#/
56
57
  out.print line
57
58
  next
@@ -65,9 +66,9 @@ module Lederhosen
65
66
  end
66
67
 
67
68
  end
69
+ pbar.finish
68
70
  end
69
71
 
70
- pbar.finish
71
72
  out.close
72
73
 
73
74
  ohai "clusters: #{surviving_clusters.length}/#{clstr_counts.keys.length} = #{100*surviving_clusters.length/clstr_counts.keys.length.to_f}%"
@@ -1,11 +1,13 @@
1
1
  ##
2
- # uniquify - uniquify a fasta file, also output table with sequence_id -> number of reads
2
+ # uniquify - uniquify a fasta file generating a fasta file of only unique sequences
3
+ # also output table with sequence_id -> number of reads
3
4
  #
4
5
 
5
6
  module Lederhosen
6
7
  class CLI
7
8
  desc 'uniquify',
8
- 'uniquify a fasta file and generate a table with sequence_id -> abundance'
9
+ 'uniquify a fasta file generating a fasta file of only unique sequences.' +\
10
+ 'also generate a table with sequence_id -> abundance'
9
11
 
10
12
  method_option :input, :type => :string, :required => true
11
13
  method_option :output, :type => :string, :required => true
@@ -23,12 +25,10 @@ module Lederhosen
23
25
 
24
26
  out = File.open(output, 'w')
25
27
 
26
- no_records = `grep -c '^>' #{input}`.split.first.to_i
27
- pbar = ProgressBar.new 'loading', no_records
28
-
29
28
  File.open(input) do |handle|
29
+ pbar = ProgressBar.new 'loading', File.size(input)
30
30
  Dna.new(handle).each do |record|
31
- pbar.inc
31
+ pbar.inc handle.pos
32
32
  unless sequence_counts.has_key? record.sequence
33
33
  # store the sequence and id so we can have ids in the
34
34
  # table. If the file is sorted by length then this
@@ -38,13 +38,13 @@ module Lederhosen
38
38
  end
39
39
  sequence_counts[record.sequence] += 1
40
40
  end
41
+ pbar.finish
41
42
  end
42
43
 
43
- pbar.finish
44
44
  out.close
45
45
 
46
46
  # write table
47
- pbar = ProgressBar.new 'table', no_records
47
+ pbar = ProgressBar.new 'table', sequence_counts.size
48
48
  File.open(table_out, 'w') do |out|
49
49
  sequence_counts.each_pair do |sequence, count|
50
50
  pbar.inc
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 3
5
- PATCH = 8
5
+ PATCH = 9
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
data/readme.md CHANGED
@@ -8,7 +8,7 @@ Lederhosen is free and open source under the [MIT open source license](http://op
8
8
 
9
9
  ## How do I get Lederhosen?
10
10
 
11
- 0. Obtain & Install [UCLUST](http://www.drive5.com/) (64-bit)
11
+ 0. Obtain & Install [UCLUST](http://www.drive5.com/)
12
12
  1. Obtain & Install [BLAT](http://genome.ucsc.edu/FAQ/FAQblat.html#blat3)
13
13
  2. Get a copy of [TaxCollector](http://github.com/audy/taxcollector)
14
14
  3. Install Lederhosen by typing:
data/spec/cli_spec.rb CHANGED
@@ -8,54 +8,54 @@ describe Lederhosen::CLI do
8
8
  end
9
9
 
10
10
  it 'should have a version command' do
11
- `./bin/lederhosen version 2>/dev/null`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
11
+ `./bin/lederhosen version `.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
12
12
  end
13
13
 
14
14
  it 'should trim reads' do
15
- `./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed 2>/dev/null`
15
+ `./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed`
16
16
  $?.success?.should be_true
17
17
  end
18
18
 
19
19
  it 'should join reads' do
20
- `./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta 2>/dev/null`
20
+ `./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta`
21
21
  $?.success?.should be_true
22
22
  end
23
23
 
24
24
  it 'should sort reads' do
25
- `./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta 2>/dev/null`
25
+ `./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta`
26
26
  $?.success?.should be_true
27
27
  end
28
28
 
29
29
  it 'should k_filter reads' do
30
- `./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1 2>/dev/null`
30
+ `./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1`
31
31
  $?.success?.should be_true
32
32
  end
33
33
 
34
34
  it 'should cluster reads' do
35
- `./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc 2>/dev/null`
35
+ `./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc`
36
36
  $?.success?.should be_true
37
37
  end
38
38
 
39
39
  it 'should build OTU abundance matrices' do
40
- `./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv 2>/dev/null`
40
+ `./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv`
41
41
  $?.success?.should be_true
42
42
  end
43
43
 
44
44
  it 'should filter OTU abundance matrices' do
45
- `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1 2>/dev/null`
45
+ `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
46
46
  end
47
47
 
48
48
  it 'should uniquify reads' do
49
- `./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt 2>/dev/null`
49
+ `./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt`
50
50
  $?.success?.should be_true
51
51
  end
52
52
 
53
53
  it 'should split joined.fasta into reads for each cluster' do
54
- `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1 2>/dev/null`
54
+ `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
55
55
  end
56
56
 
57
57
  it 'should create a fasta file containing representative reads for each cluster' do
58
- `./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta 2>/dev/null`
58
+ `./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta`
59
59
  $?.success?.should be_true
60
60
  end
61
61
 
@@ -66,12 +66,12 @@ describe Lederhosen::CLI do
66
66
  levels = %w{kingdom domain phylum class order genus speces}
67
67
  # Ruby 1.9 vs Ruby 1.8
68
68
  level = levels.sample rescue levels.choice
69
- `./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv 2>/dev/null`
69
+ `./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv`
70
70
  $?.success?.should be_true
71
71
  end
72
72
 
73
73
  it 'should squish otu abundance matrix by same name' do
74
- `./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv 2>/dev/null`
74
+ `./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv`
75
75
  $?.success?.should be_true
76
76
  end
77
77
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 1
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 8
10
- version: 0.3.8
9
+ - 9
10
+ version: 0.3.9
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-20 00:00:00 Z
18
+ date: 2012-08-23 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime