lederhosen 0.5.2 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.5.2"
8
+ s.version = "0.5.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-27"
12
+ s.date = "2012-08-28"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -99,7 +99,7 @@ module Lederhosen
99
99
  pbar = ProgressBar.new 'loading uc file', bytes
100
100
  File.open(input) do |handle|
101
101
  handle.each do |line|
102
- pbar.inc handle.pos
102
+ pbar.set handle.pos
103
103
  next if line =~ /^#/ # skip comments
104
104
 
105
105
  line = line.strip.split
@@ -28,7 +28,7 @@ module Lederhosen
28
28
  pbar = ProgressBar.new 'counting', File.size(input)
29
29
  records = Dna.new handle
30
30
  records.each do |r|
31
- pbar.inc(handle.pos)
31
+ pbar.set handle.pos
32
32
  total_reads += 1
33
33
  kmers = r.sequence.to_kmers(k_len)
34
34
  kmers.each { |x| counting_table[x] += 1 }
@@ -2,6 +2,8 @@
2
2
  # Split a fasta file into many fasta files with n reads
3
3
  #
4
4
 
5
+ require 'zlib'
6
+
5
7
  module Lederhosen
6
8
  class CLI
7
9
 
@@ -11,26 +13,49 @@ module Lederhosen
11
13
  method_option :input, :type => :string, :required => true
12
14
  method_option :out_dir, :type => :string, :required => true
13
15
  method_option :n, :type => :numeric, :required => true
16
+ method_option :gzip, :type => :boolean, :default => false
14
17
 
15
18
  def split_fasta
16
19
  input = options[:input]
17
20
  out_dir = options[:out_dir]
18
21
  n = options[:n].to_i
22
+ gzip = options[:gzip]
19
23
 
20
24
  ohai "splitting #{input} into files with #{n} reads stored in #{out_dir}"
25
+ ohai "using gzip" if gzip
21
26
 
22
27
  `mkdir -p #{out_dir}`
23
28
 
24
29
  File.open input do |handle|
25
30
  pbar = ProgressBar.new 'splitting', File.size(handle)
26
31
  Dna.new(handle).each_with_index do |record, i|
27
- pbar.inc handle.pos
28
- @out = File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w') if i%n == 0
32
+ pbar.set handle.pos
33
+ # I have to use an instance variable here because
34
+ # if I don't the variable gets set to nil after
35
+ # each iteration.
36
+ @out =
37
+ if i%n == 0 # start a new file
38
+ # GzipWriter must be closed explicitly
39
+ # this raises an exception the first time
40
+ @out.close rescue nil
41
+
42
+ # create an IO object depending on whether or
43
+ # not the user wants to use gzip
44
+ if gzip
45
+ Zlib::GzipWriter.open(File.join(out_dir, "split_#{i/n}.fasta.gz"))
46
+ else
47
+ File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w')
48
+ end
49
+ else # keep using current handle
50
+ @out
51
+ end
29
52
  @out.puts record
30
53
  end
31
54
  pbar.finish
55
+ @out.close
32
56
  end
33
57
 
58
+ ohai "created #{Dir[File.join(out_dir, '*')].size} files"
34
59
  end
35
60
  end
36
61
  end
@@ -53,7 +53,7 @@ module Lederhosen
53
53
  pbar = ProgressBar.new 'saving', File.size(input)
54
54
  handle.each do |line|
55
55
 
56
- pbar.inc handle.pos
56
+ pbar.set handle.pos
57
57
  if line =~ /^#/
58
58
  out.print line
59
59
  next
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 5
5
- PATCH = 2
5
+ PATCH = 4
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
data/spec/cli_spec.rb CHANGED
@@ -9,6 +9,7 @@ describe Lederhosen::CLI do
9
9
 
10
10
  it 'should have a version command' do
11
11
  `./bin/lederhosen version `.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
12
+ $?.success?.should be_true
12
13
  end
13
14
 
14
15
  it 'should trim reads' do
@@ -43,10 +44,17 @@ describe Lederhosen::CLI do
43
44
 
44
45
  it 'should filter OTU abundance matrices' do
45
46
  `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
47
+ $?.success?.should be_true
48
+ end
49
+
50
+ it 'should split a fasta file into smaller fasta files (optionally gzipped)' do
51
+ `./bin/lederhosen split_fasta --input=#{$test_dir}/joined.fasta --out-dir=#{$test_dir}/split/ --gzip true -n 100`
52
+ $?.success?.should be_true
46
53
  end
47
54
 
48
55
  it 'should split joined.fasta into reads for each cluster' do
49
56
  `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
57
+ $?.success?.should be_true
50
58
  end
51
59
 
52
60
  it 'should create a fasta file containing representative reads for each cluster' do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 15
4
+ hash: 3
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 5
9
- - 2
10
- version: 0.5.2
9
+ - 4
10
+ version: 0.5.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-27 00:00:00 Z
18
+ date: 2012-08-28 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime