lederhosen 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.5.2"
8
+ s.version = "0.5.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-27"
12
+ s.date = "2012-08-28"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -99,7 +99,7 @@ module Lederhosen
99
99
  pbar = ProgressBar.new 'loading uc file', bytes
100
100
  File.open(input) do |handle|
101
101
  handle.each do |line|
102
- pbar.inc handle.pos
102
+ pbar.set handle.pos
103
103
  next if line =~ /^#/ # skip comments
104
104
 
105
105
  line = line.strip.split
@@ -28,7 +28,7 @@ module Lederhosen
28
28
  pbar = ProgressBar.new 'counting', File.size(input)
29
29
  records = Dna.new handle
30
30
  records.each do |r|
31
- pbar.inc(handle.pos)
31
+ pbar.set handle.pos
32
32
  total_reads += 1
33
33
  kmers = r.sequence.to_kmers(k_len)
34
34
  kmers.each { |x| counting_table[x] += 1 }
@@ -2,6 +2,8 @@
2
2
  # Split a fasta file into many fasta files with n reads
3
3
  #
4
4
 
5
+ require 'zlib'
6
+
5
7
  module Lederhosen
6
8
  class CLI
7
9
 
@@ -11,26 +13,49 @@ module Lederhosen
11
13
  method_option :input, :type => :string, :required => true
12
14
  method_option :out_dir, :type => :string, :required => true
13
15
  method_option :n, :type => :numeric, :required => true
16
+ method_option :gzip, :type => :boolean, :default => false
14
17
 
15
18
  def split_fasta
16
19
  input = options[:input]
17
20
  out_dir = options[:out_dir]
18
21
  n = options[:n].to_i
22
+ gzip = options[:gzip]
19
23
 
20
24
  ohai "splitting #{input} into files with #{n} reads stored in #{out_dir}"
25
+ ohai "using gzip" if gzip
21
26
 
22
27
  `mkdir -p #{out_dir}`
23
28
 
24
29
  File.open input do |handle|
25
30
  pbar = ProgressBar.new 'splitting', File.size(handle)
26
31
  Dna.new(handle).each_with_index do |record, i|
27
- pbar.inc handle.pos
28
- @out = File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w') if i%n == 0
32
+ pbar.set handle.pos
33
+ # I have to use an instance variable here because
34
+ # if I don't the variable gets set to nil after
35
+ # each iteration.
36
+ @out =
37
+ if i%n == 0 # start a new file
38
+ # GzipWriter must be closed explicitly
39
+ # this raises an exception the first time
40
+ @out.close rescue nil
41
+
42
+ # create an IO object depending on whether or
43
+ # not the user wants to use gzip
44
+ if gzip
45
+ Zlib::GzipWriter.open(File.join(out_dir, "split_#{i/n}.fasta.gz"))
46
+ else
47
+ File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w')
48
+ end
49
+ else # keep using current handle
50
+ @out
51
+ end
29
52
  @out.puts record
30
53
  end
31
54
  pbar.finish
55
+ @out.close
32
56
  end
33
57
 
58
+ ohai "created #{Dir[File.join(out_dir, '*')].size} files"
34
59
  end
35
60
  end
36
61
  end
@@ -53,7 +53,7 @@ module Lederhosen
53
53
  pbar = ProgressBar.new 'saving', File.size(input)
54
54
  handle.each do |line|
55
55
 
56
- pbar.inc handle.pos
56
+ pbar.set handle.pos
57
57
  if line =~ /^#/
58
58
  out.print line
59
59
  next
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 5
5
- PATCH = 2
5
+ PATCH = 4
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
data/spec/cli_spec.rb CHANGED
@@ -9,6 +9,7 @@ describe Lederhosen::CLI do
9
9
 
10
10
  it 'should have a version command' do
11
11
  `./bin/lederhosen version `.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
12
+ $?.success?.should be_true
12
13
  end
13
14
 
14
15
  it 'should trim reads' do
@@ -43,10 +44,17 @@ describe Lederhosen::CLI do
43
44
 
44
45
  it 'should filter OTU abundance matrices' do
45
46
  `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
47
+ $?.success?.should be_true
48
+ end
49
+
50
+ it 'should split a fasta file into smaller fasta files (optionally gzipped)' do
51
+ `./bin/lederhosen split_fasta --input=#{$test_dir}/joined.fasta --out-dir=#{$test_dir}/split/ --gzip true -n 100`
52
+ $?.success?.should be_true
46
53
  end
47
54
 
48
55
  it 'should split joined.fasta into reads for each cluster' do
49
56
  `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
57
+ $?.success?.should be_true
50
58
  end
51
59
 
52
60
  it 'should create a fasta file containing representative reads for each cluster' do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 15
4
+ hash: 3
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 5
9
- - 2
10
- version: 0.5.2
9
+ - 4
10
+ version: 0.5.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-27 00:00:00 Z
18
+ date: 2012-08-28 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime