lederhosen 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "0.5.2"
|
8
|
+
s.version = "0.5.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-28"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
data/lib/lederhosen/helpers.rb
CHANGED
@@ -28,7 +28,7 @@ module Lederhosen
|
|
28
28
|
pbar = ProgressBar.new 'counting', File.size(input)
|
29
29
|
records = Dna.new handle
|
30
30
|
records.each do |r|
|
31
|
-
pbar.
|
31
|
+
pbar.set handle.pos
|
32
32
|
total_reads += 1
|
33
33
|
kmers = r.sequence.to_kmers(k_len)
|
34
34
|
kmers.each { |x| counting_table[x] += 1 }
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# Split a fasta file into many fasta files with n reads
|
3
3
|
#
|
4
4
|
|
5
|
+
require 'zlib'
|
6
|
+
|
5
7
|
module Lederhosen
|
6
8
|
class CLI
|
7
9
|
|
@@ -11,26 +13,49 @@ module Lederhosen
|
|
11
13
|
method_option :input, :type => :string, :required => true
|
12
14
|
method_option :out_dir, :type => :string, :required => true
|
13
15
|
method_option :n, :type => :numeric, :required => true
|
16
|
+
method_option :gzip, :type => :boolean, :default => false
|
14
17
|
|
15
18
|
def split_fasta
|
16
19
|
input = options[:input]
|
17
20
|
out_dir = options[:out_dir]
|
18
21
|
n = options[:n].to_i
|
22
|
+
gzip = options[:gzip]
|
19
23
|
|
20
24
|
ohai "splitting #{input} into files with #{n} reads stored in #{out_dir}"
|
25
|
+
ohai "using gzip" if gzip
|
21
26
|
|
22
27
|
`mkdir -p #{out_dir}`
|
23
28
|
|
24
29
|
File.open input do |handle|
|
25
30
|
pbar = ProgressBar.new 'splitting', File.size(handle)
|
26
31
|
Dna.new(handle).each_with_index do |record, i|
|
27
|
-
pbar.
|
28
|
-
|
32
|
+
pbar.set handle.pos
|
33
|
+
# I have to use an instance variable here because
|
34
|
+
# if I don't the variable gets set to nil after
|
35
|
+
# each iteration.
|
36
|
+
@out =
|
37
|
+
if i%n == 0 # start a new file
|
38
|
+
# GzipWriter must be closed explicitly
|
39
|
+
# this raises an exception the first time
|
40
|
+
@out.close rescue nil
|
41
|
+
|
42
|
+
# create an IO object depending on whether or
|
43
|
+
# not the user wants to use gzip
|
44
|
+
if gzip
|
45
|
+
Zlib::GzipWriter.open(File.join(out_dir, "split_#{i/n}.fasta.gz"))
|
46
|
+
else
|
47
|
+
File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w')
|
48
|
+
end
|
49
|
+
else # keep using current handle
|
50
|
+
@out
|
51
|
+
end
|
29
52
|
@out.puts record
|
30
53
|
end
|
31
54
|
pbar.finish
|
55
|
+
@out.close
|
32
56
|
end
|
33
57
|
|
58
|
+
ohai "created #{Dir[File.join(out_dir, '*')].size} files"
|
34
59
|
end
|
35
60
|
end
|
36
61
|
end
|
data/lib/lederhosen/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -9,6 +9,7 @@ describe Lederhosen::CLI do
|
|
9
9
|
|
10
10
|
it 'should have a version command' do
|
11
11
|
`./bin/lederhosen version `.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
|
12
|
+
$?.success?.should be_true
|
12
13
|
end
|
13
14
|
|
14
15
|
it 'should trim reads' do
|
@@ -43,10 +44,17 @@ describe Lederhosen::CLI do
|
|
43
44
|
|
44
45
|
it 'should filter OTU abundance matrices' do
|
45
46
|
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
|
47
|
+
$?.success?.should be_true
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'should split a fasta file into smaller fasta files (optionally gzipped)' do
|
51
|
+
`./bin/lederhosen split_fasta --input=#{$test_dir}/joined.fasta --out-dir=#{$test_dir}/split/ --gzip true -n 100`
|
52
|
+
$?.success?.should be_true
|
46
53
|
end
|
47
54
|
|
48
55
|
it 'should split joined.fasta into reads for each cluster' do
|
49
56
|
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
|
57
|
+
$?.success?.should be_true
|
50
58
|
end
|
51
59
|
|
52
60
|
it 'should create a fasta file containing representative reads for each cluster' do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 2
|
10
|
-
version: 0.5.2
|
9
|
+
- 4
|
10
|
+
version: 0.5.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-08-
|
18
|
+
date: 2012-08-28 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|