lederhosen 1.2.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/lederhosen.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "1.2.0"
8
+ s.version = "1.2.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
@@ -9,21 +9,31 @@ module Lederhosen
9
9
  method_option :output, :type => :string, :required => true
10
10
 
11
11
  def get_reps
12
- input = options[:input]
12
+ inputs = Dir[options[:input]]
13
13
  database = options[:database]
14
14
  output = options[:output]
15
15
 
16
16
  taxa = Set.new
17
17
 
18
- ohai "getting representative database sequences from #{database} using #{input} clusters and saving to #{output}"
18
+ ohai "getting representative database sequences from #{database} using #{inputs} clusters and saving to #{output}"
19
19
 
20
20
  # parse uc file, get list of taxa we need to get
21
21
  # full sequences for from the database
22
- File.open(input).each do |line|
23
- header = parse_usearch_line(line.strip)
24
- taxa << header[:original] rescue nil
22
+ total_bytes = inputs.map { |x| File.size(x) }.inject(:+)
23
+ pbar = ProgressBar.new 'reading uc(s)', total_bytes
24
+
25
+ inputs.each do |input|
26
+ File.open(input) do |handle|
27
+ handle.each do |line|
28
+ pbar.inc line.unpack('*C').size
29
+ header = parse_usearch_line(line.strip)
30
+ taxa << header[:original] rescue nil
31
+ end
32
+ end
25
33
  end
26
34
 
35
+ pbar.finish
36
+
27
37
  ohai "found #{taxa.size} representative sequences"
28
38
 
29
39
  # print representative sequences from database
@@ -3,7 +3,7 @@ module Lederhosen
3
3
  MAJOR = 1
4
4
  MINOR = 2
5
5
  CODENAME = 'Regenmantel' # changes for minor versions
6
- PATCH = 0
6
+ PATCH = 1
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH].join('.')
9
9
  end
data/readme.md CHANGED
@@ -79,3 +79,14 @@ lederhosen otu_table \
79
79
  This will create the files:
80
80
 
81
81
  otu_table.domain.csv, ..., otu_table.species.csv
82
+
83
+ ### Get representative sequences
84
+
85
+ You can get the representative sequences for each cluster using the `get_reps` task. This will extract the representative sequence from
86
+ the __database__ you ran usearch with. Make sure you use the same database that you used when running usearch.
87
+
88
+ lederhosen get_reps --input=clusters.uc --database=taxcollector.fa --output=representatives.fasta
89
+
90
+ You can get the representatives from more than one cluster file using a glob (quote the pattern so that your shell passes it to lederhosen unexpanded):
91
+
92
+ lederhosen get_reps --input='*.uc' --database=taxcollector.fa --output=representatives.fasta
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -159,7 +159,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
159
159
  version: '0'
160
160
  segments:
161
161
  - 0
162
- hash: -2175708922821992201
162
+ hash: -2571588270597711944
163
163
  required_rubygems_version: !ruby/object:Gem::Requirement
164
164
  none: false
165
165
  requirements: