lederhosen 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "1.2.0"
8
+ s.version = "1.2.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
@@ -9,21 +9,31 @@ module Lederhosen
9
9
  method_option :output, :type => :string, :required => true
10
10
 
11
11
  def get_reps
12
- input = options[:input]
12
+ inputs = Dir[options[:input]]
13
13
  database = options[:database]
14
14
  output = options[:output]
15
15
 
16
16
  taxa = Set.new
17
17
 
18
- ohai "getting representative database sequences from #{database} using #{input} clusters and saving to #{output}"
18
+ ohai "getting representative database sequences from #{database} using #{inputs} clusters and saving to #{output}"
19
19
 
20
20
  # parse uc file, get list of taxa we need to get
21
21
  # full sequences for from the database
22
- File.open(input).each do |line|
23
- header = parse_usearch_line(line.strip)
24
- taxa << header[:original] rescue nil
22
+ total_bytes = inputs.map { |x| File.size(x) }.inject(:+)
23
+ pbar = ProgressBar.new 'reading uc(s)', total_bytes
24
+
25
+ inputs.each do |input|
26
+ File.open(input) do |handle|
27
+ handle.each do |line|
28
+ pbar.inc line.unpack('*C').size
29
+ header = parse_usearch_line(line.strip)
30
+ taxa << header[:original] rescue nil
31
+ end
32
+ end
25
33
  end
26
34
 
35
+ pbar.finish
36
+
27
37
  ohai "found #{taxa.size} representative sequences"
28
38
 
29
39
  # print representative sequences from database
@@ -3,7 +3,7 @@ module Lederhosen
3
3
  MAJOR = 1
4
4
  MINOR = 2
5
5
  CODENAME = 'Regenmantel' # changes for minor versions
6
- PATCH = 0
6
+ PATCH = 1
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH].join('.')
9
9
  end
data/readme.md CHANGED
@@ -79,3 +79,14 @@ lederhosen otu_table \
79
79
  This will create the files:
80
80
 
81
81
  otu_table.domain.csv, ..., otu_table.species.csv
82
+
83
+ ### Get representative sequences
84
+
85
+ You can get the representative sequences for each cluster using the `get_reps` task. This will extract the representative sequence from
86
+ the __database__ you ran usearch with. Make sure you use the same database that you used when running usearch.
87
+
88
+ lederhosen get_reps --input=clusters.uc --database=taxcollector.fa --output=representatives.fasta
89
+
90
+ You can get the representatives from more than one cluster file using a glob:
91
+
92
+ lederhosen get_reps --input='*.uc' --database=taxcollector.fa --output=representatives.fasta
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -159,7 +159,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
159
159
  version: '0'
160
160
  segments:
161
161
  - 0
162
- hash: -2175708922821992201
162
+ hash: -2571588270597711944
163
163
  required_rubygems_version: !ruby/object:Gem::Requirement
164
164
  none: false
165
165
  requirements: