lederhosen 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lederhosen.gemspec +1 -1
- data/lib/lederhosen/tasks/get_reps.rb +15 -5
- data/lib/lederhosen/version.rb +1 -1
- data/readme.md +11 -0
- metadata +2 -2
data/lederhosen.gemspec
CHANGED
@@ -9,21 +9,31 @@ module Lederhosen
|
|
9
9
|
method_option :output, :type => :string, :required => true
|
10
10
|
|
11
11
|
def get_reps
|
12
|
-
|
12
|
+
inputs = Dir[options[:input]]
|
13
13
|
database = options[:database]
|
14
14
|
output = options[:output]
|
15
15
|
|
16
16
|
taxa = Set.new
|
17
17
|
|
18
|
-
ohai "getting representative database sequences from #{database} using #{
|
18
|
+
ohai "getting representative database sequences from #{database} using #{inputs} clusters and saving to #{output}"
|
19
19
|
|
20
20
|
# parse uc file, get list of taxa we need to get
|
21
21
|
# full sequences for from the database
|
22
|
-
File.
|
23
|
-
|
24
|
-
|
22
|
+
total_bytes = inputs.map { |x| File.size(x) }.inject(:+)
|
23
|
+
pbar = ProgressBar.new 'reading uc(s)', total_bytes
|
24
|
+
|
25
|
+
inputs.each do |input|
|
26
|
+
File.open(input) do |handle|
|
27
|
+
handle.each do |line|
|
28
|
+
pbar.inc line.unpack('*C').size
|
29
|
+
header = parse_usearch_line(line.strip)
|
30
|
+
taxa << header[:original] rescue nil
|
31
|
+
end
|
32
|
+
end
|
25
33
|
end
|
26
34
|
|
35
|
+
pbar.finish
|
36
|
+
|
27
37
|
ohai "found #{taxa.size} representative sequences"
|
28
38
|
|
29
39
|
# print representative sequences from database
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
@@ -79,3 +79,14 @@ lederhosen otu_table \
|
|
79
79
|
This will create the files:
|
80
80
|
|
81
81
|
otu_table.domain.csv, ..., otu_table.species.csv
|
82
|
+
|
83
|
+
### Get representative sequences
|
84
|
+
|
85
|
+
You can get the representative sequences for each cluster using the `get_reps` task. This will extract the representative sequence from
|
86
|
+
the __database__ you ran usearch with. Make sure you use the same database that you used when running usearch.
|
87
|
+
|
88
|
+
lederhosen get_reps --input=clusters.uc --database=taxcollector.fa --output=representatives.fasta
|
89
|
+
|
90
|
+
You can get the representatives from more than one cluster file using a glob:
|
91
|
+
|
92
|
+
lederhosen get_reps --input='*.uc' --database=taxcollector.fa --output=representatives.fasta
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -159,7 +159,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
159
159
|
version: '0'
|
160
160
|
segments:
|
161
161
|
- 0
|
162
|
-
hash: -
|
162
|
+
hash: -2571588270597711944
|
163
163
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
164
164
|
none: false
|
165
165
|
requirements:
|