lederhosen 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lederhosen.gemspec +1 -1
- data/lib/lederhosen/tasks/get_reps.rb +15 -5
- data/lib/lederhosen/version.rb +1 -1
- data/readme.md +11 -0
- metadata +2 -2
data/lederhosen.gemspec
CHANGED
data/lib/lederhosen/tasks/get_reps.rb
CHANGED
|
@@ -9,21 +9,31 @@ module Lederhosen
|
|
|
9
9
|
method_option :output, :type => :string, :required => true
|
|
10
10
|
|
|
11
11
|
def get_reps
|
|
12
|
-
|
|
12
|
+
inputs = Dir[options[:input]]
|
|
13
13
|
database = options[:database]
|
|
14
14
|
output = options[:output]
|
|
15
15
|
|
|
16
16
|
taxa = Set.new
|
|
17
17
|
|
|
18
|
-
ohai "getting representative database sequences from #{database} using #{
|
|
18
|
+
ohai "getting representative database sequences from #{database} using #{inputs} clusters and saving to #{output}"
|
|
19
19
|
|
|
20
20
|
# parse uc file, get list of taxa we need to get
|
|
21
21
|
# full sequences for from the database
|
|
22
|
-
File.
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
total_bytes = inputs.map { |x| File.size(x) }.inject(:+)
|
|
23
|
+
pbar = ProgressBar.new 'reading uc(s)', total_bytes
|
|
24
|
+
|
|
25
|
+
inputs.each do |input|
|
|
26
|
+
File.open(input) do |handle|
|
|
27
|
+
handle.each do |line|
|
|
28
|
+
pbar.inc line.unpack('*C').size
|
|
29
|
+
header = parse_usearch_line(line.strip)
|
|
30
|
+
taxa << header[:original] rescue nil
|
|
31
|
+
end
|
|
32
|
+
end
|
|
25
33
|
end
|
|
26
34
|
|
|
35
|
+
pbar.finish
|
|
36
|
+
|
|
27
37
|
ohai "found #{taxa.size} representative sequences"
|
|
28
38
|
|
|
29
39
|
# print representative sequences from database
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
|
@@ -79,3 +79,14 @@ lederhosen otu_table \
|
|
|
79
79
|
This will create the files:
|
|
80
80
|
|
|
81
81
|
otu_table.domain.csv, ..., otu_table.species.csv
|
|
82
|
+
|
|
83
|
+
### Get representative sequences
|
|
84
|
+
|
|
85
|
+
You can get the representative sequences for each cluster using the `get_reps` task. This will extract the representative sequence from
|
|
86
|
+
the __database__ you ran usearch with. Make sure you use the same database that you used when running usearch.
|
|
87
|
+
|
|
88
|
+
lederhosen get_reps --input=clusters.uc --database=taxcollector.fa --output=representatives.fasta
|
|
89
|
+
|
|
90
|
+
You can get the representatives from more than one cluster file using a glob:
|
|
91
|
+
|
|
92
|
+
lederhosen get_reps --input=*.uc --database=taxcollector.fa --output=representatives.fasta
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lederhosen
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.2.0
|
|
4
|
+
version: 1.2.1
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -159,7 +159,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
159
159
|
version: '0'
|
|
160
160
|
segments:
|
|
161
161
|
- 0
|
|
162
|
-
hash: -
|
|
162
|
+
hash: -2571588270597711944
|
|
163
163
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
164
|
none: false
|
|
165
165
|
requirements:
|