bio-rocker 1.1.12 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ROCker +3 -3
- data/lib/rocker.rb +6 -6
- data/lib/rocker/genome-set.rb +13 -1
- data/lib/rocker/protein-set.rb +9 -0
- data/lib/rocker/step/build.rb +21 -10
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4f32b4736d380d7e8b0ff7b6ef46b221b9a0651
|
4
|
+
data.tar.gz: cfbff18723f9b2d95d45350aba30a9b75cb32d5d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a6d9fd0c94338415f334e50c928174ee595690c3c093b1478251d169b11265843eafccdec645d9997acd2ccff0cabcb7db67e77960e43a10d5bbc90298592ff
|
7
|
+
data.tar.gz: bd711c0f3a87f221b60014633130b73a581b5b5f6dd60eee5a948d5740f87bee68714f0b71b9b27412cf36ade84aac314ab20b267ae134e32a5d998e3855c056
|
data/bin/ROCker
CHANGED
@@ -103,6 +103,9 @@ opts = OptionParser.new do |opt|
|
|
103
103
|
opt.on("--reuse-files",
|
104
104
|
"Re-use existing result files. By default existing files are ignored."
|
105
105
|
){ |v| o[:reuse]=true }
|
106
|
+
opt.on("--keep-unlinked",
|
107
|
+
"Keep genomes with unlinked positive coordinates (missing references)."
|
108
|
+
){ |v| o[:keep_unlinked] = v }
|
106
109
|
|
107
110
|
opt.separator ""
|
108
111
|
opt.separator "+ EXTERNAL SOFTWARE OPTIONS"
|
@@ -344,9 +347,6 @@ opts = OptionParser.new do |opt|
|
|
344
347
|
end
|
345
348
|
opts.parse!
|
346
349
|
|
347
|
-
p ARGV
|
348
|
-
|
349
|
-
|
350
350
|
#================================[ Main ]
|
351
351
|
rocker = ROCker.new(o)
|
352
352
|
begin
|
data/lib/rocker.rb
CHANGED
@@ -9,13 +9,13 @@ require "rocker/rocdata"
|
|
9
9
|
|
10
10
|
class ROCker
|
11
11
|
#================================[ Class ]
|
12
|
-
@@VERSION = "1.
|
12
|
+
@@VERSION = "1.2.0"
|
13
13
|
@@CITATION = [
|
14
|
-
"Orellana, Rodriguez-R
|
15
|
-
"
|
16
|
-
"metagenomic
|
17
|
-
"
|
18
|
-
@@DATE = "
|
14
|
+
"Orellana, Rodriguez-R & Konstantinidis, 2016. DOI:10.1093/nar/gkw900.",
|
15
|
+
"ROCker: accurate detection and quantification of target genes in",
|
16
|
+
"short-read metagenomic data sets by modeling sliding-window bitscores.",
|
17
|
+
"Nucleic Acids Research 45(3):e14."]
|
18
|
+
@@DATE = "2018-06-28"
|
19
19
|
@@DEFAULTS = {
|
20
20
|
# General
|
21
21
|
q: false, r: "R", nucl: false, debug: false, thr: 2, search: :blast,
|
data/lib/rocker/genome-set.rb
CHANGED
@@ -18,7 +18,16 @@ class GenomeSet
|
|
18
18
|
tmp_ids = Array.new(self.ids)
|
19
19
|
ofh = File.open(file, "w")
|
20
20
|
while tmp_ids.size>0
|
21
|
-
ofh.print rocker.
|
21
|
+
ofh.print rocker.
|
22
|
+
ebiFetch(:embl, tmp_ids.shift(200), :fasta).
|
23
|
+
each_line.to_a.select { |i|
|
24
|
+
if i =~ /^Entry: (\S+) (.*)/
|
25
|
+
warn "EBI returned an error fetching #{$1}: #{$2}"
|
26
|
+
false
|
27
|
+
else
|
28
|
+
true
|
29
|
+
end
|
30
|
+
}.join
|
22
31
|
end
|
23
32
|
ofh.close
|
24
33
|
end
|
@@ -44,6 +53,9 @@ class GenomeSet
|
|
44
53
|
end
|
45
54
|
def size() self.ids.size end
|
46
55
|
def empty?() self.ids.empty? end
|
56
|
+
def delete!(ids)
|
57
|
+
ids.map{ |i| @ids.delete(i) }.flatten.compact
|
58
|
+
end
|
47
59
|
|
48
60
|
#================================[ Utilities ]
|
49
61
|
def genome2taxon(genome_id, rank="species")
|
data/lib/rocker/protein-set.rb
CHANGED
@@ -63,6 +63,15 @@ class ProteinSet
|
|
63
63
|
return [] if @genomes.empty?
|
64
64
|
@genomes.values.reduce(:+).uniq
|
65
65
|
end
|
66
|
+
def genome_by_prot_id(prot_id)
|
67
|
+
@genomes[prot_id]
|
68
|
+
end
|
69
|
+
##
|
70
|
+
# Removes genomes linked to +prot_ids+ (an Array) and returns an Array
|
71
|
+
# of removed genomes.
|
72
|
+
def remove_genomes_by_prot_id!(prot_ids)
|
73
|
+
prot_ids.map{ |i| @genomes.delete(i) }.flatten.compact
|
74
|
+
end
|
66
75
|
def tranids
|
67
76
|
return [] if @tranids.empty?
|
68
77
|
@tranids.values.reduce(:+).uniq
|
data/lib/rocker/step/build.rb
CHANGED
@@ -10,9 +10,9 @@ require "rocker/genome-set"
|
|
10
10
|
|
11
11
|
class ROCker
|
12
12
|
#================================[ Class ]
|
13
|
-
@@EBIREST = "
|
13
|
+
@@EBIREST = "https://www.ebi.ac.uk/Tools"
|
14
14
|
@@DEFAULTS.merge!({positive:[], negative:[], seqdepth:0.03, readlen:100,
|
15
|
-
minovl:50,
|
15
|
+
minovl:50, keep_unlinked:false,
|
16
16
|
# Ext. Software
|
17
17
|
aligner: :clustalo, simulator: :grinder,
|
18
18
|
simulatorbin:{grinder:"grinder"},
|
@@ -27,12 +27,12 @@ class ROCker
|
|
27
27
|
def self.ebirest() @@EBIREST ; end
|
28
28
|
def self.has_build_gems?
|
29
29
|
return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
|
30
|
-
@@HAS_BUILD_GEMS =
|
30
|
+
@@HAS_BUILD_GEMS = true
|
31
31
|
begin
|
32
32
|
require "rubygems"
|
33
33
|
require "restclient"
|
34
34
|
rescue LoadError
|
35
|
-
@@HAS_BUILD_GEMS =
|
35
|
+
@@HAS_BUILD_GEMS = false
|
36
36
|
end
|
37
37
|
@@HAS_BUILD_GEMS
|
38
38
|
end
|
@@ -54,6 +54,7 @@ class ROCker
|
|
54
54
|
def ebiFetch(db, ids, format, outfile=nil)
|
55
55
|
url = "#{ROCker.ebirest}/dbfetch/dbfetch/" +
|
56
56
|
"#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
|
57
|
+
$stderr.puts url
|
57
58
|
self.restcall url, outfile
|
58
59
|
end
|
59
60
|
def get_coords_from_gff3(genome_ids, pset, thread_id, json_file)
|
@@ -152,7 +153,7 @@ class ROCker
|
|
152
153
|
ref_file = @o[:baseout] + ".ref.fasta"
|
153
154
|
if not protein_set[:+].aln.nil?
|
154
155
|
puts " * reusing aligned sequences as positive set." unless @o[:q]
|
155
|
-
protein_set[:+].get_from_aln(ref_file, aln)
|
156
|
+
protein_set[:+].get_from_aln(ref_file, protein_set[:+].aln)
|
156
157
|
@o[:noaln] = true
|
157
158
|
elsif @o[:reuse] and File.size? ref_file
|
158
159
|
puts " * reusing positive set: #{ref_file}." unless @o[:q]
|
@@ -250,8 +251,16 @@ class ROCker
|
|
250
251
|
raise "Cannot find the genomic location of any provided sequence." if
|
251
252
|
found.nil?
|
252
253
|
missing = protein_set[:+].ids - found
|
253
|
-
|
254
|
-
|
254
|
+
unless missing.empty?
|
255
|
+
warn "\nWARNING: Cannot find genomic location of #{missing.size} " +
|
256
|
+
"sequence(s) #{missing.join(",")}.\n\n"
|
257
|
+
unless @o[:keep_unlinked]
|
258
|
+
del_genomes = protein_set[:+].remove_genomes_by_prot_id!(missing)
|
259
|
+
warn "WARNING: Ignoring #{del_genomes.size} genomes with missing " +
|
260
|
+
"coordinates #{del_genomes.join(",")}.\n\n"
|
261
|
+
genome_set[ :+ ].delete!(del_genomes)
|
262
|
+
end
|
263
|
+
end
|
255
264
|
|
256
265
|
# Download genomes
|
257
266
|
genome_set[:all] = GenomeSet.new(self,
|
@@ -260,7 +269,9 @@ class ROCker
|
|
260
269
|
if @o[:reuse] and File.size? genomes_file
|
261
270
|
puts " * reusing existing file: #{genomes_file}." unless @o[:q]
|
262
271
|
else
|
263
|
-
|
272
|
+
raise "Something went wrong: Nothing left to download." if
|
273
|
+
genome_set[:all].empty?
|
274
|
+
puts " * downloading " + genome_set[:all].size.to_s +
|
264
275
|
" genome(s) in FastA." unless @o[:q]
|
265
276
|
$stderr.puts " # #{genome_set[:all].ids}" if @o[:debug]
|
266
277
|
genome_set[:all].download genomes_file
|
@@ -321,8 +332,8 @@ class ROCker
|
|
321
332
|
if l =~ /^>/
|
322
333
|
rd = %r{
|
323
334
|
^>(?<id>\d+)\s
|
324
|
-
reference=[A-Za-
|
325
|
-
(?<genome_id>[A-Za-z0-9_]+)
|
335
|
+
reference=[A-Za-z_]+[\|:]
|
336
|
+
(?<genome_id>[A-Za-z0-9_]+)(?:\|.*)?\s
|
326
337
|
position=(?<comp>complement\()?(?<from>\d+)\.\.
|
327
338
|
(?<to>\d+)\)?\s
|
328
339
|
}x.match(l)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-rocker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis (Coto) Orellana
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2018-06-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rest-client
|
@@ -64,7 +64,7 @@ files:
|
|
64
64
|
- lib/rocker/step/search.rb
|
65
65
|
homepage: http://enve-omics.ce.gatech.edu/rocker
|
66
66
|
licenses:
|
67
|
-
-
|
67
|
+
- Artistic-2.0
|
68
68
|
metadata: {}
|
69
69
|
post_install_message:
|
70
70
|
rdoc_options: []
|
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
82
|
version: '0'
|
83
83
|
requirements: []
|
84
84
|
rubyforge_project:
|
85
|
-
rubygems_version: 2.
|
85
|
+
rubygems_version: 2.6.13
|
86
86
|
signing_key:
|
87
87
|
specification_version: 4
|
88
88
|
summary: ROCker
|