bio-rocker 1.1.12 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ROCker +3 -3
- data/lib/rocker.rb +6 -6
- data/lib/rocker/genome-set.rb +13 -1
- data/lib/rocker/protein-set.rb +9 -0
- data/lib/rocker/step/build.rb +21 -10
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4f32b4736d380d7e8b0ff7b6ef46b221b9a0651
|
4
|
+
data.tar.gz: cfbff18723f9b2d95d45350aba30a9b75cb32d5d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a6d9fd0c94338415f334e50c928174ee595690c3c093b1478251d169b11265843eafccdec645d9997acd2ccff0cabcb7db67e77960e43a10d5bbc90298592ff
|
7
|
+
data.tar.gz: bd711c0f3a87f221b60014633130b73a581b5b5f6dd60eee5a948d5740f87bee68714f0b71b9b27412cf36ade84aac314ab20b267ae134e32a5d998e3855c056
|
data/bin/ROCker
CHANGED
@@ -103,6 +103,9 @@ opts = OptionParser.new do |opt|
|
|
103
103
|
opt.on("--reuse-files",
|
104
104
|
"Re-use existing result files. By default existing files are ignored."
|
105
105
|
){ |v| o[:reuse]=true }
|
106
|
+
opt.on("--keep-unlinked",
|
107
|
+
"Keep genomes with unlinked positive coordinates (missing references)."
|
108
|
+
){ |v| o[:keep_unlinked] = v }
|
106
109
|
|
107
110
|
opt.separator ""
|
108
111
|
opt.separator "+ EXTERNAL SOFTWARE OPTIONS"
|
@@ -344,9 +347,6 @@ opts = OptionParser.new do |opt|
|
|
344
347
|
end
|
345
348
|
opts.parse!
|
346
349
|
|
347
|
-
p ARGV
|
348
|
-
|
349
|
-
|
350
350
|
#================================[ Main ]
|
351
351
|
rocker = ROCker.new(o)
|
352
352
|
begin
|
data/lib/rocker.rb
CHANGED
@@ -9,13 +9,13 @@ require "rocker/rocdata"
|
|
9
9
|
|
10
10
|
class ROCker
|
11
11
|
#================================[ Class ]
|
12
|
-
@@VERSION = "1.
|
12
|
+
@@VERSION = "1.2.0"
|
13
13
|
@@CITATION = [
|
14
|
-
"Orellana, Rodriguez-R
|
15
|
-
"
|
16
|
-
"metagenomic
|
17
|
-
"
|
18
|
-
@@DATE = "
|
14
|
+
"Orellana, Rodriguez-R & Konstantinidis, 2016. DOI:10.1093/nar/gkw900.",
|
15
|
+
"ROCker: accurate detection and quantification of target genes in",
|
16
|
+
"short-read metagenomic data sets by modeling sliding-window bitscores.",
|
17
|
+
"Nucleic Acids Research 45(3):e14."]
|
18
|
+
@@DATE = "2018-06-28"
|
19
19
|
@@DEFAULTS = {
|
20
20
|
# General
|
21
21
|
q: false, r: "R", nucl: false, debug: false, thr: 2, search: :blast,
|
data/lib/rocker/genome-set.rb
CHANGED
@@ -18,7 +18,16 @@ class GenomeSet
|
|
18
18
|
tmp_ids = Array.new(self.ids)
|
19
19
|
ofh = File.open(file, "w")
|
20
20
|
while tmp_ids.size>0
|
21
|
-
ofh.print rocker.
|
21
|
+
ofh.print rocker.
|
22
|
+
ebiFetch(:embl, tmp_ids.shift(200), :fasta).
|
23
|
+
each_line.to_a.select { |i|
|
24
|
+
if i =~ /^Entry: (\S+) (.*)/
|
25
|
+
warn "EBI returned an error fetching #{$1}: #{$2}"
|
26
|
+
false
|
27
|
+
else
|
28
|
+
true
|
29
|
+
end
|
30
|
+
}.join
|
22
31
|
end
|
23
32
|
ofh.close
|
24
33
|
end
|
@@ -44,6 +53,9 @@ class GenomeSet
|
|
44
53
|
end
|
45
54
|
def size() self.ids.size end
|
46
55
|
def empty?() self.ids.empty? end
|
56
|
+
def delete!(ids)
|
57
|
+
ids.map{ |i| @ids.delete(i) }.flatten.compact
|
58
|
+
end
|
47
59
|
|
48
60
|
#================================[ Utilities ]
|
49
61
|
def genome2taxon(genome_id, rank="species")
|
data/lib/rocker/protein-set.rb
CHANGED
@@ -63,6 +63,15 @@ class ProteinSet
|
|
63
63
|
return [] if @genomes.empty?
|
64
64
|
@genomes.values.reduce(:+).uniq
|
65
65
|
end
|
66
|
+
def genome_by_prot_id(prot_id)
|
67
|
+
@genomes[prot_id]
|
68
|
+
end
|
69
|
+
##
|
70
|
+
# Removes genomes linked to +prot_ids+ (an Array) and returns an Array
|
71
|
+
# of removed genomes.
|
72
|
+
def remove_genomes_by_prot_id!(prot_ids)
|
73
|
+
prot_ids.map{ |i| @genomes.delete(i) }.flatten.compact
|
74
|
+
end
|
66
75
|
def tranids
|
67
76
|
return [] if @tranids.empty?
|
68
77
|
@tranids.values.reduce(:+).uniq
|
data/lib/rocker/step/build.rb
CHANGED
@@ -10,9 +10,9 @@ require "rocker/genome-set"
|
|
10
10
|
|
11
11
|
class ROCker
|
12
12
|
#================================[ Class ]
|
13
|
-
@@EBIREST = "
|
13
|
+
@@EBIREST = "https://www.ebi.ac.uk/Tools"
|
14
14
|
@@DEFAULTS.merge!({positive:[], negative:[], seqdepth:0.03, readlen:100,
|
15
|
-
minovl:50,
|
15
|
+
minovl:50, keep_unlinked:false,
|
16
16
|
# Ext. Software
|
17
17
|
aligner: :clustalo, simulator: :grinder,
|
18
18
|
simulatorbin:{grinder:"grinder"},
|
@@ -27,12 +27,12 @@ class ROCker
|
|
27
27
|
def self.ebirest() @@EBIREST ; end
|
28
28
|
def self.has_build_gems?
|
29
29
|
return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
|
30
|
-
@@HAS_BUILD_GEMS =
|
30
|
+
@@HAS_BUILD_GEMS = true
|
31
31
|
begin
|
32
32
|
require "rubygems"
|
33
33
|
require "restclient"
|
34
34
|
rescue LoadError
|
35
|
-
@@HAS_BUILD_GEMS =
|
35
|
+
@@HAS_BUILD_GEMS = false
|
36
36
|
end
|
37
37
|
@@HAS_BUILD_GEMS
|
38
38
|
end
|
@@ -54,6 +54,7 @@ class ROCker
|
|
54
54
|
def ebiFetch(db, ids, format, outfile=nil)
|
55
55
|
url = "#{ROCker.ebirest}/dbfetch/dbfetch/" +
|
56
56
|
"#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
|
57
|
+
$stderr.puts url
|
57
58
|
self.restcall url, outfile
|
58
59
|
end
|
59
60
|
def get_coords_from_gff3(genome_ids, pset, thread_id, json_file)
|
@@ -152,7 +153,7 @@ class ROCker
|
|
152
153
|
ref_file = @o[:baseout] + ".ref.fasta"
|
153
154
|
if not protein_set[:+].aln.nil?
|
154
155
|
puts " * reusing aligned sequences as positive set." unless @o[:q]
|
155
|
-
protein_set[:+].get_from_aln(ref_file, aln)
|
156
|
+
protein_set[:+].get_from_aln(ref_file, protein_set[:+].aln)
|
156
157
|
@o[:noaln] = true
|
157
158
|
elsif @o[:reuse] and File.size? ref_file
|
158
159
|
puts " * reusing positive set: #{ref_file}." unless @o[:q]
|
@@ -250,8 +251,16 @@ class ROCker
|
|
250
251
|
raise "Cannot find the genomic location of any provided sequence." if
|
251
252
|
found.nil?
|
252
253
|
missing = protein_set[:+].ids - found
|
253
|
-
|
254
|
-
|
254
|
+
unless missing.empty?
|
255
|
+
warn "\nWARNING: Cannot find genomic location of #{missing.size} " +
|
256
|
+
"sequence(s) #{missing.join(",")}.\n\n"
|
257
|
+
unless @o[:keep_unlinked]
|
258
|
+
del_genomes = protein_set[:+].remove_genomes_by_prot_id!(missing)
|
259
|
+
warn "WARNING: Ignoring #{del_genomes.size} genomes with missing " +
|
260
|
+
"coordinates #{del_genomes.join(",")}.\n\n"
|
261
|
+
genome_set[ :+ ].delete!(del_genomes)
|
262
|
+
end
|
263
|
+
end
|
255
264
|
|
256
265
|
# Download genomes
|
257
266
|
genome_set[:all] = GenomeSet.new(self,
|
@@ -260,7 +269,9 @@ class ROCker
|
|
260
269
|
if @o[:reuse] and File.size? genomes_file
|
261
270
|
puts " * reusing existing file: #{genomes_file}." unless @o[:q]
|
262
271
|
else
|
263
|
-
|
272
|
+
raise "Something went wrong: Nothing left to download." if
|
273
|
+
genome_set[:all].empty?
|
274
|
+
puts " * downloading " + genome_set[:all].size.to_s +
|
264
275
|
" genome(s) in FastA." unless @o[:q]
|
265
276
|
$stderr.puts " # #{genome_set[:all].ids}" if @o[:debug]
|
266
277
|
genome_set[:all].download genomes_file
|
@@ -321,8 +332,8 @@ class ROCker
|
|
321
332
|
if l =~ /^>/
|
322
333
|
rd = %r{
|
323
334
|
^>(?<id>\d+)\s
|
324
|
-
reference=[A-Za-
|
325
|
-
(?<genome_id>[A-Za-z0-9_]+)
|
335
|
+
reference=[A-Za-z_]+[\|:]
|
336
|
+
(?<genome_id>[A-Za-z0-9_]+)(?:\|.*)?\s
|
326
337
|
position=(?<comp>complement\()?(?<from>\d+)\.\.
|
327
338
|
(?<to>\d+)\)?\s
|
328
339
|
}x.match(l)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-rocker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis (Coto) Orellana
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2018-06-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rest-client
|
@@ -64,7 +64,7 @@ files:
|
|
64
64
|
- lib/rocker/step/search.rb
|
65
65
|
homepage: http://enve-omics.ce.gatech.edu/rocker
|
66
66
|
licenses:
|
67
|
-
-
|
67
|
+
- Artistic-2.0
|
68
68
|
metadata: {}
|
69
69
|
post_install_message:
|
70
70
|
rdoc_options: []
|
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
82
|
version: '0'
|
83
83
|
requirements: []
|
84
84
|
rubyforge_project:
|
85
|
-
rubygems_version: 2.
|
85
|
+
rubygems_version: 2.6.13
|
86
86
|
signing_key:
|
87
87
|
specification_version: 4
|
88
88
|
summary: ROCker
|