bio-rocker 1.1.9 → 1.1.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ROCker +6 -5
- data/lib/rocker.rb +19 -16
- data/lib/rocker/genome-set.rb +4 -4
- data/lib/rocker/protein-set.rb +12 -8
- data/lib/rocker/step/build.rb +14 -10
- metadata +10 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e05b58cc0291bee319fcc3b154922e9b258f0c8b
|
4
|
+
data.tar.gz: d3780e512d02ef04c60696a01f8c736b61703707
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 12d2993d29fa3fb4bb42255b472eee9a3a15d281e9c6761c9b1a2173fe086a00c140031d311680e774441a94b93139397860f04973b055d67f751f9c53c0f8af
|
7
|
+
data.tar.gz: c6025ff319794cd9fbc7b52f2a019bb5021fe0474b9536270d9eaf72385738db11d965676201d6440c1b6dceacfb851bb9f31e909c33d04e93f2308b2fc6cf01
|
data/bin/ROCker
CHANGED
@@ -3,13 +3,13 @@
|
|
3
3
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
4
|
# @author Luis (Coto) Orellana
|
5
5
|
# @license artistic license 2.0
|
6
|
-
# @update
|
6
|
+
# @update Nov-25-2015
|
7
7
|
#
|
8
8
|
|
9
|
-
lib = File.expand_path(File.dirname(__FILE__)
|
9
|
+
lib = File.expand_path("../lib", File.dirname(__FILE__))
|
10
10
|
$:.push lib if Dir.exist? lib
|
11
|
-
require
|
12
|
-
require
|
11
|
+
require "rocker"
|
12
|
+
require "optparse"
|
13
13
|
|
14
14
|
|
15
15
|
#================================[ Options parsing ]
|
@@ -355,13 +355,14 @@ begin
|
|
355
355
|
puts "ROCker v" + ROCker.VERSION
|
356
356
|
puts ""
|
357
357
|
puts "If you use ROCker in your research, please cite:"
|
358
|
-
puts ROCker.CITATION
|
358
|
+
puts ROCker.CITATION("\n")
|
359
359
|
puts ""
|
360
360
|
puts "ROCker can be freely used and distributed under the "
|
361
361
|
puts "terms of the Artistic License 2.0. See LICENSE.txt."
|
362
362
|
puts ""
|
363
363
|
puts "For additional information and documentation, visit:"
|
364
364
|
puts "http://enve-omics.ce.gatech.edu/rocker/"
|
365
|
+
puts ""
|
365
366
|
puts "For our list of curated models, visits:"
|
366
367
|
puts "http://enve-omics.ce.gatech.edu/rocker/models"
|
367
368
|
end
|
data/lib/rocker.rb
CHANGED
@@ -1,20 +1,22 @@
|
|
1
1
|
#
|
2
|
-
# @author
|
3
|
-
# @author
|
2
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
|
+
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update Dec-01-2015
|
6
6
|
#
|
7
7
|
|
8
|
-
require
|
9
|
-
require
|
8
|
+
require "rocker/blasthit"
|
9
|
+
require "rocker/rocdata"
|
10
10
|
|
11
11
|
class ROCker
|
12
12
|
#================================[ Class ]
|
13
|
-
@@VERSION = "1.1.
|
14
|
-
@@CITATION =
|
15
|
-
"
|
16
|
-
"
|
17
|
-
"
|
13
|
+
@@VERSION = "1.1.10"
|
14
|
+
@@CITATION = [
|
15
|
+
"Orellana, Rodriguez-R, & Konstantinidis. Under review.",
|
16
|
+
"Detecting and quantifying functional genes in short-read",
|
17
|
+
"metagenomic datasets: method development and application",
|
18
|
+
"to the nitrogen cycle genes."]
|
19
|
+
@@DATE = "2015-12-01"
|
18
20
|
@@DEFAULTS = {
|
19
21
|
# General
|
20
22
|
q: false, r: "R", nucl: false, debug: false, thr: 2, search: :blast,
|
@@ -33,7 +35,8 @@ class ROCker
|
|
33
35
|
def self.defaults() @@DEFAULTS ; end
|
34
36
|
def self.default(k) @@DEFAULTS[k] ; end
|
35
37
|
def self.VERSION; @@VERSION ; end
|
36
|
-
def self.
|
38
|
+
def self.DATE; @@DATE ; end
|
39
|
+
def self.CITATION(j=" ") @@CITATION.join(j) ; end
|
37
40
|
|
38
41
|
#================================[ Instance ]
|
39
42
|
attr_reader :o
|
@@ -64,11 +67,11 @@ end
|
|
64
67
|
|
65
68
|
#================================[ Extensions ]
|
66
69
|
# To ROCker
|
67
|
-
require
|
68
|
-
require
|
69
|
-
require
|
70
|
-
require
|
71
|
-
require
|
70
|
+
require "rocker/step/build"
|
71
|
+
require "rocker/step/compile"
|
72
|
+
require "rocker/step/search"
|
73
|
+
require "rocker/step/filter"
|
74
|
+
require "rocker/step/plot"
|
72
75
|
|
73
76
|
# To other
|
74
77
|
class Numeric
|
data/lib/rocker/genome-set.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update Dec-01-2015
|
6
6
|
#
|
7
7
|
|
8
8
|
class GenomeSet
|
@@ -46,10 +46,10 @@ class GenomeSet
|
|
46
46
|
def empty?() self.ids.empty? end
|
47
47
|
|
48
48
|
#================================[ Utilities ]
|
49
|
-
def genome2taxon(genome_id, rank=
|
49
|
+
def genome2taxon(genome_id, rank="species")
|
50
50
|
v = genome2taxid(genome_id)
|
51
51
|
unless v.nil?
|
52
|
-
xml = rocker.ebiFetch(
|
52
|
+
xml = rocker.ebiFetch(:taxonomy, [v], :enataxonomyxml).gsub(/\s*\n\s*/,"")
|
53
53
|
v = xml.scan(/<taxon [^>]+>/).grep(/rank="#{rank}"/).first
|
54
54
|
v.sub!(/.* taxId="(\d+)".*/,"\\1") unless v.nil?
|
55
55
|
end
|
@@ -57,7 +57,7 @@ class GenomeSet
|
|
57
57
|
v
|
58
58
|
end
|
59
59
|
def genome2taxid(genome_id)
|
60
|
-
doc = rocker.ebiFetch(
|
60
|
+
doc = rocker.ebiFetch(:embl, [genome_id], :annot).split(/[\n\r]/)
|
61
61
|
ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first
|
62
62
|
ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
|
63
63
|
return nil if ln.nil?
|
data/lib/rocker/protein-set.rb
CHANGED
@@ -2,10 +2,10 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update Dec-01-2015
|
6
6
|
#
|
7
7
|
|
8
|
-
require
|
8
|
+
require "rocker/alignment"
|
9
9
|
|
10
10
|
class ProteinSet
|
11
11
|
attr_reader :rocker, :ids, :aln
|
@@ -43,7 +43,7 @@ class ProteinSet
|
|
43
43
|
self.ids.each do |id|
|
44
44
|
doc = self.rocker.ebiFetch(:uniprotkb, [id], :annot).split("\n")
|
45
45
|
doc.grep( /^DR\s+EMBL;/ ).map do |ln|
|
46
|
-
r=ln.split(
|
46
|
+
r=ln.split("; ")
|
47
47
|
self.link_genome(id, r[1])
|
48
48
|
self.link_tranid(id, r[2])
|
49
49
|
end
|
@@ -67,16 +67,20 @@ class ProteinSet
|
|
67
67
|
return [] if @tranids.empty?
|
68
68
|
@tranids.values.reduce(:+).uniq
|
69
69
|
end
|
70
|
+
def tranids_dump
|
71
|
+
@tranids.map{|k,v| "{#{k}: #{v}}"}.join(", ")
|
72
|
+
end
|
70
73
|
def in_coords(coords)
|
71
74
|
coords.keys.map do |genome|
|
72
75
|
locations = coords[ genome ]
|
73
76
|
locations.map do |loc|
|
74
|
-
if not loc[:prot_id].nil?
|
75
|
-
loc[:prot_id] if
|
76
|
-
elsif not loc[:tran_id].nil?
|
77
|
-
@tranids.
|
77
|
+
if not loc[:prot_id].nil?
|
78
|
+
loc[:prot_id] if include? loc[:prot_id]
|
79
|
+
elsif not loc[:tran_id].nil?
|
80
|
+
@tranids.map{ |k,v| v.include?(loc[:tran_id]) ? k : nil }.compact.first
|
78
81
|
else
|
79
|
-
warn "Warning: Impossible to resolve protein located
|
82
|
+
warn "Warning: Impossible to resolve protein located " +
|
83
|
+
"in '#{genome}' at: #{loc}."
|
80
84
|
nil
|
81
85
|
end
|
82
86
|
end
|
data/lib/rocker/step/build.rb
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update Dec-01-2015
|
6
6
|
#
|
7
7
|
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
8
|
+
require "json"
|
9
|
+
require "rocker/protein-set"
|
10
|
+
require "rocker/genome-set"
|
11
11
|
|
12
12
|
class ROCker
|
13
13
|
#================================[ Class ]
|
@@ -30,8 +30,8 @@ class ROCker
|
|
30
30
|
return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
|
31
31
|
@@HAS_BUILD_GEMS = TRUE
|
32
32
|
begin
|
33
|
-
require
|
34
|
-
require
|
33
|
+
require "rubygems"
|
34
|
+
require "restclient"
|
35
35
|
rescue LoadError
|
36
36
|
@@HAS_BUILD_GEMS = FALSE
|
37
37
|
end
|
@@ -41,8 +41,8 @@ class ROCker
|
|
41
41
|
#================================[ Utilities ]
|
42
42
|
def restcall(url, outfile=nil)
|
43
43
|
$stderr.puts " # Calling: #{url}" if @o[:debug]
|
44
|
-
response = RestClient::Request.execute(:
|
45
|
-
:
|
44
|
+
response = RestClient::Request.execute(method: :get, url: url,
|
45
|
+
timeout: 600)
|
46
46
|
raise "Unable to reach EBI REST client, error code " +
|
47
47
|
response.code.to_s + "." unless response.code == 200
|
48
48
|
unless outfile.nil?
|
@@ -73,6 +73,10 @@ class ROCker
|
|
73
73
|
genome_file=nil unless @o[:noclean]
|
74
74
|
doc = ebiFetch(:embl, [genome_id], :gff3,
|
75
75
|
genome_file).split("\n").grep(/^[^#]/)
|
76
|
+
if doc.first =~ /ERROR 12 No entries found/
|
77
|
+
doc = ebiFetch(:emblconexp, [genome_id], :gff3,
|
78
|
+
genome_file).split("\n").grep(/^[^#]/)
|
79
|
+
end
|
76
80
|
end
|
77
81
|
doc.each do |ln|
|
78
82
|
next if ln =~ /^#/
|
@@ -80,7 +84,7 @@ class ROCker
|
|
80
84
|
next if r.size < 9
|
81
85
|
prots = r[8].split(/;/).grep(
|
82
86
|
/^db_xref=UniProtKB[\/A-Za-z-]*:/){ |xref| xref.split(/:/)[1] }
|
83
|
-
p = prots.select{ |id| pset.ids.include? id }.first
|
87
|
+
p = prots.compact.select{ |id| pset.ids.include? id }.first
|
84
88
|
trans = r[8].split(/;/).grep(
|
85
89
|
/^protein_id=/){ |pid| pid.split(/=/)[1] }
|
86
90
|
t = trans.select{ |id| pset.tranids.include? id }.first
|
@@ -191,7 +195,7 @@ class ROCker
|
|
191
195
|
puts " * downloading and parsing #{genome_set[set_type].size} " +
|
192
196
|
"GFF3 document(s) in #{thrs} threads." unless @o[:q]
|
193
197
|
$stderr.puts " # Looking for translations: " +
|
194
|
-
"#{protein_set[set_type].
|
198
|
+
"#{protein_set[set_type].tranids_dump}" if @o[:debug]
|
195
199
|
$stderr.puts " # Looking into: #{genome_set[set_type].ids}" if
|
196
200
|
@o[:debug]
|
197
201
|
# Launch threads
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-rocker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis (Coto) Orellana
|
@@ -9,36 +9,36 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rest-client
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- - "
|
18
|
+
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 1.7
|
20
|
+
version: '1.7'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - "
|
25
|
+
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: 1.7
|
27
|
+
version: '1.7'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: json
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- - "
|
32
|
+
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: 1.8
|
34
|
+
version: '1.8'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- - "
|
39
|
+
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: 1.8
|
41
|
+
version: '1.8'
|
42
42
|
description: Detecting and quantifying functional genes in short-read metagenomic
|
43
43
|
datasets
|
44
44
|
email: lhorellana@gatech.edu
|