bio-rocker 1.1.12 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 77c08046b7d0b8f4528e38726c4b0681ce55a5c1
4
- data.tar.gz: 8335d7484f0227f768a1cebdf0937bd8e7ef0513
3
+ metadata.gz: d4f32b4736d380d7e8b0ff7b6ef46b221b9a0651
4
+ data.tar.gz: cfbff18723f9b2d95d45350aba30a9b75cb32d5d
5
5
  SHA512:
6
- metadata.gz: 1a359c796f73a44fb008032786df546e640079d186751429c100e06b09cbc9141ecbe19d7937dc15ae7a3e1a2a02885fa6f08c42c22c97a131d48c471275f96e
7
- data.tar.gz: 38ca17ae7f4cd04f85ea799851e23cb6f095a17a3475dfb298b8863f1be8dcc3701e2d9dac08e2f3f8bc5b5f88726904488c411b99d03d277b4ef11f6b5a3a05
6
+ metadata.gz: 1a6d9fd0c94338415f334e50c928174ee595690c3c093b1478251d169b11265843eafccdec645d9997acd2ccff0cabcb7db67e77960e43a10d5bbc90298592ff
7
+ data.tar.gz: bd711c0f3a87f221b60014633130b73a581b5b5f6dd60eee5a948d5740f87bee68714f0b71b9b27412cf36ade84aac314ab20b267ae134e32a5d998e3855c056
data/bin/ROCker CHANGED
@@ -103,6 +103,9 @@ opts = OptionParser.new do |opt|
103
103
  opt.on("--reuse-files",
104
104
  "Re-use existing result files. By default existing files are ignored."
105
105
  ){ |v| o[:reuse]=true }
106
+ opt.on("--keep-unlinked",
107
+ "Keep genomes with unlinked positive coordinates (missing references)."
108
+ ){ |v| o[:keep_unlinked] = v }
106
109
 
107
110
  opt.separator ""
108
111
  opt.separator "+ EXTERNAL SOFTWARE OPTIONS"
@@ -344,9 +347,6 @@ opts = OptionParser.new do |opt|
344
347
  end
345
348
  opts.parse!
346
349
 
347
- p ARGV
348
-
349
-
350
350
  #================================[ Main ]
351
351
  rocker = ROCker.new(o)
352
352
  begin
@@ -9,13 +9,13 @@ require "rocker/rocdata"
9
9
 
10
10
  class ROCker
11
11
  #================================[ Class ]
12
- @@VERSION = "1.1.12"
12
+ @@VERSION = "1.2.0"
13
13
  @@CITATION = [
14
- "Orellana, Rodriguez-R, & Konstantinidis. Under review.",
15
- "Detecting and quantifying functional genes in short-read",
16
- "metagenomic datasets: method development and application",
17
- "to the nitrogen cycle genes."]
18
- @@DATE = "2016-05-17"
14
+ "Orellana, Rodriguez-R & Konstantinidis, 2016. DOI:10.1093/nar/gkw900.",
15
+ "ROCker: accurate detection and quantification of target genes in",
16
+ "short-read metagenomic data sets by modeling sliding-window bitscores.",
17
+ "Nucleic Acids Research 45(3):e14."]
18
+ @@DATE = "2018-06-28"
19
19
  @@DEFAULTS = {
20
20
  # General
21
21
  q: false, r: "R", nucl: false, debug: false, thr: 2, search: :blast,
@@ -18,7 +18,16 @@ class GenomeSet
18
18
  tmp_ids = Array.new(self.ids)
19
19
  ofh = File.open(file, "w")
20
20
  while tmp_ids.size>0
21
- ofh.print rocker.ebiFetch(:embl, tmp_ids.shift(200), :fasta)
21
+ ofh.print rocker.
22
+ ebiFetch(:embl, tmp_ids.shift(200), :fasta).
23
+ each_line.to_a.select { |i|
24
+ if i =~ /^Entry: (\S+) (.*)/
25
+ warn "EBI returned an error fetching #{$1}: #{$2}"
26
+ false
27
+ else
28
+ true
29
+ end
30
+ }.join
22
31
  end
23
32
  ofh.close
24
33
  end
@@ -44,6 +53,9 @@ class GenomeSet
44
53
  end
45
54
  def size() self.ids.size end
46
55
  def empty?() self.ids.empty? end
56
+ def delete!(ids)
57
+ ids.map{ |i| @ids.delete(i) }.flatten.compact
58
+ end
47
59
 
48
60
  #================================[ Utilities ]
49
61
  def genome2taxon(genome_id, rank="species")
@@ -63,6 +63,15 @@ class ProteinSet
63
63
  return [] if @genomes.empty?
64
64
  @genomes.values.reduce(:+).uniq
65
65
  end
66
+ def genome_by_prot_id(prot_id)
67
+ @genomes[prot_id]
68
+ end
69
+ ##
70
+ # Removes genomes linked to +prot_ids+ (an Array) and returns an Array
71
+ # of removed genomes.
72
+ def remove_genomes_by_prot_id!(prot_ids)
73
+ prot_ids.map{ |i| @genomes.delete(i) }.flatten.compact
74
+ end
66
75
  def tranids
67
76
  return [] if @tranids.empty?
68
77
  @tranids.values.reduce(:+).uniq
@@ -10,9 +10,9 @@ require "rocker/genome-set"
10
10
 
11
11
  class ROCker
12
12
  #================================[ Class ]
13
- @@EBIREST = "http://www.ebi.ac.uk/Tools"
13
+ @@EBIREST = "https://www.ebi.ac.uk/Tools"
14
14
  @@DEFAULTS.merge!({positive:[], negative:[], seqdepth:0.03, readlen:100,
15
- minovl:50,
15
+ minovl:50, keep_unlinked:false,
16
16
  # Ext. Software
17
17
  aligner: :clustalo, simulator: :grinder,
18
18
  simulatorbin:{grinder:"grinder"},
@@ -27,12 +27,12 @@ class ROCker
27
27
  def self.ebirest() @@EBIREST ; end
28
28
  def self.has_build_gems?
29
29
  return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
30
- @@HAS_BUILD_GEMS = TRUE
30
+ @@HAS_BUILD_GEMS = true
31
31
  begin
32
32
  require "rubygems"
33
33
  require "restclient"
34
34
  rescue LoadError
35
- @@HAS_BUILD_GEMS = FALSE
35
+ @@HAS_BUILD_GEMS = false
36
36
  end
37
37
  @@HAS_BUILD_GEMS
38
38
  end
@@ -54,6 +54,7 @@ class ROCker
54
54
  def ebiFetch(db, ids, format, outfile=nil)
55
55
  url = "#{ROCker.ebirest}/dbfetch/dbfetch/" +
56
56
  "#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
57
+ $stderr.puts url
57
58
  self.restcall url, outfile
58
59
  end
59
60
  def get_coords_from_gff3(genome_ids, pset, thread_id, json_file)
@@ -152,7 +153,7 @@ class ROCker
152
153
  ref_file = @o[:baseout] + ".ref.fasta"
153
154
  if not protein_set[:+].aln.nil?
154
155
  puts " * reusing aligned sequences as positive set." unless @o[:q]
155
- protein_set[:+].get_from_aln(ref_file, aln)
156
+ protein_set[:+].get_from_aln(ref_file, protein_set[:+].aln)
156
157
  @o[:noaln] = true
157
158
  elsif @o[:reuse] and File.size? ref_file
158
159
  puts " * reusing positive set: #{ref_file}." unless @o[:q]
@@ -250,8 +251,16 @@ class ROCker
250
251
  raise "Cannot find the genomic location of any provided sequence." if
251
252
  found.nil?
252
253
  missing = protein_set[:+].ids - found
253
- warn "\nWARNING: Cannot find genomic location of #{missing.size} " +
254
- "sequence(s) #{missing.join(",")}.\n\n" unless missing.empty?
254
+ unless missing.empty?
255
+ warn "\nWARNING: Cannot find genomic location of #{missing.size} " +
256
+ "sequence(s) #{missing.join(",")}.\n\n"
257
+ unless @o[:keep_unlinked]
258
+ del_genomes = protein_set[:+].remove_genomes_by_prot_id!(missing)
259
+ warn "WARNING: Ignoring #{del_genomes.size} genomes with missing " +
260
+ "coordinates #{del_genomes.join(",")}.\n\n"
261
+ genome_set[ :+ ].delete!(del_genomes)
262
+ end
263
+ end
255
264
 
256
265
  # Download genomes
257
266
  genome_set[:all] = GenomeSet.new(self,
@@ -260,7 +269,9 @@ class ROCker
260
269
  if @o[:reuse] and File.size? genomes_file
261
270
  puts " * reusing existing file: #{genomes_file}." unless @o[:q]
262
271
  else
263
- puts " * downloading " + genome_set[:all].size.to_s +
272
+ raise "Something went wrong: Nothing left to download." if
273
+ genome_set[:all].empty?
274
+ puts " * downloading " + genome_set[:all].size.to_s +
264
275
  " genome(s) in FastA." unless @o[:q]
265
276
  $stderr.puts " # #{genome_set[:all].ids}" if @o[:debug]
266
277
  genome_set[:all].download genomes_file
@@ -321,8 +332,8 @@ class ROCker
321
332
  if l =~ /^>/
322
333
  rd = %r{
323
334
  ^>(?<id>\d+)\s
324
- reference=[A-Za-z]+\|
325
- (?<genome_id>[A-Za-z0-9_]+)\|.*\s
335
+ reference=[A-Za-z_]+[\|:]
336
+ (?<genome_id>[A-Za-z0-9_]+)(?:\|.*)?\s
326
337
  position=(?<comp>complement\()?(?<from>\d+)\.\.
327
338
  (?<to>\d+)\)?\s
328
339
  }x.match(l)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-rocker
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.12
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis (Coto) Orellana
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-05-17 00:00:00.000000000 Z
12
+ date: 2018-06-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rest-client
@@ -64,7 +64,7 @@ files:
64
64
  - lib/rocker/step/search.rb
65
65
  homepage: http://enve-omics.ce.gatech.edu/rocker
66
66
  licenses:
67
- - artistic 2.0
67
+ - Artistic-2.0
68
68
  metadata: {}
69
69
  post_install_message:
70
70
  rdoc_options: []
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  version: '0'
83
83
  requirements: []
84
84
  rubyforge_project:
85
- rubygems_version: 2.5.1
85
+ rubygems_version: 2.6.13
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: ROCker