bio-rocker 1.1.12 → 1.2.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 77c08046b7d0b8f4528e38726c4b0681ce55a5c1
4
- data.tar.gz: 8335d7484f0227f768a1cebdf0937bd8e7ef0513
3
+ metadata.gz: d4f32b4736d380d7e8b0ff7b6ef46b221b9a0651
4
+ data.tar.gz: cfbff18723f9b2d95d45350aba30a9b75cb32d5d
5
5
  SHA512:
6
- metadata.gz: 1a359c796f73a44fb008032786df546e640079d186751429c100e06b09cbc9141ecbe19d7937dc15ae7a3e1a2a02885fa6f08c42c22c97a131d48c471275f96e
7
- data.tar.gz: 38ca17ae7f4cd04f85ea799851e23cb6f095a17a3475dfb298b8863f1be8dcc3701e2d9dac08e2f3f8bc5b5f88726904488c411b99d03d277b4ef11f6b5a3a05
6
+ metadata.gz: 1a6d9fd0c94338415f334e50c928174ee595690c3c093b1478251d169b11265843eafccdec645d9997acd2ccff0cabcb7db67e77960e43a10d5bbc90298592ff
7
+ data.tar.gz: bd711c0f3a87f221b60014633130b73a581b5b5f6dd60eee5a948d5740f87bee68714f0b71b9b27412cf36ade84aac314ab20b267ae134e32a5d998e3855c056
data/bin/ROCker CHANGED
@@ -103,6 +103,9 @@ opts = OptionParser.new do |opt|
103
103
  opt.on("--reuse-files",
104
104
  "Re-use existing result files. By default existing files are ignored."
105
105
  ){ |v| o[:reuse]=true }
106
+ opt.on("--keep-unlinked",
107
+ "Keep genomes with unlinked positive coordinates (missing references)."
108
+ ){ |v| o[:keep_unlinked] = v }
106
109
 
107
110
  opt.separator ""
108
111
  opt.separator "+ EXTERNAL SOFTWARE OPTIONS"
@@ -344,9 +347,6 @@ opts = OptionParser.new do |opt|
344
347
  end
345
348
  opts.parse!
346
349
 
347
- p ARGV
348
-
349
-
350
350
  #================================[ Main ]
351
351
  rocker = ROCker.new(o)
352
352
  begin
@@ -9,13 +9,13 @@ require "rocker/rocdata"
9
9
 
10
10
  class ROCker
11
11
  #================================[ Class ]
12
- @@VERSION = "1.1.12"
12
+ @@VERSION = "1.2.0"
13
13
  @@CITATION = [
14
- "Orellana, Rodriguez-R, & Konstantinidis. Under review.",
15
- "Detecting and quantifying functional genes in short-read",
16
- "metagenomic datasets: method development and application",
17
- "to the nitrogen cycle genes."]
18
- @@DATE = "2016-05-17"
14
+ "Orellana, Rodriguez-R & Konstantinidis, 2016. DOI:10.1093/nar/gkw900.",
15
+ "ROCker: accurate detection and quantification of target genes in",
16
+ "short-read metagenomic data sets by modeling sliding-window bitscores.",
17
+ "Nucleic Acids Research 45(3):e14."]
18
+ @@DATE = "2018-06-28"
19
19
  @@DEFAULTS = {
20
20
  # General
21
21
  q: false, r: "R", nucl: false, debug: false, thr: 2, search: :blast,
@@ -18,7 +18,16 @@ class GenomeSet
18
18
  tmp_ids = Array.new(self.ids)
19
19
  ofh = File.open(file, "w")
20
20
  while tmp_ids.size>0
21
- ofh.print rocker.ebiFetch(:embl, tmp_ids.shift(200), :fasta)
21
+ ofh.print rocker.
22
+ ebiFetch(:embl, tmp_ids.shift(200), :fasta).
23
+ each_line.to_a.select { |i|
24
+ if i =~ /^Entry: (\S+) (.*)/
25
+ warn "EBI returned an error fetching #{$1}: #{$2}"
26
+ false
27
+ else
28
+ true
29
+ end
30
+ }.join
22
31
  end
23
32
  ofh.close
24
33
  end
@@ -44,6 +53,9 @@ class GenomeSet
44
53
  end
45
54
  def size() self.ids.size end
46
55
  def empty?() self.ids.empty? end
56
+ def delete!(ids)
57
+ ids.map{ |i| @ids.delete(i) }.flatten.compact
58
+ end
47
59
 
48
60
  #================================[ Utilities ]
49
61
  def genome2taxon(genome_id, rank="species")
@@ -63,6 +63,15 @@ class ProteinSet
63
63
  return [] if @genomes.empty?
64
64
  @genomes.values.reduce(:+).uniq
65
65
  end
66
+ def genome_by_prot_id(prot_id)
67
+ @genomes[prot_id]
68
+ end
69
+ ##
70
+ # Removes genomes linked to +prot_ids+ (an Array) and returns an Array
71
+ # of removed genomes.
72
+ def remove_genomes_by_prot_id!(prot_ids)
73
+ prot_ids.map{ |i| @genomes.delete(i) }.flatten.compact
74
+ end
66
75
  def tranids
67
76
  return [] if @tranids.empty?
68
77
  @tranids.values.reduce(:+).uniq
@@ -10,9 +10,9 @@ require "rocker/genome-set"
10
10
 
11
11
  class ROCker
12
12
  #================================[ Class ]
13
- @@EBIREST = "http://www.ebi.ac.uk/Tools"
13
+ @@EBIREST = "https://www.ebi.ac.uk/Tools"
14
14
  @@DEFAULTS.merge!({positive:[], negative:[], seqdepth:0.03, readlen:100,
15
- minovl:50,
15
+ minovl:50, keep_unlinked:false,
16
16
  # Ext. Software
17
17
  aligner: :clustalo, simulator: :grinder,
18
18
  simulatorbin:{grinder:"grinder"},
@@ -27,12 +27,12 @@ class ROCker
27
27
  def self.ebirest() @@EBIREST ; end
28
28
  def self.has_build_gems?
29
29
  return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
30
- @@HAS_BUILD_GEMS = TRUE
30
+ @@HAS_BUILD_GEMS = true
31
31
  begin
32
32
  require "rubygems"
33
33
  require "restclient"
34
34
  rescue LoadError
35
- @@HAS_BUILD_GEMS = FALSE
35
+ @@HAS_BUILD_GEMS = false
36
36
  end
37
37
  @@HAS_BUILD_GEMS
38
38
  end
@@ -54,6 +54,7 @@ class ROCker
54
54
  def ebiFetch(db, ids, format, outfile=nil)
55
55
  url = "#{ROCker.ebirest}/dbfetch/dbfetch/" +
56
56
  "#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
57
+ $stderr.puts url
57
58
  self.restcall url, outfile
58
59
  end
59
60
  def get_coords_from_gff3(genome_ids, pset, thread_id, json_file)
@@ -152,7 +153,7 @@ class ROCker
152
153
  ref_file = @o[:baseout] + ".ref.fasta"
153
154
  if not protein_set[:+].aln.nil?
154
155
  puts " * reusing aligned sequences as positive set." unless @o[:q]
155
- protein_set[:+].get_from_aln(ref_file, aln)
156
+ protein_set[:+].get_from_aln(ref_file, protein_set[:+].aln)
156
157
  @o[:noaln] = true
157
158
  elsif @o[:reuse] and File.size? ref_file
158
159
  puts " * reusing positive set: #{ref_file}." unless @o[:q]
@@ -250,8 +251,16 @@ class ROCker
250
251
  raise "Cannot find the genomic location of any provided sequence." if
251
252
  found.nil?
252
253
  missing = protein_set[:+].ids - found
253
- warn "\nWARNING: Cannot find genomic location of #{missing.size} " +
254
- "sequence(s) #{missing.join(",")}.\n\n" unless missing.empty?
254
+ unless missing.empty?
255
+ warn "\nWARNING: Cannot find genomic location of #{missing.size} " +
256
+ "sequence(s) #{missing.join(",")}.\n\n"
257
+ unless @o[:keep_unlinked]
258
+ del_genomes = protein_set[:+].remove_genomes_by_prot_id!(missing)
259
+ warn "WARNING: Ignoring #{del_genomes.size} genomes with missing " +
260
+ "coordinates #{del_genomes.join(",")}.\n\n"
261
+ genome_set[ :+ ].delete!(del_genomes)
262
+ end
263
+ end
255
264
 
256
265
  # Download genomes
257
266
  genome_set[:all] = GenomeSet.new(self,
@@ -260,7 +269,9 @@ class ROCker
260
269
  if @o[:reuse] and File.size? genomes_file
261
270
  puts " * reusing existing file: #{genomes_file}." unless @o[:q]
262
271
  else
263
- puts " * downloading " + genome_set[:all].size.to_s +
272
+ raise "Something went wrong: Nothing left to download." if
273
+ genome_set[:all].empty?
274
+ puts " * downloading " + genome_set[:all].size.to_s +
264
275
  " genome(s) in FastA." unless @o[:q]
265
276
  $stderr.puts " # #{genome_set[:all].ids}" if @o[:debug]
266
277
  genome_set[:all].download genomes_file
@@ -321,8 +332,8 @@ class ROCker
321
332
  if l =~ /^>/
322
333
  rd = %r{
323
334
  ^>(?<id>\d+)\s
324
- reference=[A-Za-z]+\|
325
- (?<genome_id>[A-Za-z0-9_]+)\|.*\s
335
+ reference=[A-Za-z_]+[\|:]
336
+ (?<genome_id>[A-Za-z0-9_]+)(?:\|.*)?\s
326
337
  position=(?<comp>complement\()?(?<from>\d+)\.\.
327
338
  (?<to>\d+)\)?\s
328
339
  }x.match(l)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-rocker
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.12
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis (Coto) Orellana
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-05-17 00:00:00.000000000 Z
12
+ date: 2018-06-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rest-client
@@ -64,7 +64,7 @@ files:
64
64
  - lib/rocker/step/search.rb
65
65
  homepage: http://enve-omics.ce.gatech.edu/rocker
66
66
  licenses:
67
- - artistic 2.0
67
+ - Artistic-2.0
68
68
  metadata: {}
69
69
  post_install_message:
70
70
  rdoc_options: []
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  version: '0'
83
83
  requirements: []
84
84
  rubyforge_project:
85
- rubygems_version: 2.5.1
85
+ rubygems_version: 2.6.13
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: ROCker