RubyGems - bio-rocker - Versions diffs - 1.0.0 → 1.1.9 - Mend

bio-rocker 1.0.0 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

data/lib/rocker.rb CHANGED

@@ -2,7 +2,7 @@
 # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
 # @author Luis (Coto) Orellana
 # @license artistic license 2.0
-# @update Jun-05-2015
+# @update Sep-07-2015
 #
 require 'rocker/blasthit'
@@ -10,20 +10,30 @@ require 'rocker/rocdata'
 class ROCker
    #================================[ Class ]
+   @@VERSION = "1.1.9"
+   @@CITATION = "Orellana, Rodriguez-R, & Konstantinidis. Under review. " +
+      "Detecting and quantifying functional genes in short-read metagenomic " +
+      "datasets: method development and application to the nitrogen cycle " +
+      "genes."
    @@DEFAULTS = {
       # General
-      :q=>false, :r=>'R', :nucl=>false, :debug=>false,:thr=>2,:search=>:blast,
+      q: false, r: "R", nucl: false, debug: false, thr: 2, search: :blast,
       # External software
-      :searchbins=>'',
-      :searchcmd=>{
-	 :blast=>'%1$s%2$s -query "%3$s" -db "%4$s" -out "%5$s" -num_threads %6$d -outfmt 6 -max_target_seqs 1',
-	 :diamond=>'%1$sdiamond %2$s -q "%3$s" -d "%4$s" -o "%5$s" -t %6$d -k 1 --min-score 20 --sensitive'},
-      :makedbcmd=>{
-	 :blast=>'%1$smakeblastdb -dbtype %2$s -in "%3$s" -out "%4$s"',
-	 :diamond=>'%1$sdiamond makedb --in "%3$s" -d "%4$s"'}
+      searchbins: "",
+      searchcmd: {
+	 blast: '%1$s%2$s -query "%3$s" -db "%4$s" -out "%5$s" ' +
+	    '-num_threads %6$d -outfmt 6 -max_target_seqs 1',
+	 diamond: '%1$sdiamond %2$s -q "%3$s" -d "%4$s" -a "%5$s.daa" -p %6$d' +
+	    ' -k 1 --min-score 20 --sensitive && %1$sdiamond view -a "%5$s"' +
+	    ' -o "%5$s"'},
+      makedbcmd: {
+	 blast: '%1$smakeblastdb -dbtype %2$s -in "%3$s" -out "%4$s"',
+	 diamond: '%1$sdiamond makedb --in "%3$s" -d "%4$s"'}
    }
    def self.defaults() @@DEFAULTS ; end
    def self.default(k) @@DEFAULTS[k] ; end
+   def self.VERSION; @@VERSION ; end
+   def self.CITATION; @@CITATION ; end
    #================================[ Instance ]
    attr_reader :o
@@ -46,7 +56,8 @@ class ROCker
    end
    def bash(cmd, err_msg=nil)
       o = `#{cmd} 2>&1 && echo '{'`
-      raise (err_msg.nil? ? "Error executing: #{cmd}\n\n#{o}" : err_msg) unless o[-2]=='{'
+      raise (err_msg.nil? ? "Error executing: #{cmd}\n\n#{o}" : err_msg) unless
+	 o[-2]=="{"
       true
    end
 end
@@ -63,10 +74,10 @@ require 'rocker/step/plot'
 class Numeric
    def ordinalize
       n= self.to_s
-      s= n[-2]=='1' ? 'th' :
-	 n[-1]=='1' ? 'st' :
-	 n[-1]=='2' ? 'nd' :
-	 n[-1]=='3' ? 'rd' : 'th'
+      s= n[-2]=='1' ? "th" :
+	 n[-1]=='1' ? "st" :
+	 n[-1]=='2' ? "nd" :
+	 n[-1]=='3' ? "rd" : "th"
       n + s
    end
 end

data/lib/rocker/blasthit.rb CHANGED

@@ -2,12 +2,13 @@
 # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
 # @author Luis (Coto) Orellana
 # @license artistic license 2.0
-# @update Jan-22-2015
+# @update Sep-06-2015
 #
 class BlastHit
-   attr_reader :sbj, :sfrom, :sto, :bits, :istrue, :midpoint
-   # Initialize from BLAST using new(ln,aln), initialize from TABLE using new(ln)
+   attr_reader :sbj, :sfrom, :sto, :bits, :istrue, :isfalse, :midpoint
+   # Initialize from BLAST using new(ln,aln),
+   # initialize from TABLE using new(ln)
    def initialize(ln, aln=nil)
       l = ln.chomp.split(/\t/)
       if aln.nil?
@@ -16,6 +17,7 @@ class BlastHit
 	 @sto	= l[2].to_i
 	 @bits	= l[3].to_f
 	 @istrue = l[4]=='1'
+	 @istrue = l[4]=='-1'
 	 @midpoint = l[5].to_i
       else
 	 s = aln.seq(l[1])
@@ -27,13 +29,14 @@ class BlastHit
 	 @sto	= [a,b].max
 	 @bits	= l[11].to_f
 	 @istrue = ! /@%/.match(l[0]).nil?
+	 @isfalse = ! /@\$/.match(l[0]).nil?
 	 @midpoint = s.pos2col(((l[8].to_f+l[9].to_f)/2).ceil)
       end
    end
    def to_s
       self.sbj.nil? ? "" :
-	 [self.sbj, self.sfrom.to_s, self.sto.to_s, self.bits.to_s,
-	    self.istrue ? '1' : '0', self.midpoint].join("\t") + "\n"
+	 [sbj, sfrom.to_s, sto.to_s, bits.to_s,
+	    istrue ? "1" : (isfalse ? "-1" : "0"), midpoint].join("\t") + "\n"
    end
 end

data/lib/rocker/genome-set.rb ADDED

@@ -0,0 +1,70 @@
+#
+# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @author Luis (Coto) Orellana
+# @license artistic license 2.0
+# @update Jun-23-2015
+#
+class GenomeSet # Set of genome IDs plus taxon-to-genome links; all fetching goes through rocker.ebiFetch
+   attr_reader :rocker, :ids, :taxa
+   def initialize(rocker, ids) # rocker: object whose ebiFetch is called below; ids: genome IDs or nil
+      @rocker = rocker
+      @ids = ids
+      @ids = [] if ids.nil? # a nil list is stored as an empty array
+      @taxa = {} # taxon => single genome ID (set by choose_genomes! or taxa=)
+      @all_taxa = {} # taxon => array of genome IDs (filled by link_taxon)
+   end
+   def download(file) # Writes the ebiFetch(:embl, ..., :fasta) output for every ID into file
+      tmp_ids = Array.new(self.ids) # work on a copy: shift below must not empty @ids
+      ofh = File.open(file, "w")
+      while tmp_ids.size>0
+	 ofh.print rocker.ebiFetch(:embl, tmp_ids.shift(200), :fasta) # at most 200 IDs per request
+      end
+      ofh.close
+   end
+   def link_taxon(id, taxon) # Appends id to the list kept under taxon (key is taxon.to_sym)
+      @all_taxa[ taxon.to_sym ] ||= []
+      @all_taxa[ taxon.to_sym ] << id
+   end
+   def choose_genomes!(rank) # Re-reads the taxonomy at rank and keeps one genome per taxon
+      @taxa = {}
+      self.get_taxonomy! rank
+      @all_taxa.each_pair{ |taxon,ids| @taxa[taxon] = ids.sample } # sample: one random genome of the taxon
+      @ids = @taxa.values # the set is reduced to the chosen genomes
+   end
+   def get_taxonomy!(rank) # Resets @all_taxa, then links each ID to the taxon genome2taxon returns
+      @all_taxa = {}
+      ids.each do |id|
+	 self.link_taxon(id, genome2taxon(id, rank))
+      end
+   end
+   def taxa=(hash) # Replaces @taxa, keeping only pairs whose genome ID belongs to this set
+      @taxa = {}
+      hash.each_pair{ |taxon, id| @taxa[taxon] = id if self.ids.include? id }
+   end
+   def size() self.ids.size end
+   def empty?() self.ids.empty? end
+   #================================[ Utilities ]
+   def genome2taxon(genome_id, rank='species') # Returns a taxId string for genome_id at rank, or a random "no-taxon-" placeholder
+      v = genome2taxid(genome_id)
+      unless v.nil?
+	 xml = rocker.ebiFetch('taxonomy', [v], 'enataxonomyxml').gsub(/\s*\n\s*/,'')
+	 v = xml.scan(/<taxon [^>]+>/).grep(/rank="#{rank}"/).first
+	 v.sub!(/.* taxId="(\d+)".*/,"\\1") unless v.nil? # in place: reduce the matched tag to its taxId digits
+      end
+      return "no-taxon-#{(0...12).map { (65 + rand(26)).chr }.join}" if v.nil? or v !~ /^\d+$/
+      v
+   end
+   def genome2taxid(genome_id) # Returns the taxon ID digits found in the 'annot' record of genome_id, or nil
+      doc = rocker.ebiFetch('embl', [genome_id], 'annot').split(/[\n\r]/)
+      ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first # first choice: an FT db_xref="taxon:..." line
+      ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil? # otherwise: an OX NCBI_TaxID=... line
+      return nil if ln.nil?
+      ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1") # in place: keep only the digits
+      return nil unless ln =~ /^\d+$/ # nil when the line could not be reduced to digits
+      ln
+   end
+end

data/lib/rocker/protein-set.rb ADDED

@@ -0,0 +1,90 @@
+#
+# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @author Luis (Coto) Orellana
+# @license artistic license 2.0
+# @update Jul-20-2015
+#
+require 'rocker/alignment'
+class ProteinSet # Set of protein IDs plus the genome and translation IDs linked to each protein
+   attr_reader :rocker, :ids, :aln
+   def initialize(rocker, ids=nil, file=nil, aln_file=nil) # IDs are merged from ids, from file and from the alignment in aln_file
+      @genomes = {} # protein ID => array of genome IDs (filled by link_genome)
+      @tranids = {} # protein ID => array of translation IDs (filled by link_tranid)
+      @aln = nil
+      @rocker = rocker
+      @ids = []
+      @ids += ids unless ids.nil?
+      @ids += File.readlines(file).map{ |l| l.chomp } unless file.nil? # every line of file is taken as one ID
+      unless aln_file.nil?
+	 aln = Alignment.new
+	 aln.read_fasta aln_file
+	 aln_ids = aln.get_ids
+	 @aln = aln if (@ids - aln_ids).empty? # keep the alignment only if it contains every ID collected so far
+	 @ids += aln_ids
+      end
+      @ids.uniq!
+   end
+   def download(file) # Writes the ebiFetch(:uniprotkb, ..., :fasta) output for every ID into file
+      tmp_ids = Array.new(self.ids) # work on a copy: shift below must not empty @ids
+      f = File.open(file, "w")
+      while tmp_ids.size>0
+	 f.print rocker.ebiFetch(:uniprotkb, tmp_ids.shift(200), :fasta) # at most 200 IDs per request
+      end
+      f.close
+   end
+   def get_from_aln(file, aln) # Writes aln.to_seq_s into file
+      f = File.open(file, "w")
+      f.print aln.to_seq_s
+      f.close
+   end
+   def get_genomes! # Fetches the :annot record of each protein and links what its DR EMBL lines list
+      self.ids.each do |id|
+	 doc = self.rocker.ebiFetch(:uniprotkb, [id], :annot).split("\n")
+	 doc.grep( /^DR\s+EMBL;/ ).map do |ln|
+	    r=ln.split('; ') # r[1] is linked as genome ID, r[2] as translation ID
+	    self.link_genome(id, r[1])
+	    self.link_tranid(id, r[2])
+	 end
+      end
+   end
+   def link_genome(prot_id, genome_id) # Records genome_id under prot_id, without duplicates
+      @genomes[prot_id] ||= []
+      @genomes[prot_id] << genome_id
+      @genomes[prot_id].uniq!
+   end
+   def link_tranid(prot_id, transl_id) # Records transl_id under prot_id, without duplicates
+      @tranids[prot_id] ||= []
+      @tranids[prot_id] << transl_id
+      @tranids[prot_id].uniq!
+   end
+   def genomes # All linked genome IDs as one de-duplicated array
+      return [] if @genomes.empty?
+      @genomes.values.reduce(:+).uniq
+   end
+   def tranids # All linked translation IDs as one de-duplicated array
+      return [] if @tranids.empty?
+      @tranids.values.reduce(:+).uniq
+   end
+   def in_coords(coords) # Returns the unique protein IDs that the locations in coords resolve to
+      coords.keys.map do |genome|
+	 locations = coords[ genome ]
+	 locations.map do |loc|
+	    if not loc[:prot_id].nil?
+	       loc[:prot_id] if self.include? loc[:prot_id] # nil when the protein is not part of this set
+	    elsif not loc[:tran_id].nil? and not @tranids.rassoc(loc[:tran_id]).nil? # NOTE(review): rassoc compares whole values with ==, and link_tranid stores arrays, so a single tran_id may never match here - confirm
+	       @tranids.rassoc(loc[:tran_id]).first
+	    else
+	       warn "Warning: Impossible to resolve protein located in '#{genome}' at: #{loc}."
+	       nil
+	    end
+	 end
+      end.reduce([], :+).compact.uniq # concatenate the per-genome arrays, then drop nils and repeats
+   end
+   def size() self.ids.size end
+   def empty?() self.ids.empty? end
+   def include?(id) self.ids.include?(id) end
+end

data/lib/rocker/rocdata.rb CHANGED

@@ -2,7 +2,7 @@
 # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
 # @author Luis (Coto) Orellana
 # @license artistic license 2.0
-# @update Jan-22-2015
+# @update Sep-07-2015
 #
 require 'rocker/rinterface'
@@ -11,11 +11,13 @@ require 'rocker/alignment'
 require 'tmpdir'
 class ROCData
-   attr_reader :aln, :windows, :r
-   # Use ROCData.new(table,aln,window) to re-compute from table, use ROCData.new(data) to load
+   attr_reader :aln, :windows, :r, :refined
+   # Use ROCData.new(table,aln,window) to re-compute from table, use
+   # ROCData.new(data) to load
    def initialize(val, aln=nil, window=nil)
       @r = RInterface.new
       @nucl = false
+      @refined = false
       if not aln.nil?
 	 @aln = aln
 	 self.rrun "library('pROC');"
@@ -33,7 +35,9 @@ class ROCData
 	 @aln.read_rocker(val)
       end
    end
-   def win_at_col(col) self.windows.select{|w| (w.from<=col) and (w.to>=col)}.first end
+   def win_at_col(col)
+      self.windows.select{|w| (w.from<=col) and (w.to>=col)}.first
+   end
    def in_nucl?() @nucl end
    def nucl=(nucl) @nucl=nucl end
    def refine! table
@@ -41,14 +45,17 @@ class ROCData
 	 return false unless self.load_table! table
 	 break if self._refine_iter(table)==0
       end
+      @refined = true
       return true
    end
+   def is_refined? ; @refined ; end
    def _refine_iter table
       to_refine = []
       self.windows.each do |w|
 	 next if w.almost_empty or w.length <= 5
 	 self.rrun "acc <- w$accuracy[w$V1==#{w.from}];"
-	 to_refine << w if self.rrun("ifelse(is.na(acc), 100, acc)", :float) < 95.0
+	 to_refine << w if
+	    self.rrun("ifelse(is.na(acc), 100, acc)", :float) < 95.0
       end
       n = to_refine.size
       return 0 unless n > 0
@@ -86,9 +93,17 @@ class ROCData
 	    win <- which( (m>=w$V1) & (m<=w$V2))[1];
 	    if(!is.na(win)){
 	       if(x$V4[i] >= w$V5[win]){
-		  if(x$V5[i]==1){ w$tp[win] <- w$tp[win]+1 }else{ w$fp[win] <- w$fp[win]+1 };
+		  if(x$V5[i]==1){
+		     w$tp[win] <- w$tp[win]+1
+		  } else {
+		     w$fp[win] <- w$fp[win]+1
+		  }
 	       }else{
-		  if(x$V5[i]==1){ w$fn[win] <- w$fn[win]+1 }else{ w$tn[win] <- w$tn[win]+1 };
+		  if(x$V5[i]==1){
+		     w$fn[win] <- w$fn[win]+1
+		  } else {
+		     w$tn[win] <- w$tn[win]+1
+		  };
 	       }
 	    }
 	 }
@@ -106,7 +121,9 @@ class ROCData
    end
    def init_windows!(size)
       @windows = []
-      1.step(self.aln.cols,size).each { |a| @windows << ROCWindow.new(self, a, a+size-1) }
+      1.step(self.aln.cols,size).each do |a|
+	 @windows << ROCWindow.new(self, a, a+size-1)
+      end
    end
    def rrun(cmd, type=nil) self.r.run cmd, type end
    def save(file)
@@ -115,7 +132,7 @@ class ROCData
       f.close
    end
    def to_s
-      o = ''
+      o = "#v ROCker " + ROCker.VERSION + "\n"
       self.windows.each{|w| o += w.to_s}
       o += self.aln.to_s
       return o

data/lib/rocker/rocwindow.rb CHANGED

@@ -2,7 +2,7 @@
 # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
 # @author Luis (Coto) Orellana
 # @license artistic license 2.0
-# @update Jan-22-2015
+# @update Sep-07-2015
 #
 class ROCWindow
@@ -22,16 +22,18 @@ class ROCWindow
 	 @from = [a,b].min
 	 @to = [a,b].max
 	 @thr = nil
-	 self.compute!
+	 compute!
       end
    end
    def compute!
-      self.load_hits
-      @hits = self.rrun "nrow(y);", :int
-      @tps = self.rrun "sum(y$V5);", :int
-      unless self.almost_empty
-	 self.rrun "rocobj <- roc(y$V5, y$V4);"
-	 thr = self.rrun 'coords(rocobj, "best", ret="threshold", best.method="youden", best.weights=c(0.5, sum(y$V5)/nrow(y)))[1];', :float
+      load_hits
+      @hits = rrun("nrow(y);", :int)
+      @tps = rrun("sum(y$V5==1);", :int)
+      unless almost_empty
+	 rrun "rocobj <- roc(as.numeric(y$V5==1), y$V4);"
+	 thr = rrun("coords(rocobj, 'best', ret='threshold', " +
+	    "best.method='youden', " +
+	    "best.weights=c(0.5, sum(y$V5==1)/nrow(y)))[1];", :float)
 	 @thr = thr.to_f
 	 @thr = nil if @thr==0.0 or @thr.infinite?
       end
@@ -48,16 +50,16 @@ class ROCWindow
       return nil if a.nil? and b.nil?
       return a.thr if b.nil?
       return b.thr if a.nil?
-      return (b.thr*(self.from-a.from) - a.thr*(self.from-b.from))/(b.from-a.from)
+      return (b.thr*(from-a.from) - a.thr*(from-b.from))/(b.from-a.from)
    end
-   def load_hits() self.rrun "y <- x[x$V6>=#{self.from} & x$V6<=#{self.to},];" end
-   def previous() (self.from == 1) ? nil : self.data.win_at_col(self.from - 1) end
-   def next() (self.to == self.data.aln.cols) ? nil : self.data.win_at_col(self.to + 1) end
-   def thr_notnil() (@thr.nil? or @thr.infinite?) ? self.around_thr : @thr end
-   def fps() self.hits - self.tps end
-   def almost_empty() self.fps < 3 or self.tps < 3 end
-   def length() self.to - self.from + 1 end
-   def rrun(cmd, type=nil) self.data.rrun cmd, type end
-   def to_s() [self.from, self.to, self.hits, self.tps, self.thr_notnil].join("\t") + "\n" end
+   def load_hits() self.rrun "y <- x[x$V6>=#{from} & x$V6<=#{to},];" end
+   def previous() (from == 1) ? nil : data.win_at_col(from - 1) end
+   def next() (to == data.aln.cols) ? nil : data.win_at_col(to + 1) end
+   def thr_notnil() (@thr.nil? or @thr.infinite?) ? around_thr : @thr end
+   def fps() hits - tps end
+   def almost_empty() fps < 3 or tps < 3 end
+   def length() to - from + 1 end
+   def rrun(cmd, type=nil) data.rrun(cmd, type) end
+   def to_s() [from, to, hits, tps, thr_notnil].join("\t") + "\n" end
 end

data/lib/rocker/step/build.rb CHANGED

@@ -2,21 +2,27 @@
 # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
 # @author Luis (Coto) Orellana
 # @license artistic license 2.0
-# @update Jun-05-2015
+# @update Sep-11-2015
 #
 require 'json'
+require 'rocker/protein-set'
+require 'rocker/genome-set'
 class ROCker
    #================================[ Class ]
-   @@EBIREST = 'http://www.ebi.ac.uk/Tools'
-   @@DEFAULTS.merge!({:positive=>[], :negative=>[], :genomefrx=>1.0, :seqdepth=>0.03, :readlen=>100, :minovl=>50,
+   @@EBIREST = "http://www.ebi.ac.uk/Tools"
+   @@DEFAULTS.merge!({positive:[], negative:[], seqdepth:0.03, readlen:100,
+      minovl:50,
       # Ext. Software
-      :aligner=>:clustalo, :simulator=>:grinder,
-      :simulatorbin=>{:grinder=>'grinder'},
-      :simulatorcmd=>{:grinder=>'%1$s -reference_file "%2$s" -cf "%3$f" -dc \'-~*NnKkMmRrYySsWwBbVvHhDdXx\' -md uniform 0.1 -mr 95 5 -rd %4$d uniform 5 -base_name "%5$s"'},
-      :alignerbin=>{:muscle=>'muscle', :clustalo=>'clustalo'},
-      :alignercmd=>{:muscle=>'%1$s -in "%2$s" -out "%3$s" -quiet', :clustalo=>'%1$s -i "%2$s" -o "%3$s" --threads=%4$d --force'}
+      aligner: :clustalo, simulator: :grinder,
+      simulatorbin:{grinder:"grinder"},
+      simulatorcmd:{grinder:"%1$s -reference_file \"%2$s\" -cf \"%3$f\" " +
+	 "-dc '-~*NnKkMmRrYySsWwBbVvHhDdXx' -md uniform 0.1 -mr 95 5 " +
+	 "-rd %4$d uniform 5 -base_name \"%5$s\""},
+      alignerbin:{muscle:"muscle", clustalo:"clustalo"},
+      alignercmd:{muscle:"%1$s -in \"%2$s\" -out \"%3$s\" -quiet",
+	 clustalo:"%1$s -i \"%2$s\" -o \"%3$s\" --threads=%4$d --force"}
    })
    @@HAS_BUILD_GEMS = nil
    def self.ebirest() @@EBIREST ; end
@@ -33,90 +39,65 @@ class ROCker
    end
    #================================[ Utilities ]
-   def genes2genomes(gene_ids)
-      genomes = []
-      ids = Array.new(gene_ids)
-      while ids.size>0
-	 doc = ebiFetch(:uniprotkb, ids.shift(200), :annot).split("\n")
-	 genomes += doc.grep( /^DR\s+EMBL;/ ).map do |ln|
-	    r=ln.split('; ')
-	    {:genome_id=>r[1], :transl_id=>r[2]}
-	 end
-      end
-      genomes.uniq
-   end
-   def genome2taxid(genome_id)
-      ln = ebiFetch('embl', [genome_id], 'annot').split(/[\n\r]/).grep(/^FT\s+\/db_xref="taxon:/).first
-      return ln if ln.nil?
-      ln.sub(/.*"taxon:(\d+)".*/, "\\1")
-   end
-   def genome2taxon(genome_id, rank='species')
-      xml = ebiFetch('taxonomy', [genome2taxid(genome_id)], 'enataxonomyxml').gsub(/\s*\n\s*/,'')
-      xml.scan(/<taxon [^>]+>/).grep(/rank="#{rank}"/).first.sub(/.* taxId="(\d+)".*/,"\\1")
-   end
    def restcall(url, outfile=nil)
       $stderr.puts "   # Calling: #{url}" if @o[:debug]
-      response = RestClient::Request.execute(:method=>:get,  :url=>url, :timeout=>600)
-      raise "Unable to reach EBI REST client, error code #{response.code}." unless response.code == 200
+      response = RestClient::Request.execute(:method=>:get, :url=>url,
+	 :timeout=>600)
+      raise "Unable to reach EBI REST client, error code " +
+	 response.code.to_s + "." unless response.code == 200
       unless outfile.nil?
-	 ohf = File.open(outfile, 'w')
+	 ohf = File.open(outfile, "w")
 	 ohf.print response.to_s
 	 ohf.close
       end
       response.to_s
    end
    def ebiFetch(db, ids, format, outfile=nil)
-      url = "#{ROCker.ebirest}/dbfetch/dbfetch/#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
-      res = self.restcall url
-      unless outfile.nil?
-	 ohf = File.open(outfile, 'w')
-	 ohf.print res
-	 ohf.close
-      end
-      res
+      url = "#{ROCker.ebirest}/dbfetch/dbfetch/" +
+	 "#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
+      self.restcall url, outfile
    end
-   def get_coords_from_gff3(genome_ids, protein_ids, transl_ids, thread_id, json_file)
-      positive_coords = {}
-      genomes_org = {}
+   def get_coords_from_gff3(genome_ids, pset, thread_id, json_file)
+      coords = {}
       i = 0
       genome_ids.each do |genome_id|
-	 print "  * scanning #{(i+=1).ordinalize} genome out of #{genome_ids.size} in first thread.  \r" if thread_id==0 and not @o[:q]
-	 unless @o[:pertaxon].nil?
-	    genome_taxon = genome2taxon(genome_id, @o[:pertaxon])
-	    genomes_org[ genome_taxon.to_sym ] ||= []
-	    genomes_org[ genome_taxon.to_sym ] << genome_id
-	 end
+	 print "  * scanning #{(i+=1).ordinalize} genome out of " +
+	    "#{genome_ids.size} in first thread.  \r" if
+	    thread_id==0 and not @o[:q]
 	 genome_file = @o[:baseout] + ".src." + genome_id + ".gff3"
 	 if @o[:reuse] and File.size? genome_file
-	    ifh = File.open(genome_file, 'r')
+	    ifh = File.open(genome_file, "r")
 	    doc = ifh.readlines.grep(/^[^#]/)
 	    ifh.close
 	 else
 	    genome_file=nil unless @o[:noclean]
-	    doc = ebiFetch(:embl, [genome_id], :gff3, genome_file).split("\n").grep(/^[^#]/)
+	    doc = ebiFetch(:embl, [genome_id], :gff3,
+	       genome_file).split("\n").grep(/^[^#]/)
 	 end
 	 doc.each do |ln|
 	    next if ln =~ /^#/
 	    r = ln.chomp.split /\t/
 	    next if r.size < 9
-	    prots = r[8].split(/;/).grep(/^db_xref=UniProtKB[\/A-Za-z-]*:/){ |xref| xref.split(/:/)[1] }
-	    p = prots.select{ |id| protein_ids.include? id }.first
-	    trans = r[8].split(/;/).grep(/^protein_id=/){ |pid| pid.split(/=/)[1] }
-	    t = trans.select{ |id|  transl_ids.include? id }.first
+	    prots = r[8].split(/;/).grep(
+	       /^db_xref=UniProtKB[\/A-Za-z-]*:/){ |xref| xref.split(/:/)[1] }
+	    p = prots.select{ |id| pset.ids.include? id }.first
+	    trans = r[8].split(/;/).grep(
+	       /^protein_id=/){ |pid| pid.split(/=/)[1] }
+	    t = trans.select{ |id| pset.tranids.include? id }.first
 	    next if p.nil? and t.nil?
-	    positive_coords[ r[0].to_sym ] ||= []
-	    positive_coords[ r[0].to_sym ] << {
-	       :prot_id	=> p,
-	       :tran_id => t,
-	       :from	=> r[3].to_i,
-	       :to	=> r[4].to_i,
-	       :strand	=> r[6]
+	    coords[ r[0].to_sym ] ||= []
+	    coords[ r[0].to_sym ] << {
+	       prot_id:	p,
+	       tran_id:	t,
+	       from:	r[3].to_i,
+	       to:	r[4].to_i,
+	       strand:	r[6]
 	    }
 	 end
       end
       print "\n" if thread_id==0 and not @o[:q]
-      ofh = File.open json_file, "w"
-      ofh.print({:positive_coords=>positive_coords, :genomes_org=>genomes_org}.to_json)
+      ofh = File.open(json_file, "w")
+      ofh.print({coords:coords}.to_json)
       ofh.close
    end
@@ -124,211 +105,251 @@ class ROCker
    def build!
       # Check requirements
       puts "Testing environment." unless @o[:q]
-      @o[:searchcmd] = @o[:searchcmd][@o[:search]] if @o[:searchcmd].is_a? Hash
-      @o[:makedbcmd] = @o[:makedbcmd][@o[:search]] if @o[:makedbcmd].is_a? Hash
-      @o[:alignercmd] = @o[:alignercmd][@o[:aligner]] if @o[:alignercmd].is_a? Hash
-      @o[:simulatorcmd] = @o[:simulatorcmd][@o[:simulator]] if @o[:simulatorcmd].is_a? Hash
-      @o[:alignerbin] = @o[:alignerbin][@o[:aligner]] if @o[:alignerbin].is_a? Hash
-      @o[:simulatorbin] = @o[:simulatorbin][@o[:simulator]] if @o[:simulatorbin].is_a? Hash
+      {  searchcmd: :search, makedbcmd: :search,
+	 alignercmd: :aligner, alignerbin: :aligner,
+	 simulatorcmd: :simulator, simulatorbin: :simulator
+      }.each_pair { |k,v| @o[k] = @o[k][@o[v]] if @o[k].is_a? Hash }
       @o[:nosearch]=true if @o[:nosimulate]
-      raise "Unsatisfied requirements, please see the help message (-h)." unless ROCker.has_build_gems?
-      @o[:positive] += @o[:posori] unless @o[:posori].nil?
-      @o[:positive] += File.readlines(@o[:posfile]).map{ |l| l.chomp } unless @o[:posfile].nil?
-      @o[:negative] += File.readlines(@o[:negfile]).map{ |l| l.chomp } unless @o[:negfile].nil?
-      unless @o[:aln].nil?
-         aln = Alignment.new
-	 aln.read_fasta @o[:aln]
-	 @o[:positive] += aln.get_ids
-      end
-      raise "-p or -P are mandatory." if @o[:positive].size==0
+      raise "Unsatisfied requirements, please see the help message (-h)." unless
+	 ROCker.has_build_gems?
+      protein_set = {}
+      protein_set[:+] = ProteinSet.new(self,@o[:positive],@o[:posfile],@o[:aln])
+      protein_set[:-] = ProteinSet.new(self,@o[:negative],@o[:negfile])
+      raise "-p, -P, or -a are mandatory." if protein_set[:+].empty?
       raise "-o/--baseout is mandatory." if @o[:baseout].nil?
-      if @o[:positive].size == 1 and not @o[:noaln]
-	 warn "\nWARNING: Positive set contains only one sequence, turning off alignment.\n\n"
+      if protein_set[:+].size==1 and not @o[:noaln]
+	 warn "\nWARNING: Positive set contains only one sequence, turning " +
+	    "off alignment.\n\n"
 	 @o[:noaln] = true
       end
       unless @o[:nosimulate]
-	 self.bash "#{@o[:simulatorbin]} --version", "--simulator-bin must be executable. Is Grinder installed?" if @o[:simulator]==:grinder
+	 self.bash("#{@o[:simulatorbin]} --version",
+	    "--simulator-bin must be executable. Is Grinder installed?") if
+	    @o[:simulator]==:grinder
       end
       unless @o[:noaln]
-	 self.bash "#{@o[:alignerbin]} -version", "--aligner-bin must be executable. Is Muscle installed?" if @o[:aligner]==:muscle
-	 self.bash "#{@o[:alignerbin]} --version", "--aligner-bin must be executable. Is ClustalOmega installed?" if @o[:aligner]==:clustalo
+	 self.bash("#{@o[:alignerbin]} -version",
+	    "--aligner-bin must be executable. Is Muscle installed?") if
+	    @o[:aligner]==:muscle
+	 self.bash("#{@o[:alignerbin]} --version",
+	    "--aligner-bin must be executable. Is ClustalOmega installed?") if
+	    @o[:aligner]==:clustalo
       end
       unless @o[:nosearch]
-	 self.bash "#{@o[:searchbins]}makeblastdb -version", "--search-bins must contain executables. Is BLAST+ installed?" if @o[:search]==:blast
-	 self.bash "#{@o[:searchbins]}diamond --help", "--search-bins must contain executables. Is DIAMOND installed?" if @o[:search]==:diamond
+	 self.bash("#{@o[:searchbins]}makeblastdb -version",
+	    "--search-bins must contain executables. Is BLAST+ installed?") if
+	    @o[:search]==:blast
+	 self.bash("#{@o[:searchbins]}diamond --help",
+	    "--search-bins must contain executables. Is DIAMOND installed?") if
+	    @o[:search]==:diamond
       end
       # Download genes
       puts "Downloading gene data." unless @o[:q]
       ref_file = @o[:baseout] + ".ref.fasta"
-      if @o[:posori].nil? and @o[:posfile].nil? and not @o[:aln].nil?
+      if not protein_set[:+].aln.nil?
 	 puts "  * reusing aligned sequences as positive set." unless @o[:q]
-	 f = File.open(ref_file, "w")
-	 f.print aln.to_seq_s
-	 f.close
+	 protein_set[:+].get_from_aln(ref_file, aln)
 	 @o[:noaln] = true
       elsif @o[:reuse] and File.size? ref_file
 	 puts "  * reusing positive set: #{ref_file}." unless @o[:q]
       else
-	 puts "  * downloading #{@o[:positive].size} sequence(s) in positive set." unless @o[:q]
-	 $stderr.puts "   # #{@o[:positive]}" if @o[:debug]
-	 ids = Array.new(@o[:positive])
-	 f = File.open(ref_file, "w")
-	 while ids.size>0
-	    f.print ebiFetch(:uniprotkb, ids.shift(200), :fasta)
-	 end
-	 f.close
+	 puts "  * downloading #{protein_set[:+].size} sequence(s) in " +
+	    "positive set." unless @o[:q]
+	 $stderr.puts "   # #{protein_set[:+].ids}" if @o[:debug]
+	 protein_set[:+].download(ref_file)
       end
-      genome_ids = {:positive=>[], :negative=>[]}
-      transl_ids = {:positive=>[], :negative=>[]}
-      [:positive, :negative].each do |set|
-         unless @o[set].size==0
-	    puts "  * linking genomes from #{@o[set].size} #{set.to_s} sequence(s)." unless @o[:q]
-	    $stderr.puts "   # #{@o[set]}" if @o[:debug]
-	    r = genes2genomes(@o[set])
-	    genome_ids[set] = r.map{|i| i[:genome_id]}.uniq
-	    transl_ids[set] = r.map{|i| i[:transl_id]}.uniq
+      [:+, :-].each do |set|
+         unless protein_set[set].empty?
+	    puts "  * linking genomes from #{protein_set[set].size} " +
+	       "[#{set.to_s}] sequence(s)." unless @o[:q]
+	    $stderr.puts "   # #{protein_set[set].ids}" if @o[:debug]
+	    protein_set[set].get_genomes!
 	 end
       end
-      raise "No genomes associated with the positive set." if genome_ids[:positive].size==0
-      genome_ids[:positive] = genome_ids[:positive].sample( (genome_ids[:positive].size*@o[:genomefrx]).round ) if @o[:genomefrx]
-      raise "No positive genomes selected for metagenome construction, is --genome-frx too small?" if genome_ids[:positive].empty?
-      all_genome_ids = genome_ids.values.reduce(:+).uniq
+      raise "No genomes associated with the positive set." if
+	 protein_set[:+].genomes.empty?
+      genome_set = {:+ => GenomeSet.new(self, protein_set[:+].genomes),
+	 :- => GenomeSet.new(self, protein_set[:-].genomes)}
       # Locate genes
       puts "Analyzing genome data." unless @o[:q]
       coords_file = @o[:baseout] + ".src.coords"
       if @o[:reuse] and File.size? coords_file
 	 puts "  * reusing coordinates: #{coords_file}." unless @o[:q]
-	 c = JSON.parse File.read(coords_file), {:symbolize_names=>true}
+	 c = JSON.parse File.read(coords_file), {symbolize_names:true}
 	 positive_coords = c[:positive_coords]
-	 genome_org = c[:genome_org]
+	 negative_coords = c[:negative_coords]
+	 genome_set[:+].taxa = c[:taxa_pos]
+	 genome_set[:-].taxa = c[:taxa_neg]
       else
-	 thrs = [@o[:thr], genome_ids[:positive].size].min
-	 puts "  * downloading and parsing #{genome_ids[:positive].size} GFF3 document(s) in #{thrs} threads." unless @o[:q]
-	 $stderr.puts "   # Looking for proteins: #{@o[:positive]}" if @o[:debug]
-	 $stderr.puts "   # Looking for translations: #{transl_ids[:positive]}" if @o[:debug]
-	 $stderr.puts "   # Looking into: #{genome_ids[:positive]}" if @o[:debug]
-	 thr_obj = []
-	 (0 .. (thrs-1)).each do |thr_i|
-	    ids_to_parse = []
-	    (0 .. (genome_ids[:positive].size-1)).each do |i|
-	       ids_to_parse << genome_ids[:positive][i] if (i % thrs)==thr_i
-	    end
-	    json_file = @o[:baseout] + ".src.coords." + thr_i.to_s
-	    thr_obj << json_file
-	    fork do
-	       get_coords_from_gff3(ids_to_parse, @o[:positive], transl_ids[:positive], thr_i, json_file)
-	    end
-	 end
-	 Process.waitall
-	 # Combine results
-	 positive_coords = {}
-	 genomes_org = {}
-	 genome_org = {}
-	 thr_obj.each do |t|
-	    raise "Thread failed without error trace: #{t}" unless File.exist? t
-	    o = JSON.parse File.read(t), {:symbolize_names=>true, :create_additions=>true}
-	    o[:positive_coords].each_pair do |k,v|
-	       positive_coords[ k ] ||= []
-	       positive_coords[ k ] += v
+	 all_coords = {}
+	 [:+, :-].each do |set_type|
+	    all_coords[set_type] = {}
+	    next if genome_set[set_type].empty?
+	    thrs = [@o[:thr], genome_set[set_type].size].min
+	    puts "  * downloading and parsing #{genome_set[set_type].size} " +
+	       "GFF3 document(s) in #{thrs} threads." unless @o[:q]
+	    $stderr.puts "   # Looking for translations: " +
+	       "#{protein_set[set_type].tranids}" if @o[:debug]
+	    $stderr.puts "   # Looking into: #{genome_set[set_type].ids}" if
+	       @o[:debug]
+	    # Launch threads
+	    thr_obj = []
+	    (0 .. (thrs-1)).each do |thr_i|
+	       ids_to_parse = []
+	       (0 .. (genome_set[set_type].size-1)).each do |i|
+		  ids_to_parse << protein_set[set_type].genomes[i] if
+		     (i % thrs) == thr_i
+	       end
+	       json_file = @o[:baseout] + ".src.coords." + thr_i.to_s + ".tmp"
+	       thr_obj << json_file
+	       fork do
+		  get_coords_from_gff3(ids_to_parse, protein_set[set_type],
+		     thr_i, json_file)
+	       end
 	    end
-	    o[:genomes_org].each_pair do |k,v|
-	       genomes_org[ k ] ||= []
-	       genomes_org[ k ] << v
+	    # Combine results
+	    Process.waitall
+	    thr_obj.each do |t|
+	       raise "Thread failed without error trace: #{t}" unless
+		  File.exist? t
+	       o = JSON.parse(File.read(t), {symbolize_names:true})
+	       o[:coords].each_pair do |k,v|
+		  all_coords[set_type][ k ] ||= []
+		  all_coords[set_type][ k ] += v
+	       end
+	       File.unlink t
 	    end
-	    File.unlink t
-	 end
+	 end # [:+, :-].each
+	 positive_coords = all_coords[:+]
+	 negative_coords = all_coords[:-]
 	 # Select one genome per taxon
 	 unless @o[:pertaxon].nil?
-	    genomes_org.each_pair{ |k,v| genome_org[ k ] = v.sample.first }
+	    puts "  Selecting genomes by #{@o[:pertaxon]}." unless @o[:q]
+	    [:+,:-].each{ |set| genome_set[set].choose_genomes! @o[:pertaxon] }
 	 end
-	 # Save coordinates
+	 # Save coordinates and taxa
 	 ofh = File.open(coords_file, "w")
-	 ofh.print JSON.pretty_generate({:positive_coords=>positive_coords, :genome_org=>genome_org})
+	 ofh.print JSON.pretty_generate({
+	    positive_coords:positive_coords,
+	    negative_coords:negative_coords,
+	    taxa_pos:genome_set[:+].taxa,
+	    taxa_neg:genome_set[:-].taxa})
 	 ofh.close
-      end
+      end # if @o[:reuse] and File.size? coords_file ... else
       unless @o[:pertaxon].nil?
-	 genome_ids[:positive] = genome_org.values
-	 puts "  Using #{genome_org.size} genome(s) after filtering by #{@o[:pertaxon]}." unless @o[:q]
+	 puts "  Using " +
+	    [:+,:-].map{ |set| genome_set[set].size }.reduce(:+).to_s +
+	    " genome(s) after filtering by #{@o[:pertaxon]}." unless @o[:q]
       end
-      all_genome_ids = genome_ids.values.reduce(:+).uniq
-      found = positive_coords.values.map{ |a| a.map{ |b| b[:prot_id] } }.reduce(:+).compact.uniq
-      unknown_pid = positive_coords.values.map{ |a| a.map{ |b| b[:prot_id].nil? ? b[:tran_id] : nil } }.reduce(:+).compact.uniq
-      raise "Cannot find the genomic location of any provided sequence." if found.nil?
-      missing = @o[:positive] - found
-      warn "\nWARNING: Cannot find genomic location of sequence(s) #{missing.join(',')}.\nMissing: #{missing.size}, Unlinked translations: #{unknown_pid.size}\n\n" unless missing.size==0 or missing.size==unknown_pid.size or @o[:genomefrx]<1.0
+      found = protein_set[:+].in_coords(positive_coords)
+      raise "Cannot find the genomic location of any provided sequence." if
+	 found.nil?
+      missing = protein_set[:+].ids - found
+      warn "\nWARNING: Cannot find genomic location of #{missing.size} " +
+	 "sequence(s) #{missing.join(",")}.\n\n" unless missing.empty?
       # Download genomes
-      genomes_file = @o[:baseout] + '.src.fasta'
+      genome_set[:all] = GenomeSet.new(self,
+	 genome_set[ :+ ].ids + genome_set[ :- ].ids)
+      genomes_file = @o[:baseout] + ".src.fasta"
       if @o[:reuse] and File.size? genomes_file
 	 puts "  * reusing existing file: #{genomes_file}." unless @o[:q]
       else
-	 puts "  * downloading #{all_genome_ids.size} genome(s) in FastA." unless @o[:q]
-	 $stderr.puts "   # #{all_genome_ids}" if @o[:debug]
-	 ids = Array.new(all_genome_ids)
-	 ofh = File.open(genomes_file, 'w')
-	 while ids.size>0
-	    ofh.print ebiFetch('embl', ids.shift(200), 'fasta')
-	 end
-	 ofh.close
+	 puts "  * downloading " + genome_set[:all].size.to_s +
+	    " genome(s) in FastA." unless @o[:q]
+	 $stderr.puts "   # #{genome_set[:all].ids}" if @o[:debug]
+	 genome_set[:all].download genomes_file
       end
       # Generate metagenome
       unless @o[:nosimulate]
 	 puts "Generating in silico metagenome" unless @o[:q]
 	 if @o[:reuse] and File.size? @o[:baseout] + ".mg.fasta"
-	    puts "  * reusing existing file: #{@o[:baseout]}.mg.fasta." unless @o[:q]
+	    puts "  * reusing existing file: #{@o[:baseout]}.mg.fasta." unless
+	       @o[:q]
 	 else
-	    all_src = File.readlines("#{@o[:baseout]}.src.fasta").select{ |l| l =~ /^>/ }.size
+	    all_src = File.readlines("#{@o[:baseout]}.src.fasta"
+	       ).select{ |l| l =~ /^>/ }.size
 	    thrs = [@o[:thr], all_src].min
-	    puts "  * simulating metagenomes and tagging positive reads in #{thrs} threads." unless @o[:q]
-	    $stderr.puts "   # #{positive_coords}" if @o[:debug]
 	    thr_obj = []
-	    seqs_per_thr = (all_src/thrs).ceil
+	    seqs_per_thr = (all_src.to_f/thrs).ceil
+	    thrs = (all_src.to_f/seqs_per_thr).ceil
+	    puts "  * simulating metagenomes and tagging positive reads in " +
+	       thrs.to_s + " threads." unless @o[:q]
+	    $stderr.puts "   # #{positive_coords}" if @o[:debug]
 	    (0 .. (thrs-1)).each do |thr_i|
 	       output = @o[:baseout] + ".mg.fasta.#{thr_i.to_s}"
 	       thr_obj << output
 	       fork do
 		  seqs_a = thr_i*seqs_per_thr + 1
-		  seqs_b = [seqs_a + seqs_per_thr, all_src].min
+		  seqs_b = [seqs_a + seqs_per_thr - 1, all_src].min
 		  # Create sub-fasta
-		  ofh = File.open("#{@o[:baseout]}.src.fasta.#{thr_i.to_s}", "w")
-		  ifh = File.open("#{@o[:baseout]}.src.fasta", "r")
+		  ofh = File.open("#{@o[:baseout]}.src.fasta.#{thr_i.to_s}","w")
+		  ifh = File.open("#{@o[:baseout]}.src.fasta","r")
 		  seq_i = 0
 		  while l = ifh.gets
 		     seq_i+=1 if l =~ /^>/
-		     break if seq_i > seqs_b
+			     break if seq_i > seqs_b
 		     ofh.print l if seq_i >= seqs_a
 		  end
 		  ifh.close
 		  ofh.close
-		  # Run simulator (except if the temporal file is already there and can be reused)
-		  unless @o[:reuse] and File.size? @o[:baseout] + ".mg.tmp.#{thr_i.to_s}-reads.fa"
-		     bash sprintf(@o[:simulatorcmd], @o[:simulatorbin], "#{@o[:baseout]}.src.fasta.#{thr_i.to_s}", @o[:seqdepth]*@o[:readlen].to_f, @o[:readlen], "#{@o[:baseout]}.mg.tmp.#{thr_i.to_s}")
-		  end
+		  # Run simulator (except if the temporary file is already
+		  # there and can be reused)
+		  bash sprintf(@o[:simulatorcmd], @o[:simulatorbin],
+		     "#{@o[:baseout]}.src.fasta.#{thr_i.to_s}",
+		     @o[:seqdepth]*@o[:readlen].to_f, @o[:readlen],
+		     "#{@o[:baseout]}.mg.tmp.#{thr_i.to_s}") unless
+			@o[:reuse] and
+			File.size? @o[:baseout] +
+			".mg.tmp.#{thr_i.to_s}-reads.fa"
-		  # Tag positives
-		  puts "  * tagging positive reads [thread #{thr_i.to_s}]." unless @o[:q]
-		  ifh = File.open(@o[:baseout] + ".mg.tmp.#{thr_i.to_s}-reads.fa", 'r')
-		  ofh = File.open(@o[:baseout] + ".mg.fasta.#{thr_i.to_s}", 'w')
+		  # Tag positive and negative reads
+		  puts "  * tagging reads [thread #{thr_i}]." unless
+		     @o[:q]
+		  ifh = File.open(@o[:baseout] + ".mg.tmp.#{thr_i}-reads.fa",
+		     "r")
+		  ofh = File.open(@o[:baseout] + ".mg.fasta.#{thr_i}", "w")
 		  while l = ifh.gets
 		     if l =~ /^>/
-			rd = /^>(?<id>\d+) reference=[A-Za-z]+\|(?<genome_id>[A-Za-z0-9_]+)\|.* position=(?<comp>complement\()?(?<from>\d+)\.\.(?<to>\d+)\)? /.match(l)
-			raise "Cannot parse simulated read's defline, are you using Grinder?: #{l}" if rd.nil?
+			rd = %r{
+			   ^>(?<id>\d+)\s
+			   reference=[A-Za-z]+\|
+			   (?<genome_id>[A-Za-z0-9_]+)\|.*\s
+			   position=(?<comp>complement\()?(?<from>\d+)\.\.
+			   (?<to>\d+)\)?\s
+			}x.match(l)
+			raise "Cannot parse simulated read's defline, are " +
+			   "you using Grinder?: #{l}" if rd.nil?
 			positive = false
 			positive_coords[rd[:genome_id].to_sym] ||= []
 			positive_coords[rd[:genome_id].to_sym].each do |gn|
 			   left  = rd[:to].to_i - gn[:from]
 			   right = gn[:to] - rd[:from].to_i
-			   if (left*right >= 0) and ([left, right].min >= @o[:minovl])
+			   if (left*right >= 0) and
+				 ([left, right].min >= @o[:minovl])
 			      positive = true
 			      break
 			   end
 			end
-			l = ">#{thr_i.to_s}_#{rd[:id]}#{positive ? "@%" : ""} " +
-			   "ref=#{rd[:genome_id]}:#{rd[:from]}..#{rd[:to]}#{(rd[:comp]=='complement(')?'-':'+'}\n"
+			negative = false
+			negative_coords[rd[:genome_id].to_sym] ||= []
+			negative_coords[rd[:genome_id].to_sym].each do |gn|
+			   left  = rd[:to].to_i - gn[:from]
+			   right = gn[:to] - rd[:from].to_i
+			   if (left*right >= 0) and
+				 ([left, right].min >= @o[:minovl])
+			      negative = true
+			      break
+			   end
+			end
+			l = ">#{thr_i.to_s}_#{rd[:id]}" +
+			   "#{positive ? "@%" : (negative ? "@$" : "")} " +
+			   "ref=#{rd[:genome_id]}:#{rd[:from]}..#{rd[:to]}" +
+			   "#{(rd[:comp]=="complement(") ? "-" : "+"}\n"
 		     end
 		     ofh.print l
 		  end
@@ -338,9 +359,10 @@ class ROCker
 	    end # (1 .. thrs).each
 	    Process.waitall
 	    # Concatenate results
-	    ofh = File.open(@o[:baseout] + ".mg.fasta", 'w')
+	    ofh = File.open(@o[:baseout] + ".mg.fasta", "w")
 	    thr_obj.each do |t|
-	       raise "Thread failed without error trace: #{t}" unless File.exist? t
+	       raise "Thread failed without error trace: #{t}" unless
+		  File.exist? t
 	       ifh = File.open(t, "r")
 	       while l = ifh.gets
 	          ofh.print l
@@ -356,23 +378,33 @@ class ROCker
       unless @o[:noaln]
 	 puts "Aligning reference set." unless @o[:q]
 	 if @o[:reuse] and File.size? "#{@o[:baseout]}.ref.aln"
-	    puts "  * reusing existing file: #{@o[:baseout]}.ref.aln." unless @o[:q]
+	    puts "  * reusing existing file: #{@o[:baseout]}.ref.aln." unless
+	       @o[:q]
 	 else
-	    bash sprintf(@o[:alignercmd], @o[:alignerbin], "#{@o[:baseout]}.ref.fasta", "#{@o[:baseout]}.ref.aln", @o[:thr])
-	    puts "  +--\n  | IMPORTANT NOTE: Manually checking the alignment before\n  | the 'compile' step is *strongly* encouraged.\n  +--\n" unless @o[:q]
+	    bash(sprintf(@o[:alignercmd],
+	       @o[:alignerbin], "#{@o[:baseout]}.ref.fasta",
+	       "#{@o[:baseout]}.ref.aln", @o[:thr]))
+	    puts "  +--\n  | IMPORTANT NOTE: Manually checking the alignment " +
+	       "before\n  | the 'compile' step is *strongly* encouraged.\n  " +
+	       "+--\n" unless @o[:q]
 	 end
       end
       # Run similarity search
       unless @o[:nosearch]
-	 puts "Running homology search." unless @o[:q]
+	 puts "Running similarity search." unless @o[:q]
 	 if @o[:reuse] and File.size? "#{@o[:baseout]}.ref.blast"
-	    puts "  * reusing existing file: #{@o[:baseout]}.ref.blast." unless @o[:q]
+	    puts "  * reusing existing file: #{@o[:baseout]}.ref.blast." unless
+	       @o[:q]
 	 else
 	    puts "  * preparing database." unless @o[:q]
-	    bash sprintf(@o[:makedbcmd][@o[:search]], @o[:searchbins], 'prot', "#{@o[:baseout]}.ref.fasta", "#{@o[:baseout]}.ref")
+	    bash(sprintf(@o[:makedbcmd],
+	       @o[:searchbins], "prot", "#{@o[:baseout]}.ref.fasta",
+	       "#{@o[:baseout]}.ref"))
 	    puts "  * running similarity search." unless @o[:q]
-	    bash sprintf(@o[:searchcmd][@o[:search]], @o[:searchbins], 'blastx', "#{@o[:baseout]}.mg.fasta", "#{@o[:baseout]}.ref", "#{@o[:baseout]}.ref.blast", @o[:thr])
+	    bash(sprintf(@o[:searchcmd],
+	       @o[:searchbins], "blastx", "#{@o[:baseout]}.mg.fasta",
+	       "#{@o[:baseout]}.ref", "#{@o[:baseout]}.ref.blast", @o[:thr]))
 	 end
       end
@@ -382,7 +414,8 @@ class ROCker
 	 sff  = %w{.src.xml .src.fasta}
 	 sff += %w{.mg.tmp-reads.fa .mg.tmp-ranks.txt} unless @o[:nosimulate]
 	 sff += %w{.ref.phr .ref.pin .ref.psq} unless @o[:nosearch]
-	 sff.each { |sf| File.unlink @o[:baseout] + sf if File.exist? @o[:baseout] + sf }
+	 sff.each { |sf| File.unlink @o[:baseout] + sf if
+	    File.exist? @o[:baseout] + sf }
       end
    end # build!
 end # ROCker