RubyGems - bio-rocker - Versions diffs - 0.2.5 → 1.0.0 - Mend

bio-rocker 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 14d82f0e8c6f1cf052b52c82de99c5616ebfd2a3
-  data.tar.gz: 175ffb75e14ecfa7f12073ef8fddff2d8f8bda5d
+  metadata.gz: b8a10cdc85d8b7b54c21d26f12b90c0b3dff4f82
+  data.tar.gz: c837b3c6687f6705dbfc7c959824dd530e7ee932
 SHA512:
-  metadata.gz: d8ce626b7731d7293339c74edbc8ac06005c6b4f988d7cb45e191b24ccc2194091d214d2352c5cfd9e34018abc4b94ed3ffa1b734a5438bc6246fc4f564f6abf
-  data.tar.gz: 0d7187d853a4ac73b91808f52757c94f7d61bc4f19762a7ba4ec97363c55c1eeb32791f78b31ae5d91a7dd72b6779b3bc4cd34b9783e3509bae72be33e4e523a
+  metadata.gz: 869cdadfed2dad125fc11c03133e2f56df53074a54b0f35d8ea5c6674029e7069332e4c35c486b1f39a417aeff01932a7eee30da44e15de036ce1a2d878d15d4
+  data.tar.gz: 823b30e7923c243f8dc8bb122f50426898e2cdda634516cc53e0325b150946b0699e0a9257a55c06ed8868a0f843f0274ed1d23bdf8ef2de9629fafb66f33552

data/bin/ROCker CHANGED

@@ -16,8 +16,8 @@ require 'optparse'
 $t = {
    'build'   => 'Creates in silico metagenomes and training sets from reference genomes.',
    'compile' => 'Identifies the most discriminant bit-score per alignment position in a set of sequence.',
-   'filter'  => 'Uses a pre-compiled set of bit-score thresholds to filter a search result.',
    'search'  => 'Uses a ROCker compilation to identify reads putatively derived from a set of sequences.',
+   'filter'  => 'Uses a pre-compiled set of bit-score thresholds to filter a search result.',
    'plot'    => 'Generates a graphical representation of the alignment, the thresholds, and the hits.',
 }
 task = (ARGV.size > 0 ? ARGV.shift : '').downcase
@@ -43,49 +43,51 @@ opts = OptionParser.new do |opt|
       opt.on("-p", "--positive ID1,ID2,ID3", Array, "Comma-separated list of UniProtKB IDs corresponding to the 'positive' training set. Required unless -P or -a are used."){ |v| o[:posori]=v }
       opt.on("-n", "--negative ID1,ID2,ID3", Array, "Comma-separated list of UniProtKB IDs corresponding to the 'negative' training set. See also -N."){ |v| o[:negative]=v }
       opt.on("-o", "--baseout PATH", "Prefix for the output files to be generated. Required."){ |v| o[:baseout]=v }
-      #opt.on(      "--nucleotides", "If set, it assumes that the input sequences are in nucleotides (currently not implemented)."){ raise "--nucleotides: This option is currently not implemented." }
       opt.on("-t", "--threads INT", "Number of threads to use. By default: #{ROCker.default :thr}."){ |v| o[:thr]=v.to_i }
       opt.separator ""
       opt.separator "+ ADVANCED BUILDING ARGUMENTS"
       opt.on("-P", "--positive-file PATH", "File containing the positive set (see -p), one UniProtKB ID per line. If used, -p is not required."){ |v| o[:posfile]=v }
       opt.on("-N", "--negative-file PATH", "File containing the negative set (see -n), one UniProtKB ID per line."){ |v| o[:negfile]=v }
       opt.on("-a", "--alignment PATH", "Protein alignment of the reference sequences. The defline must contain UniProtKB ID. If used, -p is not required."){ |v| o[:aln]=v }
-      opt.on("-s", "--seqdepth NUMBER", "Sequencing depth to be used in building the in silico metagenome. By default: '#{ROCker.default :seqdepth}'."){ |v| o[:seqdepth]=v.to_f }
-      opt.on("-v", "--overlap NUMBER", "Minimum overlap with reference gene to tag a read as positive. By default: '#{ROCker.default :minovl}'."){ |v| o[:minovl]=v.to_f }
-      opt.on(      "--genome-frx NUMBER", "Fraction to subsample the positive set genomes to generate the metagenome. By default: #{ROCker.default :genomefrx}"){ |v| o[:genomefrx]=v.to_f }
-      opt.on(      "--per-taxon RANK", "If selected, only one genome per taxon is used to build the metagenome. Valid ranks include: species, genus, family, order, class, phylum.",
-	 "This option replaces --per-genus and --per-species from v0.1.*."){ |v| o[:pertaxon]=v.downcase }
-      opt.on(      "--nometagenome", "Do not create metagenome. Implies --noblast. By default, metagenome is created."){ |v| o[:nomg]=v }
-      opt.on(      "--noblast", "Do not execute BLAST. By default, BLAST is executed."){ |v| o[:noblast]=v }
+      opt.on("-s", "--seqdepth NUMBER", "Sequencing depth (reads/bp) to be used in building the in silico metagenome. By default: '#{ROCker.default :seqdepth}'."){ |v| o[:seqdepth]=v.to_f }
+      opt.on("-l", "--readlen INTEGER", "Average read length of in silico metagenome (in bp). By default: '#{ROCker.default :readlen}'."){ |v| o[:readlen]=v.to_i }
+      opt.on("-v", "--overlap INTEGER", "Minimum overlap (in bp) with reference gene to tag a read as positive. By default: '#{ROCker.default :minovl}'."){ |v| o[:minovl]=v.to_i }
+      opt.on(      "--per-taxon RANK", "If selected, only one genome per taxon is used to build the metagenome. Valid ranks include: species, genus, family, order, class, phylum."){ |v| o[:pertaxon]=v.downcase }
+      opt.on(      "--genome-frx NUMBER", "Fraction to subsample genomes to generate the metagenome. By default: #{ROCker.default :genomefrx}."){ |v| o[:genomefrx]=v.to_f }
+      opt.on(      "--nosimulate", "Do not simulate metagenome. Implies --nosearch. By default, metagenome is simulated."){ |v| o[:nosimulate]=v }
+      opt.on(      "--nosearch", "Do not execute similarity search. By default, it is executed."){ |v| o[:nosearch]=v }
       opt.on(      "--noalignment", "Do not align reference set. By default, references are aligned."){ |v| o[:noaln]=v }
       opt.on(      "--nocleanup", "Keep all intermediate files. By default, intermediate files are removed."){ |v| o[:noclean]=v }
       opt.on(      "--reuse-files", "Re-use existing result files. By default, existing files are ignored."){ |v| o[:reuse]=true }
       opt.separator ""
       opt.separator "+ EXTERNAL SOFTWARE OPTIONS"
-      opt.on("-G", "--grinder PATH", "Path to the grinder executable. By default: '#{ROCker.default :grinder}' (in the $PATH)."){ |v| o[:grinder]=v }
-      opt.on("-M", "--muscle PATH", "Path to the muscle executable. By default: '#{ROCker.default :muscle}' (in the $PATH)."){ |v| o[:muscle]=v }
-      opt.on("-B", "--blastbins PATH", "Path to the Blast+ executables. By default: '#{ROCker.default :blastbins}' (in the $PATH)."){ |v| o[:blastbins]=v }
-      opt.on(      "--grinder-cmd STR", "Command calling grinder, where %1$s: grinder bin, %2$s: input, %3$s: seq. depth, %4$s: output.",
-	 "By default: '#{ROCker.default :grindercmd}'."){ |v| o[:grindercmd]=v }
-      opt.on("--muscle-cmd STR", "Command calling muscle, where %1$s: muscle bin, %2$s: input, %3$s: output.",
-	 "By default: '#{ROCker.default :musclecmd}'."){ |v| o[:musclecmd]=v }
-      opt.on("--blast-cmd STR", "Command calling BLAST search, where %1$s: blast bins, %2$s: program, %3$s: input, %4$s: database, %5$s: output, %6$d: threads.",
-	 "By default: '#{ROCker.default :blastcmd}'."){ |v| o[:blastcmd]=v }
-      opt.on("--makedb-cmd STR", "Command calling BLAST format, where %1$s: blast bins, %2$s: dbtype, %3$s: input, %4$s: database.",
-	 "By default: '#{ROCker.default :makedbcmd}'."){ |v| o[:makedbcmd]=v }
+      opt.on(      "--search STR",	"Similarity search algorithm to use. Supported: 'blast' and 'diamond'. By default: '#{ROCker.default :search}'.")	{ |v| o[:search]=v.to_sym }
+      opt.on(      "--simulator STR",	"In silico metagenome simulator to use. Supported: 'grinder'. By default: '#{ROCker.default :simulator}'.")		{ |v| o[:ssimulator]=v.to_sym }
+      opt.on(      "--aligner STR",	"Multiple alignment algorithm to use. Supported: 'clustalo' and 'muscle'. By default: '#{ROCker.default :aligner}'.")	{ |v| o[:aligner]=v.to_sym }
+      opt.on(      "--search-bins PATH",	"Path to the similarity search executables. By default in the $PATH: '#{ROCker.default :searchbins}'.")		{ |v| o[:searchbins]=v }
+      opt.on(      "--simulator-bin PATH",	"Path to the simulator executable. By default in the $PATH: '#{ROCker.default(:simulatorbin).values.join("' or '")}'.")	{ |v| o[:simulatorbin]=v }
+      opt.on(      "--aligner-bin PATH",	"Path to the aligner executable. By default in the $PATH: '#{ROCker.default(:alignerbin).values.join("' or '")}'.")	{ |v| o[:alignerbin]=v }
+      opt.on(      "--search-cmd STR", "Command calling similarity search, where %1$s: binaries, %2$s: program, %3$s: input, %4$s: database, %5$s: output, %6$d: threads.",
+	 *ROCker.default(:searchcmd).keys.map{|k| "By default if --search #{k}: '#{ROCker.default(:searchcmd)[k]}'."}){ |v| o[:searchcmd]=v }
+      opt.on(      "--makedb-cmd STR", "Command calling database format for similarity search, where %1$s: binaries, %2$s: dbtype, %3$s: input, %4$s: database.",
+	 *ROCker.default(:makedbcmd).keys.map{|k| "By default if --search #{k}: '#{ROCker.default(:makedbcmd)[k]}'."}){ |v| o[:makedbcmd]=v }
+      opt.on(      "--simulator-cmd STR", "Command calling simulator, where %1$s: binary, %2$s: input, %3$s: seq. depth (X), %4$d: read len., %5$s: output.",
+	 *ROCker.default(:simulatorcmd).keys.map{|k| "By default if --simulator #{k}: '#{ROCker.default(:simulatorcmd)[k]}'."}){ |v| o[:simulatorcmd]=v }
+      opt.on("--aligner-cmd STR", "Command calling aligner, where %1$s: binary, %2$s: input, %3$s: output, %4$d: threads.",
+	 *ROCker.default(:alignercmd).keys.map{|k| "By default if --aligner #{k}: '#{ROCker.default(:alignercmd)[k]}'."}){ |v| o[:alignercmd]=v }
    when 'compile'
       opt.separator "+ COMPILATION ARGUMENTS"
       opt.on("-a", "--alignment PATH", "Protein alignment of the reference sequences. Required."){ |v| o[:aln]=v }
       opt.on("-b", "--ref-blast PATH",
       		"Tabular BLAST (blastx) of the test reads vs. the reference dataset. Required unless -t exists."){ |v| o[:blast]=v }
       opt.on("-k", "--rocker PATH", "ROCker file to be created. Required."){ |v| o[:rocker]=v }
-      opt.on(      "--nucleotides", "If set, it assumes that the input sequences are in nucleotides. By default, proteins are assumed."){ raise "--nucleotides: This option is currently not implemented." }
       opt.separator ""
       opt.separator "+ ADVANCED COMPILATION ARGUMENTS"
       opt.on("-t", "--table PATH", "Formated tabular file to be created (or reused). Required unless -b is provided."){ |v| o[:table]=v }
       opt.on(      "--min-score NUMBER", "Minimum Bit-Score to consider a hit. By default: #{ROCker.default :minscore}"){ |v| o[:minscore]=v.to_f }
       opt.on(      "--norefine", "Do not refine windows."){ o[:refine]=false }
       opt.on("-w", "--window INT", "Initial size of alignment windows (in number of AA columns). By default: #{ROCker.default :win}."){ |v| o[:win]=v.to_i }
+      opt.on(      "--reuse-files", "Re-use existing result files. By default, existing files are ignored."){ |v| o[:reuse]=true }
       opt.separator ""
       opt.separator "+ INPUT/OUTPUT"
       opt.separator "   o The input alignment (-a) MUST be in FastA format, and the IDs must"
@@ -107,15 +109,23 @@ opts = OptionParser.new do |opt|
       opt.separator "      5. Bit score threshold set for the window."
       opt.separator "     The file also contains the alignment (commented with #:)."
       opt.separator ""
+   when 'search'
+      opt.on("-k", "--rocker PATH", "ROCker file generated by the compile task (-k). Required."){ |v| o[:rocker]=v }
+      opt.on("-q", "--query PATH", "File containing the query sequences in FastA format. Required."){ |v| o[:query]=v }
+      opt.on("-o", "--out-blast PATH", "Filtered tabular BLAST to be created. Required."){ |v| o[:oblast]=v }
+      opt.separator ""
+      opt.separator "+ EXTERNAL SOFTWARE OPTIONS"
+      opt.on(      "--search STR",	"Similarity search algorithm to use. Supported: 'blast' and 'diamond'. By default: '#{ROCker.default :search}'.")	{ |v| o[:search]=v.to_sym }
+      opt.on(      "--search-bins PATH",	"Path to the similarity search executables. By default in the $PATH: '#{ROCker.default :searchbins}'.")		{ |v| o[:searchbins]=v }
+      opt.on(      "--search-cmd STR", "Command calling similarity search, where %1$s: binaries, %2$s: program, %3$s: input, %4$s: database, %5$s: output, %6$d: threads.",
+	 *ROCker.default(:searchcmd).keys.map{|k| "By default if --search #{k}: '#{ROCker.default(:searchcmd)[k]}'."}){ |v| o[:searchcmd]=v }
+      opt.on(      "--makedb-cmd STR", "Command calling database format for similarity search, where %1$s: binaries, %2$s: dbtype, %3$s: input, %4$s: database.",
+	 *ROCker.default(:makedbcmd).keys.map{|k| "By default if --search #{k}: '#{ROCker.default(:makedbcmd)[k]}'."}){ |v| o[:makedbcmd]=v }
    when 'filter'
       opt.separator "+ FILTERING ARGUMENTS"
       opt.on("-k", "--rocker PATH", "ROCker file generated by the compile task (-k). Required."){ |v| o[:rocker]=v }
       opt.on("-x", "--query-blast PATH", "Tabular BLAST (blastx) of the query reads vs. the reference dataset. Required."){ |v| o[:qblast]=v }
       opt.on("-o", "--out-blast PATH", "Filtered tabular BLAST to be created. Required."){ |v| o[:oblast]=v }
-   when 'search'
-      opt.on("-k", "--rocker PATH", "ROCker file generated by the compile task (-k). Required."){ |v| o[:rocker]=v }
-      opt.on("-q", "--query PATH", "File containing the query sequences in FastA format. Required."){ |v| o[:query]=v }
-      opt.on("-o", "--out-blast PATH", "Filtered tabular BLAST to be created. Required."){ |v| o[:oblast]=v }
    when 'plot'
       opt.separator "+ PLOTTING ARGUMENTS"
       opt.on("-k", "--rocker PATH", "ROCker file generated by the compile task (-k). Required."){ |v| o[:rocker]=v }
@@ -127,7 +137,7 @@ opts = OptionParser.new do |opt|
       opt.on("-t", "--table PATH", "Formated tabular file to be created (or reused). Required unless -b is provided."){ |v| o[:table]=v }
       opt.on(      "--color", "Color alignment by amino acid."){ o[:color]=true }
       opt.on(      "--no-transparency", "Do not use (semi-)transparencies."){ |v| o[:transparency] = v }
-      opt.on(      "--min-score NUMBER", "Minimum Bit-Score to consider a hit. By default: #{ROCker.default :minscore}"){ |v| o[:minscore]=v.to_f }
+      opt.on(      "--min-score NUMBER", "Minimum Bit-Score to consider a hit. By default: #{ROCker.default :minscore}."){ |v| o[:minscore]=v.to_f }
       opt.on(      "--stats-impact", "Plot impact on statistics, instead of absolute values per window."){ o[:impact]=true }
       opt.on(      "--stats-ylim STRING", "Limits of the Y-axis in the bottom panel. By default: '-2,.1' if --stats-impact is set, '50,100' otherwise."){ |v| o[:ylim]=v }
       opt.on("-s", "--subject SBJ1,SBJ2,...", Array,

data/lib/rocker.rb CHANGED

@@ -2,7 +2,7 @@
 # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
 # @author Luis (Coto) Orellana
 # @license artistic license 2.0
-# @update May-14-2015
+# @update Jun-05-2015
 #
 require 'rocker/blasthit'
@@ -10,40 +10,20 @@ require 'rocker/rocdata'
 class ROCker
    #================================[ Class ]
-   @@EBIREST = 'http://www.ebi.ac.uk/Tools'
    @@DEFAULTS = {
       # General
-      :q=>false, :r=>'R', :nucl=>false, :debug=>false,
-      # Build
-      :positive=>[], :negative=>[], :thr=>2,:genomefrx=>1.0,
-	 # ext. software
-	 :grinder=>'grinder', :muscle=>'muscle', :blastbins=>'', :seqdepth=>3, :minovl=>0.75,
-	 :grindercmd=>'%1$s -reference_file "%2$s" -cf "%3$f" -base_name "%4$s" -dc \'-~*Nn\' -md "uniform 0.1" -mr "95 5" -rd "100 uniform 5"',
-	 :musclecmd=>'%1$s -in "%2$s" -out "%3$s" -quiet',
-	 :blastcmd=>'%1$s%2$s -query "%3$s" -db "%4$s" -out "%5$s" -num_threads %6$d -outfmt 6 -max_target_seqs 1',
-	 :makedbcmd=>'%1$smakeblastdb -dbtype %2$s -in "%3$s" -out "%4$s"',
-      # Compile
-      :refine=>true, :win=>20, :minscore=>0,
-      # Filter
-      :sbj=>[],
-      # Plot
-      :color=>false, :gformat=>'pdf', :width=>9, :height=>9, :impact=>false, :transparency=>true,
+      :q=>false, :r=>'R', :nucl=>false, :debug=>false,:thr=>2,:search=>:blast,
+      # External software
+      :searchbins=>'',
+      :searchcmd=>{
+	 :blast=>'%1$s%2$s -query "%3$s" -db "%4$s" -out "%5$s" -num_threads %6$d -outfmt 6 -max_target_seqs 1',
+	 :diamond=>'%1$sdiamond %2$s -q "%3$s" -d "%4$s" -o "%5$s" -t %6$d -k 1 --min-score 20 --sensitive'},
+      :makedbcmd=>{
+	 :blast=>'%1$smakeblastdb -dbtype %2$s -in "%3$s" -out "%4$s"',
+	 :diamond=>'%1$sdiamond makedb --in "%3$s" -d "%4$s"'}
    }
-   @@HAS_BUILD_GEMS = nil
-   def self.ebirest() @@EBIREST ; end
    def self.defaults() @@DEFAULTS ; end
    def self.default(k) @@DEFAULTS[k] ; end
-   def self.has_build_gems?
-      return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
-      @@HAS_BUILD_GEMS = TRUE
-      begin
-	 require 'rubygems'
-	 require 'restclient'
-      rescue LoadError
-	 @@HAS_BUILD_GEMS = FALSE
-      end
-      @@HAS_BUILD_GEMS
-   end
    #================================[ Instance ]
    attr_reader :o
@@ -53,374 +33,6 @@ class ROCker
       RInterface.R_BIN = opts[:r] unless opts[:r].nil?
    end
-   #================================[ Build ]
-   def build!
-      # Check requirements
-      puts "Testing environment." unless @o[:q]
-      @o[:noblast]=true if @o[:nomg]
-      raise "Unsatisfied requirements, please see the help message (-h)." unless ROCker.has_build_gems?
-      @o[:positive] += @o[:posori] unless @o[:posori].nil?
-      @o[:positive] += File.readlines(@o[:posfile]).map{ |l| l.chomp } unless @o[:posfile].nil?
-      @o[:negative] += File.readlines(@o[:negfile]).map{ |l| l.chomp } unless @o[:negfile].nil?
-      unless @o[:aln].nil?
-         aln = Alignment.new
-	 aln.read_fasta @o[:aln]
-	 @o[:positive] += aln.get_ids
-      end
-      raise "-p or -P are mandatory." if @o[:positive].size==0
-      raise "-o/--baseout is mandatory." if @o[:baseout].nil?
-      if @o[:positive].size == 1 and not @o[:noaln]
-	 warn "\nWARNING: Positive set contains only one sequence, turning off alignment.\n\n"
-	 @o[:noaln] = true
-      end
-      self.bash "#{@o[:grinder]} --version", "-G/--grinder must be executable. Is Grinder installed?" unless @o[:nomg]
-      self.bash "#{@o[:muscle]} -version", "-M/--muscle must be executable. Is Muscle installed?" unless @o[:noaln]
-      self.bash "#{@o[:blastbins]}makeblastdb -version", "-B/--blastbins must contain executables. Is BLAST+ installed?" unless @o[:noblast]
-      # Download genes
-      puts "Downloading gene data." unless @o[:q]
-      f = File.open(@o[:baseout] + '.ref.fasta', 'w')
-      if @o[:posori].nil? and @o[:posfile].nil? and not @o[:aln].nil?
-	 puts "  * re-using aligned sequences as positive set." unless @o[:q]
-	 f.print aln.to_seq_s
-	 @o[:noaln] = true
-      else
-	 puts "  * downloading #{@o[:positive].size} sequence(s) in positive set." unless @o[:q]
-	 $stderr.puts "   # #{@o[:positive]}" if @o[:debug]
-	 ids = Array.new(@o[:positive])
-	 while ids.size>0
-	    f.print ebiFetch(:uniprotkb, ids.shift(200), :fasta)
-	 end
-      end
-      f.close
-      genome_ids = {:positive=>[], :negative=>[]}
-      [:positive, :negative].each do |set|
-         unless @o[set].size==0
-	    puts "  * gathering genomes from #{@o[set].size} #{set.to_s} sequence(s)." unless @o[:q]
-	    $stderr.puts "   # #{@o[set]}" if @o[:debug]
-	    genome_ids[set] = genes2genomes(@o[set])
-	 end
-      end
-      raise "No genomes associated with the positive set." if genome_ids[:positive].size==0
-      genome_ids[:positive] = genome_ids[:positive].sample( (genome_ids[:positive].size*@o[:genomefrx]).round ) if @o[:genomefrx]
-      raise "No positive genomes selected for metagenome construction, is --genome-frx too small?" if genome_ids[:positive].empty?
-      all_genome_ids = genome_ids.values.reduce(:+).uniq
-      # Locate genes
-      puts "Analyzing genome data." unless @o[:q]
-      puts "  * downloading and parsing #{genome_ids[:positive].size} GFF3 document(s)." unless @o[:q]
-      $stderr.puts "   # #{genome_ids[:positive]}" if @o[:debug]
-      positive_coords = {}
-      genome_org = {}
-      i = 0
-      genome_ids[:positive].each do |genome_id|
-	 print "  * scanning #{(i+=1).ordinalize} genome out of #{genome_ids[:positive].size}. \r" unless @o[:q]
-	 unless @o[:pertaxon].nil?
-	    genome_taxon = genome2taxon(genome_id, @o[:pertaxon])
-	    next unless genome_org[ genome_taxon ].nil?
-	    genome_org[ genome_taxon ] = genome_id
-	 end
-	 $stderr.puts "   # Looking for any of #{@o[:positive]}" if @o[:debug]
-	 genome_file = @o[:baseout] + '.src.' + i.to_s + '.gff3'
-	 if @o[:reuse] and File.exist? genome_file
-	    puts "  * reusing existing file: #{genome_file}." unless @o[:q]
-	    ifh = File.open(genome_file, 'r')
-	    doc = ifh.readlines.grep(/^[^#]/)
-	    ifh.close
-	 else
-	    genome_file=nil unless @o[:noclean]
-	    res = ebiFetch(:embl, [genome_id], :gff3, genome_file)
-	    doc = res.split("\n").grep(/^[^#]/)
-	 end
-	 doc.each do |ln|
-	    next if ln =~ /^#/
-	    r = ln.chomp.split /\t/
-	    next if r.size < 9
-	    prots = r[8].split(/;/).grep(/^db_xref=UniProtKB[\/A-Za-z-]*:/){ |xref| xref.split(/:/)[1] }
-	    p = prots.select{ |p| @o[:positive].include? p }.first
-	    next if p.nil?
-	    positive_coords[ r[0] ] ||= []
-	    positive_coords[ r[0] ] << {
-	       #:strand	=> r[6],
-	       :prot_id	=> p,
-	       :from	=> r[3].to_i,
-	       :to	=> r[4].to_i
-	    }
-	 end
-      end
-      print "\n" unless @o[:q]
-      unless @o[:pertaxon].nil?
-	 genome_ids[:positive] = genome_org.values
-	 puts "  Using #{genome_org.size} genome(s) after filtering by #{@o[:pertaxon]}." unless @o[:q]
-      end
-      all_genome_ids = genome_ids.values.reduce(:+).uniq
-      found = positive_coords.values.map{ |a| a.map{ |b| b[:prot_id] } }.reduce(:+)
-      raise "Cannot find the genomic location of any provided sequence." if found.nil?
-      missing = @o[:positive] - found
-      warn "\nWARNING: Cannot find genomic location of sequence(s) #{missing.join(',')}.\n\n" unless missing.size==0 or @o[:genomefrx]<1.0 or not @o[:pertaxon].nil?
-      # Download genomes
-      genomes_file = @o[:baseout] + '.src.fasta'
-      if @o[:reuse] and File.exist? genomes_file
-	 puts "  * reusing existing file: #{genomes_file}." unless @o[:q]
-      else
-	 puts "  * downloading #{all_genome_ids.size} genome(s) in FastA." unless @o[:q]
-	 $stderr.puts "   # #{all_genome_ids}" if @o[:debug]
-	 ids = Array.new(all_genome_ids)
-	 ofh = File.open(genomes_file, 'w')
-	 while ids.size>0
-	    ofh.print ebiFetch('embl', ids.shift(200), 'fasta')
-	 end
-	 ofh.close
-      end
-      # Generate metagenome
-      unless @o[:nomg]
-	 puts "Generating in silico metagenome" unless @o[:q]
-	 if @o[:reuse] and File.exist? @o[:baseout] + ".mg.fasta"
-	    puts "  * reusing existing file: #{@o[:baseout]}.mg.fasta." unless @o[:q]
-	 else
-	    all_src = File.readlines("#{@o[:baseout]}.src.fasta").select{ |l| l =~ /^>/ }.size
-	    thrs = [@o[:thr], all_src].min
-	    puts "  * running grinder and tagging positive reads (#{thrs} threads)." unless @o[:q]
-	    $stderr.puts "   # #{positive_coords}" if @o[:debug]
-	    thr_obj = []
-	    seqs_per_thr = (all_src/thrs).ceil
-	    (0 .. (thrs-1)).each do |thr_i|
-	       thr_obj << Thread.new do
-		  Thread.current[:seqs_a] = thr_i*seqs_per_thr + 1
-		  Thread.current[:seqs_b] = [Thread.current[:seqs_a] + seqs_per_thr, all_src].min
-		  # Create sub-fasta
-		  Thread.current[:ofh] = File.open("#{@o[:baseout]}.src.fasta.#{thr_i.to_s}", 'w')
-		  Thread.current[:ifh] = File.open("#{@o[:baseout]}.src.fasta", 'r')
-		  Thread.current[:seq_i] = 0
-		  while Thread.current[:l] = Thread.current[:ifh].gets
-		     Thread.current[:seq_i]+=1 if Thread.current[:l] =~ /^>/
-		     break if Thread.current[:seq_i] > Thread.current[:seqs_b]
-		     Thread.current[:ofh].print Thread.current[:l] if Thread.current[:seq_i] >= Thread.current[:seqs_a]
-		  end
-		  Thread.current[:ifh].close
-		  Thread.current[:ofh].close
-		  bash sprintf(@o[:grindercmd], @o[:grinder], "#{@o[:baseout]}.src.fasta.#{thr_i.to_s}", @o[:seqdepth], "#{@o[:baseout]}.mg.tmp.#{thr_i.to_s}")
-		  # Tag positives
-		  puts "  * tagging positive reads." unless @o[:q]
-		  Thread.current[:ifh] = File.open(@o[:baseout] + ".mg.tmp.#{thr_i.to_s}-reads.fa", 'r')
-		  Thread.current[:ofh] = File.open(@o[:baseout] + ".mg.fasta.#{thr_i.to_s}", 'w')
-		  while Thread.current[:l]=Thread.current[:ifh].gets
-		     Thread.current[:rd] = /^>(?<id>\d+) reference=[A-Za-z]+\|(?<genome_id>[A-Za-z0-9_]+)\|.* position=(?<comp>complement\()?(?<from>\d+)\.\.(?<to>\d+)\)? /.match(Thread.current[:l])
-		     unless Thread.current[:rd].nil?
-			Thread.current[:positive] = false
-			positive_coords[Thread.current[:rd][:genome_id]] ||= []
-			positive_coords[Thread.current[:rd][:genome_id]].each do |gn|
-			   Thread.current[:left]  = Thread.current[:rd][:to].to_i - gn[:from]
-			   Thread.current[:right] = gn[:to] - Thread.current[:rd][:from].to_i
-			   if (Thread.current[:left]*Thread.current[:right] >= 0) and ([Thread.current[:left], Thread.current[:right]].min/(Thread.current[:rd][:to].to_i-Thread.current[:rd][:from].to_i) >= @o[:minovl])
-			      Thread.current[:positive] = true
-			      break
-			   end
-			end
-			Thread.current[:l] = ">#{Thread.current[:rd][:id]}#{Thread.current[:positive] ? "@%" : ""} ref=#{Thread.current[:rd][:genome_id]}:#{Thread.current[:rd][:from]}..#{Thread.current[:rd][:to]}#{(Thread.current[:rd][:comp]=='complement(')?'-':'+'}\n"
-		     end
-		     Thread.current[:ofh].print Thread.current[:l]
-		  end
-		  Thread.current[:ofh].close
-		  Thread.current[:ifh].close
-		  Thread.current[:output] = @o[:baseout] + ".mg.fasta.#{thr_i.to_s}"
-	       end # Thread.new do
-	    end # (1 .. thrs).each
-	    # Concatenate results
-	    ofh = File.open(@o[:baseout] + ".mg.fasta", 'w')
-	    thr_obj.each do |t|
-	       t.join
-	       raise "Thread failed without error trace: #{t}" if t[:output].nil?
-	       ifh = File.open(t[:output], 'r')
-	       while l = ifh.gets
-	          ofh.print l
-	       end
-	       ifh.close
-	       File.unlink t[:output]
-	    end
-	    ofh.close
-         end
-      end # unless @o[:nomg]
-      # Align references
-      unless @o[:noaln]
-	 puts "Aligning reference set." unless @o[:q]
-	 if @o[:reuse] and File.exist? "#{@o[:baseout]}.ref.aln"
-	    puts "  * reusing existing file: #{@o[:baseout]}.ref.aln." unless @o[:q]
-	 else
-	    bash sprintf(@o[:musclecmd], @o[:muscle], "#{@o[:baseout]}.ref.fasta", "#{@o[:baseout]}.ref.aln")
-	    puts "  +--\n  | IMPORTANT NOTE: Manually checking the alignment before\n  | the 'compile' step is *strongly* encouraged.\n  +--\n" unless @o[:q]
-	 end
-      end
-      # Run BLAST
-      unless @o[:noblast]
-	 puts "Running homology search." unless @o[:q]
-	 if @o[:reuse] and File.exist? "#{@o[:baseout]}.ref.blast"
-	    puts "  * reusing existing file: #{@o[:baseout]}.ref.blast." unless @o[:q]
-	 else
-	    puts "  * preparing database." unless @o[:q]
-	    bash sprintf(@o[:makedbcmd], @o[:blastbins], (@o[:nucl]?'nucl':'prot'), "#{@o[:baseout]}.ref.fasta", "#{@o[:baseout]}.ref")
-	    puts "  * running BLAST." unless @o[:q]
-	    bash sprintf(@o[:blastcmd], @o[:blastbins], (@o[:nucl]?'blastn':'blastx'), "#{@o[:baseout]}.mg.fasta", "#{@o[:baseout]}.ref", "#{@o[:baseout]}.ref.blast", @o[:thr])
-	 end
-      end
-      # Clean
-      unless @o[:noclean]
-	 puts "Cleaning." unless @o[:q]
-	 sff  = %w{.src.xml .src.fasta}
-	 sff += %w{.mg.tmp-reads.fa .mg.tmp-ranks.txt} unless @o[:nomg]
-	 sff += %w{.ref.phr .ref.pin .ref.psq} unless @o[:noblast]
-	 sff.each { |sf| File.unlink @o[:baseout] + sf if File.exist? @o[:baseout] + sf }
-      end
-   end # build!
-   #================================[ Compile ]
-   def compile!
-      raise "-a/--alignment is mandatory." if @o[:aln].nil?
-      raise "-a/--alignment must exist." unless File.exist? @o[:aln]
-      if @o[:table].nil?
-	 raise "-t/--table is mandatory unless -b is provided." if @o[:blast].nil?
-	 @o[:table] = "#{@o[:blast]}.table"
-      end
-      raise "-b/--blast is mandatory unless -t exists." if @o[:blast].nil? and not File.exist? @o[:table]
-      raise "-k/--rocker is mandatory." if @o[:rocker].nil?
-      puts "Testing environment." unless @o[:q]
-      bash "echo '' | #{@o[:r]} --vanilla", "-r/--path-to-r must be executable. Is R installed?"
-      bash "echo \"library('pROC')\" | #{@o[:r]} --vanilla", "Please install the 'pROC' library for R first."
-      puts "Reading files." unless @o[:q]
-      puts "  * loading alignment: #{@o[:aln]}." unless @o[:q]
-      aln = Alignment.new
-      aln.read_fasta @o[:aln]
-      if File.exist? @o[:table]
-	 puts "  * reusing existing file: #{@o[:table]}." unless @o[:q]
-      else
-	 puts "  * generating table: #{@o[:table]}." unless @o[:q]
-	 blast2table(@o[:blast], @o[:table], aln, @o[:minscore])
-      end
-      puts "Analyzing data." unless @o[:q]
-      puts "  * computing windows." unless @o[:q]
-      data = ROCData.new(@o[:table], aln, @o[:win])
-      data.nucl = @o[:nucl]
-      if @o[:refine]
-	 puts "  * refining windows." unless @o[:q]
-	 warn "Insufficient hits to refine results." unless data.refine! @o[:table]
-      end
-      puts "  * saving ROCker file: #{@o[:rocker]}." unless @o[:q]
-      data.save @o[:rocker]
-   end # compile!
-   #================================[ Filter ]
-   def filter!
-      raise "-k/--rocker is mandatory." if @o[:rocker].nil?
-      raise "-x/--query-blast is mandatory." if @o[:qblast].nil?
-      raise "-o/--out-blast is mandatory." if @o[:oblast].nil?
-      puts "Reading ROCker file." unless @o[:q]
-      data = ROCData.new @o[:rocker]
-      puts "Filtering BLAST." unless @o[:q]
-      ih = File.open(@o[:qblast], 'r')
-      oh = File.open(@o[:oblast], 'w')
-      while ln = ih.gets
-	 bh = BlastHit.new(ln, data.aln)
-	 oh.print ln if not(bh.sfrom.nil?) and bh.bits >= data.win_at_col(bh.midpoint).thr
-      end
-      ih.close
-      oh.close
-   end # filter!
-   #================================[ Search ]
-   def search!
-      raise "-k/--rocker is mandatory." if @o[:rocker].nil?
-      raise "Code Under development..."
-      # ToDo
-      # [ ... ]
-   end # search!
-   #================================[ Plot ]
-   def plot!
-      raise "-k/--rocker is mandatory." if o[:rocker].nil?
-      if @o[:table].nil?
-	 raise "-t/--table is mandatory unless -b is provided." if @o[:blast].nil?
-	 @o[:table] = "#{@o[:blast]}.table"
-      end
-      raise "-b/--blast is mandatory unless -t exists." if @o[:blast].nil? and not File.exist? @o[:table]
-      puts "Testing environment." unless @o[:q]
-      bash "echo '' | #{@o[:r]} --vanilla", "-r/--path-to-r must be executable. Is R installed?"
-      puts "Reading files." unless @o[:q]
-      puts "  * loding ROCker file: #{@o[:rocker]}." unless @o[:q]
-      data = ROCData.new @o[:rocker]
-      if File.exist? @o[:table]
-	 puts "  * reusing existing file: #{@o[:table]}." unless @o[:q]
-      else
-	 puts "  * generating table: #{@o[:table]}." unless @o[:q]
-	 blast2table(@o[:blast], @o[:table], data.aln, @o[:minscore])
-      end
-      puts "Plotting matches." unless @o[:q]
-      extra = @o[:gformat]=='pdf' ? "" : ", units='in', res=300"
-      @o[:gout] ||= "#{@o[:rocker]}.#{@o[:gformat]}"
-      data.rrun "#{@o[:gformat]}('#{@o[:gout]}', #{@o[:width]}, #{@o[:height]}#{extra});"
-      data.rrun "layout(c(2,1,3), heights=c(2-1/#{data.aln.size},3,1));"
-      some_thr = data.load_table! @o[:table], @o[:sbj], @o[:minscore]
-      data.rrun "par(mar=c(0,4,0,0.5)+.1);"
-      data.rrun "plot(1, t='n', xlim=c(0.5,#{data.aln.cols}+0.5), ylim=range(x$V4)+c(-0.04,0.04)*diff(range(x$V4)), xlab='', ylab='Bit score', xaxs='i', xaxt='n');"
-      data.rrun "noise <- runif(ncol(x),-.2,.2)"
-      data.rrun "arrows(x0=x$V2, x1=x$V3, y0=x$V4+noise, col=ifelse(x$V5==1, rgb(0,0,.5,#{@o[:transparency] ? ".2" : "1"}), rgb(.5,0,0,#{@o[:transparency] ? ".2" : "1"})), length=0);"
-      data.rrun "points(x$V6, x$V4+noise, col=ifelse(x$V5==1, rgb(0,0,.5,#{@o[:transparency] ? ".5" : "1"}), rgb(.5,0,0,#{@o[:transparency] ? ".5" : "1"})), pch=19, cex=1/4);"
-      puts "Plotting windows." unless @o[:q]
-      if some_thr
-	 data.rrun "arrows(x0=w$V1, x1=w$V2, y0=w$V5, lwd=2, length=0)"
-	 data.rrun "arrows(x0=w$V2[-nrow(w)], x1=w$V1[-1], y0=w$V5[-nrow(w)], y1=w$V5[-1], lwd=2, length=0)"
-      end
-      data.rrun "legend('bottomright',legend=c('Match span','Match mid-point','Reference','Non-reference')," +
-	 "lwd=c(1,NA,1,1),pch=c(NA,19,19,19),col=c('black','black','darkblue','darkred'),ncol=4,bty='n')"
-      puts "Plotting alignment." unless @o[:q]
-      data.rrun "par(mar=c(0,4,0.5,0.5)+0.1);"
-      data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}),ylim=c(1,#{data.aln.seqs.size}),xlab='',ylab='Alignment',xaxs='i',xaxt='n',yaxs='i',yaxt='n',bty='n');"
-      i = 0
-      data.rrun "clr <- rainbow(26, v=1/2, s=3/4);" if @o[:color]
-      data.aln.seqs.values.each do |s|
-         color = s.aln.split(//).map{|c| c=="-" ? "'grey80'" : (@o[:sbj].include?(s.id) ? "'red'" : (@o[:color] ? "clr[#{c.ord-64}]" : "'black'"))}.join(',')
-	 data.rrun "rect((1:#{data.aln.cols-1})-0.5, rep(#{i}, #{data.aln.cols-1}), (1:#{data.aln.cols-1})+0.5, rep(#{i+1}, #{data.aln.cols-1}), col=c(#{color}), border=NA);"
-	 i += 1
-      end
-      puts "Plotting statistics." unless @o[:q]
-      data.rrun "par(mar=c(5,4,0,0.5)+.1);"
-      data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}),ylim=c(#{@o[:ylim].nil? ? (@o[:impact] ? "-2,.1" : "50,100") : @o[:ylim]}),xlab='Alignment position (amino acids)',ylab='Precision',xaxs='i');"
-      if some_thr
-	 sn = data.rrun "100*sum(w$tp)/(sum(w$tp)+sum(w$fn))", :float
-	 sp = data.rrun "100*sum(w$tn)/(sum(w$fp)+sum(w$tn))", :float
-	 ac = data.rrun "100*(sum(w$tp)+sum(w$tn))/(sum(w$p)+sum(w$n))", :float
-	 unless @o[:q]
-	    puts "  * sensitivity: #{sn}%"
-	    puts "  * specificity: #{sp}%"
-	    puts "  * accuracy: #{ac}%"
-	 end
-	 data.rrun "pos <- (w$V1+w$V2)/2"
-	 if @o[:impact]
-	    data.rrun "lines(pos[!is.na(w$specificity)], (w$specificity[!is.na(w$specificity)]-#{sp})*w$tp[!is.na(w$specificity)]/sum(w$tp), col='darkred', lwd=2, t='o', cex=1/3, pch=19);"
-	    data.rrun "lines(pos[!is.na(w$sensitivity)], (w$sensitivity[!is.na(w$sensitivity)]-#{sn})*w$tn[!is.na(w$sensitivity)]/sum(w$tn), col='darkgreen', lwd=2, t='o', cex=1/3, pch=19);"
-	    data.rrun "lines(pos[!is.na(w$accuracy)], (w$accuracy[!is.na(w$accuracy)]-#{ac})*(w$tp+w$tn)[!is.na(w$accuracy)]/sum(c(w$tp, w$tn)), col='darkblue', lwd=2, t='o', cex=1/3, pch=19);"
-	 else
-	    data.rrun "lines(pos[!is.na(w$specificity)], w$specificity[!is.na(w$specificity)], col='darkred', lwd=2, t='o', cex=1/3, pch=19);"
-	    data.rrun "lines(pos[!is.na(w$sensitivity)], w$sensitivity[!is.na(w$sensitivity)], col='darkgreen', lwd=2, t='o', cex=1/3, pch=19);"
-	    data.rrun "lines(pos[!is.na(w$accuracy)], w$accuracy[!is.na(w$accuracy)], col='darkblue', lwd=2, t='o', cex=1/3, pch=19);"
-	 end
-	 #data.rrun "lines(pos[!is.na(w$precision)], w$precision[!is.na(w$precision)], col='purple', lwd=2, t='o', cex=1/3, pch=19);"
-      end
-      data.rrun "legend('bottomright',legend=c('Specificity','Sensitivity','Accuracy'),lwd=2,col=c('darkred','darkgreen','darkblue'),ncol=3,bty='n')"
-      data.rrun "dev.off();"
-   end # plot!
    #================================[ Utilities ]
    def blast2table(blast_f, table_f, aln, minscore)
       ifh = File.open(blast_f, "r")
@@ -432,39 +44,6 @@ class ROCker
       ifh.close
       ofh.close
    end
-   def genes2genomes(gene_ids)
-      genomes = []
-      ids = Array.new(gene_ids)
-      while ids.size>0
-	 doc = ebiFetch(:uniprotkb, ids.shift(200), :annot).split("\n")
-	 genomes += doc.grep( /^DR\s+EMBL;/ ).map{ |ln| ln.split('; ')[1] }
-      end
-      genomes.uniq
-   end
-   def genome2taxid(genome_id)
-      ln = ebiFetch('embl', [genome_id], 'annot').split(/[\n\r]/).grep(/^FT\s+\/db_xref="taxon:/).first
-      return ln if ln.nil?
-      ln.sub(/.*"taxon:(\d+)".*/, "\\1")
-   end
-   def genome2taxon(genome_id, rank='species')
-      xml = ebiFetch('taxonomy', [genome2taxid(genome_id)], 'enataxonomyxml').gsub(/\s*\n\s*/,'')
-      xml.scan(/<taxon [^>]+>/).grep(/rank="#{rank}"/).first.sub(/.* taxId="(\d+)".*/,"\\1")
-   end
-   def restcall(url, outfile=nil)
-      response = RestClient.get url
-      raise "Unable to reach EBI REST client, error code #{response.code}." unless response.code == 200
-      unless outfile.nil?
-	 ohf = File.open(outfile, 'w')
-	 ohf.print response.to_s
-	 ohf.close
-      end
-      response.to_s
-   end
-   def ebiFetch(db, ids, format, outfile=nil)
-      url = "#{ROCker.ebirest}/dbfetch/dbfetch/#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
-      $stderr.puts "   # Calling: #{url}" if @o[:debug]
-      self.restcall url
-   end
    def bash(cmd, err_msg=nil)
       o = `#{cmd} 2>&1 && echo '{'`
       raise (err_msg.nil? ? "Error executing: #{cmd}\n\n#{o}" : err_msg) unless o[-2]=='{'
@@ -473,6 +52,14 @@ class ROCker
 end
 #================================[ Extensions ]
+# To ROCker
+require 'rocker/step/build'
+require 'rocker/step/compile'
+require 'rocker/step/search'
+require 'rocker/step/filter'
+require 'rocker/step/plot'
+# To other
 class Numeric
    def ordinalize
       n= self.to_s

data/lib/rocker/step/build.rb ADDED

@@ -0,0 +1,389 @@
+#
+# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @author Luis (Coto) Orellana
+# @license artistic license 2.0
+# @update Jun-05-2015
+#
+require 'json'
+class ROCker
+   #================================[ Class ]
+   # Base URL of the EBI web tools; ebiFetch appends the dbfetch path to it.
+   @@EBIREST = 'http://www.ebi.ac.uk/Tools'
+   # Defaults specific to the build step, merged into the shared defaults hash.
+   @@DEFAULTS.merge!({:positive=>[], :negative=>[], :genomefrx=>1.0, :seqdepth=>0.03, :readlen=>100, :minovl=>50,
+      # Ext. Software
+      :aligner=>:clustalo, :simulator=>:grinder,
+      :simulatorbin=>{:grinder=>'grinder'},
+      # sprintf template filled by build! with: 1=simulator binary, 2=reference
+      # FastA, 3=coverage (seqdepth*readlen), 4=read length, 5=output base name.
+      :simulatorcmd=>{:grinder=>'%1$s -reference_file "%2$s" -cf "%3$f" -dc \'-~*NnKkMmRrYySsWwBbVvHhDdXx\' -md uniform 0.1 -mr 95 5 -rd %4$d uniform 5 -base_name "%5$s"'},
+      :alignerbin=>{:muscle=>'muscle', :clustalo=>'clustalo'},
+      # sprintf template filled by build! with: 1=aligner binary, 2=input FastA,
+      # 3=output alignment, 4=number of threads (unused by the muscle template).
+      :alignercmd=>{:muscle=>'%1$s -in "%2$s" -out "%3$s" -quiet', :clustalo=>'%1$s -i "%2$s" -o "%3$s" --threads=%4$d --force'}
+   })
+   # Cached result of has_build_gems?; nil means "not checked yet".
+   @@HAS_BUILD_GEMS = nil
+   # Returns the base URL of the EBI web tools.
+   def self.ebirest() @@EBIREST ; end
+   def self.has_build_gems?
+      return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
+      @@HAS_BUILD_GEMS = TRUE
+      begin
+	 require 'rubygems'
+	 require 'restclient'
+      rescue LoadError
+	 @@HAS_BUILD_GEMS = FALSE
+      end
+      @@HAS_BUILD_GEMS
+   end
+   #================================[ Utilities ]
+   def genes2genomes(gene_ids)
+      genomes = []
+      ids = Array.new(gene_ids)
+      while ids.size>0
+	 doc = ebiFetch(:uniprotkb, ids.shift(200), :annot).split("\n")
+	 genomes += doc.grep( /^DR\s+EMBL;/ ).map do |ln|
+	    r=ln.split('; ')
+	    {:genome_id=>r[1], :transl_id=>r[2]}
+	 end
+      end
+      genomes.uniq
+   end
+   def genome2taxid(genome_id)
+      ln = ebiFetch('embl', [genome_id], 'annot').split(/[\n\r]/).grep(/^FT\s+\/db_xref="taxon:/).first
+      return ln if ln.nil?
+      ln.sub(/.*"taxon:(\d+)".*/, "\\1")
+   end
+   def genome2taxon(genome_id, rank='species')
+      xml = ebiFetch('taxonomy', [genome2taxid(genome_id)], 'enataxonomyxml').gsub(/\s*\n\s*/,'')
+      xml.scan(/<taxon [^>]+>/).grep(/rank="#{rank}"/).first.sub(/.* taxId="(\d+)".*/,"\\1")
+   end
+   def restcall(url, outfile=nil)
+      $stderr.puts "   # Calling: #{url}" if @o[:debug]
+      response = RestClient::Request.execute(:method=>:get,  :url=>url, :timeout=>600)
+      raise "Unable to reach EBI REST client, error code #{response.code}." unless response.code == 200
+      unless outfile.nil?
+	 ohf = File.open(outfile, 'w')
+	 ohf.print response.to_s
+	 ohf.close
+      end
+      response.to_s
+   end
+   def ebiFetch(db, ids, format, outfile=nil)
+      url = "#{ROCker.ebirest}/dbfetch/dbfetch/#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
+      res = self.restcall url
+      unless outfile.nil?
+	 ohf = File.open(outfile, 'w')
+	 ohf.print res
+	 ohf.close
+      end
+      res
+   end
+   def get_coords_from_gff3(genome_ids, protein_ids, transl_ids, thread_id, json_file)
+      positive_coords = {}
+      genomes_org = {}
+      i = 0
+      genome_ids.each do |genome_id|
+	 print "  * scanning #{(i+=1).ordinalize} genome out of #{genome_ids.size} in first thread.  \r" if thread_id==0 and not @o[:q]
+	 unless @o[:pertaxon].nil?
+	    genome_taxon = genome2taxon(genome_id, @o[:pertaxon])
+	    genomes_org[ genome_taxon.to_sym ] ||= []
+	    genomes_org[ genome_taxon.to_sym ] << genome_id
+	 end
+	 genome_file = @o[:baseout] + ".src." + genome_id + ".gff3"
+	 if @o[:reuse] and File.size? genome_file
+	    ifh = File.open(genome_file, 'r')
+	    doc = ifh.readlines.grep(/^[^#]/)
+	    ifh.close
+	 else
+	    genome_file=nil unless @o[:noclean]
+	    doc = ebiFetch(:embl, [genome_id], :gff3, genome_file).split("\n").grep(/^[^#]/)
+	 end
+	 doc.each do |ln|
+	    next if ln =~ /^#/
+	    r = ln.chomp.split /\t/
+	    next if r.size < 9
+	    prots = r[8].split(/;/).grep(/^db_xref=UniProtKB[\/A-Za-z-]*:/){ |xref| xref.split(/:/)[1] }
+	    p = prots.select{ |id| protein_ids.include? id }.first
+	    trans = r[8].split(/;/).grep(/^protein_id=/){ |pid| pid.split(/=/)[1] }
+	    t = trans.select{ |id|  transl_ids.include? id }.first
+	    next if p.nil? and t.nil?
+	    positive_coords[ r[0].to_sym ] ||= []
+	    positive_coords[ r[0].to_sym ] << {
+	       :prot_id	=> p,
+	       :tran_id => t,
+	       :from	=> r[3].to_i,
+	       :to	=> r[4].to_i,
+	       :strand	=> r[6]
+	    }
+	 end
+      end
+      print "\n" if thread_id==0 and not @o[:q]
+      ofh = File.open json_file, "w"
+      ofh.print({:positive_coords=>positive_coords, :genomes_org=>genomes_org}.to_json)
+      ofh.close
+   end
+   #================================[ Build ]
+   def build!
+      # Check requirements
+      puts "Testing environment." unless @o[:q]
+      @o[:searchcmd] = @o[:searchcmd][@o[:search]] if @o[:searchcmd].is_a? Hash
+      @o[:makedbcmd] = @o[:makedbcmd][@o[:search]] if @o[:makedbcmd].is_a? Hash
+      @o[:alignercmd] = @o[:alignercmd][@o[:aligner]] if @o[:alignercmd].is_a? Hash
+      @o[:simulatorcmd] = @o[:simulatorcmd][@o[:simulator]] if @o[:simulatorcmd].is_a? Hash
+      @o[:alignerbin] = @o[:alignerbin][@o[:aligner]] if @o[:alignerbin].is_a? Hash
+      @o[:simulatorbin] = @o[:simulatorbin][@o[:simulator]] if @o[:simulatorbin].is_a? Hash
+      @o[:nosearch]=true if @o[:nosimulate]
+      raise "Unsatisfied requirements, please see the help message (-h)." unless ROCker.has_build_gems?
+      @o[:positive] += @o[:posori] unless @o[:posori].nil?
+      @o[:positive] += File.readlines(@o[:posfile]).map{ |l| l.chomp } unless @o[:posfile].nil?
+      @o[:negative] += File.readlines(@o[:negfile]).map{ |l| l.chomp } unless @o[:negfile].nil?
+      unless @o[:aln].nil?
+         aln = Alignment.new
+	 aln.read_fasta @o[:aln]
+	 @o[:positive] += aln.get_ids
+      end
+      raise "-p or -P are mandatory." if @o[:positive].size==0
+      raise "-o/--baseout is mandatory." if @o[:baseout].nil?
+      if @o[:positive].size == 1 and not @o[:noaln]
+	 warn "\nWARNING: Positive set contains only one sequence, turning off alignment.\n\n"
+	 @o[:noaln] = true
+      end
+      unless @o[:nosimulate]
+	 self.bash "#{@o[:simulatorbin]} --version", "--simulator-bin must be executable. Is Grinder installed?" if @o[:simulator]==:grinder
+      end
+      unless @o[:noaln]
+	 self.bash "#{@o[:alignerbin]} -version", "--aligner-bin must be executable. Is Muscle installed?" if @o[:aligner]==:muscle
+	 self.bash "#{@o[:alignerbin]} --version", "--aligner-bin must be executable. Is ClustalOmega installed?" if @o[:aligner]==:clustalo
+      end
+      unless @o[:nosearch]
+	 self.bash "#{@o[:searchbins]}makeblastdb -version", "--search-bins must contain executables. Is BLAST+ installed?" if @o[:search]==:blast
+	 self.bash "#{@o[:searchbins]}diamond --help", "--search-bins must contain executables. Is DIAMOND installed?" if @o[:search]==:diamond
+      end
+      # Download genes
+      puts "Downloading gene data." unless @o[:q]
+      ref_file = @o[:baseout] + ".ref.fasta"
+      if @o[:posori].nil? and @o[:posfile].nil? and not @o[:aln].nil?
+	 puts "  * reusing aligned sequences as positive set." unless @o[:q]
+	 f = File.open(ref_file, "w")
+	 f.print aln.to_seq_s
+	 f.close
+	 @o[:noaln] = true
+      elsif @o[:reuse] and File.size? ref_file
+	 puts "  * reusing positive set: #{ref_file}." unless @o[:q]
+      else
+	 puts "  * downloading #{@o[:positive].size} sequence(s) in positive set." unless @o[:q]
+	 $stderr.puts "   # #{@o[:positive]}" if @o[:debug]
+	 ids = Array.new(@o[:positive])
+	 f = File.open(ref_file, "w")
+	 while ids.size>0
+	    f.print ebiFetch(:uniprotkb, ids.shift(200), :fasta)
+	 end
+	 f.close
+      end
+      genome_ids = {:positive=>[], :negative=>[]}
+      transl_ids = {:positive=>[], :negative=>[]}
+      [:positive, :negative].each do |set|
+         unless @o[set].size==0
+	    puts "  * linking genomes from #{@o[set].size} #{set.to_s} sequence(s)." unless @o[:q]
+	    $stderr.puts "   # #{@o[set]}" if @o[:debug]
+	    r = genes2genomes(@o[set])
+	    genome_ids[set] = r.map{|i| i[:genome_id]}.uniq
+	    transl_ids[set] = r.map{|i| i[:transl_id]}.uniq
+	 end
+      end
+      raise "No genomes associated with the positive set." if genome_ids[:positive].size==0
+      genome_ids[:positive] = genome_ids[:positive].sample( (genome_ids[:positive].size*@o[:genomefrx]).round ) if @o[:genomefrx]
+      raise "No positive genomes selected for metagenome construction, is --genome-frx too small?" if genome_ids[:positive].empty?
+      all_genome_ids = genome_ids.values.reduce(:+).uniq
+      # Locate genes
+      puts "Analyzing genome data." unless @o[:q]
+      coords_file = @o[:baseout] + ".src.coords"
+      if @o[:reuse] and File.size? coords_file
+	 puts "  * reusing coordinates: #{coords_file}." unless @o[:q]
+	 c = JSON.parse File.read(coords_file), {:symbolize_names=>true}
+	 positive_coords = c[:positive_coords]
+	 genome_org = c[:genome_org]
+      else
+	 thrs = [@o[:thr], genome_ids[:positive].size].min
+	 puts "  * downloading and parsing #{genome_ids[:positive].size} GFF3 document(s) in #{thrs} threads." unless @o[:q]
+	 $stderr.puts "   # Looking for proteins: #{@o[:positive]}" if @o[:debug]
+	 $stderr.puts "   # Looking for translations: #{transl_ids[:positive]}" if @o[:debug]
+	 $stderr.puts "   # Looking into: #{genome_ids[:positive]}" if @o[:debug]
+	 thr_obj = []
+	 (0 .. (thrs-1)).each do |thr_i|
+	    ids_to_parse = []
+	    (0 .. (genome_ids[:positive].size-1)).each do |i|
+	       ids_to_parse << genome_ids[:positive][i] if (i % thrs)==thr_i
+	    end
+	    json_file = @o[:baseout] + ".src.coords." + thr_i.to_s
+	    thr_obj << json_file
+	    fork do
+	       get_coords_from_gff3(ids_to_parse, @o[:positive], transl_ids[:positive], thr_i, json_file)
+	    end
+	 end
+	 Process.waitall
+	 # Combine results
+	 positive_coords = {}
+	 genomes_org = {}
+	 genome_org = {}
+	 thr_obj.each do |t|
+	    raise "Thread failed without error trace: #{t}" unless File.exist? t
+	    o = JSON.parse File.read(t), {:symbolize_names=>true, :create_additions=>true}
+	    o[:positive_coords].each_pair do |k,v|
+	       positive_coords[ k ] ||= []
+	       positive_coords[ k ] += v
+	    end
+	    o[:genomes_org].each_pair do |k,v|
+	       genomes_org[ k ] ||= []
+	       genomes_org[ k ] << v
+	    end
+	    File.unlink t
+	 end
+	 # Select one genome per taxon
+	 unless @o[:pertaxon].nil?
+	    genomes_org.each_pair{ |k,v| genome_org[ k ] = v.sample.first }
+	 end
+	 # Save coordinates
+	 ofh = File.open(coords_file, "w")
+	 ofh.print JSON.pretty_generate({:positive_coords=>positive_coords, :genome_org=>genome_org})
+	 ofh.close
+      end
+      unless @o[:pertaxon].nil?
+	 genome_ids[:positive] = genome_org.values
+	 puts "  Using #{genome_org.size} genome(s) after filtering by #{@o[:pertaxon]}." unless @o[:q]
+      end
+      all_genome_ids = genome_ids.values.reduce(:+).uniq
+      found = positive_coords.values.map{ |a| a.map{ |b| b[:prot_id] } }.reduce(:+).compact.uniq
+      unknown_pid = positive_coords.values.map{ |a| a.map{ |b| b[:prot_id].nil? ? b[:tran_id] : nil } }.reduce(:+).compact.uniq
+      raise "Cannot find the genomic location of any provided sequence." if found.nil?
+      missing = @o[:positive] - found
+      warn "\nWARNING: Cannot find genomic location of sequence(s) #{missing.join(',')}.\nMissing: #{missing.size}, Unlinked translations: #{unknown_pid.size}\n\n" unless missing.size==0 or missing.size==unknown_pid.size or @o[:genomefrx]<1.0
+      # Download genomes
+      genomes_file = @o[:baseout] + '.src.fasta'
+      if @o[:reuse] and File.size? genomes_file
+	 puts "  * reusing existing file: #{genomes_file}." unless @o[:q]
+      else
+	 puts "  * downloading #{all_genome_ids.size} genome(s) in FastA." unless @o[:q]
+	 $stderr.puts "   # #{all_genome_ids}" if @o[:debug]
+	 ids = Array.new(all_genome_ids)
+	 ofh = File.open(genomes_file, 'w')
+	 while ids.size>0
+	    ofh.print ebiFetch('embl', ids.shift(200), 'fasta')
+	 end
+	 ofh.close
+      end
+      # Generate metagenome
+      unless @o[:nosimulate]
+	 puts "Generating in silico metagenome" unless @o[:q]
+	 if @o[:reuse] and File.size? @o[:baseout] + ".mg.fasta"
+	    puts "  * reusing existing file: #{@o[:baseout]}.mg.fasta." unless @o[:q]
+	 else
+	    all_src = File.readlines("#{@o[:baseout]}.src.fasta").select{ |l| l =~ /^>/ }.size
+	    thrs = [@o[:thr], all_src].min
+	    puts "  * simulating metagenomes and tagging positive reads in #{thrs} threads." unless @o[:q]
+	    $stderr.puts "   # #{positive_coords}" if @o[:debug]
+	    thr_obj = []
+	    seqs_per_thr = (all_src/thrs).ceil
+	    (0 .. (thrs-1)).each do |thr_i|
+	       output = @o[:baseout] + ".mg.fasta.#{thr_i.to_s}"
+	       thr_obj << output
+	       fork do
+		  seqs_a = thr_i*seqs_per_thr + 1
+		  seqs_b = [seqs_a + seqs_per_thr, all_src].min
+		  # Create sub-fasta
+		  ofh = File.open("#{@o[:baseout]}.src.fasta.#{thr_i.to_s}", "w")
+		  ifh = File.open("#{@o[:baseout]}.src.fasta", "r")
+		  seq_i = 0
+		  while l = ifh.gets
+		     seq_i+=1 if l =~ /^>/
+		     break if seq_i > seqs_b
+		     ofh.print l if seq_i >= seqs_a
+		  end
+		  ifh.close
+		  ofh.close
+		  # Run simulator (except if the temporal file is already there and can be reused)
+		  unless @o[:reuse] and File.size? @o[:baseout] + ".mg.tmp.#{thr_i.to_s}-reads.fa"
+		     bash sprintf(@o[:simulatorcmd], @o[:simulatorbin], "#{@o[:baseout]}.src.fasta.#{thr_i.to_s}", @o[:seqdepth]*@o[:readlen].to_f, @o[:readlen], "#{@o[:baseout]}.mg.tmp.#{thr_i.to_s}")
+		  end
+		  # Tag positives
+		  puts "  * tagging positive reads [thread #{thr_i.to_s}]." unless @o[:q]
+		  ifh = File.open(@o[:baseout] + ".mg.tmp.#{thr_i.to_s}-reads.fa", 'r')
+		  ofh = File.open(@o[:baseout] + ".mg.fasta.#{thr_i.to_s}", 'w')
+		  while l = ifh.gets
+		     if l =~ /^>/
+			rd = /^>(?<id>\d+) reference=[A-Za-z]+\|(?<genome_id>[A-Za-z0-9_]+)\|.* position=(?<comp>complement\()?(?<from>\d+)\.\.(?<to>\d+)\)? /.match(l)
+			raise "Cannot parse simulated read's defline, are you using Grinder?: #{l}" if rd.nil?
+			positive = false
+			positive_coords[rd[:genome_id].to_sym] ||= []
+			positive_coords[rd[:genome_id].to_sym].each do |gn|
+			   left  = rd[:to].to_i - gn[:from]
+			   right = gn[:to] - rd[:from].to_i
+			   if (left*right >= 0) and ([left, right].min >= @o[:minovl])
+			      positive = true
+			      break
+			   end
+			end
+			l = ">#{thr_i.to_s}_#{rd[:id]}#{positive ? "@%" : ""} " +
+			   "ref=#{rd[:genome_id]}:#{rd[:from]}..#{rd[:to]}#{(rd[:comp]=='complement(')?'-':'+'}\n"
+		     end
+		     ofh.print l
+		  end
+		  ofh.close
+		  ifh.close
+	       end # fork
+	    end # (1 .. thrs).each
+	    Process.waitall
+	    # Concatenate results
+	    ofh = File.open(@o[:baseout] + ".mg.fasta", 'w')
+	    thr_obj.each do |t|
+	       raise "Thread failed without error trace: #{t}" unless File.exist? t
+	       ifh = File.open(t, "r")
+	       while l = ifh.gets
+	          ofh.print l
+	       end
+	       ifh.close
+	       File.unlink t
+	    end
+	    ofh.close
+         end
+      end # unless @o[:nosimulate]
+      # Align references
+      unless @o[:noaln]
+	 puts "Aligning reference set." unless @o[:q]
+	 if @o[:reuse] and File.size? "#{@o[:baseout]}.ref.aln"
+	    puts "  * reusing existing file: #{@o[:baseout]}.ref.aln." unless @o[:q]
+	 else
+	    bash sprintf(@o[:alignercmd], @o[:alignerbin], "#{@o[:baseout]}.ref.fasta", "#{@o[:baseout]}.ref.aln", @o[:thr])
+	    puts "  +--\n  | IMPORTANT NOTE: Manually checking the alignment before\n  | the 'compile' step is *strongly* encouraged.\n  +--\n" unless @o[:q]
+	 end
+      end
+      # Run similarity search
+      unless @o[:nosearch]
+	 puts "Running homology search." unless @o[:q]
+	 if @o[:reuse] and File.size? "#{@o[:baseout]}.ref.blast"
+	    puts "  * reusing existing file: #{@o[:baseout]}.ref.blast." unless @o[:q]
+	 else
+	    puts "  * preparing database." unless @o[:q]
+	    bash sprintf(@o[:makedbcmd][@o[:search]], @o[:searchbins], 'prot', "#{@o[:baseout]}.ref.fasta", "#{@o[:baseout]}.ref")
+	    puts "  * running similarity search." unless @o[:q]
+	    bash sprintf(@o[:searchcmd][@o[:search]], @o[:searchbins], 'blastx', "#{@o[:baseout]}.mg.fasta", "#{@o[:baseout]}.ref", "#{@o[:baseout]}.ref.blast", @o[:thr])
+	 end
+      end
+      # Clean
+      unless @o[:noclean]
+	 puts "Cleaning." unless @o[:q]
+	 sff  = %w{.src.xml .src.fasta}
+	 sff += %w{.mg.tmp-reads.fa .mg.tmp-ranks.txt} unless @o[:nosimulate]
+	 sff += %w{.ref.phr .ref.pin .ref.psq} unless @o[:nosearch]
+	 sff.each { |sf| File.unlink @o[:baseout] + sf if File.exist? @o[:baseout] + sf }
+      end
+   end # build!
+end # ROCker

data/lib/rocker/step/compile.rb ADDED

@@ -0,0 +1,53 @@
+#
+# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @author Luis (Coto) Orellana
+# @license artistic license 2.0
+# @update Jun-05-2015
+#
+class ROCker
+   #================================[ Class ]
+   @@DEFAULTS.merge!({:refine=>true, :win=>20, :minscore=>0})
+   #================================[ Compile ]
+   def compile!
+      raise "-a/--alignment is mandatory." if @o[:aln].nil?
+      raise "-a/--alignment must exist." unless File.exist? @o[:aln]
+      if @o[:table].nil?
+	 raise "-t/--table is mandatory unless -b is provided." if @o[:blast].nil? or not File.exist? @o[:blast]
+	 @o[:table] = "#{@o[:blast]}.table"
+      else
+	 @o[:reuse] = true
+      end
+      raise "-b/--blast is mandatory unless -t exists." if @o[:blast].nil? and not File.exist? @o[:table]
+      raise "-k/--rocker is mandatory." if @o[:rocker].nil?
+      puts "Testing environment." unless @o[:q]
+      bash "echo '' | #{@o[:r]} --vanilla", "-r/--path-to-r must be executable. Is R installed?"
+      bash "echo \"library('pROC')\" | #{@o[:r]} --vanilla", "Please install the 'pROC' library for R first."
+      puts "Reading files." unless @o[:q]
+      puts "  * loading alignment: #{@o[:aln]}." unless @o[:q]
+      aln = Alignment.new
+      aln.read_fasta @o[:aln]
+      if @o[:reuse] and File.exist? @o[:table]
+	 puts "  * reusing existing file: #{@o[:table]}." unless @o[:q]
+      else
+	 puts "  * generating table: #{@o[:table]}." unless @o[:q]
+	 blast2table(@o[:blast], @o[:table], aln, @o[:minscore])
+      end
+      puts "Analyzing data." unless @o[:q]
+      puts "  * computing windows." unless @o[:q]
+      data = ROCData.new(@o[:table], aln, @o[:win])
+      data.nucl = @o[:nucl]
+      if @o[:refine]
+	 puts "  * refining windows." unless @o[:q]
+	 warn "Insufficient hits to refine results." unless data.refine! @o[:table]
+      end
+      puts "  * saving ROCker file: #{@o[:rocker]}." unless @o[:q]
+      data.save @o[:rocker]
+   end # compile!
+end # ROCker

data/lib/rocker/step/filter.rb ADDED

@@ -0,0 +1,32 @@
+#
+# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @author Luis (Coto) Orellana
+# @license artistic license 2.0
+# @update Jun-04-2015
+#
+class ROCker
+   #================================[ Class ]
+   #@@DEFAULTS.merge!({  })
+   #================================[ Filter ]
+   def filter!
+      raise "-k/--rocker is mandatory." if @o[:rocker].nil?
+      raise "-x/--query-blast is mandatory." if @o[:qblast].nil?
+      raise "-o/--out-blast is mandatory." if @o[:oblast].nil?
+      puts "Reading ROCker file." unless @o[:q]
+      data = ROCData.new @o[:rocker]
+      puts "Filtering BLAST." unless @o[:q]
+      ih = File.open(@o[:qblast], 'r')
+      oh = File.open(@o[:oblast], 'w')
+      while ln = ih.gets
+	 bh = BlastHit.new(ln, data.aln)
+	 oh.print ln if not(bh.sfrom.nil?) and bh.bits >= data.win_at_col(bh.midpoint).thr
+      end
+      ih.close
+      oh.close
+   end # filter!
+end # ROCker

data/lib/rocker/step/plot.rb ADDED

@@ -0,0 +1,93 @@
+#
+# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @author Luis (Coto) Orellana
+# @license artistic license 2.0
+# @update Jun-04-2015
+#
+class ROCker
+   #================================[ Class ]
+   @@DEFAULTS.merge!({:color=>false, :gformat=>'pdf', :width=>9, :height=>9, :impact=>false, :transparency=>true, :sbj=>[]})
+   #================================[ Search ]
+   def plot!
+      raise "-k/--rocker is mandatory." if o[:rocker].nil?
+      if @o[:table].nil?
+	 raise "-t/--table is mandatory unless -b is provided." if @o[:blast].nil?
+	 @o[:table] = "#{@o[:blast]}.table"
+      end
+      raise "-b/--blast is mandatory unless -t exists." if @o[:blast].nil? and not File.exist? @o[:table]
+      puts "Testing environment." unless @o[:q]
+      bash "echo '' | #{@o[:r]} --vanilla", "-r/--path-to-r must be executable. Is R installed?"
+      puts "Reading files." unless @o[:q]
+      puts "  * loding ROCker file: #{@o[:rocker]}." unless @o[:q]
+      data = ROCData.new @o[:rocker]
+      if File.exist? @o[:table]
+	 puts "  * reusing existing file: #{@o[:table]}." unless @o[:q]
+      else
+	 puts "  * generating table: #{@o[:table]}." unless @o[:q]
+	 blast2table(@o[:blast], @o[:table], data.aln, @o[:minscore])
+      end
+      puts "Plotting matches." unless @o[:q]
+      extra = @o[:gformat]=='pdf' ? "" : ", units='in', res=300"
+      @o[:gout] ||= "#{@o[:rocker]}.#{@o[:gformat]}"
+      data.rrun "#{@o[:gformat]}('#{@o[:gout]}', #{@o[:width]}, #{@o[:height]}#{extra});"
+      data.rrun "layout(c(2,1,3), heights=c(2-1/#{data.aln.size},3,1));"
+      some_thr = data.load_table! @o[:table], @o[:sbj], @o[:minscore]
+      data.rrun "par(mar=c(0,4,0,0.5)+.1);"
+      data.rrun "plot(1, t='n', xlim=c(0.5,#{data.aln.cols}+0.5), ylim=range(x$V4)+c(-0.04,0.04)*diff(range(x$V4)), xlab='', ylab='Bit score', xaxs='i', xaxt='n');"
+      data.rrun "noise <- runif(ncol(x),-.2,.2)"
+      data.rrun "arrows(x0=x$V2, x1=x$V3, y0=x$V4+noise, col=ifelse(x$V5==1, rgb(0,0,.5,#{@o[:transparency] ? ".2" : "1"}), rgb(.5,0,0,#{@o[:transparency] ? ".2" : "1"})), length=0);"
+      data.rrun "points(x$V6, x$V4+noise, col=ifelse(x$V5==1, rgb(0,0,.5,#{@o[:transparency] ? ".5" : "1"}), rgb(.5,0,0,#{@o[:transparency] ? ".5" : "1"})), pch=19, cex=1/4);"
+      puts "Plotting windows." unless @o[:q]
+      if some_thr
+	 data.rrun "arrows(x0=w$V1, x1=w$V2, y0=w$V5, lwd=2, length=0)"
+	 data.rrun "arrows(x0=w$V2[-nrow(w)], x1=w$V1[-1], y0=w$V5[-nrow(w)], y1=w$V5[-1], lwd=2, length=0)"
+      end
+      data.rrun "legend('bottomright',legend=c('Match span','Match mid-point','Reference','Non-reference')," +
+	 "lwd=c(1,NA,1,1),pch=c(NA,19,19,19),col=c('black','black','darkblue','darkred'),ncol=4,bty='n')"
+      puts "Plotting alignment." unless @o[:q]
+      data.rrun "par(mar=c(0,4,0.5,0.5)+0.1);"
+      data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}),ylim=c(1,#{data.aln.seqs.size}),xlab='',ylab='Alignment',xaxs='i',xaxt='n',yaxs='i',yaxt='n',bty='n');"
+      i = 0
+      data.rrun "clr <- rainbow(26, v=1/2, s=3/4);" if @o[:color]
+      data.aln.seqs.values.each do |s|
+         color = s.aln.split(//).map{|c| c=="-" ? "'grey80'" : (@o[:sbj].include?(s.id) ? "'red'" : (@o[:color] ? "clr[#{c.ord-64}]" : "'black'"))}.join(',')
+	 data.rrun "rect((1:#{data.aln.cols-1})-0.5, rep(#{i}, #{data.aln.cols-1}), (1:#{data.aln.cols-1})+0.5, rep(#{i+1}, #{data.aln.cols-1}), col=c(#{color}), border=NA);"
+	 i += 1
+      end
+      puts "Plotting statistics." unless @o[:q]
+      data.rrun "par(mar=c(5,4,0,0.5)+.1);"
+      data.rrun "plot(1, t='n', xlim=c(0,#{data.aln.cols}),ylim=c(#{@o[:ylim].nil? ? (@o[:impact] ? "-2,.1" : "50,100") : @o[:ylim]}),xlab='Alignment position (amino acids)',ylab='Precision',xaxs='i');"
+      if some_thr
+	 sn = data.rrun "100*sum(w$tp)/(sum(w$tp)+sum(w$fn))", :float
+	 sp = data.rrun "100*sum(w$tn)/(sum(w$fp)+sum(w$tn))", :float
+	 ac = data.rrun "100*(sum(w$tp)+sum(w$tn))/(sum(w$p)+sum(w$n))", :float
+	 unless @o[:q]
+	    puts "  * sensitivity: #{sn}%"
+	    puts "  * specificity: #{sp}%"
+	    puts "  * accuracy: #{ac}%"
+	 end
+	 data.rrun "pos <- (w$V1+w$V2)/2"
+	 if @o[:impact]
+	    data.rrun "lines(pos[!is.na(w$specificity)], (w$specificity[!is.na(w$specificity)]-#{sp})*w$tp[!is.na(w$specificity)]/sum(w$tp), col='darkred', lwd=2, t='o', cex=1/3, pch=19);"
+	    data.rrun "lines(pos[!is.na(w$sensitivity)], (w$sensitivity[!is.na(w$sensitivity)]-#{sn})*w$tn[!is.na(w$sensitivity)]/sum(w$tn), col='darkgreen', lwd=2, t='o', cex=1/3, pch=19);"
+	    data.rrun "lines(pos[!is.na(w$accuracy)], (w$accuracy[!is.na(w$accuracy)]-#{ac})*(w$tp+w$tn)[!is.na(w$accuracy)]/sum(c(w$tp, w$tn)), col='darkblue', lwd=2, t='o', cex=1/3, pch=19);"
+	 else
+	    data.rrun "lines(pos[!is.na(w$specificity)], w$specificity[!is.na(w$specificity)], col='darkred', lwd=2, t='o', cex=1/3, pch=19);"
+	    data.rrun "lines(pos[!is.na(w$sensitivity)], w$sensitivity[!is.na(w$sensitivity)], col='darkgreen', lwd=2, t='o', cex=1/3, pch=19);"
+	    data.rrun "lines(pos[!is.na(w$accuracy)], w$accuracy[!is.na(w$accuracy)], col='darkblue', lwd=2, t='o', cex=1/3, pch=19);"
+	 end
+	 #data.rrun "lines(pos[!is.na(w$precision)], w$precision[!is.na(w$precision)], col='purple', lwd=2, t='o', cex=1/3, pch=19);"
+      end
+      data.rrun "legend('bottomright',legend=c('Specificity','Sensitivity','Accuracy'),lwd=2,col=c('darkred','darkgreen','darkblue'),ncol=3,bty='n')"
+      data.rrun "dev.off();"
+   end # plot!
+end # ROCker

data/lib/rocker/step/search.rb ADDED

@@ -0,0 +1,20 @@
+#
+# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @author Luis (Coto) Orellana
+# @license artistic license 2.0
+# @update Jun-04-2015
+#
+class ROCker
+   #================================[ Class ]
+   #@@DEFAULTS.merge!({  })
+   #================================[ Search ]
+   def search!
+      raise "-k/--rocker is mandatory." if @o[:rocker].nil?
+      raise "Code Under development..."
+      # ToDo
+      # [ ... ]
+   end # search!
+end # ROCker

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bio-rocker
 version: !ruby/object:Gem::Version
-  version: 0.2.5
+  version: 1.0.0
 platform: ruby
 authors:
 - Luis (Coto) Orellana
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-05-07 00:00:00.000000000 Z
+date: 2015-06-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rest-client
@@ -25,6 +25,20 @@ dependencies:
     - - ~>
       - !ruby/object:Gem::Version
         version: 1.7.3
+- !ruby/object:Gem::Dependency
+  name: json
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.8.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.8.1
 description: Detecting and quantifying functional genes in short-read metagenomic
   datasets
 email: lhorellana@gatech.edu
@@ -40,6 +54,11 @@ files:
 - lib/rocker/rocwindow.rb
 - lib/rocker/rocdata.rb
 - lib/rocker/rinterface.rb
+- lib/rocker/step/build.rb
+- lib/rocker/step/compile.rb
+- lib/rocker/step/search.rb
+- lib/rocker/step/filter.rb
+- lib/rocker/step/plot.rb
 - bin/ROCker
 homepage: http://enve-omics.ce.gatech.edu/rocker
 licenses:
@@ -51,9 +70,9 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ~>
     - !ruby/object:Gem::Version
-      version: '0'
+      version: '2.0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - '>='