RubyGems - miga-base - Versions diffs - 0.3.1.7 → 0.3.2.0 - Mend

miga-base 0.3.1.7 → 0.3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

checksums.yaml +4 -4
data/actions/ncbi_get.rb +8 -0
data/lib/miga/common.rb +9 -215
data/lib/miga/common/base.rb +49 -0
data/lib/miga/common/format.rb +135 -0
data/lib/miga/common/path.rb +49 -0
data/lib/miga/daemon.rb +3 -60
data/lib/miga/daemon/base.rb +69 -0
data/lib/miga/dataset.rb +3 -3
data/lib/miga/dataset/result.rb +5 -5
data/lib/miga/result.rb +5 -0
data/lib/miga/version.rb +7 -5
data/scripts/distances.bash +2 -19
data/scripts/taxonomy.bash +2 -21
data/test/common_test.rb +9 -0
data/utils/distance/base.rb +6 -0
data/utils/distance/commands.rb +82 -0
data/utils/distance/database.rb +86 -0
data/utils/distance/pipeline.rb +98 -0
data/utils/distance/runner.rb +104 -0
data/utils/distance/temporal.rb +37 -0
data/utils/distances.rb +9 -0
data/utils/enveomics/Docs/recplot2.md +233 -0
data/utils/enveomics/Makefile +1 -1
data/utils/enveomics/Manifest/Tasks/blasttab.json +66 -0
data/utils/enveomics/Manifest/Tasks/fasta.json +10 -3
data/utils/enveomics/Manifest/Tasks/fastq.json +4 -4
data/utils/enveomics/Manifest/Tasks/mapping.json +38 -1
data/utils/enveomics/Manifest/categories.json +11 -1
data/utils/enveomics/Manifest/examples.json +2 -2
data/utils/enveomics/README.md +2 -0
data/utils/enveomics/Scripts/Aln.cat.rb +1 -0
data/utils/enveomics/Scripts/BedGraph.tad.rb +52 -30
data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
data/utils/enveomics/Scripts/BlastTab.recplot2.R +7 -2
data/utils/enveomics/Scripts/FastA.interpose.pl +26 -20
data/utils/enveomics/Scripts/FastQ.interpose.pl +20 -20
data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
data/utils/enveomics/Scripts/SRA.download.bash +28 -21
data/utils/enveomics/Scripts/Table.barplot.R +1 -0
data/utils/enveomics/Scripts/aai.rb +4 -2
data/utils/enveomics/build_enveomics_r.bash +5 -5
data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
data/utils/enveomics/enveomics.R/NAMESPACE +6 -2
data/utils/enveomics/enveomics.R/R/recplot2.R +471 -71
data/utils/enveomics/enveomics.R/README.md +26 -17
data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -1
data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +6 -3
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +32 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +24 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +12 -7
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +8 -37
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +20 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +20 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +29 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +42 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +18 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +33 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +28 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +56 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +3 -1
data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +22 -0
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +20 -14
data/utils/requirements.txt +1 -1
metadata +28 -4
data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +0 -40
data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +0 -18

data/utils/enveomics/Scripts/FastA.interpose.pl CHANGED

@@ -1,43 +1,49 @@
 #!/usr/bin/env perl
-# Interpose sequences in FastA format from two files into one output file.  If more than two files are
-# provided, the script will interpose all the input files.
-# Please note that this script will check for the consistency of the names (assuming a pair of related reads
-# contains the same name varying only in a trailing slash (/) followed by a digit.  If you want to turn this
-# feature off just set the $eval_T variable to zero.  If you want to decrease the sampling period (to speed
-# the script up) or increase it (to make it more sensitive to errors) just change $eval_T accordingly.
-#
 # @author Luis M. Rodriguez-R
-# @version 1.0
-# @created Nov-27-2012
-# @update Mar-23-2015
 # @license artistic license 2.0
-#
-# Usage: FastQ.interpose.pl <output_fastq> <input_fastq_1> <input_fastq_2> [additional input files...]
 use strict;
 use warnings;
 use Symbol;
 my $HELP = <<HELP
+  Description:
+    Interposes sequences in FastA format from two files into one output file.
+    If more than two files are provided, the script will interpose all the input
+    files.
+    Note that this script will check for the consistency of the names (assuming
+    a pair of related reads contains the same name varying only in a trailing
+    slash (/) followed by a digit.  If you want to turn this feature off just
+    set the -T option to zero.  If you want to decrease the sampling period (to
+    speed the script up) or increase it (to make it more sensitive to errors)
+    just change the -T option accordingly.
   Usage:
-     $0 <output_fasta> <input_fasta_1> <input_fasta_2> [additional input files...]
+    $0 [-T <int> ]<output_fasta> <input_fasta_1> <input_fasta_2> [additional input files...]
   Where,
-     output_fasta	: Output file
-     input_fasta_1	: First FastA file
-     input_fasta_2	: Second FastA file
-     ...		: Any additional FastA files (or none)
+    -T <int>		: Optional.  Integer indicating the sampling period for
+    			  names evaluation (see Description above).
+			  By default: 1000.
+    output_fasta	: Output file
+    input_fasta_1	: First FastA file
+    input_fasta_2	: Second FastA file
+    ... 		: Any additional FastA files (or none)
 HELP
 ;
-my $eval_T = 1000;	# Period (in number of entries) of evaluation for consistency of the names.
-			# To turn off evaluation set to 0 (zero).
+my $eval_T = 1000;
+if(exists $ARGV[0] and exists $ARGV[1] and $ARGV[0] eq '-T'){
+   $eval_T = $ARGV[1]+0;
+   shift @ARGV;
+   shift @ARGV;
+}
 my $out = shift @ARGV;
 my @in = @ARGV;
 $/ = "\n>";
 die $HELP unless $out and $#in >= 1;
 open OUT, ">", $out or die "Unable to write on $out: $!\n";
 print "Output file: $out\n";

data/utils/enveomics/Scripts/FastQ.interpose.pl CHANGED

@@ -1,11 +1,7 @@
 #!/usr/bin/env perl
-#
 # @author Luis M. Rodriguez-R
-# @version 2.0
-# @update: Mar-23-2015
 # @license artistic license 2.0
-#
-# Usage: FastQ.interpose.pl <output_fastq> <input_fastq_1> <input_fastq_2> [additional input files...]
 use strict;
 use warnings;
@@ -14,24 +10,27 @@ use Symbol;
 my $HELP = <<HELP
   Description:
-    Interposes sequences in FastQ format from two files into one output file.  If more than two files are
-    provided, the script will interpose all the input files.
-    Note that this script will check for the consistency of the names (assuming a pair of related reads
-    contains the same name varying only in a trailing slash (/) followed by a digit.  If you want to turn
-    this feature off just set the -T option to zero.  If you want to decrease the sampling period
-    (to speed the script up) or increase it (to make it more sensitive to errors) just change -T option
-    accordingly.
+    Interposes sequences in FastQ format from two files into one output file.
+    If more than two files are provided, the script will interpose all the input
+    files.
+    Note that this script will check for the consistency of the names (assuming
+    a pair of related reads contains the same name varying only in a trailing
+    slash (/) followed by a digit.  If you want to turn this feature off just
+    set the -T option to zero.  If you want to decrease the sampling period (to
+    speed the script up) or increase it (to make it more sensitive to errors)
+    just change the -T option accordingly.
   Usage:
-     $0 [-T <int> ]<output_fastq> <input_fastq_1> <input_fastq_2> [additional input files...]
+    $0 [-T <int> ]<output_fastq> <input_fastq_1> <input_fastq_2> [additional input files...]
   Where,
-     -T <int>		: Optional.  Integer indicating the sampling period for names evaluation (see
-     			  Description above).  By default: 1000.
-     output_fastq	: Output file
-     input_fastq_1	: First FastQ file
-     input_fastq_2	: Second FastQ file
-     ...		: Any additional FastQ files (or none)
+    -T <int>		: Optional.  Integer indicating the sampling period for
+    			  names evaluation (see Description above).
+			  By default: 1000.
+    output_fastq	: Output file
+    input_fastq_1	: First FastQ file
+    input_fastq_2	: Second FastQ file
+    ... 		: Any additional FastQ files (or none)
 HELP
 ;
@@ -44,6 +43,7 @@ if(exists $ARGV[0] and exists $ARGV[1] and $ARGV[0] eq '-T'){
 my $out = shift @ARGV;
 my @in = @ARGV;
 die $HELP unless $out and $#in >= 1;
 open OUT, ">", $out or die "Unable to write on $out: $!\n";
 print "Output file: $out\n";

data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R ADDED

@@ -0,0 +1,32 @@
+#!/usr/bin/env Rscript
+#
+# @author  Luis M. Rodriguez-R
+# @update  Jan-04-2016
+# @license artistic license 2.0
+#
+#= Load stuff
+args <- commandArgs(trailingOnly = F)
+enveomics_R <- file.path(dirname(
+   sub("^--file=", "", args[grep("^--file=", args)])),
+   "lib", "enveomics.R")
+library(methods)
+source(file.path(enveomics_R, "R", "cliopts.R"))
+source(file.path(enveomics_R, "R", "recplot2.R"))
+#= Generate interface
+opt <- enve.cliopts(enve.recplot2.compareIdentities,
+   file.path(enveomics_R, "man", "enve.recplot2.compareIdentities.Rd"),
+   positional_arguments=2,
+   usage="usage: %prog [options] recplot-A.Rdata recplot-B.Rdata",
+   number=c("pseudocounts", "max.deviation"), ignore=c("x", "y"),
+   p_desc="Calculates the difference between identity distributions of two recruitment plots.")
+#= Run it!
+load(opt$args[1])
+opt$options[['x']] <- rp
+load(opt$args[2])
+opt$options[['y']] <- rp
+dist <- do.call("enve.recplot2.compareIdentities", opt$options)
+cat(dist, '\n')

data/utils/enveomics/Scripts/SRA.download.bash CHANGED

@@ -2,14 +2,13 @@
 #
 # @author  Luis M. Rodriguez-R
-# @update  Nov-27-2015
 # @license artistic license 2.0
 #
-DATA_LINK="http://www.ebi.ac.uk/ena/data/warehouse/filereport"
+DATA_LINK="https://www.ebi.ac.uk/ena/data/warehouse/filereport"
 DATA_OPS="result=read_run&fields=run_accession,fastq_ftp,fastq_md5"
 SRX=$1
-DIR=${1:-$SRX}
+DIR=${2:-$SRX}
 if [[ "$SRX" == "" ]] ; then
 echo "
@@ -27,24 +26,32 @@ fi
 [[ -d "$DIR" ]] || mkdir "$DIR"
+function md5value {
+  local file=$1
+  o=$(md5 "$file" | perl -pe 's/.* //')
+  [[ -n $o ]] || o=$(md5sum-lite "$file" | awk '{print $1}')
+  [[ -n $o ]] || o=$(md5sum "$file" | awk '{print $1}')
+  echo "$o"
+}
 curl -s "$DATA_LINK?$DATA_OPS&accession=$SRX" -o "$DIR/srr_list.txt"
 tail -n +2 "$DIR/srr_list.txt" | while read ln ; do
-   srr=$(echo "$ln"|cut -f 1)
-   ftp=$(echo "$ln"|cut -f 2)
-   md5=$(echo "$ln"|cut -f 3)
-   dir="$DIR/$srr"
-   [[ -d "$dir" ]] || mkdir "$dir"
-   echo "o $srr" >&2
-   for uri in $(echo "$ftp" | tr ";" " ") ; do
-      file="$dir/$(basename $uri)"
-      curl "$uri" -o "$file"
-      md5obs=$(md5sum "$file" | awk '{print $1}')
-      if [[ "$md5" == "$md5obs"* ]] ; then
-	 md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
-      else
-	 echo "Corrupt file: $file" >&2
-	 echo "  MD5 mismatch: $md5obs not in $md5" >&2
-	 exit 1;
-      fi
-   done
+  srr=$(echo "$ln"|cut -f 1)
+  ftp=$(echo "$ln"|cut -f 2)
+  md5=$(echo "$ln"|cut -f 3)
+  dir="$DIR/$srr"
+  [[ -d "$dir" ]] || mkdir "$dir"
+  echo "o $srr" >&2
+  for uri in $(echo "$ftp" | tr ";" " ") ; do
+    file="$dir/$(basename $uri)"
+    curl "$uri" -o "$file"
+    md5obs=$(md5value "$file")
+    if [[ "$md5" == "$md5obs"* ]] ; then
+      md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
+    else
+      echo "Corrupt file: $file" >&2
+      echo "  MD5 mismatch: $md5obs not in $md5" >&2
+      exit 1;
+    fi
+  done
 done

data/utils/enveomics/Scripts/Table.barplot.R CHANGED

@@ -11,6 +11,7 @@ enveomics_R <- file.path(dirname(
    sub("^--file=", "", args[grep("^--file=", args)])),
    "lib", "enveomics.R")
 source(file.path(enveomics_R, "R", "cliopts.R"))
+source(file.path(enveomics_R, "R", "utils.R"))
 source(file.path(enveomics_R, "R", "barplot.R"))
 #= Generate interface

data/utils/enveomics/Scripts/aai.rb CHANGED

@@ -296,8 +296,10 @@ Dir.mktmpdir do |dir|
       "#{dir}/#{i}.tab.uns"`
       `sort -k 1 "#{dir}/#{i}.tab.uns" > "#{dir}/#{i}.tab"`
     when "diamond"
-      `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --outfmt 6 \
-      --db "#{s}.dmnd" --query "#{q}" --out "#{dir}/#{i}.tab" --more-sensitive`
+      `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{s}.dmnd" \
+      --query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" \
+      && "#{o[:bin]}diamond" view --daa "#{dir}/#{i}.daa" --outfmt 6 \
+      --out "#{dir}/#{i}.tab"`
     else
       abort "Unsupported program: #{o[:program]}."
     end

data/utils/enveomics/build_enveomics_r.bash CHANGED

@@ -34,11 +34,11 @@ library(inlinedocs)
 package.skeleton.dx('./');
 " | R --vanilla
 cat man/enveomics.R-package.Rd | tr -d '\r' \
-   | grep -v '^}$' | grep -v '^\\author{' \
-   | grep -v '^Maintainer' \
-   | perl -pe 's/^\\keyword/}\n\\author{Luis M. Rodriguez-R <lmrodriguezr\@gmail.com> [aut, cre]}\n\n\\keyword/' \
-   | perl -lwe '$/=\0; $_=<>; s/^\\details{\n+([^}].*\n+)*}\n+//mg; print' \
-   > o && mv o man/enveomics.R-package.Rd
+  | grep -v '^}$' | grep -v '^\\author{' \
+  | grep -v '^Maintainer' \
+  | perl -pe 's/^\\keyword/}\n\\author{Luis M. Rodriguez-R <lmrodriguezr\@gmail.com> [aut, cre]}\n\n\\keyword/' \
+  | perl -lwe '$/=\0; $_=<>; s/^\\details{\n+([^}].*\n+)*}\n+//mg; print' \
+  > o && mv o man/enveomics.R-package.Rd
 #[[ ! -d inst/doc ]] && mkdir -p inst/doc
 #pandoc -o inst/doc/enveomics.R.pdf -f markdown_github README.md

data/utils/enveomics/enveomics.R/DESCRIPTION CHANGED

@@ -1,5 +1,5 @@
 Package: enveomics.R
-Version: 1.1.6
+Version: 1.3
 Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
 	   email="lmrodriguezr@gmail.com"))
 Title: Various Utilities for Microbial Genomics and Metagenomics

data/utils/enveomics/enveomics.R/NAMESPACE CHANGED

@@ -10,7 +10,8 @@ importFrom("graphics", "abline", "axis", "barplot", "hist", "image",
   "layout", "legend", "lines", "par", "points", "polygon",
   "rect", "text")
 importFrom("stats", "as.dist", "cophenetic", "cor", "median",
-  "quantile", "runif", "smooth.spline", "nls", "nls.control", "qnorm")
+  "quantile", "runif", "smooth.spline", "nls", "nls.control", "qnorm",
+  "dnorm", "kmeans")
 importFrom("utils", "head", "read.table", "setTxtProgressBar", "tail",
   "txtProgressBar")
 exportClasses(enve.RecPlot2, enve.RecPlot2.Peak, enve.TRIBS, enve.TRIBStest,
@@ -29,7 +30,10 @@ export(
   plot.enve.TRIBStest, summary.enve.TRIBStest,
   enve.df2dist, enve.df2dist.group, enve.df2dist.list,
   enve.recplot2, plot.enve.RecPlot2, enve.recplot2.findPeaks,
+  enve.recplot2.findPeaks.emauto, enve.recplot2.findPeaks.em,
+  enve.recplot2.findPeaks.mower,
   enve.recplot2.corePeak, enve.recplot2.changeCutoff,
-  enve.recplot2.extractWindows,
+  enve.recplot2.extractWindows, enve.recplot2.compareIdentities,
+  enve.recplot2.coordinates, enve.recplot2.seqdepth, enve.recplot2.ANIr,
   enve.growthcurve, plot.enve.GrowthCurve, summary.enve.GrowthCurve,
   enve.col2alpha)

data/utils/enveomics/enveomics.R/R/recplot2.R CHANGED

@@ -9,7 +9,7 @@ setClass("enve.RecPlot2",
    id.counts='numeric',		##<< Counts per ID bin.
    id.breaks='numeric',		##<< Breaks of identity bins.
    pos.breaks='numeric',	##<< Breaks of position bins.
-   seq.breaks='numeric',
+   seq.breaks='numeric',	##<< Breaks of input sequences.
    peaks='list',                ##<< Peaks identified in the recplot.
    ### Limits of the subject sequences after concatenation.
    seq.names='character',	##<< Names of the subject sequences.
@@ -45,9 +45,13 @@ setClass("enve.RecPlot2.Peak",
    ### number of position bins with non-zero sequencing depth in the recruitment
    ### plot (regardless of peak count).
    err.res='numeric',
-   ### Error left after adding the peak.
-   merge.logdist='numeric'
+   ### Error left after adding the peak (mower) or log-likelihood (em or emauto).
+   merge.logdist='numeric',
    ### Attempted `merge.logdist` parameter.
+   seq.depth='numeric',
+   ### Best estimate available for the sequencing depth of the peak (centrality).
+   log='logical'
+   ### Indicates if the estimation was performed in natural logarithm space
    ));
 setMethod("$", "enve.RecPlot2", function(x, name) attr(x, name))
 setMethod("$", "enve.RecPlot2.Peak", function(x, name) attr(x, name))
@@ -83,6 +87,9 @@ plot.enve.RecPlot2 <- function
       peaks.col='darkred',
       ### If not NA, it attempts to represent peaks in the population histogram
       ### in the specified color. Set to NA to avoid peak-finding.
+      use.peaks,
+      ### A list of `enve.RecPlot2.Peak` objects, as returned by
+      ### `enve.recplot2.findPeaks`. If passed, `peaks.opts` is ignored.
       id.lim=range(x$id.breaks),
       ### Limits of identities to represent.
       pos.lim=range(x$pos.breaks),
@@ -98,15 +105,17 @@ plot.enve.RecPlot2 <- function
       ### the number of the panel as index (see `layout`).
       pos.splines=0,
       ### Smoothing parameter for the splines in the position histogram. Zero
-      ### (0) for no splines. If non-zero, requires the stats package.
+      ### (0) for no splines. Use NULL to automatically detect by leave-one-out
+      ### cross-validation.
       id.splines=1/2,
       ### Smoothing parameter for the splines in the identity histogram. Zero
-      ### (0) for no splines. If non-zero, requires the stats package.
-      in.lwd=ifelse(pos.splines>0, 1/2, 2),
+      ### (0) for no splines. Use NULL to automatically detect by leave-one-out
+      ### cross-validation.
+      in.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
       ### Line width for the sequencing depth of in-group matches.
-      out.lwd=ifelse(pos.splines>0, 1/2, 2),
+      out.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
       ### Line width for the sequencing depth of out-group matches.
-      id.lwd=ifelse(id.splines>0, 1/2, 2),
+      id.lwd=ifelse(is.null(id.splines) || id.splines>0, 1/2, 2),
       ### Line width for the identity histogram.
       in.col='darkblue',
       ### Color associated to in-group matches.
@@ -193,7 +202,7 @@ plot.enve.RecPlot2 <- function
       pos.f <- rep(seqdepth.in,each=2)
       lines(pos.x, rep(seqdepth.out,each=2), lwd=out.lwd, col=out.col);
       lines(pos.x, pos.f, lwd=in.lwd, col=in.col);
-      if(pos.splines > 0){
+      if(is.null(pos.splines) || pos.splines > 0){
 	 pos.spline <- smooth.spline(pos.x[pos.f>0], log(pos.f[pos.f>0]),
 	    spar=pos.splines)
 	 lines(pos.spline$x, exp(pos.spline$y), lwd=2, col=in.col)
@@ -230,7 +239,7 @@ plot.enve.RecPlot2 <- function
 	 id.f <- rep(id.counts,each=2)
 	 id.x <- rep(id.breaks,each=2)[-c(1,2*length(id.breaks))]
 	 lines(id.f, id.x, lwd=id.lwd, col=id.col);
-	 if(id.splines > 0){
+	 if(is.null(id.splines) || id.splines > 0){
 	    id.spline <- smooth.spline(id.x[id.f>0], log(id.f[id.f>0]),
 	       spar=id.splines)
 	    lines(exp(id.spline$y), id.spline$x, lwd=2, col=id.col)
@@ -267,8 +276,12 @@ plot.enve.RecPlot2 <- function
       polygon(c(0,rep(h.in$counts,each=2),0,0,rep(sum(pos.counts.in==0),2),0),
 	 y.tmp.in, border=NA, col=in.col)
       if(!is.na(peaks.col)){
-	 o	<- peaks.opts; o$x = x;
-	 peaks	<- do.call(enve.recplot2.findPeaks, o);
+	 o <- peaks.opts; o$x = x;
+         if(missing(use.peaks)){
+           peaks <- do.call(enve.recplot2.findPeaks, o)
+         }else{
+           peaks <- use.peaks
+         }
 	 h.mids <- (10^h.breaks[-1] + 10^h.breaks[-length(h.breaks)])/2
 	 if(!is.null(peaks) & length(peaks)>0){
 	    pf <- h.mids*0;
@@ -276,18 +289,23 @@ plot.enve.RecPlot2 <- function
 	       cnt <- enve.recplot2.__peakHist(peaks[[i]], h.mids)
 	       lines(cnt, h.mids, col='red');
 	       pf <- pf+cnt;
-	       axis(4, at=peaks[[i]]$param.hat[[length(peaks[[i]]$param.hat)]],
-		  letters[i], las=1, hadj=1/2)
+	       axis(4, at=peaks[[i]]$seq.depth, letters[i], las=1, hadj=1/2)
 	    }
 	    lines(pf, h.mids, col='red',lwd=1.5);
-	    legend('bottomright', legend=paste(
-	       letters[1:length(peaks)],'. ',
-	       signif(as.numeric(lapply(peaks,
-		  function(x) tail(as.numeric(x$param.hat),n=1))),3),'X (',
-	       signif(100*as.numeric(lapply(peaks,
-		  function(x) (length(x$values)/x$n.total))), 3), '%, err: ',
-	       signif(as.numeric(lapply(peaks, function(x) x$err.res)), 3), ')',
-	       sep=''), bty='n');
+            dpt <- signif(as.numeric(lapply(peaks, function(x) x$seq.depth)),2)
+            frx <- signif(100*as.numeric(
+                  lapply(peaks,
+                    function(x) ifelse(length(x$values)==0, x$n.hat,
+                      length(x$values))/x$n.total)), 2)
+            if(peaks[[1]]$err.res < 0){
+              err <- paste(', LL:', signif(peaks[[1]]$err.res, 3))
+            }else{
+              err <- paste(', err:',
+                    signif(as.numeric(lapply(peaks, function(x) x$err.res)), 2))
+            }
+	    legend('topright', bty='n', cex=1/2,
+                  legend=paste(letters[1:length(peaks)],'. ',
+                    dpt,'X (', frx, '%', err, ')', sep=''))
 	 }
       }
    }
@@ -334,6 +352,10 @@ enve.recplot2 <- function(
       id.breaks=300,
       ### Breaks in the identity histogram. It can also be a vector of break
       ### points, and values outside the range are ignored.
+      id.free.range=FALSE,
+      ### Indicates that the range should be freely set from the observed
+      ### values. Otherwise, 70-100% is included in the identity histogram
+      ### (default).
       id.metric=c('identity', 'corrected identity', 'bit score'),
       ### Metric of identity to be used (Y-axis). Corrected identity is only
       ### supported if the original BLAST file included sequence lengths.
@@ -373,12 +395,14 @@ enve.recplot2 <- function(
       if(pos.breaks>0){
          pos.breaks <- seq(min(lim[,2]), max(lim[,3]), length.out=pos.breaks+1);
       }else{
-         pos.breaks <- c(lim[,2], tail(lim[,3], n=1))
+         pos.breaks <- c(lim[1,2], lim[,3])
       }
    }
    if(length(id.breaks)==1){
-      id.breaks <- seq(min(rec[,rec.idcol]), max(rec[,rec.idcol]),
-	 length.out=id.breaks+1);
+      id.range.v <- rec[,rec.idcol]
+      if(!id.free.range) id.range.v <- c(id.range.v,70,100)
+      id.range.v <- range(id.range.v)
+      id.breaks <- seq(id.range.v[1], id.range.v[2], length.out=id.breaks+1);
    }
    # Run in parallel
@@ -423,8 +447,159 @@ enve.recplot2 <- function(
 }
 enve.recplot2.findPeaks <- function(
+  ### Identifies peaks in the population histogram potentially indicating
+  ### sub-population mixtures
+    x,
+    ### An `enve.RecPlot2` object.
+    method="emauto",
+    ### Peak-finder method. This should be one of:
+    ### "emauto" (Expectation-Maximization with auto-selection of components),
+    ### "em" (Expectation-Maximization),
+    ### "mower" (Custom distribution-mowing method).
+    ...
+    ### Any additional parameters supported by
+    ### `enve.recplot2.findPeaks.<method>`.
+    ){
+  if(method == "emauto"){
+    peaks <- enve.recplot2.findPeaks.emauto(x, ...)
+  }else if(method == "em"){
+    peaks <- enve.recplot2.findPeaks.em(x, ...)
+  }else if(method == "mower"){
+    peaks <- enve.recplot2.findPeaks.mower(x, ...)
+  }else{
+    stop("Invalid peak-finder method ", method)
+  }
+  return(peaks)
+  ### Returns a list of `enve.RecPlot2.Peak` objects.
+}
+enve.recplot2.findPeaks.emauto <- function(
+  ### Identifies peaks in the population histogram using a Gaussian Mixture
+  ### Model Expectation Maximization (GMM-EM) method with number of components
+  ### automatically detected.
+    x,
+    ### An `enve.RecPlot2` object.
+    components=seq(1,10),
+    ### A vector of number of components to evaluate.
+    criterion='aic',
+    ### Criterion to use for components selection. Must be one of:
+    ### 'aic' (Akaike Information Criterion),
+    ### 'bic' or 'sbc' (Bayesian Information Criterion or Schwarz Criterion).
+    merge.tol=2L,
+    ### When attempting to merge peaks with very similar sequencing depth, use
+    ### this number of significant digits (in log-scale).
+    verbose=FALSE,
+    ### Display (mostly debugging) information.
+    ...
+    ### Any additional parameters supported by `enve.recplot2.findPeaks.em`.
+    ){
+  best <- list(crit=0, pstore=list())
+  if(criterion == 'aic'){
+    do_crit <- function(ll, k, n) 2*k - 2*ll
+  }else if(criterion %in% c('bic', 'sbc')){
+    do_crit <- function(ll, k, n) log(n)*k - 2*ll
+  }else{
+    stop('Invalid criterion ', criterion)
+  }
+  for(comp in components){
+    best <- enve.recplot2.findPeaks.__emauto_one(x, comp, do_crit, best,
+          verbose, ...)
+  }
+  seqdepths.r <- signif(log(sapply(best[['peaks']],
+        function(x) x$seq.depth)), merge.tol)
+  distinct <- length(unique(seqdepths.r))
+  if(distinct < length(best[['peaks']])){
+    if(verbose) cat('Attempting merge to', distinct, 'components\n')
+    init <- apply(sapply(best[['peaks']],
+          function(x) c(x$param.hat, alpha=x$n.hat/x$n.total)), 1, as.numeric)
+    init <- init[!duplicated(seqdepths.r),]
+    init <- list(mu=init[,'mean'], sd=init[,'sd'],
+          alpha=init[,'alpha']/sum(init[,'alpha']))
+    best <- enve.recplot2.findPeaks.__emauto_one(x, distinct, do_crit, best,
+          verbose, ...)
+  }
+  return(best[['peaks']])
+  ### Returns a list of `enve.RecPlot2.Peak` objects.
+}
+enve.recplot2.findPeaks.em <- function(
+  ### Identifies peaks in the population histogram using a Gaussian Mixture
+  ### Model Expectation Maximization (GMM-EM) method.
+    x,
+    ### An `enve.RecPlot2` object.
+    max.iter=1000,
+    ### Maximum number of EM iterations.
+    ll.diff.res=1e-8,
+    ### Maximum Log-Likelihood difference to be considered as convergent.
+    components=2,
+    ### Number of distributions assumed in the mixture.
+    rm.top=0.05,
+    ### Top-values to remove before finding peaks, as a quantile probability.
+    ### This step is useful to remove highly conserved regions, but can be
+    ### turned off by setting rm.top=0. The quantile is determined *after*
+    ### removing zero-coverage windows.
+    verbose=FALSE,
+    ### Display (mostly debugging) information.
+    init,
+    ### Initialization parameters. By default, these are derived from k-means
+    ### clustering. A named list with vectors for 'mu', 'sd', and 'alpha', each
+    ### of length `components`.
+    log=TRUE
+    ### Logical value indicating if the estimations should be performed in
+    ### natural logarithm units. Do not change unless you know what you're
+    ### doing.
+  ){
+  # Essential vars
+  pos.binsize  <- x$pos.breaks[-1] - x$pos.breaks[-length(x$pos.breaks)]
+  lsd1  <- (x$pos.counts.in/pos.binsize)[ x$pos.counts.in > 0 ]
+  lsd1 <- lsd1[ lsd1 < quantile(lsd1, 1-rm.top, names=FALSE) ]
+  if(log) lsd1 <- log(lsd1)
+  # 1. Initialize
+  if(missing(init)){
+    km.clust <- kmeans(lsd1, components)$cluster
+    init <- list(
+      mu = tapply(lsd1, km.clust, mean),
+      sd = tapply(lsd1, km.clust, sd),
+      alpha = table(km.clust)/length(km.clust)
+    )
+  }
+  m.step <- init
+  ll <- c()
+  cur.ll <- -Inf
+  for(i in 1:max.iter){
+    # 2/3. EM
+    e.step <- enve.recplot2.findPeaks.__em_e(lsd1, m.step)
+    m.step <- enve.recplot2.findPeaks.__em_m(lsd1, e.step[['posterior']])
+    # 4. Convergence
+    ll <- c(ll, e.step[["ll"]])
+    ll.diff <- abs(cur.ll - e.step[["ll"]])
+    cur.ll <- e.step[["ll"]]
+    if(verbose) cat(i, '\t| LL =', cur.ll, '\t| LL.diff =', ll.diff, '\n')
+    if(ll.diff <= ll.diff.res) break
+  }
+  # Return
+  peaks <- list()
+  for(i in 1:components){
+    n.hat <- m.step[['alpha']][i]*length(lsd1)
+    peaks[[i]] <- new('enve.RecPlot2.Peak', dist='norm', values=as.numeric(),
+      values.res=0, mode=m.step[['mu']][i],
+      param.hat=list(sd=m.step[['sd']][i], mean=m.step[['mu']][i]),
+      n.hat=n.hat, n.total=length(lsd1), err.res=cur.ll,
+      merge.logdist=as.numeric(), log=log,
+      seq.depth=ifelse(log, exp(m.step[['mu']][i]), m.step[['mu']][i]))
+  }
+  return(peaks)
+  ### Returns a list of `enve.RecPlot2.Peak` objects.
+}
+enve.recplot2.findPeaks.mower <- function(
    ### Identifies peaks in the population histogram potentially indicating
-   ### sub-population mixtures.
+   ### sub-population mixtures, using a custom distribution-mowing method.
       x,
       ### An `enve.RecPlot2` object.
       min.points=10,
@@ -436,13 +611,12 @@ enve.recplot2.findPeaks <- function(
       mlv.opts=list(method='parzen'),
       ### Options passed to `mlv` to estimate the mode.
       fitdist.opts.sn=list(distr='sn', method='qme', probs=c(0.1,0.5,0.8),
-	 start=list(omega=1, alpha=-1), lower=c(1e-6, -Inf, 0),
-	 upper=c(Inf, 0, Inf)),
+	 start=list(omega=1, alpha=-1), lower=c(0, -Inf, -Inf)),
       ### Options passed to `fitdist` to estimate the standard deviation if
       ### with.skewness=TRUE. Note that the `start` parameter will be ammended
       ### with xi=estimated mode for each peak.
-      fitdist.opts.norm=list(distr='norm', method='qme', probs=c(.4,.6),
-	 start=list(sd=1), lower=c(1e-8, 0)),
+      fitdist.opts.norm=list(distr='norm', method='qme', probs=c(0.4,0.6),
+	 start=list(sd=1), lower=c(0, -Inf)),
       ### Options passed to `fitdist` to estimate the standard deviation if
       ### with.skewness=FALSE. Note that the `start` parameter will be ammended
       ### with mean=estimated mode for each peak.
@@ -460,7 +634,7 @@ enve.recplot2.findPeaks <- function(
       ### "tail distribution".
       optim.rounds=200,
       ### Maximum rounds of peak optimization.
-      optim.epsilon=1e-8,
+      optim.epsilon=1e-4,
       ### Trace change at which optimization stops (unless `optim.rounds` is
       ### reached first). The trace change is estimated as the sum of square
       ### differences between parameters in one round and those from two rounds
@@ -469,8 +643,12 @@ enve.recplot2.findPeaks <- function(
       ### Maximum value of |log-ratio| between centrality parameters in peaks to
       ### attempt merging. The default of ~0.22 corresponds to a maximum
       ### difference of 25%.
-      verbose=FALSE
+      verbose=FALSE,
       ### Display (mostly debugging) information.
+      log=TRUE
+      ### Logical value indicating if the estimations should be performed in
+      ### natural logarithm units. Do not change unless you know what you're
+      ### doing.
    ){
    # Essential vars
@@ -478,6 +656,7 @@ enve.recplot2.findPeaks <- function(
    seqdepth.in	<- x$pos.counts.in/pos.binsize;
    lsd1 <- seqdepth.in[seqdepth.in>0];
    lsd1 <- lsd1[ lsd1 < quantile(lsd1, 1-rm.top, names=FALSE) ]
+   if(log) lsd1 <- log(lsd1)
    if(with.skewness){
       fitdist.opts <- fitdist.opts.sn
    }else{
@@ -486,11 +665,11 @@ enve.recplot2.findPeaks <- function(
    peaks.opts <- list(lsd1=lsd1, min.points=min.points, quant.est=quant.est,
       mlv.opts=mlv.opts, fitdist.opts=fitdist.opts, with.skewness=with.skewness,
       optim.rounds=optim.rounds, optim.epsilon=optim.epsilon, verbose=verbose,
-      n.total=length(lsd1), merge.logdist=merge.logdist)
+      n.total=length(lsd1), merge.logdist=merge.logdist, log=log)
    # Find seed peaks
    if(verbose) cat('Mowing peaks for n =',length(lsd1),'\n')
-   peaks <- enve.recplot2.__findPeaks(peaks.opts);
+   peaks <- enve.recplot2.findPeaks.__mower(peaks.opts);
    # Merge overlapping peaks
    if(verbose) cat('Trying to merge',length(peaks),'peaks\n')
@@ -511,7 +690,7 @@ enve.recplot2.findPeaks <- function(
 		  p$param.hat[[ length(p$param.hat) ]],'&',
 		  p2$param.hat[[ length(p2$param.hat) ]],'X\n');
 	    peaks.opts$lsd1 <- c(p$values, p2$values)
-	    p.new <- enve.recplot2.__findPeaks(peaks.opts)
+	    p.new <- enve.recplot2.findPeaks.__mower(peaks.opts)
 	    if(length(p.new)==1){
 	       peaks2[[ length(peaks2)+1 ]] <- p.new[[ 1 ]]
 	       ignore <- c(ignore, j)
@@ -542,16 +721,19 @@ enve.recplot2.corePeak <- function
 	    function(y) y$param.hat[[ length(y$param.hat) ]])))
       ]]
    # If a "larger" peak (a peak explaining more bins of the genome) is within
-   # the "merge.logdist" distance, take that one instead.
+   # the default "merge.logdist" distance, take that one instead.
    corePeak <- maxPeak
    for(p in x){
-      sz.d = log(length(p$values)/length(corePeak$values))
-      if(sz.d < 0)
-	 next;
-      sq.d.a <- p$param.hat[[ length(p$param.hat) ]]
-      sq.d.b <- maxPeak$param.hat[[ length(maxPeak$param.hat) ]]
-      if(abs(log(sq.d.a/sq.d.b )) < maxPeak$merge.logdist+sz.d/5)
-         corePeak <- p
+     p.len <- ifelse(length(p$values)==0, p$n.hat, length(p$values))
+     corePeak.len <- ifelse(
+           length(corePeak$values)==0, corePeak$n.hat, length(corePeak$values))
+     sz.d <- log(p.len/corePeak.len)
+     if(is.nan(sz.d) || sz.d < 0) next
+     sq.d.a <- as.numeric(tail(p$param.hat, n=1))
+     sq.d.b <- as.numeric(tail(maxPeak$param.hat, n=1))
+     if(p$log) sq.d.a <- exp(sq.d.a)
+     if(corePeak$log) sq.d.b <- exp(sq.d.b)
+     if(abs(log(sq.d.a/sq.d.b)) < log(1.75)+sz.d/5) corePeak <- p
    }
    return(corePeak)
 }
@@ -580,43 +762,204 @@ enve.recplot2.extractWindows <- function
    ### Extract windows significantly below (or above) the peak in sequencing
    ### depth.
       (rp,
-      ### Recruitment plot, a enve.Recplot2 object.
+      ### Recruitment plot, a enve.RecPlot2 object.
       peak,
-      ### Peak, a enve.RecPlot2.Peak object. If list, it is assumed to be a list
-      ### of enve.RecPlot2.Peak objects, in which case the core peak is used
-      ### (see enve.recplot2.corePeak).
+      ### Peak, an `enve.RecPlot2.Peak` object. If list, it is assumed to be a
+      ### list of enve.RecPlot2.Peak objects, in which case the core peak is
+      ### used (see `enve.recplot2.corePeak`).
       lower.tail=TRUE,
       ### If FALSE, it returns windows significantly above the peak in
       ### sequencing depth.
       significance=0.05,
       ### Significance threshold (alpha) to select windows.
       seq.names=FALSE
-      ### Returns subject sequence names instead of a vector of Booleans. It
-      ### assumes that the recruitment plot was generated with pos.breaks=0.
+      ### Returns subject sequence names instead of a vector of Booleans. If
+      ### the recruitment plot was generated with pos.breaks=0 it returns a
+      ### vector of characters (the sequence identifiers), otherwise it returns
+      ### a data.frame with a name column and two columns of coordinates.
       ){
    # Determine the threshold
    if(is.list(peak)) peak <- enve.recplot2.corePeak(peak)
    par <- peak$param.hat
    par[["p"]] <- ifelse(lower.tail, significance, 1-significance)
    thr <- do.call(ifelse(length(par)==4, qsn, qnorm), par)
+   if(peak$log) thr <- exp(thr)
-   # Estimate sequencing depths per window
-   pos.cnts.in <- rp$pos.counts.in
-   pos.breaks  <- rp$pos.breaks
-   pos.binsize <- (pos.breaks[-1] - pos.breaks[-length(pos.breaks)])
-   seqdepth.in <- pos.cnts.in/pos.binsize
    # Select windows past the threshold
+   seqdepth.in <- enve.recplot2.seqdepth(rp)
    if(lower.tail){
       sel <- seqdepth.in < thr
    }else{
       sel <- seqdepth.in > thr
    }
+   # seq.names=FALSE
    if(!seq.names) return(sel)
-   if(length(seqdepth.in) != length(rp$seq.names))
-      stop(paste("Requesting subject sequence names, but the recruitment plot",
-         "was not generated with pos.breaks=0."))
-   return(rp$seq.names[sel])
+   # seq.names=TRUE and pos.breaks=0
+   if(length(rp$pos.breaks)==length(rp$seq.breaks) &&
+         rp$pos.breaks==rp$seq.breaks)
+           return(rp$seq.names[sel])
+   # seq.names=TRUE and pos.breaks!=0
+   return(enve.recplot2.coordinates(rp,sel))
+   ### Returns a vector of logicals if `seq.names=FALSE`. If `seq.names=TRUE`,
+   ### it returns a vector of characters if the object was built with
+   ### `pos.breaks=0` or a data.frame with four columns otherwise: name.from,
+   ### name.to, pos.from, and pos.to (see `enve.recplot2.coordinates`).
+}
+enve.recplot2.compareIdentities <- function
+  ### Compare the distribution of identities between two enve.RecPlot2 objects.
+    (x,
+    ### First enve.RecPlot2 object.
+    y,
+    ### Second enve.RecPlot2 object.
+    method="hellinger",
+    ### Distance method to use. This should be (an unambiguous abbreviation of)
+    ### one of:
+    ### "hellinger" (Hellinger, 1909, doi:10.1515/crll.1909.136.210),
+    ### "bhattacharyya" (Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35),
+    ### "kl" or "kullback-leibler" (Kullback & Leibler, 1951,
+    ### doi:10.1214/aoms/1177729694), or "euclidean".
+    smooth.par=NULL,
+    ### Smoothing parameter for cubic spline smoothing. Use 0 for no smoothing.
+    ### Use NULL to automatically determine this value using leave-one-out
+    ### cross-validation (see `smooth.spline` parameter `spar`).
+    pseudocounts=0,
+    ### Smoothing parameter for Laplace smoothing. Use 0 for no smoothing, or
+    ### 1 for add-one smoothing.
+    max.deviation=0.75
+    ### Maximum mean deviation between identity breaks tolerated (as percent
+    ### identity). Difference in number of id.breaks is never tolerated.
+    ){
+  METHODS <- c("hellinger","bhattacharyya","kullback-leibler","kl","euclidean")
+  i.meth <- pmatch(method, METHODS)
+  if (is.na(i.meth)) stop("Invalid distance ", method)
+  if(!inherits(x, "enve.RecPlot2"))
+    stop("'x' must inherit from class `enve.RecPlot2`")
+  if(!inherits(y, "enve.RecPlot2"))
+    stop("'y' must inherit from class `enve.RecPlot2`")
+  if(length(x$id.breaks) != length(y$id.breaks))
+    stop("'x' and 'y' must have the same number of `id.breaks`")
+  dev <- mean(abs(x$id.breaks - y$id.breaks))
+  if(dev > max.deviation)
+    stop("'x' and 'y' must have similar `id.breaks`; exceeding max.deviation: ",
+          dev)
+  x.cnt <- x$id.counts
+  y.cnt <- y$id.counts
+  # Optional cubic-spline smoothing of the identity histograms; negative
+  # fitted values are truncated at zero so they remain valid counts
+  if(is.null(smooth.par) || smooth.par > 0){
+    x.mids <- (x$id.breaks[-1] + x$id.breaks[-length(x$id.breaks)])/2
+    y.mids <- (y$id.breaks[-1] + y$id.breaks[-length(y$id.breaks)])/2
+    p.spline <- smooth.spline(x.mids, x.cnt, spar=smooth.par)
+    q.spline <- smooth.spline(y.mids, y.cnt, spar=smooth.par)
+    x.cnt <- pmax(p.spline$y, 0)
+    y.cnt <- pmax(q.spline$y, 0)
+  }
+  # Laplace smoothing and normalization to probability vectors
+  a <- as.numeric(pseudocounts)
+  p <- (x.cnt + a) / sum(x.cnt + a)
+  q <- (y.cnt + a) / sum(y.cnt + a)
+  d <- NA
+  if(i.meth == 1L){
+    # Hellinger distance, bounded in [0, 1]
+    d <- sqrt(sum((sqrt(p) - sqrt(q))**2))/sqrt(2)
+  }else if(i.meth == 2L){
+    # Bhattacharyya distance: minus the natural logarithm of the
+    # Bhattacharyya coefficient, so that identical distributions are at
+    # distance 0 and non-overlapping distributions are at distance Inf
+    d <- -log(sum(sqrt(p*q)))
+  }else if(i.meth %in% c(3L, 4L)){
+    # Kullback-Leibler divergence of `q` from `p`; bins with p=0 contribute
+    # nothing, but q must be positive wherever p is
+    sel <- p>0
+    if(any(q[sel]==0))
+      stop("Undefined distance without absolute continuity, use pseudocounts")
+    d <- -sum(p[sel]*log(q[sel]/p[sel]))
+  }else if(i.meth == 5L){
+    d <- sqrt(sum((q-p)**2))
+  }
+  return(d)
+  ### Returns a numeric value with the distance between the identity
+  ### distributions of `x` and `y` under the selected `method`.
+}
+enve.recplot2.coordinates <- function
+  ### Returns the sequence name and coordinates of the requested position bins.
+    (x,
+    ### `enve.RecPlot2` object.
+    bins
+    ### Vector of selected bins to return. It can be a vector of logical values
+    ### with the same length as `x$pos.breaks`-1 or a vector of integers. If
+    ### missing, returns the coordinates of all windows.
+    ){
+  if(!inherits(x, "enve.RecPlot2"))
+    stop("'x' must inherit from class `enve.RecPlot2`")
+  if(missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
+  if(!is.vector(bins)) stop("'bins' must be a vector")
+  if(is.logical(bins)) bins <- which(bins)
+  # Pre-allocate the four documented columns so that they are present even
+  # if no bin is selected or none of the selected bins can be translated
+  n.bins <- length(bins)
+  y <- data.frame(
+        name.from=rep(NA_character_, n.bins), pos.from=rep(NA_real_, n.bins),
+        name.to=rep(NA_character_, n.bins), pos.to=rep(NA_real_, n.bins),
+        stringsAsFactors=FALSE, row.names=bins)
+  seq.start <- x$seq.breaks[-length(x$seq.breaks)]
+  seq.end   <- x$seq.breaks[-1]
+  # `seq_along` (instead of `1:length`) iterates zero times on empty input
+  for(i in seq_along(bins)){
+    j <- bins[i]
+    # Concatenated coordinates
+    cc <- x$pos.breaks[c(j, j+1)]
+    # Find the corresponding `seq.breaks`
+    sb.from <- which(cc[1] >= seq.start & cc[1] <  seq.end)
+    sb.to   <- which(cc[2] >  seq.start & cc[2] <= seq.end)
+    # Translate coordinates; bins without a unique sequence at both ends are
+    # left as NA
+    # NOTE(review): the offset is *added* to `seq.breaks`; if `seq.breaks`
+    # holds the start of each sequence in concatenated coordinates a
+    # subtraction would be expected -- confirm against the object builder.
+    if(length(sb.from)==1 && length(sb.to)==1){
+      y[i, 'name.from'] <- x$seq.names[sb.from]
+      y[i, 'pos.from'] <- floor(x$seq.breaks[sb.from] + cc[1] - 1)
+      y[i, 'name.to']   <- x$seq.names[sb.to]
+      y[i, 'pos.to']   <- ceiling(x$seq.breaks[sb.to] + cc[2] - 1)
+    }
+  }
+  return(y)
+  ### Returns a data.frame with four columns: name.from (character), pos.from
+  ### (numeric) name.to (character), and pos.to (numeric). The first two
+  ### correspond to sequence and position of the start point of the bin, the
+  ### last two correspond to the sequence and position of the end point of the
+  ### bin.
+}
+enve.recplot2.seqdepth <- function
+  ### Calculate the sequencing depth of the given window(s)
+    (x,
+    ### `enve.RecPlot2` object.
+    sel,
+    ### Window(s) for which the sequencing depth is to be calculated. If not
+    ### passed, it returns the sequencing depth of all windows
+    low.identity=FALSE
+    ### A logical indicating if the sequencing depth is to be estimated only
+    ### with low-identity matches. By default, only high-identity matches are
+    ### used.
+    ){
+  if(!inherits(x, "enve.RecPlot2"))
+    stop("'x' must inherit from class `enve.RecPlot2`")
+  # Honour `low.identity`, which was previously accepted but ignored.
+  # NOTE(review): assumes the low-identity counts per position bin are stored
+  # in `x$pos.counts.out`, mirroring `x$pos.counts.in` -- confirm against the
+  # `enve.RecPlot2` class definition.
+  if(low.identity){
+    pos.cnts <- x$pos.counts.out
+  }else{
+    pos.cnts <- x$pos.counts.in
+  }
+  # Depth = counts per bin divided by the width of the bin
+  pos.breaks  <- x$pos.breaks
+  pos.binsize <- (pos.breaks[-1] - pos.breaks[-length(pos.breaks)])
+  seqdepth <- pos.cnts/pos.binsize
+  if(missing(sel)) return(seqdepth)
+  return(seqdepth[sel])
+  ### Returns a numeric vector of sequencing depths (in bp/bp).
+}
+enve.recplot2.ANIr <- function
+  ### Estimate the Average Nucleotide Identity from reads (ANIr) from a
+  ### recruitment plot, as the mean of the identity bin midpoints weighted by
+  ### the counts of each identity bin.
+    (x,
+    ### `enve.RecPlot2` object.
+    range=c(0,Inf)
+    ### Range of identities to be considered. By default, the full range
+    ### is used (note that the upper boundary is `Inf` and not 100 because
+    ### recruitment plots can also be built with bit-scores). To use only
+    ### intra-population matches (with identities), use c(95,100). To use only
+    ### inter-population values, use c(0,95).
+    ){
+  if(!inherits(x, "enve.RecPlot2"))
+    stop("'x' must inherit from class `enve.RecPlot2`")
+  # Midpoint of every identity bin
+  breaks <- x$id.breaks
+  mids <- (breaks[-1] + breaks[-length(breaks)])/2
+  # Discard the counts of bins whose midpoint lies outside the range
+  weights <- x$id.counts
+  outside <- (mids < range[1]) | (mids > range[2])
+  weights[outside] <- 0
+  # Count-weighted mean of the midpoints
+  anir <- sum(mids*weights/sum(weights))
+  return(anir)
 }
 #==============> Define internal functions
@@ -640,21 +983,67 @@ enve.recplot2.__counts <- function
    return(counts);
 }
+enve.recplot2.findPeaks.__emauto_one <- function
+  ### Internal ancillary function (see `enve.recplot2.findPeaks.emauto`).
+  ### Fits `comp` components with `enve.recplot2.findPeaks.em`, scores the
+  ### fit with `do_crit`, and returns `best` updated with the new fit stored
+  ### in `pstore` and, if its score is higher, as the new best `peaks`.
+    (x, comp, do_crit, best, verbose, ...){
+  fit <- enve.recplot2.findPeaks.em(x=x, components=comp, ...)
+  # Mean & sd for each component, and n-1 free alpha parameters
+  n.par <- comp*3 - 1
+  score <- do_crit(fit[[1]]$err.res, n.par, fit[[1]]$n.total)
+  if(verbose){
+    cat(comp,'\t| LL =', fit[[1]]$err.res, '\t| Estimate =', score,
+          ifelse(score > best[['crit']], '*', ''), '\n')
+  }
+  # Keep this fit as the best one if it improves the criterion
+  if(score > best[['crit']]){
+    best[['crit']] <- score
+    best[['peaks']] <- fit
+  }
+  # Every fit is stored, indexed by the number of components
+  best[['pstore']][[comp]] <- fit
+  return(best)
+}
+enve.recplot2.findPeaks.__em_e <- function
+  ### Internal ancillary function (see `enve.recplot2.findPeaks.em`).
+  ### Evaluates a mixture of normal distributions described by `theta` (a
+  ### list with `mu`, `sd`, and `alpha`, one entry per component) on `x`.
+  ### Returns a list with `ll` (sum of the logarithms of the mixture density
+  ### at each value of `x`) and `posterior` (matrix with one row per value
+  ### and one column per component, each row normalized to add up to 1).
+    (x, theta){
+  k <- length(theta[['mu']])
+  # Density of `x` under the i-th component, scaled by its mixing proportion
+  weighted.density <- function(i){
+    dnorm(x, theta[['mu']][i], theta[['sd']][i])*theta[['alpha']][i]
+  }
+  dens <- do.call(cbind, lapply(1:k, weighted.density))
+  mixture <- rowSums(dens)
+  result <- list(ll=sum(log(mixture)), posterior=dens / mixture)
+  return(result)
+}
+enve.recplot2.findPeaks.__em_m <- function
+  ### Internal ancillary function (see `enve.recplot2.findPeaks.em`).
+  ### Re-estimates the parameters of a mixture of normal distributions from
+  ### the values `x` and the `posterior` matrix (one row per value of `x`,
+  ### one column per component). Returns a list with `mu`, `sd`, and `alpha`
+  ### (one entry per component).
+    (x, posterior){
+  # Effective number of observations assigned to each component
+  n <- colSums(posterior)
+  # Posterior-weighted mean of each component
+  mu <- colSums(posterior * x) / n
+  # Squared deviation of every value from the mean of every component.
+  # `outer` pairs column j with mu[j]; subtracting `mu` from a matrix would
+  # instead recycle it down the rows and mix up the means of the components.
+  sq.dev <- outer(x, mu, "-")^2
+  sd <- sqrt( colSums(posterior * sq.dev) / n )
+  # Mixing proportion of each component
+  alpha <- n/length(x)
+  return(list(mu=mu, sd=sd, alpha=alpha))
+}
 enve.recplot2.__peakHist <- function
    ### Internal ancilliary function (see `enve.RecPlot2.Peak`).
       (x, mids, counts=TRUE){
    d.o <- x$param.hat
-   d.o$x <- mids
+   if(length(x$log)==0) x$log <- FALSE
+   if(x$log){
+     d.o$x <- log(mids)
+   }else{
+     d.o$x <- mids
+   }
    prob  <- do.call(paste('d', x$dist, sep=''), d.o)
    if(!counts) return(prob)
    if(length(x$values)>0) return(prob*length(x$values)/sum(prob))
    return(prob*x$n.hat/sum(prob))
 }
-enve.recplot2.__findPeak <- function
-   ### Internall ancilliary function (see `enve.recplot2.findPeaks`).
+enve.recplot2.findPeaks.__mow_one <- function
+   ### Internal ancillary function (see `enve.recplot2.findPeaks.mower`).
       (lsd1, min.points, quant.est, mlv.opts, fitdist.opts, with.skewness,
-      optim.rounds, optim.epsilon, n.total, merge.logdist, verbose
+      optim.rounds, optim.epsilon, n.total, merge.logdist, verbose, log
    ){
    dist	<- ifelse(with.skewness, 'sn', 'norm');
@@ -683,8 +1072,14 @@ enve.recplot2.__findPeak <- function
 	 if(round>1) param.hat <- last.hat;
 	 break;
       }
-      epsilon <- sum((as.numeric(last.last.hat)-as.numeric(param.hat))^2)
-      if(round>2) if(epsilon < optim.epsilon) break;
+      if(round > 1){
+        epsilon1 <- sum((as.numeric(last.hat)-as.numeric(param.hat))^2)
+        if(epsilon1 < optim.epsilon) break;
+        if(round > 2){
+          epsilon2 <- sum((as.numeric(last.last.hat)-as.numeric(param.hat))^2)
+          if(epsilon2 < optim.epsilon) break;
+        }
+      }
    }
    if(verbose) cat('\n')
    if(is.na(param.hat[1]) | is.na(lim[1])) return(NULL);
@@ -695,14 +1090,14 @@ enve.recplot2.__findPeak <- function
    n.hat <- length(lsd1.pop)/diff(quant.est)
    peak <- new('enve.RecPlot2.Peak', dist=dist, values=as.numeric(), mode=mode1,
       param.hat=param.hat, n.hat=n.hat, n.total=n.total,
-      merge.logdist=merge.logdist)
+      merge.logdist=merge.logdist, log=log)
    peak.breaks <- seq(min(lsd1), max(lsd1), length=20)
    peak.cnt <- enve.recplot2.__peakHist(peak,
       (peak.breaks[-length(peak.breaks)]+peak.breaks[-1])/2)
    for(i in 2:length(peak.breaks)){
       values <- lsd1[ (lsd1 >= peak.breaks[i-1]) & (lsd1 < peak.breaks[i]) ]
       n.exp <- peak.cnt[i-1]
-      if(n.exp==0) n.exp=0.1
+      if(is.na(n.exp) | n.exp==0) n.exp <- 0.1
       if(length(values)==0) next
       in.peak <- runif(length(values)) <= n.exp/length(values)
       lsd2 <- c(lsd2, values[!in.peak])
@@ -716,17 +1111,19 @@ enve.recplot2.__findPeak <- function
    attr(peak, 'err.res') <- 1-(cor(hist(lsd.pop, breaks=peak.breaks,
       plot=FALSE)$counts, hist(lsd1, breaks=peak.breaks,
       plot=FALSE)$counts)+1)/2
+   mu <- tail(param.hat, n=1)
+   attr(peak, 'seq.depth') <- ifelse(log, exp(mu), mu)
    if(verbose) cat(' Extracted peak with n =',length(lsd.pop),
 	 'with expected n =',n.hat,'\n')
    return(peak)
 }
-enve.recplot2.__findPeaks <- function
-   ### Internal ancilliary function (see `enve.recplot2.findPeaks`).
+enve.recplot2.findPeaks.__mower <- function
+   ### Internal ancillary function (see `enve.recplot2.findPeaks.mower`).
       (peaks.opts){
    peaks <- list()
    while(length(peaks.opts$lsd1) > peaks.opts$min.points){
-      peak <- do.call(enve.recplot2.__findPeak, peaks.opts)
+      peak <- do.call(enve.recplot2.findPeaks.__mow_one, peaks.opts)
       if(is.null(peak)) break
       peaks[[ length(peaks)+1 ]] <- peak
       peaks.opts$lsd1 <- peak$values.res
@@ -738,7 +1135,10 @@ enve.recplot2.__findPeaks <- function
 enve.recplot2.__whichClosestPeak <- function
    ### Internal ancilliary function (see `enve.recplot2.findPeaks`).
       (peak, peaks){
-   dist <- as.numeric(lapply(peaks, function(x) abs(log(x$param.hat[[ length(x$param.hat) ]]/peak$param.hat[[ length(peak$param.hat) ]] ))))
+   dist <- as.numeric(lapply(peaks,
+         function(x)
+           abs(log(x$param.hat[[ length(x$param.hat) ]] /
+             peak$param.hat[[ length(peak$param.hat) ]] ))))
    dist[ dist==0 ] <- Inf
    return(which.min(dist))
 }