RubyGems - miga-base - Versions diffs - 1.2.17.0 → 1.2.17.1 - Mend

miga-base 1.2.17.0 → 1.2.17.1

Files changed (299) hide show

checksums.yaml +4 -4
data/lib/miga/version.rb +1 -1
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
data/utils/FastAAI/FastAAI +3659 -0
data/utils/FastAAI/FastAAI-legacy/FastAAI +1336 -0
data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +1296 -0
data/utils/FastAAI/README.md +84 -0
data/utils/enveomics/Docs/recplot2.md +244 -0
data/utils/enveomics/Examples/aai-matrix.bash +66 -0
data/utils/enveomics/Examples/ani-matrix.bash +66 -0
data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
data/utils/enveomics/LICENSE.txt +73 -0
data/utils/enveomics/Makefile +52 -0
data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
data/utils/enveomics/Manifest/Tasks/blasttab.json +790 -0
data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
data/utils/enveomics/Manifest/Tasks/other.json +906 -0
data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +650 -0
data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
data/utils/enveomics/Manifest/categories.json +165 -0
data/utils/enveomics/Manifest/examples.json +162 -0
data/utils/enveomics/Manifest/tasks.json +4 -0
data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
data/utils/enveomics/README.md +42 -0
data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +123 -0
data/utils/enveomics/Scripts/Chao1.pl +97 -0
data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
data/utils/enveomics/Scripts/FastA.length.pl +38 -0
data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
data/utils/enveomics/Scripts/FastA.split.pl +55 -0
data/utils/enveomics/Scripts/FastA.split.rb +79 -0
data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
data/utils/enveomics/Scripts/SRA.download.bash +55 -0
data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
data/utils/enveomics/Scripts/Table.barplot.R +31 -0
data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
data/utils/enveomics/Scripts/Table.filter.pl +61 -0
data/utils/enveomics/Scripts/Table.merge.pl +77 -0
data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
data/utils/enveomics/Scripts/Table.replace.rb +69 -0
data/utils/enveomics/Scripts/Table.round.rb +63 -0
data/utils/enveomics/Scripts/Table.split.pl +57 -0
data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
data/utils/enveomics/Scripts/aai.rb +421 -0
data/utils/enveomics/Scripts/ani.rb +362 -0
data/utils/enveomics/Scripts/anir.rb +137 -0
data/utils/enveomics/Scripts/clust.rand.rb +102 -0
data/utils/enveomics/Scripts/gi2tax.rb +103 -0
data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +88 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +74 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
data/utils/enveomics/Scripts/ogs.rb +104 -0
data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
data/utils/enveomics/Scripts/rbm.rb +108 -0
data/utils/enveomics/Scripts/sam.filter.rb +148 -0
data/utils/enveomics/Tests/Makefile +10 -0
data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
data/utils/enveomics/Tests/alkB.nwk +1 -0
data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
data/utils/enveomics/Tests/hiv1.faa +59 -0
data/utils/enveomics/Tests/hiv1.fna +134 -0
data/utils/enveomics/Tests/hiv2.faa +70 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
data/utils/enveomics/build_enveomics_r.bash +45 -0
data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
data/utils/enveomics/enveomics.R/R/utils.R +80 -0
data/utils/enveomics/enveomics.R/README.md +81 -0
data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
data/utils/enveomics/globals.mk +8 -0
data/utils/enveomics/manifest.json +9 -0
data/utils/multitrim/Multitrim How-To.pdf +0 -0
data/utils/multitrim/README.md +67 -0
data/utils/multitrim/multitrim.py +1555 -0
data/utils/multitrim/multitrim.yml +13 -0
metadata +301 -5

data/utils/enveomics/enveomics.R/R/recplot2.R ADDED Viewed

@@ -0,0 +1,1631 @@
+#==============> Define S4 classes
+#' Enveomics: Recruitment Plot (2) - S4 Class
+#'
+#' Enve-omics representation of Recruitment plots. This object can
+#' be produced by \code{\link{enve.recplot2}} and can be drawn with
+#' \code{plot} (see \code{\link{plot.enve.RecPlot2}}).
+#'
+#' @slot counts \code{(matrix)} Counts as a two-dimensional histogram.
+#' @slot pos.counts.in \code{(numeric)} Counts of in-group hits per position bin.
+#' @slot pos.counts.out \code{(numeric)} Counts of out-group hits per position bin.
+#' @slot id.counts \code{(numeric)} Counts per ID bin.
+#' @slot id.breaks \code{(numeric)} Breaks of identity bins.
+#' @slot pos.breaks \code{(numeric)} Breaks of position bins.
+#' @slot pos.names \code{(character)} Names of the position bins.
+#' @slot seq.breaks \code{(numeric)} Breaks of input sequences.
+#' Limits of the subject sequences after concatenation.
+#' @slot peaks \code{(list)} Peaks identified in the recplot.
+#' @slot seq.names \code{(character)} Names of the subject sequences.
+#' @slot id.metric \code{(character)} Metric used as 'identity'.
+#' @slot id.ingroup \code{(logical)} Identity bins considered in-group.
+#' @slot call \code{(call)} Call producing this object.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @exportClass enve.RecPlot2
+enve.RecPlot2 <- setClass(
+  "enve.RecPlot2",
+  representation(
+    counts = "matrix",
+    pos.counts.in = "numeric",
+    pos.counts.out = "numeric",
+    id.counts = "numeric",
+    id.breaks = "numeric",
+    pos.breaks = "numeric",
+    pos.names = "character",
+    seq.breaks = "numeric",
+    peaks = "list",
+    seq.names = "character",
+    id.metric = "character",
+    id.ingroup = "logical",
+    call = "call"
+  ),
+  package = "enveomics.R"
+)
+#' Enveomics: Recruitment Plot (2) Peak - S4 Class
+#'
+#' Enve-omics representation of a peak in the sequencing depth histogram
+#' of a Recruitment plot (see \code{\link{enve.recplot2.findPeaks}}).
+#'
+#' @slot dist \code{(character)}
+#' Distribution of the peak. Currently supported: \code{norm} (normal) and
+#' \code{sn} (skew-normal).
+#' @slot values \code{(numeric)}
+#' Sequencing depth values predicted to make up the peak.
+#' @slot values.res \code{(numeric)}
+#' Sequencing depth values not explained by this or previously identified
+#' peaks.
+#' @slot mode \code{(numeric)}
+#' Seed-value of mode anchoring the peak.
+#' @slot param.hat \code{(list)}
+#' Parameters of the distribution. A list of two values if dist=\code{norm} (sd
+#' and mean), or three values if dist=\code{sn} (omega=scale, alpha=shape, and
+#' xi=location). Note that the "dispersion" parameter is always first and
+#' the "location" parameter is always last.
+#' @slot n.hat \code{(numeric)}
+#' Number of bins estimated to be explained by this peak. This should
+#' ideally be equal to the length of \code{values}, but it's not an integer.
+#' @slot n.total \code{(numeric)}
+#' Total number of bins from which the peak was extracted. I.e., total
+#' number of position bins with non-zero sequencing depth in the recruitment
+#' plot (regardless of peak count).
+#' @slot err.res \code{(numeric)}
+#' Error left after adding the peak (mower) or log-likelihood (em or emauto).
+#' @slot merge.logdist \code{(numeric)}
+#' Attempted \code{merge.logdist} parameter.
+#' @slot seq.depth \code{(numeric)}
+#' Best estimate available for the sequencing depth of the peak (centrality).
+#' @slot log \code{(logical)}
+#' Indicates if the estimation was performed in natural logarithm space.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @exportClass enve.RecPlot2.Peak
+enve.RecPlot2.Peak <- setClass(
+  "enve.RecPlot2.Peak",
+  representation(
+    dist = "character",
+    values = "numeric",
+    values.res = "numeric",
+    mode = "numeric",
+    param.hat = "list",
+    n.hat = "numeric",
+    n.total = "numeric",
+    err.res = "numeric",
+    merge.logdist = "numeric",
+    seq.depth = "numeric",
+    log = "logical"
+  )
+)
+#' Attribute accessor
+#'
+#' Retrieves the attribute of \code{x} called \code{name} by means of
+#' \code{attr(x, name)}.
+#'
+#' @param x Object
+#' @param name Attribute name
+setMethod(
+  f = "$",
+  signature = "enve.RecPlot2",
+  definition = function(x, name) {
+    attr(x, name)
+  }
+)
+#' Attribute accessor
+#'
+#' Retrieves the attribute of \code{x} called \code{name} by means of
+#' \code{attr(x, name)}.
+#'
+#' @param x Object
+#' @param name Attribute name
+setMethod(
+  f = "$",
+  signature = "enve.RecPlot2.Peak",
+  definition = function(x, name) {
+    attr(x, name)
+  }
+)
+#==============> Define S4 methods
+#' Enveomics: Recruitment Plot (2)
+#'
+#' Plots an \code{\link{enve.RecPlot2}} object.
+#'
+#' @param x
+#' \code{\link{enve.RecPlot2}} object to plot.
+#' @param layout
+#' Matrix indicating the position of the different panels in the layout,
+#' where:
+#' \itemize{
+#'   \item 0: Empty space
+#'   \item 1: Counts matrix
+#'   \item 2: position histogram (sequencing depth)
+#'   \item 3: identity histogram
+#'   \item 4: Populations histogram (histogram of sequencing depths)
+#'   \item 5: Color scale for the counts matrix (vertical)
+#'   \item 6: Color scale of the counts matrix (horizontal)
+#' }
+#' Only panels indicated here will be plotted. To plot only one panel
+#' simply set this to the number of the panel you want to plot.
+#' @param panel.fun
+#' List of functions to be executed after drawing each panel. Use the
+#' indices in \code{layout} (as characters) as keys. Functions for indices
+#' missing in \code{layout} are ignored. For example, to add a vertical line
+#' at the 3Mbp mark in both the position histogram and the counts matrix:
+#' \code{list('1'=function() abline(v=3), '2'=function() abline(v=3))}.
+#' Note that the X-axis in both panels is in Mbp by default. To change
+#' this behavior, set \code{pos.units} accordingly.
+#' @param widths
+#' Relative widths of the columns of \code{layout}.
+#' @param heights
+#' Relative heights of the rows of \code{layout}.
+#' @param palette
+#' Colors to be used to represent the counts matrix, sorted from no hits
+#' to the maximum sequencing depth.
+#' @param underlay.group
+#' If TRUE, it indicates the in-group and out-group areas coloured based
+#' on \code{in.col} and \code{out.col}. Requires support for semi-transparency.
+#' @param peaks.col
+#' If not \code{NA}, it attempts to represent peaks in the population histogram
+#' in the specified color. Set to \code{NA} to avoid peak-finding.
+#' @param use.peaks
+#' A list of \code{\link{enve.RecPlot2.Peak}} objects, as returned by
+#' \code{\link{enve.recplot2.findPeaks}}. If passed, \code{peaks.opts} is ignored.
+#' @param id.lim
+#' Limits of identities to represent.
+#' @param pos.lim
+#' Limits of positions to represent (in bp, regardless of \code{pos.units}).
+#' @param pos.units
+#' Units in which the positions should be represented (powers of 1,000
+#' base pairs).
+#' @param mar
+#' Margins of the panels as a list, with the character representation of
+#' the number of the panel as index (see \code{layout}).
+#' @param pos.splines
+#' Smoothing parameter for the splines in the position histogram. Zero
+#' (0) for no splines. Use \code{NULL} to automatically detect by leave-one-out
+#' cross-validation.
+#' @param id.splines
+#' Smoothing parameter for the splines in the identity histogram. Zero
+#' (0) for no splines. Use \code{NULL} to automatically detect by leave-one-out
+#' cross-validation.
+#' @param in.lwd
+#' Line width for the sequencing depth of in-group matches.
+#' @param out.lwd
+#' Line width for the sequencing depth of out-group matches.
+#' @param id.lwd
+#' Line width for the identity histogram.
+#' @param in.col
+#' Color associated to in-group matches.
+#' @param out.col
+#' Color associated to out-group matches.
+#' @param id.col
+#' Color for the identity histogram.
+#' @param breaks.col
+#' Color of the vertical lines indicating sequence breaks.
+#' @param peaks.opts
+#' Options passed to \code{\link{enve.recplot2.findPeaks}},
+#' if \code{peaks.col} is not \code{NA}.
+#' @param ...
+#' Any other graphic parameters (currently ignored).
+#'
+#' @return
+#' Returns a list of \code{\link{enve.RecPlot2.Peak}} objects (see
+#' \code{\link{enve.recplot2.findPeaks}}). If \code{peaks.col=NA} or
+#' \code{layout} doesn't include 4, returns \code{NA}.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @method plot enve.RecPlot2
+#' @export
+plot.enve.RecPlot2 <- function
+(x,
+ layout=matrix(c(5,5,2,1,4,3), nrow=2),
+ panel.fun=list(),
+ widths=c(1,7,2),
+ heights=c(1,2),
+ palette=grey((100:0)/100),
+ underlay.group=TRUE,
+ peaks.col='darkred',
+ use.peaks,
+ id.lim=range(x$id.breaks),
+ pos.lim=range(x$pos.breaks),
+ pos.units=c('Mbp','Kbp','bp'),
+ mar=list('1'=c(5,4,1,1)+.1, '2'=c(ifelse(any(layout==1),1,5),4,4,1)+.1,
+          '3'=c(5,ifelse(any(layout==1),1,4),1,2)+0.1,
+          '4'=c(ifelse(any(layout==1),1,5),ifelse(any(layout==2),1,4),4,2)+0.1,
+          '5'=c(5,3,4,1)+0.1, '6'=c(5,4,4,2)+0.1),
+ pos.splines=0,
+ id.splines=1/2,
+ in.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
+ out.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
+ id.lwd=ifelse(is.null(id.splines) || id.splines>0, 1/2, 2),
+ in.col='darkblue',
+ out.col='lightblue',
+ id.col='black',
+ breaks.col='#AAAAAA40',
+ peaks.opts=list(),
+ ...
+){
+  pos.units	<- match.arg(pos.units);
+  pos.factor	<- ifelse(pos.units=='bp',1,ifelse(pos.units=='Kbp',1e3,1e6));
+  pos.lim	<- pos.lim/pos.factor;
+  lmat <- layout;
+  for(i in 1:6) if(!any(layout==i)) lmat[layout>i] <- lmat[layout>i]-1;
+  layout(lmat, widths=widths, heights=heights);
+  ori.mar <- par('mar');
+  # Essential vars
+  counts	<- x$counts
+  id.ingroup	<- x$id.ingroup
+  id.counts	<- x$id.counts
+  id.breaks	<- x$id.breaks
+  id.mids	<- (id.breaks[-length(id.breaks)]+id.breaks[-1])/2
+  id.binsize	<- id.breaks[-1] - id.breaks[-length(id.breaks)]
+  pos.counts.in  <- x$pos.counts.in
+  pos.counts.out <- x$pos.counts.out
+  pos.breaks   <- x$pos.breaks/pos.factor
+  pos.mids     <- (pos.breaks[-length(pos.breaks)]+pos.breaks[-1])/2
+  pos.binsize  <- (pos.breaks[-1] - pos.breaks[-length(pos.breaks)])*pos.factor
+  seqdepth.in  <- pos.counts.in/pos.binsize
+  seqdepth.out <- pos.counts.out/pos.binsize
+  seqdepth.lim <- range(c(seqdepth.in[seqdepth.in>0],
+                          seqdepth.out[seqdepth.out>0]))*c(1/2,2)
+  if(underlay.group){
+    in.bg  <- do.call(rgb, c(as.list(col2rgb(in.col)),
+                             list(maxColorValue=256, alpha=62)));
+    out.bg <- do.call(rgb, c(as.list(col2rgb(out.col)[,1]),
+                             list(maxColorValue=256, alpha=52)));
+  }
+  # [1] Counts matrix
+  if(any(layout==1)){
+    par(mar=mar[['1']]);
+    plot(1, t='n', bty='l',
+         xlim=pos.lim, xlab=paste('Position in genome (',pos.units,')',sep=''),
+         xaxs='i', ylim=id.lim,  ylab=x$id.metric, yaxs='i');
+    if(underlay.group){
+      rect(pos.lim[1], id.lim[1], pos.lim[2],
+           min(id.breaks[c(id.ingroup,TRUE)]), col=out.bg, border=NA);
+      rect(pos.lim[1], min(id.breaks[c(id.ingroup,TRUE)]), pos.lim[2],
+           id.lim[2], col=in.bg,  border=NA);
+    }
+    abline(v=x$seq.breaks/pos.factor, col=breaks.col);
+    image(x=pos.breaks, y=id.breaks, z=log10(counts),col=palette,
+          bg=grey(1,0), breaks=seq(-.1,log10(max(counts)),
+                                   length.out=1+length(palette)), add=TRUE);
+    if(exists('1',panel.fun)) panel.fun[['1']]();
+  }
+  # [2] Position histogram
+  if(any(layout==2)){
+    par(mar=mar[['2']]);
+    if(any(layout==1)){
+      xlab=''
+      xaxt='n'
+    }else{
+      xlab=paste('Position in genome (',pos.units,')',sep='')
+      xaxt='s'
+    }
+    plot(1,t='n', bty='l', log='y',
+         xlim=pos.lim, xlab=xlab, xaxt=xaxt, xaxs='i',
+         ylim=seqdepth.lim, yaxs='i', ylab='Sequencing depth (X)');
+    abline(v=x$seq.breaks/pos.factor, col=breaks.col)
+    pos.x <- rep(pos.breaks,each=2)[-c(1,2*length(pos.breaks))]
+    pos.f <- rep(seqdepth.in,each=2)
+    lines(pos.x, rep(seqdepth.out,each=2), lwd=out.lwd, col=out.col);
+    lines(pos.x, pos.f, lwd=in.lwd, col=in.col);
+    if(is.null(pos.splines) || pos.splines > 0){
+      pos.spline <- smooth.spline(pos.x[pos.f>0], log(pos.f[pos.f>0]),
+                                  spar=pos.splines)
+      lines(pos.spline$x, exp(pos.spline$y), lwd=2, col=in.col)
+    }
+    if(any(pos.counts.out==0)) rect(pos.breaks[c(pos.counts.out==0,FALSE)],
+                                    seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.out==0)],
+                                    seqdepth.lim[1]*3/2, col=out.col, border=NA);
+    if(any(pos.counts.in==0))  rect(pos.breaks[c(pos.counts.in==0,FALSE)],
+                                    seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.in==0)],
+                                    seqdepth.lim[1]*3/2, col=in.col,  border=NA);
+    if(exists('2',panel.fun)) panel.fun[['2']]();
+  }
+  # [3] Identity histogram
+  if(any(layout==3)){
+    par(mar=mar[['3']]);
+    if(any(layout==1)){
+      ylab=''
+      yaxt='n'
+    }else{
+      ylab=x$id.metric
+      yaxt='s'
+    }
+    if(sum(id.counts>0) >= 4){
+      id.counts.range <- range(id.counts[id.counts>0])*c(1/2,2);
+      plot(1,t='n', bty='l', log='x',
+           xlim=id.counts.range, xlab='bps per bin', xaxs='i',
+           ylim=id.lim, yaxs='i', ylab=ylab, yaxt=yaxt);
+      if(underlay.group){
+        rect(id.counts.range[1], id.lim[1], id.counts.range[2],
+             min(id.breaks[c(id.ingroup,TRUE)]), col=out.bg, border=NA);
+        rect(id.counts.range[1], min(id.breaks[c(id.ingroup,TRUE)]),
+             id.counts.range[2], id.lim[2], col=in.bg,  border=NA);
+      }
+      id.f <- rep(id.counts,each=2)
+      id.x <- rep(id.breaks,each=2)[-c(1,2*length(id.breaks))]
+      lines(id.f, id.x, lwd=id.lwd, col=id.col);
+      if(is.null(id.splines) || id.splines > 0){
+        id.spline <- smooth.spline(id.x[id.f>0], log(id.f[id.f>0]),
+                                   spar=id.splines)
+        lines(exp(id.spline$y), id.spline$x, lwd=2, col=id.col)
+      }
+    }else{
+      plot(1,t='n',bty='l',xlab='', xaxt='n', ylab='', yaxt='n')
+      text(1,1,labels='Insufficient data', srt=90)
+    }
+    if(exists('3',panel.fun)) panel.fun[['3']]();
+  }
+  # [4] Populations histogram
+  peaks <- NA;
+  if(any(layout==4)){
+    par(mar=mar[['4']]);
+    if(any(layout==2)){
+      ylab=''
+      yaxt='n'
+    }else{
+      ylab='Sequencing depth (X)'
+      yaxt='s'
+    }
+    h.breaks <- seq(log10(seqdepth.lim[1]*2), log10(seqdepth.lim[2]/2),
+                    length.out=200);
+    h.in <- hist(log10(seqdepth.in), breaks=h.breaks, plot=FALSE);
+    h.out <- hist(log10(seqdepth.out), breaks=h.breaks, plot=FALSE);
+    plot(1, t='n', log='y',
+         xlim=range(c(h.in$counts,h.out$counts,sum(pos.counts.in==0))),
+         xaxs='r', xlab='', xaxt='n', ylim=seqdepth.lim, yaxs='i', ylab=ylab,
+         yaxt=yaxt)
+    y.tmp.in <- c(rep(10^h.in$breaks,each=2),seqdepth.lim[1]*c(1,1,3/2,3/2))
+    y.tmp.out <- c(rep(10^h.out$breaks,each=2),seqdepth.lim[1]*c(1,1,3/2,3/2))
+    lines(c(0,rep(h.out$counts,each=2),0,0,rep(sum(pos.counts.out==0),2),0),
+          y.tmp.out, col=out.col)
+    polygon(c(0,rep(h.in$counts,each=2),0,0,rep(sum(pos.counts.in==0),2),0),
+            y.tmp.in, border=NA, col=in.col)
+    if(!is.na(peaks.col)){
+      o <- peaks.opts; o$x = x;
+      if(missing(use.peaks)){
+        peaks <- do.call(enve.recplot2.findPeaks, o)
+      }else{
+        peaks <- use.peaks
+      }
+      h.mids <- (10^h.breaks[-1] + 10^h.breaks[-length(h.breaks)])/2
+      if(!is.null(peaks) & length(peaks)>0){
+        pf <- h.mids*0;
+        for(i in 1:length(peaks)){
+          cnt <- enve.recplot2.__peakHist(peaks[[i]], h.mids)
+          lines(cnt, h.mids, col='red');
+          pf <- pf+cnt;
+          axis(4, at=peaks[[i]]$seq.depth, letters[i], las=1, hadj=1/2)
+        }
+        lines(pf, h.mids, col='red',lwd=1.5);
+        dpt <- signif(as.numeric(lapply(peaks, function(x) x$seq.depth)),2)
+        frx <- signif(100*as.numeric(
+          lapply(peaks,
+                 function(x) ifelse(length(x$values)==0, x$n.hat,
+                                    length(x$values))/x$n.total)), 2)
+        if(peaks[[1]]$err.res < 0){
+          err <- paste(', LL:', signif(peaks[[1]]$err.res, 3))
+        }else{
+          err <- paste(', err:',
+                       signif(as.numeric(lapply(peaks, function(x) x$err.res)), 2))
+        }
+        legend('topright', bty='n', cex=1/2,
+               legend=paste(letters[1:length(peaks)],'. ',
+                            dpt,'X (', frx, '%', err, ')', sep=''))
+      }
+    }
+    if(exists('4',panel.fun)) panel.fun[['4']]();
+  }
+  # [5] Color scale of the counts matrix (vertical)
+  count.bins <- 10^seq(log10(min(counts[counts>0])), log10(max(counts)),
+                       length.out=1+length(palette))
+  if(any(layout==5)){
+    par(mar=mar[['5']]);
+    plot(1,t='n',log='y',xlim=0:1,xaxt='n',xlab='',xaxs='i',
+         ylim=range(count.bins), yaxs='i', ylab='')
+    rect(0,count.bins[-length(count.bins)],1,count.bins[-1],col=palette,
+         border=NA)
+    if(exists('5',panel.fun)) panel.fun[['5']]();
+  }
+  # [6] Color scale of the counts matrix (horizontal)
+  if(any(layout==6)){
+    par(mar=mar[['6']]);
+    plot(1,t='n',log='x',ylim=0:1,yaxt='n',ylab='',yaxs='i',
+         xlim=range(count.bins), xaxs='i',xlab='');
+    rect(count.bins[-length(count.bins)],0,count.bins[-1],1,col=palette,
+         border=NA);
+    if(exists('6',panel.fun)) panel.fun[['6']]();
+  }
+  par(mar=ori.mar);
+  return(peaks);
+}
+#==============> Define core functions
+#' Enveomics: Recruitment Plot (2)
+#'
+#' Produces recruitment plots provided that \code{BlastTab.catsbj.pl} has
+#' been previously executed.
+#'
+#' @param prefix
+#' Path to the prefix of the \code{BlastTab.catsbj.pl} output files. At
+#' least the files .rec and .lim must exist with this prefix.
+#' @param plot
+#' Should the object be plotted?
+#' @param pos.breaks
+#' Breaks in the positions histogram. It can also be a vector of break
+#' points, and values outside the range are ignored. If zero (0), it
+#' uses the sequence breaks as defined in the .lim file, which means
+#' one bin per contig (or gene, if the mapping is against genes). Ignored
+#' if \code{pos.breaks.tsv} is passed.
+#' @param pos.breaks.tsv
+#' Path to a list of (absolute) coordinates to use as position breaks.
+#' This tab-delimited file can be produced by \code{GFF.catsbj.pl}, and it
+#' must contain at least one column: coordinates of the break positions of
+#' each position bin. If it has a second column, this is used as the name
+#' of the position bin that ends at the given coordinate (the first row is
+#' ignored). Any additional columns are currently ignored. If \code{NA},
+#' position bins are determined by \code{pos.breaks}.
+#' @param id.breaks
+#' Breaks in the identity histogram. It can also be a vector of break
+#' points, and values outside the range are ignored.
+#' @param id.free.range
+#' Indicates that the range should be freely set from the observed
+#' values. Otherwise, 70-100\% is included in the identity histogram
+#' (default).
+#' @param id.metric
+#' Metric of identity to be used (Y-axis). Corrected identity is only
+#' supported if the original BLAST file included sequence lengths (i.e.,
+#' the .rec file has a sixth column).
+#' @param id.summary
+#' Function summarizing the identity bins. Other recommended options
+#' include: \code{median} to estimate the median instead of total bins, and
+#' \code{function(x) mlv(x,method='parzen')$M} to estimate the mode.
+#' @param id.cutoff
+#' Cutoff of identity metric above which the hits are considered
+#' \code{in-group}. The default (95\%) corresponds to the ANI level
+#' commonly expected within species.
+#' @param threads
+#' Number of threads to use. A single process is used when
+#' \code{threads = 1} or when the .rec file has fewer than 75,000 rows.
+#' @param verbose
+#' Indicates if the function should report the advance.
+#' @param ...
+#' Any additional parameters supported by \code{\link{plot.enve.RecPlot2}}.
+#'
+#' @return Returns an object of class \code{\link{enve.RecPlot2}}. If
+#' \code{plot = TRUE}, the value returned by the plotting call is attached
+#' to the object as the attribute \code{peaks}.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#' @author Kenji Gerhardt [aut]
+#'
+#' @export
+enve.recplot2 <- function(
+  prefix,
+  plot = TRUE,
+  pos.breaks = 1e3,
+  pos.breaks.tsv = NA,
+  id.breaks = 60,
+  id.free.range = FALSE,
+  id.metric = c('identity', 'corrected identity', 'bit score'),
+  id.summary = sum,
+  id.cutoff = 95,
+  threads = 2,
+  verbose = TRUE,
+  ...
+){
+  # Settings
+  id.metric <- match.arg(id.metric)
+  # Read files
+  if (verbose) cat("Reading files.\n")
+  rec <- read.table(paste(prefix, ".rec", sep = ""),
+    sep = "\t", comment.char = "", quote = "")
+  lim <- read.table(paste(prefix, ".lim", sep = ""),
+    sep = "\t", comment.char = "", quote = "", as.is = TRUE)
+  # Build matrix
+  if (verbose) cat("Building counts matrix.\n")
+  if (id.metric == "corrected identity" && ncol(rec) < 6) {
+    stop("Requesting corr. identity, but .rec file doesn't have 6th column")
+  }
+  # Column of the .rec table holding the requested identity metric
+  rec.idcol <- switch(id.metric,
+                      "identity" = 3, "corrected identity" = 6, 4)
+  pos.names <- as.character(NULL)
+  if (!is.na(pos.breaks.tsv)) {
+    # Explicit break points (and, optionally, bin names) from a file
+    tmp <- read.table(pos.breaks.tsv, sep = "\t", header = FALSE, as.is = TRUE)
+    pos.breaks <- as.numeric(tmp[, 1])
+    if (ncol(tmp) > 1) pos.names <- as.character(tmp[-1, 2])
+  } else if (length(pos.breaks) == 1) {
+    if (pos.breaks > 0) {
+      # Given number of equally-sized bins across all the sequences
+      pos.breaks <- seq(min(lim[, 2]), max(lim[, 3]),
+                        length.out = pos.breaks + 1)
+    } else {
+      # One bin per sequence listed in the .lim file
+      pos.breaks <- c(lim[1, 2], lim[, 3])
+      pos.names  <- lim[, 1]
+    }
+  }
+  if (length(id.breaks) == 1) {
+    id.range.v <- rec[, rec.idcol]
+    if (!id.free.range) id.range.v <- c(id.range.v, 70, 100)
+    id.range.v <- range(id.range.v)
+    id.breaks <- seq(id.range.v[1], id.range.v[2], length.out = id.breaks + 1)
+  }
+  # Run in parallel
+  # If they already set threads to 1 manually, there's no point in launching
+  # clusters, it's just slower. Ditto for small files.
+  if (nrow(rec) < 75000 || threads == 1) {
+    # Wrap rec in the list form that __counts expects; there is a single
+    # result, so it can be used directly
+    counts <- enve.recplot2.__counts(
+      list(rec = rec, verbose = FALSE), pos.breaks = pos.breaks,
+      id.breaks = id.breaks, rec.idcol = rec.idcol)
+  } else {
+    cl <- makeCluster(threads)
+    # Split the rows of rec in consecutive chunks, one per thread; only the
+    # first chunk inherits the verbosity setting
+    rec.l <- list()
+    thl <- ceiling(nrow(rec)/threads)
+    for (i in 0:(threads - 1)) {
+      rec.l[[i + 1]] <- list(
+        rec = rec[(i * thl + 1):min(((i + 1) * thl), nrow(rec)), ],
+        verbose = ifelse(i == 0, verbose, FALSE))
+    }
+    # Always stop the cluster, even if a worker fails (no spooky ghost
+    # clusters)
+    counts.l <- tryCatch(
+      clusterApply(
+        cl, rec.l, enve.recplot2.__counts, pos.breaks = pos.breaks,
+        id.breaks = id.breaks, rec.idcol = rec.idcol),
+      finally = stopCluster(cl))
+    # Add up the partial count matrices
+    counts <- Reduce(`+`, counts.l)
+  }
+  # Estimate 1D histograms
+  if (verbose) cat("Building histograms.\n")
+  id.mids    <- (id.breaks[-length(id.breaks)] + id.breaks[-1])/2
+  id.ingroup <- (id.mids > id.cutoff)
+  id.counts  <- apply(counts, 2, id.summary)
+  # drop = FALSE keeps a matrix even if only one (or no) identity bin falls
+  # on one side of the cutoff, which would otherwise make apply() fail
+  pos.counts.in  <- apply(counts[, id.ingroup, drop = FALSE], 1, sum)
+  pos.counts.out <- apply(counts[, !id.ingroup, drop = FALSE], 1, sum)
+  # Plot and return
+  recplot <- new('enve.RecPlot2',
+                 counts = counts, id.counts = id.counts,
+                 pos.counts.in = pos.counts.in, pos.counts.out = pos.counts.out,
+                 id.breaks = id.breaks, pos.breaks = pos.breaks,
+                 pos.names = pos.names, seq.breaks = c(lim[1, 2], lim[, 3]),
+                 seq.names = lim[, 1], id.ingroup = id.ingroup,
+                 id.metric = id.metric, call = match.call())
+  if (plot) {
+    if (verbose) cat("Plotting.\n")
+    peaks <- plot(recplot, ...)
+    attr(recplot, "peaks") <- peaks
+  }
+  return(recplot)
+}
+#' Enveomics: Recruitment Plot (2) Peak Finder
+#'
+#' Identifies peaks in the population histogram potentially indicating
+#' sub-population mixtures. This is a dispatcher: it forwards \code{x} and
+#' \code{...} to the peak finder selected by \code{method}.
+#'
+#' @param x
+#' An \code{\link{enve.RecPlot2}} object.
+#' @param method
+#' Peak-finder method. This should be one of:
+#' \itemize{
+#'    \item \strong{emauto}
+#'    (Expectation-Maximization with auto-selection of components)
+#'    \item \strong{em}
+#'    (Expectation-Maximization)
+#'    \item \strong{mower}
+#'    (Custom distribution-mowing method)
+#' }
+#' @param ...
+#' Any additional parameters supported by the selected peak finder:
+#' \code{\link{enve.recplot2.findPeaks.emauto}},
+#' \code{\link{enve.recplot2.findPeaks.em}}, or
+#' \code{\link{enve.recplot2.findPeaks.mower}}.
+#'
+#' @return Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks <- function(
+  x,
+  method="emauto",
+  ...
+){
+  # as.character() keeps the name-based lookup even if `method` is numeric
+  # or a factor (switch() would otherwise select by position)
+  peaks <- switch(
+    as.character(method),
+    emauto = enve.recplot2.findPeaks.emauto(x, ...),
+    em     = enve.recplot2.findPeaks.em(x, ...),
+    mower  = enve.recplot2.findPeaks.mower(x, ...),
+    stop("Invalid peak-finder method ", method)
+  )
+  return(peaks)
+}
+#' Enveomics: Recruitment Plot (2) Emauto Peak Finder
+#'
+#' Identifies peaks in the population histogram using a Gaussian Mixture
+#' Model Expectation Maximization (GMM-EM) method with number of components
+#' automatically detected.
+#'
+#' @param x
+#' An \code{\link{enve.RecPlot2}} object.
+#' @param components
+#' A vector of number of components to evaluate.
+#' @param criterion
+#' Criterion to use for components selection. Must be one of:
+#' \code{aic} (Akaike Information Criterion), \code{bic} or \code{sbc}
+#' (Bayesian Information Criterion or Schwarz Criterion).
+#' @param merge.tol
+#' When attempting to merge peaks with very similar sequencing depth, use
+#' this number of significant digits (in log-scale).
+#' @param verbose
+#' Display (mostly debugging) information.
+#' @param ...
+#' Any additional parameters supported by
+#' \code{\link{enve.recplot2.findPeaks.em}}.
+#'
+#' @return Returns a list of \code{\link{enve.RecPlot2.Peak}} objects. The
+#' list is empty if no model was retained.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks.emauto <- function(
+  x,
+  components = seq(1, 5),
+  criterion = 'aic',
+  merge.tol = 2L,
+  verbose = FALSE,
+  ...
+){
+  best <- list(crit=0, pstore=list())
+  # Information criterion as a function of the log-likelihood (ll), the
+  # number of parameters (k), and the number of observations (n)
+  if(criterion == 'aic'){
+    do_crit <- function(ll, k, n) 2*k - 2*ll
+  }else if(criterion %in% c('bic', 'sbc')){
+    do_crit <- function(ll, k, n) log(n)*k - 2*ll
+  }else{
+    stop('Invalid criterion ', criterion)
+  }
+  # Evaluate each candidate number of components, carrying the best so far
+  for(comp in components){
+    if(verbose) cat('Testing:',comp,'\n')
+    best <- enve.recplot2.findPeaks.__emauto_one(x, comp, do_crit, best,
+                                                 verbose, ...)
+  }
+  if(length(best[['peaks']])==0) return(list())
+  # Peaks whose log-depth is equal up to `merge.tol` significant digits are
+  # considered duplicates
+  seqdepths.r <- signif(log(sapply(best[['peaks']],
+                                   function(x) x$seq.depth)), merge.tol)
+  distinct <- length(unique(seqdepths.r))
+  if(distinct < length(best[['peaks']])){
+    if(verbose) cat('Attempting merge to', distinct, 'components\n')
+    # One row per peak, one column per parameter (plus `alpha`)
+    init <- apply(sapply(best[['peaks']],
+                         function(x) c(x$param.hat, alpha=x$n.hat/x$n.total)),
+                  1, as.numeric)
+    # drop = FALSE: keep the matrix shape even if a single distinct peak
+    # remains; otherwise the column look-ups below fail
+    init <- init[!duplicated(seqdepths.r), , drop = FALSE]
+    init <- list(mu=init[,'mean'], sd=init[,'sd'],
+                 alpha=init[,'alpha']/sum(init[,'alpha']))
+    # NOTE(review): `init` is computed but not forwarded to the call below,
+    # so the refit does not start from the de-duplicated peaks. Confirm the
+    # signature of `enve.recplot2.findPeaks.__emauto_one` before passing it.
+    best <- enve.recplot2.findPeaks.__emauto_one(x, distinct, do_crit, best,
+                                                 verbose, ...)
+  }
+  return(best[['peaks']])
+}
+#' Enveomics: Recruitment Plot (2) Em Peak Finder
+#'
+#' Identifies peaks in the population histogram using a Gaussian Mixture
+#' Model Expectation Maximization (GMM-EM) method.
+#'
+#' @param x
+#' An \code{\link{enve.RecPlot2}} object.
+#' @param max.iter
+#' Maximum number of EM iterations.
+#' @param ll.diff.res
+#' Maximum Log-Likelihood difference to be considered as convergent.
+#' @param components
+#' Number of distributions assumed in the mixture.
+#' @param rm.top
+#' Top-values to remove before finding peaks, as a quantile probability.
+#' This step is useful to remove highly conserved regions, but can be
+#' turned off by setting \code{rm.top=0}. The quantile is determined
+#' \strong{after} removing zero-coverage windows.
+#' @param verbose
+#' Display (mostly debugging) information.
+#' @param init
+#' Initialization parameters. By default, these are derived from k-means
+#' clustering. A named list with vectors for \code{mu}, \code{sd}, and
+#' \code{alpha}, each of length \code{components}.
+#' @param log
+#' Logical value indicating if the estimations should be performed in
+#' natural logarithm units. Do not change unless you know what you're
+#' doing.
+#'
+#' @return Returns a list of \code{\link{enve.RecPlot2.Peak}} objects, one
+#' per component.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks.em <- function(
+  x,
+  max.iter = 1000,
+  ll.diff.res = 1e-8,
+  components = 2,
+  rm.top = 0.05,
+  verbose = FALSE,
+  init,
+  log = TRUE
+){
+  # Essential vars: in-group sequencing depth of windows with coverage
+  pos.binsize <- x$pos.breaks[-1] - x$pos.breaks[-length(x$pos.breaks)]
+  lsd1 <- (x$pos.counts.in/pos.binsize)[ x$pos.counts.in > 0 ]
+  # Trim the top values. With rm.top = 0 the cutoff would be the maximum,
+  # and the strict comparison would still discard it, so skip the step
+  # altogether to honour "turned off"
+  if(rm.top > 0){
+    lsd1 <- lsd1[ lsd1 < quantile(lsd1, 1-rm.top, names = FALSE) ]
+  }
+  if(log) lsd1 <- log(lsd1)
+  # 1. Initialize
+  if(missing(init)){
+    km.clust <- kmeans(lsd1, components)$cluster
+    init <- list(
+      mu = tapply(lsd1, km.clust, mean),
+      sd = tapply(lsd1, km.clust, sd),
+      alpha = table(km.clust) / length(km.clust)
+    )
+  }
+  m.step <- init
+  cur.ll <- -Inf
+  for(i in seq_len(max.iter)){
+    # 2/3. EM
+    e.step <- enve.recplot2.findPeaks.__em_e(lsd1, m.step)
+    m.step <- enve.recplot2.findPeaks.__em_m(lsd1, e.step[['posterior']])
+    # 4. Convergence
+    ll.diff <- abs(cur.ll - e.step[["ll"]])
+    cur.ll <- e.step[["ll"]]
+    if(verbose) cat(i, '\t| LL =', cur.ll, '\t| LL.diff =', ll.diff, '\n')
+    # Stop on undefined/infinite change (e.g., the first iteration, since
+    # cur.ll starts at -Inf) or once the change is small enough
+    if(is.na(ll.diff) || ll.diff == Inf) break
+    if(ll.diff <= ll.diff.res) break
+  }
+  # Return: one peak object per mixture component
+  peaks <- list()
+  for(i in seq_len(components)){
+    mu.i <- m.step[['mu']][i]
+    n.hat <- m.step[['alpha']][i]*length(lsd1)
+    peaks[[i]] <- new('enve.RecPlot2.Peak', dist='norm', values=as.numeric(),
+                      values.res=0, mode=mu.i,
+                      param.hat=list(sd=m.step[['sd']][i], mean=mu.i),
+                      n.hat=n.hat, n.total=length(lsd1), err.res=cur.ll,
+                      merge.logdist=as.numeric(), log=log,
+                      # Back-transform to depth units; as.numeric() drops
+                      # names, as the previous ifelse() did
+                      seq.depth=as.numeric(if(log) exp(mu.i) else mu.i))
+  }
+  return(peaks)
+}
+#' Enveomics: Recruitment Plot (2) Mowing Peak Finder
+#'
+#' Identifies peaks in the population histogram potentially indicating
+#' sub-population mixtures, using a custom distribution-mowing method.
+#'
+#' @param x
+#' An \code{\link{enve.RecPlot2}} object.
+#' @param min.points
+#' Minimum number of points in the quantile-estimation-range
+#' \code{(quant.est)} to estimate a peak.
+#' @param quant.est
+#' Range of quantiles to be used in the estimation of a peak's
+#' parameters.
+#' @param mlv.opts
+#' Ignored. For backwards compatibility.
+#' @param fitdist.opts.sn
+#' Options passed to \code{fitdist} to estimate the standard deviation if
+#' \code{with.skewness=TRUE}. Note that the \code{start} parameter will be
+#' amended with \code{xi=estimated} mode for each peak.
+#' @param fitdist.opts.norm
+#' Options passed to \code{fitdist} to estimate the standard deviation if
+#' \code{with.skewness=FALSE}. Note that the \code{start} parameter will be
+#' amended with \code{mean=estimated} mode for each peak.
+#' @param rm.top
+#' Top-values to remove before finding peaks, as a quantile probability.
+#' This step is useful to remove highly conserved regions, but can be
+#' turned off by setting \code{rm.top=0}. The quantile is determined
+#' \strong{after} removing zero-coverage windows.
+#' @param with.skewness
+#' Allow skewness correction of the peaks. Typically, the
+#' sequencing-depth distribution for a single peak is left-skewed, due
+#' partly (but not exclusively) to fragmentation and mapping sensitivity.
+#' See \emph{Lindner et al 2013, Bioinformatics 29(10):1260-7} for an
+#' alternative solution for the first problem (fragmentation) called
+#' "tail distribution".
+#' @param optim.rounds
+#' Maximum rounds of peak optimization.
+#' @param optim.epsilon
+#' Trace change at which optimization stops (unless \code{optim.rounds} is
+#' reached first). The trace change is estimated as the sum of square
+#' differences between parameters in one round and those from two rounds
+#' earlier (to avoid infinite loops from approximation).
+#' @param merge.logdist
+#' Maximum value of \code{|log-ratio|} between centrality parameters in peaks
+#' to attempt merging. The default, \code{log(1.75)} (~0.56), corresponds to
+#' a maximum difference of 75\%.
+#' @param verbose
+#' Display (mostly debugging) information.
+#' @param log
+#' Logical value indicating if the estimations should be performed in
+#' natural logarithm units. Do not change unless you know what you're
+#' doing.
+#'
+#' @return Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks.mower <- function(
+  x,
+  min.points=10,
+  quant.est=c(0.002, 0.998),
+  mlv.opts=list(method='parzen'),
+  fitdist.opts.sn=list(distr='sn', method='qme', probs=c(0.1,0.5,0.8),
+                       start=list(omega=1, alpha=-1), lower=c(0, -Inf, -Inf)),
+  fitdist.opts.norm=list(distr='norm', method='qme', probs=c(0.4,0.6),
+                         start=list(sd=1), lower=c(0, -Inf)),
+  rm.top=0.05,
+  with.skewness=TRUE,
+  optim.rounds=200,
+  optim.epsilon=1e-4,
+  merge.logdist=log(1.75),
+  verbose=FALSE,
+  log=TRUE
+){
+  # Essential vars: in-group sequencing depth of windows with coverage
+  pos.binsize <- x$pos.breaks[-1] - x$pos.breaks[-length(x$pos.breaks)]
+  seqdepth.in <- x$pos.counts.in/pos.binsize
+  lsd1 <- seqdepth.in[seqdepth.in>0]
+  # Trim the top values. With rm.top = 0 the cutoff would be the maximum,
+  # and the strict comparison would still discard it, so skip the step
+  # altogether to honour "turned off"
+  if(rm.top > 0){
+    lsd1 <- lsd1[ lsd1 < quantile(lsd1, 1-rm.top, names=FALSE) ]
+  }
+  if(log) lsd1 <- log(lsd1)
+  if(with.skewness){
+    fitdist.opts <- fitdist.opts.sn
+  }else{
+    fitdist.opts <- fitdist.opts.norm
+  }
+  peaks.opts <- list(lsd1=lsd1, min.points=min.points, quant.est=quant.est,
+                     mlv.opts=mlv.opts, fitdist.opts=fitdist.opts,
+                     with.skewness=with.skewness, optim.rounds=optim.rounds,
+                     optim.epsilon=optim.epsilon, verbose=verbose,
+                     n.total=length(lsd1), merge.logdist=merge.logdist,
+                     log=log)
+  # Find seed peaks
+  if(verbose) cat('Mowing peaks for n =',length(lsd1),'\n')
+  peaks <- enve.recplot2.findPeaks.__mower(peaks.opts)
+  # Merge overlapping peaks: repeat passes until a pass merges nothing or a
+  # single peak remains
+  if(verbose) cat('Trying to merge',length(peaks),'peaks\n')
+  merged <- (length(peaks)>1)
+  while(merged){
+    merged <- FALSE   # Did any merge happen in this pass?
+    ignore <- c()     # Peaks already absorbed by a merge in this pass
+    peaks2 <- list()
+    for(i in seq_along(peaks)){
+      if(i %in% ignore) next
+      p <- peaks[[ i ]]
+      j <- enve.recplot2.__whichClosestPeak(p, peaks)
+      p2 <- peaks[[ j ]]
+      # Centrality parameter: last entry of `param.hat`
+      dst.a <- p$param.hat[[ length(p$param.hat) ]]
+      dst.b <- p2$param.hat[[ length(p2$param.hat) ]]
+      # Was *this* peak merged? Tracked separately from the pass-wide flag:
+      # reusing `merged` here would silently discard every unmerged peak
+      # visited after the first successful merge of the pass
+      merged.here <- FALSE
+      if( abs(log(dst.a/dst.b)) < merge.logdist ){
+        if(verbose) cat('==> Attempting a merge at',
+                        p$param.hat[[ length(p$param.hat) ]],'&',
+                        p2$param.hat[[ length(p2$param.hat) ]],'X\n')
+        # Re-run the mower on the pooled values; accept the merge only if
+        # it resolves into exactly one peak
+        peaks.opts$lsd1 <- c(p$values, p2$values)
+        p.new <- enve.recplot2.findPeaks.__mower(peaks.opts)
+        if(length(p.new)==1){
+          peaks2[[ length(peaks2)+1 ]] <- p.new[[ 1 ]]
+          ignore <- c(ignore, j)
+          merged <- TRUE
+          merged.here <- TRUE
+        }
+      }
+      if(!merged.here) peaks2[[ length(peaks2)+1 ]] <- p
+    }
+    # NOTE(review): if the closest peak `j` was already visited in this pass
+    # (j < i), it stays in `peaks2` in addition to the merged peak; confirm
+    # whether that is intended.
+    peaks <- peaks2
+    if(length(peaks)==1) break
+  }
+  if(verbose) cat('Found',length(peaks),'peak(s)\n')
+  return(peaks)
+}
+#==============> Define utils
+#' Enveomics: Recruitment Plot (2) Core Peak Finder
+#'
+#' Finds the peak in a list of peaks that is most likely to represent the
+#' "core genome" of a population.
+#'
+#' The search starts at the peak with the largest centrality parameter (the
+#' last entry of \code{param.hat}). Any peak explaining at least as many
+#' windows as the current candidate replaces it if its depth is close enough
+#' to that of the starting peak; the tolerance grows with the size ratio.
+#'
+#' @param x \code{list} of \code{\link{enve.RecPlot2.Peak}} objects.
+#'
+#' @return Returns the selected element of \code{x}.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.corePeak <- function
+(x
+){
+  if(length(x) == 0) stop("'x' must be a non-empty list of peaks")
+  # Number of windows explained by a peak: observed values if stored,
+  # otherwise the estimated count
+  peak.size <- function(p){
+    if(length(p$values)==0) p$n.hat else length(p$values)
+  }
+  # Find the peak with maximum depth (centrality)
+  maxPeak <- x[[
+    which.max(as.numeric(lapply(x,
+                                function(y) y$param.hat[[ length(y$param.hat) ]])))
+    ]]
+  # Depth of the maximum peak, back-transformed with that peak's own `log`
+  # flag (it does not change while iterating)
+  sq.d.b <- as.numeric(tail(maxPeak$param.hat, n=1))
+  if(maxPeak$log) sq.d.b <- exp(sq.d.b)
+  # If a "larger" peak (a peak explaining more bins of the genome) is within
+  # the default "merge.logdist" distance, take that one instead.
+  corePeak <- maxPeak
+  for(p in x){
+    sz.d <- log(peak.size(p)/peak.size(corePeak))
+    if(is.nan(sz.d) || sz.d < 0) next
+    sq.d.a <- as.numeric(tail(p$param.hat, n=1))
+    if(p$log) sq.d.a <- exp(sq.d.a)
+    if(abs(log(sq.d.a/sq.d.b)) < log(1.75)+sz.d/5) corePeak <- p
+  }
+  return(corePeak)
+}
+#' Enveomics: Recruitment Plot (2) Change Cutoff
+#'
+#' Change the intra-species cutoff of an existing recruitment plot.
+#'
+#' @param rp
+#' \code{\link{enve.RecPlot2}} object.
+#' @param new.cutoff
+#' New cutoff to use. Identity bins whose midpoint is above this value are
+#' considered in-group.
+#'
+#' @return Returns \code{rp} with the attributes \code{id.ingroup},
+#' \code{pos.counts.in}, \code{pos.counts.out}, and \code{call} updated.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.changeCutoff <- function
+(rp,
+ new.cutoff=98
+){
+  # Re-calculate vectors
+  id.mids    <- (rp$id.breaks[-length(rp$id.breaks)]+rp$id.breaks[-1])/2
+  id.ingroup <- (id.mids > new.cutoff)
+  # drop = FALSE keeps a matrix even if only one (or no) identity bin falls
+  # on one side of the cutoff, which would otherwise make apply() fail
+  pos.counts.in  <- apply(rp$counts[, id.ingroup, drop = FALSE], 1, sum)
+  pos.counts.out <- apply(rp$counts[, !id.ingroup, drop = FALSE], 1, sum)
+  # Update object
+  attr(rp, "id.ingroup")     <- id.ingroup
+  attr(rp, "pos.counts.in")  <- pos.counts.in
+  attr(rp, "pos.counts.out") <- pos.counts.out
+  attr(rp, "call")           <- match.call()
+  return(rp)
+}
+#' Enveomics: Recruitment Plot (2) Window Depth Threshold
+#'
+#' Identifies the threshold below which windows should be identified as
+#' variable or absent.
+#'
+#' @param rp
+#' Recruitment plot, an \code{\link{enve.RecPlot2}} object. Currently not
+#' used in the calculation.
+#' @param peak
+#' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
+#' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
+#' used (see \code{\link{enve.recplot2.corePeak}}).
+#' @param lower.tail
+#' If \code{FALSE}, it returns the threshold for windows significantly above
+#' the peak in sequencing depth.
+#' @param significance
+#' Significance threshold (alpha) to select windows.
+#'
+#' @return
+#' Returns a float: the quantile of the peak's fitted distribution at
+#' \code{significance} (or \code{1 - significance} if
+#' \code{lower.tail = FALSE}). If the peak was estimated in log scale
+#' (\code{peak$log}), the quantile is exponentiated before being returned.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.windowDepthThreshold <- function
+(rp,
+ peak,
+ lower.tail=TRUE,
+ significance=0.05
+){
+  if(is.list(peak)) peak <- enve.recplot2.corePeak(peak)
+  # Arguments for the quantile function: the fitted parameters plus `p`
+  q.args <- peak$param.hat
+  q.args[["p"]] <- if(lower.tail) significance else 1-significance
+  # Three fitted parameters (four arguments with `p`) select `qsn`, any
+  # other count selects `qnorm`. A plain if/else is used because ifelse()
+  # is not meant to return functions.
+  q.fun <- if(length(q.args)==4) qsn else qnorm
+  thr <- do.call(q.fun, q.args)
+  if(peak$log) thr <- exp(thr)
+  return(thr)
+}
+#' Enveomics: Recruitment Plot (2) Extract Windows
+#'
+#' Extract windows significantly below (or above) the peak in sequencing
+#' depth.
+#'
+#' @param rp
+#' Recruitment plot, a \code{\link{enve.RecPlot2}} object.
+#' @param peak
+#' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
+#' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
+#' used (see \code{\link{enve.recplot2.corePeak}}).
+#' @param lower.tail
+#' If \code{FALSE}, it returns windows significantly above the peak in
+#' sequencing depth.
+#' @param significance
+#' Significance threshold (alpha) to select windows.
+#' @param seq.names
+#' If \code{TRUE}, returns the coordinates of the selected windows (as
+#' produced by \code{\link{enve.recplot2.coordinates}}) instead of a vector
+#' of Booleans.
+#'
+#' @return
+#' Returns a vector of logicals if \code{seq.names = FALSE}.
+#' If \code{seq.names = TRUE}, it returns a data.frame with five columns:
+#' \code{name.from}, \code{name.to}, \code{pos.from}, \code{pos.to}, and
+#' \code{seq.name} (see \code{\link{enve.recplot2.coordinates}}).
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.extractWindows <- function
+(rp,
+ peak,
+ lower.tail = TRUE,
+ significance = 0.05,
+ seq.names = FALSE
+){
+  # Depth cutoff derived from the peak, and observed in-group depth
+  cutoff <- enve.recplot2.windowDepthThreshold(rp, peak, lower.tail,
+                                               significance)
+  depth <- enve.recplot2.seqdepth(rp)
+  # Windows on the requested side of the cutoff
+  picked <- if(lower.tail) depth < cutoff else depth > cutoff
+  # Either the coordinates of the selected windows or the logical mask
+  if(seq.names) enve.recplot2.coordinates(rp, picked) else picked
+}
+#' Enveomics: Recruitment Plot (2) Compare Identities
+#'
+#' Compare the distribution of identities between two
+#' \code{\link{enve.RecPlot2}} objects.
+#'
+#' @param x
+#' First \code{\link{enve.RecPlot2}} object.
+#' @param y
+#' Second \code{\link{enve.RecPlot2}} object.
+#' @param method
+#' Distance method to use. This should be (an unambiguous abbreviation of)
+#' one of:
+#' \itemize{
+#'    \item{"hellinger" (\emph{Hellinger, 1909, doi:10.1515/crll.1909.136.210}),}
+#'    \item{"bhattacharyya" (\emph{Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35});
+#'    computed as one minus the squared Hellinger distance,}
+#'    \item{"kl" or "kullback-leibler" (\emph{Kullback & Leibler, 1951,
+#'    doi:10.1214/aoms/1177729694}), or}
+#'    \item{"euclidean"}
+#' }
+#' @param smooth.par
+#' Smoothing parameter for cubic spline smoothing. Use 0 for no smoothing.
+#' Use \code{NULL} to automatically determine this value using leave-one-out
+#' cross-validation (see \code{smooth.spline} parameter \code{spar}).
+#' @param pseudocounts
+#' Smoothing parameter for Laplace smoothing. Use 0 for no smoothing, or
+#' 1 for add-one smoothing.
+#' @param max.deviation
+#' Maximum mean deviation between identity breaks tolerated (as percent
+#' identity). Difference in number of \code{id.breaks} is never tolerated.
+#'
+#' @return Returns a single numeric value with the requested measure.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.compareIdentities <- function
+(x,
+ y,
+ method="hellinger",
+ smooth.par=NULL,
+ pseudocounts=0,
+ max.deviation=0.75
+){
+  # Validate input
+  METHODS <- c("hellinger","bhattacharyya","kullback-leibler","kl","euclidean")
+  i.meth <- pmatch(method, METHODS)
+  if(is.na(i.meth)) stop("Invalid distance ", method)
+  if(!inherits(x, "enve.RecPlot2"))
+    stop("'x' must inherit from class `enve.RecPlot2`")
+  if(!inherits(y, "enve.RecPlot2"))
+    stop("'y' must inherit from class `enve.RecPlot2`")
+  if(length(x$id.breaks) != length(y$id.breaks))
+    stop("'x' and 'y' must have the same number of `id.breaks`")
+  dev <- mean(abs(x$id.breaks - y$id.breaks))
+  if(dev > max.deviation)
+    stop("'x' and 'y' must have similar `id.breaks`; exceeding max.deviation: ",
+         dev)
+  # Identity histograms, optionally spline-smoothed (negative fitted values
+  # are truncated to zero)
+  x.cnt <- x$id.counts
+  y.cnt <- y$id.counts
+  if(is.null(smooth.par) || smooth.par > 0){
+    bin.mids <- function(brk) (brk[-1] + brk[-length(brk)])/2
+    smoothed <- function(rp){
+      fit <- smooth.spline(bin.mids(rp$id.breaks), rp$id.counts,
+                           spar=smooth.par)
+      pmax(fit$y, 0)
+    }
+    x.cnt <- smoothed(x)
+    y.cnt <- smoothed(y)
+  }
+  # Turn counts into probabilities, with optional Laplace smoothing
+  a <- as.numeric(pseudocounts)
+  p <- (x.cnt + a) / sum(x.cnt + a)
+  q <- (y.cnt + a) / sum(y.cnt + a)
+  # Requested measure
+  d <- switch(
+    METHODS[i.meth],
+    "hellinger" = ,
+    "bhattacharyya" = {
+      h <- sqrt(sum((sqrt(p) - sqrt(q))^2))/sqrt(2)
+      if(i.meth == 2L) 1 - h^2 else h
+    },
+    "kullback-leibler" = ,
+    "kl" = {
+      # Only bins with non-zero probability in `p` contribute
+      sel <- p > 0
+      if(any(q[sel]==0))
+        stop("Undefined distance without absolute continuity, use pseudocounts")
+      -sum(p[sel]*log(q[sel]/p[sel]))
+    },
+    "euclidean" = sqrt(sum((q - p)^2))
+  )
+  return(d)
+}
+#' Enveomics: Recruitment Plot (2) Coordinates
+#'
+#' Returns the sequence name and coordinates of the requested position bins.
+#'
+#' @param x
+#' \code{\link{enve.RecPlot2}} object.
+#' @param bins
+#' Vector of selected bins to return. It can be a vector of logical values
+#' with the same length as \code{x$pos.breaks-1} or a vector of integers. If
+#' missing, returns the coordinates of all windows.
+#'
+#' @return
+#' Returns a data.frame with five columns: \code{name.from} (character),
+#' \code{pos.from} (numeric), \code{name.to} (character), \code{pos.to}
+#' (numeric), and \code{seq.name} (character).
+#' The first two correspond to sequence and position of the start point of the
+#' bin. The next two correspond to the sequence and position of the end point of
+#' the bin. The last one indicates the name of the position bin (if defined).
+#' Rows are left as \code{NA} for bins whose start or end cannot be assigned
+#' to exactly one sequence.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.coordinates <- function
+(x,
+ bins
+){
+  if(!inherits(x, "enve.RecPlot2"))
+    stop("'x' must inherit from class `enve.RecPlot2`")
+  if(missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
+  if(!is.vector(bins)) stop("'bins' must be a vector")
+  if(inherits(bins, "logical")) bins <- which(bins)
+  y <- data.frame(stringsAsFactors = FALSE, row.names = bins)
+  # Start and end (in concatenated coordinates) of each sequence
+  seq.start <- x$seq.breaks[-length(x$seq.breaks)]
+  seq.end   <- x$seq.breaks[-1]
+  # seq_along() so that an empty selection yields an empty table
+  for(i in seq_along(bins)){
+    j <- bins[i]
+    # Concatenated coordinates
+    cc <- x$pos.breaks[c(j, j+1)]
+    # Find the corresponding `seq.breaks`
+    sb.from <- which(cc[1] >= seq.start & cc[1] <  seq.end)
+    sb.to   <- which(cc[2] >  seq.start & cc[2] <= seq.end)
+    # Translate coordinates
+    # NOTE(review): the offsets below add the sequence start to the
+    # concatenated coordinate; confirm against the .lim conventions that
+    # this is the intended within-sequence position.
+    if(length(sb.from)==1 && length(sb.to)==1){
+      y[i, 'name.from'] <- x$seq.names[sb.from]
+      y[i, 'pos.from']  <- floor(x$seq.breaks[sb.from] + cc[1] - 1)
+      y[i, 'name.to']   <- x$seq.names[sb.to]
+      y[i, 'pos.to']    <- ceiling(x$seq.breaks[sb.to] + cc[2] - 1)
+      # Bin names are indexed by bin (j), not by position within `bins` (i)
+      y[i, 'seq.name']  <- x$pos.names[j]
+    }
+  }
+  return(y)
+}
+#' Enveomics: Recruitment Plot (2) Sequencing Depth
+#'
+#' Calculate the sequencing depth of the given window(s), as the counts of
+#' each position bin divided by the width of the bin.
+#'
+#' @param x
+#' \code{\link{enve.RecPlot2}} object.
+#' @param sel
+#' Window(s) for which the sequencing depth is to be calculated. If not
+#' passed, it returns the sequencing depth of all windows.
+#' @param low.identity
+#' A logical indicating if the sequencing depth is to be estimated only
+#' with low-identity matches (\code{x$pos.counts.out}). By default, only
+#' high-identity matches (\code{x$pos.counts.in}) are used.
+#'
+#' @return
+#' Returns a numeric vector of sequencing depths (in bp/bp).
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.seqdepth <- function
+(x,
+ sel,
+ low.identity=FALSE
+){
+  if(!inherits(x, "enve.RecPlot2"))
+    stop("'x' must inherit from class `enve.RecPlot2`")
+  # Pick the counts to use depending on the identity set requested
+  bin.counts <- if(low.identity) x$pos.counts.out else x$pos.counts.in
+  # Width of each bin: distance between consecutive breaks
+  bin.width <- diff(x$pos.breaks)
+  depth <- bin.counts/bin.width
+  if(!missing(sel)) depth <- depth[sel]
+  return(depth)
+}
+#' Enveomics: Recruitment Plot (2) ANI Estimate
+#'
+#' Estimate the Average Nucleotide Identity from reads (ANIr) from a
+#' recruitment plot, as the mean of the identity bin midpoints weighted by
+#' the counts of each identity bin.
+#'
+#' @param x
+#' \code{\link{enve.RecPlot2}} object.
+#' @param range
+#' Range of identities to be considered. By default, the full range
+#' is used (note that the upper boundary is \code{Inf} and not 100 because
+#' recruitment plots can also be built with bit-scores). To use only
+#' intra-population matches (with identities), use c(95,100). To use only
+#' inter-population values, use c(0,95).
+#'
+#' @return
+#' Returns a single numeric value: the count-weighted average of the
+#' identity bin midpoints within \code{range}.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.ANIr <- function
+(x,
+ range=c(0,Inf)
+){
+  if(!inherits(x, "enve.RecPlot2"))
+    stop("'x' must inherit from class `enve.RecPlot2`")
+  breaks <- x$id.breaks
+  # Midpoint of each identity bin
+  mids <- (breaks[-1]+breaks[-length(breaks)])/2
+  weights <- x$id.counts
+  # Ignore the bins with midpoints outside of the requested range
+  outside <- mids < range[1] | mids > range[2]
+  weights[outside] <- 0
+  total <- sum(weights)
+  return(sum(mids*weights/total))
+}
+#==============> Define internal functions
+#' Enveomics: Recruitment Plot (2) Internal Ancillary Function
+#'
+#' Internal ancillary function (see \code{\link{enve.recplot2}}).
+#' Counts, for each combination of position bin and identity bin, the number
+#' of positions covered by the reads in \code{x$rec}.
+#'
+#' @param x \code{\link{enve.RecPlot2}} object
+#' @param pos.breaks Position breaks
+#' @param id.breaks Identity breaks
+#' @param rec.idcol Identity column to use
+#'
+#' @return
+#' Returns a numeric matrix with one row per position bin and one column per
+#' identity bin. If \code{x$rec} has no rows, all the counts are zero.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#' @author Kenji Gerhardt [aut]
+#'
+#' @export
+enve.recplot2.__counts <- function
+(x, pos.breaks, id.breaks, rec.idcol) {
+  rec2 <- x$rec
+  n.pos <- length(pos.breaks) - 1
+  n.id <- length(id.breaks) - 1
+  # without reads there is nothing to count (and the steps below cannot run
+  # on empty input), so return the empty counts right away
+  if(NROW(rec2) == 0) return(matrix(0, nrow = n.pos, ncol = n.id))
+  # get counts of how many occurrences of each genome pos.bin there are per
+  # read: one run-length encoding per row, using columns 1 and 2 as the first
+  # and last positions covered (Map keeps each `rle` as one list element)
+  x.rle <- Map(
+    function(start, end) {
+      rle(findInterval(start:end, pos.breaks, left.open = TRUE))
+    }, rec2[, 1], rec2[, 2])
+  # find the single y bin for each row, replicates it at the correct places to
+  # the number of distinct bins found in its row
+  y.bins <- rep(findInterval(rec2[, rec.idcol], id.breaks, left.open = TRUE),
+                times = vapply(x.rle, function(a) length(a$lengths),
+                               integer(1)))
+  # x.bins_counts is the number of occurrences of each bin a row contains,
+  # per row, then unlisted
+  x.bins_counts <- unlist(lapply(x.rle, function(a) a$lengths))
+  # these are the pos. in. genome bins that each count in x.bins_counts falls
+  # into
+  x.bins <- unlist(lapply(x.rle, function(a) a$values))
+  # much more efficient counts implementation in R using lists instead of a
+  # matrix: one vector of identity-bin counts per position bin
+  counts <- lapply(seq_len(n.pos), function(col_len) rep(0, n.id))
+  # accesses the correct list in counts by x.bin, then
+  # accesses the position in that row by y.bins and adds the new count
+  for(i in seq_along(x.bins)){
+    counts[[x.bins[i]]][y.bins[i]] <-
+      counts[[x.bins[i]]][y.bins[i]] + x.bins_counts[i]
+  }
+  counts <- do.call(rbind, counts)
+  return(counts)
+}
+#' Enveomics: Recruitment Plot (2) EMauto Peak Finder - Internal Ancillary Function
+#'
+#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.emauto}}).
+#' Fits peaks with \code{comp} components, scores the solution with
+#' \code{do_crit}, and updates \code{best} if the score is higher than the
+#' one stored in \code{best[['crit']]}.
+#'
+#' @param x \code{\link{enve.RecPlot2}} object
+#' @param comp Components
+#' @param do_crit Function estimating the criterion. It is called with the
+#' \code{err.res} of the first peak, the number of parameters, and the
+#' \code{n.total} of the first peak
+#' @param best Best solution thus far
+#' @param verbose If verbose
+#' @param ... Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}
+#'
+#' @return
+#' Returns \code{best}, with the new peaks stored in
+#' \code{best[['pstore']][[comp]]} and, if the criterion improved, also in
+#' \code{best[['peaks']]} (together with \code{best[['crit']]}). If no peaks
+#' were found, \code{best} is returned unchanged.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks.__emauto_one <- function
+(x, comp, do_crit, best, verbose, ...){
+  fit <- enve.recplot2.findPeaks.em(x=x, components=comp, ...)
+  # Nothing was found: the best solution thus far stands
+  if(length(fit)==0) return(best)
+  first <- fit[[1]]
+  # mean & sd for each component, and n-1 free alpha parameters
+  n.params <- comp*3 - 1
+  score <- do_crit(first$err.res, n.params, first$n.total)
+  improved <- score > best[['crit']]
+  if(verbose) cat(comp,'\t| LL =', first$err.res, '\t| Estimate =', score,
+                  ifelse(improved, '*', ''), '\n')
+  if(improved){
+    best[['crit']] <- score
+    best[['peaks']] <- fit
+  }
+  # Every solution is kept, indexed by number of components
+  best[['pstore']][[comp]] <- fit
+  return(best)
+}
+#' Enveomics: Recruitment Plot (2) EM Peak Finder - Internal Ancillary Function Expectation
+#'
+#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.em}}).
+#' Evaluates the normal density of every value under each component
+#' (weighted by \code{alpha}) and normalizes by row.
+#'
+#' @param x Vector of log-transformed sequencing depths
+#' @param theta Parameters list, with entries \code{mu}, \code{sd}, and
+#' \code{alpha} (one value per component)
+#'
+#' @return
+#' Returns a list with \code{ll} (sum of the logarithm of the row sums of the
+#' weighted densities) and \code{posterior} (matrix of weighted densities
+#' divided by their row sums, with one column per component).
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks.__em_e <- function
+(x, theta){
+  components <- length(theta[['mu']])
+  # Density of all the values under the i-th component, times its weight
+  weighted.density <- function(i)
+    dnorm(x, theta[['mu']][i], theta[['sd']][i])*theta[['alpha']][i]
+  joint <- do.call(cbind, lapply(1:components, weighted.density))
+  marginal <- rowSums(joint)
+  # Report the values for which the total density is infinite
+  for(i in which(marginal == Inf)) {
+    cat(i,'/',nrow(joint), ':', joint[i,], '\n')
+  }
+  return(list(ll=sum(log(marginal)), posterior=joint / marginal))
+}
+#' Enveomics: Recruitment Plot (2) EM Peak Finder - Internal Ancillary Function Maximization
+#'
+#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.em}}).
+#' Estimates the posterior-weighted mean, standard deviation, and mixing
+#' proportion of each component.
+#'
+#' @param x Vector of log-transformed sequencing depths
+#' @param posterior Posterior probability, as a matrix with one row per value
+#' in \code{x} and one column per component
+#'
+#' @return
+#' Returns a list with \code{mu}, \code{sd}, and \code{alpha}, each with one
+#' value per component.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks.__em_m <- function
+(x, posterior){
+  n <- colSums(posterior)
+  # `x` is recycled down the rows, which is the intended alignment here
+  mu <- colSums(posterior * x) / n
+  # Deviation of every value from the mean of every component: cell [i, j]
+  # is x[i] - mu[j]. A plain `matrix - mu` would recycle `mu` down the rows
+  # instead of across the columns, mixing up the means of the components
+  deviation <- outer(x, mu, "-")
+  sd <- sqrt( colSums(posterior * deviation^2) / n )
+  alpha <- n/length(x)
+  return(list(mu=mu, sd=sd, alpha=alpha))
+}
+#' Enveomics: Recruitment Plot (2) Peak S4 Class - Internal Ancillary Function
+#'
+#' Internal ancillary function (see \code{\link{enve.RecPlot2.Peak}}).
+#' Evaluates the density function of the peak (\code{d} followed by
+#' \code{x$dist}) at the given midpoints, using \code{x$param.hat} as
+#' parameters.
+#'
+#' @param x \code{\link{enve.RecPlot2.Peak}} object
+#' @param mids Midpoints. They are log-transformed first if \code{x$log} is
+#' \code{TRUE}
+#' @param counts If \code{TRUE}, the densities are rescaled to add up to the
+#' number of values in the peak (\code{length(x$values)} if any, or
+#' \code{x$n.hat} otherwise). If \code{FALSE}, the densities are returned
+#' as they are
+#'
+#' @return
+#' Returns a numeric vector with one value per midpoint.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.__peakHist <- function
+(x, mids, counts=TRUE){
+  # An undefined `log` is taken as not log-transformed
+  if(length(x$log)==0) x$log <- FALSE
+  dens.args <- x$param.hat
+  dens.args$x <- if(x$log) log(mids) else mids
+  prob <- do.call(paste0('d', x$dist), dens.args)
+  if(!counts) return(prob)
+  # Observed size of the peak if available, expected size otherwise
+  size <- if(length(x$values)>0) length(x$values) else x$n.hat
+  return(prob*size/sum(prob))
+}
+#' Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 1
+#'
+#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
+#' Fits a single peak anchored at the median of \code{lsd1}, and randomly
+#' splits the values between the peak and the residual according to the
+#' counts expected from the fitted distribution.
+#'
+#' @param lsd1 Vector of log-transformed sequencing depths
+#' @param min.points Minimum number of points
+#' @param quant.est Quantile estimate
+#' @param mlv.opts List of options for \code{mlv}. Currently unused, since the
+#' peak is anchored at the median
+#' @param fitdist.opts List of options for \code{fitdist}
+#' @param with.skewness If skewed-normal should be used
+#' @param optim.rounds Maximum number of optimization rounds
+#' @param optim.epsilon Minimum difference considered negligible
+#' @param n.total Global number of windows
+#' @param merge.logdist Attempted \code{merge.logdist} parameter
+#' @param verbose If verbose
+#' @param log If log-transformed depths
+#'
+#' @return
+#' Returns an \code{\link{enve.RecPlot2.Peak}} object, or \code{NULL} if the
+#' parameters could not be estimated or if fewer than \code{min.points}
+#' values were assigned to the peak.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks.__mow_one <- function
+(lsd1, min.points, quant.est, mlv.opts, fitdist.opts, with.skewness,
+ optim.rounds, optim.epsilon, n.total, merge.logdist, verbose, log
+){
+  dist <- if(with.skewness) 'sn' else 'norm'
+  # Find peak (anchored at the median; the `mlv` estimate is disabled)
+  mode1 <- median(lsd1)
+  if(verbose) cat('Anchoring at mode =',mode1,'\n')
+  param.hat <- fitdist.opts$start
+  last.hat <- param.hat
+  lim <- NA
+  if(with.skewness){ param.hat$xi <- mode1 }else{ param.hat$mean <- mode1 }
+  # Refine peak parameters
+  for(round in seq_len(optim.rounds)){
+    param.hat[[ 1 ]] <- param.hat[[ 1 ]]/diff(quant.est)# <- expand dispersion
+    # Values within the quantiles `quant.est` of the current distribution
+    lim.o <- param.hat
+    lim.o$p <- quant.est
+    lim <- do.call(paste0('q', dist), lim.o)
+    lsd1.pop <- lsd1[(lsd1>lim[1]) & (lsd1<lim[2])]
+    if(verbose) cat(' Round', round, 'with n =',length(lsd1.pop),
+                    'and params =',as.numeric(param.hat),' \r')
+    if(length(lsd1.pop) < min.points) break
+    o <- fitdist.opts
+    o$data <- lsd1.pop
+    o$start <- param.hat
+    last.last.hat <- last.hat
+    last.hat <- param.hat
+    param.hat <- as.list(do.call(fitdist, o)$estimate)
+    if(any(is.na(param.hat))){
+      # Failed fit: fall back to the previous estimate, if any
+      if(round>1) param.hat <- last.hat
+      break
+    }
+    # Stop when the estimates stabilize with respect to either of the last
+    # two rounds
+    if(round > 1){
+      epsilon1 <- sum((as.numeric(last.hat)-as.numeric(param.hat))^2)
+      if(epsilon1 < optim.epsilon) break
+      if(round > 2){
+        epsilon2 <- sum((as.numeric(last.last.hat)-as.numeric(param.hat))^2)
+        if(epsilon2 < optim.epsilon) break
+      }
+    }
+  }
+  if(verbose) cat('\n')
+  if(is.na(param.hat[1]) || is.na(lim[1])) return(NULL)
+  # Mow distribution
+  lsd2 <- c()
+  lsd.pop <- c()
+  n.hat <- length(lsd1.pop)/diff(quant.est)
+  peak <- new('enve.RecPlot2.Peak', dist=dist, values=as.numeric(), mode=mode1,
+              param.hat=param.hat, n.hat=n.hat, n.total=n.total,
+              merge.logdist=merge.logdist, log=log)
+  peak.breaks <- seq(min(lsd1), max(lsd1), length.out=20)
+  peak.mids <- (peak.breaks[-length(peak.breaks)]+peak.breaks[-1])/2
+  peak.cnt <- enve.recplot2.__peakHist(peak, peak.mids)
+  # NOTE(review): bins are right-open, so values equal to max(lsd1) fall in
+  # no bin and end up in neither the peak nor the residual; confirm intended
+  for(i in 2:length(peak.breaks)){
+    values <- lsd1[ (lsd1 >= peak.breaks[i-1]) & (lsd1 < peak.breaks[i]) ]
+    n.exp <- peak.cnt[i-1]
+    if(is.na(n.exp) || n.exp==0) n.exp <- 0.1
+    if(length(values)==0) next
+    # Each value is kept in the peak with probability expected/observed
+    in.peak <- runif(length(values)) <= n.exp/length(values)
+    lsd2 <- c(lsd2, values[!in.peak])
+    lsd.pop <- c(lsd.pop, values[in.peak])
+  }
+  if(length(lsd.pop) < min.points) return(NULL)
+  # Return peak
+  attr(peak, 'values') <- lsd.pop
+  attr(peak, 'values.res') <- lsd2
+  # Residual error from the correlation between the histogram of the peak
+  # and the histogram of all the values
+  pop.cnt <- hist(lsd.pop, breaks=peak.breaks, plot=FALSE)$counts
+  all.cnt <- hist(lsd1, breaks=peak.breaks, plot=FALSE)$counts
+  attr(peak, 'err.res') <- 1-(cor(pop.cnt, all.cnt)+1)/2
+  # `param.hat` is a list, so the last parameter must be turned into a number
+  # before it can be passed to `exp`
+  mu <- as.numeric(tail(param.hat, n=1))
+  attr(peak, 'seq.depth') <- if(log) exp(mu) else mu
+  if(verbose) cat(' Extracted peak with n =',length(lsd.pop),
+                  'with expected n =',n.hat,'\n')
+  return(peak)
+}
+#' Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 2
+#'
+#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
+#' Extracts one peak at a time, and continues with the residual values of
+#' the last peak, until no more peaks are found or the values left do not
+#' exceed \code{peaks.opts$min.points}.
+#'
+#' @param peaks.opts List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}
+#'
+#' @return
+#' Returns a list of peaks, in the order in which they were extracted.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.findPeaks.__mower <- function
+(peaks.opts){
+  found <- list()
+  repeat{
+    # Not enough values left to try for another peak
+    if(!(length(peaks.opts$lsd1) > peaks.opts$min.points)) break
+    one <- do.call(enve.recplot2.findPeaks.__mow_one, peaks.opts)
+    if(is.null(one)) break
+    found[[ length(found)+1 ]] <- one
+    # The next round only sees what this peak left behind
+    peaks.opts$lsd1 <- one$values.res
+  }
+  return(found)
+}
+#' Enveomics: Recruitment Plot (2) Peak Finder - Internal Ancillary Function
+#'
+#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks}}).
+#' Distances are measured as the absolute logarithm of the ratio between the
+#' last entries of \code{param.hat} of the two peaks. Distances of exactly
+#' zero are replaced by \code{Inf} before picking the minimum.
+#'
+#' @param peak Query \code{\link{enve.RecPlot2.Peak}} object
+#' @param peaks list of \code{\link{enve.RecPlot2.Peak}} objects
+#'
+#' @return
+#' Returns the index in \code{peaks} of the peak with the smallest distance
+#' to \code{peak}.
+#'
+#' @author Luis M. Rodriguez-R [aut, cre]
+#'
+#' @export
+enve.recplot2.__whichClosestPeak <- function
+(peak, peaks){
+  # Last parameter estimated for a peak
+  last.param <- function(p) p$param.hat[[ length(p$param.hat) ]]
+  log.ratio <- lapply(peaks,
+                      function(x) abs(log(last.param(x) / last.param(peak))))
+  dist <- as.numeric(log.ratio)
+  dist[ dist==0 ] <- Inf
+  return(which.min(dist))
+}