miga-base 0.7.26.0 → 1.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +57 -14
- data/lib/miga/cli/action/doctor/base.rb +47 -23
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/common.rb +1 -0
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +5 -4
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +36 -0
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +10 -2
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/test/remote_dataset_test.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/database.rb +0 -1
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +906 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +165 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +55 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +419 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +100 -0
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +80 -0
- data/utils/enveomics/enveomics.R/README.md +81 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +304 -3
@@ -0,0 +1,583 @@
|
|
1
|
+
|
2
|
+
# Use as:
|
3
|
+
# > # Estimate reference (null) model:
|
4
|
+
# > tab <- read.table('Ecoli-ML-dmatrix.txt', sep='\t', h=T, row.names=1)
|
5
|
+
# > dist <- as.dist(tab);
|
6
|
+
# > all.dist <- enve.tribs(dist);
|
7
|
+
# >
|
8
|
+
# > # Estimate subset (test) model:
|
9
|
+
# > lee <- read.table('LEE-strains.txt', as.is=T)$V1
|
10
|
+
# > lee.dist <- enve.tribs(dist, lee, subsamples=seq(0,1,by=0.05), threads=12,
|
11
|
+
# + verbosity=2, pre.tribs=all.dist.merge);
|
12
|
+
# ...
|
13
|
+
# >
|
14
|
+
# > # Plot reference and selection at different subsampling levels:
|
15
|
+
# > plot(all.dist, t='boxplot');
|
16
|
+
# > plot(lee.dist, new=FALSE, col='darkred');
|
17
|
+
# ...
|
18
|
+
# >
|
19
|
+
# > # Test significance of overclustering (or overdispersion):
|
20
|
+
# > lee.test <- enve.tribs.test(dist, lee, pre.tribs=all.dist.merge,
|
21
|
+
# + verbosity=2, threads=12);
|
22
|
+
# > summary(lee.test);
|
23
|
+
# > plot(lee.test);
|
24
|
+
# ...
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
#==============> Define S4 classes
|
29
|
+
|
30
|
+
#' Enveomics: TRIBS S4 Class
|
31
|
+
#'
|
32
|
+
#' Enve-omics representation of "Transformed-space Resampling In Biased Sets
|
33
|
+
#' (TRIBS)". This object represents sets of distances between objects,
|
34
|
+
#' sampled nearly-uniformly at random in "distance space". Subsampling
|
35
|
+
#' without selection is trivial, since both the distance space and the
|
36
|
+
#' selection occur in the same transformed space. However, it's useful to
|
37
|
+
#' compare randomly subsampled sets against a selected set of objects. This
|
38
|
+
#' is intended to identify overdispersion or overclustering (see
|
39
|
+
#' \code{\link{enve.TRIBStest}}) of a subset against the entire collection of objects
|
40
|
+
#' with minimum impact of sampling biases. This object can be produced by
|
41
|
+
#' \code{\link{enve.tribs}} and supports S4 methods \code{plot} and \code{summary}.
|
42
|
+
#'
|
43
|
+
#' @slot distance \code{(numeric)} Centrality measurement of the distances
|
44
|
+
#' between the selected objects (without subsampling).
|
45
|
+
#' @slot points \code{(matrix)} Position of the different objects in distance
|
46
|
+
#' space.
|
47
|
+
#' @slot distances \code{(matrix)} Subsampled distances, where the rows are
|
48
|
+
#' replicates and the columns are subsampling levels.
|
49
|
+
#' @slot spaceSize \code{(numeric)} Number of objects.
|
50
|
+
#' @slot selSize \code{(numeric)} Number of selected objects.
|
51
|
+
#' @slot dimensions \code{(numeric)} Number of dimensions in the distance space.
|
52
|
+
#' @slot subsamples \code{(numeric)} Subsampling levels (as fractions, from
|
53
|
+
#' 0 to 1).
|
54
|
+
#' @slot call \code{(call)} Call producing this object.
|
55
|
+
#'
|
56
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
57
|
+
#'
|
58
|
+
#' @exportClass
|
59
|
+
|
60
|
+
# S4 generator for TRIBS results. Slot types are declared explicitly so that
# malformed objects are rejected at construction time.
enve.TRIBS <- setClass(
  Class = "enve.TRIBS",
  representation = representation(
    distance   = "numeric",  # centrality of distances among selected objects
    points     = "matrix",   # object positions in distance space
    distances  = "matrix",   # rows: replicates; columns: subsampling levels
    spaceSize  = "numeric",  # number of objects
    selSize    = "numeric",  # number of selected objects
    dimensions = "numeric",  # number of dimensions in the distance space
    subsamples = "numeric",  # subsampling levels, as fractions
    call       = "call"      # call producing the object
  ),
  package = "enveomics.R"
)
|
72
|
+
|
73
|
+
#' Enveomics: TRIBS Test S4 Class
|
74
|
+
#'
|
75
|
+
#' Test of significance of overclustering or overdispersion in a selected
|
76
|
+
#' set of objects with respect to the entire set (see \code{\link{enve.TRIBS}}). This
|
77
|
+
#' object can be produced by \code{\link{enve.tribs.test}} and supports S4 methods
|
78
|
+
#' \code{plot} and \code{summary}.
|
79
|
+
#'
|
80
|
+
#' @slot pval.gt \code{(numeric)}
|
81
|
+
#' P-value for the overdispersion test.
|
82
|
+
#' @slot pval.lt \code{(numeric)}
|
83
|
+
#' P-value for the overclustering test.
|
84
|
+
#' @slot all.dist \code{(numeric)}
|
85
|
+
#' Empiric PDF of distances for the entire dataset (subsampled at selection
|
86
|
+
#' size).
|
87
|
+
#' @slot sel.dist \code{(numeric)}
|
88
|
+
#' Empiric PDF of distances for the selected objects (without subsampling).
|
89
|
+
#' @slot diff.dist \code{(numeric)}
|
90
|
+
#' Empiric PDF of the difference between \code{all.dist} and \code{sel.dist}.
|
91
|
+
#' The p-values are estimated by comparing areas in this PDF greater than and
|
92
|
+
#' lesser than zero.
|
93
|
+
#' @slot dist.mids \code{(numeric)}
|
94
|
+
#' Midpoints of the empiric PDFs of distances.
|
95
|
+
#' @slot diff.mids \code{(numeric)}
|
96
|
+
#' Midpoints of the empiric PDF of difference of distances.
|
97
|
+
#' @slot call \code{(call)}
|
98
|
+
#' Call producing this object.
|
99
|
+
#'
|
100
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
101
|
+
#'
|
102
|
+
#' @exportClass
|
103
|
+
|
104
|
+
# S4 generator for the TRIBS significance test. All numeric slots hold either
# p-values or binned empirical densities with their bin midpoints.
enve.TRIBStest <- setClass(
  Class = "enve.TRIBStest",
  representation = representation(
    pval.gt   = "numeric",  # p-value of the overdispersion test
    pval.lt   = "numeric",  # p-value of the overclustering test
    all.dist  = "numeric",  # empirical PDF of distances, entire dataset
    sel.dist  = "numeric",  # empirical PDF of distances, selected objects
    diff.dist = "numeric",  # empirical PDF of the difference of distances
    dist.mids = "numeric",  # midpoints for all.dist and sel.dist
    diff.mids = "numeric",  # midpoints for diff.dist
    call      = "call"      # call producing the object
  ),
  package = "enveomics.R"
)
|
116
|
+
|
117
|
+
#==============> Define S4 methods
|
118
|
+
|
119
|
+
#' Enveomics: TRIBS Summary
|
120
|
+
#'
|
121
|
+
#' Summary of an \code{\link{enve.TRIBS}} object.
|
122
|
+
#'
|
123
|
+
#' @param object
|
124
|
+
#' \code{\link{enve.TRIBS}} object.
|
125
|
+
#' @param ...
|
126
|
+
#' No additional parameters are currently supported.
|
127
|
+
#'
|
128
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
129
|
+
#'
|
130
|
+
#' @method summary enve.TRIBS
|
131
|
+
#' @export
|
132
|
+
|
133
|
+
summary.enve.TRIBS <- function(object, ...) {
  # Prints a short text report of a TRIBS object: how many objects were
  # selected, in how many dimensions, how many subsampling levels and
  # replicates were collected, and the call stored in the object.
  # Values are read with attr() so that the slots of the S4 object are
  # accessed exactly as in the rest of this file. Nothing is returned
  # (invisible NULL); `...` is accepted for S3 consistency and ignored.
  rule <- '------------------------------------------\n'

  cat('===[ enve.TRIBS ]-------------------------\n')
  cat('Selected', attr(object, 'selSize'), 'of',
      attr(object, 'spaceSize'), 'objects in',
      attr(object, 'dimensions'), 'dimensions.\n')
  cat('Collected', length(attr(object, 'subsamples')), 'subsamples with',
      nrow(attr(object, 'distances')), 'replicates each.\n')
  cat(rule)
  # deparse() renders the call as source code; as.character() would split it
  # into the function name and bare argument values, losing argument names.
  cat('call:', paste(deparse(attr(object, 'call')), collapse = '\n'), '\n')
  cat(rule)
  invisible(NULL)
}
|
147
|
+
|
148
|
+
#' Enveomics: TRIBS Plot
|
149
|
+
#'
|
150
|
+
#' Plot an \code{\link{enve.TRIBS}} object.
|
151
|
+
#'
|
152
|
+
#' @param x
|
153
|
+
#' \code{\link{enve.TRIBS}} object to plot.
|
154
|
+
#' @param new
|
155
|
+
#' Should a new canvas be drawn?
|
156
|
+
#' @param type
|
157
|
+
#' Type of plot. The \strong{points} plot shows all the replicates, the
|
158
|
+
#' \strong{boxplot} plot represents the values found by
|
159
|
+
#' \code{\link[grDevices]{boxplot.stats}}
|
160
|
+
#' as areas, and plots the outliers as points.
|
161
|
+
#' @param col
|
162
|
+
#' Color of the areas and/or the points.
|
163
|
+
#' @param pt.cex
|
164
|
+
#' Size of the points.
|
165
|
+
#' @param pt.pch
|
166
|
+
#' Points character.
|
167
|
+
#' @param pt.col
|
168
|
+
#' Color of the points.
|
169
|
+
#' @param ln.col
|
170
|
+
#' Color of the lines.
|
171
|
+
#' @param ...
|
172
|
+
#' Any additional parameters supported by \code{plot}.
|
173
|
+
#'
|
174
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
175
|
+
#'
|
176
|
+
#' @method plot enve.TRIBS
|
177
|
+
#' @export
|
178
|
+
|
179
|
+
plot.enve.TRIBS <- function(
    x,
    new = TRUE,
    type = c('boxplot', 'points'),
    col = '#00000044',
    pt.cex = 1/2,
    pt.pch = 19,
    pt.col = col,
    ln.col = col,
    ...) {
  # Draws the subsampled distances of a TRIBS object against the number of
  # objects at each subsampling level. With type 'points' every replicate is
  # drawn; with type 'boxplot' the boxplot.stats() summaries are drawn as
  # three shaded bands plus a median line, and outliers as points.
  # When `new` is FALSE the drawing is added to the current device.
  # Returns invisible NULL.
  type <- match.arg(type)
  distances <- attr(x, 'distances')
  # X position of each subsampling level, as a (rounded) number of objects.
  sizes <- round(attr(x, 'subsamples') * attr(x, 'selSize'))
  # seq_len() yields an empty sequence for a zero-column matrix, whereas
  # 1:ncol() would iterate over c(1, 0) and index out of bounds.
  level.idx <- seq_len(ncol(distances))

  if (new) {
    canvas <- list(xlim = range(attr(x, 'subsamples')) * attr(x, 'selSize'),
                   ylim = range(distances), ..., type = 'n', x = 1)
    do.call(plot, canvas)
  }
  # Reference line: distance among the selected objects without subsampling.
  abline(h = attr(x, 'distance'), lty = 3, col = ln.col)

  if (type == 'points') {
    replicates <- nrow(distances)
    for (i in level.idx) {
      points(rep(sizes[i], replicates), distances[, i], cex = pt.cex,
             pch = pt.pch, col = pt.col)
    }
  } else {
    # Rows 1-2: notch (conf) limits; rows 3-4: whisker ends; rows 5-6:
    # hinges; row 7: median. Consecutive row pairs bound one band each.
    stats <- matrix(NA, nrow = 7, ncol = ncol(distances))
    for (i in level.idx) {
      b <- boxplot.stats(distances[, i])
      points(rep(sizes[i], length(b$out)), b$out, cex = pt.cex, pch = pt.pch,
             col = pt.col)
      stats[, i] <- c(b$conf, b$stats[c(1, 5, 2, 4, 3)])
    }
    for (i in c(1, 3, 5)) {
      polygon(c(sizes, rev(sizes)), c(stats[i, ], rev(stats[i + 1, ])),
              border = NA, col = col)
    }
    lines(sizes, stats[7, ], col = ln.col, lwd = 2)
  }
  invisible(NULL)
}
|
216
|
+
|
217
|
+
#' Enveomics: TRIBS Summary Test
|
218
|
+
#'
|
219
|
+
#' Summary of an \code{\link{enve.TRIBStest}} object.
|
220
|
+
#'
|
221
|
+
#' @param object
|
222
|
+
#' \code{\link{enve.TRIBStest}} object.
|
223
|
+
#' @param ...
|
224
|
+
#' No additional parameters are currently supported.
|
225
|
+
#'
|
226
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
227
|
+
#'
|
228
|
+
#' @method summary enve.TRIBStest
|
229
|
+
#' @export
|
230
|
+
|
231
|
+
summary.enve.TRIBStest <- function(object, ...) {
  # Prints a short text report of a TRIBS test: the alternative hypothesis
  # favoured by the smaller of the two p-values, that p-value with
  # significance stars ("**" at <= 0.01, "*" at <= 0.05), and the call stored
  # in the object. Nothing is returned (invisible NULL); `...` is accepted
  # for S3 consistency and ignored.
  rule <- '------------------------------------------\n'
  pval.gt <- attr(object, 'pval.gt')
  pval.lt <- attr(object, 'pval.lt')

  cat('===[ enve.TRIBStest ]---------------------\n')
  cat('Alternative hypothesis:\n')
  cat(' The distances in the selection are\n')
  if (pval.gt > pval.lt) {
    cat(' smaller than in the entire dataset\n (overclustering)\n')
  } else {
    cat(' larger than in the entire dataset\n (overdispersion)\n')
  }

  p.val <- min(pval.gt, pval.lt)
  if (p.val == 0) {
    # A p-value of exactly zero is reported as an upper bound instead, taken
    # from the smallest positive value in the difference PDF.
    diff.dist <- attr(object, 'diff.dist')
    p.val.lim <- min(diff.dist[diff.dist > 0])
    cat('\n P-value <= ', signif(p.val.lim, 4), sep = '')
  } else {
    p.val.lim <- p.val
    cat('\n P-value: ', signif(p.val, 4), sep = '')
  }

  # Scalar decision: plain if/else instead of nested ifelse().
  stars <- if (isTRUE(p.val.lim <= 0.01)) {
    "**"
  } else if (isTRUE(p.val.lim <= 0.05)) {
    "*"
  } else {
    ""
  }
  cat(' ', stars, '\n', sep = '')
  cat(rule)
  # deparse() renders the call as source code; as.character() would split it
  # into the function name and bare argument values, losing argument names.
  cat('call:', paste(deparse(attr(object, 'call')), collapse = '\n'), '\n')
  cat(rule)
  invisible(NULL)
}
|
258
|
+
|
259
|
+
#' Enveomics: TRIBS Plot Test
|
260
|
+
#'
|
261
|
+
#' Plots an \code{\link{enve.TRIBStest}} object.
|
262
|
+
#'
|
263
|
+
#' @param x
|
264
|
+
#' \code{\link{enve.TRIBStest}} object to plot.
|
265
|
+
#' @param type
|
266
|
+
#' What to plot. \code{overlap} generates a plot of the two contrasting empirical
|
267
|
+
#' PDFs (to compare against each other), \code{difference} produces a plot of the
|
268
|
+
#' differences between the empirical PDFs (to compare against zero).
|
269
|
+
#' @param col
|
270
|
+
#' Main color of the plot if type=\code{difference}.
|
271
|
+
#' @param col1
|
272
|
+
#' First color of the plot if type=\code{overlap}.
|
273
|
+
#' @param col2
|
274
|
+
#' Second color of the plot if type=\code{overlap}.
|
275
|
+
#' @param ylab
|
276
|
+
#' Y-axis label.
|
277
|
+
#' @param xlim
|
278
|
+
#' X-axis limits.
|
279
|
+
#' @param ylim
|
280
|
+
#' Y-axis limits.
|
281
|
+
#' @param ...
|
282
|
+
#' Any other graphical arguments.
|
283
|
+
#'
|
284
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
285
|
+
#'
|
286
|
+
#' @method plot enve.TRIBStest
|
287
|
+
#' @export
|
288
|
+
|
289
|
+
plot.enve.TRIBStest <- function(
    x,
    type = c('overlap', 'difference'),
    col = '#00000044',
    col1 = col,
    col2 = '#44001144',
    ylab = 'Probability',
    xlim = range(attr(x, 'dist.mids')),
    ylim = c(0, max(c(attr(x, 'all.dist'), attr(x, 'sel.dist')))),
    ...) {
  # Draws the empirical PDFs held by a TRIBS test object. With type
  # 'overlap' the PDFs of the entire dataset (col1) and of the selection
  # (col2) are overlaid using `xlim` and `ylim`; with type 'difference' the
  # PDF of the difference is drawn in `col`, with limits taken from the
  # object itself. Returns invisible NULL.
  type <- match.arg(type)

  # Border colour: same hue as the fill at 50% alpha. col2rgb() returns
  # channels on a 0-255 scale, so they are divided by 255 (not 256) to land
  # exactly on the 0-1 scale that rgb() expects.
  half.alpha <- function(fill) {
    do.call(rgb, as.list(c(col2rgb(fill) / 255, 0.5)))
  }

  # Closed area under a binned density: the first and last midpoints are
  # repeated with height zero so that the polygon returns to the baseline.
  draw.density <- function(mids, density, fill) {
    bins <- length(mids)
    polygon(mids[c(1, seq_len(bins), bins)], c(0, density, 0), col = fill,
            border = half.alpha(fill))
  }

  if (type == 'overlap') {
    canvas <- list(xlim = xlim, ylim = ylim, ylab = ylab, ..., type = 'n',
                   x = 1)
    do.call(plot, canvas)
    draw.density(attr(x, 'dist.mids'), attr(x, 'all.dist'), col1)
    draw.density(attr(x, 'dist.mids'), attr(x, 'sel.dist'), col2)
  } else {
    canvas <- list(xlim = range(attr(x, 'diff.mids')),
                   ylim = c(0, max(attr(x, 'diff.dist'))), ylab = ylab, ...,
                   type = 'n', x = 1)
    do.call(plot, canvas)
    draw.density(attr(x, 'diff.mids'), attr(x, 'diff.dist'), col)
  }
  invisible(NULL)
}
|
321
|
+
|
322
|
+
#' Enveomics: TRIBS Merge
|
323
|
+
#'
|
324
|
+
#' Merges two \code{\link{enve.TRIBS}} objects generated from the same objects at
|
325
|
+
#' different subsampling levels.
|
326
|
+
#'
|
327
|
+
#' @param x
|
328
|
+
#' First \code{\link{enve.TRIBS}} object.
|
329
|
+
#' @param y
|
330
|
+
#' Second \code{\link{enve.TRIBS}} object.
|
331
|
+
#'
|
332
|
+
#' @return Returns an \code{\link{enve.TRIBS}} object.
|
333
|
+
#'
|
334
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
335
|
+
#'
|
336
|
+
#' @export
|
337
|
+
|
338
|
+
enve.TRIBS.merge <- function(
  x,
  y
){
  # Combines the per-subsample distance columns of two TRIBS objects into a
  # single object, after checking that both describe the same analysis.
  this.call <- match.call();

  # Check consistency
  if(attr(x,'distance') != attr(y,'distance'))
    stop('Total distances in objects are different.');
  x.points <- attr(x,'points');
  y.points <- attr(y,'points');
  # Shapes are compared first: an element-wise `!=` on objects of different
  # shape would fail with an unrelated error (or silently recycle) instead
  # of reporting the mismatch.
  if(!identical(dim(x.points), dim(y.points)) ||
      length(x.points) != length(y.points) ||
      any(x.points != y.points))
    stop('Points in objects are different.');
  if(attr(x,'spaceSize') != attr(y,'spaceSize'))
    stop('Space size in objects are different.');
  if(attr(x,'selSize') != attr(y,'selSize'))
    stop('Selection size in objects are different.');
  if(attr(x,'dimensions') != attr(y,'dimensions'))
    stop('Dimensions in objects are different.');
  if(nrow(attr(x,'distances')) != nrow(attr(y,'distances')))
    stop('Replicates in objects are different.');

  # Merge: sort the subsampling fractions of both objects and keep the first
  # column found for each distinct fraction.
  all.subsamples <- c(attr(x,'subsamples'), attr(y,'subsamples'));
  keep <- order(all.subsamples);
  keep <- keep[!duplicated(all.subsamples[keep])];
  # drop=FALSE keeps a matrix even if a single fraction remains
  merged <- cbind(attr(x,'distances'),
    attr(y,'distances'))[, keep, drop=FALSE];

  new('enve.TRIBS',
    distance=attr(x,'distance'), points=x.points,
    distances=merged, spaceSize=attr(x,'spaceSize'),
    selSize=attr(x,'selSize'), dimensions=attr(x,'dimensions'),
    subsamples=all.subsamples[keep], call=this.call);
}
|
368
|
+
|
369
|
+
#==============> Define core functions
|
370
|
+
|
371
|
+
#' Enveomics: TRIBS Test
|
372
|
+
#'
|
373
|
+
#' Estimates the empirical difference between all the distances in a set of
|
374
|
+
#' objects and a subset, together with its statistical significance.
|
375
|
+
#'
|
376
|
+
#' @param dist
|
377
|
+
#' Distances as \code{dist} object.
|
378
|
+
#' @param selection
|
379
|
+
#' Selection defining the subset.
|
380
|
+
#' @param bins
|
381
|
+
#' Number of bins to evaluate in the range of distances.
|
382
|
+
#' @param ...
|
383
|
+
#' Any other parameters supported by \code{\link{enve.tribs}},
|
384
|
+
#' except \code{subsamples}.
|
385
|
+
#'
|
386
|
+
#' @return Returns an \code{\link{enve.TRIBStest}} object.
|
387
|
+
#'
|
388
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
389
|
+
#'
|
390
|
+
#' @export
|
391
|
+
|
392
|
+
enve.tribs.test <- function(
  dist,
  selection,
  bins=50,
  ...
){
  # Compares the summarized distances obtained when sampling the selection
  # against those obtained when sampling all objects at the matching
  # fraction, and derives the distribution of their binned differences.
  this.call <- match.call();

  # Selection sampled in full; all objects sampled at selSize/spaceSize.
  s.tribs <- enve.tribs(dist, selection, subsamples=c(0,1), ...);
  a.tribs <- enve.tribs(dist,
    subsamples=c(0,attr(s.tribs, 'selSize')/attr(s.tribs, 'spaceSize')), ...);
  # Column 1 is the zero fraction; column 2 holds the fraction of interest.
  s.dist <- attr(s.tribs, 'distances')[, 2];
  a.dist <- attr(a.tribs, 'distances')[, 2];

  # Both histograms share the same breakpoints so bins are comparable.
  # NOTE(review): `length.out=bins` yields `bins` breakpoints, i.e. `bins`-1
  # intervals -- confirm whether that is the intended meaning of `bins`.
  dist.range <- range(c(s.dist, a.dist));
  dist.breaks <- seq(dist.range[1], dist.range[2], length.out=bins);
  a.f <- hist(a.dist, breaks=dist.breaks, plot=FALSE);
  s.f <- hist(s.dist, breaks=dist.breaks, plot=FALSE);
  p.x <- a.f$counts/sum(a.f$counts);
  p.y <- s.f$counts/sum(s.f$counts);

  # Probability that both values fall in the same bin (zz.f), that the value
  # from all objects lies z bins above the selection (zp.f[z]), or z bins
  # below it (zn.f[z]). The last entry of zp.f and zn.f is always zero.
  n.mids <- length(a.f$mids);
  zz.f <- sum(p.x*p.y);
  zp.f <- numeric(n.mids);
  zn.f <- numeric(n.mids);
  for(z in seq_len(max(n.mids - 1, 0))){
    hi <- (z + 1):n.mids;
    lo <- hi - z;
    zp.f[z] <- sum(p.x[hi]*p.y[lo]);
    zn.f[z] <- sum(p.x[lo]*p.y[hi]);
  }

  mids.span <- diff(range(a.f$mids));
  new('enve.TRIBStest',
    pval.gt=sum(c(zz.f, zp.f)), pval.lt=sum(c(zz.f, zn.f)),
    all.dist=p.x, sel.dist=p.y, diff.dist=c(rev(zn.f), zz.f, zp.f),
    dist.mids=a.f$mids,
    diff.mids=seq(mids.span, -mids.span, length.out=1+2*n.mids),
    call=this.call);
}
|
431
|
+
|
432
|
+
#' Enveomics: TRIBS
|
433
|
+
#'
|
434
|
+
#' Subsample any objects in "distance space" to reduce the effect of
|
435
|
+
#' sample-clustering. This function was originally designed to subsample
|
436
|
+
#' genomes in "phylogenetic distance space", a clear case of strong
|
437
|
+
#' clustering bias in sampling, by Luis M. Rodriguez-R and Michael R
|
438
|
+
#' Weigand.
|
439
|
+
#'
|
440
|
+
#' @param dist
|
441
|
+
#' Distances as a \code{dist} object.
|
442
|
+
#' @param selection
|
443
|
+
#' Objects to include in the subsample. By default, all objects are
|
444
|
+
#' selected.
|
445
|
+
#' @param replicates
|
446
|
+
#' Number of replications per point.
|
447
|
+
#' @param summary.fx
|
448
|
+
#' Function to summarize the distance distributions in a given replicate. By
|
449
|
+
#' default, the median distance is estimated.
|
450
|
+
#' @param dist.method
|
451
|
+
#' Distance method between random points and samples in the transformed
|
452
|
+
#' space. See \code{dist}.
|
453
|
+
#' @param subsamples
|
454
|
+
#' Subsampling fractions.
|
455
|
+
#' @param dimensions
|
456
|
+
#' Dimensions to use in the NMDS. By default, 5\% of the selection length.
|
457
|
+
#' @param metaMDS.opts
|
458
|
+
#' Any additional options to pass to metaMDS, as \code{list}.
|
459
|
+
#' @param threads
|
460
|
+
#' Number of threads to use.
|
461
|
+
#' @param verbosity
|
462
|
+
#' Verbosity. Use 0 to run quietly, increase for additional information.
|
463
|
+
#' @param points
|
464
|
+
#' Optional. If passed, the MDS step is skipped and this object is used
|
465
|
+
#' instead. It can be the \code{$points} slot of class \code{metaMDS}
|
466
|
+
#' (from \code{vegan}).
|
467
|
+
#' It must be a matrix or matrix-coercible object, with samples as rows and
|
468
|
+
#' dimensions as columns.
|
469
|
+
#' @param pre.tribs
|
470
|
+
#' Optional. If passed, the points are recovered from this object (except if
|
471
|
+
#' \code{points} is also passed). This should be an \code{\link{enve.TRIBS}} object
|
472
|
+
#' estimated on the same objects (the selection is unimportant).
|
473
|
+
#'
|
474
|
+
#' @return Returns an \code{\link{enve.TRIBS}} object.
|
475
|
+
#'
|
476
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
477
|
+
#'
|
478
|
+
#' @export
|
479
|
+
|
480
|
+
enve.tribs <- function(
  dist,
  selection=labels(dist),
  replicates=1000,
  summary.fx=median,
  dist.method='euclidean',
  subsamples=seq(0,1,by=0.01),
  dimensions=ceiling(length(selection)*0.05),
  metaMDS.opts=list(),
  threads=2,
  verbosity=1,
  points,
  pre.tribs
){
  this.call <- match.call();
  if(!inherits(dist, 'dist'))
    stop('`dist` parameter must be a `dist` object.');

  # 1. NMDS
  # Coordinates come from `points` if given, else from `pre.tribs`, else
  # from a fresh vegan::metaMDS run on `dist`. In the first two cases the
  # number of dimensions is taken from the coordinates themselves.
  if(missing(points)){
    if(missing(pre.tribs)){
      if(verbosity > 0)
        cat('===[ Estimating NMDS ]\n');
      if(!suppressPackageStartupMessages(
            requireNamespace("vegan", quietly=TRUE)))
        stop('Unavailable required package: `vegan`.');
      mds.args <- c(metaMDS.opts, list(comm=dist, k=dimensions,
        trace=verbosity));
      points <- do.call(vegan::metaMDS, mds.args)$points;
    }else{
      points <- attr(pre.tribs, 'points');
      dimensions <- ncol(points);
    }
  }else{
    points <- as.matrix(points);
    dimensions <- ncol(points);
  }

  # 2. Pad ranges
  # Each dimension is rescaled by its padded range, so that every object
  # falls strictly inside the unit interval.
  if(verbosity > 0) cat('===[ Padding ranges ]\n');
  dots <- matrix(NA, nrow=nrow(points), ncol=dimensions,
    dimnames=list(rownames(points), seq_len(dimensions)));
  # Only selected objects that have coordinates are retained.
  selection <- selection[!is.na(match(selection, rownames(dots)))];
  for(dim in seq_len(dimensions)){
    # NOTE(review): the padding is derived from the range of the first
    # dimension for every `dim` -- confirm this is intended rather than
    # `points[,dim]`.
    dimRange <- range(points[,dim]) +
      c(-1,1)*diff(range(points[,1]))/length(selection);
    dots[, dim] <- (points[,dim]-dimRange[1])/diff(dimRange);
  }

  # 3. Select points and summarize distances
  if(verbosity > 0) cat('===[ Sub-sampling ]\n');
  distances <- matrix(NA, nrow=replicates, ncol=length(subsamples),
    dimnames=list(seq_len(replicates), as.character(subsamples)));
  cl <- makeCluster(threads);
  # Registered right away so the workers are released even if a replicate
  # fails.
  on.exit(stopCluster(cl), add=TRUE);
  # Row positions of the selection; identical for every fraction.
  sel.idx <- match(selection, rownames(dots));
  # Columns are addressed by position, which is safe even if the same
  # fraction is listed twice.
  for(i in seq_along(subsamples)){
    frx <- subsamples[i];
    if(verbosity > 1) cat('Sub-sampling at ',(frx*100),'%\n',sep='');
    distances[, i] <- parSapply(cl, seq_len(replicates), enve.__tribs,
      frx, sel.idx, dimensions, dots, dist.method, summary.fx, dist);
  }

  # 4. Build object and return
  new('enve.TRIBS',
    distance=do.call(summary.fx, list(as.matrix(dist)[selection, selection])),
    points=points, distances=distances, spaceSize=nrow(points),
    selSize=length(selection), dimensions=dimensions, subsamples=subsamples,
    call=this.call);
}
|
544
|
+
|
545
|
+
#' Enveomics: TRIBS - Internal Ancillary Function
|
546
|
+
#'
|
547
|
+
#' Internal ancillary function (see \code{\link{enve.tribs}}).
|
548
|
+
#'
|
549
|
+
#' @param rep Replicate index (not used in the computation)
|
550
|
+
#' @param frx Fraction
|
551
|
+
#' @param selection Selection
|
552
|
+
#' @param dimensions Dimensions
|
553
|
+
#' @param dots Sampling points
|
554
|
+
#' @param dist.method Distance method
|
555
|
+
#' @param summary.fx Summary function
|
556
|
+
#' @param dist Distance
|
557
|
+
#'
|
558
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
559
|
+
#'
|
560
|
+
#' @export
|
561
|
+
|
562
|
+
enve.__tribs <- function(
  rep, frx, selection, dimensions, dots, dist.method, summary.fx, dist
){
  # One replicate: draws round(frx*length(selection)) uniform random points
  # in the unit hypercube, maps each to the closest selected object, and
  # summarizes the distances among the objects picked. `rep` is not read.
  if(frx==0) return(0);
  n.points <- round(frx*length(selection));
  # A fraction that rounds down to zero objects is handled like frx == 0;
  # iterating over `1:0` would otherwise sample two points.
  if(n.points < 1) return(0);

  picked <- rep_len(selection[1], n.points);
  for(i in seq_len(n.points)){
    rand.point <- runif(dimensions);
    # Distance from the random point to every selected object
    dot.dists <- vapply(selection, function(dot)
      as.numeric(stats::dist(matrix(c(rand.point, dots[dot,]), nrow=2,
        byrow=TRUE), method=dist.method)), numeric(1));
    # Ties are resolved in favour of the first object in `selection`
    picked[i] <- selection[which.min(dot.dists)];
  }
  # The same object may be picked more than once; it is then repeated in
  # the sub-matrix passed to the summary function.
  return( do.call(summary.fx, list(as.matrix(dist)[picked, picked])) );
}
|
582
|
+
|
583
|
+
|