miga-base 1.2.15.1 → 1.2.15.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/gtdb.rb +4 -1
- data/lib/miga/cli/action/gtdb_get.rb +4 -0
- data/lib/miga/remote_dataset/download.rb +3 -2
- data/lib/miga/remote_dataset.rb +44 -8
- data/lib/miga/taxonomy.rb +6 -0
- data/lib/miga/version.rb +2 -2
- data/test/remote_dataset_test.rb +3 -1
- metadata +6 -302
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI +0 -3659
- data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
- data/utils/FastAAI/README.md +0 -84
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -906
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -165
- data/utils/enveomics/Manifest/examples.json +0 -162
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -55
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -421
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/anir.rb +0 -137
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
- data/utils/enveomics/Scripts/rbm.rb +0 -108
- data/utils/enveomics/Scripts/sam.filter.rb +0 -148
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -80
- data/utils/enveomics/enveomics.R/README.md +0 -81
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +0 -67
- data/utils/multitrim/multitrim.py +0 -1555
- data/utils/multitrim/multitrim.yml +0 -13
@@ -1,1631 +0,0 @@
|
|
1
|
-
#==============> Define S4 classes
|
2
|
-
|
3
|
-
#' Enveomics: Recruitment Plot (2) - S4 Class
|
4
|
-
#'
|
5
|
-
#' Enve-omics representation of Recruitment plots. This object can
|
6
|
-
#' be produced by \code{\link{enve.recplot2}} and supports S4 method plot.
|
7
|
-
#'
|
8
|
-
#' @slot counts \code{(matrix)} Counts as a two-dimensional histogram.
|
9
|
-
#' @slot pos.counts.in \code{(numeric)} Counts of in-group hits per position bin.
|
10
|
-
#' @slot pos.counts.out \code{(numeric)} Counts of out-group hits per position bin.
|
11
|
-
#' @slot id.counts \code{(numeric)} Counts per ID bin.
|
12
|
-
#' @slot id.breaks \code{(numeric)} Breaks of identity bins.
|
13
|
-
#' @slot pos.breaks \code{(numeric)} Breaks of position bins.
|
14
|
-
#' @slot pos.names \code{(character)} Names of the position bins.
|
15
|
-
#' @slot seq.breaks \code{(numeric)} Breaks of input sequences.
|
16
|
-
#' @slot peaks \code{(list)} Peaks identified in the recplot.
|
17
|
-
#' Limits of the subject sequences after concatenation.
|
18
|
-
#' @slot seq.names \code{(character}) Names of the subject sequences.
|
19
|
-
#' @slot id.metric \code{(character}) Metric used as 'identity'.
|
20
|
-
#' @slot id.ingroup \code{(logical}) Identity bins considered in-group.
|
21
|
-
#' @slot call \code{(call)} Call producing this object.
|
22
|
-
#'
|
23
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
24
|
-
#'
|
25
|
-
#' @exportClass
|
26
|
-
|
27
|
-
enve.RecPlot2 <- setClass("enve.RecPlot2",
|
28
|
-
representation(
|
29
|
-
# slots = list(
|
30
|
-
counts='matrix',
|
31
|
-
pos.counts.in='numeric',
|
32
|
-
pos.counts.out='numeric',
|
33
|
-
id.counts='numeric',
|
34
|
-
id.breaks='numeric',
|
35
|
-
pos.breaks='numeric',
|
36
|
-
pos.names='character',
|
37
|
-
seq.breaks='numeric',
|
38
|
-
peaks='list',
|
39
|
-
seq.names='character',
|
40
|
-
id.metric='character',
|
41
|
-
id.ingroup='logical',
|
42
|
-
call='call')
|
43
|
-
,package='enveomics.R'
|
44
|
-
);
|
45
|
-
|
46
|
-
#' Enveomics: Recruitment Plot (2) Peak - S4 Class
|
47
|
-
#'
|
48
|
-
#' Enve-omics representation of a peak in the sequencing depth histogram
|
49
|
-
#' of a Recruitment plot (see \code{\link{enve.recplot2.findPeaks}}).
|
50
|
-
#'
|
51
|
-
#' @slot dist \code{(character)}
|
52
|
-
#' Distribution of the peak. Currently supported: \code{norm} (normal) and \code{sn}
|
53
|
-
#' (skew-normal).
|
54
|
-
#' @slot values \code{(numeric)}
|
55
|
-
#' Sequencing depth values predicted to conform the peak.
|
56
|
-
#' @slot values.res \code{(numeric)}
|
57
|
-
#' Sequencing depth values not explained by this or previously identified
|
58
|
-
#' peaks.
|
59
|
-
#' @slot mode \code{(numeric)}
|
60
|
-
#' Seed-value of mode anchoring the peak.
|
61
|
-
#' @slot param.hat \code{(list)}
|
62
|
-
#' Parameters of the distribution. A list of two values if dist=\code{norm} (sd
|
63
|
-
#' and mean), or three values if dist=\code{sn}(omega=scale, alpha=shape, and
|
64
|
-
#' xi=location). Note that the "dispersion" parameter is always first and
|
65
|
-
#' the "location" parameter is always last.
|
66
|
-
#' @slot n.hat \code{(numeric)}
|
67
|
-
#' Number of bins estimated to be explained by this peak. This should
|
68
|
-
#' ideally be equal to the length of \code{values}, but it's not an integer.
|
69
|
-
#' @slot n.total \code{(numeric)}
|
70
|
-
#' Total number of bins from which the peak was extracted. I.e., total
|
71
|
-
#' number of position bins with non-zero sequencing depth in the recruitment
|
72
|
-
#' plot (regardless of peak count).
|
73
|
-
#' @slot err.res \code{(numeric)}
|
74
|
-
#' Error left after adding the peak (mower) or log-likelihood (em or emauto).
|
75
|
-
#' @slot merge.logdist \code{(numeric)}
|
76
|
-
#' Attempted \code{merge.logdist} parameter.
|
77
|
-
#' @slot seq.depth \code{(numeric)}
|
78
|
-
#' Best estimate available for the sequencing depth of the peak (centrality).
|
79
|
-
#' @slot log \code{(logical)}
|
80
|
-
#' Indicates if the estimation was performed in natural logarithm space.
|
81
|
-
#'
|
82
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
83
|
-
#'
|
84
|
-
#' @exportClass
|
85
|
-
|
86
|
-
enve.RecPlot2.Peak <- setClass("enve.RecPlot2.Peak",
|
87
|
-
representation(
|
88
|
-
# slots = list(
|
89
|
-
dist='character',
|
90
|
-
values='numeric',
|
91
|
-
values.res='numeric',
|
92
|
-
mode='numeric',
|
93
|
-
param.hat='list',
|
94
|
-
n.hat='numeric',
|
95
|
-
n.total='numeric',
|
96
|
-
err.res='numeric',
|
97
|
-
merge.logdist='numeric',
|
98
|
-
seq.depth='numeric',
|
99
|
-
log='logical'
|
100
|
-
));
|
101
|
-
|
102
|
-
#' Attribute accessor
|
103
|
-
#'
|
104
|
-
#'
|
105
|
-
#' @param x Object
|
106
|
-
#' @param name Attribute name
|
107
|
-
setMethod("$", "enve.RecPlot2", function(x, name) attr(x, name))
|
108
|
-
|
109
|
-
#' Attribute accessor
|
110
|
-
#'
|
111
|
-
#'
|
112
|
-
#' @param x Object
|
113
|
-
#' @param name Attribute name
|
114
|
-
setMethod("$", "enve.RecPlot2.Peak", function(x, name) attr(x, name))
|
115
|
-
|
116
|
-
#==============> Define S4 methods
|
117
|
-
|
118
|
-
#' Enveomics: Recruitment Plot (2)
|
119
|
-
#'
|
120
|
-
#' Plots an \code{\link{enve.RecPlot2}} object.
|
121
|
-
#'
|
122
|
-
#' @param x
|
123
|
-
#' \code{\link{enve.RecPlot2}} object to plot.
|
124
|
-
#' @param layout
|
125
|
-
#' Matrix indicating the position of the different panels in the layout,
|
126
|
-
#' where:
|
127
|
-
#' \itemize{
|
128
|
-
#' \item 0: Empty space
|
129
|
-
#' \item 1: Counts matrix
|
130
|
-
#' \item 2: position histogram (sequencing depth)
|
131
|
-
#' \item 3: identity histogram
|
132
|
-
#' \item 4: Populations histogram (histogram of sequencing depths)
|
133
|
-
#' \item 5: Color scale for the counts matrix (vertical)
|
134
|
-
#' \item 6: Color scale of the counts matrix (horizontal)
|
135
|
-
#' }
|
136
|
-
#' Only panels indicated here will be plotted. To plot only one panel
|
137
|
-
#' simply set this to the number of the panel you want to plot.
|
138
|
-
#' @param panel.fun
|
139
|
-
#' List of functions to be executed after drawing each panel. Use the
|
140
|
-
#' indices in \code{layout} (as characters) as keys. Functions for indices
|
141
|
-
#' missing in \code{layout} are ignored. For example, to add a vertical line
|
142
|
-
#' at the 3Mbp mark in both the position histogram and the counts matrix:
|
143
|
-
#' \code{list('1'=function() abline(v=3), '2'=function() abline(v=3))}.
|
144
|
-
#' Note that the X-axis in both panels is in Mbp by default. To change
|
145
|
-
#' this behavior, set \code{pos.units} accordingly.
|
146
|
-
#' @param widths
|
147
|
-
#' Relative widths of the columns of \code{layout}.
|
148
|
-
#' @param heights
|
149
|
-
#' Relative heights of the rows of \code{layout}.
|
150
|
-
#' @param palette
|
151
|
-
#' Colors to be used to represent the counts matrix, sorted from no hits
|
152
|
-
#' to the maximum sequencing depth.
|
153
|
-
#' @param underlay.group
|
154
|
-
#' If TRUE, it indicates the in-group and out-group areas couloured based
|
155
|
-
#' on \code{in.col} and \code{out.col}. Requires support for semi-transparency.
|
156
|
-
#' @param peaks.col
|
157
|
-
#' If not \code{NA}, it attempts to represent peaks in the population histogram
|
158
|
-
#' in the specified color. Set to \code{NA} to avoid peak-finding.
|
159
|
-
#' @param use.peaks
|
160
|
-
#' A list of \code{\link{enve.RecPlot2.Peak}} objects, as returned by
|
161
|
-
#' \code{\link{enve.recplot2.findPeaks}}. If passed, \code{peaks.opts} is ignored.
|
162
|
-
#' @param id.lim
|
163
|
-
#' Limits of identities to represent.
|
164
|
-
#' @param pos.lim
|
165
|
-
#' Limits of positions to represent (in bp, regardless of \code{pos.units}).
|
166
|
-
#' @param pos.units
|
167
|
-
#' Units in which the positions should be represented (powers of 1,000
|
168
|
-
#' base pairs).
|
169
|
-
#' @param mar
|
170
|
-
#' Margins of the panels as a list, with the character representation of
|
171
|
-
#' the number of the panel as index (see \code{layout}).
|
172
|
-
#' @param pos.splines
|
173
|
-
#' Smoothing parameter for the splines in the position histogram. Zero
|
174
|
-
#' (0) for no splines. Use \code{NULL} to automatically detect by leave-one-out
|
175
|
-
#' cross-validation.
|
176
|
-
#' @param id.splines
|
177
|
-
#' Smoothing parameter for the splines in the identity histogram. Zero
|
178
|
-
#' (0) for no splines. Use \code{NULL} to automatically detect by leave-one-out
|
179
|
-
#' cross-validation.
|
180
|
-
#' @param in.lwd
|
181
|
-
#' Line width for the sequencing depth of in-group matches.
|
182
|
-
#' @param out.lwd
|
183
|
-
#' Line width for the sequencing depth of out-group matches.
|
184
|
-
#' @param id.lwd
|
185
|
-
#' Line width for the identity histogram.
|
186
|
-
#' @param in.col
|
187
|
-
#' Color associated to in-group matches.
|
188
|
-
#' @param out.col
|
189
|
-
#' Color associated to out-group matches.
|
190
|
-
#' @param id.col
|
191
|
-
#' Color for the identity histogram.
|
192
|
-
#' @param breaks.col
|
193
|
-
#' Color of the vertical lines indicating sequence breaks.
|
194
|
-
#' @param peaks.opts
|
195
|
-
#' Options passed to \code{\link{enve.recplot2.findPeaks}},
|
196
|
-
#' if \code{peaks.col} is not \code{NA}.
|
197
|
-
#' @param ...
|
198
|
-
#' Any other graphic parameters (currently ignored).
|
199
|
-
#'
|
200
|
-
#' @return
|
201
|
-
#' Returns a list of \code{\link{enve.RecPlot2.Peak}} objects (see
|
202
|
-
#' \code{\link{enve.recplot2.findPeaks}}). If \code{peaks.col=NA} or
|
203
|
-
#' \code{layout} doesn't include 4, returns \code{NA}.
|
204
|
-
#'
|
205
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
206
|
-
#'
|
207
|
-
#' @method plot enve.RecPlot2
|
208
|
-
#' @export
|
209
|
-
|
210
|
-
plot.enve.RecPlot2 <- function
|
211
|
-
(x,
|
212
|
-
layout=matrix(c(5,5,2,1,4,3), nrow=2),
|
213
|
-
panel.fun=list(),
|
214
|
-
widths=c(1,7,2),
|
215
|
-
heights=c(1,2),
|
216
|
-
palette=grey((100:0)/100),
|
217
|
-
underlay.group=TRUE,
|
218
|
-
peaks.col='darkred',
|
219
|
-
use.peaks,
|
220
|
-
id.lim=range(x$id.breaks),
|
221
|
-
pos.lim=range(x$pos.breaks),
|
222
|
-
pos.units=c('Mbp','Kbp','bp'),
|
223
|
-
mar=list('1'=c(5,4,1,1)+.1, '2'=c(ifelse(any(layout==1),1,5),4,4,1)+.1,
|
224
|
-
'3'=c(5,ifelse(any(layout==1),1,4),1,2)+0.1,
|
225
|
-
'4'=c(ifelse(any(layout==1),1,5),ifelse(any(layout==2),1,4),4,2)+0.1,
|
226
|
-
'5'=c(5,3,4,1)+0.1, '6'=c(5,4,4,2)+0.1),
|
227
|
-
pos.splines=0,
|
228
|
-
id.splines=1/2,
|
229
|
-
in.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
|
230
|
-
out.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
|
231
|
-
id.lwd=ifelse(is.null(id.splines) || id.splines>0, 1/2, 2),
|
232
|
-
in.col='darkblue',
|
233
|
-
out.col='lightblue',
|
234
|
-
id.col='black',
|
235
|
-
breaks.col='#AAAAAA40',
|
236
|
-
peaks.opts=list(),
|
237
|
-
...
|
238
|
-
){
|
239
|
-
pos.units <- match.arg(pos.units);
|
240
|
-
pos.factor <- ifelse(pos.units=='bp',1,ifelse(pos.units=='Kbp',1e3,1e6));
|
241
|
-
pos.lim <- pos.lim/pos.factor;
|
242
|
-
lmat <- layout;
|
243
|
-
for(i in 1:6) if(!any(layout==i)) lmat[layout>i] <- lmat[layout>i]-1;
|
244
|
-
|
245
|
-
layout(lmat, widths=widths, heights=heights);
|
246
|
-
ori.mar <- par('mar');
|
247
|
-
|
248
|
-
# Essential vars
|
249
|
-
counts <- x$counts
|
250
|
-
|
251
|
-
id.ingroup <- x$id.ingroup
|
252
|
-
id.counts <- x$id.counts
|
253
|
-
id.breaks <- x$id.breaks
|
254
|
-
id.mids <- (id.breaks[-length(id.breaks)]+id.breaks[-1])/2
|
255
|
-
id.binsize <- id.breaks[-1] - id.breaks[-length(id.breaks)]
|
256
|
-
|
257
|
-
pos.counts.in <- x$pos.counts.in
|
258
|
-
pos.counts.out <- x$pos.counts.out
|
259
|
-
pos.breaks <- x$pos.breaks/pos.factor
|
260
|
-
pos.mids <- (pos.breaks[-length(pos.breaks)]+pos.breaks[-1])/2
|
261
|
-
pos.binsize <- (pos.breaks[-1] - pos.breaks[-length(pos.breaks)])*pos.factor
|
262
|
-
|
263
|
-
seqdepth.in <- pos.counts.in/pos.binsize
|
264
|
-
seqdepth.out <- pos.counts.out/pos.binsize
|
265
|
-
seqdepth.lim <- range(c(seqdepth.in[seqdepth.in>0],
|
266
|
-
seqdepth.out[seqdepth.out>0]))*c(1/2,2)
|
267
|
-
|
268
|
-
if(underlay.group){
|
269
|
-
in.bg <- do.call(rgb, c(as.list(col2rgb(in.col)),
|
270
|
-
list(maxColorValue=256, alpha=62)));
|
271
|
-
out.bg <- do.call(rgb, c(as.list(col2rgb(out.col)[,1]),
|
272
|
-
list(maxColorValue=256, alpha=52)));
|
273
|
-
}
|
274
|
-
|
275
|
-
# [1] Counts matrix
|
276
|
-
if(any(layout==1)){
|
277
|
-
par(mar=mar[['1']]);
|
278
|
-
plot(1, t='n', bty='l',
|
279
|
-
xlim=pos.lim, xlab=paste('Position in genome (',pos.units,')',sep=''),
|
280
|
-
xaxs='i', ylim=id.lim, ylab=x$id.metric, yaxs='i');
|
281
|
-
if(underlay.group){
|
282
|
-
rect(pos.lim[1], id.lim[1], pos.lim[2],
|
283
|
-
min(id.breaks[c(id.ingroup,TRUE)]), col=out.bg, border=NA);
|
284
|
-
rect(pos.lim[1], min(id.breaks[c(id.ingroup,TRUE)]), pos.lim[2],
|
285
|
-
id.lim[2], col=in.bg, border=NA);
|
286
|
-
}
|
287
|
-
abline(v=x$seq.breaks/pos.factor, col=breaks.col);
|
288
|
-
image(x=pos.breaks, y=id.breaks, z=log10(counts),col=palette,
|
289
|
-
bg=grey(1,0), breaks=seq(-.1,log10(max(counts)),
|
290
|
-
length.out=1+length(palette)), add=TRUE);
|
291
|
-
if(exists('1',panel.fun)) panel.fun[['1']]();
|
292
|
-
}
|
293
|
-
|
294
|
-
# [2] Position histogram
|
295
|
-
if(any(layout==2)){
|
296
|
-
par(mar=mar[['2']]);
|
297
|
-
if(any(layout==1)){
|
298
|
-
xlab=''
|
299
|
-
xaxt='n'
|
300
|
-
}else{
|
301
|
-
xlab=paste('Position in genome (',pos.units,')',sep='')
|
302
|
-
xaxt='s'
|
303
|
-
}
|
304
|
-
plot(1,t='n', bty='l', log='y',
|
305
|
-
xlim=pos.lim, xlab=xlab, xaxt=xaxt, xaxs='i',
|
306
|
-
ylim=seqdepth.lim, yaxs='i', ylab='Sequencing depth (X)');
|
307
|
-
abline(v=x$seq.breaks/pos.factor, col=breaks.col)
|
308
|
-
pos.x <- rep(pos.breaks,each=2)[-c(1,2*length(pos.breaks))]
|
309
|
-
pos.f <- rep(seqdepth.in,each=2)
|
310
|
-
lines(pos.x, rep(seqdepth.out,each=2), lwd=out.lwd, col=out.col);
|
311
|
-
lines(pos.x, pos.f, lwd=in.lwd, col=in.col);
|
312
|
-
if(is.null(pos.splines) || pos.splines > 0){
|
313
|
-
pos.spline <- smooth.spline(pos.x[pos.f>0], log(pos.f[pos.f>0]),
|
314
|
-
spar=pos.splines)
|
315
|
-
lines(pos.spline$x, exp(pos.spline$y), lwd=2, col=in.col)
|
316
|
-
}
|
317
|
-
if(any(pos.counts.out==0)) rect(pos.breaks[c(pos.counts.out==0,FALSE)],
|
318
|
-
seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.out==0)],
|
319
|
-
seqdepth.lim[1]*3/2, col=out.col, border=NA);
|
320
|
-
if(any(pos.counts.in==0)) rect(pos.breaks[c(pos.counts.in==0,FALSE)],
|
321
|
-
seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.in==0)],
|
322
|
-
seqdepth.lim[1]*3/2, col=in.col, border=NA);
|
323
|
-
if(exists('2',panel.fun)) panel.fun[['2']]();
|
324
|
-
}
|
325
|
-
|
326
|
-
# [3] Identity histogram
|
327
|
-
if(any(layout==3)){
|
328
|
-
par(mar=mar[['3']]);
|
329
|
-
if(any(layout==1)){
|
330
|
-
ylab=''
|
331
|
-
yaxt='n'
|
332
|
-
}else{
|
333
|
-
ylab=x$id.metric
|
334
|
-
yaxt='s'
|
335
|
-
}
|
336
|
-
if(sum(id.counts>0) >= 4){
|
337
|
-
id.counts.range <- range(id.counts[id.counts>0])*c(1/2,2);
|
338
|
-
plot(1,t='n', bty='l', log='x',
|
339
|
-
xlim=id.counts.range, xlab='bps per bin', xaxs='i',
|
340
|
-
ylim=id.lim, yaxs='i', ylab=ylab, yaxt=yaxt);
|
341
|
-
if(underlay.group){
|
342
|
-
rect(id.counts.range[1], id.lim[1], id.counts.range[2],
|
343
|
-
min(id.breaks[c(id.ingroup,TRUE)]), col=out.bg, border=NA);
|
344
|
-
rect(id.counts.range[1], min(id.breaks[c(id.ingroup,TRUE)]),
|
345
|
-
id.counts.range[2], id.lim[2], col=in.bg, border=NA);
|
346
|
-
}
|
347
|
-
id.f <- rep(id.counts,each=2)
|
348
|
-
id.x <- rep(id.breaks,each=2)[-c(1,2*length(id.breaks))]
|
349
|
-
lines(id.f, id.x, lwd=id.lwd, col=id.col);
|
350
|
-
if(is.null(id.splines) || id.splines > 0){
|
351
|
-
id.spline <- smooth.spline(id.x[id.f>0], log(id.f[id.f>0]),
|
352
|
-
spar=id.splines)
|
353
|
-
lines(exp(id.spline$y), id.spline$x, lwd=2, col=id.col)
|
354
|
-
}
|
355
|
-
}else{
|
356
|
-
plot(1,t='n',bty='l',xlab='', xaxt='n', ylab='', yaxt='n')
|
357
|
-
text(1,1,labels='Insufficient data', srt=90)
|
358
|
-
}
|
359
|
-
if(exists('3',panel.fun)) panel.fun[['3']]();
|
360
|
-
}
|
361
|
-
|
362
|
-
# [4] Populations histogram
|
363
|
-
peaks <- NA;
|
364
|
-
if(any(layout==4)){
|
365
|
-
par(mar=mar[['4']]);
|
366
|
-
if(any(layout==2)){
|
367
|
-
ylab=''
|
368
|
-
yaxt='n'
|
369
|
-
}else{
|
370
|
-
ylab='Sequencing depth (X)'
|
371
|
-
yaxt='s'
|
372
|
-
}
|
373
|
-
h.breaks <- seq(log10(seqdepth.lim[1]*2), log10(seqdepth.lim[2]/2),
|
374
|
-
length.out=200);
|
375
|
-
h.in <- hist(log10(seqdepth.in), breaks=h.breaks, plot=FALSE);
|
376
|
-
h.out <- hist(log10(seqdepth.out), breaks=h.breaks, plot=FALSE);
|
377
|
-
plot(1, t='n', log='y',
|
378
|
-
xlim=range(c(h.in$counts,h.out$counts,sum(pos.counts.in==0))),
|
379
|
-
xaxs='r', xlab='', xaxt='n', ylim=seqdepth.lim, yaxs='i', ylab=ylab,
|
380
|
-
yaxt=yaxt)
|
381
|
-
y.tmp.in <- c(rep(10^h.in$breaks,each=2),seqdepth.lim[1]*c(1,1,3/2,3/2))
|
382
|
-
y.tmp.out <- c(rep(10^h.out$breaks,each=2),seqdepth.lim[1]*c(1,1,3/2,3/2))
|
383
|
-
lines(c(0,rep(h.out$counts,each=2),0,0,rep(sum(pos.counts.out==0),2),0),
|
384
|
-
y.tmp.out, col=out.col)
|
385
|
-
polygon(c(0,rep(h.in$counts,each=2),0,0,rep(sum(pos.counts.in==0),2),0),
|
386
|
-
y.tmp.in, border=NA, col=in.col)
|
387
|
-
if(!is.na(peaks.col)){
|
388
|
-
o <- peaks.opts; o$x = x;
|
389
|
-
if(missing(use.peaks)){
|
390
|
-
peaks <- do.call(enve.recplot2.findPeaks, o)
|
391
|
-
}else{
|
392
|
-
peaks <- use.peaks
|
393
|
-
}
|
394
|
-
h.mids <- (10^h.breaks[-1] + 10^h.breaks[-length(h.breaks)])/2
|
395
|
-
if(!is.null(peaks) & length(peaks)>0){
|
396
|
-
pf <- h.mids*0;
|
397
|
-
for(i in 1:length(peaks)){
|
398
|
-
cnt <- enve.recplot2.__peakHist(peaks[[i]], h.mids)
|
399
|
-
lines(cnt, h.mids, col='red');
|
400
|
-
pf <- pf+cnt;
|
401
|
-
axis(4, at=peaks[[i]]$seq.depth, letters[i], las=1, hadj=1/2)
|
402
|
-
}
|
403
|
-
lines(pf, h.mids, col='red',lwd=1.5);
|
404
|
-
dpt <- signif(as.numeric(lapply(peaks, function(x) x$seq.depth)),2)
|
405
|
-
frx <- signif(100*as.numeric(
|
406
|
-
lapply(peaks,
|
407
|
-
function(x) ifelse(length(x$values)==0, x$n.hat,
|
408
|
-
length(x$values))/x$n.total)), 2)
|
409
|
-
if(peaks[[1]]$err.res < 0){
|
410
|
-
err <- paste(', LL:', signif(peaks[[1]]$err.res, 3))
|
411
|
-
}else{
|
412
|
-
err <- paste(', err:',
|
413
|
-
signif(as.numeric(lapply(peaks, function(x) x$err.res)), 2))
|
414
|
-
}
|
415
|
-
legend('topright', bty='n', cex=1/2,
|
416
|
-
legend=paste(letters[1:length(peaks)],'. ',
|
417
|
-
dpt,'X (', frx, '%', err, ')', sep=''))
|
418
|
-
}
|
419
|
-
}
|
420
|
-
if(exists('4',panel.fun)) panel.fun[['4']]();
|
421
|
-
}
|
422
|
-
|
423
|
-
# [5] Color scale of the counts matrix (vertical)
|
424
|
-
count.bins <- 10^seq(log10(min(counts[counts>0])), log10(max(counts)),
|
425
|
-
length.out=1+length(palette))
|
426
|
-
if(any(layout==5)){
|
427
|
-
par(mar=mar[['5']]);
|
428
|
-
plot(1,t='n',log='y',xlim=0:1,xaxt='n',xlab='',xaxs='i',
|
429
|
-
ylim=range(count.bins), yaxs='i', ylab='')
|
430
|
-
rect(0,count.bins[-length(count.bins)],1,count.bins[-1],col=palette,
|
431
|
-
border=NA)
|
432
|
-
if(exists('5',panel.fun)) panel.fun[['5']]();
|
433
|
-
}
|
434
|
-
|
435
|
-
# [6] Color scale of the coutnts matrix (horizontal)
|
436
|
-
if(any(layout==6)){
|
437
|
-
par(mar=mar[['6']]);
|
438
|
-
plot(1,t='n',log='x',ylim=0:1,yaxt='n',ylab='',yaxs='i',
|
439
|
-
xlim=range(count.bins), xaxs='i',xlab='');
|
440
|
-
rect(count.bins[-length(count.bins)],0,count.bins[-1],1,col=palette,
|
441
|
-
border=NA);
|
442
|
-
if(exists('6',panel.fun)) panel.fun[['6']]();
|
443
|
-
}
|
444
|
-
|
445
|
-
par(mar=ori.mar);
|
446
|
-
return(peaks);
|
447
|
-
}
|
448
|
-
|
449
|
-
#==============> Define core functions
|
450
|
-
|
451
|
-
#' Enveomics: Recruitment Plot (2)
|
452
|
-
#'
|
453
|
-
#' Produces recruitment plots provided that \code{BlastTab.catsbj.pl} has
|
454
|
-
#' been previously executed.
|
455
|
-
#'
|
456
|
-
#' @param prefix
|
457
|
-
#' Path to the prefix of the \code{BlastTab.catsbj.pl} output files. At
|
458
|
-
#' least the files .rec and .lim must exist with this prefix.
|
459
|
-
#' @param plot
|
460
|
-
#' Should the object be plotted?
|
461
|
-
#' @param pos.breaks
|
462
|
-
#' Breaks in the positions histogram. It can also be a vector of break
|
463
|
-
#' points, and values outside the range are ignored. If zero (0), it
|
464
|
-
#' uses the sequence breaks as defined in the .lim file, which means
|
465
|
-
#' one bin per contig (or gene, if the mapping is agains genes). Ignored
|
466
|
-
#' if `pos.breaks.tsv` is passed.
|
467
|
-
#' @param pos.breaks.tsv
|
468
|
-
#' Path to a list of (absolute) coordinates to use as position breaks.
|
469
|
-
#' This tab-delimited file can be produced by \code{GFF.catsbj.pl}, and it
|
470
|
-
#' must contain at least one column: coordinates of the break positions of
|
471
|
-
#' each position bin. If it has a second column, this is used as the name
|
472
|
-
#' of the position bin that ends at the given coordinate (the first row is
|
473
|
-
#' ignored). Any additional columns are currently ignored. If \code{NA},
|
474
|
-
#' position bins are determined by \code{pos.breaks}.
|
475
|
-
#' @param id.breaks
|
476
|
-
#' Breaks in the identity histogram. It can also be a vector of break
|
477
|
-
#' points, and values outside the range are ignored.
|
478
|
-
#' @param id.free.range
|
479
|
-
#' Indicates that the range should be freely set from the observed
|
480
|
-
#' values. Otherwise, 70-100\% is included in the identity histogram
|
481
|
-
#' (default).
|
482
|
-
#' @param id.metric
|
483
|
-
#' Metric of identity to be used (Y-axis). Corrected identity is only
|
484
|
-
#' supported if the original BLAST file included sequence lengths.
|
485
|
-
#' @param id.summary
|
486
|
-
#' Function summarizing the identity bins. Other recommended options
|
487
|
-
#' include: \code{median} to estimate the median instead of total bins, and
|
488
|
-
#' \code{function(x) mlv(x,method='parzen')$M} to estimate the mode.
|
489
|
-
#' @param id.cutoff
|
490
|
-
#' Cutoff of identity metric above which the hits are considered
|
491
|
-
#' \code{in-group}. The 95\% identity corresponds to the expectation of
|
492
|
-
#' ANI<95\% within species.
|
493
|
-
#' @param threads
|
494
|
-
#' Number of threads to use.
|
495
|
-
#' @param verbose
|
496
|
-
#' Indicates if the function should report the advance.
|
497
|
-
#' @param ...
|
498
|
-
#' Any additional parameters supported by \code{\link{plot.enve.RecPlot2}}.
|
499
|
-
#'
|
500
|
-
#' @return Returns an object of class \code{\link{enve.RecPlot2}}.
|
501
|
-
#'
|
502
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
503
|
-
#' @author Kenji Gerhardt [aut]
|
504
|
-
#'
|
505
|
-
#' @export
|
506
|
-
|
507
|
-
enve.recplot2 <- function(
|
508
|
-
prefix,
|
509
|
-
plot = TRUE,
|
510
|
-
pos.breaks = 1e3,
|
511
|
-
pos.breaks.tsv = NA,
|
512
|
-
id.breaks = 60,
|
513
|
-
id.free.range = FALSE,
|
514
|
-
id.metric = c('identity', 'corrected identity', 'bit score'),
|
515
|
-
id.summary = sum,
|
516
|
-
id.cutoff = 95,
|
517
|
-
threads = 2,
|
518
|
-
verbose = TRUE,
|
519
|
-
...
|
520
|
-
){
|
521
|
-
# Settings
|
522
|
-
id.metric <- match.arg(id.metric);
|
523
|
-
|
524
|
-
#Read files
|
525
|
-
if (verbose) cat("Reading files.\n")
|
526
|
-
rec <- read.table(paste(prefix, ".rec", sep = ""),
|
527
|
-
sep = "\t", comment.char = "", quote = "");
|
528
|
-
lim <- read.table(paste(prefix, ".lim", sep = ""),
|
529
|
-
sep = "\t", comment.char = "", quote = "", as.is = TRUE);
|
530
|
-
|
531
|
-
# Build matrix
|
532
|
-
if (verbose) cat("Building counts matrix.\n")
|
533
|
-
if (id.metric == "corrected identity" & ncol(rec) < 6) {
|
534
|
-
stop("Requesting corr. identity, but .rec file doesn't have 6th column")
|
535
|
-
}
|
536
|
-
rec.idcol <- ifelse(id.metric == "identity", 3,
|
537
|
-
ifelse(id.metric == "corrected identity", 6, 4))
|
538
|
-
pos.names <- as.character(NULL)
|
539
|
-
if (!is.na(pos.breaks.tsv)){
|
540
|
-
tmp <- read.table(pos.breaks.tsv, sep = "\t", header = FALSE, as.is = TRUE)
|
541
|
-
pos.breaks <- as.numeric(tmp[, 1])
|
542
|
-
if (ncol(tmp) > 1) pos.names <- as.character(tmp[-1, 2])
|
543
|
-
} else if (length(pos.breaks) == 1) {
|
544
|
-
if (pos.breaks > 0){
|
545
|
-
pos.breaks <- seq(min(lim[, 2]), max(lim[, 3]), length.out = pos.breaks + 1)
|
546
|
-
} else {
|
547
|
-
pos.breaks <- c(lim[1, 2], lim[, 3])
|
548
|
-
pos.names <- lim[, 1]
|
549
|
-
}
|
550
|
-
}
|
551
|
-
if (length(id.breaks) == 1) {
|
552
|
-
id.range.v <- rec[, rec.idcol]
|
553
|
-
if (!id.free.range) id.range.v <- c(id.range.v, 70, 100)
|
554
|
-
id.range.v <- range(id.range.v)
|
555
|
-
id.breaks <- seq(id.range.v[1], id.range.v[2], length.out = id.breaks + 1)
|
556
|
-
}
|
557
|
-
|
558
|
-
# Run in parallel
|
559
|
-
# If they already set threads to 1 manually, there's no point in launching
|
560
|
-
# clusters, it's just slower. Ditto for small files.
|
561
|
-
if (nrow(rec) < 75000 | threads == 1) {
|
562
|
-
# Coerces rec into a form that __counts is happy about
|
563
|
-
rec.l <- list()
|
564
|
-
rec.l[[1]] <- list(rec = rec, verbose = FALSE)
|
565
|
-
|
566
|
-
# No need to make a temporary variable, there's only one return for sure
|
567
|
-
# and it's not a list because it isn't coming back from an apply
|
568
|
-
counts <- enve.recplot2.__counts(
|
569
|
-
rec.l[[1]], pos.breaks = pos.breaks, id.breaks = id.breaks,
|
570
|
-
rec.idcol = rec.idcol)
|
571
|
-
} else {
|
572
|
-
cl <- makeCluster(threads)
|
573
|
-
rec.l <- list()
|
574
|
-
thl <- ceiling(nrow(rec)/threads)
|
575
|
-
for (i in 0:(threads - 1)) {
|
576
|
-
rec.l[[i + 1]] <- list(
|
577
|
-
rec = rec[(i * thl + 1):min(((i + 1) * thl), nrow(rec)), ],
|
578
|
-
verbose = ifelse(i == 0, verbose, FALSE))
|
579
|
-
}
|
580
|
-
counts.l <- clusterApply(
|
581
|
-
cl, rec.l, enve.recplot2.__counts, pos.breaks = pos.breaks,
|
582
|
-
id.breaks = id.breaks, rec.idcol = rec.idcol)
|
583
|
-
stopCluster(cl) # No spooky ghost clusters
|
584
|
-
|
585
|
-
counts <- counts.l[[1]]
|
586
|
-
for (i in 2:threads) counts <- counts + counts.l[[i]]
|
587
|
-
}
|
588
|
-
|
589
|
-
# Estimate 1D histograms
|
590
|
-
if (verbose) cat("Building histograms.\n")
|
591
|
-
id.mids <- (id.breaks[-length(id.breaks)] + id.breaks[-1])/2;
|
592
|
-
id.ingroup <- (id.mids > id.cutoff);
|
593
|
-
id.counts <- apply(counts, 2, id.summary);
|
594
|
-
pos.counts.in <- apply(counts[, id.ingroup], 1, sum);
|
595
|
-
pos.counts.out <- apply(counts[, !id.ingroup], 1, sum);
|
596
|
-
|
597
|
-
# Plot and return
|
598
|
-
recplot <- new('enve.RecPlot2',
|
599
|
-
counts = counts, id.counts = id.counts,
|
600
|
-
pos.counts.in = pos.counts.in, pos.counts.out = pos.counts.out,
|
601
|
-
id.breaks = id.breaks, pos.breaks = pos.breaks,
|
602
|
-
pos.names = pos.names, seq.breaks = c(lim[1, 2], lim[, 3]),
|
603
|
-
seq.names = lim[, 1], id.ingroup = id.ingroup,
|
604
|
-
id.metric = id.metric, call = match.call());
|
605
|
-
if (plot) {
|
606
|
-
if (verbose) cat("Plotting.\n")
|
607
|
-
peaks <- plot(recplot, ...);
|
608
|
-
attr(recplot, "peaks") <- peaks
|
609
|
-
}
|
610
|
-
return(recplot);
|
611
|
-
}
|
612
|
-
|
613
|
-
#' Enveomics: Recruitment Plot (2) Peak Finder
|
614
|
-
#'
|
615
|
-
#' Identifies peaks in the population histogram potentially indicating
|
616
|
-
#' sub-population mixtures.
|
617
|
-
#'
|
618
|
-
#' @param x
|
619
|
-
#' An \code{\link{enve.RecPlot2}} object.
|
620
|
-
#' @param method
|
621
|
-
#' Peak-finder method. This should be one of:
|
622
|
-
#' \itemize{
|
623
|
-
#' \item \strong{emauto}
|
624
|
-
#' (Expectation-Maximization with auto-selection of components)
|
625
|
-
#' \item \strong{em}
|
626
|
-
#' (Expectation-Maximization)
|
627
|
-
#' \item \strong{mower}
|
628
|
-
#' (Custom distribution-mowing method)
|
629
|
-
#' }
|
630
|
-
#' @param ...
|
631
|
-
#' Any additional parameters supported by
|
632
|
-
#' \code{\link{enve.recplot2.findPeaks}}.
|
633
|
-
#'
|
634
|
-
#' @return Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
|
635
|
-
#'
|
636
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
637
|
-
#'
|
638
|
-
#' export
|
639
|
-
|
640
|
-
enve.recplot2.findPeaks <- function(
|
641
|
-
x,
|
642
|
-
method="emauto",
|
643
|
-
...
|
644
|
-
){
|
645
|
-
if(method == "emauto"){
|
646
|
-
peaks <- enve.recplot2.findPeaks.emauto(x, ...)
|
647
|
-
}else if(method == "em"){
|
648
|
-
peaks <- enve.recplot2.findPeaks.em(x, ...)
|
649
|
-
}else if(method == "mower"){
|
650
|
-
peaks <- enve.recplot2.findPeaks.mower(x, ...)
|
651
|
-
}else{
|
652
|
-
stop("Invalid peak-finder method ", method)
|
653
|
-
}
|
654
|
-
return(peaks)
|
655
|
-
}
|
656
|
-
|
657
|
-
#' Enveomics: Recruitment Plot (2) Emauto Peak Finder
|
658
|
-
#'
|
659
|
-
#' Identifies peaks in the population histogram using a Gaussian Mixture
|
660
|
-
#' Model Expectation Maximization (GMM-EM) method with number of components
|
661
|
-
#' automatically detected.
|
662
|
-
#'
|
663
|
-
#' @param x
|
664
|
-
#' An \code{\link{enve.RecPlot2}} object.
|
665
|
-
#' @param components
|
666
|
-
#' A vector of number of components to evaluate.
|
667
|
-
#' @param criterion
|
668
|
-
#' Criterion to use for components selection. Must be one of:
|
669
|
-
#' \code{aic} (Akaike Information Criterion), \code{bic} or \code{sbc}
|
670
|
-
#' (Bayesian Information Criterion or Schwarz Criterion).
|
671
|
-
#' @param merge.tol
|
672
|
-
#' When attempting to merge peaks with very similar sequencing depth, use
|
673
|
-
#' this number of significant digits (in log-scale).
|
674
|
-
#' @param verbose
|
675
|
-
#' Display (mostly debugging) information.
|
676
|
-
#' @param ...
|
677
|
-
#' Any additional parameters supported by
|
678
|
-
#' \code{\link{enve.recplot2.findPeaks.em}}.
|
679
|
-
#'
|
680
|
-
#' @return Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
|
681
|
-
#'
|
682
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
683
|
-
#'
|
684
|
-
#' @export
|
685
|
-
|
686
|
-
enve.recplot2.findPeaks.emauto <- function(
|
687
|
-
x,
|
688
|
-
components = seq(1, 5),
|
689
|
-
criterion = 'aic',
|
690
|
-
merge.tol = 2L,
|
691
|
-
verbose = FALSE,
|
692
|
-
...
|
693
|
-
){
|
694
|
-
best <- list(crit=0, pstore=list())
|
695
|
-
if(criterion == 'aic'){
|
696
|
-
do_crit <- function(ll, k, n) 2*k - 2*ll
|
697
|
-
}else if(criterion %in% c('bic', 'sbc')){
|
698
|
-
do_crit <- function(ll, k, n) log(n)*k - 2*ll
|
699
|
-
}else{
|
700
|
-
stop('Invalid criterion ', criterion)
|
701
|
-
}
|
702
|
-
for(comp in components){
|
703
|
-
if(verbose) cat('Testing:',comp,'\n')
|
704
|
-
best <- enve.recplot2.findPeaks.__emauto_one(x, comp, do_crit, best,
|
705
|
-
verbose, ...)
|
706
|
-
}
|
707
|
-
if(length(best[['peaks']])==0) return(list())
|
708
|
-
|
709
|
-
seqdepths.r <- signif(log(sapply(best[['peaks']],
|
710
|
-
function(x) x$seq.depth)), merge.tol)
|
711
|
-
distinct <- length(unique(seqdepths.r))
|
712
|
-
if(distinct < length(best[['peaks']])){
|
713
|
-
if(verbose) cat('Attempting merge to', distinct, 'components\n')
|
714
|
-
init <- apply(sapply(best[['peaks']],
|
715
|
-
function(x) c(x$param.hat, alpha=x$n.hat/x$n.total)), 1, as.numeric)
|
716
|
-
init <- init[!duplicated(seqdepths.r),]
|
717
|
-
init <- list(mu=init[,'mean'], sd=init[,'sd'],
|
718
|
-
alpha=init[,'alpha']/sum(init[,'alpha']))
|
719
|
-
best <- enve.recplot2.findPeaks.__emauto_one(x, distinct, do_crit, best,
|
720
|
-
verbose, ...)
|
721
|
-
}
|
722
|
-
return(best[['peaks']])
|
723
|
-
}
|
724
|
-
|
725
|
-
#' Enveomics: Recruitment Plot (2) Em Peak Finder
|
726
|
-
#'
|
727
|
-
#' Identifies peaks in the population histogram using a Gaussian Mixture
|
728
|
-
#' Model Expectation Maximization (GMM-EM) method.
|
729
|
-
#'
|
730
|
-
#' @param x
|
731
|
-
#' An \code{\link{enve.RecPlot2}} object.
|
732
|
-
#' @param max.iter
|
733
|
-
#' Maximum number of EM iterations.
|
734
|
-
#' @param ll.diff.res
|
735
|
-
#' Maximum Log-Likelihood difference to be considered as convergent.
|
736
|
-
#' @param components
|
737
|
-
#' Number of distributions assumed in the mixture.
|
738
|
-
#' @param rm.top
|
739
|
-
#' Top-values to remove before finding peaks, as a quantile probability.
|
740
|
-
#' This step is useful to remove highly conserved regions, but can be
|
741
|
-
#' turned off by setting \code{rm.top=0}. The quantile is determined
|
742
|
-
#' \strong{after} removing zero-coverage windows.
|
743
|
-
#' @param verbose
|
744
|
-
#' Display (mostly debugging) information.
|
745
|
-
#' @param init
|
746
|
-
#' Initialization parameters. By default, these are derived from k-means
|
747
|
-
#' clustering. A named list with vectors for \code{mu}, \code{sd}, and
|
748
|
-
#' \code{alpha}, each of length \code{components}.
|
749
|
-
#' @param log
|
750
|
-
#' Logical value indicating if the estimations should be performed in
|
751
|
-
#' natural logarithm units. Do not change unless you know what you're
|
752
|
-
#' doing.
|
753
|
-
#'
|
754
|
-
#' @return Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
|
755
|
-
#'
|
756
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
757
|
-
#'
|
758
|
-
#' @export
|
759
|
-
|
760
|
-
enve.recplot2.findPeaks.em <- function(
|
761
|
-
x,
|
762
|
-
max.iter = 1000,
|
763
|
-
ll.diff.res = 1e-8,
|
764
|
-
components = 2,
|
765
|
-
rm.top = 0.05,
|
766
|
-
verbose = FALSE,
|
767
|
-
init,
|
768
|
-
log = TRUE
|
769
|
-
){
|
770
|
-
|
771
|
-
# Essential vars
|
772
|
-
pos.binsize <- x$pos.breaks[-1] - x$pos.breaks[-length(x$pos.breaks)]
|
773
|
-
lsd1 <- (x$pos.counts.in/pos.binsize)[ x$pos.counts.in > 0 ]
|
774
|
-
lsd1 <- lsd1[ lsd1 < quantile(lsd1, 1-rm.top, names = FALSE) ]
|
775
|
-
if(log) lsd1 <- log(lsd1)
|
776
|
-
|
777
|
-
# 1. Initialize
|
778
|
-
if(missing(init)){
|
779
|
-
km.clust <- kmeans(lsd1, components)$cluster
|
780
|
-
init <- list(
|
781
|
-
mu = tapply(lsd1, km.clust, mean),
|
782
|
-
sd = tapply(lsd1, km.clust, sd),
|
783
|
-
alpha = table(km.clust) / length(km.clust)
|
784
|
-
)
|
785
|
-
}
|
786
|
-
m.step <- init
|
787
|
-
ll <- c()
|
788
|
-
cur.ll <- -Inf
|
789
|
-
|
790
|
-
for(i in 1:max.iter){
|
791
|
-
# 2/3. EM
|
792
|
-
e.step <- enve.recplot2.findPeaks.__em_e(lsd1, m.step)
|
793
|
-
m.step <- enve.recplot2.findPeaks.__em_m(lsd1, e.step[['posterior']])
|
794
|
-
# 4. Convergence
|
795
|
-
ll <- c(ll, e.step[["ll"]])
|
796
|
-
ll.diff <- abs(cur.ll - e.step[["ll"]])
|
797
|
-
cur.ll <- e.step[["ll"]]
|
798
|
-
if(verbose) cat(i, '\t| LL =', cur.ll, '\t| LL.diff =', ll.diff, '\n')
|
799
|
-
if(is.na(ll.diff) || ll.diff == Inf) break
|
800
|
-
if(ll.diff <= ll.diff.res) break
|
801
|
-
}
|
802
|
-
|
803
|
-
# Return
|
804
|
-
peaks <- list()
|
805
|
-
for(i in 1:components){
|
806
|
-
n.hat <- m.step[['alpha']][i]*length(lsd1)
|
807
|
-
peaks[[i]] <- new('enve.RecPlot2.Peak', dist='norm', values=as.numeric(),
|
808
|
-
values.res=0, mode=m.step[['mu']][i],
|
809
|
-
param.hat=list(sd=m.step[['sd']][i], mean=m.step[['mu']][i]),
|
810
|
-
n.hat=n.hat, n.total=length(lsd1), err.res=cur.ll,
|
811
|
-
merge.logdist=as.numeric(), log=log,
|
812
|
-
seq.depth=ifelse(log, exp(m.step[['mu']][i]), m.step[['mu']][i]))
|
813
|
-
}
|
814
|
-
return(peaks)
|
815
|
-
}
|
816
|
-
|
817
|
-
#' Enveomics: Recruitment Plot (2) Mowing Peak Finder
|
818
|
-
#'
|
819
|
-
#' Identifies peaks in the population histogram potentially indicating
|
820
|
-
#' sub-population mixtures, using a custom distribution-mowing method.
|
821
|
-
#'
|
822
|
-
#' @param x
|
823
|
-
#' An \code{\link{enve.RecPlot2}} object.
|
824
|
-
#' @param min.points
|
825
|
-
#' Minimum number of points in the quantile-estimation-range
|
826
|
-
#' \code{(quant.est)} to estimate a peak.
|
827
|
-
#' @param quant.est
|
828
|
-
#' Range of quantiles to be used in the estimation of a peak's
|
829
|
-
#' parameters.
|
830
|
-
#' @param mlv.opts
|
831
|
-
#' Ignored. For backwards compatibility.
|
832
|
-
#' @param fitdist.opts.sn
|
833
|
-
#' Options passed to \code{fitdist} to estimate the standard deviation if
|
834
|
-
#' \code{with.skewness=TRUE}. Note that the \code{start} parameter will be
|
835
|
-
#' ammended with \code{xi=estimated} mode for each peak.
|
836
|
-
#' @param fitdist.opts.norm
|
837
|
-
#' Options passed to \code{fitdist} to estimate the standard deviation if
|
838
|
-
#' \code{with.skewness=FALSE}. Note that the \code{start} parameter will be
|
839
|
-
#' ammended with \code{mean=estimated} mode for each peak.
|
840
|
-
#' @param rm.top
|
841
|
-
#' Top-values to remove before finding peaks, as a quantile probability.
|
842
|
-
#' This step is useful to remove highly conserved regions, but can be
|
843
|
-
#' turned off by setting \code{rm.top=0}. The quantile is determined
|
844
|
-
#' \strong{after} removing zero-coverage windows.
|
845
|
-
#' @param with.skewness
|
846
|
-
#' Allow skewness correction of the peaks. Typically, the
|
847
|
-
#' sequencing-depth distribution for a single peak is left-skewed, due
|
848
|
-
#' partly (but not exclusively) to fragmentation and mapping sensitivity.
|
849
|
-
#' See \emph{Lindner et al 2013, Bioinformatics 29(10):1260-7} for an
|
850
|
-
#' alternative solution for the first problem (fragmentation) called
|
851
|
-
#' "tail distribution".
|
852
|
-
#' @param optim.rounds
|
853
|
-
#' Maximum rounds of peak optimization.
|
854
|
-
#' @param optim.epsilon
|
855
|
-
#' Trace change at which optimization stops (unless \code{optim.rounds} is
|
856
|
-
#' reached first). The trace change is estimated as the sum of square
|
857
|
-
#' differences between parameters in one round and those from two rounds
|
858
|
-
#' earlier (to avoid infinite loops from approximation).
|
859
|
-
#' @param merge.logdist
|
860
|
-
#' Maximum value of \code{|log-ratio|} between centrality parameters in peaks
|
861
|
-
#' to attempt merging. The default of ~0.22 corresponds to a maximum
|
862
|
-
#' difference of 25\%.
|
863
|
-
#' @param verbose
|
864
|
-
#' Display (mostly debugging) information.
|
865
|
-
#' @param log
|
866
|
-
#' Logical value indicating if the estimations should be performed in
|
867
|
-
#' natural logarithm units. Do not change unless you know what you're
|
868
|
-
#' doing.
|
869
|
-
#'
|
870
|
-
#' @return Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
|
871
|
-
#'
|
872
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
873
|
-
#'
|
874
|
-
#' @export
|
875
|
-
|
876
|
-
enve.recplot2.findPeaks.mower <- function(
|
877
|
-
x,
|
878
|
-
min.points=10,
|
879
|
-
quant.est=c(0.002, 0.998),
|
880
|
-
mlv.opts=list(method='parzen'),
|
881
|
-
fitdist.opts.sn=list(distr='sn', method='qme', probs=c(0.1,0.5,0.8),
|
882
|
-
start=list(omega=1, alpha=-1), lower=c(0, -Inf, -Inf)),
|
883
|
-
fitdist.opts.norm=list(distr='norm', method='qme', probs=c(0.4,0.6),
|
884
|
-
start=list(sd=1), lower=c(0, -Inf)),
|
885
|
-
rm.top=0.05,
|
886
|
-
with.skewness=TRUE,
|
887
|
-
optim.rounds=200,
|
888
|
-
optim.epsilon=1e-4,
|
889
|
-
merge.logdist=log(1.75),
|
890
|
-
verbose=FALSE,
|
891
|
-
log=TRUE
|
892
|
-
){
|
893
|
-
|
894
|
-
# Essential vars
|
895
|
-
pos.binsize <- x$pos.breaks[-1] - x$pos.breaks[-length(x$pos.breaks)];
|
896
|
-
seqdepth.in <- x$pos.counts.in/pos.binsize;
|
897
|
-
lsd1 <- seqdepth.in[seqdepth.in>0];
|
898
|
-
lsd1 <- lsd1[ lsd1 < quantile(lsd1, 1-rm.top, names=FALSE) ]
|
899
|
-
if(log) lsd1 <- log(lsd1)
|
900
|
-
if(with.skewness){
|
901
|
-
fitdist.opts <- fitdist.opts.sn
|
902
|
-
}else{
|
903
|
-
fitdist.opts <- fitdist.opts.norm
|
904
|
-
}
|
905
|
-
peaks.opts <- list(lsd1=lsd1, min.points=min.points, quant.est=quant.est,
|
906
|
-
mlv.opts=mlv.opts, fitdist.opts=fitdist.opts, with.skewness=with.skewness,
|
907
|
-
optim.rounds=optim.rounds, optim.epsilon=optim.epsilon, verbose=verbose,
|
908
|
-
n.total=length(lsd1), merge.logdist=merge.logdist, log=log)
|
909
|
-
|
910
|
-
# Find seed peaks
|
911
|
-
if(verbose) cat('Mowing peaks for n =',length(lsd1),'\n')
|
912
|
-
peaks <- enve.recplot2.findPeaks.__mower(peaks.opts);
|
913
|
-
|
914
|
-
# Merge overlapping peaks
|
915
|
-
if(verbose) cat('Trying to merge',length(peaks),'peaks\n')
|
916
|
-
merged <- (length(peaks)>1)
|
917
|
-
while(merged){
|
918
|
-
merged <- FALSE
|
919
|
-
ignore <- c()
|
920
|
-
peaks2 <- list();
|
921
|
-
for(i in 1:length(peaks)){
|
922
|
-
if(i %in% ignore) next
|
923
|
-
p <- peaks[[ i ]]
|
924
|
-
j <- enve.recplot2.__whichClosestPeak(p, peaks)
|
925
|
-
p2 <- peaks[[ j ]]
|
926
|
-
dst.a <- p$param.hat[[ length(p$param.hat) ]]
|
927
|
-
dst.b <- p2$param.hat[[ length(p2$param.hat) ]]
|
928
|
-
if( abs(log(dst.a/dst.b)) < merge.logdist ){
|
929
|
-
if(verbose) cat('==> Attempting a merge at',
|
930
|
-
p$param.hat[[ length(p$param.hat) ]],'&',
|
931
|
-
p2$param.hat[[ length(p2$param.hat) ]],'X\n');
|
932
|
-
peaks.opts$lsd1 <- c(p$values, p2$values)
|
933
|
-
p.new <- enve.recplot2.findPeaks.__mower(peaks.opts)
|
934
|
-
if(length(p.new)==1){
|
935
|
-
peaks2[[ length(peaks2)+1 ]] <- p.new[[ 1 ]]
|
936
|
-
ignore <- c(ignore, j)
|
937
|
-
merged <- TRUE
|
938
|
-
}
|
939
|
-
}
|
940
|
-
if(!merged) peaks2[[ length(peaks2)+1 ]] <- p
|
941
|
-
}
|
942
|
-
peaks <- peaks2
|
943
|
-
if(length(peaks)==1) break
|
944
|
-
}
|
945
|
-
|
946
|
-
if(verbose) cat('Found',length(peaks),'peak(s)\n')
|
947
|
-
return(peaks);
|
948
|
-
}
|
949
|
-
|
950
|
-
#==============> Define utils
|
951
|
-
|
952
|
-
#' Enveomics: Recruitment Plot (2) Core Peak Finder
|
953
|
-
#'
|
954
|
-
#' Finds the peak in a list of peaks that is most likely to represent the
|
955
|
-
#' "core genome" of a population.
|
956
|
-
#'
|
957
|
-
#' @param x \code{list} of \code{\link{enve.RecPlot2.Peak}} objects.
|
958
|
-
#'
|
959
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
960
|
-
#'
|
961
|
-
#' @export
|
962
|
-
|
963
|
-
enve.recplot2.corePeak <- function
|
964
|
-
(x
|
965
|
-
){
|
966
|
-
# Find the peak with maximum depth (centrality)
|
967
|
-
maxPeak <- x[[
|
968
|
-
which.max(as.numeric(lapply(x,
|
969
|
-
function(y) y$param.hat[[ length(y$param.hat) ]])))
|
970
|
-
]]
|
971
|
-
# If a "larger" peak (a peak explaining more bins of the genome) is within
|
972
|
-
# the default "merge.logdist" distance, take that one instead.
|
973
|
-
corePeak <- maxPeak
|
974
|
-
for(p in x){
|
975
|
-
p.len <- ifelse(length(p$values)==0, p$n.hat, length(p$values))
|
976
|
-
corePeak.len <- ifelse(
|
977
|
-
length(corePeak$values)==0, corePeak$n.hat, length(corePeak$values))
|
978
|
-
sz.d <- log(p.len/corePeak.len)
|
979
|
-
if(is.nan(sz.d) || sz.d < 0) next
|
980
|
-
sq.d.a <- as.numeric(tail(p$param.hat, n=1))
|
981
|
-
sq.d.b <- as.numeric(tail(maxPeak$param.hat, n=1))
|
982
|
-
if(p$log) sq.d.a <- exp(sq.d.a)
|
983
|
-
if(corePeak$log) sq.d.b <- exp(sq.d.b)
|
984
|
-
if(abs(log(sq.d.a/sq.d.b)) < log(1.75)+sz.d/5) corePeak <- p
|
985
|
-
}
|
986
|
-
return(corePeak)
|
987
|
-
}
|
988
|
-
|
989
|
-
#' Enveomics: Recruitment Plot (2) Change Cutoff
|
990
|
-
#'
|
991
|
-
#' Change the intra-species cutoff of an existing recruitment plot.
|
992
|
-
#'
|
993
|
-
#' @param rp
|
994
|
-
#' \code{\link{enve.RecPlot2}} object.
|
995
|
-
#' @param new.cutoff
|
996
|
-
#' New cutoff to use.
|
997
|
-
#'
|
998
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
999
|
-
#'
|
1000
|
-
#' @export
|
1001
|
-
|
1002
|
-
enve.recplot2.changeCutoff <- function
|
1003
|
-
(rp,
|
1004
|
-
new.cutoff=98
|
1005
|
-
){
|
1006
|
-
# Re-calculate vectors
|
1007
|
-
id.mids <- (rp$id.breaks[-length(rp$id.breaks)]+rp$id.breaks[-1])/2
|
1008
|
-
id.ingroup <- (id.mids > new.cutoff)
|
1009
|
-
pos.counts.in <- apply(rp$counts[,id.ingroup], 1, sum)
|
1010
|
-
pos.counts.out <- apply(rp$counts[,!id.ingroup], 1, sum)
|
1011
|
-
# Update object
|
1012
|
-
attr(rp, "id.ingroup") <- id.ingroup
|
1013
|
-
attr(rp, "pos.counts.in") <- pos.counts.in
|
1014
|
-
attr(rp, "pos.counts.out") <- pos.counts.out
|
1015
|
-
attr(rp, "call") <- match.call()
|
1016
|
-
return(rp)
|
1017
|
-
}
|
1018
|
-
|
1019
|
-
#' Enveomics: Recruitment Plot (2) Window Depth Threshold
|
1020
|
-
#'
|
1021
|
-
#' Identifies the threshold below which windows should be identified as
|
1022
|
-
#' variable or absent.
|
1023
|
-
#'
|
1024
|
-
#' @param rp
|
1025
|
-
#' Recruitment plot, an \code{\link{enve.RecPlot2}} object.
|
1026
|
-
#' @param peak
|
1027
|
-
#' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
|
1028
|
-
#' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
|
1029
|
-
#' used (see \code{\link{enve.recplot2.corePeak}}).
|
1030
|
-
#' @param lower.tail
|
1031
|
-
#' If \code{FALSE}, it returns windows significantly above the peak in
|
1032
|
-
#' sequencing depth.
|
1033
|
-
#' @param significance
|
1034
|
-
#' Significance threshold (alpha) to select windows.
|
1035
|
-
#'
|
1036
|
-
#' @return
|
1037
|
-
#' Returns a float. The units are depth if the peaks were estimated in
|
1038
|
-
#' linear scale, or log-depth otherwise (\code{peak$log}).
|
1039
|
-
#'
|
1040
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1041
|
-
#'
|
1042
|
-
#' @export
|
1043
|
-
|
1044
|
-
enve.recplot2.windowDepthThreshold <- function
|
1045
|
-
(rp,
|
1046
|
-
peak,
|
1047
|
-
lower.tail=TRUE,
|
1048
|
-
significance=0.05
|
1049
|
-
){
|
1050
|
-
if(is.list(peak)) peak <- enve.recplot2.corePeak(peak)
|
1051
|
-
par <- peak$param.hat
|
1052
|
-
par[["p"]] <- ifelse(lower.tail, significance, 1-significance)
|
1053
|
-
thr <- do.call(ifelse(length(par)==4, qsn, qnorm), par)
|
1054
|
-
if(peak$log) thr <- exp(thr)
|
1055
|
-
|
1056
|
-
return(thr)
|
1057
|
-
}
|
1058
|
-
|
1059
|
-
#' Enveomics: Recruitment Plot (2) Extract Windows
|
1060
|
-
#'
|
1061
|
-
#' Extract windows significantly below (or above) the peak in sequencing
|
1062
|
-
#' depth.
|
1063
|
-
#'
|
1064
|
-
#' @param rp
|
1065
|
-
#' Recruitment plot, a \code{\link{enve.RecPlot2}} object.
|
1066
|
-
#' @param peak
|
1067
|
-
#' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
|
1068
|
-
#' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
|
1069
|
-
#' used (see \code{\link{enve.recplot2.corePeak}}).
|
1070
|
-
#' @param lower.tail
|
1071
|
-
#' If \code{FALSE}, it returns windows significantly above the peak in
|
1072
|
-
#' sequencing depth.
|
1073
|
-
#' @param significance
|
1074
|
-
#' Significance threshold (alpha) to select windows.
|
1075
|
-
#' @param seq.names
|
1076
|
-
#' Returns subject sequence names instead of a vector of Booleans. If
|
1077
|
-
#' the recruitment plot was generated with named position bins (e.g, using
|
1078
|
-
#' \code{pos.breaks=0} or a two-column \code{pos.breaks.tsv}), it returns a
|
1079
|
-
#' vector of characters (the sequence identifiers), otherwise it returns a
|
1080
|
-
#' data.frame with a name column and two columns of coordinates.
|
1081
|
-
#'
|
1082
|
-
#' @return
|
1083
|
-
#' Returns a vector of logicals if \code{seq.names = FALSE}.
|
1084
|
-
#' If \code{seq.names = TRUE}, it returns a data.frame with five columns:
|
1085
|
-
#' \code{name.from}, \code{name.to}, \code{pos.from}, \code{pos.to}, and
|
1086
|
-
#' \code{seq.name} (see \code{\link{enve.recplot2.coordinates}}).
|
1087
|
-
#'
|
1088
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1089
|
-
#'
|
1090
|
-
#' @export
|
1091
|
-
|
1092
|
-
enve.recplot2.extractWindows <- function
|
1093
|
-
(rp,
|
1094
|
-
peak,
|
1095
|
-
lower.tail = TRUE,
|
1096
|
-
significance = 0.05,
|
1097
|
-
seq.names = FALSE
|
1098
|
-
){
|
1099
|
-
# Determine the threshold
|
1100
|
-
thr <- enve.recplot2.windowDepthThreshold(rp, peak, lower.tail, significance)
|
1101
|
-
|
1102
|
-
# Select windows past the threshold
|
1103
|
-
seqdepth.in <- enve.recplot2.seqdepth(rp)
|
1104
|
-
if(lower.tail){
|
1105
|
-
sel <- seqdepth.in < thr
|
1106
|
-
}else{
|
1107
|
-
sel <- seqdepth.in > thr
|
1108
|
-
}
|
1109
|
-
|
1110
|
-
# seq.names = FALSE
|
1111
|
-
if(!seq.names) return(sel)
|
1112
|
-
# seq.names = TRUE
|
1113
|
-
return(enve.recplot2.coordinates(rp, sel))
|
1114
|
-
}
|
1115
|
-
|
1116
|
-
#' Enveomics: Recruitment Plot (2) Compare Identities
|
1117
|
-
#'
|
1118
|
-
#' Compare the distribution of identities between two
|
1119
|
-
#' \code{\link{enve.RecPlot2}} objects.
|
1120
|
-
#'
|
1121
|
-
#' @param x
|
1122
|
-
#' First \code{\link{enve.RecPlot2}} object.
|
1123
|
-
#' @param y
|
1124
|
-
#' Second \code{\link{enve.RecPlot2}} object.
|
1125
|
-
#' @param method
|
1126
|
-
#' Distance method to use. This should be (an unambiguous abbreviation of)
|
1127
|
-
#' one of:
|
1128
|
-
#' \itemize{
|
1129
|
-
#' \item{"hellinger" (\emph{Hellinger, 1090, doi:10.1515/crll.1909.136.210}),}
|
1130
|
-
#' \item{"bhattacharyya" (\emph{Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35}),}
|
1131
|
-
#' \item{"kl" or "kullback-leibler" (\emph{Kullback & Leibler, 1951,
|
1132
|
-
#' doi:10.1214/aoms/1177729694}), or}
|
1133
|
-
#' \item{"euclidean"}
|
1134
|
-
#' }
|
1135
|
-
#' @param smooth.par
|
1136
|
-
#' Smoothing parameter for cubic spline smoothing. Use 0 for no smoothing.
|
1137
|
-
#' Use \code{NULL} to automatically determine this value using leave-one-out
|
1138
|
-
#' cross-validation (see \code{smooth.spline} parameter \code{spar}).
|
1139
|
-
#' @param pseudocounts
|
1140
|
-
#' Smoothing parameter for Laplace smoothing. Use 0 for no smoothing, or
|
1141
|
-
#' 1 for add-one smoothing.
|
1142
|
-
#' @param max.deviation
|
1143
|
-
#' Maximum mean deviation between identity breaks tolerated (as percent
|
1144
|
-
#' identity). Difference in number of \code{id.breaks} is never tolerated.
|
1145
|
-
#'
|
1146
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1147
|
-
#'
|
1148
|
-
#' @export
|
1149
|
-
|
1150
|
-
enve.recplot2.compareIdentities <- function
|
1151
|
-
(x,
|
1152
|
-
y,
|
1153
|
-
method="hellinger",
|
1154
|
-
smooth.par=NULL,
|
1155
|
-
pseudocounts=0,
|
1156
|
-
max.deviation=0.75
|
1157
|
-
){
|
1158
|
-
METHODS <- c("hellinger","bhattacharyya","kullback-leibler","kl","euclidean")
|
1159
|
-
i.meth <- pmatch(method, METHODS)
|
1160
|
-
if (is.na(i.meth)) stop("Invalid distance ", method)
|
1161
|
-
if(!inherits(x, "enve.RecPlot2"))
|
1162
|
-
stop("'x' must inherit from class `enve.RecPlot2`")
|
1163
|
-
if(!inherits(y, "enve.RecPlot2"))
|
1164
|
-
stop("'y' must inherit from class `enve.RecPlot2`")
|
1165
|
-
if(length(x$id.breaks) != length(y$id.breaks))
|
1166
|
-
stop("'x' and 'y' must have the same number of `id.breaks`")
|
1167
|
-
dev <- mean(abs(x$id.breaks - y$id.breaks))
|
1168
|
-
if(dev > max.deviation)
|
1169
|
-
stop("'x' and 'y' must have similar `id.breaks`; exceeding max.deviation: ",
|
1170
|
-
dev)
|
1171
|
-
x.cnt <- x$id.counts
|
1172
|
-
y.cnt <- y$id.counts
|
1173
|
-
if(is.null(smooth.par) || smooth.par > 0){
|
1174
|
-
x.mids <- (x$id.breaks[-1] + x$id.breaks[-length(x$id.breaks)])/2
|
1175
|
-
y.mids <- (y$id.breaks[-1] + y$id.breaks[-length(y$id.breaks)])/2
|
1176
|
-
p.spline <- smooth.spline(x.mids, x.cnt, spar=smooth.par)
|
1177
|
-
q.spline <- smooth.spline(y.mids, y.cnt, spar=smooth.par)
|
1178
|
-
x.cnt <- pmax(p.spline$y, 0)
|
1179
|
-
y.cnt <- pmax(q.spline$y, 0)
|
1180
|
-
}
|
1181
|
-
a <- as.numeric(pseudocounts)
|
1182
|
-
p <- (x.cnt + a) / sum(x.cnt + a)
|
1183
|
-
q <- (y.cnt + a) / sum(y.cnt + a)
|
1184
|
-
d <- NA
|
1185
|
-
if(i.meth %in% c(1L, 2L)){
|
1186
|
-
d <- sqrt(sum((sqrt(p) - sqrt(q))**2))/sqrt(2)
|
1187
|
-
if(i.meth==2L) d <- 1 - d**2
|
1188
|
-
}else if(i.meth %in% c(3L, 4L)){
|
1189
|
-
sel <- p>0
|
1190
|
-
if(any(q[sel]==0))
|
1191
|
-
stop("Undefined distance without absolute continuity, use pseudocounts")
|
1192
|
-
d <- -sum(p[sel]*log(q[sel]/p[sel]))
|
1193
|
-
}else if(i.meth == 5L){
|
1194
|
-
d <- sqrt(sum((q-p)**2))
|
1195
|
-
}
|
1196
|
-
return(d)
|
1197
|
-
}
|
1198
|
-
|
1199
|
-
#' Enveomics: Recruitment Plot (2) Coordinates
|
1200
|
-
#'
|
1201
|
-
#' Returns the sequence name and coordinates of the requested position bins.
|
1202
|
-
#'
|
1203
|
-
#' @param x
|
1204
|
-
#' \code{\link{enve.RecPlot2}} object.
|
1205
|
-
#' @param bins
|
1206
|
-
#' Vector of selected bins to return. It can be a vector of logical values
|
1207
|
-
#' with the same length as \code{x$pos.breaks-1} or a vector of integers. If
|
1208
|
-
#' missing, returns the coordinates of all windows.
|
1209
|
-
#'
|
1210
|
-
#' @return
|
1211
|
-
#' Returns a data.frame with five columns: \code{name.from} (character),
|
1212
|
-
#' \code{pos.from} (numeric), \code{name.to} (character), \code{pos.to}
|
1213
|
-
#' (numeric), and \code{seq.name} (character).
|
1214
|
-
#' The first two correspond to sequence and position of the start point of the
|
1215
|
-
#' bin. The next two correspond to the sequence and position of the end point of
|
1216
|
-
#' the bin. The last one indicates the name of the sequence (if defined).
|
1217
|
-
#'
|
1218
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1219
|
-
#'
|
1220
|
-
#' @export
|
1221
|
-
|
1222
|
-
enve.recplot2.coordinates <- function
|
1223
|
-
(x,
|
1224
|
-
bins
|
1225
|
-
){
|
1226
|
-
if(!inherits(x, "enve.RecPlot2"))
|
1227
|
-
stop("'x' must inherit from class `enve.RecPlot2`")
|
1228
|
-
if(missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
|
1229
|
-
if(!is.vector(bins)) stop("'bins' must be a vector")
|
1230
|
-
if(inherits(bins, "logical")) bins <- which(bins)
|
1231
|
-
|
1232
|
-
y <- data.frame(stringsAsFactors = FALSE, row.names = bins)
|
1233
|
-
|
1234
|
-
for(i in 1:length(bins)){
|
1235
|
-
j <- bins[i]
|
1236
|
-
# Concatenated coordinates
|
1237
|
-
cc <- x$pos.breaks[c(j, j+1)]
|
1238
|
-
# Find the corresponding `seq.breaks`
|
1239
|
-
sb.from <- which(
|
1240
|
-
cc[1] >= x$seq.breaks[-length(x$seq.breaks)] &
|
1241
|
-
cc[1] < x$seq.breaks[-1])
|
1242
|
-
sb.to <- which(
|
1243
|
-
cc[2] > x$seq.breaks[-length(x$seq.breaks)] &
|
1244
|
-
cc[2] <= x$seq.breaks[-1])
|
1245
|
-
# Translate coordinates
|
1246
|
-
if(length(sb.from)==1 & length(sb.to)==1){
|
1247
|
-
y[i, 'name.from'] <- x$seq.names[sb.from]
|
1248
|
-
y[i, 'pos.from'] <- floor(x$seq.breaks[sb.from] + cc[1] - 1)
|
1249
|
-
y[i, 'name.to'] <- x$seq.names[sb.to]
|
1250
|
-
y[i, 'pos.to'] <- ceiling(x$seq.breaks[sb.to] + cc[2] - 1)
|
1251
|
-
y[i, 'seq.name'] <- x$pos.names[i]
|
1252
|
-
}
|
1253
|
-
}
|
1254
|
-
|
1255
|
-
return(y)
|
1256
|
-
}
|
1257
|
-
|
1258
|
-
#' Enveomics: Recruitment Plot (2) Sequencing Depth
|
1259
|
-
#'
|
1260
|
-
#' Calculate the sequencing depth of the given window(s).
|
1261
|
-
#'
|
1262
|
-
#' @param x
|
1263
|
-
#' \code{\link{enve.RecPlot2}} object.
|
1264
|
-
#' @param sel
|
1265
|
-
#' Window(s) for which the sequencing depth is to be calculated. If not
|
1266
|
-
#' passed, it returns the sequencing depth of all windows.
|
1267
|
-
#' @param low.identity
|
1268
|
-
#' A logical indicating if the sequencing depth is to be estimated only
|
1269
|
-
#' with low-identity matches. By default, only high-identity matches are
|
1270
|
-
#' used.
|
1271
|
-
#'
|
1272
|
-
#' @return
|
1273
|
-
#' Returns a numeric vector of sequencing depths (in bp/bp).
|
1274
|
-
#'
|
1275
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1276
|
-
#'
|
1277
|
-
#' @export
|
1278
|
-
|
1279
|
-
enve.recplot2.seqdepth <- function
|
1280
|
-
|
1281
|
-
(x,
|
1282
|
-
sel,
|
1283
|
-
low.identity=FALSE
|
1284
|
-
){
|
1285
|
-
if(!inherits(x, "enve.RecPlot2"))
|
1286
|
-
stop("'x' must inherit from class `enve.RecPlot2`")
|
1287
|
-
if(low.identity){
|
1288
|
-
pos.cnts.in <- x$pos.counts.out
|
1289
|
-
}else{
|
1290
|
-
pos.cnts.in <- x$pos.counts.in
|
1291
|
-
}
|
1292
|
-
pos.breaks <- x$pos.breaks
|
1293
|
-
pos.binsize <- (pos.breaks[-1] - pos.breaks[-length(pos.breaks)])
|
1294
|
-
seqdepth.in <- pos.cnts.in/pos.binsize
|
1295
|
-
if(missing(sel)) return(seqdepth.in)
|
1296
|
-
return(seqdepth.in[sel])
|
1297
|
-
}
|
1298
|
-
|
1299
|
-
#' Enveomics: Recruitment Plot (2) ANI Estimate
|
1300
|
-
#'
|
1301
|
-
#' Estimate the Average Nucleotide Identity from reads (ANIr) from a
|
1302
|
-
#' recruitment plot.
|
1303
|
-
#'
|
1304
|
-
#' @param x
|
1305
|
-
#' \code{\link{enve.RecPlot2}} object.
|
1306
|
-
#' @param range
|
1307
|
-
#' Range of identities to be considered. By default, the full range
|
1308
|
-
#' is used (note that the upper boundary is \code{Inf} and not 100 because
|
1309
|
-
#' recruitment plots can also be built with bit-scores). To use only
|
1310
|
-
#' intra-population matches (with identities), use c(95,100). To use only
|
1311
|
-
#' inter-population values, use c(0,95).
|
1312
|
-
#'
|
1313
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1314
|
-
#'
|
1315
|
-
#' @export
|
1316
|
-
|
1317
|
-
enve.recplot2.ANIr <- function
|
1318
|
-
(x,
|
1319
|
-
range=c(0,Inf)
|
1320
|
-
){
|
1321
|
-
if(!inherits(x, "enve.RecPlot2"))
|
1322
|
-
stop("'x' must inherit from class `enve.RecPlot2`")
|
1323
|
-
id.b <- x$id.breaks
|
1324
|
-
id <- (id.b[-1]+id.b[-length(id.b)])/2
|
1325
|
-
cnt <- x$id.counts
|
1326
|
-
cnt[id < range[1]] <- 0
|
1327
|
-
cnt[id > range[2]] <- 0
|
1328
|
-
return(sum(id*cnt/sum(cnt)))
|
1329
|
-
}
|
1330
|
-
|
1331
|
-
#==============> Define internal functions
|
1332
|
-
|
1333
|
-
#' Enveomics: Recruitment Plot (2) Internal Ancillary Function
|
1334
|
-
#'
|
1335
|
-
#' Internal ancillary function (see \code{\link{enve.recplot2}}).
|
1336
|
-
#'
|
1337
|
-
#' @param x \code{\link{enve.RecPlot2}} object
|
1338
|
-
#' @param pos.breaks Position breaks
|
1339
|
-
#' @param id.breaks Identity breaks
|
1340
|
-
#' @param rec.idcol Identity column to use
|
1341
|
-
#'
|
1342
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1343
|
-
#' @author Kenji Gerhardt [aut]
|
1344
|
-
#'
|
1345
|
-
#' @export
|
1346
|
-
|
1347
|
-
enve.recplot2.__counts <- function
|
1348
|
-
(x, pos.breaks, id.breaks, rec.idcol) {
|
1349
|
-
rec2 <- x$rec
|
1350
|
-
verbose <- x$verbose
|
1351
|
-
|
1352
|
-
# get counts of how many occurrences of each genome pos.bin there are per read
|
1353
|
-
x.bins <- mapply(
|
1354
|
-
function(start, end) {
|
1355
|
-
list(rle(findInterval(start:end, pos.breaks, left.open = T)))
|
1356
|
-
}, rec2[, 1], rec2[, 2])
|
1357
|
-
|
1358
|
-
# find the single y bin for each row, replicates it at the correct places to
|
1359
|
-
# the number of distinct bins found in its row
|
1360
|
-
y.bins <- rep(findInterval(rec2[, rec.idcol], id.breaks, left.open = T),
|
1361
|
-
times = unlist(lapply(x.bins, function(a) length(a$lengths))))
|
1362
|
-
|
1363
|
-
# x.bins_counts is the number of occurrences of each bin a row contains,
|
1364
|
-
# per row, then unlisted
|
1365
|
-
x.bins_counts <- unlist(lapply(x.bins, function(a) a$lengths))
|
1366
|
-
|
1367
|
-
# these are the pos. in. genome bins that each count in x.bins_counts falls into
|
1368
|
-
x.bins <- unlist(lapply(x.bins, function(a) a$values))
|
1369
|
-
|
1370
|
-
# much more efficient counts implementation in R using lists instead of a matrix:
|
1371
|
-
counts <- lapply(
|
1372
|
-
1:(length(pos.breaks) - 1),
|
1373
|
-
function(col_len) rep(0, length(id.breaks) - 1))
|
1374
|
-
|
1375
|
-
# accesses the correct list in counts by x.bin, then
|
1376
|
-
# accesses the position in that row by y.bins and adds the new count
|
1377
|
-
for(i in 1:length(x.bins)){
|
1378
|
-
counts[[x.bins[i]]][y.bins[i]] <- counts[[x.bins[i]]][y.bins[i]] + x.bins_counts[i]
|
1379
|
-
}
|
1380
|
-
|
1381
|
-
counts <- do.call(rbind, counts)
|
1382
|
-
return(counts)
|
1383
|
-
}
|
1384
|
-
|
1385
|
-
#' Enveomics: Recruitment Plot (2) EMauto Peak Finder - Internal Ancillary Function
|
1386
|
-
#'
|
1387
|
-
#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.emauto}}).
|
1388
|
-
#'
|
1389
|
-
#' @param x \code{\link{enve.RecPlot2}} object
|
1390
|
-
#' @param comp Components
|
1391
|
-
#' @param do_crit Function estimating the criterion
|
1392
|
-
#' @param best Best solution thus far
|
1393
|
-
#' @param verbose If verbose
|
1394
|
-
#' @param ... Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}
|
1395
|
-
#'
|
1396
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1397
|
-
#'
|
1398
|
-
#' @export
|
1399
|
-
|
1400
|
-
enve.recplot2.findPeaks.__emauto_one <- function
|
1401
|
-
(x, comp, do_crit, best, verbose, ...){
|
1402
|
-
peaks <- enve.recplot2.findPeaks.em(x=x, components=comp, ...)
|
1403
|
-
if(length(peaks)==0) return(best)
|
1404
|
-
k <- comp*3 - 1 # mean & sd for each component, and n-1 free alpha parameters
|
1405
|
-
crit <- do_crit(peaks[[1]]$err.res, k, peaks[[1]]$n.total)
|
1406
|
-
if(verbose) cat(comp,'\t| LL =', peaks[[1]]$err.res, '\t| Estimate =', crit,
|
1407
|
-
ifelse(crit > best[['crit']], '*', ''), '\n')
|
1408
|
-
if(crit > best[['crit']]){
|
1409
|
-
best[['crit']] <- crit
|
1410
|
-
best[['peaks']] <- peaks
|
1411
|
-
}
|
1412
|
-
best[['pstore']][[comp]] <- peaks
|
1413
|
-
return(best)
|
1414
|
-
}
|
1415
|
-
|
1416
|
-
#' Enveomics: Recruitment Plot (2) EM Peak Finder - Internal Ancillary Function Expectation
|
1417
|
-
#'
|
1418
|
-
#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.em}}).
|
1419
|
-
#'
|
1420
|
-
#' @param x Vector of log-transformed sequencing depths
|
1421
|
-
#' @param theta Parameters list
|
1422
|
-
#'
|
1423
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1424
|
-
#'
|
1425
|
-
#' @export
|
1426
|
-
|
1427
|
-
enve.recplot2.findPeaks.__em_e <- function
|
1428
|
-
(x, theta){
|
1429
|
-
components <- length(theta[['mu']])
|
1430
|
-
product <- do.call(cbind,
|
1431
|
-
lapply(1:components,
|
1432
|
-
function(i) dnorm(x, theta[['mu']][i],
|
1433
|
-
theta[['sd']][i])*theta[['alpha']][i]))
|
1434
|
-
sum.of.components <- rowSums(product)
|
1435
|
-
posterior <- product / sum.of.components
|
1436
|
-
for(i in which(sum.of.components == Inf)) {
|
1437
|
-
cat(i,'/',nrow(product), ':', product[i,], '\n')
|
1438
|
-
}
|
1439
|
-
|
1440
|
-
return(list(ll=sum(log(sum.of.components)), posterior=posterior))
|
1441
|
-
}
|
1442
|
-
|
1443
|
-
#' Enveomics: Recruitment Plot (2) Em Peak Finder - Internal Ancillary Function Maximization
|
1444
|
-
#'
|
1445
|
-
#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.em}}).
|
1446
|
-
#'
|
1447
|
-
#' @param x Vector of log-transformed sequencing depths
|
1448
|
-
#' @param posterior Posterior probability
|
1449
|
-
#'
|
1450
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1451
|
-
#'
|
1452
|
-
#' @export
|
1453
|
-
|
1454
|
-
enve.recplot2.findPeaks.__em_m <- function
|
1455
|
-
(x, posterior){
|
1456
|
-
components <- ncol(posterior)
|
1457
|
-
n <- colSums(posterior)
|
1458
|
-
mu <- colSums(posterior * x) / n
|
1459
|
-
sd <- sqrt( colSums(
|
1460
|
-
posterior * (matrix(rep(x,components), ncol=components) - mu)^2) / n )
|
1461
|
-
alpha <- n/length(x)
|
1462
|
-
return(list(mu=mu, sd=sd, alpha=alpha))
|
1463
|
-
}
|
1464
|
-
|
1465
|
-
#' Enveomics: Recruitment Plot (2) Peak S4 Class - Internal Ancillary Function
|
1466
|
-
#'
|
1467
|
-
#' Internal ancillary function (see \code{\link{enve.RecPlot2.Peak}}).
|
1468
|
-
#'
|
1469
|
-
#' @param x \code{\link{enve.RecPlot2.Peak}} object
|
1470
|
-
#' @param mids Midpoints
|
1471
|
-
#' @param counts Counts
|
1472
|
-
#'
|
1473
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1474
|
-
#'
|
1475
|
-
#' @export
|
1476
|
-
|
1477
|
-
enve.recplot2.__peakHist <- function
|
1478
|
-
(x, mids, counts=TRUE){
|
1479
|
-
d.o <- x$param.hat
|
1480
|
-
if(length(x$log)==0) x$log <- FALSE
|
1481
|
-
if(x$log){
|
1482
|
-
d.o$x <- log(mids)
|
1483
|
-
}else{
|
1484
|
-
d.o$x <- mids
|
1485
|
-
}
|
1486
|
-
prob <- do.call(paste('d', x$dist, sep=''), d.o)
|
1487
|
-
if(!counts) return(prob)
|
1488
|
-
if(length(x$values)>0) return(prob*length(x$values)/sum(prob))
|
1489
|
-
return(prob*x$n.hat/sum(prob))
|
1490
|
-
}
|
1491
|
-
|
1492
|
-
#' Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 1
|
1493
|
-
#'
|
1494
|
-
#' Internall ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
|
1495
|
-
#'
|
1496
|
-
#' @param lsd1 Vector of log-transformed sequencing depths
|
1497
|
-
#' @param min.points Minimum number of points
|
1498
|
-
#' @param quant.est Quantile estimate
|
1499
|
-
#' @param mlv.opts List of options for \code{mlv}
|
1500
|
-
#' @param fitdist.opts List of options for \code{fitdist}
|
1501
|
-
#' @param with.skewness If skewed-normal should be used
|
1502
|
-
#' @param optim.rounds Maximum number of optimization rounds
|
1503
|
-
#' @param optim.epsilon Minimum difference considered negligible
|
1504
|
-
#' @param n.total Global number of windows
|
1505
|
-
#' @param merge.logdist Attempted \code{merge.logdist} parameter
|
1506
|
-
#' @param verbose If verbose
|
1507
|
-
#' @param log If log-transformed depths
|
1508
|
-
#'
|
1509
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1510
|
-
#'
|
1511
|
-
#' @export
|
1512
|
-
|
1513
|
-
enve.recplot2.findPeaks.__mow_one <- function
|
1514
|
-
(lsd1, min.points, quant.est, mlv.opts, fitdist.opts, with.skewness,
|
1515
|
-
optim.rounds, optim.epsilon, n.total, merge.logdist, verbose, log
|
1516
|
-
){
|
1517
|
-
dist <- ifelse(with.skewness, 'sn', 'norm');
|
1518
|
-
|
1519
|
-
# Find peak
|
1520
|
-
o <- mlv.opts; o$x = lsd1;
|
1521
|
-
mode1 <- median(lsd1); # mode1 <- do.call(mlv, o)$M;
|
1522
|
-
if(verbose) cat('Anchoring at mode =',mode1,'\n')
|
1523
|
-
param.hat <- fitdist.opts$start; last.hat <- param.hat;
|
1524
|
-
lim <- NA;
|
1525
|
-
if(with.skewness){ param.hat$xi <- mode1 }else{ param.hat$mean <- mode1 }
|
1526
|
-
|
1527
|
-
# Refine peak parameters
|
1528
|
-
for(round in 1:optim.rounds){
|
1529
|
-
param.hat[[ 1 ]] <- param.hat[[ 1 ]]/diff(quant.est)# <- expand dispersion
|
1530
|
-
lim.o <- param.hat
|
1531
|
-
lim.o$p <- quant.est; lim <- do.call(paste('q',dist,sep=''), lim.o)
|
1532
|
-
lsd1.pop <- lsd1[(lsd1>lim[1]) & (lsd1<lim[2])];
|
1533
|
-
if(verbose) cat(' Round', round, 'with n =',length(lsd1.pop),
|
1534
|
-
'and params =',as.numeric(param.hat),' \r')
|
1535
|
-
if(length(lsd1.pop) < min.points) break;
|
1536
|
-
o <- fitdist.opts; o$data = lsd1.pop; o$start = param.hat;
|
1537
|
-
last.last.hat <- last.hat
|
1538
|
-
last.hat <- param.hat
|
1539
|
-
param.hat <- as.list(do.call(fitdist, o)$estimate);
|
1540
|
-
if(any(is.na(param.hat))){
|
1541
|
-
if(round>1) param.hat <- last.hat;
|
1542
|
-
break;
|
1543
|
-
}
|
1544
|
-
if(round > 1){
|
1545
|
-
epsilon1 <- sum((as.numeric(last.hat)-as.numeric(param.hat))^2)
|
1546
|
-
if(epsilon1 < optim.epsilon) break;
|
1547
|
-
if(round > 2){
|
1548
|
-
epsilon2 <- sum((as.numeric(last.last.hat)-as.numeric(param.hat))^2)
|
1549
|
-
if(epsilon2 < optim.epsilon) break;
|
1550
|
-
}
|
1551
|
-
}
|
1552
|
-
}
|
1553
|
-
if(verbose) cat('\n')
|
1554
|
-
if(is.na(param.hat[1]) | is.na(lim[1])) return(NULL);
|
1555
|
-
|
1556
|
-
# Mow distribution
|
1557
|
-
lsd2 <- c();
|
1558
|
-
lsd.pop <- c();
|
1559
|
-
n.hat <- length(lsd1.pop)/diff(quant.est)
|
1560
|
-
peak <- new('enve.RecPlot2.Peak', dist=dist, values=as.numeric(), mode=mode1,
|
1561
|
-
param.hat=param.hat, n.hat=n.hat, n.total=n.total,
|
1562
|
-
merge.logdist=merge.logdist, log=log)
|
1563
|
-
peak.breaks <- seq(min(lsd1), max(lsd1), length=20)
|
1564
|
-
peak.cnt <- enve.recplot2.__peakHist(peak,
|
1565
|
-
(peak.breaks[-length(peak.breaks)]+peak.breaks[-1])/2)
|
1566
|
-
for(i in 2:length(peak.breaks)){
|
1567
|
-
values <- lsd1[ (lsd1 >= peak.breaks[i-1]) & (lsd1 < peak.breaks[i]) ]
|
1568
|
-
n.exp <- peak.cnt[i-1]
|
1569
|
-
if(is.na(n.exp) | n.exp==0) n.exp <- 0.1
|
1570
|
-
if(length(values)==0) next
|
1571
|
-
in.peak <- runif(length(values)) <= n.exp/length(values)
|
1572
|
-
lsd2 <- c(lsd2, values[!in.peak])
|
1573
|
-
lsd.pop <- c(lsd.pop, values[in.peak])
|
1574
|
-
}
|
1575
|
-
if(length(lsd.pop) < min.points) return(NULL)
|
1576
|
-
|
1577
|
-
# Return peak
|
1578
|
-
attr(peak, 'values') <- lsd.pop
|
1579
|
-
attr(peak, 'values.res') <- lsd2
|
1580
|
-
attr(peak, 'err.res') <- 1-(cor(hist(lsd.pop, breaks=peak.breaks,
|
1581
|
-
plot=FALSE)$counts, hist(lsd1, breaks=peak.breaks,
|
1582
|
-
plot=FALSE)$counts)+1)/2
|
1583
|
-
mu <- tail(param.hat, n=1)
|
1584
|
-
attr(peak, 'seq.depth') <- ifelse(log, exp(mu), mu)
|
1585
|
-
if(verbose) cat(' Extracted peak with n =',length(lsd.pop),
|
1586
|
-
'with expected n =',n.hat,'\n')
|
1587
|
-
return(peak)
|
1588
|
-
}
|
1589
|
-
|
1590
|
-
#' Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 2
|
1591
|
-
#'
|
1592
|
-
#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
|
1593
|
-
#'
|
1594
|
-
#' @param peaks.opts List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}
|
1595
|
-
#'
|
1596
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1597
|
-
#'
|
1598
|
-
#' @export
|
1599
|
-
|
1600
|
-
enve.recplot2.findPeaks.__mower <- function
|
1601
|
-
(peaks.opts){
|
1602
|
-
peaks <- list()
|
1603
|
-
while(length(peaks.opts$lsd1) > peaks.opts$min.points){
|
1604
|
-
peak <- do.call(enve.recplot2.findPeaks.__mow_one, peaks.opts)
|
1605
|
-
if(is.null(peak)) break
|
1606
|
-
peaks[[ length(peaks)+1 ]] <- peak
|
1607
|
-
peaks.opts$lsd1 <- peak$values.res
|
1608
|
-
}
|
1609
|
-
return(peaks)
|
1610
|
-
}
|
1611
|
-
|
1612
|
-
#' Enveomics: Recruitment Plot (2) Peak Finder - Internal Ancillary Function
|
1613
|
-
#'
|
1614
|
-
#' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks}}).
|
1615
|
-
#'
|
1616
|
-
#' @param peak Query \code{\link{enve.RecPlot2.Peak}} object
|
1617
|
-
#' @param peaks list of \code{\link{enve.RecPlot2.Peak}} objects
|
1618
|
-
#'
|
1619
|
-
#' @author Luis M. Rodriguez-R [aut, cre]
|
1620
|
-
#'
|
1621
|
-
#' @export
|
1622
|
-
|
1623
|
-
enve.recplot2.__whichClosestPeak <- function
|
1624
|
-
(peak, peaks){
|
1625
|
-
dist <- as.numeric(lapply(peaks,
|
1626
|
-
function(x)
|
1627
|
-
abs(log(x$param.hat[[ length(x$param.hat) ]] /
|
1628
|
-
peak$param.hat[[ length(peak$param.hat) ]] ))))
|
1629
|
-
dist[ dist==0 ] <- Inf
|
1630
|
-
return(which.min(dist))
|
1631
|
-
}
|