miga-base 1.2.15.2 → 1.2.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/gtdb.rb +4 -1
- data/lib/miga/cli/action/gtdb_get.rb +4 -0
- data/lib/miga/daemon.rb +4 -1
- data/lib/miga/lair.rb +6 -4
- data/lib/miga/remote_dataset/download.rb +3 -2
- data/lib/miga/remote_dataset.rb +25 -7
- data/lib/miga/taxonomy.rb +6 -0
- data/lib/miga/version.rb +2 -2
- metadata +6 -302
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI +0 -3659
- data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
- data/utils/FastAAI/README.md +0 -84
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -906
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -165
- data/utils/enveomics/Manifest/examples.json +0 -162
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -55
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -421
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/anir.rb +0 -137
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
- data/utils/enveomics/Scripts/rbm.rb +0 -108
- data/utils/enveomics/Scripts/sam.filter.rb +0 -148
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -80
- data/utils/enveomics/enveomics.R/README.md +0 -81
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +0 -67
- data/utils/multitrim/multitrim.py +0 -1555
- data/utils/multitrim/multitrim.yml +0 -13
@@ -1,221 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @author Luis M. Rodriguez-R
|
4
|
-
# @license artistic license 2.0
|
5
|
-
|
6
|
-
$VERSION = 1.0
|
7
|
-
$:.push File.expand_path('../lib', __FILE__)
|
8
|
-
require 'enveomics_rb/enveomics'
|
9
|
-
|
10
|
-
# Default values for the command-line options
o = {
  q: false, missing: '-', model: 'AUTO', removeinvar: false, undefined: '-.Xx?'
}

# Parse the command-line options into +o+. Options are removed from ARGV by
# +parse!+, so whatever remains is taken as the list of alignment files
OptionParser.new do |opt|
  cmd = File.basename($0)
  opt.banner = <<~BANNER

    [Enveomics Collection: #{cmd} v#{$VERSION}]

    Concatenates several multiple alignments in FastA format into a single
    multiple alignment. The IDs of the sequences (or the ID prefixes, if using
    --ignore-after) must coincide across files.

    Usage: #{cmd} [options] aln1.fa aln2.fa ... > aln.fa

  BANNER
  opt.on(
    '-c', '--coords FILE',
    'Output file of coordinates in RAxML-compliant format'
  ) { |v| o[:coords] = v }
  opt.on(
    '-i', '--ignore-after STRING',
    'Remove everything in the IDs after the specified string'
  ) { |v| o[:ignoreafter] = v }
  opt.on(
    '-I', '--remove-invariable', 'Remove invariable sites',
    'Note: Invariable sites are defined as columns with only one state and',
    'undefined characters. Additional ambiguous characters may exist and',
    'should be declared using --undefined'
  ) { |v| o[:removeinvar] = v }
  opt.on(
    '-u', '--missing-char CHAR',
    "Character denoting missing data. By default: '#{o[:missing]}'"
  ) do |v|
    # The missing-data character is repeated to pad absent alignments, so it
    # must be exactly one character long
    if v.length != 1
      abort "--missing-char can only be denoted by single characters: #{v}"
    end
    o[:missing] = v
  end
  opt.on(
    '-m', '--model STRING',
    'Name of the model to use if --coords is used. See RAxML docs;',
    'supported values in v8+ include:',
    '~ For DNA alignments:',
    ' "DNA[F|X]", or "DNA[F|X]/3" (to estimate rates per codon position,',
    ' particular notation for this script)',
    '~ General protein alignments:',
    ' "AUTO" (default in this script), "DAYHOFF" (1978), "DCMUT" (MBE 2005;',
    ' 22(2):193-199), "JTT" (Nat 1992;358:86-89), "VT" (JCompBiol 2000;',
    ' 7(6):761-776), "BLOSUM62" (PNAS 1992;89:10915), and "LG" (MBE 2008;',
    ' 25(7):1307-1320)',
    '~ Specialized protein alignments:',
    ' "MTREV" (mitochondrial, JME 1996;42(4):459-468), "WAG" (globular, MBE',
    ' 2001;18(5):691-699), "RTREV" (retrovirus, JME 2002;55(1):65-73),',
    ' "CPREV" (chloroplast, JME 2000;50(4):348-358), and "MTMAM" (nuclear',
    ' mammal proteins, JME 1998;46(4):409-418)'
  ) { |v| o[:model] = v }
  opt.on(
    '--undefined STRING',
    'All characters to be regarded as "undefined". It should include all',
    'ambiguous and missing data chars. Ignored unless --remove-invariable',
    "By default: '#{o[:undefined]}'"
  ) { |v| o[:undefined] = v }
  opt.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
  opt.on('-V', '--version', 'Returns version') { puts $VERSION ; exit }
  opt.on('-h', '--help', 'Display this screen') { puts opt ; exit }
  opt.separator ''
end.parse!
files = ARGV
abort 'Alignment files are mandatory' if files.nil? || files.empty?
# NOTE(review): $QUIET is presumably what the external +say+ helper checks
# before printing progress messages -- confirm in enveomics_rb/enveomics
$QUIET = o[:q]
|
82
|
-
|
83
|
-
# Read individual gene alignments and return them as a single hash with genome
# IDs as keys and arrays of single-line strings as values. The array index of
# each string is the index of the file it came from in +files+, so entries
# absent from a file are left as +nil+ (or beyond the end of the array)
#
# IDs are trimmed from the first occurrence of +ignoreafter+ onwards, if
# defined. +ignoreafter+ is matched as a literal string, so delimiters that
# are special in regular expressions (such as '|' or '.') work as expected
#
# Aborts if a file is empty or does not start with a def-line
def read_alignments(files, ignoreafter = nil)
  trim = ignoreafter && /#{Regexp.escape(ignoreafter)}.*/
  aln = {}
  files.each_with_index do |file, i|
    key = nil
    # File.foreach closes the file once the last line has been read
    File.foreach(file) do |ln|
      ln.chomp!
      if ln =~ /^>(\S+)/
        key = $1
        key.sub!(trim, '') if trim
        aln[key] ||= []
        aln[key][i] = +''
      else
        if key.nil?
          abort "Invalid FastA file: #{file}: Leading line not a def-line"
        end
        # Append in place instead of re-allocating the growing sequence
        aln[key][i] << ln.gsub(/\s/, '')
      end
    end
    abort "Empty alignment file: #{file}" if key.nil?
  end
  aln
end
|
110
|
-
|
111
|
-
# Remove invariable sites from the alignment hash +aln+, using +undefined+ as
# a string including all characters representing undefined positions (e.g., X)
#
# A column is invariable when, ignoring undefined characters, it holds at most
# one distinct state across all the entries that have that alignment. The
# strings in +aln+ are modified in place; entries lacking an alignment (+nil+)
# are left untouched
#
# Returns number of columns removed
def remove_invariable(aln, undefined)
  invs = 0
  undef_chars = undefined.chars
  rows = aln.values
  # Number of alignments: not every entry necessarily reaches the last one
  n_aln = rows.map(&:size).max || 0

  n_aln.times do |i|
    seqs = rows.map { |row| row[i] }.compact
    next if seqs.empty?

    # Column count taken from the first entry that has this alignment
    len = seqs.first.length
    keep = (0...len).select do |pos|
      (seqs.map { |s| s[pos] }.uniq - undef_chars).size > 1
    end
    invs += len - keep.size

    # Rebuild each sequence from the variable columns only. Anything beyond
    # +len+ is preserved so that length inconsistencies remain detectable
    seqs.each do |s|
      s.replace(keep.map { |pos| s[pos] }.join + (s[len..-1] || ''))
    end
  end
  invs
end
|
142
|
-
|
143
|
-
# Concatenate the alignments hash +aln+ using the character +missing+ to
# indicate missing alignments, and send each entry in the concatenated alignment
# to +blk+ as two variables: key (name) and value (alignment string)
#
# The expected length of each alignment is taken from the first entry that
# has it, so the first entry itself may lack some alignments. Entries are
# removed from +aln+ as soon as they have been passed to +blk+
#
# Aborts if any entry has an alignment with a length different from expected
#
# Returns an array with the lengths of each individual alignment
def concatenate(aln, missing, &blk)
  say 'Concatenating'
  rows = aln.values
  n_aln = rows.map(&:size).max || 0
  lengths = Array.new(n_aln) do |i|
    donor = rows.find { |row| !row[i].nil? }
    donor ? donor[i].length : 0
  end

  # Iterate over a snapshot of the keys, since entries are deleted on the go
  aln.keys.each do |key|
    # Pad missing entries
    lengths.each_with_index { |len, i| aln[key][i] ||= missing * len }

    # Check length
    obs_len = aln[key].map(&:length)
    unless lengths == obs_len
      abort "Inconsistent lengths in '#{key}'\nexp: #{lengths}\nobs: #{obs_len}"
    end

    # Pass entry to the block and remove from alignment hash
    blk.call(key, aln[key].join(''))
    aln.delete(key)
  end
  lengths
end
|
167
|
-
|
168
|
-
# Write the partition coordinates to +file+, one partition per entry of
# +names+ with the matching width in +lengths+ (empty alignments are skipped)
#
# The output follows the RAxML coords format and tags every partition with
# +model+. A model like "DNA/3" or "DNAX/3" yields three partitions per
# alignment instead, one for each codon position
def save_coords(file, names, lengths, model)
  # Only set when the model requests per-codon partitions
  codon_model = model =~ /(DNA.?)\/3/ ? $1 : nil
  File.open(file, 'w') do |fh|
    offset = 0
    names.each_with_index do |name, idx|
      width = lengths[idx]
      next if width <= 0

      # Disambiguate names shared by more than one alignment
      label = name
      label += "_#{idx}" while names.count(label) > 1

      last = offset + width
      if codon_model
        (1..3).each do |codon|
          fh.puts "#{codon_model}, #{label}codon#{codon} = #{offset + codon}-#{last}\\3"
        end
      else
        fh.puts "#{model}, #{label} = #{offset + 1}-#{last}"
      end
      offset = last
    end
  end
end
|
190
|
-
|
191
|
-
# ------ MAIN ------
# Read all the alignments, optionally drop invariable sites, print the
# concatenated alignment to STDOUT, and optionally save the coordinates file
begin
  say 'Reading'
  alignments = read_alignments(files, o[:ignoreafter])

  if o[:removeinvar]
    say 'Removing invariable sites'
    inv = remove_invariable(alignments, o[:undefined])
    say " Removed #{inv} sites"
  end

  # Print every concatenated entry in FastA format, wrapped at 60 columns
  lengths = concatenate(alignments, o[:missing]) do |name, seq|
    puts ">#{name}", seq.gsub(/(.{1,60})/, "\\1\n")
  end
  say " #{lengths.inject(:+)} columns"

  unless o[:coords].nil?
    say 'Generating coordinates'
    # Partition names: file basename up to the first dot, with any character
    # other than letters, digits, or underscores replaced by an underscore
    names = files.map do |i|
      File.basename(i).gsub(/\..*/, '').gsub(/[^A-Za-z0-9_]/, '_')
    end
    save_coords(o[:coords], names, lengths, o[:model])
  end

  $stderr.puts 'Done' unless o[:q]
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Report the failure to the caller instead of exiting with status 0
  exit 1
end
|
221
|
-
|
@@ -1,35 +0,0 @@
|
|
1
|
-
#!/usr/bin/env perl
|
2
|
-
|
3
|
-
#
|
4
|
-
# @author: Luis M. Rodriguez-R
|
5
|
-
# @update: Mar-23-2015
|
6
|
-
# @license: artistic license 2.0
|
7
|
-
#
|
8
|
-
|
9
|
-
use Bio::AlignIO;
|
10
|
-
|
11
|
-
# Both formats are mandatory positional arguments; print the usage otherwise
my ($iformat, $oformat) = @ARGV;
($iformat and $oformat) or die "
Usage:
$0 in-format out-format < in_file > out_file

in-format Input file's format.
out-format Output file's format.
in_file Input file.
out_file Output file.

Example:
# Re-format example.fa into Stockholm
$0 fasta stockholm < example.fa > example.stk

Supported formats are:
bl2seq, clustalw, emboss, fasta, maf, mase, mega,
meme, metafasta, msf, nexus, pfam, phylip, po,
prodom, psi, selex, stockholm, XMFA, arp

";

# Stream every alignment found in STDIN to STDOUT, re-written in the output
# format. Lexical handles keep the script valid under "use strict"
my $in  = Bio::AlignIO->new(-fh => \*STDIN,  -format => $iformat);
my $out = Bio::AlignIO->new(-fh => \*STDOUT, -format => $oformat);
while ( my $aln = $in->next_aln ) { $out->write_aln($aln) }
|
35
|
-
|
@@ -1,152 +0,0 @@
|
|
1
|
-
#!/usr/bin/env perl
|
2
|
-
|
3
|
-
#
|
4
|
-
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
5
|
-
# @update: Nov-29-2015
|
6
|
-
# @license: artistic license 2.0
|
7
|
-
#
|
8
|
-
|
9
|
-
use warnings;
|
10
|
-
use strict;
|
11
|
-
use Getopt::Std;
|
12
|
-
use List::Util qw/min max sum/;
|
13
|
-
|
14
|
-
# Print the one-line description of this program to the selected output
# handle. The name matches the callback Getopt::Std looks up for --version.
sub VERSION_MESSAGE() {
    my $banner = "Alpha-diversity indices (enveomics)\n";
    print $banner;
}
|
15
|
-
# Terminate the program with the full usage text as the error message. The
# text ends in a newline, so die does not append the file and line number.
sub HELP_MESSAGE() {
    die <<"END_HELP";

Description:
Takes a table of OTU abundance in one or more samples and calculates the Rao
(Q_alpha), Rao-Jost (Q_alpha_eqv), Shannon (Hprime), and inverse Simpson
(1_lambda) indices of alpha diversity for each sample.

To use it with Qiime OTU Tables, run it as:
$0 -i OTU_Table.txt -h

Usage:
$0 [opts]

-i <str> * Input table (columns:samples, rows:OTUs, first column:OTU
names).
-r <int> Number of rows to ignore. By default: 0.
-c <int> Number of columns to ignore after the first column (i.e.,
between the first column, containing the name of the categories,
and the first column of abundance values). By default: 0.
-C <int> Number of columns to ignore at the end of each line.
By default: 0.
-d <str> Delimiter. Supported escaped characters are: \"\\t\"
(tabulation), and \"\\0\" (null bit). By default: \"\\t\".
-h If set, the first row is assumed to have the names of the
samples.
-D <str> Distances file. A squared matrix (or bottom-left half matrix)
with the distances between categories (OTUs or functions). The
first column must contain the names of the categories, and it
shouldn't have headers. If not set, all distances are assumed
to be one. Only used for Rao.
-R Do not calculate Rao indices. This significantly decreases the
total running time. Note that Rao indices are highly susceptible
to precision errors, and shouldn't be trusted for very big
numbers.
-q <int> Estimate the qD index (true diversity order q). By default: 0.
--help This help message.

* Mandatory.

END_HELP
}
|
54
|
-
|
55
|
-
# Input arguments
|
56
|
-
my %o;
|
57
|
-
getopts('i:c:C:d:r:hD:Rq:', \%o);
|
58
|
-
|
59
|
-
#$o{B} and (eval("use bignum; 1") or die "Cannot use bignum.\n");
|
60
|
-
&HELP_MESSAGE() unless $o{i};
|
61
|
-
$o{c} ||= 0;
|
62
|
-
$o{C} ||= 0;
|
63
|
-
$o{r} ||= 0;
|
64
|
-
$o{d} ||= "\\t";
|
65
|
-
$o{q} ||= 0;
|
66
|
-
|
67
|
-
$o{d}="\t" if $o{d} eq "\\t";
|
68
|
-
$o{d}="\0" if $o{d} eq "\\0";
|
69
|
-
|
70
|
-
# Distance matrix
|
71
|
-
my $D = {};
|
72
|
-
if($o{D} and not $o{R}){
|
73
|
-
my @Didx = ();
|
74
|
-
open DIST, "<", $o{D} or die "Cannot read file: $o{D}: $!\n";
|
75
|
-
while(<DIST>){
|
76
|
-
chomp;
|
77
|
-
my @d = split /\t/;
|
78
|
-
my $idx = shift @d;
|
79
|
-
push @Didx, $idx;
|
80
|
-
$D->{ $idx } ||= {};
|
81
|
-
$D->{ $idx }->{ $Didx[$_] } = $d[$_] for(0 .. $#d);
|
82
|
-
}
|
83
|
-
close DIST;
|
84
|
-
undef @Didx;
|
85
|
-
}
|
86
|
-
|
87
|
-
# Abundance matrix
# @names:  column (sample) names, filled from the header row when -h is set.
# @cats:   first field of every data row (the category name).
# @values: one array per data column, plus a final array with the row sums.
my (@names, @cats, @values);
open my $table_fh, "<", $o{i} or die "Cannot open file: ".$o{i}.": $!\n";

# Discard the first $o{r} lines of the table.
for my $skipped (1 .. $o{r}) {
    my $discarded = <$table_fh>;
}

if ($o{h}) {
    my $header = <$table_fh>;
    die "Empty table!\n" unless $header;
    chomp $header;
    @names = split $o{d}, $header;
    # Drop the leading name column plus the $o{c} extra leading columns.
    for my $dropped (0 .. $o{c}) {
        shift @names;
    }
}

while (my $row = <$table_fh>) {
    chomp $row;
    my @fields = split $o{d}, $row;
    push @cats, shift(@fields);
    # Remove $o{c} leading and $o{C} trailing columns from the data fields.
    for my $lead (1 .. $o{c}) {
        shift @fields;
    }
    for my $trail (1 .. $o{C}) {
        pop @fields;
    }
    for my $col (0 .. $#fields) {
        $values[$col] ||= [];
        push @{ $values[$col] }, $fields[$col];
    }
    # The column right after the last data column accumulates the row totals.
    my $total_col = $#fields + 1;
    push @{ $values[$total_col] }, sum(@fields);
}
close $table_fh;

# The trailing column of row totals is reported under the name "gamma".
$names[$#values] = "gamma";
|
112
|
-
|
113
|
-
# Header row; the two Rao columns are left out when -R is set.
if ($o{R}) {
    print "".join($o{d}, qw/Sample Hprime 1_lambda qD/)."\n";
} else {
    print "".join($o{d}, qw/Sample Q_alpha Q_alpha_eqv Hprime 1_lambda qD/)."\n";
}

# One output row per column of @values (the last one being the row totals).
for my $i (0 .. $#values) {
    print "".(exists $names[$i] ? $names[$i] : $i).$o{d};
    my $column = $values[$i] || [];
    my $N = sum @$column;

    # Relative abundances, computed once. A column whose total is zero gets
    # all-zero proportions instead of aborting with a division by zero.
    my @p = map { $N ? $_/$N : 0 } @$column;

    my $Q = 0;      # Rao's quadratic entropy
    my $H = 0;      # Shannon entropy (natural logarithm)
    my $l = 0;      # sum of squared proportions
    my $q_sum = 0;  # sum of p**q, only accumulated when q != 1
    for my $ik (0 .. $#p) {
        unless ($o{R}) {
            my $Qi = 0;
            for my $jk (0 .. $#p) {
                # Distance between the two categories: 1 without -D, otherwise
                # looked up in either orientation of the matrix.
                my $dij = (!$o{D}) ? 1 :
                    exists $D->{ $cats[$ik] }->{ $cats[$jk] } ?
                        $D->{ $cats[$ik] }->{ $cats[$jk] } :
                    exists $D->{ $cats[$jk] }->{ $cats[$ik] } ?
                        $D->{ $cats[$jk] }->{ $cats[$ik] } :
                    die "Cannot find distance between ".$cats[$ik].
                        " and ".$cats[$jk].".\n";
                $Qi += $dij * $p[$ik] * $p[$jk];
            }
            $Q += $Qi;
        }
        my $pi = $p[$ik];
        $H -= $pi * log($pi) if $pi;
        $l += $pi**2;
        $q_sum += $pi * ($pi**($o{q}-1)) unless $o{q}==1 or not $pi;
    }

    # True diversity of order q. For q == 1 it is exp(H); otherwise it is
    # (sum p**q)**(1/(1-q)). An empty power sum yields 0 rather than a
    # division by zero.
    my $qD = $o{q}==1 ? exp($H)
           : $q_sum   ? 1/($q_sum**(1/($o{q}-1)))
           :            0;

    # Inverse of the sum of squared proportions, guarded in both output modes.
    my $inv_l = $l ? 1/$l : "Inf";

    if ($o{R}) {
        print "".join($o{d}, $H, $inv_l, $qD)."\n";
    } else {
        print "".join($o{d}, $Q, ($Q==1 ? "NA" : 1/(1-$Q)), $H, $inv_l, $qD)."\n";
    }
}
|
152
|
-
|
@@ -1,93 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'optparse'
|
4
|
-
|
5
|
-
# Default settings; command-line options overwrite them below.
o = { range: 0.5, perseq: false, length: false }

# Running without arguments behaves like asking for help.
ARGV.push('-h') if ARGV.empty?

parser = OptionParser.new do |opt|
  opt.banner = "
Estimates the truncated average sequencing depth (TAD) from a BedGraph file.

IMPORTANT: This script doesn't consider zero-coverage positions if missing
from the file. If you produce your BedGraph file with bedtools genomecov and
want to consider zero-coverage position, be sure to use -bga (not -bg).

Usage: #{$0} [options]"
  opt.separator ''
  opt.on('-i', '--input PATH', 'Input BedGraph file (mandatory).') do |path|
    o[:i] = path
  end
  opt.on(
    '-r', '--range FLOAT',
    'Central range to consider, between 0 and 1.',
    "By default: #{o[:range]} (inter-quartile range)."
  ) do |value|
    o[:range] = value.to_f
  end
  opt.on(
    '-s', '--per-seq',
    'Calculate averages per reference sequence, not total.',
    'Assumes a sorted BedGraph file.'
  ) do |flag|
    o[:perseq] = flag
  end
  opt.on('-l', '--length', 'Add sequence length to the output.') do |flag|
    o[:length] = flag
  end
  opt.on('-h', '--help', 'Display this screen.') do
    puts opt
    exit
  end
  opt.separator ''
end
parser.parse!

# The input file is the only mandatory option.
abort '-i is mandatory.' unless o[:i]
|
36
|
-
|
37
|
-
def pad(d, idx, r)
|
38
|
-
idx.each do |i|
|
39
|
-
next if d[i].nil?
|
40
|
-
d[i] -= r
|
41
|
-
break unless d[i] < 0
|
42
|
-
r = -d[i]
|
43
|
-
d[i] = nil
|
44
|
-
end
|
45
|
-
d
|
46
|
-
end
|
47
|
-
|
48
|
-
# Prints one tab-separated line with the truncated average depth.
#
# sq:: sequence name, printed as first column only when o[:perseq] is set.
# d::  array indexed by depth, holding the number of positions observed at
#      that depth (nil where no position was seen). Modified in place.
# ln:: total number of positions counted in +d+.
# o::  options hash; reads :range, :perseq and :length.
#
# The same amount, (1 - range) / 2 of +ln+ rounded, is removed from the
# low-depth and from the high-depth end of +d+ before averaging. When nothing
# is left after trimming the reported average is 0.0.
def report(sq, d, ln, o)
  # Estimate padding ranges
  trim_fraction = (1.0 - o[:range]) / 2.0
  r = (trim_fraction * ln).round

  # Pad: trim from the lowest depths first, then from the highest
  d = pad(d, d.each_index.to_a, r)
  d = pad(d, d.each_index.to_a.reverse, r)

  # Average
  y = [0.0]
  kept = d.compact.inject(0, :+)
  # Trimming can leave only zero counts behind; dividing then would print
  # NaN, so the average is only computed when some positions remain.
  unless kept.zero?
    weighted = d.each_with_index.to_a.map { |v, i| v.nil? ? 0 : i * v }.inject(0, :+)
    y[0] = weighted.to_f / kept
  end

  # Report
  y.unshift(sq) if o[:perseq]
  y << ln if o[:length]
  puts y.join("\t")
end
|
69
|
-
|
70
|
-
# Read BedGraph
# depth_counts[k] accumulates how many positions were seen at depth k,
# total_len is the number of positions accumulated so far, and prev_seq is
# the sequence name of the previous data line.
depth_counts = []
total_len = 0
prev_seq = nil
File.open(o[:i], "r") do |ifh|
  ifh.each_line do |line|
    next if line =~ /^#/

    fields = line.chomp.split("\t")
    seq = fields.shift

    # In per-sequence mode, flush the finished sequence as soon as a line
    # for a different sequence shows up.
    if o[:perseq] && !prev_seq.nil? && prev_seq != seq
      report(prev_seq, depth_counts, total_len, o)
      depth_counts = []
      total_len = 0
    end

    # Remaining columns are read as integers: start, end, depth.
    from, to, depth = fields.map { |col| col.to_i }
    span = to - from
    depth_counts[depth] ||= 0
    depth_counts[depth] += span
    total_len += span
    prev_seq = seq
  end
end

# Flush whatever was accumulated last (or everything, in total mode).
report(prev_seq, depth_counts, total_len, o)
|
93
|
-
|
@@ -1,71 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'optparse'
|
4
|
-
|
5
|
-
# Default settings; command-line options overwrite them below.
o = { win: 1000 }

# Running without arguments behaves like asking for help.
ARGV.push('-h') if ARGV.empty?

parser = OptionParser.new do |opt|
  opt.banner = "
Estimates the sequencing depth per windows from a BedGraph file.

IMPORTANT: This script doesn't consider zero-coverage positions if missing
from the file. If you produce your BedGraph file with bedtools genomecov and
want to consider zero-coverage position, be sure to use -bga (not -bg).

Usage: #{$0} [options]"
  opt.separator ''
  opt.on('-i', '--input PATH', 'Input BedGraph file (mandatory).') do |path|
    o[:i] = path
  end
  opt.on(
    '-w', '--win INT',
    'Window size, in base pairs.', "By default: #{o[:win]}."
  ) do |value|
    o[:win] = value.to_i
  end
  opt.on('-h', '--help', 'Display this screen.') do
    puts opt
    exit
  end
  opt.separator ''
end
parser.parse!

# The input file is the only mandatory option.
abort '-i is mandatory.' unless o[:i]
|
29
|
-
|
30
|
-
# Prints one tab-separated line for a window: +a+, +b+, the average depth
# and the comma-joined keys of +seqs+.
#
# d::    array indexed by depth, holding the number of positions at that
#        depth (nil where none was seen).
# a, b:: values printed as the first two columns.
# seqs:: hash whose keys are printed, comma-separated, as the last column.
#
# The average is the count-weighted mean of the depths; 0.0 when +d+ holds
# no counts at all.
def report(d, a, b, seqs)
  # Average
  positions = 0
  depth_sum = 0
  d.each_with_index do |count, depth|
    next if count.nil?

    positions += count
    depth_sum += depth * count
  end
  average = d.compact.empty? ? 0.0 : depth_sum.to_f / positions

  # Report
  names = seqs.keys.join(",")
  puts [a, b, average, names].join("\t")
end
|
41
|
-
|
42
|
-
# Read BedGraph
# depth_counts[k] accumulates positions seen at depth k inside the current
# window, covered is the running number of positions read, and
# win_start..win_end are the bounds of the current window on that running
# coordinate. seen_seqs collects the sequence names touching the window.
depth_counts = []
covered = 0
win_start = 1
seen_seqs = {}
win_end = o[:win]
File.open(o[:i], "r") do |ifh|
  ifh.each_line do |line|
    next if line =~ /^#/

    fields = line.chomp.split("\t")
    seq = fields.shift
    seen_seqs[seq] = 1

    # Remaining columns are read as integers: start, end, depth.
    from, to, depth = fields.map { |col| col.to_i }
    span = to - from
    depth_counts[depth] ||= 0
    depth_counts[depth] += span
    covered += span

    # Emit every window completed by this interval. The part of the interval
    # lying beyond the window end is moved over to the next window.
    while covered >= win_end
      overflow = covered - win_end
      depth_counts[depth] -= overflow
      report(depth_counts, win_start, win_end, seen_seqs)

      seen_seqs = {}
      seen_seqs[seq] = 1 if covered > win_end
      depth_counts = []
      depth_counts[depth] = overflow

      win_start = win_end + 1
      win_end = win_start + o[:win] - 1
    end
  end
end
|
71
|
-
|
@@ -1,102 +0,0 @@
|
|
1
|
-
#!/usr/bin/env perl
|
2
|
-
#
|
3
|
-
# @authors Konstantinos Konstantinidis (initial version)
|
4
|
-
# modified to work with the BLASTp 2.2.25+ m0 output by
|
5
|
-
# Despina Tsementzi & Luis M. Rodriguez-R
|
6
|
-
# @updated Dec-21-2015
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
# Records are read up to each "Lambda " marker of the BLAST text report
# rather than line by line.
$/ = "Lambda ";
use strict;

# State shared between consecutive lines of an alignment: the residues of the
# latest Query segment, the matching cigar (midline) characters, the Sbjct
# residues, and the length of the latest Query segment.
my @query;
my @subject;
my @similarity;
my $length = "0";

my($cigar_chr, $blast) = @ARGV;

($cigar_chr and $blast) or die "
.Description:
Counts the different AA substitutions in the best hit blast alignments, from
a BLASTP pairwise format output (-outfmt 0 in BLAST+, -m 0 in legacy BLAST).

.Usage: $0 cigar_char blast.m0.txt > aa-subs.list

cigar_char Use '+' for similar substitutions, use '_' for non similar
substitutions
blast.m0.txt Blast in 'text' format (-outfmt/-m 0).
aa-subs.list A tab-delimited raw file with one substitution per row and
columns:
(1) Name-of-query_Name-of-subject
(2) AA-in-subject
(3) AA-in-query
(4) Total-Align-Length

";

# For each blast result (i.e., for each query)
open my $blast_fh, "<", $blast or die "Cannot read file: $blast: $!\n";
while (my $data = <$blast_fh>) {
    $data =~ s/\r//g;

    # Text before the first '>' describes the query. Only the first hit after
    # it is kept, so only the best match is considered.
    my ($data_q, $data_f) = split(/>/, $data);
    next unless defined $data_f;

    my ($name_query) = ($data_q =~ /Query\= (\S+?)(?:_GENE|\s)/);
    my ($length_query) = ($data_q =~ /\(([\d,]+) letters/ );
    ($length_query) = ($data_q =~ /Length=([\d,]+)/) unless $length_query;
    # A chunk without a query length cannot pass the filters below.
    next unless defined $length_query;
    $length_query =~ tr/,//d;

    # Capture statistics
    my ($length_match) = ($data_f =~ /Identities = \d+\/(\d+)/);
    my ($identity_match) = ($data_f =~ /Identities = \d+\/\d+ \((\d+)%/);
    my ($target_name) = ($data_f =~ /^\s?(\S+)/);

    # If the alignment meets minimum requirements
    next unless $length_query > 30
        && ($length_match/$length_query > 0.7)
        && $identity_match > 60;

    # Spaces become underscores, so a blank in the cigar line reads as '_'.
    $data_f =~ tr/ /_/;
    my @array = split ("\n", $data_f);
    my $blanks = 0;
    my $prefix_size = 0;

    # For each line in the alignment
    for my $data_fff (@array) {
        if ($data_fff =~ /(Query[:_]_+\d+_+)([^_]+)/) {
            # Query lines
            $prefix_size = length($1);
            $length = length($2);
            @query = split (//, $2);
        } elsif ($data_fff =~ /^_{11}/) {
            # Cigar lines: cut out the columns under the query residues.
            @similarity = split(//, substr($data_fff, $prefix_size, $length));
        } elsif ($data_fff =~ /Sbjct[:_]_+\d+_+([^_]+)/) {
            # Subject lines
            @subject = split(//, $1);
            # For each alignment column. The bound is exclusive: valid
            # columns are 0 .. $length-1.
            for (my $i = 0; $i < $length; $i++) {
                if ($similarity[$i] eq $cigar_chr) {
                    print "$name_query\_$target_name\t$subject[$i]\t$query[$i]\t$length_match\n";
                }
            }
            undef @query;
            undef @similarity;
            undef @subject;
        }

        # Remove secondary alignments: stop at three consecutive empty lines.
        if ($data_fff =~ /^$/) {
            $blanks++;
            last if $blanks >= 3;
        } else {
            $blanks = 0;
        }
    } # for my $data_fff (@array)
} # while (my $data = <$blast_fh>)
close $blast_fh;
|
102
|
-
|