miga-base 0.7.26.0 → 0.7.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +829 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +156 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +57 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +418 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm.rb +146 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +50 -0
- data/utils/enveomics/enveomics.R/README.md +80 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- metadata +277 -4
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
5
|
+
# @license artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
$:.push File.expand_path(File.dirname(__FILE__) + "/lib")
|
9
|
+
require "enveomics_rb/remote_data"
|
10
|
+
use "nokogiri"
|
11
|
+
|
12
|
+
#================================[ Options parsing ]
|
13
|
+
# Default values for every command-line option; the parser below overwrites
# the entries that the user sets explicitly.
o = {
  q: false, gis: [], dbfrom: "nuccore", header: true,
  exact_gi: false, no_nil: false, ret: "ScientificName",
  ranks: %w[superkingdom phylum class order family genus species]
}

# Command-line interface definition. Each handler only records the selected
# value in +o+; nothing is validated or executed at parsing time.
parser = OptionParser.new do |opt|
  opt.banner = [
    "",
    "Maps a list of NCBI GIs to their corresponding taxonomy using the NCBI",
    "EUtilities. Avoid using this script on millions of entries at a time, since",
    "each entry elicits two requests to NCBI's servers.",
    "",
    "*IMPORTANT NOTE*: NCBI is phasing out support for GIs. Please use acc.ver",
    "instead with NCBIacc2tax.rb.",
    "",
    "Usage: #{$0} [options]"
  ].join("\n")
  opt.separator ""

  # Input selection
  opt.on("-g", "--gis GI1,GI2,...", Array,
         "Comma-separated list of GIs. Required unless -i is passed.") do |list|
    o[:gis] = list
  end
  opt.on("-i", "--infile FILE",
         "Raw text file containing the list of GIs, one per line.",
         "Required unless -g is passed.") do |path|
    o[:infile] = path
  end
  opt.on("-p", "--protein",
         "Use if the GIs are proteins. Otherwise, GIs are assumed to be from " +
         "the Nuccore Database.") do
    o[:dbfrom] = "protein"
  end

  # Output format
  opt.on("-r", "--ranks RANK1,RANK2,...", Array,
         "Taxonomic ranks to report. By default: #{o[:ranks].join(",")}.") do |list|
    o[:ranks] = list
  end
  opt.on("-n", "--noheader",
         "Do not include a header in the output.") do
    o[:header] = false
  end
  opt.on("-t", "--taxids",
         "Return Taxonomy IDs instead of scientific names.") do
    o[:ret] = "TaxId"
  end

  # Behavior on missing or outdated entries
  opt.on("--exact-gi",
         "Returns only taxonomy associated with the exact GI passed.",
         "By default, it attempts to update accession versions if possible.") do |flag|
    o[:exact_gi] = flag
  end
  opt.on("--ignore-missing",
         "Does not report missing GIs in the output file.",
         "By default, it reports GI and empty values for all other columns.") do |flag|
    o[:no_nil] = flag
  end

  opt.on("-q", "--quiet", "Run quietly.") { |_flag| o[:q] = true }
  opt.on("-h", "--help", "Display this screen") do
    puts opt
    exit
  end
  opt.separator ""
end
parser.parse!
|
59
|
+
|
60
|
+
#================================[ Functions ]
|
61
|
+
# Asks NCBI eLink for the taxonomy entry linked to +gi+ in the database +db+
# and returns the first matching <Id> node of the response. The caller treats
# a nil return as "no link found".
def gi2taxid(db, gi)
  link_query = { :dbfrom => db, :db => "taxonomy", :id => gi }
  response = Nokogiri::XML(RemoteData.elink(link_query))
  response.at_xpath("/eLinkResult/LinkSet/LinkSetDb/Link/Id")
end
|
66
|
+
#================================[ Main ]
|
67
|
+
# Main routine: resolves the taxonomy of every requested GI and prints one
# tab-delimited row per GI (GI, TaxId, and one column per requested rank).
# Any error is reported to STDERR and ends the script with a non-zero status,
# so that calling pipelines can detect the failure.
begin
  unless o[:infile].nil?
    # Surrounding whitespace and blank lines are discarded: an empty GI would
    # still trigger requests to NCBI and produce a meaningless output row.
    o[:gis] += File.readlines(o[:infile]).map(&:strip).reject(&:empty?)
  end
  o[:ranks].map!(&:downcase)
  puts((["GI", "TaxId"] + o[:ranks].map(&:capitalize)).join("\t")) if o[:header]

  o[:gis].each do |gi|
    taxid = gi2taxid(o[:dbfrom], gi)
    status = ""
    # Unless --exact-gi was requested, retry with the updated GI (if any).
    if taxid.nil? && !o[:exact_gi]
      new_gi, status = RemoteData.update_gi(o[:dbfrom], gi)
      taxid = gi2taxid(o[:dbfrom], new_gi) unless new_gi.nil?
    end

    if taxid.nil?
      warn "Cannot find link to taxonomy: #{gi} #{status}"
      # Missing GIs are reported with empty columns unless --ignore-missing.
      puts(([gi, ""] + o[:ranks].map { "" }).join("\t")) unless o[:no_nil]
      next
    end

    # From here on +taxid+ is never nil. Collect the rank of the taxon itself
    # followed by the ranks of its complete lineage.
    doc = Nokogiri::XML(
      RemoteData.efetch({ :db => "taxonomy", :id => taxid.content }))
    taxonomy = {}
    taxonomy[doc.at_xpath("/TaxaSet/Taxon/Rank").content] =
      doc.at_xpath("/TaxaSet/Taxon/#{o[:ret]}").content
    doc.xpath("/TaxaSet/Taxon/LineageEx/Taxon").each do |taxon|
      taxonomy[taxon.at_xpath("./Rank").content] =
        taxon.at_xpath("./#{o[:ret]}").content
    end

    # Ranks absent from the lineage are reported as empty strings.
    columns = o[:ranks].map { |rank| taxonomy.fetch(rank, "") }
    puts(([gi, taxid.content] + columns).join("\t"))
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # A bare `err` here was a no-op that let the script finish with status 0.
  exit 1
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Usage: perl in_silico_GA_GI.pl [options]
|
2
|
+
|
3
|
+
use Getopt::Long;
|
4
|
+
use Math::Random qw(:all);
|
5
|
+
|
6
|
+
# Simulates paired reads from the sequence(s) of a FastA file.
# None of the options has a default value in this script.
$argu=GetOptions('in=s'=>\$infile,            # input FastA file
                 'out=s'=>\$outfile,          # output file name
                 'coverage=s'=>\$cov,         # desired coverage
                 'seq_error=s'=>\$seq_error,  # sequencing error (probability argument of random_binomial)
                 'read_len=s'=>\$read_len,    # simulated read length
                 'ins_len=s'=>\$ins_len,      # insert length between both reads
                 'ins_var=s'=>\$ins_var);     # fragment length spread (third argument of random_normal)

$chr='';
# Fail early with a clear message instead of silently simulating from an
# empty sequence or printing to an unopened handle.
open(IN,'<',$infile) or die "Cannot open input file '$infile': $!\n";
open(OUT,'>',$outfile) or die "Cannot open output file '$outfile': $!\n";

# Bases used to overwrite a position hit by a sequencing error.
%code=('0'=>'C', '1'=>'A', '2'=>'T', '3'=>'G');

# All sequence lines are concatenated into a single string; the identifier
# of the last defline read is the one attached to every read name.
while(<IN>){
    chomp;
    if(!/^\>/){
        $chr.=$_;
    }
    else{
        $gi=$_;
        # Keep only the GI number for deflines of the form
        # ">gi|NUMBER|db|accession"; any other defline is kept complete.
        # This test used to read "$gi= ~/.../", which assigns to $gi the
        # bitwise negation of a match against $_ (always true) and then
        # overwrote $gi with a stale or undefined $1 on other deflines.
        if($gi=~/^\>gi\|(\S+)\|\S+\|\S+/){
            $gi=$1;
        }
    }
}
close(IN);

$chr_size=length $chr;
print "chromosome size: $chr_size\n";
# Expected fragment length: both reads plus the insert between them.
$seg_size=2*$read_len+$ins_len;
$reads_number=int($cov*$chr_size/($read_len*2));
print "generated reads $reads_number x 2\n";

for(1..$reads_number){
    $index=$_;
    # Zero-pad the read number to eight digits.
    $l=length $index;
    $k=8-$l;
    $kk='0' x $k;
    $id='read'.$kk.$index.'_'.$gi;

    # make start site
    $start_site=int(rand($chr_size));
    # make fragment length
    $seg_length=int(random_normal(1,$seg_size,$ins_var));

    # extract the fragment; when it runs past the end of the sequence, the
    # missing part is taken from the beginning of the sequence
    $seg=substr($chr,$start_site,$seg_length);
    $s_len=length $seg;
    $gap=$seg_length-$s_len;
    if($gap!=0){
        $makeup=substr($chr,0,$gap);
        $seg.=$makeup;
    }

    $id.='.start'.$start_site.'.seg_len'.$seg_length;

    # get the reads: first and last $read_len bases of the fragment
    $seq1=substr($seg,0,$read_len);
    #$seg=~tr/ATCG/TAGC/ this line can change the orientation of the second read;
    $seq2=substr($seg,-$read_len);

    # introduce sequencing errors: one random draw per read position decides
    # whether that position is overwritten
    @seq1=split(//,$seq1);
    @seq2=split(//,$seq2);
    @mut1=random_binomial($read_len,1,$seq_error);
    @mut2=random_binomial($read_len,1,$seq_error);

    for(0..$#mut1){
        $i=$_;
        # The replacement is drawn from all four bases, so it can be equal
        # to the base already at that position.
        if($mut1[$i]==1){
            $r=int(rand(4));
            $seq1[$i]=$code{$r};
        }
        if($mut2[$i]==1){
            $r=int(rand(4));
            $seq2[$i]=$code{$r};
        }
    }
    $seq1=join('',@seq1);
    $seq2=join('',@seq2);

    $id1=$id.'#0/1';
    $id2=$id.'#0/2';

    print OUT ">$id1\n$seq1\n>$id2\n$seq2\n";
}
# Closing explicitly reports write errors (e.g., a full disk) that would
# otherwise be lost when the handle is closed implicitly at exit.
close(OUT) or die "Cannot close output file '$outfile': $!\n";
|
95
|
+
|
96
|
+
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
../../enveomics.R
|
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R
|
4
|
+
# @license: artistic license 2.0
|
5
|
+
#
|
6
|
+
|
7
|
+
require "optparse"
|
8
|
+
ARGV << "-h" if ARGV.size==0
|
9
|
+
|
10
|
+
# Loads one or more libraries, reporting the unmet requirements on failure.
#
# gems:: Name of the library to require, or an Array of names. The Array
#        passed by the caller is not modified.
# mandatory:: If true (default), the script is aborted when any library
#             cannot be loaded, listing the installation command of the
#             library that failed and of all those not yet attempted.
#
# Returns true if all the libraries were loaded, or false if one of them
# could not be loaded and +mandatory+ is false.
def use(gems, mandatory=true)
  # Work on a copy so that the caller's Array is left untouched.
  pending = gems.is_a?(Array) ? gems.dup : [gems]
  begin
    require "rubygems"
    until pending.empty?
      # Dequeue only AFTER a successful load, so that the library that
      # fails is still listed in the error message below.
      require pending.first
      pending.shift
    end
    true
  rescue LoadError
    abort "\nUnmet requirements, please install required gems:" +
      pending.map{ |gem| "\n gem install #{gem}" }.join + "\n\n" if mandatory
    false
  end
end
|
24
|
+
|
@@ -0,0 +1,253 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R
|
4
|
+
# @update: Jul-14-2015
|
5
|
+
# @license: artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
module JPlace
|
9
|
+
##### CLASSES:
|
10
|
+
# Placement.new(placement[, fields]): Initializes a new read placement.
|
11
|
+
# placement: A hash containing the placement.
|
12
|
+
# fields: If passed, sets the field order for all subsequent placements.
|
13
|
+
# A read placement from a JPlace file. Only the first entry of the 'p' list
# of the placement is kept.
class Placement
  # +flag+ is used by JPlace.distances.rb as a placeholder.
  attr_writer :flag
  # p:: Array containing only the first entry of the placement's 'p' list.
  # n:: Names of the placed reads.
  # m:: Multiplicity of each placed read (same order as +n+).
  attr_reader :p, :n, :m, :flag

  # Order of the values in each 'p' entry, shared by all the placements.
  @@fields = nil

  def self.fields=(fields)
    @@fields = fields
  end

  def self.fields
    @@fields
  end

  # placement:: Hash with the key 'p' and either 'n' or 'nm'.
  # fields:: Field order of the 'p' entries. It's only used if the field
  #          order has not been defined before.
  #
  # Aborts if 'p' is missing or empty, or if both 'n' and 'nm' are missing.
  def initialize(placement, fields=nil)
    @@fields = fields if @@fields.nil? && !fields.nil?
    # Save only the best (first) placement:
    abort "Placements must contain a 'p' field.\n" if placement["p"].nil?
    abort "Placements must contain a 'p' field with at least one entry.\n" if placement["p"][0].nil?
    @p = [placement["p"][0]]
    # Find name-only placements (EPA-style):
    unless placement["n"].nil?
      @n = placement["n"]
      @m = @n.map { 1 }
    end
    # Find multiplicity placements (pplacer-style); if both 'n' and 'nm'
    # are present, 'nm' takes precedence:
    unless placement["nm"].nil?
      @n = placement["nm"].map { |nm| nm[0] }
      @m = placement["nm"].map { |nm| nm[1].to_i }
    end
    abort "Placements must contain one of 'n' or 'nm' fields.\n" if @n.nil? || @m.nil?
  end

  # Returns an Array of Hashes (keys :n and :m), one per placed read.
  def nm
    (0 .. (n.length - 1)).map { |i| { :n => n[i], :m => m[i] } }
  end

  # Returns the value of +field+ in the saved placement. Aborts if the field
  # order is undefined or doesn't include +field+.
  def get_field_value(field)
    abort "Impossible to read placement with undefined fields." if @@fields.nil?
    p[0][field_index(field)]
  end

  # Sets the value of +field+ in the saved placement. Aborts if the field
  # order is undefined or doesn't include +field+.
  def set_field_value(field, value)
    # Same guard as in get_field_value: without it, an undefined field order
    # would end in a NoMethodError on nil instead of a readable message.
    abort "Impossible to write placement with undefined fields." if @@fields.nil?
    p[0][field_index(field)] = value
  end

  def edge_num
    get_field_value('edge_num').to_i
  end

  def likelihood
    get_field_value('likelihood').to_f
  end

  def like_weight_ratio
    get_field_value('like_weight_ratio').to_f
  end

  # Returns 0.0 if the stored value is nil.
  def distal_length
    (get_field_value('distal_length') || 0).to_f
  end

  # Returns 0.0 if the stored value is nil.
  def pendant_length
    (get_field_value('pendant_length') || 0).to_f
  end

  def to_s
    "#<Placement of #{n}: #{p}>"
  end

  private

  # Position of +field+ in the shared field order; aborts if not included.
  def field_index(field)
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    f
  end
end
|
74
|
+
|
75
|
+
# Ancillary class Tree
|
76
|
+
# Entry point to build a tree of nodes from a Newick string.
class Tree
  # Cached result of the iconv availability test (nil until first tested).
  @@HAS_ICONV = nil

  # Is the 'iconv' library available? It's tested only the first time.
  def self.has_iconv?
    return @@HAS_ICONV unless @@HAS_ICONV.nil?

    @@HAS_ICONV = true
    begin
      require 'rubygems'
      require 'iconv'
    rescue LoadError
      @@HAS_ICONV = false
    end
    @@HAS_ICONV
  end

  # Returns the root Node described by +nwk+. If iconv is available, the
  # text is first passed through a UTF-8 to UTF-8//IGNORE conversion.
  def self.from_nwk(nwk)
    if Tree.has_iconv?
      converter = Iconv.new('UTF-8//IGNORE','UTF-8')
      # A padding space is appended before the conversion and removed after.
      padded = converter.iconv(nwk + ' ')
      nwk = padded[0..-2]
    end
    Node.new(nwk)
  end
end
|
98
|
+
|
99
|
+
# Node.new(nwk[, parent]): Initializes a new Node.
|
100
|
+
# nwk: Node's description in Newick format.
|
101
|
+
# parent: Node's parent, or nil if root node.
|
102
|
+
# A node of the tree, parsed recursively from its Newick description where
# each edge is expected to carry its JPlace index between curly brackets.
class Node
  # Class-level registry of all the indexed nodes, by edge index. It's shared
  # by every Node created, so nodes of a new tree replace registered nodes
  # with the same index.
  @@edges = []

  def self.edges
    @@edges
  end

  # Registers +node+ under its edge index (unindexed nodes are ignored).
  def self.register(node)
    @@edges[node.index] = node unless node.index.nil?
  end

  # Class-level functions related to JPlace

  # Attaches +placement+ to the registered node of its edge; aborts if no
  # node is registered with that index.
  def self.link_placement(placement)
    abort "Trying to link placement in undefined edge #{placement.edge_num}: #{placement.to_s}" if @@edges[placement.edge_num].nil?
    @@edges[placement.edge_num].add_placement!(placement)
  end

  # Detaches +placement+ from the registered node of its edge and returns
  # the value returned by that node's delete_placement!.
  def self.unlink_placement(placement)
    @@edges[placement.edge_num].delete_placement!(placement)
  end

  # Instance
  attr_reader :children, :length, :name, :label, :index, :nwk, :parent, :placements, :collapsed

  # nwk:: Node's description in Newick format. The String passed by the
  #       caller is not modified.
  # parent:: Node's parent, or nil if root node.
  #
  # Aborts on empty input, unindexed edges (except for a root node ending
  # in ';'), unparseable metadata, or unbalanced parentheses.
  def initialize(nwk, parent=nil)
    abort "Empty newick.\n" if nwk.nil? || nwk == ''
    # Replace any ';' that is not the last character. A non-destructive gsub
    # is used so that the caller's String is not altered and frozen Strings
    # are accepted.
    nwk = nwk.gsub(/;(.)/, '--\1')
    @nwk = nwk
    @parent = parent
    @placements = []
    @collapsed = false
    # Find index
    index_m = /^(?<pre>.*){(?<idx>[0-9]+)}(?<post>[^\(\),;]*);?$/.match(nwk)
    if index_m.nil? && parent.nil? && nwk.end_with?(';')
      # Only a root node ending in ';' can be unindexed
      @index = nil
    else
      abort "Unindexed edge found:\n#{@nwk}\n" if index_m.nil?
      nwk = index_m[:pre] + index_m[:post]
      @index = index_m[:idx].to_i
    end
    # Find name, label, and length
    meta_m = /^(\((?<cont>.+)\))?(?<name>[^:\(\);]*)(:(?<length>[0-9\.Ee+-]*)(?<label>\[[^\[\]\(\);]+\])?)?;?$/.match(nwk)
    abort "Cannot parse node metadata (index #{@index}):\n#{@nwk}\n" if meta_m.nil?
    nwk = meta_m[:cont]
    @name = meta_m[:name]
    @length = meta_m[:length]
    @label = meta_m[:label]
    # Find children: repeatedly cut the remaining text at the first comma
    # that is outside parentheses and quotes.
    @children = []
    nwk ||= ''
    quote = nil
    while nwk != ''
      depth = 0 # Open parentheses; nil once the splitting comma is found
      cut = 0   # Number of characters belonging to the first child
      nwk.each_char do |chr|
        if quote.nil?
          if chr == '"' || chr == "'"
            quote = chr
          else
            depth += 1 if chr == '('
            depth -= 1 if chr == ')'
            if depth == 0 && chr == ','
              depth = nil
              break
            end
          end
        elsif chr == quote
          quote = nil
        end
        cut += 1
      end
      abort "Unbalanced node at edge {#{@index}}, with leftness #{depth}:\n#{@nwk}\n" unless depth.nil? || depth == 0
      @children << Node.new(nwk[0 .. cut-1], self)
      nwk = nwk.length == cut ? '' : nwk[cut+1 .. -1]
    end
    Node.register(self)
  end

  # Accessors/Setters

  # Sets the name replacing spaces and Newick-reserved characters by '_'.
  def name=(new_name)
    @name = new_name.gsub(/[\s\(\),;:]/, '_')
  end

  # Tree algorithms

  # Calls the block with each node of the subtree: children first.
  def post_order(&blk)
    children.each { |n| n.post_order(&blk) }
    blk[self]
  end

  # Calls the block with each node of the subtree: first child, the node
  # itself, and second child. Aborts on nodes with one or more than two
  # children.
  def in_order(&blk)
    unless [0, 2].include? children.length
      # `children.length` was misspelled here, turning this error message
      # into a NoMethodError.
      abort "Tree must be dycotomic to traverse in_order, node #{cannonical_name} " +
        "has #{children.length} children."
    end
    children[0].in_order(&blk) unless children[0].nil?
    blk[self]
    children[1].in_order(&blk) unless children[1].nil?
  end

  # Calls the block with each node of the subtree: the node itself first.
  def pre_order(&blk)
    blk[self]
    children.each { |n| n.pre_order(&blk) }
  end

  # Returns the nodes from this node (included) up to the root. The result
  # is cached.
  def path_to_root
    if @path_to_root.nil?
      @path_to_root = [self]
      @path_to_root += parent.path_to_root unless parent.nil?
    end
    @path_to_root
  end

  # Returns the sum of lengths along the path to the root, taking nodes
  # without length as 0.0. The result is cached.
  def distance_to_root
    if @distance_to_root.nil?
      @distance_to_root = path_to_root.map { |n| n.length.nil? ? 0.0 : n.length.to_f }.reduce(0.0, :+)
    end
    @distance_to_root
  end

  # Returns the first node in the path to the root that is also in the path
  # to the root of +node+.
  def lca(node)
    own_path = path_to_root
    other_path = node.path_to_root
    own_path.find { |n| other_path.include? n }
  end

  # Returns the distance between this node and +node+ through their +lca+.
  def distance(node)
    distance_to_root + node.distance_to_root - (2.0 * lca(node).distance_to_root)
  end

  # Two nodes are equal if they have the same index. Any other object
  # (including nil) is different from a node.
  def ==(node)
    node.is_a?(Node) && index == node.index
  end

  # Tree representation

  # Returns the first non-empty value among name, label, and "{index}", or
  # an empty string if there is none.
  def cannonical_name
    return(name) unless name.nil? || name == ""
    return(label) unless label.nil? || label == ""
    return("{#{index.to_s}}") unless index.nil?
    ""
  end

  # Returns the Newick-like representation of the subtree (without the
  # terminal ';').
  def to_s
    o = ""
    o += "(" + children.map { |c| c.to_s }.join(",") + ")" if children.length > 0
    o += cannonical_name
    u = "#{length.nil? ? "" : length}#{label.nil? ? "" : label}"
    o += ":#{u}" unless u == ""
    o
  end

  # Instance-level functions related to JPlace

  # Moves all the placements in the descendants to this node, and marks
  # this node as collapsed.
  def collapse!
    pre_order do |n|
      if n != self
        while n.placements.length > 0
          moved = Node.unlink_placement(n.placements[0])
          moved.set_field_value('edge_num', index)
          Node.link_placement(moved)
        end
      end
    end
    @collapsed = true
  end

  def add_placement!(placement)
    @placements << placement
  end

  def delete_placement!(placement)
    @placements.delete(placement)
  end
end
|
251
|
+
|
252
|
+
end # module JPlace
|
253
|
+
|