RubyGems - miga-base - Versions diffs - 1.2.15.2 → 1.2.15.4 - Mend

miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (306) hide show

checksums.yaml +4 -4
data/lib/miga/cli/action/download/gtdb.rb +4 -1
data/lib/miga/cli/action/gtdb_get.rb +4 -0
data/lib/miga/daemon.rb +4 -1
data/lib/miga/lair.rb +6 -4
data/lib/miga/remote_dataset/download.rb +3 -2
data/lib/miga/remote_dataset.rb +25 -7
data/lib/miga/taxonomy.rb +6 -0
data/lib/miga/version.rb +2 -2
metadata +6 -302
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
data/utils/FastAAI/FastAAI +0 -3659
data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
data/utils/FastAAI/README.md +0 -84
data/utils/enveomics/Docs/recplot2.md +0 -244
data/utils/enveomics/Examples/aai-matrix.bash +0 -66
data/utils/enveomics/Examples/ani-matrix.bash +0 -66
data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
data/utils/enveomics/LICENSE.txt +0 -73
data/utils/enveomics/Makefile +0 -52
data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
data/utils/enveomics/Manifest/Tasks/other.json +0 -906
data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
data/utils/enveomics/Manifest/categories.json +0 -165
data/utils/enveomics/Manifest/examples.json +0 -162
data/utils/enveomics/Manifest/tasks.json +0 -4
data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
data/utils/enveomics/README.md +0 -42
data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
data/utils/enveomics/Scripts/Chao1.pl +0 -97
data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
data/utils/enveomics/Scripts/FastA.length.pl +0 -38
data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
data/utils/enveomics/Scripts/FastA.split.pl +0 -55
data/utils/enveomics/Scripts/FastA.split.rb +0 -79
data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
data/utils/enveomics/Scripts/SRA.download.bash +0 -55
data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
data/utils/enveomics/Scripts/Table.barplot.R +0 -31
data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
data/utils/enveomics/Scripts/Table.filter.pl +0 -61
data/utils/enveomics/Scripts/Table.merge.pl +0 -77
data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
data/utils/enveomics/Scripts/Table.replace.rb +0 -69
data/utils/enveomics/Scripts/Table.round.rb +0 -63
data/utils/enveomics/Scripts/Table.split.pl +0 -57
data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
data/utils/enveomics/Scripts/aai.rb +0 -421
data/utils/enveomics/Scripts/ani.rb +0 -362
data/utils/enveomics/Scripts/anir.rb +0 -137
data/utils/enveomics/Scripts/clust.rand.rb +0 -102
data/utils/enveomics/Scripts/gi2tax.rb +0 -103
data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
data/utils/enveomics/Scripts/ogs.rb +0 -104
data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
data/utils/enveomics/Scripts/rbm.rb +0 -108
data/utils/enveomics/Scripts/sam.filter.rb +0 -148
data/utils/enveomics/Tests/Makefile +0 -10
data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
data/utils/enveomics/Tests/alkB.nwk +0 -1
data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
data/utils/enveomics/Tests/hiv1.faa +0 -59
data/utils/enveomics/Tests/hiv1.fna +0 -134
data/utils/enveomics/Tests/hiv2.faa +0 -70
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
data/utils/enveomics/build_enveomics_r.bash +0 -45
data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
data/utils/enveomics/enveomics.R/R/utils.R +0 -80
data/utils/enveomics/enveomics.R/README.md +0 -81
data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
data/utils/enveomics/globals.mk +0 -8
data/utils/enveomics/manifest.json +0 -9
data/utils/multitrim/Multitrim How-To.pdf +0 -0
data/utils/multitrim/README.md +0 -67
data/utils/multitrim/multitrim.py +0 -1555
data/utils/multitrim/multitrim.yml +0 -13

data/utils/enveomics/Scripts/ogs.annotate.rb DELETED Viewed

@@ -1,88 +0,0 @@
-#!/usr/bin/env ruby
-#
-# @author: Luis M. Rodriguez-R
-# @update: Mar-23-2015
-# @license: artistic license 2.0
-#
-$:.push File.expand_path(File.dirname(__FILE__) + '/lib')
-require 'enveomics_rb/og'
-require 'optparse'
-o = {:q=>FALSE, :f=>"(\\S+)\\.txt", :consolidate=>TRUE, :pre=>[]}
-ARGV << '-h' if ARGV.size==0
-OptionParser.new do |opts|
-   opts.banner = "
-Annotates Orthology Groups (OGs) using one or more reference genomes.
-Usage: #{$0} [options]"
-   opts.separator ""
-   opts.separator "Mandatory"
-   opts.on("-i", "--in FILE", "Input file containing the OGs (as generated by ogs.rb)."){ |v| o[:in]=v }
-   opts.on("-o", "--out FILE", "Output file containing the annotated OGs."){ |v| o[:out]=v }
-   opts.on("-a FILE1,FILE2,...", Array, "Input file(s) containing the annotations.One or more tab-delimited files",
-   		"with the gene names in the first column and the annotation in the second."){ |v| o[:annotations]=v }
-   opts.separator ""
-   opts.separator "Other Options"
-   opts.on("-f","--format STRING", "Format of the filenames for the annotation files, using regex syntax.",
-   		"By default: '#{o[:f]}'."){ |v| o[:f]=v }
-   opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = TRUE }
-   opts.on("-h", "--help", "Display this screen.") do
-      puts opts
-      exit
-   end
-   opts.separator ""
-end.parse!
-abort "-i is mandatory" if o[:in].nil?
-abort "-o is mandatory" if o[:out].nil?
-abort "-a is mandatory" if o[:annotations].nil?
-##### MAIN:
-begin
-   # Read the pre-computed OGs
-   collection = OGCollection.new
-   $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
-   f = File.open(o[:in], 'r')
-   h = f.gets.chomp.split /\t/
-   while ln = f.gets
-      collection << OG.new(h, ln.chomp.split(/\t/))
-   end
-   f.close
-   $stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]
-   # Read annotations
-   o[:annotations].each do |annot|
-      m = /#{o[:f]}/.match(annot)
-      if m.nil? or m[1].nil?
-	 warn "Cannot parse filename: #{rbm_file} (doesn't match /#{o[:f]}/)."
-	 next
-      end
-      f = File.open(annot, 'r')
-      no_og = 0
-      collection.add_note_src m[1]+' annotation'
-      while ln=f.gets
-	 r = ln.chomp.split /\t/
-	 g = Gene.new m[1], r[0]
-	 og = collection.get_og g
-	 if og.nil?
-	    no_og += 1
-	 else
-	    og.add_note g.id + ': ' + r[1], collection.note_srcs.length-1
-	 end
-      end
-      warn "Warning: Cannot find #{no_og} genes from #{m[1]} in OG collection." if no_og>0
-   end
-   # Save the output matrix
-   $stderr.puts "Saving annotated OGs into '#{o[:out]}'." unless o[:q]
-   f = File.open(o[:out], "w")
-   f.puts collection.to_s
-   f.close
-   $stderr.puts "Done.\n" unless o[:q]
-rescue => err
-   $stderr.puts "Exception: #{err}\n\n"
-   err.backtrace.each { |l| $stderr.puts l + "\n" }
-   err
-end

data/utils/enveomics/Scripts/ogs.core-pan.rb DELETED Viewed

@@ -1,160 +0,0 @@
-#!/usr/bin/env ruby
-#
-# @author: Luis M. Rodriguez-R
-# @license: artistic-2.0
-#
-$:.push File.expand_path("../lib", __FILE__)
-require "optparse"
-require "json"
-require "tmpdir"
-o = {q:false, n:100, thr:2}
-ARGV << "-h" if ARGV.size==0
-OptionParser.new do |opts|
-  opts.banner = "
-Subsamples the genomes in a set of Orthology Groups (OGs) and estimates the
-trend of core genome and pangenome sizes.
-Usage: #{$0} [options]"
-  opts.separator ""
-  opts.separator "Mandatory"
-  opts.on("-o", "--ogs FILE",
-    "Input file containing the precomputed OGs."){ |v| o[:ogs]=v }
-  opts.separator ""
-  opts.separator "Output Options"
-  opts.on("-s", "--summary FILE",
-    "Output file in tabular format with summary statistics."){ |v| o[:summ]=v }
-  opts.on("-t", "--tab FILE","Output file in tabular format."){ |v| o[:tab]=v }
-  opts.on("-j", "--json FILE", "Output file in JSON format."){ |v| o[:json]=v }
-  opts.separator ""
-  opts.separator "Other Options"
-  opts.on("-n", "--replicates INT",
-    "Number of replicates to estimate. By default: #{o[:n]}."
-    ){ |v| o[:n]=v.to_i }
-  opts.on("--threads INT",
-    "Children threads to spawn. By default: #{o[:thr]}."){ |v| o[:thr]=v.to_i}
-  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
-  opts.on("-h", "--help", "Display this screen.") do
-    puts opts
-    exit
-  end
-  opts.separator ""
-end.parse!
-abort "-o is mandatory" if o[:ogs].nil?
-##### MAIN:
-begin
-  # Read the pre-computed OGs
-  $stderr.puts "Reading pre-computed OGs in '#{o[:ogs]}'." unless o[:q]
-  bool_a = []
-  genomes_n = nil
-  File.open(o[:ogs], "r") do |f|
-    h = f.gets.chomp.split "\t"
-    genomes_n = h.size
-    while ln = f.gets
-      bool_a << ln.chomp.split("\t").map{ |g| g!="-" }
-    end
-  end
-  $stderr.puts " Loaded OGs: #{bool_a.size}." unless o[:q]
-  bool_a_b = bool_a.map{ |og| og.map{ |g| g ? "1" : "0" }.join("").to_i(2) }
-  # Generate subsamples
-  size = {core:[], pan:[]}
-  Dir.mktmpdir do |dir|
-    children = 0
-    (0 .. o[:n]-1).each do |i|
-      fork do
-        # Generate trajectory
-        genomes = (0 .. genomes_n-1).to_a.shuffle
-        genomes_b = (2 ** genomes_n) - 1
-        core = []
-        pan = []
-        while not genomes.empty?
-          core.unshift 0
-          pan.unshift 0
-          bool_a_b.map! do |og|
-            r_og = og & genomes_b
-            if r_og>0
-              core[0] += 1 if r_og==genomes_b
-              pan[0]  += 1
-              og
-            else
-              nil
-            end
-          end
-          bool_a_b.compact!
-          genomes_b ^= 2 ** genomes.pop
-        end
-        abort "UNEXPECTED ERROR: Final genomes_b=#{genomes_b}." if genomes_b>0
-        # Store trajectory
-        File.open("#{dir}/#{i}", "w") do |tfh|
-          tfh.puts JSON.generate({core:core, pan:pan})
-        end
-      end # fork
-      children += 1
-      if children >= o[:thr]
-        Process.wait
-        children -= 1
-      end
-    end
-    Process.waitall
-    # Recover trajectories
-    (0 .. o[:n]-1).each do |i|
-      s = JSON.parse(File.read("#{dir}/#{i}"), {:symbolize_names=>true})
-      size[:core][i] = s[:core]
-      size[:pan][i] = s[:pan]
-    end
-  end # Dir.mktmpdir
-  # Show result
-  $stderr.puts "Generating reports." unless o[:q]
-  # Save results in JSON
-  unless o[:json].nil?
-    ofh = File.open(o[:json], "w")
-    ofh.puts JSON.pretty_generate(size)
-    ofh.close
-  end
-  # Save results in tab
-  unless o[:tab].nil?
-    ofh = File.open(o[:tab], "w")
-    ofh.puts (%w{replicate metric}+(1 .. genomes_n).to_a).join("\t")
-    (0 .. o[:n]-1).each do |i|
-      ofh.puts ([i+1,"core"] + size[:core][i]).join("\t")
-      ofh.puts ([i+1,"pan"] + size[:pan][i]).join("\t")
-    end
-    ofh.close
-  end
-  # Save summary results in tab
-  unless o[:summ].nil?
-    ofh = File.open(o[:summ], "w")
-    ofh.puts %w{genomes core_avg core_sd core_q1 core_q2 core_q3
-      pan_avg pan_sd pan_q1 pan_q2 pan_q3}.join("\t")
-    (0 .. genomes_n-1).each do |i|
-      res = [ i+1 ]
-      [:core, :pan].each do |met|
-        a = size[met].map{ |r| r[i] }.sort
-        avg = a.inject(0,:+).to_f / a.size
-        var = a.map{ |v| v**2 }.inject(0,:+).to_f/a.size - avg**2
-        sd = Math.sqrt(var)
-        q1 = a[ a.size*1/4 ]
-        q2 = a[ a.size*2/4 ]
-        q3 = a[ a.size*3/4 ]
-        res += [avg,sd,q1,q2,q3]
-      end
-      ofh.puts res.join("\t")
-    end
-    ofh.close
-  end
-  $stderr.puts "Done.\n" unless o[:q]
-rescue => err
-  $stderr.puts "Exception: #{err}\n\n"
-  err.backtrace.each { |l| $stderr.puts l + "\n" }
-  err
-end

data/utils/enveomics/Scripts/ogs.extract.rb DELETED Viewed

@@ -1,125 +0,0 @@
-#!/usr/bin/env ruby
-#
-# @author Luis M. Rodriguez-R
-# @license artistic license 2.0
-#
-$:.push File.expand_path("lib", File.dirname(__FILE__))
-require "enveomics_rb/enveomics"
-require "enveomics_rb/og"
-o = {q:false, pergenome:false, prefix:false, first:false, rand:false,
-   core:0.0, dups:0}
-OptionParser.new do |opts|
-   opts.banner = "
-Extracts sequences of Orthology Groups (OGs) from genomes (proteomes).
-Usage: #{$0} [options]"
-   opts.separator ""
-   opts.separator "Mandatory"
-   opts.on("-i", "--in FILE",
-      "Input file containing the OGs (as generated by ogs.rb)."){ |v| o[:in]=v }
-   opts.on("-o", "--out FILE",
-      "Output directory where to place extracted sequences."){ |v| o[:out]=v }
-   opts.on("-s", "--seqs STRING",
-      "Path to the proteomes in FastA format, using '%s' to denote the genome.",
-      "For example: /path/to/seqs/%s.faa."){ |v| o[:seqs]=v }
-   opts.separator ""
-   opts.separator "Other Options"
-   opts.on("-c", "--core FLOAT",
-      "Use only OGs present in at least this fraction of the genomes.",
-      "To use only the strict core genome*, use -c 1."){ |v| o[:core]=v.to_f }
-   opts.on("-d", "--duplicates INT",
-      "Use only OGs with less than this number of in-paralogs in a genome.",
-      "To use only genes without in-paralogs*, use -d 1."
-      ){ |v| o[:dups]=v.to_i }
-   opts.on("-g", "--per-genome",
-      "If set, the output is generated per genome.",
-      "By default, the output is per OG."){ |v| o[:pergenome]=v }
-   opts.on("-p", "--prefix",
-      "If set, each sequence is prefixed with the genome name",
-      "(or OG number, if --per-genome) and a dash."){ |v| o[:prefix]=v }
-   opts.on("-r", "--rand",
-      "Get only one gene per genome per OG (random) regardless of in-paralogs.",
-      "By default all genes are extracted."){ |v| o[:rand]=v }
-   opts.on("-f", "--first",
-      "Get only one gene per genome per OG (first) regardless of in-paralogs.",
-      "By default all genes are extracted. Takes precendece over --rand."
-      ){ |v| o[:first]=v }
-   opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = TRUE }
-   opts.on("-h", "--help", "Display this screen.") do
-      puts opts
-      exit
-   end
-   opts.separator ""
-   opts.separator "    * To use only the unus genome (OGs with exactly one " +
-      "gene per genome), use: -c 1 -d 1."
-   opts.separator ""
-end.parse!
-abort "-i is mandatory" if o[:in].nil?
-abort "-o is mandatory" if o[:out].nil?
-abort "-s is mandatory" if o[:seqs].nil?
-##### MAIN:
-begin
-   # Read the pre-computed OGs
-   collection = OGCollection.new
-   $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
-   f = File.open(o[:in], "r")
-   h = f.gets.chomp.split /\t/
-   while ln = f.gets
-      collection << OG.new(h, ln.chomp.split(/\t/))
-   end
-   f.close
-   $stderr.puts " Loaded OGs: #{collection.ogs.size}." unless o[:q]
-   $stderr.puts " Reported Genomes: #{Gene.genomes.size}." unless o[:q]
-   # Filter core/in-paralog genes
-   collection.filter_core! o[:core] unless o[:core]==0.0
-   collection.remove_inparalogs! o[:dups] unless o[:dups]==0
-   $stderr.puts " Filtered OGs: #{collection.ogs.size}." unless
-      o[:q] or o[:core]==0.0
-   # Open outputs
-   $stderr.puts "Initializing output files." unless o[:q]
-   Dir.mkdir(o[:out]) unless Dir.exist? o[:out]
-   ofhs = o[:pergenome] ?
-      Gene.genomes.map{|g| File.open("#{o[:out]}/#{g}.fa", "w")} :
-      ( (1 .. collection.ogs.size).map do |og|
-	 File.open("#{o[:out]}/OG#{og}.fa", "w")
-      end )
-   $stderr.puts " Created files: #{ofhs.size}." unless o[:q]
-   # Read genomes
-   $stderr.puts "Filtering genes." unless o[:q]
-   genome_i = -1
-   Gene.genomes.each do |genome|
-      genome_i = Gene.genomes.index(genome)
-      $stderr.print "  Genome #{genome_i+1}.   \r" unless o[:q]
-      genes = ( collection.get_genome_genes(genome).map do |og|
-	    o[:first] ? [og.first] : (o[:rand] ? [og.sample] : og)
-	 end )
-      hand = nil
-      File.open(sprintf(o[:seqs], genome), "r").each do |ln|
-	 if ln =~ /^>(\S+)/
-	    og = genes.index{|g| g.include? $1}
-	    hand = og.nil? ? nil : ( o[:pergenome] ? genome_i : og )
-	    ln.sub!(/^>/, ">#{o[:pergenome] ? "OG#{og}" : genome}-") if
-	       o[:prefix] and not hand.nil?
-	 end
-	 ofhs[hand].puts(ln) unless hand.nil?
-      end
-   end
-   $stderr.puts "  #{genome_i+1} genomes processed." unless o[:q]
-   # Close outputs
-   $stderr.puts "Closing output files." unless o[:q]
-   ofhs.each{|h| h.close}
-   $stderr.puts "Done.\n" unless o[:q]
-rescue => err
-   $stderr.puts "Exception: #{err}\n\n"
-   err.backtrace.each { |l| $stderr.puts l + "\n" }
-   err
-end

data/utils/enveomics/Scripts/ogs.mcl.rb DELETED Viewed

@@ -1,186 +0,0 @@
-#!/usr/bin/env ruby
-#
-# @author: Luis M. Rodriguez-R
-# @update: Sep-11-2015
-# @license: artistic license 2.0
-#
-$:.push File.expand_path(File.dirname(__FILE__) + "/lib")
-require 'enveomics_rb/og'
-require 'optparse'
-require 'tmpdir'
-o = {q:false, f:"(\\S+)-(\\S+)\\.rbm", mcl:"", inflation:1.5, blind:false,
-   evalue:false, thr:2, identity:false, bestmatch:false}
-ARGV << "-h" if ARGV.size==0
-OptionParser.new do |opts|
-   opts.banner = "
-Identifies Orthology Groups (OGs) in Reciprocal Best Matches (RBM)
-between all pairs in a collection of genomes, using the Markov Cluster
-Algorithm.
-Requires MCL (see http://www.micans.org/mcl).
-Usage: #{$0} [options]"
-   opts.separator ""
-   opts.separator "Mandatory"
-   opts.on("-o", "--out FILE",
-      "Output file containing the detected OGs."){ |v| o[:out]=v }
-   opts.on("-d", "--dir DIR",
-      "Directory containing the RBM files.",
-      "Becomes optional iff --abc is set to a non-empty file."){ |v| o[:dir]=v }
-   opts.separator ""
-   opts.separator "Other Options"
-   opts.on("-f", "--format STRING",
-      "Format of the filenames for the RBM files (within -d), using regex " +
-      "syntax.", "By default: '#{o[:f]}'."){ |v| o[:f]=v }
-   opts.on("-I", "--inflation FLOAT",
-      "Inflation parameter for MCL clustering. By default: #{o[:inflation]}."
-      ){ |v| o[:inflation]=v.to_f }
-   opts.on("-b", "--blind",
-      "If set, computes clusters without taking bitscore into account."
-      ){ |v| o[:blind]=v }
-   opts.on("-e", "--evalue",
-      "If set, uses the e-value to weight edges, instead of the default " +
-      "Bit-Score."){ |v| o[:evalue]=v }
-   opts.on("-i", "--identity",
-      "If set, uses the identity to weight edges, instead of the default " +
-      "Bit-Score."){ |v| o[:identity]=v }
-   opts.on("-B", "--best-match",
-      "If set, it assumes best-matches instead reciprocal best matches."
-      ){ |v| o[:bestmatch]=v }
-   opts.on("-m", "--mcl-bin DIR",
-      "Path to the directory containing the mcl binaries.",
-      "By default, assumed to be in the PATH."){ |v| o[:mcl]=v+"/" }
-   opts.on("--abc FILE",
-      "Use this abc file instead of a temporal file."){ |v| o[:abc] = v }
-   opts.on("-t", "--threads INT",
-      "Number of threads to use. By default: #{o[:thr]}."){ |v| o[:thr]=v.to_i }
-   opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
-   opts.on("-h", "--help", "Display this screen.") do
-      puts opts
-      exit
-   end
-   opts.separator ""
-end.parse!
-abort "-o is mandatory" if o[:out].nil?
-o[:evalue] = false if o[:identity]
-o[:evalue] = false if o[:blind]
-o[:identity] = false if o[:blind]
-##### MAIN:
-begin
-   Dir.mktmpdir do |dir|
-      o[:abc] = "#{dir}/rbms.abc" if o[:abc].nil?
-      abort "-d must exist and be a directory" unless
-	 File.size?(o[:abc]) or
-	 (!o[:dir].nil? and File.exists?(o[:dir]) and File.directory?(o[:dir]))
-      # Traverse the whole directory
-      if File.size? o[:abc]
-	 $stderr.puts "Reusing existing abc file '#{o[:abc]}'." unless o[:q]
-      else
-	 file_i = 0
-	 ln_i = 0
-	 $stderr.puts "Reading RBM files within '#{o[:dir]}'." unless o[:q]
-	 abc = File.open(o[:abc] + ".tmp", "w")
-	 Dir.entries(o[:dir]).each do |rbm_file|
-	    next unless File.file?(o[:dir]+"/"+rbm_file)
-	    # Parse the filename to identify the genomes
-	    m = /#{o[:f]}/.match(rbm_file)
-	    if m.nil? or m[2].nil?
-	       warn "Ignoring #{rbm_file}: doesn't match /#{o[:f]}/."
-	       next
-	    end
-	    file_i += 1
-	    # Read the RBMs list
-	    f = File.open(o[:dir]+"/"+rbm_file, "r")
-	    while ln = f.gets
-	       # Add the RBM to the abc file
-	       row = ln.split(/\t/)
-	       abc.puts [m[1]+">"+row[0], m[2]+">"+row[1],
-		  (o[:blind] ? "1" :
-		  (o[:evalue] ? row[10] :
-		  (o[:identity] ? row[2] : row[11])))].join("\t")
-	       ln_i += 1
-	    end
-	    f.close
-	    $stderr.print " Scanned files: #{file_i}. " +
-	       "Found RBMs: #{ln_i}.   \r" unless o[:q]
-	 end
-	 abc.close
-	 File.rename(o[:abc] + ".tmp", o[:abc])
-	 $stderr.print "\n" unless o[:q]
-      end # if File.size? o[:abc] ... else
-      # Build .mci file (mcxload) and compute .mccl file (mcl)
-      $stderr.puts "Markov-Clustering" unless o[:q]
-      `'#{o[:mcl]}mcxload' #{"--stream-mirror" unless o[:bestmatch]} \
-	 -abc '#{o[:abc]}' -o '#{dir}/rbms.mci' --write-binary \
-	 -write-tab '#{dir}/genes.tab' #{"--stream-neg-log10" if o[:evalue]} \
-	 &>/dev/null`
-      `'#{o[:mcl]}mcl' '#{dir}/rbms.mci' -V all -I #{o[:inflation].to_s} \
-	 -o '#{dir}/ogs.mcl' -te #{o[:thr].to_s}`
-      # Load .tab as Gene objects
-      $stderr.puts "Loading gene table from '#{dir}/genes.tab'." unless o[:q]
-      genes = []
-      tab = File.open("#{dir}/genes.tab", "r")
-      while ln = tab.gets
-	 ln.chomp!
-	 r = ln.split /\t|>/
-	 genes[ r[0].to_i ] = Gene.new(r[1], r[2])
-      end
-      tab.close
-      $stderr.puts " Got " + genes.size.to_s + " genes in " +
-	 Gene.genomes.size.to_s + " genomes." unless o[:q]
-      # Load .mcl file as OGCollection
-      $stderr.puts "Loading clusters from '#{dir}/ogs.mcl'." unless o[:q]
-      collection = OGCollection.new
-      mcl = File.open("#{dir}/ogs.mcl", "r")
-      in_matrix = false
-      my_genes = nil
-      while ln = mcl.gets
-         ln.chomp!
-	 if ln =~ /^\(mclmatrix$/
-	    in_matrix = true
-	    next
-	 end
-	 next if ln =~ /^begin$/
-	 if in_matrix
-	    break if ln =~ /^\)$/
-	    if ln =~ /^\d+\s+/
-	       ln.sub!(/^\d+\s+/, "")
-	       my_genes = []
-	    end
-	    ln.sub!(/^\s+/, "")
-	    raise "Incomplete mcl matrix, offending line: #{$.}: #{ln}" if
-	       my_genes.nil?
-	    my_genes += ln.split(/\s/)
-	    if my_genes.last == "$"
-	       my_genes.pop
-	       og = OG.new
-	       my_genes.each{|i| og << genes[ i.to_i ]}
-	       collection << og
-	       my_genes = nil
-	    end
-	 end
-      end
-      mcl.close
-      $stderr.puts " Got #{collection.ogs.size} clusters." unless o[:q]
-      # Save the output matrix
-      $stderr.puts "Saving matrix into '#{o[:out]}'." unless o[:q]
-      f = File.open(o[:out], "w")
-      f.puts collection.to_s
-      f.close
-      $stderr.puts "Done.\n" unless o[:q]
-   end
-rescue => err
-   $stderr.puts "Exception: #{err}\n\n"
-   err.backtrace.each { |l| $stderr.puts l + "\n" }
-   err
-end

data/utils/enveomics/Scripts/ogs.rb DELETED Viewed

@@ -1,104 +0,0 @@
-#!/usr/bin/env ruby
-#
-# @author: Luis M. Rodriguez-R
-# @update: Apr-29-2015
-# @license: artistic license 2.0
-#
-$:.push File.expand_path(File.dirname(__FILE__) + '/lib')
-require 'enveomics_rb/og'
-require 'optparse'
-o = {:q=>FALSE, :f=>"(\\S+)-(\\S+)\\.rbm", :consolidate=>TRUE, :pre=>[]}
-ARGV << '-h' if ARGV.size==0
-OptionParser.new do |opts|
-   opts.banner = "
-***IMPORTANT NOTE***
-This script suffers from chaining effect and is very sensitive to spurious connections,
-because it applies a greedy clustering algorithm. For most practical purposes, the use
-of this script is discouraged and `ogs.mcl.rb` should be preferred. [ Apr-29-2015 ]
-Identifies Orthology Groups (OGs) in Reciprocal Best Matches (RBM)
-between all pairs in a collection of genomes.
-Usage: #{$0} [options]"
-   opts.separator ""
-   opts.separator "Mandatory"
-   opts.on("-o", "--out FILE", "Output file containing the detected OGs."){ |v| o[:out]=v }
-   opts.separator ""
-   opts.separator "Other Options"
-   opts.on("-d", "--dir DIR", "Directory containing the RBM files."){ |v| o[:dir]=v }
-   opts.on("-p", "--pre-ogs FILE1,FILE2,...", Array, "Pre-computed OGs file(s), separated by commas."){ |v| o[:pre]=v }
-   opts.on("-n", "--unchecked", "Do not check internal redundancy in OGs."){ o[:consolidate]=FALSE }
-   opts.on("-f","--format STRING", "Format of the filenames for the RBM files (within -d), using regex syntax. By default: '#{o[:f]}'."){ |v| o[:f]=v }
-   opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = TRUE }
-   opts.on("-h", "--help", "Display this screen.") do
-      puts opts
-      exit
-   end
-   opts.separator ""
-end.parse!
-abort "-o is mandatory" if o[:out].nil?
-##### MAIN:
-begin
-   # Initialize the collection of OGs.
-   collection = OGCollection.new
-   # Read the pre-computed OGs (if -p is passed).
-   o[:pre].each do |pre|
-      $stderr.puts "Reading pre-computed OGs in '#{pre}'." unless o[:q]
-      f = File.open(pre, 'r')
-      h = f.gets.chomp.split /\t/
-      while ln = f.gets
-	 collection << OG.new(h, ln.chomp.split(/\t/))
-      end
-      f.close
-      $stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]
-   end
-   # Read the RBM files in the directory (if -d is passed).
-   unless o[:dir].nil?
-      abort "-d must exist and be a directory" unless File.exists?(o[:dir]) and File.directory?(o[:dir])
-      # Traverse the whole directory.
-      file_i = 0
-      $stderr.puts "Reading RBM files within '#{o[:dir]}'." unless o[:q]
-      Dir.entries(o[:dir]).each do |rbm_file|
-	 next unless File.file?(o[:dir]+"/"+rbm_file)
-	 # Parse the filename to identify the genomes.
-	 m = /#{o[:f]}/.match(rbm_file)
-	 if m.nil? or m[2].nil?
-	    warn "Cannot parse filename: #{rbm_file} (doesn't match /#{o[:f]}/)."
-	    next
-	 end
-	 file_i += 1
-	 # Read the RBMs list
-	 f = File.open(o[:dir]+"/"+rbm_file, "r")
-	 while ln = f.gets
-	    # Add the RBM to the collection of OGs. Only the first two columns are used.
-	    row = ln.split(/\t/)
-	    collection.add_rbm( Gene.new(m[1],row[0]), Gene.new(m[2],row[1]) )
-	 end
-	 f.close
-	 $stderr.print " Scanned files: #{file_i}. Found OGs: #{collection.ogs.length}.   \r" unless o[:q]
-      end
-      $stderr.print "\n" unless o[:q]
-   end
-   # Evaluate internal consistency merging linked OGs (unless -n is passed).
-   if o[:consolidate]
-      $stderr.puts "Evaluating internal consistency." unless o[:q]
-      collection.consolidate!
-      $stderr.puts " Final OGs: #{collection.ogs.length}." unless o[:q]
-   end
-   # Save the output matrix
-   $stderr.puts "Saving matrix into '#{o[:out]}'." unless o[:q]
-   f = File.open(o[:out], "w")
-   f.puts collection.to_s
-   f.close
-   $stderr.puts "Done.\n" unless o[:q]
-rescue => err
-   $stderr.puts "Exception: #{err}\n\n"
-   err.backtrace.each { |l| $stderr.puts l + "\n" }
-   err
-end