miga-base 1.2.15.2 → 1.2.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/gtdb.rb +4 -1
- data/lib/miga/cli/action/gtdb_get.rb +4 -0
- data/lib/miga/daemon.rb +4 -1
- data/lib/miga/lair.rb +6 -4
- data/lib/miga/remote_dataset/download.rb +3 -2
- data/lib/miga/remote_dataset.rb +25 -7
- data/lib/miga/taxonomy.rb +6 -0
- data/lib/miga/version.rb +2 -2
- metadata +6 -302
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI +0 -3659
- data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
- data/utils/FastAAI/README.md +0 -84
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -906
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -165
- data/utils/enveomics/Manifest/examples.json +0 -162
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -55
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -421
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/anir.rb +0 -137
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
- data/utils/enveomics/Scripts/rbm.rb +0 -108
- data/utils/enveomics/Scripts/sam.filter.rb +0 -148
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -80
- data/utils/enveomics/enveomics.R/README.md +0 -81
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +0 -67
- data/utils/multitrim/multitrim.py +0 -1555
- data/utils/multitrim/multitrim.yml +0 -13
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
# @author: Luis M. Rodriguez-R
|
|
3
|
-
# @update: Nov-29-2012
|
|
4
|
-
|
|
5
|
-
kSelector <- function(file, lib){
  # Plots, for one assembler, the reads used (bars) and the N50 (line) per
  # k-mer, and marks up to three suggested k-mers with vertical lines.
  #
  # file: Path to a space-separated table with a header row. The columns
  #       K, N50, used and reads are the ones accessed here.
  # lib:  Label appended to the plot title.
  #
  # Returns the table restricted to rows with non-missing N50 and used,
  # sorted by K. When it has three or more rows, a logical column `sel`
  # flags the suggested k-mers.
  n50.col <- rgb(0.6, 0, 0)

  d <- read.table(file, sep=" ", header=TRUE, fill=TRUE)
  complete <- !is.na(d$N50) & !is.na(d$used)
  d <- d[complete, ]
  # Every row gets the largest read count seen in the table
  d$reads <- max(d$reads, na.rm=TRUE)
  d <- d[order(d$K), ]
  rownames(d) <- 1:nrow(d)

  # Bars: total reads (white) with the used reads (grey) drawn on top
  par(mar=c(5,4,4,5)+.1, cex=0.8)
  plot.title <- paste('Reads used and N50 by K-mers in the assembly of', lib)
  barplot(d$reads/1e6, names.arg=d$K, col='white',
          ylab='Number of reads (in millions)', xlab='K', main=plot.title)
  barplot(d$used/1e6, col='grey', add=TRUE)

  # Line: N50 on a secondary axis drawn at the right-hand side
  par(new=TRUE)
  n.k <- length(d$K)
  plot((1:n.k) - 0.5, d$N50, col=n50.col, type='b', lty=2, pch=20, cex=1,
       lwd=1.5, xlim=c(0, n.k), xaxt='n', yaxt='n', xlab='', ylab='')
  axis(4, col.axis=n50.col)
  mtext('N50 (bp)', side=4, line=3, col=n50.col)

  # Suggest best k-mers
  if(nrow(d) >= 3){
    # Standardized N50 (l) and used reads (u) per k-mer
    z.n50  <- (d$N50 - mean(d$N50))/sd(d$N50)
    z.used <- (d$used - mean(d$used))/sd(d$used)
    candidates <- data.frame(K=d$K, l=z.n50, u=z.used)
    rownames(candidates) <- rownames(d)
    d$sel <- FALSE
    picked <- c()
    # Three rounds, each weighting N50 differently against used reads; the
    # winner of a round is removed before the next one
    for(n50.weight in c(2, 1/2, 1)){
      score <- n50.weight*candidates$l + candidates$u
      winner <- candidates$K[which.max(score)]
      picked <- c(picked, winner)
      candidates <- candidates[candidates$K != winner, ]
      d$sel[d$K == winner] <- TRUE
    }
    abline(v=as.numeric(rownames(d)[d$sel]) - 0.5, col='darkgreen', lty=6)
  }
  return(d)
}
|
|
37
|
-
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
#PBS -l nodes=1:ppn=1
#PBS -k oe

# Combines previously generated Velvet and/or SOAP contigs into one Newbler
# project and runs it.
#
# Environment variables read by this script:
#   LIB           Library name, used as prefix of every input and output
#                 (mandatory).
#   PDIR          Directory containing FastA.filterN.pl and
#                 newbler_preparator.pl (mandatory).
#   KVELVET       Colon-separated k-mers of the Velvet runs to include; reads
#                 $LIB.velvet_<K>/contigs.fa for each.
#   KSOAP         Colon-separated k-mers of the SOAP runs to include; reads
#                 $LIB.soap_<K>/O.contig for each.
#                 At least one of KVELVET and KSOAP must be set.
#   BIN454        Directory appended to PATH before calling newAssembly,
#                 addRun and runProject (default: $HOME/454/bin).
#   PBS_NODEFILE  File whose number of lines is passed to runProject -cpu.
#
# The current stage is written to $LIB.newbler.proc and the output of the
# Newbler commands to $LIB.newbler.log, both in the working directory.

# Some defaults for the parameters
BIN454=${BIN454:-"$HOME/454/bin"}

# Check mandatory variables
if [[ -z "$LIB" ]]; then
  echo "Error: LIB is mandatory" >&2
  exit 1
fi
if [[ -z "$PDIR" ]]; then
  echo "Error: PDIR is mandatory" >&2
  exit 1
fi
if [[ -z "$KVELVET$KSOAP" ]]; then
  echo "Error: KVELVET and/or KSOAP are mandatory" >&2
  exit 1
fi

# Prepare input
# Colons become blanks so each list can be split into an array of k-mers
read -r -a VELVET_KS <<< "${KVELVET//:/ }"
read -r -a SOAP_KS <<< "${KSOAP//:/ }"
NP=$(wc -l < "$PBS_NODEFILE")
CWD=$(pwd)
DIR="$CWD/$LIB.newbler"
LOG="$DIR.log"
module load perl/5.14.4
export PATH="$PATH:$BIN454"

# Create project
echo new > "$DIR.proc"
nohup newAssembly "$DIR" > "$LOG"

# Prepare Velvet
if (( ${#VELVET_KS[@]} > 0 )); then
  echo pre-velvet > "$DIR.proc"
  # Start from an empty concatenation file; -f keeps this quiet when absent
  rm -f -- "$LIB.velvet.tmp1"
  for K in "${VELVET_KS[@]}"; do
    perl "$PDIR/FastA.filterN.pl" "$LIB.velvet_$K/contigs.fa" \
      >> "$LIB.velvet.tmp1"
  done
  perl "$PDIR/newbler_preparator.pl" "$LIB.velvet.tmp1" "$LIB.velvet.tmp2"
  # addRun is called from inside the project directory; abort rather than
  # run it somewhere else if the directory cannot be entered
  cd "$DIR" || exit 1
  nohup addRun "../$LIB.velvet.tmp2" >> "$LOG"
  cd "$CWD" || exit 1
fi

# Prepare SOAP
if (( ${#SOAP_KS[@]} > 0 )); then
  echo pre-soap > "$DIR.proc"
  rm -f -- "$LIB.soap.tmp1"
  for K in "${SOAP_KS[@]}"; do
    cat "$LIB.soap_$K/O.contig" >> "$LIB.soap.tmp1"
  done
  perl "$PDIR/newbler_preparator.pl" "$LIB.soap.tmp1" "$LIB.soap.tmp2"
  cd "$DIR" || exit 1
  nohup addRun "../$LIB.soap.tmp2" >> "$LOG"
  cd "$CWD" || exit 1
fi

# Run
cd "$DIR" || exit 1
echo newbler > "$DIR.proc"
nohup runProject -cpu "$NP" >> "$LOG"
cd "$CWD" || exit 1
echo done > "$DIR.proc"
|
|
68
|
-
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/perl

# Rewrites a FastA file so that no sequence is 1500 or longer:
#  - sequences shorter than 100 are dropped,
#  - sequences shorter than 1500 are copied unchanged,
#  - any other sequence is written as overlapping windows of up to 1500
#    positions starting every 200 positions, named "<header>:r<i>".

my ($in, $out) = @ARGV;
($in and $out) or die "
Usage: $0 input.fa output.fa
";

open(my $in_fh, "<", $in) or die "Cannot read file: $in: $!\n";
open(my $out_fh, ">", $out) or die "Cannot create file: $out: $!\n";

# Load every record, remembering the order in which headers appear
my %seq_of = ();
my @order = ();
my $current;
while (my $line = <$in_fh>) {
  chomp $line;
  if ($line =~ /^\>/) {
    $current = $line;
    $seq_of{$current} = '';
    push @order, $current;
  } else {
    # Sequence lines are joined without separators
    $seq_of{$current} .= $line;
  }
}
close($in_fh);

foreach my $header (@order) {
  my $seq = $seq_of{$header};
  my $len = length $seq;
  next if $len < 100;

  if ($len < 1500) {
    print {$out_fh} "$header\n$seq\n";
    next;
  }

  # Sliding windows; the last window starts before position $len - 100
  my $piece = 1;
  for (my $offset = 0; $offset < $len - 100; $offset += 200) {
    my $window_name = $header . ':r' . $piece;
    $piece++;
    my $window = substr($seq, $offset, 1500);
    print {$out_fh} "$window_name\n$window\n";
  }
}
close($out_fh);
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
#PBS -k oe

# Runs SOAPdenovo-63mer for one k-mer of one library.
#
# Environment variables read by this script:
#   LIB           Library name (mandatory).
#   PDIR          Directory containing FastA.length.pl (mandatory).
#   DATA          Directory containing $LIB.CoupledReads.fa and/or
#                 $LIB.SingleReads.fa (mandatory).
#   PBS_ARRAYID   K-mer to use.
#   PBS_NODEFILE  File whose number of lines is passed to SOAPdenovo -p.
#   INSLEN        Value written as avg_ins (default: 300).
#   USECOUPLED    "yes" to use the coupled reads (default: yes).
#   USESINGLE     "yes" to use the single reads (default: no).
#   CLEANUP       Anything but "no" removes intermediate files on success
#                 (default: yes).
#   MAXRDLEN      Value written as max_rd_len. If unset, it is the largest
#                 second column in the first 200000 lines printed by
#                 FastA.length.pl.
#
# The current stage is written to $LIB.soap_<K>.proc, the configuration to
# $LIB.soap_<K>.config and the SOAPdenovo output to $LIB.soap_<K>.log.

# Some defaults for the parameters
INSLEN=${INSLEN:-300}
USECOUPLED=${USECOUPLED:-yes}
USESINGLE=${USESINGLE:-no}
CLEANUP=${CLEANUP:-yes}

# Check mandatory variables
if [[ -z "$LIB" ]]; then
  echo "Error: LIB is mandatory" >&2
  exit 1
fi
if [[ -z "$PDIR" ]]; then
  echo "Error: PDIR is mandatory" >&2
  exit 1
fi
if [[ -z "$DATA" ]]; then
  echo "Error: DATA is mandatory" >&2
  exit 1
fi

# Prepare input
module load perl/5.14.4
KMER=$PBS_ARRAYID
DIR="$LIB.soap_$KMER"
COUPLED_FA="$DATA/$LIB.CoupledReads.fa"
SINGLE_FA="$DATA/$LIB.SingleReads.fa"
# The read length is estimated from the coupled reads when they are in use,
# and from the single reads otherwise
if [[ "$USECOUPLED" == "yes" ]]; then
  LENGTH_SOURCE="$COUPLED_FA"
elif [[ "$USESINGLE" == "yes" ]]; then
  LENGTH_SOURCE="$SINGLE_FA"
else
  echo "$0: Error: Nothing to do, neither USECOUPLED nor USESINGLE set to yes." >&2
  exit 2
fi
if [[ -z "$MAXRDLEN" ]]; then
  MAXRDLEN=$(perl "$PDIR/FastA.length.pl" "$LENGTH_SOURCE" \
    | head -n 200000 \
    | awk '{if($2>MAX) MAX=$2} END{print MAX}')
fi
NP=$(wc -l < "$PBS_NODEFILE")

# Config
module load SOAP/denovo2/r240
echo config > "$DIR.proc"
{
  printf 'max_rd_len=%s\n' "$MAXRDLEN"
  printf '%s\n' '[LIB]' 'reverse_seq=0' 'asm_flag=3' 'rank=1'
  if [[ "$USECOUPLED" == "yes" ]]; then
    printf 'avg_ins=%s\n' "$INSLEN"
    printf 'p=%s\n' "$COUPLED_FA"
  fi
  if [[ "$USESINGLE" == "yes" ]]; then
    printf 'f=%s\n' "$SINGLE_FA"
  fi
} > "$DIR.config"

# Run
echo pre > "$DIR.proc"
# Always start from an empty output directory
if [[ -d "$DIR" ]]; then
  rm -R -- "$DIR"
fi
mkdir "$DIR"
echo soap > "$DIR.proc"
SOAPdenovo-63mer all -s "$DIR.config" -p "$NP" -K "$KMER" -o "$DIR/O" \
  &> "$DIR.log"

# Success is judged by the presence of a non-empty contig file
if [[ ! -d "$DIR" ]]; then
  echo "$0: Error: Directory $DIR doesn't exist, something went wrong" >&2
  exit 1
fi
if [[ ! -s "$DIR/O.contig" ]]; then
  echo "$0: Error: File $DIR/O.contig doesn't exist, something went wrong" >&2
  exit 1
fi
if [[ "$CLEANUP" != "no" ]]; then
  echo cleanup > "$DIR.proc"
  # -f: a pattern without matches is not an error
  rm -f -- "$DIR"/*edge
  rm -f -- "$DIR"/*vertex
  rm -f -- "$DIR"/*Arc*
  rm -f -- "$DIR"/*Graph*
  rm -f -- "$DIR"/*readInGap*
fi
echo done > "$DIR.proc"
|
|
80
|
-
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
#PBS -q iw-shared-6
#PBS -l nodes=1:ppn=1
#PBS -l mem=1gb
#PBS -l walltime=3:00:00
#PBS -k oe

# Summarizes the Velvet and SOAP assemblies of one library for the odd
# k-mers from 21 to 63 and plots the summary with kSelector.R.
#
# Environment variables read by this script:
#   LIB   Library name, used as prefix of every input and output (mandatory).
#   PDIR  Directory containing FastA.N50.pl and kSelector.R (mandatory).
#
# Outputs, in the working directory:
#   $LIB.velvet.n50, $LIB.soap.n50  One row per k-mer: K, N50, used, reads.
#   $LIB.n50.pdf                    Plots produced by kSelector.

# Check mandatory variables
if [[ -z "$LIB" ]]; then
  echo "Error: LIB is mandatory" >&2
  exit 1
fi
if [[ -z "$PDIR" ]]; then
  echo "Error: PDIR is mandatory" >&2
  exit 1
fi

# Run
module load perl/5.14.4
echo "K N50 used reads " > "$LIB.velvet.n50"
echo "K N50 used reads " > "$LIB.soap.n50"
for (( KMER = 21; KMER <= 63; KMER += 2 )); do
  DIRV="$LIB.velvet_$KMER"
  DIRS="$LIB.soap_$KMER"
  ROWV="$LIB.velvet.n50.$KMER"
  ROWS="$LIB.soap.n50.$KMER"
  echo "$KMER" > "$ROWV"
  echo "$KMER" > "$ROWS"
  # N50 (>=500)
  perl "$PDIR/FastA.N50.pl" "$DIRV/contigs.fa" 500 \
    | grep '^N50' | sed -e 's/.*: //' >> "$ROWV"
  perl "$PDIR/FastA.N50.pl" "$DIRS/O.contig" 500 \
    | grep '^N50' | sed -e 's/.*: //' >> "$ROWS"
  # Used and Total reads
  # Velvet: taken from the "using <used>/<total> reads" text on the last
  # line of the log. The group escapes must be single backslashes inside
  # single quotes; doubled, sed takes the "/" after the first group as the
  # end of the pattern and rejects the command. Both counts are printed on
  # one line because the row is joined with blanks below anyway, and
  # nothing is printed when the line does not match.
  tail -n 1 "$DIRV/Log" \
    | sed -n -e 's/.* using \([0-9]*\)\/\([0-9]*\) reads.*/\1 \2/p' \
    >> "$ROWV"
  # SOAP: used reads are the lines of O.readOnContig not starting with
  # "read"; total reads are the third field of the first line of O.peGrads
  if [[ -e "$DIRS/O.readOnContig" ]]; then
    grep -vc '^read' "$DIRS/O.readOnContig" >> "$ROWS"
  elif [[ -e "$DIRS/O.readOnContig.gz" ]]; then
    zcat "$DIRS/O.readOnContig.gz" | grep -vc '^read' >> "$ROWS"
  else
    echo 0 >> "$ROWS"
  fi
  head -n 1 "$DIRS/O.peGrads" | awk '{print $3}' >> "$ROWS"
  # Join
  { tr '\n' ' ' < "$ROWV"; echo; } >> "$LIB.velvet.n50"
  rm -- "$ROWV"
  { tr '\n' ' ' < "$ROWS"; echo; } >> "$LIB.soap.n50"
  rm -- "$ROWS"
done

# Create plot
module load R/3.1.2
echo "
source('$PDIR/kSelector.R');
pdf('$LIB.n50.pdf', 13, 7);
kSelector('$LIB.velvet.n50', '$LIB (Velvet)');
kSelector('$LIB.soap.n50', '$LIB (SOAP)');
dev.off();
" | R --vanilla -q
|
|
57
|
-
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
#PBS -l nodes=1:ppn=1
#PBS -k oe

# Runs velveth and velvetg for one k-mer of one library.
#
# Environment variables read by this script:
#   LIB            Library name (mandatory).
#   PDIR           Mandatory, although not otherwise used in this script.
#   DATA           Directory containing $LIB.CoupledReads.fa and/or
#                  $LIB.SingleReads.fa (mandatory).
#   PBS_ARRAYID    K-mer to use.
#   FORMAT         Passed to velveth as -$FORMAT (default: fasta).
#   INSLEN         Passed to velvetg as -ins_length (default: 300).
#   USECOUPLED     "yes" to pass the coupled reads as -shortPaired
#                  (default: yes).
#   USESINGLE      "yes" to pass the single reads as -short (default: no).
#   CLEANUP        Anything but "no" removes intermediate files on success
#                  (default: yes).
#   VELVETH_EXTRA  Additional velveth arguments, split on blanks.
#   VELVETG_EXTRA  Additional velvetg arguments, split on blanks.
#
# The current stage is written to $LIB.velvet_<K>.proc; velveth and velvetg
# output go to $LIB.velvet_<K>.hlog and $LIB.velvet_<K>.glog.

# Some defaults for the parameters
FORMAT=${FORMAT:-fasta}
INSLEN=${INSLEN:-300}
USECOUPLED=${USECOUPLED:-yes}
USESINGLE=${USESINGLE:-no}
CLEANUP=${CLEANUP:-yes}

# Check mandatory variables
if [[ -z "$LIB" ]]; then
  echo "Error: LIB is mandatory" >&2
  exit 1
fi
if [[ -z "$PDIR" ]]; then
  echo "Error: PDIR is mandatory" >&2
  exit 1
fi
if [[ -z "$DATA" ]]; then
  echo "Error: DATA is mandatory" >&2
  exit 1
fi

# Prepare input
KMER=$PBS_ARRAYID
CWD=$(pwd)
DIR="$CWD/$LIB.velvet_$KMER"

# Run
module load velvet/1.2.10
echo velveth > "$DIR.proc"
# The command is kept in an array so that paths containing blanks stay
# single arguments
VELVETH_CMD=(velveth_101_omp "$DIR" "$KMER" "-$FORMAT")
if [[ "$USECOUPLED" == "yes" ]]; then
  VELVETH_CMD+=(-shortPaired "$DATA/$LIB.CoupledReads.fa")
fi
if [[ "$USESINGLE" == "yes" ]]; then
  VELVETH_CMD+=(-short "$DATA/$LIB.SingleReads.fa")
fi
if [[ -n "$VELVETH_EXTRA" ]]; then
  # Deliberately unquoted: the value holds several arguments
  # shellcheck disable=SC2206
  VELVETH_CMD+=($VELVETH_EXTRA)
fi
"${VELVETH_CMD[@]}" &> "$DIR.hlog"
echo velvetg > "$DIR.proc"
# VELVETG_EXTRA is deliberately unquoted for the same reason
# shellcheck disable=SC2086
velvetg_101_omp "$DIR" -exp_cov auto -cov_cutoff auto -ins_length "$INSLEN" \
  $VELVETG_EXTRA &> "$DIR.glog"

# Success is judged by the presence of a non-empty contigs file
if [[ ! -d "$DIR" ]]; then
  echo "$0: Error: Directory $DIR doesn't exist, something went wrong" >&2
  exit 1
fi
if [[ ! -s "$DIR/contigs.fa" ]]; then
  echo "$0: Error: File $DIR/contigs.fa doesn't exist, something went wrong" >&2
  exit 1
fi
if [[ "$CLEANUP" != "no" ]]; then
  echo cleanup > "$DIR.proc"
  rm -- "$DIR/Sequences"
  rm -- "$DIR/Roadmaps"
  rm -- "$DIR"/*Graph*
fi
echo done > "$DIR.proc"
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# blast.pbs pipeline
# Step 01 : Initialize input files
#
# Reads SCRATCH, PDIR, PBS_JOBID, INPUT, PROJ and MAX_JOBS, and calls
# REGISTER_JOB, BEGIN and JOB_DONE, none of which are defined here
# (presumably they come from the sourced RUNME.bash -- confirm).
# Each sub-step leaves a marker file in $SCRATCH/success once completed and
# is skipped when its marker already exists.

# 00. Read configuration
cd "$SCRATCH" || exit 1
TASK="dry"
source "$PDIR/RUNME.bash"
echo "$PBS_JOBID" > "$SCRATCH/success/01.00"

if [[ ! -e "$SCRATCH/success/01.01" ]]; then
  # 01. BEGIN
  REGISTER_JOB "01" "01" "Custom BEGIN function" \
    && BEGIN \
    || exit 1
  touch "$SCRATCH/success/01.01"
fi

if [[ ! -e "$SCRATCH/success/01.02" ]]; then
  # 02. Split
  # Discard the leftovers of any previous, unfinished split
  if [[ -d "$SCRATCH/tmp/split" ]]; then
    rm -R "$SCRATCH/tmp/split"
  fi
  REGISTER_JOB "01" "02" "Splitting query files" \
    && mkdir "$SCRATCH/tmp/split" \
    && perl "$PDIR/FastA.split.pl" "$INPUT" "$SCRATCH/tmp/split/$PROJ" \
         "$MAX_JOBS" \
    || exit 1
  touch "$SCRATCH/success/01.02"
fi

if [[ ! -e "$SCRATCH/success/01.03" ]]; then
  # 03. Finalize
  REGISTER_JOB "01" "03" "Finalizing input preparation" \
    && mv "$SCRATCH/tmp/split" "$SCRATCH/tmp/in" \
    || exit 1
  touch "$SCRATCH/success/01.03"
fi

# The exit must run in the current shell: inside ( ... ) it would only end
# the subshell and the step would still be reported as done
if [[ ! -d "$SCRATCH/tmp/out" ]]; then
  mkdir "$SCRATCH/tmp/out" || exit 1
fi
JOB_DONE "01"
|
|
38
|
-
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
# blast.pbs pipeline
# Step 02 : Run BLAST
#
# Runs a single array sub-task, identified by PBS_ARRAYID. Reads SCRATCH,
# PDIR, PBS_JOBID and PBS_ARRAYID from the environment. PROJ and the hooks
# BEFORE_BLAST, RUN_BLAST and AFTER_BLAST are not defined in this script;
# presumably they are provided by sourcing RUNME.bash (verify there).
# Every sub-step drops a marker file in "$SCRATCH/success/" when it finishes
# and is skipped on later runs if that marker already exists.

# Read configuration
cd "$SCRATCH" || exit 1 ;
TASK="dry" ;
source "$PDIR/RUNME.bash" ;

# 00. Initial vars
ID_N=$PBS_ARRAYID
# Without an array index there is no sub-task to run.
[[ "$ID_N" == "" ]] && exit 1 ;
# Nothing to do if this sub-task was already completed.
[[ -e "$SCRATCH/success/02.$ID_N" ]] && exit 0 ;
IN="$SCRATCH/tmp/in/$PROJ.$ID_N.fa" ;
OUT="$SCRATCH/tmp/out/$PROJ.blast.$ID_N" ;
FINAL_OUT="$SCRATCH/results/$PROJ.$ID_N.blast" ;
if [[ -e "$SCRATCH/success/02.$ID_N.00" ]] ; then
   # A previous job registered itself for this sub-task: ask the scheduler
   # whether it is still running before taking over.
   pre_job=$(cat "$SCRATCH/success/02.$ID_N.00") ;
   state=$(qstat -f "$pre_job" 2>/dev/null | grep job_state | sed -e 's/.*= //')
   if [[ "$state" == "R" ]] ; then
      echo "Warning: This task is already being executed by $pre_job. Aborting." >&2 ;
      exit 0 ;
   elif [[ "$state" == "" ]] ; then
      echo "Warning: This task was initialized by $pre_job, but it's currently not running. Superseding." >&2 ;
   fi ;
fi
echo "$PBS_JOBID" > "$SCRATCH/success/02.$ID_N.00" ;

# 01. Before BLAST
if [[ ! -e "$SCRATCH/success/02.$ID_N.01" ]] ; then
   BEFORE_BLAST "$IN" "$OUT" || exit 1 ;
   touch "$SCRATCH/success/02.$ID_N.01" ;
fi ;

# 02. Run BLAST
if [[ ! -e "$SCRATCH/success/02.$ID_N.02" ]] ; then
   # Recover previous runs, if any
   if [[ -s "$OUT" ]] ; then
      perl "$PDIR/BlastTab.recover_job.pl" "$IN" "$OUT" \
         || exit 1 ;
   fi ;
   # Run BLAST
   # The finished output is stored as the "-z" part.
   RUN_BLAST "$IN" "$OUT" \
      && mv "$OUT" "$OUT-z" \
      || exit 1 ;
   touch "$SCRATCH/success/02.$ID_N.02" ;
fi ;

# 03. Collect BLAST parts
if [[ ! -e "$SCRATCH/success/02.$ID_N.03" ]] ; then
   if [[ -e "$OUT" ]] ; then
      echo "Warning: The file $OUT pre-exists, but the BLAST collection was incomplete." >&2 ;
      echo " I'm assuming that it corresponds to the first part of the result, but you should check manually." >&2 ;
      echo " The last lines are:" >&2 ;
      tail -n 3 "$OUT" >&2 ;
   else
      touch "$OUT" || exit 1 ;
   fi ;
   # Glob the parts directly instead of parsing the output of ls, so that
   # paths containing spaces or glob characters are handled correctly.
   for i in "$OUT"-* ; do
      # An unmatched glob stays as the literal pattern: skip it.
      [[ -e "$i" ]] || continue ;
      # Only remove a part once it was successfully appended.
      cat "$i" >> "$OUT" || exit 1 ;
      rm "$i" || exit 1 ;
   done ;
   # Do not flag the collection as done unless the result is in place.
   mv "$OUT" "$FINAL_OUT" || exit 1 ;
   touch "$SCRATCH/success/02.$ID_N.03" ;
fi ;

# 04. After BLAST
if [[ ! -e "$SCRATCH/success/02.$ID_N.04" ]] ; then
   AFTER_BLAST "$IN" "$FINAL_OUT" || exit 1 ;
   touch "$SCRATCH/success/02.$ID_N.04" ;
fi ;

touch "$SCRATCH/success/02.$ID_N" ;
|
|
73
|
-
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
# blast.pbs pipeline
# Step 03 : Finalize
#
# Reads SCRATCH, PDIR and PBS_JOBID from the environment. PROJ and the
# helpers REGISTER_JOB, END and JOB_DONE are not defined in this script;
# presumably they are provided by sourcing RUNME.bash (verify there).

# Read configuration
cd "$SCRATCH" || exit 1 ;
TASK="dry" ;
source "$PDIR/RUNME.bash" ;
PREFIX="$SCRATCH/results/$PROJ" ;
OUT="$SCRATCH/$PROJ.blast" ;
# NOTE(review): this is step 03 but the job id is written to 02.00 (step 01
# writes 01.00). Looks like a copy-paste slip; the path is kept as is because
# whatever reads this marker is not visible here -- confirm before changing.
echo "$PBS_JOBID" > "$SCRATCH/success/02.00" ;

# 01. END
if [[ ! -e "$SCRATCH/success/03.01" ]] ; then
   REGISTER_JOB "03" "01" "Custom END function" \
      && END "$PREFIX" "$OUT" \
      || exit 1 ;
   touch "$SCRATCH/success/03.01" ;
fi ;

JOB_DONE "03" ;
|
|
21
|
-
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/perl
|
|
2
|
-
|
|
3
|
-
use warnings;
use strict;
use File::Copy;

# Prepares an interrupted tabular BLAST run to be resumed:
#  - the results in blast.txt, except those of the last query seen (which
#    may be incomplete), are saved as the first unused "blast.txt-<n>" part
#    and blast.txt itself is deleted;
#  - query.fa is replaced by the entries that come after the last query
#    whose results were kept (the original is saved as "query.fa.old").

my($fasta, $blast) = @ARGV;

($fasta and $blast) or die "
.USAGE:
$0 query.fa blast.txt

query.fa Query sequences in FastA format.
blast.txt Incomplete BLAST output in tabular format.

";

print "Fixing $blast:\n";

# Find the first part name that is not taken yet.
my $part = 0;
$part++ while -e "$blast-$part";
my $blast_res = "$blast-$part";

open(my $blast_fh, "<", $blast) or die "Cannot read the file: $blast: $!\n";
open(my $part_fh, ">", "$blast-tmp") or die "Cannot create the file: $blast-tmp: $!\n";
my $last="";     # Lines of the query currently being read (not yet written)
my $last_id="";  # ID of the query currently being read
my $before = ""; # ID of the query right before $last_id
while(my $ln=<$blast_fh>){
   chomp $ln;
   # Stop at the first line without a tab-terminated first column.
   last unless $ln =~ m/(.+?)\t/;
   my $id = $1;
   if($id eq $last_id){
      $last.= $ln."\n";
   }else{
      # A new query starts: the previous one is complete, write it out.
      print {$part_fh} $last if $last;
      $before = $last_id;
      $last = $ln."\n";
      $last_id = $id;
   }
}
close $blast_fh;
# Buffered write errors surface on close: check it before the incomplete
# BLAST file is deleted below.
close $part_fh or die "Cannot write the file: $blast-tmp: $!\n";

move("$blast-tmp", $blast_res) or die "Cannot move file $blast-tmp into $blast_res: $!\n";
unlink $blast or die "Cannot delete file: $blast: $!\n";

unless($before eq ""){
   print "[$before] ";
   $before = ">$before";

   open(my $fasta_fh, "<", $fasta) or die "Cannot read file: $fasta: $!\n";
   open(my $rest_fh, ">", "$fasta-tmp") or die "Cannot create file: $fasta-tmp: $!\n";
   my $print = 0; # True once the entries to keep have been reached
   my $at = 0;    # Line number where $before was found (0 if not found)
   my $i = 0;     # Current line number
   while(my $ln=<$fasta_fh>){
      $i++;
      # Start copying at the first header after the $before entry.
      $print = 1 if $at and $ln =~ /^>/;
      print {$rest_fh} $ln if $print;
      # Keep only the first word of the line to compare it with ">ID".
      $ln =~ s/\s+.*//;
      chomp $ln;
      $at = $i if $ln eq $before;
   }
   # Check the close before the original query file is replaced below.
   close $rest_fh or die "Cannot write file: $fasta-tmp: $!\n";
   close $fasta_fh;
   printf 'recovered at %.2f%% (%d/%d).'."\n", 100*$at/$i, $at, $i if $i;

   move($fasta, "$fasta.old") or die "Cannot move file $fasta into $fasta.old: $!\n";
   move("$fasta-tmp", $fasta) or die "Cannot move file $fasta-tmp into $fasta: $!\n";
}
|
|
72
|
-
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
##################### VARIABLES
# Queue and resources.
QUEUE="iw-shared-6"
MAX_JOBS=500   # Maximum number of concurrent jobs. Never exceed 1990.
PPN=2          # Handed to -num_threads in RUN_BLAST
RAM="9gb"

# Paths
SCRATCH_DIR="$HOME/scratch/pipelines/blast"   # Where the outputs and temporary files will be created
INPUT="$HOME/data/my-large-file.fasta"        # Input query file
DB="$HOME/data/db/nr"                         # Input database
PROGRAM="blastp"

# Pipeline
MAX_TRIALS=5   # Maximum number of automated attempts to re-start a job
|
|
18
|
-
|
|
19
|
-
##################### FUNCTIONS
|
|
20
|
-
## All the functions below can be edited to suit your particular job.
|
|
21
|
-
## No function can be empty, but you can use a "dummy" function (like true).
|
|
22
|
-
## All functions have access to any of the variables defined above.
|
|
23
|
-
##
|
|
24
|
-
## The functions are executed in the following order (from left to right):
|
|
25
|
-
##
|
|
26
|
-
## / -----> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST ---\
|
|
27
|
-
## / ··· ··· ··· \
|
|
28
|
-
## BEGIN --#--------> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST -----#---> END
|
|
29
|
-
## \ ··· ··· ··· /
|
|
30
|
-
## \ -----> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST ---/
|
|
31
|
-
##
|
|
32
|
-
|
|
33
|
-
# Hook executed ONLY ONCE, at the beginning of the pipeline.
# Takes no arguments. As shipped it does nothing and succeeds; the
# commented-out commands are an example to enable if needed.
BEGIN() {
   ### Format the database (assuming proteins, check commands):
   # module load ncbi_blast/2.2.25 || exit 1 ;
   # makeblastdb -in $HOME/data/some-database.faa -title $DB -dbtype prot || exit 1 ;
   # module unload ncbi_blast/2.2.25 || exit 1 ;
   ### Don't do anything:
   return 0
}
|
|
42
|
-
|
|
43
|
-
# Hook executed BEFORE running the BLAST, once per sub-task.
# Arguments: $1 - query file; $2 - Blast file (to be created).
# As shipped it does nothing and succeeds.
BEFORE_BLAST() {
   local IN="$1"    # Query file
   local OUT="$2"   # Blast file (to be created)
   ### Don't do anything:
   return 0
}
|
|
50
|
-
|
|
51
|
-
# Function that executes BLAST, for each sub-task.
# Globals:   PROGRAM, DB, PPN (read)
# Arguments: $1 - query file; $2 - Blast file (to be created)
# Exits the calling shell with status 1 if loading/unloading the module or
# the search itself fails.
function RUN_BLAST {
   local IN=$1 # Query file
   local OUT=$2 # Blast file (to be created)
   ### Run BLAST+ with 13th and 14th columns (query length and subject length):
   module load ncbi_blast/2.2.28_binary || exit 1 ;
   # Paths are quoted so that spaces or glob characters do not break the
   # command line. $PROGRAM is deliberately left unquoted so a value that
   # carries extra options keeps working as before.
   $PROGRAM -query "$IN" -db "$DB" -out "$OUT" -num_threads "$PPN" \
      -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen" \
      || exit 1 ;
   module unload ncbi_blast/2.2.28_binary || exit 1 ;
   ### Run BLAT (nucleotides)
   # module load blat/rhel6 || exit 1 ;
   # blat "$DB" "$IN" -out=blast8 "$OUT" || exit 1 ;
   # module unload blat/rhel6 || exit 1 ;
   ### Run BLAT (proteins)
   # module load blat/rhel6 || exit 1 ;
   # blat "$DB" "$IN" -out=blast8 -prot "$OUT" || exit 1 ;
   # module unload blat/rhel6 || exit 1 ;
}
|
|
70
|
-
|
|
71
|
-
# Hook executed AFTER running the BLAST, once per sub-task.
# Arguments: $1 - query files; $2 - Blast files.
# As shipped it does nothing and succeeds; the commented-out commands are
# example filters to enable if needed.
AFTER_BLAST() {
   local IN="$1"    # Query files
   local OUT="$2"   # Blast files
   ### Filter by best-match:
   # sort $OUT | perl $PDIR/../../Scripts/BlastTab.best_hit_sorted.pl > $OUT.bm
   ### Filter by Bit-score 60:
   # awk '$12>=60' $OUT > $OUT.bs60
   ### Filter by corrected identity 95 (only if it has the additional 13th column):
   # awk '$3*$4/$13 >= 95' $OUT > $OUT.ci95
   ### Don't do anything:
   return 0
}
|
|
84
|
-
|
|
85
|
-
# Hook executed ONLY ONCE at the end, to concatenate the results.
# Arguments: $1 - prefix of all Blast files; $2 - single Blast output (to be
# created). As shipped it does nothing and succeeds; the commented-out
# commands are examples to enable if needed.
END() {
   local PREFIX="$1"   # Prefix of all Blast files
   local OUT="$2"      # Single Blast output (to be created).
   ### Simply concatenate files:
   # cat $PREFIX.*.blast > $OUT
   ### Concatenate only the filtered files (if filtering in AFTER_BLAST):
   # cat $PREFIX.*.blast.bs60 > $OUT
   ### Sort the BLAST by query (might require considerable RAM):
   # sort -k 1 $PREFIX.*.blast > $OUT
   ### Don't do anything:
   return 0
}
|
|
98
|
-
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
../../Scripts/FastA.split.pl
|