miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,228 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- # @author Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Mar-23-2015
5
- # @license Artistic License 2.0
6
-
7
- use warnings;
8
- use strict;
9
- use List::Util qw/sum max/;
10
- use Getopt::Std;
11
- use Math::Round qw/round/;
12
- our $VERSION = 1.1;
13
-
14
- warn <<WARN
15
-
16
-
17
- ┌──[ IMPORTANT ]─────────────────────────────────────────────────┐
18
- │ This script has been deprecated in favor of JPlace.to_iToL.rb. │
19
- │ Please use the new version, together with the RAxML EPA's file │
20
- │ RAxML_portableTree.*.jplace instead. │
21
- └────────────────────────────────────────────────────────────────┘
22
- WARN
23
- ;
24
- sub HELP_MESSAGE {
25
- die "
26
- Description:
27
- Reformats the node names (labels) of a RAxML_originalLabelledTree.<NAME> file
28
- (produced by RAxML's EPA, -f v), so it can be opened in most tree viewers (like
29
- iToL and FigTree). Also, it creates iToL-compatible files to draw pie-charts
30
- (based on the classification of short reads) in the nodes of the reference tree.
31
-
32
- Usage:
33
- $0 -n <NAME> [other options...]
34
-
35
- -n <str> * Name of the run used in RAxML.
36
- -t <str> Use this file as original labelled tree, instead of generating one
37
- based on the job name. By default, RAxML_originalLabelledTree.<NAME>
38
- in the -d directory. See [NOTE1].
39
- -d <str> Directory containing RAxML files. By default: current directory.
40
- -o <str> Output tree. By default, it takes the path to the input tree and
41
- appends .nwk to it.
42
- -l <str> File containing a list of internal nodes. The nodes in the list
43
- will be renamed, and the reads of all children nodes will be
44
- transferred to it. This can be useful if you want to display
45
- these nodes collapsed. The format of the file is raw text, with
46
- two columns separated by tabs or spaces, where the first column is
47
- the original name of the internal node (without the brackets) and
48
- the second is the name to be used. See [NOTE2].
49
- -a Append original label to the renamed nodes (only if -l is passed).
50
- -s <str> The names of the reads will be assumed to contain the sample name,
51
- separated by this string. For example, if the value is '_', and
52
- a read has the name 'hco_ABCDEF/1#ACTG', it will be assumed to be
53
- a read from the sample 'hco'. If not provided, all the reads are
54
- assumed to come from the same sample (called 'unknown').
55
- -m <str> Comma-delimited list of samples. If not provided, all found samples
56
- will be used (unsorted).
57
- -c <str> Comma-delimited list of colors (in RGB hexadecimal) to represent
58
- the different samples. If not provided (or if insufficient values
59
- are provided) random colors are generated.
60
- -N <str> Comma-delimited list of normalizing factors per dataset. Typically,
61
- the size of the datasets divided by a fixed value (e.g. size x 1,000,
62
- to express sizes as reads per thousand).
63
- -T Use the total number of assigned reads per sample (times a constant)
64
- as the normalizing factor. The constant used corresponds to the 100
65
- times the size of the largest factor. If passed, -N is ignored.
66
- -q Run quietly.
67
- -h/--help Displays this message and exits.
68
-
69
- * Mandatory
70
- [NOTE1] The tree provided by -t MUST be based on a tree produced by this script
71
- without the -l option.
72
- [NOTE2] The tree produced by RAxML-EPA is usually not correctly rooted, which
73
- makes the -l option useless. However, you can manually root the tree and provide
74
- the rooted tree in Newick format using the -t option. If you do this, make
75
- sure the program doesn't change/delete the names of the internal nodes. I know
76
- that iToL can do it correctly (if you export preserving the original IDs), while
77
- FigTree deletes the labels. I didn't try any other tool.
78
-
79
- ";
80
- }
81
-
82
- my %o;
83
- getopts('n:t:d:o:l:s:m:c:N:Tqh', \%o);
84
- $o{d} ||= '.';
85
- $o{n} or &HELP_MESSAGE;
86
- $o{h} and &HELP_MESSAGE;
87
- $o{c} = [split /,/, (defined $o{c}?$o{c}:"")];
88
- $o{N} = [split /,/, (defined $o{N}?$o{N}:"")];
89
-
90
- # Set files
91
- my $inTree = ($o{t} || $o{d}."/RAxML_originalLabelledTree.".$o{n});
92
- my $outTree = ($o{o} || $inTree.".nwk");
93
- my $inClass = $o{d}."/RAxML_classification.".$o{n};
94
- my $outClass = $inClass.".iToL";
95
- my $outColl = $outTree.".collapse.iToL";
96
-
97
- # Relocate tree node names
98
- print STDERR "o Reformatting tree.\n" unless $o{q};
99
- open INTREE, "<", $inTree or die "Cannot read file: $inTree: $!\n";
100
- my $tree = <INTREE>;
101
- $tree =~ s/:([\d\.]+)(\[.+?\])/$2:$1/g unless $o{t};
102
- close INTREE;
103
-
104
- # Read leaf nodes
105
- print STDERR "o Reading nodes.\n" unless $o{q};
106
- my %tags = ();
107
-
108
- my $t = $tree;
109
- while($t =~ m/([A-Za-z0-9_\|\.-]+\[([A-Za-z0-9_\|\.-]+)\])/){
110
- my $n = $1;
111
- my $ta = $2;
112
- $tags{$ta} = $n;
113
- $t = substr $t, (length($n) + index $t, $n);
114
- }
115
-
116
- # Label/collapse internal nodes
117
- if($o{l}){
118
- print STDERR "o Labeling/collapsing internal nodes.\n";
119
- open LIST, "<", $o{l} or die "Cannot read file: $o{l}: $!\n";
120
- open COLL, ">", $outColl or die "Cannot create file: $outColl: $!\n";
121
- while(<LIST>){
122
- chomp;
123
- next if /^#/ or /^\s*$/;
124
- # Label internal node
125
- my @l = split /\s+/;
126
- $l[0] =~ m/^\[(.+)\]$/ or die "Unable to parse internal node name: $l[0].\n";
127
- my $ori = $1;
128
- my $new = $l[1];
129
- if(exists $tags{$ori}){
130
- warn "Warning: Trying to label/collapse $ori as $new, already defined as $tags{$ori}.\n";
131
- next;
132
- }
133
- $new =~ s/[^A-Za-z0-9_\|\.\-]/_/g;
134
- $new.= "[$ori]" if $o{a};
135
- $tags{$ori} = $new;
136
- $tree =~ s/\[$ori\]/$new/;
137
- # Isolate node
138
- $t = substr $tree, 0, index($tree, $new);
139
- my $i=length($t)-2;
140
- for(my $c=1 ; $i and $c; $i--){
141
- my $char = substr $t, $i, 1;
142
- $c++ if $char eq ')';
143
- $c-- if $char eq '(';
144
- }
145
- $t = substr $t, $i;
146
- # Get children
147
- $t =~ s/:[\d\.]+|[\(\)]/,/g;
148
- $t =~ s/,+/,/g;
149
- my $chn=0;
150
- for my $child (split /,/, $t){
151
- next unless $child;
152
- $child =~ s/.*\[(.+?)\]/$1/;
153
- $tags{$child} = $new;
154
- $chn++;
155
- }
156
- print STDERR " Collapsing $new: $chn children.\n" unless $o{q};
157
- print COLL "$new\n";
158
- }
159
- close LIST;
160
- close COLL;
161
- }
162
-
163
- # Save tree
164
- open OUTTREE, ">", $outTree or die "Cannot create file: $outTree: $!\n";
165
- print OUTTREE $tree;
166
- close OUTTREE;
167
-
168
- # Count reads
169
- my %samples = ();
170
- my %nodes = ();
171
- print STDERR "o Counting reads.\n";
172
- my $s = defined $o{s} ? $o{s} : "";
173
- open INCLASS, "<", $inClass or die "Cannot read file: $inClass: $!\n";
174
- while(<INCLASS>){
175
- my @ln = split /\s+/;
176
- $ln[0] =~ s/$s.+$//; # Sample name
177
- ($samples{$ln[0]} ||= 0)++;
178
- $tags{$ln[1]} ||= "[".$ln[1]."]"; # Node name
179
- (($nodes{$tags{$ln[1]}} ||= {})->{$ln[0]} ||= 0)++;
180
- }
181
- close INCLASS;
182
-
183
-
184
- my $labs = 'LABELS';
185
- my $cols = 'COLORS';
186
- my @samples = $o{m} ? (split /,/, $o{m}) : (keys %samples);
187
- my @normfac = ();
188
- for my $sample (@samples){
189
- my $col = shift @{$o{c}};
190
- unless(defined $col and length($col)==6){
191
- $col = '';
192
- for (1 .. 3){
193
- my $v = int rand 16;
194
- $v = chr $v+55 if $v>9;
195
- $col.="$v$v";
196
- }
197
- }
198
- my $nf = shift @{$o{N}};
199
- $nf = 1 unless defined $nf and $nf>0;
200
- $labs.= ','.($sample || 'unknown');
201
- $cols.= ',#'.$col;
202
- push @normfac, $nf+0;
203
- }
204
-
205
- open OUTCLASS, ">", $outClass or die "Cannot create file: $outClass: $!\n";
206
- print OUTCLASS "$labs\n$cols\n";
207
- my $tiny=0;
208
- for my $node (keys %nodes){
209
- my $i=0;
210
- for my $s (@samples){
211
- $nodes{$node}->{$s} = ($nodes{$node}->{$s} || 0)/($o{T} ? ($samples{$s}||1)/(max(values %samples)*100) : ($normfac[$i++]||1));
212
- }
213
- my $r = round(sum(values %{$nodes{$node}}));
214
- print OUTCLASS "$node,R$r";
215
- for my $sample (@samples){
216
- print OUTCLASS ",".round($nodes{$node}->{$sample} || 0);
217
- }
218
- print OUTCLASS "\n";
219
- $tiny++ unless $r;
220
- }
221
- close OUTCLASS;
222
-
223
- unless($o{q}) {
224
- print "Total counts per dataset:\n";
225
- print " $_\t".($samples{$_}||0)."\n" for @samples;
226
- }
227
- warn "$tiny node assignments are too small to represent. Decrease the values of -N or use an alternative like -T." if $tiny;
228
-
@@ -1,32 +0,0 @@
1
- #!/usr/bin/env Rscript
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Jan-04-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- #= Load stuff
9
- args <- commandArgs(trailingOnly = F)
10
- enveomics_R <- file.path(dirname(
11
- sub("^--file=", "", args[grep("^--file=", args)])),
12
- "lib", "enveomics.R")
13
- library(methods)
14
- source(file.path(enveomics_R, "R", "cliopts.R"))
15
- source(file.path(enveomics_R, "R", "recplot2.R"))
16
-
17
- #= Generate interface
18
- opt <- enve.cliopts(enve.recplot2.compareIdentities,
19
- file.path(enveomics_R, "man", "enve.recplot2.compareIdentities.Rd"),
20
- positional_arguments=2,
21
- usage="usage: %prog [options] recplot-A.Rdata recplot-B.Rdata",
22
- number=c("pseudocounts", "max.deviation"), ignore=c("x", "y"),
23
- p_desc="Calculates the difference between identity distributions of two recruitment plots.")
24
-
25
- #= Run it!
26
- load(opt$args[1])
27
- opt$options[['x']] <- rp
28
- load(opt$args[2])
29
- opt$options[['y']] <- rp
30
- dist <- do.call("enve.recplot2.compareIdentities", opt$options)
31
- cat(dist, '\n')
32
-
@@ -1,48 +0,0 @@
1
- #!/bin/bash
2
-
3
- #
4
- # @author Luis M. Rodriguez-R
5
- # @update Oct-20-2015
6
- # @license artistic license 2.0
7
- #
8
-
9
- FTP="ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/bacteria"
10
- ORG=$1
11
- EXT=${2:-.*.gz}
12
- STT=${3:-Any}
13
- DIR=${4:-$ORG}
14
-
15
- if [[ "$ORG" == "" ]] ; then
16
- echo "
17
- Downloads a collection of sequences and/or annotations from NCBI's RefSeq.
18
-
19
- Usage:
20
- $0 <organism> [<extension>[ <level>[ <dir>]]]
21
-
22
- <organism> The organism to download (e.g., Streptococcus_pneumoniae).
23
- <extension> Extension to download. Common extensions include '.fna.gz'
24
- (genomic sequences), '.faa.gz' (protein sequences), and
25
- '.gff.gz' (annotations). By default: '.*.gz' (all data).
26
- <level> Use only genomes with this assembly level. Common levels are
27
- 'Complete Genome' and 'Contig'. By default, any assembly
28
- level is allowed ('Any').
29
- <dir> Directory where the files are to be downladed. By default,
30
- same as <organism>.
31
- " >&2
32
- exit
33
- fi
34
-
35
- [[ -d "$DIR" ]] || mkdir "$DIR"
36
-
37
- curl -s "$FTP/$ORG/assembly_summary.txt" -o "$DIR/assembly_summary.txt"
38
- for path in $(cat "$DIR/assembly_summary.txt" \
39
- | awk -F"\t" "\$12==\"$STT\" || \"$STT\"==\"Any\" {print \$20}" ) ; do
40
- dir="$DIR/$(basename "$path")"
41
- [[ -d "$dir" ]] || mkdir "$dir"
42
- for file in $(curl -s "$path/" | awk '{print $9}') ; do
43
- if [[ "$file" == *$EXT ]] ; then
44
- curl -s "$path/$file" -o "$dir/$file"
45
- fi
46
- done
47
- done
48
-
@@ -1,55 +0,0 @@
1
- #!/bin/bash
2
-
3
- DATA_LINK="https://www.ebi.ac.uk/ena/portal/api/filereport"
4
- DATA_OPS="result=read_run&fields=run_accession,fastq_ftp,fastq_md5"
5
- SRX=$1
6
- DIR=${2:-$SRX}
7
- VERSION=1.0
8
-
9
- if [[ "$SRX" == "" ]] ; then
10
- echo "
11
- [Enveomics Collection: $(basename "$0" .bash) $VERSION]
12
-
13
- Downloads the set of runs from a project, sample, or experiment in SRA.
14
-
15
- Usage:
16
- $(basename "$0") <SRA-ID>[ <dir>]
17
-
18
- <SRA-ID> ID of the SRA Project, Sample, or Experiment.
19
- <dir> Directory where the files are to be downladed. By default,
20
- same as <SRA-ID>.
21
- " >&2
22
- exit
23
- fi
24
-
25
- [[ -d "$DIR" ]] || mkdir "$DIR"
26
-
27
- function md5value {
28
- local file=$1
29
- o=$(md5 "$file" | perl -pe 's/.* //')
30
- [[ -n $o ]] || o=$(md5sum-lite "$file" | awk '{print $1}')
31
- [[ -n $o ]] || o=$(md5sum "$file" | awk '{print $1}')
32
- echo "$o"
33
- }
34
-
35
- curl -Ls "$DATA_LINK?$DATA_OPS&accession=$SRX" -o "$DIR/srr_list.txt"
36
- tail -n +2 "$DIR/srr_list.txt" | while read ln ; do
37
- srr=$(echo "$ln"|cut -f 1)
38
- ftp=$(echo "$ln"|cut -f 2)
39
- md5=$(echo "$ln"|cut -f 3)
40
- dir="$DIR/$srr"
41
- [[ -d "$dir" ]] || mkdir "$dir"
42
- echo "o $srr" >&2
43
- for uri in $(echo "$ftp" | tr ";" " ") ; do
44
- file="$dir/$(basename $uri)"
45
- curl "$uri" -o "$file"
46
- md5obs=$(md5value "$file" 2> /dev/null)
47
- if [[ "$md5" == "$md5obs"* ]] ; then
48
- md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
49
- else
50
- echo "Corrupt file: $file" >&2
51
- echo " MD5 mismatch: $md5obs not in $md5" >&2
52
- exit 1;
53
- fi
54
- done
55
- done
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env Rscript
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Jan-05-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- #= Load stuff
9
- suppressPackageStartupMessages(library(enveomics.R))
10
- args <- commandArgs(trailingOnly = F)
11
- enveomics_R <- file.path(dirname(
12
- sub("^--file=", "", args[grep("^--file=", args)])),
13
- "lib", "enveomics.R")
14
-
15
- #= Generate interface
16
- opt <- enve.cliopts(plot.enve.TRIBStest,
17
- file.path(enveomics_R, "man", "plot.enve.TRIBStest.Rd"),
18
- positional_arguments=c(1,3),
19
- usage="usage: %prog [options] output.pdf [width height]",
20
- mandatory=c("x"),
21
- vectorize=c("xlim","ylim"),
22
- number=c("xlim","ylim"),
23
- defaults=c(type="overlap", xlim=NA, ylim=NA))
24
-
25
- #= Run it!
26
- a <- new.env()
27
- load(opt$options[['x']], a)
28
- opt$options[['x']] <- get(ls(envir=a),envir=a)
29
- summary(opt$options[['x']])
30
- if(is.na(opt$options[['xlim']][1])) opt$options[['xlim']] <- NULL
31
- if(is.na(opt$options[['ylim']][1])) opt$options[['ylim']] <- NULL
32
- args = as.list(opt$args)
33
- for(i in 2:3) if(length(args)>=i) args[[i]] <- as.numeric(args[[i]])
34
- do.call("pdf", args)
35
- do.call("plot.enve.TRIBStest", opt$options)
36
- dev.off()
@@ -1,39 +0,0 @@
1
- #!/usr/bin/env Rscript
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Jan-05-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- #= Load stuff
9
- suppressPackageStartupMessages(library(enveomics.R))
10
- args <- commandArgs(trailingOnly = F)
11
- enveomics_R <- file.path(dirname(
12
- sub("^--file=", "", args[grep("^--file=", args)])),
13
- "lib", "enveomics.R")
14
-
15
- #= Generate interface
16
- opt <- suppressWarnings(enve.cliopts(enve.tribs,
17
- file.path(enveomics_R, "man", "enve.tribs.Rd"),
18
- positional_arguments=c(0,2),
19
- usage="usage: %prog [options] [output.Rdata [bins=50]]",
20
- mandatory=c("dist", "selection"),
21
- defaults=c(dimensions=0, selection=NULL),
22
- ignore=c("metaMDS.opts","points","pre.tribs","subsamples"),
23
- o_desc=list(dist="A tab-delimited matrix of distances.",
24
- selection="A list of names with the selection to evaluate."),
25
- p_desc=paste("",
26
- "Estimates the empirical difference between all the distances",
27
- "in a set of objects and a subset, together with its statistical",
28
- "significance.",sep="\n\t")))
29
-
30
- #= Run it!
31
- opt$options[['dist']] <- as.dist(read.table(opt$options[['dist']],
32
- header=TRUE, sep="\t", row.names=1))
33
- opt$options[['selection']] <- read.table(opt$options[['selection']],
34
- header=FALSE, sep="\t", as.is=TRUE)[,1]
35
- if(opt$options[['dimensions']]==0) opt$options[['dimensions']] <- NULL
36
- if(length(opt$args)>1) opt$options[['bins']] <- as.numeric(opt$args[2])
37
- t <- do.call("enve.tribs.test", opt$options)
38
- summary(t)
39
- if(length(opt$args)>0) save(t, file=opt$args[1])
@@ -1,31 +0,0 @@
1
- #!/usr/bin/env Rscript
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Dec-29-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- #= Load stuff
9
- args <- commandArgs(trailingOnly = F)
10
- enveomics_R <- file.path(dirname(
11
- sub("^--file=", "", args[grep("^--file=", args)])),
12
- "lib", "enveomics.R")
13
- source(file.path(enveomics_R, "R", "cliopts.R"))
14
- source(file.path(enveomics_R, "R", "utils.R"))
15
- source(file.path(enveomics_R, "R", "barplot.R"))
16
-
17
- #= Generate interface
18
- opt <- enve.cliopts(enve.barplot,
19
- file.path(enveomics_R, "man", "enve.barplot.Rd"),
20
- positional_arguments=c(1,3),
21
- usage="usage: %prog [options] output.pdf [width height]",
22
- mandatory=c("x"), vectorize=c("sizes","order","col"),
23
- number=c("sizes","order"),
24
- o_desc=list(x="A tab-delimited file containing header (first row) and row names (first column)."))
25
-
26
- #= Run it!
27
- args = as.list(opt$args)
28
- for(i in 2:3) if(length(args)>=i) args[[i]] <- as.numeric(args[[i]])
29
- do.call("pdf", args)
30
- do.call("enve.barplot", opt$options)
31
- dev.off()
@@ -1,30 +0,0 @@
1
- #!/usr/bin/env Rscript
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Jan-04-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- #= Load stuff
9
- args <- commandArgs(trailingOnly = F)
10
- enveomics_R <- file.path(dirname(
11
- sub("^--file=", "", args[grep("^--file=", args)])),
12
- "lib", "enveomics.R")
13
- source(file.path(enveomics_R, "R", "cliopts.R"))
14
- source(file.path(enveomics_R, "R", "df2dist.R"))
15
-
16
- #= Generate interface
17
- opt <- enve.cliopts(enve.df2dist,
18
- file.path(enveomics_R, "man", "enve.df2dist.Rd"),
19
- positional_arguments=1,
20
- usage="usage: %prog [options] output.mat",
21
- mandatory=c("x"),
22
- number=c("default.d", "max.sim"),
23
- o_desc=list(x="A tab-delimited table with the distances."),
24
- p_desc="Transform a tab-delimited list of distances into a squared matrix.")
25
-
26
- #= Run it!
27
- opt$options[['x']] <- read.table(opt$options[['x']],
28
- header=TRUE, sep="\t", as.is=TRUE)
29
- dist <- do.call("enve.df2dist", opt$options)
30
- write.table(as.matrix(dist), opt$args[1], quote=FALSE, sep="\t", col.names=NA)
@@ -1,61 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update: Mar-23-2015
5
- # @license: artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
-
12
- my %o;
13
- getopts('k:s:ihn', \%o);
14
- my($list, $table) = @ARGV;
15
-
16
- ($list and $table) or die "
17
- .Description:
18
- Extracts (and re-orders) a subset of rows from a raw table.
19
-
20
- .Usage: $0 [options] list.txt table.txt > subset.txt
21
-
22
- Options:
23
- -k <int> Column of the table to use as key to filter. By default, 1.
24
- -s <str> String to use as separation between rows. By default, tabulation.
25
- -i If set, reports the inverse of the list (i.e., reports only rows
26
- absent in the list). Implies -n.
27
- -h Keep first row of the table (header) untouched.
28
- -n No re-order. The output has the same order of the table. By
29
- default, it prints in the order of the list.
30
-
31
- list.txt List of IDs to extract.
32
- table.txt Table file containing the superset.
33
- subset.txt Table file to be created.
34
-
35
- ";
36
-
37
- $o{k} ||= 1;
38
- $o{s} ||= "\t";
39
- $o{n}=1 if $o{i};
40
- my $HEADER = "";
41
-
42
- my $tbl2 = $o{n} ? $list : $table;
43
- open TBL, "<", $tbl2 or die "Cannot read file: $tbl2: $!\n";
44
- $HEADER = <TBL> if $o{h} and not $o{n};
45
- my %tbl2 = map { my $l=$_; chomp $l; my @r=split $o{s}, $l; $r[ $o{n} ? 0 : $o{k}-1] => $l } <TBL>;
46
- close TBL;
47
-
48
- my $tbl1 = $o{n} ? $table : $list;
49
- open TBL, "<", $tbl1 or die "Cannot read file: $tbl1: $!\n";
50
- $HEADER = <TBL> if $o{h} and $o{n};
51
- print $HEADER;
52
- while(my $ln = <TBL>){
53
- chomp $ln;
54
- next unless $ln;
55
- my @ln = split $o{s}, $ln;
56
- my $good = exists $tbl2{ $ln[$o{n} ? $o{k}-1 : 0] };
57
- $good = not $good if $o{i};
58
- print "".($o{n} ? $ln : $tbl2{$ln[0]})."\n" if $good;
59
- }
60
- close TBL;
61
-
@@ -1,77 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update: Sep-20-2015
5
- # @license: artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
-
12
- my %o;
13
- getopts('si:o:ne:h:H:r:', \%o);
14
- my @files = @ARGV;
15
-
16
- $#files>0 or die "
17
- .Description:
18
- Merges multiple (two-column) lists into one table.
19
-
20
- .Usage:
21
- $0 [options] files... > output.txt
22
-
23
- Options:
24
- -s Values are read as Strings. By default, values are read as numbers.
25
- -i <str> Input field-delimiter. By default: tabulation (\"\\t\").
26
- -o <str> Output field-delimiter. By default: tabulation (\"\\t\").
27
- -n No-header. By default, the header is determined by the file names.
28
- -e <str> Default string when no value is found. By default, the \"empty\" value
29
- is 0 if values are numeric (i.e., unless -s is set) or an empty string
30
- otherwise.
31
- -h <str> Header of the first column, containing the IDs. By default: \"Tag\".
32
- -H <str> Format of filenames capturing the column header in the first capturing
33
- parenthesis. Non-capturing paretheses can be defined as (?:...). By
34
- default: \"(?:.*/)?([^\\.]+)\", which captures the part of the basename
35
- of the file before the first dot (if any).
36
- -r <int> Number of leading rows to ignore in the input files. Zero by default.
37
-
38
- ";
39
- $o{i} ||= "\t";
40
- $o{o} ||= "\t";
41
- $o{e} ||= ($o{s} ? "" : 0);
42
- $o{h} ||= "Tag";
43
- $o{H} ||= "(?:.*/)?([^\\.]+)";
44
- $o{r} += 0;
45
-
46
- my $notes = {};
47
-
48
- print $o{h} unless $o{n};
49
- my $i = 0;
50
- for my $file (@files){
51
- unless($o{n}){
52
- $file =~ m/$o{H}/ or die "Filename '$file' doesn't match format '$o{H}'.";
53
- my $tag=$1;
54
- print $o{o}.$tag;
55
- }
56
- open IN, "<", $file or die "Cannot read file: $file: $!\n";
57
- while(<IN>){
58
- next if $. <= $o{r};
59
- chomp;
60
- my @l = split $o{i};
61
- $l[1]+=0 unless $o{s};
62
- $notes->{$l[0]} ||= [];
63
- $notes->{$l[0]}->[$i] = $l[1];
64
- }
65
- close IN;
66
- $i++;
67
- }
68
- print "\n" unless $o{n};
69
-
70
- for my $id (keys %$notes){
71
- print $id;
72
- for my $i (0 .. $#files){
73
- print $o{o}.(( defined $notes->{$id}->[$i] ? $notes->{$id}->[$i] : $o{e} ));
74
- }
75
- print "\n";
76
- }
77
-