miga-base 0.3.1.6 → 0.3.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/actions/ncbi_get.rb +57 -42
  3. data/lib/miga/result/base.rb +7 -0
  4. data/lib/miga/result/dates.rb +42 -0
  5. data/lib/miga/result.rb +4 -0
  6. data/lib/miga/version.rb +1 -1
  7. data/scripts/essential_genes.bash +5 -4
  8. data/utils/enveomics/Makefile +1 -1
  9. data/utils/enveomics/Manifest/Tasks/aasubs.json +75 -75
  10. data/utils/enveomics/Manifest/Tasks/blasttab.json +194 -185
  11. data/utils/enveomics/Manifest/Tasks/distances.json +130 -130
  12. data/utils/enveomics/Manifest/Tasks/fasta.json +51 -3
  13. data/utils/enveomics/Manifest/Tasks/fastq.json +161 -126
  14. data/utils/enveomics/Manifest/Tasks/graphics.json +111 -111
  15. data/utils/enveomics/Manifest/Tasks/mapping.json +30 -0
  16. data/utils/enveomics/Manifest/Tasks/ogs.json +308 -265
  17. data/utils/enveomics/Manifest/Tasks/other.json +451 -449
  18. data/utils/enveomics/Manifest/Tasks/remote.json +1 -1
  19. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +18 -10
  20. data/utils/enveomics/Manifest/Tasks/tables.json +250 -250
  21. data/utils/enveomics/Manifest/Tasks/trees.json +52 -52
  22. data/utils/enveomics/Manifest/Tasks/variants.json +4 -4
  23. data/utils/enveomics/Manifest/categories.json +12 -4
  24. data/utils/enveomics/Manifest/examples.json +1 -1
  25. data/utils/enveomics/Scripts/BedGraph.tad.rb +71 -0
  26. data/utils/enveomics/Scripts/BlastTab.recplot2.R +23 -22
  27. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  28. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  29. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +272 -258
  30. data/utils/enveomics/Scripts/aai.rb +13 -6
  31. data/utils/enveomics/Scripts/ani.rb +2 -2
  32. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  33. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +12 -14
  34. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +2 -2
  35. data/utils/enveomics/Scripts/rbm.rb +23 -14
  36. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
  37. data/utils/enveomics/enveomics.R/R/barplot.R +2 -2
  38. metadata +9 -2
@@ -273,10 +273,10 @@
273
273
  "description": ["Sister read number prefix in the name of the reads.",
274
274
  "Escape characters as dots (\\.), parenthesis (\\(, \\), \\[,",
275
275
  "\\]), other characters with special meaning in regular",
276
- "expressions (\\*, \\+, \\^, \\$, \\|). This prefix allows regular",
277
- "expressions (for example, use ':|\\.' to use any of colon or",
278
- "dot). Note that the prefix will not be included in the base name",
279
- "reported in the output."]
276
+ "expressions (\\*, \\+, \\^, \\$, \\|). This prefix allows regular",
277
+ "expressions (for example, use ':|\\.' to use any of colon or",
278
+ "dot). Note that the prefix will not be included in the base name",
279
+ "reported in the output."]
280
280
  },
281
281
  ">",
282
282
  {
@@ -295,7 +295,8 @@
295
295
  "task": "BlastTab.seqdepth.pl",
296
296
  "description": "Estimates the sequencing depth of subject sequences.",
297
297
  "help_arg": "",
298
- "see_also": ["BlastTab.seqdepth_ZIP.pl", "BlastTab.seqdepth_nomedian.pl"],
298
+ "see_also": ["BlastTab.seqdepth_ZIP.pl", "BlastTab.seqdepth_nomedian.pl",
299
+ "BedGraph.tad.rb"],
299
300
  "options": [
300
301
  "cat",
301
302
  {
@@ -306,7 +307,7 @@
306
307
  "(or contigs)."]
307
308
  },
308
309
  "|",
309
- { "arg": "task" },
310
+ { "arg": "task" },
310
311
  {
311
312
  "name": "genes_or_ctgs.fna",
312
313
  "arg": "in_file",
@@ -341,7 +342,8 @@
341
342
  "http://anisette.ucs.louisiana.edu/Academic/Sciences/MATH/stage/stat2012.pdf\n",
342
343
  "[2] Lindner et al, Bioinformatics, 2013."],
343
344
  "help_arg": "",
344
- "see_also": ["BlastTab.seqdepth.pl", "BlastTab.seqdepth_nomedian.pl"],
345
+ "see_also": ["BlastTab.seqdepth.pl", "BlastTab.seqdepth_nomedian.pl",
346
+ "BedGraph.tad.rb"],
345
347
  "options": [
346
348
  "cat",
347
349
  {
@@ -382,22 +384,22 @@
382
384
  "task": "BlastTab.seqdepth_nomedian.pl",
383
385
  "description": ["Estimates the sequencing depth of subject",
384
386
  "sequences. The values reported by this script may differ from those",
385
- "of BlastTab.seqdepth.pl, because this script uses the aligned length",
386
- "of the read while BlastTab.seqdepth.pl uses the aligned length of the",
387
- "subject sequence."],
387
+ "of BlastTab.seqdepth.pl, because this script uses the aligned length",
388
+ "of the read while BlastTab.seqdepth.pl uses the aligned length of the",
389
+ "subject sequence."],
388
390
  "help_arg": "",
389
391
  "see_also": ["BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"],
390
392
  "options": [
391
393
  "cat",
392
- {
393
- "arg": "in_file",
394
+ {
395
+ "arg": "in_file",
394
396
  "multiple_sep": " ",
395
397
  "mandatory": true,
396
398
  "description": ["One or more Tabular BLAST files of reads vs genes",
397
399
  "(or contigs)."]
398
- },
399
- "|",
400
- { "arg": "task" },
400
+ },
401
+ "|",
402
+ { "arg": "task" },
401
403
  {
402
404
  "name": "genes_or_ctgs.fna",
403
405
  "arg": "in_file",
@@ -412,7 +414,7 @@
412
414
  "mandatory": true,
413
415
  "description": ["A tab-delimited file with the following columns:",
414
416
  "(1) Subject ID. (2) Average sequencing depth. (3) Number of",
415
- "mapped reads. (4) Length of the subject sequence."]
417
+ "mapped reads. (4) Length of the subject sequence."]
416
418
  }
417
419
  ]
418
420
  },
@@ -424,23 +426,23 @@
424
426
  "see_also": "BlastTab.filter.pl",
425
427
  "options": [
426
428
  {
427
- "name": "blast.tab",
428
- "mandatory": true,
429
- "arg": "in_file",
430
- "description": "BLAST output to be filtered (tabular format)."
431
- },
432
- {
433
- "name": "sample.fa",
434
- "mandatory": true,
435
- "arg": "in_file",
436
- "description": "Sequences to use as query (FastA format)."
437
- },
438
- ">",
439
- {
440
- "arg": "out_file",
441
- "mandatory": true,
442
- "description": "The filtered BLAST output (tabular format)."
443
- }
429
+ "name": "blast.tab",
430
+ "mandatory": true,
431
+ "arg": "in_file",
432
+ "description": "BLAST output to be filtered (tabular format)."
433
+ },
434
+ {
435
+ "name": "sample.fa",
436
+ "mandatory": true,
437
+ "arg": "in_file",
438
+ "description": "Sequences to use as query (FastA format)."
439
+ },
440
+ ">",
441
+ {
442
+ "arg": "out_file",
443
+ "mandatory": true,
444
+ "description": "The filtered BLAST output (tabular format)."
445
+ }
444
446
  ]
445
447
  },
446
448
  {
@@ -517,51 +519,51 @@
517
519
  "help_arg": "",
518
520
  "options": [
519
521
  {
520
- "name": "tax_blast.txt",
521
- "mandatory": true,
522
- "arg": "in_file",
523
- "description": ["BLAST output, where subject IDs are NCBI Taxonomy",
524
- "IDs."]
525
- },
526
- {
527
- "name": "nodes.dmp",
528
- "mandatory": true,
529
- "arg": "in_file",
530
- "description": "Nodes file from NCBI Taxonomy.",
531
- "source_url": "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz"
532
- },
533
- {
534
- "name": "names.dmp",
535
- "mandatory": true,
536
- "arg": "in_file",
537
- "description": "Names file from NCBI Taxonomy.",
538
- "source_url": "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz"
539
- },
540
- {
541
- "name": "rank",
542
- "arg": "string",
543
- "mandatory": true,
544
- "default": "genus",
545
- "description": ["The rank to be reported. All the reported nodes",
546
- "will have the same rank. To see supported values, run:\n",
547
- "`cut -f 5 nodes.dmp | sort -u`."]
548
- },
549
- {
550
- "name": "Best-hit",
551
- "arg": "select",
552
- "values": ["yes", "no"],
553
- "default": "yes",
554
- "description": ["Should it take into account the best hit per query",
555
- "only? This is: should it filter by best-hit?"]
556
- },
557
- ">",
558
- {
559
- "name": "taxrank_list.txt",
560
- "arg": "out_file",
561
- "mandatory": true,
562
- "description": ["BLAST-like output, where subject IDs are Taxonomy",
563
- "names."]
564
- }
522
+ "name": "tax_blast.txt",
523
+ "mandatory": true,
524
+ "arg": "in_file",
525
+ "description": ["BLAST output, where subject IDs are NCBI Taxonomy",
526
+ "IDs."]
527
+ },
528
+ {
529
+ "name": "nodes.dmp",
530
+ "mandatory": true,
531
+ "arg": "in_file",
532
+ "description": "Nodes file from NCBI Taxonomy.",
533
+ "source_url": "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz"
534
+ },
535
+ {
536
+ "name": "names.dmp",
537
+ "mandatory": true,
538
+ "arg": "in_file",
539
+ "description": "Names file from NCBI Taxonomy.",
540
+ "source_url": "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz"
541
+ },
542
+ {
543
+ "name": "rank",
544
+ "arg": "string",
545
+ "mandatory": true,
546
+ "default": "genus",
547
+ "description": ["The rank to be reported. All the reported nodes",
548
+ "will have the same rank. To see supported values, run:\n",
549
+ "`cut -f 5 nodes.dmp | sort -u`."]
550
+ },
551
+ {
552
+ "name": "Best-hit",
553
+ "arg": "select",
554
+ "values": ["yes", "no"],
555
+ "default": "yes",
556
+ "description": ["Should it take into account the best hit per query",
557
+ "only? That is: should it filter by best-hit?"]
558
+ },
559
+ ">",
560
+ {
561
+ "name": "taxrank_list.txt",
562
+ "arg": "out_file",
563
+ "mandatory": true,
564
+ "description": ["BLAST-like output, where subject IDs are Taxonomy",
565
+ "names."]
566
+ }
565
567
  ]
566
568
  },
567
569
  {
@@ -577,39 +579,39 @@
577
579
  "mandatory": true,
578
580
  "description": ["Use 'sort' if your BLAST is not pre-sorted by the",
579
581
  "first column (or if you're not sure). Use 'cat' otherwise."]
580
- },
581
- {
582
- "arg": "in_file",
583
- "mandatory": true,
584
- "description": "Tabular BLAST file."
585
- },
586
- "|",
587
- { "arg": "task" },
588
- "--blast",
589
- "/dev/stdin",
590
- {
591
- "opt": "--top",
592
- "arg": "integer",
593
- "default": 5,
594
- "description": "Maximum number of hits to report for each query."
595
- },
596
- {
597
- "opt": "--sort-by",
598
- "arg": "select",
599
- "values": ["bitscore", "evalue", "identity", "length"],
600
- "default": "bitscore",
601
- "description": "Parameter used to detect the 'best' hits."
602
- },
603
- {
604
- "opt": "--quiet",
605
- "description": "Run quietly."
606
- },
607
- ">",
608
- {
609
- "arg": "out_file",
610
- "mandatory": true,
611
- "description": "Output (filtered) Tabular BLAST."
612
- }
582
+ },
583
+ {
584
+ "arg": "in_file",
585
+ "mandatory": true,
586
+ "description": "Tabular BLAST file."
587
+ },
588
+ "|",
589
+ { "arg": "task" },
590
+ "--blast",
591
+ "/dev/stdin",
592
+ {
593
+ "opt": "--top",
594
+ "arg": "integer",
595
+ "default": 5,
596
+ "description": "Maximum number of hits to report for each query."
597
+ },
598
+ {
599
+ "opt": "--sort-by",
600
+ "arg": "select",
601
+ "values": ["bitscore", "evalue", "identity", "length"],
602
+ "default": "bitscore",
603
+ "description": "Parameter used to detect the 'best' hits."
604
+ },
605
+ {
606
+ "opt": "--quiet",
607
+ "description": "Run quietly."
608
+ },
609
+ ">",
610
+ {
611
+ "arg": "out_file",
612
+ "mandatory": true,
613
+ "description": "Output (filtered) Tabular BLAST."
614
+ }
613
615
  ]
614
616
  },
615
617
  {
@@ -623,80 +625,87 @@
623
625
  ],
624
626
  "options": [
625
627
  {
626
- "opt": "--prefix",
627
- "arg": "in_file",
628
- "mandatory": true,
629
- "description": ["Path to the prefix of the BlastTab.catsbj.pl output",
630
- "files. At least the files .rec and .lim must exist with this",
631
- "prefix."]
632
- },
633
- {
634
- "opt": "--pos-breaks",
635
- "arg": "integer",
636
- "default": 1000,
637
- "description": ["Breaks in the positions histogram."]
638
- },
639
- {
640
- "opt": "--id-breaks",
641
- "arg": "integer",
642
- "default": 300,
643
- "description": ["Breaks in the identity histogram."]
644
- },
645
- {
646
- "opt": "--id-metric",
647
- "arg": "select",
648
- "values": ["identity", "corrected identity", "bit score"],
649
- "default": "identity",
650
- "description": ["Metric of identity to be used (Y-axis). Corrected",
651
- "identity is only supported if the original BLAST file included",
652
- "sequence lengths."]
653
- },
654
- {
655
- "opt": "--id-summary",
656
- "arg": "string",
657
- "default": "sum",
658
- "description": "Function summarizing the identity bins."
659
- },
660
- {
661
- "opt": "--id-cutoff",
662
- "arg": "float",
663
- "default": 95.0,
664
- "description": ["Cutoff of identity metric above which the hits are",
665
- "considered 'in-group'. The 95% identity corresponds to the",
666
- "expectation of ANI<95% within species."]
667
- },
668
- {
669
- "opt": "--threads",
670
- "arg": "integer",
671
- "default": 2,
672
- "description": "Number of threads to use."
673
- },
674
- {
675
- "opt": "--no-verbose",
676
- "description": "Indicates if the function should report the advance."
677
- },
678
- {
679
- "name": "R Object Output",
680
- "arg": "out_file",
681
- "mandatory": true,
682
- "description": ["Recplo2 object that can be re-plotted using",
683
- "R function plot."]
684
- },
685
- {
686
- "name": "Graphical Output",
687
- "arg": "out_file",
688
- "description": "Recruitment plot in PDF."
689
- },
690
- {
691
- "name": "Width",
692
- "arg": "float",
693
- "description": "Width of the plot in inches (7 by default)."
694
- },
695
- {
696
- "name": "Height",
697
- "arg": "float",
698
- "description": "Height of the plot in inches (7 by default)."
699
- }
628
+ "opt": "--prefix",
629
+ "arg": "in_file",
630
+ "mandatory": true,
631
+ "description": ["Path to the prefix of the BlastTab.catsbj.pl output",
632
+ "files. At least the files .rec and .lim must exist with this",
633
+ "prefix."]
634
+ },
635
+ {
636
+ "opt": "--pos-breaks",
637
+ "arg": "integer",
638
+ "default": 1000,
639
+ "description": ["Breaks in the positions histogram."]
640
+ },
641
+ {
642
+ "opt": "--id-breaks",
643
+ "arg": "integer",
644
+ "default": 300,
645
+ "description": ["Breaks in the identity histogram."]
646
+ },
647
+ {
648
+ "opt": "--id-metric",
649
+ "arg": "select",
650
+ "values": ["identity", "corrected identity", "bit score"],
651
+ "default": "identity",
652
+ "description": ["Metric of identity to be used (Y-axis). Corrected",
653
+ "identity is only supported if the original BLAST file included",
654
+ "sequence lengths."]
655
+ },
656
+ {
657
+ "opt": "--id-summary",
658
+ "arg": "string",
659
+ "default": "sum",
660
+ "description": "Function summarizing the identity bins."
661
+ },
662
+ {
663
+ "opt": "--id-cutoff",
664
+ "arg": "float",
665
+ "default": 95.0,
666
+ "description": ["Cutoff of identity metric above which the hits are",
667
+ "considered 'in-group'. The 95% identity corresponds to the",
668
+ "expectation of ANI>95% within species."]
669
+ },
670
+ {
671
+ "opt": "--threads",
672
+ "arg": "integer",
673
+ "default": 2,
674
+ "description": "Number of threads to use."
675
+ },
676
+ {
677
+ "opt": "--no-verbose",
678
+ "description": "Indicates if the function should report its progress."
679
+ },
680
+ {
681
+ "opt": "--peaks-col",
682
+ "arg": "string",
683
+ "default": "NA",
684
+ "description": ["Color of peaks, mandatory for peak-finding (e.g.,",
685
+ "darkred)."]
686
+ },
687
+ {
688
+ "name": "R Object Output",
689
+ "arg": "out_file",
690
+ "mandatory": true,
691
+ "description": ["Recplot2 object that can be re-plotted using",
692
+ "R function plot."]
693
+ },
694
+ {
695
+ "name": "Graphical Output",
696
+ "arg": "out_file",
697
+ "description": "Recruitment plot in PDF."
698
+ },
699
+ {
700
+ "name": "Width",
701
+ "arg": "float",
702
+ "description": "Width of the plot in inches (7 by default)."
703
+ },
704
+ {
705
+ "name": "Height",
706
+ "arg": "float",
707
+ "description": "Height of the plot in inches (7 by default)."
708
+ }
700
709
  ]
701
710
  }
702
711
  ]