RubyGems - miga-base - Versions diffs - 0.3.1.6 → 0.3.1.7 - Mend

miga-base 0.3.1.6 → 0.3.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

checksums.yaml +4 -4
data/actions/ncbi_get.rb +57 -42
data/lib/miga/result/base.rb +7 -0
data/lib/miga/result/dates.rb +42 -0
data/lib/miga/result.rb +4 -0
data/lib/miga/version.rb +1 -1
data/scripts/essential_genes.bash +5 -4
data/utils/enveomics/Makefile +1 -1
data/utils/enveomics/Manifest/Tasks/aasubs.json +75 -75
data/utils/enveomics/Manifest/Tasks/blasttab.json +194 -185
data/utils/enveomics/Manifest/Tasks/distances.json +130 -130
data/utils/enveomics/Manifest/Tasks/fasta.json +51 -3
data/utils/enveomics/Manifest/Tasks/fastq.json +161 -126
data/utils/enveomics/Manifest/Tasks/graphics.json +111 -111
data/utils/enveomics/Manifest/Tasks/mapping.json +30 -0
data/utils/enveomics/Manifest/Tasks/ogs.json +308 -265
data/utils/enveomics/Manifest/Tasks/other.json +451 -449
data/utils/enveomics/Manifest/Tasks/remote.json +1 -1
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +18 -10
data/utils/enveomics/Manifest/Tasks/tables.json +250 -250
data/utils/enveomics/Manifest/Tasks/trees.json +52 -52
data/utils/enveomics/Manifest/Tasks/variants.json +4 -4
data/utils/enveomics/Manifest/categories.json +12 -4
data/utils/enveomics/Manifest/examples.json +1 -1
data/utils/enveomics/Scripts/BedGraph.tad.rb +71 -0
data/utils/enveomics/Scripts/BlastTab.recplot2.R +23 -22
data/utils/enveomics/Scripts/FastA.split.rb +79 -0
data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
data/utils/enveomics/Scripts/JPlace.to_iToL.rb +272 -258
data/utils/enveomics/Scripts/aai.rb +13 -6
data/utils/enveomics/Scripts/ani.rb +2 -2
data/utils/enveomics/Scripts/clust.rand.rb +102 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +12 -14
data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +2 -2
data/utils/enveomics/Scripts/rbm.rb +23 -14
data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
data/utils/enveomics/enveomics.R/R/barplot.R +2 -2
metadata +9 -2

data/utils/enveomics/Manifest/Tasks/ogs.json CHANGED Viewed

@@ -8,41 +8,41 @@
       "help_arg": "--help",
       "options": [
         {
-	  "name": "Input file",
-	  "opt": "--in",
-	  "arg": "in_file",
-	  "mandatory": true,
-	  "description": ["Input file containing the OGs (as generated by",
-	    "ogs.mcl.rb)."]
-	},
-	{
-	  "name": "Output file",
-	  "opt": "--out",
-	  "arg": "out_file",
-	  "mandatory": true,
-	  "description": "Output file containing the annotated OGs."
-	},
-	{
-	  "name": "Annotations",
-	  "opt": "-a",
-	  "arg": "in_file",
-	  "mandatory": true,
-	  "multiple_sep": ",",
-	  "description": ["Input file(s) containing the annotations. One or",
-	    "more tab-delimited files with the gene names in the first column",
-	    "and the annotation in the second."]
-	},
-	{
-	  "opt": "--format",
-	  "arg": "string",
-	  "default": "(\\S+)\\.txt",
-	  "description": ["Format of the filenames for the annotation files,",
-	    "using regex syntax."]
-	},
-	{
-	  "opt": "--quiet",
-	  "description": "Run quietly (no STDERR output)."
-	}
+          "name": "Input file",
+          "opt": "--in",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Input file containing the OGs (as generated by",
+            "ogs.mcl.rb)."]
+        },
+        {
+          "name": "Output file",
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output file containing the annotated OGs."
+        },
+        {
+          "name": "Annotations",
+          "opt": "-a",
+          "arg": "in_file",
+          "mandatory": true,
+          "multiple_sep": ",",
+          "description": ["Input file(s) containing the annotations. One or",
+            "more tab-delimited files with the gene names in the first column",
+            "and the annotation in the second."]
+        },
+        {
+          "opt": "--format",
+          "arg": "string",
+          "default": "(\\S+)\\.txt",
+          "description": ["Format of the filenames for the annotation files,",
+            "using regex syntax."]
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
       ]
     },
     {
@@ -52,48 +52,48 @@
       "help_arg": "--help",
       "requires": [
         {
-	  "ruby_gem": "json"
-	}
+          "ruby_gem": "json"
+        }
       ],
       "see_also": ["ogs.mcl.rb"],
       "options": [
         {
-	  "opt": "--ogs",
-	  "arg": "in_file",
-	  "mandatory": true,
-	  "description": "Input file containing the precomputed OGs."
-	},
-	{
-	  "opt": "--summary",
-	  "arg": "out_file",
-	  "description": ["Output file in tabular format with summary",
-	    "statistics."]
-	},
-	{
-	  "opt": "--tab",
-	  "arg": "out_file",
-	  "description": "Output file in tabular format."
-	},
-	{
-	  "opt": "--json",
-	  "arg": "out_file",
-	  "description": "Output file in JSON format."
-	},
-	{
-	  "opt": "--replicates",
-	  "arg": "integer",
-	  "description": "Number of replicates to estimate.",
-	  "default": 100
-	},
-	{
-	  "opt": "--threads",
-	  "arg": "integer",
-	  "description": "Children threads to spawn."
-	},
-	{
-	  "opt": "--quiet",
-	  "description": "Run quietly (no STDERR output)."
-	}
+          "opt": "--ogs",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input file containing the precomputed OGs."
+        },
+        {
+          "opt": "--summary",
+          "arg": "out_file",
+          "description": ["Output file in tabular format with summary",
+            "statistics."]
+        },
+        {
+          "opt": "--tab",
+          "arg": "out_file",
+          "description": "Output file in tabular format."
+        },
+        {
+          "opt": "--json",
+          "arg": "out_file",
+          "description": "Output file in JSON format."
+        },
+        {
+          "opt": "--replicates",
+          "arg": "integer",
+          "description": "Number of replicates to estimate.",
+          "default": 100
+        },
+        {
+          "opt": "--threads",
+          "arg": "integer",
+          "description": "Child threads to spawn."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
       ]
     },
     {
@@ -104,77 +104,77 @@
       "see_also": ["ogs.mcl.rb"],
       "options": [
         {
-	  "name": "Input file",
-	  "opt": "--in",
-	  "arg": "in_file",
-	  "mandatory": true,
-	  "description": ["Input file containing the OGs (as generated by",
-	    "ogs.mcl.rb)."]
-	},
-	{
-	  "name": "Output file",
-	  "opt": "--out",
-	  "arg": "out_file",
-	  "mandatory": true,
-	  "description": "Output directory where to place extracted sequences."
-	},
-	{
-	  "name": "Sequences",
-	  "opt": "--seqs",
-	  "arg": "in_file",
-	  "mandatory": true,
-	  "description": ["Path to the proteomes in FastA format, using '%s'",
-	    "to denote the genome. For example: /path/to/seqs/%s.faa."]
-	},
-	{
-	  "opt": "--core",
-	  "arg": "float",
-	  "description": ["Use only OGs present in at least this fraction of",
-	    "the genomes. To use only the strict core genome*, use --core 1."],
-	  "note": ["* To use only the unus genome (OGs with exactly one gene",
-	    "per genome), use: --core 1 --duplicates 1."]
-	},
-	{
-	  "opt": "--duplicates",
-	  "arg": "integer",
-	  "description": ["Use only OGs with less than this number of",
-	    "in-paralogs in a genome. To use only genes without in-paralogs*,",
-	    "use --duplicates 1."],
-	  "note": ["* To use only the unus genome (OGs with exactly one gene",
-	    "per genome), use: --core 1 --duplicates 1."]
-	},
-	{
-	  "opt": "--per-genome",
-	  "description": ["If set, the output is generated per genome. By",
-	    "default, the output is per OG."]
-	},
-	{
-	  "opt": "--prefix",
-	  "description": ["If set, each sequence is prefixed with the genome",
-	    "name (or OG number, if --per-genome) and a dash."]
-	},
-	{
-	  "opt": "--rand",
-	  "description": ["Get only one gene per genome per OG (random)",
-	    "regardless of in-paralogs. By default all genes are extracted."]
-	},
-	{
-	  "opt": "--first",
-	  "description": ["Get only one gene per genome per OG (first)",
-	    "regardless of in-paralogs. By default all genes are extracted.",
+          "name": "Input file",
+          "opt": "--in",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Input file containing the OGs (as generated by",
+            "ogs.mcl.rb)."]
+        },
+        {
+          "name": "Output file",
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output directory where to place extracted sequences."
+        },
+        {
+          "name": "Sequences",
+          "opt": "--seqs",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Path to the proteomes in FastA format, using '%s'",
+            "to denote the genome. For example: /path/to/seqs/%s.faa."]
+        },
+        {
+          "opt": "--core",
+          "arg": "float",
+          "description": ["Use only OGs present in at least this fraction of",
+            "the genomes. To use only the strict core genome*, use --core 1."],
+          "note": ["* To use only the unus genome (OGs with exactly one gene",
+            "per genome), use: --core 1 --duplicates 1."]
+        },
+        {
+          "opt": "--duplicates",
+          "arg": "integer",
+          "description": ["Use only OGs with less than this number of",
+            "in-paralogs in a genome. To use only genes without in-paralogs*,",
+            "use --duplicates 1."],
+          "note": ["* To use only the unus genome (OGs with exactly one gene",
+            "per genome), use: --core 1 --duplicates 1."]
+        },
+        {
+          "opt": "--per-genome",
+          "description": ["If set, the output is generated per genome. By",
+            "default, the output is per OG."]
+        },
+        {
+          "opt": "--prefix",
+          "description": ["If set, each sequence is prefixed with the genome",
+            "name (or OG number, if --per-genome) and a dash."]
+        },
+        {
+          "opt": "--rand",
+          "description": ["Get only one gene per genome per OG (random)",
+            "regardless of in-paralogs. By default all genes are extracted."]
+        },
+        {
+          "opt": "--first",
+          "description": ["Get only one gene per genome per OG (first)",
+            "regardless of in-paralogs. By default all genes are extracted.",
             "Takes precedence over --rand."]
-	},
-	{
-	  "opt": "--quiet",
-	  "description": "Run quietly (no STDERR output)."
-	}
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
       ]
     },
     {
       "task": "ogs.mcl.rb",
       "description": ["Identifies Orthology Groups (OGs) in Reciprocal Best",
         "Matches (RBM) between all pairs in a collection of genomes, using the",
-	"Markov Cluster Algorithm."],
+        "Markov Cluster Algorithm."],
       "see_also": ["ogs.annotate.rb", "ogs.core-pan.rb", "ogs.extract.rb",
         "ogs.stats.rb"],
       "cite": [["Enright et al, 2002, NAR",
@@ -182,72 +182,72 @@
       "help_arg": "--help",
       "options": [
         {
-	  "opt": "--out",
-	  "arg": "out_file",
-	  "mandatory": true,
-	  "description": "Output file containing the detected OGs."
-	},
-	{
-	  "opt": "--dir",
-	  "arg": "in_dir",
-	  "description": "Directory containing the RBM files.",
-	  "note": "Mandatory, unless --abc is set to a non-empty file."
-	},
-	{
-	  "opt": "--format",
-	  "arg": "string",
-	  "description": ["Format of the filenames for the RBM files (within",
-	    "--dir), using regex syntax."],
-	  "default": "(\\S+)-(\\S+)\\.rbm"
-	},
-	{
-	  "opt": "--inflation",
-	  "arg": "float",
-	  "description": "Inflation parameter for MCL clustering.",
-	  "default": 1.5
-	},
-	{
-	  "opt": "--blind",
-	  "description": ["If set, computes clusters without taking bitscore",
-	    "into account."]
-	},
-	{
-	  "opt": "--evalue",
-	  "description": ["If set, uses the e-value to weight edges, instead",
-	    "of the default Bit-Score."]
-	},
-	{
-	  "opt": "--identity",
-	  "description": ["If set, uses the identity to weight edges, instead",
-	    "of the default Bit-Score."]
-	},
-	{
-	  "opt": "--best-match",
-	  "description": ["If set, it assumes best-matches instead reciprocal",
-	    "best matches."]
-	},
-	{
-	  "opt": "--mcl-bin",
-	  "arg": "in_dir",
-	  "description": ["Path to the directory containing the mcl binaries.",
-	    "By default, assumed to be in the PATH."]
-	},
-	{
-	  "name": "abc",
-	  "arg": "out_file",
-	  "opt": "--abc",
-	  "description": "Use this abc file instead of a temporal file."
-	},
-	{
-	  "opt": "--threads",
-	  "arg": "integer",
-	  "default": 2,
-	  "description": "Number of threads to use."
-	},
-	{
-	  "opt": "--quiet",
-	  "description": "Run quietly (no STDERR output)."
-	}
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output file containing the detected OGs."
+        },
+        {
+          "opt": "--dir",
+          "arg": "in_dir",
+          "description": "Directory containing the RBM files.",
+          "note": "Mandatory, unless --abc is set to a non-empty file."
+        },
+        {
+          "opt": "--format",
+          "arg": "string",
+          "description": ["Format of the filenames for the RBM files (within",
+            "--dir), using regex syntax."],
+          "default": "(\\S+)-(\\S+)\\.rbm"
+        },
+        {
+          "opt": "--inflation",
+          "arg": "float",
+          "description": "Inflation parameter for MCL clustering.",
+          "default": 1.5
+        },
+        {
+          "opt": "--blind",
+          "description": ["If set, computes clusters without taking bitscore",
+            "into account."]
+        },
+        {
+          "opt": "--evalue",
+          "description": ["If set, uses the e-value to weight edges, instead",
+            "of the default Bit-Score."]
+        },
+        {
+          "opt": "--identity",
+          "description": ["If set, uses the identity to weight edges, instead",
+            "of the default Bit-Score."]
+        },
+        {
+          "opt": "--best-match",
+          "description": ["If set, it assumes best-matches instead of",
+            "reciprocal best matches."]
+        },
+        {
+          "opt": "--mcl-bin",
+          "arg": "in_dir",
+          "description": ["Path to the directory containing the mcl binaries.",
+            "By default, assumed to be in the PATH."]
+        },
+        {
+          "name": "abc",
+          "arg": "out_file",
+          "opt": "--abc",
+          "description": "Use this abc file instead of a temporary file."
+        },
+        {
+          "opt": "--threads",
+          "arg": "integer",
+          "default": 2,
+          "description": "Number of threads to use."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
       ]
     },
     {
@@ -256,44 +256,44 @@
         "Matches (RBM) between all pairs in a collection of genomes."],
       "warn": ["This script suffers from chaining effect and is very",
         "sensitive to spurious connections, because it applies a greedy",
-	"clustering algorithm. For most practical purposes, the use of this",
-	"script is discouraged and `ogs.mcl.rb` should be preferred."],
+        "clustering algorithm. For most practical purposes, the use of this",
+        "script is discouraged and `ogs.mcl.rb` should be preferred."],
       "help_arg": "--help",
       "see_also": ["ogs.mcl.rb"],
       "options": [
         {
-	  "opt": "--out",
-	  "mandatory": true,
-	  "arg": "out_file",
-	  "description": "Output file containing the detected OGs."
-	},
-	{
-	  "opt": "--dir",
-	  "arg": "in_dir",
-	  "description": "Directory containing the RBM files.",
-	  "note": "Required unless --pre-ogs is passed."
-	},
-	{
-	  "opt": "--pre-ogs",
-	  "arg": "in_file",
-	  "multiple_sep": ",",
-	  "description": "Pre-computed OGs file(s), separated by commas."
-	},
-	{
-	  "opt": "--unchecked",
-	  "description": "Do not check internal redundancy in OGs."
-	},
-	{
-	  "opt": "--format",
-	  "arg": "string",
-	  "default": "(\\S+)-(\\S+)\\.rbm",
-	  "description": ["Format of the filenames for the RBM files (within",
-	    "-d), using regex syntax."]
-	},
-	{
-	  "opt": "--quiet",
-	  "description": "Run quietly (no STDERR output)."
-	}
+          "opt": "--out",
+          "mandatory": true,
+          "arg": "out_file",
+          "description": "Output file containing the detected OGs."
+        },
+        {
+          "opt": "--dir",
+          "arg": "in_dir",
+          "description": "Directory containing the RBM files.",
+          "note": "Required unless --pre-ogs is passed."
+        },
+        {
+          "opt": "--pre-ogs",
+          "arg": "in_file",
+          "multiple_sep": ",",
+          "description": "Pre-computed OGs file(s), separated by commas."
+        },
+        {
+          "opt": "--unchecked",
+          "description": "Do not check internal redundancy in OGs."
+        },
+        {
+          "opt": "--format",
+          "arg": "string",
+          "default": "(\\S+)-(\\S+)\\.rbm",
+          "description": ["Format of the filenames for the RBM files (within",
+            "--dir), using regex syntax."]
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
       ]
     },
     {
@@ -305,34 +305,77 @@
       "requires": [ { "ruby_gem": "json" } ],
       "options": [
         {
-	  "opt": "--ogs",
-	  "arg": "in_file",
-	  "mandatory": true,
-	  "description": "Input file containing the precomputed OGs."
-	},
-	{
-	  "opt": "--json",
-	  "arg": "out_file",
-	  "description": "Output file in JSON format."
-	},
-	{
-	  "opt": "--tab",
-	  "arg": "out_file",
-	  "description": "Output file in tabular format."
-	},
-	{
-	  "opt": "--transposed-tab",
-	  "arg": "out_file",
-	  "description": "Output file in transposed tabular format."
-	},
-	{
-	  "opt": "--auto",
-	  "description": "Run completely quiertly (no STDERR or STDOUT)."
-	},
-	{
-	  "opt": "--quiet",
-	  "description": "Run quietly (no STDERR output)."
-	}
+          "opt": "--ogs",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input file containing the precomputed OGs."
+        },
+        {
+          "opt": "--json",
+          "arg": "out_file",
+          "description": "Output file in JSON format."
+        },
+        {
+          "opt": "--tab",
+          "arg": "out_file",
+          "description": "Output file in tabular format."
+        },
+        {
+          "opt": "--transposed-tab",
+          "arg": "out_file",
+          "description": "Output file in transposed tabular format."
+        },
+        {
+          "opt": "--auto",
+          "description": "Run completely quietly (no STDERR or STDOUT)."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
+    {
+      "task": "clust.rand.rb",
+      "description": ["Calculates the Rand Index and the Adjusted Rand Index",
+        "between two clusterings. The clustering format is a raw text file",
+        "with one cluster per line, each defined as comma-delimited members,",
+        "and a header line (ignored). Note that this is equivalent to the OGs",
+        "format for 1 genome."],
+      "see_also": ["ogs.mcl.rb"],
+      "help_arg": "--help",
+      "cite": [
+        ["Rand, 1971, J Am Stat Assoc",
+          "https://doi.org/10.2307%2F2284239"],
+        ["Hubert & Arabie, 1985, J Classif",
+          "https://doi.org/10.1007%2FBF01908075"]
+      ],
+      "options": [
+        {
+          "name": "Input file 1",
+          "opt": "--clust1",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "First input file."
+        },
+        {
+          "name": "Input file 2",
+          "opt": "--clust2",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Second input file."
+        },
+        {
+          "name": "Precision",
+          "opt": "--prec",
+          "arg": "integer",
+          "description": "Precision to report.",
+          "default": 6
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
       ]
     }
   ]