RubyGems - miga-base - Versions diffs - 0.3.9.0 → 0.3.9.1 - Mend

miga-base 0.3.9.0 → 0.3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72)

checksums.yaml +4 -4
data/actions/add.rb +33 -33
data/actions/edit.rb +33 -0
data/actions/new.rb +17 -18
data/actions/next_step.rb +33 -0
data/actions/run.rb +15 -12
data/bin/miga +43 -37
data/lib/miga/daemon.rb +2 -2
data/lib/miga/project/result.rb +16 -1
data/lib/miga/version.rb +2 -2
data/scripts/aai_distances.bash +1 -3
data/scripts/ani_distances.bash +1 -3
data/scripts/assembly.bash +1 -3
data/scripts/cds.bash +1 -3
data/scripts/clade_finding.bash +1 -3
data/scripts/d.bash +13 -0
data/scripts/distances.bash +1 -3
data/scripts/essential_genes.bash +1 -3
data/scripts/haai_distances.bash +1 -3
data/scripts/miga.bash +12 -9
data/scripts/mytaxa.bash +1 -3
data/scripts/mytaxa_scan.bash +1 -3
data/scripts/ogs.bash +36 -33
data/scripts/p.bash +23 -0
data/scripts/project_stats.bash +1 -3
data/scripts/read_quality.bash +1 -3
data/scripts/ssu.bash +1 -3
data/scripts/stats.bash +1 -3
data/scripts/subclades.bash +1 -3
data/scripts/taxonomy.bash +1 -3
data/scripts/trimmed_fasta.bash +1 -3
data/scripts/trimmed_reads.bash +1 -3
data/test/daemon_test.rb +3 -3
data/utils/distance/runner.rb +1 -1
data/utils/enveomics/Docs/recplot2.md +13 -2
data/utils/enveomics/Examples/aai-matrix.bash +3 -3
data/utils/enveomics/Examples/ani-matrix.bash +3 -3
data/utils/enveomics/Makefile +2 -2
data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
data/utils/enveomics/Manifest/Tasks/other.json +49 -0
data/utils/enveomics/Manifest/categories.json +4 -0
data/utils/enveomics/Manifest/examples.json +1 -1
data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
data/utils/enveomics/Scripts/aai.rb +4 -3
data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
data/utils/enveomics/enveomics.R/R/utils.R +19 -1
data/utils/enveomics/enveomics.R/README.md +11 -0
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
data/utils/subclade/runner.rb +4 -0
metadata +14 -3

data/scripts/miga.bash CHANGED Viewed

@@ -2,7 +2,7 @@
 set -e
 #MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
 # shellcheck source=/dev/null
-source "$HOME/.miga_rc"
+. "$HOME/.miga_rc"
 export PATH="$MIGA/bin:$MIGA/utils/enveomics/Scripts:$PATH"
 SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
@@ -11,15 +11,18 @@ function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
 for i in $(miga plugins -P "$PROJECT") ; do
   # shellcheck source=/dev/null
-  source "$i/scripts-plugin.bash"
+  . "$i/scripts-plugin.bash"
 done
-[[ -n $DATASET ]] \
-      && miga add -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT" --update
+if [[ "$SCRIPT" != "d" && "$SCRIPT" != "p" ]] ; then
+  echo -n "Date: " ; miga date
+  echo "MiGA: $MIGA"
+  echo "Task: $SCRIPT"
+  echo "Project: $PROJECT"
+  if [[ -n $DATASET ]] ; then
+    echo "Dataset: $DATASET"
+    miga edit -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT"
+  fi
+fi
 true
-#if [[ "$RUNTYPE" == "qsub" ]] ; then
-#elif [[ "$RUNTYPE" == "msub" ]] ; then
-#fi

data/scripts/mytaxa.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
 set -e
 SCRIPT="mytaxa"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
 [[ -d "$DIR" ]] || mkdir -p "$DIR"
 cd "$DIR"

data/scripts/mytaxa_scan.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
 set -e
 SCRIPT="mytaxa_scan"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
 [[ -d "$DIR" ]] || mkdir -p "$DIR"
 cd "$DIR"

data/scripts/ogs.bash CHANGED Viewed

@@ -2,49 +2,52 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
 set -e
 SCRIPT="ogs"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 cd "$PROJECT/data/10.clades/03.ogs"
 # Initialize
 miga date > "miga-project.start"
 DS=$(miga ls -P "$PROJECT" --ref --no-multi)
-MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
-[[ $MIN_ID == "?" ]] && MIN_ID=80
-if [[ ! -s miga-project.ogs ]] ; then
-  # Extract RBMs
-  if [[ ! -s miga-project.abc ]] ; then
-    [[ -d miga-project.tmp ]] || mkdir miga-project.tmp
-    for i in $DS ; do
-      file="miga-project.tmp/$i.abc"
-      [[ -s "$file" ]] && continue
-      echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
-        | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
-        | awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
-        > "$file.tmp"
-      mv "$file.tmp" "$file"
-    done
-    cat miga-project.tmp/*.abc > miga-project.abc
-  fi
-  rm -rf miga-project.tmp
-  # Estimate OGs and Clean RBMs
-  ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
-  if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
-    rm miga-project.abc
-  else
-    gzip -9 miga-project.abc
+if [[ -n $DS ]] ; then
+  MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
+  [[ $MIN_ID == "?" ]] && MIN_ID=80
+  if [[ ! -s miga-project.ogs ]] ; then
+    # Extract RBMs
+    if [[ ! -s miga-project.abc ]] ; then
+      [[ -d miga-project.tmp ]] || mkdir miga-project.tmp
+      for i in $DS ; do
+        file="miga-project.tmp/$i.abc"
+        [[ -s "$file" ]] && continue
+        echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
+          | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
+          | awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
+          > "$file.tmp"
+        mv "$file.tmp" "$file"
+      done
+      cat miga-project.tmp/*.abc > miga-project.abc
+    fi
+    rm -rf miga-project.tmp
+    # Estimate OGs and Clean RBMs
+    ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
+    if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
+      rm miga-project.abc
+    else
+      gzip -9 miga-project.abc
+    fi
   fi
-fi
-# Calculate Statistics
-ogs.stats.rb -o miga-project.ogs -j miga-project.stats
-ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
-Rscript "$MIGA/utils/core-pan-plot.R" \
-  miga-project.core-pan.tsv miga-project.core-pan.pdf
+  # Calculate Statistics
+  ogs.stats.rb -o miga-project.ogs -j miga-project.stats
+  ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
+  Rscript "$MIGA/utils/core-pan-plot.R" \
+    miga-project.core-pan.tsv miga-project.core-pan.pdf
+else
+  touch miga-project.empty
+fi
 # Finalize
 miga date > "miga-project.done"

data/scripts/p.bash ADDED Viewed

@@ -0,0 +1,23 @@
+#!/bin/bash
+# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
+set -e
+SCRIPT="p"
+# shellcheck source=scripts/miga.bash
+. "$MIGA/scripts/miga.bash" || exit 1
+while true ; do
+  res="$(miga next_step -P "$PROJECT")"
+  [[ "$res" == '?' ]] && break
+  miga run -P "$PROJECT" -r "$res" -t "$CORES"
+  if [[ "$res" == "$last_res" ]] ; then
+    let k=$k+1
+    if [[ $k -gt 10 ]] ; then
+      miga new --update -P "$PROJECT" \
+        -m "run_$res=false,warn=Too many failed attempts to run $res."
+    fi
+  else
+    k=0
+    last_res=$res
+  fi
+done

data/scripts/project_stats.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
 set -e
 SCRIPT="project_stats"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 DIR="$PROJECT/data/90.stats"
 [[ -d "$DIR" ]] || mkdir -p "$DIR"
 cd "$DIR"

data/scripts/read_quality.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
 set -e
 SCRIPT="read_quality"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 cd "$PROJECT/data/03.read_quality"
 b=$DATASET

data/scripts/ssu.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
 set -e
 SCRIPT="ssu"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
 [[ -d "$DIR" ]] || mkdir -p "$DIR"
 cd "$DIR"

data/scripts/stats.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
 set -e
 SCRIPT="stats"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 DIR="$PROJECT/data/90.stats"
 [[ -d "$DIR" ]] || mkdir -p "$DIR"
 cd "$DIR"

data/scripts/subclades.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
 set -e
 SCRIPT="subclades"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 cd "$PROJECT/data/10.clades/02.ani"
 # Initialize

data/scripts/taxonomy.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
 set -e
 SCRIPT="taxonomy"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 DIR="$PROJECT/data/09.distances/05.taxonomy"
 [[ -d "$DIR" ]] || mkdir -p "$DIR"
 cd "$DIR"

data/scripts/trimmed_fasta.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
 set -e
 SCRIPT="trimmed_fasta"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 cd "$PROJECT/data/04.trimmed_fasta"
 b=$DATASET

data/scripts/trimmed_reads.bash CHANGED Viewed

@@ -2,10 +2,8 @@
 # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
 set -e
 SCRIPT="trimmed_reads"
-echo "MiGA: $MIGA"
-echo "Project: $PROJECT"
 # shellcheck source=scripts/miga.bash
-source "$MIGA/scripts/miga.bash" || exit 1
+. "$MIGA/scripts/miga.bash" || exit 1
 cd "$PROJECT/data/02.trimmed_reads"
 b=$DATASET

data/test/daemon_test.rb CHANGED Viewed

@@ -40,10 +40,10 @@ class DaemonTest < Test::Unit::TestCase
     out = capture_stdout do
       d.check_datasets
     end
-    assert(out.string =~ /Queueing #{ds.name}:trimmed_reads/)
+    assert(out.string =~ /Queueing #{ds.name}:d/)
     assert_equal(1, d.jobs_to_run.size)
-    assert_equal("project1:trimmed_reads:ds1", d.jobs_to_run.first[:cmd])
-    assert_equal(d.jobs_to_run.first, d.get_job(:trimmed_reads, ds))
+    assert_equal("project1:d:ds1", d.jobs_to_run.first[:cmd])
+    assert_equal(d.jobs_to_run.first, d.get_job(:d, ds))
   end
   def test_in_loop

data/utils/distance/runner.rb CHANGED Viewed

@@ -35,7 +35,7 @@ class MiGA::DistanceRunner
     elsif !opts[:run_taxonomy] and dataset.metadata[:db_project]
       ref_path = dataset.metadata[:db_project]
       if project.metadata[:db_proj_dir]
-        ref_path = File.expand_path(project.metadata[:db_proj_dir], ref_path)
+        ref_path = File.expand_path(ref_path, project.metadata[:db_proj_dir])
       end
       @ref_project = MiGA::Project.load(ref_path)
       raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?

data/utils/enveomics/Docs/recplot2.md CHANGED Viewed

@@ -117,11 +117,22 @@ library(enveomics.R)
 load('my-recplot.rdata')
 ```
-### Average and median sequencing depth
+### Centrality measures of sequencing depth
 ```R
 mean(enve.recplot2.seqdepth(rp)) # <- Average
 median(enve.recplot2.seqdepth(rp)) # <- Median
+enve.truncate(enve.recplot2.seqdepth(rp)) # <- 95% Central Truncated Mean
+enve.truncate(enve.recplot2.seqdepth(rp), 0.9) # <- 90% Central Truncated Mean
+```
+The functions above only use hits with identity above the cutoff for "in-group" (by default: 95%).
+In order to estimate the sequencing depth with a different identity cutoff, modify the cutoff first:
+```R
+rp98 <- enve.recplot2.changeCutoff(rp, 98) # <- Change to ≥98%
+mean(enve.recplot2.seqdepth(rp98)) # <- Average (for the new object)
+median(enve.recplot2.seqdepth(rp98)) # <- Median (for the new object)
 ```
 ### Average and median sequencing depth excluding zero-coverage windows
@@ -189,7 +200,7 @@ p <- plot(rp, use.peaks=peaks, layout=4) # <- Remove `layout=4` for the full plo
 dev.off()
 ```
-The key function here is `enve.recplo2.findPeaks`. This function has several parameters, depending on
+The key function here is `enve.recplot2.findPeaks`. This function has several parameters, depending on
 the method used. To see all supported methods, use `?enve.recplot2.findPeaks`. To see all the options
 of the default method (`'emauto'`) use `?enve.recplot2.findPeaks.emauto`.

data/utils/enveomics/Examples/aai-matrix.bash CHANGED Viewed

@@ -59,8 +59,8 @@ echo "select seq1, seq2, aai, sd, n, omega, (100.0*n/omega) from aai;" \
 echo "[03/03] Generating distance matrix"
 echo "
 source('$(dirname $0)/../enveomics.R/R/df2dist.R');
-a <- read.table('$OUT', sep='\\t', h=TRUE, as.is=T);
-aai.d <- enve.df2dist(a, default.d=$DEF_DIST, max.sim=100);
+a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
+aai.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
 write.table(as.matrix(aai.d), '$OUT.dist',
-  quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
+  quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
 " | R --vanilla >/dev/null

data/utils/enveomics/Examples/ani-matrix.bash CHANGED Viewed

@@ -59,8 +59,8 @@ echo "select seq1, seq2, ani, sd, n, omega, (100.0*n/omega) from ani;" \
 echo "[03/03] Generating distance matrix"
 echo "
 source('$(dirname $0)/../enveomics.R/R/df2dist.R');
-a <- read.table('$OUT', sep='\\t', h=TRUE, as.is=T);
-ani.d <- enve.df2dist(a, default.d=$DEF_DIST, max.sim=100);
+a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
+ani.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
 write.table(as.matrix(ani.d), '$OUT.dist',
-  quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
+  quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
 " | R --vanilla >/dev/null

data/utils/enveomics/Makefile CHANGED Viewed

@@ -7,7 +7,7 @@ include globals.mk
 TEST=Tests
 enveomics_r=enveomics.R
-enveomics_r_v=enveomics.R_1.3
+enveomics_r_v=enveomics.R_$(shell grep '^Version: ' enveomics.R/DESCRIPTION | perl -pe 's/.*: //')
 .PHONY: test install install-scripts install-r uninstall install-deps
 test: $(enveomics_r_v).tar.gz
@@ -41,7 +41,7 @@ uninstall:
 	-$(R) CMD REMOVE $(enveomics_r)
 $(enveomics_r_v).tar.gz: install-deps
-	-rm -r $(enveomics_r).tar.gz
+	rm -f $(enveomics_r_v).tar.gz
 	./build_enveomics_r.bash
 	$(R) CMD build $(enveomics_r)/
 	$(MAKE) install-r

data/utils/enveomics/Manifest/Tasks/blasttab.json CHANGED Viewed

@@ -99,6 +99,7 @@
         "files using <map.bls> as prefix with extensions .rec (for the",
         "recruitment plot) and .lim (for the limits of the different sequences",
         "in <seq.fa>)."],
+      "see_also": ["BlastTab.recplot2.R", "GFF.catsbj.pl"],
       "help_arg": "-h",
       "options": [
          {
@@ -119,8 +120,8 @@
            "opt": "-s",
            "name": "Subset",
            "description": ["The FastA provided is to be treated as a subset of",
-             "the subject. By default, it expects all the subjects to be",
-             "present in the BLAST."]
+             "the subject. By default, it expects all the BLAST subjects to be",
+             "present in the FastA."]
          },
          {
            "opt": "-q",
@@ -623,7 +624,8 @@
         { "r_package": "optparse" },
         { "r_package": "enveomics.R" }
       ],
-      "see_also": [ "RecPlot2.compareIdentities.R" ],
+      "see_also": ["BlastTab.catsbj.pl", "GFF.catsbj.pl",
+        "RecPlot2.compareIdentities.R"],
       "options": [
         {
           "opt": "--prefix",
@@ -637,7 +639,13 @@
           "opt": "--pos-breaks",
           "arg": "integer",
           "default": 1000,
-          "description": ["Breaks in the positions histogram."]
+          "description": "Breaks in the positions histogram."
+        },
+        {
+          "opt": "--pos-breaks-tsv",
+          "arg": "in_file",
+          "description": ["File with (absolute) coordinates of breaks in the",
+            "position histogram."]
         },
         {
           "opt": "--id-breaks",

data/utils/enveomics/Manifest/Tasks/fasta.json CHANGED Viewed

@@ -216,6 +216,50 @@
         }
       ]
     },
+    {
+      "task": "FastA.mask.rb",
+      "description": "Mask sequence region(s) in a FastA file.",
+      "help_arg": "--help",
+      "options": [
+        {
+          "opt": "--in",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input FastA file."
+        },
+        {
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output FastA file."
+        },
+        {
+          "opt": "--regions",
+          "arg": "string",
+          "mandatory": true,
+          "description": ["Regions to mask separated by commas.",
+            "Each region must be in the format \"sequence_id:from..to\"."]
+        },
+        {
+          "opt": "--symbol",
+          "arg": "string",
+          "default": "N",
+          "description": "Character used to mask the region(s)."
+        },
+        {
+          "opt": "--trim",
+          "description": ["Trim masked regions extending to the edge of a",
+            "sequence."]
+        },
+        {
+          "opt": "--wrap",
+          "arg": "integer",
+          "default": 70,
+          "description": ["Line length to wrap sequences. Use 0 to generate",
+            "1-line sequences."]
+        }
+      ]
+    },
     {
       "task": "FastA.qlen.pl",
       "description": ["Calculates the quartiles of the length in a set of",
@@ -298,6 +342,49 @@
         }
       ]
     },
+    {
+      "task": "FastA.sample.rb",
+      "description": ["Samples a random set of sequences from a multi-FastA",
+        "file."],
+      "help_arg": "--help",
+      "see_also": "FastA.subsample.pl",
+      "options": [
+        {
+          "name": "Input File",
+          "opt": "--in",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input FastA file."
+        },
+        {
+          "name": "Output file",
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output FastA file."
+        },
+        {
+          "opt": "--fraction",
+          "arg": "float",
+          "description": ["Fraction of sequences to sample [0-1].",
+            "Mandatory unless Number is provided."]
+        },
+        {
+          "opt": "--number",
+          "arg": "integer",
+          "description": ["Number of sequences to sample.",
+            "Mandatory unless -f is provided."]
+        },
+        {
+          "opt": "--replacement",
+          "description": "Sample with replacement."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
     {
       "task": "FastA.slider.pl",
       "description": "Slices sequences in fixed- or variable-length windows.",
@@ -432,6 +519,7 @@
       "task": "FastA.subsample.pl",
       "description": "Subsamples a set of sequences.",
       "help_arg": "-h",
+      "see_also": "FastA.sample.rb",
       "options": [
         {
           "name": "Fraction",
@@ -548,6 +636,53 @@
         }
       ]
     },
+    {
+      "task": "FastA.extract.rb",
+      "description": ["Extracts a list of sequences and/or coordinates from",
+        "multi-FastA files."],
+      "help_arg": "--help",
+      "options": [
+        {
+          "name": "Input file",
+          "opt": "--in",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input FastA file."
+        },
+        {
+          "name": "Output file",
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output FastA file."
+        },
+        {
+          "name": "Coordinates",
+          "opt": "--coords",
+          "arg": "string",
+          "description": ["Comma-delimited list of coordinates (mandatory",
+            "unless -C is passed).",
+            "The format of the coordinates is SEQ:FROM..TO or SEQ:FROM~LEN:",
+            "SEQ: Sequence ID, or * (asterisk) to extract range from all",
+              "sequences",
+            "FROM: Integer, position of the first base to include (can be",
+              "negative)",
+            "TO: Integer, last base to include (can be negative)",
+            "LEN: Length of the range to extract."]
+        },
+        {
+          "name": "Coordinates file",
+          "opt": "--coords-file",
+          "arg": "in_file",
+          "description": ["File containing the coordinates, one per line.",
+            "Each line must follow the format described for Coordinates."]
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
     {
       "task": "FastA.fragment.rb",
       "description": ["Simulates incomplete (fragmented) drafts from complete",

data/utils/enveomics/Manifest/Tasks/other.json CHANGED Viewed

@@ -743,6 +743,55 @@
           "description": "Verbously display warnings."
         }
       ]
+    },
+    {
+      "task": "GFF.catsbj.pl",
+      "description": ["Generates a list of coordinates from a GFF table",
+        "concatenating the subject sequences."],
+      "help_arg": "-h",
+      "see_also": ["BlastTab.recplot2.R", "BlastTab.catsbj.pl"],
+      "options": [
+        {
+          "name": "Lim file",
+          "opt": "-L",
+          "arg": "out_file",
+          "description": ["An output file with the absolute coordinates of the",
+            "concatenated contigs. This is identical to the .lim file",
+            "generated by BlastTab.catsbj.pl."]
+        },
+        {
+          "name": "Inter-feature gaps",
+          "opt": "-i",
+          "description": ["Preserve exact coordinates and include",
+            "inter-feature windows as separate bins. By default, the",
+            "coordinates are set in the midpoint between features when",
+            "non-contiguous."]
+        },
+        {
+          "name": "Subset",
+          "opt": "-s",
+          "description": ["The FastA provided is to be treated as a subset of",
+            "the subject. By default, it expects all the contigs to be present",
+            "in the BLAST."]
+        },
+        {
+          "name": "Quiet",
+          "opt": "-q",
+          "description": "Run quietly."
+        },
+        {
+          "name": "Subject sequences",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Subject sequences (contigs) in FastA format."
+        },
+        {
+          "name": "Features",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Features to map in GFF."
+        }
+      ]
     }
   ]
 }