miga-base 0.3.9.0 → 0.3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/actions/add.rb +33 -33
  3. data/actions/edit.rb +33 -0
  4. data/actions/new.rb +17 -18
  5. data/actions/next_step.rb +33 -0
  6. data/actions/run.rb +15 -12
  7. data/bin/miga +43 -37
  8. data/lib/miga/daemon.rb +2 -2
  9. data/lib/miga/project/result.rb +16 -1
  10. data/lib/miga/version.rb +2 -2
  11. data/scripts/aai_distances.bash +1 -3
  12. data/scripts/ani_distances.bash +1 -3
  13. data/scripts/assembly.bash +1 -3
  14. data/scripts/cds.bash +1 -3
  15. data/scripts/clade_finding.bash +1 -3
  16. data/scripts/d.bash +13 -0
  17. data/scripts/distances.bash +1 -3
  18. data/scripts/essential_genes.bash +1 -3
  19. data/scripts/haai_distances.bash +1 -3
  20. data/scripts/miga.bash +12 -9
  21. data/scripts/mytaxa.bash +1 -3
  22. data/scripts/mytaxa_scan.bash +1 -3
  23. data/scripts/ogs.bash +36 -33
  24. data/scripts/p.bash +23 -0
  25. data/scripts/project_stats.bash +1 -3
  26. data/scripts/read_quality.bash +1 -3
  27. data/scripts/ssu.bash +1 -3
  28. data/scripts/stats.bash +1 -3
  29. data/scripts/subclades.bash +1 -3
  30. data/scripts/taxonomy.bash +1 -3
  31. data/scripts/trimmed_fasta.bash +1 -3
  32. data/scripts/trimmed_reads.bash +1 -3
  33. data/test/daemon_test.rb +3 -3
  34. data/utils/distance/runner.rb +1 -1
  35. data/utils/enveomics/Docs/recplot2.md +13 -2
  36. data/utils/enveomics/Examples/aai-matrix.bash +3 -3
  37. data/utils/enveomics/Examples/ani-matrix.bash +3 -3
  38. data/utils/enveomics/Makefile +2 -2
  39. data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
  40. data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
  41. data/utils/enveomics/Manifest/Tasks/other.json +49 -0
  42. data/utils/enveomics/Manifest/categories.json +4 -0
  43. data/utils/enveomics/Manifest/examples.json +1 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
  47. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
  48. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
  49. data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
  50. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  51. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  52. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  53. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  54. data/utils/enveomics/Scripts/aai.rb +4 -3
  55. data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
  56. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
  57. data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
  58. data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
  59. data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
  60. data/utils/enveomics/enveomics.R/R/utils.R +19 -1
  61. data/utils/enveomics/enveomics.R/README.md +11 -0
  62. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
  63. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
  64. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
  65. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
  66. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
  67. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
  68. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
  69. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
  70. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
  71. data/utils/subclade/runner.rb +4 -0
  72. metadata +14 -3
data/scripts/miga.bash CHANGED
@@ -2,7 +2,7 @@
2
2
  set -e
3
3
  #MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
4
4
  # shellcheck source=/dev/null
5
- source "$HOME/.miga_rc"
5
+ . "$HOME/.miga_rc"
6
6
  export PATH="$MIGA/bin:$MIGA/utils/enveomics/Scripts:$PATH"
7
7
  SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
8
8
 
@@ -11,15 +11,18 @@ function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
11
11
 
12
12
  for i in $(miga plugins -P "$PROJECT") ; do
13
13
  # shellcheck source=/dev/null
14
- source "$i/scripts-plugin.bash"
14
+ . "$i/scripts-plugin.bash"
15
15
  done
16
16
 
17
- [[ -n $DATASET ]] \
18
- && miga add -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT" --update
17
+ if [[ "$SCRIPT" != "d" && "$SCRIPT" != "p" ]] ; then
18
+ echo -n "Date: " ; miga date
19
+ echo "MiGA: $MIGA"
20
+ echo "Task: $SCRIPT"
21
+ echo "Project: $PROJECT"
22
+ if [[ -n $DATASET ]] ; then
23
+ echo "Dataset: $DATASET"
24
+ miga edit -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT"
25
+ fi
26
+ fi
19
27
 
20
28
  true
21
-
22
- #if [[ "$RUNTYPE" == "qsub" ]] ; then
23
- #elif [[ "$RUNTYPE" == "msub" ]] ; then
24
- #fi
25
-
data/scripts/mytaxa.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="mytaxa"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="mytaxa_scan"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
data/scripts/ogs.bash CHANGED
@@ -2,49 +2,52 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
4
  SCRIPT="ogs"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/10.clades/03.ogs"
10
8
 
11
9
  # Initialize
12
10
  miga date > "miga-project.start"
13
11
 
14
12
  DS=$(miga ls -P "$PROJECT" --ref --no-multi)
15
- MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
16
- [[ $MIN_ID == "?" ]] && MIN_ID=80
17
- if [[ ! -s miga-project.ogs ]] ; then
18
- # Extract RBMs
19
- if [[ ! -s miga-project.abc ]] ; then
20
- [[ -d miga-project.tmp ]] || mkdir miga-project.tmp
21
- for i in $DS ; do
22
- file="miga-project.tmp/$i.abc"
23
- [[ -s "$file" ]] && continue
24
- echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
25
- | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
26
- | awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
27
- > "$file.tmp"
28
- mv "$file.tmp" "$file"
29
- done
30
- cat miga-project.tmp/*.abc > miga-project.abc
31
- fi
32
- rm -rf miga-project.tmp
33
13
 
34
- # Estimate OGs and Clean RBMs
35
- ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
36
- if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
37
- rm miga-project.abc
38
- else
39
- gzip -9 miga-project.abc
14
+ if [[ -n $DS ]] ; then
15
+ MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
16
+ [[ $MIN_ID == "?" ]] && MIN_ID=80
17
+ if [[ ! -s miga-project.ogs ]] ; then
18
+ # Extract RBMs
19
+ if [[ ! -s miga-project.abc ]] ; then
20
+ [[ -d miga-project.tmp ]] || mkdir miga-project.tmp
21
+ for i in $DS ; do
22
+ file="miga-project.tmp/$i.abc"
23
+ [[ -s "$file" ]] && continue
24
+ echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
25
+ | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
26
+ | awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
27
+ > "$file.tmp"
28
+ mv "$file.tmp" "$file"
29
+ done
30
+ cat miga-project.tmp/*.abc > miga-project.abc
31
+ fi
32
+ rm -rf miga-project.tmp
33
+
34
+ # Estimate OGs and Clean RBMs
35
+ ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
36
+ if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
37
+ rm miga-project.abc
38
+ else
39
+ gzip -9 miga-project.abc
40
+ fi
40
41
  fi
41
- fi
42
42
 
43
- # Calculate Statistics
44
- ogs.stats.rb -o miga-project.ogs -j miga-project.stats
45
- ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
46
- Rscript "$MIGA/utils/core-pan-plot.R" \
47
- miga-project.core-pan.tsv miga-project.core-pan.pdf
43
+ # Calculate Statistics
44
+ ogs.stats.rb -o miga-project.ogs -j miga-project.stats
45
+ ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
46
+ Rscript "$MIGA/utils/core-pan-plot.R" \
47
+ miga-project.core-pan.tsv miga-project.core-pan.pdf
48
+ else
49
+ touch miga-project.empty
50
+ fi
48
51
 
49
52
  # Finalize
50
53
  miga date > "miga-project.done"
data/scripts/p.bash ADDED
@@ -0,0 +1,23 @@
1
+ #!/bin/bash
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
+ set -e
4
+ SCRIPT="p"
5
+ # shellcheck source=scripts/miga.bash
6
+ . "$MIGA/scripts/miga.bash" || exit 1
7
+
8
+ while true ; do
9
+ res="$(miga next_step -P "$PROJECT")"
10
+ [[ "$res" == '?' ]] && break
11
+ miga run -P "$PROJECT" -r "$res" -t "$CORES"
12
+ if [[ "$res" == "$last_res" ]] ; then
13
+ let k=$k+1
14
+ if [[ $k -gt 10 ]] ; then
15
+ miga new --update -P "$PROJECT" \
16
+ -m "run_$res=false,warn=Too many failed attempts to run $res."
17
+ fi
18
+ else
19
+ k=0
20
+ last_res=$res
21
+ fi
22
+ done
23
+
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
4
  SCRIPT="project_stats"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/90.stats"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="read_quality"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/03.read_quality"
10
8
 
11
9
  b=$DATASET
data/scripts/ssu.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="ssu"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
data/scripts/stats.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="stats"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/90.stats"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
4
  SCRIPT="subclades"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/10.clades/02.ani"
10
8
 
11
9
  # Initialize
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="taxonomy"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/09.distances/05.taxonomy"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="trimmed_fasta"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/04.trimmed_fasta"
10
8
 
11
9
  b=$DATASET
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="trimmed_reads"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/02.trimmed_reads"
10
8
 
11
9
  b=$DATASET
data/test/daemon_test.rb CHANGED
@@ -40,10 +40,10 @@ class DaemonTest < Test::Unit::TestCase
40
40
  out = capture_stdout do
41
41
  d.check_datasets
42
42
  end
43
- assert(out.string =~ /Queueing #{ds.name}:trimmed_reads/)
43
+ assert(out.string =~ /Queueing #{ds.name}:d/)
44
44
  assert_equal(1, d.jobs_to_run.size)
45
- assert_equal("project1:trimmed_reads:ds1", d.jobs_to_run.first[:cmd])
46
- assert_equal(d.jobs_to_run.first, d.get_job(:trimmed_reads, ds))
45
+ assert_equal("project1:d:ds1", d.jobs_to_run.first[:cmd])
46
+ assert_equal(d.jobs_to_run.first, d.get_job(:d, ds))
47
47
  end
48
48
 
49
49
  def test_in_loop
@@ -35,7 +35,7 @@ class MiGA::DistanceRunner
35
35
  elsif !opts[:run_taxonomy] and dataset.metadata[:db_project]
36
36
  ref_path = dataset.metadata[:db_project]
37
37
  if project.metadata[:db_proj_dir]
38
- ref_path = File.expand_path(project.metadata[:db_proj_dir], ref_path)
38
+ ref_path = File.expand_path(ref_path, project.metadata[:db_proj_dir])
39
39
  end
40
40
  @ref_project = MiGA::Project.load(ref_path)
41
41
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
@@ -117,11 +117,22 @@ library(enveomics.R)
117
117
  load('my-recplot.rdata')
118
118
  ```
119
119
 
120
- ### Average and median sequencing depth
120
+ ### Centrality measures of sequencing depth
121
121
 
122
122
  ```R
123
123
  mean(enve.recplot2.seqdepth(rp)) # <- Average
124
124
  median(enve.recplot2.seqdepth(rp)) # <- Median
125
+ enve.truncate(enve.recplot2.seqdepth(rp)) # <- 95% Central Truncated Mean
126
+ enve.truncate(enve.recplot2.seqdepth(rp), 0.9) # <- 90% Central Truncated Mean
127
+ ```
128
+
129
+ The functions above only use hits with identity above the cutoff for "in-group" (by default: 95%).
130
+ In order to estimate the sequencing depth with a different identity cutoff, modify the cutoff first:
131
+
132
+ ```R
133
+ rp98 <- enve.recplot2.changeCutoff(rp, 98) # <- Change to ≥98%
134
+ mean(enve.recplot2.seqdepth(rp98)) # <- Average (for the new object)
135
+ median(enve.recplot2.seqdepth(rp98)) # <- Median (for the new object)
125
136
  ```
126
137
 
127
138
  ### Average and median sequencing depth excluding zero-coverage windows
@@ -189,7 +200,7 @@ p <- plot(rp, use.peaks=peaks, layout=4) # <- Remove `layout=4` for the full plo
189
200
  dev.off()
190
201
  ```
191
202
 
192
- The key function here is `enve.recplo2.findPeaks`. This function has several parameters, depending on
203
+ The key function here is `enve.recplot2.findPeaks`. This function has several parameters, depending on
193
204
  the method used. To see all supported methods, use `?enve.recplot2.findPeaks`. To see all the options
194
205
  of the default method (`'emauto'`) use `?enve.recplot2.findPeaks.emauto`.
195
206
 
@@ -59,8 +59,8 @@ echo "select seq1, seq2, aai, sd, n, omega, (100.0*n/omega) from aai;" \
59
59
  echo "[03/03] Generating distance matrix"
60
60
  echo "
61
61
  source('$(dirname $0)/../enveomics.R/R/df2dist.R');
62
- a <- read.table('$OUT', sep='\\t', h=TRUE, as.is=T);
63
- aai.d <- enve.df2dist(a, default.d=$DEF_DIST, max.sim=100);
62
+ a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
63
+ aai.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
64
64
  write.table(as.matrix(aai.d), '$OUT.dist',
65
- quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
65
+ quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
66
66
  " | R --vanilla >/dev/null
@@ -59,8 +59,8 @@ echo "select seq1, seq2, ani, sd, n, omega, (100.0*n/omega) from ani;" \
59
59
  echo "[03/03] Generating distance matrix"
60
60
  echo "
61
61
  source('$(dirname $0)/../enveomics.R/R/df2dist.R');
62
- a <- read.table('$OUT', sep='\\t', h=TRUE, as.is=T);
63
- ani.d <- enve.df2dist(a, default.d=$DEF_DIST, max.sim=100);
62
+ a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
63
+ ani.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
64
64
  write.table(as.matrix(ani.d), '$OUT.dist',
65
- quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
65
+ quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
66
66
  " | R --vanilla >/dev/null
@@ -7,7 +7,7 @@ include globals.mk
7
7
 
8
8
  TEST=Tests
9
9
  enveomics_r=enveomics.R
10
- enveomics_r_v=enveomics.R_1.3
10
+ enveomics_r_v=enveomics.R_$(shell grep '^Version: ' enveomics.R/DESCRIPTION | perl -pe 's/.*: //')
11
11
  .PHONY: test install install-scripts install-r uninstall install-deps
12
12
 
13
13
  test: $(enveomics_r_v).tar.gz
@@ -41,7 +41,7 @@ uninstall:
41
41
  -$(R) CMD REMOVE $(enveomics_r)
42
42
 
43
43
  $(enveomics_r_v).tar.gz: install-deps
44
- -rm -r $(enveomics_r).tar.gz
44
+ rm -f $(enveomics_r_v).tar.gz
45
45
  ./build_enveomics_r.bash
46
46
  $(R) CMD build $(enveomics_r)/
47
47
  $(MAKE) install-r
@@ -99,6 +99,7 @@
99
99
  "files using <map.bls> as prefix with extensions .rec (for the",
100
100
  "recruitment plot) and .lim (for the limits of the different sequences",
101
101
  "in <seq.fa>)."],
102
+ "see_also": ["BlastTab.recplot2.R", "GFF.catsbj.pl"],
102
103
  "help_arg": "-h",
103
104
  "options": [
104
105
  {
@@ -119,8 +120,8 @@
119
120
  "opt": "-s",
120
121
  "name": "Subset",
121
122
  "description": ["The FastA provided is to be treated as a subset of",
122
- "the subject. By default, it expects all the subjects to be",
123
- "present in the BLAST."]
123
+ "the subject. By default, it expects all the BLAST subjects to be",
124
+ "present in the FastA."]
124
125
  },
125
126
  {
126
127
  "opt": "-q",
@@ -623,7 +624,8 @@
623
624
  { "r_package": "optparse" },
624
625
  { "r_package": "enveomics.R" }
625
626
  ],
626
- "see_also": [ "RecPlot2.compareIdentities.R" ],
627
+ "see_also": ["BlastTab.catsbj.pl", "GFF.catsbj.pl",
628
+ "RecPlot2.compareIdentities.R"],
627
629
  "options": [
628
630
  {
629
631
  "opt": "--prefix",
@@ -637,7 +639,13 @@
637
639
  "opt": "--pos-breaks",
638
640
  "arg": "integer",
639
641
  "default": 1000,
640
- "description": ["Breaks in the positions histogram."]
642
+ "description": "Breaks in the positions histogram."
643
+ },
644
+ {
645
+ "opt": "--pos-breaks-tsv",
646
+ "arg": "in_file",
647
+ "description": ["File with (absolute) coordinates of breaks in the",
648
+ "position histogram."]
641
649
  },
642
650
  {
643
651
  "opt": "--id-breaks",
@@ -216,6 +216,50 @@
216
216
  }
217
217
  ]
218
218
  },
219
+ {
220
+ "task": "FastA.mask.rb",
221
+ "description": "Mask sequence region(s) in a FastA file.",
222
+ "help_arg": "--help",
223
+ "options": [
224
+ {
225
+ "opt": "--in",
226
+ "arg": "in_file",
227
+ "mandatory": true,
228
+ "description": "Input FastA file."
229
+ },
230
+ {
231
+ "opt": "--out",
232
+ "arg": "out_file",
233
+ "mandatory": true,
234
+ "description": "Output FastA file."
235
+ },
236
+ {
237
+ "opt": "--regions",
238
+ "arg": "string",
239
+ "mandatory": true,
240
+ "description": ["Regions to mask separated by commas.",
241
+ "Each region must be in the format \"sequence_id:from..to\"."]
242
+ },
243
+ {
244
+ "opt": "--symbol",
245
+ "arg": "string",
246
+ "default": "N",
247
+ "description": "Character used to mask the region(s)."
248
+ },
249
+ {
250
+ "opt": "--trim",
251
+ "description": ["Trim masked regions extending to the edge of a",
252
+ "sequence."]
253
+ },
254
+ {
255
+ "opt": "--wrap",
256
+ "arg": "integer",
257
+ "default": 70,
258
+ "description": ["Line length to wrap sequences. Use 0 to generate",
259
+ "1-line sequences."]
260
+ }
261
+ ]
262
+ },
219
263
  {
220
264
  "task": "FastA.qlen.pl",
221
265
  "description": ["Calculates the quartiles of the length in a set of",
@@ -298,6 +342,49 @@
298
342
  }
299
343
  ]
300
344
  },
345
+ {
346
+ "task": "FastA.sample.rb",
347
+ "description": ["Samples a random set of sequences from a multi-FastA",
348
+ "file."],
349
+ "help_arg": "--help",
350
+ "see_also": "FastA.subsample.pl",
351
+ "options": [
352
+ {
353
+ "name": "Input File",
354
+ "opt": "--in",
355
+ "arg": "in_file",
356
+ "mandatory": true,
357
+ "description": "Input FastA file."
358
+ },
359
+ {
360
+ "name": "Output file",
361
+ "opt": "--out",
362
+ "arg": "out_file",
363
+ "mandatory": true,
364
+ "description": "Output FastA file."
365
+ },
366
+ {
367
+ "opt": "--fraction",
368
+ "arg": "float",
369
+ "description": ["Fraction of sequences to sample [0-1].",
370
+ "Mandatory unless Number is provided."]
371
+ },
372
+ {
373
+ "opt": "--number",
374
+ "arg": "integer",
375
+ "description": ["Number of sequences to sample.",
376
+ "Mandatory unless -f is provided."]
377
+ },
378
+ {
379
+ "opt": "--replacement",
380
+ "description": "Sample with replacement."
381
+ },
382
+ {
383
+ "opt": "--quiet",
384
+ "description": "Run quietly (no STDERR output)."
385
+ }
386
+ ]
387
+ },
301
388
  {
302
389
  "task": "FastA.slider.pl",
303
390
  "description": "Slices sequences in fixed- or variable-length windows.",
@@ -432,6 +519,7 @@
432
519
  "task": "FastA.subsample.pl",
433
520
  "description": "Subsamples a set of sequences.",
434
521
  "help_arg": "-h",
522
+ "see_also": "FastA.sample.rb",
435
523
  "options": [
436
524
  {
437
525
  "name": "Fraction",
@@ -548,6 +636,53 @@
548
636
  }
549
637
  ]
550
638
  },
639
+ {
640
+ "task": "FastA.extract.rb",
641
+ "description": ["Extracts a list of sequences and/or coordinates from",
642
+ "multi-FastA files."],
643
+ "help_arg": "--help",
644
+ "options": [
645
+ {
646
+ "name": "Input file",
647
+ "opt": "--in",
648
+ "arg": "in_file",
649
+ "mandatory": true,
650
+ "description": "Input FastA file."
651
+ },
652
+ {
653
+ "name": "Output file",
654
+ "opt": "--out",
655
+ "arg": "out_file",
656
+ "mandatory": true,
657
+ "description": "Output FastA file."
658
+ },
659
+ {
660
+ "name": "Coordinates",
661
+ "opt": "--coords",
662
+ "arg": "string",
663
+ "description": ["Comma-delimited list of coordinates (mandatory",
664
+ "unless -C is passed).",
665
+ "The format of the coordinates is SEQ:FROM..TO or SEQ:FROM~LEN:",
666
+ "SEQ: Sequence ID, or * (asterisk) to extract range from all",
667
+ "sequences",
668
+ "FROM: Integer, position of the first base to include (can be",
669
+ "negative)",
670
+ "TO: Integer, last base to include (can be negative)",
671
+ "LEN: Length of the range to extract."]
672
+ },
673
+ {
674
+ "name": "Coordinates file",
675
+ "opt": "--coords-file",
676
+ "arg": "in_file",
677
+ "description": ["File containing the coordinates, one per line.",
678
+ "Each line must follow the format described for Coordinates."]
679
+ },
680
+ {
681
+ "opt": "--quiet",
682
+ "description": "Run quietly (no STDERR output)."
683
+ }
684
+ ]
685
+ },
551
686
  {
552
687
  "task": "FastA.fragment.rb",
553
688
  "description": ["Simulates incomplete (fragmented) drafts from complete",
@@ -743,6 +743,55 @@
743
743
  "description": "Verbosely display warnings."
744
744
  }
745
745
  ]
746
+ },
747
+ {
748
+ "task": "GFF.catsbj.pl",
749
+ "description": ["Generates a list of coordinates from a GFF table",
750
+ "concatenating the subject sequences."],
751
+ "help_arg": "-h",
752
+ "see_also": ["BlastTab.recplot2.R", "BlastTab.catsbj.pl"],
753
+ "options": [
754
+ {
755
+ "name": "Lim file",
756
+ "opt": "-L",
757
+ "arg": "out_file",
758
+ "description": ["An output file with the absolute coordinates of the",
759
+ "concatenated contigs. This is identical to the .lim file",
760
+ "generated by BlastTab.catsbj.pl."]
761
+ },
762
+ {
763
+ "name": "Inter-feature gaps",
764
+ "opt": "-i",
765
+ "description": ["Preserve exact coordinates and include",
766
+ "inter-feature windows as separate bins. By default, the",
767
+ "coordinates are set in the midpoint between features when",
768
+ "non-contiguous."]
769
+ },
770
+ {
771
+ "name": "Subset",
772
+ "opt": "-s",
773
+ "description": ["The FastA provided is to be treated as a subset of",
774
+ "the subject. By default, it expects all the GFF contigs to be present",
775
+ "in the FastA."]
776
+ },
777
+ {
778
+ "name": "Quiet",
779
+ "opt": "-q",
780
+ "description": "Run quietly."
781
+ },
782
+ {
783
+ "name": "Subject sequences",
784
+ "arg": "in_file",
785
+ "mandatory": true,
786
+ "description": "Subject sequences (contigs) in FastA format."
787
+ },
788
+ {
789
+ "name": "Features",
790
+ "arg": "in_file",
791
+ "mandatory": true,
792
+ "description": "Features to map in GFF."
793
+ }
794
+ ]
746
795
  }
747
796
  ]
748
797
  }