miga-base 0.3.9.0 → 0.3.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/actions/add.rb +33 -33
  3. data/actions/edit.rb +33 -0
  4. data/actions/new.rb +17 -18
  5. data/actions/next_step.rb +33 -0
  6. data/actions/run.rb +15 -12
  7. data/bin/miga +43 -37
  8. data/lib/miga/daemon.rb +2 -2
  9. data/lib/miga/project/result.rb +16 -1
  10. data/lib/miga/version.rb +2 -2
  11. data/scripts/aai_distances.bash +1 -3
  12. data/scripts/ani_distances.bash +1 -3
  13. data/scripts/assembly.bash +1 -3
  14. data/scripts/cds.bash +1 -3
  15. data/scripts/clade_finding.bash +1 -3
  16. data/scripts/d.bash +13 -0
  17. data/scripts/distances.bash +1 -3
  18. data/scripts/essential_genes.bash +1 -3
  19. data/scripts/haai_distances.bash +1 -3
  20. data/scripts/miga.bash +12 -9
  21. data/scripts/mytaxa.bash +1 -3
  22. data/scripts/mytaxa_scan.bash +1 -3
  23. data/scripts/ogs.bash +36 -33
  24. data/scripts/p.bash +23 -0
  25. data/scripts/project_stats.bash +1 -3
  26. data/scripts/read_quality.bash +1 -3
  27. data/scripts/ssu.bash +1 -3
  28. data/scripts/stats.bash +1 -3
  29. data/scripts/subclades.bash +1 -3
  30. data/scripts/taxonomy.bash +1 -3
  31. data/scripts/trimmed_fasta.bash +1 -3
  32. data/scripts/trimmed_reads.bash +1 -3
  33. data/test/daemon_test.rb +3 -3
  34. data/utils/distance/runner.rb +1 -1
  35. data/utils/enveomics/Docs/recplot2.md +13 -2
  36. data/utils/enveomics/Examples/aai-matrix.bash +3 -3
  37. data/utils/enveomics/Examples/ani-matrix.bash +3 -3
  38. data/utils/enveomics/Makefile +2 -2
  39. data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
  40. data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
  41. data/utils/enveomics/Manifest/Tasks/other.json +49 -0
  42. data/utils/enveomics/Manifest/categories.json +4 -0
  43. data/utils/enveomics/Manifest/examples.json +1 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
  47. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
  48. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
  49. data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
  50. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  51. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  52. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  53. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  54. data/utils/enveomics/Scripts/aai.rb +4 -3
  55. data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
  56. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
  57. data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
  58. data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
  59. data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
  60. data/utils/enveomics/enveomics.R/R/utils.R +19 -1
  61. data/utils/enveomics/enveomics.R/README.md +11 -0
  62. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
  63. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
  64. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
  65. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
  66. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
  67. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
  68. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
  69. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
  70. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
  71. data/utils/subclade/runner.rb +4 -0
  72. metadata +14 -3
data/scripts/miga.bash CHANGED
@@ -2,7 +2,7 @@
2
2
  set -e
3
3
  #MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
4
4
  # shellcheck source=/dev/null
5
- source "$HOME/.miga_rc"
5
+ . "$HOME/.miga_rc"
6
6
  export PATH="$MIGA/bin:$MIGA/utils/enveomics/Scripts:$PATH"
7
7
  SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
8
8
 
@@ -11,15 +11,18 @@ function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
11
11
 
12
12
  for i in $(miga plugins -P "$PROJECT") ; do
13
13
  # shellcheck source=/dev/null
14
- source "$i/scripts-plugin.bash"
14
+ . "$i/scripts-plugin.bash"
15
15
  done
16
16
 
17
- [[ -n $DATASET ]] \
18
- && miga add -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT" --update
17
+ if [[ "$SCRIPT" != "d" && "$SCRIPT" != "p" ]] ; then
18
+ echo -n "Date: " ; miga date
19
+ echo "MiGA: $MIGA"
20
+ echo "Task: $SCRIPT"
21
+ echo "Project: $PROJECT"
22
+ if [[ -n $DATASET ]] ; then
23
+ echo "Dataset: $DATASET"
24
+ miga edit -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT"
25
+ fi
26
+ fi
19
27
 
20
28
  true
21
-
22
- #if [[ "$RUNTYPE" == "qsub" ]] ; then
23
- #elif [[ "$RUNTYPE" == "msub" ]] ; then
24
- #fi
25
-
data/scripts/mytaxa.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="mytaxa"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
data/scripts/mytaxa_scan.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="mytaxa_scan"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
data/scripts/ogs.bash CHANGED
@@ -2,49 +2,52 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
4
  SCRIPT="ogs"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/10.clades/03.ogs"
10
8
 
11
9
  # Initialize
12
10
  miga date > "miga-project.start"
13
11
 
14
12
  DS=$(miga ls -P "$PROJECT" --ref --no-multi)
15
- MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
16
- [[ $MIN_ID == "?" ]] && MIN_ID=80
17
- if [[ ! -s miga-project.ogs ]] ; then
18
- # Extract RBMs
19
- if [[ ! -s miga-project.abc ]] ; then
20
- [[ -d miga-project.tmp ]] || mkdir miga-project.tmp
21
- for i in $DS ; do
22
- file="miga-project.tmp/$i.abc"
23
- [[ -s "$file" ]] && continue
24
- echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
25
- | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
26
- | awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
27
- > "$file.tmp"
28
- mv "$file.tmp" "$file"
29
- done
30
- cat miga-project.tmp/*.abc > miga-project.abc
31
- fi
32
- rm -rf miga-project.tmp
33
13
 
34
- # Estimate OGs and Clean RBMs
35
- ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
36
- if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
37
- rm miga-project.abc
38
- else
39
- gzip -9 miga-project.abc
14
+ if [[ -n $DS ]] ; then
15
+ MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
16
+ [[ $MIN_ID == "?" ]] && MIN_ID=80
17
+ if [[ ! -s miga-project.ogs ]] ; then
18
+ # Extract RBMs
19
+ if [[ ! -s miga-project.abc ]] ; then
20
+ [[ -d miga-project.tmp ]] || mkdir miga-project.tmp
21
+ for i in $DS ; do
22
+ file="miga-project.tmp/$i.abc"
23
+ [[ -s "$file" ]] && continue
24
+ echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
25
+ | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
26
+ | awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
27
+ > "$file.tmp"
28
+ mv "$file.tmp" "$file"
29
+ done
30
+ cat miga-project.tmp/*.abc > miga-project.abc
31
+ fi
32
+ rm -rf miga-project.tmp
33
+
34
+ # Estimate OGs and Clean RBMs
35
+ ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
36
+ if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
37
+ rm miga-project.abc
38
+ else
39
+ gzip -9 miga-project.abc
40
+ fi
40
41
  fi
41
- fi
42
42
 
43
- # Calculate Statistics
44
- ogs.stats.rb -o miga-project.ogs -j miga-project.stats
45
- ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
46
- Rscript "$MIGA/utils/core-pan-plot.R" \
47
- miga-project.core-pan.tsv miga-project.core-pan.pdf
43
+ # Calculate Statistics
44
+ ogs.stats.rb -o miga-project.ogs -j miga-project.stats
45
+ ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
46
+ Rscript "$MIGA/utils/core-pan-plot.R" \
47
+ miga-project.core-pan.tsv miga-project.core-pan.pdf
48
+ else
49
+ touch miga-project.empty
50
+ fi
48
51
 
49
52
  # Finalize
50
53
  miga date > "miga-project.done"
data/scripts/p.bash ADDED
@@ -0,0 +1,23 @@
1
+ #!/bin/bash
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
+ set -e
4
+ SCRIPT="p"
5
+ # shellcheck source=scripts/miga.bash
6
+ . "$MIGA/scripts/miga.bash" || exit 1
7
+
8
+ while true ; do
9
+ res="$(miga next_step -P "$PROJECT")"
10
+ [[ "$res" == '?' ]] && break
11
+ miga run -P "$PROJECT" -r "$res" -t "$CORES"
12
+ if [[ "$res" == "$last_res" ]] ; then
13
+ let k=$k+1
14
+ if [[ $k -gt 10 ]] ; then
15
+ miga new --update -P "$PROJECT" \
16
+ -m "run_$res=false,warn=Too many failed attempts to run $res."
17
+ fi
18
+ else
19
+ k=0
20
+ last_res=$res
21
+ fi
22
+ done
23
+
data/scripts/project_stats.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
4
  SCRIPT="project_stats"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/90.stats"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
data/scripts/read_quality.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="read_quality"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/03.read_quality"
10
8
 
11
9
  b=$DATASET
data/scripts/ssu.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="ssu"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
data/scripts/stats.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="stats"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/90.stats"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
data/scripts/subclades.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
4
  SCRIPT="subclades"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/10.clades/02.ani"
10
8
 
11
9
  # Initialize
data/scripts/taxonomy.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="taxonomy"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  DIR="$PROJECT/data/09.distances/05.taxonomy"
10
8
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
11
9
  cd "$DIR"
data/scripts/trimmed_fasta.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="trimmed_fasta"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/04.trimmed_fasta"
10
8
 
11
9
  b=$DATASET
data/scripts/trimmed_reads.bash CHANGED
@@ -2,10 +2,8 @@
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="trimmed_reads"
5
- echo "MiGA: $MIGA"
6
- echo "Project: $PROJECT"
7
5
  # shellcheck source=scripts/miga.bash
8
- source "$MIGA/scripts/miga.bash" || exit 1
6
+ . "$MIGA/scripts/miga.bash" || exit 1
9
7
  cd "$PROJECT/data/02.trimmed_reads"
10
8
 
11
9
  b=$DATASET
data/test/daemon_test.rb CHANGED
@@ -40,10 +40,10 @@ class DaemonTest < Test::Unit::TestCase
40
40
  out = capture_stdout do
41
41
  d.check_datasets
42
42
  end
43
- assert(out.string =~ /Queueing #{ds.name}:trimmed_reads/)
43
+ assert(out.string =~ /Queueing #{ds.name}:d/)
44
44
  assert_equal(1, d.jobs_to_run.size)
45
- assert_equal("project1:trimmed_reads:ds1", d.jobs_to_run.first[:cmd])
46
- assert_equal(d.jobs_to_run.first, d.get_job(:trimmed_reads, ds))
45
+ assert_equal("project1:d:ds1", d.jobs_to_run.first[:cmd])
46
+ assert_equal(d.jobs_to_run.first, d.get_job(:d, ds))
47
47
  end
48
48
 
49
49
  def test_in_loop
data/utils/distance/runner.rb CHANGED
@@ -35,7 +35,7 @@ class MiGA::DistanceRunner
35
35
  elsif !opts[:run_taxonomy] and dataset.metadata[:db_project]
36
36
  ref_path = dataset.metadata[:db_project]
37
37
  if project.metadata[:db_proj_dir]
38
- ref_path = File.expand_path(project.metadata[:db_proj_dir], ref_path)
38
+ ref_path = File.expand_path(ref_path, project.metadata[:db_proj_dir])
39
39
  end
40
40
  @ref_project = MiGA::Project.load(ref_path)
41
41
  raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
data/utils/enveomics/Docs/recplot2.md CHANGED
@@ -117,11 +117,22 @@ library(enveomics.R)
117
117
  load('my-recplot.rdata')
118
118
  ```
119
119
 
120
- ### Average and median sequencing depth
120
+ ### Centrality measures of sequencing depth
121
121
 
122
122
  ```R
123
123
  mean(enve.recplot2.seqdepth(rp)) # <- Average
124
124
  median(enve.recplot2.seqdepth(rp)) # <- Median
125
+ enve.truncate(enve.recplot2.seqdepth(rp)) # <- 95% Central Truncated Mean
126
+ enve.truncate(enve.recplot2.seqdepth(rp), 0.9) # <- 90% Central Truncated Mean
127
+ ```
128
+
129
+ The functions above only use hits with identity above the cutoff for "in-group" (by default: 95%).
130
+ In order to estimate the sequencing depth with a different identity cutoff, modify the cutoff first:
131
+
132
+ ```R
133
+ rp98 <- enve.recplot2.changeCutoff(rp, 98) # <- Change to ≥98%
134
+ mean(enve.recplot2.seqdepth(rp98)) # <- Average (for the new object)
135
+ median(enve.recplot2.seqdepth(rp98)) # <- Median (for the new object)
125
136
  ```
126
137
 
127
138
  ### Average and median sequencing depth excluding zero-coverage windows
@@ -189,7 +200,7 @@ p <- plot(rp, use.peaks=peaks, layout=4) # <- Remove `layout=4` for the full plo
189
200
  dev.off()
190
201
  ```
191
202
 
192
- The key function here is `enve.recplo2.findPeaks`. This function has several parameters, depending on
203
+ The key function here is `enve.recplot2.findPeaks`. This function has several parameters, depending on
193
204
  the method used. To see all supported methods, use `?enve.recplot2.findPeaks`. To see all the options
194
205
  of the default method (`'emauto'`) use `?enve.recplot2.findPeaks.emauto`.
195
206
 
data/utils/enveomics/Examples/aai-matrix.bash CHANGED
@@ -59,8 +59,8 @@ echo "select seq1, seq2, aai, sd, n, omega, (100.0*n/omega) from aai;" \
59
59
  echo "[03/03] Generating distance matrix"
60
60
  echo "
61
61
  source('$(dirname $0)/../enveomics.R/R/df2dist.R');
62
- a <- read.table('$OUT', sep='\\t', h=TRUE, as.is=T);
63
- aai.d <- enve.df2dist(a, default.d=$DEF_DIST, max.sim=100);
62
+ a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
63
+ aai.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
64
64
  write.table(as.matrix(aai.d), '$OUT.dist',
65
- quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
65
+ quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
66
66
  " | R --vanilla >/dev/null
data/utils/enveomics/Examples/ani-matrix.bash CHANGED
@@ -59,8 +59,8 @@ echo "select seq1, seq2, ani, sd, n, omega, (100.0*n/omega) from ani;" \
59
59
  echo "[03/03] Generating distance matrix"
60
60
  echo "
61
61
  source('$(dirname $0)/../enveomics.R/R/df2dist.R');
62
- a <- read.table('$OUT', sep='\\t', h=TRUE, as.is=T);
63
- ani.d <- enve.df2dist(a, default.d=$DEF_DIST, max.sim=100);
62
+ a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
63
+ ani.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
64
64
  write.table(as.matrix(ani.d), '$OUT.dist',
65
- quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
65
+ quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
66
66
  " | R --vanilla >/dev/null
data/utils/enveomics/Makefile CHANGED
@@ -7,7 +7,7 @@ include globals.mk
7
7
 
8
8
  TEST=Tests
9
9
  enveomics_r=enveomics.R
10
- enveomics_r_v=enveomics.R_1.3
10
+ enveomics_r_v=enveomics.R_$(shell grep '^Version: ' enveomics.R/DESCRIPTION | perl -pe 's/.*: //')
11
11
  .PHONY: test install install-scripts install-r uninstall install-deps
12
12
 
13
13
  test: $(enveomics_r_v).tar.gz
@@ -41,7 +41,7 @@ uninstall:
41
41
  -$(R) CMD REMOVE $(enveomics_r)
42
42
 
43
43
  $(enveomics_r_v).tar.gz: install-deps
44
- -rm -r $(enveomics_r).tar.gz
44
+ rm -f $(enveomics_r_v).tar.gz
45
45
  ./build_enveomics_r.bash
46
46
  $(R) CMD build $(enveomics_r)/
47
47
  $(MAKE) install-r
data/utils/enveomics/Manifest/Tasks/blasttab.json CHANGED
@@ -99,6 +99,7 @@
99
99
  "files using <map.bls> as prefix with extensions .rec (for the",
100
100
  "recruitment plot) and .lim (for the limits of the different sequences",
101
101
  "in <seq.fa>)."],
102
+ "see_also": ["BlastTab.recplot2.R", "GFF.catsbj.pl"],
102
103
  "help_arg": "-h",
103
104
  "options": [
104
105
  {
@@ -119,8 +120,8 @@
119
120
  "opt": "-s",
120
121
  "name": "Subset",
121
122
  "description": ["The FastA provided is to be treated as a subset of",
122
- "the subject. By default, it expects all the subjects to be",
123
- "present in the BLAST."]
123
+ "the subject. By default, it expects all the BLAST subjects to be",
124
+ "present in the FastA."]
124
125
  },
125
126
  {
126
127
  "opt": "-q",
@@ -623,7 +624,8 @@
623
624
  { "r_package": "optparse" },
624
625
  { "r_package": "enveomics.R" }
625
626
  ],
626
- "see_also": [ "RecPlot2.compareIdentities.R" ],
627
+ "see_also": ["BlastTab.catsbj.pl", "GFF.catsbj.pl",
628
+ "RecPlot2.compareIdentities.R"],
627
629
  "options": [
628
630
  {
629
631
  "opt": "--prefix",
@@ -637,7 +639,13 @@
637
639
  "opt": "--pos-breaks",
638
640
  "arg": "integer",
639
641
  "default": 1000,
640
- "description": ["Breaks in the positions histogram."]
642
+ "description": "Breaks in the positions histogram."
643
+ },
644
+ {
645
+ "opt": "--pos-breaks-tsv",
646
+ "arg": "in_file",
647
+ "description": ["File with (absolute) coordinates of breaks in the",
648
+ "position histogram."]
641
649
  },
642
650
  {
643
651
  "opt": "--id-breaks",
data/utils/enveomics/Manifest/Tasks/fasta.json CHANGED
@@ -216,6 +216,50 @@
216
216
  }
217
217
  ]
218
218
  },
219
+ {
220
+ "task": "FastA.mask.rb",
221
+ "description": "Mask sequence region(s) in a FastA file.",
222
+ "help_arg": "--help",
223
+ "options": [
224
+ {
225
+ "opt": "--in",
226
+ "arg": "in_file",
227
+ "mandatory": true,
228
+ "description": "Input FastA file."
229
+ },
230
+ {
231
+ "opt": "--out",
232
+ "arg": "out_file",
233
+ "mandatory": true,
234
+ "description": "Output FastA file."
235
+ },
236
+ {
237
+ "opt": "--regions",
238
+ "arg": "string",
239
+ "mandatory": true,
240
+ "description": ["Regions to mask separated by commas.",
241
+ "Each region must be in the format \"sequence_id:from..to\"."]
242
+ },
243
+ {
244
+ "opt": "--symbol",
245
+ "arg": "string",
246
+ "default": "N",
247
+ "description": "Character used to mask the region(s)."
248
+ },
249
+ {
250
+ "opt": "--trim",
251
+ "description": ["Trim masked regions extending to the edge of a",
252
+ "sequence."]
253
+ },
254
+ {
255
+ "opt": "--wrap",
256
+ "arg": "integer",
257
+ "default": 70,
258
+ "description": ["Line length to wrap sequences. Use 0 to generate",
259
+ "1-line sequences."]
260
+ }
261
+ ]
262
+ },
219
263
  {
220
264
  "task": "FastA.qlen.pl",
221
265
  "description": ["Calculates the quartiles of the length in a set of",
@@ -298,6 +342,49 @@
298
342
  }
299
343
  ]
300
344
  },
345
+ {
346
+ "task": "FastA.sample.rb",
347
+ "description": ["Samples a random set of sequences from a multi-FastA",
348
+ "file."],
349
+ "help_arg": "--help",
350
+ "see_also": "FastA.subsample.pl",
351
+ "options": [
352
+ {
353
+ "name": "Input File",
354
+ "opt": "--in",
355
+ "arg": "in_file",
356
+ "mandatory": true,
357
+ "description": "Input FastA file."
358
+ },
359
+ {
360
+ "name": "Output file",
361
+ "opt": "--out",
362
+ "arg": "out_file",
363
+ "mandatory": true,
364
+ "description": "Output FastA file."
365
+ },
366
+ {
367
+ "opt": "--fraction",
368
+ "arg": "float",
369
+ "description": ["Fraction of sequences to sample [0-1].",
370
+ "Mandatory unless Number is provided."]
371
+ },
372
+ {
373
+ "opt": "--number",
374
+ "arg": "integer",
375
+ "description": ["Number of sequences to sample.",
376
+ "Mandatory unless -f is provided."]
377
+ },
378
+ {
379
+ "opt": "--replacement",
380
+ "description": "Sample with replacement."
381
+ },
382
+ {
383
+ "opt": "--quiet",
384
+ "description": "Run quietly (no STDERR output)."
385
+ }
386
+ ]
387
+ },
301
388
  {
302
389
  "task": "FastA.slider.pl",
303
390
  "description": "Slices sequences in fixed- or variable-length windows.",
@@ -432,6 +519,7 @@
432
519
  "task": "FastA.subsample.pl",
433
520
  "description": "Subsamples a set of sequences.",
434
521
  "help_arg": "-h",
522
+ "see_also": "FastA.sample.rb",
435
523
  "options": [
436
524
  {
437
525
  "name": "Fraction",
@@ -548,6 +636,53 @@
548
636
  }
549
637
  ]
550
638
  },
639
+ {
640
+ "task": "FastA.extract.rb",
641
+ "description": ["Extracts a list of sequences and/or coordinates from",
642
+ "multi-FastA files."],
643
+ "help_arg": "--help",
644
+ "options": [
645
+ {
646
+ "name": "Input file",
647
+ "opt": "--in",
648
+ "arg": "in_file",
649
+ "mandatory": true,
650
+ "description": "Input FastA file."
651
+ },
652
+ {
653
+ "name": "Output file",
654
+ "opt": "--out",
655
+ "arg": "out_file",
656
+ "mandatory": true,
657
+ "description": "Output FastA file."
658
+ },
659
+ {
660
+ "name": "Coordinates",
661
+ "opt": "--coords",
662
+ "arg": "string",
663
+ "description": ["Comma-delimited list of coordinates (mandatory",
664
+ "unless -C is passed).",
665
+ "The format of the coordinates is SEQ:FROM..TO or SEQ:FROM~LEN:",
666
+ "SEQ: Sequence ID, or * (asterisk) to extract range from all",
667
+ "sequences",
668
+ "FROM: Integer, position of the first base to include (can be",
669
+ "negative)",
670
+ "TO: Integer, last base to include (can be negative)",
671
+ "LEN: Length of the range to extract."]
672
+ },
673
+ {
674
+ "name": "Coordinates file",
675
+ "opt": "--coords-file",
676
+ "arg": "in_file",
677
+ "description": ["File containing the coordinates, one per line.",
678
+ "Each line must follow the format described for Coordinates."]
679
+ },
680
+ {
681
+ "opt": "--quiet",
682
+ "description": "Run quietly (no STDERR output)."
683
+ }
684
+ ]
685
+ },
551
686
  {
552
687
  "task": "FastA.fragment.rb",
553
688
  "description": ["Simulates incomplete (fragmented) drafts from complete",
data/utils/enveomics/Manifest/Tasks/other.json CHANGED
@@ -743,6 +743,55 @@
743
743
  "description": "Verbously display warnings."
744
744
  }
745
745
  ]
746
+ },
747
+ {
748
+ "task": "GFF.catsbj.pl",
749
+ "description": ["Generates a list of coordinates from a GFF table",
750
+ "concatenating the subject sequences."],
751
+ "help_arg": "-h",
752
+ "see_also": ["BlastTab.recplot2.R", "BlastTab.catsbj.pl"],
753
+ "options": [
754
+ {
755
+ "name": "Lim file",
756
+ "opt": "-L",
757
+ "arg": "out_file",
758
+ "description": ["An output file with the absolute coordinates of the",
759
+ "concatenated contigs. This is identical to the .lim file",
760
+ "generated by BlastTab.catsbj.pl."]
761
+ },
762
+ {
763
+ "name": "Inter-feature gaps",
764
+ "opt": "-i",
765
+ "description": ["Preserve exact coordinates and include",
766
+ "inter-feature windows as separate bins. By default, the",
767
+ "coordinates are set in the midpoint between features when",
768
+ "non-contiguous."]
769
+ },
770
+ {
771
+ "name": "Subset",
772
+ "opt": "-s",
773
+ "description": ["The FastA provided is to be treated as a subset of",
774
+ "the subject. By default, it expects all the contigs to be present",
775
+ "in the BLAST."]
776
+ },
777
+ {
778
+ "name": "Quiet",
779
+ "opt": "-q",
780
+ "description": "Run quietly."
781
+ },
782
+ {
783
+ "name": "Subject sequences",
784
+ "arg": "in_file",
785
+ "mandatory": true,
786
+ "description": "Subject sequences (contigs) in FastA format."
787
+ },
788
+ {
789
+ "name": "Features",
790
+ "arg": "in_file",
791
+ "mandatory": true,
792
+ "description": "Features to map in GFF."
793
+ }
794
+ ]
746
795
  }
747
796
  ]
748
797
  }