miga-base 0.3.9.0 → 0.3.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/add.rb +33 -33
- data/actions/edit.rb +33 -0
- data/actions/new.rb +17 -18
- data/actions/next_step.rb +33 -0
- data/actions/run.rb +15 -12
- data/bin/miga +43 -37
- data/lib/miga/daemon.rb +2 -2
- data/lib/miga/project/result.rb +16 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/aai_distances.bash +1 -3
- data/scripts/ani_distances.bash +1 -3
- data/scripts/assembly.bash +1 -3
- data/scripts/cds.bash +1 -3
- data/scripts/clade_finding.bash +1 -3
- data/scripts/d.bash +13 -0
- data/scripts/distances.bash +1 -3
- data/scripts/essential_genes.bash +1 -3
- data/scripts/haai_distances.bash +1 -3
- data/scripts/miga.bash +12 -9
- data/scripts/mytaxa.bash +1 -3
- data/scripts/mytaxa_scan.bash +1 -3
- data/scripts/ogs.bash +36 -33
- data/scripts/p.bash +23 -0
- data/scripts/project_stats.bash +1 -3
- data/scripts/read_quality.bash +1 -3
- data/scripts/ssu.bash +1 -3
- data/scripts/stats.bash +1 -3
- data/scripts/subclades.bash +1 -3
- data/scripts/taxonomy.bash +1 -3
- data/scripts/trimmed_fasta.bash +1 -3
- data/scripts/trimmed_reads.bash +1 -3
- data/test/daemon_test.rb +3 -3
- data/utils/distance/runner.rb +1 -1
- data/utils/enveomics/Docs/recplot2.md +13 -2
- data/utils/enveomics/Examples/aai-matrix.bash +3 -3
- data/utils/enveomics/Examples/ani-matrix.bash +3 -3
- data/utils/enveomics/Makefile +2 -2
- data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
- data/utils/enveomics/Manifest/Tasks/other.json +49 -0
- data/utils/enveomics/Manifest/categories.json +4 -0
- data/utils/enveomics/Manifest/examples.json +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/aai.rb +4 -3
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
- data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
- data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
- data/utils/enveomics/enveomics.R/R/utils.R +19 -1
- data/utils/enveomics/enveomics.R/README.md +11 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
- data/utils/subclade/runner.rb +4 -0
- metadata +14 -3
data/scripts/miga.bash
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
set -e
|
3
3
|
#MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
|
4
4
|
# shellcheck source=/dev/null
|
5
|
-
|
5
|
+
. "$HOME/.miga_rc"
|
6
6
|
export PATH="$MIGA/bin:$MIGA/utils/enveomics/Scripts:$PATH"
|
7
7
|
SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
|
8
8
|
|
@@ -11,15 +11,18 @@ function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
|
|
11
11
|
|
12
12
|
for i in $(miga plugins -P "$PROJECT") ; do
|
13
13
|
# shellcheck source=/dev/null
|
14
|
-
|
14
|
+
. "$i/scripts-plugin.bash"
|
15
15
|
done
|
16
16
|
|
17
|
-
[[
|
18
|
-
|
17
|
+
if [[ "$SCRIPT" != "d" && "$SCRIPT" != "p" ]] ; then
|
18
|
+
echo -n "Date: " ; miga date
|
19
|
+
echo "MiGA: $MIGA"
|
20
|
+
echo "Task: $SCRIPT"
|
21
|
+
echo "Project: $PROJECT"
|
22
|
+
if [[ -n $DATASET ]] ; then
|
23
|
+
echo "Dataset: $DATASET"
|
24
|
+
miga edit -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT"
|
25
|
+
fi
|
26
|
+
fi
|
19
27
|
|
20
28
|
true
|
21
|
-
|
22
|
-
#if [[ "$RUNTYPE" == "qsub" ]] ; then
|
23
|
-
#elif [[ "$RUNTYPE" == "msub" ]] ; then
|
24
|
-
#fi
|
25
|
-
|
data/scripts/mytaxa.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="mytaxa"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/mytaxa_scan.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="mytaxa_scan"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/ogs.bash
CHANGED
@@ -2,49 +2,52 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
4
|
SCRIPT="ogs"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/10.clades/03.ogs"
|
10
8
|
|
11
9
|
# Initialize
|
12
10
|
miga date > "miga-project.start"
|
13
11
|
|
14
12
|
DS=$(miga ls -P "$PROJECT" --ref --no-multi)
|
15
|
-
MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
|
16
|
-
[[ $MIN_ID == "?" ]] && MIN_ID=80
|
17
|
-
if [[ ! -s miga-project.ogs ]] ; then
|
18
|
-
# Extract RBMs
|
19
|
-
if [[ ! -s miga-project.abc ]] ; then
|
20
|
-
[[ -d miga-project.tmp ]] || mkdir miga-project.tmp
|
21
|
-
for i in $DS ; do
|
22
|
-
file="miga-project.tmp/$i.abc"
|
23
|
-
[[ -s "$file" ]] && continue
|
24
|
-
echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
|
25
|
-
| sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
|
26
|
-
| awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
|
27
|
-
> "$file.tmp"
|
28
|
-
mv "$file.tmp" "$file"
|
29
|
-
done
|
30
|
-
cat miga-project.tmp/*.abc > miga-project.abc
|
31
|
-
fi
|
32
|
-
rm -rf miga-project.tmp
|
33
13
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
14
|
+
if [[ -n $DS ]] ; then
|
15
|
+
MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
|
16
|
+
[[ $MIN_ID == "?" ]] && MIN_ID=80
|
17
|
+
if [[ ! -s miga-project.ogs ]] ; then
|
18
|
+
# Extract RBMs
|
19
|
+
if [[ ! -s miga-project.abc ]] ; then
|
20
|
+
[[ -d miga-project.tmp ]] || mkdir miga-project.tmp
|
21
|
+
for i in $DS ; do
|
22
|
+
file="miga-project.tmp/$i.abc"
|
23
|
+
[[ -s "$file" ]] && continue
|
24
|
+
echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
|
25
|
+
| sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
|
26
|
+
| awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
|
27
|
+
> "$file.tmp"
|
28
|
+
mv "$file.tmp" "$file"
|
29
|
+
done
|
30
|
+
cat miga-project.tmp/*.abc > miga-project.abc
|
31
|
+
fi
|
32
|
+
rm -rf miga-project.tmp
|
33
|
+
|
34
|
+
# Estimate OGs and Clean RBMs
|
35
|
+
ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
|
36
|
+
if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
|
37
|
+
rm miga-project.abc
|
38
|
+
else
|
39
|
+
gzip -9 miga-project.abc
|
40
|
+
fi
|
40
41
|
fi
|
41
|
-
fi
|
42
42
|
|
43
|
-
# Calculate Statistics
|
44
|
-
ogs.stats.rb -o miga-project.ogs -j miga-project.stats
|
45
|
-
ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
|
46
|
-
Rscript "$MIGA/utils/core-pan-plot.R" \
|
47
|
-
|
43
|
+
# Calculate Statistics
|
44
|
+
ogs.stats.rb -o miga-project.ogs -j miga-project.stats
|
45
|
+
ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
|
46
|
+
Rscript "$MIGA/utils/core-pan-plot.R" \
|
47
|
+
miga-project.core-pan.tsv miga-project.core-pan.pdf
|
48
|
+
else
|
49
|
+
touch miga-project.empty
|
50
|
+
fi
|
48
51
|
|
49
52
|
# Finalize
|
50
53
|
miga date > "miga-project.done"
|
data/scripts/p.bash
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
|
+
set -e
|
4
|
+
SCRIPT="p"
|
5
|
+
# shellcheck source=scripts/miga.bash
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
7
|
+
|
8
|
+
while true ; do
|
9
|
+
res="$(miga next_step -P "$PROJECT")"
|
10
|
+
[[ "$res" == '?' ]] && break
|
11
|
+
miga run -P "$PROJECT" -r "$res" -t "$CORES"
|
12
|
+
if [[ "$res" == "$last_res" ]] ; then
|
13
|
+
let k=$k+1
|
14
|
+
if [[ $k -gt 10 ]] ; then
|
15
|
+
miga new --update -P "$PROJECT" \
|
16
|
+
-m "run_$res=false,warn=Too many failed attempts to run $res."
|
17
|
+
fi
|
18
|
+
else
|
19
|
+
k=0
|
20
|
+
last_res=$res
|
21
|
+
fi
|
22
|
+
done
|
23
|
+
|
data/scripts/project_stats.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
4
|
SCRIPT="project_stats"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/90.stats"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/read_quality.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="read_quality"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/03.read_quality"
|
10
8
|
|
11
9
|
b=$DATASET
|
data/scripts/ssu.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="ssu"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/stats.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="stats"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/90.stats"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/subclades.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
4
|
SCRIPT="subclades"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/10.clades/02.ani"
|
10
8
|
|
11
9
|
# Initialize
|
data/scripts/taxonomy.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="taxonomy"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/09.distances/05.taxonomy"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/trimmed_fasta.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="trimmed_fasta"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/04.trimmed_fasta"
|
10
8
|
|
11
9
|
b=$DATASET
|
data/scripts/trimmed_reads.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="trimmed_reads"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/02.trimmed_reads"
|
10
8
|
|
11
9
|
b=$DATASET
|
data/test/daemon_test.rb
CHANGED
@@ -40,10 +40,10 @@ class DaemonTest < Test::Unit::TestCase
|
|
40
40
|
out = capture_stdout do
|
41
41
|
d.check_datasets
|
42
42
|
end
|
43
|
-
assert(out.string =~ /Queueing #{ds.name}:
|
43
|
+
assert(out.string =~ /Queueing #{ds.name}:d/)
|
44
44
|
assert_equal(1, d.jobs_to_run.size)
|
45
|
-
assert_equal("project1:
|
46
|
-
assert_equal(d.jobs_to_run.first, d.get_job(:
|
45
|
+
assert_equal("project1:d:ds1", d.jobs_to_run.first[:cmd])
|
46
|
+
assert_equal(d.jobs_to_run.first, d.get_job(:d, ds))
|
47
47
|
end
|
48
48
|
|
49
49
|
def test_in_loop
|
data/utils/distance/runner.rb
CHANGED
@@ -35,7 +35,7 @@ class MiGA::DistanceRunner
|
|
35
35
|
elsif !opts[:run_taxonomy] and dataset.metadata[:db_project]
|
36
36
|
ref_path = dataset.metadata[:db_project]
|
37
37
|
if project.metadata[:db_proj_dir]
|
38
|
-
ref_path = File.expand_path(project.metadata[:db_proj_dir]
|
38
|
+
ref_path = File.expand_path(ref_path, project.metadata[:db_proj_dir])
|
39
39
|
end
|
40
40
|
@ref_project = MiGA::Project.load(ref_path)
|
41
41
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
@@ -117,11 +117,22 @@ library(enveomics.R)
|
|
117
117
|
load('my-recplot.rdata')
|
118
118
|
```
|
119
119
|
|
120
|
-
###
|
120
|
+
### Centrality measures of sequencing depth
|
121
121
|
|
122
122
|
```R
|
123
123
|
mean(enve.recplot2.seqdepth(rp)) # <- Average
|
124
124
|
median(enve.recplot2.seqdepth(rp)) # <- Median
|
125
|
+
enve.truncate(enve.recplot2.seqdepth(rp)) # <- 95% Central Truncated Mean
|
126
|
+
enve.truncate(enve.recplot2.seqdepth(rp), 0.9) # <- 90% Central Truncated Mean
|
127
|
+
```
|
128
|
+
|
129
|
+
The functions above only use hits with identity above the cutoff for "in-group" (by default: 95%).
|
130
|
+
In order to estimate the sequencing depth with a different identity cutoff, modify the cutoff first:
|
131
|
+
|
132
|
+
```R
|
133
|
+
rp98 <- enve.recplot2.changeCutoff(rp, 98) # <- Change to ≥98%
|
134
|
+
mean(enve.recplot2.seqdepth(rp98)) # <- Average (for the new object)
|
135
|
+
median(enve.recplot2.seqdepth(rp98)) # <- Median (for the new object)
|
125
136
|
```
|
126
137
|
|
127
138
|
### Average and median sequencing depth excluding zero-coverage windows
|
@@ -189,7 +200,7 @@ p <- plot(rp, use.peaks=peaks, layout=4) # <- Remove `layout=4` for the full plo
|
|
189
200
|
dev.off()
|
190
201
|
```
|
191
202
|
|
192
|
-
The key function here is `enve.
|
203
|
+
The key function here is `enve.recplot2.findPeaks`. This function has several parameters, depending on
|
193
204
|
the method used. To see all supported methods, use `?enve.recplot2.findPeaks`. To see all the options
|
194
205
|
of the default method (`'emauto'`) use `?enve.recplot2.findPeaks.emauto`.
|
195
206
|
|
@@ -59,8 +59,8 @@ echo "select seq1, seq2, aai, sd, n, omega, (100.0*n/omega) from aai;" \
|
|
59
59
|
echo "[03/03] Generating distance matrix"
|
60
60
|
echo "
|
61
61
|
source('$(dirname $0)/../enveomics.R/R/df2dist.R');
|
62
|
-
a <- read.table('$OUT', sep='\\t',
|
63
|
-
aai.d <- enve.df2dist(a, default.d
|
62
|
+
a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
|
63
|
+
aai.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
|
64
64
|
write.table(as.matrix(aai.d), '$OUT.dist',
|
65
|
-
quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
|
65
|
+
quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
|
66
66
|
" | R --vanilla >/dev/null
|
@@ -59,8 +59,8 @@ echo "select seq1, seq2, ani, sd, n, omega, (100.0*n/omega) from ani;" \
|
|
59
59
|
echo "[03/03] Generating distance matrix"
|
60
60
|
echo "
|
61
61
|
source('$(dirname $0)/../enveomics.R/R/df2dist.R');
|
62
|
-
a <- read.table('$OUT', sep='\\t',
|
63
|
-
ani.d <- enve.df2dist(a, default.d
|
62
|
+
a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
|
63
|
+
ani.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
|
64
64
|
write.table(as.matrix(ani.d), '$OUT.dist',
|
65
|
-
quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
|
65
|
+
quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
|
66
66
|
" | R --vanilla >/dev/null
|
data/utils/enveomics/Makefile
CHANGED
@@ -7,7 +7,7 @@ include globals.mk
|
|
7
7
|
|
8
8
|
TEST=Tests
|
9
9
|
enveomics_r=enveomics.R
|
10
|
-
enveomics_r_v=enveomics.
|
10
|
+
enveomics_r_v=enveomics.R_$(shell grep '^Version: ' enveomics.R/DESCRIPTION | perl -pe 's/.*: //')
|
11
11
|
.PHONY: test install install-scripts install-r uninstall install-deps
|
12
12
|
|
13
13
|
test: $(enveomics_r_v).tar.gz
|
@@ -41,7 +41,7 @@ uninstall:
|
|
41
41
|
-$(R) CMD REMOVE $(enveomics_r)
|
42
42
|
|
43
43
|
$(enveomics_r_v).tar.gz: install-deps
|
44
|
-
|
44
|
+
rm -f $(enveomics_r_v).tar.gz
|
45
45
|
./build_enveomics_r.bash
|
46
46
|
$(R) CMD build $(enveomics_r)/
|
47
47
|
$(MAKE) install-r
|
@@ -99,6 +99,7 @@
|
|
99
99
|
"files using <map.bls> as prefix with extensions .rec (for the",
|
100
100
|
"recruitment plot) and .lim (for the limits of the different sequences",
|
101
101
|
"in <seq.fa>)."],
|
102
|
+
"see_also": ["BlastTab.recplot2.R", "GFF.catsbj.pl"],
|
102
103
|
"help_arg": "-h",
|
103
104
|
"options": [
|
104
105
|
{
|
@@ -119,8 +120,8 @@
|
|
119
120
|
"opt": "-s",
|
120
121
|
"name": "Subset",
|
121
122
|
"description": ["The FastA provided is to be treated as a subset of",
|
122
|
-
"the subject. By default, it expects all the subjects to be",
|
123
|
-
"present in the
|
123
|
+
"the subject. By default, it expects all the BLAST subjects to be",
|
124
|
+
"present in the FastA."]
|
124
125
|
},
|
125
126
|
{
|
126
127
|
"opt": "-q",
|
@@ -623,7 +624,8 @@
|
|
623
624
|
{ "r_package": "optparse" },
|
624
625
|
{ "r_package": "enveomics.R" }
|
625
626
|
],
|
626
|
-
"see_also": [ "
|
627
|
+
"see_also": ["BlastTab.catsbj.pl", "GFF.catsbj.pl",
|
628
|
+
"RecPlot2.compareIdentities.R"],
|
627
629
|
"options": [
|
628
630
|
{
|
629
631
|
"opt": "--prefix",
|
@@ -637,7 +639,13 @@
|
|
637
639
|
"opt": "--pos-breaks",
|
638
640
|
"arg": "integer",
|
639
641
|
"default": 1000,
|
640
|
-
"description":
|
642
|
+
"description": "Breaks in the positions histogram."
|
643
|
+
},
|
644
|
+
{
|
645
|
+
"opt": "--pos-breaks-tsv",
|
646
|
+
"arg": "in_file",
|
647
|
+
"description": ["File with (absolute) coordinates of breaks in the",
|
648
|
+
"position histogram."]
|
641
649
|
},
|
642
650
|
{
|
643
651
|
"opt": "--id-breaks",
|
@@ -216,6 +216,50 @@
|
|
216
216
|
}
|
217
217
|
]
|
218
218
|
},
|
219
|
+
{
|
220
|
+
"task": "FastA.mask.rb",
|
221
|
+
"description": "Mask sequence region(s) in a FastA file.",
|
222
|
+
"help_arg": "--help",
|
223
|
+
"options": [
|
224
|
+
{
|
225
|
+
"opt": "--in",
|
226
|
+
"arg": "in_file",
|
227
|
+
"mandatory": true,
|
228
|
+
"description": "Input FastA file."
|
229
|
+
},
|
230
|
+
{
|
231
|
+
"opt": "--out",
|
232
|
+
"arg": "out_file",
|
233
|
+
"mandatory": true,
|
234
|
+
"description": "Output FastA file."
|
235
|
+
},
|
236
|
+
{
|
237
|
+
"opt": "--regions",
|
238
|
+
"arg": "string",
|
239
|
+
"mandatory": true,
|
240
|
+
"description": ["Regions to mask separated by commas.",
|
241
|
+
"Each region must be in the format \"sequence_id:from..to\"."]
|
242
|
+
},
|
243
|
+
{
|
244
|
+
"opt": "--symbol",
|
245
|
+
"arg": "string",
|
246
|
+
"default": "N",
|
247
|
+
"description": "Character used to mask the region(s)."
|
248
|
+
},
|
249
|
+
{
|
250
|
+
"opt": "--trim",
|
251
|
+
"description": ["Trim masked regions extending to the edge of a",
|
252
|
+
"sequence."]
|
253
|
+
},
|
254
|
+
{
|
255
|
+
"opt": "--wrap",
|
256
|
+
"arg": "integer",
|
257
|
+
"default": 70,
|
258
|
+
"description": ["Line length to wrap sequences. Use 0 to generate",
|
259
|
+
"1-line sequences."]
|
260
|
+
}
|
261
|
+
]
|
262
|
+
},
|
219
263
|
{
|
220
264
|
"task": "FastA.qlen.pl",
|
221
265
|
"description": ["Calculates the quartiles of the length in a set of",
|
@@ -298,6 +342,49 @@
|
|
298
342
|
}
|
299
343
|
]
|
300
344
|
},
|
345
|
+
{
|
346
|
+
"task": "FastA.sample.rb",
|
347
|
+
"description": ["Samples a random set of sequences from a multi-FastA",
|
348
|
+
"file."],
|
349
|
+
"help_arg": "--help",
|
350
|
+
"see_also": "FastA.subsample.pl",
|
351
|
+
"options": [
|
352
|
+
{
|
353
|
+
"name": "Input File",
|
354
|
+
"opt": "--in",
|
355
|
+
"arg": "in_file",
|
356
|
+
"mandatory": true,
|
357
|
+
"description": "Input FastA file."
|
358
|
+
},
|
359
|
+
{
|
360
|
+
"name": "Output file",
|
361
|
+
"opt": "--out",
|
362
|
+
"arg": "out_file",
|
363
|
+
"mandatory": true,
|
364
|
+
"description": "Output FastA file."
|
365
|
+
},
|
366
|
+
{
|
367
|
+
"opt": "--fraction",
|
368
|
+
"arg": "float",
|
369
|
+
"description": ["Fraction of sequences to sample [0-1].",
|
370
|
+
"Mandatory unless Number is provided."]
|
371
|
+
},
|
372
|
+
{
|
373
|
+
"opt": "--number",
|
374
|
+
"arg": "integer",
|
375
|
+
"description": ["Number of sequences to sample.",
|
376
|
+
"Mandatory unless --fraction is provided."]
|
377
|
+
},
|
378
|
+
{
|
379
|
+
"opt": "--replacement",
|
380
|
+
"description": "Sample with replacement."
|
381
|
+
},
|
382
|
+
{
|
383
|
+
"opt": "--quiet",
|
384
|
+
"description": "Run quietly (no STDERR output)."
|
385
|
+
}
|
386
|
+
]
|
387
|
+
},
|
301
388
|
{
|
302
389
|
"task": "FastA.slider.pl",
|
303
390
|
"description": "Slices sequences in fixed- or variable-length windows.",
|
@@ -432,6 +519,7 @@
|
|
432
519
|
"task": "FastA.subsample.pl",
|
433
520
|
"description": "Subsamples a set of sequences.",
|
434
521
|
"help_arg": "-h",
|
522
|
+
"see_also": "FastA.sample.rb",
|
435
523
|
"options": [
|
436
524
|
{
|
437
525
|
"name": "Fraction",
|
@@ -548,6 +636,53 @@
|
|
548
636
|
}
|
549
637
|
]
|
550
638
|
},
|
639
|
+
{
|
640
|
+
"task": "FastA.extract.rb",
|
641
|
+
"description": ["Extracts a list of sequences and/or coordinates from",
|
642
|
+
"multi-FastA files."],
|
643
|
+
"help_arg": "--help",
|
644
|
+
"options": [
|
645
|
+
{
|
646
|
+
"name": "Input file",
|
647
|
+
"opt": "--in",
|
648
|
+
"arg": "in_file",
|
649
|
+
"mandatory": true,
|
650
|
+
"description": "Input FastA file."
|
651
|
+
},
|
652
|
+
{
|
653
|
+
"name": "Output file",
|
654
|
+
"opt": "--out",
|
655
|
+
"arg": "out_file",
|
656
|
+
"mandatory": true,
|
657
|
+
"description": "Output FastA file."
|
658
|
+
},
|
659
|
+
{
|
660
|
+
"name": "Coordinates",
|
661
|
+
"opt": "--coords",
|
662
|
+
"arg": "string",
|
663
|
+
"description": ["Comma-delimited list of coordinates (mandatory",
|
664
|
+
"unless -C is passed).",
|
665
|
+
"The format of the coordinates is SEQ:FROM..TO or SEQ:FROM~LEN:",
|
666
|
+
"SEQ: Sequence ID, or * (asterisk) to extract range from all",
|
667
|
+
"sequences",
|
668
|
+
"FROM: Integer, position of the first base to include (can be",
|
669
|
+
"negative)",
|
670
|
+
"TO: Integer, last base to include (can be negative)",
|
671
|
+
"LEN: Length of the range to extract."]
|
672
|
+
},
|
673
|
+
{
|
674
|
+
"name": "Coordinates file",
|
675
|
+
"opt": "--coords-file",
|
676
|
+
"arg": "in_file",
|
677
|
+
"description": ["File containing the coordinates, one per line.",
|
678
|
+
"Each line must follow the format described for Coordinates."]
|
679
|
+
},
|
680
|
+
{
|
681
|
+
"opt": "--quiet",
|
682
|
+
"description": "Run quietly (no STDERR output)."
|
683
|
+
}
|
684
|
+
]
|
685
|
+
},
|
551
686
|
{
|
552
687
|
"task": "FastA.fragment.rb",
|
553
688
|
"description": ["Simulates incomplete (fragmented) drafts from complete",
|
@@ -743,6 +743,55 @@
|
|
743
743
|
"description": "Verbosely display warnings."
|
744
744
|
}
|
745
745
|
]
|
746
|
+
},
|
747
|
+
{
|
748
|
+
"task": "GFF.catsbj.pl",
|
749
|
+
"description": ["Generates a list of coordinates from a GFF table",
|
750
|
+
"concatenating the subject sequences."],
|
751
|
+
"help_arg": "-h",
|
752
|
+
"see_also": ["BlastTab.recplot2.R", "BlastTab.catsbj.pl"],
|
753
|
+
"options": [
|
754
|
+
{
|
755
|
+
"name": "Lim file",
|
756
|
+
"opt": "-L",
|
757
|
+
"arg": "out_file",
|
758
|
+
"description": ["An output file with the absolute coordinates of the",
|
759
|
+
"concatenated contigs. This is identical to the .lim file",
|
760
|
+
"generated by BlastTab.catsbj.pl."]
|
761
|
+
},
|
762
|
+
{
|
763
|
+
"name": "Inter-feature gaps",
|
764
|
+
"opt": "-i",
|
765
|
+
"description": ["Preserve exact coordinates and include",
|
766
|
+
"inter-feature windows as separate bins. By default, the",
|
767
|
+
"coordinates are set in the midpoint between features when",
|
768
|
+
"non-contiguous."]
|
769
|
+
},
|
770
|
+
{
|
771
|
+
"name": "Subset",
|
772
|
+
"opt": "-s",
|
773
|
+
"description": ["The FastA provided is to be treated as a subset of",
|
774
|
+
"the subject. By default, it expects all the contigs to be present",
|
775
|
+
"in the BLAST."]
|
776
|
+
},
|
777
|
+
{
|
778
|
+
"name": "Quiet",
|
779
|
+
"opt": "-q",
|
780
|
+
"description": "Run quietly."
|
781
|
+
},
|
782
|
+
{
|
783
|
+
"name": "Subject sequences",
|
784
|
+
"arg": "in_file",
|
785
|
+
"mandatory": true,
|
786
|
+
"description": "Subject sequences (contigs) in FastA format."
|
787
|
+
},
|
788
|
+
{
|
789
|
+
"name": "Features",
|
790
|
+
"arg": "in_file",
|
791
|
+
"mandatory": true,
|
792
|
+
"description": "Features to map in GFF."
|
793
|
+
}
|
794
|
+
]
|
746
795
|
}
|
747
796
|
]
|
748
797
|
}
|