miga-base 0.3.9.0 → 0.3.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/add.rb +33 -33
- data/actions/edit.rb +33 -0
- data/actions/new.rb +17 -18
- data/actions/next_step.rb +33 -0
- data/actions/run.rb +15 -12
- data/bin/miga +43 -37
- data/lib/miga/daemon.rb +2 -2
- data/lib/miga/project/result.rb +16 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/aai_distances.bash +1 -3
- data/scripts/ani_distances.bash +1 -3
- data/scripts/assembly.bash +1 -3
- data/scripts/cds.bash +1 -3
- data/scripts/clade_finding.bash +1 -3
- data/scripts/d.bash +13 -0
- data/scripts/distances.bash +1 -3
- data/scripts/essential_genes.bash +1 -3
- data/scripts/haai_distances.bash +1 -3
- data/scripts/miga.bash +12 -9
- data/scripts/mytaxa.bash +1 -3
- data/scripts/mytaxa_scan.bash +1 -3
- data/scripts/ogs.bash +36 -33
- data/scripts/p.bash +23 -0
- data/scripts/project_stats.bash +1 -3
- data/scripts/read_quality.bash +1 -3
- data/scripts/ssu.bash +1 -3
- data/scripts/stats.bash +1 -3
- data/scripts/subclades.bash +1 -3
- data/scripts/taxonomy.bash +1 -3
- data/scripts/trimmed_fasta.bash +1 -3
- data/scripts/trimmed_reads.bash +1 -3
- data/test/daemon_test.rb +3 -3
- data/utils/distance/runner.rb +1 -1
- data/utils/enveomics/Docs/recplot2.md +13 -2
- data/utils/enveomics/Examples/aai-matrix.bash +3 -3
- data/utils/enveomics/Examples/ani-matrix.bash +3 -3
- data/utils/enveomics/Makefile +2 -2
- data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
- data/utils/enveomics/Manifest/Tasks/other.json +49 -0
- data/utils/enveomics/Manifest/categories.json +4 -0
- data/utils/enveomics/Manifest/examples.json +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/aai.rb +4 -3
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
- data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
- data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
- data/utils/enveomics/enveomics.R/R/utils.R +19 -1
- data/utils/enveomics/enveomics.R/README.md +11 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
- data/utils/subclade/runner.rb +4 -0
- metadata +14 -3
data/scripts/miga.bash
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
set -e
|
3
3
|
#MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
|
4
4
|
# shellcheck source=/dev/null
|
5
|
-
|
5
|
+
. "$HOME/.miga_rc"
|
6
6
|
export PATH="$MIGA/bin:$MIGA/utils/enveomics/Scripts:$PATH"
|
7
7
|
SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
|
8
8
|
|
@@ -11,15 +11,18 @@ function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
|
|
11
11
|
|
12
12
|
for i in $(miga plugins -P "$PROJECT") ; do
|
13
13
|
# shellcheck source=/dev/null
|
14
|
-
|
14
|
+
. "$i/scripts-plugin.bash"
|
15
15
|
done
|
16
16
|
|
17
|
-
[[
|
18
|
-
|
17
|
+
if [[ "$SCRIPT" != "d" && "$SCRIPT" != "p" ]] ; then
|
18
|
+
echo -n "Date: " ; miga date
|
19
|
+
echo "MiGA: $MIGA"
|
20
|
+
echo "Task: $SCRIPT"
|
21
|
+
echo "Project: $PROJECT"
|
22
|
+
if [[ -n $DATASET ]] ; then
|
23
|
+
echo "Dataset: $DATASET"
|
24
|
+
miga edit -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT"
|
25
|
+
fi
|
26
|
+
fi
|
19
27
|
|
20
28
|
true
|
21
|
-
|
22
|
-
#if [[ "$RUNTYPE" == "qsub" ]] ; then
|
23
|
-
#elif [[ "$RUNTYPE" == "msub" ]] ; then
|
24
|
-
#fi
|
25
|
-
|
data/scripts/mytaxa.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="mytaxa"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/mytaxa_scan.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="mytaxa_scan"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/ogs.bash
CHANGED
@@ -2,49 +2,52 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
4
|
SCRIPT="ogs"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/10.clades/03.ogs"
|
10
8
|
|
11
9
|
# Initialize
|
12
10
|
miga date > "miga-project.start"
|
13
11
|
|
14
12
|
DS=$(miga ls -P "$PROJECT" --ref --no-multi)
|
15
|
-
MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
|
16
|
-
[[ $MIN_ID == "?" ]] && MIN_ID=80
|
17
|
-
if [[ ! -s miga-project.ogs ]] ; then
|
18
|
-
# Extract RBMs
|
19
|
-
if [[ ! -s miga-project.abc ]] ; then
|
20
|
-
[[ -d miga-project.tmp ]] || mkdir miga-project.tmp
|
21
|
-
for i in $DS ; do
|
22
|
-
file="miga-project.tmp/$i.abc"
|
23
|
-
[[ -s "$file" ]] && continue
|
24
|
-
echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
|
25
|
-
| sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
|
26
|
-
| awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
|
27
|
-
> "$file.tmp"
|
28
|
-
mv "$file.tmp" "$file"
|
29
|
-
done
|
30
|
-
cat miga-project.tmp/*.abc > miga-project.abc
|
31
|
-
fi
|
32
|
-
rm -rf miga-project.tmp
|
33
13
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
14
|
+
if [[ -n $DS ]] ; then
|
15
|
+
MIN_ID=$(miga about -P "$PROJECT" -m ogs_identity)
|
16
|
+
[[ $MIN_ID == "?" ]] && MIN_ID=80
|
17
|
+
if [[ ! -s miga-project.ogs ]] ; then
|
18
|
+
# Extract RBMs
|
19
|
+
if [[ ! -s miga-project.abc ]] ; then
|
20
|
+
[[ -d miga-project.tmp ]] || mkdir miga-project.tmp
|
21
|
+
for i in $DS ; do
|
22
|
+
file="miga-project.tmp/$i.abc"
|
23
|
+
[[ -s "$file" ]] && continue
|
24
|
+
echo "SELECT seq1,id1,seq2,id2,bitscore from rbm where id >= $MIN_ID;" \
|
25
|
+
| sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
|
26
|
+
| awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
|
27
|
+
> "$file.tmp"
|
28
|
+
mv "$file.tmp" "$file"
|
29
|
+
done
|
30
|
+
cat miga-project.tmp/*.abc > miga-project.abc
|
31
|
+
fi
|
32
|
+
rm -rf miga-project.tmp
|
33
|
+
|
34
|
+
# Estimate OGs and Clean RBMs
|
35
|
+
ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
|
36
|
+
if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
|
37
|
+
rm miga-project.abc
|
38
|
+
else
|
39
|
+
gzip -9 miga-project.abc
|
40
|
+
fi
|
40
41
|
fi
|
41
|
-
fi
|
42
42
|
|
43
|
-
# Calculate Statistics
|
44
|
-
ogs.stats.rb -o miga-project.ogs -j miga-project.stats
|
45
|
-
ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
|
46
|
-
Rscript "$MIGA/utils/core-pan-plot.R" \
|
47
|
-
|
43
|
+
# Calculate Statistics
|
44
|
+
ogs.stats.rb -o miga-project.ogs -j miga-project.stats
|
45
|
+
ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
|
46
|
+
Rscript "$MIGA/utils/core-pan-plot.R" \
|
47
|
+
miga-project.core-pan.tsv miga-project.core-pan.pdf
|
48
|
+
else
|
49
|
+
touch miga-project.empty
|
50
|
+
fi
|
48
51
|
|
49
52
|
# Finalize
|
50
53
|
miga date > "miga-project.done"
|
data/scripts/p.bash
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
|
+
set -e
|
4
|
+
SCRIPT="p"
|
5
|
+
# shellcheck source=scripts/miga.bash
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
7
|
+
|
8
|
+
while true ; do
|
9
|
+
res="$(miga next_step -P "$PROJECT")"
|
10
|
+
[[ "$res" == '?' ]] && break
|
11
|
+
miga run -P "$PROJECT" -r "$res" -t "$CORES"
|
12
|
+
if [[ "$res" == "$last_res" ]] ; then
|
13
|
+
let k=$k+1
|
14
|
+
if [[ $k -gt 10 ]] ; then
|
15
|
+
miga new --update -P "$PROJECT" \
|
16
|
+
-m "run_$res=false,warn=Too many failed attempts to run $res."
|
17
|
+
fi
|
18
|
+
else
|
19
|
+
k=0
|
20
|
+
last_res=$res
|
21
|
+
fi
|
22
|
+
done
|
23
|
+
|
data/scripts/project_stats.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
4
|
SCRIPT="project_stats"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/90.stats"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/read_quality.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="read_quality"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/03.read_quality"
|
10
8
|
|
11
9
|
b=$DATASET
|
data/scripts/ssu.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="ssu"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/stats.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="stats"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/90.stats"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/subclades.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
4
|
SCRIPT="subclades"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/10.clades/02.ani"
|
10
8
|
|
11
9
|
# Initialize
|
data/scripts/taxonomy.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="taxonomy"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
DIR="$PROJECT/data/09.distances/05.taxonomy"
|
10
8
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
9
|
cd "$DIR"
|
data/scripts/trimmed_fasta.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="trimmed_fasta"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/04.trimmed_fasta"
|
10
8
|
|
11
9
|
b=$DATASET
|
data/scripts/trimmed_reads.bash
CHANGED
@@ -2,10 +2,8 @@
|
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="trimmed_reads"
|
5
|
-
echo "MiGA: $MIGA"
|
6
|
-
echo "Project: $PROJECT"
|
7
5
|
# shellcheck source=scripts/miga.bash
|
8
|
-
|
6
|
+
. "$MIGA/scripts/miga.bash" || exit 1
|
9
7
|
cd "$PROJECT/data/02.trimmed_reads"
|
10
8
|
|
11
9
|
b=$DATASET
|
data/test/daemon_test.rb
CHANGED
@@ -40,10 +40,10 @@ class DaemonTest < Test::Unit::TestCase
|
|
40
40
|
out = capture_stdout do
|
41
41
|
d.check_datasets
|
42
42
|
end
|
43
|
-
assert(out.string =~ /Queueing #{ds.name}:
|
43
|
+
assert(out.string =~ /Queueing #{ds.name}:d/)
|
44
44
|
assert_equal(1, d.jobs_to_run.size)
|
45
|
-
assert_equal("project1:
|
46
|
-
assert_equal(d.jobs_to_run.first, d.get_job(:
|
45
|
+
assert_equal("project1:d:ds1", d.jobs_to_run.first[:cmd])
|
46
|
+
assert_equal(d.jobs_to_run.first, d.get_job(:d, ds))
|
47
47
|
end
|
48
48
|
|
49
49
|
def test_in_loop
|
data/utils/distance/runner.rb
CHANGED
@@ -35,7 +35,7 @@ class MiGA::DistanceRunner
|
|
35
35
|
elsif !opts[:run_taxonomy] and dataset.metadata[:db_project]
|
36
36
|
ref_path = dataset.metadata[:db_project]
|
37
37
|
if project.metadata[:db_proj_dir]
|
38
|
-
ref_path = File.expand_path(project.metadata[:db_proj_dir]
|
38
|
+
ref_path = File.expand_path(ref_path, project.metadata[:db_proj_dir])
|
39
39
|
end
|
40
40
|
@ref_project = MiGA::Project.load(ref_path)
|
41
41
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
@@ -117,11 +117,22 @@ library(enveomics.R)
|
|
117
117
|
load('my-recplot.rdata')
|
118
118
|
```
|
119
119
|
|
120
|
-
###
|
120
|
+
### Centrality measures of sequencing depth
|
121
121
|
|
122
122
|
```R
|
123
123
|
mean(enve.recplot2.seqdepth(rp)) # <- Average
|
124
124
|
median(enve.recplot2.seqdepth(rp)) # <- Median
|
125
|
+
enve.truncate(enve.recplot2.seqdepth(rp)) # <- 95% Central Truncated Mean
|
126
|
+
enve.truncate(enve.recplot2.seqdepth(rp), 0.9) # <- 90% Central Truncated Mean
|
127
|
+
```
|
128
|
+
|
129
|
+
The functions above only use hits with identity above the cutoff for "in-group" (by default: 95%).
|
130
|
+
In order to estimate the sequencing depth with a different identity cutoff, modify the cutoff first:
|
131
|
+
|
132
|
+
```R
|
133
|
+
rp98 <- enve.recplot2.changeCutoff(rp, 98) # <- Change to ≥98%
|
134
|
+
mean(enve.recplot2.seqdepth(rp98)) # <- Average (for the new object)
|
135
|
+
median(enve.recplot2.seqdepth(rp98)) # <- Median (for the new object)
|
125
136
|
```
|
126
137
|
|
127
138
|
### Average and median sequencing depth excluding zero-coverage windows
|
@@ -189,7 +200,7 @@ p <- plot(rp, use.peaks=peaks, layout=4) # <- Remove `layout=4` for the full plo
|
|
189
200
|
dev.off()
|
190
201
|
```
|
191
202
|
|
192
|
-
The key function here is `enve.
|
203
|
+
The key function here is `enve.recplot2.findPeaks`. This function has several parameters, depending on
|
193
204
|
the method used. To see all supported methods, use `?enve.recplot2.findPeaks`. To see all the options
|
194
205
|
of the default method (`'emauto'`) use `?enve.recplot2.findPeaks.emauto`.
|
195
206
|
|
@@ -59,8 +59,8 @@ echo "select seq1, seq2, aai, sd, n, omega, (100.0*n/omega) from aai;" \
|
|
59
59
|
echo "[03/03] Generating distance matrix"
|
60
60
|
echo "
|
61
61
|
source('$(dirname $0)/../enveomics.R/R/df2dist.R');
|
62
|
-
a <- read.table('$OUT', sep='\\t',
|
63
|
-
aai.d <- enve.df2dist(a, default.d
|
62
|
+
a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
|
63
|
+
aai.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
|
64
64
|
write.table(as.matrix(aai.d), '$OUT.dist',
|
65
|
-
quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
|
65
|
+
quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
|
66
66
|
" | R --vanilla >/dev/null
|
@@ -59,8 +59,8 @@ echo "select seq1, seq2, ani, sd, n, omega, (100.0*n/omega) from ani;" \
|
|
59
59
|
echo "[03/03] Generating distance matrix"
|
60
60
|
echo "
|
61
61
|
source('$(dirname $0)/../enveomics.R/R/df2dist.R');
|
62
|
-
a <- read.table('$OUT', sep='\\t',
|
63
|
-
ani.d <- enve.df2dist(a, default.d
|
62
|
+
a <- read.table('$OUT', sep = '\\t', header = TRUE, as.is = TRUE, quote = '');
|
63
|
+
ani.d <- enve.df2dist(a, default.d = $DEF_DIST, max.sim = 100);
|
64
64
|
write.table(as.matrix(ani.d), '$OUT.dist',
|
65
|
-
quote=FALSE, col.names=NA, row.names=TRUE, sep='\\t')
|
65
|
+
quote = FALSE, col.names = NA, row.names = TRUE, sep = '\\t')
|
66
66
|
" | R --vanilla >/dev/null
|
data/utils/enveomics/Makefile
CHANGED
@@ -7,7 +7,7 @@ include globals.mk
|
|
7
7
|
|
8
8
|
TEST=Tests
|
9
9
|
enveomics_r=enveomics.R
|
10
|
-
enveomics_r_v=enveomics.
|
10
|
+
enveomics_r_v=enveomics.R_$(shell grep '^Version: ' enveomics.R/DESCRIPTION | perl -pe 's/.*: //')
|
11
11
|
.PHONY: test install install-scripts install-r uninstall install-deps
|
12
12
|
|
13
13
|
test: $(enveomics_r_v).tar.gz
|
@@ -41,7 +41,7 @@ uninstall:
|
|
41
41
|
-$(R) CMD REMOVE $(enveomics_r)
|
42
42
|
|
43
43
|
$(enveomics_r_v).tar.gz: install-deps
|
44
|
-
|
44
|
+
rm -f $(enveomics_r_v).tar.gz
|
45
45
|
./build_enveomics_r.bash
|
46
46
|
$(R) CMD build $(enveomics_r)/
|
47
47
|
$(MAKE) install-r
|
@@ -99,6 +99,7 @@
|
|
99
99
|
"files using <map.bls> as prefix with extensions .rec (for the",
|
100
100
|
"recruitment plot) and .lim (for the limits of the different sequences",
|
101
101
|
"in <seq.fa>)."],
|
102
|
+
"see_also": ["BlastTab.recplot2.R", "GFF.catsbj.pl"],
|
102
103
|
"help_arg": "-h",
|
103
104
|
"options": [
|
104
105
|
{
|
@@ -119,8 +120,8 @@
|
|
119
120
|
"opt": "-s",
|
120
121
|
"name": "Subset",
|
121
122
|
"description": ["The FastA provided is to be treated as a subset of",
|
122
|
-
"the subject. By default, it expects all the subjects to be",
|
123
|
-
"present in the
|
123
|
+
"the subject. By default, it expects all the BLAST subjects to be",
|
124
|
+
"present in the FastA."]
|
124
125
|
},
|
125
126
|
{
|
126
127
|
"opt": "-q",
|
@@ -623,7 +624,8 @@
|
|
623
624
|
{ "r_package": "optparse" },
|
624
625
|
{ "r_package": "enveomics.R" }
|
625
626
|
],
|
626
|
-
"see_also": [ "
|
627
|
+
"see_also": ["BlastTab.catsbj.pl", "GFF.catsbj.pl",
|
628
|
+
"RecPlot2.compareIdentities.R"],
|
627
629
|
"options": [
|
628
630
|
{
|
629
631
|
"opt": "--prefix",
|
@@ -637,7 +639,13 @@
|
|
637
639
|
"opt": "--pos-breaks",
|
638
640
|
"arg": "integer",
|
639
641
|
"default": 1000,
|
640
|
-
"description":
|
642
|
+
"description": "Breaks in the positions histogram."
|
643
|
+
},
|
644
|
+
{
|
645
|
+
"opt": "--pos-breaks-tsv",
|
646
|
+
"arg": "in_file",
|
647
|
+
"description": ["File with (absolute) coordinates of breaks in the",
|
648
|
+
"position histogram."]
|
641
649
|
},
|
642
650
|
{
|
643
651
|
"opt": "--id-breaks",
|
@@ -216,6 +216,50 @@
|
|
216
216
|
}
|
217
217
|
]
|
218
218
|
},
|
219
|
+
{
|
220
|
+
"task": "FastA.mask.rb",
|
221
|
+
"description": "Mask sequence region(s) in a FastA file.",
|
222
|
+
"help_arg": "--help",
|
223
|
+
"options": [
|
224
|
+
{
|
225
|
+
"opt": "--in",
|
226
|
+
"arg": "in_file",
|
227
|
+
"mandatory": true,
|
228
|
+
"description": "Input FastA file."
|
229
|
+
},
|
230
|
+
{
|
231
|
+
"opt": "--out",
|
232
|
+
"arg": "out_file",
|
233
|
+
"mandatory": true,
|
234
|
+
"description": "Output FastA file."
|
235
|
+
},
|
236
|
+
{
|
237
|
+
"opt": "--regions",
|
238
|
+
"arg": "string",
|
239
|
+
"mandatory": true,
|
240
|
+
"description": ["Regions to mask separated by commas.",
|
241
|
+
"Each region must be in the format \"sequence_id:from..to\"."]
|
242
|
+
},
|
243
|
+
{
|
244
|
+
"opt": "--symbol",
|
245
|
+
"arg": "string",
|
246
|
+
"default": "N",
|
247
|
+
"description": "Character used to mask the region(s)."
|
248
|
+
},
|
249
|
+
{
|
250
|
+
"opt": "--trim",
|
251
|
+
"description": ["Trim masked regions extending to the edge of a",
|
252
|
+
"sequence."]
|
253
|
+
},
|
254
|
+
{
|
255
|
+
"opt": "--wrap",
|
256
|
+
"arg": "integer",
|
257
|
+
"default": 70,
|
258
|
+
"description": ["Line length to wrap sequences. Use 0 to generate",
|
259
|
+
"1-line sequences."]
|
260
|
+
}
|
261
|
+
]
|
262
|
+
},
|
219
263
|
{
|
220
264
|
"task": "FastA.qlen.pl",
|
221
265
|
"description": ["Calculates the quartiles of the length in a set of",
|
@@ -298,6 +342,49 @@
|
|
298
342
|
}
|
299
343
|
]
|
300
344
|
},
|
345
|
+
{
|
346
|
+
"task": "FastA.sample.rb",
|
347
|
+
"description": ["Samples a random set of sequences from a multi-FastA",
|
348
|
+
"file."],
|
349
|
+
"help_arg": "--help",
|
350
|
+
"see_also": "FastA.subsample.pl",
|
351
|
+
"options": [
|
352
|
+
{
|
353
|
+
"name": "Input File",
|
354
|
+
"opt": "--in",
|
355
|
+
"arg": "in_file",
|
356
|
+
"mandatory": true,
|
357
|
+
"description": "Input FastA file."
|
358
|
+
},
|
359
|
+
{
|
360
|
+
"name": "Output file",
|
361
|
+
"opt": "--out",
|
362
|
+
"arg": "out_file",
|
363
|
+
"mandatory": true,
|
364
|
+
"description": "Output FastA file."
|
365
|
+
},
|
366
|
+
{
|
367
|
+
"opt": "--fraction",
|
368
|
+
"arg": "float",
|
369
|
+
"description": ["Fraction of sequences to sample [0-1].",
|
370
|
+
"Mandatory unless Number is provided."]
|
371
|
+
},
|
372
|
+
{
|
373
|
+
"opt": "--number",
|
374
|
+
"arg": "integer",
|
375
|
+
"description": ["Number of sequences to sample.",
|
376
|
+
"Mandatory unless -f is provided."]
|
377
|
+
},
|
378
|
+
{
|
379
|
+
"opt": "--replacement",
|
380
|
+
"description": "Sample with replacement."
|
381
|
+
},
|
382
|
+
{
|
383
|
+
"opt": "--quiet",
|
384
|
+
"description": "Run quietly (no STDERR output)."
|
385
|
+
}
|
386
|
+
]
|
387
|
+
},
|
301
388
|
{
|
302
389
|
"task": "FastA.slider.pl",
|
303
390
|
"description": "Slices sequences in fixed- or variable-length windows.",
|
@@ -432,6 +519,7 @@
|
|
432
519
|
"task": "FastA.subsample.pl",
|
433
520
|
"description": "Subsamples a set of sequences.",
|
434
521
|
"help_arg": "-h",
|
522
|
+
"see_also": "FastA.sample.rb",
|
435
523
|
"options": [
|
436
524
|
{
|
437
525
|
"name": "Fraction",
|
@@ -548,6 +636,53 @@
|
|
548
636
|
}
|
549
637
|
]
|
550
638
|
},
|
639
|
+
{
|
640
|
+
"task": "FastA.extract.rb",
|
641
|
+
"description": ["Extracts a list of sequences and/or coordinates from",
|
642
|
+
"multi-FastA files."],
|
643
|
+
"help_arg": "--help",
|
644
|
+
"options": [
|
645
|
+
{
|
646
|
+
"name": "Input file",
|
647
|
+
"opt": "--in",
|
648
|
+
"arg": "in_file",
|
649
|
+
"mandatory": true,
|
650
|
+
"description": "Input FastA file."
|
651
|
+
},
|
652
|
+
{
|
653
|
+
"name": "Output file",
|
654
|
+
"opt": "--out",
|
655
|
+
"arg": "out_file",
|
656
|
+
"mandatory": true,
|
657
|
+
"description": "Output FastA file."
|
658
|
+
},
|
659
|
+
{
|
660
|
+
"name": "Coordinates",
|
661
|
+
"opt": "--coords",
|
662
|
+
"arg": "string",
|
663
|
+
"description": ["Comma-delimited list of coordinates (mandatory",
|
664
|
+
"unless -C is passed).",
|
665
|
+
"The format of the coordinates is SEQ:FROM..TO or SEQ:FROM~LEN:",
|
666
|
+
"SEQ: Sequence ID, or * (asterisk) to extract range from all",
|
667
|
+
"sequences",
|
668
|
+
"FROM: Integer, position of the first base to include (can be",
|
669
|
+
"negative)",
|
670
|
+
"TO: Integer, last base to include (can be negative)",
|
671
|
+
"LEN: Length of the range to extract."]
|
672
|
+
},
|
673
|
+
{
|
674
|
+
"name": "Coordinates file",
|
675
|
+
"opt": "--coords-file",
|
676
|
+
"arg": "in_file",
|
677
|
+
"description": ["File containing the coordinates, one per line.",
|
678
|
+
"Each line must follow the format described for Coordinates."]
|
679
|
+
},
|
680
|
+
{
|
681
|
+
"opt": "--quiet",
|
682
|
+
"description": "Run quietly (no STDERR output)."
|
683
|
+
}
|
684
|
+
]
|
685
|
+
},
|
551
686
|
{
|
552
687
|
"task": "FastA.fragment.rb",
|
553
688
|
"description": ["Simulates incomplete (fragmented) drafts from complete",
|
@@ -743,6 +743,55 @@
|
|
743
743
|
"description": "Verbously display warnings."
|
744
744
|
}
|
745
745
|
]
|
746
|
+
},
|
747
|
+
{
|
748
|
+
"task": "GFF.catsbj.pl",
|
749
|
+
"description": ["Generates a list of coordinates from a GFF table",
|
750
|
+
"concatenating the subject sequences."],
|
751
|
+
"help_arg": "-h",
|
752
|
+
"see_also": ["BlastTab.recplot2.R", "BlastTab.catsbj.pl"],
|
753
|
+
"options": [
|
754
|
+
{
|
755
|
+
"name": "Lim file",
|
756
|
+
"opt": "-L",
|
757
|
+
"arg": "out_file",
|
758
|
+
"description": ["An output file with the absolute coordinates of the",
|
759
|
+
"concatenated contigs. This is identical to the .lim file",
|
760
|
+
"generated by BlastTab.catsbj.pl."]
|
761
|
+
},
|
762
|
+
{
|
763
|
+
"name": "Inter-feature gaps",
|
764
|
+
"opt": "-i",
|
765
|
+
"description": ["Preserve exact coordinates and include",
|
766
|
+
"inter-feature windows as separate bins. By default, the",
|
767
|
+
"coordinates are set in the midpoint between features when",
|
768
|
+
"non-contiguous."]
|
769
|
+
},
|
770
|
+
{
|
771
|
+
"name": "Subset",
|
772
|
+
"opt": "-s",
|
773
|
+
"description": ["The FastA provided is to be treated as a subset of",
|
774
|
+
"the subject. By default, it expects all the contigs to be present",
|
775
|
+
"in the BLAST."]
|
776
|
+
},
|
777
|
+
{
|
778
|
+
"name": "Quiet",
|
779
|
+
"opt": "-q",
|
780
|
+
"description": "Run quietly."
|
781
|
+
},
|
782
|
+
{
|
783
|
+
"name": "Subject sequences",
|
784
|
+
"arg": "in_file",
|
785
|
+
"mandatory": true,
|
786
|
+
"description": "Subject sequences (contigs) in FastA format."
|
787
|
+
},
|
788
|
+
{
|
789
|
+
"name": "Features",
|
790
|
+
"arg": "in_file",
|
791
|
+
"mandatory": true,
|
792
|
+
"description": "Features to map in GFF."
|
793
|
+
}
|
794
|
+
]
|
746
795
|
}
|
747
796
|
]
|
748
797
|
}
|