miga-base 0.2.6.4 → 0.2.6.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/list_datasets.rb +6 -1
- data/actions/run_local.rb +1 -1
- data/actions/tax_distributions.rb +4 -4
- data/lib/miga/common.rb +18 -0
- data/lib/miga/daemon.rb +1 -1
- data/lib/miga/dataset_result.rb +46 -47
- data/lib/miga/remote_dataset.rb +52 -32
- data/lib/miga/tax_dist.rb +2 -2
- data/lib/miga/tax_index.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_functions.bash +17 -8
- data/scripts/_distances_noref_nomulti.bash +26 -7
- data/scripts/aai_distances.bash +3 -2
- data/scripts/ani_distances.bash +3 -2
- data/scripts/assembly.bash +24 -24
- data/scripts/cds.bash +22 -30
- data/scripts/clade_finding.bash +5 -4
- data/scripts/distances.bash +13 -9
- data/scripts/essential_genes.bash +12 -11
- data/scripts/haai_distances.bash +3 -2
- data/scripts/init.bash +100 -108
- data/scripts/miga.bash +4 -2
- data/scripts/mytaxa.bash +72 -71
- data/scripts/mytaxa_scan.bash +62 -61
- data/scripts/ogs.bash +14 -13
- data/scripts/project_stats.bash +1 -0
- data/scripts/read_quality.bash +12 -16
- data/scripts/ssu.bash +18 -18
- data/scripts/stats.bash +3 -2
- data/scripts/subclades.bash +7 -6
- data/scripts/trimmed_fasta.bash +22 -21
- data/scripts/trimmed_reads.bash +34 -32
- data/utils/index_metadata.rb +4 -4
- data/utils/ref-tree.R +65 -0
- data/utils/requirements.txt +1 -1
- metadata +57 -56
data/scripts/assembly.bash
CHANGED
@@ -1,53 +1,53 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="assembly"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/05.assembly"
|
9
10
|
|
10
|
-
b=$DATASET
|
11
|
-
|
12
11
|
# Initialize
|
13
|
-
date
|
12
|
+
miga date > "$DATASET.start"
|
14
13
|
|
15
14
|
# Interpose (if needed)
|
16
15
|
TF="../04.trimmed_fasta"
|
17
|
-
if [[ -s $TF/$DATASET.1.fasta \
|
18
|
-
&& -s $TF/$DATASET.2.fasta \
|
19
|
-
&& ! -s $TF/$DATASET.CoupledReads.fa ]] ; then
|
20
|
-
FastA.interpose.pl $TF/$DATASET.CoupledReads.fa $TF/$DATASET.[12].fasta
|
21
|
-
gzip -9 -f $TF/$DATASET.1.fasta
|
22
|
-
gzip -9 -f $TF/$DATASET.2.fasta
|
16
|
+
if [[ -s "$TF/$DATASET.1.fasta" \
|
17
|
+
&& -s "$TF/$DATASET.2.fasta" \
|
18
|
+
&& ! -s "$TF/$DATASET.CoupledReads.fa" ]] ; then
|
19
|
+
FastA.interpose.pl "$TF/$DATASET.CoupledReads.fa" "$TF/$DATASET".[12].fasta
|
20
|
+
gzip -9 -f "$TF/$DATASET.1.fasta"
|
21
|
+
gzip -9 -f "$TF/$DATASET.2.fasta"
|
23
22
|
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
|
24
23
|
fi
|
25
24
|
|
26
25
|
# Assemble
|
27
26
|
FA="$TF/$DATASET.CoupledReads.fa"
|
28
|
-
[[ -e $FA ]] || FA="$FA.gz"
|
29
|
-
[[ -e $FA ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
|
30
|
-
[[ -e $FA ]] || FA="$FA.gz"
|
27
|
+
[[ -e "$FA" ]] || FA="$FA.gz"
|
28
|
+
[[ -e "$FA" ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
|
29
|
+
[[ -e "$FA" ]] || FA="$FA.gz"
|
31
30
|
RD="r"
|
32
31
|
[[ $FA == *.SingleReads.fa* ]] && RD="l"
|
33
32
|
idba_ud --pre_correction -$RD "$FA" -o "$DATASET" --num_threads "$CORES" || true
|
34
|
-
[[ -s $DATASET/contig.fa ]] || exit 1
|
33
|
+
[[ -s "$DATASET/contig.fa" ]] || exit 1
|
35
34
|
|
36
35
|
# Clean
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
(
|
37
|
+
cd "$DATASET"
|
38
|
+
rm kmer graph-*.fa align-* local-contig-*.fa contig-*.fa
|
39
|
+
)
|
40
40
|
|
41
41
|
# Extract
|
42
|
-
if [[ -s $DATASET/scaffold.fa ]] ; then
|
43
|
-
|
42
|
+
if [[ -s "$DATASET/scaffold.fa" ]] ; then
|
43
|
+
ln -s "$DATASET/scaffold.fa" "$DATASET.AllContigs.fna"
|
44
44
|
else
|
45
|
-
|
45
|
+
ln -s "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
|
46
46
|
fi
|
47
|
-
FastA.length.pl $DATASET.AllContigs.fna | awk '$2>=1000{print $1}' \
|
48
|
-
|
49
|
-
|
47
|
+
FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
|
48
|
+
| FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
|
49
|
+
> "$DATASET.LargeContigs.fna"
|
50
50
|
|
51
51
|
# Finalize
|
52
|
-
date
|
52
|
+
miga date > "$DATASET.done"
|
53
53
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/cds.bash
CHANGED
@@ -1,46 +1,38 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="cds"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/06.cds"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
-
GM=$(dirname -- $(which gmhmmp))
|
12
|
+
miga date > "$DATASET.start"
|
13
13
|
|
14
|
-
#
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
cp "$GM/.gm_key" ".gm_key"
|
24
|
-
elif [[ -e "$HOME/.gm_key" ]] ; then
|
25
|
-
cp "$HOME/.gm_key" .
|
26
|
-
else
|
27
|
-
echo "Impossible to find MetaGeneMark key, please register your copy" \
|
28
|
-
"and place the key in '$GM/gm_key'." >&2
|
29
|
-
exit 1
|
30
|
-
fi
|
31
|
-
fi
|
14
|
+
# Run Prodigal
|
15
|
+
TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
|
16
|
+
--metadata "type" | awk '{print $2}')
|
17
|
+
case "$TYPE" in
|
18
|
+
metagenome|virome) PROCEDURE=meta ;;
|
19
|
+
*) PROCEDURE=single ;;
|
20
|
+
esac
|
21
|
+
prodigal -a "$DATASET.faa" -d "$DATASET.fna" -f gff -o "$DATASET.gff3" \
|
22
|
+
-p $PROCEDURE -q -i "../05.assembly/$DATASET.LargeContigs.fna"
|
32
23
|
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
perl
|
39
|
-
|
24
|
+
# Clean Prodigal noisy deflines
|
25
|
+
for i in faa fna ; do
|
26
|
+
perl -pe 's/>.*ID=([^;]+);.*/>gene_$1/' "$DATASET.$i" > "$DATASET.$i.t"
|
27
|
+
mv "$DATASET.$i.t" "$DATASET.$i"
|
28
|
+
done
|
29
|
+
perl -pe 's/ID=([0-9]+_[0-9]+);/ID=gene_$1;/' "$DATASET.gff3" \
|
30
|
+
> "$DATASET.gff3.t"
|
31
|
+
mv "$DATASET.gff3.t" "$DATASET.gff3"
|
40
32
|
|
41
33
|
# Gzip
|
42
|
-
gzip -9 -f "$DATASET.
|
34
|
+
gzip -9 -f "$DATASET.gff3"
|
43
35
|
|
44
36
|
# Finalize
|
45
|
-
date
|
37
|
+
miga date > "$DATASET.done"
|
46
38
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/clade_finding.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="clade_finding"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/10.clades/01.find"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
# Markov-cluster genomes by ANI
|
14
15
|
gunzip -c ../../09.distances/03.ani/miga-project.txt.gz | tail -n+2 \
|
@@ -30,9 +31,9 @@ cat miga-project.ani95-clades | tail -n +2 | tr "," "\\t" | awk 'NF >= 5' \
|
|
30
31
|
|
31
32
|
# Run R code (except in projects type clade)
|
32
33
|
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
33
|
-
$MIGA/utils/subclades.R \
|
34
|
+
"$MIGA/utils/subclades.R" \
|
34
35
|
../../09.distances/02.aai/miga-project.txt.gz \
|
35
|
-
miga-project $CORES
|
36
|
+
miga-project "$CORES"
|
36
37
|
mv miga-project.nwk miga-project.aai.nwk
|
37
38
|
|
38
39
|
# Compile
|
@@ -42,5 +43,5 @@ if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
|
42
43
|
fi
|
43
44
|
|
44
45
|
# Finalize
|
45
|
-
date
|
46
|
+
miga date > "miga-project.done"
|
46
47
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/distances.bash
CHANGED
@@ -1,32 +1,36 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="distances"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/09.distances"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "$DATASET.start"
|
12
13
|
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
13
|
-
trap "rm -rf $TMPDIR; exit" SIGHUP SIGINT SIGTERM
|
14
|
+
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
14
15
|
|
15
16
|
# Check type of dataset
|
16
17
|
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
|
17
|
-
|
18
|
+
| wc -l | awk '{print $1}')
|
18
19
|
REF=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --ref \
|
19
|
-
|
20
|
+
| wc -l | awk '{print $1}')
|
20
21
|
|
21
22
|
# Call submodules
|
23
|
+
# shellcheck source=scripts/_distances_functions.bash
|
22
24
|
source "$MIGA/scripts/_distances_functions.bash"
|
23
25
|
if [[ "$NOMULTI" -eq "1" && "$REF" -eq "1" ]] ; then
|
24
|
-
|
26
|
+
# shellcheck source=scripts/_distances_ref_nomulti.bash
|
27
|
+
source "$MIGA/scripts/_distances_ref_nomulti.bash"
|
25
28
|
elif [[ "$NOMULTI" -eq "1" ]] ; then
|
26
|
-
|
29
|
+
# shellcheck source=scripts/_distances_noref_nomulti.bash
|
30
|
+
source "$MIGA/scripts/_distances_noref_nomulti.bash"
|
27
31
|
fi
|
28
32
|
|
29
33
|
# Finalize
|
30
|
-
rm -R $TMPDIR
|
31
|
-
date
|
34
|
+
rm -R "$TMPDIR"
|
35
|
+
miga date > "$DATASET.done"
|
32
36
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/essential_genes.bash
CHANGED
@@ -1,21 +1,22 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="essential_genes"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/07.annotation/01.function/01.essential"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "$DATASET.start"
|
12
13
|
FAA="../../../06.cds/$DATASET.faa"
|
13
14
|
|
14
15
|
# Check if there are any proteins
|
15
16
|
if [[ ! -s $FAA ]] ; then
|
16
17
|
echo Empty protein set, bypassing essential genes
|
17
18
|
rm "$DATASET.start"
|
18
|
-
miga create_dataset -P "$PROJECT" -D $DATASET \
|
19
|
+
miga create_dataset -P "$PROJECT" -D "$DATASET" \
|
19
20
|
-m run_essential_genes=false --update
|
20
21
|
exit 0
|
21
22
|
fi
|
@@ -24,17 +25,17 @@ fi
|
|
24
25
|
[[ -d "$DATASET.ess" ]] && rm -R "$DATASET.ess"
|
25
26
|
mkdir "$DATASET.ess"
|
26
27
|
TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
|
27
|
-
|
28
|
+
--metadata "type" | awk '{print $2}')
|
28
29
|
if [[ "$TYPE" == "metagenome" || "$TYPE" == "virome" ]] ; then
|
29
|
-
|
30
|
-
|
31
|
-
|
30
|
+
HMM.essential.rb -i "$FAA" -o "$DATASET.ess.faa" \
|
31
|
+
-m "$DATASET.ess/" -t "$CORES" -r "$DATASET" --metagenome \
|
32
|
+
> "$DATASET.ess/log"
|
32
33
|
else
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
HMM.essential.rb -i "$FAA" -o "$DATASET.ess.faa" \
|
35
|
+
-m "$DATASET.ess/" -t "$CORES" -r "$DATASET" \
|
36
|
+
> "$DATASET.ess/log"
|
36
37
|
fi
|
37
38
|
|
38
39
|
# Finalize
|
39
|
-
date
|
40
|
+
miga date > "$DATASET.done"
|
40
41
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/haai_distances.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="haai_distances"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/09.distances/01.haai"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
echo -n "" > miga-project.log
|
14
15
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
@@ -39,5 +40,5 @@ if(sum(haai[,'a'] != haai[,'b']) > 0){
|
|
39
40
|
gzip -9 -f miga-project.txt
|
40
41
|
|
41
42
|
# Finalize
|
42
|
-
date
|
43
|
+
miga date > "miga-project.done"
|
43
44
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/init.bash
CHANGED
@@ -3,39 +3,40 @@ set -e
|
|
3
3
|
|
4
4
|
#=======[ Functions ]
|
5
5
|
function ask_user {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
6
|
+
local question=$1
|
7
|
+
local default=$2
|
8
|
+
echo "$question" >&2
|
9
|
+
echo -n " [$default] > " >&2
|
10
|
+
read -r user_answer
|
11
|
+
user_answer=${user_answer:-$default}
|
12
|
+
echo -n "$user_answer"
|
13
13
|
}
|
14
14
|
|
15
15
|
function check_req {
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
16
|
+
local bin=$1
|
17
|
+
local default
|
18
|
+
default=$(dirname "$(which "$bin")")
|
19
|
+
user_answer=$(ask_user "Where can I find $2 ($3)? $4" "$default")
|
20
|
+
if [[ -x "$user_answer/$bin" ]] ; then
|
21
|
+
export PATH="$PATH:$user_answer"
|
22
|
+
echo "MIGA_PATH=\"$user_answer:\$MIGA_PATH\" # $2" >> "$HOME/.miga_rc"
|
23
|
+
else
|
24
|
+
echo "Cannot find $2 at '$user_answer/$bin'. Aborting..." >&2
|
25
|
+
exit 1
|
26
|
+
fi
|
26
27
|
}
|
27
28
|
|
28
29
|
function check_rlib {
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
local rlib=$1
|
31
|
+
gotit=$(echo "if(require($rlib)) cat('GOT','IT')" | R --vanilla -q 2>&1 \
|
32
|
+
| grep -c "GOT IT")
|
33
|
+
[[ "$gotit" == "1" ]]
|
33
34
|
}
|
34
35
|
|
35
36
|
function check_gem {
|
36
|
-
|
37
|
-
|
38
|
-
|
37
|
+
local gem=$1
|
38
|
+
gotit=$(echo "require '$gem'" | ruby 2>/dev/null && echo 1)
|
39
|
+
[[ "$gotit" == "1" ]]
|
39
40
|
}
|
40
41
|
|
41
42
|
#=======[ Main ]
|
@@ -49,31 +50,32 @@ make sure you have all the requirements for MiGA Daemons.
|
|
49
50
|
" >&2
|
50
51
|
|
51
52
|
if [[ "$(ask_user "Would you like to see all the requirements before starting? (yes / no)" "no")" == "yes" ]] ; then
|
52
|
-
|
53
|
-
|
54
|
-
|
53
|
+
echo "" >&2
|
54
|
+
cat "$MIGA/utils/requirements.txt" >&2
|
55
|
+
echo "" >&2
|
55
56
|
fi
|
56
57
|
|
57
58
|
if [[ -e "$HOME/.miga_rc" ]] ; then
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
59
|
+
case "$(ask_user "I found a previous configuration. Do you want to load the defaults within? (yes / no / cancel)" "yes")" in
|
60
|
+
yes)
|
61
|
+
# shellcheck source=/dev/null
|
62
|
+
source "$HOME/.miga_rc"
|
63
|
+
if [[ "$MIGA_CONFIG_DATE" == "" ]] ; then
|
64
|
+
echo " Loaded incomplete configuration" >&2
|
65
|
+
else
|
66
|
+
echo " Loaded configuration from $MIGA_CONFIG_DATE" >&2
|
67
|
+
fi
|
68
|
+
;;
|
69
|
+
no)
|
70
|
+
rm "$HOME/.miga_rc"
|
71
|
+
;;
|
72
|
+
cancel)
|
73
|
+
exit 0
|
74
|
+
;;
|
75
|
+
*)
|
76
|
+
echo "Cannot understand your answer, please use 'yes', 'no', or 'cancel'. Aborting..." >&2
|
77
|
+
exit 1
|
78
|
+
esac
|
77
79
|
fi
|
78
80
|
|
79
81
|
echo "#!/bin/bash
|
@@ -83,10 +85,11 @@ echo "#!/bin/bash
|
|
83
85
|
# Check Software requirements
|
84
86
|
MIGA_STARTUP=$(ask_user "Is there a script I need to load at startup? (no / path to the script to load)" "$MIGA_STARTUP")
|
85
87
|
if [[ "$MIGA_STARTUP" != "no" ]] ; then
|
86
|
-
|
88
|
+
echo "MIGA_STARTUP='$MIGA_STARTUP'
|
87
89
|
source \"\$MIGA_STARTUP\"
|
88
90
|
" >> "$HOME/.miga_rc";
|
89
|
-
|
91
|
+
# shellcheck source=/dev/null
|
92
|
+
source "$MIGA_STARTUP";
|
90
93
|
fi
|
91
94
|
echo "
|
92
95
|
Looking for Software requirements:" >&2
|
@@ -94,11 +97,11 @@ reqs=$(tail -n+3 "$MIGA/utils/requirements.txt" | perl -pe 's/\t+/\t/g')
|
|
94
97
|
IFS_BU=$IFS
|
95
98
|
IFS=$'\n'
|
96
99
|
for ln in $reqs ; do
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
100
|
+
rname=$(echo "$ln" | awk -F'\t' '{print $1}')
|
101
|
+
rtest=$(echo "$ln" | awk -F'\t' '{print $2}')
|
102
|
+
rwebs=$(echo "$ln" | awk -F'\t' '{print $3}')
|
103
|
+
rhint=$(echo "$ln" | awk -F'\t' '{print $4}')
|
104
|
+
check_req "$rtest" "$rname" "$rwebs" "$rhint"
|
102
105
|
done
|
103
106
|
IFS=$IFS_BU
|
104
107
|
echo "export PATH=\$MIGA_PATH\$PATH" >> "$HOME/.miga_rc"
|
@@ -108,11 +111,11 @@ echo "
|
|
108
111
|
Looking for R packages:" >&2
|
109
112
|
RLIBS="enveomics.R ape phangorn phytools ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
|
110
113
|
for lib in $RLIBS ; do
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
114
|
+
if ! check_rlib "$lib" ; then
|
115
|
+
echo "+ Installing $lib" >&2
|
116
|
+
echo "install.packages('$lib', repos='http://cran.rstudio.com/')" \
|
117
|
+
| R --vanilla -q
|
118
|
+
fi
|
116
119
|
done
|
117
120
|
|
118
121
|
# Check for ruby gems
|
@@ -120,36 +123,25 @@ echo "
|
|
120
123
|
Looking for Ruby gems:" >&2
|
121
124
|
GEMS="rest-client sqlite3 daemons json"
|
122
125
|
for gem in $GEMS ; do
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
126
|
+
if ! check_gem "$gem" ; then
|
127
|
+
echo "+ Installing $gem (user-only)" >&2
|
128
|
+
gem install --user "$gem"
|
129
|
+
fi
|
127
130
|
done
|
128
131
|
|
129
132
|
# Check for other files
|
130
133
|
echo "
|
131
134
|
Looking for additional files:
|
132
|
-
+
|
133
|
-
|
134
|
-
if [[ ! -e "$GM/gm_key" && ! -e "$GM/gm_key_64" && ! -e "$GM/gm_key_32" && ! -e "$GM/.gm_key" && ! -e "$HOME/.gm_key" ]] ; then
|
135
|
-
echo "Cannot find it, please place your license key in '$GM/gm_key'. Aborting..." >&2
|
136
|
-
exit 1
|
137
|
-
fi
|
138
|
-
echo "+ MetaGeneMark scripts" >&2
|
139
|
-
if [[ ! -e "$GM/aa_from_gff.pl" || ! -e "$GM/nt_from_gff.pl" ]] ; then
|
140
|
-
echo "Cannot find it, please place aa_from_gff.pl and nt_from_gff.pl in '$GM/'. Aborting..." >&2
|
141
|
-
exit 1
|
142
|
-
fi
|
143
|
-
echo "+ MyTaxa scores database" >&2
|
144
|
-
MT=$(dirname -- $(which MyTaxa))
|
135
|
+
+ MyTaxa scores database" >&2
|
136
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
145
137
|
if [[ ! -d "$MT/db" ]] ; then
|
146
|
-
|
147
|
-
|
138
|
+
echo "Cannot find it, please execute 'python $MT/utils/download_db.py'. Aborting..." >&2
|
139
|
+
exit 1
|
148
140
|
fi
|
149
141
|
echo "+ MyTaxa DIAMOND database" >&2
|
150
142
|
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
|
151
|
-
|
152
|
-
|
143
|
+
echo "Cannot find it, please download 'http://enve-omics.ce.gatech.edu/data/public_mytaxa/AllGenomes.faa.dmnd' into '$MT'. Aborting..." >&2
|
144
|
+
exit 1
|
153
145
|
fi
|
154
146
|
|
155
147
|
# Configure daemon
|
@@ -157,34 +149,34 @@ echo "
|
|
157
149
|
Default daemon configuration:" >&2
|
158
150
|
dtype=$(ask_user "Please select the type of daemon you want to setup (bash / qsub / msub)", "bash")
|
159
151
|
case "$dtype" in
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
152
|
+
bash)
|
153
|
+
dlatency=$(ask_user "For how long should I sleep? (# in seconds)" "30")
|
154
|
+
dmaxjobs=$(ask_user "How many jobs can I launch at once?" "6")
|
155
|
+
dppn=$(ask_user "How many CPUs can I use per job?" "2")
|
156
|
+
echo "Setting up internal daemon defaults, if you don't understand this just leave default values:" >&2
|
157
|
+
dcmd=$(ask_user "How should I launch tasks? Use %1\$s for script path, %2\$s for variables, %3\$d for CPUs, %4\$s for log file, and %5\$s for task name." "%2\$s '%1\$s' > '%4\$s' 2>&1")
|
158
|
+
dvar=$(ask_user "How should I pass variables? Use %1\$s for keys and %2\$s for values." "%1\$s=%2\$s")
|
159
|
+
dsep=$(ask_user "What should I use to separate variables?" " ")
|
160
|
+
dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for PID, output should be 1 for running and 0 for non-running." "ps -p '%1\$s'|tail -n+2|wc -l|awk '{print \$1}'")
|
161
|
+
;;
|
162
|
+
[qm]sub)
|
163
|
+
dqueue=$(ask_user "What's the name of the queue I should use?" "")
|
164
|
+
dlatency=$(ask_user "How long should I sleep? (# in seconds)" "150")
|
165
|
+
dmaxjobs=$(ask_user "How many jobs can I launch at once?" "300")
|
166
|
+
dppn=$(ask_user "How many CPUs can I use per job?" "4")
|
167
|
+
echo "Setting up internal daemon defaults, if you don't understand this just leave default values:" >&2
|
168
|
+
dcmd=$(ask_user "How should I launch tasks? Use %1\$s for script path, %2\$s for variables, and %3\$d for CPUs, %4\$d for log file, and %5\$s for task name." \
|
169
|
+
"$dtype -q '$dqueue' -v '%2\$s' -l nodes=1:ppn=%3\$d %1\$s -j oe -o '%4\$s' -N '%5\$s' | grep .")
|
170
|
+
dvar=$(ask_user "How should I pass variables? Use %1\$s for keys and %2\$s for values." "%1\$s=%2\$s")
|
171
|
+
dsep=$(ask_user "What should I use to separate variables?" ",")
|
172
|
+
if [[ "$dtype" == "qsub" ]] ; then
|
173
|
+
dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for job id, output should be 1 for running and 0 for non-running." \
|
174
|
+
"qstat -f '%1\$s'|grep ' job_state ='|perl -pe 's/.*= //'|grep '[^C]'|tail -n1|wc -l|awk '{print \$1}'")
|
175
|
+
else
|
176
|
+
dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for job id, output should be 1 for running and 0 for non-running." \
|
177
|
+
"checkjob '%1\$s'|grep '^State:'|perl -pe 's/.*: //'|grep 'Deferred\\|Hold\\|Idle\\|Starting\\|Running\\|Blocked'|tail -n1|wc -l|awk '{print \$1}'")
|
178
|
+
fi
|
179
|
+
;;
|
188
180
|
*)
|
189
181
|
esac
|
190
182
|
echo "{
|
@@ -198,7 +190,7 @@ echo "{
|
|
198
190
|
\"latency\": $dlatency,
|
199
191
|
\"maxjobs\": $dmaxjobs,
|
200
192
|
\"ppn\" : $dppn
|
201
|
-
}" > $HOME/.miga_daemon.json
|
193
|
+
}" > "$HOME/.miga_daemon.json"
|
202
194
|
|
203
195
|
# Confirm configuration
|
204
196
|
echo "
|
data/scripts/miga.bash
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
set -e
|
3
3
|
#MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
|
4
|
+
# shellcheck source=/dev/null
|
4
5
|
source "$HOME/.miga_rc"
|
5
6
|
export PATH="$MIGA/bin:$PATH"
|
6
|
-
SCRIPT=${SCRIPT:-$(basename $0 .bash)}
|
7
|
+
SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
|
7
8
|
|
8
9
|
function exists { [[ -e "$1" ]] ; }
|
9
|
-
function fx_exists { [[ $(type -t $1) == "function" ]] ; }
|
10
|
+
function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
|
10
11
|
|
11
12
|
for i in $(miga plugins -P "$PROJECT") ; do
|
13
|
+
# shellcheck source=/dev/null
|
12
14
|
source "$i/scripts-plugin.bash"
|
13
15
|
done
|
14
16
|
|