miga-base 0.2.6.4 → 0.2.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/list_datasets.rb +6 -1
- data/actions/run_local.rb +1 -1
- data/actions/tax_distributions.rb +4 -4
- data/lib/miga/common.rb +18 -0
- data/lib/miga/daemon.rb +1 -1
- data/lib/miga/dataset_result.rb +46 -47
- data/lib/miga/remote_dataset.rb +52 -32
- data/lib/miga/tax_dist.rb +2 -2
- data/lib/miga/tax_index.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_functions.bash +17 -8
- data/scripts/_distances_noref_nomulti.bash +26 -7
- data/scripts/aai_distances.bash +3 -2
- data/scripts/ani_distances.bash +3 -2
- data/scripts/assembly.bash +24 -24
- data/scripts/cds.bash +22 -30
- data/scripts/clade_finding.bash +5 -4
- data/scripts/distances.bash +13 -9
- data/scripts/essential_genes.bash +12 -11
- data/scripts/haai_distances.bash +3 -2
- data/scripts/init.bash +100 -108
- data/scripts/miga.bash +4 -2
- data/scripts/mytaxa.bash +72 -71
- data/scripts/mytaxa_scan.bash +62 -61
- data/scripts/ogs.bash +14 -13
- data/scripts/project_stats.bash +1 -0
- data/scripts/read_quality.bash +12 -16
- data/scripts/ssu.bash +18 -18
- data/scripts/stats.bash +3 -2
- data/scripts/subclades.bash +7 -6
- data/scripts/trimmed_fasta.bash +22 -21
- data/scripts/trimmed_reads.bash +34 -32
- data/utils/index_metadata.rb +4 -4
- data/utils/ref-tree.R +65 -0
- data/utils/requirements.txt +1 -1
- metadata +57 -56
data/scripts/assembly.bash
CHANGED
@@ -1,53 +1,53 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="assembly"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/05.assembly"
|
9
10
|
|
10
|
-
b=$DATASET
|
11
|
-
|
12
11
|
# Initialize
|
13
|
-
date
|
12
|
+
miga date > "$DATASET.start"
|
14
13
|
|
15
14
|
# Interpose (if needed)
|
16
15
|
TF="../04.trimmed_fasta"
|
17
|
-
if [[ -s $TF/$DATASET.1.fasta \
|
18
|
-
&& -s $TF/$DATASET.2.fasta \
|
19
|
-
&& ! -s $TF/$DATASET.CoupledReads.fa ]] ; then
|
20
|
-
FastA.interpose.pl $TF/$DATASET.CoupledReads.fa $TF/$DATASET.[12].fasta
|
21
|
-
gzip -9 -f $TF/$DATASET.1.fasta
|
22
|
-
gzip -9 -f $TF/$DATASET.2.fasta
|
16
|
+
if [[ -s "$TF/$DATASET.1.fasta" \
|
17
|
+
&& -s "$TF/$DATASET.2.fasta" \
|
18
|
+
&& ! -s "$TF/$DATASET.CoupledReads.fa" ]] ; then
|
19
|
+
FastA.interpose.pl "$TF/$DATASET.CoupledReads.fa" "$TF/$DATASET".[12].fasta
|
20
|
+
gzip -9 -f "$TF/$DATASET.1.fasta"
|
21
|
+
gzip -9 -f "$TF/$DATASET.2.fasta"
|
23
22
|
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
|
24
23
|
fi
|
25
24
|
|
26
25
|
# Assemble
|
27
26
|
FA="$TF/$DATASET.CoupledReads.fa"
|
28
|
-
[[ -e $FA ]] || FA="$FA.gz"
|
29
|
-
[[ -e $FA ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
|
30
|
-
[[ -e $FA ]] || FA="$FA.gz"
|
27
|
+
[[ -e "$FA" ]] || FA="$FA.gz"
|
28
|
+
[[ -e "$FA" ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
|
29
|
+
[[ -e "$FA" ]] || FA="$FA.gz"
|
31
30
|
RD="r"
|
32
31
|
[[ $FA == *.SingleReads.fa* ]] && RD="l"
|
33
32
|
idba_ud --pre_correction -$RD "$FA" -o "$DATASET" --num_threads "$CORES" || true
|
34
|
-
[[ -s $DATASET/contig.fa ]] || exit 1
|
33
|
+
[[ -s "$DATASET/contig.fa" ]] || exit 1
|
35
34
|
|
36
35
|
# Clean
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
(
|
37
|
+
cd "$DATASET"
|
38
|
+
rm kmer graph-*.fa align-* local-contig-*.fa contig-*.fa
|
39
|
+
)
|
40
40
|
|
41
41
|
# Extract
|
42
|
-
if [[ -s $DATASET/scaffold.fa ]] ; then
|
43
|
-
|
42
|
+
if [[ -s "$DATASET/scaffold.fa" ]] ; then
|
43
|
+
ln -s "$DATASET/scaffold.fa" "$DATASET.AllContigs.fna"
|
44
44
|
else
|
45
|
-
|
45
|
+
ln -s "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
|
46
46
|
fi
|
47
|
-
FastA.length.pl $DATASET.AllContigs.fna | awk '$2>=1000{print $1}' \
|
48
|
-
|
49
|
-
|
47
|
+
FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
|
48
|
+
| FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
|
49
|
+
> "$DATASET.LargeContigs.fna"
|
50
50
|
|
51
51
|
# Finalize
|
52
|
-
date
|
52
|
+
miga date > "$DATASET.done"
|
53
53
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/cds.bash
CHANGED
@@ -1,46 +1,38 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="cds"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/06.cds"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
-
GM=$(dirname -- $(which gmhmmp))
|
12
|
+
miga date > "$DATASET.start"
|
13
13
|
|
14
|
-
#
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
cp "$GM/.gm_key" ".gm_key"
|
24
|
-
elif [[ -e "$HOME/.gm_key" ]] ; then
|
25
|
-
cp "$HOME/.gm_key" .
|
26
|
-
else
|
27
|
-
echo "Impossible to find MetaGeneMark key, please register your copy" \
|
28
|
-
"and place the key in '$GM/gm_key'." >&2
|
29
|
-
exit 1
|
30
|
-
fi
|
31
|
-
fi
|
14
|
+
# Run Prodigal
|
15
|
+
TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
|
16
|
+
--metadata "type" | awk '{print $2}')
|
17
|
+
case "$TYPE" in
|
18
|
+
metagenome|virome) PROCEDURE=meta ;;
|
19
|
+
*) PROCEDURE=single ;;
|
20
|
+
esac
|
21
|
+
prodigal -a "$DATASET.faa" -d "$DATASET.fna" -f gff -o "$DATASET.gff3" \
|
22
|
+
-p $PROCEDURE -q -i "../05.assembly/$DATASET.LargeContigs.fna"
|
32
23
|
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
perl
|
39
|
-
|
24
|
+
# Clean Prodigal noisy deflines
|
25
|
+
for i in faa fna ; do
|
26
|
+
perl -pe 's/>.*ID=([^;]+);.*/>gene_$1/' "$DATASET.$i" > "$DATASET.$i.t"
|
27
|
+
mv "$DATASET.$i.t" "$DATASET.$i"
|
28
|
+
done
|
29
|
+
perl -pe 's/ID=([0-9]+_[0-9]+);/ID=gene_$1;/' "$DATASET.gff3" \
|
30
|
+
> "$DATASET.gff3.t"
|
31
|
+
mv "$DATASET.gff3.t" "$DATASET.gff3"
|
40
32
|
|
41
33
|
# Gzip
|
42
|
-
gzip -9 -f "$DATASET.
|
34
|
+
gzip -9 -f "$DATASET.gff3"
|
43
35
|
|
44
36
|
# Finalize
|
45
|
-
date
|
37
|
+
miga date > "$DATASET.done"
|
46
38
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/clade_finding.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="clade_finding"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/10.clades/01.find"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
# Markov-cluster genomes by ANI
|
14
15
|
gunzip -c ../../09.distances/03.ani/miga-project.txt.gz | tail -n+2 \
|
@@ -30,9 +31,9 @@ cat miga-project.ani95-clades | tail -n +2 | tr "," "\\t" | awk 'NF >= 5' \
|
|
30
31
|
|
31
32
|
# Run R code (except in projects type clade)
|
32
33
|
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
33
|
-
$MIGA/utils/subclades.R \
|
34
|
+
"$MIGA/utils/subclades.R" \
|
34
35
|
../../09.distances/02.aai/miga-project.txt.gz \
|
35
|
-
miga-project $CORES
|
36
|
+
miga-project "$CORES"
|
36
37
|
mv miga-project.nwk miga-project.aai.nwk
|
37
38
|
|
38
39
|
# Compile
|
@@ -42,5 +43,5 @@ if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
|
42
43
|
fi
|
43
44
|
|
44
45
|
# Finalize
|
45
|
-
date
|
46
|
+
miga date > "miga-project.done"
|
46
47
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/distances.bash
CHANGED
@@ -1,32 +1,36 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="distances"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/09.distances"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "$DATASET.start"
|
12
13
|
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
13
|
-
trap "rm -rf $TMPDIR; exit" SIGHUP SIGINT SIGTERM
|
14
|
+
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
14
15
|
|
15
16
|
# Check type of dataset
|
16
17
|
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
|
17
|
-
|
18
|
+
| wc -l | awk '{print $1}')
|
18
19
|
REF=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --ref \
|
19
|
-
|
20
|
+
| wc -l | awk '{print $1}')
|
20
21
|
|
21
22
|
# Call submodules
|
23
|
+
# shellcheck source=scripts/_distances_functions.bash
|
22
24
|
source "$MIGA/scripts/_distances_functions.bash"
|
23
25
|
if [[ "$NOMULTI" -eq "1" && "$REF" -eq "1" ]] ; then
|
24
|
-
|
26
|
+
# shellcheck source=scripts/_distances_ref_nomulti.bash
|
27
|
+
source "$MIGA/scripts/_distances_ref_nomulti.bash"
|
25
28
|
elif [[ "$NOMULTI" -eq "1" ]] ; then
|
26
|
-
|
29
|
+
# shellcheck source=scripts/_distances_noref_nomulti.bash
|
30
|
+
source "$MIGA/scripts/_distances_noref_nomulti.bash"
|
27
31
|
fi
|
28
32
|
|
29
33
|
# Finalize
|
30
|
-
rm -R $TMPDIR
|
31
|
-
date
|
34
|
+
rm -R "$TMPDIR"
|
35
|
+
miga date > "$DATASET.done"
|
32
36
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
@@ -1,21 +1,22 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="essential_genes"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/07.annotation/01.function/01.essential"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "$DATASET.start"
|
12
13
|
FAA="../../../06.cds/$DATASET.faa"
|
13
14
|
|
14
15
|
# Check if there are any proteins
|
15
16
|
if [[ ! -s $FAA ]] ; then
|
16
17
|
echo Empty protein set, bypassing essential genes
|
17
18
|
rm "$DATASET.start"
|
18
|
-
miga create_dataset -P "$PROJECT" -D $DATASET \
|
19
|
+
miga create_dataset -P "$PROJECT" -D "$DATASET" \
|
19
20
|
-m run_essential_genes=false --update
|
20
21
|
exit 0
|
21
22
|
fi
|
@@ -24,17 +25,17 @@ fi
|
|
24
25
|
[[ -d "$DATASET.ess" ]] && rm -R "$DATASET.ess"
|
25
26
|
mkdir "$DATASET.ess"
|
26
27
|
TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
|
27
|
-
|
28
|
+
--metadata "type" | awk '{print $2}')
|
28
29
|
if [[ "$TYPE" == "metagenome" || "$TYPE" == "virome" ]] ; then
|
29
|
-
|
30
|
-
|
31
|
-
|
30
|
+
HMM.essential.rb -i "$FAA" -o "$DATASET.ess.faa" \
|
31
|
+
-m "$DATASET.ess/" -t "$CORES" -r "$DATASET" --metagenome \
|
32
|
+
> "$DATASET.ess/log"
|
32
33
|
else
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
HMM.essential.rb -i "$FAA" -o "$DATASET.ess.faa" \
|
35
|
+
-m "$DATASET.ess/" -t "$CORES" -r "$DATASET" \
|
36
|
+
> "$DATASET.ess/log"
|
36
37
|
fi
|
37
38
|
|
38
39
|
# Finalize
|
39
|
-
date
|
40
|
+
miga date > "$DATASET.done"
|
40
41
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/haai_distances.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="haai_distances"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/09.distances/01.haai"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
echo -n "" > miga-project.log
|
14
15
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
@@ -39,5 +40,5 @@ if(sum(haai[,'a'] != haai[,'b']) > 0){
|
|
39
40
|
gzip -9 -f miga-project.txt
|
40
41
|
|
41
42
|
# Finalize
|
42
|
-
date
|
43
|
+
miga date > "miga-project.done"
|
43
44
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/init.bash
CHANGED
@@ -3,39 +3,40 @@ set -e
|
|
3
3
|
|
4
4
|
#=======[ Functions ]
|
5
5
|
function ask_user {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
6
|
+
local question=$1
|
7
|
+
local default=$2
|
8
|
+
echo "$question" >&2
|
9
|
+
echo -n " [$default] > " >&2
|
10
|
+
read -r user_answer
|
11
|
+
user_answer=${user_answer:-$default}
|
12
|
+
echo -n "$user_answer"
|
13
13
|
}
|
14
14
|
|
15
15
|
function check_req {
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
16
|
+
local bin=$1
|
17
|
+
local default
|
18
|
+
default=$(dirname "$(which "$bin")")
|
19
|
+
user_answer=$(ask_user "Where can I find $2 ($3)? $4" "$default")
|
20
|
+
if [[ -x "$user_answer/$bin" ]] ; then
|
21
|
+
export PATH="$PATH:$user_answer"
|
22
|
+
echo "MIGA_PATH=\"$user_answer:\$MIGA_PATH\" # $2" >> "$HOME/.miga_rc"
|
23
|
+
else
|
24
|
+
echo "Cannot find $2 at '$user_answer/$bin'. Aborting..." >&2
|
25
|
+
exit 1
|
26
|
+
fi
|
26
27
|
}
|
27
28
|
|
28
29
|
function check_rlib {
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
local rlib=$1
|
31
|
+
gotit=$(echo "if(require($rlib)) cat('GOT','IT')" | R --vanilla -q 2>&1 \
|
32
|
+
| grep -c "GOT IT")
|
33
|
+
[[ "$gotit" == "1" ]]
|
33
34
|
}
|
34
35
|
|
35
36
|
function check_gem {
|
36
|
-
|
37
|
-
|
38
|
-
|
37
|
+
local gem=$1
|
38
|
+
gotit=$(echo "require '$gem'" | ruby 2>/dev/null && echo 1)
|
39
|
+
[[ "$gotit" == "1" ]]
|
39
40
|
}
|
40
41
|
|
41
42
|
#=======[ Main ]
|
@@ -49,31 +50,32 @@ make sure you have all the requirements for MiGA Daemons.
|
|
49
50
|
" >&2
|
50
51
|
|
51
52
|
if [[ "$(ask_user "Would you like to see all the requirements before starting? (yes / no)" "no")" == "yes" ]] ; then
|
52
|
-
|
53
|
-
|
54
|
-
|
53
|
+
echo "" >&2
|
54
|
+
cat "$MIGA/utils/requirements.txt" >&2
|
55
|
+
echo "" >&2
|
55
56
|
fi
|
56
57
|
|
57
58
|
if [[ -e "$HOME/.miga_rc" ]] ; then
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
59
|
+
case "$(ask_user "I found a previous configuration. Do you want to load the defaults within? (yes / no / cancel)" "yes")" in
|
60
|
+
yes)
|
61
|
+
# shellcheck source=/dev/null
|
62
|
+
source "$HOME/.miga_rc"
|
63
|
+
if [[ "$MIGA_CONFIG_DATE" == "" ]] ; then
|
64
|
+
echo " Loaded incomplete configuration" >&2
|
65
|
+
else
|
66
|
+
echo " Loaded configuration from $MIGA_CONFIG_DATE" >&2
|
67
|
+
fi
|
68
|
+
;;
|
69
|
+
no)
|
70
|
+
rm "$HOME/.miga_rc"
|
71
|
+
;;
|
72
|
+
cancel)
|
73
|
+
exit 0
|
74
|
+
;;
|
75
|
+
*)
|
76
|
+
echo "Cannot understand your answer, please use 'yes', 'no', or 'cancel'. Aborting..." >&2
|
77
|
+
exit 1
|
78
|
+
esac
|
77
79
|
fi
|
78
80
|
|
79
81
|
echo "#!/bin/bash
|
@@ -83,10 +85,11 @@ echo "#!/bin/bash
|
|
83
85
|
# Check Software requirements
|
84
86
|
MIGA_STARTUP=$(ask_user "Is there a script I need to load at startup? (no / path to the script to load)" "$MIGA_STARTUP")
|
85
87
|
if [[ "$MIGA_STARTUP" != "no" ]] ; then
|
86
|
-
|
88
|
+
echo "MIGA_STARTUP='$MIGA_STARTUP'
|
87
89
|
source \"\$MIGA_STARTUP\"
|
88
90
|
" >> "$HOME/.miga_rc";
|
89
|
-
|
91
|
+
# shellcheck source=/dev/null
|
92
|
+
source "$MIGA_STARTUP";
|
90
93
|
fi
|
91
94
|
echo "
|
92
95
|
Looking for Software requirements:" >&2
|
@@ -94,11 +97,11 @@ reqs=$(tail -n+3 "$MIGA/utils/requirements.txt" | perl -pe 's/\t+/\t/g')
|
|
94
97
|
IFS_BU=$IFS
|
95
98
|
IFS=$'\n'
|
96
99
|
for ln in $reqs ; do
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
100
|
+
rname=$(echo "$ln" | awk -F'\t' '{print $1}')
|
101
|
+
rtest=$(echo "$ln" | awk -F'\t' '{print $2}')
|
102
|
+
rwebs=$(echo "$ln" | awk -F'\t' '{print $3}')
|
103
|
+
rhint=$(echo "$ln" | awk -F'\t' '{print $4}')
|
104
|
+
check_req "$rtest" "$rname" "$rwebs" "$rhint"
|
102
105
|
done
|
103
106
|
IFS=$IFS_BU
|
104
107
|
echo "export PATH=\$MIGA_PATH\$PATH" >> "$HOME/.miga_rc"
|
@@ -108,11 +111,11 @@ echo "
|
|
108
111
|
Looking for R packages:" >&2
|
109
112
|
RLIBS="enveomics.R ape phangorn phytools ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
|
110
113
|
for lib in $RLIBS ; do
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
114
|
+
if ! check_rlib "$lib" ; then
|
115
|
+
echo "+ Installing $lib" >&2
|
116
|
+
echo "install.packages('$lib', repos='http://cran.rstudio.com/')" \
|
117
|
+
| R --vanilla -q
|
118
|
+
fi
|
116
119
|
done
|
117
120
|
|
118
121
|
# Check for ruby gems
|
@@ -120,36 +123,25 @@ echo "
|
|
120
123
|
Looking for Ruby gems:" >&2
|
121
124
|
GEMS="rest-client sqlite3 daemons json"
|
122
125
|
for gem in $GEMS ; do
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
126
|
+
if ! check_gem "$gem" ; then
|
127
|
+
echo "+ Installing $gem (user-only)" >&2
|
128
|
+
gem install --user "$gem"
|
129
|
+
fi
|
127
130
|
done
|
128
131
|
|
129
132
|
# Check for other files
|
130
133
|
echo "
|
131
134
|
Looking for additional files:
|
132
|
-
+
|
133
|
-
|
134
|
-
if [[ ! -e "$GM/gm_key" && ! -e "$GM/gm_key_64" && ! -e "$GM/gm_key_32" && ! -e "$GM/.gm_key" && ! -e "$HOME/.gm_key" ]] ; then
|
135
|
-
echo "Cannot find it, please place your license key in '$GM/gm_key'. Aborting..." >&2
|
136
|
-
exit 1
|
137
|
-
fi
|
138
|
-
echo "+ MetaGeneMark scripts" >&2
|
139
|
-
if [[ ! -e "$GM/aa_from_gff.pl" || ! -e "$GM/nt_from_gff.pl" ]] ; then
|
140
|
-
echo "Cannot find it, please place aa_from_gff.pl and nt_from_gff.pl in '$GM/'. Aborting..." >&2
|
141
|
-
exit 1
|
142
|
-
fi
|
143
|
-
echo "+ MyTaxa scores database" >&2
|
144
|
-
MT=$(dirname -- $(which MyTaxa))
|
135
|
+
+ MyTaxa scores database" >&2
|
136
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
145
137
|
if [[ ! -d "$MT/db" ]] ; then
|
146
|
-
|
147
|
-
|
138
|
+
echo "Cannot find it, please execute 'python $MT/utils/download_db.py'. Aborting..." >&2
|
139
|
+
exit 1
|
148
140
|
fi
|
149
141
|
echo "+ MyTaxa DIAMOND database" >&2
|
150
142
|
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
|
151
|
-
|
152
|
-
|
143
|
+
echo "Cannot find it, please download 'http://enve-omics.ce.gatech.edu/data/public_mytaxa/AllGenomes.faa.dmnd' into '$MT'. Aborting..." >&2
|
144
|
+
exit 1
|
153
145
|
fi
|
154
146
|
|
155
147
|
# Configure daemon
|
@@ -157,34 +149,34 @@ echo "
|
|
157
149
|
Default daemon configuration:" >&2
|
158
150
|
dtype=$(ask_user "Please select the type of daemon you want to setup (bash / qsub / msub)", "bash")
|
159
151
|
case "$dtype" in
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
152
|
+
bash)
|
153
|
+
dlatency=$(ask_user "For how long should I sleep? (# in seconds)" "30")
|
154
|
+
dmaxjobs=$(ask_user "How many jobs can I launch at once?" "6")
|
155
|
+
dppn=$(ask_user "How many CPUs can I use per job?" "2")
|
156
|
+
echo "Setting up internal daemon defaults, if you don't understand this just leave default values:" >&2
|
157
|
+
dcmd=$(ask_user "How should I launch tasks? Use %1\$s for script path, %2\$s for variables, %3\$d for CPUs, %4\$s for log file, and %5\$s for task name." "%2\$s '%1\$s' > '%4\$s' 2>&1")
|
158
|
+
dvar=$(ask_user "How should I pass variables? Use %1\$s for keys and %2\$s for values." "%1\$s=%2\$s")
|
159
|
+
dsep=$(ask_user "What should I use to separate variables?" " ")
|
160
|
+
dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for PID, output should be 1 for running and 0 for non-running." "ps -p '%1\$s'|tail -n+2|wc -l|awk '{print \$1}'")
|
161
|
+
;;
|
162
|
+
[qm]sub)
|
163
|
+
dqueue=$(ask_user "What's the name of the queue I should use?" "")
|
164
|
+
dlatency=$(ask_user "How long should I sleep? (# in seconds)" "150")
|
165
|
+
dmaxjobs=$(ask_user "How many jobs can I launch at once?" "300")
|
166
|
+
dppn=$(ask_user "How many CPUs can I use per job?" "4")
|
167
|
+
echo "Setting up internal daemon defaults, if you don't understand this just leave default values:" >&2
|
168
|
+
dcmd=$(ask_user "How should I launch tasks? Use %1\$s for script path, %2\$s for variables, and %3\$d for CPUs, %4\$d for log file, and %5\$s for task name." \
|
169
|
+
"$dtype -q '$dqueue' -v '%2\$s' -l nodes=1:ppn=%3\$d %1\$s -j oe -o '%4\$s' -N '%5\$s' | grep .")
|
170
|
+
dvar=$(ask_user "How should I pass variables? Use %1\$s for keys and %2\$s for values." "%1\$s=%2\$s")
|
171
|
+
dsep=$(ask_user "What should I use to separate variables?" ",")
|
172
|
+
if [[ "$dtype" == "qsub" ]] ; then
|
173
|
+
dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for job id, output should be 1 for running and 0 for non-running." \
|
174
|
+
"qstat -f '%1\$s'|grep ' job_state ='|perl -pe 's/.*= //'|grep '[^C]'|tail -n1|wc -l|awk '{print \$1}'")
|
175
|
+
else
|
176
|
+
dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for job id, output should be 1 for running and 0 for non-running." \
|
177
|
+
"checkjob '%1\$s'|grep '^State:'|perl -pe 's/.*: //'|grep 'Deferred\\|Hold\\|Idle\\|Starting\\|Running\\|Blocked'|tail -n1|wc -l|awk '{print \$1}'")
|
178
|
+
fi
|
179
|
+
;;
|
188
180
|
*)
|
189
181
|
esac
|
190
182
|
echo "{
|
@@ -198,7 +190,7 @@ echo "{
|
|
198
190
|
\"latency\": $dlatency,
|
199
191
|
\"maxjobs\": $dmaxjobs,
|
200
192
|
\"ppn\" : $dppn
|
201
|
-
}" > $HOME/.miga_daemon.json
|
193
|
+
}" > "$HOME/.miga_daemon.json"
|
202
194
|
|
203
195
|
# Confirm configuration
|
204
196
|
echo "
|
data/scripts/miga.bash
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
set -e
|
3
3
|
#MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
|
4
|
+
# shellcheck source=/dev/null
|
4
5
|
source "$HOME/.miga_rc"
|
5
6
|
export PATH="$MIGA/bin:$PATH"
|
6
|
-
SCRIPT=${SCRIPT:-$(basename $0 .bash)}
|
7
|
+
SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
|
7
8
|
|
8
9
|
function exists { [[ -e "$1" ]] ; }
|
9
|
-
function fx_exists { [[ $(type -t $1) == "function" ]] ; }
|
10
|
+
function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
|
10
11
|
|
11
12
|
for i in $(miga plugins -P "$PROJECT") ; do
|
13
|
+
# shellcheck source=/dev/null
|
12
14
|
source "$i/scripts-plugin.bash"
|
13
15
|
done
|
14
16
|
|