miga-base 0.2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +351 -0
  3. data/actions/add_result +61 -0
  4. data/actions/add_taxonomy +86 -0
  5. data/actions/create_dataset +62 -0
  6. data/actions/create_project +70 -0
  7. data/actions/daemon +69 -0
  8. data/actions/download_dataset +77 -0
  9. data/actions/find_datasets +63 -0
  10. data/actions/import_datasets +86 -0
  11. data/actions/index_taxonomy +71 -0
  12. data/actions/list_datasets +83 -0
  13. data/actions/list_files +67 -0
  14. data/actions/unlink_dataset +52 -0
  15. data/bin/miga +48 -0
  16. data/lib/miga/daemon.rb +178 -0
  17. data/lib/miga/dataset.rb +286 -0
  18. data/lib/miga/gui.rb +289 -0
  19. data/lib/miga/metadata.rb +74 -0
  20. data/lib/miga/project.rb +268 -0
  21. data/lib/miga/remote_dataset.rb +154 -0
  22. data/lib/miga/result.rb +102 -0
  23. data/lib/miga/tax_index.rb +70 -0
  24. data/lib/miga/taxonomy.rb +107 -0
  25. data/lib/miga.rb +83 -0
  26. data/scripts/_distances_noref_nomulti.bash +86 -0
  27. data/scripts/_distances_ref_nomulti.bash +105 -0
  28. data/scripts/aai_distances.bash +40 -0
  29. data/scripts/ani_distances.bash +39 -0
  30. data/scripts/assembly.bash +38 -0
  31. data/scripts/cds.bash +45 -0
  32. data/scripts/clade_finding.bash +27 -0
  33. data/scripts/distances.bash +30 -0
  34. data/scripts/essential_genes.bash +29 -0
  35. data/scripts/haai_distances.bash +39 -0
  36. data/scripts/init.bash +211 -0
  37. data/scripts/miga.bash +12 -0
  38. data/scripts/mytaxa.bash +93 -0
  39. data/scripts/mytaxa_scan.bash +85 -0
  40. data/scripts/ogs.bash +36 -0
  41. data/scripts/read_quality.bash +37 -0
  42. data/scripts/ssu.bash +35 -0
  43. data/scripts/subclades.bash +26 -0
  44. data/scripts/trimmed_fasta.bash +47 -0
  45. data/scripts/trimmed_reads.bash +57 -0
  46. data/utils/adapters.fa +302 -0
  47. data/utils/mytaxa_scan.R +89 -0
  48. data/utils/mytaxa_scan.rb +58 -0
  49. data/utils/requirements.txt +19 -0
  50. data/utils/subclades-compile.rb +48 -0
  51. data/utils/subclades.R +171 -0
  52. metadata +185 -0
@@ -0,0 +1,105 @@
1
+ #!/bin/bash
2
+ # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
+ # $NOMULTI, $REF
4
+
5
# Counts calls in the global N. On the call where N equals 10, every non-empty
# temporary database ($TMPDIR/{01.haai,02.aai,03.ani}.db) is queried once with
# sqlite3 and then copied to <step>/$DATASET.db, and N is reset.
# Globals:   N (read and written), TMPDIR (read), DATASET (read)
# Outputs:   the row count reported by sqlite3 for each saved database
# Exits:     with status 1 if sqlite3 fails on a temporary database
function checkpoint_n {
  local t tab
  if [[ "$N" -eq 10 ]] ; then
    for t in 01.haai 02.aai 03.ani ; do
      if [[ -s "$TMPDIR/$t.db" ]] ; then
        # The ANI database stores its values in table 'ani', the others in 'aai'
        tab="aai"
        [[ "$t" == "03.ani" ]] && tab="ani"
        echo "select count(*) from $tab;" \
          | sqlite3 "$TMPDIR/$t.db" \
          || exit 1
        cp "$TMPDIR/$t.db" "$t/$DATASET.db"
      fi
    done
    N=0
  fi
  N=$(( N + 1 ))
}
21
+
22
# Essential-genes results of all datasets, relative to the current directory
ESS="../07.annotation/01.function/01.essential"

# Initialize temporals: reuse any database already saved for this dataset
for t in 01.haai 02.aai 03.ani ; do
  [[ -s "$t/$DATASET.db" ]] && cp "$t/$DATASET.db" "$TMPDIR/$t.db"
done
echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
  "aai float, sd float, n int, omega int);" | sqlite3 "$TMPDIR/02.aai.db"
N=1

# Traverse "nearly-half" of the ref-datasets using first-come-first-served
for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
  echo "=[ $i ]"
  date "+%Y-%m-%d %H:%M:%S %z"
  HAAI=""; AAI=""; ANI="";
  # Check if the i-th dataset is ready
  [[ -s "$ESS/$i.done" && -s "$ESS/$i.json" ]] || continue
  # Check if this is done (e.g., in a previous failed iteration)
  AAI=$(echo "select aai from aai where seq1='$DATASET' and seq2='$i';" \
    | sqlite3 "$TMPDIR/02.aai.db" || echo "")
  # Try the other direction (value stored in the database of dataset $i)
  if [[ "$AAI" == "" && -s "02.aai/$i.db" ]] ; then
    cp "02.aai/$i.db" "$TMPDIR/$i.db"
    AAI=$(echo "select aai from aai where seq2='$DATASET' and seq1='$i';" \
      | sqlite3 "$TMPDIR/$i.db" || echo "")
    rm "$TMPDIR/$i.db"
  fi
  # Try with hAAI
  if [[ "$AAI" == "" ]] ; then
    [[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
      || cp "$ESS/$DATASET.ess.faa" "$TMPDIR/$DATASET.ess.faa"
    HAAI=$(aai.rb -1 "$TMPDIR/$DATASET.ess.faa" -2 "$ESS/$i.ess.faa" \
      -t "$CORES" -a -n 10 -S "$TMPDIR/01.haai.db" --name1 "$DATASET" \
      --name2 "$i" --lookup-first --no-save-rbm || echo "")
    # Only an hAAI whose floor is below 90 is converted into an AAI estimate;
    # the estimate is stored with sd, n and omega set to 0
    if [[ "$HAAI" != "" \
          && $(perl -MPOSIX -e "print floor $HAAI") -lt 90 ]] ; then
      AAI=$(perl -e \
        "printf '%f', 100-exp(2.435076 + 0.4275193*log(100-$HAAI))")
      echo "insert into aai values('$DATASET','$i','$AAI',0,0,0);" \
        | sqlite3 "$TMPDIR/02.aai.db"
    fi
  fi
  # Try with complete AAI
  if [[ "$AAI" == "" ]] ; then
    [[ -e "$TMPDIR/$DATASET.faa" ]] \
      || cp "../06.cds/$DATASET.faa" "$TMPDIR/$DATASET.faa"
    AAI=$(aai.rb -1 "$TMPDIR/$DATASET.faa" -2 "../06.cds/$i.faa" \
      -t "$CORES" -a -S "$TMPDIR/02.aai.db" --name1 "$DATASET" \
      --name2 "$i" --lookup-first || echo "")
  fi
  date "+%Y-%m-%d %H:%M:%S %z"
  # Check if ANI is meaningful: both assemblies exist and ceil(AAI) > 90.
  # AAI may still be empty here (aai.rb failed); skip the perl call then
  # instead of feeding it an empty expression.
  if [[ "$AAI" != "" \
        && -e "../05.assembly/$DATASET.LargeContigs.fna" \
        && -e "../05.assembly/$i.LargeContigs.fna" \
        && $(perl -MPOSIX -e "print ceil $AAI") -gt 90 ]] ; then
    # Check if this is done (e.g., in a previous failed iteration)
    ANI=$(echo "select ani from ani where seq1='$DATASET' and seq2='$i';" \
      | sqlite3 "$TMPDIR/03.ani.db" || echo "")
    # Try the other direction
    if [[ "$ANI" == "" && -s "03.ani/$i.db" ]] ; then
      cp "03.ani/$i.db" "$TMPDIR/$i.db"
      ANI=$(echo "select ani from ani" \
        "where seq2='$DATASET' and seq1='$i';" \
        | sqlite3 "$TMPDIR/$i.db" || echo "")
      rm "$TMPDIR/$i.db"
    fi
    # Calculate it
    if [[ "$ANI" == "" ]] ; then
      [[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
        || cp "../05.assembly/$DATASET.LargeContigs.fna" \
          "$TMPDIR/$DATASET.LargeContigs.fna"
      ANI=$(ani.rb -1 "$TMPDIR/$DATASET.LargeContigs.fna" \
        -2 "../05.assembly/$i.LargeContigs.fna" -t "$CORES" \
        -S "$TMPDIR/03.ani.db" -a --name1 "$DATASET" --name2 "$i" \
        --no-save-regions --no-save-rbm --lookup-first \
        || echo "")
    fi
  fi
  echo "$AAI;$ANI"
  checkpoint_n
done
# Force a final checkpoint regardless of the current count
N=10
checkpoint_n
105
+
@@ -0,0 +1,40 @@
1
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Gathers the 'aai' table of every reference, non-multi dataset database into
# miga-project.txt(.gz), saves it as an R object with a histogram of the
# values, and registers the aai_distances result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
cd "$PROJECT/data/09.distances/02.aai"

# Timestamp the start of the task
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Start from an empty log and list the datasets to export
: > miga-project.log
datasets=$(miga list_datasets -P "$PROJECT" --ref --no-multi)

# Export all rows of each database as tab-delimited text.
# Rows with omega = 0 are labelled 'hAAI_AAI', every other row 'AAI'.
query="SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END,"
query="$query  seq1, seq2, aai, sd, n, omega from aai;"
printf 'metric\ta\tb\tvalue\tsd\tn\tomega\n' > miga-project.txt
for ds in $datasets ; do
  echo "$query" | sqlite3 "$ds.db" | tr "\\|" "\\t" >> miga-project.txt
  echo "$ds" >> miga-project.log
done

# Load the table in R, save it, and write the histogram (100 breaks) as
# three columns: lower break, upper break, count
R --vanilla <<'RSCRIPT'

aai <- read.table('miga-project.txt', sep='\t', h=T);
save(aai, file='miga-project.Rdata');
h <- hist(aai[,'value'], breaks=100, plot=FALSE);
write.table(
cbind(h[['breaks']][-length(h[['breaks']])],h[['breaks']][-1],h[['counts']]),
file='miga-project.hist', quote=FALSE, sep='\t',
col.names=FALSE, row.names=FALSE);

RSCRIPT

# Compress the text table
gzip -9 -f miga-project.txt

# Timestamp the end of the task and register the result
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r aai_distances
40
+
@@ -0,0 +1,39 @@
1
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Gathers the 'ani' table of every reference, non-multi dataset database into
# miga-project.txt(.gz), saves it as an R object with a histogram of the
# values, and registers the ani_distances result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
cd "$PROJECT/data/09.distances/03.ani"

# Timestamp the start of the task
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Start from an empty log and list the datasets to export
: > miga-project.log
datasets=$(miga list_datasets -P "$PROJECT" --ref --no-multi)

# Export all rows of each database as tab-delimited text
query="SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;"
printf 'metric\ta\tb\tvalue\tsd\tn\tomega\n' > miga-project.txt
for ds in $datasets ; do
  echo "$query" | sqlite3 "$ds.db" | tr "\\|" "\\t" >> miga-project.txt
  echo "$ds" >> miga-project.log
done

# Load the table in R, save it, and write the histogram (100 breaks) as
# three columns: lower break, upper break, count
R --vanilla <<'RSCRIPT'

ani <- read.table('miga-project.txt', sep='\t', h=T);
save(ani, file='miga-project.Rdata');
h <- hist(ani[,'value'], breaks=100, plot=FALSE);
write.table(
cbind(h[['breaks']][-length(h[['breaks']])],h[['breaks']][-1],h[['counts']]),
file='miga-project.hist', quote=FALSE, sep='\t',
col.names=FALSE, row.names=FALSE);

RSCRIPT

# Compress the text table
gzip -9 -f miga-project.txt

# Timestamp the end of the task and register the result
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r ani_distances
39
+
@@ -0,0 +1,38 @@
1
#!/bin/bash
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES
#
# Assembles the trimmed reads of $DATASET with idba_ud, removes intermediate
# files, exposes the assembly as $DATASET.AllContigs.fna, writes the contigs
# of at least 1000 (second column of FastA.length.pl) to
# $DATASET.LargeContigs.fna, and registers the assembly result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
cd "$PROJECT/data/05.assembly"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Assemble: prefer coupled reads over single reads, plain over gzipped
FA="../04.trimmed_fasta/$DATASET.CoupledReads.fa"
[[ -e "$FA" ]] || FA="$FA.gz"
[[ -e "$FA" ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
[[ -e "$FA" ]] || FA="$FA.gz"
idba_ud --pre_correction -r "$FA" -o "$DATASET" --num_threads "$CORES"

# Clean: -f keeps an unmatched pattern from aborting the script under the
# 'set -e' enabled by miga.bash
( cd "$DATASET" \
  && rm -f kmer graph-*.fa align-* local-contig-*.fa contig-*.fa )

# Extract: -f replaces a link left behind by a previous attempt
if [[ -s "$DATASET/scaffold.fa" ]] ; then
  ln -sf "$DATASET/scaffold.fa" "$DATASET.AllContigs.fna"
else
  ln -sf "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
fi
FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
  | FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
  > "$DATASET.LargeContigs.fna"

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r assembly
38
+
data/scripts/cds.bash ADDED
@@ -0,0 +1,45 @@
1
#!/bin/bash
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES
#
# Predicts genes on $DATASET.LargeContigs.fna with MetaGeneMark (gmhmmp),
# extracts protein and nucleotide sequences from the GFF2 output, and
# registers the cds result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
cd "$PROJECT/data/06.cds"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
# Directory of the gmhmmp executable; the model, key and helper scripts are
# looked up next to it
GMHMMP=$(command -v gmhmmp) || {
  echo "Impossible to find gmhmmp in the PATH." >&2
  exit 1
}
GM=$(dirname -- "$GMHMMP")

# Register key: copy the first key found into ./.gm_key
if [[ ! -e .gm_key ]] ; then
  for key in "$GM/gm_key" "$GM/gm_key_64" "$GM/gm_key_32" "$GM/.gm_key" \
      "$HOME/.gm_key" ; do
    if [[ -e "$key" ]] ; then
      cp "$key" ".gm_key"
      break
    fi
  done
  if [[ ! -e .gm_key ]] ; then
    echo "Impossible to find MetaGeneMark key, please register your copy" \
      "and place the key in '$GM/gm_key'." >&2
    exit 1
  fi
fi

# Run MetaGeneMark
gmhmmp -a -d -m "$GM/MetaGeneMark_v1.mod" -f G -o "$DATASET.gff2" \
  "../05.assembly/$DATASET.LargeContigs.fna"

# Extract
perl "$GM/aa_from_gff.pl" < "$DATASET.gff2" > "$DATASET.faa"
perl "$GM/nt_from_gff.pl" < "$DATASET.gff2" > "$DATASET.fna"

# Gzip
gzip -9 -f "$DATASET.gff2"

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r cds
45
+
@@ -0,0 +1,27 @@
1
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Clusters the genomes of the project from the project-wide ANI table and
# proposes clade projects, then registers the clade_finding result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
cd "$PROJECT/data/10.clades/01.find"

# Timestamp the start of the task
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Markov-cluster genomes by ANI
# Columns 2-4 of the ANI table (a, b, value) without the header line
ani_table="../../09.distances/03.ani/miga-project.txt.gz"
gunzip -c "$ani_table" | tail -n+2 \
  | awk -F'\t' -v OFS='\t' '{print $2, $3, $4}' > genome-genome.aai90.rbm
ogs.mcl.rb -d . -o miga-project.ani-clades -t "$CORES" -i \
  -f '(\S+)-(\S+)\.aai90\.rbm'
# Same pairs, restricted to values of at least 95
awk -F'\t' '$3>=95' genome-genome.aai90.rbm > genome-genome.ani95.rbm
ogs.mcl.rb -d . -o miga-project.ani95-clades -t "$CORES" -b \
  -f '(\S+)-(\S+)\.ani95\.rbm'

# Propose clade projects: rows of the ani95 clustering (header skipped,
# commas turned into tabs) that have five or more fields
cat miga-project.ani95-clades | tail -n +2 | tr ',' '\t' | awk 'NF >= 5' \
  > miga-project.proposed-clades

# Timestamp the end of the task and register the result
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r clade_finding
27
+
@@ -0,0 +1,30 @@
1
#!/bin/bash
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES
#
# Dispatches the distance calculations of $DATASET to the submodule matching
# its type (reference or not; only non-multi datasets are processed), using a
# private temporary directory, and registers the distances result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
cd "$PROJECT/data/09.distances"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
# Remove the temporary directory on every exit path, including the aborts
# caused by the 'set -e' enabled in miga.bash; signals are turned into a
# regular exit so the EXIT trap also runs for them
trap 'rm -rf -- "$TMPDIR"' EXIT
trap "exit" SIGHUP SIGINT SIGTERM

# Check type of dataset (1 when the dataset is listed with the filter)
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
  | wc -l | awk '{print $1}')
REF=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --ref \
  | wc -l | awk '{print $1}')

# Call submodule
if [[ "$NOMULTI" -eq "1" && "$REF" -eq "1" ]] ; then
  source "$MIGA/scripts/_distances_ref_nomulti.bash"
elif [[ "$NOMULTI" -eq "1" ]] ; then
  source "$MIGA/scripts/_distances_noref_nomulti.bash"
fi

# Finalize
rm -rf -- "$TMPDIR"
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r distances
30
+
@@ -0,0 +1,29 @@
1
#!/bin/bash
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES
#
# Runs HMM.essential.rb on the predicted proteins of $DATASET and registers
# the essential result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
cd "$PROJECT/data/07.annotation/01.function/01.essential"

# Timestamp the start of the task
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Start from a fresh output directory
if [[ -d "$DATASET.ess" ]] ; then
  rm -R "$DATASET.ess"
fi
mkdir "$DATASET.ess"

# Options shared by all dataset types
hmm_opts=(-i "../../../06.cds/$DATASET.faa" -o "$DATASET.ess.faa"
  -m "$DATASET.ess/" -t "$CORES" -r "$DATASET")

# Metagenomes and viromes additionally get --metagenome
TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
  --metadata "type" | awk '{print $2}')
case "$TYPE" in
  metagenome|virome)
    hmm_opts+=(--metagenome)
    ;;
esac
HMM.essential.rb "${hmm_opts[@]}" > "$DATASET.ess/log"

# Timestamp the end of the task and register the result
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r essential
29
+
@@ -0,0 +1,39 @@
1
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Gathers the 'aai' table of every reference, non-multi dataset database in
# 01.haai into miga-project.txt(.gz), saves it as an R object with a histogram
# of the values, and registers the haai_distances result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
cd "$PROJECT/data/09.distances/01.haai"

# Timestamp the start of the task
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Start from an empty log and list the datasets to export
: > miga-project.log
datasets=$(miga list_datasets -P "$PROJECT" --ref --no-multi)

# Export all rows of each database as tab-delimited text
query="SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;"
printf 'metric\ta\tb\tvalue\tsd\tn\tomega\n' > miga-project.txt
for ds in $datasets ; do
  echo "$query" | sqlite3 "$ds.db" | tr "\\|" "\\t" >> miga-project.txt
  echo "$ds" >> miga-project.log
done

# Load the table in R, save it, and write the histogram (100 breaks) as
# three columns: lower break, upper break, count
R --vanilla <<'RSCRIPT'

haai <- read.table('miga-project.txt', sep='\t', h=T);
save(haai, file='miga-project.Rdata');
h <- hist(haai[,'value'], breaks=100, plot=FALSE);
write.table(
cbind(h[['breaks']][-length(h[['breaks']])],h[['breaks']][-1],h[['counts']]),
file='miga-project.hist', quote=FALSE, sep='\t',
col.names=FALSE, row.names=FALSE);

RSCRIPT

# Compress the text table
gzip -9 -f miga-project.txt

# Timestamp the end of the task and register the result
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r haai_distances
39
+
data/scripts/init.bash ADDED
@@ -0,0 +1,211 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ #=======[ Functions ]
5
# Prompts the user on stderr and prints the answer on stdout.
# Arguments: $1 - question to display
#            $2 - value used when the user just presses Enter (or on EOF)
# Globals:   user_answer (written)
# Outputs:   the prompt to stderr; the answer, without a trailing newline, to
#            stdout
function ask_user {
  local question=$1
  local default=$2
  # Quoted so that '?', '*' or brackets in the question are never expanded
  # against the files of the current directory
  printf '%s\n' "$question" >&2
  printf ' [%s] > ' "$default" >&2
  # -r keeps backslashes typed by the user; EOF falls back to the default
  read -r user_answer || true
  user_answer=${user_answer:-$default}
  printf '%s' "$user_answer"
}
14
+
15
# Asks where a required program lives and records that location.
# Arguments: $1 - executable name to look for
#            $2 - display name of the requirement
#            $3 - text shown in parentheses in the question
#            $4 - extra text appended to the question
# Globals:   user_answer (written), PATH (extended and exported)
# Side effects: appends a MIGA_PATH line to $HOME/.miga_rc; exits with
#            status 1 if "$answer/$1" is not executable.
function check_req {
  local bin=$1
  local default
  # Suggest the directory where the executable is currently found
  default=$(dirname "$(which "$bin")")
  user_answer=$(ask_user "Where can I find $2 ($3)? $4" "$default")
  if [[ ! -x "$user_answer/$bin" ]] ; then
    echo "Cannot find $2 at '$user_answer/$bin'. Aborting..." >&2
    exit 1
  fi
  export PATH="$PATH:$user_answer"
  echo "MIGA_PATH=\"$user_answer:\$MIGA_PATH\" # $2" >> "$HOME/.miga_rc"
}
27
+
28
# Tests whether an R package can be loaded.
# Arguments: $1 - name of the R package
# Returns:   0 if exactly one line of R's combined output contains "GOT IT",
#            non-zero otherwise
function check_rlib {
  local rlib=$1
  local hits
  # 'GOT' and 'IT' are separate cat() arguments, so the marker only appears
  # joined in what R prints, not in the command fed to it.
  # grep -c exits non-zero on zero matches; '|| true' keeps that from
  # aborting a caller running under 'set -e'.
  hits=$(echo "if(require($rlib)) cat('GOT','IT')" | R --vanilla -q 2>&1 \
    | grep -c "GOT IT") || true
  [[ "$hits" == "1" ]]
}
34
+
35
# Tests whether a Ruby gem can be required.
# Arguments: $1 - name of the gem
# Returns:   the exit status of ruby after running "require '<gem>'"
#            (0 when the gem loads)
function check_gem {
  local gem=$1
  # Rely on ruby's exit status; anything it prints is discarded
  echo "require '$gem'" | ruby >/dev/null 2>&1
}
40
+
41
+ #=======[ Main ]
42
MIGA_STARTUP="no"
# Root of the MiGA installation: the parent of this script's directory
MIGA=$(cd "$(dirname "$0")/.."; pwd)
echo "
===[ Welcome to MiGA, the Microbial Genome Atlas ]===

I'm the initialization script, and I'll sniff around your computer to
make sure you have all the requirements for MiGA Daemons.
" >&2

if [[ "$(ask_user "Would you like to see all the requirements before starting? (yes / no)" "no")" == "yes" ]] ; then
  echo "" >&2
  cat "$MIGA/utils/requirements.txt" >&2
  echo "" >&2
fi

# Optionally load a previous configuration so its values become the defaults
if [[ -e "$HOME/.miga_rc" ]] ; then
  case "$(ask_user "I found a previous configuration. Do you want to load the defaults within? (yes / no / cancel)" "yes")" in
    yes)
      source "$HOME/.miga_rc"
      if [[ "$MIGA_CONFIG_DATE" == "" ]] ; then
        echo " Loaded incomplete configuration" >&2
      else
        echo " Loaded configuration from $MIGA_CONFIG_DATE" >&2
      fi
      ;;
    no)
      rm "$HOME/.miga_rc"
      ;;
    cancel)
      exit 0
      ;;
    *)
      echo "Cannot understand your answer, please use 'yes', 'no', or 'cancel'. Aborting..." >&2
      exit 1
      ;;
  esac
fi

# The configuration file is always rewritten from scratch
echo "#!/bin/bash
# MiGA made this on $(date)
" > "$HOME/.miga_rc"

# Check Software requirements
MIGA_STARTUP=$(ask_user "Is there a script I need to load at startup? (no / path to the script to load)" "$MIGA_STARTUP")
if [[ "$MIGA_STARTUP" != "no" ]] ; then
  echo "MIGA_STARTUP='$MIGA_STARTUP'
source \"\$MIGA_STARTUP\"
" >> "$HOME/.miga_rc";
  source "$MIGA_STARTUP";
fi
echo "
Looking for Software requirements:" >&2
# One requirement per line after the first two lines of the file; the
# tab-separated fields are: name, executable, website, hint
reqs=$(tail -n+3 "$MIGA/utils/requirements.txt" | perl -pe 's/\t+/\t/g')
IFS_BU=$IFS
IFS=$'\n'
for ln in $reqs ; do
  rname=$(echo "$ln" | awk -F'\t' '{print $1}')
  rtest=$(echo "$ln" | awk -F'\t' '{print $2}')
  rwebs=$(echo "$ln" | awk -F'\t' '{print $3}')
  rhint=$(echo "$ln" | awk -F'\t' '{print $4}')
  check_req "$rtest" "$rname" "$rwebs" "$rhint"
done
IFS=$IFS_BU
echo "export PATH=\$MIGA_PATH\$PATH" >> "$HOME/.miga_rc"

# Check for R packages
echo "
Looking for R packages:" >&2
if ! check_rlib enveomics.R ; then
  echo "+ Installing enveomics.R" >&2
  # The package source is looked up one level above the directory that
  # contains FastQ.tag.rb
  R CMD INSTALL "$(dirname "$(which "FastQ.tag.rb")")/../enveomics.R"
fi
RLIBS="ape ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
for lib in $RLIBS ; do
  if ! check_rlib "$lib" ; then
    echo "+ Installing $lib" >&2
    echo "install.packages('$lib', repos='http://cran.rstudio.com/')" \
      | R --vanilla -q
  fi
done

# Check for ruby gems
echo "
Looking for Ruby gems:" >&2
GEMS="rest_client sqlite3 daemons json"
for gem in $GEMS ; do
  if ! check_gem "$gem" ; then
    echo "+ Installing $gem (user-only)" >&2
    gem install --user "$gem"
  fi
done

# Check for other files
echo "
Looking for additional files:
+ MetaGeneMark license key" >&2
# Resolved in two steps so that a missing executable still aborts (set -e)
# while install paths containing spaces are handled
GMHMMP_BIN=$(which gmhmmp)
GM=$(dirname -- "$GMHMMP_BIN")
if [[ ! -e "$GM/gm_key" && ! -e "$GM/gm_key_64" && ! -e "$GM/gm_key_32" && ! -e "$GM/.gm_key" && ! -e "$HOME/.gm_key" ]] ; then
  echo "Cannot find it, please place your license key in '$GM/gm_key'. Aborting..." >&2
  exit 1
fi
echo "+ MetaGeneMark scripts" >&2
if [[ ! -e "$GM/aa_from_gff.pl" || ! -e "$GM/nt_from_gff.pl" ]] ; then
  echo "Cannot find it, please place aa_from_gff.pl and nt_from_gff.pl in '$GM/'. Aborting..." >&2
  exit 1
fi
echo "+ MyTaxa scores database" >&2
MYTAXA_BIN=$(which MyTaxa)
MT=$(dirname -- "$MYTAXA_BIN")
if [[ ! -d "$MT/db" ]] ; then
  echo "Cannot find it, please execute 'python $MT/utils/download_db.py'. Aborting..." >&2
  exit 1
fi
echo "+ MyTaxa DIAMOND database" >&2
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
  echo "Cannot find it, please download 'http://enve-omics.ce.gatech.edu/data/public_mytaxa/AllGenomes.faa.dmnd' into '$MT'. Aborting..." >&2
  exit 1
fi
158
+
159
# Configure daemon: ask for the daemon type and its settings, then write them
# to $HOME/.miga_daemon.json
echo "
Default daemon configuration:" >&2
dtype=$(ask_user "Please select the type of daemon you want to setup (bash / qsub / msub)" "bash")
case "$dtype" in
  bash)
    dlatency=$(ask_user "For how long should I sleep? (# in seconds)" "30")
    dmaxjobs=$(ask_user "How many jobs can I launch at once?" "6")
    dppn=$(ask_user "How many CPUs can I use per job?" "2")
    echo "Setting up internal daemon defaults, if you don't understand this just leave default values:" >&2
    dcmd=$(ask_user "How should I launch tasks? Use %1\$s for script path, %2\$s for variables, %3\$d for CPUs, %4\$s for log file, and %5\$s for task name." "%2\$s '%1\$s' &> '%4\$s'")
    dvar=$(ask_user "How should I pass variables? Use %1\$s for keys and %2\$s for values." "%1\$s=%2\$s")
    dsep=$(ask_user "What should I use to separate variables?" " ")
    dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for PID, output should be 1 for running and 0 for non-running." "ps -p '%1\$s'|tail -n+2|wc -l|awk '{print \$1}'")
    ;;
  [qm]sub)
    dqueue=$(ask_user "What's the name of the queue I should use?" "")
    dlatency=$(ask_user "How long should I sleep? (# in seconds)" "150")
    dmaxjobs=$(ask_user "How many jobs can I launch at once?" "300")
    dppn=$(ask_user "How many CPUs can I use per job?" "4")
    echo "Setting up internal daemon defaults, if you don't understand this just leave default values:" >&2
    # The log file placeholder is a string (%4$s), as in the default command
    dcmd=$(ask_user "How should I launch tasks? Use %1\$s for script path, %2\$s for variables, and %3\$d for CPUs, %4\$s for log file, and %5\$s for task name." \
      "$dtype -q '$dqueue' -v '%2\$s' -l nodes=1:ppn=%3\$d %1\$s -j oe -o '%4\$s' -N '%5\$s' | grep .")
    dvar=$(ask_user "How should I pass variables? Use %1\$s for keys and %2\$s for values." "%1\$s=%2\$s")
    dsep=$(ask_user "What should I use to separate variables?" ",")
    if [[ "$dtype" == "qsub" ]] ; then
      dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for job id, output should be 1 for running and 0 for non-running." \
        "qstat -f '%1\$s'|grep ' job_state ='|perl -pe 's/.*= //'|grep '[^C]'|tail -n1|wc -l|awk '{print \$1}'")
    else
      dalive=$(ask_user "How can I know that a process is still alive? Use %1\$s for job id, output should be 1 for running and 0 for non-running." \
        "checkjob '%1\$s'|grep '^State:'|perl -pe 's/.*: //'|grep 'Deferred\\|Hold\\|Idle\\|Starting\\|Running\\|Blocked'|tail -n1|wc -l|awk '{print \$1}'")
    fi
    ;;
  *)
    # Without this, every setting below would be empty and the JSON invalid
    echo "Cannot understand your answer, please use 'bash', 'qsub', or 'msub'. Aborting..." >&2
    exit 1
    ;;
esac
echo "{
\"created\": \"$(date "+%Y-%m-%d %H:%M:%S %z")\",
\"updated\": \"$(date "+%Y-%m-%d %H:%M:%S %z")\",
\"type\" : \"$dtype\",
\"cmd\" : \"$dcmd\",
\"var\" : \"$dvar\",
\"varsep\" : \"$dsep\",
\"alive\" : \"$dalive\",
\"latency\": $dlatency,
\"maxjobs\": $dmaxjobs,
\"ppn\" : $dppn
}" > "$HOME/.miga_daemon.json"

# Confirm configuration: the date marks the configuration file as complete
echo "
MIGA_CONFIG_DATE='$(date "+%Y-%m-%d %H:%M:%S %z")'
" >> "$HOME/.miga_rc"
211
+
data/scripts/miga.bash ADDED
@@ -0,0 +1,12 @@
1
#!/bin/bash
# Common setup for the scripts that source this file: stop at the first
# failing command, load the user's MiGA configuration, and put the MiGA
# executables first in the PATH.
set -o errexit
#MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
. "$HOME/.miga_rc"
PATH="$MIGA/bin:$PATH"
export PATH
6
+
7
# Succeeds (status 0) when the path given as first argument exists.
function exists {
  test -e "$1"
}
8
+
9
+ #if [[ "$RUNTYPE" == "qsub" ]] ; then
10
+ #elif [[ "$RUNTYPE" == "msub" ]] ; then
11
+ #fi
12
+