miga-base 0.2.0.9 → 0.2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -0
  3. data/actions/add_result.rb +37 -0
  4. data/actions/add_taxonomy.rb +63 -0
  5. data/actions/create_dataset.rb +49 -0
  6. data/actions/create_project.rb +46 -0
  7. data/actions/daemon.rb +50 -0
  8. data/actions/date.rb +14 -0
  9. data/actions/{download_dataset → download_dataset.rb} +5 -28
  10. data/actions/find_datasets.rb +41 -0
  11. data/actions/import_datasets.rb +47 -0
  12. data/actions/index_taxonomy.rb +46 -0
  13. data/actions/list_datasets.rb +50 -0
  14. data/actions/list_files.rb +43 -0
  15. data/actions/project_info.rb +40 -0
  16. data/actions/unlink_dataset.rb +28 -0
  17. data/bin/miga +129 -33
  18. data/lib/miga/daemon.rb +48 -34
  19. data/lib/miga/dataset.rb +7 -123
  20. data/lib/miga/dataset_result.rb +177 -0
  21. data/lib/miga/project.rb +32 -12
  22. data/lib/miga/version.rb +2 -2
  23. data/scripts/_distances_functions.bash +82 -0
  24. data/scripts/_distances_noref_nomulti.bash +96 -67
  25. data/scripts/_distances_ref_nomulti.bash +54 -85
  26. data/scripts/assembly.bash +16 -3
  27. data/scripts/clade_finding.bash +20 -18
  28. data/scripts/distances.bash +2 -1
  29. data/scripts/init.bash +2 -6
  30. data/scripts/subclades.bash +4 -5
  31. data/test/common_test.rb +2 -2
  32. data/test/daemon_test.rb +73 -1
  33. data/test/project_test.rb +26 -2
  34. data/test/taxonomy_test.rb +10 -0
  35. data/test/test_helper.rb +1 -1
  36. data/utils/subclades-compile.rb +4 -2
  37. data/utils/subclades.R +140 -158
  38. metadata +48 -44
  39. data/actions/add_result +0 -58
  40. data/actions/add_taxonomy +0 -83
  41. data/actions/create_dataset +0 -61
  42. data/actions/create_project +0 -67
  43. data/actions/daemon +0 -66
  44. data/actions/find_datasets +0 -61
  45. data/actions/import_datasets +0 -83
  46. data/actions/index_taxonomy +0 -68
  47. data/actions/list_datasets +0 -81
  48. data/actions/list_files +0 -63
  49. data/actions/unlink_dataset +0 -49
@@ -0,0 +1,82 @@
1
+ #!/bin/bash
2
+ # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
+ # $NOMULTI, $REF
4
+
5
+ set -e
6
+
7
+ if [[ ! -n $MIGA_AAI_SAVE_RBM ]] ; then
8
+ MIGA_AAI_SAVE_RBM="save-rbm"
9
+ if [[ -n $PROJECT ]] ; then
10
+ if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
11
+ MIGA_AAI_SAVE_RBM="no-save-rbm"
12
+ fi
13
+ fi
14
+ fi
15
+
16
+ function make_empty_aai_db {
17
+ local DB=$1
18
+ echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
19
+ " aai float, sd float, n int, omega int);" | sqlite3 $DB
20
+ }
21
+
22
+ function ds_name {
23
+ basename $1 | perl -pe "s/[^A-Za-z0-9_].*//"
24
+ }
25
+
26
+ function aai {
27
+ local F1=$1
28
+ local F2=$2
29
+ local TH=$3
30
+ local DB=$4
31
+ local N1=$(ds_name $F1)
32
+ local N2=$(ds_name $F2)
33
+ aai.rb -1 $F1 -2 $F2 -t $TH -a --lookup-first -S $DB --name1 $N1 --name2 $N2 \
34
+ --$MIGA_AAI_SAVE_RBM || echo "0"
35
+ }
36
+
37
+ function ani {
38
+ local F1=$1
39
+ local F2=$2
40
+ local TH=$3
41
+ local DB=$4
42
+ local N1=$(ds_name $F1)
43
+ local N2=$(ds_name $F2)
44
+ ani.rb -1 $F1 -2 $F2 -t $TH -a --no-save-regions --no-save-rbm \
45
+ --lookup-first -S $DB --name1 $N1 --name2 $N2 || echo "0"
46
+ }
47
+
48
+ function haai {
49
+ local F1=$1
50
+ local F2=$2
51
+ local TH=$3
52
+ local DB=$4
53
+ local AAI_DB=$5
54
+ local N1=$(ds_name $F1)
55
+ local N2=$(ds_name $F2)
56
+ local HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" aai $F1 $F2 $TH $DB)
57
+ if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
58
+ local AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
59
+ [[ ! -s $AAI_DB ]] && make_empty_aai_db $AAI_DB
60
+ echo "insert into aai values('$N1','$N2','$AAI',0,0,0);" | sqlite3 $AAI_DB
61
+ echo $AAI
62
+ fi
63
+ }
64
+
65
+ function val_from_db {
66
+ local N1=$1
67
+ local N2=$2
68
+ local DB=$3
69
+ local MT=$4
70
+ if [[ -s $DB ]] ; then
71
+ echo "select $MT from $MT where seq1='$N1' and seq2='$N2';" \
72
+ | sqlite3 $DB || echo 0
73
+ fi
74
+ }
75
+
76
+ function aai_from_db {
77
+ val_from_db $1 $2 $3 aai
78
+ }
79
+
80
+ function ani_from_db {
81
+ val_from_db $1 $2 $3 ani
82
+ }
@@ -1,88 +1,117 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
- # $NOMULTI, $REF
3
+ # $NOMULTI, $REF
4
4
 
5
5
  set -e
6
6
 
7
7
  # Deal with previous runs (if any)
8
+ exists $DATASET.haai.db && cp $DATASET.haai.db $TMPDIR
8
9
  exists $DATASET.a[an]i.db && cp $DATASET.a[an]i.db $TMPDIR
9
10
  exists $DATASET.a[an]i.9[05] && rm $DATASET.a[an]i.9[05]
10
11
  N=0
11
12
  function checkpoint_n {
12
- let N=$N+1
13
- if [[ $N -ge 10 ]] ; then
14
- for metric in aai ani ; do
15
- if [[ -s $TMPDIR/$DATASET.$metric.db ]] ; then
16
- echo "select count(*) from $metric;" \
17
- | sqlite3 $TMPDIR/$DATASET.$metric.db \
18
- || exit 1
19
- cp $TMPDIR/$DATASET.$metric.db .
20
- fi
21
- done
22
- N=0
23
- fi
13
+ let N=$N+1
14
+ if [[ $N -ge 10 ]] ; then
15
+ for metric in haai aai ani ; do
16
+ if [[ -s $TMPDIR/$DATASET.$metric.db ]] ; then
17
+ echo "select count(*) from ${metric#h};" \
18
+ | sqlite3 $TMPDIR/$DATASET.$metric.db \
19
+ >/dev/null || exit 1
20
+ cp $TMPDIR/$DATASET.$metric.db .
21
+ fi
22
+ done
23
+ N=0
24
+ fi
24
25
  }
25
26
 
26
- # Find 95%ANI clade(s) with AAI <= 90% / ANI <= 95%
27
- REFGENOMES=$(cat ../10.clades/01.find/miga-project.ani95-clades \
28
- | tail -n +2 | cut -d , -f 1)
29
- for i in $REFGENOMES ; do
30
- AAI=$(aai.rb -1 ../06.cds/$DATASET.faa \
31
- -2 ../06.cds/$i.faa -t $CORES -a --lookup-first \
32
- -S $TMPDIR/$DATASET.aai.db --name1 $DATASET --name2 $i || echo "")
33
- checkpoint_n
34
- if [[ $(perl -MPOSIX -e "print ceil $AAI") -ge 90 ]] ; then
35
- echo $i >> $DATASET.aai90
36
- [[ -e "../05.assembly/$DATASET.LargeContigs.fna" ]] || continue
37
- [[ -e "../05.assembly/$i.LargeContigs.fna" ]] || continue
38
- ANI=$(ani.rb -1 ../05.assembly/$DATASET.LargeContigs.fna \
39
- -2 ../05.assembly/$i.LargeContigs.fna -t $CORES -a \
40
- --no-save-regions --no-save-rbm --lookup-first \
41
- -S $TMPDIR/$DATASET.ani.db --name1 $DATASET --name2 $i || echo "")
27
+ ESS="../07.annotation/01.function/01.essential"
28
+ if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
29
+ # Classify aai-clade (if project type is not clade)
30
+ CLADES="../10.clades/01.find"
31
+ CLASSIF="."
32
+ [[ -e "$DATASET.aai-medoids.tsv" ]] && rm "$DATASET.aai-medoids.tsv"
33
+ while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
34
+ MAX_AAI=0
35
+ AAI_MED=""
36
+ AAI_CLS=""
37
+ i_n=0
38
+ for i in $(cat "$CLADES/$CLASSIF/miga-project.medoids") ; do
39
+ let i_n=$i_n+1
40
+ AAI=$(haai $ESS/$DATASET.ess.faa $ESS/$i.ess.faa $CORES \
41
+ $TMPDIR/$DATASET.haai.db $TMPDIR/$DATASET.aai.db)
42
+ [[ "${AAI%.*}" -le 0 ]] \
43
+ && AAI=$(aai ../06.cds/$DATASET.faa ../06.cds/$i.faa $CORES \
44
+ $TMPDIR/$DATASET.aai.db)
42
45
  checkpoint_n
43
- if [[ $(perl -MPOSIX -e "print ceil $ANI") -ge 95 ]] ; then
44
- echo $i >> $DATASET.ani95
46
+ if [[ $(perl -e "print 1 if '$AAI' >= '$MAX_AAI'") == "1" ]] ; then
47
+ MAX_AAI=$AAI
48
+ AAI_MED=$i
49
+ AAI_CLS=$i_n
50
+ echo "[$CLASSIF] New max: $AAI_MED ($AAI_CLS): $MAX_AAI"
45
51
  fi
46
- fi
47
- done
52
+ done
53
+ CLASSIF="$CLASSIF/miga-project.sc-$AAI_CLS"
54
+ echo "$AAI_CLS $AAI_MED $MAX_AAI $CLASSIF" \
55
+ >> "$DATASET.aai-medoids.tsv"
56
+ done
48
57
 
49
- # Classify in-clade (if project type is clade)
50
- CLADES="../10.clades/02.ani"
51
- CLASSIF="."
52
- MAX_ANI=0
53
- ANI_MED=""
54
- [[ -e "$DATASET.medoids" ]] && rm "$DATASET.medoids"
55
- while [[ -e "$CLADES/$CLASSIF/miga-project.1.medoids" ]] ; do
56
- for i in $(cat "$CLADES/$CLASSIF/miga-project.1.medoids") ; do
57
- ANI=$(ani.rb -1 ../05.assembly/$DATASET.LargeContigs.fna \
58
- -2 ../05.assembly/$i.LargeContigs.fna -t $CORES -a \
59
- --no-save-regions --no-save-rbm --lookup-first \
60
- -S $TMPDIR/$DATASET.ani.db --name1 $DATASET --name2 $i || echo "")
58
+ # Calculate all the AAIs/ANIs against the lowest subclade (if classified)
59
+ if [[ "$CLASSIF" != "." ]] ; then
60
+ PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
61
+ if [[ -s "$PAR" ]] ; then
62
+ for i in $(cat "$PAR" | awk "\$2==$AAI_CLS{print \$1}") ; do
63
+ AAI=$(aai ../06.cds/$DATASET.faa ../06.cds/$i.faa $CORES \
64
+ $TMPDIR/$DATASET.aai.db)
65
+ if [[ $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
66
+ ani ../05.assembly/$DATASET.LargeContigs.fna \
67
+ ../05.assembly/$i.LargeContigs.fna \
68
+ $TMPDIR/$DATASET.ani.db >/dev/null
69
+ fi
70
+ checkpoint_n
71
+ done
72
+ fi
73
+ fi
74
+ else
75
+ # Classify ani-clade (if project type is clade)
76
+ CLADES="../10.clades/02.ani"
77
+ CLASSIF="."
78
+ [[ -e "$DATASET.ani-medoids.tsv" ]] && rm "$DATASET.ani-medoids.tsv"
79
+ while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
80
+ MAX_ANI=0
81
+ ANI_MED=""
82
+ ANI_CLS=""
83
+ i_n=0
84
+ for i in $(cat "$CLADES/$CLASSIF/miga-project.medoids") ; do
85
+ let i_n=$i_n+1
86
+ ANI=$(ani ../05.assembly/$DATASET.LargeContigs.fna \
87
+ ../05.assembly/$i.LargeContigs.fna $CORES $TMPDIR/$DATASET.ani.db)
61
88
  checkpoint_n
62
- if [[ $(perl -e "print 1 if $ANI > $MAX_ANI") == "1" ]] ; then
63
- MAX_ANI=$ANI
64
- ANI_MED=$i
89
+ if [[ $(perl -e "print 1 if '$ANI' >= '$MAX_ANI'") == "1" ]] ; then
90
+ MAX_ANI=$ANI
91
+ ANI_MED=$i
92
+ ANI_CLS=$i_n
93
+ echo "[$CLASSIF] New max: $ANI_MED ($ANI_CLS): $MAX_ANI"
65
94
  fi
66
- done
67
- echo $i >> "$DATASET.medoids"
68
- CLASSIF="$CLASSIF/miga-project.1.subcl-$i"
69
- done
70
- echo $CLASSIF > "$DATASET.class"
95
+ done
96
+ CLASSIF="$CLASSIF/miga-project.sc-$ANI_CLS"
97
+ echo "$ANI_CLS $ANI_MED $MAX_ANI $CLASSIF" \
98
+ >> "$DATASET.ani-medoids.tsv"
99
+ done
71
100
 
72
- # Calculate all the ANIs against the lowest subclade (if classified in-clade)
73
- if [[ "$CLASSIF" != "." ]] ; then
74
- if [[ -s "$CLADES/$CLASSIF/miga-project.all" ]] ; then
75
- for i in $(cat "$CLADES/$CLASSIF/miga-project.all") ; do
76
- ANI=$(ani.rb -1 ../05.assembly/$DATASET.LargeContigs.fna \
77
- -2 ../05.assembly/$i.LargeContigs.fna -t $CORES -a \
78
- --no-save-regions --no-save-rbm --lookup-first \
79
- -S $TMPDIR/$DATASET.ani.db --name1 $DATASET --name2 $i || echo "")
80
- checkpoint_n
101
+ # Calculate all the ANIs against the lowest subclade (if classified in-clade)
102
+ if [[ "$CLASSIF" != "." ]] ; then
103
+ PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
104
+ if [[ -s "$CLADES/$CLASSIF/miga-project.all" ]] ; then
105
+ for i in $(cat "$PAR" | awk "\$2==$ANI_CLS{print \$1}") ; do
106
+ ani ../05.assembly/$DATASET.LargeContigs.fna \
107
+ ../05.assembly/$i.LargeContigs.fna $CORES \
108
+ $TMPDIR/$DATASET.ani.db > /dev/null
109
+ checkpoint_n
81
110
  done
82
- fi
111
+ fi
112
+ fi
83
113
  fi
84
114
 
85
- # Finalize
86
- mv $TMPDIR/$DATASET.aai.db 02.aai/$DATASET.db
87
- mv $TMPDIR/$DATASET.ani.db 03.ani/$DATASET.db
88
-
115
+ #Finalize
116
+ N=11
117
+ checkpoint_n
@@ -5,102 +5,71 @@
5
5
  set -e
6
6
 
7
7
  function checkpoint_n {
8
- if [[ $N -eq 10 ]] ; then
9
- for t in 01.haai 02.aai 03.ani ; do
10
- if [[ -s $TMPDIR/$t.db ]] ; then
11
- tab="aai"
12
- [[ "$t" == "03.ani" ]] && tab="ani"
13
- echo "select count(*) from $tab;" \
14
- | sqlite3 $TMPDIR/$t.db\
15
- || exit 1
16
- cp $TMPDIR/$t.db $t/$DATASET.db
17
- fi
18
- done
19
- N=0
20
- fi
21
- let N=$N+1
8
+ if [[ $N -eq 10 ]] ; then
9
+ for t in 01.haai 02.aai 03.ani ; do
10
+ if [[ -s $TMPDIR/$t.db ]] ; then
11
+ tab="aai"
12
+ [[ "$t" == "03.ani" ]] && tab="ani"
13
+ echo "select count(*) from $tab;" \
14
+ | sqlite3 $TMPDIR/$t.db \
15
+ >/dev/null || exit 1
16
+ cp $TMPDIR/$t.db $t/$DATASET.db
17
+ fi
18
+ done
19
+ N=0
20
+ fi
21
+ let N=$N+1
22
22
  }
23
23
 
24
24
  ESS="../07.annotation/01.function/01.essential"
25
25
 
26
26
  # Initialize temporals
27
27
  for t in 01.haai 02.aai 03.ani ; do
28
- [[ -s $t/$DATASET.db ]] && cp $t/$DATASET.db $TMPDIR/$t.db
28
+ [[ -s $t/$DATASET.db ]] && cp $t/$DATASET.db $TMPDIR/$t.db
29
29
  done
30
- echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
31
- "aai float, sd float, n int, omega int);" | sqlite3 $TMPDIR/02.aai.db
32
30
  N=1
33
31
 
34
32
  # Traverse "nearly-half" of the ref-datasets using first-come-first-served
35
33
  for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
36
- echo "=[ $i ]"
37
- date "+%Y-%m-%d %H:%M:%S %z"
38
- HAAI=""; AAI=""; ANI="";
39
- # Check if the i-th dataset is ready
40
- [[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
41
- # Check if this is done (e.g., in a previous failed iteration)
42
- AAI=$(echo "select aai from aai where seq1='$DATASET' and seq2='$i';" \
43
- | sqlite3 $TMPDIR/02.aai.db || echo "")
44
- # Try the other direction
45
- if [[ "$AAI" == "" && -s 02.aai/$i.db ]] ; then
46
- cp "02.aai/$i.db" "$TMPDIR/$i.db"
47
- AAI=$(echo "select aai from aai where seq2='$DATASET' and seq1='$i';" \
48
- | sqlite3 "$TMPDIR/$i.db" || echo "")
49
- rm "$TMPDIR/$i.db"
50
- fi
51
- # Try with hAAI
52
- if [[ "$AAI" == "" ]] ; then
53
- [[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
54
- || cp $ESS/$DATASET.ess.faa $TMPDIR/$DATASET.ess.faa
55
- HAAI=$(aai.rb -1 $TMPDIR/$DATASET.ess.faa -2 $ESS/$i.ess.faa \
56
- -t $CORES -a -n 10 -S $TMPDIR/01.haai.db --name1 $DATASET \
57
- --name2 $i --lookup-first --no-save-rbm || echo "")
58
- if [[ "$HAAI" != "" \
59
- && $(perl -MPOSIX -e "print floor $HAAI") -lt 90 ]] ; then
60
- AAI=$(perl -e \
61
- "printf '%f', 100-exp(2.435076 + 0.4275193*log(100-$HAAI))")
62
- echo "insert into aai values('$DATASET','$i','$AAI',0,0,0);" \
63
- | sqlite3 $TMPDIR/02.aai.db
64
- fi
65
- fi
66
- # Try with complete AAI
67
- if [[ "$AAI" == "" ]] ; then
68
- [[ -e "$TMPDIR/$DATASET.faa" ]] \
69
- || cp ../06.cds/$DATASET.faa $TMPDIR/$DATASET.faa
70
- AAI=$(aai.rb -1 $TMPDIR/$DATASET.faa -2 ../06.cds/$i.faa -t $CORES -a \
71
- -S $TMPDIR/02.aai.db --name1 $DATASET --name2 $i --lookup-first \
72
- || echo "")
73
- fi
74
- date "+%Y-%m-%d %H:%M:%S %z"
75
- # Check if ANI is meaningful
76
- if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
77
- && -e "../05.assembly/$i.LargeContigs.fna" \
78
- && $(perl -MPOSIX -e "print ceil $AAI") -gt 90 ]] ; then
79
- # Check if this is done (e.g., in a previous failed iteration)
80
- ANI=$(echo "select ani from ani where seq1='$DATASET' and seq2='$i';" \
81
- | sqlite3 $TMPDIR/03.ani.db || echo "")
82
- # Try the other direction
83
- if [[ "$ANI" == "" && -s 03.ani/$i.db ]] ; then
84
- cp "03.ani/$i.db" "$TMPDIR/$i.db"
85
- ANI=$(echo "select ani from ani" \
86
- "where seq2='$DATASET' and seq1='$i';" \
87
- | sqlite3 "$TMPDIR/$i.db" || echo "")
88
- rm "$TMPDIR/$i.db"
89
- fi
90
- # Calculate it
91
- if [[ "$ANI" == "" ]] ; then
92
- [[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
93
- || cp ../05.assembly/$DATASET.LargeContigs.fna \
94
- $TMPDIR/$DATASET.LargeContigs.fna
95
- ANI=$(ani.rb -1 $TMPDIR/$DATASET.LargeContigs.fna \
96
- -2 ../05.assembly/$i.LargeContigs.fna -t $CORES \
97
- -S $TMPDIR/03.ani.db -a --name1 $DATASET --name2 $i \
98
- --no-save-regions --no-save-rbm --lookup-first \
99
- || echo "")
100
- fi
101
- fi
102
- echo "$AAI;$ANI"
103
- checkpoint_n
34
+ echo "[ $(date "+%Y-%m-%d %H:%M:%S %z") ] $i"
35
+ AAI=""; ANI="";
36
+ # Check if the i-th dataset is ready
37
+ [[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
38
+ # Check if this is done (e.g., in a previous failed iteration)
39
+ AAI=$(aai_from_db $DATASET $i $TMPDIR/02.aai.db)
40
+ # Try the other direction
41
+ [[ "${AAI%.*}" -le 0 ]] && AAI=$(aai_from_db $i $DATASET 02.aai/$i.db)
42
+ # Try with hAAI
43
+ if [[ "${AAI%.*}" -le 0 ]] ; then
44
+ [[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
45
+ || cp $ESS/$DATASET.ess.faa $TMPDIR/$DATASET.ess.faa
46
+ AAI=$(haai $TMPDIR/$DATASET.ess.faa $ESS/$i.ess.faa \
47
+ $CORES $TMPDIR/01.haai.db $TMPDIR/02.aai.db)
48
+ fi
49
+ # Try with complete AAI
50
+ if [[ "${AAI%.*}" -le 0 ]] ; then
51
+ [[ -e "$TMPDIR/$DATASET.faa" ]] \
52
+ || cp ../06.cds/$DATASET.faa $TMPDIR/$DATASET.faa
53
+ AAI=$(aai $TMPDIR/$DATASET.faa ../06.cds/$i.faa $CORES $TMPDIR/02.aai.db)
54
+ fi
55
+ # Check if ANI is meaningful
56
+ if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
57
+ && -e "../05.assembly/$i.LargeContigs.fna" \
58
+ && $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
59
+ # Check if this is done (e.g., in a previous failed iteration)
60
+ ANI=$(ani_from_db $DATASET $i $TMPDIR/03.ani.db)
61
+ # Try the other direction
62
+ [[ "${ANI%.*}" -le 0 ]] && ANI=$(ani_from_db $i $DATASET 03.ani/$i.db)
63
+ # Calculate it
64
+ if [[ "${ANI%.*}" -le 0 ]] ; then
65
+ [[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
66
+ || cp ../05.assembly/$DATASET.LargeContigs.fna \
67
+ $TMPDIR/$DATASET.LargeContigs.fna
68
+ ANI=$(ani $TMPDIR/$DATASET.LargeContigs.fna \
69
+ ../05.assembly/$i.LargeContigs.fna $CORES $TMPDIR/03.ani.db)
70
+ fi
71
+ fi
72
+ checkpoint_n
104
73
  done
105
74
  N=10
106
75
  checkpoint_n
@@ -11,12 +11,26 @@ b=$DATASET
11
11
  # Initialize
12
12
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
13
13
 
14
+ # Interpose (if needed)
15
+ TF="../04.trimmed_fasta"
16
+ if [[ -s $TF/$DATASET.1.fasta \
17
+ && -s $TF/$DATASET.2.fasta \
18
+ && ! -s $TF/$DATASET.CoupledReads.fa ]] ; then
19
+ FastA.interpose.pl $TF/$DATASET.CoupledReads.fa $TF/$DATASET.[12].fasta
20
+ gzip -9 -f $TF/$DATASET.1.fasta
21
+ gzip -9 -f $TF/$DATASET.2.fasta
22
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
23
+ fi
24
+
14
25
  # Assemble
15
- FA="../04.trimmed_fasta/$DATASET.CoupledReads.fa"
26
+ FA="$TF/$DATASET.CoupledReads.fa"
16
27
  [[ -e $FA ]] || FA="$FA.gz"
17
28
  [[ -e $FA ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
18
29
  [[ -e $FA ]] || FA="$FA.gz"
19
- idba_ud --pre_correction -r "$FA" -o "$DATASET" --num_threads "$CORES"
30
+ RD="r"
31
+ [[ $FA == *.SingleReads.fa* ]] && RD="l"
32
+ idba_ud --pre_correction -$RD "$FA" -o "$DATASET" --num_threads "$CORES" || true
33
+ [[ -s $DATASET/contig.fa ]] || exit 1
20
34
 
21
35
  # Clean
22
36
  cd $DATASET
@@ -36,4 +50,3 @@ FastA.length.pl $DATASET.AllContigs.fna | awk '$2>=1000{print $1}' \
36
50
  # Finalize
37
51
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
38
52
  miga add_result -P "$PROJECT" -D "$DATASET" -r assembly
39
-