miga-base 0.2.0.9 → 0.2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -0
- data/actions/add_result.rb +37 -0
- data/actions/add_taxonomy.rb +63 -0
- data/actions/create_dataset.rb +49 -0
- data/actions/create_project.rb +46 -0
- data/actions/daemon.rb +50 -0
- data/actions/date.rb +14 -0
- data/actions/{download_dataset → download_dataset.rb} +5 -28
- data/actions/find_datasets.rb +41 -0
- data/actions/import_datasets.rb +47 -0
- data/actions/index_taxonomy.rb +46 -0
- data/actions/list_datasets.rb +50 -0
- data/actions/list_files.rb +43 -0
- data/actions/project_info.rb +40 -0
- data/actions/unlink_dataset.rb +28 -0
- data/bin/miga +129 -33
- data/lib/miga/daemon.rb +48 -34
- data/lib/miga/dataset.rb +7 -123
- data/lib/miga/dataset_result.rb +177 -0
- data/lib/miga/project.rb +32 -12
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_functions.bash +82 -0
- data/scripts/_distances_noref_nomulti.bash +96 -67
- data/scripts/_distances_ref_nomulti.bash +54 -85
- data/scripts/assembly.bash +16 -3
- data/scripts/clade_finding.bash +20 -18
- data/scripts/distances.bash +2 -1
- data/scripts/init.bash +2 -6
- data/scripts/subclades.bash +4 -5
- data/test/common_test.rb +2 -2
- data/test/daemon_test.rb +73 -1
- data/test/project_test.rb +26 -2
- data/test/taxonomy_test.rb +10 -0
- data/test/test_helper.rb +1 -1
- data/utils/subclades-compile.rb +4 -2
- data/utils/subclades.R +140 -158
- metadata +48 -44
- data/actions/add_result +0 -58
- data/actions/add_taxonomy +0 -83
- data/actions/create_dataset +0 -61
- data/actions/create_project +0 -67
- data/actions/daemon +0 -66
- data/actions/find_datasets +0 -61
- data/actions/import_datasets +0 -83
- data/actions/index_taxonomy +0 -68
- data/actions/list_datasets +0 -81
- data/actions/list_files +0 -63
- data/actions/unlink_dataset +0 -49
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
|
3
|
+
# $NOMULTI, $REF
|
4
|
+
|
5
|
+
set -e
|
6
|
+
|
7
|
+
# Pick a default for $MIGA_AAI_SAVE_RBM when the caller did not provide one:
# "save-rbm" in general, downgraded to "no-save-rbm" when $PROJECT is set and
# `miga project_info` reports a project type other than "clade".
if [[ -z $MIGA_AAI_SAVE_RBM ]] ; then
  MIGA_AAI_SAVE_RBM="save-rbm"
  # `miga` is only queried when a project is defined (short-circuit on -n)
  if [[ -n $PROJECT \
        && $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
    MIGA_AAI_SAVE_RBM="no-save-rbm"
  fi
fi
|
15
|
+
|
16
|
+
# Creates the `aai` table in an SQLite3 database, unless it already exists.
# Arguments: $1 - path to the SQLite3 database file
# Requires:  sqlite3 in the PATH
function make_empty_aai_db {
  local DB=$1
  # The path is quoted so that databases under directories containing spaces
  # or glob characters are passed to sqlite3 as a single argument
  echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
    " aai float, sd float, n int, omega int);" | sqlite3 "$DB"
}
|
21
|
+
|
22
|
+
# Prints the dataset name encoded in a file path: the base name of the path,
# truncated at the first character that is not in [A-Za-z0-9_].
# Arguments: $1 - path to a file (e.g., ../06.cds/my_dataset.faa)
# Outputs:   the dataset name on stdout (e.g., my_dataset)
function ds_name {
  # Force ASCII semantics for the bracket ranges below, whatever the locale
  local LC_ALL=C
  local base
  # Quoted, so a path containing spaces is not split into NAME and SUFFIX
  base=$(basename -- "$1")
  # Strip the longest suffix that starts with a non-word character
  printf '%s\n' "${base%%[^A-Za-z0-9_]*}"
}
|
25
|
+
|
26
|
+
# Runs aai.rb between two protein FastA files, looking the pair up in the
# database first, and prints whatever aai.rb prints. Prints "0" if aai.rb
# exits with a non-zero status.
# Globals:   MIGA_AAI_SAVE_RBM (read) - passed to aai.rb as --$MIGA_AAI_SAVE_RBM
# Arguments: $1 - first FastA file
#            $2 - second FastA file
#            $3 - value passed to aai.rb -t (callers pass $CORES)
#            $4 - path to the SQLite3 database passed to aai.rb -S
function aai {
  local F1=$1
  local F2=$2
  local TH=$3
  local DB=$4
  local N1=$(ds_name "$F1")
  local N2=$(ds_name "$F2")
  # Every expansion is quoted so that paths with spaces reach aai.rb intact
  aai.rb -1 "$F1" -2 "$F2" -t "$TH" -a --lookup-first -S "$DB" \
    --name1 "$N1" --name2 "$N2" "--$MIGA_AAI_SAVE_RBM" || echo "0"
}
|
36
|
+
|
37
|
+
# Runs ani.rb between two nucleotide FastA files, looking the pair up in the
# database first and without saving regions or RBMs, and prints whatever
# ani.rb prints. Prints "0" if ani.rb exits with a non-zero status.
# Arguments: $1 - first FastA file
#            $2 - second FastA file
#            $3 - value passed to ani.rb -t (callers pass $CORES)
#            $4 - path to the SQLite3 database passed to ani.rb -S
function ani {
  local F1=$1
  local F2=$2
  local TH=$3
  local DB=$4
  local N1=$(ds_name "$F1")
  local N2=$(ds_name "$F2")
  # Every expansion is quoted so that paths with spaces reach ani.rb intact
  ani.rb -1 "$F1" -2 "$F2" -t "$TH" -a --no-save-regions --no-save-rbm \
    --lookup-first -S "$DB" --name1 "$N1" --name2 "$N2" || echo "0"
}
|
47
|
+
|
48
|
+
# Computes the AAI between two sets of essential proteins (hAAI) with `aai`
# (RBMs are never saved for this run) and, when the hAAI is at most 90,
# derives an AAI estimate from it, stores the estimate in the AAI database,
# and prints it. Prints nothing when the hAAI is empty or above 90.
# Arguments: $1 - first essential-proteins FastA file
#            $2 - second essential-proteins FastA file
#            $3 - value passed to aai.rb -t (callers pass $CORES)
#            $4 - path to the SQLite3 database for hAAI values
#            $5 - path to the SQLite3 database for (estimated) AAI values
# Requires:  perl and sqlite3 in the PATH
function haai {
  local F1=$1
  local F2=$2
  local TH=$3
  local DB=$4
  local AAI_DB=$5
  local N1=$(ds_name "$F1")
  local N2=$(ds_name "$F2")
  local HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" aai "$F1" "$F2" "$TH" "$DB")
  # The value is handed to perl through @ARGV instead of being pasted into
  # the program text, so unexpected output cannot alter the perl code
  if [[ "$HAAI" != "" \
        && $(perl -e 'print 1 if $ARGV[0] <= 90' -- "$HAAI") == "1" ]] ; then
    # Empirical hAAI -> AAI transformation (coefficients kept as-is)
    local AAI=$(perl \
      -e 'print (100-exp(2.435076 + 0.4275193*log(100-$ARGV[0])))' -- "$HAAI")
    [[ ! -s "$AAI_DB" ]] && make_empty_aai_db "$AAI_DB"
    echo "insert into aai values('$N1','$N2','$AAI',0,0,0);" \
      | sqlite3 "$AAI_DB"
    echo "$AAI"
  fi
}
|
64
|
+
|
65
|
+
# Prints the stored value of a metric for an ordered pair of names, read from
# an SQLite3 database where table and column share the metric's name. Prints
# "0" if sqlite3 fails, and nothing if the database is missing or empty.
# Arguments: $1 - name matched against seq1
#            $2 - name matched against seq2
#            $3 - path to the SQLite3 database
#            $4 - metric, used as both column and table name (aai or ani)
function val_from_db {
  local N1=$1
  local N2=$2
  local DB=$3
  local MT=$4
  # Quoted path: safe for database files under directories with spaces
  if [[ -s "$DB" ]] ; then
    echo "select $MT from $MT where seq1='$N1' and seq2='$N2';" \
      | sqlite3 "$DB" || echo 0
  fi
}
|
75
|
+
|
76
|
+
# Prints the AAI stored for an ordered pair of names (see val_from_db).
# Arguments: $1 - name matched against seq1
#            $2 - name matched against seq2
#            $3 - path to the SQLite3 database
function aai_from_db {
  # Quoted so that each argument is forwarded as exactly one word
  val_from_db "$1" "$2" "$3" aai
}
|
79
|
+
|
80
|
+
# Prints the ANI stored for an ordered pair of names (see val_from_db).
# Arguments: $1 - name matched against seq1
#            $2 - name matched against seq2
#            $3 - path to the SQLite3 database
function ani_from_db {
  # Quoted so that each argument is forwarded as exactly one word
  val_from_db "$1" "$2" "$3" ani
}
|
@@ -1,88 +1,117 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
|
3
|
-
#
|
3
|
+
# $NOMULTI, $REF
|
4
4
|
|
5
5
|
set -e
|
6
6
|
|
7
7
|
# Deal with previous runs (if any)
|
8
|
+
exists $DATASET.haai.db && cp $DATASET.haai.db $TMPDIR
|
8
9
|
exists $DATASET.a[an]i.db && cp $DATASET.a[an]i.db $TMPDIR
|
9
10
|
exists $DATASET.a[an]i.9[05] && rm $DATASET.a[an]i.9[05]
|
10
11
|
N=0
|
11
12
|
function checkpoint_n {
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
13
|
+
let N=$N+1
|
14
|
+
if [[ $N -ge 10 ]] ; then
|
15
|
+
for metric in haai aai ani ; do
|
16
|
+
if [[ -s $TMPDIR/$DATASET.$metric.db ]] ; then
|
17
|
+
echo "select count(*) from ${metric#h};" \
|
18
|
+
| sqlite3 $TMPDIR/$DATASET.$metric.db \
|
19
|
+
>/dev/null || exit 1
|
20
|
+
cp $TMPDIR/$DATASET.$metric.db .
|
21
|
+
fi
|
22
|
+
done
|
23
|
+
N=0
|
24
|
+
fi
|
24
25
|
}
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
27
|
+
ESS="../07.annotation/01.function/01.essential"
|
28
|
+
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
29
|
+
# Classify aai-clade (if project type is not clade)
|
30
|
+
CLADES="../10.clades/01.find"
|
31
|
+
CLASSIF="."
|
32
|
+
[[ -e "$DATASET.aai-medoids.tsv" ]] && rm "$DATASET.aai-medoids.tsv"
|
33
|
+
while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
|
34
|
+
MAX_AAI=0
|
35
|
+
AAI_MED=""
|
36
|
+
AAI_CLS=""
|
37
|
+
i_n=0
|
38
|
+
for i in $(cat "$CLADES/$CLASSIF/miga-project.medoids") ; do
|
39
|
+
let i_n=$i_n+1
|
40
|
+
AAI=$(haai $ESS/$DATASET.ess.faa $ESS/$i.ess.faa $CORES \
|
41
|
+
$TMPDIR/$DATASET.haai.db $TMPDIR/$DATASET.aai.db)
|
42
|
+
[[ "${AAI%.*}" -le 0 ]] \
|
43
|
+
&& AAI=$(aai ../06.cds/$DATASET.faa ../06.cds/$i.faa $CORES \
|
44
|
+
$TMPDIR/$DATASET.aai.db)
|
42
45
|
checkpoint_n
|
43
|
-
if [[ $(perl -
|
44
|
-
|
46
|
+
if [[ $(perl -e "print 1 if '$AAI' >= '$MAX_AAI'") == "1" ]] ; then
|
47
|
+
MAX_AAI=$AAI
|
48
|
+
AAI_MED=$i
|
49
|
+
AAI_CLS=$i_n
|
50
|
+
echo "[$CLASSIF] New max: $AAI_MED ($AAI_CLS): $MAX_AAI"
|
45
51
|
fi
|
46
|
-
|
47
|
-
|
52
|
+
done
|
53
|
+
CLASSIF="$CLASSIF/miga-project.sc-$AAI_CLS"
|
54
|
+
echo "$AAI_CLS $AAI_MED $MAX_AAI $CLASSIF" \
|
55
|
+
>> "$DATASET.aai-medoids.tsv"
|
56
|
+
done
|
48
57
|
|
49
|
-
#
|
50
|
-
|
51
|
-
CLASSIF
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
58
|
+
# Calculate all the AAIs/ANIs against the lowest subclade (if classified)
|
59
|
+
if [[ "$CLASSIF" != "." ]] ; then
|
60
|
+
PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
|
61
|
+
if [[ -s "$PAR" ]] ; then
|
62
|
+
for i in $(cat "$PAR" | awk "\$2==$AAI_CLS{print \$1}") ; do
|
63
|
+
AAI=$(aai ../06.cds/$DATASET.faa ../06.cds/$i.faa $CORES \
|
64
|
+
$TMPDIR/$DATASET.aai.db)
|
65
|
+
if [[ $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
66
|
+
# ani expects (file1, file2, threads, database). $CORES must be passed as
# the third argument; without it the database path is taken as the thread
# count and ani.rb receives no database.
ani "../05.assembly/$DATASET.LargeContigs.fna" \
  "../05.assembly/$i.LargeContigs.fna" "$CORES" \
  "$TMPDIR/$DATASET.ani.db" >/dev/null
|
69
|
+
fi
|
70
|
+
checkpoint_n
|
71
|
+
done
|
72
|
+
fi
|
73
|
+
fi
|
74
|
+
else
|
75
|
+
# Classify ani-clade (if project type is clade)
|
76
|
+
CLADES="../10.clades/02.ani"
|
77
|
+
CLASSIF="."
|
78
|
+
[[ -e "$DATASET.ani-medoids.tsv" ]] && rm "$DATASET.ani-medoids.tsv"
|
79
|
+
while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
|
80
|
+
MAX_ANI=0
|
81
|
+
ANI_MED=""
|
82
|
+
ANI_CLS=""
|
83
|
+
i_n=0
|
84
|
+
for i in $(cat "$CLADES/$CLASSIF/miga-project.medoids") ; do
|
85
|
+
let i_n=$i_n+1
|
86
|
+
ANI=$(ani ../05.assembly/$DATASET.LargeContigs.fna \
|
87
|
+
../05.assembly/$i.LargeContigs.fna $CORES $TMPDIR/$DATASET.ani.db)
|
61
88
|
checkpoint_n
|
62
|
-
if [[ $(perl -e "print 1 if $ANI
|
63
|
-
|
64
|
-
|
89
|
+
if [[ $(perl -e "print 1 if '$ANI' >= '$MAX_ANI'") == "1" ]] ; then
|
90
|
+
MAX_ANI=$ANI
|
91
|
+
ANI_MED=$i
|
92
|
+
ANI_CLS=$i_n
|
93
|
+
echo "[$CLASSIF] New max: $ANI_MED ($ANI_CLS): $MAX_ANI"
|
65
94
|
fi
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
95
|
+
done
|
96
|
+
CLASSIF="$CLASSIF/miga-project.sc-$ANI_CLS"
|
97
|
+
echo "$ANI_CLS $ANI_MED $MAX_ANI $CLASSIF" \
|
98
|
+
>> "$DATASET.ani-medoids.tsv"
|
99
|
+
done
|
71
100
|
|
72
|
-
# Calculate all the ANIs against the lowest subclade (if classified in-clade)
|
73
|
-
if [[ "$CLASSIF" != "." ]] ; then
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
101
|
+
# Calculate all the ANIs against the lowest subclade (if classified in-clade)
|
102
|
+
if [[ "$CLASSIF" != "." ]] ; then
|
103
|
+
PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
|
104
|
+
if [[ -s "$CLADES/$CLASSIF/miga-project.all" ]] ; then
|
105
|
+
for i in $(cat "$PAR" | awk "\$2==$ANI_CLS{print \$1}") ; do
|
106
|
+
ani ../05.assembly/$DATASET.LargeContigs.fna \
|
107
|
+
../05.assembly/$i.LargeContigs.fna $CORES \
|
108
|
+
$TMPDIR/$DATASET.ani.db > /dev/null
|
109
|
+
checkpoint_n
|
81
110
|
done
|
82
|
-
|
111
|
+
fi
|
112
|
+
fi
|
83
113
|
fi
|
84
114
|
|
85
|
-
#
|
86
|
-
|
87
|
-
|
88
|
-
|
115
|
+
#Finalize
|
116
|
+
N=11
|
117
|
+
checkpoint_n
|
@@ -5,102 +5,71 @@
|
|
5
5
|
set -e
|
6
6
|
|
7
7
|
function checkpoint_n {
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
8
|
+
if [[ $N -eq 10 ]] ; then
|
9
|
+
for t in 01.haai 02.aai 03.ani ; do
|
10
|
+
if [[ -s $TMPDIR/$t.db ]] ; then
|
11
|
+
tab="aai"
|
12
|
+
[[ "$t" == "03.ani" ]] && tab="ani"
|
13
|
+
echo "select count(*) from $tab;" \
|
14
|
+
| sqlite3 $TMPDIR/$t.db \
|
15
|
+
>/dev/null || exit 1
|
16
|
+
cp $TMPDIR/$t.db $t/$DATASET.db
|
17
|
+
fi
|
18
|
+
done
|
19
|
+
N=0
|
20
|
+
fi
|
21
|
+
let N=$N+1
|
22
22
|
}
|
23
23
|
|
24
24
|
ESS="../07.annotation/01.function/01.essential"
|
25
25
|
|
26
26
|
# Initialize temporals
|
27
27
|
for t in 01.haai 02.aai 03.ani ; do
|
28
|
-
|
28
|
+
[[ -s $t/$DATASET.db ]] && cp $t/$DATASET.db $TMPDIR/$t.db
|
29
29
|
done
|
30
|
-
echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
|
31
|
-
"aai float, sd float, n int, omega int);" | sqlite3 $TMPDIR/02.aai.db
|
32
30
|
N=1
|
33
31
|
|
34
32
|
# Traverse "nearly-half" of the ref-datasets using first-come-first-served
|
35
33
|
for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
# Check if ANI is meaningful
|
76
|
-
if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
|
77
|
-
&& -e "../05.assembly/$i.LargeContigs.fna" \
|
78
|
-
&& $(perl -MPOSIX -e "print ceil $AAI") -gt 90 ]] ; then
|
79
|
-
# Check if this is done (e.g., in a previous failed iteration)
|
80
|
-
ANI=$(echo "select ani from ani where seq1='$DATASET' and seq2='$i';" \
|
81
|
-
| sqlite3 $TMPDIR/03.ani.db || echo "")
|
82
|
-
# Try the other direction
|
83
|
-
if [[ "$ANI" == "" && -s 03.ani/$i.db ]] ; then
|
84
|
-
cp "03.ani/$i.db" "$TMPDIR/$i.db"
|
85
|
-
ANI=$(echo "select ani from ani" \
|
86
|
-
"where seq2='$DATASET' and seq1='$i';" \
|
87
|
-
| sqlite3 "$TMPDIR/$i.db" || echo "")
|
88
|
-
rm "$TMPDIR/$i.db"
|
89
|
-
fi
|
90
|
-
# Calculate it
|
91
|
-
if [[ "$ANI" == "" ]] ; then
|
92
|
-
[[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
|
93
|
-
|| cp ../05.assembly/$DATASET.LargeContigs.fna \
|
94
|
-
$TMPDIR/$DATASET.LargeContigs.fna
|
95
|
-
ANI=$(ani.rb -1 $TMPDIR/$DATASET.LargeContigs.fna \
|
96
|
-
-2 ../05.assembly/$i.LargeContigs.fna -t $CORES \
|
97
|
-
-S $TMPDIR/03.ani.db -a --name1 $DATASET --name2 $i \
|
98
|
-
--no-save-regions --no-save-rbm --lookup-first \
|
99
|
-
|| echo "")
|
100
|
-
fi
|
101
|
-
fi
|
102
|
-
echo "$AAI;$ANI"
|
103
|
-
checkpoint_n
|
34
|
+
echo "[ $(date "+%Y-%m-%d %H:%M:%S %z") ] $i"
|
35
|
+
AAI=""; ANI="";
|
36
|
+
# Check if the i-th dataset is ready
|
37
|
+
[[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
|
38
|
+
# Check if this is done (e.g., in a previous failed iteration)
|
39
|
+
AAI=$(aai_from_db $DATASET $i $TMPDIR/02.aai.db)
|
40
|
+
# Try the other direction
|
41
|
+
[[ "${AAI%.*}" -le 0 ]] && AAI=$(aai_from_db $i $DATASET 02.aai/$i.db)
|
42
|
+
# Try with hAAI
|
43
|
+
if [[ "${AAI%.*}" -le 0 ]] ; then
|
44
|
+
[[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
|
45
|
+
|| cp $ESS/$DATASET.ess.faa $TMPDIR/$DATASET.ess.faa
|
46
|
+
AAI=$(haai $TMPDIR/$DATASET.ess.faa $ESS/$i.ess.faa \
|
47
|
+
$CORES $TMPDIR/01.haai.db $TMPDIR/02.aai.db)
|
48
|
+
fi
|
49
|
+
# Try with complete AAI
|
50
|
+
if [[ "${AAI%.*}" -le 0 ]] ; then
|
51
|
+
[[ -e "$TMPDIR/$DATASET.faa" ]] \
|
52
|
+
|| cp ../06.cds/$DATASET.faa $TMPDIR/$DATASET.faa
|
53
|
+
AAI=$(aai $TMPDIR/$DATASET.faa ../06.cds/$i.faa $CORES $TMPDIR/02.aai.db)
|
54
|
+
fi
|
55
|
+
# Check if ANI is meaningful
|
56
|
+
if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
|
57
|
+
&& -e "../05.assembly/$i.LargeContigs.fna" \
|
58
|
+
&& $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
59
|
+
# Check if this is done (e.g., in a previous failed iteration)
|
60
|
+
ANI=$(ani_from_db $DATASET $i $TMPDIR/03.ani.db)
|
61
|
+
# Try the other direction
|
62
|
+
[[ "${ANI%.*}" -le 0 ]] && ANI=$(ani_from_db $i $DATASET 03.ani/$i.db)
|
63
|
+
# Calculate it
|
64
|
+
if [[ "${ANI%.*}" -le 0 ]] ; then
|
65
|
+
[[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
|
66
|
+
|| cp ../05.assembly/$DATASET.LargeContigs.fna \
|
67
|
+
$TMPDIR/$DATASET.LargeContigs.fna
|
68
|
+
ANI=$(ani $TMPDIR/$DATASET.LargeContigs.fna \
|
69
|
+
../05.assembly/$i.LargeContigs.fna $CORES $TMPDIR/03.ani.db)
|
70
|
+
fi
|
71
|
+
fi
|
72
|
+
checkpoint_n
|
104
73
|
done
|
105
74
|
N=10
|
106
75
|
checkpoint_n
|
data/scripts/assembly.bash
CHANGED
@@ -11,12 +11,26 @@ b=$DATASET
|
|
11
11
|
# Initialize
|
12
12
|
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
|
13
13
|
|
14
|
+
# Interpose (if needed)
|
15
|
+
TF="../04.trimmed_fasta"
|
16
|
+
if [[ -s $TF/$DATASET.1.fasta \
|
17
|
+
&& -s $TF/$DATASET.2.fasta \
|
18
|
+
&& ! -s $TF/$DATASET.CoupledReads.fa ]] ; then
|
19
|
+
FastA.interpose.pl $TF/$DATASET.CoupledReads.fa $TF/$DATASET.[12].fasta
|
20
|
+
gzip -9 -f $TF/$DATASET.1.fasta
|
21
|
+
gzip -9 -f $TF/$DATASET.2.fasta
|
22
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
|
23
|
+
fi
|
24
|
+
|
14
25
|
# Assemble
|
15
|
-
FA="
|
26
|
+
FA="$TF/$DATASET.CoupledReads.fa"
|
16
27
|
[[ -e $FA ]] || FA="$FA.gz"
|
17
28
|
[[ -e $FA ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
|
18
29
|
[[ -e $FA ]] || FA="$FA.gz"
|
19
|
-
|
30
|
+
RD="r"
|
31
|
+
[[ $FA == *.SingleReads.fa* ]] && RD="l"
|
32
|
+
idba_ud --pre_correction -$RD "$FA" -o "$DATASET" --num_threads "$CORES" || true
|
33
|
+
[[ -s $DATASET/contig.fa ]] || exit 1
|
20
34
|
|
21
35
|
# Clean
|
22
36
|
cd $DATASET
|
@@ -36,4 +50,3 @@ FastA.length.pl $DATASET.AllContigs.fna | awk '$2>=1000{print $1}' \
|
|
36
50
|
# Finalize
|
37
51
|
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
|
38
52
|
miga add_result -P "$PROJECT" -D "$DATASET" -r assembly
|
39
|
-
|