miga-base 0.2.6.4 → 0.2.6.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/list_datasets.rb +6 -1
- data/actions/run_local.rb +1 -1
- data/actions/tax_distributions.rb +4 -4
- data/lib/miga/common.rb +18 -0
- data/lib/miga/daemon.rb +1 -1
- data/lib/miga/dataset_result.rb +46 -47
- data/lib/miga/remote_dataset.rb +52 -32
- data/lib/miga/tax_dist.rb +2 -2
- data/lib/miga/tax_index.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_functions.bash +17 -8
- data/scripts/_distances_noref_nomulti.bash +26 -7
- data/scripts/aai_distances.bash +3 -2
- data/scripts/ani_distances.bash +3 -2
- data/scripts/assembly.bash +24 -24
- data/scripts/cds.bash +22 -30
- data/scripts/clade_finding.bash +5 -4
- data/scripts/distances.bash +13 -9
- data/scripts/essential_genes.bash +12 -11
- data/scripts/haai_distances.bash +3 -2
- data/scripts/init.bash +100 -108
- data/scripts/miga.bash +4 -2
- data/scripts/mytaxa.bash +72 -71
- data/scripts/mytaxa_scan.bash +62 -61
- data/scripts/ogs.bash +14 -13
- data/scripts/project_stats.bash +1 -0
- data/scripts/read_quality.bash +12 -16
- data/scripts/ssu.bash +18 -18
- data/scripts/stats.bash +3 -2
- data/scripts/subclades.bash +7 -6
- data/scripts/trimmed_fasta.bash +22 -21
- data/scripts/trimmed_reads.bash +34 -32
- data/utils/index_metadata.rb +4 -4
- data/utils/ref-tree.R +65 -0
- data/utils/requirements.txt +1 -1
- metadata +57 -56
data/scripts/mytaxa.bash
CHANGED
@@ -1,94 +1,95 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="mytaxa"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
|
9
10
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
11
|
cd "$DIR"
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
-
MT=$(dirname -- $(which MyTaxa))
|
14
|
+
miga date > "$DATASET.start"
|
15
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
15
16
|
|
16
17
|
# Check type of dataset
|
17
18
|
MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
|
18
|
-
|
19
|
+
| wc -l | awk '{print $1}')
|
19
20
|
if [[ "$MULTI" -eq "1" ]] ; then
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
21
|
+
# Check requirements
|
22
|
+
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
|
23
|
+
echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
|
24
|
+
"no such file or directory" >&2
|
25
|
+
exit 1
|
26
|
+
fi
|
27
|
+
if [[ ! -d "$MT/db" ]] ; then
|
28
|
+
echo "Cannot locate the MyTaxa index: $MT/db:" \
|
29
|
+
"no such file or directory" >&2
|
30
|
+
exit 1
|
31
|
+
fi
|
32
|
+
if [[ ! -d "$MT/utils" ]] ; then
|
33
|
+
echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
|
34
|
+
"no such file or directory" >&2
|
35
|
+
exit 1
|
36
|
+
fi
|
36
37
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
# Execute search
|
39
|
+
diamond blastp -q "../../../06.cds/$DATASET.faa" -d "$MT/AllGenomes.faa" \
|
40
|
+
-a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
|
41
|
+
diamond view -a "$DATASET.daa" -o "$DATASET.blast"
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
43
|
+
# Prepare MyTaxa input, execute MyTaxa, and generate profiles
|
44
|
+
[[ -e "../../../06.cds/$DATASET.gff2.gz" ]] \
|
45
|
+
&& [[ ! -e "../../../06.cds/$DATASET.gff2" ]] \
|
46
|
+
&& gunzip "../../../06.cds/$DATASET.gff2.gz"
|
47
|
+
[[ -e "../../../06.cds/$DATASET.gff3.gz" ]] \
|
48
|
+
&& [[ ! -e "../../../06.cds/$DATASET.gff3" ]] \
|
49
|
+
&& gunzip "../../../06.cds/$DATASET.gff3.gz"
|
50
|
+
if [[ -e "../../../06.cds/$DATASET.gff2" ]] ; then
|
51
|
+
# GFF2
|
52
|
+
perl "$MT/utils/infile_convert.pl" -f gff2 \
|
53
|
+
"../../../06.cds/$DATASET.gff2" "$DATASET.blast" \
|
54
|
+
| sort -k 13 > "$DATASET.mytaxain"
|
55
|
+
"$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
|
56
|
+
perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
|
57
|
+
-g "../../../06.cds/$DATASET.gff2" -f gff2 \
|
58
|
+
-I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
|
59
|
+
-K "$DATASET.mytaxa.krona" -u
|
60
|
+
elif [[ -e "../../../06.cds/$DATASET.gff3" ]] ; then
|
61
|
+
# GFF3
|
62
|
+
perl "$MT/utils/infile_convert.pl" -f gff3 \
|
63
|
+
"../../../06.cds/$DATASET.gff3" "$DATASET.blast" | sort -k 13 \
|
64
|
+
> "$DATASET.mytaxain"
|
65
|
+
"$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
|
66
|
+
perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
|
67
|
+
-g "../../../06.cds/$DATASET.gff3" -f gff3 \
|
68
|
+
-I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
|
69
|
+
-K "$DATASET.mytaxa.krona" -u
|
70
|
+
else
|
71
|
+
# No GFF
|
72
|
+
perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
|
73
|
+
| sort -k 13 > "$DATASET.mytaxain"
|
74
|
+
"$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
|
75
|
+
perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
|
76
|
+
-I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
|
77
|
+
-K "$DATASET.mytaxa.krona" -u
|
78
|
+
fi
|
78
79
|
|
79
|
-
|
80
|
-
|
80
|
+
# Execute Krona
|
81
|
+
ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"
|
81
82
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
83
|
+
# Gzip and cleanup
|
84
|
+
[[ -e "../../../06.cds/$DATASET.gff2" ]] \
|
85
|
+
&& gzip -9 -f "../../../06.cds/$DATASET.gff2"
|
86
|
+
[[ -e "../../../06.cds/$DATASET.gff3" ]] \
|
87
|
+
&& gzip -9 -f "../../../06.cds/$DATASET.gff3"
|
88
|
+
gzip -9 -f "$DATASET.mytaxain"
|
89
|
+
gzip -9 -f "$DATASET.blast"
|
90
|
+
rm "$DATASET.daa"
|
90
91
|
fi
|
91
92
|
|
92
93
|
# Finalize
|
93
|
-
date
|
94
|
+
miga date > "$DATASET.done"
|
94
95
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/mytaxa_scan.bash
CHANGED
@@ -1,86 +1,87 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="mytaxa_scan"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
|
9
10
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
11
|
cd "$DIR"
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
-
MT=$(dirname -- $(which MyTaxa))
|
14
|
+
miga date > "$DATASET.start"
|
15
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
15
16
|
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
16
|
-
trap "rm -rf $TMPDIR; exit" SIGHUP SIGINT SIGTERM
|
17
|
+
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
17
18
|
|
18
19
|
# Check type of dataset
|
19
20
|
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
|
20
|
-
|
21
|
+
| wc -l | awk '{print $1}')
|
21
22
|
if [[ "$NOMULTI" -eq "1" ]] ; then
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
23
|
+
# Check requirements
|
24
|
+
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
|
25
|
+
echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
|
26
|
+
"no such file or directory" >&2
|
27
|
+
exit 1
|
28
|
+
fi
|
29
|
+
if [[ ! -d "$MT/db" ]] ; then
|
30
|
+
echo "Cannot locate the MyTaxa index: $MT/db:" \
|
31
|
+
"no such file or directory" >&2
|
32
|
+
exit 1
|
33
|
+
fi
|
34
|
+
if [[ ! -d "$MT/utils" ]] ; then
|
35
|
+
echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
|
36
|
+
"no such file or directory" >&2
|
37
|
+
exit 1
|
38
|
+
fi
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
40
|
+
if [[ ! -s "$DATASET.mytaxa" ]] ; then
|
41
|
+
# Execute search
|
42
|
+
if [[ ! -s "$DATASET.blast" ]] ; then
|
43
|
+
diamond blastp -q "../../../06.cds/$DATASET.faa" \
|
44
|
+
-d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
|
45
|
+
-a "$DATASET.daa" -t "$TMPDIR"
|
46
|
+
diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
|
47
|
+
fi
|
47
48
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
49
|
+
# Prepare MyTaxa input, execute MyTaxa, and generate profiles
|
50
|
+
perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
|
51
|
+
| sort -k 13 > "$DATASET.mytaxain"
|
52
|
+
"$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
|
53
|
+
fi
|
54
|
+
ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
|
55
|
+
"$DATASET.mytaxa" "$DATASET.wintax"
|
56
|
+
echo "
|
57
|
+
source('$MIGA/utils/mytaxa_scan.R');
|
58
|
+
pdf('$DATASET.pdf', 12, 7);
|
59
|
+
mytaxa.scan('$DATASET.wintax');
|
60
|
+
dev.off();
|
61
|
+
" | R --vanilla
|
61
62
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
63
|
+
# Extract genes from flagged regions
|
64
|
+
[[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
|
65
|
+
if [[ -e "$DATASET.wintax.regions" ]] ; then
|
66
|
+
i=0
|
67
|
+
for win in $(cat "$DATASET.wintax.regions") ; do
|
68
|
+
let i=$i+1
|
69
|
+
awk "NR==$win" "$DATASET.wintax.genes" | tr "\\t" "\\n" \
|
70
|
+
> "$DATASET.reg/$i.ids"
|
71
|
+
FastA.filter.pl -q "$DATASET.reg/$i.ids" \
|
72
|
+
"../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
|
73
|
+
done
|
74
|
+
fi
|
74
75
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
76
|
+
# Clean
|
77
|
+
[[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
|
78
|
+
[[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
|
79
|
+
&& gzip -9 -f "$DATASET.blast"
|
80
|
+
[[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
|
81
|
+
&& gzip -9 -f "$DATASET.mytaxain"
|
81
82
|
fi
|
82
83
|
|
83
84
|
# Finalize
|
84
85
|
rm -R "$TMPDIR"
|
85
|
-
date
|
86
|
+
miga date > "$DATASET.done"
|
86
87
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/ogs.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="ogs"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/10.clades/03.ogs"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
echo -n "" > miga-project.log
|
14
15
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
@@ -16,25 +17,25 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
|
16
17
|
# Extract RBMs
|
17
18
|
[[ -d miga-project.rbm ]] || mkdir miga-project.rbm
|
18
19
|
for i in $DS ; do
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
20
|
+
for j in $DS ; do
|
21
|
+
file="miga-project.rbm/$i-$j.rbm"
|
22
|
+
[[ -s $file ]] && continue
|
23
|
+
echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
|
24
|
+
"where seq1='$i' and seq2='$j' ;" \
|
25
|
+
| sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
|
26
|
+
> "$file"
|
27
|
+
[[ -s "$file" ]] || rm "$file"
|
28
|
+
done
|
29
|
+
echo "$i" >> miga-project.log
|
29
30
|
done
|
30
31
|
|
31
32
|
# Estimate OGs
|
32
|
-
ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t $CORES
|
33
|
+
ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
|
33
34
|
ogs.stats.rb -o miga-project.ogs -j miga-project.stats
|
34
35
|
|
35
36
|
# Clean RBMs
|
36
37
|
rm -rf miga-project.rbm
|
37
38
|
|
38
39
|
# Finalize
|
39
|
-
date
|
40
|
+
miga date > "miga-project.done"
|
40
41
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/project_stats.bash
CHANGED
data/scripts/read_quality.bash
CHANGED
@@ -1,38 +1,34 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="read_quality"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/03.read_quality"
|
9
10
|
|
10
11
|
b=$DATASET
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
+
miga date > "$DATASET.start"
|
14
15
|
|
15
16
|
# FastQC
|
16
17
|
[[ -d "$b.fastqc" ]] || mkdir "$b.fastqc"
|
17
|
-
fastqc ../02.trimmed_reads/$b.[12].clipped.fastq -o $b.fastqc
|
18
|
+
fastqc "../02.trimmed_reads/$b".[12].clipped.fastq -o "$b.fastqc"
|
18
19
|
|
19
20
|
# SolexaQA++
|
20
21
|
[[ -d "$b.solexaqa" ]] || mkdir "$b.solexaqa"
|
21
|
-
exists ../02.trimmed_reads/$b.[12].*.pdf \
|
22
|
-
|
22
|
+
exists "../02.trimmed_reads/$b".[12].*.pdf \
|
23
|
+
&& mv "../02.trimmed_reads/$b".[12].*.pdf "$b.solexaqa/"
|
23
24
|
|
24
25
|
# Clean 02.trimmed_reads
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
&& rm ../02.trimmed_reads/$b.[12].fastq.trimmed.single
|
31
|
-
[[ -e "../02.trimmed_reads/$b.1.fastq.trimmed" ]] \
|
32
|
-
&& rm ../02.trimmed_reads/$b.[12].fastq.trimmed
|
33
|
-
[[ -e "../02.trimmed_reads/$b.1.fastq" ]] \
|
34
|
-
&& rm ../02.trimmed_reads/$b.[12].fastq
|
26
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq_trimmed.segments
|
27
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.paired
|
28
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.single
|
29
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed
|
30
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq
|
35
31
|
|
36
32
|
# Finalize
|
37
|
-
date
|
33
|
+
miga date > "$DATASET.done"
|
38
34
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/ssu.bash
CHANGED
@@ -1,36 +1,36 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="ssu"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
|
9
10
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
11
|
cd "$DIR"
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
+
miga date > "$DATASET.start"
|
14
15
|
|
15
16
|
fa="../../../05.assembly/$DATASET.LargeContigs.fna"
|
16
17
|
if [[ -s $fa ]] ; then
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
gzip -9 -f "$DATASET.ssu.all.fa"
|
18
|
+
# Run barrnap
|
19
|
+
barrnap --quiet --threads "$CORES" "$fa" | grep "^##gff\\|;product=16S " \
|
20
|
+
> "$DATASET.ssu.gff"
|
21
|
+
# Extract
|
22
|
+
bedtools getfasta -s "-fi" "$fa" -bed "$DATASET.ssu.gff" \
|
23
|
+
-fo "$DATASET.ssu.all.fa"
|
24
|
+
FastA.length.pl "$DATASET.ssu.all.fa" | sort -nr -k 2 | head -n 1 \
|
25
|
+
| cut -f 1 > "$DATASET.ssu.fa.id"
|
26
|
+
FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" > "$DATASET.ssu.fa"
|
27
|
+
rm "$DATASET.ssu.fa.id"
|
28
|
+
[[ -e "$fa.fai" ]] && rm "$fa.fai"
|
29
|
+
# Gzip
|
30
|
+
gzip -9 -f "$DATASET.ssu.gff"
|
31
|
+
gzip -9 -f "$DATASET.ssu.all.fa"
|
32
32
|
fi
|
33
33
|
|
34
34
|
# Finalize
|
35
|
-
date
|
35
|
+
miga date > "$DATASET.done"
|
36
36
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/stats.bash
CHANGED
@@ -4,13 +4,14 @@ set -e
|
|
4
4
|
SCRIPT="stats"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
DIR="$PROJECT/data/90.stats"
|
9
10
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
11
|
cd "$DIR"
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
+
miga date > "$DATASET.start"
|
14
15
|
|
15
16
|
# Calculate statistics
|
16
17
|
for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
|
@@ -19,5 +20,5 @@ for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
|
|
19
20
|
done
|
20
21
|
|
21
22
|
# Finalize
|
22
|
-
date
|
23
|
+
miga date > "$DATASET.done"
|
23
24
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/subclades.bash
CHANGED
@@ -4,23 +4,24 @@ set -e
|
|
4
4
|
SCRIPT="subclades"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/10.clades/02.ani"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
# Run R code
|
14
|
-
$MIGA/utils/subclades.R \
|
15
|
+
"$MIGA/utils/subclades.R" \
|
15
16
|
../../09.distances/03.ani/miga-project.txt.gz \
|
16
|
-
miga-project $CORES
|
17
|
+
miga-project "$CORES"
|
17
18
|
mv miga-project.nwk miga-project.ani.nwk
|
18
19
|
|
19
20
|
# Compile
|
20
21
|
ruby "$MIGA/utils/subclades-compile.rb" . \
|
21
|
-
|
22
|
-
|
22
|
+
> miga-project.class.tsv \
|
23
|
+
2> miga-project.class.nwk
|
23
24
|
|
24
25
|
# Finalize
|
25
|
-
date
|
26
|
+
miga date > "miga-project.done"
|
26
27
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/trimmed_fasta.bash
CHANGED
@@ -1,48 +1,49 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="trimmed_fasta"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/04.trimmed_fasta"
|
9
10
|
|
10
11
|
b=$DATASET
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
+
miga date > "$DATASET.start"
|
14
15
|
|
15
16
|
# Gunzip (if necessary)
|
16
17
|
for sis in 1 2 ; do
|
17
|
-
|
18
|
-
|
18
|
+
[[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq.gz" \
|
19
|
+
&& ! -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
|
19
20
|
&& gunzip "../02.trimmed_reads/$b.$sis.clipped.fastq.gz"
|
20
21
|
done
|
21
22
|
|
22
23
|
# FastQ -> FastA
|
23
|
-
|
24
|
-
if [[ -e ../02.trimmed_reads/$b.2.clipped.fastq ]] ; then
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
24
|
+
FastQ.toFastA.awk < "../02.trimmed_reads/$b.1.clipped.fastq" > "$b.1.fasta"
|
25
|
+
if [[ -e "../02.trimmed_reads/$b.2.clipped.fastq" ]] ; then
|
26
|
+
FastQ.toFastA.awk < "../02.trimmed_reads/$b.2.clipped.fastq" > "$b.2.fasta"
|
27
|
+
FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
|
28
|
+
gzip -9 -f "$b.2.fasta"
|
29
|
+
gzip -9 -f "$b.1.fasta"
|
30
|
+
FastQ.toFastA.awk < "../02.trimmed_reads/$b".[12].clipped.single.fastq \
|
31
|
+
> "$b.SingleReads.fa"
|
32
|
+
gzip -9 -f "$b.SingleReads.fa"
|
32
33
|
else
|
33
|
-
mv $b.1.fasta $b.SingleReads.fa
|
34
|
+
mv "$b.1.fasta" "$b.SingleReads.fa"
|
34
35
|
fi
|
35
36
|
|
36
37
|
# Compress input at 01.raw_reads and 02.trimmed_reads
|
37
38
|
for sis in 1 2 ; do
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
[[ -e "../01.raw_reads/$b.$sis.fastq" ]] \
|
40
|
+
&& gzip -9 -f "../01.raw_reads/$b.$sis.fastq"
|
41
|
+
[[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
|
42
|
+
&& gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.fastq"
|
43
|
+
[[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
|
44
|
+
&& gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
|
44
45
|
done
|
45
46
|
|
46
47
|
# Finalize
|
47
|
-
date
|
48
|
+
miga date > "$DATASET.done"
|
48
49
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|