miga-base 0.2.6.4 → 0.2.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/list_datasets.rb +6 -1
- data/actions/run_local.rb +1 -1
- data/actions/tax_distributions.rb +4 -4
- data/lib/miga/common.rb +18 -0
- data/lib/miga/daemon.rb +1 -1
- data/lib/miga/dataset_result.rb +46 -47
- data/lib/miga/remote_dataset.rb +52 -32
- data/lib/miga/tax_dist.rb +2 -2
- data/lib/miga/tax_index.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_functions.bash +17 -8
- data/scripts/_distances_noref_nomulti.bash +26 -7
- data/scripts/aai_distances.bash +3 -2
- data/scripts/ani_distances.bash +3 -2
- data/scripts/assembly.bash +24 -24
- data/scripts/cds.bash +22 -30
- data/scripts/clade_finding.bash +5 -4
- data/scripts/distances.bash +13 -9
- data/scripts/essential_genes.bash +12 -11
- data/scripts/haai_distances.bash +3 -2
- data/scripts/init.bash +100 -108
- data/scripts/miga.bash +4 -2
- data/scripts/mytaxa.bash +72 -71
- data/scripts/mytaxa_scan.bash +62 -61
- data/scripts/ogs.bash +14 -13
- data/scripts/project_stats.bash +1 -0
- data/scripts/read_quality.bash +12 -16
- data/scripts/ssu.bash +18 -18
- data/scripts/stats.bash +3 -2
- data/scripts/subclades.bash +7 -6
- data/scripts/trimmed_fasta.bash +22 -21
- data/scripts/trimmed_reads.bash +34 -32
- data/utils/index_metadata.rb +4 -4
- data/utils/ref-tree.R +65 -0
- data/utils/requirements.txt +1 -1
- metadata +57 -56
data/scripts/mytaxa.bash
CHANGED
@@ -1,94 +1,95 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="mytaxa"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
|
9
10
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
11
|
cd "$DIR"
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
-
MT=$(dirname -- $(which MyTaxa))
|
14
|
+
miga date > "$DATASET.start"
|
15
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
15
16
|
|
16
17
|
# Check type of dataset
|
17
18
|
MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
|
18
|
-
|
19
|
+
| wc -l | awk '{print $1}')
|
19
20
|
if [[ "$MULTI" -eq "1" ]] ; then
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
21
|
+
# Check requirements
|
22
|
+
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
|
23
|
+
echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
|
24
|
+
"no such file or directory" >&2
|
25
|
+
exit 1
|
26
|
+
fi
|
27
|
+
if [[ ! -d "$MT/db" ]] ; then
|
28
|
+
echo "Cannot locate the MyTaxa index: $MT/db:" \
|
29
|
+
"no such file or directory" >&2
|
30
|
+
exit 1
|
31
|
+
fi
|
32
|
+
if [[ ! -d "$MT/utils" ]] ; then
|
33
|
+
echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
|
34
|
+
"no such file or directory" >&2
|
35
|
+
exit 1
|
36
|
+
fi
|
36
37
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
# Execute search
|
39
|
+
diamond blastp -q "../../../06.cds/$DATASET.faa" -d "$MT/AllGenomes.faa" \
|
40
|
+
-a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
|
41
|
+
diamond view -a "$DATASET.daa" -o "$DATASET.blast"
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
43
|
+
# Prepare MyTaxa input, execute MyTaxa, and generate profiles
|
44
|
+
[[ -e "../../../06.cds/$DATASET.gff2.gz" ]] \
|
45
|
+
&& [[ ! -e "../../../06.cds/$DATASET.gff2" ]] \
|
46
|
+
&& gunzip "../../../06.cds/$DATASET.gff2.gz"
|
47
|
+
[[ -e "../../../06.cds/$DATASET.gff3.gz" ]] \
|
48
|
+
&& [[ ! -e "../../../06.cds/$DATASET.gff3" ]] \
|
49
|
+
&& gunzip "../../../06.cds/$DATASET.gff3.gz"
|
50
|
+
if [[ -e "../../../06.cds/$DATASET.gff2" ]] ; then
|
51
|
+
# GFF2
|
52
|
+
perl "$MT/utils/infile_convert.pl" -f gff2 \
|
53
|
+
"../../../06.cds/$DATASET.gff2" "$DATASET.blast" \
|
54
|
+
| sort -k 13 > "$DATASET.mytaxain"
|
55
|
+
"$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
|
56
|
+
perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
|
57
|
+
-g "../../../06.cds/$DATASET.gff2" -f gff2 \
|
58
|
+
-I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
|
59
|
+
-K "$DATASET.mytaxa.krona" -u
|
60
|
+
elif [[ -e "../../../06.cds/$DATASET.gff3" ]] ; then
|
61
|
+
# GFF3
|
62
|
+
perl "$MT/utils/infile_convert.pl" -f gff3 \
|
63
|
+
"../../../06.cds/$DATASET.gff3" "$DATASET.blast" | sort -k 13 \
|
64
|
+
> "$DATASET.mytaxain"
|
65
|
+
"$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
|
66
|
+
perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
|
67
|
+
-g "../../../06.cds/$DATASET.gff3" -f gff3 \
|
68
|
+
-I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
|
69
|
+
-K "$DATASET.mytaxa.krona" -u
|
70
|
+
else
|
71
|
+
# No GFF
|
72
|
+
perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
|
73
|
+
| sort -k 13 > "$DATASET.mytaxain"
|
74
|
+
"$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
|
75
|
+
perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
|
76
|
+
-I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
|
77
|
+
-K "$DATASET.mytaxa.krona" -u
|
78
|
+
fi
|
78
79
|
|
79
|
-
|
80
|
-
|
80
|
+
# Execute Krona
|
81
|
+
ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"
|
81
82
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
83
|
+
# Gzip and cleanup
|
84
|
+
[[ -e "../../../06.cds/$DATASET.gff2" ]] \
|
85
|
+
&& gzip -9 -f "../../../06.cds/$DATASET.gff2"
|
86
|
+
[[ -e "../../../06.cds/$DATASET.gff3" ]] \
|
87
|
+
&& gzip -9 -f "../../../06.cds/$DATASET.gff3"
|
88
|
+
gzip -9 -f "$DATASET.mytaxain"
|
89
|
+
gzip -9 -f "$DATASET.blast"
|
90
|
+
rm "$DATASET.daa"
|
90
91
|
fi
|
91
92
|
|
92
93
|
# Finalize
|
93
|
-
date
|
94
|
+
miga date > "$DATASET.done"
|
94
95
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/mytaxa_scan.bash
CHANGED
@@ -1,86 +1,87 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="mytaxa_scan"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
|
9
10
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
11
|
cd "$DIR"
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
-
MT=$(dirname -- $(which MyTaxa))
|
14
|
+
miga date > "$DATASET.start"
|
15
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
15
16
|
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
16
|
-
trap "rm -rf $TMPDIR; exit" SIGHUP SIGINT SIGTERM
|
17
|
+
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
17
18
|
|
18
19
|
# Check type of dataset
|
19
20
|
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
|
20
|
-
|
21
|
+
| wc -l | awk '{print $1}')
|
21
22
|
if [[ "$NOMULTI" -eq "1" ]] ; then
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
23
|
+
# Check requirements
|
24
|
+
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
|
25
|
+
echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
|
26
|
+
"no such file or directory" >&2
|
27
|
+
exit 1
|
28
|
+
fi
|
29
|
+
if [[ ! -d "$MT/db" ]] ; then
|
30
|
+
echo "Cannot locate the MyTaxa index: $MT/db:" \
|
31
|
+
"no such file or directory" >&2
|
32
|
+
exit 1
|
33
|
+
fi
|
34
|
+
if [[ ! -d "$MT/utils" ]] ; then
|
35
|
+
echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
|
36
|
+
"no such file or directory" >&2
|
37
|
+
exit 1
|
38
|
+
fi
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
40
|
+
if [[ ! -s "$DATASET.mytaxa" ]] ; then
|
41
|
+
# Execute search
|
42
|
+
if [[ ! -s "$DATASET.blast" ]] ; then
|
43
|
+
diamond blastp -q "../../../06.cds/$DATASET.faa" \
|
44
|
+
-d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
|
45
|
+
-a "$DATASET.daa" -t "$TMPDIR"
|
46
|
+
diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
|
47
|
+
fi
|
47
48
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
49
|
+
# Prepare MyTaxa input, execute MyTaxa, and generate profiles
|
50
|
+
perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
|
51
|
+
| sort -k 13 > "$DATASET.mytaxain"
|
52
|
+
"$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
|
53
|
+
fi
|
54
|
+
ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
|
55
|
+
"$DATASET.mytaxa" "$DATASET.wintax"
|
56
|
+
echo "
|
57
|
+
source('$MIGA/utils/mytaxa_scan.R');
|
58
|
+
pdf('$DATASET.pdf', 12, 7);
|
59
|
+
mytaxa.scan('$DATASET.wintax');
|
60
|
+
dev.off();
|
61
|
+
" | R --vanilla
|
61
62
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
63
|
+
# Extract genes from flagged regions
|
64
|
+
[[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
|
65
|
+
if [[ -e "$DATASET.wintax.regions" ]] ; then
|
66
|
+
i=0
|
67
|
+
for win in $(cat "$DATASET.wintax.regions") ; do
|
68
|
+
let i=$i+1
|
69
|
+
awk "NR==$win" "$DATASET.wintax.genes" | tr "\\t" "\\n" \
|
70
|
+
> "$DATASET.reg/$i.ids"
|
71
|
+
FastA.filter.pl -q "$DATASET.reg/$i.ids" \
|
72
|
+
"../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
|
73
|
+
done
|
74
|
+
fi
|
74
75
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
76
|
+
# Clean
|
77
|
+
[[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
|
78
|
+
[[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
|
79
|
+
&& gzip -9 -f "$DATASET.blast"
|
80
|
+
[[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
|
81
|
+
&& gzip -9 -f "$DATASET.mytaxain"
|
81
82
|
fi
|
82
83
|
|
83
84
|
# Finalize
|
84
85
|
rm -R "$TMPDIR"
|
85
|
-
date
|
86
|
+
miga date > "$DATASET.done"
|
86
87
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/ogs.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="ogs"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/10.clades/03.ogs"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
echo -n "" > miga-project.log
|
14
15
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
@@ -16,25 +17,25 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
|
16
17
|
# Extract RBMs
|
17
18
|
[[ -d miga-project.rbm ]] || mkdir miga-project.rbm
|
18
19
|
for i in $DS ; do
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
20
|
+
for j in $DS ; do
|
21
|
+
file="miga-project.rbm/$i-$j.rbm"
|
22
|
+
[[ -s $file ]] && continue
|
23
|
+
echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
|
24
|
+
"where seq1='$i' and seq2='$j' ;" \
|
25
|
+
| sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
|
26
|
+
> "$file"
|
27
|
+
[[ -s "$file" ]] || rm "$file"
|
28
|
+
done
|
29
|
+
echo "$i" >> miga-project.log
|
29
30
|
done
|
30
31
|
|
31
32
|
# Estimate OGs
|
32
|
-
ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t $CORES
|
33
|
+
ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
|
33
34
|
ogs.stats.rb -o miga-project.ogs -j miga-project.stats
|
34
35
|
|
35
36
|
# Clean RBMs
|
36
37
|
rm -rf miga-project.rbm
|
37
38
|
|
38
39
|
# Finalize
|
39
|
-
date
|
40
|
+
miga date > "miga-project.done"
|
40
41
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/project_stats.bash
CHANGED
data/scripts/read_quality.bash
CHANGED
@@ -1,38 +1,34 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="read_quality"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/03.read_quality"
|
9
10
|
|
10
11
|
b=$DATASET
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
+
miga date > "$DATASET.start"
|
14
15
|
|
15
16
|
# FastQC
|
16
17
|
[[ -d "$b.fastqc" ]] || mkdir "$b.fastqc"
|
17
|
-
fastqc ../02.trimmed_reads/$b.[12].clipped.fastq -o $b.fastqc
|
18
|
+
fastqc "../02.trimmed_reads/$b".[12].clipped.fastq -o "$b.fastqc"
|
18
19
|
|
19
20
|
# SolexaQA++
|
20
21
|
[[ -d "$b.solexaqa" ]] || mkdir "$b.solexaqa"
|
21
|
-
exists ../02.trimmed_reads/$b.[12].*.pdf \
|
22
|
-
|
22
|
+
exists "../02.trimmed_reads/$b".[12].*.pdf \
|
23
|
+
&& mv "../02.trimmed_reads/$b".[12].*.pdf "$b.solexaqa/"
|
23
24
|
|
24
25
|
# Clean 02.trimmed_reads
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
&& rm ../02.trimmed_reads/$b.[12].fastq.trimmed.single
|
31
|
-
[[ -e "../02.trimmed_reads/$b.1.fastq.trimmed" ]] \
|
32
|
-
&& rm ../02.trimmed_reads/$b.[12].fastq.trimmed
|
33
|
-
[[ -e "../02.trimmed_reads/$b.1.fastq" ]] \
|
34
|
-
&& rm ../02.trimmed_reads/$b.[12].fastq
|
26
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq_trimmed.segments
|
27
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.paired
|
28
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.single
|
29
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed
|
30
|
+
rm -f "../02.trimmed_reads/$b".[12].fastq
|
35
31
|
|
36
32
|
# Finalize
|
37
|
-
date
|
33
|
+
miga date > "$DATASET.done"
|
38
34
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/ssu.bash
CHANGED
@@ -1,36 +1,36 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="ssu"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
|
9
10
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
11
|
cd "$DIR"
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
+
miga date > "$DATASET.start"
|
14
15
|
|
15
16
|
fa="../../../05.assembly/$DATASET.LargeContigs.fna"
|
16
17
|
if [[ -s $fa ]] ; then
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
gzip -9 -f "$DATASET.ssu.all.fa"
|
18
|
+
# Run barrnap
|
19
|
+
barrnap --quiet --threads "$CORES" "$fa" | grep "^##gff\\|;product=16S " \
|
20
|
+
> "$DATASET.ssu.gff"
|
21
|
+
# Extract
|
22
|
+
bedtools getfasta -s "-fi" "$fa" -bed "$DATASET.ssu.gff" \
|
23
|
+
-fo "$DATASET.ssu.all.fa"
|
24
|
+
FastA.length.pl "$DATASET.ssu.all.fa" | sort -nr -k 2 | head -n 1 \
|
25
|
+
| cut -f 1 > "$DATASET.ssu.fa.id"
|
26
|
+
FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" > "$DATASET.ssu.fa"
|
27
|
+
rm "$DATASET.ssu.fa.id"
|
28
|
+
[[ -e "$fa.fai" ]] && rm "$fa.fai"
|
29
|
+
# Gzip
|
30
|
+
gzip -9 -f "$DATASET.ssu.gff"
|
31
|
+
gzip -9 -f "$DATASET.ssu.all.fa"
|
32
32
|
fi
|
33
33
|
|
34
34
|
# Finalize
|
35
|
-
date
|
35
|
+
miga date > "$DATASET.done"
|
36
36
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/stats.bash
CHANGED
@@ -4,13 +4,14 @@ set -e
|
|
4
4
|
SCRIPT="stats"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
DIR="$PROJECT/data/90.stats"
|
9
10
|
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
11
|
cd "$DIR"
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
+
miga date > "$DATASET.start"
|
14
15
|
|
15
16
|
# Calculate statistics
|
16
17
|
for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
|
@@ -19,5 +20,5 @@ for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
|
|
19
20
|
done
|
20
21
|
|
21
22
|
# Finalize
|
22
|
-
date
|
23
|
+
miga date > "$DATASET.done"
|
23
24
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/subclades.bash
CHANGED
@@ -4,23 +4,24 @@ set -e
|
|
4
4
|
SCRIPT="subclades"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/10.clades/02.ani"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
# Run R code
|
14
|
-
$MIGA/utils/subclades.R \
|
15
|
+
"$MIGA/utils/subclades.R" \
|
15
16
|
../../09.distances/03.ani/miga-project.txt.gz \
|
16
|
-
miga-project $CORES
|
17
|
+
miga-project "$CORES"
|
17
18
|
mv miga-project.nwk miga-project.ani.nwk
|
18
19
|
|
19
20
|
# Compile
|
20
21
|
ruby "$MIGA/utils/subclades-compile.rb" . \
|
21
|
-
|
22
|
-
|
22
|
+
> miga-project.class.tsv \
|
23
|
+
2> miga-project.class.nwk
|
23
24
|
|
24
25
|
# Finalize
|
25
|
-
date
|
26
|
+
miga date > "miga-project.done"
|
26
27
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/trimmed_fasta.bash
CHANGED
@@ -1,48 +1,49 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
3
|
set -e
|
4
4
|
SCRIPT="trimmed_fasta"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/04.trimmed_fasta"
|
9
10
|
|
10
11
|
b=$DATASET
|
11
12
|
|
12
13
|
# Initialize
|
13
|
-
date
|
14
|
+
miga date > "$DATASET.start"
|
14
15
|
|
15
16
|
# Gunzip (if necessary)
|
16
17
|
for sis in 1 2 ; do
|
17
|
-
|
18
|
-
|
18
|
+
[[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq.gz" \
|
19
|
+
&& ! -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
|
19
20
|
&& gunzip "../02.trimmed_reads/$b.$sis.clipped.fastq.gz"
|
20
21
|
done
|
21
22
|
|
22
23
|
# FastQ -> FastA
|
23
|
-
|
24
|
-
if [[ -e ../02.trimmed_reads/$b.2.clipped.fastq ]] ; then
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
24
|
+
FastQ.toFastA.awk < "../02.trimmed_reads/$b.1.clipped.fastq" > "$b.1.fasta"
|
25
|
+
if [[ -e "../02.trimmed_reads/$b.2.clipped.fastq" ]] ; then
|
26
|
+
FastQ.toFastA.awk < "../02.trimmed_reads/$b.2.clipped.fastq" > "$b.2.fasta"
|
27
|
+
FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
|
28
|
+
gzip -9 -f "$b.2.fasta"
|
29
|
+
gzip -9 -f "$b.1.fasta"
|
30
|
+
FastQ.toFastA.awk < "../02.trimmed_reads/$b".[12].clipped.single.fastq \
|
31
|
+
> "$b.SingleReads.fa"
|
32
|
+
gzip -9 -f "$b.SingleReads.fa"
|
32
33
|
else
|
33
|
-
mv $b.1.fasta $b.SingleReads.fa
|
34
|
+
mv "$b.1.fasta" "$b.SingleReads.fa"
|
34
35
|
fi
|
35
36
|
|
36
37
|
# Compress input at 01.raw_reads and 02.trimmed_reads
|
37
38
|
for sis in 1 2 ; do
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
[[ -e "../01.raw_reads/$b.$sis.fastq" ]] \
|
40
|
+
&& gzip -9 -f "../01.raw_reads/$b.$sis.fastq"
|
41
|
+
[[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
|
42
|
+
&& gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.fastq"
|
43
|
+
[[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
|
44
|
+
&& gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
|
44
45
|
done
|
45
46
|
|
46
47
|
# Finalize
|
47
|
-
date
|
48
|
+
miga date > "$DATASET.done"
|
48
49
|
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|