miga-base 0.2.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +351 -0
- data/actions/add_result +61 -0
- data/actions/add_taxonomy +86 -0
- data/actions/create_dataset +62 -0
- data/actions/create_project +70 -0
- data/actions/daemon +69 -0
- data/actions/download_dataset +77 -0
- data/actions/find_datasets +63 -0
- data/actions/import_datasets +86 -0
- data/actions/index_taxonomy +71 -0
- data/actions/list_datasets +83 -0
- data/actions/list_files +67 -0
- data/actions/unlink_dataset +52 -0
- data/bin/miga +48 -0
- data/lib/miga/daemon.rb +178 -0
- data/lib/miga/dataset.rb +286 -0
- data/lib/miga/gui.rb +289 -0
- data/lib/miga/metadata.rb +74 -0
- data/lib/miga/project.rb +268 -0
- data/lib/miga/remote_dataset.rb +154 -0
- data/lib/miga/result.rb +102 -0
- data/lib/miga/tax_index.rb +70 -0
- data/lib/miga/taxonomy.rb +107 -0
- data/lib/miga.rb +83 -0
- data/scripts/_distances_noref_nomulti.bash +86 -0
- data/scripts/_distances_ref_nomulti.bash +105 -0
- data/scripts/aai_distances.bash +40 -0
- data/scripts/ani_distances.bash +39 -0
- data/scripts/assembly.bash +38 -0
- data/scripts/cds.bash +45 -0
- data/scripts/clade_finding.bash +27 -0
- data/scripts/distances.bash +30 -0
- data/scripts/essential_genes.bash +29 -0
- data/scripts/haai_distances.bash +39 -0
- data/scripts/init.bash +211 -0
- data/scripts/miga.bash +12 -0
- data/scripts/mytaxa.bash +93 -0
- data/scripts/mytaxa_scan.bash +85 -0
- data/scripts/ogs.bash +36 -0
- data/scripts/read_quality.bash +37 -0
- data/scripts/ssu.bash +35 -0
- data/scripts/subclades.bash +26 -0
- data/scripts/trimmed_fasta.bash +47 -0
- data/scripts/trimmed_reads.bash +57 -0
- data/utils/adapters.fa +302 -0
- data/utils/mytaxa_scan.R +89 -0
- data/utils/mytaxa_scan.rb +58 -0
- data/utils/requirements.txt +19 -0
- data/utils/subclades-compile.rb +48 -0
- data/utils/subclades.R +171 -0
- metadata +185 -0
data/scripts/mytaxa.bash
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Runs a DIAMOND search against the MyTaxa database, classifies the hits with
# MyTaxa, and renders a Krona plot. Only executed for datasets reported by
# `miga list_datasets --multi`; other datasets just get the start/done stamps.
# NOTE(review): $DATASET is read throughout but is not in the list above;
# presumably it is exported by the caller or set by miga.bash -- confirm.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Abort if the result directory is unreachable; every path below is relative
# to it and would otherwise be resolved against the wrong directory.
cd "$DIR" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
# MT is the directory holding the MyTaxa executable; the database, index and
# utilities are looked up next to it. It stays empty if MyTaxa is not in PATH.
MT=""
if MT_BIN=$(command -v MyTaxa) ; then
  MT=$(dirname -- "$MT_BIN")
fi

# Check type of dataset
MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
  | wc -l | awk '{print $1}')
if [[ "$MULTI" -eq "1" ]] ; then
  # Check requirements
  if [[ -z "$MT" ]] ; then
    echo "Cannot locate MyTaxa: no such executable in the PATH" >&2
    exit 1
  fi
  if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
    echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
      "no such file or directory" >&2
    exit 1
  fi
  if [[ ! -d "$MT/db" ]] ; then
    echo "Cannot locate the MyTaxa index: $MT/db:" \
      "no such file or directory" >&2
    exit 1
  fi
  if [[ ! -d "$MT/utils" ]] ; then
    echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
      "no such file or directory" >&2
    exit 1
  fi

  # Path prefix shared by all the CDS files of this dataset
  CDS="../../../06.cds/$DATASET"

  # Execute search
  diamond blastp -q "$CDS.faa" -d "$MT/AllGenomes.faa" \
    -a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
  diamond view -a "$DATASET.daa" -o "$DATASET.blast"

  # Prepare MyTaxa input, execute MyTaxa, and generate profiles
  # Decompress the annotations when only the gzipped copy is present
  for fmt in gff2 gff3 ; do
    [[ -e "$CDS.$fmt.gz" && ! -e "$CDS.$fmt" ]] && gunzip "$CDS.$fmt.gz"
  done
  # Pick the annotation format: GFF2 is preferred over GFF3; without either,
  # the converter is run with "-f no" and a placeholder file name.
  if [[ -e "$CDS.gff2" ]] ; then
    GFF_FMT="gff2"
  elif [[ -e "$CDS.gff3" ]] ; then
    GFF_FMT="gff3"
  else
    GFF_FMT="no"
  fi
  if [[ "$GFF_FMT" == "no" ]] ; then
    GFF_FILE="LOREM_IPSUM"
    DIST_GFF=()
  else
    GFF_FILE="$CDS.$GFF_FMT"
    DIST_GFF=(-g "$GFF_FILE" -f "$GFF_FMT")
  fi
  perl "$MT/utils/infile_convert.pl" -f "$GFF_FMT" \
    "$GFF_FILE" "$DATASET.blast" \
    | sort -k 13 > "$DATASET.mytaxain"
  "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
  # The -g/-f pair is only passed when an annotation file exists
  perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
    "${DIST_GFF[@]}" \
    -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
    -K "$DATASET.mytaxa.krona" -u

  # Execute Krona
  ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"

  # Gzip and cleanup
  for fmt in gff2 gff3 ; do
    [[ -e "$CDS.$fmt" ]] && gzip -9 -f "$CDS.$fmt"
  done
  gzip -9 -f "$DATASET.mytaxain"
  gzip -9 -f "$DATASET.blast"
  rm "$DATASET.daa"
fi

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa
|
93
|
+
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Runs DIAMOND + MyTaxa on the predicted proteins of a dataset, summarizes the
# classification in windows (mytaxa_scan.rb / mytaxa_scan.R), and extracts the
# genes of each flagged region. Only executed for datasets reported by
# `miga list_datasets --no-multi`.
# NOTE(review): $DATASET is read throughout but is not in the list above;
# presumably it is exported by the caller or set by miga.bash -- confirm.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Abort if the result directory is unreachable; every path below is relative
# to it and would otherwise be resolved against the wrong directory.
cd "$DIR" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
# MT is the directory holding the MyTaxa executable; the database, index and
# utilities are looked up next to it. It stays empty if MyTaxa is not in PATH.
MT=""
if MT_BIN=$(command -v MyTaxa) ; then
  MT=$(dirname -- "$MT_BIN")
fi
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX) || exit 1
# Remove the scratch directory on every exit path (including the `exit 1`
# requirement checks below), not only on signals. The signal traps simply
# exit, which in turn fires the EXIT trap.
trap 'rm -rf -- "$TMPDIR"' EXIT
trap 'exit' SIGHUP SIGINT SIGTERM

# Check type of dataset
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
  | wc -l | awk '{print $1}')
if [[ "$NOMULTI" -eq "1" ]] ; then
  # Check requirements
  if [[ -z "$MT" ]] ; then
    echo "Cannot locate MyTaxa: no such executable in the PATH" >&2
    exit 1
  fi
  if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
    echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
      "no such file or directory" >&2
    exit 1
  fi
  if [[ ! -d "$MT/db" ]] ; then
    echo "Cannot locate the MyTaxa index: $MT/db:" \
      "no such file or directory" >&2
    exit 1
  fi
  if [[ ! -d "$MT/utils" ]] ; then
    echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
      "no such file or directory" >&2
    exit 1
  fi

  # Skip the search and classification when a non-empty result already exists
  if [[ ! -s "$DATASET.mytaxa" ]] ; then
    # Execute search
    if [[ ! -s "$DATASET.blast" ]] ; then
      diamond blastp -q "../../../06.cds/$DATASET.faa" \
        -d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
        -a "$DATASET.daa" -t "$TMPDIR"
      diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
    fi

    # Prepare MyTaxa input, execute MyTaxa, and generate profiles
    perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
      | sort -k 13 > "$DATASET.mytaxain"
    "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
  fi
  ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
    "$DATASET.mytaxa" "$DATASET.wintax"
  echo "
  source('$MIGA/utils/mytaxa_scan.R');
  pdf('$DATASET.pdf', 12, 7);
  mytaxa.scan('$DATASET.wintax');
  dev.off();
  " | R --vanilla

  # Extract genes from flagged regions
  [[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
  if [[ -e "$DATASET.wintax.regions" ]] ; then
    i=0
    # Each whitespace-separated entry of the regions file is used as a row
    # number of the .wintax.genes table; that row's tab-separated fields
    # become the ID list of region $i.
    for win in $(cat "$DATASET.wintax.regions") ; do
      i=$((i + 1))
      awk -v row="$win" 'NR == row' "$DATASET.wintax.genes" | tr '\t' '\n' \
        > "$DATASET.reg/$i.ids"
      FastA.filter.pl -q "$DATASET.reg/$i.ids" \
        "../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
    done
  fi

  # Clean
  [[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
  [[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
    && gzip -9 -f "$DATASET.blast"
  [[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
    && gzip -9 -f "$DATASET.mytaxain"
fi

# Finalize
# (the scratch directory is removed by the EXIT trap)
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa_scan
|
85
|
+
|
data/scripts/ogs.bash
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Project-wide step: dumps the `rbm` table rows for every ordered pair of
# datasets from the per-dataset AAI SQLite databases, then estimates
# orthology groups from those files with ogs.mcl.rb / ogs.stats.rb.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Abort if the result directory is unreachable; all outputs (including the
# `rm` below) use paths relative to it.
cd "$PROJECT/data/10.clades/03.ogs" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Start from an empty progress log
echo -n "" > miga-project.log
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)

# Extract RBMs
[[ -d miga-project.rbm ]] || mkdir miga-project.rbm
# $DS is deliberately unquoted: it is a whitespace-separated list of names
for i in $DS ; do
  for j in $DS ; do
    file="miga-project.rbm/$i-$j.rbm"
    # Keep any non-empty file from a previous run
    [[ -s "$file" ]] && continue
    echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
      "where seq1='$i' and seq2='$j' ;" \
      | sqlite3 "../../09.distances/02.aai/$i.db" | tr '\|' '\t' \
      > "$file"
    # Drop the file again when the query produced no rows
    [[ -s "$file" ]] || rm "$file"
  done
  echo "$i" >> miga-project.log
done

# Estimate OGs
ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
ogs.stats.rb -o miga-project.ogs -j miga-project.stats

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r ogs
|
36
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Runs FastQC on the clipped reads of a dataset, collects the PDF reports left
# in 02.trimmed_reads, and removes the intermediate trimming files.
# NOTE(review): $DATASET is read throughout but is not in the list above;
# presumably it is exported by the caller or set by miga.bash -- confirm.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Abort if the directory is unreachable: the `rm` commands below use relative
# paths and must never run from an unexpected working directory.
cd "$PROJECT/data/03.read_quality" || exit 1

b=$DATASET
# Directory with the trimmed reads, relative to the working directory
reads="../02.trimmed_reads"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# FastQC
[[ -d "$b.fastqc" ]] || mkdir "$b.fastqc"
# Only the [12] part is left unquoted so that it is the sole glob
fastqc "$reads/$b".[12].clipped.fastq -o "$b.fastqc"

# SolexaQA++
[[ -d "$b.solexaqa" ]] || mkdir "$b.solexaqa"
# `exists` is not defined in this script; presumably it comes from miga.bash
exists "$reads/$b".[12].*.pdf \
  && mv "$reads/$b".[12].*.pdf "$b.solexaqa/"

# Clean 02.trimmed_reads
# The presence of the first-mate file triggers removal of both mates
for ext in fastq_trimmed.segments fastq.trimmed.paired fastq.trimmed.single \
    fastq.trimmed fastq ; do
  [[ -e "$reads/$b.1.$ext" ]] \
    && rm "$reads/$b".[12]."$ext"
done

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r read_quality
|
37
|
+
|
data/scripts/ssu.bash
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Detects 16S rRNA genes in the assembly with barrnap, extracts their
# sequences with bedtools, and keeps the longest one as $DATASET.ssu.fa.
# Nothing is run when the assembly file is missing or empty.
# NOTE(review): $DATASET is read throughout but is not in the list above;
# presumably it is exported by the caller or set by miga.bash -- confirm.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Abort if the result directory is unreachable; every path below is relative
# to it and would otherwise be resolved against the wrong directory.
cd "$DIR" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

fa="../../../05.assembly/$DATASET.LargeContigs.fna"
if [[ -s "$fa" ]] ; then
  # Run barrnap
  # Keep only the GFF header line and the 16S features
  barrnap --quiet --threads "$CORES" "$fa" \
    | grep '^##gff\|;product=16S ' \
    > "$DATASET.ssu.gff"

  # Extract
  bedtools getfasta -s -fi "$fa" -bed "$DATASET.ssu.gff" \
    -fo "$DATASET.ssu.all.fa"
  # ID of the longest extracted sequence
  FastA.length.pl "$DATASET.ssu.all.fa" | sort -nr -k 2 | head -n 1 \
    | cut -f 1 > "$DATASET.ssu.fa.id"
  FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" \
    > "$DATASET.ssu.fa"
  rm "$DATASET.ssu.fa.id"
  # Remove the FASTA index next to the assembly, if one was left there
  [[ -e "$fa.fai" ]] && rm "$fa.fai"

  # Gzip
  gzip -9 -f "$DATASET.ssu.gff"
  gzip -9 -f "$DATASET.ssu.all.fa"
fi

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r ssu
|
35
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Project-wide step: runs subclades() from utils/subclades.R on the ANI
# matrix of the project and compiles the result with subclades-compile.rb.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Abort if the result directory is unreachable; the R and Ruby steps below
# read and write paths relative to it.
cd "$PROJECT/data/10.clades/02.ani" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Run R code
# $MIGA and $CORES are expanded by the shell before R sees the program
echo "
source('$MIGA/utils/subclades.R');
subclades('../../09.distances/03.ani/miga-project.txt.gz',
  'miga-project', $CORES);
" | R --vanilla

# Compile
# The compiler's stdout is stored as the .tsv and its stderr as the .nwk
ruby "$MIGA/utils/subclades-compile.rb" . \
  > miga-project.class.tsv \
  2> miga-project.class.nwk

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r subclades
|
26
|
+
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Converts the clipped FastQ reads of a dataset to FastA (interposed
# CoupledReads plus SingleReads when a second mate exists, SingleReads only
# otherwise), then gzips the FastQ inputs.
# NOTE(review): $DATASET is read throughout but is not in the list above;
# presumably it is exported by the caller or set by miga.bash -- confirm.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Abort if the result directory is unreachable; every path below is relative
# to it and would otherwise be resolved against the wrong directory.
cd "$PROJECT/data/04.trimmed_fasta" || exit 1

b=$DATASET
# Directory with the trimmed reads, relative to the working directory
reads="../02.trimmed_reads"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Gunzip (if necessary)
for sis in 1 2 ; do
  [[ -e "$reads/$b.$sis.clipped.fastq.gz" \
    && ! -e "$reads/$b.$sis.clipped.fastq" ]] \
    && gunzip "$reads/$b.$sis.clipped.fastq.gz"
done

# FastQ -> FastA
cat "$reads/$b.1.clipped.fastq" | FastQ.toFastA.awk > "$b.1.fasta"
if [[ -e "$reads/$b.2.clipped.fastq" ]] ; then
  # Paired reads: interpose both mates, then convert the orphaned reads
  cat "$reads/$b.2.clipped.fastq" | FastQ.toFastA.awk > "$b.2.fasta"
  # Only the [12] part is left unquoted so that it is the sole glob
  FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
  gzip -9 -f "$b.2.fasta"
  gzip -9 -f "$b.1.fasta"
  cat "$reads/$b".[12].clipped.single.fastq | FastQ.toFastA.awk \
    > "$b.SingleReads.fa"
  gzip -9 -f "$b.SingleReads.fa"
else
  # Single reads: the first-mate FastA is the final (uncompressed) product
  mv "$b.1.fasta" "$b.SingleReads.fa"
fi

# Compress input at 01.raw_reads and 02.trimmed_reads
for sis in 1 2 ; do
  [[ -e "../01.raw_reads/$b.$sis.fastq" ]] \
    && gzip -9 -f "../01.raw_reads/$b.$sis.fastq"
  [[ -e "$reads/$b.$sis.clipped.fastq" ]] \
    && gzip -9 -f "$reads/$b.$sis.clipped.fastq"
  [[ -e "$reads/$b.$sis.clipped.single.fastq" ]] \
    && gzip -9 -f "$reads/$b.$sis.clipped.single.fastq"
done

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
|
47
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Tags the raw reads of a dataset, quality-trims them with SolexaQA++
# (dynamictrim + lengthsort), and clips adapters with scythe. Handles both
# paired (two mates) and single-read datasets.
# NOTE(review): $DATASET is read throughout but is not in the list above;
# presumably it is exported by the caller or set by miga.bash -- confirm.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Abort if the directory is unreachable: the glob `rm` below must never run
# from an unexpected working directory.
cd "$PROJECT/data/02.trimmed_reads" || exit 1

b=$DATASET
# Directory with the raw reads, relative to the working directory
raw="../01.raw_reads"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Unzip (if necessary)
for sis in 1 2 ; do
  [[ -e "$raw/$b.$sis.fastq.gz" && ! -e "$raw/$b.$sis.fastq" ]] \
    && gunzip "$raw/$b.$sis.fastq.gz"
done

# Clean existing files
# `exists` is not defined in this script; presumably it comes from miga.bash.
# Only the [12].* part is left unquoted so that it is the sole glob.
exists "$b".[12].* && rm "$b".[12].*

# Tag
FastQ.tag.rb -i "$raw/$b.1.fastq" -p "$b-" -s "/1" -o "$b.1.fastq"
[[ -e "$raw/$b.2.fastq" ]] \
  && FastQ.tag.rb -i "$raw/$b.2.fastq" -p "$b-" -s "/2" -o "$b.2.fastq"

# Trim
SolexaQA++ dynamictrim "$b".[12].fastq -h 20 -d .
SolexaQA++ lengthsort "$b".[12].fastq.trimmed -l 50 -d .

# Clean adapters
if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
  # Paired reads: clip each mate, then re-sort by length so that pairs broken
  # by the clipping are moved to the .single files
  scythe -a "$MIGA/utils/adapters.fa" "$b.1.fastq.trimmed.paired" \
    > "$b.1.clipped.all.fastq"
  scythe -a "$MIGA/utils/adapters.fa" "$b.2.fastq.trimmed.paired" \
    > "$b.2.clipped.all.fastq"
  SolexaQA++ lengthsort "$b".[12].clipped.all.fastq -l 50 -d .
  rm "$b".[12].clipped.all.fastq
  [[ -e "$b.1.clipped.all.fastq.single" ]] \
    && mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.single.fastq"
  [[ -e "$b.2.clipped.all.fastq.single" ]] \
    && mv "$b.2.clipped.all.fastq.single" "$b.2.clipped.single.fastq"
  mv "$b.1.clipped.all.fastq.paired" "$b.1.clipped.fastq"
  mv "$b.2.clipped.all.fastq.paired" "$b.2.clipped.fastq"
  # Best effort: the summary file may not exist
  rm "$b.1.clipped.all.fastq.summary.txt" &>/dev/null
else
  # Single reads
  scythe -a "$MIGA/utils/adapters.fa" "$b.1.fastq.trimmed.single" \
    > "$b.1.clipped.all.fastq"
  SolexaQA++ lengthsort "$b.1.clipped.all.fastq" -l 50 -d .
  mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.fastq"
fi
# Best effort: there may be no discard files
rm "$b".[12].*.discard &>/dev/null

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads
|
57
|
+
|