miga-base 0.2.6.4 → 0.2.6.5

Sign up to get free protection for your applications and to get access to all the features.
data/scripts/mytaxa.bash CHANGED
@@ -1,94 +1,95 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="mytaxa"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
10
11
  cd "$DIR"
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
- MT=$(dirname -- $(which MyTaxa))
14
+ miga date > "$DATASET.start"
15
+ MT=$(dirname -- "$(which MyTaxa)")
15
16
 
16
17
  # Check type of dataset
17
18
  MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
18
- | wc -l | awk '{print $1}')
19
+ | wc -l | awk '{print $1}')
19
20
  if [[ "$MULTI" -eq "1" ]] ; then
20
- # Check requirements
21
- if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
22
- echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
23
- "no such file or directory" >&2
24
- exit 1
25
- fi
26
- if [[ ! -d "$MT/db" ]] ; then
27
- echo "Cannot locate the MyTaxa index: $MT/db:" \
28
- "no such file or directory" >&2
29
- exit 1
30
- fi
31
- if [[ ! -d "$MT/utils" ]] ; then
32
- echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
33
- "no such file or directory" >&2
34
- exit 1
35
- fi
21
+ # Check requirements
22
+ if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
23
+ echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
24
+ "no such file or directory" >&2
25
+ exit 1
26
+ fi
27
+ if [[ ! -d "$MT/db" ]] ; then
28
+ echo "Cannot locate the MyTaxa index: $MT/db:" \
29
+ "no such file or directory" >&2
30
+ exit 1
31
+ fi
32
+ if [[ ! -d "$MT/utils" ]] ; then
33
+ echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
34
+ "no such file or directory" >&2
35
+ exit 1
36
+ fi
36
37
 
37
- # Execute search
38
- diamond blastp -q "../../../06.cds/$DATASET.faa" -d "$MT/AllGenomes.faa" \
39
- -a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
40
- diamond view -a "$DATASET.daa" -o "$DATASET.blast"
38
+ # Execute search
39
+ diamond blastp -q "../../../06.cds/$DATASET.faa" -d "$MT/AllGenomes.faa" \
40
+ -a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
41
+ diamond view -a "$DATASET.daa" -o "$DATASET.blast"
41
42
 
42
- # Prepare MyTaxa input, execute MyTaxa, and generate profiles
43
- [[ -e "../../../06.cds/$DATASET.gff2.gz" ]] \
44
- && [[ ! -e "../../../06.cds/$DATASET.gff2" ]] \
45
- && gunzip "../../../06.cds/$DATASET.gff2.gz"
46
- [[ -e "../../../06.cds/$DATASET.gff3.gz" ]] \
47
- && [[ ! -e "../../../06.cds/$DATASET.gff3" ]] \
48
- && gunzip "../../../06.cds/$DATASET.gff3.gz"
49
- if [[ -e "../../../06.cds/$DATASET.gff2" ]] ; then
50
- # GFF2
51
- perl "$MT/utils/infile_convert.pl" -f gff2 \
52
- "../../../06.cds/$DATASET.gff2" "$DATASET.blast" \
53
- | sort -k 13 > "$DATASET.mytaxain"
54
- "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
55
- perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
56
- -g "../../../06.cds/$DATASET.gff2" -f gff2 \
57
- -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
58
- -K "$DATASET.mytaxa.krona" -u
59
- elif [[ -e "../../../06.cds/$DATASET.gff3" ]] ; then
60
- # GFF3
61
- perl "$MT/utils/infile_convert.pl" -f gff3 \
62
- "../../../06.cds/$DATASET.gff3" "$DATASET.blast" | sort -k 13 \
63
- > "$DATASET.mytaxain"
64
- "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
65
- perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
66
- -g "../../../06.cds/$DATASET.gff3" -f gff3 \
67
- -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
68
- -K "$DATASET.mytaxa.krona" -u
69
- else
70
- # No GFF
71
- perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
72
- | sort -k 13 > "$DATASET.mytaxain"
73
- "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
74
- perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
75
- -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
76
- -K "$DATASET.mytaxa.krona" -u
77
- fi
43
+ # Prepare MyTaxa input, execute MyTaxa, and generate profiles
44
+ [[ -e "../../../06.cds/$DATASET.gff2.gz" ]] \
45
+ && [[ ! -e "../../../06.cds/$DATASET.gff2" ]] \
46
+ && gunzip "../../../06.cds/$DATASET.gff2.gz"
47
+ [[ -e "../../../06.cds/$DATASET.gff3.gz" ]] \
48
+ && [[ ! -e "../../../06.cds/$DATASET.gff3" ]] \
49
+ && gunzip "../../../06.cds/$DATASET.gff3.gz"
50
+ if [[ -e "../../../06.cds/$DATASET.gff2" ]] ; then
51
+ # GFF2
52
+ perl "$MT/utils/infile_convert.pl" -f gff2 \
53
+ "../../../06.cds/$DATASET.gff2" "$DATASET.blast" \
54
+ | sort -k 13 > "$DATASET.mytaxain"
55
+ "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
56
+ perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
57
+ -g "../../../06.cds/$DATASET.gff2" -f gff2 \
58
+ -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
59
+ -K "$DATASET.mytaxa.krona" -u
60
+ elif [[ -e "../../../06.cds/$DATASET.gff3" ]] ; then
61
+ # GFF3
62
+ perl "$MT/utils/infile_convert.pl" -f gff3 \
63
+ "../../../06.cds/$DATASET.gff3" "$DATASET.blast" | sort -k 13 \
64
+ > "$DATASET.mytaxain"
65
+ "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
66
+ perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
67
+ -g "../../../06.cds/$DATASET.gff3" -f gff3 \
68
+ -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
69
+ -K "$DATASET.mytaxa.krona" -u
70
+ else
71
+ # No GFF
72
+ perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
73
+ | sort -k 13 > "$DATASET.mytaxain"
74
+ "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
75
+ perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
76
+ -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
77
+ -K "$DATASET.mytaxa.krona" -u
78
+ fi
78
79
 
79
- # Execute Krona
80
- ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"
80
+ # Execute Krona
81
+ ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"
81
82
 
82
- # Gzip and cleanup
83
- [[ -e "../../../06.cds/$DATASET.gff2" ]] \
84
- && gzip -9 -f "../../../06.cds/$DATASET.gff2"
85
- [[ -e "../../../06.cds/$DATASET.gff3" ]] \
86
- && gzip -9 -f "../../../06.cds/$DATASET.gff3"
87
- gzip -9 -f "$DATASET.mytaxain"
88
- gzip -9 -f "$DATASET.blast"
89
- rm "$DATASET.daa"
83
+ # Gzip and cleanup
84
+ [[ -e "../../../06.cds/$DATASET.gff2" ]] \
85
+ && gzip -9 -f "../../../06.cds/$DATASET.gff2"
86
+ [[ -e "../../../06.cds/$DATASET.gff3" ]] \
87
+ && gzip -9 -f "../../../06.cds/$DATASET.gff3"
88
+ gzip -9 -f "$DATASET.mytaxain"
89
+ gzip -9 -f "$DATASET.blast"
90
+ rm "$DATASET.daa"
90
91
  fi
91
92
 
92
93
  # Finalize
93
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
94
+ miga date > "$DATASET.done"
94
95
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,86 +1,87 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="mytaxa_scan"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
10
11
  cd "$DIR"
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
- MT=$(dirname -- $(which MyTaxa))
14
+ miga date > "$DATASET.start"
15
+ MT=$(dirname -- "$(which MyTaxa)")
15
16
  TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
16
- trap "rm -rf $TMPDIR; exit" SIGHUP SIGINT SIGTERM
17
+ trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
17
18
 
18
19
  # Check type of dataset
19
20
  NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
20
- | wc -l | awk '{print $1}')
21
+ | wc -l | awk '{print $1}')
21
22
  if [[ "$NOMULTI" -eq "1" ]] ; then
22
- # Check requirements
23
- if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
24
- echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
25
- "no such file or directory" >&2
26
- exit 1
27
- fi
28
- if [[ ! -d "$MT/db" ]] ; then
29
- echo "Cannot locate the MyTaxa index: $MT/db:" \
30
- "no such file or directory" >&2
31
- exit 1
32
- fi
33
- if [[ ! -d "$MT/utils" ]] ; then
34
- echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
35
- "no such file or directory" >&2
36
- exit 1
37
- fi
23
+ # Check requirements
24
+ if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
25
+ echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
26
+ "no such file or directory" >&2
27
+ exit 1
28
+ fi
29
+ if [[ ! -d "$MT/db" ]] ; then
30
+ echo "Cannot locate the MyTaxa index: $MT/db:" \
31
+ "no such file or directory" >&2
32
+ exit 1
33
+ fi
34
+ if [[ ! -d "$MT/utils" ]] ; then
35
+ echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
36
+ "no such file or directory" >&2
37
+ exit 1
38
+ fi
38
39
 
39
- if [[ ! -s "$DATASET.mytaxa" ]] ; then
40
- # Execute search
41
- if [[ ! -s "$DATASET.blast" ]] ; then
42
- diamond blastp -q "../../../06.cds/$DATASET.faa" \
43
- -d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
44
- -a "$DATASET.daa" -t "$TMPDIR"
45
- diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
46
- fi
40
+ if [[ ! -s "$DATASET.mytaxa" ]] ; then
41
+ # Execute search
42
+ if [[ ! -s "$DATASET.blast" ]] ; then
43
+ diamond blastp -q "../../../06.cds/$DATASET.faa" \
44
+ -d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
45
+ -a "$DATASET.daa" -t "$TMPDIR"
46
+ diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
47
+ fi
47
48
 
48
- # Prepare MyTaxa input, execute MyTaxa, and generate profiles
49
- perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
50
- | sort -k 13 > "$DATASET.mytaxain"
51
- "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
52
- fi
53
- ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
54
- "$DATASET.mytaxa" "$DATASET.wintax"
55
- echo "
56
- source('$MIGA/utils/mytaxa_scan.R');
57
- pdf('$DATASET.pdf', 12, 7);
58
- mytaxa.scan('$DATASET.wintax');
59
- dev.off();
60
- " | R --vanilla
49
+ # Prepare MyTaxa input, execute MyTaxa, and generate profiles
50
+ perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
51
+ | sort -k 13 > "$DATASET.mytaxain"
52
+ "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
53
+ fi
54
+ ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
55
+ "$DATASET.mytaxa" "$DATASET.wintax"
56
+ echo "
57
+ source('$MIGA/utils/mytaxa_scan.R');
58
+ pdf('$DATASET.pdf', 12, 7);
59
+ mytaxa.scan('$DATASET.wintax');
60
+ dev.off();
61
+ " | R --vanilla
61
62
 
62
- # Extract genes from flagged regions
63
- [[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
64
- if [[ -e "$DATASET.wintax.regions" ]] ; then
65
- i=0
66
- for win in $(cat "$DATASET.wintax.regions") ; do
67
- let i=$i+1
68
- awk "NR==$win" "$DATASET.wintax.genes" | tr "\\t" "\\n" \
69
- > "$DATASET.reg/$i.ids"
70
- FastA.filter.pl -q "$DATASET.reg/$i.ids" \
71
- "../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
72
- done
73
- fi
63
+ # Extract genes from flagged regions
64
+ [[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
65
+ if [[ -e "$DATASET.wintax.regions" ]] ; then
66
+ i=0
67
+ for win in $(cat "$DATASET.wintax.regions") ; do
68
+ let i=$i+1
69
+ awk "NR==$win" "$DATASET.wintax.genes" | tr "\\t" "\\n" \
70
+ > "$DATASET.reg/$i.ids"
71
+ FastA.filter.pl -q "$DATASET.reg/$i.ids" \
72
+ "../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
73
+ done
74
+ fi
74
75
 
75
- # Clean
76
- [[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
77
- [[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
78
- && gzip -9 -f "$DATASET.blast"
79
- [[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
80
- && gzip -9 -f "$DATASET.mytaxain"
76
+ # Clean
77
+ [[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
78
+ [[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
79
+ && gzip -9 -f "$DATASET.blast"
80
+ [[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
81
+ && gzip -9 -f "$DATASET.mytaxain"
81
82
  fi
82
83
 
83
84
  # Finalize
84
85
  rm -R "$TMPDIR"
85
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
86
+ miga date > "$DATASET.done"
86
87
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/ogs.bash CHANGED
@@ -4,11 +4,12 @@ set -e
4
4
  SCRIPT="ogs"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  cd "$PROJECT/data/10.clades/03.ogs"
9
10
 
10
11
  # Initialize
11
- date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
12
+ miga date > "miga-project.start"
12
13
 
13
14
  echo -n "" > miga-project.log
14
15
  DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
@@ -16,25 +17,25 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
16
17
  # Extract RBMs
17
18
  [[ -d miga-project.rbm ]] || mkdir miga-project.rbm
18
19
  for i in $DS ; do
19
- for j in $DS ; do
20
- file="miga-project.rbm/$i-$j.rbm"
21
- [[ -s $file ]] && continue
22
- echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
23
- "where seq1='$i' and seq2='$j' ;" \
24
- | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
25
- > $file
26
- [[ -s $file ]] || rm $file
27
- done
28
- echo "$i" >> miga-project.log
20
+ for j in $DS ; do
21
+ file="miga-project.rbm/$i-$j.rbm"
22
+ [[ -s $file ]] && continue
23
+ echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
24
+ "where seq1='$i' and seq2='$j' ;" \
25
+ | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
26
+ > "$file"
27
+ [[ -s "$file" ]] || rm "$file"
28
+ done
29
+ echo "$i" >> miga-project.log
29
30
  done
30
31
 
31
32
  # Estimate OGs
32
- ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t $CORES
33
+ ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
33
34
  ogs.stats.rb -o miga-project.ogs -j miga-project.stats
34
35
 
35
36
  # Clean RBMs
36
37
  rm -rf miga-project.rbm
37
38
 
38
39
  # Finalize
39
- date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
40
+ miga date > "miga-project.done"
40
41
  miga add_result -P "$PROJECT" -r "$SCRIPT"
@@ -4,6 +4,7 @@ set -e
4
4
  SCRIPT="project_stats"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/90.stats"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
@@ -1,38 +1,34 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="read_quality"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  cd "$PROJECT/data/03.read_quality"
9
10
 
10
11
  b=$DATASET
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
+ miga date > "$DATASET.start"
14
15
 
15
16
  # FastQC
16
17
  [[ -d "$b.fastqc" ]] || mkdir "$b.fastqc"
17
- fastqc ../02.trimmed_reads/$b.[12].clipped.fastq -o $b.fastqc
18
+ fastqc "../02.trimmed_reads/$b".[12].clipped.fastq -o "$b.fastqc"
18
19
 
19
20
  # SolexaQA++
20
21
  [[ -d "$b.solexaqa" ]] || mkdir "$b.solexaqa"
21
- exists ../02.trimmed_reads/$b.[12].*.pdf \
22
- && mv ../02.trimmed_reads/$b.[12].*.pdf "$b.solexaqa/"
22
+ exists "../02.trimmed_reads/$b".[12].*.pdf \
23
+ && mv "../02.trimmed_reads/$b".[12].*.pdf "$b.solexaqa/"
23
24
 
24
25
  # Clean 02.trimmed_reads
25
- [[ -e "../02.trimmed_reads/$b.1.fastq_trimmed.segments" ]] \
26
- && rm ../02.trimmed_reads/$b.[12].fastq_trimmed.segments
27
- [[ -e "../02.trimmed_reads/$b.1.fastq.trimmed.paired" ]] \
28
- && rm ../02.trimmed_reads/$b.[12].fastq.trimmed.paired
29
- [[ -e "../02.trimmed_reads/$b.1.fastq.trimmed.single" ]] \
30
- && rm ../02.trimmed_reads/$b.[12].fastq.trimmed.single
31
- [[ -e "../02.trimmed_reads/$b.1.fastq.trimmed" ]] \
32
- && rm ../02.trimmed_reads/$b.[12].fastq.trimmed
33
- [[ -e "../02.trimmed_reads/$b.1.fastq" ]] \
34
- && rm ../02.trimmed_reads/$b.[12].fastq
26
+ rm -f "../02.trimmed_reads/$b".[12].fastq_trimmed.segments
27
+ rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.paired
28
+ rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.single
29
+ rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed
30
+ rm -f "../02.trimmed_reads/$b".[12].fastq
35
31
 
36
32
  # Finalize
37
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
33
+ miga date > "$DATASET.done"
38
34
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/ssu.bash CHANGED
@@ -1,36 +1,36 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="ssu"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
10
11
  cd "$DIR"
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
+ miga date > "$DATASET.start"
14
15
 
15
16
  fa="../../../05.assembly/$DATASET.LargeContigs.fna"
16
17
  if [[ -s $fa ]] ; then
17
- # Run barrnap
18
- barrnap --quiet --threads $CORES $fa | grep "^##gff\\|;product=16S " \
19
- > $DATASET.ssu.gff
20
-
21
- # Extract
22
- bedtools getfasta -s "-fi" $fa -bed $DATASET.ssu.gff -fo $DATASET.ssu.all.fa
23
- FastA.length.pl $DATASET.ssu.all.fa | sort -nr -k 2 | head -n 1 \
24
- | cut -f 1 > $DATASET.ssu.fa.id
25
- FastA.filter.pl $DATASET.ssu.fa.id $DATASET.ssu.all.fa > $DATASET.ssu.fa
26
- rm $DATASET.ssu.fa.id
27
- [[ -e "$fa.fai" ]] && rm "$fa.fai"
28
-
29
- # Gzip
30
- gzip -9 -f "$DATASET.ssu.gff"
31
- gzip -9 -f "$DATASET.ssu.all.fa"
18
+ # Run barrnap
19
+ barrnap --quiet --threads "$CORES" "$fa" | grep "^##gff\\|;product=16S " \
20
+ > "$DATASET.ssu.gff"
21
+ # Extract
22
+ bedtools getfasta -s "-fi" "$fa" -bed "$DATASET.ssu.gff" \
23
+ -fo "$DATASET.ssu.all.fa"
24
+ FastA.length.pl "$DATASET.ssu.all.fa" | sort -nr -k 2 | head -n 1 \
25
+ | cut -f 1 > "$DATASET.ssu.fa.id"
26
+ FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" > "$DATASET.ssu.fa"
27
+ rm "$DATASET.ssu.fa.id"
28
+ [[ -e "$fa.fai" ]] && rm "$fa.fai"
29
+ # Gzip
30
+ gzip -9 -f "$DATASET.ssu.gff"
31
+ gzip -9 -f "$DATASET.ssu.all.fa"
32
32
  fi
33
33
 
34
34
  # Finalize
35
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
35
+ miga date > "$DATASET.done"
36
36
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/stats.bash CHANGED
@@ -4,13 +4,14 @@ set -e
4
4
  SCRIPT="stats"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/90.stats"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
10
11
  cd "$DIR"
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
+ miga date > "$DATASET.start"
14
15
 
15
16
  # Calculate statistics
16
17
  for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
@@ -19,5 +20,5 @@ for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
19
20
  done
20
21
 
21
22
  # Finalize
22
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
23
+ miga date > "$DATASET.done"
23
24
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -4,23 +4,24 @@ set -e
4
4
  SCRIPT="subclades"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  cd "$PROJECT/data/10.clades/02.ani"
9
10
 
10
11
  # Initialize
11
- date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
12
+ miga date > "miga-project.start"
12
13
 
13
14
  # Run R code
14
- $MIGA/utils/subclades.R \
15
+ "$MIGA/utils/subclades.R" \
15
16
  ../../09.distances/03.ani/miga-project.txt.gz \
16
- miga-project $CORES
17
+ miga-project "$CORES"
17
18
  mv miga-project.nwk miga-project.ani.nwk
18
19
 
19
20
  # Compile
20
21
  ruby "$MIGA/utils/subclades-compile.rb" . \
21
- > miga-project.class.tsv \
22
- 2> miga-project.class.nwk
22
+ > miga-project.class.tsv \
23
+ 2> miga-project.class.nwk
23
24
 
24
25
  # Finalize
25
- date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
26
+ miga date > "miga-project.done"
26
27
  miga add_result -P "$PROJECT" -r "$SCRIPT"
@@ -1,48 +1,49 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="trimmed_fasta"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  cd "$PROJECT/data/04.trimmed_fasta"
9
10
 
10
11
  b=$DATASET
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
+ miga date > "$DATASET.start"
14
15
 
15
16
  # Gunzip (if necessary)
16
17
  for sis in 1 2 ; do
17
- [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq.gz" \
18
- && ! -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
18
+ [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq.gz" \
19
+ && ! -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
19
20
  && gunzip "../02.trimmed_reads/$b.$sis.clipped.fastq.gz"
20
21
  done
21
22
 
22
23
  # FastQ -> FastA
23
- cat ../02.trimmed_reads/$b.1.clipped.fastq | FastQ.toFastA.awk > $b.1.fasta
24
- if [[ -e ../02.trimmed_reads/$b.2.clipped.fastq ]] ; then
25
- cat ../02.trimmed_reads/$b.2.clipped.fastq | FastQ.toFastA.awk > $b.2.fasta
26
- FastA.interpose.pl $b.CoupledReads.fa $b.[12].fasta
27
- gzip -9 -f $b.2.fasta
28
- gzip -9 -f $b.1.fasta
29
- cat ../02.trimmed_reads/$b.[12].clipped.single.fastq | FastQ.toFastA.awk \
30
- > $b.SingleReads.fa
31
- gzip -9 -f $b.SingleReads.fa
24
+ FastQ.toFastA.awk < "../02.trimmed_reads/$b.1.clipped.fastq" > "$b.1.fasta"
25
+ if [[ -e "../02.trimmed_reads/$b.2.clipped.fastq" ]] ; then
26
+ FastQ.toFastA.awk < "../02.trimmed_reads/$b.2.clipped.fastq" > "$b.2.fasta"
27
+ FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
28
+ gzip -9 -f "$b.2.fasta"
29
+ gzip -9 -f "$b.1.fasta"
30
+ FastQ.toFastA.awk < "../02.trimmed_reads/$b".[12].clipped.single.fastq \
31
+ > "$b.SingleReads.fa"
32
+ gzip -9 -f "$b.SingleReads.fa"
32
33
  else
33
- mv $b.1.fasta $b.SingleReads.fa
34
+ mv "$b.1.fasta" "$b.SingleReads.fa"
34
35
  fi
35
36
 
36
37
  # Compress input at 01.raw_reads and 02.trimmed_reads
37
38
  for sis in 1 2 ; do
38
- [[ -e "../01.raw_reads/$b.$sis.fastq" ]] \
39
- && gzip -9 -f "../01.raw_reads/$b.$sis.fastq"
40
- [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
41
- && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.fastq"
42
- [[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
43
- && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
39
+ [[ -e "../01.raw_reads/$b.$sis.fastq" ]] \
40
+ && gzip -9 -f "../01.raw_reads/$b.$sis.fastq"
41
+ [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
42
+ && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.fastq"
43
+ [[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
44
+ && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
44
45
  done
45
46
 
46
47
  # Finalize
47
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
48
+ miga date > "$DATASET.done"
48
49
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"