miga-base 0.2.6.4 → 0.2.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/scripts/mytaxa.bash CHANGED
@@ -1,94 +1,95 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="mytaxa"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
10
11
  cd "$DIR"
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
- MT=$(dirname -- $(which MyTaxa))
14
+ miga date > "$DATASET.start"
15
+ MT=$(dirname -- "$(which MyTaxa)")
15
16
 
16
17
  # Check type of dataset
17
18
  MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
18
- | wc -l | awk '{print $1}')
19
+ | wc -l | awk '{print $1}')
19
20
  if [[ "$MULTI" -eq "1" ]] ; then
20
- # Check requirements
21
- if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
22
- echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
23
- "no such file or directory" >&2
24
- exit 1
25
- fi
26
- if [[ ! -d "$MT/db" ]] ; then
27
- echo "Cannot locate the MyTaxa index: $MT/db:" \
28
- "no such file or directory" >&2
29
- exit 1
30
- fi
31
- if [[ ! -d "$MT/utils" ]] ; then
32
- echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
33
- "no such file or directory" >&2
34
- exit 1
35
- fi
21
+ # Check requirements
22
+ if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
23
+ echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
24
+ "no such file or directory" >&2
25
+ exit 1
26
+ fi
27
+ if [[ ! -d "$MT/db" ]] ; then
28
+ echo "Cannot locate the MyTaxa index: $MT/db:" \
29
+ "no such file or directory" >&2
30
+ exit 1
31
+ fi
32
+ if [[ ! -d "$MT/utils" ]] ; then
33
+ echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
34
+ "no such file or directory" >&2
35
+ exit 1
36
+ fi
36
37
 
37
- # Execute search
38
- diamond blastp -q "../../../06.cds/$DATASET.faa" -d "$MT/AllGenomes.faa" \
39
- -a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
40
- diamond view -a "$DATASET.daa" -o "$DATASET.blast"
38
+ # Execute search
39
+ diamond blastp -q "../../../06.cds/$DATASET.faa" -d "$MT/AllGenomes.faa" \
40
+ -a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
41
+ diamond view -a "$DATASET.daa" -o "$DATASET.blast"
41
42
 
42
- # Prepare MyTaxa input, execute MyTaxa, and generate profiles
43
- [[ -e "../../../06.cds/$DATASET.gff2.gz" ]] \
44
- && [[ ! -e "../../../06.cds/$DATASET.gff2" ]] \
45
- && gunzip "../../../06.cds/$DATASET.gff2.gz"
46
- [[ -e "../../../06.cds/$DATASET.gff3.gz" ]] \
47
- && [[ ! -e "../../../06.cds/$DATASET.gff3" ]] \
48
- && gunzip "../../../06.cds/$DATASET.gff3.gz"
49
- if [[ -e "../../../06.cds/$DATASET.gff2" ]] ; then
50
- # GFF2
51
- perl "$MT/utils/infile_convert.pl" -f gff2 \
52
- "../../../06.cds/$DATASET.gff2" "$DATASET.blast" \
53
- | sort -k 13 > "$DATASET.mytaxain"
54
- "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
55
- perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
56
- -g "../../../06.cds/$DATASET.gff2" -f gff2 \
57
- -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
58
- -K "$DATASET.mytaxa.krona" -u
59
- elif [[ -e "../../../06.cds/$DATASET.gff3" ]] ; then
60
- # GFF3
61
- perl "$MT/utils/infile_convert.pl" -f gff3 \
62
- "../../../06.cds/$DATASET.gff3" "$DATASET.blast" | sort -k 13 \
63
- > "$DATASET.mytaxain"
64
- "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
65
- perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
66
- -g "../../../06.cds/$DATASET.gff3" -f gff3 \
67
- -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
68
- -K "$DATASET.mytaxa.krona" -u
69
- else
70
- # No GFF
71
- perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
72
- | sort -k 13 > "$DATASET.mytaxain"
73
- "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
74
- perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
75
- -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
76
- -K "$DATASET.mytaxa.krona" -u
77
- fi
43
+ # Prepare MyTaxa input, execute MyTaxa, and generate profiles
44
+ [[ -e "../../../06.cds/$DATASET.gff2.gz" ]] \
45
+ && [[ ! -e "../../../06.cds/$DATASET.gff2" ]] \
46
+ && gunzip "../../../06.cds/$DATASET.gff2.gz"
47
+ [[ -e "../../../06.cds/$DATASET.gff3.gz" ]] \
48
+ && [[ ! -e "../../../06.cds/$DATASET.gff3" ]] \
49
+ && gunzip "../../../06.cds/$DATASET.gff3.gz"
50
+ if [[ -e "../../../06.cds/$DATASET.gff2" ]] ; then
51
+ # GFF2
52
+ perl "$MT/utils/infile_convert.pl" -f gff2 \
53
+ "../../../06.cds/$DATASET.gff2" "$DATASET.blast" \
54
+ | sort -k 13 > "$DATASET.mytaxain"
55
+ "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
56
+ perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
57
+ -g "../../../06.cds/$DATASET.gff2" -f gff2 \
58
+ -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
59
+ -K "$DATASET.mytaxa.krona" -u
60
+ elif [[ -e "../../../06.cds/$DATASET.gff3" ]] ; then
61
+ # GFF3
62
+ perl "$MT/utils/infile_convert.pl" -f gff3 \
63
+ "../../../06.cds/$DATASET.gff3" "$DATASET.blast" | sort -k 13 \
64
+ > "$DATASET.mytaxain"
65
+ "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
66
+ perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
67
+ -g "../../../06.cds/$DATASET.gff3" -f gff3 \
68
+ -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
69
+ -K "$DATASET.mytaxa.krona" -u
70
+ else
71
+ # No GFF
72
+ perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
73
+ | sort -k 13 > "$DATASET.mytaxain"
74
+ "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
75
+ perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
76
+ -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
77
+ -K "$DATASET.mytaxa.krona" -u
78
+ fi
78
79
 
79
- # Execute Krona
80
- ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"
80
+ # Execute Krona
81
+ ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"
81
82
 
82
- # Gzip and cleanup
83
- [[ -e "../../../06.cds/$DATASET.gff2" ]] \
84
- && gzip -9 -f "../../../06.cds/$DATASET.gff2"
85
- [[ -e "../../../06.cds/$DATASET.gff3" ]] \
86
- && gzip -9 -f "../../../06.cds/$DATASET.gff3"
87
- gzip -9 -f "$DATASET.mytaxain"
88
- gzip -9 -f "$DATASET.blast"
89
- rm "$DATASET.daa"
83
+ # Gzip and cleanup
84
+ [[ -e "../../../06.cds/$DATASET.gff2" ]] \
85
+ && gzip -9 -f "../../../06.cds/$DATASET.gff2"
86
+ [[ -e "../../../06.cds/$DATASET.gff3" ]] \
87
+ && gzip -9 -f "../../../06.cds/$DATASET.gff3"
88
+ gzip -9 -f "$DATASET.mytaxain"
89
+ gzip -9 -f "$DATASET.blast"
90
+ rm "$DATASET.daa"
90
91
  fi
91
92
 
92
93
  # Finalize
93
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
94
+ miga date > "$DATASET.done"
94
95
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,86 +1,87 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="mytaxa_scan"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
10
11
  cd "$DIR"
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
- MT=$(dirname -- $(which MyTaxa))
14
+ miga date > "$DATASET.start"
15
+ MT=$(dirname -- "$(which MyTaxa)")
15
16
  TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
16
- trap "rm -rf $TMPDIR; exit" SIGHUP SIGINT SIGTERM
17
+ trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
17
18
 
18
19
  # Check type of dataset
19
20
  NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
20
- | wc -l | awk '{print $1}')
21
+ | wc -l | awk '{print $1}')
21
22
  if [[ "$NOMULTI" -eq "1" ]] ; then
22
- # Check requirements
23
- if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
24
- echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
25
- "no such file or directory" >&2
26
- exit 1
27
- fi
28
- if [[ ! -d "$MT/db" ]] ; then
29
- echo "Cannot locate the MyTaxa index: $MT/db:" \
30
- "no such file or directory" >&2
31
- exit 1
32
- fi
33
- if [[ ! -d "$MT/utils" ]] ; then
34
- echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
35
- "no such file or directory" >&2
36
- exit 1
37
- fi
23
+ # Check requirements
24
+ if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
25
+ echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
26
+ "no such file or directory" >&2
27
+ exit 1
28
+ fi
29
+ if [[ ! -d "$MT/db" ]] ; then
30
+ echo "Cannot locate the MyTaxa index: $MT/db:" \
31
+ "no such file or directory" >&2
32
+ exit 1
33
+ fi
34
+ if [[ ! -d "$MT/utils" ]] ; then
35
+ echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
36
+ "no such file or directory" >&2
37
+ exit 1
38
+ fi
38
39
 
39
- if [[ ! -s "$DATASET.mytaxa" ]] ; then
40
- # Execute search
41
- if [[ ! -s "$DATASET.blast" ]] ; then
42
- diamond blastp -q "../../../06.cds/$DATASET.faa" \
43
- -d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
44
- -a "$DATASET.daa" -t "$TMPDIR"
45
- diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
46
- fi
40
+ if [[ ! -s "$DATASET.mytaxa" ]] ; then
41
+ # Execute search
42
+ if [[ ! -s "$DATASET.blast" ]] ; then
43
+ diamond blastp -q "../../../06.cds/$DATASET.faa" \
44
+ -d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
45
+ -a "$DATASET.daa" -t "$TMPDIR"
46
+ diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
47
+ fi
47
48
 
48
- # Prepare MyTaxa input, execute MyTaxa, and generate profiles
49
- perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
50
- | sort -k 13 > "$DATASET.mytaxain"
51
- "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
52
- fi
53
- ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
54
- "$DATASET.mytaxa" "$DATASET.wintax"
55
- echo "
56
- source('$MIGA/utils/mytaxa_scan.R');
57
- pdf('$DATASET.pdf', 12, 7);
58
- mytaxa.scan('$DATASET.wintax');
59
- dev.off();
60
- " | R --vanilla
49
+ # Prepare MyTaxa input, execute MyTaxa, and generate profiles
50
+ perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
51
+ | sort -k 13 > "$DATASET.mytaxain"
52
+ "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
53
+ fi
54
+ ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
55
+ "$DATASET.mytaxa" "$DATASET.wintax"
56
+ echo "
57
+ source('$MIGA/utils/mytaxa_scan.R');
58
+ pdf('$DATASET.pdf', 12, 7);
59
+ mytaxa.scan('$DATASET.wintax');
60
+ dev.off();
61
+ " | R --vanilla
61
62
 
62
- # Extract genes from flagged regions
63
- [[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
64
- if [[ -e "$DATASET.wintax.regions" ]] ; then
65
- i=0
66
- for win in $(cat "$DATASET.wintax.regions") ; do
67
- let i=$i+1
68
- awk "NR==$win" "$DATASET.wintax.genes" | tr "\\t" "\\n" \
69
- > "$DATASET.reg/$i.ids"
70
- FastA.filter.pl -q "$DATASET.reg/$i.ids" \
71
- "../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
72
- done
73
- fi
63
+ # Extract genes from flagged regions
64
+ [[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
65
+ if [[ -e "$DATASET.wintax.regions" ]] ; then
66
+ i=0
67
+ for win in $(cat "$DATASET.wintax.regions") ; do
68
+ let i=$i+1
69
+ awk "NR==$win" "$DATASET.wintax.genes" | tr "\\t" "\\n" \
70
+ > "$DATASET.reg/$i.ids"
71
+ FastA.filter.pl -q "$DATASET.reg/$i.ids" \
72
+ "../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
73
+ done
74
+ fi
74
75
 
75
- # Clean
76
- [[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
77
- [[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
78
- && gzip -9 -f "$DATASET.blast"
79
- [[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
80
- && gzip -9 -f "$DATASET.mytaxain"
76
+ # Clean
77
+ [[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
78
+ [[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
79
+ && gzip -9 -f "$DATASET.blast"
80
+ [[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
81
+ && gzip -9 -f "$DATASET.mytaxain"
81
82
  fi
82
83
 
83
84
  # Finalize
84
85
  rm -R "$TMPDIR"
85
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
86
+ miga date > "$DATASET.done"
86
87
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/ogs.bash CHANGED
@@ -4,11 +4,12 @@ set -e
4
4
  SCRIPT="ogs"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  cd "$PROJECT/data/10.clades/03.ogs"
9
10
 
10
11
  # Initialize
11
- date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
12
+ miga date > "miga-project.start"
12
13
 
13
14
  echo -n "" > miga-project.log
14
15
  DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
@@ -16,25 +17,25 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
16
17
  # Extract RBMs
17
18
  [[ -d miga-project.rbm ]] || mkdir miga-project.rbm
18
19
  for i in $DS ; do
19
- for j in $DS ; do
20
- file="miga-project.rbm/$i-$j.rbm"
21
- [[ -s $file ]] && continue
22
- echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
23
- "where seq1='$i' and seq2='$j' ;" \
24
- | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
25
- > $file
26
- [[ -s $file ]] || rm $file
27
- done
28
- echo "$i" >> miga-project.log
20
+ for j in $DS ; do
21
+ file="miga-project.rbm/$i-$j.rbm"
22
+ [[ -s $file ]] && continue
23
+ echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
24
+ "where seq1='$i' and seq2='$j' ;" \
25
+ | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
26
+ > "$file"
27
+ [[ -s "$file" ]] || rm "$file"
28
+ done
29
+ echo "$i" >> miga-project.log
29
30
  done
30
31
 
31
32
  # Estimate OGs
32
- ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t $CORES
33
+ ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
33
34
  ogs.stats.rb -o miga-project.ogs -j miga-project.stats
34
35
 
35
36
  # Clean RBMs
36
37
  rm -rf miga-project.rbm
37
38
 
38
39
  # Finalize
39
- date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
40
+ miga date > "miga-project.done"
40
41
  miga add_result -P "$PROJECT" -r "$SCRIPT"
@@ -4,6 +4,7 @@ set -e
4
4
  SCRIPT="project_stats"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/90.stats"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
@@ -1,38 +1,34 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="read_quality"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  cd "$PROJECT/data/03.read_quality"
9
10
 
10
11
  b=$DATASET
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
+ miga date > "$DATASET.start"
14
15
 
15
16
  # FastQC
16
17
  [[ -d "$b.fastqc" ]] || mkdir "$b.fastqc"
17
- fastqc ../02.trimmed_reads/$b.[12].clipped.fastq -o $b.fastqc
18
+ fastqc "../02.trimmed_reads/$b".[12].clipped.fastq -o "$b.fastqc"
18
19
 
19
20
  # SolexaQA++
20
21
  [[ -d "$b.solexaqa" ]] || mkdir "$b.solexaqa"
21
- exists ../02.trimmed_reads/$b.[12].*.pdf \
22
- && mv ../02.trimmed_reads/$b.[12].*.pdf "$b.solexaqa/"
22
+ exists "../02.trimmed_reads/$b".[12].*.pdf \
23
+ && mv "../02.trimmed_reads/$b".[12].*.pdf "$b.solexaqa/"
23
24
 
24
25
  # Clean 02.trimmed_reads
25
- [[ -e "../02.trimmed_reads/$b.1.fastq_trimmed.segments" ]] \
26
- && rm ../02.trimmed_reads/$b.[12].fastq_trimmed.segments
27
- [[ -e "../02.trimmed_reads/$b.1.fastq.trimmed.paired" ]] \
28
- && rm ../02.trimmed_reads/$b.[12].fastq.trimmed.paired
29
- [[ -e "../02.trimmed_reads/$b.1.fastq.trimmed.single" ]] \
30
- && rm ../02.trimmed_reads/$b.[12].fastq.trimmed.single
31
- [[ -e "../02.trimmed_reads/$b.1.fastq.trimmed" ]] \
32
- && rm ../02.trimmed_reads/$b.[12].fastq.trimmed
33
- [[ -e "../02.trimmed_reads/$b.1.fastq" ]] \
34
- && rm ../02.trimmed_reads/$b.[12].fastq
26
+ rm -f "../02.trimmed_reads/$b".[12].fastq_trimmed.segments
27
+ rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.paired
28
+ rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.single
29
+ rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed
30
+ rm -f "../02.trimmed_reads/$b".[12].fastq
35
31
 
36
32
  # Finalize
37
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
33
+ miga date > "$DATASET.done"
38
34
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/ssu.bash CHANGED
@@ -1,36 +1,36 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="ssu"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
10
11
  cd "$DIR"
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
+ miga date > "$DATASET.start"
14
15
 
15
16
  fa="../../../05.assembly/$DATASET.LargeContigs.fna"
16
17
  if [[ -s $fa ]] ; then
17
- # Run barrnap
18
- barrnap --quiet --threads $CORES $fa | grep "^##gff\\|;product=16S " \
19
- > $DATASET.ssu.gff
20
-
21
- # Extract
22
- bedtools getfasta -s "-fi" $fa -bed $DATASET.ssu.gff -fo $DATASET.ssu.all.fa
23
- FastA.length.pl $DATASET.ssu.all.fa | sort -nr -k 2 | head -n 1 \
24
- | cut -f 1 > $DATASET.ssu.fa.id
25
- FastA.filter.pl $DATASET.ssu.fa.id $DATASET.ssu.all.fa > $DATASET.ssu.fa
26
- rm $DATASET.ssu.fa.id
27
- [[ -e "$fa.fai" ]] && rm "$fa.fai"
28
-
29
- # Gzip
30
- gzip -9 -f "$DATASET.ssu.gff"
31
- gzip -9 -f "$DATASET.ssu.all.fa"
18
+ # Run barrnap
19
+ barrnap --quiet --threads "$CORES" "$fa" | grep "^##gff\\|;product=16S " \
20
+ > "$DATASET.ssu.gff"
21
+ # Extract
22
+ bedtools getfasta -s "-fi" "$fa" -bed "$DATASET.ssu.gff" \
23
+ -fo "$DATASET.ssu.all.fa"
24
+ FastA.length.pl "$DATASET.ssu.all.fa" | sort -nr -k 2 | head -n 1 \
25
+ | cut -f 1 > "$DATASET.ssu.fa.id"
26
+ FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" > "$DATASET.ssu.fa"
27
+ rm "$DATASET.ssu.fa.id"
28
+ [[ -e "$fa.fai" ]] && rm "$fa.fai"
29
+ # Gzip
30
+ gzip -9 -f "$DATASET.ssu.gff"
31
+ gzip -9 -f "$DATASET.ssu.all.fa"
32
32
  fi
33
33
 
34
34
  # Finalize
35
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
35
+ miga date > "$DATASET.done"
36
36
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/stats.bash CHANGED
@@ -4,13 +4,14 @@ set -e
4
4
  SCRIPT="stats"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  DIR="$PROJECT/data/90.stats"
9
10
  [[ -d "$DIR" ]] || mkdir -p "$DIR"
10
11
  cd "$DIR"
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
+ miga date > "$DATASET.start"
14
15
 
15
16
  # Calculate statistics
16
17
  for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
@@ -19,5 +20,5 @@ for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
19
20
  done
20
21
 
21
22
  # Finalize
22
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
23
+ miga date > "$DATASET.done"
23
24
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -4,23 +4,24 @@ set -e
4
4
  SCRIPT="subclades"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  cd "$PROJECT/data/10.clades/02.ani"
9
10
 
10
11
  # Initialize
11
- date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
12
+ miga date > "miga-project.start"
12
13
 
13
14
  # Run R code
14
- $MIGA/utils/subclades.R \
15
+ "$MIGA/utils/subclades.R" \
15
16
  ../../09.distances/03.ani/miga-project.txt.gz \
16
- miga-project $CORES
17
+ miga-project "$CORES"
17
18
  mv miga-project.nwk miga-project.ani.nwk
18
19
 
19
20
  # Compile
20
21
  ruby "$MIGA/utils/subclades-compile.rb" . \
21
- > miga-project.class.tsv \
22
- 2> miga-project.class.nwk
22
+ > miga-project.class.tsv \
23
+ 2> miga-project.class.nwk
23
24
 
24
25
  # Finalize
25
- date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
26
+ miga date > "miga-project.done"
26
27
  miga add_result -P "$PROJECT" -r "$SCRIPT"
@@ -1,48 +1,49 @@
1
1
  #!/bin/bash
2
- # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
4
  SCRIPT="trimmed_fasta"
5
5
  echo "MiGA: $MIGA"
6
6
  echo "Project: $PROJECT"
7
+ # shellcheck source=scripts/miga.bash
7
8
  source "$MIGA/scripts/miga.bash" || exit 1
8
9
  cd "$PROJECT/data/04.trimmed_fasta"
9
10
 
10
11
  b=$DATASET
11
12
 
12
13
  # Initialize
13
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
+ miga date > "$DATASET.start"
14
15
 
15
16
  # Gunzip (if necessary)
16
17
  for sis in 1 2 ; do
17
- [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq.gz" \
18
- && ! -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
18
+ [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq.gz" \
19
+ && ! -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
19
20
  && gunzip "../02.trimmed_reads/$b.$sis.clipped.fastq.gz"
20
21
  done
21
22
 
22
23
  # FastQ -> FastA
23
- cat ../02.trimmed_reads/$b.1.clipped.fastq | FastQ.toFastA.awk > $b.1.fasta
24
- if [[ -e ../02.trimmed_reads/$b.2.clipped.fastq ]] ; then
25
- cat ../02.trimmed_reads/$b.2.clipped.fastq | FastQ.toFastA.awk > $b.2.fasta
26
- FastA.interpose.pl $b.CoupledReads.fa $b.[12].fasta
27
- gzip -9 -f $b.2.fasta
28
- gzip -9 -f $b.1.fasta
29
- cat ../02.trimmed_reads/$b.[12].clipped.single.fastq | FastQ.toFastA.awk \
30
- > $b.SingleReads.fa
31
- gzip -9 -f $b.SingleReads.fa
24
+ FastQ.toFastA.awk < "../02.trimmed_reads/$b.1.clipped.fastq" > "$b.1.fasta"
25
+ if [[ -e "../02.trimmed_reads/$b.2.clipped.fastq" ]] ; then
26
+ FastQ.toFastA.awk < "../02.trimmed_reads/$b.2.clipped.fastq" > "$b.2.fasta"
27
+ FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
28
+ gzip -9 -f "$b.2.fasta"
29
+ gzip -9 -f "$b.1.fasta"
30
+ FastQ.toFastA.awk < "../02.trimmed_reads/$b".[12].clipped.single.fastq \
31
+ > "$b.SingleReads.fa"
32
+ gzip -9 -f "$b.SingleReads.fa"
32
33
  else
33
- mv $b.1.fasta $b.SingleReads.fa
34
+ mv "$b.1.fasta" "$b.SingleReads.fa"
34
35
  fi
35
36
 
36
37
  # Compress input at 01.raw_reads and 02.trimmed_reads
37
38
  for sis in 1 2 ; do
38
- [[ -e "../01.raw_reads/$b.$sis.fastq" ]] \
39
- && gzip -9 -f "../01.raw_reads/$b.$sis.fastq"
40
- [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
41
- && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.fastq"
42
- [[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
43
- && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
39
+ [[ -e "../01.raw_reads/$b.$sis.fastq" ]] \
40
+ && gzip -9 -f "../01.raw_reads/$b.$sis.fastq"
41
+ [[ -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
42
+ && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.fastq"
43
+ [[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
44
+ && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
44
45
  done
45
46
 
46
47
  # Finalize
47
- date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
48
+ miga date > "$DATASET.done"
48
49
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"