miga-base 0.2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +7 -0
  2. data/README.md +351 -0
  3. data/actions/add_result +61 -0
  4. data/actions/add_taxonomy +86 -0
  5. data/actions/create_dataset +62 -0
  6. data/actions/create_project +70 -0
  7. data/actions/daemon +69 -0
  8. data/actions/download_dataset +77 -0
  9. data/actions/find_datasets +63 -0
  10. data/actions/import_datasets +86 -0
  11. data/actions/index_taxonomy +71 -0
  12. data/actions/list_datasets +83 -0
  13. data/actions/list_files +67 -0
  14. data/actions/unlink_dataset +52 -0
  15. data/bin/miga +48 -0
  16. data/lib/miga/daemon.rb +178 -0
  17. data/lib/miga/dataset.rb +286 -0
  18. data/lib/miga/gui.rb +289 -0
  19. data/lib/miga/metadata.rb +74 -0
  20. data/lib/miga/project.rb +268 -0
  21. data/lib/miga/remote_dataset.rb +154 -0
  22. data/lib/miga/result.rb +102 -0
  23. data/lib/miga/tax_index.rb +70 -0
  24. data/lib/miga/taxonomy.rb +107 -0
  25. data/lib/miga.rb +83 -0
  26. data/scripts/_distances_noref_nomulti.bash +86 -0
  27. data/scripts/_distances_ref_nomulti.bash +105 -0
  28. data/scripts/aai_distances.bash +40 -0
  29. data/scripts/ani_distances.bash +39 -0
  30. data/scripts/assembly.bash +38 -0
  31. data/scripts/cds.bash +45 -0
  32. data/scripts/clade_finding.bash +27 -0
  33. data/scripts/distances.bash +30 -0
  34. data/scripts/essential_genes.bash +29 -0
  35. data/scripts/haai_distances.bash +39 -0
  36. data/scripts/init.bash +211 -0
  37. data/scripts/miga.bash +12 -0
  38. data/scripts/mytaxa.bash +93 -0
  39. data/scripts/mytaxa_scan.bash +85 -0
  40. data/scripts/ogs.bash +36 -0
  41. data/scripts/read_quality.bash +37 -0
  42. data/scripts/ssu.bash +35 -0
  43. data/scripts/subclades.bash +26 -0
  44. data/scripts/trimmed_fasta.bash +47 -0
  45. data/scripts/trimmed_reads.bash +57 -0
  46. data/utils/adapters.fa +302 -0
  47. data/utils/mytaxa_scan.R +89 -0
  48. data/utils/mytaxa_scan.rb +58 -0
  49. data/utils/requirements.txt +19 -0
  50. data/utils/subclades-compile.rb +48 -0
  51. data/utils/subclades.R +171 -0
  52. metadata +185 -0
data/scripts/mytaxa.bash ADDED
@@ -0,0 +1,93 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Runs a DIAMOND search and MyTaxa on the predicted proteins of $DATASET,
# builds a Krona plot, and registers the `mytaxa` result. The analysis is
# only executed when `miga list_datasets --multi` reports the dataset; the
# start/done stamps and the result registration always happen.
# Also reads $DATASET, which this script does not set itself.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Abort instead of writing results into whatever directory we started in
cd "$DIR" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
# Directory holding the MyTaxa executable; stays empty if it is not in PATH
MT=""
if mytaxa_bin=$(command -v MyTaxa) ; then
  MT=$(dirname -- "$mytaxa_bin")
fi

# Check type of dataset
MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
  | wc -l | awk '{print $1}')
if [[ "$MULTI" -eq "1" ]] ; then
  # Check requirements
  if [[ -z "$MT" ]] ; then
    echo "Cannot locate MyTaxa: executable not found in the PATH" >&2
    exit 1
  fi
  if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
    echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
      "no such file or directory" >&2
    exit 1
  fi
  if [[ ! -d "$MT/db" ]] ; then
    echo "Cannot locate the MyTaxa index: $MT/db:" \
      "no such file or directory" >&2
    exit 1
  fi
  if [[ ! -d "$MT/utils" ]] ; then
    echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
      "no such file or directory" >&2
    exit 1
  fi

  # Common prefix of the CDS files of this dataset
  cds="../../../06.cds/$DATASET"

  # Execute search
  diamond blastp -q "$cds.faa" -d "$MT/AllGenomes.faa" \
    -a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
  diamond view -a "$DATASET.daa" -o "$DATASET.blast"

  # Uncompress the gene coordinates (if necessary)
  for fmt in gff2 gff3 ; do
    if [[ -e "$cds.$fmt.gz" && ! -e "$cds.$fmt" ]] ; then
      gunzip "$cds.$fmt.gz"
    fi
  done

  # Pick the GFF to use: GFF2 is preferred over GFF3. Without any GFF the
  # converter gets format "no" plus a placeholder in the GFF position, and
  # the distribution script gets no -g/-f options at all.
  gff_fmt="no"
  gff_file="LOREM_IPSUM"
  dist_gff_args=()
  for fmt in gff2 gff3 ; do
    if [[ -e "$cds.$fmt" ]] ; then
      gff_fmt="$fmt"
      gff_file="$cds.$fmt"
      dist_gff_args=(-g "$gff_file" -f "$gff_fmt")
      break
    fi
  done

  # Prepare MyTaxa input, execute MyTaxa, and generate profiles
  perl "$MT/utils/infile_convert.pl" -f "$gff_fmt" \
    "$gff_file" "$DATASET.blast" \
    | sort -k 13 > "$DATASET.mytaxain"
  "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
  perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
    "${dist_gff_args[@]}" \
    -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
    -K "$DATASET.mytaxa.krona" -u

  # Execute Krona
  ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"

  # Gzip and cleanup
  for fmt in gff2 gff3 ; do
    [[ -e "$cds.$fmt" ]] && gzip -9 -f "$cds.$fmt"
  done
  gzip -9 -f "$DATASET.mytaxain"
  gzip -9 -f "$DATASET.blast"
  rm "$DATASET.daa"
fi

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa
data/scripts/mytaxa_scan.bash ADDED
@@ -0,0 +1,85 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Runs the MyTaxa scan for $DATASET: DIAMOND search and MyTaxa (both skipped
# when their outputs already exist and are non-empty), the windowed scan via
# the bundled Ruby and R utilities, and extraction of the genes in flagged
# regions. The analysis only runs when `miga list_datasets --no-multi`
# reports the dataset; the stamps and result registration always happen.
# Also reads $DATASET, which this script does not set itself.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Abort instead of writing results into whatever directory we started in
cd "$DIR" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
# Directory holding the MyTaxa executable; stays empty if it is not in PATH
MT=""
if mytaxa_bin=$(command -v MyTaxa) ; then
  MT=$(dirname -- "$mytaxa_bin")
fi
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX) || exit 1
# Remove the scratch directory on every exit path (including the early
# `exit 1` below), not only when a signal arrives
cleanup() { rm -rf -- "$TMPDIR" ; }
trap cleanup EXIT
trap 'cleanup; exit' SIGHUP SIGINT SIGTERM

# Check type of dataset
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
  | wc -l | awk '{print $1}')
if [[ "$NOMULTI" -eq "1" ]] ; then
  # Check requirements
  if [[ -z "$MT" ]] ; then
    echo "Cannot locate MyTaxa: executable not found in the PATH" >&2
    exit 1
  fi
  if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
    echo "Cannot locate the database: $MT/AllGenomes.faa.dmnd:" \
      "no such file or directory" >&2
    exit 1
  fi
  if [[ ! -d "$MT/db" ]] ; then
    echo "Cannot locate the MyTaxa index: $MT/db:" \
      "no such file or directory" >&2
    exit 1
  fi
  if [[ ! -d "$MT/utils" ]] ; then
    echo "Cannot locate the MyTaxa utilities: $MT/utils:" \
      "no such file or directory" >&2
    exit 1
  fi

  if [[ ! -s "$DATASET.mytaxa" ]] ; then
    # Execute search
    if [[ ! -s "$DATASET.blast" ]] ; then
      diamond blastp -q "../../../06.cds/$DATASET.faa" \
        -d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
        -a "$DATASET.daa" -t "$TMPDIR"
      diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
    fi

    # Prepare MyTaxa input, execute MyTaxa, and generate profiles
    perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
      | sort -k 13 > "$DATASET.mytaxain"
    "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
  fi
  ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
    "$DATASET.mytaxa" "$DATASET.wintax"
  echo "
  source('$MIGA/utils/mytaxa_scan.R');
  pdf('$DATASET.pdf', 12, 7);
  mytaxa.scan('$DATASET.wintax');
  dev.off();
  " | R --vanilla

  # Extract genes from flagged regions
  [[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
  if [[ -e "$DATASET.wintax.regions" ]] ; then
    i=0
    # Each whitespace-separated entry is used as a row number into
    # $DATASET.wintax.genes; that row's tab-separated fields become the
    # ID list of region $i
    for win in $(< "$DATASET.wintax.regions") ; do
      i=$((i + 1))
      awk -v row="$win" 'NR == row' "$DATASET.wintax.genes" | tr '\t' '\n' \
        > "$DATASET.reg/$i.ids"
      FastA.filter.pl -q "$DATASET.reg/$i.ids" \
        "../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
    done
  fi

  # Clean
  [[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
  [[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
    && gzip -9 -f "$DATASET.blast"
  [[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
    && gzip -9 -f "$DATASET.mytaxain"
fi

# Finalize ($TMPDIR is removed by the EXIT trap)
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa_scan
data/scripts/ogs.bash ADDED
@@ -0,0 +1,36 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Project-wide step: exports the reciprocal best matches stored in the
# per-dataset AAI SQLite databases as one file per dataset pair, estimates
# orthology groups from them, and registers the `ogs` result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Abort instead of creating and removing files in the wrong directory
cd "$PROJECT/data/10.clades/03.ogs" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Start from an empty log
: > miga-project.log
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)

# Extract RBMs
[[ -d miga-project.rbm ]] || mkdir miga-project.rbm
# $DS is split on whitespace on purpose: one dataset name per word
for i in $DS ; do
  for j in $DS ; do
    file="miga-project.rbm/$i-$j.rbm"
    # Keep pairs exported by a previous run
    [[ -s "$file" ]] && continue
    echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
      "where seq1='$i' and seq2='$j' ;" \
      | sqlite3 "../../09.distances/02.aai/$i.db" | tr '\|' '\t' \
      > "$file"
    # Do not leave empty files behind for pairs without matches
    [[ -s "$file" ]] || rm "$file"
  done
  # Record each dataset once all of its pairs have been processed
  echo "$i" >> miga-project.log
done

# Estimate OGs
ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
ogs.stats.rb -o miga-project.ogs -j miga-project.stats

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r ogs
data/scripts/read_quality.bash ADDED
@@ -0,0 +1,37 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Runs FastQC on the clipped reads of $DATASET, collects the PDF reports
# found next to the trimmed reads, deletes trimming intermediates from
# 02.trimmed_reads, and registers the `read_quality` result.
# Also reads $DATASET, which this script does not set itself.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# The cleanup below removes files through relative paths, so never continue
# from an unexpected working directory
cd "$PROJECT/data/03.read_quality" || exit 1

b=$DATASET
trimmed="../02.trimmed_reads/$b"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# FastQC
[[ -d "$b.fastqc" ]] || mkdir "$b.fastqc"
fastqc "$trimmed".[12].clipped.fastq -o "$b.fastqc"

# SolexaQA++
[[ -d "$b.solexaqa" ]] || mkdir "$b.solexaqa"
exists "$trimmed".[12].*.pdf \
  && mv "$trimmed".[12].*.pdf "$b.solexaqa/"

# Clean 02.trimmed_reads
# The presence of the first mate's file decides whether the files of both
# mates with that suffix are removed
for suffix in \
  fastq_trimmed.segments \
  fastq.trimmed.paired \
  fastq.trimmed.single \
  fastq.trimmed \
  fastq
do
  [[ -e "$trimmed.1.$suffix" ]] \
    && rm "$trimmed".[12]."$suffix"
done

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r read_quality
data/scripts/ssu.bash ADDED
@@ -0,0 +1,35 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Detects 16S rRNA genes in the assembly of $DATASET with barrnap, extracts
# their sequences, keeps the longest one as $DATASET.ssu.fa, and registers
# the `ssu` result. Nothing is searched when the assembly is missing or
# empty, but the stamps and the result registration still happen.
# Also reads $DATASET, which this script does not set itself.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Abort instead of writing results into whatever directory we started in
cd "$DIR" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

fa="../../../05.assembly/$DATASET.LargeContigs.fna"
if [[ -s "$fa" ]] ; then
  # Run barrnap, keeping only the GFF header and the 16S features
  barrnap --quiet --threads "$CORES" "$fa" \
    | grep '^##gff\|;product=16S ' \
    > "$DATASET.ssu.gff"

  # Extract
  bedtools getfasta -s "-fi" "$fa" -bed "$DATASET.ssu.gff" \
    -fo "$DATASET.ssu.all.fa"
  # ID of the longest extracted sequence
  FastA.length.pl "$DATASET.ssu.all.fa" | sort -nr -k 2 | head -n 1 \
    | cut -f 1 > "$DATASET.ssu.fa.id"
  FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" \
    > "$DATASET.ssu.fa"
  rm "$DATASET.ssu.fa.id"
  # Drop the FASTA index left next to the assembly, if any
  [[ -e "$fa.fai" ]] && rm "$fa.fai"

  # Gzip
  gzip -9 -f "$DATASET.ssu.gff"
  gzip -9 -f "$DATASET.ssu.all.fa"
fi

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r ssu
data/scripts/subclades.bash ADDED
@@ -0,0 +1,26 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Project-wide step: runs the bundled subclades R routine on the project's
# ANI table, compiles its output with the bundled Ruby utility, and
# registers the `subclades` result.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Abort instead of writing results into whatever directory we started in
cd "$PROJECT/data/10.clades/02.ani" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Run R code
echo "
source('$MIGA/utils/subclades.R');
subclades('../../09.distances/03.ani/miga-project.txt.gz',
  'miga-project', $CORES);
" | R --vanilla

# Compile
# stdout is stored as the classification table and stderr as the .nwk file
# NOTE(review): presumably the utility emits the tree on stderr -- confirm
ruby "$MIGA/utils/subclades-compile.rb" . \
  > miga-project.class.tsv \
  2> miga-project.class.nwk

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r subclades
data/scripts/trimmed_fasta.bash ADDED
@@ -0,0 +1,47 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Converts the clipped reads of $DATASET from FastQ to FastA (interposed
# coupled reads plus single reads when a second mate exists, single reads
# only otherwise), compresses the FastQ inputs, and registers the
# `trimmed_fasta` result.
# Also reads $DATASET, which this script does not set itself.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Abort instead of writing results into whatever directory we started in
cd "$PROJECT/data/04.trimmed_fasta" || exit 1

b=$DATASET
raw="../01.raw_reads/$b"
trimmed="../02.trimmed_reads/$b"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Gunzip (if necessary)
# The single reads are included because this script compresses them at the
# end, so a re-run would otherwise not find them
for sis in 1 2 ; do
  for kind in clipped clipped.single ; do
    [[ -e "$trimmed.$sis.$kind.fastq.gz" \
        && ! -e "$trimmed.$sis.$kind.fastq" ]] \
      && gunzip "$trimmed.$sis.$kind.fastq.gz"
  done
done

# FastQ -> FastA
cat "$trimmed.1.clipped.fastq" | FastQ.toFastA.awk > "$b.1.fasta"
if [[ -e "$trimmed.2.clipped.fastq" ]] ; then
  cat "$trimmed.2.clipped.fastq" | FastQ.toFastA.awk > "$b.2.fasta"
  FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
  gzip -9 -f "$b.2.fasta"
  gzip -9 -f "$b.1.fasta"
  # Only pass files that exist; with no single reads at all the converter
  # gets empty input instead of an unmatched glob pattern
  singles=()
  for f in "$trimmed".[12].clipped.single.fastq ; do
    [[ -e "$f" ]] && singles+=("$f")
  done
  (( ${#singles[@]} > 0 )) || singles=(/dev/null)
  cat "${singles[@]}" | FastQ.toFastA.awk \
    > "$b.SingleReads.fa"
  gzip -9 -f "$b.SingleReads.fa"
else
  mv "$b.1.fasta" "$b.SingleReads.fa"
fi

# Compress input at 01.raw_reads and 02.trimmed_reads
for sis in 1 2 ; do
  [[ -e "$raw.$sis.fastq" ]] \
    && gzip -9 -f "$raw.$sis.fastq"
  [[ -e "$trimmed.$sis.clipped.fastq" ]] \
    && gzip -9 -f "$trimmed.$sis.clipped.fastq"
  [[ -e "$trimmed.$sis.clipped.single.fastq" ]] \
    && gzip -9 -f "$trimmed.$sis.clipped.single.fastq"
done

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
data/scripts/trimmed_reads.bash ADDED
@@ -0,0 +1,57 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
#
# Tags the raw reads of $DATASET, quality-trims them with SolexaQA++, clips
# adapters with scythe, and registers the `trimmed_reads` result. Paired
# processing is used when the trimming step produced paired output for the
# second mate; otherwise only the first mate is processed.
# Also reads $DATASET, which this script does not set itself.
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# The cleanup below deletes "$b".[12].* in the current directory, so never
# continue from an unexpected working directory
cd "$PROJECT/data/02.trimmed_reads" || exit 1

b=$DATASET
raw="../01.raw_reads/$b"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Unzip (if necessary)
for sis in 1 2 ; do
  [[ -e "$raw.$sis.fastq.gz" && ! -e "$raw.$sis.fastq" ]] \
    && gunzip "$raw.$sis.fastq.gz"
done

# Clean existing files
exists "$b".[12].* && rm "$b".[12].*

# Tag
FastQ.tag.rb -i "$raw.1.fastq" -p "$b-" -s "/1" -o "$b.1.fastq"
[[ -e "$raw.2.fastq" ]] \
  && FastQ.tag.rb -i "$raw.2.fastq" -p "$b-" -s "/2" -o "$b.2.fastq"

# Trim
SolexaQA++ dynamictrim "$b".[12].fastq -h 20 -d .
SolexaQA++ lengthsort "$b".[12].fastq.trimmed -l 50 -d .

# Clean adapters
if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
  for sis in 1 2 ; do
    scythe -a "$MIGA/utils/adapters.fa" "$b.$sis.fastq.trimmed.paired" \
      > "$b.$sis.clipped.all.fastq"
  done
  SolexaQA++ lengthsort "$b".[12].clipped.all.fastq -l 50 -d .
  rm "$b".[12].clipped.all.fastq
  # Single reads are optional; paired reads are expected for both mates
  for sis in 1 2 ; do
    [[ -e "$b.$sis.clipped.all.fastq.single" ]] \
      && mv "$b.$sis.clipped.all.fastq.single" "$b.$sis.clipped.single.fastq"
  done
  for sis in 1 2 ; do
    mv "$b.$sis.clipped.all.fastq.paired" "$b.$sis.clipped.fastq"
  done
  # Best effort: the summary may not exist
  rm "$b.1.clipped.all.fastq.summary.txt" &>/dev/null
else
  scythe -a "$MIGA/utils/adapters.fa" "$b.1.fastq.trimmed.single" \
    > "$b.1.clipped.all.fastq"
  SolexaQA++ lengthsort "$b.1.clipped.all.fastq" -l 50 -d .
  mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.fastq"
fi
# Best effort: there may be no discarded-read files
rm "$b".[12].*.discard &>/dev/null

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads