miga-base 0.2.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +351 -0
  3. data/actions/add_result +61 -0
  4. data/actions/add_taxonomy +86 -0
  5. data/actions/create_dataset +62 -0
  6. data/actions/create_project +70 -0
  7. data/actions/daemon +69 -0
  8. data/actions/download_dataset +77 -0
  9. data/actions/find_datasets +63 -0
  10. data/actions/import_datasets +86 -0
  11. data/actions/index_taxonomy +71 -0
  12. data/actions/list_datasets +83 -0
  13. data/actions/list_files +67 -0
  14. data/actions/unlink_dataset +52 -0
  15. data/bin/miga +48 -0
  16. data/lib/miga/daemon.rb +178 -0
  17. data/lib/miga/dataset.rb +286 -0
  18. data/lib/miga/gui.rb +289 -0
  19. data/lib/miga/metadata.rb +74 -0
  20. data/lib/miga/project.rb +268 -0
  21. data/lib/miga/remote_dataset.rb +154 -0
  22. data/lib/miga/result.rb +102 -0
  23. data/lib/miga/tax_index.rb +70 -0
  24. data/lib/miga/taxonomy.rb +107 -0
  25. data/lib/miga.rb +83 -0
  26. data/scripts/_distances_noref_nomulti.bash +86 -0
  27. data/scripts/_distances_ref_nomulti.bash +105 -0
  28. data/scripts/aai_distances.bash +40 -0
  29. data/scripts/ani_distances.bash +39 -0
  30. data/scripts/assembly.bash +38 -0
  31. data/scripts/cds.bash +45 -0
  32. data/scripts/clade_finding.bash +27 -0
  33. data/scripts/distances.bash +30 -0
  34. data/scripts/essential_genes.bash +29 -0
  35. data/scripts/haai_distances.bash +39 -0
  36. data/scripts/init.bash +211 -0
  37. data/scripts/miga.bash +12 -0
  38. data/scripts/mytaxa.bash +93 -0
  39. data/scripts/mytaxa_scan.bash +85 -0
  40. data/scripts/ogs.bash +36 -0
  41. data/scripts/read_quality.bash +37 -0
  42. data/scripts/ssu.bash +35 -0
  43. data/scripts/subclades.bash +26 -0
  44. data/scripts/trimmed_fasta.bash +47 -0
  45. data/scripts/trimmed_reads.bash +57 -0
  46. data/utils/adapters.fa +302 -0
  47. data/utils/mytaxa_scan.R +89 -0
  48. data/utils/mytaxa_scan.rb +58 -0
  49. data/utils/requirements.txt +19 -0
  50. data/utils/subclades-compile.rb +48 -0
  51. data/utils/subclades.R +171 -0
  52. metadata +185 -0
data/scripts/mytaxa.bash ADDED
@@ -0,0 +1,93 @@
#!/bin/bash
# MiGA step "mytaxa": searches the dataset's predicted proteins against the
# MyTaxa database with DIAMOND, runs MyTaxa on the hits, generates the
# MyTaxa profiles, and renders them with Krona.
# The analysis only runs when `miga list_datasets ... --multi` reports exactly
# one line for the dataset; otherwise the step is simply registered as done.
#
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
# Also reads $DATASET (used as the prefix of every input and output file).
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/02.taxonomy/01.mytaxa"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Stop here if the directory is unusable: everything below writes relative
# paths and would otherwise land in whatever directory we started from.
cd "$DIR" || exit 1

#######################################
# Reports a missing MyTaxa requirement on stderr and aborts the script.
# Arguments: $1 - description of the requirement (e.g. "the database")
#            $2 - path that could not be found
#######################################
missing_requirement() {
  echo "Cannot locate $1: $2:" "no such file or directory" >&2
  exit 1
}

#######################################
# Builds the MyTaxa input from the tabular DIAMOND hits, runs MyTaxa, and
# generates the innominate/genes/krona profiles.
# Globals:   MT, DATASET (read)
# Arguments: $1 - annotation format handed to the MyTaxa utilities
#                 (gff2, gff3, or "no" when there is no GFF)
#            $2 - path to the GFF file (a placeholder when $1 is "no")
#######################################
run_mytaxa() {
  local gff_format=$1 gff_file=$2
  local -a gff_args=()
  # The distribution script only receives -g/-f when a GFF is available.
  if [[ "$gff_format" != "no" ]] ; then
    gff_args=(-g "$gff_file" -f "$gff_format")
  fi

  perl "$MT/utils/infile_convert.pl" -f "$gff_format" \
      "$gff_file" "$DATASET.blast" \
    | sort -k 13 > "$DATASET.mytaxain"
  "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
  perl "$MT/utils/MyTaxa.distribution.pl" -m "$DATASET.mytaxa" \
    "${gff_args[@]}" \
    -I "$DATASET.mytaxa.innominate" -G "$DATASET.mytaxa.genes" \
    -K "$DATASET.mytaxa.krona" -u
}

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
# The database, index and utilities are looked up next to the MyTaxa binary.
MT=$(dirname -- "$(which MyTaxa)")
# Prefix of the gene prediction files for this dataset.
cds="../../../06.cds/$DATASET"

# Check type of dataset
MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
  | wc -l | awk '{print $1}')
if [[ "$MULTI" -eq "1" ]] ; then
  # Check requirements
  [[ -e "$MT/AllGenomes.faa.dmnd" ]] \
    || missing_requirement "the database" "$MT/AllGenomes.faa.dmnd"
  [[ -d "$MT/db" ]] \
    || missing_requirement "the MyTaxa index" "$MT/db"
  [[ -d "$MT/utils" ]] \
    || missing_requirement "the MyTaxa utilities" "$MT/utils"

  # Execute search
  diamond blastp -q "$cds.faa" -d "$MT/AllGenomes.faa" \
    -a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
  diamond view -a "$DATASET.daa" -o "$DATASET.blast"

  # Prepare MyTaxa input, execute MyTaxa, and generate profiles.
  # Compressed annotations are expanded first, unless an uncompressed copy
  # already exists.
  for ext in gff2 gff3 ; do
    if [[ -e "$cds.$ext.gz" && ! -e "$cds.$ext" ]] ; then
      gunzip "$cds.$ext.gz"
    fi
  done
  if [[ -e "$cds.gff2" ]] ; then
    # GFF2
    run_mytaxa gff2 "$cds.gff2"
  elif [[ -e "$cds.gff3" ]] ; then
    # GFF3
    run_mytaxa gff3 "$cds.gff3"
  else
    # No GFF
    run_mytaxa no "LOREM_IPSUM"
  fi

  # Execute Krona
  ktImportText -o "$DATASET.html" -n biota "$DATASET.mytaxa.krona,$DATASET"

  # Gzip and cleanup
  for ext in gff2 gff3 ; do
    [[ -e "$cds.$ext" ]] && gzip -9 -f "$cds.$ext"
  done
  gzip -9 -f "$DATASET.mytaxain"
  gzip -9 -f "$DATASET.blast"
  rm "$DATASET.daa"
fi

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa
data/scripts/mytaxa_scan.bash ADDED
@@ -0,0 +1,85 @@
#!/bin/bash
# MiGA step "mytaxa_scan": classifies the dataset's predicted proteins with
# DIAMOND + MyTaxa, runs the MiGA mytaxa_scan utilities (Ruby + R) to produce
# a PDF report, and extracts the genes of every flagged region.
# The analysis only runs when `miga list_datasets ... --no-multi` reports
# exactly one line for the dataset; otherwise the step is simply registered
# as done.
#
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
# Also reads $DATASET (used as the prefix of every input and output file).
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Stop here if the directory is unusable: everything below writes relative
# paths and would otherwise land in whatever directory we started from.
cd "$DIR" || exit 1

#######################################
# Reports a missing MyTaxa requirement on stderr and aborts the script.
# Arguments: $1 - description of the requirement (e.g. "the database")
#            $2 - path that could not be found
#######################################
missing_requirement() {
  echo "Cannot locate $1: $2:" "no such file or directory" >&2
  exit 1
}

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
# The database, index and utilities are looked up next to the MyTaxa binary.
MT=$(dirname -- "$(which MyTaxa)")
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX) || exit 1
# Remove the scratch directory on every exit path (normal end, failed
# requirement check, or signal). The single quotes defer the expansion of
# $TMPDIR until the trap actually runs.
trap 'rm -rf -- "$TMPDIR"' EXIT
trap 'exit' SIGHUP SIGINT SIGTERM

# Check type of dataset
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
  | wc -l | awk '{print $1}')
if [[ "$NOMULTI" -eq "1" ]] ; then
  # Check requirements
  [[ -e "$MT/AllGenomes.faa.dmnd" ]] \
    || missing_requirement "the database" "$MT/AllGenomes.faa.dmnd"
  [[ -d "$MT/db" ]] \
    || missing_requirement "the MyTaxa index" "$MT/db"
  [[ -d "$MT/utils" ]] \
    || missing_requirement "the MyTaxa utilities" "$MT/utils"

  # Non-empty results from a previous run are reused instead of recomputed.
  if [[ ! -s "$DATASET.mytaxa" ]] ; then
    # Execute search
    if [[ ! -s "$DATASET.blast" ]] ; then
      diamond blastp -q "../../../06.cds/$DATASET.faa" \
        -d "$MT/AllGenomes.faa" -k 5 -p "$CORES" --min-score 60 \
        -a "$DATASET.daa" -t "$TMPDIR"
      diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
    fi

    # Prepare MyTaxa input, execute MyTaxa, and generate profiles
    perl "$MT/utils/infile_convert.pl" -f no "LOREM_IPSUM" "$DATASET.blast" \
      | sort -k 13 > "$DATASET.mytaxain"
    "$MT/MyTaxa" "$DATASET.mytaxain" "$DATASET.mytaxa" "0.5"
  fi
  ruby "$MIGA/utils/mytaxa_scan.rb" "../../../06.cds/$DATASET.faa" \
    "$DATASET.mytaxa" "$DATASET.wintax"
  echo "
  source('$MIGA/utils/mytaxa_scan.R');
  pdf('$DATASET.pdf', 12, 7);
  mytaxa.scan('$DATASET.wintax');
  dev.off();
  " | R --vanilla

  # Extract genes from flagged regions
  [[ -d "$DATASET.reg" ]] || mkdir "$DATASET.reg"
  if [[ -e "$DATASET.wintax.regions" ]] ; then
    i=0
    # Each whitespace-separated token is used as a line number of
    # $DATASET.wintax.genes; word splitting is intentional here.
    for win in $(< "$DATASET.wintax.regions") ; do
      i=$((i + 1))
      awk -v n="$win" 'NR==n' "$DATASET.wintax.genes" | tr '\t' '\n' \
        > "$DATASET.reg/$i.ids"
      FastA.filter.pl -q "$DATASET.reg/$i.ids" \
        "../../../06.cds/$DATASET.faa" > "$DATASET.reg/$i.faa"
    done
  fi

  # Clean
  [[ -e "$DATASET.daa" ]] && rm "$DATASET.daa"
  [[ -s "$DATASET.blast" && ! -s "$DATASET.blast.gz" ]] \
    && gzip -9 -f "$DATASET.blast"
  [[ -s "$DATASET.mytaxain" && ! -s "$DATASET.mytaxain.gz" ]] \
    && gzip -9 -f "$DATASET.mytaxain"
fi

# Finalize (the scratch directory is removed by the EXIT trap)
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa_scan
data/scripts/ogs.bash ADDED
@@ -0,0 +1,36 @@
#!/bin/bash
# MiGA project step "ogs": dumps the reciprocal best matches (table `rbm`) of
# every pair of datasets from the AAI SQLite databases into one file per
# pair, then runs ogs.mcl.rb and ogs.stats.rb on them.
#
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Stop here if the directory is missing: everything below writes relative
# paths and would otherwise land in whatever directory we started from.
cd "$PROJECT/data/10.clades/03.ogs" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Start with an empty progress log
echo -n "" > miga-project.log
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)

# Extract RBMs
[[ -d miga-project.rbm ]] || mkdir miga-project.rbm
# $DS is deliberately left unquoted: it holds the list of dataset names and
# is split into one word per dataset.
for i in $DS ; do
  for j in $DS ; do
    file="miga-project.rbm/$i-$j.rbm"
    # Pairs extracted by a previous run are kept as they are
    [[ -s "$file" ]] && continue
    echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
        "where seq1='$i' and seq2='$j' ;" \
      | sqlite3 "../../09.distances/02.aai/$i.db" | tr '|' '\t' \
      > "$file"
    # Do not leave empty files behind for pairs without matches
    [[ -s "$file" ]] || rm "$file"
  done
  # One line per dataset whose pairs have all been processed
  echo "$i" >> miga-project.log
done

# Estimate OGs
ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
ogs.stats.rb -o miga-project.ogs -j miga-project.stats

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r ogs
data/scripts/read_quality.bash ADDED
@@ -0,0 +1,37 @@
#!/bin/bash
# MiGA step "read_quality": runs FastQC on the clipped reads, collects the
# SolexaQA++ PDF reports left in 02.trimmed_reads, and removes the
# intermediate trimming files from that directory.
#
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
# Also reads $DATASET (used as the prefix of every input and output file).
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Stop here if the directory is missing: the relative paths below (including
# the ones handed to rm) would otherwise resolve from the wrong place.
cd "$PROJECT/data/03.read_quality" || exit 1

b=$DATASET
# Prefix of this dataset's files in the trimming directory. Only the prefix
# is quoted in the commands below so that the [12] globs still expand.
trimmed="../02.trimmed_reads/$b"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# FastQC
[[ -d "$b.fastqc" ]] || mkdir "$b.fastqc"
fastqc "$trimmed".[12].clipped.fastq -o "$b.fastqc"

# SolexaQA++
[[ -d "$b.solexaqa" ]] || mkdir "$b.solexaqa"
# `exists` is not defined in this script; presumably provided by miga.bash.
exists "$trimmed".[12].*.pdf \
  && mv "$trimmed".[12].*.pdf "$b.solexaqa/"

# Clean 02.trimmed_reads
# Each family of intermediates is removed for both sisters as soon as the
# file of the first sister is found.
for suffix in fastq_trimmed.segments fastq.trimmed.paired \
    fastq.trimmed.single fastq.trimmed fastq ; do
  [[ -e "$trimmed.1.$suffix" ]] && rm "$trimmed".[12]."$suffix"
done

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r read_quality
data/scripts/ssu.bash ADDED
@@ -0,0 +1,35 @@
#!/bin/bash
# MiGA step "ssu": detects 16S rRNA genes in the assembly with barrnap,
# extracts their sequences with bedtools, and keeps the longest one as
# $DATASET.ssu.fa. Nothing is computed when the assembly is missing or empty.
#
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
# Also reads $DATASET (used as the prefix of every input and output file).
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/07.annotation/01.function/02.ssu"
[[ -d "$DIR" ]] || mkdir -p "$DIR"
# Stop here if the directory is unusable: everything below writes relative
# paths and would otherwise land in whatever directory we started from.
cd "$DIR" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

fa="../../../05.assembly/$DATASET.LargeContigs.fna"
if [[ -s "$fa" ]] ; then
  # Run barrnap, keeping only the GFF header and the 16S features
  barrnap --quiet --threads "$CORES" "$fa" \
    | grep "^##gff\\|;product=16S " \
    > "$DATASET.ssu.gff"

  # Extract
  bedtools getfasta -s -fi "$fa" -bed "$DATASET.ssu.gff" \
    -fo "$DATASET.ssu.all.fa"
  # The ID of the longest extracted sequence goes through a temporary file
  FastA.length.pl "$DATASET.ssu.all.fa" | sort -nr -k 2 | head -n 1 \
    | cut -f 1 > "$DATASET.ssu.fa.id"
  FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" \
    > "$DATASET.ssu.fa"
  rm "$DATASET.ssu.fa.id"
  # Remove the FASTA index if one was left next to the assembly
  [[ -e "$fa.fai" ]] && rm "$fa.fai"

  # Gzip
  gzip -9 -f "$DATASET.ssu.gff"
  gzip -9 -f "$DATASET.ssu.all.fa"
fi

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r ssu
data/scripts/subclades.bash ADDED
@@ -0,0 +1,26 @@
#!/bin/bash
# MiGA project step "subclades": runs the subclades() R routine on the
# project's ANI table and compiles its output with subclades-compile.rb.
#
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Stop here if the directory is missing: everything below writes relative
# paths and would otherwise land in whatever directory we started from.
cd "$PROJECT/data/10.clades/02.ani" || exit 1

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"

# Run R code
echo "
source('$MIGA/utils/subclades.R');
subclades('../../09.distances/03.ani/miga-project.txt.gz',
  'miga-project', $CORES);
" | R --vanilla

# Compile
# stdout and stderr are captured into two different files on purpose.
# NOTE(review): presumably the compiler emits the table on stdout and the
# Newick tree on stderr -- confirm against utils/subclades-compile.rb.
ruby "$MIGA/utils/subclades-compile.rb" . \
  > miga-project.class.tsv \
  2> miga-project.class.nwk

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
miga add_result -P "$PROJECT" -r subclades
data/scripts/trimmed_fasta.bash ADDED
@@ -0,0 +1,47 @@
#!/bin/bash
# MiGA step "trimmed_fasta": converts the clipped FastQ reads to FastA
# (interposed coupled reads plus single reads when a second sister exists,
# single reads only otherwise) and compresses the FastQ inputs.
#
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
# Also reads $DATASET (used as the prefix of every input and output file).
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Stop here if the directory is missing: everything below writes relative
# paths and would otherwise land in whatever directory we started from.
cd "$PROJECT/data/04.trimmed_fasta" || exit 1

b=$DATASET
# Prefixes of this dataset's files in the upstream directories. Only the
# prefix is quoted where a [12] glob has to expand.
raw="../01.raw_reads/$b"
trimmed="../02.trimmed_reads/$b"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Gunzip (if necessary)
for sis in 1 2 ; do
  [[ -e "$trimmed.$sis.clipped.fastq.gz" \
      && ! -e "$trimmed.$sis.clipped.fastq" ]] \
    && gunzip "$trimmed.$sis.clipped.fastq.gz"
done

# FastQ -> FastA
FastQ.toFastA.awk < "$trimmed.1.clipped.fastq" > "$b.1.fasta"
if [[ -e "$trimmed.2.clipped.fastq" ]] ; then
  # Paired reads: interpose both sisters and convert the orphaned reads
  FastQ.toFastA.awk < "$trimmed.2.clipped.fastq" > "$b.2.fasta"
  FastA.interpose.pl "$b.CoupledReads.fa" "$b".[12].fasta
  gzip -9 -f "$b.2.fasta"
  gzip -9 -f "$b.1.fasta"
  cat "$trimmed".[12].clipped.single.fastq | FastQ.toFastA.awk \
    > "$b.SingleReads.fa"
  gzip -9 -f "$b.SingleReads.fa"
else
  # Single reads only: the converted first sister is the final result
  mv "$b.1.fasta" "$b.SingleReads.fa"
fi

# Compress input at 01.raw_reads and 02.trimmed_reads
for sis in 1 2 ; do
  for fastq in "$raw.$sis.fastq" "$trimmed.$sis.clipped.fastq" \
      "$trimmed.$sis.clipped.single.fastq" ; do
    [[ -e "$fastq" ]] && gzip -9 -f "$fastq"
  done
done

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
data/scripts/trimmed_reads.bash ADDED
@@ -0,0 +1,57 @@
#!/bin/bash
# MiGA step "trimmed_reads": tags the raw reads, quality-trims them with
# SolexaQA++ (dynamictrim + lengthsort), and clips adapters with scythe.
# Produces $DATASET.[12].clipped.fastq and, for paired reads,
# $DATASET.[12].clipped.single.fastq when SolexaQA++ reports orphaned reads.
#
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
# Also reads $DATASET (used as the prefix of every input and output file).
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
# Stop here if the directory is missing: the glob-based rm below must never
# run from an unexpected working directory.
cd "$PROJECT/data/02.trimmed_reads" || exit 1

b=$DATASET
# Prefix of this dataset's raw reads. Only the prefix is quoted where a [12]
# glob has to expand.
raw="../01.raw_reads/$b"
adapters="$MIGA/utils/adapters.fa"

# Initialize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Unzip (if necessary)
for sis in 1 2 ; do
  [[ -e "$raw.$sis.fastq.gz" && ! -e "$raw.$sis.fastq" ]] \
    && gunzip "$raw.$sis.fastq.gz"
done

# Clean existing files
# `exists` is not defined in this script; presumably provided by miga.bash.
exists "$b".[12].* && rm "$b".[12].*

# Tag
FastQ.tag.rb -i "$raw.1.fastq" -p "$b-" -s "/1" -o "$b.1.fastq"
[[ -e "$raw.2.fastq" ]] \
  && FastQ.tag.rb -i "$raw.2.fastq" -p "$b-" -s "/2" -o "$b.2.fastq"

# Trim
SolexaQA++ dynamictrim "$b".[12].fastq -h 20 -d .
SolexaQA++ lengthsort "$b".[12].fastq.trimmed -l 50 -d .

# Clean adapters
if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
  # Paired reads: clip each sister, then re-pair with a second lengthsort
  scythe -a "$adapters" "$b.1.fastq.trimmed.paired" \
    > "$b.1.clipped.all.fastq"
  scythe -a "$adapters" "$b.2.fastq.trimmed.paired" \
    > "$b.2.clipped.all.fastq"
  SolexaQA++ lengthsort "$b".[12].clipped.all.fastq -l 50 -d .
  rm "$b".[12].clipped.all.fastq
  [[ -e "$b.1.clipped.all.fastq.single" ]] \
    && mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.single.fastq"
  [[ -e "$b.2.clipped.all.fastq.single" ]] \
    && mv "$b.2.clipped.all.fastq.single" "$b.2.clipped.single.fastq"
  mv "$b.1.clipped.all.fastq.paired" "$b.1.clipped.fastq"
  mv "$b.2.clipped.all.fastq.paired" "$b.2.clipped.fastq"
  # Best effort: the summary may not exist, so any error is silenced
  rm "$b.1.clipped.all.fastq.summary.txt" &>/dev/null
else
  # Single reads only
  # NOTE(review): unlike the paired branch, $b.1.clipped.all.fastq is not
  # removed here -- confirm whether that is intended.
  scythe -a "$adapters" "$b.1.fastq.trimmed.single" \
    > "$b.1.clipped.all.fastq"
  SolexaQA++ lengthsort "$b.1.clipped.all.fastq" -l 50 -d .
  mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.fastq"
fi
# Best effort: there may be no discarded reads, so any error is silenced
rm "$b".[12].*.discard &>/dev/null

# Finalize
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads