miga-base 1.2.17.1 → 1.2.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
  4. data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
  5. data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
  6. data/utils/enveomics/Scripts/SRA.download.bash +14 -2
  7. data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
  8. data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
  9. data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
  10. data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
  11. data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
  12. data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
  13. data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
  14. data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
  15. data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
  16. data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
  17. data/utils/enveomics/enveomics.R/R/utils.R +30 -20
  18. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
  19. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
  20. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
  21. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
  22. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
  23. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
  24. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
  25. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
  26. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
  27. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
  28. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
  29. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
  30. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
  31. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
  32. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
  33. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
  34. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
  35. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
  36. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
  37. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
  38. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
  39. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
  40. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
  41. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
  42. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
  43. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
  44. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
  45. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
  46. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
  48. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
  49. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
  50. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
  51. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
  52. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
  53. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
  54. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
  55. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
  56. metadata +3 -37
  57. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  58. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  59. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  60. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  61. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  62. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  63. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  64. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  65. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  66. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  67. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  68. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  69. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  70. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  71. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  72. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  73. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  74. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  75. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  76. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  77. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  78. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  79. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  80. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  81. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  82. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  83. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  84. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  85. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  86. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  87. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  88. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  89. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  90. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  91. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
@@ -1,44 +0,0 @@
#!/bin/bash
#
# RUNME-4.bash: launch the Newbler step of the assembly pipeline, submitting
# one `qsub` job (newbler.pbs) per library listed in $LIBRARIES.
#
# The command-line arguments are consumed by RUNME.bash, which is sourced
# below and loads the run configuration.
#
# Variables used after the configuration is loaded: SCRATCH, LIBRARIES, DATA,
# USECOUPLED, USESINGLE, K_VELVET, K_SOAP, BIN454, PPN, QUEUE, WTIME and
# RAMMULT (defaults to 1).

##################### RUN
# Find the directory of the pipeline
PDIR=$(dirname "$(readlink -f "$0")")
# Load variables
source "$PDIR/RUNME.bash"
if [[ -z "$SCRATCH" ]] ; then
   echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
   exit 1
fi

# Run it
echo "Jobs being launched in $SCRATCH"
RAMMULT=${RAMMULT:-1}

# The k-mer lists are passed colon-separated; newbler.pbs turns the colons
# back into spaces. The conversion does not depend on the library, so it is
# done once, outside the loop.
K_VELVET=$(echo $K_VELVET | sed -e 's/ /:/g')
K_SOAP=$(echo $K_SOAP | sed -e 's/ /:/g')

for LIB in $LIBRARIES; do
   # Prepare info
   echo "Running $LIB"
   if [[ "$USECOUPLED" == "yes" ]] ; then
      INPUT="$DATA/$LIB.CoupledReads.fa"
   elif [[ "$USESINGLE" == "yes" ]] ; then
      INPUT="$DATA/$LIB.SingleReads.fa"
   else
      echo "$0: Error: No task selected, neither USECOUPLED nor USESINGLE set to yes." >&2
      exit 1
   fi
   # Without this check a missing file breaks the arithmetic below and a job
   # is submitted with a stale (or empty) memory request.
   if [[ ! -e "$INPUT" ]] ; then
      echo "$0: Error: Cannot find input file $INPUT" >&2
      exit 1
   fi

   # SIZE is 30x the input size, in GiB (integer arithmetic); RAM adds a
   # 3 GiB baseline and applies the user multiplier.
   BYTES=$(wc -c < "$INPUT")
   SIZE=$(( 30 * BYTES / 1024 / 1024 / 1024 ))
   RAM=$(( (3 + SIZE) * $RAMMULT ))
   VARS="LIB=$LIB,PDIR=$PDIR,BIN454=$BIN454,KVELVET=$K_VELVET,KSOAP=$K_SOAP"

   # Pick queue and walltime: an explicit QUEUE wins; otherwise choose by
   # memory request first, then by input size.
   if [[ -n "$QUEUE" ]] ; then
      JOB_QUEUE="$QUEUE"
      JOB_WTIME="$WTIME"
   elif [[ $RAM -gt 150 ]] ; then
      JOB_QUEUE="biohimem-6"
      JOB_WTIME="360:00:00"
   elif [[ $SIZE -lt 4 ]] ; then
      JOB_QUEUE="iw-shared-6"
      JOB_WTIME="12:00:00"
   else
      JOB_QUEUE="biocluster-6"
      JOB_WTIME="120:00:00"
   fi

   # Launch Newbler
   NAME="Newbler_${LIB}"
   qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
      -l "nodes=1:ppn=$PPN" -l "mem=${RAM}g" -l "walltime=$JOB_WTIME" \
      -q "$JOB_QUEUE"
done
@@ -1,50 +0,0 @@
#!/bin/bash
#
# RUNME.bash: shared configuration loader for the assembly pipeline.
#
# This file is meant to be sourced by one of the RUNME-*.bash scripts, which
# must set PDIR beforehand. It reads the run name from $1, sources
# "$PDIR/CONFIG.<name>.bash" and makes sure the scratch directory exists.
# If HELP is set by the caller it replaces the default usage text.
# Because the file is sourced, every `exit` below ends the calling script.

##################### RUN
# Check if it was sourced from RUNME-*.bash
if [[ -z "$PDIR" ]] ; then
   echo "$0: Error: This file is not stand-alone. Execute one of RUNME-*.bash as described in the README.md file" >&2
   exit 1
fi

# Find the directory of the pipeline
CWD=$(pwd)
PDIR=$(dirname "$(readlink -f "$0")")

# Run it
# Actually, this script doesn't run anything. It's meant to keep the
# variables centralized.

# Load config
# NAMES is a bullet list of the available run names, derived from the
# CONFIG.<name>.bash files sitting next to the pipeline scripts.
NAMES=$(ls "$PDIR"/CONFIG.*.bash | sed -e 's/.*CONFIG\./ * /' | sed -e 's/\.bash//')
if [[ -z "$1" ]] ; then
   if [[ -z "$HELP" ]] ; then
      echo "
Usage:
$0 name

name The name of the run. CONFIG.name.bash must exist.

See $PDIR/README.md for more information.

Available names are:
$NAMES
" >&2
   else
      echo "$HELP
Available names are:
$NAMES
" >&2
   fi
   exit 1
fi
if [[ ! -e "$PDIR/CONFIG.$1.bash" ]] ; then
   echo "$0: Error: Impossible to find $PDIR/CONFIG.$1.bash, available names are:
$NAMES" >&2
   exit 1
fi
source "$PDIR/CONFIG.$1.bash"

# Create the scratch directory (skipped when the configuration left SCRATCH
# empty; the caller reports that case itself).
if [[ -n "$SCRATCH" && ! -d "$SCRATCH" ]] ; then
   mkdir -p "$SCRATCH"
fi
@@ -1,37 +0,0 @@

# @author: Luis M. Rodriguez-R
# @update: Nov-29-2012

# Plot the number of reads used and the N50 obtained for each k-mer of an
# assembly, and mark the suggested k-mers.
#
# file: Path to a space-delimited table with a header containing (at least)
#       the columns K, N50, used and reads.
# lib:  Library name, only used in the plot title.
#
# Returns the table restricted to rows with both N50 and used defined,
# sorted by K, with `reads` set to the maximum observed value. When at least
# three rows are available a logical column `sel` marks the three suggested
# k-mers. The plot is drawn on the current graphics device.
kSelector <- function(file, lib) {
  red <- rgb(0.6, 0, 0)
  d <- read.table(file, sep = " ", header = TRUE, fill = TRUE)
  d <- d[!is.na(d$N50) & !is.na(d$used), ]

  # Nothing to plot (e.g., an assembler that was never run): return the
  # empty table instead of failing further down with an obscure error.
  if (nrow(d) == 0) {
    warning("kSelector: no rows with both N50 and used defined in ", file)
    return(d)
  }

  d$reads <- max(d$reads, na.rm = TRUE)
  d <- d[order(d$K), ]
  rownames(d) <- seq_len(nrow(d))

  # Bars: total reads (white) with used reads (grey) drawn on top
  par(mar = c(5, 4, 4, 5) + 0.1, cex = 0.8)
  barplot(d$reads / 1e6, names.arg = d$K, col = 'white',
          ylab = 'Number of reads (in millions)', xlab = 'K',
          main = paste('Reads used and N50 by K-mers in the assembly of', lib))
  barplot(d$used / 1e6, col = 'grey', add = TRUE)

  # Overlay: N50 on a secondary (right-hand) axis
  par(new = TRUE)
  plot(seq_along(d$K) - 0.5, d$N50, col = red, type = 'b', lty = 2, pch = 20,
       cex = 1, lwd = 1.5, xlim = c(0, length(d$K)), xaxt = 'n', yaxt = 'n',
       xlab = '', ylab = '')
  axis(4, col.axis = red)
  mtext('N50 (bp)', side = 4, line = 3, col = red)

  # Suggest best k-mers
  if (nrow(d) >= 3) {
    # Standardized N50 (l) and used reads (u) per k-mer
    x <- data.frame(K = d$K,
                    l = (d$N50 - mean(d$N50)) / sd(d$N50),
                    u = (d$used - mean(d$used)) / sd(d$used))
    rownames(x) <- rownames(d)
    d <- cbind(d, sel = FALSE)
    # Three picks without replacement, each maximizing weight*l + u with a
    # different weight on N50
    for (l_star in c(2, 1/2, 1)) {
      k_s_i <- x$K[which.max(l_star * x$l + x$u)]
      x <- x[x$K != k_s_i, ]
      d$sel[d$K == k_s_i] <- TRUE
    }
    abline(v = as.numeric(rownames(d)[d$sel]) - 0.5, col = 'darkgreen', lty = 6)
  }
  return(d)
}

@@ -1,68 +0,0 @@
#!/bin/bash
#PBS -l nodes=1:ppn=1
#PBS -k oe
#
# newbler.pbs: combine the Velvet and/or SOAP contigs of one library into a
# Newbler project and run it.
#
# Mandatory variables: LIB, PDIR, and at least one of KVELVET / KSOAP
# (colon-separated k-mer lists). Optional: BIN454 (Newbler binaries).
# Progress is recorded in "$LIB.newbler.proc"; tool output goes to
# "$LIB.newbler.log".

# Some defaults for the parameters
BIN454=${BIN454:-"$HOME/454/bin"}

# Check mandatory variables
if [[ -z "$LIB" ]]; then
   echo "Error: LIB is mandatory" >&2
   exit 1
fi
if [[ -z "$PDIR" ]]; then
   echo "Error: PDIR is mandatory" >&2
   exit 1
fi
if [[ -z "$KVELVET$KSOAP" ]]; then
   echo "Error: KVELVET and/or KSOAP are mandatory" >&2
   exit 1
fi

# Prepare input
KVELVET=$(echo $KVELVET | sed -e 's/:/ /g')
KSOAP=$(echo $KSOAP | sed -e 's/:/ /g')
NP=$(wc -l < "$PBS_NODEFILE")
CWD=$(pwd)
DIR="$CWD/$LIB.newbler"
LOG="$DIR.log"
module load perl/5.14.4
export PATH=$PATH:$BIN454

# Enter the project directory, aborting if that is impossible: the Newbler
# commands act on the current directory, so they must never run elsewhere.
function enter_dir {
   cd "$1" || { echo "Error: Cannot enter directory $1" >&2 ; exit 1 ; }
}

# Create project
echo new > "$DIR.proc"
nohup newAssembly "$DIR" > "$LOG"

# Prepare Velvet
if [[ -n "$KVELVET" ]] ; then
   echo pre-velvet > "$DIR.proc"
   rm -f "$LIB.velvet.tmp1"
   for K in $KVELVET ; do
      perl "$PDIR/FastA.filterN.pl" "$LIB.velvet_$K/contigs.fa" >> "$LIB.velvet.tmp1"
   done
   perl "$PDIR/newbler_preparator.pl" "$LIB.velvet.tmp1" "$LIB.velvet.tmp2"
   enter_dir "$DIR"
   nohup addRun "../$LIB.velvet.tmp2" >> "$LOG"
   enter_dir "$CWD"
fi

# Prepare SOAP
if [[ -n "$KSOAP" ]] ; then
   echo pre-soap > "$DIR.proc"
   rm -f "$LIB.soap.tmp1"
   for K in $KSOAP ; do
      cat "$LIB.soap_$K/O.contig" >> "$LIB.soap.tmp1"
   done
   perl "$PDIR/newbler_preparator.pl" "$LIB.soap.tmp1" "$LIB.soap.tmp2"
   enter_dir "$DIR"
   nohup addRun "../$LIB.soap.tmp2" >> "$LOG"
   enter_dir "$CWD"
fi

# Run
enter_dir "$DIR"
echo newbler > "$DIR.proc"
nohup runProject -cpu "$NP" >> "$LOG"
enter_dir "$CWD"
echo done > "$DIR.proc"

@@ -1,49 +0,0 @@
#!/usr/bin/perl
#
# newbler_preparator.pl
#
# Rewrites a FastA file so that no sequence is 1500 bp or longer:
#  - sequences shorter than 100 bp are dropped,
#  - sequences shorter than 1500 bp are copied as they are,
#  - longer sequences are cut into overlapping windows of up to 1500 bp,
#    starting every 200 bp, named "<header>:r<n>" with n counting from 1.
#
# Records are processed in file order, one at a time, so two records sharing
# the same header are both kept.
#
# Usage: newbler_preparator.pl input.fa output.fa

use strict;
use warnings;

use constant MIN_LENGTH => 100;     # Shorter sequences are discarded
use constant MAX_LENGTH => 1500;    # Window size for long sequences
use constant STEP       => 200;     # Distance between window starts

# Returns the list of [header, sequence] pairs to be written for one record.
sub fragment_record {
    my ($tag, $seq) = @_;
    my $len = length $seq;
    return () if $len < MIN_LENGTH;
    return ([$tag, $seq]) if $len < MAX_LENGTH;

    my @fragments;
    my $index = 1;
    # Stop once fewer than MIN_LENGTH bases would remain in a new window
    for (my $start = 0; $start < $len - MIN_LENGTH; $start += STEP) {
        push @fragments, ["$tag:r$index", substr($seq, $start, MAX_LENGTH)];
        $index++;
    }
    return @fragments;
}

# Writes the fragments of one record to the given filehandle.
sub write_fragments {
    my ($out_fh, $tag, $seq) = @_;
    for my $fragment (fragment_record($tag, $seq)) {
        print {$out_fh} "$fragment->[0]\n$fragment->[1]\n";
    }
    return;
}

# Entry point: reads the FastA file $in and writes the prepared file $out.
# Dies if an argument is missing or a file cannot be opened or written.
sub main {
    my ($in, $out) = @_;
    ($in and $out) or die "
Usage: $0 input.fa output.fa
";

    open my $in_fh,  '<', $in  or die "Cannot read file: $in: $!\n";
    open my $out_fh, '>', $out or die "Cannot create file: $out: $!\n";

    my ($tag, $seq);
    while (my $line = <$in_fh>) {
        chomp $line;
        if ($line =~ /^>/) {
            write_fragments($out_fh, $tag, $seq) if defined $tag;
            ($tag, $seq) = ($line, '');
        }
        elsif (defined $tag) {
            # Lines before the first header belong to no record: ignored
            $seq .= $line;
        }
    }
    write_fragments($out_fh, $tag, $seq) if defined $tag;

    close $in_fh;
    close $out_fh or die "Cannot write file: $out: $!\n";
    return;
}

# Run only when executed as a script, not when loaded with require/do
main(@ARGV) unless caller;

1;
@@ -1,80 +0,0 @@
#!/bin/bash
#PBS -k oe
#
# soap.pbs: run SOAPdenovo for one library with the k-mer given by the job
# array index ($PBS_ARRAYID).
#
# Mandatory variables: LIB, PDIR, DATA. Optional: INSLEN (300), USECOUPLED
# (yes), USESINGLE (no), CLEANUP (yes), MAXRDLEN (estimated from the reads
# when unset). Results go to "$LIB.soap_<K>/", progress to
# "$LIB.soap_<K>.proc" and the SOAPdenovo output to "$LIB.soap_<K>.log".

# Some defaults for the parameters
INSLEN=${INSLEN:-300}
USECOUPLED=${USECOUPLED:-yes}
USESINGLE=${USESINGLE:-no}
CLEANUP=${CLEANUP:-yes}

# Check mandatory variables
if [[ -z "$LIB" ]]; then
   echo "Error: LIB is mandatory" >&2
   exit 1
fi
if [[ -z "$PDIR" ]]; then
   echo "Error: PDIR is mandatory" >&2
   exit 1
fi
if [[ -z "$DATA" ]]; then
   echo "Error: DATA is mandatory" >&2
   exit 1
fi

# Prepare input
module load perl/5.14.4
KMER=$PBS_ARRAYID
DIR="$LIB.soap_$KMER"
# The maximum read length is estimated from the coupled reads when they are
# in use, and from the single reads otherwise.
if [[ "$USECOUPLED" == "yes" ]]; then
   LENGTH_SOURCE="$DATA/$LIB.CoupledReads.fa"
elif [[ "$USESINGLE" == "yes" ]]; then
   LENGTH_SOURCE="$DATA/$LIB.SingleReads.fa"
else
   echo "$0: Error: Nothing to do, neither USECOUPLED nor USESINGLE set to yes." >&2
   exit 2
fi
# Only the first 200000 reported lengths are inspected
MAXRDLEN=${MAXRDLEN:-$(perl "$PDIR/FastA.length.pl" "$LENGTH_SOURCE" | head -n 200000 | awk '{if($2>MAX) MAX=$2} END{print MAX}')}
NP=$(wc -l < "$PBS_NODEFILE")

# Config
module load SOAP/denovo2/r240
echo config > "$DIR.proc"
echo "max_rd_len=$MAXRDLEN
[LIB]
reverse_seq=0
asm_flag=3
rank=1" > "$DIR.config"
if [[ "$USECOUPLED" == "yes" ]]; then
   echo "avg_ins=$INSLEN
p=$DATA/$LIB.CoupledReads.fa" >> "$DIR.config"
fi
if [[ "$USESINGLE" == "yes" ]]; then
   echo "f=$DATA/$LIB.SingleReads.fa" >> "$DIR.config"
fi


# Run
echo pre > "$DIR.proc"
if [[ -d "$DIR" ]] ; then rm -R "$DIR" ; fi
mkdir "$DIR"
echo soap > "$DIR.proc"
SOAPdenovo-63mer all -s "$DIR.config" -p "$NP" -K "$KMER" -o "$DIR/O" &> "$DIR.log"
if [[ ! -d "$DIR" ]] ; then
   echo "$0: Error: Directory $DIR doesn't exist, something went wrong" >&2
   exit 1
fi
if [[ ! -s "$DIR/O.contig" ]] ; then
   echo "$0: Error: File $DIR/O.contig doesn't exist, something went wrong" >&2
   exit 1
fi
if [[ "$CLEANUP" != "no" ]] ; then
   # Remove the intermediate graph files, keeping contigs and read mappings
   echo cleanup > "$DIR.proc"
   rm -f "$DIR"/*edge
   rm -f "$DIR"/*vertex
   rm -f "$DIR"/*Arc*
   rm -f "$DIR"/*Graph*
   rm -f "$DIR"/*readInGap*
fi
echo done > "$DIR.proc"

@@ -1,57 +0,0 @@
#!/bin/bash
#PBS -q iw-shared-6
#PBS -l nodes=1:ppn=1
#PBS -l mem=1gb
#PBS -l walltime=3:00:00
#PBS -k oe
#
# stats.pbs: collect, for every odd k-mer from 21 to 63, the N50 and the
# number of used/total reads of the Velvet and SOAP assemblies of one
# library, and plot them with kSelector.R.
#
# Mandatory variables: LIB, PDIR. Output: "$LIB.velvet.n50", "$LIB.soap.n50"
# (one space-delimited row per k-mer) and "$LIB.n50.pdf".

# Check mandatory variables
if [[ -z "$LIB" ]]; then
   echo "Error: LIB is mandatory" >&2
   exit 1
fi
if [[ -z "$PDIR" ]]; then
   echo "Error: PDIR is mandatory" >&2
   exit 1
fi

# Run
module load perl/5.14.4
echo "K N50 used reads " > "$LIB.velvet.n50"
echo "K N50 used reads " > "$LIB.soap.n50"
for ID in $(seq 10 31); do
   let KMER=$ID*2+1
   DIRV="$LIB.velvet_$KMER"
   DIRS="$LIB.soap_$KMER"
   # Per-k-mer scratch files: one value per line, joined into a row below
   ROWV="$LIB.velvet.n50.$KMER"
   ROWS="$LIB.soap.n50.$KMER"
   echo "$KMER" > "$ROWV"
   echo "$KMER" > "$ROWS"
   # N50 (>=500)
   perl "$PDIR/FastA.N50.pl" "$DIRV/contigs.fa" 500 | grep '^N50' | sed -e 's/.*: //' >> "$ROWV"
   perl "$PDIR/FastA.N50.pl" "$DIRS/O.contig" 500 | grep '^N50' | sed -e 's/.*: //' >> "$ROWS"
   # Used and Total reads
   # The last line of the Velvet log is expected to end in
   # "... using <used>/<total> reads"; both numbers are extracted, one per
   # line. The expression is single-quoted, so the backslashes must not be
   # doubled.
   tail -n 1 "$DIRV/Log" | sed -e 's/.* using \([0-9]*\)\/\([0-9]*\) reads.*/\1\n\2/' >> "$ROWV"
   if [ -e "$DIRS/O.readOnContig" ] ; then
      grep -vc '^read' "$DIRS/O.readOnContig" >> "$ROWS"
   elif [ -e "$DIRS/O.readOnContig.gz" ] ; then
      zcat "$DIRS/O.readOnContig.gz" | grep -vc '^read' >> "$ROWS"
   else
      echo 0 >> "$ROWS"
   fi
   head -n 1 "$DIRS/O.peGrads" | awk '{print $3}' >> "$ROWS"
   # Join
   (tr '\n' ' ' < "$ROWV"; echo) >> "$LIB.velvet.n50"
   rm "$ROWV"
   (tr '\n' ' ' < "$ROWS"; echo) >> "$LIB.soap.n50"
   rm "$ROWS"
done

# Create plot
module load R/3.1.2
echo "
source('$PDIR/kSelector.R');
pdf('$LIB.n50.pdf', 13, 7);
kSelector('$LIB.velvet.n50', '$LIB (Velvet)');
kSelector('$LIB.soap.n50', '$LIB (SOAP)');
dev.off();
" | R --vanilla -q

@@ -1,63 +0,0 @@
#!/bin/bash
#PBS -l nodes=1:ppn=1
#PBS -k oe
#
# velvet.pbs: run Velvet (velveth + velvetg) for one library with the k-mer
# given by the job array index ($PBS_ARRAYID).
#
# Mandatory variables: LIB, PDIR, DATA. Optional: FORMAT (fasta), INSLEN
# (300), USECOUPLED (yes), USESINGLE (no), CLEANUP (yes), VELVETH_EXTRA and
# VELVETG_EXTRA (extra command-line options). Results go to
# "$LIB.velvet_<K>/", progress to "$LIB.velvet_<K>.proc", and tool output to
# "$LIB.velvet_<K>.hlog" / "$LIB.velvet_<K>.glog".

# Some defaults for the parameters
FORMAT=${FORMAT:-fasta}
INSLEN=${INSLEN:-300}
USECOUPLED=${USECOUPLED:-yes}
USESINGLE=${USESINGLE:-no}
CLEANUP=${CLEANUP:-yes}

# Check mandatory variables
if [[ -z "$LIB" ]]; then
   echo "Error: LIB is mandatory" >&2
   exit 1
fi
if [[ -z "$PDIR" ]]; then
   echo "Error: PDIR is mandatory" >&2
   exit 1
fi
if [[ -z "$DATA" ]]; then
   echo "Error: DATA is mandatory" >&2
   exit 1
fi

# Prepare input
KMER=$PBS_ARRAYID
CWD=$(pwd)
DIR="$CWD/$LIB.velvet_$KMER"

# Run
module load velvet/1.2.10
echo velveth > "$DIR.proc"
# The command is assembled as an array so that paths containing whitespace
# reach velveth as single arguments.
VELVETH_CMD=(velveth_101_omp "$DIR" "$KMER" "-$FORMAT")
if [[ "$USECOUPLED" == "yes" ]]; then
   VELVETH_CMD+=(-shortPaired "$DATA/$LIB.CoupledReads.fa")
fi
if [[ "$USESINGLE" == "yes" ]]; then
   VELVETH_CMD+=(-short "$DATA/$LIB.SingleReads.fa")
fi
if [[ -n "$VELVETH_EXTRA" ]]; then
   # Deliberately unquoted: the variable may carry several options
   VELVETH_CMD+=($VELVETH_EXTRA)
fi
"${VELVETH_CMD[@]}" &> "$DIR.hlog"
echo velvetg > "$DIR.proc"
# VELVETG_EXTRA is deliberately unquoted for the same reason
velvetg_101_omp "$DIR" -exp_cov auto -cov_cutoff auto -ins_length "$INSLEN" $VELVETG_EXTRA &> "$DIR.glog"
if [[ ! -d "$DIR" ]] ; then
   echo "$0: Error: Directory $DIR doesn't exist, something went wrong" >&2
   exit 1
fi
if [[ ! -s "$DIR/contigs.fa" ]] ; then
   echo "$0: Error: File $DIR/contigs.fa doesn't exist, something went wrong" >&2
   exit 1
fi
if [[ "$CLEANUP" != "no" ]] ; then
   # Remove the large intermediate files, keeping contigs and logs
   echo cleanup > "$DIR.proc"
   rm "$DIR/Sequences"
   rm "$DIR/Roadmaps"
   rm "$DIR"/*Graph*
fi
echo done > "$DIR.proc"
@@ -1,38 +0,0 @@
# blast.pbs pipeline
# Step 01 : Initialize input files
#
# Expects SCRATCH and PDIR in the environment. Sourcing RUNME.bash with
# TASK="dry" is expected to define the configuration (INPUT, PROJ, MAX_JOBS)
# and the functions REGISTER_JOB, BEGIN and JOB_DONE used below.
# Each sub-step leaves a marker in "$SCRATCH/success" and is skipped when its
# marker already exists, so the step can be resubmitted safely.

# 00. Read configuration
cd "$SCRATCH" || exit 1 ;
TASK="dry" ;
source "$PDIR/RUNME.bash" ;
echo "$PBS_JOBID" > "$SCRATCH/success/01.00" ;

if [[ ! -e "$SCRATCH/success/01.01" ]] ; then
   # 01. BEGIN
   REGISTER_JOB "01" "01" "Custom BEGIN function" \
      && BEGIN \
      || exit 1 ;
   touch "$SCRATCH/success/01.01" ;
fi

if [[ ! -e "$SCRATCH/success/01.02" ]] ; then
   # 02. Split
   # Start from a clean directory: a previous attempt may have been
   # interrupted half-way.
   [[ -d "$SCRATCH/tmp/split" ]] && rm -R "$SCRATCH/tmp/split" ;
   REGISTER_JOB "01" "02" "Splitting query files" \
      && mkdir "$SCRATCH/tmp/split" \
      && perl "$PDIR/FastA.split.pl" "$INPUT" "$SCRATCH/tmp/split/$PROJ" "$MAX_JOBS" \
      || exit 1 ;
   touch "$SCRATCH/success/01.02" ;
fi ;

if [[ ! -e "$SCRATCH/success/01.03" ]] ; then
   # 03. Finalize
   REGISTER_JOB "01" "03" "Finalizing input preparation" \
      && mv "$SCRATCH/tmp/split" "$SCRATCH/tmp/in" \
      || exit 1 ;
   touch "$SCRATCH/success/01.03" ;
fi ;

# No subshell here: an `exit` inside ( ... ) would only leave the subshell
# and the step would be reported as done despite the failure.
[[ -d "$SCRATCH/tmp/out" ]] || mkdir "$SCRATCH/tmp/out" || exit 1 ;
JOB_DONE "01" ;

@@ -1,73 +0,0 @@
# blast.pbs pipeline
# Step 02 : Run BLAST
#
# Runs as one element of a job array; $PBS_ARRAYID selects the query chunk.
# Expects SCRATCH and PDIR in the environment. Sourcing RUNME.bash with
# TASK="dry" is expected to define PROJ and the functions BEFORE_BLAST,
# RUN_BLAST and AFTER_BLAST used below.
# Each sub-step leaves a marker in "$SCRATCH/success" and is skipped when its
# marker already exists, so an interrupted task can be resubmitted.

# Read configuration
cd "$SCRATCH" || exit 1 ;
TASK="dry" ;
source "$PDIR/RUNME.bash" ;

# 00. Initial vars
ID_N=$PBS_ARRAYID
[[ -z "$ID_N" ]] && exit 1 ;
[[ -e "$SCRATCH/success/02.$ID_N" ]] && exit 0 ;
IN="$SCRATCH/tmp/in/$PROJ.$ID_N.fa" ;
OUT="$SCRATCH/tmp/out/$PROJ.blast.$ID_N" ;
FINAL_OUT="$SCRATCH/results/$PROJ.$ID_N.blast" ;
if [[ -e "$SCRATCH/success/02.$ID_N.00" ]] ; then
   # Another job registered this task before: yield if it is still running
   pre_job=$(cat "$SCRATCH/success/02.$ID_N.00") ;
   state=$(qstat -f "$pre_job" 2>/dev/null | grep job_state | sed -e 's/.*= //')
   if [[ "$state" == "R" ]] ; then
      echo "Warning: This task is already being executed by $pre_job. Aborting." >&2 ;
      exit 0 ;
   elif [[ -z "$state" ]] ; then
      echo "Warning: This task was initialized by $pre_job, but it's currently not running. Superseding." >&2 ;
   fi ;
fi
echo "$PBS_JOBID" > "$SCRATCH/success/02.$ID_N.00" ;

# 01. Before BLAST
if [[ ! -e "$SCRATCH/success/02.$ID_N.01" ]] ; then
   BEFORE_BLAST "$IN" "$OUT" || exit 1 ;
   touch "$SCRATCH/success/02.$ID_N.01" ;
fi ;

# 02. Run BLAST
if [[ ! -e "$SCRATCH/success/02.$ID_N.02" ]] ; then
   # Recover previous runs, if any
   if [[ -s "$OUT" ]] ; then
      perl "$PDIR/BlastTab.recover_job.pl" "$IN" "$OUT" \
         || exit 1 ;
   fi ;
   # Run BLAST
   # The fresh output is renamed with a "-z" suffix, next to the "-<n>"
   # parts set aside by the recovery script.
   RUN_BLAST "$IN" "$OUT" \
      && mv "$OUT" "$OUT-z" \
      || exit 1 ;
   touch "$SCRATCH/success/02.$ID_N.02" ;
fi ;

# 03. Collect BLAST parts
if [[ ! -e "$SCRATCH/success/02.$ID_N.03" ]] ; then
   if [[ -e "$OUT" ]] ; then
      echo "Warning: The file $OUT pre-exists, but the BLAST collection was incomplete." >&2 ;
      echo " I'm assuming that it corresponds to the first part of the result, but you should check manually." >&2 ;
      echo " The last lines are:" >&2 ;
      tail -n 3 "$OUT" >&2 ;
   else
      touch "$OUT" || exit 1 ;
   fi ;
   # Glob instead of parsing `ls`, so that paths with whitespace survive.
   # A part is only removed once it has been appended successfully.
   for i in "$OUT"-* ; do
      [[ -e "$i" ]] || continue ;
      cat "$i" >> "$OUT" || exit 1 ;
      rm "$i" || exit 1 ;
   done ;
   # Do not mark the sub-step as done unless the result is in place
   mv "$OUT" "$FINAL_OUT" || exit 1 ;
   touch "$SCRATCH/success/02.$ID_N.03" ;
fi ;

# 04. After BLAST
if [[ ! -e "$SCRATCH/success/02.$ID_N.04" ]] ; then
   AFTER_BLAST "$IN" "$FINAL_OUT" || exit 1 ;
   touch "$SCRATCH/success/02.$ID_N.04" ;
fi ;

touch "$SCRATCH/success/02.$ID_N" ;

@@ -1,21 +0,0 @@
# blast.pbs pipeline
# Step 03 : Finalize
#
# Expects SCRATCH and PDIR in the environment. Sourcing RUNME.bash with
# TASK="dry" is expected to define PROJ and the functions REGISTER_JOB, END
# and JOB_DONE used below. END receives the prefix of the per-chunk results
# and the path of the final output.

# Read configuration
cd "$SCRATCH" || exit 1 ;
TASK="dry" ;
source "$PDIR/RUNME.bash" ;
PREFIX="$SCRATCH/results/$PROJ" ;
OUT="$SCRATCH/$PROJ.blast" ;
# NOTE(review): step 01 records its job ID in "01.00"; by analogy this step
# would be expected to use "03.00" rather than "02.00". Left unchanged
# because the readers of this marker are not visible here -- confirm.
echo "$PBS_JOBID" > "$SCRATCH/success/02.00" ;

# 01. END
if [[ ! -e "$SCRATCH/success/03.01" ]] ; then
   REGISTER_JOB "03" "01" "Custom END function" \
      && END "$PREFIX" "$OUT" \
      || exit 1 ;
   touch "$SCRATCH/success/03.01" ;
fi ;

JOB_DONE "03" ;

@@ -1,72 +0,0 @@
#!/usr/bin/perl
#
# BlastTab.recover_job.pl
#
# Prepares an interrupted tabular BLAST run to be resumed:
#  1. The results of every query but the last one found in the BLAST file
#     (which may be incomplete) are saved as "<blast>-<n>", with <n> the
#     first unused number, and the original BLAST file is deleted.
#  2. The query FastA file is reduced to the sequences following the last
#     query with saved results; the original is kept as "<query>.old".
#
# Nothing is modified unless both steps can be completed: if the last saved
# query cannot be found in the FastA file the script dies and leaves the
# input files untouched.

use warnings;
use strict;
use File::Copy;

# Entry point. Takes the query FastA path and the BLAST output path, prints
# a short report to STDOUT and dies on any failure.
sub recover_job {
    my ($fasta, $blast) = @_;

    ($fasta and $blast) or die "
.USAGE:
$0 query.fa blast.txt

query.fa Query sequences in FastA format.
blast.txt Incomplete BLAST output in tabular format.

";

    print "Fixing $blast:\n";

    # First unused "<blast>-<n>" name
    my $part = 0;
    $part++ while -e "$blast-$part";
    my $blast_res = "$blast-$part";
    my $blast_tmp = "$blast-tmp";
    my $fasta_tmp = "$fasta-tmp";

    # Copy the hits of all queries but the last one. Hits are buffered per
    # query and only flushed when the next query starts, so the buffer left
    # at the end (the possibly incomplete query) is never written.
    open my $blast_fh, '<', $blast
        or die "Cannot read the file: $blast: $!\n";
    open my $res_fh, '>', $blast_tmp
        or die "Cannot create the file: $blast-tmp: $!\n";
    my $pending    = '';    # Buffered hits of the current query
    my $pending_id = '';    # ID of the current query
    my $before     = '';    # ID of the last query already flushed
    while (my $ln = <$blast_fh>) {
        chomp $ln;
        last unless $ln =~ m/(.+?)\t/;    # Truncated line: stop here
        my $id = $1;
        if ($id eq $pending_id) {
            $pending .= "$ln\n";
        }
        else {
            print {$res_fh} $pending if $pending;
            $before     = $pending_id;
            $pending    = "$ln\n";
            $pending_id = $id;
        }
    }
    close $blast_fh;
    close $res_fh or die "Cannot write the file: $blast-tmp: $!\n";

    # Build the reduced query file, if any query was saved
    if ($before ne '') {
        print "[$before] ";
        my $header = ">$before";

        open my $fasta_fh, '<', $fasta
            or die "Cannot read file: $fasta: $!\n";
        open my $out_fh, '>', $fasta_tmp
            or die "Cannot create file: $fasta-tmp: $!\n";
        my $print = 0;    # True once past the last saved query
        my $at    = 0;    # Line number of the last saved query's header
        my $i     = 0;    # Lines read so far
        while (my $ln = <$fasta_fh>) {
            $i++;
            $print = 1 if $at and $ln =~ /^>/;
            print {$out_fh} $ln if $print;
            # Keep only the first word of the line to compare it with the ID
            $ln =~ s/\s+.*//;
            chomp $ln;
            $at = $i if $ln eq $header;
        }
        close $out_fh or die "Cannot write file: $fasta-tmp: $!\n";
        close $fasta_fh;

        # Replacing the queries with an empty file would silently skip the
        # rest of the search, so refuse to continue instead.
        unless ($at) {
            unlink $blast_tmp, $fasta_tmp;
            die "Cannot find the query $before in file: $fasta\n";
        }
        printf 'recovered at %.2f%% (%d/%d).' . "\n", 100 * $at / $i, $at, $i
            if $i;
    }

    # Everything is ready: apply the changes
    move($blast_tmp, $blast_res)
        or die "Cannot move file $blast-tmp into $blast_res: $!\n";
    unlink $blast or die "Cannot delete file: $blast: $!\n";
    if ($before ne '') {
        move($fasta, "$fasta.old")
            or die "Cannot move file $fasta into $fasta.old: $!\n";
        move($fasta_tmp, $fasta)
            or die "Cannot move file $fasta-tmp into $fasta: $!\n";
    }
    return;
}

# Run only when executed as a script, not when loaded with require/do
recover_job(@ARGV) unless caller;

1;