miga-base 1.2.17.1 → 1.2.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
  4. data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
  5. data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
  6. data/utils/enveomics/Scripts/SRA.download.bash +14 -2
  7. data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
  8. data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
  9. data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
  10. data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
  11. data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
  12. data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
  13. data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
  14. data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
  15. data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
  16. data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
  17. data/utils/enveomics/enveomics.R/R/utils.R +30 -20
  18. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
  19. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
  20. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
  21. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
  22. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
  23. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
  24. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
  25. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
  26. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
  27. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
  28. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
  29. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
  30. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
  31. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
  32. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
  33. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
  34. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
  35. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
  36. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
  37. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
  38. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
  39. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
  40. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
  41. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
  42. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
  43. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
  44. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
  45. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
  46. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
  48. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
  49. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
  50. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
  51. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
  52. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
  53. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
  54. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
  55. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
  56. metadata +3 -37
  57. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  58. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  59. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  60. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  61. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  62. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  63. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  64. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  65. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  66. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  67. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  68. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  69. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  70. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  71. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  72. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  73. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  74. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  75. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  76. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  77. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  78. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  79. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  80. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  81. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  82. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  83. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  84. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  85. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  86. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  87. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  88. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  89. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  90. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  91. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
@@ -1,98 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### VARIABLES
4
- # Queue and resources.
5
- QUEUE="iw-shared-6" ;
6
- MAX_JOBS=500 ; # Maximum number of concurrent jobs. Never exceed 1990.
7
- PPN=2 ;
8
- RAM="9gb" ;
9
-
10
- # Paths
11
- SCRATCH_DIR="$HOME/scratch/pipelines/blast" ; # Where the outputs and temporals will be created
12
- INPUT="$HOME/data/my-large-file.fasta" ; # Input query file
13
- DB="$HOME/data/db/nr" ; # Input database
14
- PROGRAM="blastp" ;
15
-
16
- # Pipeline
17
- MAX_TRIALS=5 ; # Maximum number of automated attempts to re-start a job
18
-
19
- ##################### FUNCTIONS
20
- ## All the functions below can be edited to suit your particular job.
21
- ## No function can be empty, but you can use a "dummy" function (like true).
22
- ## All functions have access to any of the variables defined above.
23
- ##
24
- ## The functions are executed in the following order (from left to right):
25
- ##
26
- ## / -----> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST ---\
27
- ## / ··· ··· ··· \
28
- ## BEGIN --#--------> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST -----#---> END
29
- ## \ ··· ··· ··· /
30
- ## \ -----> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST ---/
31
- ##
32
-
33
- # Function to execute ONLY ONCE at the beginning
34
- function BEGIN {
35
- ### Format the database (assuming proteins, check commands):
36
- # module load ncbi_blast/2.2.25 || exit 1 ;
37
- # makeblastdb -in $HOME/data/some-database.faa -title $DB -dbtype prot || exit 1 ;
38
- # module unload ncbi_blast/2.2.25 || exit 1 ;
39
- ### Don't do anything:
40
- true ;
41
- }
42
-
43
- # Function to execute BEFORE running the BLAST, for each sub-task.
44
- function BEFORE_BLAST {
45
- local IN=$1 # Query file
46
- local OUT=$2 # Blast file (to be created)
47
- ### Don't do anything:
48
- true ;
49
- }
50
-
51
- # Function that executes BLAST, for each sub-task
52
# Runs the similarity search for one sub-task.
# Globals:   PROGRAM, DB, PPN (read; defined in the VARIABLES section)
# Arguments: $1 - query file
#            $2 - BLAST output file (to be created)
# Exits with status 1 if loading the module or running the search fails.
function RUN_BLAST {
  local IN=$1 # Query file
  local OUT=$2 # Blast file (to be created)
  ### Run BLAST+ with 13th and 14th columns (query length and subject length):
  module load ncbi_blast/2.2.28_binary || exit 1 ;
  # $PROGRAM is intentionally left unquoted so that it may carry extra flags;
  # every path is quoted so that spaces in $HOME or file names are preserved.
  $PROGRAM -query "$IN" -db "$DB" -out "$OUT" -num_threads "$PPN" \
    -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen" \
    || exit 1 ;
  module unload ncbi_blast/2.2.28_binary || exit 1 ;
  ### Run BLAT (nucleotides)
  # module load blat/rhel6 || exit 1 ;
  # blat "$DB" "$IN" -out=blast8 "$OUT" || exit 1 ;
  # module unload blat/rhel6 || exit 1 ;
  ### Run BLAT (proteins)
  # module load blat/rhel6 || exit 1 ;
  # blat "$DB" "$IN" -out=blast8 -prot "$OUT" || exit 1 ;
  # module unload blat/rhel6 || exit 1 ;
}
70
-
71
- # Function to execute AFTER running the BLAST, for each sub-task
72
- function AFTER_BLAST {
73
- local IN=$1 # Query files
74
- local OUT=$2 # Blast files
75
- ### Filter by best-match:
76
- # sort $OUT | perl $PDIR/../../Scripts/BlastTab.best_hit_sorted.pl > $OUT.bm
77
- ### Filter by Bit-score 60:
78
- # awk '$12>=60' $OUT > $OUT.bs60
79
- ### Filter by corrected identity 95 (only if it has the additional 13th column):
80
- # awk '$3*$4/$13 >= 95' $OUT > $OUT.ci95
81
- ### Don't do anything:
82
- true ;
83
- }
84
-
85
- # Function to execute ONLY ONCE at the end, to concatenate the results
86
- function END {
87
- local PREFIX=$1 # Prefix of all Blast files
88
- local OUT=$2 # Single Blast output (to be created).
89
- ### Simply concatenate files:
90
- # cat $PREFIX.*.blast > $OUT
91
- ### Concatenate only the filtered files (if filtering in AFTER_BLAST):
92
- # cat $PREFIX.*.blast.bs60 > $OUT
93
- ### Sort the BLAST by query (might require considerable RAM):
94
- # sort -k 1 $PREFIX.*.blast > $OUT
95
- ### Don't do anything:
96
- true ;
97
- }
98
-
@@ -1 +0,0 @@
1
- ../../Scripts/FastA.split.pl
@@ -1,127 +0,0 @@
1
- @author: Luis Miguel Rodriguez-R <lmrodriguezr at gmail dot com>
2
-
3
- @update: Feb-20-2014
4
-
5
- @license: artistic 2.0
6
-
7
- @status: auto
8
-
9
- @pbs: yes
10
-
11
- # IMPORTANT
12
-
13
- This pipeline was developed for the [PACE cluster](http://pace.gatech.edu/). You
14
- are free to use it in other platforms with adequate adjustments.
15
-
16
- # PURPOSE
17
-
18
- Simplifies submitting and tracking large BLAST jobs in cluster.
19
-
20
- # HELP
21
-
22
- 1. Files preparation:
23
-
24
- 1.1. Obtain the enveomics package in the cluster. You can use: `git clone https://github.com/lmrodriguezr/enveomics.git`
25
-
26
- 1.2. Prepare the query sequences and the database.
27
-
28
- 1.3. Copy the file `CONFIG.mock.bash` to `CONFIG.<name>.bash`, where `<name>` is a
29
- short name for your project (avoid characters other than alphanumeric).
30
-
31
- 1.4. Change the variables in `CONFIG.<name>.bash`. The **Queue and resources** and the
32
- **Pipeline** variables are very standard, and can be kept unchanged. The **Paths**
33
- variables indicate where your input files are and where the output files are to
34
- be created, so check them carefully. Finally, the **FUNCTIONS** define the core
35
- functionality of the pipeline, and should also be reviewed. By default, the
36
- Pipeline simply runs BLAST+, with default parameters and tabular output with two
37
- extra columns (qlen and slen). However, additional functionality can easily be
38
- incorporated via these functions, such as BLAST filtering, concatenation, sorting,
39
- or even execution of other programs instead of BLAST, such as BLAT, etc. Note that
40
- the output MUST be BLAST-like tabular, because this is the only format supported
41
- to check completeness and recover incomplete runs.
42
-
43
- 2. Pipeline execution:
44
-
45
- 2.1. To initialize a run, execute: `./RUNME.bash <name> run`.
46
-
47
- 2.2. To check the status of a job, execute: `./RUNME.bash <name> check`.
48
-
49
- 2.3. To pause a run, execute: `./RUNME.bash <name> pause` (see 2.1 to resume).
50
-
51
- 2.4. To check if your CONFIG defines all required parameters, execute: `./RUNME.bash <name> dry`.
52
-
53
- 2.5. To review all the e/o files in the run, execute: `./RUNME.bash <name> eo`.
54
-
55
- 3. Finalizing:
56
-
57
- 3.1. `./RUNME.bash <name> check` will inform you if a project finished. If it finished successfully,
58
- you can review your (split) results in $SCRATCH/results. If you concatenated the results in the
59
- `END` function, you should have a file with all the results in $SCRATCH/<name>.blast.
60
-
61
- 3.2. Usually, checking the e/o files at the end is a good idea (`./RUNME.bash <name> eo`). However,
62
- bear in mind that this Pipeline can overcome several errors and is robust to most failures, so
63
- don't be alarmed at the first sight of errors.
64
-
65
- # Comments
66
-
67
- * Some scripts contained in this package are actually symlinks to files in the
68
- _Scripts_ folder. Check the existence of these files when copied to
69
- the cluster.
70
-
71
- # Troubleshooting
72
-
73
- 1. Do I really have to change directory (`cd`) to the pipeline's folder every time I want to execute
74
- something?
75
-
76
- No. Not really. For simplicity, this file tells you to execute `./RUNME.bash`. However, you don't
77
- really have to be there, you can execute it from any location. For example, if you saved enveomics in
78
- your home directory, you can just execute `~/enveomics/Pipelines/blast.pbs/RUNME.bash` instead from any location
79
- in the head node.
80
-
81
- 2. When I check a project, few sub-jobs are Active for much longer than the others. How do I know if those
82
- sub-jobs are really active?
83
-
84
- Let's review an example of a problematic run. When you run `./RUNME.bash <name> check`, you see the
85
- following in the "Active jobs" section:
86
- ````
87
- Idle: 155829.shared-sched.pace.gatech.edu: 02: 00: Mon Mar 17 14:10:28 EDT 2014
88
- Sub-jobs:500 Active:4 ( 0.8% ) Eligible:0 ( 0.0% ) Blocked:0 ( 0.0% ) Completed:496 ( 99.2% )
89
- Idle: 155830.shared-sched.pace.gatech.edu: 02: 00: Mon Mar 17 14:10:28 EDT 2014
90
-
91
- Running jobs: 0.
92
- Idle jobs: 2.
93
- ````
94
- That means that the job "155829.shared-sched.pace.gatech.edu" has four Active jobs, while all the others are Completed. This is
95
- a sign of something problematic. You can see the complete status of each array using
96
- `checkjob -v <JOB_NAME>`. In our example above, you would run `checkjob -v 155829`. In the output
97
- of checkjob, most jobs should report "Completed". In this example, there are four jobs that are not
98
- complete:
99
- ````
100
- ...
101
- 387 : 155829[387] : Completed
102
- 388 : 155829[388] : Running
103
- 389 : 155829[389] : Running
104
- 390 : 155829[390] : Running
105
- 391 : 155829[391] : Running
106
- 392 : 155829[392] : Completed
107
- ...
108
- ````
109
- So you can simply check these sub-jobs in more detail. For example, if I run `checkjob -v 155829[388]`,
110
- I see that the job is running in the machine `iw-k30-12.pace.gatech.edu` (Task Distribution), so I can try
111
- to login to that machine to check if the job is actually running, using `top -u $(whoami)`. However, when
112
- I run `ssh iw-k30-12`, I got a "Connection closed" error, which means that the machine hung up. At this point,
113
- you might want to try one of the following solutions:
114
-
115
- 2.1. Pause the project using `./RUNME.bash <name> pause`, wait a few minutes, and resume using
116
- `./RUNME.bash <name> run`. If you tried this a couple of times and you still have sub-jobs hanging, try:
117
-
118
- 2.2. Check if your sub-jobs finished. Sometimes sub-jobs die too soon to return a success code, but they actually
119
- finished. Just run the following command: `ls <SCRATCH>/<name>/success/02.* | wc -l`, where `<SCRATCH>` is the
120
- value you set for the `SCRATCH_DIR` variable in the CONFIG file, and `<name>` is the name of your project. If the
121
- output of that command is a number, and that number is exactly six times the number of jobs (`MAX_JOBS` in the
122
- CONFIG file, typically 500), then your step 2 actually finished. In my case, I have 500 jobs, and the output
123
- was 3000, so my job finished successfully, but the pipeline didn't notice. You can manually tell the pipeline
124
- to go on running: `touch <SCRATCH>/<name>/success/02`, and pausing/resuming the project (see 2.1 above). If
125
- the output is not the expected number (in my case, 3000, which is 6*500), DON'T RUN `touch`, just try the
126
- solution 2.1 above once again.
127
-
@@ -1,109 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### VARIABLES
4
- # Find the directory of the pipeline
5
- if [[ "$PDIR" == "" ]] ; then PDIR=$(dirname $(readlink -f $0)); fi ;
6
- CWD=$(pwd)
7
-
8
- # Load config
9
- if [[ "$PROJ" == "" ]] ; then PROJ="$1" ; fi
10
- if [[ "$TASK" == "" ]] ; then TASK="$2" ; fi
11
- if [[ "$TASK" == "" ]] ; then TASK="check" ; fi
12
- NAMES=$(ls $PDIR/CONFIG.*.bash | sed -e 's/.*CONFIG\./ o /' | sed -e 's/\.bash//');
13
- if [[ "$PROJ" == "" ]] ; then
14
- if [[ "$HELP" == "" ]] ; then
15
- echo "
16
- Usage:
17
- $0 name task
18
-
19
- name The name of the run. CONFIG.name.bash must exist.
20
- task The action to perform. One of:
21
- o run: Executes the BLAST.
22
- o check: Indicates the progress of the task (default).
23
- o pause: Cancels running jobs (resume using run).
24
- o dry: Checks that the parameters are correct, but doesn't run.
25
- o eo: Review all eo files produced in the project.
26
-
27
- See $PDIR/README.md for more information.
28
-
29
- Available names are:
30
- $NAMES
31
- " >&2
32
- else
33
- echo "$HELP
34
- Available names are:
35
- $NAMES
36
- " >&2
37
- fi
38
- exit 1
39
- fi
40
- if [[ ! -e "$PDIR/CONFIG.$PROJ.bash" ]] ; then
41
- echo "$0: Error: Impossible to find $PDIR/CONFIG.$PROJ.bash, available names are:
42
- $NAMES" >&2
43
- exit 1
44
- fi
45
- source "$PDIR/CONFIG.$PROJ.bash" ;
46
- SCRATCH="$SCRATCH_DIR/$PROJ" ;
47
- MINVARS="PDIR=$PDIR,SCRATCH=$SCRATCH,PROJ=$PROJ" ;
48
- case $QUEUE in
49
- bioforce-6)
50
- MAX_H=120 ;;
51
- iw-shared-6)
52
- MAX_H=12 ;;
53
- micro-largedata)
54
- MAX_H=120 ;;
55
- biocluster-6 | biohimem-6 | microcluster)
56
- MAX_H=240 ;;
57
- *)
58
- echo "Unrecognized queue: $QUEUE." >&2 ;
59
- exit 1 ;;
60
- esac ;
61
-
62
- ##################### FUNCTIONS
63
- function REGISTER_JOB {
64
- local STEP=$1
65
- local SUBSTEP=$2
66
- local MESSAGE=$3
67
- local JOBID=$4
68
-
69
- if [[ "$JOBID" != "" ]] ; then
70
- MESSAGE="$MESSAGE [$JOBID]" ;
71
- echo "$STEP: $SUBSTEP: $(date)" >> "$SCRATCH/log/active/$JOBID" ;
72
- fi
73
- echo "$MESSAGE." >> "$SCRATCH/log/status/$STEP" ;
74
- }
75
-
76
# Submits one pipeline step and records the resulting job IDs.
# Globals:   SCRATCH, CWD (read)
#            NEW_JOBID, SENTINEL_JOBID (read after sourcing BASHFILE, which is
#            expected to set them)
# Arguments: $1 - step
#            $2 - sub-step
#            $3 - status message to register for the new job
#            $4 - bash file to source in order to submit the job(s)
# Outputs:   the value of NEW_JOBID on stdout
# Exits with status 1 if a directory change or the sourced file fails.
function LAUNCH_JOB {
  local STEP=$1
  local SUBSTEP=$2
  local MESSAGE=$3
  local BASHFILE=$4

  # The submission is sourced from within the e/o log directory; abort rather
  # than submit from an unexpected location.
  cd "$SCRATCH/log/eo" || exit 1 ;
  # Every launch adds one line to the trials file.
  date >> "$SCRATCH/etc/trials" ;
  source "$BASHFILE" || exit 1 ;
  # Quoted so that a working directory containing spaces is restored properly.
  cd "$CWD" || exit 1 ;
  if [[ "$SENTINEL_JOBID" != "" ]] ; then
    REGISTER_JOB "$STEP" "$SUBSTEP" "Guarding job $NEW_JOBID" "$SENTINEL_JOBID" ;
  fi ;
  REGISTER_JOB "$STEP" "$SUBSTEP" "$MESSAGE" "$NEW_JOBID" ;
  echo "$NEW_JOBID" ;
}
92
-
93
# Marks a pipeline step as finished.
# Globals:   SCRATCH (read)
# Arguments: $1 - step identifier (e.g. "00", "02")
# Effects:   appends "Done." to the step's status log, creates the success
#            marker for the step, and resets the trials file to "# ".
function JOB_DONE {
  # Local, like in REGISTER_JOB and LAUNCH_JOB, so that the caller's own STEP
  # variable is not overwritten.
  local STEP=$1

  echo "Done." >> "$SCRATCH/log/status/$STEP" ;
  touch "$SCRATCH/success/$STEP" ;
  printf '# ' > "$SCRATCH/etc/trials" ;
}
100
-
101
- ##################### RUN
102
- # Execute task
103
- if [[ ! -e "$PDIR/TASK.$TASK.bash" ]] ; then
104
- echo "Unrecognized task: $TASK." >&2 ;
105
- exit 1 ;
106
- else
107
- source "$PDIR/TASK.$TASK.bash"
108
- fi
109
-
@@ -1,128 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### RUN
4
- # Check if it was sourced from RUNME.bash
5
- if [[ "$PDIR" == "" ]] ; then
6
- echo "$0: Error: This file is not stand-alone." >&2
7
- echo " Execute RUNME.bash as described in the README.txt file" >&2
8
- exit 1
9
- fi
10
-
11
- # Check if the project exists
12
- if [[ ! -d "$SCRATCH" ]] ; then
13
- echo "The project $PROJ doesn't exist at $SCRATCH_DIR." >&2
14
- echo " Execute '$PDIR/RUNME.bash $PROJ run' first." >&2
15
- exit 1
16
- fi
17
-
18
- # Get log:
19
- echo "==[ Running tasks ]=="
20
- for i in $(ls $SCRATCH/log/status/* 2>/dev/null) ; do
21
- echo " $(basename $i): $(tail -n 1 $i)"
22
- done
23
- echo ""
24
-
25
- # Get active jobs:
26
- echo "==[ Active jobs ]=="
27
- job_r=0
28
- job_i=0
29
- job_c=0
30
- for i in $(ls $SCRATCH/log/active/* 2>/dev/null) ; do
31
- jid=$(basename $i)
32
- stat=$(qstat -f1 $jid 2>&1)
33
- state=$(echo "$stat" | grep '^ *job_state = ' | sed -e 's/.*job_state = //')
34
- case $state in
35
- C)
36
- code=$(echo "$stat" | grep '^ *exit_status = ' | sed -e 's/.*exit_status = //')
37
- if [[ "$code" == "0" ]] ; then
38
- mv "$i" "$SCRATCH/log/done/"
39
- let job_c=$job_c+1
40
- else
41
- echo "Warning: Job $jid ($(cat $i|tr -d '\n')) failed with code $code." >&2
42
- echo " see errors at: $(echo "$stat" | grep '^ *Error_Path = ' | sed -e 's/.*Error_Path = //')"
43
- mv "$i" "$SCRATCH/log/failed/"
44
- fi ;;
45
- R)
46
- echo " Running: $jid: $(cat "$i")"
47
- let job_r=$job_r+1 ;;
48
- [HQW])
49
- echo " Idle: $jid: $(cat "$i")"
50
- let job_i=$job_i+1 ;;
51
- E)
52
- echo " Canceling: $jid: $(cat "$i")" ;;
53
- *)
54
- tmp_err=$(echo "$stat" | grep ERROR)
55
- if [[ "$tmp_err" == "" ]] ; then
56
- echo "Warning: Unrecognized state: $jid: $state." >&2
57
- echo " Please report this problem." >&2
58
- else
59
- echo " Error: $jid: $tmp_err"
60
- fi ;;
61
- esac
62
- #subjobs=$(echo "$stat" | grep 'Sub-jobs:' | sed -e 's/.*: *//')
63
- #if [[ "$subjobs" -gt 0 ]] ; then
64
- # echo "$stat" | grep '^ *\(Sub-jobs\|Active\|Eligible\|Blocked\|Completed\):' | sed -e 's/^ *//' | sed -e 's/ *//' | tr '\n' ' ' | sed -e 's/^/ /'
65
- # echo
66
- #fi
67
- done
68
- if [[ $job_c -gt 0 ]] ; then
69
- echo ""
70
- echo " Completed since last check: $job_c."
71
- fi
72
- if [[ $job_r -gt 0 || $job_i -gt 0 ]] ; then
73
- echo ""
74
- echo " Running jobs: $job_r."
75
- echo " Idle jobs: $job_i."
76
- fi
77
- echo ""
78
-
79
- # Auto-trials
80
- echo "==[ Auto-trials ]=="
81
- if [[ -e "$SCRATCH/etc/trials" ]] ; then
82
- trials=$(cat "$SCRATCH/etc/trials" | wc -l | sed -e 's/ //g')
83
- if [[ $trials -gt 1 ]] ; then
84
- echo " $trials trials attempted:"
85
- else
86
- echo " No recent failures in the current step, job launched:"
87
- fi
88
- cat "$SCRATCH/etc/trials" | sed -e 's/^/ o /' | sed -e 's/# $/No active trials\n/g'
89
- fi
90
- echo ""
91
-
92
# Step-specific checks:
# Reports how far the project got, based on the markers in "$SCRATCH/success":
# 00 (initialization), 01 (input preparation), 02 (BLAST) and 03
# (concatenation). One line is printed per completed step, followed by the
# step currently pending.
# Globals: SCRATCH (read); todo (written: 0 only if all four markers exist,
#          1 otherwise)
function STEP_SUMMARY {
  todo=1
  if [[ ! -e "$SCRATCH/success/00" ]] ; then
    echo " Initializing project."
    return
  fi
  echo " Successful project initialization."
  if [[ ! -e "$SCRATCH/success/01" ]] ; then
    echo " Preparing input."
    return
  fi
  echo " Successful input preparation."
  if [[ ! -e "$SCRATCH/success/02" ]] ; then
    echo " Running BLAST."
    return
  fi
  echo " Successful BLAST execution."
  # Concatenation is step 03; testing marker 02 again here would report the
  # project as finished as soon as BLAST completes.
  if [[ ! -e "$SCRATCH/success/03" ]] ; then
    echo " Concatenating results."
    return
  fi
  echo " Successful concatenation."
  echo " Project finished successfully!"
  todo=0
}

echo "==[ Step summary ]=="
STEP_SUMMARY
117
-
118
- if [[ "$todo" -eq 1 && $job_r -eq 0 && $job_i -eq 0 ]] ; then
119
- echo " Job currently paused. To resume, execute:"
120
- echo " $PDIR/RUNME.bash $PROJ run"
121
- fi
122
- echo
123
-
124
- # Entire log
125
- echo "==[ Complete log ]=="
126
- for i in $(ls $SCRATCH/log/status/* 2>/dev/null) ; do
127
- cat "$i" | sed -e "s/^/ $(basename $i): /"
128
- done
@@ -1,16 +0,0 @@
1
-
2
# Dry-run task: verifies that the configuration defines every variable and
# function checked below. Reports the first missing item on stderr and exits
# with status 1.

# Required variables (must be non-empty).
for _dry_name in QUEUE MAX_JOBS PPN RAM SCRATCH_DIR INPUT DB PROGRAM MAX_TRIALS ; do
  if [[ "${!_dry_name}" == "" ]] ; then
    echo "Undefined $_dry_name" >&2
    exit 1
  fi
done

# Required functions. The exit must happen in the current shell: inside a
# `( ... )` subshell it would only end the subshell and the script would
# carry on with an incomplete configuration.
for _dry_name in BEGIN BEFORE_BLAST RUN_BLAST AFTER_BLAST END ; do
  if [[ "$(type -t "$_dry_name")" != "function" ]] ; then
    echo "Undefined function $_dry_name" >&2
    exit 1
  fi
done
unset _dry_name
16
-
@@ -1,22 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### RUN
4
- # Check if it was sourced from RUNME.bash
5
- if [[ "$PDIR" == "" ]] ; then
6
- echo "$0: Error: This file is not stand-alone." >&2
7
- echo " Execute RUNME.bash as described in the README.txt file" >&2 ;
8
- exit 1 ;
9
- fi ;
10
-
11
- # Check if the project exists
12
- if [[ ! -d "$SCRATCH" ]] ; then
13
- echo "The project $PROJ doesn't exist at $SCRATCH_DIR." >&2 ;
14
- echo " Execute '$PDIR/RUNME.bash $PROJ run' first." >&2 ;
15
- exit 1 ;
16
- fi ;
17
-
18
- # Review errors
19
- (echo -e "==[ Last 10 lines of all e files ]==\nPress q to exit\n" ; tail -n 10 $SCRATCH/log/eo/*.e* ) | less
20
- # Review output
21
- (echo -e "==[ Last 100 lines of all o files ]==\nPress q to exit\n" ; tail -n 100 $SCRATCH/log/eo/*.o* ) | less
22
-
@@ -1,26 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### RUN
4
- # Check if it was sourced from RUNME.bash
5
- if [[ "$PDIR" == "" ]] ; then
6
- echo "$0: Error: This file is not stand-alone." >&2
7
- echo " Execute RUNME.bash as described in the README.txt file" >&2 ;
8
- exit 1 ;
9
- fi ;
10
-
11
- # Get active jobs:
12
- echo "======[ check ]======"
13
- job_r=0;
14
- job_i=0;
15
- job_c=0;
16
-
17
- echo "======[ pause ]======"
18
# Cancel every job that has an entry in "$SCRATCH/log/active"; each entry is
# named after its job ID.
for i in "$SCRATCH"/log/active/* ; do
  # With no entries the glob stays literal; skip it.
  [[ -e "$i" ]] || continue ;
  # Resolve the job ID before reporting it, so that the message names the job
  # being cancelled rather than the one from the previous iteration.
  jid=$(basename "$i") ;
  echo " Pausing $jid." ;
  qdel "$jid" ;
done ;
23
-
24
- # Restart auto-trials
25
- echo -n > "$SCRATCH/etc/trials" ;
26
-
@@ -1,89 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### RUN
4
- # Check if it was sourced from RUNME.bash
5
- if [[ "$PDIR" == "" ]] ; then
6
- echo "$0: Error: This file is not stand-alone. Execute RUNME.bash as described in the README.txt file" >&2
7
- exit 1
8
- fi
9
-
10
- # Check if too many auto-trials were attempted
11
- trials=0 ;
12
- if [[ -e "$SCRATCH/etc/trials" ]] ; then
13
- trials=$(cat "$SCRATCH/etc/trials" | wc -l | sed -e 's/ //g');
14
- if [[ $trials -ge $MAX_TRIALS ]] ; then
15
- echo "The maximum number of trials was already attempted, halting." >&2 ;
16
- exit 1 ;
17
- fi ;
18
- fi ;
19
-
20
- # Create the scratch directory
21
- if [[ ! -d "$SCRATCH" ]] ; then mkdir -p "$SCRATCH" || exit 1 ; fi;
22
-
23
- if [[ ! -e "$SCRATCH/success/00" ]] ; then
24
- # 00. Initialize the project
25
- echo "00. Initializing project." >&2 ;
26
- mkdir -p "$SCRATCH/tmp" "$SCRATCH/etc" "$SCRATCH/results" "$SCRATCH/success" || exit 1 ;
27
- mkdir -p "$SCRATCH/log/active" "$SCRATCH/log/done" "$SCRATCH/log/failed" || exit 1 ;
28
- mkdir -p "$SCRATCH/log/status" "$SCRATCH/log/eo" || exit 1 ;
29
- echo "Preparing structure." >> "$SCRATCH/log/status/00" ;
30
- # Build 01.bash
31
- echo "NEW_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=$MAX_H:00:00,mem=$RAM' -v '$MINVARS' -N '$PROJ-01' \\
32
- '$PDIR/01.pbs.bash'|tr -d '\\n')" \
33
- > "$SCRATCH/etc/01.bash" || exit 1 ;
34
- echo "SENTINEL_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=2:00:00' -W \"depend=afterany:\$NEW_JOBID\" \\
35
- -v \"$MINVARS,STEP=01,AFTERJOB=\$NEW_JOBID\" -N '$PROJ-01-sentinel' '$PDIR/sentinel.pbs.bash'|tr -d '\\n')" \
36
- >> "$SCRATCH/etc/01.bash" || exit 1 ;
37
- # Build 02.bash
38
- echo "NEW_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=$MAX_H:00:00,mem=$RAM,nodes=1:ppn=$PPN' \\
39
- -v '$MINVARS' -N '$PROJ-02' -t '1-$MAX_JOBS' '$PDIR/02.pbs.bash'|tr -d '\\n')" \
40
- > "$SCRATCH/etc/02.bash" \
41
- || exit 1 ;
42
- echo "SENTINEL_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=2:00:00' -W \"depend=afteranyarray:\$NEW_JOBID\" \\
43
- -v \"$MINVARS,STEP=02,AFTERJOB=\$NEW_JOBID\" -N '$PROJ-02-sentinel' '$PDIR/sentinel.pbs.bash'|tr -d '\\n')" \
44
- >> "$SCRATCH/etc/02.bash" \
45
- || exit 1 ;
46
- # Build 03.bash
47
- echo "NEW_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=$MAX_H:00:00,mem=$RAM' -v '$MINVARS' -N '$PROJ-03' \\
48
- '$PDIR/03.pbs.bash'|tr -d '\\n')" \
49
- > "$SCRATCH/etc/03.bash" || exit 1 ;
50
- echo "SENTINEL_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=2:00:00' -W \"depend=afterany:\$NEW_JOBID\" \\
51
- -v \"$MINVARS,STEP=03,AFTERJOB=\$NEW_JOBID\" -N '$PROJ-03-sentinel' '$PDIR/sentinel.pbs.bash'|tr -d '\\n')" \
52
- >> "$SCRATCH/etc/03.bash" || exit 1 ;
53
-
54
- JOB_DONE "00" ;
55
- fi ;
56
-
57
- if [[ ! -e "$SCRATCH/success/01" ]] ; then
58
- # 01. Preparing input
59
- echo "01. Preparing input." >&2 ;
60
- JOB01=$(LAUNCH_JOB "01" "00" "Preparing input files" "$SCRATCH/etc/01.bash") ;
61
- echo " New job: $JOB01." >&2 ;
62
- else
63
- if [[ ! -e "$SCRATCH/success/02" ]] ; then
64
- # 02. Launching BLAST
65
- echo "02. Launching BLAST." >&2 ;
66
- JOB02=$(LAUNCH_JOB "02" "00" "Running BLAST" "$SCRATCH/etc/02.bash") ;
67
- echo " New job: $JOB02." >&2 ;
68
- # Clean on resubmission
69
- cleaned=0
70
- echo -n " Cleaning completed sub-jobs on $JOB02: " >&2 ;
71
- for jobi in $(seq 1 $MAX_JOBS) ; do
72
- if [[ -e "$SCRATCH/success/02.$jobi" ]] ; then
73
- qdel "$JOB02""[$jobi]" &> /dev/null ;
74
- let cleaned=$cleaned+1 ;
75
- fi ;
76
- done ;
77
- echo "$cleaned sub-jobs completed." >&2 ;
78
- else
79
- if [[ ! -e "$SCRATCH/success/03" ]] ; then
80
- # 03. Finalize
81
- echo "03. Finalizing." >&2 ;
82
- JOB03=$(LAUNCH_JOB "03" "00" "Concatenating results" "$SCRATCH/etc/03.bash") ;
83
- echo " New job: $JOB03." >&2 ;
84
- else
85
- echo "Project complete, nothing to run." ;
86
- fi ;
87
- fi ;
88
- fi ;
89
-
@@ -1,29 +0,0 @@
1
- # blast.pbs pipeline
2
- # Sentinel script
3
-
4
- echo "Sentinel script after $AFTERJOB" ;
5
-
6
- # Step-specific checks
7
- if [[ "$STEP" == "02" ]] ; then
8
- # Read configuration
9
- cd $SCRATCH ;
10
- TASK="dry" ;
11
- source "$PDIR/RUNME.bash" ;
12
-
13
- # Check tasks
14
- INCOMPLETE=0;
15
- for i in $(seq 1 $MAX_JOBS) ; do
16
- if [[ ! -e "$SCRATCH/success/02.$i" ]] ; then
17
- let INCOMPLETE=$INCOMPLETE+1 ;
18
- fi ;
19
- done
20
- if [[ $INCOMPLETE -eq 0 ]] ; then
21
- JOB_DONE "02" ;
22
- else
23
- echo "$INCOMPLETE incomplete jobs, re-launching step 02." ;
24
- fi ;
25
- fi
26
-
27
- # Continue the workflow
28
- "$PDIR/RUNME.bash" "$PROJ" run || exit 1 ;
29
-