miga-base 1.2.17.1 → 1.2.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
  4. data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
  5. data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
  6. data/utils/enveomics/Scripts/SRA.download.bash +14 -2
  7. data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
  8. data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
  9. data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
  10. data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
  11. data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
  12. data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
  13. data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
  14. data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
  15. data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
  16. data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
  17. data/utils/enveomics/enveomics.R/R/utils.R +30 -20
  18. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
  19. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
  20. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
  21. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
  22. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
  23. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
  24. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
  25. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
  26. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
  27. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
  28. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
  29. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
  30. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
  31. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
  32. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
  33. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
  34. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
  35. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
  36. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
  37. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
  38. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
  39. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
  40. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
  41. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
  42. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
  43. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
  44. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
  45. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
  46. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
  48. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
  49. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
  50. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
  51. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
  52. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
  53. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
  54. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
  55. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
  56. metadata +3 -37
  57. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  58. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  59. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  60. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  61. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  62. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  63. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  64. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  65. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  66. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  67. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  68. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  69. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  70. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  71. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  72. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  73. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  74. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  75. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  76. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  77. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  78. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  79. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  80. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  81. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  82. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  83. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  84. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  85. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  86. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  87. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  88. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  89. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  90. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  91. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
@@ -7,20 +7,24 @@
7
7
  enve.recplot2.findPeaks.__emauto_one(x, comp, do_crit, best, verbose, ...)
8
8
  }
9
9
  \arguments{
10
- \item{x}{\code{\link{enve.RecPlot2}} object}
10
+ \item{x}{\code{\link{enve.RecPlot2}} object.}
11
11
 
12
- \item{comp}{Components}
12
+ \item{comp}{Components.}
13
13
 
14
- \item{do_crit}{Function estimating the criterion}
14
+ \item{do_crit}{Function estimating the criterion.}
15
15
 
16
- \item{best}{Best solution thus far}
16
+ \item{best}{Best solution thus far.}
17
17
 
18
- \item{verbose}{If verbose}
18
+ \item{verbose}{If verbose.}
19
19
 
20
- \item{...}{Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}}
20
+ \item{...}{Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}.}
21
+ }
22
+ \value{
23
+ Updated solution with the same structure as \code{best}.
21
24
  }
22
25
  \description{
23
- Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.emauto}}).
26
+ Internal ancillary function (see
27
+ \code{\link{enve.recplot2.findPeaks.emauto}}).
24
28
  }
25
29
  \author{
26
30
  Luis M. Rodriguez-R [aut, cre]
@@ -44,8 +44,12 @@ enve.recplot2.findPeaks.__mow_one(
44
44
 
45
45
  \item{log}{If log-transformed depths}
46
46
  }
47
+ \value{
48
+ Return an \code{enve.RecPlot2.Peak} object.
49
+ }
47
50
  \description{
48
- Internall ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
51
+ Internal ancillary function (see
52
+ \code{\link{enve.recplot2.findPeaks.mower}}).
49
53
  }
50
54
  \author{
51
55
  Luis M. Rodriguez-R [aut, cre]
@@ -9,6 +9,9 @@ enve.recplot2.findPeaks.__mower(peaks.opts)
9
9
  \arguments{
10
10
  \item{peaks.opts}{List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}}
11
11
  }
12
+ \value{
13
+ A list of \code{enve.RecPlot2.Peak} objects.
14
+ }
12
15
  \description{
13
16
  Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
14
17
  }
@@ -9,8 +9,8 @@ enve.recplot2.findPeaks.mower(
9
9
  min.points = 10,
10
10
  quant.est = c(0.002, 0.998),
11
11
  mlv.opts = list(method = "parzen"),
12
- fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start =
13
- list(omega = 1, alpha = -1), lower = c(0, -Inf, -Inf)),
12
+ fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start
13
+ = list(omega = 1, alpha = -1), lower = c(0, -Inf, -Inf)),
14
14
  fitdist.opts.norm = list(distr = "norm", method = "qme", probs = c(0.4, 0.6), start =
15
15
  list(sd = 1), lower = c(0, -Inf)),
16
16
  rm.top = 0.05,
@@ -14,9 +14,9 @@ enve.recplot2.windowDepthThreshold(
14
14
  \arguments{
15
15
  \item{rp}{Recruitment plot, an \code{\link{enve.RecPlot2}} object.}
16
16
 
17
- \item{peak}{Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
18
- list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
19
- used (see \code{\link{enve.recplot2.corePeak}}).}
17
+ \item{peak}{Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
18
+ be a list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
19
+ peak is used (see \code{\link{enve.recplot2.corePeak}}).}
20
20
 
21
21
  \item{lower.tail}{If \code{FALSE}, it returns windows significantly above the peak in
22
22
  sequencing depth.}
@@ -50,8 +50,8 @@ It must be a matrix or matrix-coercible object, with samples as rows and
50
50
  dimensions as columns.}
51
51
 
52
52
  \item{pre.tribs}{Optional. If passed, the points are recovered from this object (except if
53
- \code{points} is also passed. This should be an \code{\link{enve.TRIBS}} object
54
- estimated on the same objects (the selection is unimportant).}
53
+ \code{points} is also passed). This should be an \code{\link{enve.TRIBS}}
54
+ object estimated on the same objects (the selection is unimportant).}
55
55
  }
56
56
  \value{
57
57
  Returns an \code{\link{enve.TRIBS}} object.
@@ -70,6 +70,9 @@ model?}
70
70
 
71
71
  \item{...}{Any other graphic parameters.}
72
72
  }
73
+ \value{
74
+ No return value.
75
+ }
73
76
  \description{
74
77
  Plots an \code{\link{enve.GrowthCurve}} object.
75
78
  }
@@ -38,6 +38,9 @@ as areas, and plots the outliers as points.}
38
38
 
39
39
  \item{...}{Any additional parameters supported by \code{plot}.}
40
40
  }
41
+ \value{
42
+ No return value.
43
+ }
41
44
  \description{
42
45
  Plot an \code{\link{enve.TRIBS}} object.
43
46
  }
@@ -19,9 +19,9 @@
19
19
  \arguments{
20
20
  \item{x}{\code{\link{enve.TRIBStest}} object to plot.}
21
21
 
22
- \item{type}{What to plot. \code{overlap} generates a plot of the two contrasting empirical
23
- PDFs (to compare against each other), \code{difference} produces a plot of the
24
- differences between the empirical PDFs (to compare against zero).}
22
+ \item{type}{What to plot. \code{overlap} generates a plot of the two contrasting
23
+ empirical PDFs (to compare against each other), \code{difference} produces a
24
+ plot of the differences between the empirical PDFs (to compare against zero).}
25
25
 
26
26
  \item{col}{Main color of the plot if type=\code{difference}.}
27
27
 
@@ -37,6 +37,9 @@ differences between the empirical PDFs (to compare against zero).}
37
37
 
38
38
  \item{...}{Any other graphical arguments.}
39
39
  }
40
+ \value{
41
+ No return value.
42
+ }
40
43
  \description{
41
44
  Plots an \code{\link{enve.TRIBStest}} object.
42
45
  }
@@ -17,8 +17,8 @@
17
17
  id.lim = range(x$id.breaks),
18
18
  pos.lim = range(x$pos.breaks),
19
19
  pos.units = c("Mbp", "Kbp", "bp"),
20
- mar = list(`1` = c(5, 4, 1, 1) + 0.1, `2` = c(ifelse(any(layout == 1), 1, 5), 4, 4, 1)
21
- + 0.1, `3` = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1, `4` =
20
+ mar = list(`1` = c(5, 4, 1, 1) + 0.1, `2` = c(ifelse(any(layout == 1), 1, 5), 4, 4,
21
+ 1) + 0.1, `3` = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1, `4` =
22
22
  c(ifelse(any(layout == 1), 1, 5), ifelse(any(layout == 2), 1, 4), 4, 2) + 0.1, `5` =
23
23
  c(5, 3, 4, 1) + 0.1, `6` = c(5, 4, 4, 2) + 0.1),
24
24
  pos.splines = 0,
@@ -11,6 +11,9 @@
11
11
 
12
12
  \item{...}{No additional parameters are currently supported.}
13
13
  }
14
+ \value{
15
+ No return value.
16
+ }
14
17
  \description{
15
18
  Summary of an \code{\link{enve.GrowthCurve}} object.
16
19
  }
@@ -11,6 +11,9 @@
11
11
 
12
12
  \item{...}{No additional parameters are currently supported.}
13
13
  }
14
+ \value{
15
+ No return value.
16
+ }
14
17
  \description{
15
18
  Summary of an \code{\link{enve.TRIBS}} object.
16
19
  }
@@ -11,6 +11,9 @@
11
11
 
12
12
  \item{...}{No additional parameters are currently supported.}
13
13
  }
14
+ \value{
15
+ No return value.
16
+ }
14
17
  \description{
15
18
  Summary of an \code{\link{enve.TRIBStest}} object.
16
19
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.17.1
4
+ version: 1.2.17.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-14 00:00:00.000000000 Z
11
+ date: 2023-02-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -315,41 +315,6 @@ files:
315
315
  - utils/enveomics/Manifest/categories.json
316
316
  - utils/enveomics/Manifest/examples.json
317
317
  - utils/enveomics/Manifest/tasks.json
318
- - utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash
319
- - utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl
320
- - utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl
321
- - utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl
322
- - utils/enveomics/Pipelines/assembly.pbs/README.md
323
- - utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash
324
- - utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash
325
- - utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash
326
- - utils/enveomics/Pipelines/assembly.pbs/RUNME.bash
327
- - utils/enveomics/Pipelines/assembly.pbs/kSelector.R
328
- - utils/enveomics/Pipelines/assembly.pbs/newbler.pbs
329
- - utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl
330
- - utils/enveomics/Pipelines/assembly.pbs/soap.pbs
331
- - utils/enveomics/Pipelines/assembly.pbs/stats.pbs
332
- - utils/enveomics/Pipelines/assembly.pbs/velvet.pbs
333
- - utils/enveomics/Pipelines/blast.pbs/01.pbs.bash
334
- - utils/enveomics/Pipelines/blast.pbs/02.pbs.bash
335
- - utils/enveomics/Pipelines/blast.pbs/03.pbs.bash
336
- - utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl
337
- - utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash
338
- - utils/enveomics/Pipelines/blast.pbs/FastA.split.pl
339
- - utils/enveomics/Pipelines/blast.pbs/README.md
340
- - utils/enveomics/Pipelines/blast.pbs/RUNME.bash
341
- - utils/enveomics/Pipelines/blast.pbs/TASK.check.bash
342
- - utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash
343
- - utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash
344
- - utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash
345
- - utils/enveomics/Pipelines/blast.pbs/TASK.run.bash
346
- - utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash
347
- - utils/enveomics/Pipelines/idba.pbs/README.md
348
- - utils/enveomics/Pipelines/idba.pbs/RUNME.bash
349
- - utils/enveomics/Pipelines/idba.pbs/run.pbs
350
- - utils/enveomics/Pipelines/trim.pbs/README.md
351
- - utils/enveomics/Pipelines/trim.pbs/RUNME.bash
352
- - utils/enveomics/Pipelines/trim.pbs/run.pbs
353
318
  - utils/enveomics/README.md
354
319
  - utils/enveomics/Scripts/AAsubs.log2ratio.rb
355
320
  - utils/enveomics/Scripts/Aln.cat.rb
@@ -493,6 +458,7 @@ files:
493
458
  - utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv
494
459
  - utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim
495
460
  - utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec
461
+ - utils/enveomics/Tests/low-cov.bg.gz
496
462
  - utils/enveomics/Tests/phyla_counts.tsv
497
463
  - utils/enveomics/Tests/primate_lentivirus.ogs
498
464
  - utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm
@@ -1,69 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### VARIABLES
4
- # Queue: Preferred queue. Delete (or comment) this line to allow
5
- # automatic detection:
6
- #QUEUE="biocluster-6"
7
- # If you set the QUEUE variable, you MUST set the WTIME variable
8
- # as well, containing the walltime to be asked for. The WTIME
9
- # variable is ignored otherwise.
10
- WTIME="120:00:00"
11
-
12
- # Scratch: This is where the output will be created.
13
- SCRATCH="$HOME/scratch/pipelines/assembly"
14
-
15
- # Data folder: This is the folder that contains the input files.
16
- DATA="$HOME/data/trim"
17
-
18
- # Location of Newbler's binaries
19
- BIN454="$HOME/454/bin"
20
-
21
- # Name(s) of the library(ies) to use, separated by spaces:
22
- # This is determined by the name of your input files. For example,
23
- # if your input files are: LLSEP.CoupledReads.fa and LWP.CoupledReads.fa,
24
- # use:
25
- # LIBRARIES="LLSEP LWP"
26
- # It's strongly encouraged to use only one per CONFIG file.
27
- LIBRARIES="A";
28
-
29
- # Use .CoupledReads.fa and/or .SingleReads.fa (yes or no):
30
- USECOUPLED=yes
31
- USESINGLE=no
32
-
33
- # Insert length (in bp): This is the average length of the entire insert,
34
- # not just the gap length.
35
- INSLEN=300
36
-
37
- # Number of CPUs to use (for SOAP and Newbler):
38
- PPN=16
39
-
40
- # RAM multiplier: Multiply the estimated required RAM by this number:
41
- RAMMULT=1
42
-
43
- # Maximum number of simultaneous jobs: Uncomment and increase these values if
44
- # you have increased resources (e.g., a dedicated queue); uncomment and decrease
45
- # if the resources are scarce (e.g., a very busy queue or other simultaneous jobs).
46
- #VELVETSIM=22
47
- #SOAPSIM=8
48
-
49
- # Extra parameters for Velvet: Any additional parameters to be passed to
50
- # velvetg or velveth. If you have MP data, consider adding the option
51
- # -shortMatePaired yes to VELVETG_EXTRA. If you have Nextera, consider
52
- # adding the option above, plus the option -ins_length_sd <integer>, to
53
- # indicate the standard deviation of the insert size. By default, the
54
- # SD is assumed to be 10% of the average, but Nextera produces much
55
- # wider distribution of sizes (i.e., larger SD). Typically you shouldn't
56
- # need to add anything in VELVETH_EXTRA.
57
- VELVETH_EXTRA=""
58
- VELVETG_EXTRA=""
59
-
60
- # Clean non-essential files (yes or no):
61
- CLEANUP=yes
62
-
63
- # Best k-mers: Space-delimited list of kmers selected from Velvet and SOAP.
64
- # This is to be modified at the beginning of step 4, and it's ignored in all
65
- # the other steps.
66
- K_VELVET="21 23 35"
67
- K_SOAP="21 23 35"
68
-
69
-
@@ -1 +0,0 @@
1
- ../../Scripts/FastA.N50.pl
@@ -1 +0,0 @@
1
- ../../Scripts/FastA.filterN.pl
@@ -1 +0,0 @@
1
- ../../Scripts/FastA.length.pl
@@ -1,189 +0,0 @@
1
- @author: Luis Miguel Rodriguez-R <lmrodriguezr at gmail dot com>
2
-
3
- @update: Mar-17-2013
4
-
5
- @license: artistic 2.0
6
-
7
- @status: semi
8
-
9
- @pbs: yes
10
-
11
- # IMPORTANT
12
-
13
- This pipeline was developed for the [PACE cluster](http://pace.gatech.edu/). You
14
- are free to use it in other platforms with adequate adjustments. It is largely
15
- based on Luo _et al._ 2012, ISME J.
16
-
17
- # PURPOSE
18
-
19
- This pipeline assembles coupled and/or single reads from one or more libraries.
20
- It assumes that the reads have been quality-checked and trimmed.
21
-
22
- # HELP
23
-
24
- 1. Files preparation:
25
-
26
- 1.1. Copy this folder to the cluster.
27
-
28
- 1.2. Copy the sequences to the cluster. Only trimmed/filtered reads are used.
29
- All the files are expected to be in the same folder, and the filenames must
30
- end in `.CoupledReads.fa` or `.SingleReads.fa`.
31
-
32
- 1.3. Copy the file `CONFIG.mock.bash` to `CONFIG.<name>.bash`, where `<name>` is a
33
- short name for your run (avoid characters other than alphanumeric).
34
-
35
- 1.4. Change the variables in `CONFIG.<name>.bash`. Notice that this pipeline
36
- supports running several libraries at the same time, but it's strongly
37
- recommended to run only one per config file, because the insert length
38
- (in step 2) and the selected k-mers (in step 3) are fixed for all the
39
- included libraries. Also, there is a technical consideration: The first
40
- step will execute parallel jobs for each odd number between 21 and 63, and
41
- SOAP will use 16 CPUs by default, which means 357 CPUs will be requested
42
- per library in step 2. It's a bad idea to run many libraries at the same
43
- time.
44
-
45
- 1.5. If you have Mate-paired datasets (for example, prepared with Nextera), first
46
- reverse-complement all the reads. See also the `VELVETG_EXTRA` variable in
47
- the `CONFIG.<name>.bash` file.
48
-
49
- 2. Velvet and SOAP assembly:
50
-
51
- 2.1. Execute `./RUNME-2.bash <name>` in the head node (see [troubleshooting](#troubleshooting) #1).
52
-
53
- 2.2. Monitor the tasks named velvet_* and soap_*.
54
-
55
- 2.3. Once completed, make sure the files .proc contain only the
56
- word "done". To do this, you may execute:
57
- ```
58
- grep -v '^done$' *.proc
59
- ```
60
-
61
- If successful, the output of the above command should be empty. See
62
- [Troubleshooting](#troubleshooting) #2 and #3 below if one or more of your jobs failed.
63
-
64
- 3. K-mers selection:
65
-
66
- 3.1. If you completed step 2, execute `./RUNME-3.bash <name>` in the head
67
- node.
68
-
69
- 3.2. Once completed, download and open the files `*.n50.pdf`.
70
-
71
- 3.3. Select the three "best" k-mers for Velvet and for SOAP (they don't
72
- have to be the same). There is no well-tested method to select the
73
- "best", and this is why this protocol is not automated, but semi-
74
- automated. A generally good rule-of-thumb is: pick one that optimizes
75
- the amount of sequences used (these are the grey bars in the plot;
76
- usually this is the smallest k-mer), pick one that optimizes the N50
77
- (this is the dashed red line; usually this is a large k-mer), and pick
78
- one that optimizes both (something in the middle). You can select
79
- more or less than three k-mers, this is just a suggestion.
80
-
81
- 4. Newbler assembly:
82
-
83
- 4.1. Edit the file `CONFIG.<name>.bash`: set the variables `K_VELVET` and
84
- `K_SOAP` to contain the lists of "best" selected k-mers for Velvet and
85
- SOAP, respectively.
86
-
87
- 4.2. Execute `./RUNME-4.bash <name>` in the head node.
88
-
89
- 4.3. Monitor the task newbler_*. Once finished, your assembly is ready.
90
- Once completed, make sure the file .newbler.proc contains only the
91
- word "done". To do this, you may execute:
92
- ```
93
- grep -v '^done$' *.proc
94
- ```
95
- If successful, the output should be empty.
96
-
97
- 4.4. The final assembly should be located in the `SCRATCH` path, in a folder
98
- named `<lib>.newbler/assembly/`. The file `454AllContigs.fna` contains
99
- all the assembled contigs, `454LargeContigs.fna` contains the contigs
100
- with 500bp or more in length, and `454NewblerMetrics.txt` contains some
101
- relevant statistics.
102
-
103
-
104
- # Comments
105
-
106
- * Some scripts contained in this package are actually symlinks to files in the
107
- _Scripts_ folder. Check the existence of these files when copied to
108
- the cluster.
109
-
110
- # Troubleshooting
111
-
112
- 1. Do I really have to change directory (`cd`) to the pipeline's folder every time
113
- I want to execute something?
114
-
115
- No. Not really. For simplicity, this file tells you to execute, for example,
116
- `./RUNME-2.bash`. However, you don't really have to be there, you can execute it
117
- from any location. For example, if you saved this pipeline in your home
118
- directory, you can just execute `~/assembly.pbs/RUNME-2.bash` instead from any
119
- location in the head node.
120
-
121
- 2. I executed step 2, and Velvet worked but SOAP failed (or vice versa). Can I
122
- submit only one of them?
123
-
124
- Yes. To execute only Velvet, run:
125
- ```
126
- ./RUNME-2.bash <name> velvet
127
- ```
128
-
129
- To execute only SOAP, run:
130
- ```
131
- ./RUNME-2.bash <name> soap
132
- ```
133
-
134
- 3. I ran step 2, and most of the jobs finished, but few of them failed. Can I
135
- submit only few K-mers?
136
-
137
- Yes. To execute one kmer (say, the k-mer 33 of SOAP), run:
138
- ```
139
- ./RUNME-2.bash <name> soap 33
140
- ```
141
-
142
- You can also execute more than one kmer, using a comma-separated list. For
143
- example, to re-submit the k-mers 37, 39, and 41 of Velvet, run:
144
- ```
145
- ./RUNME-2.bash <name> velvet 37,39,41
146
- ```
147
-
148
- 4. What are the numbers on the job names of step 2?
149
-
150
- The K-mer. Each k-mer has its own job, but they are "arrayed", to simplify
151
- administration: notice that all the jobs of Velvet and all the jobs of SOAP
152
- share the same job ID.
153
-
154
- 5. Some jobs are being killed, why?
155
-
156
- 5.1. First, check the log file created by the pipeline. The name is typically
157
- the output prefix and the .log extension. For velvet, there are two log files,
158
- the `.glog` and the `.hlog`. You may find the problem there.
159
-
160
- 5.2. Now, check the error file in your HOME directory. The name depends on the
161
- job, the library and the task. For example: `~/soap_Mg_2-37.e1999838` is the
162
- error file for step 2, task soap, library Mg_2, k-mer 37. The appending
163
- number after the 'e' is the job ID. If this file contains errors probably
164
- related to the pipeline, please let me know.
165
-
166
- 5.3. If you still have no clues, check the output file in your `HOME` directory. The
167
- name is just like the name of the error file (see #5.2 above), but with 'o'
168
- instead of 'e'. Compare the lines 'Resources' (what we asked the scheduler for)
169
- and 'Rsrc Used' (what the job actually used). A typical problem is that your
170
- job may need more RAM than we asked for (the value of 'mem' in both lines). If
171
- the RAM used is larger than the RAM requested, the scheduler probably killed
172
- your job. To solve this, just go to your config file, and set the variable
173
- RAMMULT to a number larger than 1. For example, if you want to ask for double the
174
- RAM, set `RAMMULT=2`. You can also include simple arithmetic operations, like
175
- `RAMMULT=3/2`. If you want to add a fixed amount of RAM, in GiB, use addition.
176
- For example, to add 10G, set `RAMMULT=1+10`.
177
-
178
- 5.4. Still no idea? Try running the job again, sometimes the jobs fail with no
179
- apparent reason, but they succeed when re-submitted. If your job keeps failing,
180
- please gather as much information (the log, error and output files should be
181
- enough) and let me take a look.
182
-
183
- 6. In the step 2, some k-mers keep failing, and I just want to give up on them, can I?
184
-
185
- Yes. Step 3 will analyze only completed jobs, so you can just ignore these faulty
186
- k-mers. Very small k-mers, for example, sometimes need too much memory, and very
187
- large k-mers in Velvet sometimes need too much time. If you don't think you're
188
- missing too much, just ignore them.
189
-
@@ -1,112 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### HELP
4
- HELP="
5
- Usage:
6
- $0 name[ prog[ k-mers]]
7
-
8
- name The name of the run. CONFIG.name.bash must exist.
9
- prog Program to execute. One of 'soap' or 'velvet'. By
10
- default, it executes both.
11
- k-mers Comma-separated list of k-mers to run. By default,
12
- it executes all the odd numbers between 21 and 63
13
- (inclusive).
14
-
15
- See $PDIR/README.txt for more information.
16
- "
17
- ##################### RUN
18
- # Find the directory of the pipeline
19
- PDIR=$(dirname $(readlink -f $0));
20
- # Load variables
21
- source "$PDIR/RUNME.bash"
22
- if [[ "$SCRATCH" == "" ]] ; then
23
- echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
24
- exit 1
25
- fi
26
-
27
- # Check request
28
- RUNVELVET=yes
29
- RUNSOAP=yes
30
- if [[ "$2" == "velvet" ]] ; then
31
- RUNSOAP=no
32
- elif [[ "$2" == "soap" ]] ; then
33
- RUNVELVET=no
34
- fi
35
- if [[ "$3" == "" ]] ; then
36
- KMERARRAY="21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63"
37
- else
38
- KMERARRAY=$3
39
- fi
40
- if [[ "$VELVETSIM" == "" ]] ; then
41
- VELVETSIM=22
42
- fi
43
- if [[ "$SOAPSIM" == "" ]] ; then
44
- let SOAPSIM=130/$PPN
45
- fi
46
-
47
- # Run it
48
- RAMMULT=${RAMMULT:-1}
49
- echo "Jobs being launched in $SCRATCH"
50
- for LIB in $LIBRARIES; do
51
- # Prepare info
52
- echo "Running $LIB";
53
- if [[ "$USECOUPLED" == "yes" ]] ; then
54
- INPUT="$DATA/$LIB.CoupledReads.fa"
55
- elif [[ "$USESINGLE" == "yes" ]] ; then
56
- INPUT="$DATA/$LIB.SingleReads.fa"
57
- else
58
- echo "$0: Error: No task selected, neither USECOUPLED nor USESINGLE set to yes." >&2
59
- exit 1;
60
- fi
61
- VARS="LIB=$LIB,PDIR=$PDIR,DATA=$DATA,USECOUPLED=$USECOUPLED,USESINGLE=$USESINGLE"
62
- [[ -n $INSLEN ]] && VARS="$VARS,INSLEN=$INSLEN"
63
- [[ -n $VELVETG_EXTRA ]] && VARS="$VARS,VELVETG_EXTRA=$VELVETG_EXTRA"
64
- [[ -n $VELVETH_EXTRA ]] && VARS="$VARS,VELVETH_EXTRA=$VELVETH_EXTRA"
65
- [[ -n $CLEANUP ]] && VARS="$VARS,CLEANUP=$CLEANUP"
66
- let SIZE=$(ls -lH "$INPUT" | awk '{print $5}')/1024/1024/1024;
67
- let RAMS=40+$SIZE*10*$RAMMULT;
68
- let RAMV=50+$SIZE*15*$RAMMULT;
69
- # Launch Velvet
70
- if [[ "$RUNVELVET" == "yes" ]] ; then
71
- NAME="velvet_${LIB}"
72
- if [[ "$QUEUE" != "" ]]; then
73
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
74
- -l mem=${RAMV}gb -l "walltime=$WTIME" -q "$QUEUE" \
75
- -t "$KMERARRAY%$VELVETSIM"
76
- elif [[ $RAMV -gt 150 ]]; then
77
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
78
- -l mem=${RAMV}gb -l walltime=360:00:00 -q biohimem-6 \
79
- -t "$KMERARRAY%$VELVETSIM"
80
- elif [[ $SIZE -lt 6 ]]; then
81
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
82
- -l mem=${RAMV}gb -l walltime=12:00:00 -q iw-shared-6 \
83
- -t "$KMERARRAY%$VELVETSIM"
84
- elif [[ $SIZE -lt 20 ]]; then
85
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
86
- -l mem=${RAMV}gb -l walltime=120:00:00 -q bioforce-6 \
87
- -t "$KMERARRAY%$VELVETSIM"
88
- else
89
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
90
- -l mem=${RAMV}gb -l walltime=360:00:00 -q biocluster-6 \
91
- -t "$KMERARRAY%$VELVETSIM"
92
- fi
93
- fi
94
- # Launch SOAP
95
- if [[ "$RUNSOAP" == "yes" ]] ; then
96
- NAME="soap_${LIB}"
97
- if [[ "$QUEUE" != "" ]]; then
98
- qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
99
- -l mem=${RAMS}gb -l walltime=$WTIME -q $QUEUE -l nodes=1:ppn=$PPN \
100
- -t "$KMERARRAY%$SOAPSIM"
101
- elif [[ $RAMS -gt 150 ]]; then
102
- qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
103
- -l mem=${RAMS}gb -l walltime=48:00:00 -q biohimem-6 \
104
- -l nodes=1:ppn=$PPN -t "$KMERARRAY%$SOAPSIM"
105
- else
106
- qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
107
- -l mem=${RAMS}gb -l walltime=12:00:00 -q iw-shared-6 \
108
- -l nodes=1:ppn=$PPN -t "$KMERARRAY%$SOAPSIM"
109
- fi
110
- fi
111
- done
112
-
@@ -1,23 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### RUN
4
- # Find the directory of the pipeline
5
- PDIR=$(dirname $(readlink -f $0));
6
- # Load variables
7
- source "$PDIR/RUNME.bash"
8
- if [[ "$SCRATCH" == "" ]] ; then
9
- echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
10
- exit 1
11
- fi
12
-
13
- # Run it
14
- echo "Jobs being launched in $SCRATCH"
15
- for LIB in $LIBRARIES; do
16
- # Prepare info
17
- echo "Running $LIB";
18
- VARS="LIB=$LIB,PDIR=$PDIR"
19
- # Launch Stats
20
- NAME="N50_${LIB}"
21
- qsub "$PDIR/stats.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME"
22
- done
23
-