miga-base 1.2.17.1 → 1.2.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +2 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
- data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
- data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
- data/utils/enveomics/Scripts/SRA.download.bash +14 -2
- data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
- data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
- data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
- data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
- data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
- data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
- data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
- data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
- data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
- data/utils/enveomics/enveomics.R/R/utils.R +30 -20
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
- metadata +3 -37
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
|
@@ -7,20 +7,24 @@
|
|
|
7
7
|
enve.recplot2.findPeaks.__emauto_one(x, comp, do_crit, best, verbose, ...)
|
|
8
8
|
}
|
|
9
9
|
\arguments{
|
|
10
|
-
\item{x}{\code{\link{enve.RecPlot2}} object}
|
|
10
|
+
\item{x}{\code{\link{enve.RecPlot2}} object.}
|
|
11
11
|
|
|
12
|
-
\item{comp}{Components}
|
|
12
|
+
\item{comp}{Components.}
|
|
13
13
|
|
|
14
|
-
\item{do_crit}{Function estimating the criterion}
|
|
14
|
+
\item{do_crit}{Function estimating the criterion.}
|
|
15
15
|
|
|
16
|
-
\item{best}{Best solution thus far}
|
|
16
|
+
\item{best}{Best solution thus far.}
|
|
17
17
|
|
|
18
|
-
\item{verbose}{If verbose}
|
|
18
|
+
\item{verbose}{If verbose.}
|
|
19
19
|
|
|
20
|
-
\item{...}{Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}}
|
|
20
|
+
\item{...}{Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}.}
|
|
21
|
+
}
|
|
22
|
+
\value{
|
|
23
|
+
Updated solution with the same structure as \code{best}.
|
|
21
24
|
}
|
|
22
25
|
\description{
|
|
23
|
-
Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.emauto}}).
|
|
26
|
+
Internal ancillary function (see
|
|
27
|
+
\code{\link{enve.recplot2.findPeaks.emauto}}).
|
|
24
28
|
}
|
|
25
29
|
\author{
|
|
26
30
|
Luis M. Rodriguez-R [aut, cre]
|
|
@@ -44,8 +44,12 @@ enve.recplot2.findPeaks.__mow_one(
|
|
|
44
44
|
|
|
45
45
|
\item{log}{If log-transformed depths}
|
|
46
46
|
}
|
|
47
|
+
\value{
|
|
48
|
+
Return an \code{enve.RecPlot2.Peak} object.
|
|
49
|
+
}
|
|
47
50
|
\description{
|
|
48
|
-
|
|
51
|
+
Internal ancillary function (see
|
|
52
|
+
\code{\link{enve.recplot2.findPeaks.mower}}).
|
|
49
53
|
}
|
|
50
54
|
\author{
|
|
51
55
|
Luis M. Rodriguez-R [aut, cre]
|
|
@@ -9,6 +9,9 @@ enve.recplot2.findPeaks.__mower(peaks.opts)
|
|
|
9
9
|
\arguments{
|
|
10
10
|
\item{peaks.opts}{List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}}
|
|
11
11
|
}
|
|
12
|
+
\value{
|
|
13
|
+
A list of \code{enve.RecPlot2.Peak} objects.
|
|
14
|
+
}
|
|
12
15
|
\description{
|
|
13
16
|
Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
|
|
14
17
|
}
|
|
@@ -9,8 +9,8 @@ enve.recplot2.findPeaks.mower(
|
|
|
9
9
|
min.points = 10,
|
|
10
10
|
quant.est = c(0.002, 0.998),
|
|
11
11
|
mlv.opts = list(method = "parzen"),
|
|
12
|
-
fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start =
|
|
13
|
-
list(omega = 1, alpha = -1), lower = c(0, -Inf, -Inf)),
|
|
12
|
+
fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start
|
|
13
|
+
= list(omega = 1, alpha = -1), lower = c(0, -Inf, -Inf)),
|
|
14
14
|
fitdist.opts.norm = list(distr = "norm", method = "qme", probs = c(0.4, 0.6), start =
|
|
15
15
|
list(sd = 1), lower = c(0, -Inf)),
|
|
16
16
|
rm.top = 0.05,
|
|
@@ -14,9 +14,9 @@ enve.recplot2.windowDepthThreshold(
|
|
|
14
14
|
\arguments{
|
|
15
15
|
\item{rp}{Recruitment plot, an \code{\link{enve.RecPlot2}} object.}
|
|
16
16
|
|
|
17
|
-
\item{peak}{Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
|
|
18
|
-
list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
|
|
19
|
-
used (see \code{\link{enve.recplot2.corePeak}}).}
|
|
17
|
+
\item{peak}{Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
|
|
18
|
+
be a list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
|
|
19
|
+
peak is used (see \code{\link{enve.recplot2.corePeak}}).}
|
|
20
20
|
|
|
21
21
|
\item{lower.tail}{If \code{FALSE}, it returns windows significantly above the peak in
|
|
22
22
|
sequencing depth.}
|
|
@@ -50,8 +50,8 @@ It must be a matrix or matrix-coercible object, with samples as rows and
|
|
|
50
50
|
dimensions as columns.}
|
|
51
51
|
|
|
52
52
|
\item{pre.tribs}{Optional. If passed, the points are recovered from this object (except if
|
|
53
|
-
\code{points} is also passed. This should be an \code{\link{enve.TRIBS}} object
|
|
54
|
-
estimated on the same objects (the selection is unimportant).}
|
|
53
|
+
\code{points} is also passed. This should be an \code{\link{enve.TRIBS}}
|
|
54
|
+
object estimated on the same objects (the selection is unimportant).}
|
|
55
55
|
}
|
|
56
56
|
\value{
|
|
57
57
|
Returns an \code{\link{enve.TRIBS}} object.
|
|
@@ -19,9 +19,9 @@
|
|
|
19
19
|
\arguments{
|
|
20
20
|
\item{x}{\code{\link{enve.TRIBStest}} object to plot.}
|
|
21
21
|
|
|
22
|
-
\item{type}{What to plot. \code{overlap} generates a plot of the two contrasting empirical
|
|
23
|
-
PDFs (to compare against each other), \code{difference} produces a plot of the
|
|
24
|
-
differences between the empirical PDFs (to compare against zero).}
|
|
22
|
+
\item{type}{What to plot. \code{overlap} generates a plot of the two contrasting
|
|
23
|
+
empirical PDFs (to compare against each other), \code{difference} produces a
|
|
24
|
+
plot of the differences between the empirical PDFs (to compare against zero).}
|
|
25
25
|
|
|
26
26
|
\item{col}{Main color of the plot if type=\code{difference}.}
|
|
27
27
|
|
|
@@ -37,6 +37,9 @@ differences between the empirical PDFs (to compare against zero).}
|
|
|
37
37
|
|
|
38
38
|
\item{...}{Any other graphical arguments.}
|
|
39
39
|
}
|
|
40
|
+
\value{
|
|
41
|
+
No return value.
|
|
42
|
+
}
|
|
40
43
|
\description{
|
|
41
44
|
Plots an \code{\link{enve.TRIBStest}} object.
|
|
42
45
|
}
|
|
@@ -17,8 +17,8 @@
|
|
|
17
17
|
id.lim = range(x$id.breaks),
|
|
18
18
|
pos.lim = range(x$pos.breaks),
|
|
19
19
|
pos.units = c("Mbp", "Kbp", "bp"),
|
|
20
|
-
mar = list(`1` = c(5, 4, 1, 1) + 0.1, `2` = c(ifelse(any(layout == 1), 1, 5), 4, 4, 1)
|
|
21
|
-
+ 0.1, `3` = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1, `4` =
|
|
20
|
+
mar = list(`1` = c(5, 4, 1, 1) + 0.1, `2` = c(ifelse(any(layout == 1), 1, 5), 4, 4,
|
|
21
|
+
1) + 0.1, `3` = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1, `4` =
|
|
22
22
|
c(ifelse(any(layout == 1), 1, 5), ifelse(any(layout == 2), 1, 4), 4, 2) + 0.1, `5` =
|
|
23
23
|
c(5, 3, 4, 1) + 0.1, `6` = c(5, 4, 4, 2) + 0.1),
|
|
24
24
|
pos.splines = 0,
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: miga-base
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.2.17.1
|
|
4
|
+
version: 1.2.17.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Luis M. Rodriguez-R
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-02-
|
|
11
|
+
date: 2023-02-21 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: daemons
|
|
@@ -315,41 +315,6 @@ files:
|
|
|
315
315
|
- utils/enveomics/Manifest/categories.json
|
|
316
316
|
- utils/enveomics/Manifest/examples.json
|
|
317
317
|
- utils/enveomics/Manifest/tasks.json
|
|
318
|
-
- utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash
|
|
319
|
-
- utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl
|
|
320
|
-
- utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl
|
|
321
|
-
- utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl
|
|
322
|
-
- utils/enveomics/Pipelines/assembly.pbs/README.md
|
|
323
|
-
- utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash
|
|
324
|
-
- utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash
|
|
325
|
-
- utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash
|
|
326
|
-
- utils/enveomics/Pipelines/assembly.pbs/RUNME.bash
|
|
327
|
-
- utils/enveomics/Pipelines/assembly.pbs/kSelector.R
|
|
328
|
-
- utils/enveomics/Pipelines/assembly.pbs/newbler.pbs
|
|
329
|
-
- utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl
|
|
330
|
-
- utils/enveomics/Pipelines/assembly.pbs/soap.pbs
|
|
331
|
-
- utils/enveomics/Pipelines/assembly.pbs/stats.pbs
|
|
332
|
-
- utils/enveomics/Pipelines/assembly.pbs/velvet.pbs
|
|
333
|
-
- utils/enveomics/Pipelines/blast.pbs/01.pbs.bash
|
|
334
|
-
- utils/enveomics/Pipelines/blast.pbs/02.pbs.bash
|
|
335
|
-
- utils/enveomics/Pipelines/blast.pbs/03.pbs.bash
|
|
336
|
-
- utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl
|
|
337
|
-
- utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash
|
|
338
|
-
- utils/enveomics/Pipelines/blast.pbs/FastA.split.pl
|
|
339
|
-
- utils/enveomics/Pipelines/blast.pbs/README.md
|
|
340
|
-
- utils/enveomics/Pipelines/blast.pbs/RUNME.bash
|
|
341
|
-
- utils/enveomics/Pipelines/blast.pbs/TASK.check.bash
|
|
342
|
-
- utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash
|
|
343
|
-
- utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash
|
|
344
|
-
- utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash
|
|
345
|
-
- utils/enveomics/Pipelines/blast.pbs/TASK.run.bash
|
|
346
|
-
- utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash
|
|
347
|
-
- utils/enveomics/Pipelines/idba.pbs/README.md
|
|
348
|
-
- utils/enveomics/Pipelines/idba.pbs/RUNME.bash
|
|
349
|
-
- utils/enveomics/Pipelines/idba.pbs/run.pbs
|
|
350
|
-
- utils/enveomics/Pipelines/trim.pbs/README.md
|
|
351
|
-
- utils/enveomics/Pipelines/trim.pbs/RUNME.bash
|
|
352
|
-
- utils/enveomics/Pipelines/trim.pbs/run.pbs
|
|
353
318
|
- utils/enveomics/README.md
|
|
354
319
|
- utils/enveomics/Scripts/AAsubs.log2ratio.rb
|
|
355
320
|
- utils/enveomics/Scripts/Aln.cat.rb
|
|
@@ -493,6 +458,7 @@ files:
|
|
|
493
458
|
- utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv
|
|
494
459
|
- utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim
|
|
495
460
|
- utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec
|
|
461
|
+
- utils/enveomics/Tests/low-cov.bg.gz
|
|
496
462
|
- utils/enveomics/Tests/phyla_counts.tsv
|
|
497
463
|
- utils/enveomics/Tests/primate_lentivirus.ogs
|
|
498
464
|
- utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
##################### VARIABLES
|
|
4
|
-
# Queue: Preferred queue. Delete (or comment) this line to allow
|
|
5
|
-
# automatic detection:
|
|
6
|
-
#QUEUE="biocluster-6"
|
|
7
|
-
# If you set the QUEUE variable, you MUST set the WTIME variable
|
|
8
|
-
# as well, containing the walltime to be asked for. The WTIME
|
|
9
|
-
# variable is ignored otherwise.
|
|
10
|
-
WTIME="120:00:00"
|
|
11
|
-
|
|
12
|
-
# Scratch: This is where the output will be created.
|
|
13
|
-
SCRATCH="$HOME/scratch/pipelines/assembly"
|
|
14
|
-
|
|
15
|
-
# Data folder: This is the folder that cointains the input files.
|
|
16
|
-
DATA="$HOME/data/trim"
|
|
17
|
-
|
|
18
|
-
# Location of Newbler's binaries
|
|
19
|
-
BIN454="$HOME/454/bin"
|
|
20
|
-
|
|
21
|
-
# Name(s) of the library(ies) to use, separated by spaces:
|
|
22
|
-
# This is determined by the name of your input files. For example,
|
|
23
|
-
# if your input files are: LLSEP.CoupledReads.fa and LWP.CoupledReads.fa,
|
|
24
|
-
# use:
|
|
25
|
-
# LIBRARIES="LLSEP LWP"
|
|
26
|
-
# It's strongly encouraged to use only one per CONFIG file.
|
|
27
|
-
LIBRARIES="A";
|
|
28
|
-
|
|
29
|
-
# Use .CoupledReads.fa and/or .SingleReads.fa (yes or no):
|
|
30
|
-
USECOUPLED=yes
|
|
31
|
-
USESINGLE=no
|
|
32
|
-
|
|
33
|
-
# Insert length (in bp): This is the average length of the entire insert,
|
|
34
|
-
# not just the gap length.
|
|
35
|
-
INSLEN=300
|
|
36
|
-
|
|
37
|
-
# Number of CPUs to use (for SOAP and Newbler):
|
|
38
|
-
PPN=16
|
|
39
|
-
|
|
40
|
-
# RAM multiplier: Multiply the estimated required RAM by this number:
|
|
41
|
-
RAMMULT=1
|
|
42
|
-
|
|
43
|
-
# Maximum number of simultaneous jobs: Uncomment and increase these values if
|
|
44
|
-
# you have increased resources (e.g., a dedicated queue); uncomment and decrease
|
|
45
|
-
# if the resources are scarce (e.g., a very busy queue or other simultaneous jobs).
|
|
46
|
-
#VELVETSIM=22
|
|
47
|
-
#SOAPSIM=8
|
|
48
|
-
|
|
49
|
-
# Extra parameters for Velvet: Any additional parameters to be passed to
|
|
50
|
-
# velvetg or velveth. If you have MP data, consider adding the option
|
|
51
|
-
# -shortMatePaired yes to VELVETG_EXTRA. If you have Nextera, consider
|
|
52
|
-
# adding the option above, plus the option -ins_length_sd <integer>, to
|
|
53
|
-
# indicate the standard deviation of the insert size. By default, the
|
|
54
|
-
# SD is assumed to be 10% of the average, but Nextera produces much
|
|
55
|
-
# wider distribution of sizes (i.e., larger SD). Typically you shouldn't
|
|
56
|
-
# need to add anything in VELVETH_EXTRA.
|
|
57
|
-
VELVETH_EXTRA=""
|
|
58
|
-
VELVETG_EXTRA=""
|
|
59
|
-
|
|
60
|
-
# Clean non-essential files (yes or no):
|
|
61
|
-
CLEANUP=yes
|
|
62
|
-
|
|
63
|
-
# Best k-mers: Space-delimited list of kmers selected from Velvet and SOAP.
|
|
64
|
-
# This is to be modified at the begining of step 4, and it's ignored in all
|
|
65
|
-
# the other steps.
|
|
66
|
-
K_VELVET="21 23 35"
|
|
67
|
-
K_SOAP="21 23 35"
|
|
68
|
-
|
|
69
|
-
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
../../Scripts/FastA.N50.pl
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
../../Scripts/FastA.filterN.pl
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
../../Scripts/FastA.length.pl
|
|
@@ -1,189 +0,0 @@
|
|
|
1
|
-
@author: Luis Miguel Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
2
|
-
|
|
3
|
-
@update: Mar-17-2013
|
|
4
|
-
|
|
5
|
-
@license: artistic 2.0
|
|
6
|
-
|
|
7
|
-
@status: semi
|
|
8
|
-
|
|
9
|
-
@pbs: yes
|
|
10
|
-
|
|
11
|
-
# IMPORTANT
|
|
12
|
-
|
|
13
|
-
This pipeline was developed for the [PACE cluster](http://pace.gatech.edu/). You
|
|
14
|
-
are free to use it in other platforms with adequate adjustments. It is largely
|
|
15
|
-
based on Luo _et al._ 2012, ISME J.
|
|
16
|
-
|
|
17
|
-
# PURPOSE
|
|
18
|
-
|
|
19
|
-
This pipeline assemblies coupled and/or single reads from one or more libraries.
|
|
20
|
-
It assumes that the reads have been quality-checked and trimmed.
|
|
21
|
-
|
|
22
|
-
# HELP
|
|
23
|
-
|
|
24
|
-
1. Files preparation:
|
|
25
|
-
|
|
26
|
-
1.1. Copy this folder to the cluster.
|
|
27
|
-
|
|
28
|
-
1.2. Copy the sequences to the cluster. Only trimmed/filtered reads are used.
|
|
29
|
-
All the files are expected to be in the same folder, and the filenames must
|
|
30
|
-
end in `.CoupledReads.fa` or `.SingleReads.fa`.
|
|
31
|
-
|
|
32
|
-
1.3. Copy the file `CONFIG.mock.bash` to `CONFIG.<name>.bash`, where `<name>` is a
|
|
33
|
-
short name for your run (avoid characters other than alphanumeric).
|
|
34
|
-
|
|
35
|
-
1.4. Change the variables in `CONFIG.<name>.bash`. Notice that this pipeline
|
|
36
|
-
supports running several libraries at the same time, but it's strongly
|
|
37
|
-
recomended to run only one per config file, because the insert length
|
|
38
|
-
(in step 2) and the selected k-mers (in step 3) are fixed for all the
|
|
39
|
-
included libraries. Also, there is a technical consideration: The first
|
|
40
|
-
step will execute parallel jobs for each odd number between 21 and 63, and
|
|
41
|
-
SOAP will use 16 CPUs by default, which means 357 CPUs will be requested
|
|
42
|
-
per library in step 2. It's a bad idea to run many libraries at the same
|
|
43
|
-
time.
|
|
44
|
-
|
|
45
|
-
1.5. If you have Mate-paired datasets (for example, prepared with Nextera), first
|
|
46
|
-
reverse-complement all the reads. See also the `VELVETG_EXTRA` variable in
|
|
47
|
-
the `CONFIG.<name>.bash` file.
|
|
48
|
-
|
|
49
|
-
2. Velvet and SOAP assembly:
|
|
50
|
-
|
|
51
|
-
2.1. Execute `./RUNME-2.bash <name>` in the head node (see [troubleshooting](#troubleshooting) #1).
|
|
52
|
-
|
|
53
|
-
2.2. Monitor the tasks named velvet_* and soap_*.
|
|
54
|
-
|
|
55
|
-
2.3. Once completed, make sure the files .proc contain only the
|
|
56
|
-
word "done". To do this, you may execute:
|
|
57
|
-
```
|
|
58
|
-
grep -v '^done$' *.proc
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
If successful, the output of the above command should be empty. See
|
|
62
|
-
[Troubleshooting](#troubleshooting) #2 and #3 below if one or more of your jobs failed.
|
|
63
|
-
|
|
64
|
-
3. K-mers selection:
|
|
65
|
-
|
|
66
|
-
3.1. If you completed step 2, execute `./RUNME-3.bash <name>` in the head
|
|
67
|
-
node.
|
|
68
|
-
|
|
69
|
-
3.2. Once completed, download and open the files `*.n50.pdf`.
|
|
70
|
-
|
|
71
|
-
3.3. Select the three "best" k-mers for Velvet and for SOAP (they don't
|
|
72
|
-
have to be the same). There is no well-tested method to select the
|
|
73
|
-
"best", and this is why this protocol is not automated, but semi-
|
|
74
|
-
automated. A generally good rule-of-thumb is: pick one that optimizes
|
|
75
|
-
the amount of sequences used (these are the grey bars in the plot;
|
|
76
|
-
usually this is the smallest k-mer), pick one that optimizes the N50
|
|
77
|
-
(this is the dashed red line; usually this is a large k-mer), and pick
|
|
78
|
-
one that optimizes both (something in the middle). You can select
|
|
79
|
-
more or less than three k-mers, this is just a suggestion.
|
|
80
|
-
|
|
81
|
-
4. Newbler assembly:
|
|
82
|
-
|
|
83
|
-
4.1. Edit the file `CONFIG.<name>.bash`: set the variables `K_VELVET` and
|
|
84
|
-
`K_SOAP` to contain the lists of "best" selected k-mers for Velvet and
|
|
85
|
-
SOAP, respectively.
|
|
86
|
-
|
|
87
|
-
4.2. Execute `./RUNME-4.bash <name>` in the head node.
|
|
88
|
-
|
|
89
|
-
4.3. Monitor the task newbler_*. Once finished, your assembly is ready.
|
|
90
|
-
Once completed, make sure the file .newbler.proc contain only the
|
|
91
|
-
word "done". To do this, you may execute:
|
|
92
|
-
```
|
|
93
|
-
grep -v '^done$' *.proc
|
|
94
|
-
```
|
|
95
|
-
If successful, the output should be empty.
|
|
96
|
-
|
|
97
|
-
4.4. The final assembly should be located in the `SCRATCH` path, in a folder
|
|
98
|
-
named `<lib>.newbler/assembly/`. The file `454AllContigs.fna` contains
|
|
99
|
-
all the assembled contigs, `454LargeContigs.fna` contains the contigs
|
|
100
|
-
with 500bp or more in length, and `454NewblerMetrics.txt` contains some
|
|
101
|
-
relevant statistics.
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
# Comments
|
|
105
|
-
|
|
106
|
-
* Some scripts contained in this package are actually symlinks to files in the
|
|
107
|
-
_Scripts_ folder. Check the existance of these files when copied to
|
|
108
|
-
the cluster.
|
|
109
|
-
|
|
110
|
-
# Troubleshooting
|
|
111
|
-
|
|
112
|
-
1. Do I really have to change directory (`cd`) to the pipeline's folder everytime
|
|
113
|
-
I want to execute something?
|
|
114
|
-
|
|
115
|
-
No. Not really. For simplicity, this file tells you to execute, for example,
|
|
116
|
-
`./RUNME-2.bash`. However, you don't really have to be there, you can execute it
|
|
117
|
-
from any location. For example, if you saved this pipeline in your home
|
|
118
|
-
directory, you can just execute `~/assembly.pbs/RUNME-2.bash` insted from any
|
|
119
|
-
location in the head node.
|
|
120
|
-
|
|
121
|
-
2. I executed step 2, and Velvet worked but SOAP failed (or vice versa). Can I
|
|
122
|
-
submit only one of them?
|
|
123
|
-
|
|
124
|
-
Yes. To execute only Velvet, run:
|
|
125
|
-
```
|
|
126
|
-
./RUNME-2.bash <name> velvet
|
|
127
|
-
```
|
|
128
|
-
|
|
129
|
-
To execute only SOAP, run:
|
|
130
|
-
```
|
|
131
|
-
./RUNME-2.bash <name> soap
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
3. I ran step 2, and most of the jobs finished, but few of them failed. Can I
|
|
135
|
-
submit only few K-mers?
|
|
136
|
-
|
|
137
|
-
Yes. To execute one kmer (say, the k-mer 33 of SOAP), run:
|
|
138
|
-
```
|
|
139
|
-
./RUNME-2.bash <name> soap 33
|
|
140
|
-
```
|
|
141
|
-
|
|
142
|
-
You can also execute more than one kmer, using a comma-separated list. For
|
|
143
|
-
example, to re-submit the k-mers 37, 39, and 41 of Velvet, run:
|
|
144
|
-
```
|
|
145
|
-
./RUNME-2.bash <name> velvet 37,39,41
|
|
146
|
-
```
|
|
147
|
-
|
|
148
|
-
4. What are the numbers on the job names of step 2?
|
|
149
|
-
|
|
150
|
-
The K-mer. Each k-mer has it's own job, but they are "arrayed", to simplify
|
|
151
|
-
administration: notice that all the jobs of Velvet and all the jobs of SOAP
|
|
152
|
-
share the same job ID.
|
|
153
|
-
|
|
154
|
-
5. Some jobs are being killed, why?
|
|
155
|
-
|
|
156
|
-
5.1. First, check the log file created by the pipeline. The name is typically
|
|
157
|
-
the output prefix and the .log extension. For velvet, there are two log files,
|
|
158
|
-
the `.glog` and the `.hlog`. You may find the problem there.
|
|
159
|
-
|
|
160
|
-
5.2. Now, check the error file in your HOME directory. The name depends on the
|
|
161
|
-
job, the library and the task. For example: `~/soap_Mg_2-37.e1999838` is the
|
|
162
|
-
error file for step 2, task soap, library Mg_2, k-mer 37. The appending
|
|
163
|
-
number after the 'e' is the job ID. If this file contains errors probably
|
|
164
|
-
related to the pipeline, please let me know.
|
|
165
|
-
|
|
166
|
-
5.3. If you still have no clues, check the output file in your `HOME` directory. The
|
|
167
|
-
name is just like the name of the error file (see #5.2 above), but with 'o'
|
|
168
|
-
instead of 'e'. Compare the lines 'Resources' (what we asked the scheduler for)
|
|
169
|
-
and 'Rsrc Used' (what the job actually used). A typical problem is that your
|
|
170
|
-
job may need more RAM than we asked for (the value of 'mem' in both lines). If
|
|
171
|
-
the RAM used is larger than the RAM requested, the scheduler probably killed
|
|
172
|
-
your job. To solve this, just go to your config file, and set the variable
|
|
173
|
-
RAMMULT to a number larger than 1. For example, if you want to ask for double the
|
|
174
|
-
RAM, set `RAMMULT=2`. You can also include simple arithmetic operations, like
|
|
175
|
-
`RAMMULT=3/2`. If you want to add a fixed ammount of RAM, in Gib, use addition.
|
|
176
|
-
For example, to add 10G, set `RAMMULT=1+10`.
|
|
177
|
-
|
|
178
|
-
5.4. Still no idea? Try running the job again, sometimes the jobs fail with no
|
|
179
|
-
apparent reason, but they succeed when re-submited. If your job keeps failing,
|
|
180
|
-
please gather as much information (the log, error and output files should be
|
|
181
|
-
enough) and let me take a look.
|
|
182
|
-
|
|
183
|
-
6. In the step 2, some k-mers keep failing, and I just want to give up on them, can I?
|
|
184
|
-
|
|
185
|
-
Yes. Step 3 will analyze only completed jobs, so you can just ignore these faulty
|
|
186
|
-
k-mers. Very small k-mers, for example, sometimes need too much memory, and very
|
|
187
|
-
large k-mers in Velvet sometimes need too much time. If you don't think you're
|
|
188
|
-
missing too much, just ignore them.
|
|
189
|
-
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
##################### HELP
|
|
4
|
-
HELP="
|
|
5
|
-
Usage:
|
|
6
|
-
$0 name[ prog[ k-mers]]
|
|
7
|
-
|
|
8
|
-
name The name of the run. CONFIG.name.bash must exist.
|
|
9
|
-
prog Program to execute. One of 'soap' or 'velvet'. By
|
|
10
|
-
default, it executes both.
|
|
11
|
-
k-mers Comma-separated list of k-mers to run. By default,
|
|
12
|
-
it executes all the odd numbers between 21 and 63
|
|
13
|
-
(inclusive).
|
|
14
|
-
|
|
15
|
-
See $PDIR/README.txt for more information.
|
|
16
|
-
"
|
|
17
|
-
##################### RUN
|
|
18
|
-
# Find the directory of the pipeline
|
|
19
|
-
PDIR=$(dirname $(readlink -f $0));
|
|
20
|
-
# Load variables
|
|
21
|
-
source "$PDIR/RUNME.bash"
|
|
22
|
-
if [[ "$SCRATCH" == "" ]] ; then
|
|
23
|
-
echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
|
|
24
|
-
exit 1
|
|
25
|
-
fi
|
|
26
|
-
|
|
27
|
-
# Check request
|
|
28
|
-
RUNVELVET=yes
|
|
29
|
-
RUNSOAP=yes
|
|
30
|
-
if [[ "$2" == "velvet" ]] ; then
|
|
31
|
-
RUNSOAP=no
|
|
32
|
-
elif [[ "$2" == "soap" ]] ; then
|
|
33
|
-
RUNVELVET=no
|
|
34
|
-
fi
|
|
35
|
-
if [[ "$3" == "" ]] ; then
|
|
36
|
-
KMERARRAY="21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63"
|
|
37
|
-
else
|
|
38
|
-
KMERARRAY=$3
|
|
39
|
-
fi
|
|
40
|
-
if [[ "$VELVETSIM" == "" ]] ; then
|
|
41
|
-
VELVETSIM=22
|
|
42
|
-
fi
|
|
43
|
-
if [[ "$SOAPSIM" == "" ]] ; then
|
|
44
|
-
let SOAPSIM=130/$PPN
|
|
45
|
-
fi
|
|
46
|
-
|
|
47
|
-
# Run it
|
|
48
|
-
RAMMULT=${RAMMULT:-1}
|
|
49
|
-
echo "Jobs being launched in $SCRATCH"
|
|
50
|
-
for LIB in $LIBRARIES; do
|
|
51
|
-
# Prepare info
|
|
52
|
-
echo "Running $LIB";
|
|
53
|
-
if [[ "$USECOUPLED" == "yes" ]] ; then
|
|
54
|
-
INPUT="$DATA/$LIB.CoupledReads.fa"
|
|
55
|
-
elif [[ "$USESINGLE" == "yes" ]] ; then
|
|
56
|
-
INPUT="$DATA/$LIB.SingleReads.fa"
|
|
57
|
-
else
|
|
58
|
-
echo "$0: Error: No task selected, neither USECOUPLED nor USESINGLE set to yes." >&2
|
|
59
|
-
exit 1;
|
|
60
|
-
fi
|
|
61
|
-
VARS="LIB=$LIB,PDIR=$PDIR,DATA=$DATA,USECOUPLED=$USECOUPLED,USESINGLE=$USESINGLE"
|
|
62
|
-
[[ -n $INSLEN ]] && VARS="$VARS,INSLEN=$INSLEN"
|
|
63
|
-
[[ -n $VELVETG_EXTRA ]] && VARS="$VARS,VELVETG_EXTRA=$VELVETG_EXTRA"
|
|
64
|
-
[[ -n $VELVETH_EXTRA ]] && VARS="$VARS,VELVETH_EXTRA=$VELVETH_EXTRA"
|
|
65
|
-
[[ -n $CLEANUP ]] && VARS="$VARS,CLEANUP=$CLEANUP"
|
|
66
|
-
let SIZE=$(ls -lH "$INPUT" | awk '{print $5}')/1024/1024/1024;
|
|
67
|
-
let RAMS=40+$SIZE*10*$RAMMULT;
|
|
68
|
-
let RAMV=50+$SIZE*15*$RAMMULT;
|
|
69
|
-
# Launch Velvet
|
|
70
|
-
if [[ "$RUNVELVET" == "yes" ]] ; then
|
|
71
|
-
NAME="velvet_${LIB}"
|
|
72
|
-
if [[ "$QUEUE" != "" ]]; then
|
|
73
|
-
qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
|
|
74
|
-
-l mem=${RAMV}gb -l "walltime=$WTIME" -q "$QUEUE" \
|
|
75
|
-
-t "$KMERARRAY%$VELVETSIM"
|
|
76
|
-
elif [[ $RAMV -gt 150 ]]; then
|
|
77
|
-
qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
|
|
78
|
-
-l mem=${RAMV}gb -l walltime=360:00:00 -q biohimem-6 \
|
|
79
|
-
-t "$KMERARRAY%$VELVETSIM"
|
|
80
|
-
elif [[ $SIZE -lt 6 ]]; then
|
|
81
|
-
qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
|
|
82
|
-
-l mem=${RAMV}gb -l walltime=12:00:00 -q iw-shared-6 \
|
|
83
|
-
-t "$KMERARRAY%$VELVETSIM"
|
|
84
|
-
elif [[ $SIZE -lt 20 ]]; then
|
|
85
|
-
qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
|
|
86
|
-
-l mem=${RAMV}gb -l walltime=120:00:00 -q bioforce-6 \
|
|
87
|
-
-t "$KMERARRAY%$VELVETSIM"
|
|
88
|
-
else
|
|
89
|
-
qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
|
|
90
|
-
-l mem=${RAMV}gb -l walltime=360:00:00 -q biocluster-6 \
|
|
91
|
-
-t "$KMERARRAY%$VELVETSIM"
|
|
92
|
-
fi
|
|
93
|
-
fi
|
|
94
|
-
# Launch SOAP
|
|
95
|
-
if [[ "$RUNSOAP" == "yes" ]] ; then
|
|
96
|
-
NAME="soap_${LIB}"
|
|
97
|
-
if [[ "$QUEUE" != "" ]]; then
|
|
98
|
-
qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
|
|
99
|
-
-l mem=${RAMS}gb -l walltime=$WTIME -q $QUEUE -l nodes=1:ppn=$PPN \
|
|
100
|
-
-t "$KMERARRAY%$SOAPSIM"
|
|
101
|
-
elif [[ $RAMS -gt 150 ]]; then
|
|
102
|
-
qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
|
|
103
|
-
-l mem=${RAMS}gb -l walltime=48:00:00 -q biohimem-6 \
|
|
104
|
-
-l nodes=1:ppn=$PPN -t "$KMERARRAY%$SOAPSIM"
|
|
105
|
-
else
|
|
106
|
-
qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
|
|
107
|
-
-l mem=${RAMS}gb -l walltime=12:00:00 -q iw-shared-6 \
|
|
108
|
-
-l nodes=1:ppn=$PPN -t "$KMERARRAY%$SOAPSIM"
|
|
109
|
-
fi
|
|
110
|
-
fi
|
|
111
|
-
done
|
|
112
|
-
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
|
|
3
|
-
##################### RUN
|
|
4
|
-
# Find the directory of the pipeline
|
|
5
|
-
PDIR=$(dirname $(readlink -f $0));
|
|
6
|
-
# Load variables
|
|
7
|
-
source "$PDIR/RUNME.bash"
|
|
8
|
-
if [[ "$SCRATCH" == "" ]] ; then
|
|
9
|
-
echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
|
|
10
|
-
exit 1
|
|
11
|
-
fi
|
|
12
|
-
|
|
13
|
-
# Run it
|
|
14
|
-
echo "Jobs being launched in $SCRATCH"
|
|
15
|
-
for LIB in $LIBRARIES; do
|
|
16
|
-
# Prepare info
|
|
17
|
-
echo "Running $LIB";
|
|
18
|
-
VARS="LIB=$LIB,PDIR=$PDIR"
|
|
19
|
-
# Launch Stats
|
|
20
|
-
NAME="N50_${LIB}"
|
|
21
|
-
qsub "$PDIR/stats.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME"
|
|
22
|
-
done
|
|
23
|
-
|