miga-base 1.2.17.1 → 1.2.17.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/remote_dataset.rb +9 -4
- data/lib/miga/version.rb +2 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
- data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
- data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
- data/utils/enveomics/Scripts/SRA.download.bash +14 -2
- data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
- data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
- data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
- data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
- data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
- data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
- data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
- data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
- data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
- data/utils/enveomics/enveomics.R/R/utils.R +30 -20
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
- metadata +3 -37
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
@@ -6,8 +6,10 @@
|
|
6
6
|
#' be produced by \code{\link{enve.recplot2}} and supports S4 method plot.
|
7
7
|
#'
|
8
8
|
#' @slot counts \code{(matrix)} Counts as a two-dimensional histogram.
|
9
|
-
#' @slot pos.counts.in
|
10
|
-
#'
|
9
|
+
#' @slot pos.counts.in
|
10
|
+
#' \code{(numeric)} Counts of in-group hits per position bin.
|
11
|
+
#' @slot pos.counts.out
|
12
|
+
#' \code{(numeric)} Counts of out-group hits per position bin.
|
11
13
|
#' @slot id.counts \code{(numeric)} Counts per ID bin.
|
12
14
|
#' @slot id.breaks \code{(numeric)} Breaks of identity bins.
|
13
15
|
#' @slot pos.breaks \code{(numeric)} Breaks of position bins.
|
@@ -208,42 +210,50 @@ setMethod("$", "enve.RecPlot2.Peak", function(x, name) attr(x, name))
|
|
208
210
|
#' @export
|
209
211
|
|
210
212
|
plot.enve.RecPlot2 <- function
|
211
|
-
(
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
213
|
+
(
|
214
|
+
x,
|
215
|
+
layout = matrix(c(5, 5, 2, 1, 4, 3), nrow = 2),
|
216
|
+
panel.fun = list(),
|
217
|
+
widths = c(1, 7, 2),
|
218
|
+
heights = c(1, 2),
|
219
|
+
palette = grey((100:0) / 100),
|
220
|
+
underlay.group = TRUE,
|
221
|
+
peaks.col = "darkred",
|
222
|
+
use.peaks,
|
223
|
+
id.lim = range(x$id.breaks),
|
224
|
+
pos.lim = range(x$pos.breaks),
|
225
|
+
pos.units = c("Mbp", "Kbp", "bp"),
|
226
|
+
mar = list(
|
227
|
+
"1" = c(5, 4, 1, 1) + 0.1,
|
228
|
+
"2" = c(ifelse(any(layout == 1), 1, 5), 4, 4, 1) + 0.1,
|
229
|
+
"3" = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1,
|
230
|
+
"4" = c(ifelse(any(layout == 1), 1, 5),
|
231
|
+
ifelse(any(layout == 2), 1, 4), 4, 2) + 0.1,
|
232
|
+
"5" = c(5, 3, 4, 1) + 0.1,
|
233
|
+
"6" = c(5, 4, 4, 2) + 0.1
|
234
|
+
),
|
235
|
+
pos.splines = 0,
|
236
|
+
id.splines = 1/2,
|
237
|
+
in.lwd = ifelse(is.null(pos.splines) || pos.splines > 0, 1/2, 2),
|
238
|
+
out.lwd = ifelse(is.null(pos.splines) || pos.splines > 0, 1/2, 2),
|
239
|
+
id.lwd = ifelse(is.null(id.splines) || id.splines > 0, 1/2, 2),
|
240
|
+
in.col = "darkblue",
|
241
|
+
out.col = "lightblue",
|
242
|
+
id.col = "black",
|
243
|
+
breaks.col = "#AAAAAA40",
|
244
|
+
peaks.opts = list(),
|
245
|
+
...
|
246
|
+
) {
|
247
|
+
pos.units <- match.arg(pos.units)
|
248
|
+
pos.factor <- ifelse(pos.units == "bp", 1,
|
249
|
+
ifelse(pos.units == "Kbp", 1e3, 1e6))
|
250
|
+
pos.lim <- pos.lim / pos.factor
|
251
|
+
lmat <- layout
|
252
|
+
for (i in 1:6) if (!any(layout == i)) lmat[layout > i] <- lmat[layout > i] - 1
|
253
|
+
|
254
|
+
layout(lmat, widths = widths, heights = heights)
|
255
|
+
ori.mar <- par("mar")
|
256
|
+
on.exit(par(ori.mar))
|
247
257
|
|
248
258
|
# Essential vars
|
249
259
|
counts <- x$counts
|
@@ -273,8 +283,8 @@ plot.enve.RecPlot2 <- function
|
|
273
283
|
}
|
274
284
|
|
275
285
|
# [1] Counts matrix
|
276
|
-
if(any(layout==1)){
|
277
|
-
par(mar=mar[[
|
286
|
+
if (any(layout==1)) {
|
287
|
+
par(mar = mar[["1"]]) # par(mar) already being watched by on.exit
|
278
288
|
plot(1, t='n', bty='l',
|
279
289
|
xlim=pos.lim, xlab=paste('Position in genome (',pos.units,')',sep=''),
|
280
290
|
xaxs='i', ylim=id.lim, ylab=x$id.metric, yaxs='i');
|
@@ -288,18 +298,18 @@ plot.enve.RecPlot2 <- function
|
|
288
298
|
image(x=pos.breaks, y=id.breaks, z=log10(counts),col=palette,
|
289
299
|
bg=grey(1,0), breaks=seq(-.1,log10(max(counts)),
|
290
300
|
length.out=1+length(palette)), add=TRUE);
|
291
|
-
if(exists(
|
301
|
+
if(exists("1", panel.fun)) panel.fun[["1"]]()
|
292
302
|
}
|
293
303
|
|
294
304
|
# [2] Position histogram
|
295
|
-
if(any(layout==2)){
|
296
|
-
par(mar=mar[[
|
297
|
-
if(any(layout==1)){
|
298
|
-
xlab
|
299
|
-
xaxt
|
300
|
-
}else{
|
301
|
-
xlab
|
302
|
-
xaxt
|
305
|
+
if (any(layout == 2)) {
|
306
|
+
par(mar = mar[["2"]]) # par(mar) already being watched by on.exit
|
307
|
+
if (any(layout == 1)) {
|
308
|
+
xlab <- ""
|
309
|
+
xaxt <- "n"
|
310
|
+
} else {
|
311
|
+
xlab <- paste("Position in genome (", pos.units, ")", sep = "")
|
312
|
+
xaxt <- "s"
|
303
313
|
}
|
304
314
|
plot(1,t='n', bty='l', log='y',
|
305
315
|
xlim=pos.lim, xlab=xlab, xaxt=xaxt, xaxs='i',
|
@@ -309,31 +319,33 @@ plot.enve.RecPlot2 <- function
|
|
309
319
|
pos.f <- rep(seqdepth.in,each=2)
|
310
320
|
lines(pos.x, rep(seqdepth.out,each=2), lwd=out.lwd, col=out.col);
|
311
321
|
lines(pos.x, pos.f, lwd=in.lwd, col=in.col);
|
312
|
-
if(is.null(pos.splines) || pos.splines > 0){
|
322
|
+
if (is.null(pos.splines) || pos.splines > 0) {
|
313
323
|
pos.spline <- smooth.spline(pos.x[pos.f>0], log(pos.f[pos.f>0]),
|
314
324
|
spar=pos.splines)
|
315
325
|
lines(pos.spline$x, exp(pos.spline$y), lwd=2, col=in.col)
|
316
326
|
}
|
317
|
-
if(any(pos.counts.out==0))
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
327
|
+
if (any(pos.counts.out==0))
|
328
|
+
rect(pos.breaks[c(pos.counts.out==0,FALSE)],
|
329
|
+
seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.out==0)],
|
330
|
+
seqdepth.lim[1]*3/2, col=out.col, border=NA);
|
331
|
+
if (any(pos.counts.in==0))
|
332
|
+
rect(pos.breaks[c(pos.counts.in==0,FALSE)],
|
333
|
+
seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.in==0)],
|
334
|
+
seqdepth.lim[1]*3/2, col=in.col, border=NA);
|
335
|
+
if (exists("2", panel.fun)) panel.fun[["2"]]()
|
324
336
|
}
|
325
337
|
|
326
338
|
# [3] Identity histogram
|
327
|
-
if(any(layout==3)){
|
328
|
-
par(mar=mar[[
|
329
|
-
if(any(layout==1)){
|
330
|
-
ylab
|
331
|
-
yaxt
|
332
|
-
}else{
|
333
|
-
ylab
|
334
|
-
yaxt
|
339
|
+
if (any(layout == 3)) {
|
340
|
+
par(mar = mar[["3"]]) # par(mar) already being watched by on.exit
|
341
|
+
if (any(layout == 1)) {
|
342
|
+
ylab <- ""
|
343
|
+
yaxt <- "n"
|
344
|
+
} else {
|
345
|
+
ylab <- x$id.metric
|
346
|
+
yaxt <- "s"
|
335
347
|
}
|
336
|
-
if(sum(id.counts>0) >= 4){
|
348
|
+
if (sum(id.counts > 0) >= 4) {
|
337
349
|
id.counts.range <- range(id.counts[id.counts>0])*c(1/2,2);
|
338
350
|
plot(1,t='n', bty='l', log='x',
|
339
351
|
xlim=id.counts.range, xlab='bps per bin', xaxs='i',
|
@@ -352,23 +364,23 @@ plot.enve.RecPlot2 <- function
|
|
352
364
|
spar=id.splines)
|
353
365
|
lines(exp(id.spline$y), id.spline$x, lwd=2, col=id.col)
|
354
366
|
}
|
355
|
-
}else{
|
367
|
+
} else {
|
356
368
|
plot(1,t='n',bty='l',xlab='', xaxt='n', ylab='', yaxt='n')
|
357
369
|
text(1,1,labels='Insufficient data', srt=90)
|
358
370
|
}
|
359
|
-
if(exists(
|
371
|
+
if (exists("3", panel.fun)) panel.fun[["3"]]()
|
360
372
|
}
|
361
373
|
|
362
374
|
# [4] Populations histogram
|
363
375
|
peaks <- NA;
|
364
|
-
if(any(layout==4)){
|
365
|
-
par(mar=mar[[
|
366
|
-
if(any(layout==2)){
|
367
|
-
ylab
|
368
|
-
yaxt
|
369
|
-
}else{
|
370
|
-
ylab
|
371
|
-
yaxt
|
376
|
+
if (any(layout == 4)) {
|
377
|
+
par(mar = mar[["4"]]) # par(mar) already being watched by on.exit
|
378
|
+
if (any(layout == 2)) {
|
379
|
+
ylab <- ""
|
380
|
+
yaxt <- "n"
|
381
|
+
} else {
|
382
|
+
ylab <- "Sequencing depth (X)"
|
383
|
+
yaxt <- "s"
|
372
384
|
}
|
373
385
|
h.breaks <- seq(log10(seqdepth.lim[1]*2), log10(seqdepth.lim[2]/2),
|
374
386
|
length.out=200);
|
@@ -406,44 +418,47 @@ plot.enve.RecPlot2 <- function
|
|
406
418
|
lapply(peaks,
|
407
419
|
function(x) ifelse(length(x$values)==0, x$n.hat,
|
408
420
|
length(x$values))/x$n.total)), 2)
|
409
|
-
if(peaks[[1]]$err.res < 0){
|
410
|
-
err <- paste(
|
411
|
-
}else{
|
412
|
-
err <- paste(
|
413
|
-
|
421
|
+
if (peaks[[1]]$err.res < 0) {
|
422
|
+
err <- paste(", LL:", signif(peaks[[1]]$err.res, 3))
|
423
|
+
} else {
|
424
|
+
err <- paste(
|
425
|
+
", err:",
|
426
|
+
signif(as.numeric(lapply(peaks, function(x) x$err.res)), 2)
|
427
|
+
)
|
414
428
|
}
|
415
429
|
legend('topright', bty='n', cex=1/2,
|
416
430
|
legend=paste(letters[1:length(peaks)],'. ',
|
417
431
|
dpt,'X (', frx, '%', err, ')', sep=''))
|
418
432
|
}
|
419
433
|
}
|
420
|
-
if(exists(
|
434
|
+
if (exists("4", panel.fun)) panel.fun[["4"]]()
|
421
435
|
}
|
422
436
|
|
423
437
|
# [5] Color scale of the counts matrix (vertical)
|
424
|
-
count.bins <- 10^seq(
|
425
|
-
|
426
|
-
|
427
|
-
|
438
|
+
count.bins <- 10^seq(
|
439
|
+
log10(min(counts[counts>0])), log10(max(counts)),
|
440
|
+
length.out = 1 + length(palette)
|
441
|
+
)
|
442
|
+
if (any(layout == 5)) {
|
443
|
+
par(mar = mar[["5"]]) # par(mar) already being watched by on.exit
|
428
444
|
plot(1,t='n',log='y',xlim=0:1,xaxt='n',xlab='',xaxs='i',
|
429
445
|
ylim=range(count.bins), yaxs='i', ylab='')
|
430
446
|
rect(0,count.bins[-length(count.bins)],1,count.bins[-1],col=palette,
|
431
447
|
border=NA)
|
432
|
-
if(exists(
|
448
|
+
if (exists("5", panel.fun)) panel.fun[["5"]]()
|
433
449
|
}
|
434
450
|
|
435
451
|
# [6] Color scale of the coutnts matrix (horizontal)
|
436
|
-
if(any(layout==6)){
|
437
|
-
par(mar=mar[[
|
452
|
+
if (any(layout == 6)) {
|
453
|
+
par(mar = mar[["6"]]) # par(mar) already being watched by on.exit
|
438
454
|
plot(1,t='n',log='x',ylim=0:1,yaxt='n',ylab='',yaxs='i',
|
439
455
|
xlim=range(count.bins), xaxs='i',xlab='');
|
440
456
|
rect(count.bins[-length(count.bins)],0,count.bins[-1],1,col=palette,
|
441
457
|
border=NA);
|
442
|
-
if(exists(
|
458
|
+
if (exists("6", panel.fun)) panel.fun[["6"]]()
|
443
459
|
}
|
444
460
|
|
445
|
-
|
446
|
-
return(peaks);
|
461
|
+
return(peaks)
|
447
462
|
}
|
448
463
|
|
449
464
|
#==============> Define core functions
|
@@ -635,7 +650,7 @@ enve.recplot2 <- function(
|
|
635
650
|
#'
|
636
651
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
637
652
|
#'
|
638
|
-
#' export
|
653
|
+
#' @export
|
639
654
|
|
640
655
|
enve.recplot2.findPeaks <- function(
|
641
656
|
x,
|
@@ -955,33 +970,39 @@ enve.recplot2.findPeaks.mower <- function(
|
|
955
970
|
#' "core genome" of a population.
|
956
971
|
#'
|
957
972
|
#' @param x \code{list} of \code{\link{enve.RecPlot2.Peak}} objects.
|
973
|
+
#'
|
974
|
+
#' @return A \code{\link{enve.RecPlot2.Peak}} object.
|
958
975
|
#'
|
959
976
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
960
977
|
#'
|
961
978
|
#' @export
|
962
979
|
|
963
|
-
enve.recplot2.corePeak <- function
|
964
|
-
(x
|
965
|
-
){
|
980
|
+
enve.recplot2.corePeak <- function(x) {
|
966
981
|
# Find the peak with maximum depth (centrality)
|
967
982
|
maxPeak <- x[[
|
968
|
-
which.max(
|
969
|
-
|
970
|
-
|
983
|
+
which.max(
|
984
|
+
as.numeric(
|
985
|
+
lapply(x, function(y) y$param.hat[[length(y$param.hat)]])
|
986
|
+
)
|
987
|
+
)
|
988
|
+
]]
|
971
989
|
# If a "larger" peak (a peak explaining more bins of the genome) is within
|
972
990
|
# the default "merge.logdist" distance, take that one instead.
|
973
991
|
corePeak <- maxPeak
|
974
|
-
for(p in x){
|
975
|
-
p.len <- ifelse(length(p$values)==0, p$n.hat, length(p$values))
|
992
|
+
for (p in x) {
|
993
|
+
p.len <- ifelse(length(p$values) == 0, p$n.hat, length(p$values))
|
976
994
|
corePeak.len <- ifelse(
|
977
|
-
length(corePeak$values)==0,
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
if(
|
983
|
-
|
984
|
-
|
995
|
+
length(corePeak$values) == 0,
|
996
|
+
corePeak$n.hat,
|
997
|
+
length(corePeak$values)
|
998
|
+
)
|
999
|
+
sz.d <- log(p.len / corePeak.len)
|
1000
|
+
if (is.nan(sz.d) || sz.d < 0) next
|
1001
|
+
sq.d.a <- as.numeric(tail(p$param.hat, n = 1))
|
1002
|
+
sq.d.b <- as.numeric(tail(maxPeak$param.hat, n = 1))
|
1003
|
+
if (p$log) sq.d.a <- exp(sq.d.a)
|
1004
|
+
if (corePeak$log) sq.d.b <- exp(sq.d.b)
|
1005
|
+
if (abs(log(sq.d.a / sq.d.b)) < log(1.75) + sz.d / 5) corePeak <- p
|
985
1006
|
}
|
986
1007
|
return(corePeak)
|
987
1008
|
}
|
@@ -994,20 +1015,20 @@ enve.recplot2.corePeak <- function
|
|
994
1015
|
#' \code{\link{enve.RecPlot2}} object.
|
995
1016
|
#' @param new.cutoff
|
996
1017
|
#' New cutoff to use.
|
1018
|
+
#'
|
1019
|
+
#' @return The modified \code{\link{enve.RecPlot2}} object.
|
997
1020
|
#'
|
998
1021
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
999
1022
|
#'
|
1000
1023
|
#' @export
|
1001
1024
|
|
1002
|
-
enve.recplot2.changeCutoff <- function
|
1003
|
-
(rp,
|
1004
|
-
new.cutoff=98
|
1005
|
-
){
|
1025
|
+
enve.recplot2.changeCutoff <- function(rp, new.cutoff = 98) {
|
1006
1026
|
# Re-calculate vectors
|
1007
|
-
id.mids
|
1008
|
-
id.ingroup
|
1009
|
-
pos.counts.in <- apply(rp$counts[,id.ingroup], 1, sum)
|
1010
|
-
pos.counts.out <- apply(rp$counts[
|
1027
|
+
id.mids <- (rp$id.breaks[-length(rp$id.breaks)] + rp$id.breaks[-1]) / 2
|
1028
|
+
id.ingroup <- (id.mids > new.cutoff)
|
1029
|
+
pos.counts.in <- apply(rp$counts[, id.ingroup], 1, sum)
|
1030
|
+
pos.counts.out <- apply(rp$counts[, !id.ingroup], 1, sum)
|
1031
|
+
|
1011
1032
|
# Update object
|
1012
1033
|
attr(rp, "id.ingroup") <- id.ingroup
|
1013
1034
|
attr(rp, "pos.counts.in") <- pos.counts.in
|
@@ -1024,9 +1045,9 @@ enve.recplot2.changeCutoff <- function
|
|
1024
1045
|
#' @param rp
|
1025
1046
|
#' Recruitment plot, an \code{\link{enve.RecPlot2}} object.
|
1026
1047
|
#' @param peak
|
1027
|
-
#' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
|
1028
|
-
#' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
|
1029
|
-
#' used (see \code{\link{enve.recplot2.corePeak}}).
|
1048
|
+
#' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
|
1049
|
+
#' be a list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
|
1050
|
+
#' peak is used (see \code{\link{enve.recplot2.corePeak}}).
|
1030
1051
|
#' @param lower.tail
|
1031
1052
|
#' If \code{FALSE}, it returns windows significantly above the peak in
|
1032
1053
|
#' sequencing depth.
|
@@ -1041,16 +1062,16 @@ enve.recplot2.changeCutoff <- function
|
|
1041
1062
|
#'
|
1042
1063
|
#' @export
|
1043
1064
|
|
1044
|
-
enve.recplot2.windowDepthThreshold <- function
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
){
|
1050
|
-
if(is.list(peak)) peak <- enve.recplot2.corePeak(peak)
|
1065
|
+
enve.recplot2.windowDepthThreshold <- function(
|
1066
|
+
rp,
|
1067
|
+
peak,
|
1068
|
+
lower.tail = TRUE,
|
1069
|
+
significance = 0.05
|
1070
|
+
) {
|
1071
|
+
if (is.list(peak)) peak <- enve.recplot2.corePeak(peak)
|
1051
1072
|
par <- peak$param.hat
|
1052
|
-
par[["p"]] <- ifelse(lower.tail, significance, 1-significance)
|
1053
|
-
thr <- do.call(ifelse(length(par)==4, qsn, qnorm), par)
|
1073
|
+
par[["p"]] <- ifelse(lower.tail, significance, 1 - significance)
|
1074
|
+
thr <- do.call(ifelse(length(par) == 4, qsn, qnorm), par)
|
1054
1075
|
if(peak$log) thr <- exp(thr)
|
1055
1076
|
|
1056
1077
|
return(thr)
|
@@ -1064,9 +1085,9 @@ enve.recplot2.windowDepthThreshold <- function
|
|
1064
1085
|
#' @param rp
|
1065
1086
|
#' Recruitment plot, a \code{\link{enve.RecPlot2}} object.
|
1066
1087
|
#' @param peak
|
1067
|
-
#' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
|
1068
|
-
#' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
|
1069
|
-
#' used (see \code{\link{enve.recplot2.corePeak}}).
|
1088
|
+
#' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
|
1089
|
+
#' be a list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
|
1090
|
+
#' peak is used (see \code{\link{enve.recplot2.corePeak}}).
|
1070
1091
|
#' @param lower.tail
|
1071
1092
|
#' If \code{FALSE}, it returns windows significantly above the peak in
|
1072
1093
|
#' sequencing depth.
|
@@ -1089,21 +1110,21 @@ enve.recplot2.windowDepthThreshold <- function
|
|
1089
1110
|
#'
|
1090
1111
|
#' @export
|
1091
1112
|
|
1092
|
-
enve.recplot2.extractWindows <- function
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
){
|
1113
|
+
enve.recplot2.extractWindows <- function(
|
1114
|
+
rp,
|
1115
|
+
peak,
|
1116
|
+
lower.tail = TRUE,
|
1117
|
+
significance = 0.05,
|
1118
|
+
seq.names = FALSE
|
1119
|
+
) {
|
1099
1120
|
# Determine the threshold
|
1100
1121
|
thr <- enve.recplot2.windowDepthThreshold(rp, peak, lower.tail, significance)
|
1101
1122
|
|
1102
1123
|
# Select windows past the threshold
|
1103
1124
|
seqdepth.in <- enve.recplot2.seqdepth(rp)
|
1104
|
-
if(lower.tail){
|
1125
|
+
if (lower.tail) {
|
1105
1126
|
sel <- seqdepth.in < thr
|
1106
|
-
}else{
|
1127
|
+
} else {
|
1107
1128
|
sel <- seqdepth.in > thr
|
1108
1129
|
}
|
1109
1130
|
|
@@ -1117,7 +1138,7 @@ enve.recplot2.extractWindows <- function
|
|
1117
1138
|
#'
|
1118
1139
|
#' Compare the distribution of identities between two
|
1119
1140
|
#' \code{\link{enve.RecPlot2}} objects.
|
1120
|
-
#'
|
1141
|
+
#'
|
1121
1142
|
#' @param x
|
1122
1143
|
#' First \code{\link{enve.RecPlot2}} object.
|
1123
1144
|
#' @param y
|
@@ -1126,10 +1147,12 @@ enve.recplot2.extractWindows <- function
|
|
1126
1147
|
#' Distance method to use. This should be (an unambiguous abbreviation of)
|
1127
1148
|
#' one of:
|
1128
1149
|
#' \itemize{
|
1129
|
-
#' \item{"hellinger"
|
1130
|
-
#'
|
1131
|
-
#' \item{"
|
1132
|
-
#'
|
1150
|
+
#' \item{"hellinger"
|
1151
|
+
#' (\emph{Hellinger, 1090, doi:10.1515/crll.1909.136.210}),}
|
1152
|
+
#' \item{"bhattacharyya"
|
1153
|
+
#' (\emph{Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35}),}
|
1154
|
+
#' \item{"kl" or "kullback-leibler"
|
1155
|
+
#' (\emph{Kullback & Leibler, 1951, doi:10.1214/aoms/1177729694}), or}
|
1133
1156
|
#' \item{"euclidean"}
|
1134
1157
|
#' }
|
1135
1158
|
#' @param smooth.par
|
@@ -1142,56 +1165,66 @@ enve.recplot2.extractWindows <- function
|
|
1142
1165
|
#' @param max.deviation
|
1143
1166
|
#' Maximum mean deviation between identity breaks tolerated (as percent
|
1144
1167
|
#' identity). Difference in number of \code{id.breaks} is never tolerated.
|
1145
|
-
#'
|
1168
|
+
#'
|
1169
|
+
#' @return A \strong{numeric} indicating the distance between the objects.
|
1170
|
+
#'
|
1146
1171
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1147
1172
|
#'
|
1148
1173
|
#' @export
|
1149
1174
|
|
1150
1175
|
enve.recplot2.compareIdentities <- function
|
1151
|
-
(
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1176
|
+
(
|
1177
|
+
x,
|
1178
|
+
y,
|
1179
|
+
method = "hellinger",
|
1180
|
+
smooth.par = NULL,
|
1181
|
+
pseudocounts = 0,
|
1182
|
+
max.deviation = 0.75
|
1183
|
+
) {
|
1184
|
+
# Sanity checks
|
1185
|
+
METHODS <- c(
|
1186
|
+
"hellinger", "bhattacharyya", "kullback-leibler", "kl", "euclidean"
|
1187
|
+
)
|
1159
1188
|
i.meth <- pmatch(method, METHODS)
|
1160
1189
|
if (is.na(i.meth)) stop("Invalid distance ", method)
|
1161
|
-
if(!inherits(x, "enve.RecPlot2"))
|
1190
|
+
if (!inherits(x, "enve.RecPlot2"))
|
1162
1191
|
stop("'x' must inherit from class `enve.RecPlot2`")
|
1163
|
-
if(!inherits(y, "enve.RecPlot2"))
|
1192
|
+
if (!inherits(y, "enve.RecPlot2"))
|
1164
1193
|
stop("'y' must inherit from class `enve.RecPlot2`")
|
1165
|
-
if(length(x$id.breaks) != length(y$id.breaks))
|
1194
|
+
if (length(x$id.breaks) != length(y$id.breaks))
|
1166
1195
|
stop("'x' and 'y' must have the same number of `id.breaks`")
|
1167
1196
|
dev <- mean(abs(x$id.breaks - y$id.breaks))
|
1168
|
-
if(dev > max.deviation)
|
1197
|
+
if (dev > max.deviation)
|
1169
1198
|
stop("'x' and 'y' must have similar `id.breaks`; exceeding max.deviation: ",
|
1170
1199
|
dev)
|
1200
|
+
|
1201
|
+
# Initialize
|
1171
1202
|
x.cnt <- x$id.counts
|
1172
1203
|
y.cnt <- y$id.counts
|
1173
|
-
if(is.null(smooth.par) || smooth.par > 0){
|
1174
|
-
x.mids <- (x$id.breaks[-1] + x$id.breaks[-length(x$id.breaks)])/2
|
1175
|
-
y.mids <- (y$id.breaks[-1] + y$id.breaks[-length(y$id.breaks)])/2
|
1176
|
-
p.spline <- smooth.spline(x.mids, x.cnt, spar=smooth.par)
|
1177
|
-
q.spline <- smooth.spline(y.mids, y.cnt, spar=smooth.par)
|
1204
|
+
if (is.null(smooth.par) || smooth.par > 0){
|
1205
|
+
x.mids <- (x$id.breaks[-1] + x$id.breaks[-length(x$id.breaks)]) / 2
|
1206
|
+
y.mids <- (y$id.breaks[-1] + y$id.breaks[-length(y$id.breaks)]) / 2
|
1207
|
+
p.spline <- smooth.spline(x.mids, x.cnt, spar = smooth.par)
|
1208
|
+
q.spline <- smooth.spline(y.mids, y.cnt, spar = smooth.par)
|
1178
1209
|
x.cnt <- pmax(p.spline$y, 0)
|
1179
1210
|
y.cnt <- pmax(q.spline$y, 0)
|
1180
1211
|
}
|
1212
|
+
|
1181
1213
|
a <- as.numeric(pseudocounts)
|
1182
1214
|
p <- (x.cnt + a) / sum(x.cnt + a)
|
1183
1215
|
q <- (y.cnt + a) / sum(y.cnt + a)
|
1184
1216
|
d <- NA
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1217
|
+
|
1218
|
+
if (i.meth %in% c(1L, 2L)) {
|
1219
|
+
d <- sqrt(sum((sqrt(p) - sqrt(q))**2)) / sqrt(2)
|
1220
|
+
if(i.meth == 2L) d <- 1 - d**2
|
1221
|
+
} else if (i.meth %in% c(3L, 4L)) {
|
1222
|
+
sel <- p > 0
|
1223
|
+
if (any(q[sel] == 0))
|
1191
1224
|
stop("Undefined distance without absolute continuity, use pseudocounts")
|
1192
|
-
d <- -sum(p[sel]*log(q[sel]/p[sel]))
|
1193
|
-
}else if(i.meth == 5L){
|
1194
|
-
d <- sqrt(sum((q-p)**2))
|
1225
|
+
d <- -sum(p[sel] * log(q[sel] / p[sel]))
|
1226
|
+
} else if (i.meth == 5L) {
|
1227
|
+
d <- sqrt(sum((q - p)**2))
|
1195
1228
|
}
|
1196
1229
|
return(d)
|
1197
1230
|
}
|
@@ -1219,19 +1252,16 @@ enve.recplot2.compareIdentities <- function
|
|
1219
1252
|
#'
|
1220
1253
|
#' @export
|
1221
1254
|
|
1222
|
-
enve.recplot2.coordinates <- function
|
1223
|
-
(x,
|
1224
|
-
bins
|
1225
|
-
){
|
1226
|
-
if(!inherits(x, "enve.RecPlot2"))
|
1255
|
+
enve.recplot2.coordinates <- function(x, bins) {
|
1256
|
+
if (!inherits(x, "enve.RecPlot2"))
|
1227
1257
|
stop("'x' must inherit from class `enve.RecPlot2`")
|
1228
|
-
if(missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
|
1229
|
-
if(!is.vector(bins)) stop("'bins' must be a vector")
|
1230
|
-
if(inherits(bins, "logical")) bins <- which(bins)
|
1258
|
+
if (missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
|
1259
|
+
if (!is.vector(bins)) stop("'bins' must be a vector")
|
1260
|
+
if (inherits(bins, "logical")) bins <- which(bins)
|
1231
1261
|
|
1232
1262
|
y <- data.frame(stringsAsFactors = FALSE, row.names = bins)
|
1233
1263
|
|
1234
|
-
for(i in 1:length(bins)){
|
1264
|
+
for (i in 1:length(bins)) {
|
1235
1265
|
j <- bins[i]
|
1236
1266
|
# Concatenated coordinates
|
1237
1267
|
cc <- x$pos.breaks[c(j, j+1)]
|
@@ -1243,7 +1273,7 @@ enve.recplot2.coordinates <- function
|
|
1243
1273
|
cc[2] > x$seq.breaks[-length(x$seq.breaks)] &
|
1244
1274
|
cc[2] <= x$seq.breaks[-1])
|
1245
1275
|
# Translate coordinates
|
1246
|
-
if(length(sb.from)==1 & length(sb.to)==1){
|
1276
|
+
if (length(sb.from) == 1 & length(sb.to) == 1) {
|
1247
1277
|
y[i, 'name.from'] <- x$seq.names[sb.from]
|
1248
1278
|
y[i, 'pos.from'] <- floor(x$seq.breaks[sb.from] + cc[1] - 1)
|
1249
1279
|
y[i, 'name.to'] <- x$seq.names[sb.to]
|
@@ -1276,23 +1306,18 @@ enve.recplot2.coordinates <- function
|
|
1276
1306
|
#'
|
1277
1307
|
#' @export
|
1278
1308
|
|
1279
|
-
enve.recplot2.seqdepth <- function
|
1280
|
-
|
1281
|
-
(x,
|
1282
|
-
sel,
|
1283
|
-
low.identity=FALSE
|
1284
|
-
){
|
1285
|
-
if(!inherits(x, "enve.RecPlot2"))
|
1309
|
+
enve.recplot2.seqdepth <- function(x, sel, low.identity = FALSE) {
|
1310
|
+
if (!inherits(x, "enve.RecPlot2"))
|
1286
1311
|
stop("'x' must inherit from class `enve.RecPlot2`")
|
1287
|
-
if(low.identity){
|
1312
|
+
if (low.identity) {
|
1288
1313
|
pos.cnts.in <- x$pos.counts.out
|
1289
|
-
}else{
|
1314
|
+
} else {
|
1290
1315
|
pos.cnts.in <- x$pos.counts.in
|
1291
1316
|
}
|
1292
1317
|
pos.breaks <- x$pos.breaks
|
1293
1318
|
pos.binsize <- (pos.breaks[-1] - pos.breaks[-length(pos.breaks)])
|
1294
1319
|
seqdepth.in <- pos.cnts.in/pos.binsize
|
1295
|
-
if(missing(sel)) return(seqdepth.in)
|
1320
|
+
if (missing(sel)) return(seqdepth.in)
|
1296
1321
|
return(seqdepth.in[sel])
|
1297
1322
|
}
|
1298
1323
|
|
@@ -1307,8 +1332,10 @@ enve.recplot2.seqdepth <- function
|
|
1307
1332
|
#' Range of identities to be considered. By default, the full range
|
1308
1333
|
#' is used (note that the upper boundary is \code{Inf} and not 100 because
|
1309
1334
|
#' recruitment plots can also be built with bit-scores). To use only
|
1310
|
-
#' intra-population matches (with identities), use c(95,100). To use
|
1311
|
-
#' inter-population values, use c(0,95).
|
1335
|
+
#' intra-population matches (with identities), use \code{c(95, 100)}. To use
|
1336
|
+
#' only inter-population values, use \code{c(0, 95)}.
|
1337
|
+
#'
|
1338
|
+
#' @return A numeric value indicating the ANIr (as percentage).
|
1312
1339
|
#'
|
1313
1340
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1314
1341
|
#'
|
@@ -1338,43 +1365,49 @@ enve.recplot2.ANIr <- function
|
|
1338
1365
|
#' @param pos.breaks Position breaks
|
1339
1366
|
#' @param id.breaks Identity breaks
|
1340
1367
|
#' @param rec.idcol Identity column to use
|
1368
|
+
#'
|
1369
|
+
#' @return 2-dimensional matrix of counts per identity and position bins.
|
1341
1370
|
#'
|
1342
1371
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1343
1372
|
#' @author Kenji Gerhardt [aut]
|
1344
1373
|
#'
|
1345
1374
|
#' @export
|
1346
1375
|
|
1347
|
-
enve.recplot2.__counts <- function
|
1348
|
-
(x, pos.breaks, id.breaks, rec.idcol) {
|
1376
|
+
enve.recplot2.__counts <- function(x, pos.breaks, id.breaks, rec.idcol) {
|
1349
1377
|
rec2 <- x$rec
|
1350
1378
|
verbose <- x$verbose
|
1351
1379
|
|
1352
1380
|
# get counts of how many occurrences of each genome pos.bin there are per read
|
1353
1381
|
x.bins <- mapply(
|
1354
1382
|
function(start, end) {
|
1355
|
-
list(rle(findInterval(start:end, pos.breaks, left.open =
|
1356
|
-
},
|
1383
|
+
list(rle(findInterval(start:end, pos.breaks, left.open = TRUE)))
|
1384
|
+
},
|
1385
|
+
rec2[, 1], rec2[, 2]
|
1386
|
+
)
|
1357
1387
|
|
1358
1388
|
# find the single y bin for each row, replicates it at the correct places to
|
1359
1389
|
# the number of distinct bins found in its row
|
1360
|
-
y.bins <- rep(findInterval(rec2[, rec.idcol], id.breaks, left.open =
|
1390
|
+
y.bins <- rep(findInterval(rec2[, rec.idcol], id.breaks, left.open = TRUE),
|
1361
1391
|
times = unlist(lapply(x.bins, function(a) length(a$lengths))))
|
1362
1392
|
|
1363
1393
|
# x.bins_counts is the number of occurrences of each bin a row contains,
|
1364
1394
|
# per row, then unlisted
|
1365
1395
|
x.bins_counts <- unlist(lapply(x.bins, function(a) a$lengths))
|
1366
1396
|
|
1367
|
-
# these are the pos. in. genome bins that each count in x.bins_counts falls
|
1397
|
+
# these are the pos. in. genome bins that each count in x.bins_counts falls
|
1398
|
+
# into
|
1368
1399
|
x.bins <- unlist(lapply(x.bins, function(a) a$values))
|
1369
1400
|
|
1370
|
-
# much more efficient counts implementation in R using lists instead of a
|
1401
|
+
# much more efficient counts implementation in R using lists instead of a
|
1402
|
+
# matrix:
|
1371
1403
|
counts <- lapply(
|
1372
1404
|
1:(length(pos.breaks) - 1),
|
1373
|
-
function(col_len) rep(0, length(id.breaks) - 1)
|
1405
|
+
function(col_len) rep(0, length(id.breaks) - 1)
|
1406
|
+
)
|
1374
1407
|
|
1375
1408
|
# accesses the correct list in counts by x.bin, then
|
1376
1409
|
# accesses the position in that row by y.bins and adds the new count
|
1377
|
-
for(i in 1:length(x.bins)){
|
1410
|
+
for (i in 1:length(x.bins)) {
|
1378
1411
|
counts[[x.bins[i]]][y.bins[i]] <- counts[[x.bins[i]]][y.bins[i]] + x.bins_counts[i]
|
1379
1412
|
}
|
1380
1413
|
|
@@ -1384,32 +1417,38 @@ enve.recplot2.__counts <- function
|
|
1384
1417
|
|
1385
1418
|
#' Enveomics: Recruitment Plot (2) EMauto Peak Finder - Internal Ancillary Function
|
1386
1419
|
#'
|
1387
|
-
#' Internal ancillary function (see
|
1420
|
+
#' Internal ancillary function (see
|
1421
|
+
#' \code{\link{enve.recplot2.findPeaks.emauto}}).
|
1388
1422
|
#'
|
1389
|
-
#' @param x \code{\link{enve.RecPlot2}} object
|
1390
|
-
#' @param comp Components
|
1391
|
-
#' @param do_crit Function estimating the criterion
|
1392
|
-
#' @param best Best solution thus far
|
1393
|
-
#' @param verbose If verbose
|
1394
|
-
#' @param ...
|
1423
|
+
#' @param x \code{\link{enve.RecPlot2}} object.
|
1424
|
+
#' @param comp Components.
|
1425
|
+
#' @param do_crit Function estimating the criterion.
|
1426
|
+
#' @param best Best solution thus far.
|
1427
|
+
#' @param verbose If verbose.
|
1428
|
+
#' @param ...
|
1429
|
+
#' Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}.
|
1430
|
+
#'
|
1431
|
+
#' @return Updated solution with the same structure as \code{best}.
|
1395
1432
|
#'
|
1396
1433
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1397
1434
|
#'
|
1398
1435
|
#' @export
|
1399
1436
|
|
1400
|
-
enve.recplot2.findPeaks.__emauto_one <- function
|
1401
|
-
|
1402
|
-
|
1403
|
-
|
1404
|
-
|
1437
|
+
enve.recplot2.findPeaks.__emauto_one <- function(
|
1438
|
+
x, comp, do_crit, best, verbose, ...
|
1439
|
+
) {
|
1440
|
+
peaks <- enve.recplot2.findPeaks.em(x = x, components = comp, ...)
|
1441
|
+
if (length(peaks) == 0) return(best)
|
1442
|
+
k <- comp * 3 - 1 # mean & sd for each component, and n-1 free alpha params
|
1405
1443
|
crit <- do_crit(peaks[[1]]$err.res, k, peaks[[1]]$n.total)
|
1406
|
-
if(verbose)
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
best[[
|
1444
|
+
if(verbose)
|
1445
|
+
cat(comp, "\t| LL =", peaks[[1]]$err.res, "\t| Estimate =", crit,
|
1446
|
+
ifelse(crit > best[["crit"]], "*", ""), "\n")
|
1447
|
+
if(crit > best[["crit"]]){
|
1448
|
+
best[["crit"]] <- crit
|
1449
|
+
best[["peaks"]] <- peaks
|
1411
1450
|
}
|
1412
|
-
best[[
|
1451
|
+
best[["pstore"]][[comp]] <- peaks
|
1413
1452
|
return(best)
|
1414
1453
|
}
|
1415
1454
|
|
@@ -1419,13 +1458,15 @@ enve.recplot2.findPeaks.__emauto_one <- function
|
|
1419
1458
|
#'
|
1420
1459
|
#' @param x Vector of log-transformed sequencing depths
|
1421
1460
|
#' @param theta Parameters list
|
1461
|
+
#'
|
1462
|
+
#' @return A list with components \code{ll} (numeric) the log-likelihood, and
|
1463
|
+
#' \code{posterior} (numeric) the posterior probability.
|
1422
1464
|
#'
|
1423
1465
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1424
1466
|
#'
|
1425
1467
|
#' @export
|
1426
1468
|
|
1427
|
-
enve.recplot2.findPeaks.__em_e <- function
|
1428
|
-
(x, theta){
|
1469
|
+
enve.recplot2.findPeaks.__em_e <- function(x, theta) {
|
1429
1470
|
components <- length(theta[['mu']])
|
1430
1471
|
product <- do.call(cbind,
|
1431
1472
|
lapply(1:components,
|
@@ -1437,7 +1478,7 @@ enve.recplot2.findPeaks.__em_e <- function
|
|
1437
1478
|
cat(i,'/',nrow(product), ':', product[i,], '\n')
|
1438
1479
|
}
|
1439
1480
|
|
1440
|
-
return(list(ll=sum(log(sum.of.components)), posterior=posterior))
|
1481
|
+
return(list(ll = sum(log(sum.of.components)), posterior = posterior))
|
1441
1482
|
}
|
1442
1483
|
|
1443
1484
|
#' Enveomics: Recruitment Plot (2) Em Peak Finder - Internal Ancillary Function Maximization
|
@@ -1446,20 +1487,23 @@ enve.recplot2.findPeaks.__em_e <- function
|
|
1446
1487
|
#'
|
1447
1488
|
#' @param x Vector of log-transformed sequencing depths
|
1448
1489
|
#' @param posterior Posterior probability
|
1490
|
+
#'
|
1491
|
+
#' @return A list with components \code{mu} (numeric) the estimated mean,
|
1492
|
+
#' \code{sd} (numeric) the estimated standard deviation, and \code{alpha}
|
1493
|
+
#' (numeric) the estimated alpha parameter.
|
1449
1494
|
#'
|
1450
1495
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1451
1496
|
#'
|
1452
1497
|
#' @export
|
1453
1498
|
|
1454
|
-
enve.recplot2.findPeaks.__em_m <- function
|
1455
|
-
(x, posterior){
|
1499
|
+
enve.recplot2.findPeaks.__em_m <- function(x, posterior) {
|
1456
1500
|
components <- ncol(posterior)
|
1457
1501
|
n <- colSums(posterior)
|
1458
1502
|
mu <- colSums(posterior * x) / n
|
1459
1503
|
sd <- sqrt( colSums(
|
1460
1504
|
posterior * (matrix(rep(x,components), ncol=components) - mu)^2) / n )
|
1461
1505
|
alpha <- n/length(x)
|
1462
|
-
return(list(mu=mu, sd=sd, alpha=alpha))
|
1506
|
+
return(list(mu = mu, sd = sd, alpha = alpha))
|
1463
1507
|
}
|
1464
1508
|
|
1465
1509
|
#' Enveomics: Recruitment Plot (2) Peak S4 Class - Internal Ancillary Function
|
@@ -1469,29 +1513,31 @@ enve.recplot2.findPeaks.__em_m <- function
|
|
1469
1513
|
#' @param x \code{\link{enve.RecPlot2.Peak}} object
|
1470
1514
|
#' @param mids Midpoints
|
1471
1515
|
#' @param counts Counts
|
1516
|
+
#'
|
1517
|
+
#' @return A numeric vector of counts (histogram)
|
1472
1518
|
#'
|
1473
1519
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1474
1520
|
#'
|
1475
1521
|
#' @export
|
1476
1522
|
|
1477
|
-
enve.recplot2.__peakHist <- function
|
1478
|
-
(x, mids, counts=TRUE){
|
1523
|
+
enve.recplot2.__peakHist <- function(x, mids, counts = TRUE){
|
1479
1524
|
d.o <- x$param.hat
|
1480
|
-
if(length(x$log)==0) x$log <- FALSE
|
1481
|
-
if(x$log){
|
1525
|
+
if (length(x$log) == 0) x$log <- FALSE
|
1526
|
+
if (x$log) {
|
1482
1527
|
d.o$x <- log(mids)
|
1483
|
-
}else{
|
1528
|
+
} else {
|
1484
1529
|
d.o$x <- mids
|
1485
1530
|
}
|
1486
|
-
prob <- do.call(paste('d', x$dist, sep=
|
1531
|
+
prob <- do.call(paste('d', x$dist, sep = ""), d.o)
|
1487
1532
|
if(!counts) return(prob)
|
1488
1533
|
if(length(x$values)>0) return(prob*length(x$values)/sum(prob))
|
1489
|
-
return(prob*x$n.hat/sum(prob))
|
1534
|
+
return(prob * x$n.hat / sum(prob))
|
1490
1535
|
}
|
1491
1536
|
|
1492
1537
|
#' Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 1
|
1493
1538
|
#'
|
1494
|
-
#'
|
1539
|
+
#' Internal ancillary function (see
|
1540
|
+
#' \code{\link{enve.recplot2.findPeaks.mower}}).
|
1495
1541
|
#'
|
1496
1542
|
#' @param lsd1 Vector of log-transformed sequencing depths
|
1497
1543
|
#' @param min.points Minimum number of points
|
@@ -1505,85 +1551,100 @@ enve.recplot2.__peakHist <- function
|
|
1505
1551
|
#' @param merge.logdist Attempted \code{merge.logdist} parameter
|
1506
1552
|
#' @param verbose If verbose
|
1507
1553
|
#' @param log If log-transformed depths
|
1554
|
+
#'
|
1555
|
+
#' @return Return an \code{enve.RecPlot2.Peak} object.
|
1508
1556
|
#'
|
1509
1557
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1510
1558
|
#'
|
1511
1559
|
#' @export
|
1512
1560
|
|
1513
|
-
enve.recplot2.findPeaks.__mow_one <- function
|
1514
|
-
|
1515
|
-
|
1516
|
-
){
|
1517
|
-
dist <- ifelse(with.skewness,
|
1561
|
+
enve.recplot2.findPeaks.__mow_one <- function(
|
1562
|
+
lsd1, min.points, quant.est, mlv.opts, fitdist.opts, with.skewness,
|
1563
|
+
optim.rounds, optim.epsilon, n.total, merge.logdist, verbose, log
|
1564
|
+
) {
|
1565
|
+
dist <- ifelse(with.skewness, "sn", "norm")
|
1518
1566
|
|
1519
1567
|
# Find peak
|
1520
|
-
o <- mlv.opts
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1568
|
+
o <- mlv.opts
|
1569
|
+
o$x <- lsd1
|
1570
|
+
mode1 <- median(lsd1) # mode1 <- do.call(mlv, o)$M;
|
1571
|
+
if (verbose) cat("Anchoring at mode =", mode1, "\n")
|
1572
|
+
param.hat <- fitdist.opts$start
|
1573
|
+
last.hat <- param.hat
|
1574
|
+
lim <- NA
|
1575
|
+
if (with.skewness) { param.hat$xi <- mode1 } else { param.hat$mean <- mode1 }
|
1526
1576
|
|
1527
1577
|
# Refine peak parameters
|
1528
|
-
for(round in 1:optim.rounds){
|
1529
|
-
param.hat[[ 1 ]] <- param.hat[[
|
1578
|
+
for (round in 1:optim.rounds) {
|
1579
|
+
param.hat[[ 1 ]] <- param.hat[[1]] / diff(quant.est) # <- expand dispersion
|
1530
1580
|
lim.o <- param.hat
|
1531
|
-
lim.o$p <- quant.est
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1581
|
+
lim.o$p <- quant.est
|
1582
|
+
lim <- do.call(paste("q", dist, sep = ""), lim.o)
|
1583
|
+
lsd1.pop <- lsd1[(lsd1 > lim[1]) & (lsd1 < lim[2])]
|
1584
|
+
if (verbose)
|
1585
|
+
cat(" Round", round, "with n =", length(lsd1.pop),
|
1586
|
+
"and params =", as.numeric(param.hat), " \r")
|
1587
|
+
if (length(lsd1.pop) < min.points) break
|
1588
|
+
o <- fitdist.opts
|
1589
|
+
o$data <- lsd1.pop
|
1590
|
+
o$start <- param.hat
|
1537
1591
|
last.last.hat <- last.hat
|
1538
1592
|
last.hat <- param.hat
|
1539
|
-
param.hat <- as.list(do.call(fitdist, o)$estimate)
|
1540
|
-
if(any(is.na(param.hat))){
|
1541
|
-
if(round>1) param.hat <- last.hat
|
1542
|
-
break
|
1593
|
+
param.hat <- as.list(do.call(fitdist, o)$estimate)
|
1594
|
+
if (any(is.na(param.hat))) {
|
1595
|
+
if (round > 1) param.hat <- last.hat
|
1596
|
+
break
|
1543
1597
|
}
|
1544
|
-
if(round > 1){
|
1545
|
-
epsilon1 <- sum((as.numeric(last.hat)-as.numeric(param.hat))^2)
|
1546
|
-
if(epsilon1 < optim.epsilon) break
|
1547
|
-
if(round > 2){
|
1548
|
-
epsilon2 <- sum((as.numeric(last.last.hat)-as.numeric(param.hat))^2)
|
1549
|
-
if(epsilon2 < optim.epsilon) break
|
1598
|
+
if (round > 1) {
|
1599
|
+
epsilon1 <- sum((as.numeric(last.hat) - as.numeric(param.hat))^2)
|
1600
|
+
if (epsilon1 < optim.epsilon) break
|
1601
|
+
if (round > 2) {
|
1602
|
+
epsilon2 <- sum((as.numeric(last.last.hat) - as.numeric(param.hat))^2)
|
1603
|
+
if (epsilon2 < optim.epsilon) break
|
1550
1604
|
}
|
1551
1605
|
}
|
1552
1606
|
}
|
1553
|
-
if(verbose) cat(
|
1554
|
-
if(is.na(param.hat[1]) | is.na(lim[1])) return(NULL)
|
1607
|
+
if (verbose) cat("\n")
|
1608
|
+
if (is.na(param.hat[1]) | is.na(lim[1])) return(NULL)
|
1555
1609
|
|
1556
1610
|
# Mow distribution
|
1557
|
-
lsd2 <- c()
|
1558
|
-
lsd.pop <- c()
|
1559
|
-
n.hat <- length(lsd1.pop)/diff(quant.est)
|
1560
|
-
peak <- new(
|
1561
|
-
|
1562
|
-
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
|
1569
|
-
|
1570
|
-
|
1571
|
-
|
1611
|
+
lsd2 <- c()
|
1612
|
+
lsd.pop <- c()
|
1613
|
+
n.hat <- length(lsd1.pop) / diff(quant.est)
|
1614
|
+
peak <- new(
|
1615
|
+
"enve.RecPlot2.Peak", dist = dist, values = as.numeric(), mode = mode1,
|
1616
|
+
param.hat = param.hat, n.hat = n.hat, n.total = n.total,
|
1617
|
+
merge.logdist = merge.logdist, log = log
|
1618
|
+
)
|
1619
|
+
peak.breaks <- seq(min(lsd1), max(lsd1), length = 20)
|
1620
|
+
peak.cnt <- enve.recplot2.__peakHist(
|
1621
|
+
peak, (peak.breaks[-length(peak.breaks)] + peak.breaks[-1]) / 2
|
1622
|
+
)
|
1623
|
+
for (i in 2:length(peak.breaks)) {
|
1624
|
+
values <- lsd1[(lsd1 >= peak.breaks[i-1]) & (lsd1 < peak.breaks[i])]
|
1625
|
+
n.exp <- peak.cnt[i - 1]
|
1626
|
+
if (is.na(n.exp) | n.exp == 0) n.exp <- 0.1
|
1627
|
+
if (length(values) == 0) next
|
1628
|
+
in.peak <- runif(length(values)) <= n.exp / length(values)
|
1572
1629
|
lsd2 <- c(lsd2, values[!in.peak])
|
1573
1630
|
lsd.pop <- c(lsd.pop, values[in.peak])
|
1574
1631
|
}
|
1575
|
-
if(length(lsd.pop) < min.points) return(NULL)
|
1632
|
+
if (length(lsd.pop) < min.points) return(NULL)
|
1576
1633
|
|
1577
1634
|
# Return peak
|
1578
|
-
attr(peak,
|
1579
|
-
attr(peak,
|
1580
|
-
attr(peak,
|
1581
|
-
|
1582
|
-
|
1583
|
-
|
1584
|
-
|
1585
|
-
|
1586
|
-
|
1635
|
+
attr(peak, "values") <- lsd.pop
|
1636
|
+
attr(peak, "values.res") <- lsd2
|
1637
|
+
attr(peak, "err.res") <- 1 - 0.5 * (
|
1638
|
+
cor(
|
1639
|
+
hist(lsd.pop, breaks = peak.breaks, plot = FALSE)$counts,
|
1640
|
+
hist(lsd1, breaks = peak.breaks, plot = FALSE)$counts
|
1641
|
+
) + 1
|
1642
|
+
)
|
1643
|
+
mu <- tail(param.hat, n = 1)
|
1644
|
+
attr(peak, "seq.depth") <- ifelse(log, exp(mu), mu)
|
1645
|
+
if(verbose)
|
1646
|
+
cat(" Extracted peak with n =", length(lsd.pop),
|
1647
|
+
"with expected n =", n.hat, "\n")
|
1587
1648
|
return(peak)
|
1588
1649
|
}
|
1589
1650
|
|
@@ -1593,17 +1654,18 @@ enve.recplot2.findPeaks.__mow_one <- function
|
|
1593
1654
|
#'
|
1594
1655
|
#' @param peaks.opts List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}
|
1595
1656
|
#'
|
1657
|
+
#' @return A list of \code{enve.RecPlot2.Peak} objects.
|
1658
|
+
#'
|
1596
1659
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1597
1660
|
#'
|
1598
1661
|
#' @export
|
1599
1662
|
|
1600
|
-
enve.recplot2.findPeaks.__mower <- function
|
1601
|
-
(peaks.opts){
|
1663
|
+
enve.recplot2.findPeaks.__mower <- function(peaks.opts) {
|
1602
1664
|
peaks <- list()
|
1603
|
-
while(length(peaks.opts$lsd1) > peaks.opts$min.points){
|
1665
|
+
while (length(peaks.opts$lsd1) > peaks.opts$min.points) {
|
1604
1666
|
peak <- do.call(enve.recplot2.findPeaks.__mow_one, peaks.opts)
|
1605
|
-
if(is.null(peak)) break
|
1606
|
-
peaks[[
|
1667
|
+
if (is.null(peak)) break
|
1668
|
+
peaks[[length(peaks) + 1]] <- peak
|
1607
1669
|
peaks.opts$lsd1 <- peak$values.res
|
1608
1670
|
}
|
1609
1671
|
return(peaks)
|
@@ -1616,16 +1678,21 @@ enve.recplot2.findPeaks.__mower <- function
|
|
1616
1678
|
#' @param peak Query \code{\link{enve.RecPlot2.Peak}} object
|
1617
1679
|
#' @param peaks list of \code{\link{enve.RecPlot2.Peak}} objects
|
1618
1680
|
#'
|
1681
|
+
#' @return A numeric index out of \code{peaks}.
|
1682
|
+
#'
|
1619
1683
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
1620
1684
|
#'
|
1621
1685
|
#' @export
|
1622
1686
|
|
1623
|
-
enve.recplot2.__whichClosestPeak <- function
|
1624
|
-
(
|
1625
|
-
|
1626
|
-
|
1627
|
-
|
1628
|
-
|
1629
|
-
|
1687
|
+
enve.recplot2.__whichClosestPeak <- function(peak, peaks){
|
1688
|
+
dist <- as.numeric(
|
1689
|
+
lapply(
|
1690
|
+
peaks,
|
1691
|
+
function(x)
|
1692
|
+
abs(log(x$param.hat[[length(x$param.hat)]] /
|
1693
|
+
peak$param.hat[[length(peak$param.hat)]]))
|
1694
|
+
)
|
1695
|
+
)
|
1696
|
+
dist[dist == 0] <- Inf
|
1630
1697
|
return(which.min(dist))
|
1631
1698
|
}
|