miga-base 1.2.17.1 → 1.2.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
  4. data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
  5. data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
  6. data/utils/enveomics/Scripts/SRA.download.bash +14 -2
  7. data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
  8. data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
  9. data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
  10. data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
  11. data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
  12. data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
  13. data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
  14. data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
  15. data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
  16. data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
  17. data/utils/enveomics/enveomics.R/R/utils.R +30 -20
  18. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
  19. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
  20. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
  21. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
  22. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
  23. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
  24. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
  25. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
  26. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
  27. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
  28. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
  29. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
  30. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
  31. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
  32. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
  33. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
  34. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
  35. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
  36. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
  37. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
  38. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
  39. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
  40. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
  41. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
  42. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
  43. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
  44. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
  45. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
  46. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
  48. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
  49. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
  50. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
  51. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
  52. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
  53. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
  54. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
  55. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
  56. metadata +3 -37
  57. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  58. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  59. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  60. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  61. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  62. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  63. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  64. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  65. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  66. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  67. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  68. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  69. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  70. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  71. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  72. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  73. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  74. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  75. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  76. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  77. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  78. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  79. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  80. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  81. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  82. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  83. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  84. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  85. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  86. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  87. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  88. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  89. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  90. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  91. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
@@ -6,8 +6,10 @@
6
6
  #' be produced by \code{\link{enve.recplot2}} and supports S4 method plot.
7
7
  #'
8
8
  #' @slot counts \code{(matrix)} Counts as a two-dimensional histogram.
9
- #' @slot pos.counts.in \code{(numeric)} Counts of in-group hits per position bin.
10
- #' @slot pos.counts.out \code{(numeric)} Counts of out-group hits per position bin.
9
+ #' @slot pos.counts.in
10
+ #' \code{(numeric)} Counts of in-group hits per position bin.
11
+ #' @slot pos.counts.out
12
+ #' \code{(numeric)} Counts of out-group hits per position bin.
11
13
  #' @slot id.counts \code{(numeric)} Counts per ID bin.
12
14
  #' @slot id.breaks \code{(numeric)} Breaks of identity bins.
13
15
  #' @slot pos.breaks \code{(numeric)} Breaks of position bins.
@@ -208,42 +210,50 @@ setMethod("$", "enve.RecPlot2.Peak", function(x, name) attr(x, name))
208
210
  #' @export
209
211
 
210
212
  plot.enve.RecPlot2 <- function
211
- (x,
212
- layout=matrix(c(5,5,2,1,4,3), nrow=2),
213
- panel.fun=list(),
214
- widths=c(1,7,2),
215
- heights=c(1,2),
216
- palette=grey((100:0)/100),
217
- underlay.group=TRUE,
218
- peaks.col='darkred',
219
- use.peaks,
220
- id.lim=range(x$id.breaks),
221
- pos.lim=range(x$pos.breaks),
222
- pos.units=c('Mbp','Kbp','bp'),
223
- mar=list('1'=c(5,4,1,1)+.1, '2'=c(ifelse(any(layout==1),1,5),4,4,1)+.1,
224
- '3'=c(5,ifelse(any(layout==1),1,4),1,2)+0.1,
225
- '4'=c(ifelse(any(layout==1),1,5),ifelse(any(layout==2),1,4),4,2)+0.1,
226
- '5'=c(5,3,4,1)+0.1, '6'=c(5,4,4,2)+0.1),
227
- pos.splines=0,
228
- id.splines=1/2,
229
- in.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
230
- out.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
231
- id.lwd=ifelse(is.null(id.splines) || id.splines>0, 1/2, 2),
232
- in.col='darkblue',
233
- out.col='lightblue',
234
- id.col='black',
235
- breaks.col='#AAAAAA40',
236
- peaks.opts=list(),
237
- ...
238
- ){
239
- pos.units <- match.arg(pos.units);
240
- pos.factor <- ifelse(pos.units=='bp',1,ifelse(pos.units=='Kbp',1e3,1e6));
241
- pos.lim <- pos.lim/pos.factor;
242
- lmat <- layout;
243
- for(i in 1:6) if(!any(layout==i)) lmat[layout>i] <- lmat[layout>i]-1;
244
-
245
- layout(lmat, widths=widths, heights=heights);
246
- ori.mar <- par('mar');
213
+ (
214
+ x,
215
+ layout = matrix(c(5, 5, 2, 1, 4, 3), nrow = 2),
216
+ panel.fun = list(),
217
+ widths = c(1, 7, 2),
218
+ heights = c(1, 2),
219
+ palette = grey((100:0) / 100),
220
+ underlay.group = TRUE,
221
+ peaks.col = "darkred",
222
+ use.peaks,
223
+ id.lim = range(x$id.breaks),
224
+ pos.lim = range(x$pos.breaks),
225
+ pos.units = c("Mbp", "Kbp", "bp"),
226
+ mar = list(
227
+ "1" = c(5, 4, 1, 1) + 0.1,
228
+ "2" = c(ifelse(any(layout == 1), 1, 5), 4, 4, 1) + 0.1,
229
+ "3" = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1,
230
+ "4" = c(ifelse(any(layout == 1), 1, 5),
231
+ ifelse(any(layout == 2), 1, 4), 4, 2) + 0.1,
232
+ "5" = c(5, 3, 4, 1) + 0.1,
233
+ "6" = c(5, 4, 4, 2) + 0.1
234
+ ),
235
+ pos.splines = 0,
236
+ id.splines = 1/2,
237
+ in.lwd = ifelse(is.null(pos.splines) || pos.splines > 0, 1/2, 2),
238
+ out.lwd = ifelse(is.null(pos.splines) || pos.splines > 0, 1/2, 2),
239
+ id.lwd = ifelse(is.null(id.splines) || id.splines > 0, 1/2, 2),
240
+ in.col = "darkblue",
241
+ out.col = "lightblue",
242
+ id.col = "black",
243
+ breaks.col = "#AAAAAA40",
244
+ peaks.opts = list(),
245
+ ...
246
+ ) {
247
+ pos.units <- match.arg(pos.units)
248
+ pos.factor <- ifelse(pos.units == "bp", 1,
249
+ ifelse(pos.units == "Kbp", 1e3, 1e6))
250
+ pos.lim <- pos.lim / pos.factor
251
+ lmat <- layout
252
+ for (i in 1:6) if (!any(layout == i)) lmat[layout > i] <- lmat[layout > i] - 1
253
+
254
+ layout(lmat, widths = widths, heights = heights)
255
+ ori.mar <- par("mar")
256
+ on.exit(par(ori.mar))
247
257
 
248
258
  # Essential vars
249
259
  counts <- x$counts
@@ -273,8 +283,8 @@ plot.enve.RecPlot2 <- function
273
283
  }
274
284
 
275
285
  # [1] Counts matrix
276
- if(any(layout==1)){
277
- par(mar=mar[['1']]);
286
+ if (any(layout==1)) {
287
+ par(mar = mar[["1"]]) # par(mar) already being watched by on.exit
278
288
  plot(1, t='n', bty='l',
279
289
  xlim=pos.lim, xlab=paste('Position in genome (',pos.units,')',sep=''),
280
290
  xaxs='i', ylim=id.lim, ylab=x$id.metric, yaxs='i');
@@ -288,18 +298,18 @@ plot.enve.RecPlot2 <- function
288
298
  image(x=pos.breaks, y=id.breaks, z=log10(counts),col=palette,
289
299
  bg=grey(1,0), breaks=seq(-.1,log10(max(counts)),
290
300
  length.out=1+length(palette)), add=TRUE);
291
- if(exists('1',panel.fun)) panel.fun[['1']]();
301
+ if(exists("1", panel.fun)) panel.fun[["1"]]()
292
302
  }
293
303
 
294
304
  # [2] Position histogram
295
- if(any(layout==2)){
296
- par(mar=mar[['2']]);
297
- if(any(layout==1)){
298
- xlab=''
299
- xaxt='n'
300
- }else{
301
- xlab=paste('Position in genome (',pos.units,')',sep='')
302
- xaxt='s'
305
+ if (any(layout == 2)) {
306
+ par(mar = mar[["2"]]) # par(mar) already being watched by on.exit
307
+ if (any(layout == 1)) {
308
+ xlab <- ""
309
+ xaxt <- "n"
310
+ } else {
311
+ xlab <- paste("Position in genome (", pos.units, ")", sep = "")
312
+ xaxt <- "s"
303
313
  }
304
314
  plot(1,t='n', bty='l', log='y',
305
315
  xlim=pos.lim, xlab=xlab, xaxt=xaxt, xaxs='i',
@@ -309,31 +319,33 @@ plot.enve.RecPlot2 <- function
309
319
  pos.f <- rep(seqdepth.in,each=2)
310
320
  lines(pos.x, rep(seqdepth.out,each=2), lwd=out.lwd, col=out.col);
311
321
  lines(pos.x, pos.f, lwd=in.lwd, col=in.col);
312
- if(is.null(pos.splines) || pos.splines > 0){
322
+ if (is.null(pos.splines) || pos.splines > 0) {
313
323
  pos.spline <- smooth.spline(pos.x[pos.f>0], log(pos.f[pos.f>0]),
314
324
  spar=pos.splines)
315
325
  lines(pos.spline$x, exp(pos.spline$y), lwd=2, col=in.col)
316
326
  }
317
- if(any(pos.counts.out==0)) rect(pos.breaks[c(pos.counts.out==0,FALSE)],
318
- seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.out==0)],
319
- seqdepth.lim[1]*3/2, col=out.col, border=NA);
320
- if(any(pos.counts.in==0)) rect(pos.breaks[c(pos.counts.in==0,FALSE)],
321
- seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.in==0)],
322
- seqdepth.lim[1]*3/2, col=in.col, border=NA);
323
- if(exists('2',panel.fun)) panel.fun[['2']]();
327
+ if (any(pos.counts.out==0))
328
+ rect(pos.breaks[c(pos.counts.out==0,FALSE)],
329
+ seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.out==0)],
330
+ seqdepth.lim[1]*3/2, col=out.col, border=NA);
331
+ if (any(pos.counts.in==0))
332
+ rect(pos.breaks[c(pos.counts.in==0,FALSE)],
333
+ seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.in==0)],
334
+ seqdepth.lim[1]*3/2, col=in.col, border=NA);
335
+ if (exists("2", panel.fun)) panel.fun[["2"]]()
324
336
  }
325
337
 
326
338
  # [3] Identity histogram
327
- if(any(layout==3)){
328
- par(mar=mar[['3']]);
329
- if(any(layout==1)){
330
- ylab=''
331
- yaxt='n'
332
- }else{
333
- ylab=x$id.metric
334
- yaxt='s'
339
+ if (any(layout == 3)) {
340
+ par(mar = mar[["3"]]) # par(mar) already being watched by on.exit
341
+ if (any(layout == 1)) {
342
+ ylab <- ""
343
+ yaxt <- "n"
344
+ } else {
345
+ ylab <- x$id.metric
346
+ yaxt <- "s"
335
347
  }
336
- if(sum(id.counts>0) >= 4){
348
+ if (sum(id.counts > 0) >= 4) {
337
349
  id.counts.range <- range(id.counts[id.counts>0])*c(1/2,2);
338
350
  plot(1,t='n', bty='l', log='x',
339
351
  xlim=id.counts.range, xlab='bps per bin', xaxs='i',
@@ -352,23 +364,23 @@ plot.enve.RecPlot2 <- function
352
364
  spar=id.splines)
353
365
  lines(exp(id.spline$y), id.spline$x, lwd=2, col=id.col)
354
366
  }
355
- }else{
367
+ } else {
356
368
  plot(1,t='n',bty='l',xlab='', xaxt='n', ylab='', yaxt='n')
357
369
  text(1,1,labels='Insufficient data', srt=90)
358
370
  }
359
- if(exists('3',panel.fun)) panel.fun[['3']]();
371
+ if (exists("3", panel.fun)) panel.fun[["3"]]()
360
372
  }
361
373
 
362
374
  # [4] Populations histogram
363
375
  peaks <- NA;
364
- if(any(layout==4)){
365
- par(mar=mar[['4']]);
366
- if(any(layout==2)){
367
- ylab=''
368
- yaxt='n'
369
- }else{
370
- ylab='Sequencing depth (X)'
371
- yaxt='s'
376
+ if (any(layout == 4)) {
377
+ par(mar = mar[["4"]]) # par(mar) already being watched by on.exit
378
+ if (any(layout == 2)) {
379
+ ylab <- ""
380
+ yaxt <- "n"
381
+ } else {
382
+ ylab <- "Sequencing depth (X)"
383
+ yaxt <- "s"
372
384
  }
373
385
  h.breaks <- seq(log10(seqdepth.lim[1]*2), log10(seqdepth.lim[2]/2),
374
386
  length.out=200);
@@ -406,44 +418,47 @@ plot.enve.RecPlot2 <- function
406
418
  lapply(peaks,
407
419
  function(x) ifelse(length(x$values)==0, x$n.hat,
408
420
  length(x$values))/x$n.total)), 2)
409
- if(peaks[[1]]$err.res < 0){
410
- err <- paste(', LL:', signif(peaks[[1]]$err.res, 3))
411
- }else{
412
- err <- paste(', err:',
413
- signif(as.numeric(lapply(peaks, function(x) x$err.res)), 2))
421
+ if (peaks[[1]]$err.res < 0) {
422
+ err <- paste(", LL:", signif(peaks[[1]]$err.res, 3))
423
+ } else {
424
+ err <- paste(
425
+ ", err:",
426
+ signif(as.numeric(lapply(peaks, function(x) x$err.res)), 2)
427
+ )
414
428
  }
415
429
  legend('topright', bty='n', cex=1/2,
416
430
  legend=paste(letters[1:length(peaks)],'. ',
417
431
  dpt,'X (', frx, '%', err, ')', sep=''))
418
432
  }
419
433
  }
420
- if(exists('4',panel.fun)) panel.fun[['4']]();
434
+ if (exists("4", panel.fun)) panel.fun[["4"]]()
421
435
  }
422
436
 
423
437
  # [5] Color scale of the counts matrix (vertical)
424
- count.bins <- 10^seq(log10(min(counts[counts>0])), log10(max(counts)),
425
- length.out=1+length(palette))
426
- if(any(layout==5)){
427
- par(mar=mar[['5']]);
438
+ count.bins <- 10^seq(
439
+ log10(min(counts[counts>0])), log10(max(counts)),
440
+ length.out = 1 + length(palette)
441
+ )
442
+ if (any(layout == 5)) {
443
+ par(mar = mar[["5"]]) # par(mar) already being watched by on.exit
428
444
  plot(1,t='n',log='y',xlim=0:1,xaxt='n',xlab='',xaxs='i',
429
445
  ylim=range(count.bins), yaxs='i', ylab='')
430
446
  rect(0,count.bins[-length(count.bins)],1,count.bins[-1],col=palette,
431
447
  border=NA)
432
- if(exists('5',panel.fun)) panel.fun[['5']]();
448
+ if (exists("5", panel.fun)) panel.fun[["5"]]()
433
449
  }
434
450
 
435
451
  # [6] Color scale of the coutnts matrix (horizontal)
436
- if(any(layout==6)){
437
- par(mar=mar[['6']]);
452
+ if (any(layout == 6)) {
453
+ par(mar = mar[["6"]]) # par(mar) already being watched by on.exit
438
454
  plot(1,t='n',log='x',ylim=0:1,yaxt='n',ylab='',yaxs='i',
439
455
  xlim=range(count.bins), xaxs='i',xlab='');
440
456
  rect(count.bins[-length(count.bins)],0,count.bins[-1],1,col=palette,
441
457
  border=NA);
442
- if(exists('6',panel.fun)) panel.fun[['6']]();
458
+ if (exists("6", panel.fun)) panel.fun[["6"]]()
443
459
  }
444
460
 
445
- par(mar=ori.mar);
446
- return(peaks);
461
+ return(peaks)
447
462
  }
448
463
 
449
464
  #==============> Define core functions
@@ -635,7 +650,7 @@ enve.recplot2 <- function(
635
650
  #'
636
651
  #' @author Luis M. Rodriguez-R [aut, cre]
637
652
  #'
638
- #' export
653
+ #' @export
639
654
 
640
655
  enve.recplot2.findPeaks <- function(
641
656
  x,
@@ -955,33 +970,39 @@ enve.recplot2.findPeaks.mower <- function(
955
970
  #' "core genome" of a population.
956
971
  #'
957
972
  #' @param x \code{list} of \code{\link{enve.RecPlot2.Peak}} objects.
973
+ #'
974
+ #' @return A \code{\link{enve.RecPlot2.Peak}} object.
958
975
  #'
959
976
  #' @author Luis M. Rodriguez-R [aut, cre]
960
977
  #'
961
978
  #' @export
962
979
 
963
- enve.recplot2.corePeak <- function
964
- (x
965
- ){
980
+ enve.recplot2.corePeak <- function(x) {
966
981
  # Find the peak with maximum depth (centrality)
967
982
  maxPeak <- x[[
968
- which.max(as.numeric(lapply(x,
969
- function(y) y$param.hat[[ length(y$param.hat) ]])))
970
- ]]
983
+ which.max(
984
+ as.numeric(
985
+ lapply(x, function(y) y$param.hat[[length(y$param.hat)]])
986
+ )
987
+ )
988
+ ]]
971
989
  # If a "larger" peak (a peak explaining more bins of the genome) is within
972
990
  # the default "merge.logdist" distance, take that one instead.
973
991
  corePeak <- maxPeak
974
- for(p in x){
975
- p.len <- ifelse(length(p$values)==0, p$n.hat, length(p$values))
992
+ for (p in x) {
993
+ p.len <- ifelse(length(p$values) == 0, p$n.hat, length(p$values))
976
994
  corePeak.len <- ifelse(
977
- length(corePeak$values)==0, corePeak$n.hat, length(corePeak$values))
978
- sz.d <- log(p.len/corePeak.len)
979
- if(is.nan(sz.d) || sz.d < 0) next
980
- sq.d.a <- as.numeric(tail(p$param.hat, n=1))
981
- sq.d.b <- as.numeric(tail(maxPeak$param.hat, n=1))
982
- if(p$log) sq.d.a <- exp(sq.d.a)
983
- if(corePeak$log) sq.d.b <- exp(sq.d.b)
984
- if(abs(log(sq.d.a/sq.d.b)) < log(1.75)+sz.d/5) corePeak <- p
995
+ length(corePeak$values) == 0,
996
+ corePeak$n.hat,
997
+ length(corePeak$values)
998
+ )
999
+ sz.d <- log(p.len / corePeak.len)
1000
+ if (is.nan(sz.d) || sz.d < 0) next
1001
+ sq.d.a <- as.numeric(tail(p$param.hat, n = 1))
1002
+ sq.d.b <- as.numeric(tail(maxPeak$param.hat, n = 1))
1003
+ if (p$log) sq.d.a <- exp(sq.d.a)
1004
+ if (corePeak$log) sq.d.b <- exp(sq.d.b)
1005
+ if (abs(log(sq.d.a / sq.d.b)) < log(1.75) + sz.d / 5) corePeak <- p
985
1006
  }
986
1007
  return(corePeak)
987
1008
  }
@@ -994,20 +1015,20 @@ enve.recplot2.corePeak <- function
994
1015
  #' \code{\link{enve.RecPlot2}} object.
995
1016
  #' @param new.cutoff
996
1017
  #' New cutoff to use.
1018
+ #'
1019
+ #' @return The modified \code{\link{enve.RecPlot2}} object.
997
1020
  #'
998
1021
  #' @author Luis M. Rodriguez-R [aut, cre]
999
1022
  #'
1000
1023
  #' @export
1001
1024
 
1002
- enve.recplot2.changeCutoff <- function
1003
- (rp,
1004
- new.cutoff=98
1005
- ){
1025
+ enve.recplot2.changeCutoff <- function(rp, new.cutoff = 98) {
1006
1026
  # Re-calculate vectors
1007
- id.mids <- (rp$id.breaks[-length(rp$id.breaks)]+rp$id.breaks[-1])/2
1008
- id.ingroup <- (id.mids > new.cutoff)
1009
- pos.counts.in <- apply(rp$counts[,id.ingroup], 1, sum)
1010
- pos.counts.out <- apply(rp$counts[,!id.ingroup], 1, sum)
1027
+ id.mids <- (rp$id.breaks[-length(rp$id.breaks)] + rp$id.breaks[-1]) / 2
1028
+ id.ingroup <- (id.mids > new.cutoff)
1029
+ pos.counts.in <- apply(rp$counts[, id.ingroup], 1, sum)
1030
+ pos.counts.out <- apply(rp$counts[, !id.ingroup], 1, sum)
1031
+
1011
1032
  # Update object
1012
1033
  attr(rp, "id.ingroup") <- id.ingroup
1013
1034
  attr(rp, "pos.counts.in") <- pos.counts.in
@@ -1024,9 +1045,9 @@ enve.recplot2.changeCutoff <- function
1024
1045
  #' @param rp
1025
1046
  #' Recruitment plot, an \code{\link{enve.RecPlot2}} object.
1026
1047
  #' @param peak
1027
- #' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
1028
- #' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
1029
- #' used (see \code{\link{enve.recplot2.corePeak}}).
1048
+ #' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
1049
+ #' be a list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
1050
+ #' peak is used (see \code{\link{enve.recplot2.corePeak}}).
1030
1051
  #' @param lower.tail
1031
1052
  #' If \code{FALSE}, it returns windows significantly above the peak in
1032
1053
  #' sequencing depth.
@@ -1041,16 +1062,16 @@ enve.recplot2.changeCutoff <- function
1041
1062
  #'
1042
1063
  #' @export
1043
1064
 
1044
- enve.recplot2.windowDepthThreshold <- function
1045
- (rp,
1046
- peak,
1047
- lower.tail=TRUE,
1048
- significance=0.05
1049
- ){
1050
- if(is.list(peak)) peak <- enve.recplot2.corePeak(peak)
1065
+ enve.recplot2.windowDepthThreshold <- function(
1066
+ rp,
1067
+ peak,
1068
+ lower.tail = TRUE,
1069
+ significance = 0.05
1070
+ ) {
1071
+ if (is.list(peak)) peak <- enve.recplot2.corePeak(peak)
1051
1072
  par <- peak$param.hat
1052
- par[["p"]] <- ifelse(lower.tail, significance, 1-significance)
1053
- thr <- do.call(ifelse(length(par)==4, qsn, qnorm), par)
1073
+ par[["p"]] <- ifelse(lower.tail, significance, 1 - significance)
1074
+ thr <- do.call(ifelse(length(par) == 4, qsn, qnorm), par)
1054
1075
  if(peak$log) thr <- exp(thr)
1055
1076
 
1056
1077
  return(thr)
@@ -1064,9 +1085,9 @@ enve.recplot2.windowDepthThreshold <- function
1064
1085
  #' @param rp
1065
1086
  #' Recruitment plot, a \code{\link{enve.RecPlot2}} object.
1066
1087
  #' @param peak
1067
- #' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
1068
- #' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
1069
- #' used (see \code{\link{enve.recplot2.corePeak}}).
1088
+ #' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
1089
+ #' be a list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
1090
+ #' peak is used (see \code{\link{enve.recplot2.corePeak}}).
1070
1091
  #' @param lower.tail
1071
1092
  #' If \code{FALSE}, it returns windows significantly above the peak in
1072
1093
  #' sequencing depth.
@@ -1089,21 +1110,21 @@ enve.recplot2.windowDepthThreshold <- function
1089
1110
  #'
1090
1111
  #' @export
1091
1112
 
1092
- enve.recplot2.extractWindows <- function
1093
- (rp,
1094
- peak,
1095
- lower.tail = TRUE,
1096
- significance = 0.05,
1097
- seq.names = FALSE
1098
- ){
1113
+ enve.recplot2.extractWindows <- function(
1114
+ rp,
1115
+ peak,
1116
+ lower.tail = TRUE,
1117
+ significance = 0.05,
1118
+ seq.names = FALSE
1119
+ ) {
1099
1120
  # Determine the threshold
1100
1121
  thr <- enve.recplot2.windowDepthThreshold(rp, peak, lower.tail, significance)
1101
1122
 
1102
1123
  # Select windows past the threshold
1103
1124
  seqdepth.in <- enve.recplot2.seqdepth(rp)
1104
- if(lower.tail){
1125
+ if (lower.tail) {
1105
1126
  sel <- seqdepth.in < thr
1106
- }else{
1127
+ } else {
1107
1128
  sel <- seqdepth.in > thr
1108
1129
  }
1109
1130
 
@@ -1117,7 +1138,7 @@ enve.recplot2.extractWindows <- function
1117
1138
  #'
1118
1139
  #' Compare the distribution of identities between two
1119
1140
  #' \code{\link{enve.RecPlot2}} objects.
1120
- #'
1141
+ #'
1121
1142
  #' @param x
1122
1143
  #' First \code{\link{enve.RecPlot2}} object.
1123
1144
  #' @param y
@@ -1126,10 +1147,12 @@ enve.recplot2.extractWindows <- function
1126
1147
  #' Distance method to use. This should be (an unambiguous abbreviation of)
1127
1148
  #' one of:
1128
1149
  #' \itemize{
1129
- #' \item{"hellinger" (\emph{Hellinger, 1090, doi:10.1515/crll.1909.136.210}),}
1130
- #' \item{"bhattacharyya" (\emph{Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35}),}
1131
- #' \item{"kl" or "kullback-leibler" (\emph{Kullback & Leibler, 1951,
1132
- #' doi:10.1214/aoms/1177729694}), or}
1150
+ #' \item{"hellinger"
1151
+ #' (\emph{Hellinger, 1090, doi:10.1515/crll.1909.136.210}),}
1152
+ #' \item{"bhattacharyya"
1153
+ #' (\emph{Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35}),}
1154
+ #' \item{"kl" or "kullback-leibler"
1155
+ #' (\emph{Kullback & Leibler, 1951, doi:10.1214/aoms/1177729694}), or}
1133
1156
  #' \item{"euclidean"}
1134
1157
  #' }
1135
1158
  #' @param smooth.par
@@ -1142,56 +1165,66 @@ enve.recplot2.extractWindows <- function
1142
1165
  #' @param max.deviation
1143
1166
  #' Maximum mean deviation between identity breaks tolerated (as percent
1144
1167
  #' identity). Difference in number of \code{id.breaks} is never tolerated.
1145
- #'
1168
+ #'
1169
+ #' @return A \strong{numeric} indicating the distance between the objects.
1170
+ #'
1146
1171
  #' @author Luis M. Rodriguez-R [aut, cre]
1147
1172
  #'
1148
1173
  #' @export
1149
1174
 
1150
1175
  enve.recplot2.compareIdentities <- function
1151
- (x,
1152
- y,
1153
- method="hellinger",
1154
- smooth.par=NULL,
1155
- pseudocounts=0,
1156
- max.deviation=0.75
1157
- ){
1158
- METHODS <- c("hellinger","bhattacharyya","kullback-leibler","kl","euclidean")
1176
+ (
1177
+ x,
1178
+ y,
1179
+ method = "hellinger",
1180
+ smooth.par = NULL,
1181
+ pseudocounts = 0,
1182
+ max.deviation = 0.75
1183
+ ) {
1184
+ # Sanity checks
1185
+ METHODS <- c(
1186
+ "hellinger", "bhattacharyya", "kullback-leibler", "kl", "euclidean"
1187
+ )
1159
1188
  i.meth <- pmatch(method, METHODS)
1160
1189
  if (is.na(i.meth)) stop("Invalid distance ", method)
1161
- if(!inherits(x, "enve.RecPlot2"))
1190
+ if (!inherits(x, "enve.RecPlot2"))
1162
1191
  stop("'x' must inherit from class `enve.RecPlot2`")
1163
- if(!inherits(y, "enve.RecPlot2"))
1192
+ if (!inherits(y, "enve.RecPlot2"))
1164
1193
  stop("'y' must inherit from class `enve.RecPlot2`")
1165
- if(length(x$id.breaks) != length(y$id.breaks))
1194
+ if (length(x$id.breaks) != length(y$id.breaks))
1166
1195
  stop("'x' and 'y' must have the same number of `id.breaks`")
1167
1196
  dev <- mean(abs(x$id.breaks - y$id.breaks))
1168
- if(dev > max.deviation)
1197
+ if (dev > max.deviation)
1169
1198
  stop("'x' and 'y' must have similar `id.breaks`; exceeding max.deviation: ",
1170
1199
  dev)
1200
+
1201
+ # Initialize
1171
1202
  x.cnt <- x$id.counts
1172
1203
  y.cnt <- y$id.counts
1173
- if(is.null(smooth.par) || smooth.par > 0){
1174
- x.mids <- (x$id.breaks[-1] + x$id.breaks[-length(x$id.breaks)])/2
1175
- y.mids <- (y$id.breaks[-1] + y$id.breaks[-length(y$id.breaks)])/2
1176
- p.spline <- smooth.spline(x.mids, x.cnt, spar=smooth.par)
1177
- q.spline <- smooth.spline(y.mids, y.cnt, spar=smooth.par)
1204
+ if (is.null(smooth.par) || smooth.par > 0){
1205
+ x.mids <- (x$id.breaks[-1] + x$id.breaks[-length(x$id.breaks)]) / 2
1206
+ y.mids <- (y$id.breaks[-1] + y$id.breaks[-length(y$id.breaks)]) / 2
1207
+ p.spline <- smooth.spline(x.mids, x.cnt, spar = smooth.par)
1208
+ q.spline <- smooth.spline(y.mids, y.cnt, spar = smooth.par)
1178
1209
  x.cnt <- pmax(p.spline$y, 0)
1179
1210
  y.cnt <- pmax(q.spline$y, 0)
1180
1211
  }
1212
+
1181
1213
  a <- as.numeric(pseudocounts)
1182
1214
  p <- (x.cnt + a) / sum(x.cnt + a)
1183
1215
  q <- (y.cnt + a) / sum(y.cnt + a)
1184
1216
  d <- NA
1185
- if(i.meth %in% c(1L, 2L)){
1186
- d <- sqrt(sum((sqrt(p) - sqrt(q))**2))/sqrt(2)
1187
- if(i.meth==2L) d <- 1 - d**2
1188
- }else if(i.meth %in% c(3L, 4L)){
1189
- sel <- p>0
1190
- if(any(q[sel]==0))
1217
+
1218
+ if (i.meth %in% c(1L, 2L)) {
1219
+ d <- sqrt(sum((sqrt(p) - sqrt(q))**2)) / sqrt(2)
1220
+ if(i.meth == 2L) d <- 1 - d**2
1221
+ } else if (i.meth %in% c(3L, 4L)) {
1222
+ sel <- p > 0
1223
+ if (any(q[sel] == 0))
1191
1224
  stop("Undefined distance without absolute continuity, use pseudocounts")
1192
- d <- -sum(p[sel]*log(q[sel]/p[sel]))
1193
- }else if(i.meth == 5L){
1194
- d <- sqrt(sum((q-p)**2))
1225
+ d <- -sum(p[sel] * log(q[sel] / p[sel]))
1226
+ } else if (i.meth == 5L) {
1227
+ d <- sqrt(sum((q - p)**2))
1195
1228
  }
1196
1229
  return(d)
1197
1230
  }
@@ -1219,19 +1252,16 @@ enve.recplot2.compareIdentities <- function
1219
1252
  #'
1220
1253
  #' @export
1221
1254
 
1222
- enve.recplot2.coordinates <- function
1223
- (x,
1224
- bins
1225
- ){
1226
- if(!inherits(x, "enve.RecPlot2"))
1255
+ enve.recplot2.coordinates <- function(x, bins) {
1256
+ if (!inherits(x, "enve.RecPlot2"))
1227
1257
  stop("'x' must inherit from class `enve.RecPlot2`")
1228
- if(missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
1229
- if(!is.vector(bins)) stop("'bins' must be a vector")
1230
- if(inherits(bins, "logical")) bins <- which(bins)
1258
+ if (missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
1259
+ if (!is.vector(bins)) stop("'bins' must be a vector")
1260
+ if (inherits(bins, "logical")) bins <- which(bins)
1231
1261
 
1232
1262
  y <- data.frame(stringsAsFactors = FALSE, row.names = bins)
1233
1263
 
1234
- for(i in 1:length(bins)){
1264
+ for (i in 1:length(bins)) {
1235
1265
  j <- bins[i]
1236
1266
  # Concatenated coordinates
1237
1267
  cc <- x$pos.breaks[c(j, j+1)]
@@ -1243,7 +1273,7 @@ enve.recplot2.coordinates <- function
1243
1273
  cc[2] > x$seq.breaks[-length(x$seq.breaks)] &
1244
1274
  cc[2] <= x$seq.breaks[-1])
1245
1275
  # Translate coordinates
1246
- if(length(sb.from)==1 & length(sb.to)==1){
1276
+ if (length(sb.from) == 1 & length(sb.to) == 1) {
1247
1277
  y[i, 'name.from'] <- x$seq.names[sb.from]
1248
1278
  y[i, 'pos.from'] <- floor(x$seq.breaks[sb.from] + cc[1] - 1)
1249
1279
  y[i, 'name.to'] <- x$seq.names[sb.to]
@@ -1276,23 +1306,18 @@ enve.recplot2.coordinates <- function
1276
1306
  #'
1277
1307
  #' @export
1278
1308
 
1279
- enve.recplot2.seqdepth <- function
1280
-
1281
- (x,
1282
- sel,
1283
- low.identity=FALSE
1284
- ){
1285
- if(!inherits(x, "enve.RecPlot2"))
1309
+ enve.recplot2.seqdepth <- function(x, sel, low.identity = FALSE) {
1310
+ if (!inherits(x, "enve.RecPlot2"))
1286
1311
  stop("'x' must inherit from class `enve.RecPlot2`")
1287
- if(low.identity){
1312
+ if (low.identity) {
1288
1313
  pos.cnts.in <- x$pos.counts.out
1289
- }else{
1314
+ } else {
1290
1315
  pos.cnts.in <- x$pos.counts.in
1291
1316
  }
1292
1317
  pos.breaks <- x$pos.breaks
1293
1318
  pos.binsize <- (pos.breaks[-1] - pos.breaks[-length(pos.breaks)])
1294
1319
  seqdepth.in <- pos.cnts.in/pos.binsize
1295
- if(missing(sel)) return(seqdepth.in)
1320
+ if (missing(sel)) return(seqdepth.in)
1296
1321
  return(seqdepth.in[sel])
1297
1322
  }
1298
1323
 
@@ -1307,8 +1332,10 @@ enve.recplot2.seqdepth <- function
1307
1332
  #' Range of identities to be considered. By default, the full range
1308
1333
  #' is used (note that the upper boundary is \code{Inf} and not 100 because
1309
1334
  #' recruitment plots can also be built with bit-scores). To use only
1310
- #' intra-population matches (with identities), use c(95,100). To use only
1311
- #' inter-population values, use c(0,95).
1335
+ #' intra-population matches (with identities), use \code{c(95, 100)}. To use
1336
+ #' only inter-population values, use \code{c(0, 95)}.
1337
+ #'
1338
+ #' @return A numeric value indicating the ANIr (as percentage).
1312
1339
  #'
1313
1340
  #' @author Luis M. Rodriguez-R [aut, cre]
1314
1341
  #'
@@ -1338,43 +1365,49 @@ enve.recplot2.ANIr <- function
1338
1365
  #' @param pos.breaks Position breaks
1339
1366
  #' @param id.breaks Identity breaks
1340
1367
  #' @param rec.idcol Identity column to use
1368
+ #'
1369
+ #' @return 2-dimensional matrix of counts per identity and position bins.
1341
1370
  #'
1342
1371
  #' @author Luis M. Rodriguez-R [aut, cre]
1343
1372
  #' @author Kenji Gerhardt [aut]
1344
1373
  #'
1345
1374
  #' @export
1346
1375
 
1347
- enve.recplot2.__counts <- function
1348
- (x, pos.breaks, id.breaks, rec.idcol) {
1376
+ enve.recplot2.__counts <- function(x, pos.breaks, id.breaks, rec.idcol) {
1349
1377
  rec2 <- x$rec
1350
1378
  verbose <- x$verbose
1351
1379
 
1352
1380
  # get counts of how many occurrences of each genome pos.bin there are per read
1353
1381
  x.bins <- mapply(
1354
1382
  function(start, end) {
1355
- list(rle(findInterval(start:end, pos.breaks, left.open = T)))
1356
- }, rec2[, 1], rec2[, 2])
1383
+ list(rle(findInterval(start:end, pos.breaks, left.open = TRUE)))
1384
+ },
1385
+ rec2[, 1], rec2[, 2]
1386
+ )
1357
1387
 
1358
1388
  # find the single y bin for each row, replicates it at the correct places to
1359
1389
  # the number of distinct bins found in its row
1360
- y.bins <- rep(findInterval(rec2[, rec.idcol], id.breaks, left.open = T),
1390
+ y.bins <- rep(findInterval(rec2[, rec.idcol], id.breaks, left.open = TRUE),
1361
1391
  times = unlist(lapply(x.bins, function(a) length(a$lengths))))
1362
1392
 
1363
1393
  # x.bins_counts is the number of occurrences of each bin a row contains,
1364
1394
  # per row, then unlisted
1365
1395
  x.bins_counts <- unlist(lapply(x.bins, function(a) a$lengths))
1366
1396
 
1367
- # these are the pos. in. genome bins that each count in x.bins_counts falls into
1397
+ # these are the pos. in. genome bins that each count in x.bins_counts falls
1398
+ # into
1368
1399
  x.bins <- unlist(lapply(x.bins, function(a) a$values))
1369
1400
 
1370
- # much more efficient counts implementation in R using lists instead of a matrix:
1401
+ # much more efficient counts implementation in R using lists instead of a
1402
+ # matrix:
1371
1403
  counts <- lapply(
1372
1404
  1:(length(pos.breaks) - 1),
1373
- function(col_len) rep(0, length(id.breaks) - 1))
1405
+ function(col_len) rep(0, length(id.breaks) - 1)
1406
+ )
1374
1407
 
1375
1408
  # accesses the correct list in counts by x.bin, then
1376
1409
  # accesses the position in that row by y.bins and adds the new count
1377
- for(i in 1:length(x.bins)){
1410
+ for (i in 1:length(x.bins)) {
1378
1411
  counts[[x.bins[i]]][y.bins[i]] <- counts[[x.bins[i]]][y.bins[i]] + x.bins_counts[i]
1379
1412
  }
1380
1413
 
@@ -1384,32 +1417,38 @@ enve.recplot2.__counts <- function
1384
1417
 
1385
1418
  #' Enveomics: Recruitment Plot (2) EMauto Peak Finder - Internal Ancillary Function
1386
1419
  #'
1387
- #' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.emauto}}).
1420
+ #' Internal ancillary function (see
1421
+ #' \code{\link{enve.recplot2.findPeaks.emauto}}).
1388
1422
  #'
1389
- #' @param x \code{\link{enve.RecPlot2}} object
1390
- #' @param comp Components
1391
- #' @param do_crit Function estimating the criterion
1392
- #' @param best Best solution thus far
1393
- #' @param verbose If verbose
1394
- #' @param ... Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}
1423
+ #' @param x \code{\link{enve.RecPlot2}} object.
1424
+ #' @param comp Components.
1425
+ #' @param do_crit Function estimating the criterion.
1426
+ #' @param best Best solution thus far.
1427
+ #' @param verbose If verbose.
1428
+ #' @param ...
1429
+ #' Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}.
1430
+ #'
1431
+ #' @return Updated solution with the same structure as \code{best}.
1395
1432
  #'
1396
1433
  #' @author Luis M. Rodriguez-R [aut, cre]
1397
1434
  #'
1398
1435
  #' @export
1399
1436
 
1400
- enve.recplot2.findPeaks.__emauto_one <- function
1401
- (x, comp, do_crit, best, verbose, ...){
1402
- peaks <- enve.recplot2.findPeaks.em(x=x, components=comp, ...)
1403
- if(length(peaks)==0) return(best)
1404
- k <- comp*3 - 1 # mean & sd for each component, and n-1 free alpha parameters
1437
+ enve.recplot2.findPeaks.__emauto_one <- function(
1438
+ x, comp, do_crit, best, verbose, ...
1439
+ ) {
1440
+ peaks <- enve.recplot2.findPeaks.em(x = x, components = comp, ...)
1441
+ if (length(peaks) == 0) return(best)
1442
+ k <- comp * 3 - 1 # mean & sd for each component, and n-1 free alpha params
1405
1443
  crit <- do_crit(peaks[[1]]$err.res, k, peaks[[1]]$n.total)
1406
- if(verbose) cat(comp,'\t| LL =', peaks[[1]]$err.res, '\t| Estimate =', crit,
1407
- ifelse(crit > best[['crit']], '*', ''), '\n')
1408
- if(crit > best[['crit']]){
1409
- best[['crit']] <- crit
1410
- best[['peaks']] <- peaks
1444
+ if(verbose)
1445
+ cat(comp, "\t| LL =", peaks[[1]]$err.res, "\t| Estimate =", crit,
1446
+ ifelse(crit > best[["crit"]], "*", ""), "\n")
1447
+ if(crit > best[["crit"]]){
1448
+ best[["crit"]] <- crit
1449
+ best[["peaks"]] <- peaks
1411
1450
  }
1412
- best[['pstore']][[comp]] <- peaks
1451
+ best[["pstore"]][[comp]] <- peaks
1413
1452
  return(best)
1414
1453
  }
1415
1454
 
@@ -1419,13 +1458,15 @@ enve.recplot2.findPeaks.__emauto_one <- function
1419
1458
  #'
1420
1459
  #' @param x Vector of log-transformed sequencing depths
1421
1460
  #' @param theta Parameters list
1461
+ #'
1462
+ #' @return A list with components \code{ll} (numeric) the log-likelihood, and
1463
+ #' \code{posterior} (numeric) the posterior probability.
1422
1464
  #'
1423
1465
  #' @author Luis M. Rodriguez-R [aut, cre]
1424
1466
  #'
1425
1467
  #' @export
1426
1468
 
1427
- enve.recplot2.findPeaks.__em_e <- function
1428
- (x, theta){
1469
+ enve.recplot2.findPeaks.__em_e <- function(x, theta) {
1429
1470
  components <- length(theta[['mu']])
1430
1471
  product <- do.call(cbind,
1431
1472
  lapply(1:components,
@@ -1437,7 +1478,7 @@ enve.recplot2.findPeaks.__em_e <- function
1437
1478
  cat(i,'/',nrow(product), ':', product[i,], '\n')
1438
1479
  }
1439
1480
 
1440
- return(list(ll=sum(log(sum.of.components)), posterior=posterior))
1481
+ return(list(ll = sum(log(sum.of.components)), posterior = posterior))
1441
1482
  }
1442
1483
 
1443
1484
  #' Enveomics: Recruitment Plot (2) Em Peak Finder - Internal Ancillary Function Maximization
@@ -1446,20 +1487,23 @@ enve.recplot2.findPeaks.__em_e <- function
1446
1487
  #'
1447
1488
  #' @param x Vector of log-transformed sequencing depths
1448
1489
  #' @param posterior Posterior probability
1490
+ #'
1491
+ #' @return A list with components \code{mu} (numeric) the estimated mean,
1492
+ #' \code{sd} (numeric) the estimated standard deviation, and \code{alpha}
1493
+ #' (numeric) the estimated alpha parameter.
1449
1494
  #'
1450
1495
  #' @author Luis M. Rodriguez-R [aut, cre]
1451
1496
  #'
1452
1497
  #' @export
1453
1498
 
1454
- enve.recplot2.findPeaks.__em_m <- function
1455
- (x, posterior){
1499
+ enve.recplot2.findPeaks.__em_m <- function(x, posterior) {
1456
1500
  components <- ncol(posterior)
1457
1501
  n <- colSums(posterior)
1458
1502
  mu <- colSums(posterior * x) / n
1459
1503
  sd <- sqrt( colSums(
1460
1504
  posterior * (matrix(rep(x,components), ncol=components) - mu)^2) / n )
1461
1505
  alpha <- n/length(x)
1462
- return(list(mu=mu, sd=sd, alpha=alpha))
1506
+ return(list(mu = mu, sd = sd, alpha = alpha))
1463
1507
  }
1464
1508
 
1465
1509
  #' Enveomics: Recruitment Plot (2) Peak S4 Class - Internal Ancillary Function
@@ -1469,29 +1513,31 @@ enve.recplot2.findPeaks.__em_m <- function
1469
1513
  #' @param x \code{\link{enve.RecPlot2.Peak}} object
1470
1514
  #' @param mids Midpoints
1471
1515
  #' @param counts Counts
1516
+ #'
1517
+ #' @return A numeric vector of counts (histogram)
1472
1518
  #'
1473
1519
  #' @author Luis M. Rodriguez-R [aut, cre]
1474
1520
  #'
1475
1521
  #' @export
1476
1522
 
1477
- enve.recplot2.__peakHist <- function
1478
- (x, mids, counts=TRUE){
1523
+ enve.recplot2.__peakHist <- function(x, mids, counts = TRUE){
1479
1524
  d.o <- x$param.hat
1480
- if(length(x$log)==0) x$log <- FALSE
1481
- if(x$log){
1525
+ if (length(x$log) == 0) x$log <- FALSE
1526
+ if (x$log) {
1482
1527
  d.o$x <- log(mids)
1483
- }else{
1528
+ } else {
1484
1529
  d.o$x <- mids
1485
1530
  }
1486
- prob <- do.call(paste('d', x$dist, sep=''), d.o)
1531
+ prob <- do.call(paste('d', x$dist, sep = ""), d.o)
1487
1532
  if(!counts) return(prob)
1488
1533
  if(length(x$values)>0) return(prob*length(x$values)/sum(prob))
1489
- return(prob*x$n.hat/sum(prob))
1534
+ return(prob * x$n.hat / sum(prob))
1490
1535
  }
1491
1536
 
1492
1537
  #' Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 1
1493
1538
  #'
1494
- #' Internall ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
1539
+ #' Internal ancillary function (see
1540
+ #' \code{\link{enve.recplot2.findPeaks.mower}}).
1495
1541
  #'
1496
1542
  #' @param lsd1 Vector of log-transformed sequencing depths
1497
1543
  #' @param min.points Minimum number of points
@@ -1505,85 +1551,100 @@ enve.recplot2.__peakHist <- function
1505
1551
  #' @param merge.logdist Attempted \code{merge.logdist} parameter
1506
1552
  #' @param verbose If verbose
1507
1553
  #' @param log If log-transformed depths
1554
+ #'
1555
+ #' @return Return an \code{enve.RecPlot2.Peak} object.
1508
1556
  #'
1509
1557
  #' @author Luis M. Rodriguez-R [aut, cre]
1510
1558
  #'
1511
1559
  #' @export
1512
1560
 
1513
- enve.recplot2.findPeaks.__mow_one <- function
1514
- (lsd1, min.points, quant.est, mlv.opts, fitdist.opts, with.skewness,
1515
- optim.rounds, optim.epsilon, n.total, merge.logdist, verbose, log
1516
- ){
1517
- dist <- ifelse(with.skewness, 'sn', 'norm');
1561
+ enve.recplot2.findPeaks.__mow_one <- function(
1562
+ lsd1, min.points, quant.est, mlv.opts, fitdist.opts, with.skewness,
1563
+ optim.rounds, optim.epsilon, n.total, merge.logdist, verbose, log
1564
+ ) {
1565
+ dist <- ifelse(with.skewness, "sn", "norm")
1518
1566
 
1519
1567
  # Find peak
1520
- o <- mlv.opts; o$x = lsd1;
1521
- mode1 <- median(lsd1); # mode1 <- do.call(mlv, o)$M;
1522
- if(verbose) cat('Anchoring at mode =',mode1,'\n')
1523
- param.hat <- fitdist.opts$start; last.hat <- param.hat;
1524
- lim <- NA;
1525
- if(with.skewness){ param.hat$xi <- mode1 }else{ param.hat$mean <- mode1 }
1568
+ o <- mlv.opts
1569
+ o$x <- lsd1
1570
+ mode1 <- median(lsd1) # mode1 <- do.call(mlv, o)$M;
1571
+ if (verbose) cat("Anchoring at mode =", mode1, "\n")
1572
+ param.hat <- fitdist.opts$start
1573
+ last.hat <- param.hat
1574
+ lim <- NA
1575
+ if (with.skewness) { param.hat$xi <- mode1 } else { param.hat$mean <- mode1 }
1526
1576
 
1527
1577
  # Refine peak parameters
1528
- for(round in 1:optim.rounds){
1529
- param.hat[[ 1 ]] <- param.hat[[ 1 ]]/diff(quant.est)# <- expand dispersion
1578
+ for (round in 1:optim.rounds) {
1579
+ param.hat[[ 1 ]] <- param.hat[[1]] / diff(quant.est) # <- expand dispersion
1530
1580
  lim.o <- param.hat
1531
- lim.o$p <- quant.est; lim <- do.call(paste('q',dist,sep=''), lim.o)
1532
- lsd1.pop <- lsd1[(lsd1>lim[1]) & (lsd1<lim[2])];
1533
- if(verbose) cat(' Round', round, 'with n =',length(lsd1.pop),
1534
- 'and params =',as.numeric(param.hat),' \r')
1535
- if(length(lsd1.pop) < min.points) break;
1536
- o <- fitdist.opts; o$data = lsd1.pop; o$start = param.hat;
1581
+ lim.o$p <- quant.est
1582
+ lim <- do.call(paste("q", dist, sep = ""), lim.o)
1583
+ lsd1.pop <- lsd1[(lsd1 > lim[1]) & (lsd1 < lim[2])]
1584
+ if (verbose)
1585
+ cat(" Round", round, "with n =", length(lsd1.pop),
1586
+ "and params =", as.numeric(param.hat), " \r")
1587
+ if (length(lsd1.pop) < min.points) break
1588
+ o <- fitdist.opts
1589
+ o$data <- lsd1.pop
1590
+ o$start <- param.hat
1537
1591
  last.last.hat <- last.hat
1538
1592
  last.hat <- param.hat
1539
- param.hat <- as.list(do.call(fitdist, o)$estimate);
1540
- if(any(is.na(param.hat))){
1541
- if(round>1) param.hat <- last.hat;
1542
- break;
1593
+ param.hat <- as.list(do.call(fitdist, o)$estimate)
1594
+ if (any(is.na(param.hat))) {
1595
+ if (round > 1) param.hat <- last.hat
1596
+ break
1543
1597
  }
1544
- if(round > 1){
1545
- epsilon1 <- sum((as.numeric(last.hat)-as.numeric(param.hat))^2)
1546
- if(epsilon1 < optim.epsilon) break;
1547
- if(round > 2){
1548
- epsilon2 <- sum((as.numeric(last.last.hat)-as.numeric(param.hat))^2)
1549
- if(epsilon2 < optim.epsilon) break;
1598
+ if (round > 1) {
1599
+ epsilon1 <- sum((as.numeric(last.hat) - as.numeric(param.hat))^2)
1600
+ if (epsilon1 < optim.epsilon) break
1601
+ if (round > 2) {
1602
+ epsilon2 <- sum((as.numeric(last.last.hat) - as.numeric(param.hat))^2)
1603
+ if (epsilon2 < optim.epsilon) break
1550
1604
  }
1551
1605
  }
1552
1606
  }
1553
- if(verbose) cat('\n')
1554
- if(is.na(param.hat[1]) | is.na(lim[1])) return(NULL);
1607
+ if (verbose) cat("\n")
1608
+ if (is.na(param.hat[1]) | is.na(lim[1])) return(NULL)
1555
1609
 
1556
1610
  # Mow distribution
1557
- lsd2 <- c();
1558
- lsd.pop <- c();
1559
- n.hat <- length(lsd1.pop)/diff(quant.est)
1560
- peak <- new('enve.RecPlot2.Peak', dist=dist, values=as.numeric(), mode=mode1,
1561
- param.hat=param.hat, n.hat=n.hat, n.total=n.total,
1562
- merge.logdist=merge.logdist, log=log)
1563
- peak.breaks <- seq(min(lsd1), max(lsd1), length=20)
1564
- peak.cnt <- enve.recplot2.__peakHist(peak,
1565
- (peak.breaks[-length(peak.breaks)]+peak.breaks[-1])/2)
1566
- for(i in 2:length(peak.breaks)){
1567
- values <- lsd1[ (lsd1 >= peak.breaks[i-1]) & (lsd1 < peak.breaks[i]) ]
1568
- n.exp <- peak.cnt[i-1]
1569
- if(is.na(n.exp) | n.exp==0) n.exp <- 0.1
1570
- if(length(values)==0) next
1571
- in.peak <- runif(length(values)) <= n.exp/length(values)
1611
+ lsd2 <- c()
1612
+ lsd.pop <- c()
1613
+ n.hat <- length(lsd1.pop) / diff(quant.est)
1614
+ peak <- new(
1615
+ "enve.RecPlot2.Peak", dist = dist, values = as.numeric(), mode = mode1,
1616
+ param.hat = param.hat, n.hat = n.hat, n.total = n.total,
1617
+ merge.logdist = merge.logdist, log = log
1618
+ )
1619
+ peak.breaks <- seq(min(lsd1), max(lsd1), length = 20)
1620
+ peak.cnt <- enve.recplot2.__peakHist(
1621
+ peak, (peak.breaks[-length(peak.breaks)] + peak.breaks[-1]) / 2
1622
+ )
1623
+ for (i in 2:length(peak.breaks)) {
1624
+ values <- lsd1[(lsd1 >= peak.breaks[i-1]) & (lsd1 < peak.breaks[i])]
1625
+ n.exp <- peak.cnt[i - 1]
1626
+ if (is.na(n.exp) | n.exp == 0) n.exp <- 0.1
1627
+ if (length(values) == 0) next
1628
+ in.peak <- runif(length(values)) <= n.exp / length(values)
1572
1629
  lsd2 <- c(lsd2, values[!in.peak])
1573
1630
  lsd.pop <- c(lsd.pop, values[in.peak])
1574
1631
  }
1575
- if(length(lsd.pop) < min.points) return(NULL)
1632
+ if (length(lsd.pop) < min.points) return(NULL)
1576
1633
 
1577
1634
  # Return peak
1578
- attr(peak, 'values') <- lsd.pop
1579
- attr(peak, 'values.res') <- lsd2
1580
- attr(peak, 'err.res') <- 1-(cor(hist(lsd.pop, breaks=peak.breaks,
1581
- plot=FALSE)$counts, hist(lsd1, breaks=peak.breaks,
1582
- plot=FALSE)$counts)+1)/2
1583
- mu <- tail(param.hat, n=1)
1584
- attr(peak, 'seq.depth') <- ifelse(log, exp(mu), mu)
1585
- if(verbose) cat(' Extracted peak with n =',length(lsd.pop),
1586
- 'with expected n =',n.hat,'\n')
1635
+ attr(peak, "values") <- lsd.pop
1636
+ attr(peak, "values.res") <- lsd2
1637
+ attr(peak, "err.res") <- 1 - 0.5 * (
1638
+ cor(
1639
+ hist(lsd.pop, breaks = peak.breaks, plot = FALSE)$counts,
1640
+ hist(lsd1, breaks = peak.breaks, plot = FALSE)$counts
1641
+ ) + 1
1642
+ )
1643
+ mu <- tail(param.hat, n = 1)
1644
+ attr(peak, "seq.depth") <- ifelse(log, exp(mu), mu)
1645
+ if(verbose)
1646
+ cat(" Extracted peak with n =", length(lsd.pop),
1647
+ "with expected n =", n.hat, "\n")
1587
1648
  return(peak)
1588
1649
  }
1589
1650
 
@@ -1593,17 +1654,18 @@ enve.recplot2.findPeaks.__mow_one <- function
1593
1654
  #'
1594
1655
  #' @param peaks.opts List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}
1595
1656
  #'
1657
+ #' @return A list of \code{enve.RecPlot2.Peak} objects.
1658
+ #'
1596
1659
  #' @author Luis M. Rodriguez-R [aut, cre]
1597
1660
  #'
1598
1661
  #' @export
1599
1662
 
1600
- enve.recplot2.findPeaks.__mower <- function
1601
- (peaks.opts){
1663
+ enve.recplot2.findPeaks.__mower <- function(peaks.opts) {
1602
1664
  peaks <- list()
1603
- while(length(peaks.opts$lsd1) > peaks.opts$min.points){
1665
+ while (length(peaks.opts$lsd1) > peaks.opts$min.points) {
1604
1666
  peak <- do.call(enve.recplot2.findPeaks.__mow_one, peaks.opts)
1605
- if(is.null(peak)) break
1606
- peaks[[ length(peaks)+1 ]] <- peak
1667
+ if (is.null(peak)) break
1668
+ peaks[[length(peaks) + 1]] <- peak
1607
1669
  peaks.opts$lsd1 <- peak$values.res
1608
1670
  }
1609
1671
  return(peaks)
@@ -1616,16 +1678,21 @@ enve.recplot2.findPeaks.__mower <- function
1616
1678
  #' @param peak Query \code{\link{enve.RecPlot2.Peak}} object
1617
1679
  #' @param peaks list of \code{\link{enve.RecPlot2.Peak}} objects
1618
1680
  #'
1681
+ #' @return A numeric index out of \code{peaks}.
1682
+ #'
1619
1683
  #' @author Luis M. Rodriguez-R [aut, cre]
1620
1684
  #'
1621
1685
  #' @export
1622
1686
 
1623
- enve.recplot2.__whichClosestPeak <- function
1624
- (peak, peaks){
1625
- dist <- as.numeric(lapply(peaks,
1626
- function(x)
1627
- abs(log(x$param.hat[[ length(x$param.hat) ]] /
1628
- peak$param.hat[[ length(peak$param.hat) ]] ))))
1629
- dist[ dist==0 ] <- Inf
1687
+ enve.recplot2.__whichClosestPeak <- function(peak, peaks){
1688
+ dist <- as.numeric(
1689
+ lapply(
1690
+ peaks,
1691
+ function(x)
1692
+ abs(log(x$param.hat[[length(x$param.hat)]] /
1693
+ peak$param.hat[[length(peak$param.hat)]]))
1694
+ )
1695
+ )
1696
+ dist[dist == 0] <- Inf
1630
1697
  return(which.min(dist))
1631
1698
  }