miga-base 1.2.17.1 → 1.2.17.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/remote_dataset/download.rb +1 -1
  3. data/lib/miga/remote_dataset.rb +9 -4
  4. data/lib/miga/version.rb +2 -2
  5. data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
  6. data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
  7. data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
  8. data/utils/enveomics/Scripts/SRA.download.bash +14 -2
  9. data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
  10. data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
  11. data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
  12. data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
  13. data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
  14. data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
  15. data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
  16. data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
  17. data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
  18. data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
  19. data/utils/enveomics/enveomics.R/R/utils.R +30 -20
  20. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
  21. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
  22. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
  23. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
  24. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
  25. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
  26. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
  27. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
  28. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
  29. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
  30. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
  31. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
  32. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
  33. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
  34. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
  35. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
  36. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
  37. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
  38. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
  39. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
  40. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
  41. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
  42. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
  43. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
  44. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
  45. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
  46. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
  48. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
  49. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
  50. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
  51. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
  52. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
  53. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
  54. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
  55. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
  56. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
  57. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
  58. metadata +3 -37
  59. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  60. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  61. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  62. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  63. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  64. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  65. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  66. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  67. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  68. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  69. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  70. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  71. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  72. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  73. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  74. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  75. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  76. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  77. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  78. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  79. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  80. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  81. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  82. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  83. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  84. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  85. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  86. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  87. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  88. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  89. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  90. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  91. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  92. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  93. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
@@ -6,8 +6,10 @@
6
6
  #' be produced by \code{\link{enve.recplot2}} and supports S4 method plot.
7
7
  #'
8
8
  #' @slot counts \code{(matrix)} Counts as a two-dimensional histogram.
9
- #' @slot pos.counts.in \code{(numeric)} Counts of in-group hits per position bin.
10
- #' @slot pos.counts.out \code{(numeric)} Counts of out-group hits per position bin.
9
+ #' @slot pos.counts.in
10
+ #' \code{(numeric)} Counts of in-group hits per position bin.
11
+ #' @slot pos.counts.out
12
+ #' \code{(numeric)} Counts of out-group hits per position bin.
11
13
  #' @slot id.counts \code{(numeric)} Counts per ID bin.
12
14
  #' @slot id.breaks \code{(numeric)} Breaks of identity bins.
13
15
  #' @slot pos.breaks \code{(numeric)} Breaks of position bins.
@@ -208,42 +210,50 @@ setMethod("$", "enve.RecPlot2.Peak", function(x, name) attr(x, name))
208
210
  #' @export
209
211
 
210
212
  plot.enve.RecPlot2 <- function
211
- (x,
212
- layout=matrix(c(5,5,2,1,4,3), nrow=2),
213
- panel.fun=list(),
214
- widths=c(1,7,2),
215
- heights=c(1,2),
216
- palette=grey((100:0)/100),
217
- underlay.group=TRUE,
218
- peaks.col='darkred',
219
- use.peaks,
220
- id.lim=range(x$id.breaks),
221
- pos.lim=range(x$pos.breaks),
222
- pos.units=c('Mbp','Kbp','bp'),
223
- mar=list('1'=c(5,4,1,1)+.1, '2'=c(ifelse(any(layout==1),1,5),4,4,1)+.1,
224
- '3'=c(5,ifelse(any(layout==1),1,4),1,2)+0.1,
225
- '4'=c(ifelse(any(layout==1),1,5),ifelse(any(layout==2),1,4),4,2)+0.1,
226
- '5'=c(5,3,4,1)+0.1, '6'=c(5,4,4,2)+0.1),
227
- pos.splines=0,
228
- id.splines=1/2,
229
- in.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
230
- out.lwd=ifelse(is.null(pos.splines) || pos.splines>0, 1/2, 2),
231
- id.lwd=ifelse(is.null(id.splines) || id.splines>0, 1/2, 2),
232
- in.col='darkblue',
233
- out.col='lightblue',
234
- id.col='black',
235
- breaks.col='#AAAAAA40',
236
- peaks.opts=list(),
237
- ...
238
- ){
239
- pos.units <- match.arg(pos.units);
240
- pos.factor <- ifelse(pos.units=='bp',1,ifelse(pos.units=='Kbp',1e3,1e6));
241
- pos.lim <- pos.lim/pos.factor;
242
- lmat <- layout;
243
- for(i in 1:6) if(!any(layout==i)) lmat[layout>i] <- lmat[layout>i]-1;
244
-
245
- layout(lmat, widths=widths, heights=heights);
246
- ori.mar <- par('mar');
213
+ (
214
+ x,
215
+ layout = matrix(c(5, 5, 2, 1, 4, 3), nrow = 2),
216
+ panel.fun = list(),
217
+ widths = c(1, 7, 2),
218
+ heights = c(1, 2),
219
+ palette = grey((100:0) / 100),
220
+ underlay.group = TRUE,
221
+ peaks.col = "darkred",
222
+ use.peaks,
223
+ id.lim = range(x$id.breaks),
224
+ pos.lim = range(x$pos.breaks),
225
+ pos.units = c("Mbp", "Kbp", "bp"),
226
+ mar = list(
227
+ "1" = c(5, 4, 1, 1) + 0.1,
228
+ "2" = c(ifelse(any(layout == 1), 1, 5), 4, 4, 1) + 0.1,
229
+ "3" = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1,
230
+ "4" = c(ifelse(any(layout == 1), 1, 5),
231
+ ifelse(any(layout == 2), 1, 4), 4, 2) + 0.1,
232
+ "5" = c(5, 3, 4, 1) + 0.1,
233
+ "6" = c(5, 4, 4, 2) + 0.1
234
+ ),
235
+ pos.splines = 0,
236
+ id.splines = 1/2,
237
+ in.lwd = ifelse(is.null(pos.splines) || pos.splines > 0, 1/2, 2),
238
+ out.lwd = ifelse(is.null(pos.splines) || pos.splines > 0, 1/2, 2),
239
+ id.lwd = ifelse(is.null(id.splines) || id.splines > 0, 1/2, 2),
240
+ in.col = "darkblue",
241
+ out.col = "lightblue",
242
+ id.col = "black",
243
+ breaks.col = "#AAAAAA40",
244
+ peaks.opts = list(),
245
+ ...
246
+ ) {
247
+ pos.units <- match.arg(pos.units)
248
+ pos.factor <- ifelse(pos.units == "bp", 1,
249
+ ifelse(pos.units == "Kbp", 1e3, 1e6))
250
+ pos.lim <- pos.lim / pos.factor
251
+ lmat <- layout
252
+ for (i in 1:6) if (!any(layout == i)) lmat[layout > i] <- lmat[layout > i] - 1
253
+
254
+ layout(lmat, widths = widths, heights = heights)
255
+ ori.mar <- par("mar")
256
+ on.exit(par(ori.mar))
247
257
 
248
258
  # Essential vars
249
259
  counts <- x$counts
@@ -273,8 +283,8 @@ plot.enve.RecPlot2 <- function
273
283
  }
274
284
 
275
285
  # [1] Counts matrix
276
- if(any(layout==1)){
277
- par(mar=mar[['1']]);
286
+ if (any(layout==1)) {
287
+ par(mar = mar[["1"]]) # par(mar) already being watched by on.exit
278
288
  plot(1, t='n', bty='l',
279
289
  xlim=pos.lim, xlab=paste('Position in genome (',pos.units,')',sep=''),
280
290
  xaxs='i', ylim=id.lim, ylab=x$id.metric, yaxs='i');
@@ -288,18 +298,18 @@ plot.enve.RecPlot2 <- function
288
298
  image(x=pos.breaks, y=id.breaks, z=log10(counts),col=palette,
289
299
  bg=grey(1,0), breaks=seq(-.1,log10(max(counts)),
290
300
  length.out=1+length(palette)), add=TRUE);
291
- if(exists('1',panel.fun)) panel.fun[['1']]();
301
+ if(exists("1", panel.fun)) panel.fun[["1"]]()
292
302
  }
293
303
 
294
304
  # [2] Position histogram
295
- if(any(layout==2)){
296
- par(mar=mar[['2']]);
297
- if(any(layout==1)){
298
- xlab=''
299
- xaxt='n'
300
- }else{
301
- xlab=paste('Position in genome (',pos.units,')',sep='')
302
- xaxt='s'
305
+ if (any(layout == 2)) {
306
+ par(mar = mar[["2"]]) # par(mar) already being watched by on.exit
307
+ if (any(layout == 1)) {
308
+ xlab <- ""
309
+ xaxt <- "n"
310
+ } else {
311
+ xlab <- paste("Position in genome (", pos.units, ")", sep = "")
312
+ xaxt <- "s"
303
313
  }
304
314
  plot(1,t='n', bty='l', log='y',
305
315
  xlim=pos.lim, xlab=xlab, xaxt=xaxt, xaxs='i',
@@ -309,31 +319,33 @@ plot.enve.RecPlot2 <- function
309
319
  pos.f <- rep(seqdepth.in,each=2)
310
320
  lines(pos.x, rep(seqdepth.out,each=2), lwd=out.lwd, col=out.col);
311
321
  lines(pos.x, pos.f, lwd=in.lwd, col=in.col);
312
- if(is.null(pos.splines) || pos.splines > 0){
322
+ if (is.null(pos.splines) || pos.splines > 0) {
313
323
  pos.spline <- smooth.spline(pos.x[pos.f>0], log(pos.f[pos.f>0]),
314
324
  spar=pos.splines)
315
325
  lines(pos.spline$x, exp(pos.spline$y), lwd=2, col=in.col)
316
326
  }
317
- if(any(pos.counts.out==0)) rect(pos.breaks[c(pos.counts.out==0,FALSE)],
318
- seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.out==0)],
319
- seqdepth.lim[1]*3/2, col=out.col, border=NA);
320
- if(any(pos.counts.in==0)) rect(pos.breaks[c(pos.counts.in==0,FALSE)],
321
- seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.in==0)],
322
- seqdepth.lim[1]*3/2, col=in.col, border=NA);
323
- if(exists('2',panel.fun)) panel.fun[['2']]();
327
+ if (any(pos.counts.out==0))
328
+ rect(pos.breaks[c(pos.counts.out==0,FALSE)],
329
+ seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.out==0)],
330
+ seqdepth.lim[1]*3/2, col=out.col, border=NA);
331
+ if (any(pos.counts.in==0))
332
+ rect(pos.breaks[c(pos.counts.in==0,FALSE)],
333
+ seqdepth.lim[1], pos.breaks[c(FALSE,pos.counts.in==0)],
334
+ seqdepth.lim[1]*3/2, col=in.col, border=NA);
335
+ if (exists("2", panel.fun)) panel.fun[["2"]]()
324
336
  }
325
337
 
326
338
  # [3] Identity histogram
327
- if(any(layout==3)){
328
- par(mar=mar[['3']]);
329
- if(any(layout==1)){
330
- ylab=''
331
- yaxt='n'
332
- }else{
333
- ylab=x$id.metric
334
- yaxt='s'
339
+ if (any(layout == 3)) {
340
+ par(mar = mar[["3"]]) # par(mar) already being watched by on.exit
341
+ if (any(layout == 1)) {
342
+ ylab <- ""
343
+ yaxt <- "n"
344
+ } else {
345
+ ylab <- x$id.metric
346
+ yaxt <- "s"
335
347
  }
336
- if(sum(id.counts>0) >= 4){
348
+ if (sum(id.counts > 0) >= 4) {
337
349
  id.counts.range <- range(id.counts[id.counts>0])*c(1/2,2);
338
350
  plot(1,t='n', bty='l', log='x',
339
351
  xlim=id.counts.range, xlab='bps per bin', xaxs='i',
@@ -352,23 +364,23 @@ plot.enve.RecPlot2 <- function
352
364
  spar=id.splines)
353
365
  lines(exp(id.spline$y), id.spline$x, lwd=2, col=id.col)
354
366
  }
355
- }else{
367
+ } else {
356
368
  plot(1,t='n',bty='l',xlab='', xaxt='n', ylab='', yaxt='n')
357
369
  text(1,1,labels='Insufficient data', srt=90)
358
370
  }
359
- if(exists('3',panel.fun)) panel.fun[['3']]();
371
+ if (exists("3", panel.fun)) panel.fun[["3"]]()
360
372
  }
361
373
 
362
374
  # [4] Populations histogram
363
375
  peaks <- NA;
364
- if(any(layout==4)){
365
- par(mar=mar[['4']]);
366
- if(any(layout==2)){
367
- ylab=''
368
- yaxt='n'
369
- }else{
370
- ylab='Sequencing depth (X)'
371
- yaxt='s'
376
+ if (any(layout == 4)) {
377
+ par(mar = mar[["4"]]) # par(mar) already being watched by on.exit
378
+ if (any(layout == 2)) {
379
+ ylab <- ""
380
+ yaxt <- "n"
381
+ } else {
382
+ ylab <- "Sequencing depth (X)"
383
+ yaxt <- "s"
372
384
  }
373
385
  h.breaks <- seq(log10(seqdepth.lim[1]*2), log10(seqdepth.lim[2]/2),
374
386
  length.out=200);
@@ -406,44 +418,47 @@ plot.enve.RecPlot2 <- function
406
418
  lapply(peaks,
407
419
  function(x) ifelse(length(x$values)==0, x$n.hat,
408
420
  length(x$values))/x$n.total)), 2)
409
- if(peaks[[1]]$err.res < 0){
410
- err <- paste(', LL:', signif(peaks[[1]]$err.res, 3))
411
- }else{
412
- err <- paste(', err:',
413
- signif(as.numeric(lapply(peaks, function(x) x$err.res)), 2))
421
+ if (peaks[[1]]$err.res < 0) {
422
+ err <- paste(", LL:", signif(peaks[[1]]$err.res, 3))
423
+ } else {
424
+ err <- paste(
425
+ ", err:",
426
+ signif(as.numeric(lapply(peaks, function(x) x$err.res)), 2)
427
+ )
414
428
  }
415
429
  legend('topright', bty='n', cex=1/2,
416
430
  legend=paste(letters[1:length(peaks)],'. ',
417
431
  dpt,'X (', frx, '%', err, ')', sep=''))
418
432
  }
419
433
  }
420
- if(exists('4',panel.fun)) panel.fun[['4']]();
434
+ if (exists("4", panel.fun)) panel.fun[["4"]]()
421
435
  }
422
436
 
423
437
  # [5] Color scale of the counts matrix (vertical)
424
- count.bins <- 10^seq(log10(min(counts[counts>0])), log10(max(counts)),
425
- length.out=1+length(palette))
426
- if(any(layout==5)){
427
- par(mar=mar[['5']]);
438
+ count.bins <- 10^seq(
439
+ log10(min(counts[counts>0])), log10(max(counts)),
440
+ length.out = 1 + length(palette)
441
+ )
442
+ if (any(layout == 5)) {
443
+ par(mar = mar[["5"]]) # par(mar) already being watched by on.exit
428
444
  plot(1,t='n',log='y',xlim=0:1,xaxt='n',xlab='',xaxs='i',
429
445
  ylim=range(count.bins), yaxs='i', ylab='')
430
446
  rect(0,count.bins[-length(count.bins)],1,count.bins[-1],col=palette,
431
447
  border=NA)
432
- if(exists('5',panel.fun)) panel.fun[['5']]();
448
+ if (exists("5", panel.fun)) panel.fun[["5"]]()
433
449
  }
434
450
 
435
451
  # [6] Color scale of the coutnts matrix (horizontal)
436
- if(any(layout==6)){
437
- par(mar=mar[['6']]);
452
+ if (any(layout == 6)) {
453
+ par(mar = mar[["6"]]) # par(mar) already being watched by on.exit
438
454
  plot(1,t='n',log='x',ylim=0:1,yaxt='n',ylab='',yaxs='i',
439
455
  xlim=range(count.bins), xaxs='i',xlab='');
440
456
  rect(count.bins[-length(count.bins)],0,count.bins[-1],1,col=palette,
441
457
  border=NA);
442
- if(exists('6',panel.fun)) panel.fun[['6']]();
458
+ if (exists("6", panel.fun)) panel.fun[["6"]]()
443
459
  }
444
460
 
445
- par(mar=ori.mar);
446
- return(peaks);
461
+ return(peaks)
447
462
  }
448
463
 
449
464
  #==============> Define core functions
@@ -635,7 +650,7 @@ enve.recplot2 <- function(
635
650
  #'
636
651
  #' @author Luis M. Rodriguez-R [aut, cre]
637
652
  #'
638
- #' export
653
+ #' @export
639
654
 
640
655
  enve.recplot2.findPeaks <- function(
641
656
  x,
@@ -955,33 +970,39 @@ enve.recplot2.findPeaks.mower <- function(
955
970
  #' "core genome" of a population.
956
971
  #'
957
972
  #' @param x \code{list} of \code{\link{enve.RecPlot2.Peak}} objects.
973
+ #'
974
+ #' @return A \code{\link{enve.RecPlot2.Peak}} object.
958
975
  #'
959
976
  #' @author Luis M. Rodriguez-R [aut, cre]
960
977
  #'
961
978
  #' @export
962
979
 
963
- enve.recplot2.corePeak <- function
964
- (x
965
- ){
980
+ enve.recplot2.corePeak <- function(x) {
966
981
  # Find the peak with maximum depth (centrality)
967
982
  maxPeak <- x[[
968
- which.max(as.numeric(lapply(x,
969
- function(y) y$param.hat[[ length(y$param.hat) ]])))
970
- ]]
983
+ which.max(
984
+ as.numeric(
985
+ lapply(x, function(y) y$param.hat[[length(y$param.hat)]])
986
+ )
987
+ )
988
+ ]]
971
989
  # If a "larger" peak (a peak explaining more bins of the genome) is within
972
990
  # the default "merge.logdist" distance, take that one instead.
973
991
  corePeak <- maxPeak
974
- for(p in x){
975
- p.len <- ifelse(length(p$values)==0, p$n.hat, length(p$values))
992
+ for (p in x) {
993
+ p.len <- ifelse(length(p$values) == 0, p$n.hat, length(p$values))
976
994
  corePeak.len <- ifelse(
977
- length(corePeak$values)==0, corePeak$n.hat, length(corePeak$values))
978
- sz.d <- log(p.len/corePeak.len)
979
- if(is.nan(sz.d) || sz.d < 0) next
980
- sq.d.a <- as.numeric(tail(p$param.hat, n=1))
981
- sq.d.b <- as.numeric(tail(maxPeak$param.hat, n=1))
982
- if(p$log) sq.d.a <- exp(sq.d.a)
983
- if(corePeak$log) sq.d.b <- exp(sq.d.b)
984
- if(abs(log(sq.d.a/sq.d.b)) < log(1.75)+sz.d/5) corePeak <- p
995
+ length(corePeak$values) == 0,
996
+ corePeak$n.hat,
997
+ length(corePeak$values)
998
+ )
999
+ sz.d <- log(p.len / corePeak.len)
1000
+ if (is.nan(sz.d) || sz.d < 0) next
1001
+ sq.d.a <- as.numeric(tail(p$param.hat, n = 1))
1002
+ sq.d.b <- as.numeric(tail(maxPeak$param.hat, n = 1))
1003
+ if (p$log) sq.d.a <- exp(sq.d.a)
1004
+ if (corePeak$log) sq.d.b <- exp(sq.d.b)
1005
+ if (abs(log(sq.d.a / sq.d.b)) < log(1.75) + sz.d / 5) corePeak <- p
985
1006
  }
986
1007
  return(corePeak)
987
1008
  }
@@ -994,20 +1015,20 @@ enve.recplot2.corePeak <- function
994
1015
  #' \code{\link{enve.RecPlot2}} object.
995
1016
  #' @param new.cutoff
996
1017
  #' New cutoff to use.
1018
+ #'
1019
+ #' @return The modified \code{\link{enve.RecPlot2}} object.
997
1020
  #'
998
1021
  #' @author Luis M. Rodriguez-R [aut, cre]
999
1022
  #'
1000
1023
  #' @export
1001
1024
 
1002
- enve.recplot2.changeCutoff <- function
1003
- (rp,
1004
- new.cutoff=98
1005
- ){
1025
+ enve.recplot2.changeCutoff <- function(rp, new.cutoff = 98) {
1006
1026
  # Re-calculate vectors
1007
- id.mids <- (rp$id.breaks[-length(rp$id.breaks)]+rp$id.breaks[-1])/2
1008
- id.ingroup <- (id.mids > new.cutoff)
1009
- pos.counts.in <- apply(rp$counts[,id.ingroup], 1, sum)
1010
- pos.counts.out <- apply(rp$counts[,!id.ingroup], 1, sum)
1027
+ id.mids <- (rp$id.breaks[-length(rp$id.breaks)] + rp$id.breaks[-1]) / 2
1028
+ id.ingroup <- (id.mids > new.cutoff)
1029
+ pos.counts.in <- apply(rp$counts[, id.ingroup], 1, sum)
1030
+ pos.counts.out <- apply(rp$counts[, !id.ingroup], 1, sum)
1031
+
1011
1032
  # Update object
1012
1033
  attr(rp, "id.ingroup") <- id.ingroup
1013
1034
  attr(rp, "pos.counts.in") <- pos.counts.in
@@ -1024,9 +1045,9 @@ enve.recplot2.changeCutoff <- function
1024
1045
  #' @param rp
1025
1046
  #' Recruitment plot, an \code{\link{enve.RecPlot2}} object.
1026
1047
  #' @param peak
1027
- #' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
1028
- #' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
1029
- #' used (see \code{\link{enve.recplot2.corePeak}}).
1048
+ #' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
1049
+ #' be a list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
1050
+ #' peak is used (see \code{\link{enve.recplot2.corePeak}}).
1030
1051
  #' @param lower.tail
1031
1052
  #' If \code{FALSE}, it returns windows significantly above the peak in
1032
1053
  #' sequencing depth.
@@ -1041,16 +1062,16 @@ enve.recplot2.changeCutoff <- function
1041
1062
  #'
1042
1063
  #' @export
1043
1064
 
1044
- enve.recplot2.windowDepthThreshold <- function
1045
- (rp,
1046
- peak,
1047
- lower.tail=TRUE,
1048
- significance=0.05
1049
- ){
1050
- if(is.list(peak)) peak <- enve.recplot2.corePeak(peak)
1065
+ enve.recplot2.windowDepthThreshold <- function(
1066
+ rp,
1067
+ peak,
1068
+ lower.tail = TRUE,
1069
+ significance = 0.05
1070
+ ) {
1071
+ if (is.list(peak)) peak <- enve.recplot2.corePeak(peak)
1051
1072
  par <- peak$param.hat
1052
- par[["p"]] <- ifelse(lower.tail, significance, 1-significance)
1053
- thr <- do.call(ifelse(length(par)==4, qsn, qnorm), par)
1073
+ par[["p"]] <- ifelse(lower.tail, significance, 1 - significance)
1074
+ thr <- do.call(ifelse(length(par) == 4, qsn, qnorm), par)
1054
1075
  if(peak$log) thr <- exp(thr)
1055
1076
 
1056
1077
  return(thr)
@@ -1064,9 +1085,9 @@ enve.recplot2.windowDepthThreshold <- function
1064
1085
  #' @param rp
1065
1086
  #' Recruitment plot, a \code{\link{enve.RecPlot2}} object.
1066
1087
  #' @param peak
1067
- #' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
1068
- #' list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
1069
- #' used (see \code{\link{enve.recplot2.corePeak}}).
1088
+ #' Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to
1089
+ #' be a list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core
1090
+ #' peak is used (see \code{\link{enve.recplot2.corePeak}}).
1070
1091
  #' @param lower.tail
1071
1092
  #' If \code{FALSE}, it returns windows significantly above the peak in
1072
1093
  #' sequencing depth.
@@ -1089,21 +1110,21 @@ enve.recplot2.windowDepthThreshold <- function
1089
1110
  #'
1090
1111
  #' @export
1091
1112
 
1092
- enve.recplot2.extractWindows <- function
1093
- (rp,
1094
- peak,
1095
- lower.tail = TRUE,
1096
- significance = 0.05,
1097
- seq.names = FALSE
1098
- ){
1113
+ enve.recplot2.extractWindows <- function(
1114
+ rp,
1115
+ peak,
1116
+ lower.tail = TRUE,
1117
+ significance = 0.05,
1118
+ seq.names = FALSE
1119
+ ) {
1099
1120
  # Determine the threshold
1100
1121
  thr <- enve.recplot2.windowDepthThreshold(rp, peak, lower.tail, significance)
1101
1122
 
1102
1123
  # Select windows past the threshold
1103
1124
  seqdepth.in <- enve.recplot2.seqdepth(rp)
1104
- if(lower.tail){
1125
+ if (lower.tail) {
1105
1126
  sel <- seqdepth.in < thr
1106
- }else{
1127
+ } else {
1107
1128
  sel <- seqdepth.in > thr
1108
1129
  }
1109
1130
 
@@ -1117,7 +1138,7 @@ enve.recplot2.extractWindows <- function
1117
1138
  #'
1118
1139
  #' Compare the distribution of identities between two
1119
1140
  #' \code{\link{enve.RecPlot2}} objects.
1120
- #'
1141
+ #'
1121
1142
  #' @param x
1122
1143
  #' First \code{\link{enve.RecPlot2}} object.
1123
1144
  #' @param y
@@ -1126,10 +1147,12 @@ enve.recplot2.extractWindows <- function
1126
1147
  #' Distance method to use. This should be (an unambiguous abbreviation of)
1127
1148
  #' one of:
1128
1149
  #' \itemize{
1129
- #' \item{"hellinger" (\emph{Hellinger, 1090, doi:10.1515/crll.1909.136.210}),}
1130
- #' \item{"bhattacharyya" (\emph{Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35}),}
1131
- #' \item{"kl" or "kullback-leibler" (\emph{Kullback & Leibler, 1951,
1132
- #' doi:10.1214/aoms/1177729694}), or}
1150
+ #' \item{"hellinger"
1151
+ #' (\emph{Hellinger, 1090, doi:10.1515/crll.1909.136.210}),}
1152
+ #' \item{"bhattacharyya"
1153
+ #' (\emph{Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35}),}
1154
+ #' \item{"kl" or "kullback-leibler"
1155
+ #' (\emph{Kullback & Leibler, 1951, doi:10.1214/aoms/1177729694}), or}
1133
1156
  #' \item{"euclidean"}
1134
1157
  #' }
1135
1158
  #' @param smooth.par
@@ -1142,56 +1165,66 @@ enve.recplot2.extractWindows <- function
1142
1165
  #' @param max.deviation
1143
1166
  #' Maximum mean deviation between identity breaks tolerated (as percent
1144
1167
  #' identity). Difference in number of \code{id.breaks} is never tolerated.
1145
- #'
1168
+ #'
1169
+ #' @return A \strong{numeric} indicating the distance between the objects.
1170
+ #'
1146
1171
  #' @author Luis M. Rodriguez-R [aut, cre]
1147
1172
  #'
1148
1173
  #' @export
1149
1174
 
1150
1175
  enve.recplot2.compareIdentities <- function
1151
- (x,
1152
- y,
1153
- method="hellinger",
1154
- smooth.par=NULL,
1155
- pseudocounts=0,
1156
- max.deviation=0.75
1157
- ){
1158
- METHODS <- c("hellinger","bhattacharyya","kullback-leibler","kl","euclidean")
1176
+ (
1177
+ x,
1178
+ y,
1179
+ method = "hellinger",
1180
+ smooth.par = NULL,
1181
+ pseudocounts = 0,
1182
+ max.deviation = 0.75
1183
+ ) {
1184
+ # Sanity checks
1185
+ METHODS <- c(
1186
+ "hellinger", "bhattacharyya", "kullback-leibler", "kl", "euclidean"
1187
+ )
1159
1188
  i.meth <- pmatch(method, METHODS)
1160
1189
  if (is.na(i.meth)) stop("Invalid distance ", method)
1161
- if(!inherits(x, "enve.RecPlot2"))
1190
+ if (!inherits(x, "enve.RecPlot2"))
1162
1191
  stop("'x' must inherit from class `enve.RecPlot2`")
1163
- if(!inherits(y, "enve.RecPlot2"))
1192
+ if (!inherits(y, "enve.RecPlot2"))
1164
1193
  stop("'y' must inherit from class `enve.RecPlot2`")
1165
- if(length(x$id.breaks) != length(y$id.breaks))
1194
+ if (length(x$id.breaks) != length(y$id.breaks))
1166
1195
  stop("'x' and 'y' must have the same number of `id.breaks`")
1167
1196
  dev <- mean(abs(x$id.breaks - y$id.breaks))
1168
- if(dev > max.deviation)
1197
+ if (dev > max.deviation)
1169
1198
  stop("'x' and 'y' must have similar `id.breaks`; exceeding max.deviation: ",
1170
1199
  dev)
1200
+
1201
+ # Initialize
1171
1202
  x.cnt <- x$id.counts
1172
1203
  y.cnt <- y$id.counts
1173
- if(is.null(smooth.par) || smooth.par > 0){
1174
- x.mids <- (x$id.breaks[-1] + x$id.breaks[-length(x$id.breaks)])/2
1175
- y.mids <- (y$id.breaks[-1] + y$id.breaks[-length(y$id.breaks)])/2
1176
- p.spline <- smooth.spline(x.mids, x.cnt, spar=smooth.par)
1177
- q.spline <- smooth.spline(y.mids, y.cnt, spar=smooth.par)
1204
+ if (is.null(smooth.par) || smooth.par > 0){
1205
+ x.mids <- (x$id.breaks[-1] + x$id.breaks[-length(x$id.breaks)]) / 2
1206
+ y.mids <- (y$id.breaks[-1] + y$id.breaks[-length(y$id.breaks)]) / 2
1207
+ p.spline <- smooth.spline(x.mids, x.cnt, spar = smooth.par)
1208
+ q.spline <- smooth.spline(y.mids, y.cnt, spar = smooth.par)
1178
1209
  x.cnt <- pmax(p.spline$y, 0)
1179
1210
  y.cnt <- pmax(q.spline$y, 0)
1180
1211
  }
1212
+
1181
1213
  a <- as.numeric(pseudocounts)
1182
1214
  p <- (x.cnt + a) / sum(x.cnt + a)
1183
1215
  q <- (y.cnt + a) / sum(y.cnt + a)
1184
1216
  d <- NA
1185
- if(i.meth %in% c(1L, 2L)){
1186
- d <- sqrt(sum((sqrt(p) - sqrt(q))**2))/sqrt(2)
1187
- if(i.meth==2L) d <- 1 - d**2
1188
- }else if(i.meth %in% c(3L, 4L)){
1189
- sel <- p>0
1190
- if(any(q[sel]==0))
1217
+
1218
+ if (i.meth %in% c(1L, 2L)) {
1219
+ d <- sqrt(sum((sqrt(p) - sqrt(q))**2)) / sqrt(2)
1220
+ if(i.meth == 2L) d <- 1 - d**2
1221
+ } else if (i.meth %in% c(3L, 4L)) {
1222
+ sel <- p > 0
1223
+ if (any(q[sel] == 0))
1191
1224
  stop("Undefined distance without absolute continuity, use pseudocounts")
1192
- d <- -sum(p[sel]*log(q[sel]/p[sel]))
1193
- }else if(i.meth == 5L){
1194
- d <- sqrt(sum((q-p)**2))
1225
+ d <- -sum(p[sel] * log(q[sel] / p[sel]))
1226
+ } else if (i.meth == 5L) {
1227
+ d <- sqrt(sum((q - p)**2))
1195
1228
  }
1196
1229
  return(d)
1197
1230
  }
@@ -1219,19 +1252,16 @@ enve.recplot2.compareIdentities <- function
1219
1252
  #'
1220
1253
  #' @export
1221
1254
 
1222
- enve.recplot2.coordinates <- function
1223
- (x,
1224
- bins
1225
- ){
1226
- if(!inherits(x, "enve.RecPlot2"))
1255
+ enve.recplot2.coordinates <- function(x, bins) {
1256
+ if (!inherits(x, "enve.RecPlot2"))
1227
1257
  stop("'x' must inherit from class `enve.RecPlot2`")
1228
- if(missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
1229
- if(!is.vector(bins)) stop("'bins' must be a vector")
1230
- if(inherits(bins, "logical")) bins <- which(bins)
1258
+ if (missing(bins)) bins <- rep(TRUE, length(x$pos.breaks)-1)
1259
+ if (!is.vector(bins)) stop("'bins' must be a vector")
1260
+ if (inherits(bins, "logical")) bins <- which(bins)
1231
1261
 
1232
1262
  y <- data.frame(stringsAsFactors = FALSE, row.names = bins)
1233
1263
 
1234
- for(i in 1:length(bins)){
1264
+ for (i in 1:length(bins)) {
1235
1265
  j <- bins[i]
1236
1266
  # Concatenated coordinates
1237
1267
  cc <- x$pos.breaks[c(j, j+1)]
@@ -1243,7 +1273,7 @@ enve.recplot2.coordinates <- function
1243
1273
  cc[2] > x$seq.breaks[-length(x$seq.breaks)] &
1244
1274
  cc[2] <= x$seq.breaks[-1])
1245
1275
  # Translate coordinates
1246
- if(length(sb.from)==1 & length(sb.to)==1){
1276
+ if (length(sb.from) == 1 & length(sb.to) == 1) {
1247
1277
  y[i, 'name.from'] <- x$seq.names[sb.from]
1248
1278
  y[i, 'pos.from'] <- floor(x$seq.breaks[sb.from] + cc[1] - 1)
1249
1279
  y[i, 'name.to'] <- x$seq.names[sb.to]
@@ -1276,23 +1306,18 @@ enve.recplot2.coordinates <- function
1276
1306
  #'
1277
1307
  #' @export
1278
1308
 
1279
- enve.recplot2.seqdepth <- function
1280
-
1281
- (x,
1282
- sel,
1283
- low.identity=FALSE
1284
- ){
1285
- if(!inherits(x, "enve.RecPlot2"))
1309
+ enve.recplot2.seqdepth <- function(x, sel, low.identity = FALSE) {
1310
+ if (!inherits(x, "enve.RecPlot2"))
1286
1311
  stop("'x' must inherit from class `enve.RecPlot2`")
1287
- if(low.identity){
1312
+ if (low.identity) {
1288
1313
  pos.cnts.in <- x$pos.counts.out
1289
- }else{
1314
+ } else {
1290
1315
  pos.cnts.in <- x$pos.counts.in
1291
1316
  }
1292
1317
  pos.breaks <- x$pos.breaks
1293
1318
  pos.binsize <- (pos.breaks[-1] - pos.breaks[-length(pos.breaks)])
1294
1319
  seqdepth.in <- pos.cnts.in/pos.binsize
1295
- if(missing(sel)) return(seqdepth.in)
1320
+ if (missing(sel)) return(seqdepth.in)
1296
1321
  return(seqdepth.in[sel])
1297
1322
  }
1298
1323
 
@@ -1307,8 +1332,10 @@ enve.recplot2.seqdepth <- function
1307
1332
  #' Range of identities to be considered. By default, the full range
1308
1333
  #' is used (note that the upper boundary is \code{Inf} and not 100 because
1309
1334
  #' recruitment plots can also be built with bit-scores). To use only
1310
- #' intra-population matches (with identities), use c(95,100). To use only
1311
- #' inter-population values, use c(0,95).
1335
+ #' intra-population matches (with identities), use \code{c(95, 100)}. To use
1336
+ #' only inter-population values, use \code{c(0, 95)}.
1337
+ #'
1338
+ #' @return A numeric value indicating the ANIr (as percentage).
1312
1339
  #'
1313
1340
  #' @author Luis M. Rodriguez-R [aut, cre]
1314
1341
  #'
@@ -1338,43 +1365,49 @@ enve.recplot2.ANIr <- function
1338
1365
  #' @param pos.breaks Position breaks
1339
1366
  #' @param id.breaks Identity breaks
1340
1367
  #' @param rec.idcol Identity column to use
1368
+ #'
1369
+ #' @return 2-dimensional matrix of counts per identity and position bins.
1341
1370
  #'
1342
1371
  #' @author Luis M. Rodriguez-R [aut, cre]
1343
1372
  #' @author Kenji Gerhardt [aut]
1344
1373
  #'
1345
1374
  #' @export
1346
1375
 
1347
- enve.recplot2.__counts <- function
1348
- (x, pos.breaks, id.breaks, rec.idcol) {
1376
+ enve.recplot2.__counts <- function(x, pos.breaks, id.breaks, rec.idcol) {
1349
1377
  rec2 <- x$rec
1350
1378
  verbose <- x$verbose
1351
1379
 
1352
1380
  # get counts of how many occurrences of each genome pos.bin there are per read
1353
1381
  x.bins <- mapply(
1354
1382
  function(start, end) {
1355
- list(rle(findInterval(start:end, pos.breaks, left.open = T)))
1356
- }, rec2[, 1], rec2[, 2])
1383
+ list(rle(findInterval(start:end, pos.breaks, left.open = TRUE)))
1384
+ },
1385
+ rec2[, 1], rec2[, 2]
1386
+ )
1357
1387
 
1358
1388
  # find the single y bin for each row, replicates it at the correct places to
1359
1389
  # the number of distinct bins found in its row
1360
- y.bins <- rep(findInterval(rec2[, rec.idcol], id.breaks, left.open = T),
1390
+ y.bins <- rep(findInterval(rec2[, rec.idcol], id.breaks, left.open = TRUE),
1361
1391
  times = unlist(lapply(x.bins, function(a) length(a$lengths))))
1362
1392
 
1363
1393
  # x.bins_counts is the number of occurrences of each bin a row contains,
1364
1394
  # per row, then unlisted
1365
1395
  x.bins_counts <- unlist(lapply(x.bins, function(a) a$lengths))
1366
1396
 
1367
- # these are the pos. in. genome bins that each count in x.bins_counts falls into
1397
+ # these are the pos. in. genome bins that each count in x.bins_counts falls
1398
+ # into
1368
1399
  x.bins <- unlist(lapply(x.bins, function(a) a$values))
1369
1400
 
1370
- # much more efficient counts implementation in R using lists instead of a matrix:
1401
+ # much more efficient counts implementation in R using lists instead of a
1402
+ # matrix:
1371
1403
  counts <- lapply(
1372
1404
  1:(length(pos.breaks) - 1),
1373
- function(col_len) rep(0, length(id.breaks) - 1))
1405
+ function(col_len) rep(0, length(id.breaks) - 1)
1406
+ )
1374
1407
 
1375
1408
  # accesses the correct list in counts by x.bin, then
1376
1409
  # accesses the position in that row by y.bins and adds the new count
1377
- for(i in 1:length(x.bins)){
1410
+ for (i in 1:length(x.bins)) {
1378
1411
  counts[[x.bins[i]]][y.bins[i]] <- counts[[x.bins[i]]][y.bins[i]] + x.bins_counts[i]
1379
1412
  }
1380
1413
 
@@ -1384,32 +1417,38 @@ enve.recplot2.__counts <- function
1384
1417
 
1385
1418
  #' Enveomics: Recruitment Plot (2) EMauto Peak Finder - Internal Ancillary Function
1386
1419
  #'
1387
- #' Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.emauto}}).
1420
+ #' Internal ancillary function (see
1421
+ #' \code{\link{enve.recplot2.findPeaks.emauto}}).
1388
1422
  #'
1389
- #' @param x \code{\link{enve.RecPlot2}} object
1390
- #' @param comp Components
1391
- #' @param do_crit Function estimating the criterion
1392
- #' @param best Best solution thus far
1393
- #' @param verbose If verbose
1394
- #' @param ... Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}
1423
+ #' @param x \code{\link{enve.RecPlot2}} object.
1424
+ #' @param comp Components.
1425
+ #' @param do_crit Function estimating the criterion.
1426
+ #' @param best Best solution thus far.
1427
+ #' @param verbose If verbose.
1428
+ #' @param ...
1429
+ #' Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}.
1430
+ #'
1431
+ #' @return Updated solution with the same structure as \code{best}.
1395
1432
  #'
1396
1433
  #' @author Luis M. Rodriguez-R [aut, cre]
1397
1434
  #'
1398
1435
  #' @export
1399
1436
 
1400
- enve.recplot2.findPeaks.__emauto_one <- function
1401
- (x, comp, do_crit, best, verbose, ...){
1402
- peaks <- enve.recplot2.findPeaks.em(x=x, components=comp, ...)
1403
- if(length(peaks)==0) return(best)
1404
- k <- comp*3 - 1 # mean & sd for each component, and n-1 free alpha parameters
1437
+ enve.recplot2.findPeaks.__emauto_one <- function(
1438
+ x, comp, do_crit, best, verbose, ...
1439
+ ) {
1440
+ peaks <- enve.recplot2.findPeaks.em(x = x, components = comp, ...)
1441
+ if (length(peaks) == 0) return(best)
1442
+ k <- comp * 3 - 1 # mean & sd for each component, and n-1 free alpha params
1405
1443
  crit <- do_crit(peaks[[1]]$err.res, k, peaks[[1]]$n.total)
1406
- if(verbose) cat(comp,'\t| LL =', peaks[[1]]$err.res, '\t| Estimate =', crit,
1407
- ifelse(crit > best[['crit']], '*', ''), '\n')
1408
- if(crit > best[['crit']]){
1409
- best[['crit']] <- crit
1410
- best[['peaks']] <- peaks
1444
+ if(verbose)
1445
+ cat(comp, "\t| LL =", peaks[[1]]$err.res, "\t| Estimate =", crit,
1446
+ ifelse(crit > best[["crit"]], "*", ""), "\n")
1447
+ if(crit > best[["crit"]]){
1448
+ best[["crit"]] <- crit
1449
+ best[["peaks"]] <- peaks
1411
1450
  }
1412
- best[['pstore']][[comp]] <- peaks
1451
+ best[["pstore"]][[comp]] <- peaks
1413
1452
  return(best)
1414
1453
  }
1415
1454
 
@@ -1419,13 +1458,15 @@ enve.recplot2.findPeaks.__emauto_one <- function
1419
1458
  #'
1420
1459
  #' @param x Vector of log-transformed sequencing depths
1421
1460
  #' @param theta Parameters list
1461
+ #'
1462
+ #' @return A list with components \code{ll} (numeric) the log-likelihood, and
1463
+ #' \code{posterior} (numeric) the posterior probability.
1422
1464
  #'
1423
1465
  #' @author Luis M. Rodriguez-R [aut, cre]
1424
1466
  #'
1425
1467
  #' @export
1426
1468
 
1427
- enve.recplot2.findPeaks.__em_e <- function
1428
- (x, theta){
1469
+ enve.recplot2.findPeaks.__em_e <- function(x, theta) {
1429
1470
  components <- length(theta[['mu']])
1430
1471
  product <- do.call(cbind,
1431
1472
  lapply(1:components,
@@ -1437,7 +1478,7 @@ enve.recplot2.findPeaks.__em_e <- function
1437
1478
  cat(i,'/',nrow(product), ':', product[i,], '\n')
1438
1479
  }
1439
1480
 
1440
- return(list(ll=sum(log(sum.of.components)), posterior=posterior))
1481
+ return(list(ll = sum(log(sum.of.components)), posterior = posterior))
1441
1482
  }
1442
1483
 
1443
1484
  #' Enveomics: Recruitment Plot (2) Em Peak Finder - Internal Ancillary Function Maximization
@@ -1446,20 +1487,23 @@ enve.recplot2.findPeaks.__em_e <- function
1446
1487
  #'
1447
1488
  #' @param x Vector of log-transformed sequencing depths
1448
1489
  #' @param posterior Posterior probability
1490
+ #'
1491
+ #' @return A list with components \code{mu} (numeric) the estimated mean,
1492
+ #' \code{sd} (numeric) the estimated standard deviation, and \code{alpha}
1493
+ #' (numeric) the estimated alpha parameter.
1449
1494
  #'
1450
1495
  #' @author Luis M. Rodriguez-R [aut, cre]
1451
1496
  #'
1452
1497
  #' @export
1453
1498
 
1454
- enve.recplot2.findPeaks.__em_m <- function
1455
- (x, posterior){
1499
+ enve.recplot2.findPeaks.__em_m <- function(x, posterior) {
1456
1500
  components <- ncol(posterior)
1457
1501
  n <- colSums(posterior)
1458
1502
  mu <- colSums(posterior * x) / n
1459
1503
  sd <- sqrt( colSums(
1460
1504
  posterior * (matrix(rep(x,components), ncol=components) - mu)^2) / n )
1461
1505
  alpha <- n/length(x)
1462
- return(list(mu=mu, sd=sd, alpha=alpha))
1506
+ return(list(mu = mu, sd = sd, alpha = alpha))
1463
1507
  }
1464
1508
 
1465
1509
  #' Enveomics: Recruitment Plot (2) Peak S4 Class - Internal Ancillary Function
@@ -1469,29 +1513,31 @@ enve.recplot2.findPeaks.__em_m <- function
1469
1513
  #' @param x \code{\link{enve.RecPlot2.Peak}} object
1470
1514
  #' @param mids Midpoints
1471
1515
  #' @param counts Counts
1516
+ #'
1517
+ #' @return A numeric vector of counts (histogram)
1472
1518
  #'
1473
1519
  #' @author Luis M. Rodriguez-R [aut, cre]
1474
1520
  #'
1475
1521
  #' @export
1476
1522
 
1477
- enve.recplot2.__peakHist <- function
1478
- (x, mids, counts=TRUE){
1523
+ enve.recplot2.__peakHist <- function(x, mids, counts = TRUE){
1479
1524
  d.o <- x$param.hat
1480
- if(length(x$log)==0) x$log <- FALSE
1481
- if(x$log){
1525
+ if (length(x$log) == 0) x$log <- FALSE
1526
+ if (x$log) {
1482
1527
  d.o$x <- log(mids)
1483
- }else{
1528
+ } else {
1484
1529
  d.o$x <- mids
1485
1530
  }
1486
- prob <- do.call(paste('d', x$dist, sep=''), d.o)
1531
+ prob <- do.call(paste('d', x$dist, sep = ""), d.o)
1487
1532
  if(!counts) return(prob)
1488
1533
  if(length(x$values)>0) return(prob*length(x$values)/sum(prob))
1489
- return(prob*x$n.hat/sum(prob))
1534
+ return(prob * x$n.hat / sum(prob))
1490
1535
  }
1491
1536
 
1492
1537
  #' Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 1
1493
1538
  #'
1494
- #' Internall ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
1539
+ #' Internal ancillary function (see
1540
+ #' \code{\link{enve.recplot2.findPeaks.mower}}).
1495
1541
  #'
1496
1542
  #' @param lsd1 Vector of log-transformed sequencing depths
1497
1543
  #' @param min.points Minimum number of points
@@ -1505,85 +1551,100 @@ enve.recplot2.__peakHist <- function
1505
1551
  #' @param merge.logdist Attempted \code{merge.logdist} parameter
1506
1552
  #' @param verbose If verbose
1507
1553
  #' @param log If log-transformed depths
1554
+ #'
1555
+ #' @return Return an \code{enve.RecPlot2.Peak} object.
1508
1556
  #'
1509
1557
  #' @author Luis M. Rodriguez-R [aut, cre]
1510
1558
  #'
1511
1559
  #' @export
1512
1560
 
1513
- enve.recplot2.findPeaks.__mow_one <- function
1514
- (lsd1, min.points, quant.est, mlv.opts, fitdist.opts, with.skewness,
1515
- optim.rounds, optim.epsilon, n.total, merge.logdist, verbose, log
1516
- ){
1517
- dist <- ifelse(with.skewness, 'sn', 'norm');
1561
+ enve.recplot2.findPeaks.__mow_one <- function(
1562
+ lsd1, min.points, quant.est, mlv.opts, fitdist.opts, with.skewness,
1563
+ optim.rounds, optim.epsilon, n.total, merge.logdist, verbose, log
1564
+ ) {
1565
+ dist <- ifelse(with.skewness, "sn", "norm")
1518
1566
 
1519
1567
  # Find peak
1520
- o <- mlv.opts; o$x = lsd1;
1521
- mode1 <- median(lsd1); # mode1 <- do.call(mlv, o)$M;
1522
- if(verbose) cat('Anchoring at mode =',mode1,'\n')
1523
- param.hat <- fitdist.opts$start; last.hat <- param.hat;
1524
- lim <- NA;
1525
- if(with.skewness){ param.hat$xi <- mode1 }else{ param.hat$mean <- mode1 }
1568
+ o <- mlv.opts
1569
+ o$x <- lsd1
1570
+ mode1 <- median(lsd1) # mode1 <- do.call(mlv, o)$M;
1571
+ if (verbose) cat("Anchoring at mode =", mode1, "\n")
1572
+ param.hat <- fitdist.opts$start
1573
+ last.hat <- param.hat
1574
+ lim <- NA
1575
+ if (with.skewness) { param.hat$xi <- mode1 } else { param.hat$mean <- mode1 }
1526
1576
 
1527
1577
  # Refine peak parameters
1528
- for(round in 1:optim.rounds){
1529
- param.hat[[ 1 ]] <- param.hat[[ 1 ]]/diff(quant.est)# <- expand dispersion
1578
+ for (round in 1:optim.rounds) {
1579
+ param.hat[[ 1 ]] <- param.hat[[1]] / diff(quant.est) # <- expand dispersion
1530
1580
  lim.o <- param.hat
1531
- lim.o$p <- quant.est; lim <- do.call(paste('q',dist,sep=''), lim.o)
1532
- lsd1.pop <- lsd1[(lsd1>lim[1]) & (lsd1<lim[2])];
1533
- if(verbose) cat(' Round', round, 'with n =',length(lsd1.pop),
1534
- 'and params =',as.numeric(param.hat),' \r')
1535
- if(length(lsd1.pop) < min.points) break;
1536
- o <- fitdist.opts; o$data = lsd1.pop; o$start = param.hat;
1581
+ lim.o$p <- quant.est
1582
+ lim <- do.call(paste("q", dist, sep = ""), lim.o)
1583
+ lsd1.pop <- lsd1[(lsd1 > lim[1]) & (lsd1 < lim[2])]
1584
+ if (verbose)
1585
+ cat(" Round", round, "with n =", length(lsd1.pop),
1586
+ "and params =", as.numeric(param.hat), " \r")
1587
+ if (length(lsd1.pop) < min.points) break
1588
+ o <- fitdist.opts
1589
+ o$data <- lsd1.pop
1590
+ o$start <- param.hat
1537
1591
  last.last.hat <- last.hat
1538
1592
  last.hat <- param.hat
1539
- param.hat <- as.list(do.call(fitdist, o)$estimate);
1540
- if(any(is.na(param.hat))){
1541
- if(round>1) param.hat <- last.hat;
1542
- break;
1593
+ param.hat <- as.list(do.call(fitdist, o)$estimate)
1594
+ if (any(is.na(param.hat))) {
1595
+ if (round > 1) param.hat <- last.hat
1596
+ break
1543
1597
  }
1544
- if(round > 1){
1545
- epsilon1 <- sum((as.numeric(last.hat)-as.numeric(param.hat))^2)
1546
- if(epsilon1 < optim.epsilon) break;
1547
- if(round > 2){
1548
- epsilon2 <- sum((as.numeric(last.last.hat)-as.numeric(param.hat))^2)
1549
- if(epsilon2 < optim.epsilon) break;
1598
+ if (round > 1) {
1599
+ epsilon1 <- sum((as.numeric(last.hat) - as.numeric(param.hat))^2)
1600
+ if (epsilon1 < optim.epsilon) break
1601
+ if (round > 2) {
1602
+ epsilon2 <- sum((as.numeric(last.last.hat) - as.numeric(param.hat))^2)
1603
+ if (epsilon2 < optim.epsilon) break
1550
1604
  }
1551
1605
  }
1552
1606
  }
1553
- if(verbose) cat('\n')
1554
- if(is.na(param.hat[1]) | is.na(lim[1])) return(NULL);
1607
+ if (verbose) cat("\n")
1608
+ if (is.na(param.hat[1]) | is.na(lim[1])) return(NULL)
1555
1609
 
1556
1610
  # Mow distribution
1557
- lsd2 <- c();
1558
- lsd.pop <- c();
1559
- n.hat <- length(lsd1.pop)/diff(quant.est)
1560
- peak <- new('enve.RecPlot2.Peak', dist=dist, values=as.numeric(), mode=mode1,
1561
- param.hat=param.hat, n.hat=n.hat, n.total=n.total,
1562
- merge.logdist=merge.logdist, log=log)
1563
- peak.breaks <- seq(min(lsd1), max(lsd1), length=20)
1564
- peak.cnt <- enve.recplot2.__peakHist(peak,
1565
- (peak.breaks[-length(peak.breaks)]+peak.breaks[-1])/2)
1566
- for(i in 2:length(peak.breaks)){
1567
- values <- lsd1[ (lsd1 >= peak.breaks[i-1]) & (lsd1 < peak.breaks[i]) ]
1568
- n.exp <- peak.cnt[i-1]
1569
- if(is.na(n.exp) | n.exp==0) n.exp <- 0.1
1570
- if(length(values)==0) next
1571
- in.peak <- runif(length(values)) <= n.exp/length(values)
1611
+ lsd2 <- c()
1612
+ lsd.pop <- c()
1613
+ n.hat <- length(lsd1.pop) / diff(quant.est)
1614
+ peak <- new(
1615
+ "enve.RecPlot2.Peak", dist = dist, values = as.numeric(), mode = mode1,
1616
+ param.hat = param.hat, n.hat = n.hat, n.total = n.total,
1617
+ merge.logdist = merge.logdist, log = log
1618
+ )
1619
+ peak.breaks <- seq(min(lsd1), max(lsd1), length = 20)
1620
+ peak.cnt <- enve.recplot2.__peakHist(
1621
+ peak, (peak.breaks[-length(peak.breaks)] + peak.breaks[-1]) / 2
1622
+ )
1623
+ for (i in 2:length(peak.breaks)) {
1624
+ values <- lsd1[(lsd1 >= peak.breaks[i-1]) & (lsd1 < peak.breaks[i])]
1625
+ n.exp <- peak.cnt[i - 1]
1626
+ if (is.na(n.exp) | n.exp == 0) n.exp <- 0.1
1627
+ if (length(values) == 0) next
1628
+ in.peak <- runif(length(values)) <= n.exp / length(values)
1572
1629
  lsd2 <- c(lsd2, values[!in.peak])
1573
1630
  lsd.pop <- c(lsd.pop, values[in.peak])
1574
1631
  }
1575
- if(length(lsd.pop) < min.points) return(NULL)
1632
+ if (length(lsd.pop) < min.points) return(NULL)
1576
1633
 
1577
1634
  # Return peak
1578
- attr(peak, 'values') <- lsd.pop
1579
- attr(peak, 'values.res') <- lsd2
1580
- attr(peak, 'err.res') <- 1-(cor(hist(lsd.pop, breaks=peak.breaks,
1581
- plot=FALSE)$counts, hist(lsd1, breaks=peak.breaks,
1582
- plot=FALSE)$counts)+1)/2
1583
- mu <- tail(param.hat, n=1)
1584
- attr(peak, 'seq.depth') <- ifelse(log, exp(mu), mu)
1585
- if(verbose) cat(' Extracted peak with n =',length(lsd.pop),
1586
- 'with expected n =',n.hat,'\n')
1635
+ attr(peak, "values") <- lsd.pop
1636
+ attr(peak, "values.res") <- lsd2
1637
+ attr(peak, "err.res") <- 1 - 0.5 * (
1638
+ cor(
1639
+ hist(lsd.pop, breaks = peak.breaks, plot = FALSE)$counts,
1640
+ hist(lsd1, breaks = peak.breaks, plot = FALSE)$counts
1641
+ ) + 1
1642
+ )
1643
+ mu <- tail(param.hat, n = 1)
1644
+ attr(peak, "seq.depth") <- ifelse(log, exp(mu), mu)
1645
+ if(verbose)
1646
+ cat(" Extracted peak with n =", length(lsd.pop),
1647
+ "with expected n =", n.hat, "\n")
1587
1648
  return(peak)
1588
1649
  }
1589
1650
 
@@ -1593,17 +1654,18 @@ enve.recplot2.findPeaks.__mow_one <- function
1593
1654
  #'
1594
1655
  #' @param peaks.opts List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}
1595
1656
  #'
1657
+ #' @return A list of \code{enve.RecPlot2.Peak} objects.
1658
+ #'
1596
1659
  #' @author Luis M. Rodriguez-R [aut, cre]
1597
1660
  #'
1598
1661
  #' @export
1599
1662
 
1600
- enve.recplot2.findPeaks.__mower <- function
1601
- (peaks.opts){
1663
+ enve.recplot2.findPeaks.__mower <- function(peaks.opts) {
1602
1664
  peaks <- list()
1603
- while(length(peaks.opts$lsd1) > peaks.opts$min.points){
1665
+ while (length(peaks.opts$lsd1) > peaks.opts$min.points) {
1604
1666
  peak <- do.call(enve.recplot2.findPeaks.__mow_one, peaks.opts)
1605
- if(is.null(peak)) break
1606
- peaks[[ length(peaks)+1 ]] <- peak
1667
+ if (is.null(peak)) break
1668
+ peaks[[length(peaks) + 1]] <- peak
1607
1669
  peaks.opts$lsd1 <- peak$values.res
1608
1670
  }
1609
1671
  return(peaks)
@@ -1616,16 +1678,21 @@ enve.recplot2.findPeaks.__mower <- function
1616
1678
  #' @param peak Query \code{\link{enve.RecPlot2.Peak}} object
1617
1679
  #' @param peaks list of \code{\link{enve.RecPlot2.Peak}} objects
1618
1680
  #'
1681
+ #' @return A numeric index out of \code{peaks}.
1682
+ #'
1619
1683
  #' @author Luis M. Rodriguez-R [aut, cre]
1620
1684
  #'
1621
1685
  #' @export
1622
1686
 
1623
- enve.recplot2.__whichClosestPeak <- function
1624
- (peak, peaks){
1625
- dist <- as.numeric(lapply(peaks,
1626
- function(x)
1627
- abs(log(x$param.hat[[ length(x$param.hat) ]] /
1628
- peak$param.hat[[ length(peak$param.hat) ]] ))))
1629
- dist[ dist==0 ] <- Inf
1687
+ enve.recplot2.__whichClosestPeak <- function(peak, peaks){
1688
+ dist <- as.numeric(
1689
+ lapply(
1690
+ peaks,
1691
+ function(x)
1692
+ abs(log(x$param.hat[[length(x$param.hat)]] /
1693
+ peak$param.hat[[length(peak$param.hat)]]))
1694
+ )
1695
+ )
1696
+ dist[dist == 0] <- Inf
1630
1697
  return(which.min(dist))
1631
1698
  }