miga-base 0.4.3.0 → 0.5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -1,74 +1,105 @@
1
1
  #==============> Define S4 classes
2
- setClass("enve.GrowthCurve",
3
- ### Enve-omics representation of fitted growth curves.
4
- representation(
5
- design = "array", ##<< Experimental design of the experiment.
6
- models = "list", ##<< Fitted growth curve models.
7
- predict = "list", ##<< Fitted growth curve values.
8
- call='call') ##<< Call producing this object.
9
- ,package='enveomics.R');
2
+
3
+ #' Enveomics: Growth Curve S4 Class
4
+ #'
5
+ #' Enve-omics representation of fitted growth curves.
6
+ #'
7
+ #' @slot design \code{(array)} Experimental design of the experiment.
8
+ #' @slot models \code{(list)} Fitted growth curve models.
9
+ #' @slot predict \code{(list)} Fitted growth curve values.
10
+ #' @slot call \code{(call)} Call producing this object.
11
+ #'
12
+ #' @author Luis M. Rodriguez-R [aut, cre]
13
+ #'
14
+ #' @exportClass
15
+
16
+ enve.GrowthCurve <- setClass("enve.GrowthCurve",
17
+ representation(
18
+ design = "array",
19
+ models = "list",
20
+ predict = "list",
21
+ call='call')
22
+ ,package='enveomics.R');
23
+
24
+ #' Attribute accessor
25
+ #'
26
+ #' @param x Object
27
+ #' @param name Attribute name
10
28
  setMethod("$", "enve.GrowthCurve", function(x, name) attr(x, name))
11
29
 
30
+ #' Enveomics: Plot of Growth Curve
31
+ #'
32
+ #' Plots an \code{\link{enve.GrowthCurve}} object.
33
+ #'
34
+ #' @param x An \code{\link{enve.GrowthCurve}} object to plot.
35
+ #' @param col Base colors to use for the different samples. Can be recycled.
36
+ #' By default, grey for one sample or rainbow colors for more than one.
37
+ #' @param pt.alpha Color alpha for the observed data points, using \code{col}
38
+ #' as a base.
39
+ #' @param ln.alpha Color alpha for the fitted growth curve, using \code{col}
40
+ #' as a base.
41
+ #' @param ln.lwd Line width for the fitted curve.
42
+ #' @param ln.lty Line type for the fitted curve.
43
+ #' @param band.alpha Color alpha for the confidence interval band of the
44
+ #' fitted growth curve, using \code{col} as a base.
45
+ #' @param band.density Density of the filling pattern in the interval band.
46
+ #' If \code{NULL}, a solid color is used.
47
+ #' @param band.angle Angle of the density filling pattern in the interval
48
+ #' band. Ignored if \code{band.density} is \code{NULL}.
49
+ #' @param xp.alpha Color alpha for the line connecting individual experiments,
50
+ #' using \code{col} as a base.
51
+ #' @param xp.lwd Width of line for the experiments.
52
+ #' @param xp.lty Type of line for the experiments.
53
+ #' @param pch Point character for observed data points.
54
+ #' @param new Should a new plot be generated? If \code{FALSE}, the existing
55
+ #' canvas is used.
56
+ #' @param legend Should the plot include a legend? If \code{FALSE}, no legend
57
+ #' is added. If \code{TRUE}, a legend is added in the bottom-right corner.
58
+ #' Otherwise, a legend is added in the position specified as \code{xy.coords}.
59
+ #' @param add.params Should the legend include the parameters of the fitted
60
+ #' model?
61
+ #' @param ... Any other graphic parameters.
62
+ #'
63
+ #' @author Luis M. Rodriguez-R [aut, cre]
64
+ #'
65
+ #' @method plot enve.GrowthCurve
66
+ #' @export
67
+
12
68
  #==============> Define S4 methods
13
69
  plot.enve.GrowthCurve <- function
14
- ### Plots an `enve.GrowthCurve` object.
15
- (x,
16
- ### `enve.GrowthCurve` object to plot.
17
- col,
18
- ### Base colors to use for the different samples. Can be recycled. By
19
- ### default, grey for one sample or rainbow colors for more than one.
20
- pt.alpha=0.9,
21
- ### Color alpha for the observed data points, using `col` as a base.
22
- ln.alpha=1.0,
23
- ### Color alpha for the fitted growth curve, using `col` as a base.
24
- ln.lwd=1,
25
- ### Line width for the fitted curve.
26
- ln.lty=1,
27
- ### Line type for the fitted curve.
28
- band.alpha=0.4,
29
- ### Color alpha for the confidence interval band of the fitted growth curve,
30
- ### using `col` as a base.
31
- band.density=NULL,
32
- ### Density of the filling pattern in the interval band. If NULL, a solid
33
- ### color is used.
34
- band.angle=45,
35
- ### Angle of the density filling pattern in the interval band. Ignored if
36
- ### `band.density` is NULL.
37
- xp.alpha=0.5,
38
- ### Color alpha for the line connecting individual experiments, using `col`
39
- ### as a base.
40
- xp.lwd=1,
41
- ### Width of line for the experiments.
42
- xp.lty=1,
43
- ### Type of line for the experiments.
44
- pch=19,
45
- ### Point character for observed data points.
46
- new=TRUE,
47
- ### Should a new plot be generated? If FALSE, the existing canvas is used.
48
- legend=new,
49
- ### Should the plot include a legend? If FALSE, no legend is added. If TRUE,
50
- ### a legend is added in the bottom-right corner. Otherwise, a legend is
51
- ### added in the position specified as `xy.coords`.
52
- add.params=FALSE,
53
- ### Should the legend include the parameters of the fitted model?
54
- ...
55
- ### Any other graphic parameters.
56
- ){
70
+ (x,
71
+ col,
72
+ pt.alpha=0.9,
73
+ ln.alpha=1.0,
74
+ ln.lwd=1,
75
+ ln.lty=1,
76
+ band.alpha=0.4,
77
+ band.density=NULL,
78
+ band.angle=45,
79
+ xp.alpha=0.5,
80
+ xp.lwd=1,
81
+ xp.lty=1,
82
+ pch=19,
83
+ new=TRUE,
84
+ legend=new,
85
+ add.params=FALSE,
86
+ ...
87
+ ){
57
88
 
58
89
  # Arguments
59
90
  if(missing(col)){
60
91
  col <-
61
92
  if(length(x$design)==0) grey(0.2)
62
- else rainbow(length(x$design), v=3/5, s=3/5)
93
+ else rainbow(length(x$design), v=3/5, s=3/5)
63
94
  }
64
-
95
+
65
96
  if(new){
66
97
  # Initiate canvas
67
98
  od.fit.max <- max(sapply(x$predict, function(x) max(x[,"upr"])))
68
99
  od.obs.max <- max(sapply(x$models, function(x) max(x$data[,"od"])))
69
100
  opts <- list(...)
70
101
  plot.defaults <- list(xlab="Time", ylab="Density",
71
- xlim=range(x$predict[[1]][,"t"]), ylim=c(0, max(od.fit.max, od.obs.max)))
102
+ xlim=range(x$predict[[1]][,"t"]), ylim=c(0, max(od.fit.max, od.obs.max)))
72
103
  for(i in names(plot.defaults)){
73
104
  if(is.null(opts[[i]])) opts[[i]] <- plot.defaults[[i]]
74
105
  }
@@ -76,7 +107,7 @@ plot.enve.GrowthCurve <- function
76
107
  opts[["type"]] <- "n"
77
108
  do.call(plot, opts)
78
109
  }
79
-
110
+
80
111
  # Graphic default
81
112
  pch <- rep(pch, length.out=length(x$design))
82
113
  col <- rep(col, length.out=length(x$design))
@@ -102,8 +133,8 @@ plot.enve.GrowthCurve <- function
102
133
  d <- x$predict[[i]]
103
134
  lines(d[,"t"], d[,"fit"], col=ln.col[i], lwd=ln.lwd, lty=ln.lty)
104
135
  polygon(c(d[,"t"], rev(d[,"t"])), c(d[,"lwr"], rev(d[,"upr"])),
105
- border=NA, col=band.col[i], density=band.density[i],
106
- angle=band.angle[i])
136
+ border=NA, col=band.col[i], density=band.density[i],
137
+ angle=band.angle[i])
107
138
  }
108
139
  }
109
140
 
@@ -113,87 +144,117 @@ plot.enve.GrowthCurve <- function
113
144
  if(add.params){
114
145
  for(p in names(coef(x$models[[1]]))){
115
146
  legend.txt <- paste(legend.txt, ", ", p, "=",
116
- sapply(x$models, function(x) signif(coef(x)[p],2)) , sep="")
147
+ sapply(x$models, function(x) signif(coef(x)[p],2)) , sep="")
117
148
  }
118
149
  }
119
150
  legend(legend, legend=legend.txt, pch=pch, col=ln.col)
120
151
  }
121
152
  }
122
153
 
123
- summary.enve.GrowthCurve <- function(
124
- ### Summary of an `enve.GrowthCurve` object.
125
- object,
126
- ### `enve.GrowthCurve` object.
127
- ...
128
- ### No additional parameters are currently supported.
129
- ){
154
+ #' Enveomics: Summary of Growth Curve
155
+ #'
156
+ #' Summary of an \code{\link{enve.GrowthCurve}} object.
157
+ #'
158
+ #' @param object An \code{\link{enve.GrowthCurve}} object.
159
+ #' @param ... No additional parameters are currently supported.
160
+ #'
161
+ #' @author Luis M. Rodriguez-R [aut, cre]
162
+ #'
163
+ #' @method summary enve.GrowthCurve
164
+ #' @export
130
165
 
166
+ summary.enve.GrowthCurve <- function(
167
+ object,
168
+ ...
169
+ ){
170
+
131
171
  x <- object
132
172
  cat('===[ enve.GrowthCurves ]------------------\n')
133
173
  for(i in names(x$design)){
134
- cat(i, ':\n', sep='')
135
- if(x$models[[i]]$convInfo$isConv){
136
- for(j in names(coef(x$models[[i]]))){
137
- cat(' - ', j, ' = ', coef(x$models[[i]])[j], '\n', sep='')
138
- }
139
- }else{
140
- cat(' Model didn\'t converge:\n ',
141
- x$models[[i]]$convInfo$stopMessage, '\n', sep='')
142
- }
143
- cat(' ', nrow(x$models[[i]]$data), ' observations, ',
144
- length(unique(x$models[[i]]$data[,"replicate"])), ' replicates.\n',
145
- sep='')
174
+ cat(i, ':\n', sep='')
175
+ if(x$models[[i]]$convInfo$isConv){
176
+ for(j in names(coef(x$models[[i]]))){
177
+ cat(' - ', j, ' = ', coef(x$models[[i]])[j], '\n', sep='')
178
+ }
179
+ }else{
180
+ cat(' Model didn\'t converge:\n ',
181
+ x$models[[i]]$convInfo$stopMessage, '\n', sep='')
182
+ }
183
+ cat(' ', nrow(x$models[[i]]$data), ' observations, ',
184
+ length(unique(x$models[[i]]$data[,"replicate"])), ' replicates.\n',
185
+ sep='')
146
186
  }
147
187
  cat('------------------------------------------\n')
148
188
  cat('call:',as.character(attr(x,'call')),'\n')
149
189
  cat('------------------------------------------\n')
150
190
  }
151
191
 
192
+ #' Enveomics: Growth Curve
193
+ #'
194
+ #' Calculates growth curves using the logistic growth function.
195
+ #'
196
+ #' @param x Data frame (or coercible) containing the observed growth data
197
+ #' (e.g., O.D. values). Each column is an independent growth curve and each
198
+ #' row is a time point. \code{NA}'s are allowed.
199
+ #' @param times Vector with the times at which each row was taken. By default,
200
+ #' all rows are assumed to be part of constantly periodic measurements.
201
+ #' @param triplicates If \code{TRUE}, the columns are assumed to be sorted by
202
+ #' sample with three replicates by sample. It requires a number of columns
203
+ #' multiple of 3.
204
+ #' @param design Experimental design of the data. An \strong{array} of mode list
205
+ #' with sample names as index and the list of column names in each sample as
206
+ #' the values. By default, each column is assumed to be an independent sample
207
+ #' if \code{triplicates} is \code{FALSE}, or every three columns are assumed
208
+ #' to be a sample if \code{triplicates} is \code{TRUE}. In the latter case,
209
+ #' samples are simply numbered.
210
+ #' @param new.times Values of time for the fitted curve.
211
+ #' @param level Confidence (or prediction) interval in the fitted curve.
212
+ #' @param interval Type of interval to be calculated for the fitted curve.
213
+ #' @param plot Should the growth curve be plotted?
214
+ #' @param FUN Function to fit. By default: logistic growth with paramenters
215
+ #' \code{K}: carrying capacity,
216
+ #' \code{r}: intrinsic growth rate, and
217
+ #' \code{P0}: Initial population.
218
+ #' @param nls.opt Any additional options passed to \code{nls}.
219
+ #' @param ... Any additional parameters to be passed to
220
+ #' \code{plot.enve.GrowthCurve}.
221
+ #'
222
+ #' @return Returns an \code{\link{enve.GrowthCurve}} object.
223
+ #'
224
+ #' @author Luis M. Rodriguez-R [aut, cre]
225
+ #'
226
+ #' @examples
227
+ #' # Load data
228
+ #' data("growth.curves", package="enveomics.R", envir=environment())
229
+ #' # Generate growth curves with different colors
230
+ #' g <- enve.growthcurve(growth.curves[,-1], growth.curves[,1], triplicates=TRUE)
231
+ #' # Generate black-and-white growth curves with different symbols
232
+ #' plot(g, pch=15:17, col="black", band.density=45, band.angle=c(-45,45,0))
233
+ #'
234
+ #' @export
235
+
152
236
  #==============> Core functions
153
237
  enve.growthcurve <- structure(function(
154
- ### Calculates growth curves using the logistic growth function.
155
- x,
156
- ### Data frame (or coercible) containing the observed growth data (e.g.,
157
- ### O.D. values). Each column is an independent growth curve and each
158
- ### row is a time point. NA's are allowed.
159
- times=1:nrow(x),
160
- ### Vector with the times at which each row was taken. By default, all
161
- ### rows are assumed to be part of constantly periodic measurements.
162
- triplicates=FALSE,
163
- ### If TRUE, the columns are assumed to be sorted by sample with three
164
- ### replicates by sample. It requires a number of columns multiple of 3.
165
- design,
166
- ### Experimental design of the data. An `array` of mode list with sample
167
- ### names as index and the list of column names in each sample as the
168
- ### values. By default, each column is assumed to be an independent sample
169
- ### if `triplicates` is FALSE, or every three columns are assumed to be a
170
- ### sample if `triplicates` is TRUE. In the latter case, samples are
171
- ### simply numbered.
172
- new.times=seq(min(times), max(times), length.out=length(times)*10),
173
- ### Values of time for the fitted curve.
174
- level=0.95,
175
- ### Confidence (or prediction) interval in the fitted curve.
176
- interval=c("confidence","prediction"),
177
- ### Type of interval to be calculated for the fitted curve.
178
- plot=TRUE,
179
- ### Should the growth curve be plotted?
180
- FUN=function(t,K,r,P0) K*P0*exp(r*t)/(K+P0*(exp(r*t)-1)),
181
- ### Function to fit. By default: logistic growth with paramenters `K`:
182
- ### carrying capacity, `r`: intrinsic growth rate, and `P0`: Initial
183
- ### population.
184
- nls.opt=list(),
185
- ### Any additional options passed to `nls`.
186
- ...
187
- ### Any additional parameters to be passed to `plot.enve.GrowthCurve`.
188
- ){
238
+ x,
239
+ times=1:nrow(x),
240
+ triplicates=FALSE,
241
+ design,
242
+ new.times=seq(min(times), max(times), length.out=length(times)*10),
243
+ level=0.95,
244
+ interval=c("confidence","prediction"),
245
+ plot=TRUE,
246
+ FUN=function(t,K,r,P0) K*P0*exp(r*t)/(K+P0*(exp(r*t)-1)),
247
+ nls.opt=list(),
248
+ ...
249
+ ){
189
250
 
190
251
  # Arguments
191
252
  if(missing(design)){
192
253
  design <-
193
254
  if(triplicates)
194
255
  tapply(colnames(x), colnames(x)[rep(1:(ncol(x)/3)*3-2, each=3)], c,
195
- simplify=FALSE)
196
- else tapply(colnames(x), colnames(x), c, simplify=FALSE)
256
+ simplify=FALSE)
257
+ else tapply(colnames(x), colnames(x), c, simplify=FALSE)
197
258
  }
198
259
  mod <- list()
199
260
  fit <- list()
@@ -207,18 +268,18 @@ enve.growthcurve <- structure(function(
207
268
  od <- c(od, x[,col])
208
269
  }
209
270
  data <- data.frame(t=rep(times, length(design[[sample]])), od=od,
210
- replicate=rep(1:length(design[[sample]]), each=length(times)))
271
+ replicate=rep(1:length(design[[sample]]), each=length(times)))
211
272
  data <- data[!is.na(data$od),]
212
273
  opts <- nls.opt
213
274
  opts[["data"]] <- data
214
275
  opt.defaults <- list(formula = od ~ enve._growth.fx(t, K, r, P0),
215
- algorithm="port", lower=list(P0=1e-16),
216
- control=nls.control(warnOnly=TRUE),
217
- start=list(
218
- K = 2*max(data$od),
219
- r = length(times)/max(data$t),
220
- P0 = min(data$od[data$od>0])
221
- ))
276
+ algorithm="port", lower=list(P0=1e-16),
277
+ control=nls.control(warnOnly=TRUE),
278
+ start=list(
279
+ K = 2*max(data$od),
280
+ r = length(times)/max(data$t),
281
+ P0 = min(data$od[data$od>0])
282
+ ))
222
283
  for(i in names(opt.defaults)){
223
284
  if(is.null(opts[[i]])){
224
285
  opts[[i]] <- opt.defaults[[i]]
@@ -226,16 +287,15 @@ enve.growthcurve <- structure(function(
226
287
  }
227
288
  mod[[sample]] <- do.call(nls, opts)
228
289
  fit[[sample]] <- cbind(t=new.times,
229
- predFit(mod[[sample]], level=level, interval=interval,
230
- newdata=data.frame(t=new.times)))
290
+ predFit(mod[[sample]], level=level, interval=interval,
291
+ newdata=data.frame(t=new.times)))
231
292
  }
232
293
  enve._growth.fx <<- NULL
233
294
  gc <- new("enve.GrowthCurve",
234
- design=design, models=mod, predict=fit,
235
- call=match.call());
295
+ design=design, models=mod, predict=fit,
296
+ call=match.call());
236
297
  if(plot) plot(gc, ...);
237
298
  return(gc)
238
- ### Returns an `enve.GrowthCurve` object.
239
299
  }, ex=function(){
240
300
  # Load data
241
301
  data("growth.curves", package="enveomics.R", envir=environment())
@@ -245,13 +305,21 @@ enve.growthcurve <- structure(function(
245
305
  plot(g, pch=15:17, col="black", band.density=45, band.angle=c(-45,45,0))
246
306
  });
247
307
 
308
+ #' Enveomics: Color to Alpha
309
+ #'
310
+ #' Takes a vector of colors and sets the alpha.
311
+ #'
312
+ #' @param x A vector of any value base colors.
313
+ #' @param alpha Alpha level to set (in the 0-1 range).
314
+ #'
315
+ #' @author Luis M. Rodriguez-R [aut, cre]
316
+ #'
317
+ #' @export
318
+
248
319
  enve.col2alpha <- function(
249
- ### Takes a vector of colors and sets the alpha.
250
- x,
251
- ### A vector of any value base colors.
252
- alpha
253
- ### Alpha level to set (in the 0-1 range).
254
- ){
320
+ x,
321
+ alpha
322
+ ){
255
323
  out <- c()
256
324
  for(i in x){
257
325
  opt <- as.list(col2rgb(i)[,1]/256)
@@ -1,319 +1,354 @@
1
- enve.recplot <- structure(function(
2
- ### Produces recruitment plots provided that BlastTab.catsbj.pl has
3
- ### been previously executed. Requires the gplots library.
4
- prefix,
5
- ### Path to the prefix of the BlastTab.catsbj.pl output files. At
6
- ### least the files .rec and .lim must exist with this prefix.
7
-
8
- # Id. hist.
9
- id.min=NULL,
10
- ### Minimum identity to be considered. By default, the minimum detected
11
- ### identity. This value is a percentage.
12
- id.max=NULL,
13
- ### Maximum identity to be considered. By default, 100.
14
- id.binsize=NULL,
15
- ### Size of the identity bins (vertical histograms). By default, 0.1 for
16
- ### identity metrics and 5 for bit score.
17
- id.splines=0,
18
- ### Smoothing parameter for the splines in the identity histogram. Zero (0) for no
19
- ### splines. A generally good value is 1/2. If non-zero, requires the stats package.
20
- id.metric='id',
21
- ### Metric of identity to be used (Y-axis). It can be any unambiguous prefix
22
- ### of "identity", "corrected identity", or "bit score".
23
- id.summary='sum',
24
- ### Method used to build the identity histogram (Horizontal axis of the right panel).
25
- ### It can be any unambiguous prefix of "sum", "average", "median", "90% lower bound",
26
- ### "90% upper bound", "95% lower bound", and "95% upper bound". The last four options
27
- ### correspond to the upper and lower boundaries of the 90% and 95% empirical confidence
28
- ### intervals.
29
-
30
- # Pos. hist.
31
- pos.min=1,
32
- ### Minimum (leftmost) position in the reference (concatenated) genome (in bp).
33
- pos.max=NULL,
34
- ### Maximum (rightmost) position in the reference (concatenated) genome (in bp).
35
- ### By default: Length of the genome.
36
- pos.binsize=1e3,
37
- ### Size of the position bins (horizontal histograms) in bp.
38
- pos.splines=0,
39
- ### Smoothing parameter for the splines in the position histogram. Zero (0) for no splines.
40
- ### If non-zero, requires the stats package.
41
-
42
- # Rec. plot
43
- rec.col1='white',
44
- ### Lightest color in the recruitment plot.
45
- rec.col2='black',
46
- ### Darkest color in the recruitment plot.
47
-
48
- # General
49
- main=NULL,
50
- ### Title of the plot.
51
- contig.col=grey(0.85),
52
- ### Color of the Contig boundaries. Set to NA to ignore Contig boundaries.
53
-
54
- # Return
55
- ret.recplot=FALSE,
56
- ### Indicates if the matrix of the recruitment plot is to be returned.
57
- ret.hist=FALSE,
58
- ### Ignored, for backwards compatibility.
59
- ret.mode=FALSE,
60
- ### Indicates if the mode of the identity is to be computed. It requires the modeest
61
- ### package.
62
-
63
- # General
64
- id.cutoff=NULL,
65
- ### Minimum identity to consider an alignment as "top". By default, it is 0.95 for the
66
- ### identity metrics and 95% of the best scoring alignment for bit score.
67
- verbose=TRUE,
68
- ### Indicates if the function should report the advance.
69
- ...
70
- ### Any additional graphic parameters to be passed to plot for all panels except the
71
- ### recruitment plot (lower-left).
72
- ){
73
-
74
- # Settings
75
- METRICS <- c('identity', 'corrected identity', 'bit score');
76
- SUMMARY <- c('sum', 'average', 'median', '');
77
- if(is.null(prefix)) stop('Parameter prefix is mandatory.');
78
- if(!requireNamespace("gplots", quietly=TRUE)) stop('Unavailable gplots library.');
79
-
80
- # Read files
81
- if(verbose) cat("Reading files.\n")
82
- rec <- read.table(paste(prefix, '.rec', sep=''), sep="\t", comment.char='', quote='');
83
- lim <- read.table(paste(prefix, '.lim', sep=''), sep="\t", comment.char='', quote='');
84
-
85
- # Configure ID summary
86
- id.summary <- pmatch(id.summary, SUMMARY);
87
- if(is.na(id.summary)) stop('Invalid identity summary.');
88
- if(id.summary == -1) stop('Ambiguous identity summary.');
89
- if(id.summary==1){
90
- id.summary.func <- function(x) colSums(x);
91
- id.summary.name <- 'sum'
92
- }else if(id.summary==2){
93
- id.summary.func <- function(x) colMeans(x);
94
- id.summary.name <- 'mean'
95
- }else if(id.summary==3){
96
- id.summary.func <- function(x) apply(x,2,median);
97
- id.summary.name <- 'median'
98
- }else if(id.summary==4){
99
- id.summary.func <- function(x) apply(x,2,quantile,probs=0.05,names=FALSE);
100
- id.summary.name <- '90% LB'
101
- }else if(id.summary==5){
102
- id.summary.func <- function(x) apply(x,2,quantile,probs=0.95,names=FALSE);
103
- id.summary.name <- '90% UB'
104
- }else if(id.summary==6){
105
- id.summary.func <- function(x) apply(x,2,quantile,probs=0.025,names=FALSE);
106
- id.summary.name <- '95% LB'
107
- }else if(id.summary==7){
108
- id.summary.func <- function(x) apply(x,2,quantile,probs=0.975,names=FALSE);
109
- id.summary.name <- '95% UB'
110
- }
111
-
112
- # Configure metrics
113
- id.metric <- pmatch(id.metric, METRICS);
114
- if(is.na(id.metric)) stop('Invalid identity metric.');
115
- if(id.metric == -1) stop('Ambiguous identity metric.');
116
- if(id.metric==1){
117
- id.reccol <- 3
118
- id.shortname <- 'Id.'
119
- id.fullname <- 'Identity'
120
- id.units <- '%'
121
- id.hallmarks <- seq(0, 100, by=5)
122
- if(is.null(id.max)) id.max <- 100
123
- if(is.null(id.cutoff)) id.cutoff <- 95
124
- if(is.null(id.binsize)) id.binsize <- 0.1
125
- }else if(id.metric==2){
126
- if(ncol(rec)<6) stop("Requesting corrected identity, but .rec file doesn't have 6th column")
127
- id.reccol <- 6
128
- id.shortname <- 'cId.'
129
- id.fullname <- 'Corrected identity'
130
- id.units <- '%'
131
- id.hallmarks <- seq(0, 100, by=5)
132
- if(is.null(id.max)) id.max <- 100
133
- if(is.null(id.cutoff)) id.cutoff <- 95
134
- if(is.null(id.binsize)) id.binsize <- 0.1
135
- }else if(id.metric==3){
136
- id.reccol <- 4
137
- id.shortname <- 'BSc.'
138
- id.fullname <- 'Bit score'
139
- id.units <- 'bits'
140
- max.bs <- max(rec[, id.reccol])
141
- id.hallmarks <- seq(0, max.bs*1.2, by=50)
142
- if(is.null(id.max)) id.max <- max.bs
143
- if(is.null(id.cutoff)) id.cutoff <- 0.95 * max.bs
144
- if(is.null(id.binsize)) id.binsize <- 5
145
- }
146
- if(is.null(id.min)) id.min <- min(rec[, id.reccol]);
147
- if(is.null(pos.max)) pos.max <- max(lim[, 3]);
148
- id.lim <- c(id.min, id.max);
149
- pos.lim <- c(pos.min, pos.max)/1e6;
150
- id.breaks <- round((id.max-id.min)/id.binsize);
151
- pos.breaks <- round((pos.max-pos.min)/pos.binsize);
152
- if(is.null(main)) main <- paste('Recruitment plot of ', prefix, sep='');
153
- pos.marks=seq(pos.min, pos.max, length.out=pos.breaks+1)/1e6;
154
- id.marks=seq(id.min, id.max, length.out=id.breaks+1);
155
- id.topclasses <- 0;
156
- for(i in length(id.marks):1) if(id.marks[i]>id.cutoff) id.topclasses <- id.topclasses + 1;
157
-
158
- # Set-up image
159
- layout(matrix(c(3,4,1,2), nrow=2, byrow=TRUE), widths=c(2,1), heights=c(1,2));
160
- out <- list();
161
-
162
- # Recruitment plot
163
- if(verbose) cat("Rec. plot.\n")
164
- par(mar=c(5,4,0,0)+0.1);
165
- rec.hist <- matrix(0, nrow=pos.breaks, ncol=id.breaks);
166
- for(i in 1:nrow(rec)){
167
- id.class <- ceiling((id.breaks)*((rec[i, id.reccol]-id.min)/(id.max-id.min)));
168
- if(id.class<=id.breaks & id.class>0){
169
- for(pos in rec[i, 1]:rec[i, 2]){
170
- pos.class <- ceiling((pos.breaks)*((pos-pos.min)/(pos.max-pos.min)));
171
- if(pos.class<=pos.breaks & pos.class>0) rec.hist[pos.class, id.class] <- rec.hist[pos.class, id.class]+1;
172
- }
173
- }
174
- }
175
- id.top <- c((1-id.topclasses):0) + id.breaks;
176
- rec.col=gplots::colorpanel(256, rec.col1, rec.col2);
177
- image(x=pos.marks, y=id.marks, z=log10(rec.hist),
178
- breaks=seq(0, log10(max(rec.hist)), length.out=1+length(rec.col)), col=rec.col,
179
- xlim=pos.lim, ylim=id.lim, xlab='Position in genome (Mbp)',
180
- ylab=paste(id.fullname, ' (',id.units,')', sep=''), xaxs='i', yaxs='r');
181
- if(!is.na(contig.col)) abline(v=c(lim$V2, lim$V3)/1e6, lty=1, col=contig.col);
182
- abline(h=id.hallmarks, lty=2, col=grey(0.7));
183
- abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
184
- legend('bottomleft', 'Rec. plot', bg=rgb(1,1,1,2/3));
185
- out <- c(out, list(pos.marks=pos.marks, id.marks=id.marks));
186
- if(ret.recplot) out <- c(out, list(recplot=rec.hist));
187
-
188
- # Identity histogram
189
- if(verbose) cat(id.shortname, " hist.\n", sep='')
190
- par(mar=c(5,0,0,2)+0.1);
191
- id.hist <- id.summary.func(rec.hist);
192
- plot(1, t='n', xlim=c(1, max(id.hist)), ylim=id.lim, ylab='', yaxt='n', xlab=paste('Sequences (bp),', id.summary.name), log='x', ...);
193
- id.x <- rep(id.marks, each=2)[2:(id.breaks*2+1)]
194
- id.f <- rep(id.hist, each=2)[1:(id.breaks*2)]
195
- if(sum(id.f)>0){
196
- lines(id.f, id.x, lwd=ifelse(id.splines>0, 1/2, 2), type='o', pch='.');
197
- if(id.splines>0){
198
- id.spline <- smooth.spline(id.x[id.f>0], log(id.f[id.f>0]), spar=id.splines)
199
- lines(exp(id.spline$y), id.spline$x, lwd=2)
200
- }
201
- }
202
-
203
- abline(h=id.hallmarks, lty=2, col=grey(0.7));
204
- abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
205
- legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3));
206
- out <- c(out, list(id.mean=mean(rec[, id.reccol])));
207
- out <- c(out, list(id.median=median(rec[, id.reccol])));
208
- if(ret.hist) out <- c(out, list(id.hist=id.hist));
1
+ #' Enveomics: Recruitment Plots
2
+ #'
3
+ #' @description
4
+ #' Produces recruitment plots provided that BlastTab.catsbj.pl has
5
+ #' been previously executed. Requires the \pkg{gplots} library.
6
+ #'
7
+ #' @param prefix
8
+ #' Path to the prefix of the BlastTab.catsbj.pl output files. At
9
+ #' least the files \strong{.rec} and \strong{.lim} must exist with this prefix.
10
+ #' @param id.min
11
+ #' Minimum identity to be considered. By default, the minimum detected
12
+ #' identity. This value is a percentage.
13
+ #' @param id.max
14
+ #' Maximum identity to be considered. By default, 100\%.
15
+ #' @param id.binsize
16
+ #' Size of the identity bins (vertical histograms). By default, 0.1 for
17
+ #' identity metrics and 5 for bit score.
18
+ #' @param id.splines
19
+ #' Smoothing parameter for the splines in the identity histogram. Zero (0) for no
20
+ #' splines. A generally good value is 1/2. If non-zero, requires the \pkg{stats} package.
21
+ #' @param id.metric
22
+ #' Metric of identity to be used (Y-axis).
23
+ #' It can be any unambiguous prefix of:
24
+ #' \itemize{
25
+ #' \item "identity"
26
+ #' \item "corrected identity"
27
+ #' \item "bit score"}
28
+ #' @param id.summary
29
+ #' Method used to build the identity histogram (Horizontal axis of the right panel).
30
+ #' It can be any unambiguous prefix of:
31
+ #' \itemize{
32
+ #' \item "sum"
33
+ #' \item "average"
34
+ #' \item "median"
35
+ #' \item "90\% lower bound"
36
+ #' \item "90\% upper bound"
37
+ #' \item "95\% lower bound"
38
+ #' \item "95\% upper bound" }
39
+ #' The last four options
40
+ #' correspond to the upper and lower boundaries of the 90\% and 95\% empirical confidence
41
+ #' intervals.
42
+ #' @param pos.min
43
+ #' Minimum (leftmost) position in the reference (concatenated) genome (in bp).
44
+ #' @param pos.max
45
+ #' Maximum (rightmost) position in the reference (concatenated) genome (in bp).
46
+ #' By default: Length of the genome.
47
+ #' @param pos.binsize
48
+ #' Size of the position bins (horizontal histograms) in bp.
49
+ #' @param pos.splines
50
+ #' Smoothing parameter for the splines in the position histogram. Zero (0) for no splines.
51
+ #' If non-zero, requires the stats package.
52
+ #' @param rec.col1
53
+ #' Lightest color in the recruitment plot.
54
+ #' @param rec.col2
55
+ #' Darkest color in the recruitment plot.
56
+ #' @param main
57
+ #' Title of the plot.
58
+ #' @param contig.col
59
+ #' Color of the Contig boundaries. Set to \code{NA} to ignore Contig boundaries.
60
+ #' @param ret.recplot
61
+ #' Indicates if the matrix of the recruitment plot is to be returned.
62
+ #' @param ret.hist
63
+ #' Ignored, for backwards compatibility.
64
+ #' @param ret.mode
65
+ #' Indicates if the mode of the identity is to be computed. It requires the
66
+ #' \pkg{modeest} package.
67
+ #' @param id.cutoff
68
+ #' Minimum identity to consider an alignment as "top". By default, it is 0.95 for the
69
+ #' identity metrics and 95\% of the best scoring alignment for bit score.
70
+ #' @param verbose
71
+ #' Indicates if the function should report the advance.
72
+ #' @param ...
73
+ #' Any additional graphic parameters to be passed to plot for all panels except the
74
+ #' recruitment plot (lower-left).
75
+ #'
76
+ #' @return
77
+ #'
78
+ #' Returns a list with the following elements:
79
+ #'
80
+ #' \describe{
81
+ #' \item{\code{pos.marks}}{Midpoints of the position histogram.}
82
+ #' \item{\code{id.matrix}}{Midpoints of the identity histogram.}
83
+ #' \item{\code{recplot}}{Matrix containing the recruitment plot values
84
+ #' (if \code{ret.recplot=TRUE}).}
85
+ #' \item{\code{id.mean}}{Mean identity.}
86
+ #' \item{\code{id.median}}{Median identity.}
87
+ #' \item{\code{id.mode}}{Mode of the identity (if \code{ret.mode=TRUE}). Deprecated.}
88
+ #' \item{\code{id.hist}}{Values of the identity histogram (if \code{ret.hist=TRUE}).}
89
+ #' \item{\code{pos.hist.low}}{Values of the position histogram (depth) with "low"
90
+ #' identity (i.e., below id.cutoff) (if \code{ret.hist=TRUE}).}
91
+ #' \item{\code{pos.hist.top}}{Values of the position histogram (depth) with "top"
92
+ #' identity (i.e., above id.cutoff) (if \code{ret.hist=TRUE}).}
93
+ #' \item{\code{id.max}}{Value of \code{id.max}. This is returned because
94
+ #' \code{id.max=NULL} may vary.}
95
+ #' \item{\code{id.cutoff}}{Value of \code{id.cutoff}.
96
+ #' This is returned because \code{id.cutoff=NULL} may vary.}
97
+ #' \item{\code{seqdepth.mean.top}}{Average sequencing depth with identity above
98
+ #' \code{id.cutoff}.}
99
+ #' \item{\code{seqdepth.mean.low}}{Average sequencing depth with identity below
100
+ #' \code{id.cutoff}.}
101
+ #' \item{\code{seqdepth.mean.all}}{Average sequencing depth without identity filtering.}
102
+ #' \item{\code{seqdepth.median.top}}{Median sequencing depth with identity above
103
+ #' \code{id.cutoff}.}
104
+ #' \item{\code{seqdepth.median.low}}{Median sequencing depth with identity below
105
+ #' \code{id.cutoff}.}
106
+ #' \item{\code{seqdepth.median.all}}{Median sequencing depth without identity filtering.}
107
+ #' \item{\code{id.metric}}{Full name of the used identity metric.}
108
+ #' \item{\code{id.summary}}{Full name of the summary method used to build the identity plot.}}
109
+ #'
110
+ #' @author Luis M. Rodriguez-R [aut, cre]
111
+ #'
112
+ #' @export
209
113
 
210
- # Position histogram
211
- if(verbose) cat("Pos. hist.\n")
212
- par(mar=c(0,4,4,0)+0.1);
213
- h1<-rep(0,nrow(rec.hist)) ;
214
- h2<-rep(0,nrow(rec.hist)) ;
215
- pos.winsize <- (pos.max-pos.min+1)/pos.breaks;
216
- if(sum(rec.hist[, id.top])>0) h1 <- rowSums(matrix(rec.hist[, id.top], nrow=nrow(rec.hist)))/pos.winsize;
217
- if(sum(rec.hist[,-id.top])>0) h2 <- rowSums(matrix(rec.hist[,-id.top], nrow=nrow(rec.hist)))/pos.winsize;
218
-
219
- ymin <- min(1, h1[h1>0], h2[h2>0]);
220
- ymax <- max(10, h1, h2);
221
- if(is.na(ymin) || ymin<=0) ymin <- 1e-10;
222
- if(is.na(ymax) || ymax<=0) ymax <- 1;
223
- plot(1, t='n', xlab='', xaxt='n', ylab='Sequencing depth (X)', log='y', xlim=pos.lim,
224
- ylim=c(ymin, ymax), xaxs='i', main=main, ...);
225
- if(!is.na(contig.col)) abline(v=c(lim[,2], lim[,3])/1e6, lty=1, col=contig.col);
226
- abline(h=10^c(0:5), lty=2, col=grey(0.7));
227
- if(sum(h2)>0){
228
- h2.x <- rep(pos.marks, each=2)[2:(pos.breaks*2+1)]
229
- h2.y <- rep(h2, each=2)[1:(pos.breaks*2)]
230
- lines(h2.x, h2.y, lwd=ifelse(pos.splines>0, 1/2, 2), col=grey(0.5));
231
- if(pos.splines>0){
232
- h2.spline <- smooth.spline(h2.x[h2.y>0], log(h2.y[h2.y>0]), spar=pos.splines)
233
- lines(h2.spline$x, exp(h2.spline$y), lwd=2, col=grey(0.5))
234
- }
235
- if(ret.hist) out <- c(out, list(pos.hist.low=h2.y));
236
- }
237
- if(sum(h1)>0){
238
- h1.x <- rep(pos.marks, each=2)[2:(pos.breaks*2+1)]
239
- h1.y <- rep(h1, each=2)[1:(pos.breaks*2)]
240
- lines(h1.x, h1.y, lwd=ifelse(pos.splines>0, 1/2, 2), col=grey(0));
241
- if(pos.splines>0){
242
- h1.spline <- smooth.spline(h1.x[h1.y>0], log(h1.y[h1.y>0]), spar=pos.splines)
243
- lines(h1.spline$x, exp(h1.spline$y), lwd=2, col=grey(0))
244
- }
245
- if(ret.hist) out <- c(out, list(pos.hist.top=h1.y));
246
- }
247
- legend('topleft', 'Pos. histogram', bg=rgb(1,1,1,2/3));
248
- out <- c(out, list(id.max=id.max, id.cutoff=id.marks[id.top[1]]));
249
- out <- c(out, list(seqdepth.mean.top=mean(h1)));
250
- out <- c(out, list(seqdepth.mean.low=mean(h2)));
251
- out <- c(out, list(seqdepth.mean=mean(h1+h2)));
252
- out <- c(out, list(seqdepth.median.top=median(h1)));
253
- out <- c(out, list(seqdepth.median.low=median(h2)));
254
- out <- c(out, list(seqdepth.median=median(h1+h2)));
255
- out <- c(out, list(id.metric=id.fullname));
256
- out <- c(out, list(id.summary=id.summary.name));
257
-
258
- # Legend
259
- par(mar=c(0,0,4,2)+0.1);
260
- plot(1, t='n', xlab='', xaxt='n', ylab='', yaxt='n', xlim=c(0,1), ylim=c(0,1), xaxs='r', yaxs='i', ...);
261
- text(1/2, 5/6, labels=paste('Reads per ', signif((pos.max-pos.min)/pos.breaks, 2), ' bp (rec. plot)', sep=''), pos=3);
262
- leg.col <- gplots::colorpanel(100, rec.col1, rec.col2);
263
- leg.lab <- signif(10^seq(0, log10(max(rec.hist)), length.out=10), 2);
264
- for(i in 1:10){
265
- for(j in 1:10){
266
- k <- (i-1)*10 + j;
267
- polygon(c(k-1, k, k, k-1)/100, c(2/3, 2/3, 5/6, 5/6), border=leg.col[k], col=leg.col[k]);
114
+ enve.recplot <- structure(function(
115
+ prefix,
116
+
117
+ # Id. hist.
118
+ id.min=NULL,
119
+ id.max=NULL,
120
+ id.binsize=NULL,
121
+ id.splines=0,
122
+ id.metric='id',
123
+ id.summary='sum',
124
+
125
+ # Pos. hist.
126
+ pos.min=1,
127
+ pos.max=NULL,
128
+ pos.binsize=1e3,
129
+ pos.splines=0,
130
+
131
+ # Rec. plot
132
+ rec.col1='white',
133
+ rec.col2='black',
134
+
135
+ # General
136
+ main=NULL,
137
+ contig.col=grey(0.85),
138
+
139
+ # Return
140
+ ret.recplot=FALSE,
141
+ ret.hist=FALSE,
142
+ ret.mode=FALSE,
143
+
144
+ # General
145
+ id.cutoff=NULL,
146
+ verbose=TRUE,
147
+ ...
148
+ ){
149
+
150
+ # Settings
151
+ METRICS <- c('identity', 'corrected identity', 'bit score');
152
+ SUMMARY <- c('sum', 'average', 'median', '');
153
+ if(is.null(prefix)) stop('Parameter prefix is mandatory.');
154
+ if(!requireNamespace("gplots", quietly=TRUE)) stop('Unavailable gplots library.');
155
+
156
+ # Read files
157
+ if(verbose) cat("Reading files.\n")
158
+ rec <- read.table(paste(prefix, '.rec', sep=''), sep="\t", comment.char='', quote='');
159
+ lim <- read.table(paste(prefix, '.lim', sep=''), sep="\t", comment.char='', quote='');
160
+
161
+ # Configure ID summary
162
+ id.summary <- pmatch(id.summary, SUMMARY);
163
+ if(is.na(id.summary)) stop('Invalid identity summary.');
164
+ if(id.summary == -1) stop('Ambiguous identity summary.');
165
+ if(id.summary==1){
166
+ id.summary.func <- function(x) colSums(x);
167
+ id.summary.name <- 'sum'
168
+ }else if(id.summary==2){
169
+ id.summary.func <- function(x) colMeans(x);
170
+ id.summary.name <- 'mean'
171
+ }else if(id.summary==3){
172
+ id.summary.func <- function(x) apply(x,2,median);
173
+ id.summary.name <- 'median'
174
+ }else if(id.summary==4){
175
+ id.summary.func <- function(x) apply(x,2,quantile,probs=0.05,names=FALSE);
176
+ id.summary.name <- '90% LB'
177
+ }else if(id.summary==5){
178
+ id.summary.func <- function(x) apply(x,2,quantile,probs=0.95,names=FALSE);
179
+ id.summary.name <- '90% UB'
180
+ }else if(id.summary==6){
181
+ id.summary.func <- function(x) apply(x,2,quantile,probs=0.025,names=FALSE);
182
+ id.summary.name <- '95% LB'
183
+ }else if(id.summary==7){
184
+ id.summary.func <- function(x) apply(x,2,quantile,probs=0.975,names=FALSE);
185
+ id.summary.name <- '95% UB'
186
+ }
187
+
188
+ # Configure metrics
189
+ id.metric <- pmatch(id.metric, METRICS);
190
+ if(is.na(id.metric)) stop('Invalid identity metric.');
191
+ if(id.metric == -1) stop('Ambiguous identity metric.');
192
+ if(id.metric==1){
193
+ id.reccol <- 3
194
+ id.shortname <- 'Id.'
195
+ id.fullname <- 'Identity'
196
+ id.units <- '%'
197
+ id.hallmarks <- seq(0, 100, by=5)
198
+ if(is.null(id.max)) id.max <- 100
199
+ if(is.null(id.cutoff)) id.cutoff <- 95
200
+ if(is.null(id.binsize)) id.binsize <- 0.1
201
+ }else if(id.metric==2){
202
+ if(ncol(rec)<6) stop("Requesting corrected identity, but .rec file doesn't have 6th column")
203
+ id.reccol <- 6
204
+ id.shortname <- 'cId.'
205
+ id.fullname <- 'Corrected identity'
206
+ id.units <- '%'
207
+ id.hallmarks <- seq(0, 100, by=5)
208
+ if(is.null(id.max)) id.max <- 100
209
+ if(is.null(id.cutoff)) id.cutoff <- 95
210
+ if(is.null(id.binsize)) id.binsize <- 0.1
211
+ }else if(id.metric==3){
212
+ id.reccol <- 4
213
+ id.shortname <- 'BSc.'
214
+ id.fullname <- 'Bit score'
215
+ id.units <- 'bits'
216
+ max.bs <- max(rec[, id.reccol])
217
+ id.hallmarks <- seq(0, max.bs*1.2, by=50)
218
+ if(is.null(id.max)) id.max <- max.bs
219
+ if(is.null(id.cutoff)) id.cutoff <- 0.95 * max.bs
220
+ if(is.null(id.binsize)) id.binsize <- 5
221
+ }
222
+ if(is.null(id.min)) id.min <- min(rec[, id.reccol]);
223
+ if(is.null(pos.max)) pos.max <- max(lim[, 3]);
224
+ id.lim <- c(id.min, id.max);
225
+ pos.lim <- c(pos.min, pos.max)/1e6;
226
+ id.breaks <- round((id.max-id.min)/id.binsize);
227
+ pos.breaks <- round((pos.max-pos.min)/pos.binsize);
228
+ if(is.null(main)) main <- paste('Recruitment plot of ', prefix, sep='');
229
+ pos.marks=seq(pos.min, pos.max, length.out=pos.breaks+1)/1e6;
230
+ id.marks=seq(id.min, id.max, length.out=id.breaks+1);
231
+ id.topclasses <- 0;
232
+ for(i in length(id.marks):1) if(id.marks[i]>id.cutoff) id.topclasses <- id.topclasses + 1;
233
+
234
+ # Set-up image
235
+ layout(matrix(c(3,4,1,2), nrow=2, byrow=TRUE), widths=c(2,1), heights=c(1,2));
236
+ out <- list();
237
+
238
+ # Recruitment plot
239
+ if(verbose) cat("Rec. plot.\n")
240
+ par(mar=c(5,4,0,0)+0.1);
241
+ rec.hist <- matrix(0, nrow=pos.breaks, ncol=id.breaks);
242
+ for(i in 1:nrow(rec)){
243
+ id.class <- ceiling((id.breaks)*((rec[i, id.reccol]-id.min)/(id.max-id.min)));
244
+ if(id.class<=id.breaks & id.class>0){
245
+ for(pos in rec[i, 1]:rec[i, 2]){
246
+ pos.class <- ceiling((pos.breaks)*((pos-pos.min)/(pos.max-pos.min)));
247
+ if(pos.class<=pos.breaks & pos.class>0) rec.hist[pos.class, id.class] <- rec.hist[pos.class, id.class]+1;
268
248
  }
269
- text((i-0.5)/10, 2/3, labels=paste(leg.lab[i], ''), srt=90, pos=2, offset=0, cex=3/4);
270
- }
271
- legend('bottom',
272
- legend=c('Contig boundary', 'Hallmark', paste(id.fullname, 'cutoff'),
273
- paste('Pos. hist.: ',id.shortname,' > ',signif(id.marks[id.top[1]],2),id.units,sep=''),
274
- paste('Pos. hist.: ',id.shortname,' < ',signif(id.marks[id.top[1]],2),id.units,sep='')), ncol=2,
275
- col=grey(c(0.85, 0.7, 0.5, 0, 0.5)), lty=c(1,2,3,1,1), lwd=c(1,1,1,2,2), bty='n', inset=0.05, cex=5/6);
276
- return(out);
277
- ### A list with the following elements:
278
- ###
279
- ### pos.marks: Midpoints of the position histogram.
280
- ###
281
- ### id.matrix: Midpoints of the identity histogram.
282
- ###
283
- ### recplot (if ret.recplot=TRUE): Matrix containing the recruitment plot values.
284
- ###
285
- ### id.mean: Mean identity.
286
- ###
287
- ### id.median: Median identity.
288
- ###
289
- ### id.mode (if ret.mode=TRUE): Mode of the identity. Deprecated.
290
- ###
291
- ### id.hist (if ret.hist=TRUE): Values of the identity histogram.
292
- ###
293
- ### pos.hist.low (if ret.hist=TRUE): Values of the position histogram (depth) with "low"
294
- ### identity (i.e., below id.cutoff).
295
- ###
296
- ### pos.hist.top (if ret.hist=TRUE): Values of the position histogram (depth) with "top"
297
- ### identity (i.e., above id.cutoff).
298
- ###
299
- ### id.max: Value of id.max. This is returned because id.max=NULL may vary.
300
- ###
301
- ### id.cutoff: Value of id.cutoff. This is returned because id.cutoff=NULL may vary.
302
- ###
303
- ### seqdepth.mean.top: Average sequencing depth with identity above id.cutoff.
304
- ###
305
- ### seqdepth.mean.low: Average sequencing depth with identity below id.cutoff.
306
- ###
307
- ### seqdepth.mean.all: Average sequencing depth without identity filtering.
308
- ###
309
- ### seqdepth.median.top: Median sequencing depth with identity above id.cutoff.
310
- ###
311
- ### seqdepth.median.low: Median sequencing depth with identity below id.cutoff.
312
- ###
313
- ### seqdepth.median.all: Median sequencing depth without identity filtering.
314
- ###
315
- ### id.metric: Full name of the used identity metric.
316
- ###
317
- ### id.summary: Full name of the summary method used to build the identity plot.
249
+ }
250
+ }
251
+ id.top <- c((1-id.topclasses):0) + id.breaks;
252
+ rec.col=gplots::colorpanel(256, rec.col1, rec.col2);
253
+ image(x=pos.marks, y=id.marks, z=log10(rec.hist),
254
+ breaks=seq(0, log10(max(rec.hist)), length.out=1+length(rec.col)), col=rec.col,
255
+ xlim=pos.lim, ylim=id.lim, xlab='Position in genome (Mbp)',
256
+ ylab=paste(id.fullname, ' (',id.units,')', sep=''), xaxs='i', yaxs='r');
257
+ if(!is.na(contig.col)) abline(v=c(lim$V2, lim$V3)/1e6, lty=1, col=contig.col);
258
+ abline(h=id.hallmarks, lty=2, col=grey(0.7));
259
+ abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
260
+ legend('bottomleft', 'Rec. plot', bg=rgb(1,1,1,2/3));
261
+ out <- c(out, list(pos.marks=pos.marks, id.marks=id.marks));
262
+ if(ret.recplot) out <- c(out, list(recplot=rec.hist));
263
+
264
+ # Identity histogram
265
+ if(verbose) cat(id.shortname, " hist.\n", sep='')
266
+ par(mar=c(5,0,0,2)+0.1);
267
+ id.hist <- id.summary.func(rec.hist);
268
+ plot(1, t='n', xlim=c(1, max(id.hist)), ylim=id.lim, ylab='', yaxt='n', xlab=paste('Sequences (bp),', id.summary.name), log='x', ...);
269
+ id.x <- rep(id.marks, each=2)[2:(id.breaks*2+1)]
270
+ id.f <- rep(id.hist, each=2)[1:(id.breaks*2)]
271
+ if(sum(id.f)>0){
272
+ lines(id.f, id.x, lwd=ifelse(id.splines>0, 1/2, 2), type='o', pch='.');
273
+ if(id.splines>0){
274
+ id.spline <- smooth.spline(id.x[id.f>0], log(id.f[id.f>0]), spar=id.splines)
275
+ lines(exp(id.spline$y), id.spline$x, lwd=2)
276
+ }
277
+ }
278
+
279
+ abline(h=id.hallmarks, lty=2, col=grey(0.7));
280
+ abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
281
+ legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3));
282
+ out <- c(out, list(id.mean=mean(rec[, id.reccol])));
283
+ out <- c(out, list(id.median=median(rec[, id.reccol])));
284
+ if(ret.hist) out <- c(out, list(id.hist=id.hist));
285
+
286
+ # Position histogram
287
+ if(verbose) cat("Pos. hist.\n")
288
+ par(mar=c(0,4,4,0)+0.1);
289
+ h1<-rep(0,nrow(rec.hist)) ;
290
+ h2<-rep(0,nrow(rec.hist)) ;
291
+ pos.winsize <- (pos.max-pos.min+1)/pos.breaks;
292
+ if(sum(rec.hist[, id.top])>0) h1 <- rowSums(matrix(rec.hist[, id.top], nrow=nrow(rec.hist)))/pos.winsize;
293
+ if(sum(rec.hist[,-id.top])>0) h2 <- rowSums(matrix(rec.hist[,-id.top], nrow=nrow(rec.hist)))/pos.winsize;
294
+
295
+ ymin <- min(1, h1[h1>0], h2[h2>0]);
296
+ ymax <- max(10, h1, h2);
297
+ if(is.na(ymin) || ymin<=0) ymin <- 1e-10;
298
+ if(is.na(ymax) || ymax<=0) ymax <- 1;
299
+ plot(1, t='n', xlab='', xaxt='n', ylab='Sequencing depth (X)', log='y', xlim=pos.lim,
300
+ ylim=c(ymin, ymax), xaxs='i', main=main, ...);
301
+ if(!is.na(contig.col)) abline(v=c(lim[,2], lim[,3])/1e6, lty=1, col=contig.col);
302
+ abline(h=10^c(0:5), lty=2, col=grey(0.7));
303
+ if(sum(h2)>0){
304
+ h2.x <- rep(pos.marks, each=2)[2:(pos.breaks*2+1)]
305
+ h2.y <- rep(h2, each=2)[1:(pos.breaks*2)]
306
+ lines(h2.x, h2.y, lwd=ifelse(pos.splines>0, 1/2, 2), col=grey(0.5));
307
+ if(pos.splines>0){
308
+ h2.spline <- smooth.spline(h2.x[h2.y>0], log(h2.y[h2.y>0]), spar=pos.splines)
309
+ lines(h2.spline$x, exp(h2.spline$y), lwd=2, col=grey(0.5))
310
+ }
311
+ if(ret.hist) out <- c(out, list(pos.hist.low=h2.y));
312
+ }
313
+ if(sum(h1)>0){
314
+ h1.x <- rep(pos.marks, each=2)[2:(pos.breaks*2+1)]
315
+ h1.y <- rep(h1, each=2)[1:(pos.breaks*2)]
316
+ lines(h1.x, h1.y, lwd=ifelse(pos.splines>0, 1/2, 2), col=grey(0));
317
+ if(pos.splines>0){
318
+ h1.spline <- smooth.spline(h1.x[h1.y>0], log(h1.y[h1.y>0]), spar=pos.splines)
319
+ lines(h1.spline$x, exp(h1.spline$y), lwd=2, col=grey(0))
320
+ }
321
+ if(ret.hist) out <- c(out, list(pos.hist.top=h1.y));
322
+ }
323
+ legend('topleft', 'Pos. histogram', bg=rgb(1,1,1,2/3));
324
+ out <- c(out, list(id.max=id.max, id.cutoff=id.marks[id.top[1]]));
325
+ out <- c(out, list(seqdepth.mean.top=mean(h1)));
326
+ out <- c(out, list(seqdepth.mean.low=mean(h2)));
327
+ out <- c(out, list(seqdepth.mean=mean(h1+h2)));
328
+ out <- c(out, list(seqdepth.median.top=median(h1)));
329
+ out <- c(out, list(seqdepth.median.low=median(h2)));
330
+ out <- c(out, list(seqdepth.median=median(h1+h2)));
331
+ out <- c(out, list(id.metric=id.fullname));
332
+ out <- c(out, list(id.summary=id.summary.name));
333
+
334
+ # Legend
335
+ par(mar=c(0,0,4,2)+0.1);
336
+ plot(1, t='n', xlab='', xaxt='n', ylab='', yaxt='n', xlim=c(0,1), ylim=c(0,1), xaxs='r', yaxs='i', ...);
337
+ text(1/2, 5/6, labels=paste('Reads per ', signif((pos.max-pos.min)/pos.breaks, 2), ' bp (rec. plot)', sep=''), pos=3);
338
+ leg.col <- gplots::colorpanel(100, rec.col1, rec.col2);
339
+ leg.lab <- signif(10^seq(0, log10(max(rec.hist)), length.out=10), 2);
340
+ for(i in 1:10){
341
+ for(j in 1:10){
342
+ k <- (i-1)*10 + j;
343
+ polygon(c(k-1, k, k, k-1)/100, c(2/3, 2/3, 5/6, 5/6), border=leg.col[k], col=leg.col[k]);
344
+ }
345
+ text((i-0.5)/10, 2/3, labels=paste(leg.lab[i], ''), srt=90, pos=2, offset=0, cex=3/4);
346
+ }
347
+ legend('bottom',
348
+ legend=c('Contig boundary', 'Hallmark', paste(id.fullname, 'cutoff'),
349
+ paste('Pos. hist.: ',id.shortname,' > ',signif(id.marks[id.top[1]],2),id.units,sep=''),
350
+ paste('Pos. hist.: ',id.shortname,' < ',signif(id.marks[id.top[1]],2),id.units,sep='')), ncol=2,
351
+ col=grey(c(0.85, 0.7, 0.5, 0, 0.5)), lty=c(1,2,3,1,1), lwd=c(1,1,1,2,2), bty='n', inset=0.05, cex=5/6);
352
+ return(out);
318
353
  });
319
354