miga-base 0.4.3.0 → 0.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -1,74 +1,105 @@
|
|
1
1
|
#==============> Define S4 classes
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
2
|
+
|
3
|
+
#' Enveomics: Growth Curve S4 Class
|
4
|
+
#'
|
5
|
+
#' Enve-omics representation of fitted growth curves.
|
6
|
+
#'
|
7
|
+
#' @slot design \code{(array)} Experimental design of the experiment.
|
8
|
+
#' @slot models \code{(list)} Fitted growth curve models.
|
9
|
+
#' @slot predict \code{(list)} Fitted growth curve values.
|
10
|
+
#' @slot call \code{(call)} Call producing this object.
|
11
|
+
#'
|
12
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
13
|
+
#'
|
14
|
+
#' @exportClass
|
15
|
+
|
16
|
+
enve.GrowthCurve <- setClass("enve.GrowthCurve",
|
17
|
+
representation(
|
18
|
+
design = "array",
|
19
|
+
models = "list",
|
20
|
+
predict = "list",
|
21
|
+
call='call')
|
22
|
+
,package='enveomics.R');
|
23
|
+
|
24
|
+
#' Attribute accessor
|
25
|
+
#'
|
26
|
+
#' @param x Object
|
27
|
+
#' @param name Attribute name
|
10
28
|
setMethod("$", "enve.GrowthCurve", function(x, name) attr(x, name))
|
11
29
|
|
30
|
+
#' Enveomics: Plot of Growth Curve
|
31
|
+
#'
|
32
|
+
#' Plots an \code{\link{enve.GrowthCurve}} object.
|
33
|
+
#'
|
34
|
+
#' @param x An \code{\link{enve.GrowthCurve}} object to plot.
|
35
|
+
#' @param col Base colors to use for the different samples. Can be recycled.
|
36
|
+
#' By default, grey for one sample or rainbow colors for more than one.
|
37
|
+
#' @param pt.alpha Color alpha for the observed data points, using \code{col}
|
38
|
+
#' as a base.
|
39
|
+
#' @param ln.alpha Color alpha for the fitted growth curve, using \code{col}
|
40
|
+
#' as a base.
|
41
|
+
#' @param ln.lwd Line width for the fitted curve.
|
42
|
+
#' @param ln.lty Line type for the fitted curve.
|
43
|
+
#' @param band.alpha Color alpha for the confidence interval band of the
|
44
|
+
#' fitted growth curve, using \code{col} as a base.
|
45
|
+
#' @param band.density Density of the filling pattern in the interval band.
|
46
|
+
#' If \code{NULL}, a solid color is used.
|
47
|
+
#' @param band.angle Angle of the density filling pattern in the interval
|
48
|
+
#' band. Ignored if \code{band.density} is \code{NULL}.
|
49
|
+
#' @param xp.alpha Color alpha for the line connecting individual experiments,
|
50
|
+
#' using \code{col} as a base.
|
51
|
+
#' @param xp.lwd Width of line for the experiments.
|
52
|
+
#' @param xp.lty Type of line for the experiments.
|
53
|
+
#' @param pch Point character for observed data points.
|
54
|
+
#' @param new Should a new plot be generated? If \code{FALSE}, the existing
|
55
|
+
#' canvas is used.
|
56
|
+
#' @param legend Should the plot include a legend? If \code{FALSE}, no legend
|
57
|
+
#' is added. If \code{TRUE}, a legend is added in the bottom-right corner.
|
58
|
+
#' Otherwise, a legend is added in the position specified as \code{xy.coords}.
|
59
|
+
#' @param add.params Should the legend include the parameters of the fitted
|
60
|
+
#' model?
|
61
|
+
#' @param ... Any other graphic parameters.
|
62
|
+
#'
|
63
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
64
|
+
#'
|
65
|
+
#' @method plot enve.GrowthCurve
|
66
|
+
#' @export
|
67
|
+
|
12
68
|
#==============> Define S4 methods
|
13
69
|
plot.enve.GrowthCurve <- function
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
### Density of the filling pattern in the interval band. If NULL, a solid
|
33
|
-
### color is used.
|
34
|
-
band.angle=45,
|
35
|
-
### Angle of the density filling pattern in the interval band. Ignored if
|
36
|
-
### `band.density` is NULL.
|
37
|
-
xp.alpha=0.5,
|
38
|
-
### Color alpha for the line connecting individual experiments, using `col`
|
39
|
-
### as a base.
|
40
|
-
xp.lwd=1,
|
41
|
-
### Width of line for the experiments.
|
42
|
-
xp.lty=1,
|
43
|
-
### Type of line for the experiments.
|
44
|
-
pch=19,
|
45
|
-
### Point character for observed data points.
|
46
|
-
new=TRUE,
|
47
|
-
### Should a new plot be generated? If FALSE, the existing canvas is used.
|
48
|
-
legend=new,
|
49
|
-
### Should the plot include a legend? If FALSE, no legend is added. If TRUE,
|
50
|
-
### a legend is added in the bottom-right corner. Otherwise, a legend is
|
51
|
-
### added in the position specified as `xy.coords`.
|
52
|
-
add.params=FALSE,
|
53
|
-
### Should the legend include the parameters of the fitted model?
|
54
|
-
...
|
55
|
-
### Any other graphic parameters.
|
56
|
-
){
|
70
|
+
(x,
|
71
|
+
col,
|
72
|
+
pt.alpha=0.9,
|
73
|
+
ln.alpha=1.0,
|
74
|
+
ln.lwd=1,
|
75
|
+
ln.lty=1,
|
76
|
+
band.alpha=0.4,
|
77
|
+
band.density=NULL,
|
78
|
+
band.angle=45,
|
79
|
+
xp.alpha=0.5,
|
80
|
+
xp.lwd=1,
|
81
|
+
xp.lty=1,
|
82
|
+
pch=19,
|
83
|
+
new=TRUE,
|
84
|
+
legend=new,
|
85
|
+
add.params=FALSE,
|
86
|
+
...
|
87
|
+
){
|
57
88
|
|
58
89
|
# Arguments
|
59
90
|
if(missing(col)){
|
60
91
|
col <-
|
61
92
|
if(length(x$design)==0) grey(0.2)
|
62
|
-
|
93
|
+
else rainbow(length(x$design), v=3/5, s=3/5)
|
63
94
|
}
|
64
|
-
|
95
|
+
|
65
96
|
if(new){
|
66
97
|
# Initiate canvas
|
67
98
|
od.fit.max <- max(sapply(x$predict, function(x) max(x[,"upr"])))
|
68
99
|
od.obs.max <- max(sapply(x$models, function(x) max(x$data[,"od"])))
|
69
100
|
opts <- list(...)
|
70
101
|
plot.defaults <- list(xlab="Time", ylab="Density",
|
71
|
-
|
102
|
+
xlim=range(x$predict[[1]][,"t"]), ylim=c(0, max(od.fit.max, od.obs.max)))
|
72
103
|
for(i in names(plot.defaults)){
|
73
104
|
if(is.null(opts[[i]])) opts[[i]] <- plot.defaults[[i]]
|
74
105
|
}
|
@@ -76,7 +107,7 @@ plot.enve.GrowthCurve <- function
|
|
76
107
|
opts[["type"]] <- "n"
|
77
108
|
do.call(plot, opts)
|
78
109
|
}
|
79
|
-
|
110
|
+
|
80
111
|
# Graphic default
|
81
112
|
pch <- rep(pch, length.out=length(x$design))
|
82
113
|
col <- rep(col, length.out=length(x$design))
|
@@ -102,8 +133,8 @@ plot.enve.GrowthCurve <- function
|
|
102
133
|
d <- x$predict[[i]]
|
103
134
|
lines(d[,"t"], d[,"fit"], col=ln.col[i], lwd=ln.lwd, lty=ln.lty)
|
104
135
|
polygon(c(d[,"t"], rev(d[,"t"])), c(d[,"lwr"], rev(d[,"upr"])),
|
105
|
-
|
106
|
-
|
136
|
+
border=NA, col=band.col[i], density=band.density[i],
|
137
|
+
angle=band.angle[i])
|
107
138
|
}
|
108
139
|
}
|
109
140
|
|
@@ -113,87 +144,117 @@ plot.enve.GrowthCurve <- function
|
|
113
144
|
if(add.params){
|
114
145
|
for(p in names(coef(x$models[[1]]))){
|
115
146
|
legend.txt <- paste(legend.txt, ", ", p, "=",
|
116
|
-
|
147
|
+
sapply(x$models, function(x) signif(coef(x)[p],2)) , sep="")
|
117
148
|
}
|
118
149
|
}
|
119
150
|
legend(legend, legend=legend.txt, pch=pch, col=ln.col)
|
120
151
|
}
|
121
152
|
}
|
122
153
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
154
|
+
#' Enveomics: Summary of Growth Curve
|
155
|
+
#'
|
156
|
+
#' Summary of an \code{\link{enve.GrowthCurve}} object.
|
157
|
+
#'
|
158
|
+
#' @param object An \code{\link{enve.GrowthCurve}} object.
|
159
|
+
#' @param ... No additional parameters are currently supported.
|
160
|
+
#'
|
161
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
162
|
+
#'
|
163
|
+
#' @method summary enve.GrowthCurve
|
164
|
+
#' @export
|
130
165
|
|
166
|
+
summary.enve.GrowthCurve <- function(
|
167
|
+
object,
|
168
|
+
...
|
169
|
+
){
|
170
|
+
|
131
171
|
x <- object
|
132
172
|
cat('===[ enve.GrowthCurves ]------------------\n')
|
133
173
|
for(i in names(x$design)){
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
174
|
+
cat(i, ':\n', sep='')
|
175
|
+
if(x$models[[i]]$convInfo$isConv){
|
176
|
+
for(j in names(coef(x$models[[i]]))){
|
177
|
+
cat(' - ', j, ' = ', coef(x$models[[i]])[j], '\n', sep='')
|
178
|
+
}
|
179
|
+
}else{
|
180
|
+
cat(' Model didn\'t converge:\n ',
|
181
|
+
x$models[[i]]$convInfo$stopMessage, '\n', sep='')
|
182
|
+
}
|
183
|
+
cat(' ', nrow(x$models[[i]]$data), ' observations, ',
|
184
|
+
length(unique(x$models[[i]]$data[,"replicate"])), ' replicates.\n',
|
185
|
+
sep='')
|
146
186
|
}
|
147
187
|
cat('------------------------------------------\n')
|
148
188
|
cat('call:',as.character(attr(x,'call')),'\n')
|
149
189
|
cat('------------------------------------------\n')
|
150
190
|
}
|
151
191
|
|
192
|
+
#' Enveomics: Growth Curve
|
193
|
+
#'
|
194
|
+
#' Calculates growth curves using the logistic growth function.
|
195
|
+
#'
|
196
|
+
#' @param x Data frame (or coercible) containing the observed growth data
|
197
|
+
#' (e.g., O.D. values). Each column is an independent growth curve and each
|
198
|
+
#' row is a time point. \code{NA}'s are allowed.
|
199
|
+
#' @param times Vector with the times at which each row was taken. By default,
|
200
|
+
#' all rows are assumed to be part of constantly periodic measurements.
|
201
|
+
#' @param triplicates If \code{TRUE}, the columns are assumed to be sorted by
|
202
|
+
#' sample with three replicates by sample. It requires a number of columns
|
203
|
+
#' multiple of 3.
|
204
|
+
#' @param design Experimental design of the data. An \strong{array} of mode list
|
205
|
+
#' with sample names as index and the list of column names in each sample as
|
206
|
+
#' the values. By default, each column is assumed to be an independent sample
|
207
|
+
#' if \code{triplicates} is \code{FALSE}, or every three columns are assumed
|
208
|
+
#' to be a sample if \code{triplicates} is \code{TRUE}. In the latter case,
|
209
|
+
#' samples are simply numbered.
|
210
|
+
#' @param new.times Values of time for the fitted curve.
|
211
|
+
#' @param level Confidence (or prediction) interval in the fitted curve.
|
212
|
+
#' @param interval Type of interval to be calculated for the fitted curve.
|
213
|
+
#' @param plot Should the growth curve be plotted?
|
214
|
+
#' @param FUN Function to fit. By default: logistic growth with paramenters
|
215
|
+
#' \code{K}: carrying capacity,
|
216
|
+
#' \code{r}: intrinsic growth rate, and
|
217
|
+
#' \code{P0}: Initial population.
|
218
|
+
#' @param nls.opt Any additional options passed to \code{nls}.
|
219
|
+
#' @param ... Any additional parameters to be passed to
|
220
|
+
#' \code{plot.enve.GrowthCurve}.
|
221
|
+
#'
|
222
|
+
#' @return Returns an \code{\link{enve.GrowthCurve}} object.
|
223
|
+
#'
|
224
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
225
|
+
#'
|
226
|
+
#' @examples
|
227
|
+
#' # Load data
|
228
|
+
#' data("growth.curves", package="enveomics.R", envir=environment())
|
229
|
+
#' # Generate growth curves with different colors
|
230
|
+
#' g <- enve.growthcurve(growth.curves[,-1], growth.curves[,1], triplicates=TRUE)
|
231
|
+
#' # Generate black-and-white growth curves with different symbols
|
232
|
+
#' plot(g, pch=15:17, col="black", band.density=45, band.angle=c(-45,45,0))
|
233
|
+
#'
|
234
|
+
#' @export
|
235
|
+
|
152
236
|
#==============> Core functions
|
153
237
|
enve.growthcurve <- structure(function(
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
### Experimental design of the data. An `array` of mode list with sample
|
167
|
-
### names as index and the list of column names in each sample as the
|
168
|
-
### values. By default, each column is assumed to be an independent sample
|
169
|
-
### if `triplicates` is FALSE, or every three columns are assumed to be a
|
170
|
-
### sample if `triplicates` is TRUE. In the latter case, samples are
|
171
|
-
### simply numbered.
|
172
|
-
new.times=seq(min(times), max(times), length.out=length(times)*10),
|
173
|
-
### Values of time for the fitted curve.
|
174
|
-
level=0.95,
|
175
|
-
### Confidence (or prediction) interval in the fitted curve.
|
176
|
-
interval=c("confidence","prediction"),
|
177
|
-
### Type of interval to be calculated for the fitted curve.
|
178
|
-
plot=TRUE,
|
179
|
-
### Should the growth curve be plotted?
|
180
|
-
FUN=function(t,K,r,P0) K*P0*exp(r*t)/(K+P0*(exp(r*t)-1)),
|
181
|
-
### Function to fit. By default: logistic growth with paramenters `K`:
|
182
|
-
### carrying capacity, `r`: intrinsic growth rate, and `P0`: Initial
|
183
|
-
### population.
|
184
|
-
nls.opt=list(),
|
185
|
-
### Any additional options passed to `nls`.
|
186
|
-
...
|
187
|
-
### Any additional parameters to be passed to `plot.enve.GrowthCurve`.
|
188
|
-
){
|
238
|
+
x,
|
239
|
+
times=1:nrow(x),
|
240
|
+
triplicates=FALSE,
|
241
|
+
design,
|
242
|
+
new.times=seq(min(times), max(times), length.out=length(times)*10),
|
243
|
+
level=0.95,
|
244
|
+
interval=c("confidence","prediction"),
|
245
|
+
plot=TRUE,
|
246
|
+
FUN=function(t,K,r,P0) K*P0*exp(r*t)/(K+P0*(exp(r*t)-1)),
|
247
|
+
nls.opt=list(),
|
248
|
+
...
|
249
|
+
){
|
189
250
|
|
190
251
|
# Arguments
|
191
252
|
if(missing(design)){
|
192
253
|
design <-
|
193
254
|
if(triplicates)
|
194
255
|
tapply(colnames(x), colnames(x)[rep(1:(ncol(x)/3)*3-2, each=3)], c,
|
195
|
-
|
196
|
-
|
256
|
+
simplify=FALSE)
|
257
|
+
else tapply(colnames(x), colnames(x), c, simplify=FALSE)
|
197
258
|
}
|
198
259
|
mod <- list()
|
199
260
|
fit <- list()
|
@@ -207,18 +268,18 @@ enve.growthcurve <- structure(function(
|
|
207
268
|
od <- c(od, x[,col])
|
208
269
|
}
|
209
270
|
data <- data.frame(t=rep(times, length(design[[sample]])), od=od,
|
210
|
-
|
271
|
+
replicate=rep(1:length(design[[sample]]), each=length(times)))
|
211
272
|
data <- data[!is.na(data$od),]
|
212
273
|
opts <- nls.opt
|
213
274
|
opts[["data"]] <- data
|
214
275
|
opt.defaults <- list(formula = od ~ enve._growth.fx(t, K, r, P0),
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
276
|
+
algorithm="port", lower=list(P0=1e-16),
|
277
|
+
control=nls.control(warnOnly=TRUE),
|
278
|
+
start=list(
|
279
|
+
K = 2*max(data$od),
|
280
|
+
r = length(times)/max(data$t),
|
281
|
+
P0 = min(data$od[data$od>0])
|
282
|
+
))
|
222
283
|
for(i in names(opt.defaults)){
|
223
284
|
if(is.null(opts[[i]])){
|
224
285
|
opts[[i]] <- opt.defaults[[i]]
|
@@ -226,16 +287,15 @@ enve.growthcurve <- structure(function(
|
|
226
287
|
}
|
227
288
|
mod[[sample]] <- do.call(nls, opts)
|
228
289
|
fit[[sample]] <- cbind(t=new.times,
|
229
|
-
|
230
|
-
|
290
|
+
predFit(mod[[sample]], level=level, interval=interval,
|
291
|
+
newdata=data.frame(t=new.times)))
|
231
292
|
}
|
232
293
|
enve._growth.fx <<- NULL
|
233
294
|
gc <- new("enve.GrowthCurve",
|
234
|
-
|
235
|
-
|
295
|
+
design=design, models=mod, predict=fit,
|
296
|
+
call=match.call());
|
236
297
|
if(plot) plot(gc, ...);
|
237
298
|
return(gc)
|
238
|
-
### Returns an `enve.GrowthCurve` object.
|
239
299
|
}, ex=function(){
|
240
300
|
# Load data
|
241
301
|
data("growth.curves", package="enveomics.R", envir=environment())
|
@@ -245,13 +305,21 @@ enve.growthcurve <- structure(function(
|
|
245
305
|
plot(g, pch=15:17, col="black", band.density=45, band.angle=c(-45,45,0))
|
246
306
|
});
|
247
307
|
|
308
|
+
#' Enveomics: Color to Alpha
|
309
|
+
#'
|
310
|
+
#' Takes a vector of colors and sets the alpha.
|
311
|
+
#'
|
312
|
+
#' @param x A vector of any value base colors.
|
313
|
+
#' @param alpha Alpha level to set (in the 0-1 range).
|
314
|
+
#'
|
315
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
316
|
+
#'
|
317
|
+
#' @export
|
318
|
+
|
248
319
|
enve.col2alpha <- function(
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
alpha
|
253
|
-
### Alpha level to set (in the 0-1 range).
|
254
|
-
){
|
320
|
+
x,
|
321
|
+
alpha
|
322
|
+
){
|
255
323
|
out <- c()
|
256
324
|
for(i in x){
|
257
325
|
opt <- as.list(col2rgb(i)[,1]/256)
|
@@ -1,319 +1,354 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
id.
|
95
|
-
|
96
|
-
id.
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
id.
|
101
|
-
|
102
|
-
|
103
|
-
id.
|
104
|
-
|
105
|
-
id.
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
id.metric <- pmatch(id.metric, METRICS);
|
114
|
-
if(is.na(id.metric)) stop('Invalid identity metric.');
|
115
|
-
if(id.metric == -1) stop('Ambiguous identity metric.');
|
116
|
-
if(id.metric==1){
|
117
|
-
id.reccol <- 3
|
118
|
-
id.shortname <- 'Id.'
|
119
|
-
id.fullname <- 'Identity'
|
120
|
-
id.units <- '%'
|
121
|
-
id.hallmarks <- seq(0, 100, by=5)
|
122
|
-
if(is.null(id.max)) id.max <- 100
|
123
|
-
if(is.null(id.cutoff)) id.cutoff <- 95
|
124
|
-
if(is.null(id.binsize)) id.binsize <- 0.1
|
125
|
-
}else if(id.metric==2){
|
126
|
-
if(ncol(rec)<6) stop("Requesting corrected identity, but .rec file doesn't have 6th column")
|
127
|
-
id.reccol <- 6
|
128
|
-
id.shortname <- 'cId.'
|
129
|
-
id.fullname <- 'Corrected identity'
|
130
|
-
id.units <- '%'
|
131
|
-
id.hallmarks <- seq(0, 100, by=5)
|
132
|
-
if(is.null(id.max)) id.max <- 100
|
133
|
-
if(is.null(id.cutoff)) id.cutoff <- 95
|
134
|
-
if(is.null(id.binsize)) id.binsize <- 0.1
|
135
|
-
}else if(id.metric==3){
|
136
|
-
id.reccol <- 4
|
137
|
-
id.shortname <- 'BSc.'
|
138
|
-
id.fullname <- 'Bit score'
|
139
|
-
id.units <- 'bits'
|
140
|
-
max.bs <- max(rec[, id.reccol])
|
141
|
-
id.hallmarks <- seq(0, max.bs*1.2, by=50)
|
142
|
-
if(is.null(id.max)) id.max <- max.bs
|
143
|
-
if(is.null(id.cutoff)) id.cutoff <- 0.95 * max.bs
|
144
|
-
if(is.null(id.binsize)) id.binsize <- 5
|
145
|
-
}
|
146
|
-
if(is.null(id.min)) id.min <- min(rec[, id.reccol]);
|
147
|
-
if(is.null(pos.max)) pos.max <- max(lim[, 3]);
|
148
|
-
id.lim <- c(id.min, id.max);
|
149
|
-
pos.lim <- c(pos.min, pos.max)/1e6;
|
150
|
-
id.breaks <- round((id.max-id.min)/id.binsize);
|
151
|
-
pos.breaks <- round((pos.max-pos.min)/pos.binsize);
|
152
|
-
if(is.null(main)) main <- paste('Recruitment plot of ', prefix, sep='');
|
153
|
-
pos.marks=seq(pos.min, pos.max, length.out=pos.breaks+1)/1e6;
|
154
|
-
id.marks=seq(id.min, id.max, length.out=id.breaks+1);
|
155
|
-
id.topclasses <- 0;
|
156
|
-
for(i in length(id.marks):1) if(id.marks[i]>id.cutoff) id.topclasses <- id.topclasses + 1;
|
157
|
-
|
158
|
-
# Set-up image
|
159
|
-
layout(matrix(c(3,4,1,2), nrow=2, byrow=TRUE), widths=c(2,1), heights=c(1,2));
|
160
|
-
out <- list();
|
161
|
-
|
162
|
-
# Recruitment plot
|
163
|
-
if(verbose) cat("Rec. plot.\n")
|
164
|
-
par(mar=c(5,4,0,0)+0.1);
|
165
|
-
rec.hist <- matrix(0, nrow=pos.breaks, ncol=id.breaks);
|
166
|
-
for(i in 1:nrow(rec)){
|
167
|
-
id.class <- ceiling((id.breaks)*((rec[i, id.reccol]-id.min)/(id.max-id.min)));
|
168
|
-
if(id.class<=id.breaks & id.class>0){
|
169
|
-
for(pos in rec[i, 1]:rec[i, 2]){
|
170
|
-
pos.class <- ceiling((pos.breaks)*((pos-pos.min)/(pos.max-pos.min)));
|
171
|
-
if(pos.class<=pos.breaks & pos.class>0) rec.hist[pos.class, id.class] <- rec.hist[pos.class, id.class]+1;
|
172
|
-
}
|
173
|
-
}
|
174
|
-
}
|
175
|
-
id.top <- c((1-id.topclasses):0) + id.breaks;
|
176
|
-
rec.col=gplots::colorpanel(256, rec.col1, rec.col2);
|
177
|
-
image(x=pos.marks, y=id.marks, z=log10(rec.hist),
|
178
|
-
breaks=seq(0, log10(max(rec.hist)), length.out=1+length(rec.col)), col=rec.col,
|
179
|
-
xlim=pos.lim, ylim=id.lim, xlab='Position in genome (Mbp)',
|
180
|
-
ylab=paste(id.fullname, ' (',id.units,')', sep=''), xaxs='i', yaxs='r');
|
181
|
-
if(!is.na(contig.col)) abline(v=c(lim$V2, lim$V3)/1e6, lty=1, col=contig.col);
|
182
|
-
abline(h=id.hallmarks, lty=2, col=grey(0.7));
|
183
|
-
abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
|
184
|
-
legend('bottomleft', 'Rec. plot', bg=rgb(1,1,1,2/3));
|
185
|
-
out <- c(out, list(pos.marks=pos.marks, id.marks=id.marks));
|
186
|
-
if(ret.recplot) out <- c(out, list(recplot=rec.hist));
|
187
|
-
|
188
|
-
# Identity histogram
|
189
|
-
if(verbose) cat(id.shortname, " hist.\n", sep='')
|
190
|
-
par(mar=c(5,0,0,2)+0.1);
|
191
|
-
id.hist <- id.summary.func(rec.hist);
|
192
|
-
plot(1, t='n', xlim=c(1, max(id.hist)), ylim=id.lim, ylab='', yaxt='n', xlab=paste('Sequences (bp),', id.summary.name), log='x', ...);
|
193
|
-
id.x <- rep(id.marks, each=2)[2:(id.breaks*2+1)]
|
194
|
-
id.f <- rep(id.hist, each=2)[1:(id.breaks*2)]
|
195
|
-
if(sum(id.f)>0){
|
196
|
-
lines(id.f, id.x, lwd=ifelse(id.splines>0, 1/2, 2), type='o', pch='.');
|
197
|
-
if(id.splines>0){
|
198
|
-
id.spline <- smooth.spline(id.x[id.f>0], log(id.f[id.f>0]), spar=id.splines)
|
199
|
-
lines(exp(id.spline$y), id.spline$x, lwd=2)
|
200
|
-
}
|
201
|
-
}
|
202
|
-
|
203
|
-
abline(h=id.hallmarks, lty=2, col=grey(0.7));
|
204
|
-
abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
|
205
|
-
legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3));
|
206
|
-
out <- c(out, list(id.mean=mean(rec[, id.reccol])));
|
207
|
-
out <- c(out, list(id.median=median(rec[, id.reccol])));
|
208
|
-
if(ret.hist) out <- c(out, list(id.hist=id.hist));
|
1
|
+
#' Enveomics: Recruitment Plots
|
2
|
+
#'
|
3
|
+
#' @description
|
4
|
+
#' Produces recruitment plots provided that BlastTab.catsbj.pl has
|
5
|
+
#' been previously executed. Requires the \pkg{gplots} library.
|
6
|
+
#'
|
7
|
+
#' @param prefix
|
8
|
+
#' Path to the prefix of the BlastTab.catsbj.pl output files. At
|
9
|
+
#' least the files \strong{.rec} and \strong{.lim} must exist with this prefix.
|
10
|
+
#' @param id.min
|
11
|
+
#' Minimum identity to be considered. By default, the minimum detected
|
12
|
+
#' identity. This value is a percentage.
|
13
|
+
#' @param id.max
|
14
|
+
#' Maximum identity to be considered. By default, 100\%.
|
15
|
+
#' @param id.binsize
|
16
|
+
#' Size of the identity bins (vertical histograms). By default, 0.1 for
|
17
|
+
#' identity metrics and 5 for bit score.
|
18
|
+
#' @param id.splines
|
19
|
+
#' Smoothing parameter for the splines in the identity histogram. Zero (0) for no
|
20
|
+
#' splines. A generally good value is 1/2. If non-zero, requires the \pkg{stats} package.
|
21
|
+
#' @param id.metric
|
22
|
+
#' Metric of identity to be used (Y-axis).
|
23
|
+
#' It can be any unambiguous prefix of:
|
24
|
+
#' \itemize{
|
25
|
+
#' \item "identity"
|
26
|
+
#' \item "corrected identity"
|
27
|
+
#' \item "bit score"}
|
28
|
+
#' @param id.summary
|
29
|
+
#' Method used to build the identity histogram (Horizontal axis of the right panel).
|
30
|
+
#' It can be any unambiguous prefix of:
|
31
|
+
#' \itemize{
|
32
|
+
#' \item "sum"
|
33
|
+
#' \item "average"
|
34
|
+
#' \item "median"
|
35
|
+
#' \item "90\% lower bound"
|
36
|
+
#' \item "90\% upper bound"
|
37
|
+
#' \item "95\% lower bound"
|
38
|
+
#' \item "95\% upper bound" }
|
39
|
+
#' The last four options
|
40
|
+
#' correspond to the upper and lower boundaries of the 90\% and 95\% empirical confidence
|
41
|
+
#' intervals.
|
42
|
+
#' @param pos.min
|
43
|
+
#' Minimum (leftmost) position in the reference (concatenated) genome (in bp).
|
44
|
+
#' @param pos.max
|
45
|
+
#' Maximum (rightmost) position in the reference (concatenated) genome (in bp).
|
46
|
+
#' By default: Length of the genome.
|
47
|
+
#' @param pos.binsize
|
48
|
+
#' Size of the position bins (horizontal histograms) in bp.
|
49
|
+
#' @param pos.splines
|
50
|
+
#' Smoothing parameter for the splines in the position histogram. Zero (0) for no splines.
|
51
|
+
#' If non-zero, requires the stats package.
|
52
|
+
#' @param rec.col1
|
53
|
+
#' Lightest color in the recruitment plot.
|
54
|
+
#' @param rec.col2
|
55
|
+
#' Darkest color in the recruitment plot.
|
56
|
+
#' @param main
|
57
|
+
#' Title of the plot.
|
58
|
+
#' @param contig.col
|
59
|
+
#' Color of the Contig boundaries. Set to \code{NA} to ignore Contig boundaries.
|
60
|
+
#' @param ret.recplot
|
61
|
+
#' Indicates if the matrix of the recruitment plot is to be returned.
|
62
|
+
#' @param ret.hist
|
63
|
+
#' Ignored, for backwards compatibility.
|
64
|
+
#' @param ret.mode
|
65
|
+
#' Indicates if the mode of the identity is to be computed. It requires the
|
66
|
+
#' \pkg{modeest} package.
|
67
|
+
#' @param id.cutoff
|
68
|
+
#' Minimum identity to consider an alignment as "top". By default, it is 0.95 for the
|
69
|
+
#' identity metrics and 95\% of the best scoring alignment for bit score.
|
70
|
+
#' @param verbose
|
71
|
+
#' Indicates if the function should report the advance.
|
72
|
+
#' @param ...
|
73
|
+
#' Any additional graphic parameters to be passed to plot for all panels except the
|
74
|
+
#' recruitment plot (lower-left).
|
75
|
+
#'
|
76
|
+
#' @return
|
77
|
+
#'
|
78
|
+
#' Returns a list with the following elements:
|
79
|
+
#'
|
80
|
+
#' \describe{
|
81
|
+
#' \item{\code{pos.marks}}{Midpoints of the position histogram.}
|
82
|
+
#' \item{\code{id.matrix}}{Midpoints of the identity histogram.}
|
83
|
+
#' \item{\code{recplot}}{Matrix containing the recruitment plot values
|
84
|
+
#' (if \code{ret.recplot=TRUE}).}
|
85
|
+
#' \item{\code{id.mean}}{Mean identity.}
|
86
|
+
#' \item{\code{id.median}}{Median identity.}
|
87
|
+
#' \item{\code{id.mode}}{Mode of the identity (if \code{ret.mode=TRUE}). Deprecated.}
|
88
|
+
#' \item{\code{id.hist}}{Values of the identity histogram (if \code{ret.hist=TRUE}).}
|
89
|
+
#' \item{\code{pos.hist.low}}{Values of the position histogram (depth) with "low"
|
90
|
+
#' identity (i.e., below id.cutoff) (if \code{ret.hist=TRUE}).}
|
91
|
+
#' \item{\code{pos.hist.top}}{Values of the position histogram (depth) with "top"
|
92
|
+
#' identity (i.e., above id.cutoff) (if \code{ret.hist=TRUE}).}
|
93
|
+
#' \item{\code{id.max}}{Value of \code{id.max}. This is returned because
|
94
|
+
#' \code{id.max=NULL} may vary.}
|
95
|
+
#' \item{\code{id.cutoff}}{Value of \code{id.cutoff}.
|
96
|
+
#' This is returned because \code{id.cutoff=NULL} may vary.}
|
97
|
+
#' \item{\code{seqdepth.mean.top}}{Average sequencing depth with identity above
|
98
|
+
#' \code{id.cutoff}.}
|
99
|
+
#' \item{\code{seqdepth.mean.low}}{Average sequencing depth with identity below
|
100
|
+
#' \code{id.cutoff}.}
|
101
|
+
#' \item{\code{seqdepth.mean.all}}{Average sequencing depth without identity filtering.}
|
102
|
+
#' \item{\code{seqdepth.median.top}}{Median sequencing depth with identity above
|
103
|
+
#' \code{id.cutoff}.}
|
104
|
+
#' \item{\code{seqdepth.median.low}}{Median sequencing depth with identity below
|
105
|
+
#' \code{id.cutoff}.}
|
106
|
+
#' \item{\code{seqdepth.median.all}}{Median sequencing depth without identity filtering.}
|
107
|
+
#' \item{\code{id.metric}}{Full name of the used identity metric.}
|
108
|
+
#' \item{\code{id.summary}}{Full name of the summary method used to build the identity plot.}}
|
109
|
+
#'
|
110
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
111
|
+
#'
|
112
|
+
#' @export
|
209
113
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
114
|
+
enve.recplot <- structure(function(
|
115
|
+
prefix,
|
116
|
+
|
117
|
+
# Id. hist.
|
118
|
+
id.min=NULL,
|
119
|
+
id.max=NULL,
|
120
|
+
id.binsize=NULL,
|
121
|
+
id.splines=0,
|
122
|
+
id.metric='id',
|
123
|
+
id.summary='sum',
|
124
|
+
|
125
|
+
# Pos. hist.
|
126
|
+
pos.min=1,
|
127
|
+
pos.max=NULL,
|
128
|
+
pos.binsize=1e3,
|
129
|
+
pos.splines=0,
|
130
|
+
|
131
|
+
# Rec. plot
|
132
|
+
rec.col1='white',
|
133
|
+
rec.col2='black',
|
134
|
+
|
135
|
+
# General
|
136
|
+
main=NULL,
|
137
|
+
contig.col=grey(0.85),
|
138
|
+
|
139
|
+
# Return
|
140
|
+
ret.recplot=FALSE,
|
141
|
+
ret.hist=FALSE,
|
142
|
+
ret.mode=FALSE,
|
143
|
+
|
144
|
+
# General
|
145
|
+
id.cutoff=NULL,
|
146
|
+
verbose=TRUE,
|
147
|
+
...
|
148
|
+
){
|
149
|
+
|
150
|
+
# Settings
|
151
|
+
METRICS <- c('identity', 'corrected identity', 'bit score');
|
152
|
+
SUMMARY <- c('sum', 'average', 'median', '');
|
153
|
+
if(is.null(prefix)) stop('Parameter prefix is mandatory.');
|
154
|
+
if(!requireNamespace("gplots", quietly=TRUE)) stop('Unavailable gplots library.');
|
155
|
+
|
156
|
+
# Read files
|
157
|
+
if(verbose) cat("Reading files.\n")
|
158
|
+
rec <- read.table(paste(prefix, '.rec', sep=''), sep="\t", comment.char='', quote='');
|
159
|
+
lim <- read.table(paste(prefix, '.lim', sep=''), sep="\t", comment.char='', quote='');
|
160
|
+
|
161
|
+
# Configure ID summary
|
162
|
+
id.summary <- pmatch(id.summary, SUMMARY);
|
163
|
+
if(is.na(id.summary)) stop('Invalid identity summary.');
|
164
|
+
if(id.summary == -1) stop('Ambiguous identity summary.');
|
165
|
+
if(id.summary==1){
|
166
|
+
id.summary.func <- function(x) colSums(x);
|
167
|
+
id.summary.name <- 'sum'
|
168
|
+
}else if(id.summary==2){
|
169
|
+
id.summary.func <- function(x) colMeans(x);
|
170
|
+
id.summary.name <- 'mean'
|
171
|
+
}else if(id.summary==3){
|
172
|
+
id.summary.func <- function(x) apply(x,2,median);
|
173
|
+
id.summary.name <- 'median'
|
174
|
+
}else if(id.summary==4){
|
175
|
+
id.summary.func <- function(x) apply(x,2,quantile,probs=0.05,names=FALSE);
|
176
|
+
id.summary.name <- '90% LB'
|
177
|
+
}else if(id.summary==5){
|
178
|
+
id.summary.func <- function(x) apply(x,2,quantile,probs=0.95,names=FALSE);
|
179
|
+
id.summary.name <- '90% UB'
|
180
|
+
}else if(id.summary==6){
|
181
|
+
id.summary.func <- function(x) apply(x,2,quantile,probs=0.025,names=FALSE);
|
182
|
+
id.summary.name <- '95% LB'
|
183
|
+
}else if(id.summary==7){
|
184
|
+
id.summary.func <- function(x) apply(x,2,quantile,probs=0.975,names=FALSE);
|
185
|
+
id.summary.name <- '95% UB'
|
186
|
+
}
|
187
|
+
|
188
|
+
# Configure metrics
|
189
|
+
id.metric <- pmatch(id.metric, METRICS);
|
190
|
+
if(is.na(id.metric)) stop('Invalid identity metric.');
|
191
|
+
if(id.metric == -1) stop('Ambiguous identity metric.');
|
192
|
+
if(id.metric==1){
|
193
|
+
id.reccol <- 3
|
194
|
+
id.shortname <- 'Id.'
|
195
|
+
id.fullname <- 'Identity'
|
196
|
+
id.units <- '%'
|
197
|
+
id.hallmarks <- seq(0, 100, by=5)
|
198
|
+
if(is.null(id.max)) id.max <- 100
|
199
|
+
if(is.null(id.cutoff)) id.cutoff <- 95
|
200
|
+
if(is.null(id.binsize)) id.binsize <- 0.1
|
201
|
+
}else if(id.metric==2){
|
202
|
+
if(ncol(rec)<6) stop("Requesting corrected identity, but .rec file doesn't have 6th column")
|
203
|
+
id.reccol <- 6
|
204
|
+
id.shortname <- 'cId.'
|
205
|
+
id.fullname <- 'Corrected identity'
|
206
|
+
id.units <- '%'
|
207
|
+
id.hallmarks <- seq(0, 100, by=5)
|
208
|
+
if(is.null(id.max)) id.max <- 100
|
209
|
+
if(is.null(id.cutoff)) id.cutoff <- 95
|
210
|
+
if(is.null(id.binsize)) id.binsize <- 0.1
|
211
|
+
}else if(id.metric==3){
|
212
|
+
id.reccol <- 4
|
213
|
+
id.shortname <- 'BSc.'
|
214
|
+
id.fullname <- 'Bit score'
|
215
|
+
id.units <- 'bits'
|
216
|
+
max.bs <- max(rec[, id.reccol])
|
217
|
+
id.hallmarks <- seq(0, max.bs*1.2, by=50)
|
218
|
+
if(is.null(id.max)) id.max <- max.bs
|
219
|
+
if(is.null(id.cutoff)) id.cutoff <- 0.95 * max.bs
|
220
|
+
if(is.null(id.binsize)) id.binsize <- 5
|
221
|
+
}
|
222
|
+
if(is.null(id.min)) id.min <- min(rec[, id.reccol]);
|
223
|
+
if(is.null(pos.max)) pos.max <- max(lim[, 3]);
|
224
|
+
id.lim <- c(id.min, id.max);
|
225
|
+
pos.lim <- c(pos.min, pos.max)/1e6;
|
226
|
+
id.breaks <- round((id.max-id.min)/id.binsize);
|
227
|
+
pos.breaks <- round((pos.max-pos.min)/pos.binsize);
|
228
|
+
if(is.null(main)) main <- paste('Recruitment plot of ', prefix, sep='');
|
229
|
+
pos.marks=seq(pos.min, pos.max, length.out=pos.breaks+1)/1e6;
|
230
|
+
id.marks=seq(id.min, id.max, length.out=id.breaks+1);
|
231
|
+
id.topclasses <- 0;
|
232
|
+
for(i in length(id.marks):1) if(id.marks[i]>id.cutoff) id.topclasses <- id.topclasses + 1;
|
233
|
+
|
234
|
+
# Set-up image
|
235
|
+
layout(matrix(c(3,4,1,2), nrow=2, byrow=TRUE), widths=c(2,1), heights=c(1,2));
|
236
|
+
out <- list();
|
237
|
+
|
238
|
+
# Recruitment plot
|
239
|
+
if(verbose) cat("Rec. plot.\n")
|
240
|
+
par(mar=c(5,4,0,0)+0.1);
|
241
|
+
rec.hist <- matrix(0, nrow=pos.breaks, ncol=id.breaks);
|
242
|
+
for(i in 1:nrow(rec)){
|
243
|
+
id.class <- ceiling((id.breaks)*((rec[i, id.reccol]-id.min)/(id.max-id.min)));
|
244
|
+
if(id.class<=id.breaks & id.class>0){
|
245
|
+
for(pos in rec[i, 1]:rec[i, 2]){
|
246
|
+
pos.class <- ceiling((pos.breaks)*((pos-pos.min)/(pos.max-pos.min)));
|
247
|
+
if(pos.class<=pos.breaks & pos.class>0) rec.hist[pos.class, id.class] <- rec.hist[pos.class, id.class]+1;
|
268
248
|
}
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
249
|
+
}
|
250
|
+
}
|
251
|
+
id.top <- c((1-id.topclasses):0) + id.breaks;
|
252
|
+
rec.col=gplots::colorpanel(256, rec.col1, rec.col2);
|
253
|
+
image(x=pos.marks, y=id.marks, z=log10(rec.hist),
|
254
|
+
breaks=seq(0, log10(max(rec.hist)), length.out=1+length(rec.col)), col=rec.col,
|
255
|
+
xlim=pos.lim, ylim=id.lim, xlab='Position in genome (Mbp)',
|
256
|
+
ylab=paste(id.fullname, ' (',id.units,')', sep=''), xaxs='i', yaxs='r');
|
257
|
+
if(!is.na(contig.col)) abline(v=c(lim$V2, lim$V3)/1e6, lty=1, col=contig.col);
|
258
|
+
abline(h=id.hallmarks, lty=2, col=grey(0.7));
|
259
|
+
abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
|
260
|
+
legend('bottomleft', 'Rec. plot', bg=rgb(1,1,1,2/3));
|
261
|
+
out <- c(out, list(pos.marks=pos.marks, id.marks=id.marks));
|
262
|
+
if(ret.recplot) out <- c(out, list(recplot=rec.hist));
|
263
|
+
|
264
|
+
# Identity histogram
|
265
|
+
if(verbose) cat(id.shortname, " hist.\n", sep='')
|
266
|
+
par(mar=c(5,0,0,2)+0.1);
|
267
|
+
id.hist <- id.summary.func(rec.hist);
|
268
|
+
plot(1, t='n', xlim=c(1, max(id.hist)), ylim=id.lim, ylab='', yaxt='n', xlab=paste('Sequences (bp),', id.summary.name), log='x', ...);
|
269
|
+
id.x <- rep(id.marks, each=2)[2:(id.breaks*2+1)]
|
270
|
+
id.f <- rep(id.hist, each=2)[1:(id.breaks*2)]
|
271
|
+
if(sum(id.f)>0){
|
272
|
+
lines(id.f, id.x, lwd=ifelse(id.splines>0, 1/2, 2), type='o', pch='.');
|
273
|
+
if(id.splines>0){
|
274
|
+
id.spline <- smooth.spline(id.x[id.f>0], log(id.f[id.f>0]), spar=id.splines)
|
275
|
+
lines(exp(id.spline$y), id.spline$x, lwd=2)
|
276
|
+
}
|
277
|
+
}
|
278
|
+
|
279
|
+
abline(h=id.hallmarks, lty=2, col=grey(0.7));
|
280
|
+
abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
|
281
|
+
legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3));
|
282
|
+
out <- c(out, list(id.mean=mean(rec[, id.reccol])));
|
283
|
+
out <- c(out, list(id.median=median(rec[, id.reccol])));
|
284
|
+
if(ret.hist) out <- c(out, list(id.hist=id.hist));
|
285
|
+
|
286
|
+
# Position histogram
|
287
|
+
if(verbose) cat("Pos. hist.\n")
|
288
|
+
par(mar=c(0,4,4,0)+0.1);
|
289
|
+
h1<-rep(0,nrow(rec.hist)) ;
|
290
|
+
h2<-rep(0,nrow(rec.hist)) ;
|
291
|
+
pos.winsize <- (pos.max-pos.min+1)/pos.breaks;
|
292
|
+
if(sum(rec.hist[, id.top])>0) h1 <- rowSums(matrix(rec.hist[, id.top], nrow=nrow(rec.hist)))/pos.winsize;
|
293
|
+
if(sum(rec.hist[,-id.top])>0) h2 <- rowSums(matrix(rec.hist[,-id.top], nrow=nrow(rec.hist)))/pos.winsize;
|
294
|
+
|
295
|
+
ymin <- min(1, h1[h1>0], h2[h2>0]);
|
296
|
+
ymax <- max(10, h1, h2);
|
297
|
+
if(is.na(ymin) || ymin<=0) ymin <- 1e-10;
|
298
|
+
if(is.na(ymax) || ymax<=0) ymax <- 1;
|
299
|
+
plot(1, t='n', xlab='', xaxt='n', ylab='Sequencing depth (X)', log='y', xlim=pos.lim,
|
300
|
+
ylim=c(ymin, ymax), xaxs='i', main=main, ...);
|
301
|
+
if(!is.na(contig.col)) abline(v=c(lim[,2], lim[,3])/1e6, lty=1, col=contig.col);
|
302
|
+
abline(h=10^c(0:5), lty=2, col=grey(0.7));
|
303
|
+
if(sum(h2)>0){
|
304
|
+
h2.x <- rep(pos.marks, each=2)[2:(pos.breaks*2+1)]
|
305
|
+
h2.y <- rep(h2, each=2)[1:(pos.breaks*2)]
|
306
|
+
lines(h2.x, h2.y, lwd=ifelse(pos.splines>0, 1/2, 2), col=grey(0.5));
|
307
|
+
if(pos.splines>0){
|
308
|
+
h2.spline <- smooth.spline(h2.x[h2.y>0], log(h2.y[h2.y>0]), spar=pos.splines)
|
309
|
+
lines(h2.spline$x, exp(h2.spline$y), lwd=2, col=grey(0.5))
|
310
|
+
}
|
311
|
+
if(ret.hist) out <- c(out, list(pos.hist.low=h2.y));
|
312
|
+
}
|
313
|
+
if(sum(h1)>0){
|
314
|
+
h1.x <- rep(pos.marks, each=2)[2:(pos.breaks*2+1)]
|
315
|
+
h1.y <- rep(h1, each=2)[1:(pos.breaks*2)]
|
316
|
+
lines(h1.x, h1.y, lwd=ifelse(pos.splines>0, 1/2, 2), col=grey(0));
|
317
|
+
if(pos.splines>0){
|
318
|
+
h1.spline <- smooth.spline(h1.x[h1.y>0], log(h1.y[h1.y>0]), spar=pos.splines)
|
319
|
+
lines(h1.spline$x, exp(h1.spline$y), lwd=2, col=grey(0))
|
320
|
+
}
|
321
|
+
if(ret.hist) out <- c(out, list(pos.hist.top=h1.y));
|
322
|
+
}
|
323
|
+
legend('topleft', 'Pos. histogram', bg=rgb(1,1,1,2/3));
|
324
|
+
out <- c(out, list(id.max=id.max, id.cutoff=id.marks[id.top[1]]));
|
325
|
+
out <- c(out, list(seqdepth.mean.top=mean(h1)));
|
326
|
+
out <- c(out, list(seqdepth.mean.low=mean(h2)));
|
327
|
+
out <- c(out, list(seqdepth.mean=mean(h1+h2)));
|
328
|
+
out <- c(out, list(seqdepth.median.top=median(h1)));
|
329
|
+
out <- c(out, list(seqdepth.median.low=median(h2)));
|
330
|
+
out <- c(out, list(seqdepth.median=median(h1+h2)));
|
331
|
+
out <- c(out, list(id.metric=id.fullname));
|
332
|
+
out <- c(out, list(id.summary=id.summary.name));
|
333
|
+
|
334
|
+
# Legend
|
335
|
+
par(mar=c(0,0,4,2)+0.1);
|
336
|
+
plot(1, t='n', xlab='', xaxt='n', ylab='', yaxt='n', xlim=c(0,1), ylim=c(0,1), xaxs='r', yaxs='i', ...);
|
337
|
+
text(1/2, 5/6, labels=paste('Reads per ', signif((pos.max-pos.min)/pos.breaks, 2), ' bp (rec. plot)', sep=''), pos=3);
|
338
|
+
leg.col <- gplots::colorpanel(100, rec.col1, rec.col2);
|
339
|
+
leg.lab <- signif(10^seq(0, log10(max(rec.hist)), length.out=10), 2);
|
340
|
+
for(i in 1:10){
|
341
|
+
for(j in 1:10){
|
342
|
+
k <- (i-1)*10 + j;
|
343
|
+
polygon(c(k-1, k, k, k-1)/100, c(2/3, 2/3, 5/6, 5/6), border=leg.col[k], col=leg.col[k]);
|
344
|
+
}
|
345
|
+
text((i-0.5)/10, 2/3, labels=paste(leg.lab[i], ''), srt=90, pos=2, offset=0, cex=3/4);
|
346
|
+
}
|
347
|
+
legend('bottom',
|
348
|
+
legend=c('Contig boundary', 'Hallmark', paste(id.fullname, 'cutoff'),
|
349
|
+
paste('Pos. hist.: ',id.shortname,' > ',signif(id.marks[id.top[1]],2),id.units,sep=''),
|
350
|
+
paste('Pos. hist.: ',id.shortname,' < ',signif(id.marks[id.top[1]],2),id.units,sep='')), ncol=2,
|
351
|
+
col=grey(c(0.85, 0.7, 0.5, 0, 0.5)), lty=c(1,2,3,1,1), lwd=c(1,1,1,2,2), bty='n', inset=0.05, cex=5/6);
|
352
|
+
return(out);
|
318
353
|
});
|
319
354
|
|