miga-base 1.2.17.1 → 1.2.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +2 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
- data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
- data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
- data/utils/enveomics/Scripts/SRA.download.bash +14 -2
- data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
- data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
- data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
- data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
- data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
- data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
- data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
- data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
- data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
- data/utils/enveomics/enveomics.R/R/utils.R +30 -20
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
- metadata +3 -37
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
|
@@ -35,150 +35,169 @@
|
|
|
35
35
|
#' Any value above 100 indicates that no values are to be reported.
|
|
36
36
|
#' @param order Controls how the rows should be ordered.
|
|
37
37
|
#' \itemize{
|
|
38
|
-
#'
|
|
39
|
-
#'
|
|
40
|
-
#'
|
|
41
|
-
#'
|
|
42
|
-
#'
|
|
43
|
-
#'
|
|
44
|
-
#'
|
|
45
|
-
#'
|
|
38
|
+
#' \item{
|
|
39
|
+
#' If \code{NULL} (default), \code{sort.by} is applied per row and the
|
|
40
|
+
#' results are sorted decreasingly.
|
|
41
|
+
#' }
|
|
42
|
+
#' \item{
|
|
43
|
+
#' If \code{NA}, no sorting is performed, i.e., the original order is
|
|
44
|
+
#' respected.
|
|
45
|
+
#' }
|
|
46
|
+
#' \item{
|
|
47
|
+
#' If a vector is provided, it is assumed to be the custom order to be used
|
|
48
|
+
#' (either by numeric index or by row names).
|
|
49
|
+
#' }
|
|
50
|
+
#' }
|
|
46
51
|
#' @param col Colors to use. If provided, overrides the variables \code{top}
|
|
47
52
|
#' and \code{colors.per.group}, but \code{other.col} is still used if the
|
|
48
|
-
#' vector is insufficient for all the rows. An additional palette is available
|
|
49
|
-
#' \code{col='coto'} (contributed by Luis (Coto) Orellana).
|
|
53
|
+
#' vector is insufficient for all the rows. An additional palette is available
|
|
54
|
+
#' with \code{col='coto'} (contributed by Luis (Coto) Orellana).
|
|
50
55
|
#' @param ... Any additional parameters to be passed to barplot.
|
|
51
56
|
#'
|
|
57
|
+
#' @return No return value
|
|
58
|
+
#'
|
|
52
59
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
|
53
60
|
#'
|
|
54
61
|
#' @examples
|
|
55
62
|
#' # Load data
|
|
56
|
-
#' data("phyla.counts", package="enveomics.R", envir=environment())
|
|
63
|
+
#' data("phyla.counts", package = "enveomics.R", envir = environment())
|
|
57
64
|
#' # Create a barplot sorted by variance with organic trends
|
|
58
65
|
#' enve.barplot(
|
|
59
|
-
#'
|
|
60
|
-
#'
|
|
61
|
-
#'
|
|
62
|
-
#'
|
|
63
|
-
#'
|
|
64
|
-
#'
|
|
65
|
-
#'
|
|
66
|
+
#' phyla.counts, # Counts of phyla in four sites
|
|
67
|
+
#' sizes = c(250,100,75,200), # Total sizes of the datasets of each site
|
|
68
|
+
#' bars.width = 2, # Decrease from default, so the names are fully displayed
|
|
69
|
+
#' organic.trend = TRUE, # Nice curvy background
|
|
70
|
+
#' sort.by = var # Sort by variance across sites
|
|
71
|
+
#' )
|
|
72
|
+
#'
|
|
66
73
|
#' @export
|
|
67
74
|
|
|
68
75
|
enve.barplot <- function(
|
|
69
76
|
x,
|
|
70
77
|
sizes,
|
|
71
|
-
top=25,
|
|
72
|
-
colors.per.group=9,
|
|
73
|
-
bars.width=4,
|
|
74
|
-
legend.ncol=1,
|
|
75
|
-
other.col=
|
|
76
|
-
add.trend=FALSE,
|
|
77
|
-
organic.trend=FALSE,
|
|
78
|
-
sort.by=median,
|
|
79
|
-
min.report=101,
|
|
80
|
-
order=NULL,
|
|
78
|
+
top = 25,
|
|
79
|
+
colors.per.group = 9,
|
|
80
|
+
bars.width = 4,
|
|
81
|
+
legend.ncol = 1,
|
|
82
|
+
other.col = "#000000",
|
|
83
|
+
add.trend = FALSE,
|
|
84
|
+
organic.trend = FALSE,
|
|
85
|
+
sort.by = median,
|
|
86
|
+
min.report = 101,
|
|
87
|
+
order = NULL,
|
|
81
88
|
col,
|
|
82
89
|
...
|
|
83
90
|
){
|
|
84
|
-
|
|
85
91
|
# Read input
|
|
86
|
-
if(is.character(x)){
|
|
87
|
-
c <- read.table(x, sep=
|
|
88
|
-
comment.char=
|
|
89
|
-
}else{
|
|
92
|
+
if (is.character(x)) {
|
|
93
|
+
c <- read.table(x, sep = "\t", header = TRUE, row.names = 1, quote = "",
|
|
94
|
+
comment.char = "")
|
|
95
|
+
} else {
|
|
90
96
|
c <- as.data.frame(x)
|
|
91
97
|
}
|
|
92
|
-
if(missing(sizes)) sizes
|
|
98
|
+
if (missing(sizes)) sizes <- colSums(c)
|
|
93
99
|
p <- c
|
|
94
|
-
for (i in 1:ncol(c)) p[, i] <- c[, i]*100/sizes[i]
|
|
95
|
-
if(top > nrow(p)) top
|
|
96
|
-
|
|
100
|
+
for (i in 1:ncol(c)) p[, i] <- c[, i] * 100 / sizes[i]
|
|
101
|
+
if(top > nrow(p)) top <- nrow(p)
|
|
102
|
+
|
|
97
103
|
# Sort
|
|
98
|
-
if(is.null(order[1])){
|
|
104
|
+
if (is.null(order[1])) {
|
|
99
105
|
p <- p[order(apply(p, 1, sort.by)), ]
|
|
100
|
-
}else if(is.na(order[1])){
|
|
106
|
+
} else if (is.na(order[1])) {
|
|
101
107
|
|
|
102
|
-
}else{
|
|
108
|
+
} else {
|
|
103
109
|
p <- p[order, ]
|
|
104
110
|
}
|
|
105
|
-
if(organic.trend) add.trend
|
|
106
|
-
|
|
111
|
+
if(organic.trend) add.trend <- TRUE
|
|
112
|
+
|
|
107
113
|
# Colors
|
|
108
|
-
if(is.null(top)) top <- nrow(p)
|
|
109
|
-
if(missing(col)){
|
|
110
|
-
color.col <- rainbow(min(colors.per.group, top), s=1, v=4/5)
|
|
111
|
-
if(top > colors.per.group)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
color.col <-
|
|
114
|
+
if (is.null(top)) top <- nrow(p)
|
|
115
|
+
if (missing(col)) {
|
|
116
|
+
color.col <- rainbow(min(colors.per.group, top), s = 1, v = 4/5)
|
|
117
|
+
if(top > colors.per.group)
|
|
118
|
+
color.col <- c(color.col,
|
|
119
|
+
rainbow(min(colors.per.group * 2, top) - colors.per.group,
|
|
120
|
+
s = 3/4, v = 3/5))
|
|
121
|
+
if(top > colors.per.group * 2)
|
|
122
|
+
color.col <- c(color.col,
|
|
123
|
+
rainbow(top-colors.per.group * 2, s = 1, v = 1.25 / 4))
|
|
124
|
+
} else if (length(col) == 1 & col[1] == "coto") {
|
|
125
|
+
color.col <- c("#5BC0EB", "#FDE74C", "#9BC53D", "#E55934", "#FA7921",
|
|
126
|
+
"#EF476F", "#FFD166", "#06D6A0", "#118AB2", "#073B4C",
|
|
127
|
+
"#264653", "#2A9D8F", "#E9C46A", "#F4A261", "#E76F51")
|
|
128
|
+
color.col <- head(color.col, n = nrow(p))
|
|
120
129
|
top <- length(color.col)
|
|
121
|
-
}else{
|
|
130
|
+
} else {
|
|
122
131
|
color.col <- col
|
|
123
|
-
color.col <- tail(color.col, n=nrow(p))
|
|
132
|
+
color.col <- tail(color.col, n = nrow(p))
|
|
124
133
|
top <- length(color.col)
|
|
125
134
|
}
|
|
126
135
|
|
|
127
136
|
# Plot
|
|
128
|
-
layout(matrix(1:2, nrow=1), widths=c(bars.width,1))
|
|
129
|
-
mar <- par(
|
|
130
|
-
par(mar
|
|
131
|
-
mp <- barplot(
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
137
|
+
layout(matrix(1:2, nrow = 1), widths = c(bars.width, 1))
|
|
138
|
+
mar <- par(mar = c(5, 4, 4, 0) + 0.1)
|
|
139
|
+
on.exit(par(mar))
|
|
140
|
+
mp <- barplot(
|
|
141
|
+
as.matrix(p),
|
|
142
|
+
col = rev(c(color.col, rep(other.col, nrow(p) - length(color.col)))),
|
|
143
|
+
space = ifelse(add.trend,ifelse(organic.trend, 0.75, 0.5), 0.2),
|
|
144
|
+
border = NA, ...
|
|
145
|
+
)
|
|
146
|
+
if (add.trend || min.report < max(p)) {
|
|
135
147
|
color.alpha <- enve.col.alpha(c(color.col, other.col), 1/4)
|
|
136
|
-
if(top < nrow(p)){
|
|
148
|
+
if (top < nrow(p)) {
|
|
137
149
|
cf <- colSums(p[1:(nrow(p)-top), ])
|
|
138
|
-
}else{
|
|
150
|
+
} else {
|
|
139
151
|
cf <- rep(0, ncol(p))
|
|
140
152
|
}
|
|
141
|
-
for(i in (nrow(p)-top+1):nrow(p)){
|
|
153
|
+
for (i in (nrow(p) - top + 1):nrow(p)) {
|
|
142
154
|
f <- as.numeric(p[i, ])
|
|
143
155
|
cf <- as.numeric(cf + f)
|
|
144
|
-
if(nrow(p)-i < top){
|
|
145
|
-
if(organic.trend){
|
|
156
|
+
if (nrow(p) - i < top){
|
|
157
|
+
if (organic.trend) {
|
|
146
158
|
spc <- 0.5
|
|
147
|
-
x <- c(mp[1]-spc)
|
|
148
|
-
y1 <- c(cf[1]-f[1])
|
|
159
|
+
x <- c(mp[1] - spc)
|
|
160
|
+
y1 <- c(cf[1] - f[1])
|
|
149
161
|
y2 <- c(cf[1])
|
|
150
|
-
for(j in 2:ncol(p)){
|
|
151
|
-
x <- c(x, seq(mp[j-1]+spc, mp[j]-spc, length.out=22))
|
|
152
|
-
y1 <- c(
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
162
|
+
for (j in 2:ncol(p)) {
|
|
163
|
+
x <- c(x, seq(mp[j - 1] + spc, mp[j] - spc, length.out = 22))
|
|
164
|
+
y1 <- c(
|
|
165
|
+
y1, cf[j - 1] - f[j - 1],
|
|
166
|
+
(tanh(seq(-2.5, 2.5, length.out = 20)) / 2 + 0.5) *
|
|
167
|
+
((cf[j] - f[j]) - (cf[j - 1] - f[j - 1])) +
|
|
168
|
+
(cf[j - 1] - f[j - 1]),
|
|
169
|
+
cf[j] - f[j]
|
|
170
|
+
)
|
|
171
|
+
y2 <- c(
|
|
172
|
+
y2, cf[j-1],
|
|
173
|
+
(tanh(seq(-2.5, 2.5, length.out = 20)) / 2 + 0.5) *
|
|
174
|
+
(cf[j] - cf[j - 1]) + (cf[j - 1]), cf[j]
|
|
175
|
+
)
|
|
158
176
|
}
|
|
159
|
-
x <- c(x, mp[length(mp)]+spc)
|
|
160
|
-
y1 <- c(y1, cf[length(cf)]-f[length(f)])
|
|
177
|
+
x <- c(x, mp[length(mp)] + spc)
|
|
178
|
+
y1 <- c(y1, cf[length(cf)] - f[length(f)])
|
|
161
179
|
y2 <- c(y2, cf[length(cf)])
|
|
162
|
-
polygon(c(x, rev(x)), c(y1, rev(y2)),
|
|
163
|
-
border=NA)
|
|
164
|
-
}else if(add.trend){
|
|
165
|
-
x <- rep(mp, each=2)+c(-0.5,0.5)
|
|
166
|
-
if(add.trend)
|
|
167
|
-
|
|
168
|
-
|
|
180
|
+
polygon(c(x, rev(x)), c(y1, rev(y2)),
|
|
181
|
+
col = color.alpha[nrow(p) - i + 1], border = NA)
|
|
182
|
+
} else if (add.trend) {
|
|
183
|
+
x <- rep(mp, each = 2) + c(-0.5, 0.5)
|
|
184
|
+
if(add.trend)
|
|
185
|
+
polygon(c(x, rev(x)),
|
|
186
|
+
c(rep(cf - f, each = 2), rev(rep(cf, each = 2))),
|
|
187
|
+
col = color.alpha[nrow(p) - i + 1], border = NA)
|
|
169
188
|
}
|
|
170
|
-
text(mp, cf-f/2, ifelse(f>min.report, signif(f, 3),
|
|
189
|
+
text(mp, cf - f / 2, ifelse(f > min.report, signif(f, 3), ""),
|
|
190
|
+
col = "white")
|
|
171
191
|
}
|
|
172
192
|
}
|
|
173
193
|
}
|
|
174
|
-
|
|
194
|
+
|
|
175
195
|
# Legend
|
|
176
|
-
par(mar=rep(0,4)+0.1)
|
|
177
|
-
plot(1, t=
|
|
178
|
-
nam <- rownames(p[nrow(p):(nrow(p)-top+1), ])
|
|
179
|
-
if(top < nrow(p))
|
|
180
|
-
|
|
181
|
-
legend(
|
|
182
|
-
pt.cex=2, ncol=legend.ncol)
|
|
183
|
-
par(mar=mar)
|
|
196
|
+
par(mar = rep(0, 4) + 0.1) # par(mar) already being watched by on.exit
|
|
197
|
+
plot(1, t = "n", bty = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n")
|
|
198
|
+
nam <- rownames(p[nrow(p):(nrow(p) - top + 1), ])
|
|
199
|
+
if(top < nrow(p))
|
|
200
|
+
nam <- c(nam, paste("Other (", nrow(p) - length(color.col), ")", sep = ""))
|
|
201
|
+
legend("center", col = c(color.col, other.col), legend = nam, pch = 15,
|
|
202
|
+
bty = "n", pt.cex = 2, ncol = legend.ncol)
|
|
184
203
|
}
|
|
@@ -40,96 +40,102 @@ enve.cliopts <- function(
|
|
|
40
40
|
rd_file,
|
|
41
41
|
positional_arguments,
|
|
42
42
|
usage,
|
|
43
|
-
mandatory=c(),
|
|
44
|
-
vectorize=c(),
|
|
45
|
-
ignore=c(),
|
|
46
|
-
number=c(),
|
|
47
|
-
defaults=list(),
|
|
48
|
-
o_desc=list(),
|
|
49
|
-
p_desc=""
|
|
43
|
+
mandatory = c(),
|
|
44
|
+
vectorize = c(),
|
|
45
|
+
ignore = c(),
|
|
46
|
+
number = c(),
|
|
47
|
+
defaults = list(),
|
|
48
|
+
o_desc = list(),
|
|
49
|
+
p_desc = ""
|
|
50
50
|
){
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
requireNamespace("optparse", quietly=TRUE)))
|
|
51
|
+
# Load stuff
|
|
52
|
+
if (!suppressPackageStartupMessages(
|
|
53
|
+
requireNamespace("optparse", quietly = TRUE)))
|
|
55
54
|
stop("Package 'optparse' is required.")
|
|
56
|
-
requireNamespace("tools", quietly=TRUE)
|
|
57
|
-
if(missing(positional_arguments)) positional_arguments <- FALSE
|
|
58
|
-
if(missing(usage)) usage <- "usage: %prog [options]"
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
if(!missing(rd_file)){
|
|
55
|
+
requireNamespace("tools", quietly = TRUE)
|
|
56
|
+
if (missing(positional_arguments)) positional_arguments <- FALSE
|
|
57
|
+
if (missing(usage)) usage <- "usage: %prog [options]"
|
|
58
|
+
|
|
59
|
+
# Get help (if any)
|
|
60
|
+
if (!missing(rd_file)) {
|
|
62
61
|
rd <- tools::parse_Rd(rd_file)
|
|
63
|
-
for(i in 1:length(rd)){
|
|
64
|
-
tag <- attr(rd[[i]],
|
|
65
|
-
if(tag=="\\description" && p_desc==""){
|
|
66
|
-
p_desc <- paste("\n\t",as.character(rd[[i]]),sep=
|
|
67
|
-
}else if(tag=="\\arguments"){
|
|
68
|
-
for(j in 1:length(rd[[i]])){
|
|
69
|
-
if(length(rd[[i]][[j]])==2){
|
|
62
|
+
for (i in 1:length(rd)) {
|
|
63
|
+
tag <- attr(rd[[i]], "Rd_tag")
|
|
64
|
+
if (tag == "\\description" && p_desc == "") {
|
|
65
|
+
p_desc <- paste("\n\t", as.character(rd[[i]]), sep = "")
|
|
66
|
+
} else if (tag == "\\arguments") {
|
|
67
|
+
for (j in 1:length(rd[[i]])) {
|
|
68
|
+
if (length(rd[[i]][[j]]) == 2) {
|
|
70
69
|
name <- as.character(rd[[i]][[j]][[1]])
|
|
71
|
-
if(length(o_desc[[name]])==1) next
|
|
70
|
+
if (length(o_desc[[name]]) == 1) next
|
|
72
71
|
desc <- as.character(rd[[i]][[j]][[2]])
|
|
73
|
-
o_desc[[name]] <- paste(gsub("\n","\n\t\t",desc), collapse=
|
|
72
|
+
o_desc[[name]] <- paste(gsub("\n", "\n\t\t", desc), collapse = "")
|
|
74
73
|
}
|
|
75
74
|
}
|
|
76
75
|
}
|
|
77
76
|
}
|
|
78
77
|
}
|
|
79
|
-
|
|
80
|
-
|
|
78
|
+
|
|
79
|
+
# Set options
|
|
81
80
|
o_i <- 0
|
|
82
81
|
opts <- list()
|
|
83
82
|
f <- formals(fx)
|
|
84
|
-
if(length(defaults)>0){
|
|
85
|
-
for(i in 1:length(defaults)) f[[names(defaults)[i]]] <- defaults[[i]]
|
|
83
|
+
if (length(defaults) > 0) {
|
|
84
|
+
for (i in 1:length(defaults)) f[[names(defaults)[i]]] <- defaults[[i]]
|
|
86
85
|
}
|
|
87
|
-
for(i in names(f)){
|
|
88
|
-
if(i=="..." || i %in% ignore) next
|
|
86
|
+
for (i in names(f)) {
|
|
87
|
+
if (i == "..." || i %in% ignore) next
|
|
89
88
|
o_i <- o_i + 1
|
|
90
|
-
flag <- gsub("\\.","-",i)
|
|
91
|
-
|
|
92
|
-
optopt <- list(help="")
|
|
93
|
-
if(length(o_desc[[i]])==1) optopt$help <- o_desc[[i]]
|
|
94
|
-
if(!is.null(f[[i]]) && !suppressWarnings(is.na(f[[i]])) &&
|
|
95
|
-
|
|
89
|
+
flag <- gsub("\\.", "-", i)
|
|
90
|
+
|
|
91
|
+
optopt <- list(help = "")
|
|
92
|
+
if (length(o_desc[[i]]) == 1) optopt$help <- o_desc[[i]]
|
|
93
|
+
if (!is.null(f[[i]]) && !suppressWarnings(is.na(f[[i]])) &&
|
|
94
|
+
is.logical(f[[i]])){
|
|
95
|
+
optopt$opt_str <- paste(ifelse(f[[i]], "--no-", "--"), flag, sep = "")
|
|
96
96
|
optopt$action <- ifelse(f[[i]], "store_false", "store_true")
|
|
97
|
-
}else{
|
|
98
|
-
optopt$opt_str <- paste("--", flag, sep=
|
|
97
|
+
} else {
|
|
98
|
+
optopt$opt_str <- paste("--", flag, sep = "")
|
|
99
99
|
optopt$action <- "store"
|
|
100
|
-
optopt$help <- paste(
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
100
|
+
optopt$help <- paste(
|
|
101
|
+
optopt$help, "\n\t\t[",
|
|
102
|
+
ifelse(i %in% mandatory, "** MANDATORY", "default %default"),
|
|
103
|
+
ifelse(i %in% vectorize, ", separate values by commas", ""),
|
|
104
|
+
"].",
|
|
105
|
+
sep = ""
|
|
106
|
+
)
|
|
104
107
|
}
|
|
105
|
-
if(!is.name(f[[i]])){
|
|
108
|
+
if (!is.name(f[[i]])) {
|
|
106
109
|
optopt$default <- f[[i]]
|
|
107
110
|
optopt$metavar <- class(f[[i]])
|
|
108
111
|
}
|
|
109
|
-
if(i %in% number) optopt$metavar <- "NUMERIC"
|
|
112
|
+
if (i %in% number) optopt$metavar <- "NUMERIC"
|
|
110
113
|
optopt$dest <- i
|
|
111
|
-
|
|
114
|
+
|
|
112
115
|
opts[[o_i]] <- do.call(optparse::make_option, optopt)
|
|
113
116
|
}
|
|
114
117
|
opt <- optparse::parse_args(
|
|
115
|
-
optparse::OptionParser(
|
|
116
|
-
|
|
118
|
+
optparse::OptionParser(
|
|
119
|
+
option_list = opts, description = p_desc, usage = usage
|
|
120
|
+
),
|
|
121
|
+
positional_arguments = positional_arguments
|
|
122
|
+
)
|
|
117
123
|
|
|
118
|
-
|
|
119
|
-
if(length(opt[[
|
|
120
|
-
for(i in mandatory){
|
|
121
|
-
if(length(opt$options[[i]])==0) stop(
|
|
124
|
+
# Post-hoc checks
|
|
125
|
+
if (length(opt[["options"]]) == 0) opt <- list(options = opt, args = c())
|
|
126
|
+
for (i in mandatory) {
|
|
127
|
+
if(length(opt$options[[i]]) == 0) stop("Missing mandatory argument: ", i)
|
|
122
128
|
}
|
|
123
|
-
for(i in vectorize){
|
|
124
|
-
if(length(opt$options[[i]])==1)
|
|
125
|
-
opt$options[[i]] <- strsplit(opt$options[[i]],",")[[1]]
|
|
129
|
+
for (i in vectorize) {
|
|
130
|
+
if (length(opt$options[[i]]) == 1)
|
|
131
|
+
opt$options[[i]] <- strsplit(opt$options[[i]], ",")[[1]]
|
|
126
132
|
}
|
|
127
|
-
for(i in number){
|
|
128
|
-
if(length(opt$options[[i]])>0)
|
|
133
|
+
for (i in number) {
|
|
134
|
+
if (length(opt$options[[i]]) > 0)
|
|
129
135
|
opt$options[[i]] <- as.numeric(opt$options[[i]])
|
|
130
136
|
}
|
|
131
137
|
opt$options$help <- NULL
|
|
132
|
-
|
|
138
|
+
|
|
133
139
|
return(opt)
|
|
134
140
|
}
|
|
135
141
|
|
|
@@ -20,27 +20,40 @@
|
|
|
20
20
|
#' @return Returns a \strong{dist} object.
|
|
21
21
|
#'
|
|
22
22
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
|
23
|
+
#'
|
|
24
|
+
#' @examples
|
|
25
|
+
#' # A sparse matrix representation of similarities as data frame.
|
|
26
|
+
#' # The column "extra_data" is meaningless, only included to illustrate
|
|
27
|
+
#' # the use of the obj*.index parameters
|
|
28
|
+
#' sim <- data.frame(
|
|
29
|
+
#' extra_data = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.5),
|
|
30
|
+
#' query = c("A", "A", "A", "B", "C", "C", "D"),
|
|
31
|
+
#' subject = c("A", "B", "C", "B", "C", "B", "A"),
|
|
32
|
+
#' similarity = c(100, 90, 60, 100, 100, 70, 10)
|
|
33
|
+
#' )
|
|
34
|
+
#' dist <- enve.df2dist(sim, "query", "subject", "similarity", max.sim = 100)
|
|
35
|
+
#' print(dist)
|
|
23
36
|
#'
|
|
24
37
|
#' @export
|
|
25
|
-
|
|
26
38
|
enve.df2dist <- function(
|
|
27
39
|
x,
|
|
28
40
|
obj1.index = 1,
|
|
29
41
|
obj2.index = 2,
|
|
30
42
|
dist.index = 3,
|
|
31
|
-
default.d
|
|
32
|
-
max.sim
|
|
33
|
-
){
|
|
43
|
+
default.d = NA,
|
|
44
|
+
max.sim = 0
|
|
45
|
+
) {
|
|
34
46
|
x <- as.data.frame(x)
|
|
35
47
|
a <- as.character(x[, obj1.index])
|
|
36
48
|
b <- as.character(x[, obj2.index])
|
|
37
49
|
d <- as.double(x[, dist.index])
|
|
38
|
-
if(max.sim != 0) d <- (max.sim - d) / max.sim
|
|
39
|
-
ids <- unique(c(a,b))
|
|
40
|
-
m <- matrix(
|
|
41
|
-
nrow = length(ids), ncol = length(ids), dimnames = list(ids, ids)
|
|
50
|
+
if (max.sim != 0) d <- (max.sim - d) / max.sim
|
|
51
|
+
ids <- unique(c(a, b))
|
|
52
|
+
m <- matrix(
|
|
53
|
+
default.d, nrow = length(ids), ncol = length(ids), dimnames = list(ids, ids)
|
|
54
|
+
)
|
|
42
55
|
diag(m) <- 0.0
|
|
43
|
-
m[cbind(a,b)] <- d
|
|
56
|
+
m[cbind(a, b)] <- d
|
|
44
57
|
m <- pmin(m, t(m), na.rm = TRUE)
|
|
45
58
|
return(as.dist(m))
|
|
46
59
|
}
|
|
@@ -55,7 +68,8 @@ enve.df2dist <- function(
|
|
|
55
68
|
#' \enumerate{
|
|
56
69
|
#' \item ID of the object 1,
|
|
57
70
|
#' \item ID of the object 2, and
|
|
58
|
-
#' \item distance between the two objects.
|
|
71
|
+
#' \item distance between the two objects.
|
|
72
|
+
#' }
|
|
59
73
|
#' @param obj1.index Index of the column containing the ID of the object 1.
|
|
60
74
|
#' @param obj2.index Index of the column containing the ID of the object 2.
|
|
61
75
|
#' @param dist.index Index of the column containing the distance.
|
|
@@ -67,41 +81,65 @@ enve.df2dist <- function(
|
|
|
67
81
|
#'
|
|
68
82
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
|
69
83
|
#'
|
|
84
|
+
#' @examples
|
|
85
|
+
#' # A sparse matrix representation of distances as data frame.
|
|
86
|
+
#' # Note that some pairs are repeated.
|
|
87
|
+
#' dist.df <- data.frame(
|
|
88
|
+
#' query = c("A", "A", "A", "B", "C", "C", "B", "B", "B"),
|
|
89
|
+
#' subject = c("A", "B", "C", "B", "C", "B", "A", "C", "C"),
|
|
90
|
+
#' distance = c( 0, 0.1, 0.4, 0, 0, 0.4, 0.2, 0.2, 0.1)
|
|
91
|
+
#' )
|
|
92
|
+
#' dist <- enve.df2dist.group(dist.df)
|
|
93
|
+
#' print(dist)
|
|
94
|
+
#'
|
|
95
|
+
#' # Use the mean of all repeated occurrences instead of the median.
|
|
96
|
+
#' dist <- enve.df2dist.group(dist.df, summary = mean)
|
|
97
|
+
#'
|
|
98
|
+
#' # Simply use the first occurrence for any given pair.
|
|
99
|
+
#' dist <- enve.df2dist.group(dist.df, summary = function(x) head(x, n = 1))
|
|
100
|
+
#'
|
|
70
101
|
#' @export
|
|
71
|
-
|
|
72
102
|
enve.df2dist.group <- function(
|
|
73
103
|
x,
|
|
74
|
-
obj1.index=1,
|
|
75
|
-
obj2.index=2,
|
|
76
|
-
dist.index=3,
|
|
77
|
-
summary=median,
|
|
78
|
-
empty.rm=TRUE
|
|
79
|
-
){
|
|
80
|
-
x <- as.data.frame(x)
|
|
81
|
-
if(empty.rm)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
104
|
+
obj1.index = 1,
|
|
105
|
+
obj2.index = 2,
|
|
106
|
+
dist.index = 3,
|
|
107
|
+
summary = median,
|
|
108
|
+
empty.rm = TRUE
|
|
109
|
+
) {
|
|
110
|
+
x <- as.data.frame(x)
|
|
111
|
+
if(empty.rm)
|
|
112
|
+
x <- x[
|
|
113
|
+
!(is.na(x[, obj1.index]) |
|
|
114
|
+
is.na(x[, obj2.index]) |
|
|
115
|
+
x[, obj1.index] == "" |
|
|
116
|
+
x[, obj2.index] == ""),
|
|
117
|
+
]
|
|
118
|
+
a <- as.character(x[, obj1.index])
|
|
119
|
+
b <- as.character(x[, obj2.index])
|
|
120
|
+
d <- as.double(x[, dist.index])
|
|
121
|
+
ids <- unique(c(a, b))
|
|
122
|
+
if (length(ids) < 2) return(NA)
|
|
123
|
+
m <- matrix(
|
|
124
|
+
NA, nrow = length(ids), ncol = length(ids), dimnames = list(ids, ids)
|
|
125
|
+
)
|
|
88
126
|
diag(m) <- 0
|
|
89
|
-
for(i in 2:length(ids)){
|
|
90
|
-
id.i <- ids[i]
|
|
91
|
-
for(j in 1:(i-1)){
|
|
92
|
-
id.j <- ids[j]
|
|
93
|
-
d.ij <- summary(c(
|
|
94
|
-
m[id.i, id.j] <- d.ij
|
|
95
|
-
m[id.j, id.i] <- d.ij
|
|
127
|
+
for (i in 2:length(ids)) {
|
|
128
|
+
id.i <- ids[i]
|
|
129
|
+
for (j in 1:(i - 1)) {
|
|
130
|
+
id.j <- ids[j]
|
|
131
|
+
d.ij <- summary(c(d[a == id.i & b == id.j], d[b == id.i & a == id.j]))
|
|
132
|
+
m[id.i, id.j] <- d.ij
|
|
133
|
+
m[id.j, id.i] <- d.ij
|
|
96
134
|
}
|
|
97
135
|
}
|
|
98
|
-
return(as.dist(m))
|
|
136
|
+
return(as.dist(m))
|
|
99
137
|
}
|
|
100
138
|
|
|
101
139
|
#' Enveomics: Data Frame to Dist (List)
|
|
102
140
|
#'
|
|
103
141
|
#' Transform a dataframe (or coercible object, like a table)
|
|
104
|
-
#' into a \strong{dist}
|
|
142
|
+
#' into a \strong{list} of \strong{dist} objects, one per group.
|
|
105
143
|
#'
|
|
106
144
|
#' @param x A dataframe (or coercible object) with at least three columns:
|
|
107
145
|
#' \enumerate{
|
|
@@ -114,41 +152,41 @@ enve.df2dist.group <- function(
|
|
|
114
152
|
#' @param obj2.index Index of the column containing the ID of the object 2.
|
|
115
153
|
#' @param dist.index Index of the column containing the distance.
|
|
116
154
|
#' @param empty.rm Remove incomplete matrices.
|
|
117
|
-
#' @param ... Any other parameters supported by
|
|
118
|
-
#' \code{\link{enve.df2dist.group}}.
|
|
155
|
+
#' @param ... Any other parameters supported by \code{\link{enve.df2dist}}.
|
|
119
156
|
#'
|
|
120
157
|
#' @return Returns a \strong{list} of \strong{dist} objects.
|
|
121
158
|
#'
|
|
122
159
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
|
123
|
-
#'
|
|
160
|
+
#'
|
|
124
161
|
#' @export
|
|
125
162
|
|
|
126
163
|
enve.df2dist.list <- function(
|
|
127
164
|
x,
|
|
128
165
|
groups,
|
|
129
|
-
obj1.index=1,
|
|
130
|
-
obj2.index=2,
|
|
131
|
-
dist.index=3,
|
|
132
|
-
empty.rm=TRUE,
|
|
166
|
+
obj1.index = 1,
|
|
167
|
+
obj2.index = 2,
|
|
168
|
+
dist.index = 3,
|
|
169
|
+
empty.rm = TRUE,
|
|
133
170
|
...
|
|
134
|
-
){
|
|
135
|
-
x <- as.data.frame(x)
|
|
136
|
-
a <- as.character(x[, obj1.index])
|
|
137
|
-
b <- as.character(x[, obj2.index])
|
|
138
|
-
d <- as.numeric(x[, dist.index])
|
|
139
|
-
ids.all <- unique(c(a,b))
|
|
140
|
-
l <- list()
|
|
141
|
-
same_group <- groups[a]==groups[b]
|
|
142
|
-
same_group <- ifelse(is.na(same_group), FALSE, TRUE)
|
|
143
|
-
for(group in unique(groups)){
|
|
144
|
-
ids <- ids.all[
|
|
145
|
-
if(length(ids)>1 & group!=""){
|
|
146
|
-
x.sub <- x[
|
|
147
|
-
if(nrow(x.sub)>0){
|
|
148
|
-
d.g <- enve.df2dist(x.sub, obj1.index, obj2.index, dist.index, ...)
|
|
149
|
-
if(!empty.rm | !any(is.na(d.g))) l[[
|
|
171
|
+
) {
|
|
172
|
+
x <- as.data.frame(x)
|
|
173
|
+
a <- as.character(x[, obj1.index])
|
|
174
|
+
b <- as.character(x[, obj2.index])
|
|
175
|
+
d <- as.numeric(x[, dist.index])
|
|
176
|
+
ids.all <- unique(c(a, b))
|
|
177
|
+
l <- list()
|
|
178
|
+
same_group <- groups[a] == groups[b]
|
|
179
|
+
same_group <- ifelse(is.na(same_group), FALSE, TRUE)
|
|
180
|
+
for (group in unique(groups)) {
|
|
181
|
+
ids <- ids.all[groups[ids.all] == group]
|
|
182
|
+
if (length(ids) > 1 & group != "") {
|
|
183
|
+
x.sub <- x[same_group & (groups[a] == group) & (groups[b] == group), ]
|
|
184
|
+
if (nrow(x.sub) > 0) {
|
|
185
|
+
d.g <- enve.df2dist(x.sub, obj1.index, obj2.index, dist.index, ...)
|
|
186
|
+
if(!empty.rm | !any(is.na(d.g))) l[[group]] <- d.g
|
|
150
187
|
}
|
|
151
188
|
}
|
|
152
189
|
}
|
|
153
|
-
return(l)
|
|
190
|
+
return(l)
|
|
154
191
|
}
|
|
192
|
+
|