miga-base 1.2.17.1 → 1.2.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
  4. data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
  5. data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
  6. data/utils/enveomics/Scripts/SRA.download.bash +14 -2
  7. data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
  8. data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
  9. data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
  10. data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
  11. data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
  12. data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
  13. data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
  14. data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
  15. data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
  16. data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
  17. data/utils/enveomics/enveomics.R/R/utils.R +30 -20
  18. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
  19. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
  20. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
  21. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
  22. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
  23. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
  24. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
  25. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
  26. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
  27. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
  28. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
  29. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
  30. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
  31. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
  32. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
  33. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
  34. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
  35. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
  36. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
  37. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
  38. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
  39. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
  40. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
  41. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
  42. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
  43. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
  44. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
  45. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
  46. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
  48. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
  49. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
  50. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
  51. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
  52. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
  53. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
  54. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
  55. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
  56. metadata +3 -37
  57. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  58. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  59. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  60. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  61. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  62. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  63. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  64. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  65. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  66. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  67. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  68. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  69. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  70. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  71. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  72. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  73. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  74. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  75. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  76. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  77. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  78. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  79. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  80. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  81. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  82. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  83. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  84. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  85. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  86. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  87. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  88. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  89. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  90. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  91. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
@@ -35,150 +35,169 @@
35
35
  #' Any value above 100 indicates that no values are to be reported.
36
36
  #' @param order Controls how the rows should be ordered.
37
37
  #' \itemize{
38
- #' \item{If \code{NULL}
39
- #' (default), \code{sort.by} is applied per row and the results are
40
- #' sorted decreasingly.}
41
- #' \item{If \code{NA}, no sorting is performed, i.e., the original
42
- #' order is respected.}
43
- #' \item{If a vector is provided, it is assumed to be the
44
- #' custom order to be used (either by numeric index or by row names).}
45
- #' }
38
+ #' \item{
39
+ #' If \code{NULL} (default), \code{sort.by} is applied per row and the
40
+ #' results are sorted decreasingly.
41
+ #' }
42
+ #' \item{
43
+ #' If \code{NA}, no sorting is performed, i.e., the original order is
44
+ #' respected.
45
+ #' }
46
+ #' \item{
47
+ #' If a vector is provided, it is assumed to be the custom order to be used
48
+ #' (either by numeric index or by row names).
49
+ #' }
50
+ #' }
46
51
  #' @param col Colors to use. If provided, overrides the variables \code{top}
47
52
  #' and \code{colors.per.group}, but \code{other.col} is still used if the
48
- #' vector is insufficient for all the rows. An additional palette is available with
49
- #' \code{col='coto'} (contributed by Luis (Coto) Orellana).
53
+ #' vector is insufficient for all the rows. An additional palette is available
54
+ #' with \code{col='coto'} (contributed by Luis (Coto) Orellana).
50
55
  #' @param ... Any additional parameters to be passed to barplot.
51
56
  #'
57
+ #' @return No return value
58
+ #'
52
59
  #' @author Luis M. Rodriguez-R [aut, cre]
53
60
  #'
54
61
  #' @examples
55
62
  #' # Load data
56
- #' data("phyla.counts", package="enveomics.R", envir=environment())
63
+ #' data("phyla.counts", package = "enveomics.R", envir = environment())
57
64
  #' # Create a barplot sorted by variance with organic trends
58
65
  #' enve.barplot(
59
- #' phyla.counts, # Counts of phyla in four sites
60
- #' sizes=c(250,100,75,200), # Total sizes of the datasets of each site
61
- #' bars.width=2, # Decrease from default, so the names are fully displayed
62
- #' organic.trend=TRUE, # Nice curvy background
63
- #' sort.by=var # Sort by variance across sites
64
- #' )
65
- #'
66
+ #' phyla.counts, # Counts of phyla in four sites
67
+ #' sizes = c(250,100,75,200), # Total sizes of the datasets of each site
68
+ #' bars.width = 2, # Decrease from default, so the names are fully displayed
69
+ #' organic.trend = TRUE, # Nice curvy background
70
+ #' sort.by = var # Sort by variance across sites
71
+ #' )
72
+ #'
66
73
  #' @export
67
74
 
68
75
  enve.barplot <- function(
69
76
  x,
70
77
  sizes,
71
- top=25,
72
- colors.per.group=9,
73
- bars.width=4,
74
- legend.ncol=1,
75
- other.col='#000000',
76
- add.trend=FALSE,
77
- organic.trend=FALSE,
78
- sort.by=median,
79
- min.report=101,
80
- order=NULL,
78
+ top = 25,
79
+ colors.per.group = 9,
80
+ bars.width = 4,
81
+ legend.ncol = 1,
82
+ other.col = "#000000",
83
+ add.trend = FALSE,
84
+ organic.trend = FALSE,
85
+ sort.by = median,
86
+ min.report = 101,
87
+ order = NULL,
81
88
  col,
82
89
  ...
83
90
  ){
84
-
85
91
  # Read input
86
- if(is.character(x)){
87
- c <- read.table(x, sep='\t', header=TRUE, row.names=1, quote='',
88
- comment.char='')
89
- }else{
92
+ if (is.character(x)) {
93
+ c <- read.table(x, sep = "\t", header = TRUE, row.names = 1, quote = "",
94
+ comment.char = "")
95
+ } else {
90
96
  c <- as.data.frame(x)
91
97
  }
92
- if(missing(sizes)) sizes = colSums(c)
98
+ if (missing(sizes)) sizes <- colSums(c)
93
99
  p <- c
94
- for (i in 1:ncol(c)) p[, i] <- c[, i]*100/sizes[i]
95
- if(top > nrow(p)) top = nrow(p)
96
-
100
+ for (i in 1:ncol(c)) p[, i] <- c[, i] * 100 / sizes[i]
101
+ if(top > nrow(p)) top <- nrow(p)
102
+
97
103
  # Sort
98
- if(is.null(order[1])){
104
+ if (is.null(order[1])) {
99
105
  p <- p[order(apply(p, 1, sort.by)), ]
100
- }else if(is.na(order[1])){
106
+ } else if (is.na(order[1])) {
101
107
 
102
- }else{
108
+ } else {
103
109
  p <- p[order, ]
104
110
  }
105
- if(organic.trend) add.trend=TRUE
106
-
111
+ if(organic.trend) add.trend <- TRUE
112
+
107
113
  # Colors
108
- if(is.null(top)) top <- nrow(p)
109
- if(missing(col)){
110
- color.col <- rainbow(min(colors.per.group, top), s=1, v=4/5)
111
- if(top > colors.per.group) color.col <- c(color.col,
112
- rainbow(min(colors.per.group*2, top)-colors.per.group, s=3/4, v=3/5))
113
- if(top > colors.per.group*2) color.col <- c(color.col,
114
- rainbow(top-colors.per.group*2, s=1, v=1.25/4))
115
- }else if(length(col)==1 & col[1]=="coto"){
116
- color.col <- c("#5BC0EB","#FDE74C","#9BC53D","#E55934","#FA7921","#EF476F",
117
- "#FFD166","#06D6A0","#118AB2","#073B4C","#264653","#2A9D8F",
118
- "#E9C46A","#F4A261","#E76F51")
119
- color.col <- head(color.col, n=nrow(p))
114
+ if (is.null(top)) top <- nrow(p)
115
+ if (missing(col)) {
116
+ color.col <- rainbow(min(colors.per.group, top), s = 1, v = 4/5)
117
+ if(top > colors.per.group)
118
+ color.col <- c(color.col,
119
+ rainbow(min(colors.per.group * 2, top) - colors.per.group,
120
+ s = 3/4, v = 3/5))
121
+ if(top > colors.per.group * 2)
122
+ color.col <- c(color.col,
123
+ rainbow(top-colors.per.group * 2, s = 1, v = 1.25 / 4))
124
+ } else if (length(col) == 1 & col[1] == "coto") {
125
+ color.col <- c("#5BC0EB", "#FDE74C", "#9BC53D", "#E55934", "#FA7921",
126
+ "#EF476F", "#FFD166", "#06D6A0", "#118AB2", "#073B4C",
127
+ "#264653", "#2A9D8F", "#E9C46A", "#F4A261", "#E76F51")
128
+ color.col <- head(color.col, n = nrow(p))
120
129
  top <- length(color.col)
121
- }else{
130
+ } else {
122
131
  color.col <- col
123
- color.col <- tail(color.col, n=nrow(p))
132
+ color.col <- tail(color.col, n = nrow(p))
124
133
  top <- length(color.col)
125
134
  }
126
135
 
127
136
  # Plot
128
- layout(matrix(1:2, nrow=1), widths=c(bars.width,1))
129
- mar <- par('mar')
130
- par(mar=c(5,4,4,0)+0.1)
131
- mp <- barplot(as.matrix(p),
132
- col=rev(c(color.col, rep(other.col, nrow(p)-length(color.col)))),
133
- border=NA,space=ifelse(add.trend,ifelse(organic.trend,0.75,0.5),0.2), ...)
134
- if(add.trend || min.report < max(p)){
137
+ layout(matrix(1:2, nrow = 1), widths = c(bars.width, 1))
138
+ mar <- par(mar = c(5, 4, 4, 0) + 0.1)
139
+ on.exit(par(mar))
140
+ mp <- barplot(
141
+ as.matrix(p),
142
+ col = rev(c(color.col, rep(other.col, nrow(p) - length(color.col)))),
143
+ space = ifelse(add.trend,ifelse(organic.trend, 0.75, 0.5), 0.2),
144
+ border = NA, ...
145
+ )
146
+ if (add.trend || min.report < max(p)) {
135
147
  color.alpha <- enve.col.alpha(c(color.col, other.col), 1/4)
136
- if(top < nrow(p)){
148
+ if (top < nrow(p)) {
137
149
  cf <- colSums(p[1:(nrow(p)-top), ])
138
- }else{
150
+ } else {
139
151
  cf <- rep(0, ncol(p))
140
152
  }
141
- for(i in (nrow(p)-top+1):nrow(p)){
153
+ for (i in (nrow(p) - top + 1):nrow(p)) {
142
154
  f <- as.numeric(p[i, ])
143
155
  cf <- as.numeric(cf + f)
144
- if(nrow(p)-i < top){
145
- if(organic.trend){
156
+ if (nrow(p) - i < top){
157
+ if (organic.trend) {
146
158
  spc <- 0.5
147
- x <- c(mp[1]-spc)
148
- y1 <- c(cf[1]-f[1])
159
+ x <- c(mp[1] - spc)
160
+ y1 <- c(cf[1] - f[1])
149
161
  y2 <- c(cf[1])
150
- for(j in 2:ncol(p)){
151
- x <- c(x, seq(mp[j-1]+spc, mp[j]-spc, length.out=22))
152
- y1 <- c(y1, cf[j-1]-f[j-1],
153
- (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
154
- ((cf[j]-f[j])-(cf[j-1]-f[j-1]))+(cf[j-1]-f[j-1]), cf[j]-f[j])
155
- y2 <- c(y2, cf[j-1],
156
- (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
157
- (cf[j]-cf[j-1])+(cf[j-1]), cf[j])
162
+ for (j in 2:ncol(p)) {
163
+ x <- c(x, seq(mp[j - 1] + spc, mp[j] - spc, length.out = 22))
164
+ y1 <- c(
165
+ y1, cf[j - 1] - f[j - 1],
166
+ (tanh(seq(-2.5, 2.5, length.out = 20)) / 2 + 0.5) *
167
+ ((cf[j] - f[j]) - (cf[j - 1] - f[j - 1])) +
168
+ (cf[j - 1] - f[j - 1]),
169
+ cf[j] - f[j]
170
+ )
171
+ y2 <- c(
172
+ y2, cf[j-1],
173
+ (tanh(seq(-2.5, 2.5, length.out = 20)) / 2 + 0.5) *
174
+ (cf[j] - cf[j - 1]) + (cf[j - 1]), cf[j]
175
+ )
158
176
  }
159
- x <- c(x, mp[length(mp)]+spc)
160
- y1 <- c(y1, cf[length(cf)]-f[length(f)])
177
+ x <- c(x, mp[length(mp)] + spc)
178
+ y1 <- c(y1, cf[length(cf)] - f[length(f)])
161
179
  y2 <- c(y2, cf[length(cf)])
162
- polygon(c(x, rev(x)), c(y1, rev(y2)), col=color.alpha[nrow(p)-i+1],
163
- border=NA)
164
- }else if(add.trend){
165
- x <- rep(mp, each=2)+c(-0.5,0.5)
166
- if(add.trend) polygon(c(x, rev(x)),
167
- c(rep(cf-f, each=2), rev(rep(cf, each=2))),
168
- col=color.alpha[nrow(p)-i+1], border=NA)
180
+ polygon(c(x, rev(x)), c(y1, rev(y2)),
181
+ col = color.alpha[nrow(p) - i + 1], border = NA)
182
+ } else if (add.trend) {
183
+ x <- rep(mp, each = 2) + c(-0.5, 0.5)
184
+ if(add.trend)
185
+ polygon(c(x, rev(x)),
186
+ c(rep(cf - f, each = 2), rev(rep(cf, each = 2))),
187
+ col = color.alpha[nrow(p) - i + 1], border = NA)
169
188
  }
170
- text(mp, cf-f/2, ifelse(f>min.report, signif(f, 3), ''), col='white')
189
+ text(mp, cf - f / 2, ifelse(f > min.report, signif(f, 3), ""),
190
+ col = "white")
171
191
  }
172
192
  }
173
193
  }
174
-
194
+
175
195
  # Legend
176
- par(mar=rep(0,4)+0.1)
177
- plot(1, t='n', bty='n', xlab='', ylab='', xaxt='n', yaxt='n')
178
- nam <- rownames(p[nrow(p):(nrow(p)-top+1), ])
179
- if(top < nrow(p)) nam <- c(nam,
180
- paste('Other (',nrow(p)-length(color.col),')', sep=''))
181
- legend('center', col=c(color.col, other.col), legend=nam, pch=15, bty='n',
182
- pt.cex=2, ncol=legend.ncol)
183
- par(mar=mar)
196
+ par(mar = rep(0, 4) + 0.1) # par(mar) already being watched by on.exit
197
+ plot(1, t = "n", bty = "n", xlab = "", ylab = "", xaxt = "n", yaxt = "n")
198
+ nam <- rownames(p[nrow(p):(nrow(p) - top + 1), ])
199
+ if(top < nrow(p))
200
+ nam <- c(nam, paste("Other (", nrow(p) - length(color.col), ")", sep = ""))
201
+ legend("center", col = c(color.col, other.col), legend = nam, pch = 15,
202
+ bty = "n", pt.cex = 2, ncol = legend.ncol)
184
203
  }
@@ -40,96 +40,102 @@ enve.cliopts <- function(
40
40
  rd_file,
41
41
  positional_arguments,
42
42
  usage,
43
- mandatory=c(),
44
- vectorize=c(),
45
- ignore=c(),
46
- number=c(),
47
- defaults=list(),
48
- o_desc=list(),
49
- p_desc=""
43
+ mandatory = c(),
44
+ vectorize = c(),
45
+ ignore = c(),
46
+ number = c(),
47
+ defaults = list(),
48
+ o_desc = list(),
49
+ p_desc = ""
50
50
  ){
51
-
52
- #= Load stuff
53
- if(!suppressPackageStartupMessages(
54
- requireNamespace("optparse", quietly=TRUE)))
51
+ # Load stuff
52
+ if (!suppressPackageStartupMessages(
53
+ requireNamespace("optparse", quietly = TRUE)))
55
54
  stop("Package 'optparse' is required.")
56
- requireNamespace("tools", quietly=TRUE)
57
- if(missing(positional_arguments)) positional_arguments <- FALSE
58
- if(missing(usage)) usage <- "usage: %prog [options]"
59
-
60
- #= Get help (if any)
61
- if(!missing(rd_file)){
55
+ requireNamespace("tools", quietly = TRUE)
56
+ if (missing(positional_arguments)) positional_arguments <- FALSE
57
+ if (missing(usage)) usage <- "usage: %prog [options]"
58
+
59
+ # Get help (if any)
60
+ if (!missing(rd_file)) {
62
61
  rd <- tools::parse_Rd(rd_file)
63
- for(i in 1:length(rd)){
64
- tag <- attr(rd[[i]],'Rd_tag')
65
- if(tag=="\\description" && p_desc==""){
66
- p_desc <- paste("\n\t",as.character(rd[[i]]),sep='')
67
- }else if(tag=="\\arguments"){
68
- for(j in 1:length(rd[[i]])){
69
- if(length(rd[[i]][[j]])==2){
62
+ for (i in 1:length(rd)) {
63
+ tag <- attr(rd[[i]], "Rd_tag")
64
+ if (tag == "\\description" && p_desc == "") {
65
+ p_desc <- paste("\n\t", as.character(rd[[i]]), sep = "")
66
+ } else if (tag == "\\arguments") {
67
+ for (j in 1:length(rd[[i]])) {
68
+ if (length(rd[[i]][[j]]) == 2) {
70
69
  name <- as.character(rd[[i]][[j]][[1]])
71
- if(length(o_desc[[name]])==1) next
70
+ if (length(o_desc[[name]]) == 1) next
72
71
  desc <- as.character(rd[[i]][[j]][[2]])
73
- o_desc[[name]] <- paste(gsub("\n","\n\t\t",desc), collapse='')
72
+ o_desc[[name]] <- paste(gsub("\n", "\n\t\t", desc), collapse = "")
74
73
  }
75
74
  }
76
75
  }
77
76
  }
78
77
  }
79
-
80
- #= Set options
78
+
79
+ # Set options
81
80
  o_i <- 0
82
81
  opts <- list()
83
82
  f <- formals(fx)
84
- if(length(defaults)>0){
85
- for(i in 1:length(defaults)) f[[names(defaults)[i]]] <- defaults[[i]]
83
+ if (length(defaults) > 0) {
84
+ for (i in 1:length(defaults)) f[[names(defaults)[i]]] <- defaults[[i]]
86
85
  }
87
- for(i in names(f)){
88
- if(i=="..." || i %in% ignore) next
86
+ for (i in names(f)) {
87
+ if (i == "..." || i %in% ignore) next
89
88
  o_i <- o_i + 1
90
- flag <- gsub("\\.","-",i)
91
-
92
- optopt <- list(help="")
93
- if(length(o_desc[[i]])==1) optopt$help <- o_desc[[i]]
94
- if(!is.null(f[[i]]) && !suppressWarnings(is.na(f[[i]])) && is.logical(f[[i]])){
95
- optopt$opt_str <- paste(ifelse(f[[i]], "--no-", "--"), flag, sep='')
89
+ flag <- gsub("\\.", "-", i)
90
+
91
+ optopt <- list(help = "")
92
+ if (length(o_desc[[i]]) == 1) optopt$help <- o_desc[[i]]
93
+ if (!is.null(f[[i]]) && !suppressWarnings(is.na(f[[i]])) &&
94
+ is.logical(f[[i]])){
95
+ optopt$opt_str <- paste(ifelse(f[[i]], "--no-", "--"), flag, sep = "")
96
96
  optopt$action <- ifelse(f[[i]], "store_false", "store_true")
97
- }else{
98
- optopt$opt_str <- paste("--", flag, sep='')
97
+ } else {
98
+ optopt$opt_str <- paste("--", flag, sep = "")
99
99
  optopt$action <- "store"
100
- optopt$help <- paste(optopt$help, "\n\t\t[",
101
- ifelse(i %in% mandatory, "** MANDATORY", "default %default"),
102
- ifelse(i %in% vectorize, ", separate values by commas", ""),
103
- "].", sep="")
100
+ optopt$help <- paste(
101
+ optopt$help, "\n\t\t[",
102
+ ifelse(i %in% mandatory, "** MANDATORY", "default %default"),
103
+ ifelse(i %in% vectorize, ", separate values by commas", ""),
104
+ "].",
105
+ sep = ""
106
+ )
104
107
  }
105
- if(!is.name(f[[i]])){
108
+ if (!is.name(f[[i]])) {
106
109
  optopt$default <- f[[i]]
107
110
  optopt$metavar <- class(f[[i]])
108
111
  }
109
- if(i %in% number) optopt$metavar <- "NUMERIC"
112
+ if (i %in% number) optopt$metavar <- "NUMERIC"
110
113
  optopt$dest <- i
111
-
114
+
112
115
  opts[[o_i]] <- do.call(optparse::make_option, optopt)
113
116
  }
114
117
  opt <- optparse::parse_args(
115
- optparse::OptionParser(option_list=opts, description=p_desc, usage=usage),
116
- positional_arguments=positional_arguments)
118
+ optparse::OptionParser(
119
+ option_list = opts, description = p_desc, usage = usage
120
+ ),
121
+ positional_arguments = positional_arguments
122
+ )
117
123
 
118
- #= Post-hoc checks
119
- if(length(opt[['options']])==0) opt <- list(options=opt, args=c())
120
- for(i in mandatory){
121
- if(length(opt$options[[i]])==0) stop('Missing mandatory argument: ',i)
124
+ # Post-hoc checks
125
+ if (length(opt[["options"]]) == 0) opt <- list(options = opt, args = c())
126
+ for (i in mandatory) {
127
+ if(length(opt$options[[i]]) == 0) stop("Missing mandatory argument: ", i)
122
128
  }
123
- for(i in vectorize){
124
- if(length(opt$options[[i]])==1)
125
- opt$options[[i]] <- strsplit(opt$options[[i]],",")[[1]]
129
+ for (i in vectorize) {
130
+ if (length(opt$options[[i]]) == 1)
131
+ opt$options[[i]] <- strsplit(opt$options[[i]], ",")[[1]]
126
132
  }
127
- for(i in number){
128
- if(length(opt$options[[i]])>0)
133
+ for (i in number) {
134
+ if (length(opt$options[[i]]) > 0)
129
135
  opt$options[[i]] <- as.numeric(opt$options[[i]])
130
136
  }
131
137
  opt$options$help <- NULL
132
-
138
+
133
139
  return(opt)
134
140
  }
135
141
 
@@ -20,27 +20,40 @@
20
20
  #' @return Returns a \strong{dist} object.
21
21
  #'
22
22
  #' @author Luis M. Rodriguez-R [aut, cre]
23
+ #'
24
+ #' @examples
25
+ #' # A sparse matrix representation of similarities as data frame.
26
+ #' # The column "extra_data" is meaningless, only included to illustrate
27
+ #' # the use of the obj*.index parameters
28
+ #' sim <- data.frame(
29
+ #' extra_data = c(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.5),
30
+ #' query = c("A", "A", "A", "B", "C", "C", "D"),
31
+ #' subject = c("A", "B", "C", "B", "C", "B", "A"),
32
+ #' similarity = c(100, 90, 60, 100, 100, 70, 10)
33
+ #' )
34
+ #' dist <- enve.df2dist(sim, "query", "subject", "similarity", max.sim = 100)
35
+ #' print(dist)
23
36
  #'
24
37
  #' @export
25
-
26
38
  enve.df2dist <- function(
27
39
  x,
28
40
  obj1.index = 1,
29
41
  obj2.index = 2,
30
42
  dist.index = 3,
31
- default.d = NA,
32
- max.sim = 0
33
- ){
43
+ default.d = NA,
44
+ max.sim = 0
45
+ ) {
34
46
  x <- as.data.frame(x)
35
47
  a <- as.character(x[, obj1.index])
36
48
  b <- as.character(x[, obj2.index])
37
49
  d <- as.double(x[, dist.index])
38
- if(max.sim != 0) d <- (max.sim - d) / max.sim
39
- ids <- unique(c(a,b))
40
- m <- matrix(default.d,
41
- nrow = length(ids), ncol = length(ids), dimnames = list(ids, ids))
50
+ if (max.sim != 0) d <- (max.sim - d) / max.sim
51
+ ids <- unique(c(a, b))
52
+ m <- matrix(
53
+ default.d, nrow = length(ids), ncol = length(ids), dimnames = list(ids, ids)
54
+ )
42
55
  diag(m) <- 0.0
43
- m[cbind(a,b)] <- d
56
+ m[cbind(a, b)] <- d
44
57
  m <- pmin(m, t(m), na.rm = TRUE)
45
58
  return(as.dist(m))
46
59
  }
@@ -55,7 +68,8 @@ enve.df2dist <- function(
55
68
  #' \enumerate{
56
69
  #' \item ID of the object 1,
57
70
  #' \item ID of the object 2, and
58
- #' \item distance between the two objects.}
71
+ #' \item distance between the two objects.
72
+ #' }
59
73
  #' @param obj1.index Index of the column containing the ID of the object 1.
60
74
  #' @param obj2.index Index of the column containing the ID of the object 2.
61
75
  #' @param dist.index Index of the column containing the distance.
@@ -67,41 +81,65 @@ enve.df2dist <- function(
67
81
  #'
68
82
  #' @author Luis M. Rodriguez-R [aut, cre]
69
83
  #'
84
+ #' @examples
85
+ #' # A sparse matrix representation of distances as data frame.
86
+ #' # Note that some pairs are repeated.
87
+ #' dist.df <- data.frame(
88
+ #' query = c("A", "A", "A", "B", "C", "C", "B", "B", "B"),
89
+ #' subject = c("A", "B", "C", "B", "C", "B", "A", "C", "C"),
90
+ #' distance = c( 0, 0.1, 0.4, 0, 0, 0.4, 0.2, 0.2, 0.1)
91
+ #' )
92
+ #' dist <- enve.df2dist.group(dist.df)
93
+ #' print(dist)
94
+ #'
95
+ #' # Use the mean of all repeated occurrences instead of the median.
96
+ #' dist <- enve.df2dist.group(dist.df, summary = mean)
97
+ #'
98
+ #' # Simply use the first occurrence for any given pair.
99
+ #' dist <- enve.df2dist.group(dist.df, summary = function(x) head(x, n = 1))
100
+ #'
70
101
  #' @export
71
-
72
102
  enve.df2dist.group <- function(
73
103
  x,
74
- obj1.index=1,
75
- obj2.index=2,
76
- dist.index=3,
77
- summary=median,
78
- empty.rm=TRUE
79
- ){
80
- x <- as.data.frame(x);
81
- if(empty.rm) x <- x[ !(is.na(x[,obj1.index]) | is.na(x[,obj2.index]) | x[,obj1.index]=='' | x[,obj2.index]==''), ]
82
- a <- as.character(x[, obj1.index]);
83
- b <- as.character(x[, obj2.index]);
84
- d <- as.double(x[, dist.index]);
85
- ids <- unique(c(a,b));
86
- if(length(ids)<2) return(NA);
87
- m <- matrix(NA, nrow=length(ids), ncol=length(ids), dimnames=list(ids, ids));
104
+ obj1.index = 1,
105
+ obj2.index = 2,
106
+ dist.index = 3,
107
+ summary = median,
108
+ empty.rm = TRUE
109
+ ) {
110
+ x <- as.data.frame(x)
111
+ if(empty.rm)
112
+ x <- x[
113
+ !(is.na(x[, obj1.index]) |
114
+ is.na(x[, obj2.index]) |
115
+ x[, obj1.index] == "" |
116
+ x[, obj2.index] == ""),
117
+ ]
118
+ a <- as.character(x[, obj1.index])
119
+ b <- as.character(x[, obj2.index])
120
+ d <- as.double(x[, dist.index])
121
+ ids <- unique(c(a, b))
122
+ if (length(ids) < 2) return(NA)
123
+ m <- matrix(
124
+ NA, nrow = length(ids), ncol = length(ids), dimnames = list(ids, ids)
125
+ )
88
126
  diag(m) <- 0
89
- for(i in 2:length(ids)){
90
- id.i <- ids[i];
91
- for(j in 1:(i-1)){
92
- id.j <- ids[j];
93
- d.ij <- summary(c( d[ a==id.i & b==id.j], d[ b==id.i & a==id.j] ));
94
- m[id.i, id.j] <- d.ij;
95
- m[id.j, id.i] <- d.ij;
127
+ for (i in 2:length(ids)) {
128
+ id.i <- ids[i]
129
+ for (j in 1:(i - 1)) {
130
+ id.j <- ids[j]
131
+ d.ij <- summary(c(d[a == id.i & b == id.j], d[b == id.i & a == id.j]))
132
+ m[id.i, id.j] <- d.ij
133
+ m[id.j, id.i] <- d.ij
96
134
  }
97
135
  }
98
- return(as.dist(m));
136
+ return(as.dist(m))
99
137
  }
100
138
 
101
139
  #' Enveomics: Data Frame to Dist (List)
102
140
  #'
103
141
  #' Transform a dataframe (or coercible object, like a table)
104
- #' into a \strong{dist} object.
142
+ #' into a \strong{list} of \strong{dist} objects, one per group.
105
143
  #'
106
144
  #' @param x A dataframe (or coercible object) with at least three columns:
107
145
  #' \enumerate{
@@ -114,41 +152,41 @@ enve.df2dist.group <- function(
114
152
  #' @param obj2.index Index of the column containing the ID of the object 2.
115
153
  #' @param dist.index Index of the column containing the distance.
116
154
  #' @param empty.rm Remove incomplete matrices.
117
- #' @param ... Any other parameters supported by
118
- #' \code{\link{enve.df2dist.group}}.
155
+ #' @param ... Any other parameters supported by \code{\link{enve.df2dist}}.
119
156
  #'
120
157
  #' @return Returns a \strong{list} of \strong{dist} objects.
121
158
  #'
122
159
  #' @author Luis M. Rodriguez-R [aut, cre]
123
- #'
160
+ #'
124
161
  #' @export
125
162
 
126
163
  enve.df2dist.list <- function(
127
164
  x,
128
165
  groups,
129
- obj1.index=1,
130
- obj2.index=2,
131
- dist.index=3,
132
- empty.rm=TRUE,
166
+ obj1.index = 1,
167
+ obj2.index = 2,
168
+ dist.index = 3,
169
+ empty.rm = TRUE,
133
170
  ...
134
- ){
135
- x <- as.data.frame(x);
136
- a <- as.character(x[, obj1.index]);
137
- b <- as.character(x[, obj2.index]);
138
- d <- as.numeric(x[, dist.index]);
139
- ids.all <- unique(c(a,b));
140
- l <- list();
141
- same_group <- groups[a]==groups[b];
142
- same_group <- ifelse(is.na(same_group), FALSE, TRUE);
143
- for(group in unique(groups)){
144
- ids <- ids.all[ groups[ids.all]==group ];
145
- if(length(ids)>1 & group!=""){
146
- x.sub <- x[ same_group & (groups[a]==group) & (groups[b]==group), ]
147
- if(nrow(x.sub)>0){
148
- d.g <- enve.df2dist(x.sub, obj1.index, obj2.index, dist.index, ...);
149
- if(!empty.rm | !any(is.na(d.g))) l[[ group ]] <- d.g;
171
+ ) {
172
+ x <- as.data.frame(x)
173
+ a <- as.character(x[, obj1.index])
174
+ b <- as.character(x[, obj2.index])
175
+ d <- as.numeric(x[, dist.index])
176
+ ids.all <- unique(c(a, b))
177
+ l <- list()
178
+ same_group <- groups[a] == groups[b]
179
+ same_group <- ifelse(is.na(same_group), FALSE, TRUE)
180
+ for (group in unique(groups)) {
181
+ ids <- ids.all[groups[ids.all] == group]
182
+ if (length(ids) > 1 & group != "") {
183
+ x.sub <- x[same_group & (groups[a] == group) & (groups[b] == group), ]
184
+ if (nrow(x.sub) > 0) {
185
+ d.g <- enve.df2dist(x.sub, obj1.index, obj2.index, dist.index, ...)
186
+ if(!empty.rm | !any(is.na(d.g))) l[[group]] <- d.g
150
187
  }
151
188
  }
152
189
  }
153
- return(l);
190
+ return(l)
154
191
  }
192
+