miga-base 0.4.3.0 → 0.5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -24,400 +24,560 @@
24
24
  # ...
25
25
 
26
26
 
27
+
27
28
  #==============> Define S4 classes
28
- setClass("enve.TRIBS",
29
- ### Enve-omics representation of "Transformed-space Resampling In Biased Sets
30
- ### (TRIBS)". This object represents sets of distances between objects,
31
- ### sampled nearly-uniformly at random in "distance space". Subsampling
32
- ### without selection is trivial, since both the distances space and the
33
- ### selection occur in the same transformed space. However, it's useful to
34
- ### compare randomly subsampled sets against a selected set of objects. This
35
- ### is intended to identify overdispersion or overclustering (see
36
- ### `enve.TRIBStest`) of a subset against the entire collection of objects
37
- ### with minimum impact of sampling biases. This object can be produced by
38
- ### `enve.tribs` and supports S4 methods `plot` and `summary`.
39
- representation(
40
- distance='numeric', ##<< Centrality measurement of the distances between the
41
- ##<< selected objects (without subsampling).
42
- points='matrix', ##<< Position of the different objects in distance
43
- ##<< space.
44
- distances='matrix', ##<< Subsampled distances, where the rows are replicates
45
- ##<< and the columns are subsampling levels.
46
- spaceSize='numeric', ##<< Number of objects.
47
- selSize='numeric', ##<< Number of selected objects.
48
- dimensions='numeric',##<< Number of dimensions in the distance space.
49
- subsamples='numeric',##<< Subsampling levels (as fractions, from 0 to 1).
50
- call='call') ##<< Call producing this object.
51
- ,package='enveomics.R'
52
- );
53
- setClass("enve.TRIBStest",
54
- ### Test of significance of overclustering or overdispersion in a selected
55
- ### set of objects with respect to the entire set (see `enve.TRIBS`). This
56
- ### object can be produced by `enve.tribs.test` and supports S4 methods
57
- ### `plot` and `summary`.
58
- representation(
59
- pval.gt='numeric',
60
- ### P-value for the overdispersion test.
61
- pval.lt='numeric',
62
- ### P-value for the overclustering test.
63
- all.dist='numeric',
64
- ### Empiric PDF of distances for the entire dataset (subsampled at selection
65
- ### size).
66
- sel.dist='numeric',
67
- ### Empiric PDF of distances for the selected objects (without subsampling).
68
- diff.dist='numeric',
69
- ### Empiric PDF of the difference between `all.dist` and `sel.dist`. The
70
- ### p-values are estimating by comparing areas in this PDF greater than and
71
- ### lesser than zero.
72
- dist.mids='numeric',
73
- ### Midpoints of the empiric PDFs of distances.
74
- diff.mids='numeric',
75
- ### Midpoints of the empiric PDF of difference of distances.
76
- call='call')
77
- ### Call producing this object.
78
- ,package='enveomics.R'
79
- );
29
+
30
+ #' Enveomics: TRIBS S4 Class
31
+ #'
32
+ #' Enve-omics representation of "Transformed-space Resampling In Biased Sets
33
+ #' (TRIBS)". This object represents sets of distances between objects,
34
+ #' sampled nearly-uniformly at random in "distance space". Subsampling
35
+ #' without selection is trivial, since both the distances space and the
36
+ #' selection occur in the same transformed space. However, it's useful to
37
+ #' compare randomly subsampled sets against a selected set of objects. This
38
+ #' is intended to identify overdispersion or overclustering (see
39
+ #' \code{\link{enve.TRIBStest}}) of a subset against the entire collection of objects
40
+ #' with minimum impact of sampling biases. This object can be produced by
41
+ #' \code{\link{enve.tribs}} and supports S4 methods \code{plot} and \code{summary}.
42
+ #'
43
+ #' @slot distance \code{(numeric)} Centrality measurement of the distances
44
+ #' between the selected objects (without subsampling).
45
+ #' @slot points \code{(matrix)} Position of the different objects in distance
46
+ #' space.
47
+ #' @slot distances \code{(matrix)} Subsampled distances, where the rows are
48
+ #' replicates and the columns are subsampling levels.
49
+ #' @slot spaceSize \code{(numeric)} Number of objects.
50
+ #' @slot selSize \code{(numeric)} Number of selected objects.
51
+ #' @slot dimensions \code{(numeric)} Number of dimensions in the distance space.
52
+ #' @slot subsamples \code{(numeric)} Subsampling levels (as fractions, from
53
+ #' 0 to 1).
54
+ #' @slot call \code{(call)} Call producing this object.
55
+ #'
56
+ #' @author Luis M. Rodriguez-R [aut, cre]
57
+ #'
58
+ #' @exportClass
59
+
60
+ enve.TRIBS <- setClass("enve.TRIBS",
61
+ representation(
62
+ distance='numeric',
63
+ points='matrix',
64
+ distances='matrix',
65
+ spaceSize='numeric',
66
+ selSize='numeric',
67
+ dimensions='numeric',
68
+ subsamples='numeric',
69
+ call='call')
70
+ ,package='enveomics.R'
71
+ );
72
+
73
+ #' Enveomics: TRIBS Test S4 Class
74
+ #'
75
+ #' Test of significance of overclustering or overdispersion in a selected
76
+ #' set of objects with respect to the entire set (see \code{\link{enve.TRIBS}}). This
77
+ #' object can be produced by \code{\link{enve.tribs.test}} and supports S4 methods
78
+ #' \code{plot} and \code{summary}.
79
+ #'
80
+ #' @slot pval.gt \code{(numeric)}
81
+ #' P-value for the overdispersion test.
82
+ #' @slot pval.lt \code{(numeric)}
83
+ #' P-value for the overclustering test.
84
+ #' @slot all.dist \code{(numeric)}
85
+ #' Empiric PDF of distances for the entire dataset (subsampled at selection
86
+ #' size).
87
+ #' @slot sel.dist \code{(numeric)}
88
+ #' Empiric PDF of distances for the selected objects (without subsampling).
89
+ #' @slot diff.dist \code{(numeric)}
90
+ #' Empiric PDF of the difference between \code{all.dist} and \code{sel.dist}.
91
+ #' The p-values are estimating by comparing areas in this PDF greater than and
92
+ #' lesser than zero.
93
+ #' @slot dist.mids \code{(numeric)}
94
+ #' Midpoints of the empiric PDFs of distances.
95
+ #' @slot diff.mids \code{(numeric)}
96
+ #' Midpoints of the empiric PDF of difference of distances.
97
+ #' @slot call \code{(call)}
98
+ #' Call producing this object.
99
+ #'
100
+ #' @author Luis M. Rodriguez-R [aut, cre]
101
+ #'
102
+ #' @exportClass
103
+
104
+ enve.TRIBStest <- setClass("enve.TRIBStest",
105
+ representation(
106
+ pval.gt='numeric',
107
+ pval.lt='numeric',
108
+ all.dist='numeric',
109
+ sel.dist='numeric',
110
+ diff.dist='numeric',
111
+ dist.mids='numeric',
112
+ diff.mids='numeric',
113
+ call='call')
114
+ ,package='enveomics.R'
115
+ );
80
116
 
81
117
  #==============> Define S4 methods
118
+
119
+ #' Enveomics: TRIBS Summary
120
+ #'
121
+ #' Summary of an \code{\link{enve.TRIBS}} object.
122
+ #'
123
+ #' @param object
124
+ #' \code{\link{enve.TRIBS}} object.
125
+ #' @param ...
126
+ #' No additional parameters are currently supported.
127
+ #'
128
+ #' @author Luis M. Rodriguez-R [aut, cre]
129
+ #'
130
+ #' @method summary enve.TRIBS
131
+ #' @export
132
+
82
133
  summary.enve.TRIBS <- function
83
- ### Summary of an `enve.TRIBS` object.
84
- (object,
85
- ### `enve.TRIBS` object.
86
- ...
87
- ### No additional parameters are currently supported.
88
- ){
89
- cat('===[ enve.TRIBS ]-------------------------\n');
90
- cat('Selected',attr(object,'selSize'),'of',
134
+ (object,
135
+ ...
136
+ ){
137
+ cat('===[ enve.TRIBS ]-------------------------\n');
138
+ cat('Selected',attr(object,'selSize'),'of',
91
139
  attr(object,'spaceSize'),'objects in',
92
140
  attr(object,'dimensions'),'dimensions.\n');
93
- cat('Collected',length(attr(object,'subsamples')),'subsamples with',
141
+ cat('Collected',length(attr(object,'subsamples')),'subsamples with',
94
142
  nrow(attr(object,'distances')),'replicates each.\n');
95
- cat('------------------------------------------\n');
96
- cat('call:',as.character(attr(object,'call')),'\n');
97
- cat('------------------------------------------\n');
143
+ cat('------------------------------------------\n');
144
+ cat('call:',as.character(attr(object,'call')),'\n');
145
+ cat('------------------------------------------\n');
98
146
  }
99
147
 
148
+ #' Enveomics: TRIBS Plot
149
+ #'
150
+ #' Plot an \code{\link{enve.TRIBS}} object.
151
+ #'
152
+ #' @param x
153
+ #' \code{\link{enve.TRIBS}} object to plot.
154
+ #' @param new
155
+ #' Should a new canvas be drawn?
156
+ #' @param type
157
+ #' Type of plot. The \strong{points} plot shows all the replicates, the
158
+ #' \strong{boxplot} plot represents the values found by
159
+ #' \code{\link[grDevices]{boxplot.stats}}.
160
+ #' as areas, and plots the outliers as points.
161
+ #' @param col
162
+ #' Color of the areas and/or the points.
163
+ #' @param pt.cex
164
+ #' Size of the points.
165
+ #' @param pt.pch
166
+ #' Points character.
167
+ #' @param pt.col
168
+ #' Color of the points.
169
+ #' @param ln.col
170
+ #' Color of the lines.
171
+ #' @param ...
172
+ #' Any additional parameters supported by \code{plot}.
173
+ #'
174
+ #' @author Luis M. Rodriguez-R [aut, cre]
175
+ #'
176
+ #' @method plot enve.TRIBS
177
+ #' @export
178
+
100
179
  plot.enve.TRIBS <- function
101
- ### Plot an `enve.TRIBS` object.
102
- (x,
103
- ### `enve.TRIBS` object to plot.
104
- new=TRUE,
105
- ### Should a new canvas be drawn?
106
- type=c('boxplot', 'points'),
107
- ### Type of plot. The 'points' plot shows all the replicates, the 'boxplot'
108
- ### plot represents the values found by `boxplot.stats` as areas, and plots
109
- ### the outliers as points.
110
- col='#00000044',
111
- ### Color of the areas and/or the points.
112
- pt.cex=1/2,
113
- ### Size of the points.
114
- pt.pch=19,
115
- ### Points character.
116
- pt.col=col,
117
- ### Color of the points.
118
- ln.col=col,
119
- ### Color of the lines.
120
- ...
121
- ### Any additional parameters supported by `plot`.
122
- ){
123
- type <- match.arg(type);
124
- plot.opts <- list(xlim=range(attr(x,'subsamples'))*attr(x,'selSize'),
125
- ylim=range(attr(x,'distances')), ..., t='n', x=1);
126
- if(new) do.call(plot, plot.opts);
127
- abline(h=attr(x,'distance'), lty=3, col=ln.col);
128
- replicates <- nrow(attr(x,'distances'));
129
- if(type=='points'){
130
- for(i in 1:ncol(attr(x,'distances')))
131
- points(rep(round(attr(x,'subsamples')[i]*attr(x,'selSize')),
132
- replicates), attr(x,'distances')[,i], cex=pt.cex, pch=pt.pch,
133
- col=pt.col);
134
- }else{
135
- stats <- matrix(NA, nrow=7, ncol=ncol(attr(x,'distances')));
136
- for(i in 1:ncol(attr(x,'distances'))){
137
- b <- boxplot.stats(attr(x,'distances')[,i]);
138
- points(rep(round(attr(x,'subsamples')[i]*attr(x,'selSize')),
139
- length(b$out)), b$out, cex=pt.cex, pch=pt.pch, col=pt.col);
140
- stats[, i] <- c(b$conf, b$stats[c(1,5,2,4,3)]);
141
- }
142
- x <- round(attr(x,'subsamples')*attr(x,'selSize'))
143
- for(i in c(1,3,5))
144
- polygon(c(x, rev(x)), c(stats[i,], rev(stats[i+1,])), border=NA,
145
- col=col);
146
- lines(x, stats[7,], col=ln.col, lwd=2);
147
- }
180
+ (x,
181
+ new=TRUE,
182
+ type=c('boxplot', 'points'),
183
+ col='#00000044',
184
+ pt.cex=1/2,
185
+ pt.pch=19,
186
+ pt.col=col,
187
+ ln.col=col,
188
+ ...
189
+ ){
190
+ type <- match.arg(type);
191
+ plot.opts <- list(xlim=range(attr(x,'subsamples'))*attr(x,'selSize'),
192
+ ylim=range(attr(x,'distances')), ..., t='n', x=1);
193
+ if(new) do.call(plot, plot.opts);
194
+ abline(h=attr(x,'distance'), lty=3, col=ln.col);
195
+ replicates <- nrow(attr(x,'distances'));
196
+ if(type=='points'){
197
+ for(i in 1:ncol(attr(x,'distances')))
198
+ points(rep(round(attr(x,'subsamples')[i]*attr(x,'selSize')),
199
+ replicates), attr(x,'distances')[,i], cex=pt.cex, pch=pt.pch,
200
+ col=pt.col);
201
+ }else{
202
+ stats <- matrix(NA, nrow=7, ncol=ncol(attr(x,'distances')));
203
+ for(i in 1:ncol(attr(x,'distances'))){
204
+ b <- boxplot.stats(attr(x,'distances')[,i]);
205
+ points(rep(round(attr(x,'subsamples')[i]*attr(x,'selSize')),
206
+ length(b$out)), b$out, cex=pt.cex, pch=pt.pch, col=pt.col);
207
+ stats[, i] <- c(b$conf, b$stats[c(1,5,2,4,3)]);
208
+ }
209
+ x <- round(attr(x,'subsamples')*attr(x,'selSize'))
210
+ for(i in c(1,3,5))
211
+ polygon(c(x, rev(x)), c(stats[i,], rev(stats[i+1,])), border=NA,
212
+ col=col);
213
+ lines(x, stats[7,], col=ln.col, lwd=2);
214
+ }
148
215
  }
149
216
 
217
+ #' Enveomics: TRIBS Summary Test
218
+ #'
219
+ #' Summary of an \code{\link{enve.TRIBStest}} object.
220
+ #'
221
+ #' @param object
222
+ #' \code{\link{enve.TRIBStest}} object.
223
+ #' @param ...
224
+ #' No additional parameters are currently supported.
225
+ #'
226
+ #' @author Luis M. Rodriguez-R [aut, cre]
227
+ #'
228
+ #' @method summary enve.TRIBStest
229
+ #' @export
230
+
150
231
  summary.enve.TRIBStest <- function
151
- ### Summary of an `enve.TRIBStest` object.
152
- (object,
153
- ### `enve.TRIBStest` object.
154
- ...
155
- ### No additional parameters are currently supported.
156
- ){
157
- cat('===[ enve.TRIBStest ]---------------------\n');
158
- cat('Alternative hypothesis:\n');
159
- cat(' The distances in the selection are\n');
160
- if(attr(object, 'pval.gt') > attr(object, 'pval.lt')){
161
- cat(' smaller than in the entire dataset\n (overclustering)\n');
162
- }else{
163
- cat(' larger than in the entire dataset\n (overdispersion)\n');
164
- }
165
- p.val <- min(attr(object, 'pval.gt'), attr(object, 'pval.lt'));
166
- if(p.val==0){
167
- diff.dist <- attr(object, 'diff.dist');
168
- p.val.lim <- min(diff.dist[diff.dist>0]);
169
- cat('\n P-value <= ', signif(p.val.lim, 4), sep='');
170
- }else{
171
- p.val.lim <- p.val;
172
- cat('\n P-value: ', signif(p.val, 4), sep='');
173
- }
174
- cat(' ', ifelse(p.val.lim<=0.01, "**", ifelse(p.val.lim<=0.05, "*", "")),
232
+ (object,
233
+ ...
234
+ ){
235
+ cat('===[ enve.TRIBStest ]---------------------\n');
236
+ cat('Alternative hypothesis:\n');
237
+ cat(' The distances in the selection are\n');
238
+ if(attr(object, 'pval.gt') > attr(object, 'pval.lt')){
239
+ cat(' smaller than in the entire dataset\n (overclustering)\n');
240
+ }else{
241
+ cat(' larger than in the entire dataset\n (overdispersion)\n');
242
+ }
243
+ p.val <- min(attr(object, 'pval.gt'), attr(object, 'pval.lt'));
244
+ if(p.val==0){
245
+ diff.dist <- attr(object, 'diff.dist');
246
+ p.val.lim <- min(diff.dist[diff.dist>0]);
247
+ cat('\n P-value <= ', signif(p.val.lim, 4), sep='');
248
+ }else{
249
+ p.val.lim <- p.val;
250
+ cat('\n P-value: ', signif(p.val, 4), sep='');
251
+ }
252
+ cat(' ', ifelse(p.val.lim<=0.01, "**", ifelse(p.val.lim<=0.05, "*", "")),
175
253
  '\n', sep='');
176
- cat('------------------------------------------\n');
177
- cat('call:',as.character(attr(object,'call')),'\n');
178
- cat('------------------------------------------\n');
254
+ cat('------------------------------------------\n');
255
+ cat('call:',as.character(attr(object,'call')),'\n');
256
+ cat('------------------------------------------\n');
179
257
  }
180
258
 
259
+ #' Enveomics: TRIBS Plot Test
260
+ #'
261
+ #' Plots an \code{\link{enve.TRIBStest}} object.
262
+ #'
263
+ #' @param x
264
+ #' \code{\link{enve.TRIBStest}} object to plot.
265
+ #' @param type
266
+ #' What to plot. \code{overlap} generates a plot of the two contrasting empirical
267
+ #' PDFs (to compare against each other), \code{difference} produces a plot of the
268
+ #' differences between the empirical PDFs (to compare against zero).
269
+ #' @param col
270
+ #' Main color of the plot if type=\code{difference}.
271
+ #' @param col1
272
+ #' First color of the plot if type=\code{overlap}.
273
+ #' @param col2
274
+ #' Second color of the plot if type=\code{overlap}.
275
+ #' @param ylab
276
+ #' Y-axis label.
277
+ #' @param xlim
278
+ #' X-axis limits.
279
+ #' @param ylim
280
+ #' Y-axis limits.
281
+ #' @param ...
282
+ #' Any other graphical arguments.
283
+ #'
284
+ #' @author Luis M. Rodriguez-R [aut, cre]
285
+ #'
286
+ #' @method plot enve.TRIBStest
287
+ #' @export
288
+
181
289
  plot.enve.TRIBStest <- function
182
- ### Plots an `enve.TRIBStest` object.
183
- (x,
184
- ### `enve.TRIBStest` object to plot.
185
- type=c('overlap', 'difference'),
186
- ### What to plot. 'overlap' generates a plot of the two contrasting empirical
187
- ### PDFs (to compare against each other), 'difference' produces a plot of the
188
- ### differences between the empirical PDFs (to compare against zero).
189
- col='#00000044',
190
- ### Main color of the plot if type='difference'.
191
- col1=col,
192
- ### First color of the plot if type='overlap'.
193
- col2='#44001144',
194
- ### Second color of the plot if type='overlap'.
195
- ylab='Probability',
196
- ### Y-axis label.
197
- xlim=range(attr(x, 'dist.mids')),
198
- ### X-axis limits.
199
- ylim=c(0,max(c(attr(x, 'all.dist'), attr(x, 'sel.dist')))),
200
- ### Y-axis limits.
201
- ...
202
- ### Any other graphical arguments.
203
- ){
204
- type <- match.arg(type);
205
- if(type=='overlap'){
206
- plot.opts <- list(xlim=xlim, ylim=ylim, ylab=ylab, ..., t='n', x=1);
207
- do.call(plot, plot.opts);
208
- bins <- length(attr(x, 'dist.mids'))
209
- polygon(attr(x, 'dist.mids')[c(1, 1:bins, bins)],
210
- c(0,attr(x, 'all.dist'),0), col=col1,
211
- border=do.call(rgb, as.list(c(col2rgb(col1)/256, 0.5))));
212
- polygon(attr(x, 'dist.mids')[c(1, 1:bins, bins)],
213
- c(0,attr(x, 'sel.dist'),0), col=col2,
214
- border=do.call(rgb, as.list(c(col2rgb(col2)/256, 0.5))));
215
- }else{
216
- plot.opts <- list(xlim=range(attr(x, 'diff.mids')),
217
- ylim=c(0,max(attr(x, 'diff.dist'))), ylab=ylab, ..., t='n', x=1);
218
- do.call(plot, plot.opts);
219
- bins <- length(attr(x, 'diff.mids'));
220
- polygon(attr(x, 'diff.mids')[c(1, 1:bins, bins)],
221
- c(0,attr(x, 'diff.dist'),0), col=col,
222
- border=do.call(rgb, as.list(c(col2rgb(col)/256, 0.5))));
223
- }
290
+ (x,
291
+ type=c('overlap', 'difference'),
292
+ col='#00000044',
293
+ col1=col,
294
+ col2='#44001144',
295
+ ylab='Probability',
296
+ xlim=range(attr(x, 'dist.mids')),
297
+ ylim=c(0,max(c(attr(x, 'all.dist'), attr(x, 'sel.dist')))),
298
+ ...
299
+ ){
300
+ type <- match.arg(type);
301
+ if(type=='overlap'){
302
+ plot.opts <- list(xlim=xlim, ylim=ylim, ylab=ylab, ..., t='n', x=1);
303
+ do.call(plot, plot.opts);
304
+ bins <- length(attr(x, 'dist.mids'))
305
+ polygon(attr(x, 'dist.mids')[c(1, 1:bins, bins)],
306
+ c(0,attr(x, 'all.dist'),0), col=col1,
307
+ border=do.call(rgb, as.list(c(col2rgb(col1)/256, 0.5))));
308
+ polygon(attr(x, 'dist.mids')[c(1, 1:bins, bins)],
309
+ c(0,attr(x, 'sel.dist'),0), col=col2,
310
+ border=do.call(rgb, as.list(c(col2rgb(col2)/256, 0.5))));
311
+ }else{
312
+ plot.opts <- list(xlim=range(attr(x, 'diff.mids')),
313
+ ylim=c(0,max(attr(x, 'diff.dist'))), ylab=ylab, ..., t='n', x=1);
314
+ do.call(plot, plot.opts);
315
+ bins <- length(attr(x, 'diff.mids'));
316
+ polygon(attr(x, 'diff.mids')[c(1, 1:bins, bins)],
317
+ c(0,attr(x, 'diff.dist'),0), col=col,
318
+ border=do.call(rgb, as.list(c(col2rgb(col)/256, 0.5))));
319
+ }
224
320
  }
225
321
 
322
+ #' Enveomics: TRIBS Merge
323
+ #'
324
+ #' Merges two \code{\link{enve.TRIBS}} objects generated from the same objects at
325
+ #' different subsampling levels.
326
+ #'
327
+ #' @param x
328
+ #' First \code{\link{enve.TRIBS}} object.
329
+ #' @param y
330
+ #' Second \code{\link{enve.TRIBS}} object.
331
+ #'
332
+ #' @return Returns an \code{\link{enve.TRIBS}} object.
333
+ #'
334
+ #' @author Luis M. Rodriguez-R [aut, cre]
335
+ #'
336
+ #' @export
337
+
226
338
  enve.TRIBS.merge <- function
227
- ### Merges two `enve.TRIBS` objects generated from the same objects at
228
- ### different subsampling levels.
229
- (x,
230
- ### First `enve.TRIBS` object.
231
- y
232
- ### Second `enve.TRIBS` object.
233
- ){
234
- # Check consistency
235
- if(attr(x,'distance') != attr(y,'distance'))
236
- stop('Total distances in objects are different.');
237
- if(any(attr(x,'points') != attr(y,'points')))
238
- stop('Points in objects are different.');
239
- if(attr(x,'spaceSize') != attr(y,'spaceSize'))
240
- stop('Space size in objects are different.');
241
- if(attr(x,'selSize') != attr(y,'selSize'))
242
- stop('Selection size in objects are different.');
243
- if(attr(x,'dimensions') != attr(y,'dimensions'))
244
- stop('Dimensions in objects are different.');
245
- if(nrow(attr(x,'distances')) != nrow(attr(y,'distances')))
246
- stop('Replicates in objects are different.');
247
- # Merge
248
- a <- attr(x,'subsamples');
249
- b <- attr(y,'subsamples');
250
- o <- order(c(a,b));
251
- o <- o[!duplicated(c(a,b)[o])] ;
252
- d <- cbind(attr(x,'distances'), attr(y,'distances'))[, o] ;
253
- z <- new('enve.TRIBS',
254
- distance=attr(x,'distance'), points=attr(x,'points'),
255
- distances=d, spaceSize=attr(x,'spaceSize'),
256
- selSize=attr(x,'selSize'), dimensions=attr(x,'dimensions'),
257
- subsamples=c(a,b)[o], call=match.call());
258
- return(z) ;
259
- ### Returns an `enve.TRIBS` object.
339
+ (x,
340
+ y
341
+ ){
342
+ # Check consistency
343
+ if(attr(x,'distance') != attr(y,'distance'))
344
+ stop('Total distances in objects are different.');
345
+ if(any(attr(x,'points') != attr(y,'points')))
346
+ stop('Points in objects are different.');
347
+ if(attr(x,'spaceSize') != attr(y,'spaceSize'))
348
+ stop('Space size in objects are different.');
349
+ if(attr(x,'selSize') != attr(y,'selSize'))
350
+ stop('Selection size in objects are different.');
351
+ if(attr(x,'dimensions') != attr(y,'dimensions'))
352
+ stop('Dimensions in objects are different.');
353
+ if(nrow(attr(x,'distances')) != nrow(attr(y,'distances')))
354
+ stop('Replicates in objects are different.');
355
+ # Merge
356
+ a <- attr(x,'subsamples');
357
+ b <- attr(y,'subsamples');
358
+ o <- order(c(a,b));
359
+ o <- o[!duplicated(c(a,b)[o])] ;
360
+ d <- cbind(attr(x,'distances'), attr(y,'distances'))[, o] ;
361
+ z <- new('enve.TRIBS',
362
+ distance=attr(x,'distance'), points=attr(x,'points'),
363
+ distances=d, spaceSize=attr(x,'spaceSize'),
364
+ selSize=attr(x,'selSize'), dimensions=attr(x,'dimensions'),
365
+ subsamples=c(a,b)[o], call=match.call());
366
+ return(z) ;
260
367
  }
261
368
 
262
369
  #==============> Define core functions
370
+
371
+ #' Enveomics: TRIBS Test
372
+ #'
373
+ #' Estimates the empirical difference between all the distances in a set of
374
+ #' objects and a subset, together with its statistical significance.
375
+ #'
376
+ #' @param dist
377
+ #' Distances as \code{dist} object.
378
+ #' @param selection
379
+ #' Selection defining the subset.
380
+ #' @param bins
381
+ #' Number of bins to evaluate in the range of distances.
382
+ #' @param ...
383
+ #' Any other parameters supported by \code{\link{enve.tribs}},
384
+ #' except \code{subsamples}.
385
+ #'
386
+ #' @return Returns an \code{\link{enve.TRIBStest}} object.
387
+ #'
388
+ #' @author Luis M. Rodriguez-R [aut, cre]
389
+ #'
390
+ #' @export
391
+
263
392
  enve.tribs.test <- function
264
- ### Estimates the empirical difference between all the distances in a set of
265
- ### objects and a subset, together with its statistical significance.
266
- (dist,
267
- ### Distances as `dist` object.
268
- selection,
269
- ### Selection defining the subset.
270
- bins=50,
271
- ### Number of bins to evaluate in the range of distances.
272
- ...
273
- ### Any other parameters supported by `enve.tribs`, except `subsamples`.
274
- ){
275
- s.tribs <- enve.tribs(dist, selection, subsamples=c(0,1), ...);
276
- a.tribs <- enve.tribs(dist,
277
- subsamples=c(0,attr(s.tribs, 'selSize')/attr(s.tribs, 'spaceSize')), ...);
278
- s.dist <- attr(s.tribs, 'distances')[, 2];
279
- a.dist <- attr(a.tribs, 'distances')[, 2];
280
- range <- range(c(s.dist, a.dist));
281
- a.f <- hist(a.dist, breaks=seq(range[1], range[2], length.out=bins),
282
- plot=FALSE);
283
- s.f <- hist(s.dist, breaks=seq(range[1], range[2], length.out=bins),
284
- plot=FALSE);
285
- zp.f <- c(); zz.f <- 0; zn.f <- c();
286
- p.x <- a.f$counts/sum(a.f$counts);
287
- p.y <- s.f$counts/sum(s.f$counts);
288
- for(z in 1:length(a.f$mids)){
289
- zn.f[z] <- 0;
290
- zz.f <- 0;
291
- zp.f[z] <- 0;
292
- for(k in 1:length(a.f$mids)){
293
- if(z < k){
294
- zp.f[z] <- zp.f[z] + p.x[k]*p.y[k-z];
295
- zn.f[z] <- zn.f[z] + p.x[k-z]*p.y[k];
296
- }
297
- zz.f <- zz.f + p.x[k]*p.y[k];
393
+ (dist,
394
+ selection,
395
+ bins=50,
396
+ ...
397
+ ){
398
+ s.tribs <- enve.tribs(dist, selection, subsamples=c(0,1), ...);
399
+ a.tribs <- enve.tribs(dist,
400
+ subsamples=c(0,attr(s.tribs, 'selSize')/attr(s.tribs, 'spaceSize')), ...);
401
+ s.dist <- attr(s.tribs, 'distances')[, 2];
402
+ a.dist <- attr(a.tribs, 'distances')[, 2];
403
+ range <- range(c(s.dist, a.dist));
404
+ a.f <- hist(a.dist, breaks=seq(range[1], range[2], length.out=bins),
405
+ plot=FALSE);
406
+ s.f <- hist(s.dist, breaks=seq(range[1], range[2], length.out=bins),
407
+ plot=FALSE);
408
+ zp.f <- c(); zz.f <- 0; zn.f <- c();
409
+ p.x <- a.f$counts/sum(a.f$counts);
410
+ p.y <- s.f$counts/sum(s.f$counts);
411
+ for(z in 1:length(a.f$mids)){
412
+ zn.f[z] <- 0;
413
+ zz.f <- 0;
414
+ zp.f[z] <- 0;
415
+ for(k in 1:length(a.f$mids)){
416
+ if(z < k){
417
+ zp.f[z] <- zp.f[z] + p.x[k]*p.y[k-z];
418
+ zn.f[z] <- zn.f[z] + p.x[k-z]*p.y[k];
298
419
  }
299
- }
300
- return(new('enve.TRIBStest',
301
- pval.gt=sum(c(zz.f, zp.f)), pval.lt=sum(c(zz.f, zn.f)),
302
- all.dist=p.x, sel.dist=p.y, diff.dist=c(rev(zn.f), zz.f, zp.f),
303
- dist.mids=a.f$mids,
304
- diff.mids=seq(diff(range(a.f$mids)), -diff(range(a.f$mids)),
305
- length.out=1+2*length(a.f$mids)),
306
- call=match.call()));
307
- ### Returns an `enve.TRIBStest` object.
420
+ zz.f <- zz.f + p.x[k]*p.y[k];
421
+ }
422
+ }
423
+ return(new('enve.TRIBStest',
424
+ pval.gt=sum(c(zz.f, zp.f)), pval.lt=sum(c(zz.f, zn.f)),
425
+ all.dist=p.x, sel.dist=p.y, diff.dist=c(rev(zn.f), zz.f, zp.f),
426
+ dist.mids=a.f$mids,
427
+ diff.mids=seq(diff(range(a.f$mids)), -diff(range(a.f$mids)),
428
+ length.out=1+2*length(a.f$mids)),
429
+ call=match.call()));
308
430
  }
309
431
 
432
+ #' Enveomics: TRIBS
433
+ #'
434
+ #' Subsample any objects in "distance space" to reduce the effect of
435
+ #' sample-clustering. This function was originally designed to subsample
436
+ #' genomes in "phylogenetic distance space", a clear case of strong
437
+ #' clustering bias in sampling, by Luis M. Rodriguez-R and Michael R
438
+ #' Weigand.
439
+ #'
440
+ #' @param dist
441
+ #' Distances as a \code{dist} object.
442
+ #' @param selection
443
+ #' Objects to include in the subsample. By default, all objects are
444
+ #' selected.
445
+ #' @param replicates
446
+ #' Number of replications per point.
447
+ #' @param summary.fx
448
+ #' Function to summarize the distance distributions in a given replicate. By
449
+ #' default, the median distance is estimated.
450
+ #' @param dist.method
451
+ #' Distance method between random points and samples in the transformed
452
+ #' space. See \code{dist}.
453
+ #' @param subsamples
454
+ #' Subsampling fractions.
455
+ #' @param dimensions
456
+ #' Dimensions to use in the NMDS. By default, 5\% of the selection length.
457
+ #' @param metaMDS.opts
458
+ #' Any additional options to pass to metaMDS, as \code{list}.
459
+ #' @param threads
460
+ #' Number of threads to use.
461
+ #' @param verbosity
462
+ #' Verbosity. Use 0 to run quietly, increase for additional information.
463
+ #' @param points
464
+ #' Optional. If passed, the MDS step is skipped and this object is used
465
+ #' instead. It can be the \code{$points} slot of class \code{metaMDS}
466
+ #' (from \code{vegan}).
467
+ #' It must be a matrix or matrix-coercible object, with samples as rows and
468
+ #' dimensions as columns.
469
+ #' @param pre.tribs
470
+ #' Optional. If passed, the points are recovered from this object (except if
471
+ #' \code{points} is also passed. This should be an \code{\link{enve.TRIBS}} object
472
+ #' estimated on the same objects (the selection is unimportant).
473
+ #'
474
+ #' @return Returns an \code{\link{enve.TRIBS}} object.
475
+ #'
476
+ #' @author Luis M. Rodriguez-R [aut, cre]
477
+ #'
478
+ #' @export
479
+
310
480
  enve.tribs <- function
311
- ### Subsample any objects in "distance space" to reduce the effect of
312
- ### sample-clustering. This function was originally designed to subsample
313
- ### genomes in "phylogenetic distance space", a clear case of strong
314
- ### clustering bias in sampling, by Luis M. Rodriguez-R and Michael R
315
- ### Weigand.
316
- (dist,
317
- ### Distances as a `dist` object.
318
- selection=labels(dist),
319
- ### Objects to include in the subsample. By default, all objects are
320
- ### selected.
321
- replicates=1000,
322
- ### Number of replications per point
323
- summary.fx=median,
324
- ### Function to summarize the distance distributions in a given replicate. By
325
- ### default, the median distance is estimated.
326
- dist.method='euclidean',
327
- ### Distance method between random points and samples in the transformed
328
- ### space. See `dist`.
329
- subsamples=seq(0,1,by=0.01),
330
- ### Subsampling fractions
331
- dimensions=ceiling(length(selection)*0.05),
332
- ### Dimensions to use in the NMDS. By default, 5% of the selection length.
333
- metaMDS.opts=list(),
334
- ### Any additional options to pass to metaMDS, as `list`.
335
- threads=2,
336
- ### Number of threads to use.
337
- verbosity=1,
338
- ### Verbosity. Use 0 to run quietly, increase for additional information.
339
- points,
340
- ### Optional. If passed, the MDS step is skipped and this object is used
341
- ### instead. It can be the `$points` slot of class `metaMDS` (from `vegan`).
342
- ### It must be a matrix or matrix-coercible object, with samples as rows and
343
- ### dimensions as columns.
344
- pre.tribs
345
- ### Optional. If passed, the points are recovered from this object (except if
346
- ### `points` is also passed. This should be an `enve.TRIBS` object estimated
347
- ### on the same objects (the selection is unimportant).
348
- ){
349
- if(!is(dist, 'dist'))
350
- stop('`dist` parameter must be a `dist` object.');
351
- # 1. NMDS
352
- if(missing(points)){
353
- if(missing(pre.tribs)){
354
- if(verbosity > 0)
355
- cat('===[ Estimating NMDS ]\n');
356
- if(!suppressPackageStartupMessages(
357
- requireNamespace("vegan", quietly=TRUE)))
358
- stop('Unavailable required package: `vegan`.');
359
- mds.args <- c(metaMDS.opts, list(comm=dist, k=dimensions,
360
- trace=verbosity));
361
- points <- do.call(vegan::metaMDS, mds.args)$points;
362
- }else{
363
- points <- attr(pre.tribs, 'points');
364
- dimensions <- ncol(points);
365
- }
366
- }else{
367
- points <- as.matrix(points);
481
+ (dist,
482
+ selection=labels(dist),
483
+ replicates=1000,
484
+ summary.fx=median,
485
+ dist.method='euclidean',
486
+ subsamples=seq(0,1,by=0.01),
487
+ dimensions=ceiling(length(selection)*0.05),
488
+ metaMDS.opts=list(),
489
+ threads=2,
490
+ verbosity=1,
491
+ points,
492
+ pre.tribs
493
+ ){
494
+ if(!is(dist, 'dist'))
495
+ stop('`dist` parameter must be a `dist` object.');
496
+ # 1. NMDS
497
+ if(missing(points)){
498
+ if(missing(pre.tribs)){
499
+ if(verbosity > 0)
500
+ cat('===[ Estimating NMDS ]\n');
501
+ if(!suppressPackageStartupMessages(
502
+ requireNamespace("vegan", quietly=TRUE)))
503
+ stop('Unavailable required package: `vegan`.');
504
+ mds.args <- c(metaMDS.opts, list(comm=dist, k=dimensions,
505
+ trace=verbosity));
506
+ points <- do.call(vegan::metaMDS, mds.args)$points;
507
+ }else{
508
+ points <- attr(pre.tribs, 'points');
368
509
  dimensions <- ncol(points);
369
- }
370
- # 2. Pad ranges
371
- if(verbosity > 0) cat('===[ Padding ranges ]\n');
372
- dots <- matrix(NA, nrow=nrow(points), ncol=dimensions,
373
- dimnames=list(rownames(points), 1:dimensions));
374
- selection <- selection[!is.na(match(selection, rownames(dots)))];
375
- for(dim in 1:dimensions){
376
- dimRange <- range(points[,dim]) +
377
- c(-1,1)*diff(range(points[,1]))/length(selection);
378
- dots[, dim] <- (points[,dim]-dimRange[1])/diff(dimRange);
379
- }
380
- # 3. Select points and summarize distances
381
- if(verbosity > 0) cat('===[ Sub-sampling ]\n');
382
- distances <- matrix(NA, nrow=replicates, ncol=length(subsamples),
383
- dimnames=list(1:replicates, as.character(subsamples)));
384
- cl <- makeCluster(threads);
385
- for(frx in subsamples){
386
- if(verbosity > 1) cat('Sub-sampling at ',(frx*100),'%\n',sep='');
387
- distances[, as.character(frx)] = parSapply(cl, 1:replicates, enve.__tribs,
388
- frx, match(selection, rownames(dots)), dimensions, dots, dist.method,
389
- summary.fx, dist);
390
- }
391
- stopCluster(cl);
392
- # 4. Build object and return
393
- return(new('enve.TRIBS',
394
- distance=do.call(summary.fx, list(as.matrix(dist)[selection, selection])),
395
- points=points, distances=distances, spaceSize=nrow(points),
396
- selSize=length(selection), dimensions=dimensions, subsamples=subsamples,
397
- call=match.call()));
398
- ### Returns an `enve.TRIBS` object.
510
+ }
511
+ }else{
512
+ points <- as.matrix(points);
513
+ dimensions <- ncol(points);
514
+ }
515
+ # 2. Pad ranges
516
+ if(verbosity > 0) cat('===[ Padding ranges ]\n');
517
+ dots <- matrix(NA, nrow=nrow(points), ncol=dimensions,
518
+ dimnames=list(rownames(points), 1:dimensions));
519
+ selection <- selection[!is.na(match(selection, rownames(dots)))];
520
+ for(dim in 1:dimensions){
521
+ dimRange <- range(points[,dim]) +
522
+ c(-1,1)*diff(range(points[,1]))/length(selection);
523
+ dots[, dim] <- (points[,dim]-dimRange[1])/diff(dimRange);
524
+ }
525
+ # 3. Select points and summarize distances
526
+ if(verbosity > 0) cat('===[ Sub-sampling ]\n');
527
+ distances <- matrix(NA, nrow=replicates, ncol=length(subsamples),
528
+ dimnames=list(1:replicates, as.character(subsamples)));
529
+ cl <- makeCluster(threads);
530
+ for(frx in subsamples){
531
+ if(verbosity > 1) cat('Sub-sampling at ',(frx*100),'%\n',sep='');
532
+ distances[, as.character(frx)] = parSapply(cl, 1:replicates, enve.__tribs,
533
+ frx, match(selection, rownames(dots)), dimensions, dots, dist.method,
534
+ summary.fx, dist);
535
+ }
536
+ stopCluster(cl);
537
+ # 4. Build object and return
538
+ return(new('enve.TRIBS',
539
+ distance=do.call(summary.fx, list(as.matrix(dist)[selection, selection])),
540
+ points=points, distances=distances, spaceSize=nrow(points),
541
+ selSize=length(selection), dimensions=dimensions, subsamples=subsamples,
542
+ call=match.call()));
399
543
  }
400
544
 
545
+ #' Enveomics: TRIBS - Internal Ancillary Function
546
+ #'
547
+ #' Internal ancillary function (see \code{\link{enve.tribs}}).
548
+ #'
549
+ #' @param rep Replicates
550
+ #' @param frx Fraction
551
+ #' @param selection Selection
552
+ #' @param dimensions Dimensions
553
+ #' @param dots Sampling points
554
+ #' @param dist.method Distance method
555
+ #' @param summary.fx Summary function
556
+ #' @param dist Distance
557
+ #'
558
+ #' @author Luis M. Rodriguez-R [aut, cre]
559
+ #'
560
+ #' @export
561
+
401
562
  enve.__tribs <- function
402
- ### Internal ancilliary function (see `enve.tribs`).
403
- (rep, frx, selection, dimensions, dots, dist.method, summary.fx, dist){
404
- sample <- c();
405
- if(frx==0) return(0);
406
- for(point in 1:round(frx*length(selection))){
407
- rand.point <- runif(dimensions);
408
- closest.dot <- '';
409
- closest.dist <- Inf;
410
- for(dot in selection){
411
- dot.dist <- as.numeric(dist(matrix(c(rand.point, dots[dot,]), nrow=2,
412
- byrow=TRUE), method=dist.method));
413
- if(dot.dist < closest.dist){
414
- closest.dot <- dot;
415
- closest.dist <- dot.dist;
416
- }
563
+ (rep, frx, selection, dimensions, dots, dist.method, summary.fx, dist){
564
+ sample <- c();
565
+ if(frx==0) return(0);
566
+ for(point in 1:round(frx*length(selection))){
567
+ rand.point <- runif(dimensions);
568
+ closest.dot <- '';
569
+ closest.dist <- Inf;
570
+ for(dot in selection){
571
+ dot.dist <- as.numeric(dist(matrix(c(rand.point, dots[dot,]), nrow=2,
572
+ byrow=TRUE), method=dist.method));
573
+ if(dot.dist < closest.dist){
574
+ closest.dot <- dot;
575
+ closest.dist <- dot.dist;
417
576
  }
418
- sample <- c(sample, closest.dot);
419
- }
420
- return( do.call(summary.fx, list(as.matrix(dist)[sample, sample])) );
577
+ }
578
+ sample <- c(sample, closest.dot);
579
+ }
580
+ return( do.call(summary.fx, list(as.matrix(dist)[sample, sample])) );
421
581
  }
422
582
 
423
583