umappp 0.1.6 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +22 -16
- data/ext/umappp/numo.hpp +957 -833
- data/ext/umappp/umappp.cpp +39 -45
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +7 -10
data/vendor/irlba/wrappers.hpp
CHANGED
@@ -17,15 +17,150 @@
|
|
17
17
|
*
|
18
18
|
* - `mat.rows()`, returning the number of rows.
|
19
19
|
* - `mat.cols()`, returning the number of columns.
|
20
|
-
* - `mat.
|
21
|
-
*
|
22
|
-
* - `mat.
|
23
|
-
* `rhs` should be an `Eigen::VectorXd` (or an expression equivalent) while `out` should be a `Eigen::VectorXd`.
|
20
|
+
* - `mat.workspace()`, returning an instance of a workspace class for multiplication.
|
21
|
+
* - `mat.adjoint_workspace()`, returning an instance of a workspace class for adjoint multiplication.
|
22
|
+
* - `mat.multiply(rhs, work, out)`, which computes the matrix product `mat * rhs` and stores it in `out` - see `irlba::Centered::multiply()` for the typical signature.
|
23
|
+
* `rhs` should be a const reference to an `Eigen::VectorXd` (or an expression equivalent, via templating) while `out` should be a non-const reference to a `Eigen::VectorXd`.
|
24
|
+
* `work` should be the return value of `mat.workspace()` and is passed in as a non-const reference.
|
25
|
+
* - `mat.adjoint_multiply(rhs, work, out)`, which computes the matrix product `mat.adjoint() * rhs` and stores it in `out` - see `irlba::Centered::adjoint_multiply()` for the typical signature.
|
26
|
+
* `rhs` should be a const reference to an `Eigen::VectorXd` (or an expression equivalent, via templating) while `out` should be a non-const reference to a `Eigen::VectorXd`.
|
27
|
+
* `work` should be the return value of `mat.adjoint_workspace()` and is passed in as a non-const reference.
|
24
28
|
* - `mat.realize()`, which returns an `Eigen::MatrixXd` containing the matrix with all modifications applied.
|
29
|
+
*
|
30
|
+
* The workspace class is used to allocate space for intermediate results across multiple calls to `multiply()`.
|
31
|
+
* This class should contain a member of type `WrappedWorkspace<M>`, where `M` is the type of the underlying matrix;
|
32
|
+
* this member can be initialized by calling the `wrapped_workspace()` function on the underlying matrix.
|
33
|
+
* If a wrapper does not have any intermediate results, it can just return `WrappedWorkspace<M>` directly.
|
34
|
+
* The same logic applies to `adjoint_multiply()` using the `AdjointWrappedWorkspace` template class and `wrapped_adjoint_workspace()`.
|
35
|
+
*
|
36
|
+
* Implementations of the `multiply()` and `adjoint_multiply()` methods may use the `wrapped_multiply()` and `wrapped_adjoint_multiply()` functions.
|
37
|
+
* This will handle the differences in the calls between **Eigen** matrices and **irlba** wrappers.
|
25
38
|
*/
|
26
39
|
|
27
40
|
namespace irlba {
|
28
41
|
|
42
|
+
/**
|
43
|
+
* @cond
|
44
|
+
*/
|
45
|
+
template<class Matrix, typename = int>
|
46
|
+
struct WrappedWorkspaceInternal {
|
47
|
+
typedef bool type;
|
48
|
+
};
|
49
|
+
|
50
|
+
template<class Matrix>
|
51
|
+
struct WrappedWorkspaceInternal<Matrix, decltype((void) std::declval<Matrix>().workspace(), 0)> {
|
52
|
+
typedef decltype(std::declval<Matrix>().workspace()) type;
|
53
|
+
};
|
54
|
+
|
55
|
+
template<class Matrix, typename = int>
|
56
|
+
struct WrappedAdjointWorkspaceInternal {
|
57
|
+
typedef bool type;
|
58
|
+
};
|
59
|
+
|
60
|
+
template<class Matrix>
|
61
|
+
struct WrappedAdjointWorkspaceInternal<Matrix, decltype((void) std::declval<Matrix>().adjoint_workspace(), 0)> {
|
62
|
+
typedef decltype(std::declval<Matrix>().adjoint_workspace()) type;
|
63
|
+
};
|
64
|
+
/**
|
65
|
+
* @endcond
|
66
|
+
*/
|
67
|
+
|
68
|
+
/**
|
69
|
+
* @tparam Matrix Type of the underlying matrix in the wrapper.
|
70
|
+
*
|
71
|
+
* This type is equivalent to the workspace class of `Matrix`, or a placeholder boolean if `Matrix` is an Eigen class.
|
72
|
+
*/
|
73
|
+
template<class Matrix>
|
74
|
+
using WrappedWorkspace = typename WrappedWorkspaceInternal<Matrix>::type;
|
75
|
+
|
76
|
+
/**
|
77
|
+
* @tparam Matrix Type of the underlying matrix in the wrapper.
|
78
|
+
*
|
79
|
+
* This type is equivalent to the adjoint workspace class of `Matrix`, or a placeholder boolean if `Matrix` is an Eigen class.
|
80
|
+
*/
|
81
|
+
template<class Matrix>
|
82
|
+
using WrappedAdjointWorkspace = typename WrappedAdjointWorkspaceInternal<Matrix>::type;
|
83
|
+
|
84
|
+
/**
|
85
|
+
* @tparam Matrix Type of the underlying matrix in the wrapper.
|
86
|
+
* @param mat Pointer to the wrapped matrix instance.
|
87
|
+
* @return The workspace of `mat`, or `false` if `Matrix` is an **Eigen** class.
|
88
|
+
*/
|
89
|
+
template<class Matrix>
|
90
|
+
WrappedWorkspace<Matrix> wrapped_workspace(const Matrix* mat) {
|
91
|
+
if constexpr(has_multiply_method<Matrix>::value) { // using this as a proxy for whether it's an Eigen matrix or not.
|
92
|
+
return false;
|
93
|
+
} else {
|
94
|
+
return mat->workspace();
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
/**
|
99
|
+
* @tparam Matrix Type of the underlying matrix in the wrapper.
|
100
|
+
* @param mat Pointer to the wrapped matrix instance.
|
101
|
+
* @return The adjoint workspace of `mat`, or `false` if `Matrix` is an **Eigen** class.
|
102
|
+
*/
|
103
|
+
template<class Matrix>
|
104
|
+
WrappedAdjointWorkspace<Matrix> wrapped_adjoint_workspace(const Matrix* mat) {
|
105
|
+
if constexpr(has_adjoint_multiply_method<Matrix>::value) {
|
106
|
+
return false;
|
107
|
+
} else {
|
108
|
+
return mat->adjoint_workspace();
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
/**
|
113
|
+
* @tparam Matrix Type of the wrapped matrix.
|
114
|
+
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
115
|
+
*
|
116
|
+
* @param[in] mat Pointer to the wrapped matrix instance.
|
117
|
+
* @param[in] rhs The right-hand side of the matrix product.
|
118
|
+
* @param work The return value of `wrapped_workspace()` on `mat`.
|
119
|
+
* @param[out] out The output vector to store the matrix product.
|
120
|
+
* This is filled with the product of this matrix and `rhs`.
|
121
|
+
*/
|
122
|
+
template<class Matrix, class Right>
|
123
|
+
void wrapped_multiply(const Matrix* mat, const Right& rhs, WrappedWorkspace<Matrix>& work, Eigen::VectorXd& out) {
|
124
|
+
if constexpr(has_multiply_method<Matrix>::value) {
|
125
|
+
out.noalias() = *mat * rhs;
|
126
|
+
} else {
|
127
|
+
mat->multiply(rhs, work, out);
|
128
|
+
}
|
129
|
+
}
|
130
|
+
|
131
|
+
/**
|
132
|
+
* @tparam Matrix Type of the wrapped matrix.
|
133
|
+
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
134
|
+
*
|
135
|
+
* @param[in] mat Poitner to the wrapped matrix instance.
|
136
|
+
* @param[in] rhs The right-hand side of the matrix product.
|
137
|
+
* @param work The return value of `wrapped_adjoint_workspace()` on `mat`.
|
138
|
+
* @param[out] out The output vector to store the matrix product.
|
139
|
+
* This is filled with the product of this matrix and `rhs`.
|
140
|
+
*/
|
141
|
+
template<class Matrix, class Right>
|
142
|
+
void wrapped_adjoint_multiply(const Matrix* mat, const Right& rhs, WrappedAdjointWorkspace<Matrix>& work, Eigen::VectorXd& out) {
|
143
|
+
if constexpr(has_adjoint_multiply_method<Matrix>::value) {
|
144
|
+
out.noalias() = mat->adjoint() * rhs;
|
145
|
+
} else {
|
146
|
+
mat->adjoint_multiply(rhs, work, out);
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
/**
|
151
|
+
* @tparam Matrix Type of the wrapped matrix.
|
152
|
+
* @param[in] mat Pointer to the wrapped matrix instance.
|
153
|
+
* @return A dense **Eigen** matrix containing the realized contents of `mat`.
|
154
|
+
*/
|
155
|
+
template<class Matrix>
|
156
|
+
Eigen::MatrixXd wrapped_realize(const Matrix* mat) {
|
157
|
+
if constexpr(has_realize_method<Matrix>::value) {
|
158
|
+
return mat->realize();
|
159
|
+
} else {
|
160
|
+
return Eigen::MatrixXd(*mat);
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
29
164
|
/**
|
30
165
|
* @brief Wrapper for a centered matrix.
|
31
166
|
*
|
@@ -53,23 +188,46 @@ struct Centered {
|
|
53
188
|
*/
|
54
189
|
auto cols() const { return mat->cols(); }
|
55
190
|
|
191
|
+
public:
|
192
|
+
/**
|
193
|
+
* Workspace type for `multiply()`.
|
194
|
+
* Currently, this is just an alias for the workspace type of the underlying matrix.
|
195
|
+
*/
|
196
|
+
typedef WrappedWorkspace<Matrix> Workspace;
|
197
|
+
|
198
|
+
/**
|
199
|
+
* @return Workspace for use in `multiply()`.
|
200
|
+
*/
|
201
|
+
Workspace workspace() const {
|
202
|
+
return wrapped_workspace(mat);
|
203
|
+
}
|
204
|
+
|
205
|
+
/**
|
206
|
+
* Workspace type for `adjoint_multiply()`.
|
207
|
+
* Currently, this is just an alias for the adjoint workspace type of the underlying matrix.
|
208
|
+
*/
|
209
|
+
typedef WrappedAdjointWorkspace<Matrix> AdjointWorkspace;
|
210
|
+
|
211
|
+
/**
|
212
|
+
* @return Workspace for use in `adjoint_multiply()`.
|
213
|
+
*/
|
214
|
+
AdjointWorkspace adjoint_workspace() const {
|
215
|
+
return wrapped_adjoint_workspace(mat);
|
216
|
+
}
|
217
|
+
|
218
|
+
public:
|
56
219
|
/**
|
57
220
|
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
58
221
|
*
|
59
222
|
* @param[in] rhs The right-hand side of the matrix product.
|
60
|
-
*
|
223
|
+
* @param work The return value of `workspace()`.
|
224
|
+
* This can be reused across multiple `multiply()` calls.
|
61
225
|
* @param[out] out The output vector to store the matrix product.
|
62
|
-
*
|
63
|
-
* @return `out` is filled with the product of this matrix and `rhs`.
|
226
|
+
* This is filled with the product of this matrix and `rhs`.
|
64
227
|
*/
|
65
228
|
template<class Right>
|
66
|
-
void multiply(const Right& rhs, Eigen::VectorXd& out) const {
|
67
|
-
|
68
|
-
out.noalias() = *mat * rhs;
|
69
|
-
} else {
|
70
|
-
mat->multiply(rhs, out);
|
71
|
-
}
|
72
|
-
|
229
|
+
void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& out) const {
|
230
|
+
wrapped_multiply(mat, rhs, work, out);
|
73
231
|
double beta = rhs.dot(*center);
|
74
232
|
for (auto& o : out) {
|
75
233
|
o -= beta;
|
@@ -81,19 +239,14 @@ struct Centered {
|
|
81
239
|
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
82
240
|
*
|
83
241
|
* @param[in] rhs The right-hand side of the matrix product.
|
84
|
-
*
|
242
|
+
* @param work The return value of `adjoint_workspace()`.
|
243
|
+
* This can be reused across multiple `adjoint_multiply()` calls.
|
85
244
|
* @param[out] out The output vector to store the matrix product.
|
86
|
-
*
|
87
|
-
* @return `out` is filled with the product of the transpose of this matrix and `rhs`.
|
245
|
+
* This is filled with the product of the transpose of this matrix and `rhs`.
|
88
246
|
*/
|
89
247
|
template<class Right>
|
90
|
-
void adjoint_multiply(const Right& rhs, Eigen::VectorXd& out) const {
|
91
|
-
|
92
|
-
out.noalias() = mat->adjoint() * rhs;
|
93
|
-
} else {
|
94
|
-
mat->adjoint_multiply(rhs, out);
|
95
|
-
}
|
96
|
-
|
248
|
+
void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& out) const {
|
249
|
+
wrapped_adjoint_multiply(mat, rhs, work, out);
|
97
250
|
double beta = rhs.sum();
|
98
251
|
out -= beta * (*center);
|
99
252
|
return;
|
@@ -104,23 +257,13 @@ struct Centered {
|
|
104
257
|
* where the centering has been explicitly applied.
|
105
258
|
*/
|
106
259
|
Eigen::MatrixXd realize() const {
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
}
|
260
|
+
Eigen::MatrixXd output = wrapped_realize(mat);
|
261
|
+
for (Eigen::Index c = 0; c < output.cols(); ++c) {
|
262
|
+
for (Eigen::Index r = 0; r < output.rows(); ++r) {
|
263
|
+
output(r, c) -= (*center)[c];
|
112
264
|
}
|
113
|
-
};
|
114
|
-
|
115
|
-
if constexpr(has_realize_method<Matrix>::value) {
|
116
|
-
Eigen::MatrixXd output = mat->realize();
|
117
|
-
subtractor(output);
|
118
|
-
return output;
|
119
|
-
} else {
|
120
|
-
Eigen::MatrixXd output(*mat);
|
121
|
-
subtractor(output);
|
122
|
-
return output;
|
123
265
|
}
|
266
|
+
return output;
|
124
267
|
}
|
125
268
|
|
126
269
|
private:
|
@@ -155,22 +298,61 @@ struct Scaled {
|
|
155
298
|
*/
|
156
299
|
auto cols() const { return mat->cols(); }
|
157
300
|
|
301
|
+
public:
|
302
|
+
/**
|
303
|
+
* @brief Workspace type for `multiply()`.
|
304
|
+
*/
|
305
|
+
struct Workspace {
|
306
|
+
/**
|
307
|
+
* @cond
|
308
|
+
*/
|
309
|
+
Workspace(size_t n, WrappedWorkspace<Matrix> c) : product(n), child(std::move(c)) {}
|
310
|
+
Eigen::VectorXd product;
|
311
|
+
WrappedWorkspace<Matrix> child;
|
312
|
+
/**
|
313
|
+
* @endcond
|
314
|
+
*/
|
315
|
+
};
|
316
|
+
|
317
|
+
/**
|
318
|
+
* @return Workspace for use in `multiply()`.
|
319
|
+
*/
|
320
|
+
Workspace workspace() const {
|
321
|
+
return Workspace(mat->cols(), wrapped_workspace(mat));
|
322
|
+
}
|
323
|
+
|
324
|
+
/**
|
325
|
+
* Workspace type for `adjoint_multiply()`.
|
326
|
+
* Currently, this is just an alias for the adjoint workspace type of the underlying matrix.
|
327
|
+
*/
|
328
|
+
typedef WrappedAdjointWorkspace<Matrix> AdjointWorkspace;
|
329
|
+
|
330
|
+
/**
|
331
|
+
* @return Workspace for use in `adjoint_multiply()`.
|
332
|
+
*/
|
333
|
+
AdjointWorkspace adjoint_workspace() const {
|
334
|
+
return wrapped_adjoint_workspace(mat);
|
335
|
+
}
|
336
|
+
|
337
|
+
public:
|
158
338
|
/**
|
159
339
|
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
160
340
|
*
|
161
341
|
* @param[in] rhs The right-hand side of the matrix product.
|
162
342
|
* This should be a vector or have only one column.
|
343
|
+
* @param work The return value of `workspace()`.
|
344
|
+
* This can be reused across multiple `multiply()` calls.
|
163
345
|
* @param[out] out The output vector to store the matrix product.
|
164
|
-
*
|
165
|
-
* @return `out` is filled with the product of this matrix and `rhs`.
|
346
|
+
* This is filled with the product of this matrix and `rhs`.
|
166
347
|
*/
|
167
348
|
template<class Right>
|
168
|
-
void multiply(const Right& rhs, Eigen::VectorXd& out) const {
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
349
|
+
void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& out) const {
|
350
|
+
// We store the result here, because the underlying matrix's multiply()
|
351
|
+
// might need to access rhs/scale multiple times, especially if it's
|
352
|
+
// parallelized. Better to pay the cost of accessing a separate memory
|
353
|
+
// space than computing the quotient repeatedly.
|
354
|
+
work.product = rhs.cwiseQuotient(*scale);
|
355
|
+
wrapped_multiply(mat, work.product, work.child, out);
|
174
356
|
return;
|
175
357
|
}
|
176
358
|
|
@@ -179,17 +361,14 @@ struct Scaled {
|
|
179
361
|
*
|
180
362
|
* @param[in] rhs The right-hand side of the matrix product.
|
181
363
|
* This should be a vector or have only one column.
|
364
|
+
* @param work The return value of `adjoint_workspace()`.
|
365
|
+
* This can be reused across multiple `adjoint_multiply()` calls.
|
182
366
|
* @param[out] out The output vector to store the matrix product.
|
183
|
-
*
|
184
|
-
* @return `out` is filled with the product of the transpose of this matrix and `rhs`.
|
367
|
+
* This is filled with the product of the transpose of this matrix and `rhs`.
|
185
368
|
*/
|
186
369
|
template<class Right>
|
187
|
-
void adjoint_multiply(const Right& rhs, Eigen::VectorXd& out) const {
|
188
|
-
|
189
|
-
out.noalias() = mat->adjoint() * rhs;
|
190
|
-
} else {
|
191
|
-
mat->adjoint_multiply(rhs, out);
|
192
|
-
}
|
370
|
+
void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& out) const {
|
371
|
+
wrapped_adjoint_multiply(mat, rhs, work, out);
|
193
372
|
out.noalias() = out.cwiseQuotient(*scale);
|
194
373
|
return;
|
195
374
|
}
|
@@ -199,23 +378,13 @@ struct Scaled {
|
|
199
378
|
* where the scaling has been explicitly applied.
|
200
379
|
*/
|
201
380
|
Eigen::MatrixXd realize() const {
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
}
|
381
|
+
Eigen::MatrixXd output = wrapped_realize(mat);
|
382
|
+
for (Eigen::Index c = 0; c < output.cols(); ++c) {
|
383
|
+
for (Eigen::Index r = 0; r < output.rows(); ++r) {
|
384
|
+
output(r, c) /= (*scale)[c];
|
207
385
|
}
|
208
|
-
};
|
209
|
-
|
210
|
-
if constexpr(has_realize_method<Matrix>::value) {
|
211
|
-
Eigen::MatrixXd output = mat->realize();
|
212
|
-
scaler(output);
|
213
|
-
return output;
|
214
|
-
} else {
|
215
|
-
Eigen::MatrixXd output(*mat);
|
216
|
-
scaler(output);
|
217
|
-
return output;
|
218
386
|
}
|
387
|
+
return output;
|
219
388
|
}
|
220
389
|
|
221
390
|
private:
|
data/vendor/kmeans/Details.hpp
CHANGED
@@ -106,13 +106,19 @@ public:
|
|
106
106
|
*/
|
107
107
|
struct Defaults {
|
108
108
|
/**
|
109
|
-
* See `
|
109
|
+
* See `set_max_iterations()` for more details.
|
110
110
|
*/
|
111
111
|
static constexpr int max_iterations = 10;
|
112
|
+
|
113
|
+
/**
|
114
|
+
* See `set_num_threads()` for more details.
|
115
|
+
*/
|
116
|
+
static constexpr int num_threads = 1;
|
112
117
|
};
|
113
118
|
|
114
119
|
private:
|
115
120
|
int maxiter = Defaults::max_iterations;
|
121
|
+
int nthreads = Defaults::num_threads;
|
116
122
|
|
117
123
|
public:
|
118
124
|
/**
|
@@ -126,6 +132,16 @@ public:
|
|
126
132
|
return *this;
|
127
133
|
}
|
128
134
|
|
135
|
+
/**
|
136
|
+
* @param n Number of threads to use.
|
137
|
+
*
|
138
|
+
* @return A reference to this `HartiganWong` object.
|
139
|
+
*/
|
140
|
+
HartiganWong& set_num_threads(int n = Defaults::num_threads) {
|
141
|
+
nthreads = n;
|
142
|
+
return *this;
|
143
|
+
}
|
144
|
+
|
129
145
|
public:
|
130
146
|
Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
|
131
147
|
num_dim = ndim;
|
@@ -160,8 +176,13 @@ public:
|
|
160
176
|
/* For each point I, find its two closest centres, IC1(I) and
|
161
177
|
* IC2(I). Assign it to IC1(I).
|
162
178
|
*/
|
163
|
-
|
179
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
180
|
+
#pragma omp parallel for num_threads(nthreads)
|
164
181
|
for (INDEX_t obs = 0; obs < num_obs; ++obs) {
|
182
|
+
#else
|
183
|
+
KMEANS_CUSTOM_PARALLEL(num_obs, [&](INDEX_t first, INDEX_t last) -> void {
|
184
|
+
for (INDEX_t obs = first; obs < last; ++obs) {
|
185
|
+
#endif
|
165
186
|
auto& best = ic1[obs];
|
166
187
|
best = 0;
|
167
188
|
DATA_t best_dist = squared_distance_from_cluster(obs, best);
|
@@ -186,7 +207,12 @@ public:
|
|
186
207
|
}
|
187
208
|
}
|
188
209
|
}
|
210
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
211
|
+
}
|
212
|
+
#else
|
189
213
|
}
|
214
|
+
}, nthreads);
|
215
|
+
#endif
|
190
216
|
|
191
217
|
/* Update cluster centres to be the average of points contained
|
192
218
|
* within them.
|
@@ -44,6 +44,11 @@ public:
|
|
44
44
|
* See `set_seed()` for more details.
|
45
45
|
*/
|
46
46
|
static constexpr uint64_t seed = 6523u;
|
47
|
+
|
48
|
+
/**
|
49
|
+
* See `set_num_threads()` for more details.
|
50
|
+
*/
|
51
|
+
static constexpr int num_threads = 1;
|
47
52
|
};
|
48
53
|
|
49
54
|
/**
|
@@ -55,8 +60,20 @@ public:
|
|
55
60
|
seed = s;
|
56
61
|
return *this;
|
57
62
|
}
|
63
|
+
|
64
|
+
/**
|
65
|
+
* @param n Number of threads to use.
|
66
|
+
*
|
67
|
+
* @return A reference to this `InitializeKmeansPP` object.
|
68
|
+
*/
|
69
|
+
InitializeKmeansPP& set_num_threads(int n = Defaults::num_threads) {
|
70
|
+
nthreads = n;
|
71
|
+
return *this;
|
72
|
+
}
|
73
|
+
|
58
74
|
private:
|
59
75
|
uint64_t seed = Defaults::seed;
|
76
|
+
int nthreads = Defaults::num_threads;
|
60
77
|
|
61
78
|
public:
|
62
79
|
/**
|
@@ -74,8 +91,13 @@ public:
|
|
74
91
|
if (!sofar.empty()) {
|
75
92
|
auto last = sofar.back();
|
76
93
|
|
77
|
-
|
94
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
95
|
+
#pragma omp parallel for num_threads(nthreads)
|
78
96
|
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|
97
|
+
#else
|
98
|
+
KMEANS_CUSTOM_PARALLEL(nobs, [&](INDEX_t first, INDEX_t end) -> void {
|
99
|
+
for (INDEX_t obs = first; obs < end; ++obs) {
|
100
|
+
#endif
|
79
101
|
if (mindist[obs]) {
|
80
102
|
const DATA_t* acopy = data + obs * ndim;
|
81
103
|
const DATA_t* scopy = data + last * ndim;
|
@@ -88,7 +110,13 @@ public:
|
|
88
110
|
mindist[obs] = r2;
|
89
111
|
}
|
90
112
|
}
|
113
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
91
114
|
}
|
115
|
+
#else
|
116
|
+
}
|
117
|
+
}, nthreads);
|
118
|
+
#endif
|
119
|
+
|
92
120
|
} else {
|
93
121
|
counter = nobs;
|
94
122
|
}
|
data/vendor/kmeans/Kmeans.hpp
CHANGED
@@ -40,10 +40,16 @@ public:
|
|
40
40
|
* See `set_seed()` for more details.
|
41
41
|
*/
|
42
42
|
static constexpr uint64_t seed = 5489u;
|
43
|
+
|
44
|
+
/**
|
45
|
+
* See `set_num_threads()` for more details.
|
46
|
+
*/
|
47
|
+
static constexpr int num_threads = 1;
|
43
48
|
};
|
44
49
|
|
45
50
|
private:
|
46
51
|
uint64_t seed = Defaults::seed;
|
52
|
+
int nthreads = Defaults::num_threads;
|
47
53
|
|
48
54
|
public:
|
49
55
|
/**
|
@@ -52,14 +58,27 @@ public:
|
|
52
58
|
*
|
53
59
|
* @return A reference to this `Kmeans` object.
|
54
60
|
*
|
55
|
-
* This seed is only used for the default `
|
56
|
-
* Otherwise,
|
61
|
+
* This seed is only used for the default `initializer` instance in `run()`.
|
62
|
+
* Otherwise, if an `initializer` is explicitly passed to `run()`, its seed is respected.
|
57
63
|
*/
|
58
64
|
Kmeans& set_seed(uint64_t s = 5489u) {
|
59
65
|
seed = s;
|
60
66
|
return *this;
|
61
67
|
}
|
62
68
|
|
69
|
+
/**
|
70
|
+
* @param n Number of threads to use.
|
71
|
+
*
|
72
|
+
* @return A reference to this `Kmeans` object.
|
73
|
+
*
|
74
|
+
* This setting is only used for the default `refiner` and `initializer` instances in `run()`.
|
75
|
+
* Otherwise, if an `initializer` or `refiner` is explicitly passed to `run()`, the number of threads specified in the instance is respected.
|
76
|
+
*/
|
77
|
+
Kmeans& set_num_threads(int n = Defaults::num_threads) {
|
78
|
+
nthreads = n;
|
79
|
+
return *this;
|
80
|
+
}
|
81
|
+
|
63
82
|
public:
|
64
83
|
/**
|
65
84
|
* @param ndim Number of dimensions.
|
@@ -88,6 +107,7 @@ public:
|
|
88
107
|
{
|
89
108
|
if (initializer == NULL) {
|
90
109
|
InitializeKmeansPP<DATA_t, CLUSTER_t, INDEX_t> init;
|
110
|
+
init.set_seed(seed).set_num_threads(nthreads);
|
91
111
|
ncenters = init.run(ndim, nobs, data, ncenters, centers, clusters);
|
92
112
|
} else {
|
93
113
|
ncenters = initializer->run(ndim, nobs, data, ncenters, centers, clusters);
|
@@ -95,6 +115,7 @@ public:
|
|
95
115
|
|
96
116
|
if (refiner == NULL) {
|
97
117
|
HartiganWong<DATA_t, CLUSTER_t, INDEX_t> hw;
|
118
|
+
hw.set_num_threads(nthreads);
|
98
119
|
return hw.run(ndim, nobs, data, ncenters, centers, clusters);
|
99
120
|
} else {
|
100
121
|
return refiner->run(ndim, nobs, data, ncenters, centers, clusters);
|
@@ -110,6 +131,8 @@ public:
|
|
110
131
|
* @cond
|
111
132
|
*/
|
112
133
|
Results(int ndim, INDEX_t nobs, CLUSTER_t ncenters) : centers(ndim * ncenters), clusters(nobs) {}
|
134
|
+
|
135
|
+
Results() {}
|
113
136
|
/**
|
114
137
|
* @endcond
|
115
138
|
*/
|
data/vendor/kmeans/Lloyd.hpp
CHANGED
@@ -48,13 +48,19 @@ public:
|
|
48
48
|
*/
|
49
49
|
struct Defaults {
|
50
50
|
/**
|
51
|
-
* See `
|
51
|
+
* See `set_max_iterations()` for more details.
|
52
52
|
*/
|
53
53
|
static constexpr int max_iterations = 10;
|
54
|
+
|
55
|
+
/**
|
56
|
+
* See `set_num_threads()` for more details.
|
57
|
+
*/
|
58
|
+
static constexpr int num_threads = 1;
|
54
59
|
};
|
55
60
|
|
56
61
|
private:
|
57
62
|
int maxiter = Defaults::max_iterations;
|
63
|
+
int nthreads = Defaults::num_threads;
|
58
64
|
|
59
65
|
public:
|
60
66
|
/**
|
@@ -68,6 +74,16 @@ public:
|
|
68
74
|
return *this;
|
69
75
|
}
|
70
76
|
|
77
|
+
/**
|
78
|
+
* @param n Number of threads to use.
|
79
|
+
*
|
80
|
+
* @return A reference to this `HartiganWong` object.
|
81
|
+
*/
|
82
|
+
Lloyd& set_num_threads(int n = Defaults::num_threads) {
|
83
|
+
nthreads = n;
|
84
|
+
return *this;
|
85
|
+
}
|
86
|
+
|
71
87
|
public:
|
72
88
|
Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
|
73
89
|
if (is_edge_case(nobs, ncenters)) {
|
@@ -83,10 +99,21 @@ public:
|
|
83
99
|
// Note that we move the `updated` check outside of this loop
|
84
100
|
// so that, in the future, this is more easily parallelized.
|
85
101
|
QuickSearch<DATA_t, CLUSTER_t> index(ndim, ncenters, centers);
|
86
|
-
|
102
|
+
|
103
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
104
|
+
#pragma omp parallel for num_threads(nthreads)
|
87
105
|
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|
106
|
+
#else
|
107
|
+
KMEANS_CUSTOM_PARALLEL(nobs, [&](INDEX_t first, INDEX_t last) -> void {
|
108
|
+
for (INDEX_t obs = first; obs < last; ++obs) {
|
109
|
+
#endif
|
88
110
|
copy[obs] = index.find(data + obs * ndim);
|
111
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
112
|
+
}
|
113
|
+
#else
|
89
114
|
}
|
115
|
+
}, nthreads);
|
116
|
+
#endif
|
90
117
|
|
91
118
|
bool updated = false;
|
92
119
|
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|