umappp 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -4
- data/ext/umappp/umappp.cpp +41 -43
- data/lib/umappp/version.rb +1 -1
- data/lib/umappp.rb +5 -4
- data/vendor/aarand/aarand.hpp +141 -28
- data/vendor/annoy/annoylib.h +1 -1
- data/vendor/hnswlib/bruteforce.h +142 -127
- data/vendor/hnswlib/hnswalg.h +1018 -939
- data/vendor/hnswlib/hnswlib.h +149 -58
- data/vendor/hnswlib/space_ip.h +322 -229
- data/vendor/hnswlib/space_l2.h +283 -240
- data/vendor/hnswlib/visited_list_pool.h +54 -55
- data/vendor/irlba/irlba.hpp +12 -27
- data/vendor/irlba/lanczos.hpp +30 -31
- data/vendor/irlba/parallel.hpp +37 -38
- data/vendor/irlba/utils.hpp +12 -23
- data/vendor/irlba/wrappers.hpp +239 -70
- data/vendor/kmeans/Details.hpp +1 -1
- data/vendor/kmeans/HartiganWong.hpp +28 -2
- data/vendor/kmeans/InitializeKmeansPP.hpp +29 -1
- data/vendor/kmeans/Kmeans.hpp +25 -2
- data/vendor/kmeans/Lloyd.hpp +29 -2
- data/vendor/kmeans/MiniBatch.hpp +48 -8
- data/vendor/knncolle/Annoy/Annoy.hpp +3 -0
- data/vendor/knncolle/Hnsw/Hnsw.hpp +3 -0
- data/vendor/knncolle/Kmknn/Kmknn.hpp +11 -1
- data/vendor/knncolle/utils/find_nearest_neighbors.hpp +8 -6
- data/vendor/umappp/Umap.hpp +85 -43
- data/vendor/umappp/optimize_layout.hpp +410 -133
- data/vendor/umappp/spectral_init.hpp +4 -1
- metadata +6 -6
data/vendor/irlba/wrappers.hpp
CHANGED
@@ -17,15 +17,150 @@
|
|
17
17
|
*
|
18
18
|
* - `mat.rows()`, returning the number of rows.
|
19
19
|
* - `mat.cols()`, returning the number of columns.
|
20
|
-
* - `mat.
|
21
|
-
*
|
22
|
-
* - `mat.
|
23
|
-
* `rhs` should be an `Eigen::VectorXd` (or an expression equivalent) while `out` should be a `Eigen::VectorXd`.
|
20
|
+
* - `mat.workspace()`, returning an instance of a workspace class for multiplication.
|
21
|
+
* - `mat.adjoint_workspace()`, returning an instance of a workspace class for adjoint multiplication.
|
22
|
+
* - `mat.multiply(rhs, work, out)`, which computes the matrix product `mat * rhs` and stores it in `out` - see `irlba::Centered::multiply()` for the typical signature.
|
23
|
+
* `rhs` should be a const reference to an `Eigen::VectorXd` (or an expression equivalent, via templating) while `out` should be a non-const reference to a `Eigen::VectorXd`.
|
24
|
+
* `work` should be the return value of `mat.workspace()` and is passed in as a non-const reference.
|
25
|
+
* - `mat.adjoint_multiply(rhs, work, out)`, which computes the matrix product `mat.adjoint() * rhs` and stores it in `out` - see `irlba::Centered::adjoint_multiply()` for the typical signature.
|
26
|
+
* `rhs` should be a const reference to an `Eigen::VectorXd` (or an expression equivalent, via templating) while `out` should be a non-const reference to a `Eigen::VectorXd`.
|
27
|
+
* `work` should be the return value of `mat.adjoint_workspace()` and is passed in as a non-const reference.
|
24
28
|
* - `mat.realize()`, which returns an `Eigen::MatrixXd` containing the matrix with all modifications applied.
|
29
|
+
*
|
30
|
+
* The workspace class is used to allocate space for intermediate results across multiple calls to `multiply()`.
|
31
|
+
* This class should contain a member of type `WrappedWorkspace<M>`, where `M` is the type of the underlying matrix;
|
32
|
+
* this member can be initialized by calling the `wrapped_workspace()` function on the underlying matrix.
|
33
|
+
* If a wrapper does not have any intermediate results, it can just return `WrappedWorkspace<M>` directly.
|
34
|
+
* The same logic applies to `adjoint_multiply()` using the `AdjointWrappedWorkspace` template class and `wrapped_adjoint_workspace()`.
|
35
|
+
*
|
36
|
+
* Implementations of the `multiply()` and `adjoint_multiply()` methods may use the `wrapped_multiply()` and `wrapped_adjoint_multiply()` functions.
|
37
|
+
* This will handle the differences in the calls between **Eigen** matrices and **irlba** wrappers.
|
25
38
|
*/
|
26
39
|
|
27
40
|
namespace irlba {
|
28
41
|
|
42
|
+
/**
|
43
|
+
* @cond
|
44
|
+
*/
|
45
|
+
template<class Matrix, typename = int>
|
46
|
+
struct WrappedWorkspaceInternal {
|
47
|
+
typedef bool type;
|
48
|
+
};
|
49
|
+
|
50
|
+
template<class Matrix>
|
51
|
+
struct WrappedWorkspaceInternal<Matrix, decltype((void) std::declval<Matrix>().workspace(), 0)> {
|
52
|
+
typedef decltype(std::declval<Matrix>().workspace()) type;
|
53
|
+
};
|
54
|
+
|
55
|
+
template<class Matrix, typename = int>
|
56
|
+
struct WrappedAdjointWorkspaceInternal {
|
57
|
+
typedef bool type;
|
58
|
+
};
|
59
|
+
|
60
|
+
template<class Matrix>
|
61
|
+
struct WrappedAdjointWorkspaceInternal<Matrix, decltype((void) std::declval<Matrix>().adjoint_workspace(), 0)> {
|
62
|
+
typedef decltype(std::declval<Matrix>().adjoint_workspace()) type;
|
63
|
+
};
|
64
|
+
/**
|
65
|
+
* @endcond
|
66
|
+
*/
|
67
|
+
|
68
|
+
/**
|
69
|
+
* @tparam Matrix Type of the underlying matrix in the wrapper.
|
70
|
+
*
|
71
|
+
* This type is equivalent to the workspace class of `Matrix`, or a placeholder boolean if `Matrix` is an Eigen class.
|
72
|
+
*/
|
73
|
+
template<class Matrix>
|
74
|
+
using WrappedWorkspace = typename WrappedWorkspaceInternal<Matrix>::type;
|
75
|
+
|
76
|
+
/**
|
77
|
+
* @tparam Matrix Type of the underlying matrix in the wrapper.
|
78
|
+
*
|
79
|
+
* This type is equivalent to the adjoint workspace class of `Matrix`, or a placeholder boolean if `Matrix` is an Eigen class.
|
80
|
+
*/
|
81
|
+
template<class Matrix>
|
82
|
+
using WrappedAdjointWorkspace = typename WrappedAdjointWorkspaceInternal<Matrix>::type;
|
83
|
+
|
84
|
+
/**
|
85
|
+
* @tparam Matrix Type of the underlying matrix in the wrapper.
|
86
|
+
* @param mat Pointer to the wrapped matrix instance.
|
87
|
+
* @return The workspace of `mat`, or `false` if `Matrix` is an **Eigen** class.
|
88
|
+
*/
|
89
|
+
template<class Matrix>
|
90
|
+
WrappedWorkspace<Matrix> wrapped_workspace(const Matrix* mat) {
|
91
|
+
if constexpr(has_multiply_method<Matrix>::value) { // using this as a proxy for whether it's an Eigen matrix or not.
|
92
|
+
return false;
|
93
|
+
} else {
|
94
|
+
return mat->workspace();
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
/**
|
99
|
+
* @tparam Matrix Type of the underlying matrix in the wrapper.
|
100
|
+
* @param mat Pointer to the wrapped matrix instance.
|
101
|
+
* @return The adjoint workspace of `mat`, or `false` if `Matrix` is an **Eigen** class.
|
102
|
+
*/
|
103
|
+
template<class Matrix>
|
104
|
+
WrappedAdjointWorkspace<Matrix> wrapped_adjoint_workspace(const Matrix* mat) {
|
105
|
+
if constexpr(has_adjoint_multiply_method<Matrix>::value) {
|
106
|
+
return false;
|
107
|
+
} else {
|
108
|
+
return mat->adjoint_workspace();
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
/**
|
113
|
+
* @tparam Matrix Type of the wrapped matrix.
|
114
|
+
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
115
|
+
*
|
116
|
+
* @param[in] mat Pointer to the wrapped matrix instance.
|
117
|
+
* @param[in] rhs The right-hand side of the matrix product.
|
118
|
+
* @param work The return value of `wrapped_workspace()` on `mat`.
|
119
|
+
* @param[out] out The output vector to store the matrix product.
|
120
|
+
* This is filled with the product of this matrix and `rhs`.
|
121
|
+
*/
|
122
|
+
template<class Matrix, class Right>
|
123
|
+
void wrapped_multiply(const Matrix* mat, const Right& rhs, WrappedWorkspace<Matrix>& work, Eigen::VectorXd& out) {
|
124
|
+
if constexpr(has_multiply_method<Matrix>::value) {
|
125
|
+
out.noalias() = *mat * rhs;
|
126
|
+
} else {
|
127
|
+
mat->multiply(rhs, work, out);
|
128
|
+
}
|
129
|
+
}
|
130
|
+
|
131
|
+
/**
|
132
|
+
* @tparam Matrix Type of the wrapped matrix.
|
133
|
+
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
134
|
+
*
|
135
|
+
* @param[in] mat Poitner to the wrapped matrix instance.
|
136
|
+
* @param[in] rhs The right-hand side of the matrix product.
|
137
|
+
* @param work The return value of `wrapped_adjoint_workspace()` on `mat`.
|
138
|
+
* @param[out] out The output vector to store the matrix product.
|
139
|
+
* This is filled with the product of this matrix and `rhs`.
|
140
|
+
*/
|
141
|
+
template<class Matrix, class Right>
|
142
|
+
void wrapped_adjoint_multiply(const Matrix* mat, const Right& rhs, WrappedAdjointWorkspace<Matrix>& work, Eigen::VectorXd& out) {
|
143
|
+
if constexpr(has_adjoint_multiply_method<Matrix>::value) {
|
144
|
+
out.noalias() = mat->adjoint() * rhs;
|
145
|
+
} else {
|
146
|
+
mat->adjoint_multiply(rhs, work, out);
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
/**
|
151
|
+
* @tparam Matrix Type of the wrapped matrix.
|
152
|
+
* @param[in] mat Pointer to the wrapped matrix instance.
|
153
|
+
* @return A dense **Eigen** matrix containing the realized contents of `mat`.
|
154
|
+
*/
|
155
|
+
template<class Matrix>
|
156
|
+
Eigen::MatrixXd wrapped_realize(const Matrix* mat) {
|
157
|
+
if constexpr(has_realize_method<Matrix>::value) {
|
158
|
+
return mat->realize();
|
159
|
+
} else {
|
160
|
+
return Eigen::MatrixXd(*mat);
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
29
164
|
/**
|
30
165
|
* @brief Wrapper for a centered matrix.
|
31
166
|
*
|
@@ -53,23 +188,46 @@ struct Centered {
|
|
53
188
|
*/
|
54
189
|
auto cols() const { return mat->cols(); }
|
55
190
|
|
191
|
+
public:
|
192
|
+
/**
|
193
|
+
* Workspace type for `multiply()`.
|
194
|
+
* Currently, this is just an alias for the workspace type of the underlying matrix.
|
195
|
+
*/
|
196
|
+
typedef WrappedWorkspace<Matrix> Workspace;
|
197
|
+
|
198
|
+
/**
|
199
|
+
* @return Workspace for use in `multiply()`.
|
200
|
+
*/
|
201
|
+
Workspace workspace() const {
|
202
|
+
return wrapped_workspace(mat);
|
203
|
+
}
|
204
|
+
|
205
|
+
/**
|
206
|
+
* Workspace type for `adjoint_multiply()`.
|
207
|
+
* Currently, this is just an alias for the adjoint workspace type of the underlying matrix.
|
208
|
+
*/
|
209
|
+
typedef WrappedAdjointWorkspace<Matrix> AdjointWorkspace;
|
210
|
+
|
211
|
+
/**
|
212
|
+
* @return Workspace for use in `adjoint_multiply()`.
|
213
|
+
*/
|
214
|
+
AdjointWorkspace adjoint_workspace() const {
|
215
|
+
return wrapped_adjoint_workspace(mat);
|
216
|
+
}
|
217
|
+
|
218
|
+
public:
|
56
219
|
/**
|
57
220
|
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
58
221
|
*
|
59
222
|
* @param[in] rhs The right-hand side of the matrix product.
|
60
|
-
*
|
223
|
+
* @param work The return value of `workspace()`.
|
224
|
+
* This can be reused across multiple `multiply()` calls.
|
61
225
|
* @param[out] out The output vector to store the matrix product.
|
62
|
-
*
|
63
|
-
* @return `out` is filled with the product of this matrix and `rhs`.
|
226
|
+
* This is filled with the product of this matrix and `rhs`.
|
64
227
|
*/
|
65
228
|
template<class Right>
|
66
|
-
void multiply(const Right& rhs, Eigen::VectorXd& out) const {
|
67
|
-
|
68
|
-
out.noalias() = *mat * rhs;
|
69
|
-
} else {
|
70
|
-
mat->multiply(rhs, out);
|
71
|
-
}
|
72
|
-
|
229
|
+
void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& out) const {
|
230
|
+
wrapped_multiply(mat, rhs, work, out);
|
73
231
|
double beta = rhs.dot(*center);
|
74
232
|
for (auto& o : out) {
|
75
233
|
o -= beta;
|
@@ -81,19 +239,14 @@ struct Centered {
|
|
81
239
|
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
82
240
|
*
|
83
241
|
* @param[in] rhs The right-hand side of the matrix product.
|
84
|
-
*
|
242
|
+
* @param work The return value of `adjoint_workspace()`.
|
243
|
+
* This can be reused across multiple `adjoint_multiply()` calls.
|
85
244
|
* @param[out] out The output vector to store the matrix product.
|
86
|
-
*
|
87
|
-
* @return `out` is filled with the product of the transpose of this matrix and `rhs`.
|
245
|
+
* This is filled with the product of the transpose of this matrix and `rhs`.
|
88
246
|
*/
|
89
247
|
template<class Right>
|
90
|
-
void adjoint_multiply(const Right& rhs, Eigen::VectorXd& out) const {
|
91
|
-
|
92
|
-
out.noalias() = mat->adjoint() * rhs;
|
93
|
-
} else {
|
94
|
-
mat->adjoint_multiply(rhs, out);
|
95
|
-
}
|
96
|
-
|
248
|
+
void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& out) const {
|
249
|
+
wrapped_adjoint_multiply(mat, rhs, work, out);
|
97
250
|
double beta = rhs.sum();
|
98
251
|
out -= beta * (*center);
|
99
252
|
return;
|
@@ -104,23 +257,13 @@ struct Centered {
|
|
104
257
|
* where the centering has been explicitly applied.
|
105
258
|
*/
|
106
259
|
Eigen::MatrixXd realize() const {
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
}
|
260
|
+
Eigen::MatrixXd output = wrapped_realize(mat);
|
261
|
+
for (Eigen::Index c = 0; c < output.cols(); ++c) {
|
262
|
+
for (Eigen::Index r = 0; r < output.rows(); ++r) {
|
263
|
+
output(r, c) -= (*center)[c];
|
112
264
|
}
|
113
|
-
};
|
114
|
-
|
115
|
-
if constexpr(has_realize_method<Matrix>::value) {
|
116
|
-
Eigen::MatrixXd output = mat->realize();
|
117
|
-
subtractor(output);
|
118
|
-
return output;
|
119
|
-
} else {
|
120
|
-
Eigen::MatrixXd output(*mat);
|
121
|
-
subtractor(output);
|
122
|
-
return output;
|
123
265
|
}
|
266
|
+
return output;
|
124
267
|
}
|
125
268
|
|
126
269
|
private:
|
@@ -155,22 +298,61 @@ struct Scaled {
|
|
155
298
|
*/
|
156
299
|
auto cols() const { return mat->cols(); }
|
157
300
|
|
301
|
+
public:
|
302
|
+
/**
|
303
|
+
* @brief Workspace type for `multiply()`.
|
304
|
+
*/
|
305
|
+
struct Workspace {
|
306
|
+
/**
|
307
|
+
* @cond
|
308
|
+
*/
|
309
|
+
Workspace(size_t n, WrappedWorkspace<Matrix> c) : product(n), child(std::move(c)) {}
|
310
|
+
Eigen::VectorXd product;
|
311
|
+
WrappedWorkspace<Matrix> child;
|
312
|
+
/**
|
313
|
+
* @endcond
|
314
|
+
*/
|
315
|
+
};
|
316
|
+
|
317
|
+
/**
|
318
|
+
* @return Workspace for use in `multiply()`.
|
319
|
+
*/
|
320
|
+
Workspace workspace() const {
|
321
|
+
return Workspace(mat->cols(), wrapped_workspace(mat));
|
322
|
+
}
|
323
|
+
|
324
|
+
/**
|
325
|
+
* Workspace type for `adjoint_multiply()`.
|
326
|
+
* Currently, this is just an alias for the adjoint workspace type of the underlying matrix.
|
327
|
+
*/
|
328
|
+
typedef WrappedAdjointWorkspace<Matrix> AdjointWorkspace;
|
329
|
+
|
330
|
+
/**
|
331
|
+
* @return Workspace for use in `adjoint_multiply()`.
|
332
|
+
*/
|
333
|
+
AdjointWorkspace adjoint_workspace() const {
|
334
|
+
return wrapped_adjoint_workspace(mat);
|
335
|
+
}
|
336
|
+
|
337
|
+
public:
|
158
338
|
/**
|
159
339
|
* @tparam Right An `Eigen::VectorXd` or equivalent expression.
|
160
340
|
*
|
161
341
|
* @param[in] rhs The right-hand side of the matrix product.
|
162
342
|
* This should be a vector or have only one column.
|
343
|
+
* @param work The return value of `workspace()`.
|
344
|
+
* This can be reused across multiple `multiply()` calls.
|
163
345
|
* @param[out] out The output vector to store the matrix product.
|
164
|
-
*
|
165
|
-
* @return `out` is filled with the product of this matrix and `rhs`.
|
346
|
+
* This is filled with the product of this matrix and `rhs`.
|
166
347
|
*/
|
167
348
|
template<class Right>
|
168
|
-
void multiply(const Right& rhs, Eigen::VectorXd& out) const {
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
349
|
+
void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& out) const {
|
350
|
+
// We store the result here, because the underlying matrix's multiply()
|
351
|
+
// might need to access rhs/scale multiple times, especially if it's
|
352
|
+
// parallelized. Better to pay the cost of accessing a separate memory
|
353
|
+
// space than computing the quotient repeatedly.
|
354
|
+
work.product = rhs.cwiseQuotient(*scale);
|
355
|
+
wrapped_multiply(mat, work.product, work.child, out);
|
174
356
|
return;
|
175
357
|
}
|
176
358
|
|
@@ -179,17 +361,14 @@ struct Scaled {
|
|
179
361
|
*
|
180
362
|
* @param[in] rhs The right-hand side of the matrix product.
|
181
363
|
* This should be a vector or have only one column.
|
364
|
+
* @param work The return value of `adjoint_workspace()`.
|
365
|
+
* This can be reused across multiple `adjoint_multiply()` calls.
|
182
366
|
* @param[out] out The output vector to store the matrix product.
|
183
|
-
*
|
184
|
-
* @return `out` is filled with the product of the transpose of this matrix and `rhs`.
|
367
|
+
* This is filled with the product of the transpose of this matrix and `rhs`.
|
185
368
|
*/
|
186
369
|
template<class Right>
|
187
|
-
void adjoint_multiply(const Right& rhs, Eigen::VectorXd& out) const {
|
188
|
-
|
189
|
-
out.noalias() = mat->adjoint() * rhs;
|
190
|
-
} else {
|
191
|
-
mat->adjoint_multiply(rhs, out);
|
192
|
-
}
|
370
|
+
void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& out) const {
|
371
|
+
wrapped_adjoint_multiply(mat, rhs, work, out);
|
193
372
|
out.noalias() = out.cwiseQuotient(*scale);
|
194
373
|
return;
|
195
374
|
}
|
@@ -199,23 +378,13 @@ struct Scaled {
|
|
199
378
|
* where the scaling has been explicitly applied.
|
200
379
|
*/
|
201
380
|
Eigen::MatrixXd realize() const {
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
}
|
381
|
+
Eigen::MatrixXd output = wrapped_realize(mat);
|
382
|
+
for (Eigen::Index c = 0; c < output.cols(); ++c) {
|
383
|
+
for (Eigen::Index r = 0; r < output.rows(); ++r) {
|
384
|
+
output(r, c) /= (*scale)[c];
|
207
385
|
}
|
208
|
-
};
|
209
|
-
|
210
|
-
if constexpr(has_realize_method<Matrix>::value) {
|
211
|
-
Eigen::MatrixXd output = mat->realize();
|
212
|
-
scaler(output);
|
213
|
-
return output;
|
214
|
-
} else {
|
215
|
-
Eigen::MatrixXd output(*mat);
|
216
|
-
scaler(output);
|
217
|
-
return output;
|
218
386
|
}
|
387
|
+
return output;
|
219
388
|
}
|
220
389
|
|
221
390
|
private:
|
data/vendor/kmeans/Details.hpp
CHANGED
@@ -106,13 +106,19 @@ public:
|
|
106
106
|
*/
|
107
107
|
struct Defaults {
|
108
108
|
/**
|
109
|
-
* See `
|
109
|
+
* See `set_max_iterations()` for more details.
|
110
110
|
*/
|
111
111
|
static constexpr int max_iterations = 10;
|
112
|
+
|
113
|
+
/**
|
114
|
+
* See `set_num_threads()` for more details.
|
115
|
+
*/
|
116
|
+
static constexpr int num_threads = 1;
|
112
117
|
};
|
113
118
|
|
114
119
|
private:
|
115
120
|
int maxiter = Defaults::max_iterations;
|
121
|
+
int nthreads = Defaults::num_threads;
|
116
122
|
|
117
123
|
public:
|
118
124
|
/**
|
@@ -126,6 +132,16 @@ public:
|
|
126
132
|
return *this;
|
127
133
|
}
|
128
134
|
|
135
|
+
/**
|
136
|
+
* @param n Number of threads to use.
|
137
|
+
*
|
138
|
+
* @return A reference to this `HartiganWong` object.
|
139
|
+
*/
|
140
|
+
HartiganWong& set_num_threads(int n = Defaults::num_threads) {
|
141
|
+
nthreads = n;
|
142
|
+
return *this;
|
143
|
+
}
|
144
|
+
|
129
145
|
public:
|
130
146
|
Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
|
131
147
|
num_dim = ndim;
|
@@ -160,8 +176,13 @@ public:
|
|
160
176
|
/* For each point I, find its two closest centres, IC1(I) and
|
161
177
|
* IC2(I). Assign it to IC1(I).
|
162
178
|
*/
|
163
|
-
|
179
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
180
|
+
#pragma omp parallel for num_threads(nthreads)
|
164
181
|
for (INDEX_t obs = 0; obs < num_obs; ++obs) {
|
182
|
+
#else
|
183
|
+
KMEANS_CUSTOM_PARALLEL(num_obs, [&](INDEX_t first, INDEX_t last) -> void {
|
184
|
+
for (INDEX_t obs = first; obs < last; ++obs) {
|
185
|
+
#endif
|
165
186
|
auto& best = ic1[obs];
|
166
187
|
best = 0;
|
167
188
|
DATA_t best_dist = squared_distance_from_cluster(obs, best);
|
@@ -186,7 +207,12 @@ public:
|
|
186
207
|
}
|
187
208
|
}
|
188
209
|
}
|
210
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
211
|
+
}
|
212
|
+
#else
|
189
213
|
}
|
214
|
+
}, nthreads);
|
215
|
+
#endif
|
190
216
|
|
191
217
|
/* Update cluster centres to be the average of points contained
|
192
218
|
* within them.
|
@@ -44,6 +44,11 @@ public:
|
|
44
44
|
* See `set_seed()` for more details.
|
45
45
|
*/
|
46
46
|
static constexpr uint64_t seed = 6523u;
|
47
|
+
|
48
|
+
/**
|
49
|
+
* See `set_num_threads()` for more details.
|
50
|
+
*/
|
51
|
+
static constexpr int num_threads = 1;
|
47
52
|
};
|
48
53
|
|
49
54
|
/**
|
@@ -55,8 +60,20 @@ public:
|
|
55
60
|
seed = s;
|
56
61
|
return *this;
|
57
62
|
}
|
63
|
+
|
64
|
+
/**
|
65
|
+
* @param n Number of threads to use.
|
66
|
+
*
|
67
|
+
* @return A reference to this `InitializeKmeansPP` object.
|
68
|
+
*/
|
69
|
+
InitializeKmeansPP& set_num_threads(int n = Defaults::num_threads) {
|
70
|
+
nthreads = n;
|
71
|
+
return *this;
|
72
|
+
}
|
73
|
+
|
58
74
|
private:
|
59
75
|
uint64_t seed = Defaults::seed;
|
76
|
+
int nthreads = Defaults::num_threads;
|
60
77
|
|
61
78
|
public:
|
62
79
|
/**
|
@@ -74,8 +91,13 @@ public:
|
|
74
91
|
if (!sofar.empty()) {
|
75
92
|
auto last = sofar.back();
|
76
93
|
|
77
|
-
|
94
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
95
|
+
#pragma omp parallel for num_threads(nthreads)
|
78
96
|
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|
97
|
+
#else
|
98
|
+
KMEANS_CUSTOM_PARALLEL(nobs, [&](INDEX_t first, INDEX_t end) -> void {
|
99
|
+
for (INDEX_t obs = first; obs < end; ++obs) {
|
100
|
+
#endif
|
79
101
|
if (mindist[obs]) {
|
80
102
|
const DATA_t* acopy = data + obs * ndim;
|
81
103
|
const DATA_t* scopy = data + last * ndim;
|
@@ -88,7 +110,13 @@ public:
|
|
88
110
|
mindist[obs] = r2;
|
89
111
|
}
|
90
112
|
}
|
113
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
91
114
|
}
|
115
|
+
#else
|
116
|
+
}
|
117
|
+
}, nthreads);
|
118
|
+
#endif
|
119
|
+
|
92
120
|
} else {
|
93
121
|
counter = nobs;
|
94
122
|
}
|
data/vendor/kmeans/Kmeans.hpp
CHANGED
@@ -40,10 +40,16 @@ public:
|
|
40
40
|
* See `set_seed()` for more details.
|
41
41
|
*/
|
42
42
|
static constexpr uint64_t seed = 5489u;
|
43
|
+
|
44
|
+
/**
|
45
|
+
* See `set_num_threads()` for more details.
|
46
|
+
*/
|
47
|
+
static constexpr int num_threads = 1;
|
43
48
|
};
|
44
49
|
|
45
50
|
private:
|
46
51
|
uint64_t seed = Defaults::seed;
|
52
|
+
int nthreads = Defaults::num_threads;
|
47
53
|
|
48
54
|
public:
|
49
55
|
/**
|
@@ -52,14 +58,27 @@ public:
|
|
52
58
|
*
|
53
59
|
* @return A reference to this `Kmeans` object.
|
54
60
|
*
|
55
|
-
* This seed is only used for the default `
|
56
|
-
* Otherwise,
|
61
|
+
* This seed is only used for the default `initializer` instance in `run()`.
|
62
|
+
* Otherwise, if an `initializer` is explicitly passed to `run()`, its seed is respected.
|
57
63
|
*/
|
58
64
|
Kmeans& set_seed(uint64_t s = 5489u) {
|
59
65
|
seed = s;
|
60
66
|
return *this;
|
61
67
|
}
|
62
68
|
|
69
|
+
/**
|
70
|
+
* @param n Number of threads to use.
|
71
|
+
*
|
72
|
+
* @return A reference to this `Kmeans` object.
|
73
|
+
*
|
74
|
+
* This setting is only used for the default `refiner` and `initializer` instances in `run()`.
|
75
|
+
* Otherwise, if an `initializer` or `refiner` is explicitly passed to `run()`, the number of threads specified in the instance is respected.
|
76
|
+
*/
|
77
|
+
Kmeans& set_num_threads(int n = Defaults::num_threads) {
|
78
|
+
nthreads = n;
|
79
|
+
return *this;
|
80
|
+
}
|
81
|
+
|
63
82
|
public:
|
64
83
|
/**
|
65
84
|
* @param ndim Number of dimensions.
|
@@ -88,6 +107,7 @@ public:
|
|
88
107
|
{
|
89
108
|
if (initializer == NULL) {
|
90
109
|
InitializeKmeansPP<DATA_t, CLUSTER_t, INDEX_t> init;
|
110
|
+
init.set_seed(seed).set_num_threads(nthreads);
|
91
111
|
ncenters = init.run(ndim, nobs, data, ncenters, centers, clusters);
|
92
112
|
} else {
|
93
113
|
ncenters = initializer->run(ndim, nobs, data, ncenters, centers, clusters);
|
@@ -95,6 +115,7 @@ public:
|
|
95
115
|
|
96
116
|
if (refiner == NULL) {
|
97
117
|
HartiganWong<DATA_t, CLUSTER_t, INDEX_t> hw;
|
118
|
+
hw.set_num_threads(nthreads);
|
98
119
|
return hw.run(ndim, nobs, data, ncenters, centers, clusters);
|
99
120
|
} else {
|
100
121
|
return refiner->run(ndim, nobs, data, ncenters, centers, clusters);
|
@@ -110,6 +131,8 @@ public:
|
|
110
131
|
* @cond
|
111
132
|
*/
|
112
133
|
Results(int ndim, INDEX_t nobs, CLUSTER_t ncenters) : centers(ndim * ncenters), clusters(nobs) {}
|
134
|
+
|
135
|
+
Results() {}
|
113
136
|
/**
|
114
137
|
* @endcond
|
115
138
|
*/
|
data/vendor/kmeans/Lloyd.hpp
CHANGED
@@ -48,13 +48,19 @@ public:
|
|
48
48
|
*/
|
49
49
|
struct Defaults {
|
50
50
|
/**
|
51
|
-
* See `
|
51
|
+
* See `set_max_iterations()` for more details.
|
52
52
|
*/
|
53
53
|
static constexpr int max_iterations = 10;
|
54
|
+
|
55
|
+
/**
|
56
|
+
* See `set_num_threads()` for more details.
|
57
|
+
*/
|
58
|
+
static constexpr int num_threads = 1;
|
54
59
|
};
|
55
60
|
|
56
61
|
private:
|
57
62
|
int maxiter = Defaults::max_iterations;
|
63
|
+
int nthreads = Defaults::num_threads;
|
58
64
|
|
59
65
|
public:
|
60
66
|
/**
|
@@ -68,6 +74,16 @@ public:
|
|
68
74
|
return *this;
|
69
75
|
}
|
70
76
|
|
77
|
+
/**
|
78
|
+
* @param n Number of threads to use.
|
79
|
+
*
|
80
|
+
* @return A reference to this `Lloyd` object.
|
81
|
+
*/
|
82
|
+
Lloyd& set_num_threads(int n = Defaults::num_threads) {
|
83
|
+
nthreads = n;
|
84
|
+
return *this;
|
85
|
+
}
|
86
|
+
|
71
87
|
public:
|
72
88
|
Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
|
73
89
|
if (is_edge_case(nobs, ncenters)) {
|
@@ -83,10 +99,21 @@ public:
|
|
83
99
|
// Note that we move the `updated` check outside of this loop
|
84
100
|
// so that, in the future, this is more easily parallelized.
|
85
101
|
QuickSearch<DATA_t, CLUSTER_t> index(ndim, ncenters, centers);
|
86
|
-
|
102
|
+
|
103
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
104
|
+
#pragma omp parallel for num_threads(nthreads)
|
87
105
|
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|
106
|
+
#else
|
107
|
+
KMEANS_CUSTOM_PARALLEL(nobs, [&](INDEX_t first, INDEX_t last) -> void {
|
108
|
+
for (INDEX_t obs = first; obs < last; ++obs) {
|
109
|
+
#endif
|
88
110
|
copy[obs] = index.find(data + obs * ndim);
|
111
|
+
#ifndef KMEANS_CUSTOM_PARALLEL
|
112
|
+
}
|
113
|
+
#else
|
89
114
|
}
|
115
|
+
}, nthreads);
|
116
|
+
#endif
|
90
117
|
|
91
118
|
bool updated = false;
|
92
119
|
for (INDEX_t obs = 0; obs < nobs; ++obs) {
|