umappp 0.1.6 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,15 +17,150 @@
17
17
  *
18
18
  * - `mat.rows()`, returning the number of rows.
19
19
  * - `mat.cols()`, returning the number of columns.
20
- * - `mat.multiply(rhs, out)`, which computes the matrix product `mat * rhs` and stores it in `out`.
21
- * `rhs` should be an `Eigen::VectorXd` (or an expression equivalent) while `out` should be a `Eigen::VectorXd`.
22
- * - `mat.adjoint_multiply(rhs, out)`, which computes the matrix product `mat.adjoint() * rhs` and stores it in `out`.
23
- * `rhs` should be an `Eigen::VectorXd` (or an expression equivalent) while `out` should be a `Eigen::VectorXd`.
20
+ * - `mat.workspace()`, returning an instance of a workspace class for multiplication.
21
+ * - `mat.adjoint_workspace()`, returning an instance of a workspace class for adjoint multiplication.
22
+ * - `mat.multiply(rhs, work, out)`, which computes the matrix product `mat * rhs` and stores it in `out` - see `irlba::Centered::multiply()` for the typical signature.
23
+ * `rhs` should be a const reference to an `Eigen::VectorXd` (or an expression equivalent, via templating) while `out` should be a non-const reference to an `Eigen::VectorXd`.
24
+ * `work` should be the return value of `mat.workspace()` and is passed in as a non-const reference.
25
+ * - `mat.adjoint_multiply(rhs, work, out)`, which computes the matrix product `mat.adjoint() * rhs` and stores it in `out` - see `irlba::Centered::adjoint_multiply()` for the typical signature.
26
+ * `rhs` should be a const reference to an `Eigen::VectorXd` (or an expression equivalent, via templating) while `out` should be a non-const reference to an `Eigen::VectorXd`.
27
+ * `work` should be the return value of `mat.adjoint_workspace()` and is passed in as a non-const reference.
24
28
  * - `mat.realize()`, which returns an `Eigen::MatrixXd` containing the matrix with all modifications applied.
29
+ *
30
+ * The workspace class is used to allocate space for intermediate results across multiple calls to `multiply()`.
31
+ * This class should contain a member of type `WrappedWorkspace<M>`, where `M` is the type of the underlying matrix;
32
+ * this member can be initialized by calling the `wrapped_workspace()` function on the underlying matrix.
33
+ * If a wrapper does not have any intermediate results, it can just return `WrappedWorkspace<M>` directly.
34
+ * The same logic applies to `adjoint_multiply()` using the `AdjointWrappedWorkspace` template class and `wrapped_adjoint_workspace()`.
35
+ *
36
+ * Implementations of the `multiply()` and `adjoint_multiply()` methods may use the `wrapped_multiply()` and `wrapped_adjoint_multiply()` functions.
37
+ * This will handle the differences in the calls between **Eigen** matrices and **irlba** wrappers.
25
38
  */
26
39
 
27
40
  namespace irlba {
28
41
 
42
+ /**
43
+ * @cond
44
+ */
45
+ template<class Matrix, typename = int>
46
+ struct WrappedWorkspaceInternal {
47
+ typedef bool type;
48
+ };
49
+
50
+ template<class Matrix>
51
+ struct WrappedWorkspaceInternal<Matrix, decltype((void) std::declval<Matrix>().workspace(), 0)> {
52
+ typedef decltype(std::declval<Matrix>().workspace()) type;
53
+ };
54
+
55
+ template<class Matrix, typename = int>
56
+ struct WrappedAdjointWorkspaceInternal {
57
+ typedef bool type;
58
+ };
59
+
60
+ template<class Matrix>
61
+ struct WrappedAdjointWorkspaceInternal<Matrix, decltype((void) std::declval<Matrix>().adjoint_workspace(), 0)> {
62
+ typedef decltype(std::declval<Matrix>().adjoint_workspace()) type;
63
+ };
64
+ /**
65
+ * @endcond
66
+ */
67
+
68
+ /**
69
+ * @tparam Matrix Type of the underlying matrix in the wrapper.
70
+ *
71
+ * This type is equivalent to the workspace class of `Matrix`, or a placeholder boolean if `Matrix` is an Eigen class.
72
+ */
73
+ template<class Matrix>
74
+ using WrappedWorkspace = typename WrappedWorkspaceInternal<Matrix>::type;
75
+
76
+ /**
77
+ * @tparam Matrix Type of the underlying matrix in the wrapper.
78
+ *
79
+ * This type is equivalent to the adjoint workspace class of `Matrix`, or a placeholder boolean if `Matrix` is an Eigen class.
80
+ */
81
+ template<class Matrix>
82
+ using WrappedAdjointWorkspace = typename WrappedAdjointWorkspaceInternal<Matrix>::type;
83
+
84
+ /**
85
+ * @tparam Matrix Type of the underlying matrix in the wrapper.
86
+ * @param mat Pointer to the wrapped matrix instance.
87
+ * @return The workspace of `mat`, or `false` if `Matrix` is an **Eigen** class.
88
+ */
89
+ template<class Matrix>
90
+ WrappedWorkspace<Matrix> wrapped_workspace(const Matrix* mat) {
91
+ if constexpr(has_multiply_method<Matrix>::value) { // using this as a proxy for whether it's an Eigen matrix or not.
92
+ return false;
93
+ } else {
94
+ return mat->workspace();
95
+ }
96
+ }
97
+
98
+ /**
99
+ * @tparam Matrix Type of the underlying matrix in the wrapper.
100
+ * @param mat Pointer to the wrapped matrix instance.
101
+ * @return The adjoint workspace of `mat`, or `false` if `Matrix` is an **Eigen** class.
102
+ */
103
+ template<class Matrix>
104
+ WrappedAdjointWorkspace<Matrix> wrapped_adjoint_workspace(const Matrix* mat) {
105
+ if constexpr(has_adjoint_multiply_method<Matrix>::value) {
106
+ return false;
107
+ } else {
108
+ return mat->adjoint_workspace();
109
+ }
110
+ }
111
+
112
+ /**
113
+ * @tparam Matrix Type of the wrapped matrix.
114
+ * @tparam Right An `Eigen::VectorXd` or equivalent expression.
115
+ *
116
+ * @param[in] mat Pointer to the wrapped matrix instance.
117
+ * @param[in] rhs The right-hand side of the matrix product.
118
+ * @param work The return value of `wrapped_workspace()` on `mat`.
119
+ * @param[out] out The output vector to store the matrix product.
120
+ * This is filled with the product of this matrix and `rhs`.
121
+ */
122
+ template<class Matrix, class Right>
123
+ void wrapped_multiply(const Matrix* mat, const Right& rhs, WrappedWorkspace<Matrix>& work, Eigen::VectorXd& out) {
124
+ if constexpr(has_multiply_method<Matrix>::value) {
125
+ out.noalias() = *mat * rhs;
126
+ } else {
127
+ mat->multiply(rhs, work, out);
128
+ }
129
+ }
130
+
131
+ /**
132
+ * @tparam Matrix Type of the wrapped matrix.
133
+ * @tparam Right An `Eigen::VectorXd` or equivalent expression.
134
+ *
135
+ * @param[in] mat Pointer to the wrapped matrix instance.
136
+ * @param[in] rhs The right-hand side of the matrix product.
137
+ * @param work The return value of `wrapped_adjoint_workspace()` on `mat`.
138
+ * @param[out] out The output vector to store the matrix product.
139
+ * This is filled with the product of this matrix and `rhs`.
140
+ */
141
+ template<class Matrix, class Right>
142
+ void wrapped_adjoint_multiply(const Matrix* mat, const Right& rhs, WrappedAdjointWorkspace<Matrix>& work, Eigen::VectorXd& out) {
143
+ if constexpr(has_adjoint_multiply_method<Matrix>::value) {
144
+ out.noalias() = mat->adjoint() * rhs;
145
+ } else {
146
+ mat->adjoint_multiply(rhs, work, out);
147
+ }
148
+ }
149
+
150
+ /**
151
+ * @tparam Matrix Type of the wrapped matrix.
152
+ * @param[in] mat Pointer to the wrapped matrix instance.
153
+ * @return A dense **Eigen** matrix containing the realized contents of `mat`.
154
+ */
155
+ template<class Matrix>
156
+ Eigen::MatrixXd wrapped_realize(const Matrix* mat) {
157
+ if constexpr(has_realize_method<Matrix>::value) {
158
+ return mat->realize();
159
+ } else {
160
+ return Eigen::MatrixXd(*mat);
161
+ }
162
+ }
163
+
29
164
  /**
30
165
  * @brief Wrapper for a centered matrix.
31
166
  *
@@ -53,23 +188,46 @@ struct Centered {
53
188
  */
54
189
  auto cols() const { return mat->cols(); }
55
190
 
191
+ public:
192
+ /**
193
+ * Workspace type for `multiply()`.
194
+ * Currently, this is just an alias for the workspace type of the underlying matrix.
195
+ */
196
+ typedef WrappedWorkspace<Matrix> Workspace;
197
+
198
+ /**
199
+ * @return Workspace for use in `multiply()`.
200
+ */
201
+ Workspace workspace() const {
202
+ return wrapped_workspace(mat);
203
+ }
204
+
205
+ /**
206
+ * Workspace type for `adjoint_multiply()`.
207
+ * Currently, this is just an alias for the adjoint workspace type of the underlying matrix.
208
+ */
209
+ typedef WrappedAdjointWorkspace<Matrix> AdjointWorkspace;
210
+
211
+ /**
212
+ * @return Workspace for use in `adjoint_multiply()`.
213
+ */
214
+ AdjointWorkspace adjoint_workspace() const {
215
+ return wrapped_adjoint_workspace(mat);
216
+ }
217
+
218
+ public:
56
219
  /**
57
220
  * @tparam Right An `Eigen::VectorXd` or equivalent expression.
58
221
  *
59
222
  * @param[in] rhs The right-hand side of the matrix product.
60
- * This should be a vector or have only one column.
223
+ * @param work The return value of `workspace()`.
224
+ * This can be reused across multiple `multiply()` calls.
61
225
  * @param[out] out The output vector to store the matrix product.
62
- *
63
- * @return `out` is filled with the product of this matrix and `rhs`.
226
+ * This is filled with the product of this matrix and `rhs`.
64
227
  */
65
228
  template<class Right>
66
- void multiply(const Right& rhs, Eigen::VectorXd& out) const {
67
- if constexpr(has_multiply_method<Matrix>::value) {
68
- out.noalias() = *mat * rhs;
69
- } else {
70
- mat->multiply(rhs, out);
71
- }
72
-
229
+ void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& out) const {
230
+ wrapped_multiply(mat, rhs, work, out);
73
231
  double beta = rhs.dot(*center);
74
232
  for (auto& o : out) {
75
233
  o -= beta;
@@ -81,19 +239,14 @@ struct Centered {
81
239
  * @tparam Right An `Eigen::VectorXd` or equivalent expression.
82
240
  *
83
241
  * @param[in] rhs The right-hand side of the matrix product.
84
- * This should be a vector or have only one column.
242
+ * @param work The return value of `adjoint_workspace()`.
243
+ * This can be reused across multiple `adjoint_multiply()` calls.
85
244
  * @param[out] out The output vector to store the matrix product.
86
- *
87
- * @return `out` is filled with the product of the transpose of this matrix and `rhs`.
245
+ * This is filled with the product of the transpose of this matrix and `rhs`.
88
246
  */
89
247
  template<class Right>
90
- void adjoint_multiply(const Right& rhs, Eigen::VectorXd& out) const {
91
- if constexpr(has_adjoint_multiply_method<Matrix>::value) {
92
- out.noalias() = mat->adjoint() * rhs;
93
- } else {
94
- mat->adjoint_multiply(rhs, out);
95
- }
96
-
248
+ void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& out) const {
249
+ wrapped_adjoint_multiply(mat, rhs, work, out);
97
250
  double beta = rhs.sum();
98
251
  out -= beta * (*center);
99
252
  return;
@@ -104,23 +257,13 @@ struct Centered {
104
257
  * where the centering has been explicitly applied.
105
258
  */
106
259
  Eigen::MatrixXd realize() const {
107
- auto subtractor = [&](Eigen::MatrixXd& m) -> void {
108
- for (Eigen::Index c = 0; c < m.cols(); ++c) {
109
- for (Eigen::Index r = 0; r < m.rows(); ++r) {
110
- m(r, c) -= (*center)[c];
111
- }
260
+ Eigen::MatrixXd output = wrapped_realize(mat);
261
+ for (Eigen::Index c = 0; c < output.cols(); ++c) {
262
+ for (Eigen::Index r = 0; r < output.rows(); ++r) {
263
+ output(r, c) -= (*center)[c];
112
264
  }
113
- };
114
-
115
- if constexpr(has_realize_method<Matrix>::value) {
116
- Eigen::MatrixXd output = mat->realize();
117
- subtractor(output);
118
- return output;
119
- } else {
120
- Eigen::MatrixXd output(*mat);
121
- subtractor(output);
122
- return output;
123
265
  }
266
+ return output;
124
267
  }
125
268
 
126
269
  private:
@@ -155,22 +298,61 @@ struct Scaled {
155
298
  */
156
299
  auto cols() const { return mat->cols(); }
157
300
 
301
+ public:
302
+ /**
303
+ * @brief Workspace type for `multiply()`.
304
+ */
305
+ struct Workspace {
306
+ /**
307
+ * @cond
308
+ */
309
+ Workspace(size_t n, WrappedWorkspace<Matrix> c) : product(n), child(std::move(c)) {}
310
+ Eigen::VectorXd product;
311
+ WrappedWorkspace<Matrix> child;
312
+ /**
313
+ * @endcond
314
+ */
315
+ };
316
+
317
+ /**
318
+ * @return Workspace for use in `multiply()`.
319
+ */
320
+ Workspace workspace() const {
321
+ return Workspace(mat->cols(), wrapped_workspace(mat));
322
+ }
323
+
324
+ /**
325
+ * Workspace type for `adjoint_multiply()`.
326
+ * Currently, this is just an alias for the adjoint workspace type of the underlying matrix.
327
+ */
328
+ typedef WrappedAdjointWorkspace<Matrix> AdjointWorkspace;
329
+
330
+ /**
331
+ * @return Workspace for use in `adjoint_multiply()`.
332
+ */
333
+ AdjointWorkspace adjoint_workspace() const {
334
+ return wrapped_adjoint_workspace(mat);
335
+ }
336
+
337
+ public:
158
338
  /**
159
339
  * @tparam Right An `Eigen::VectorXd` or equivalent expression.
160
340
  *
161
341
  * @param[in] rhs The right-hand side of the matrix product.
162
342
  * This should be a vector or have only one column.
343
+ * @param work The return value of `workspace()`.
344
+ * This can be reused across multiple `multiply()` calls.
163
345
  * @param[out] out The output vector to store the matrix product.
164
- *
165
- * @return `out` is filled with the product of this matrix and `rhs`.
346
+ * This is filled with the product of this matrix and `rhs`.
166
347
  */
167
348
  template<class Right>
168
- void multiply(const Right& rhs, Eigen::VectorXd& out) const {
169
- if constexpr(has_multiply_method<Matrix>::value) {
170
- out.noalias() = *mat * rhs.cwiseQuotient(*scale);
171
- } else {
172
- mat->multiply(rhs.cwiseQuotient(*scale), out);
173
- }
349
+ void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& out) const {
350
+ // We store the result here, because the underlying matrix's multiply()
351
+ // might need to access rhs/scale multiple times, especially if it's
352
+ // parallelized. Better to pay the cost of accessing a separate memory
353
+ // space than computing the quotient repeatedly.
354
+ work.product = rhs.cwiseQuotient(*scale);
355
+ wrapped_multiply(mat, work.product, work.child, out);
174
356
  return;
175
357
  }
176
358
 
@@ -179,17 +361,14 @@ struct Scaled {
179
361
  *
180
362
  * @param[in] rhs The right-hand side of the matrix product.
181
363
  * This should be a vector or have only one column.
364
+ * @param work The return value of `adjoint_workspace()`.
365
+ * This can be reused across multiple `adjoint_multiply()` calls.
182
366
  * @param[out] out The output vector to store the matrix product.
183
- *
184
- * @return `out` is filled with the product of the transpose of this matrix and `rhs`.
367
+ * This is filled with the product of the transpose of this matrix and `rhs`.
185
368
  */
186
369
  template<class Right>
187
- void adjoint_multiply(const Right& rhs, Eigen::VectorXd& out) const {
188
- if constexpr(has_adjoint_multiply_method<Matrix>::value) {
189
- out.noalias() = mat->adjoint() * rhs;
190
- } else {
191
- mat->adjoint_multiply(rhs, out);
192
- }
370
+ void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& out) const {
371
+ wrapped_adjoint_multiply(mat, rhs, work, out);
193
372
  out.noalias() = out.cwiseQuotient(*scale);
194
373
  return;
195
374
  }
@@ -199,23 +378,13 @@ struct Scaled {
199
378
  * where the scaling has been explicitly applied.
200
379
  */
201
380
  Eigen::MatrixXd realize() const {
202
- auto scaler = [&](Eigen::MatrixXd& m) -> void {
203
- for (Eigen::Index c = 0; c < m.cols(); ++c) {
204
- for (Eigen::Index r = 0; r < m.rows(); ++r) {
205
- m(r, c) /= (*scale)[c];
206
- }
381
+ Eigen::MatrixXd output = wrapped_realize(mat);
382
+ for (Eigen::Index c = 0; c < output.cols(); ++c) {
383
+ for (Eigen::Index r = 0; r < output.rows(); ++r) {
384
+ output(r, c) /= (*scale)[c];
207
385
  }
208
- };
209
-
210
- if constexpr(has_realize_method<Matrix>::value) {
211
- Eigen::MatrixXd output = mat->realize();
212
- scaler(output);
213
- return output;
214
- } else {
215
- Eigen::MatrixXd output(*mat);
216
- scaler(output);
217
- return output;
218
386
  }
387
+ return output;
219
388
  }
220
389
 
221
390
  private:
@@ -23,7 +23,7 @@ struct Details {
23
23
  /**
24
24
  * @cond
25
25
  */
26
- Details() {}
26
+ Details() : iterations(0), status(0) {}
27
27
 
28
28
  Details(int it, int st) : sizes(0), withinss(0), iterations(it), status(st) {}
29
29
 
@@ -106,13 +106,19 @@ public:
106
106
  */
107
107
  struct Defaults {
108
108
  /**
109
- * See `HartiganWong::set_max_iterations()`.
109
+ * See `set_max_iterations()` for more details.
110
110
  */
111
111
  static constexpr int max_iterations = 10;
112
+
113
+ /**
114
+ * See `set_num_threads()` for more details.
115
+ */
116
+ static constexpr int num_threads = 1;
112
117
  };
113
118
 
114
119
  private:
115
120
  int maxiter = Defaults::max_iterations;
121
+ int nthreads = Defaults::num_threads;
116
122
 
117
123
  public:
118
124
  /**
@@ -126,6 +132,16 @@ public:
126
132
  return *this;
127
133
  }
128
134
 
135
+ /**
136
+ * @param n Number of threads to use.
137
+ *
138
+ * @return A reference to this `HartiganWong` object.
139
+ */
140
+ HartiganWong& set_num_threads(int n = Defaults::num_threads) {
141
+ nthreads = n;
142
+ return *this;
143
+ }
144
+
129
145
  public:
130
146
  Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
131
147
  num_dim = ndim;
@@ -160,8 +176,13 @@ public:
160
176
  /* For each point I, find its two closest centres, IC1(I) and
161
177
  * IC2(I). Assign it to IC1(I).
162
178
  */
163
- #pragma omp parallel for
179
+ #ifndef KMEANS_CUSTOM_PARALLEL
180
+ #pragma omp parallel for num_threads(nthreads)
164
181
  for (INDEX_t obs = 0; obs < num_obs; ++obs) {
182
+ #else
183
+ KMEANS_CUSTOM_PARALLEL(num_obs, [&](INDEX_t first, INDEX_t last) -> void {
184
+ for (INDEX_t obs = first; obs < last; ++obs) {
185
+ #endif
165
186
  auto& best = ic1[obs];
166
187
  best = 0;
167
188
  DATA_t best_dist = squared_distance_from_cluster(obs, best);
@@ -186,7 +207,12 @@ public:
186
207
  }
187
208
  }
188
209
  }
210
+ #ifndef KMEANS_CUSTOM_PARALLEL
211
+ }
212
+ #else
189
213
  }
214
+ }, nthreads);
215
+ #endif
190
216
 
191
217
  /* Update cluster centres to be the average of points contained
192
218
  * within them.
@@ -44,6 +44,11 @@ public:
44
44
  * See `set_seed()` for more details.
45
45
  */
46
46
  static constexpr uint64_t seed = 6523u;
47
+
48
+ /**
49
+ * See `set_num_threads()` for more details.
50
+ */
51
+ static constexpr int num_threads = 1;
47
52
  };
48
53
 
49
54
  /**
@@ -55,8 +60,20 @@ public:
55
60
  seed = s;
56
61
  return *this;
57
62
  }
63
+
64
+ /**
65
+ * @param n Number of threads to use.
66
+ *
67
+ * @return A reference to this `InitializeKmeansPP` object.
68
+ */
69
+ InitializeKmeansPP& set_num_threads(int n = Defaults::num_threads) {
70
+ nthreads = n;
71
+ return *this;
72
+ }
73
+
58
74
  private:
59
75
  uint64_t seed = Defaults::seed;
76
+ int nthreads = Defaults::num_threads;
60
77
 
61
78
  public:
62
79
  /**
@@ -74,8 +91,13 @@ public:
74
91
  if (!sofar.empty()) {
75
92
  auto last = sofar.back();
76
93
 
77
- #pragma omp parallel for
94
+ #ifndef KMEANS_CUSTOM_PARALLEL
95
+ #pragma omp parallel for num_threads(nthreads)
78
96
  for (INDEX_t obs = 0; obs < nobs; ++obs) {
97
+ #else
98
+ KMEANS_CUSTOM_PARALLEL(nobs, [&](INDEX_t first, INDEX_t end) -> void {
99
+ for (INDEX_t obs = first; obs < end; ++obs) {
100
+ #endif
79
101
  if (mindist[obs]) {
80
102
  const DATA_t* acopy = data + obs * ndim;
81
103
  const DATA_t* scopy = data + last * ndim;
@@ -88,7 +110,13 @@ public:
88
110
  mindist[obs] = r2;
89
111
  }
90
112
  }
113
+ #ifndef KMEANS_CUSTOM_PARALLEL
91
114
  }
115
+ #else
116
+ }
117
+ }, nthreads);
118
+ #endif
119
+
92
120
  } else {
93
121
  counter = nobs;
94
122
  }
@@ -40,10 +40,16 @@ public:
40
40
  * See `set_seed()` for more details.
41
41
  */
42
42
  static constexpr uint64_t seed = 5489u;
43
+
44
+ /**
45
+ * See `set_num_threads()` for more details.
46
+ */
47
+ static constexpr int num_threads = 1;
43
48
  };
44
49
 
45
50
  private:
46
51
  uint64_t seed = Defaults::seed;
52
+ int nthreads = Defaults::num_threads;
47
53
 
48
54
  public:
49
55
  /**
@@ -52,14 +58,27 @@ public:
52
58
  *
53
59
  * @return A reference to this `Kmeans` object.
54
60
  *
55
- * This seed is only used for the default `refiner` and `initializer` instances in `run()`.
56
- * Otherwise, the seed from individual instances is respected.
61
+ * This seed is only used for the default `initializer` instance in `run()`.
62
+ * Otherwise, if an `initializer` is explicitly passed to `run()`, its seed is respected.
57
63
  */
58
64
  Kmeans& set_seed(uint64_t s = 5489u) {
59
65
  seed = s;
60
66
  return *this;
61
67
  }
62
68
 
69
+ /**
70
+ * @param n Number of threads to use.
71
+ *
72
+ * @return A reference to this `Kmeans` object.
73
+ *
74
+ * This setting is only used for the default `refiner` and `initializer` instances in `run()`.
75
+ * Otherwise, if an `initializer` or `refiner` is explicitly passed to `run()`, the number of threads specified in the instance is respected.
76
+ */
77
+ Kmeans& set_num_threads(int n = Defaults::num_threads) {
78
+ nthreads = n;
79
+ return *this;
80
+ }
81
+
63
82
  public:
64
83
  /**
65
84
  * @param ndim Number of dimensions.
@@ -88,6 +107,7 @@ public:
88
107
  {
89
108
  if (initializer == NULL) {
90
109
  InitializeKmeansPP<DATA_t, CLUSTER_t, INDEX_t> init;
110
+ init.set_seed(seed).set_num_threads(nthreads);
91
111
  ncenters = init.run(ndim, nobs, data, ncenters, centers, clusters);
92
112
  } else {
93
113
  ncenters = initializer->run(ndim, nobs, data, ncenters, centers, clusters);
@@ -95,6 +115,7 @@ public:
95
115
 
96
116
  if (refiner == NULL) {
97
117
  HartiganWong<DATA_t, CLUSTER_t, INDEX_t> hw;
118
+ hw.set_num_threads(nthreads);
98
119
  return hw.run(ndim, nobs, data, ncenters, centers, clusters);
99
120
  } else {
100
121
  return refiner->run(ndim, nobs, data, ncenters, centers, clusters);
@@ -110,6 +131,8 @@ public:
110
131
  * @cond
111
132
  */
112
133
  Results(int ndim, INDEX_t nobs, CLUSTER_t ncenters) : centers(ndim * ncenters), clusters(nobs) {}
134
+
135
+ Results() {}
113
136
  /**
114
137
  * @endcond
115
138
  */
@@ -48,13 +48,19 @@ public:
48
48
  */
49
49
  struct Defaults {
50
50
  /**
51
- * See `Lloyd::set_max_iterations()`.
51
+ * See `set_max_iterations()` for more details.
52
52
  */
53
53
  static constexpr int max_iterations = 10;
54
+
55
+ /**
56
+ * See `set_num_threads()` for more details.
57
+ */
58
+ static constexpr int num_threads = 1;
54
59
  };
55
60
 
56
61
  private:
57
62
  int maxiter = Defaults::max_iterations;
63
+ int nthreads = Defaults::num_threads;
58
64
 
59
65
  public:
60
66
  /**
@@ -68,6 +74,16 @@ public:
68
74
  return *this;
69
75
  }
70
76
 
77
+ /**
78
+ * @param n Number of threads to use.
79
+ *
80
+ * @return A reference to this `Lloyd` object.
81
+ */
82
+ Lloyd& set_num_threads(int n = Defaults::num_threads) {
83
+ nthreads = n;
84
+ return *this;
85
+ }
86
+
71
87
  public:
72
88
  Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
73
89
  if (is_edge_case(nobs, ncenters)) {
@@ -83,10 +99,21 @@ public:
83
99
  // Note that we move the `updated` check outside of this loop
84
100
  // so that, in the future, this is more easily parallelized.
85
101
  QuickSearch<DATA_t, CLUSTER_t> index(ndim, ncenters, centers);
86
- #pragma omp parallel for
102
+
103
+ #ifndef KMEANS_CUSTOM_PARALLEL
104
+ #pragma omp parallel for num_threads(nthreads)
87
105
  for (INDEX_t obs = 0; obs < nobs; ++obs) {
106
+ #else
107
+ KMEANS_CUSTOM_PARALLEL(nobs, [&](INDEX_t first, INDEX_t last) -> void {
108
+ for (INDEX_t obs = first; obs < last; ++obs) {
109
+ #endif
88
110
  copy[obs] = index.find(data + obs * ndim);
111
+ #ifndef KMEANS_CUSTOM_PARALLEL
112
+ }
113
+ #else
89
114
  }
115
+ }, nthreads);
116
+ #endif
90
117
 
91
118
  bool updated = false;
92
119
  for (INDEX_t obs = 0; obs < nobs; ++obs) {