umappp 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -17,15 +17,150 @@
17
17
  *
18
18
  * - `mat.rows()`, returning the number of rows.
19
19
  * - `mat.cols()`, returning the number of columns.
20
- * - `mat.multiply(rhs, out)`, which computes the matrix product `mat * rhs` and stores it in `out`.
21
- * `rhs` should be an `Eigen::VectorXd` (or an expression equivalent) while `out` should be a `Eigen::VectorXd`.
22
- * - `mat.adjoint_multiply(rhs, out)`, which computes the matrix product `mat.adjoint() * rhs` and stores it in `out`.
23
- * `rhs` should be an `Eigen::VectorXd` (or an expression equivalent) while `out` should be a `Eigen::VectorXd`.
20
+ * - `mat.workspace()`, returning an instance of a workspace class for multiplication.
21
+ * - `mat.adjoint_workspace()`, returning an instance of a workspace class for adjoint multiplication.
22
+ * - `mat.multiply(rhs, work, out)`, which computes the matrix product `mat * rhs` and stores it in `out` - see `irlba::Centered::multiply()` for the typical signature.
23
+ * `rhs` should be a const reference to an `Eigen::VectorXd` (or an expression equivalent, via templating) while `out` should be a non-const reference to an `Eigen::VectorXd`.
24
+ * `work` should be the return value of `mat.workspace()` and is passed in as a non-const reference.
25
+ * - `mat.adjoint_multiply(rhs, work, out)`, which computes the matrix product `mat.adjoint() * rhs` and stores it in `out` - see `irlba::Centered::adjoint_multiply()` for the typical signature.
26
+ * `rhs` should be a const reference to an `Eigen::VectorXd` (or an expression equivalent, via templating) while `out` should be a non-const reference to an `Eigen::VectorXd`.
27
+ * `work` should be the return value of `mat.adjoint_workspace()` and is passed in as a non-const reference.
24
28
  * - `mat.realize()`, which returns an `Eigen::MatrixXd` containing the matrix with all modifications applied.
29
+ *
30
+ * The workspace class is used to allocate space for intermediate results across multiple calls to `multiply()`.
31
+ * This class should contain a member of type `WrappedWorkspace<M>`, where `M` is the type of the underlying matrix;
32
+ * this member can be initialized by calling the `wrapped_workspace()` function on the underlying matrix.
33
+ * If a wrapper does not have any intermediate results, it can just return `WrappedWorkspace<M>` directly.
34
+ * The same logic applies to `adjoint_multiply()` using the `AdjointWrappedWorkspace` template class and `wrapped_adjoint_workspace()`.
35
+ *
36
+ * Implementations of the `multiply()` and `adjoint_multiply()` methods may use the `wrapped_multiply()` and `wrapped_adjoint_multiply()` functions.
37
+ * This will handle the differences in the calls between **Eigen** matrices and **irlba** wrappers.
25
38
  */
26
39
 
27
40
  namespace irlba {
28
41
 
42
+ /**
43
+ * @cond
44
+ */
45
+ template<class Matrix, typename = int>
46
+ struct WrappedWorkspaceInternal {
47
+ typedef bool type;
48
+ };
49
+
50
+ template<class Matrix>
51
+ struct WrappedWorkspaceInternal<Matrix, decltype((void) std::declval<Matrix>().workspace(), 0)> {
52
+ typedef decltype(std::declval<Matrix>().workspace()) type;
53
+ };
54
+
55
+ template<class Matrix, typename = int>
56
+ struct WrappedAdjointWorkspaceInternal {
57
+ typedef bool type;
58
+ };
59
+
60
+ template<class Matrix>
61
+ struct WrappedAdjointWorkspaceInternal<Matrix, decltype((void) std::declval<Matrix>().adjoint_workspace(), 0)> {
62
+ typedef decltype(std::declval<Matrix>().adjoint_workspace()) type;
63
+ };
64
+ /**
65
+ * @endcond
66
+ */
67
+
68
+ /**
69
+ * @tparam Matrix Type of the underlying matrix in the wrapper.
70
+ *
71
+ * This type is equivalent to the workspace class of `Matrix`, or a placeholder boolean if `Matrix` is an Eigen class.
72
+ */
73
+ template<class Matrix>
74
+ using WrappedWorkspace = typename WrappedWorkspaceInternal<Matrix>::type;
75
+
76
+ /**
77
+ * @tparam Matrix Type of the underlying matrix in the wrapper.
78
+ *
79
+ * This type is equivalent to the adjoint workspace class of `Matrix`, or a placeholder boolean if `Matrix` is an Eigen class.
80
+ */
81
+ template<class Matrix>
82
+ using WrappedAdjointWorkspace = typename WrappedAdjointWorkspaceInternal<Matrix>::type;
83
+
84
+ /**
85
+ * @tparam Matrix Type of the underlying matrix in the wrapper.
86
+ * @param mat Pointer to the wrapped matrix instance.
87
+ * @return The workspace of `mat`, or `false` if `Matrix` is an **Eigen** class.
88
+ */
89
+ template<class Matrix>
90
+ WrappedWorkspace<Matrix> wrapped_workspace(const Matrix* mat) {
91
+ if constexpr(has_multiply_method<Matrix>::value) { // using this as a proxy for whether it's an Eigen matrix or not.
92
+ return false;
93
+ } else {
94
+ return mat->workspace();
95
+ }
96
+ }
97
+
98
+ /**
99
+ * @tparam Matrix Type of the underlying matrix in the wrapper.
100
+ * @param mat Pointer to the wrapped matrix instance.
101
+ * @return The adjoint workspace of `mat`, or `false` if `Matrix` is an **Eigen** class.
102
+ */
103
+ template<class Matrix>
104
+ WrappedAdjointWorkspace<Matrix> wrapped_adjoint_workspace(const Matrix* mat) {
105
+ if constexpr(has_adjoint_multiply_method<Matrix>::value) {
106
+ return false;
107
+ } else {
108
+ return mat->adjoint_workspace();
109
+ }
110
+ }
111
+
112
+ /**
113
+ * @tparam Matrix Type of the wrapped matrix.
114
+ * @tparam Right An `Eigen::VectorXd` or equivalent expression.
115
+ *
116
+ * @param[in] mat Pointer to the wrapped matrix instance.
117
+ * @param[in] rhs The right-hand side of the matrix product.
118
+ * @param work The return value of `wrapped_workspace()` on `mat`.
119
+ * @param[out] out The output vector to store the matrix product.
120
+ * This is filled with the product of this matrix and `rhs`.
121
+ */
122
+ template<class Matrix, class Right>
123
+ void wrapped_multiply(const Matrix* mat, const Right& rhs, WrappedWorkspace<Matrix>& work, Eigen::VectorXd& out) {
124
+ if constexpr(has_multiply_method<Matrix>::value) {
125
+ out.noalias() = *mat * rhs;
126
+ } else {
127
+ mat->multiply(rhs, work, out);
128
+ }
129
+ }
130
+
131
+ /**
132
+ * @tparam Matrix Type of the wrapped matrix.
133
+ * @tparam Right An `Eigen::VectorXd` or equivalent expression.
134
+ *
135
+ * @param[in] mat Pointer to the wrapped matrix instance.
136
+ * @param[in] rhs The right-hand side of the matrix product.
137
+ * @param work The return value of `wrapped_adjoint_workspace()` on `mat`.
138
+ * @param[out] out The output vector to store the matrix product.
139
+ * This is filled with the product of this matrix and `rhs`.
140
+ */
141
+ template<class Matrix, class Right>
142
+ void wrapped_adjoint_multiply(const Matrix* mat, const Right& rhs, WrappedAdjointWorkspace<Matrix>& work, Eigen::VectorXd& out) {
143
+ if constexpr(has_adjoint_multiply_method<Matrix>::value) {
144
+ out.noalias() = mat->adjoint() * rhs;
145
+ } else {
146
+ mat->adjoint_multiply(rhs, work, out);
147
+ }
148
+ }
149
+
150
+ /**
151
+ * @tparam Matrix Type of the wrapped matrix.
152
+ * @param[in] mat Pointer to the wrapped matrix instance.
153
+ * @return A dense **Eigen** matrix containing the realized contents of `mat`.
154
+ */
155
+ template<class Matrix>
156
+ Eigen::MatrixXd wrapped_realize(const Matrix* mat) {
157
+ if constexpr(has_realize_method<Matrix>::value) {
158
+ return mat->realize();
159
+ } else {
160
+ return Eigen::MatrixXd(*mat);
161
+ }
162
+ }
163
+
29
164
  /**
30
165
  * @brief Wrapper for a centered matrix.
31
166
  *
@@ -53,23 +188,46 @@ struct Centered {
53
188
  */
54
189
  auto cols() const { return mat->cols(); }
55
190
 
191
+ public:
192
+ /**
193
+ * Workspace type for `multiply()`.
194
+ * Currently, this is just an alias for the workspace type of the underlying matrix.
195
+ */
196
+ typedef WrappedWorkspace<Matrix> Workspace;
197
+
198
+ /**
199
+ * @return Workspace for use in `multiply()`.
200
+ */
201
+ Workspace workspace() const {
202
+ return wrapped_workspace(mat);
203
+ }
204
+
205
+ /**
206
+ * Workspace type for `adjoint_multiply()`.
207
+ * Currently, this is just an alias for the adjoint workspace type of the underlying matrix.
208
+ */
209
+ typedef WrappedAdjointWorkspace<Matrix> AdjointWorkspace;
210
+
211
+ /**
212
+ * @return Workspace for use in `adjoint_multiply()`.
213
+ */
214
+ AdjointWorkspace adjoint_workspace() const {
215
+ return wrapped_adjoint_workspace(mat);
216
+ }
217
+
218
+ public:
56
219
  /**
57
220
  * @tparam Right An `Eigen::VectorXd` or equivalent expression.
58
221
  *
59
222
  * @param[in] rhs The right-hand side of the matrix product.
60
- * This should be a vector or have only one column.
223
+ * @param work The return value of `workspace()`.
224
+ * This can be reused across multiple `multiply()` calls.
61
225
  * @param[out] out The output vector to store the matrix product.
62
- *
63
- * @return `out` is filled with the product of this matrix and `rhs`.
226
+ * This is filled with the product of this matrix and `rhs`.
64
227
  */
65
228
  template<class Right>
66
- void multiply(const Right& rhs, Eigen::VectorXd& out) const {
67
- if constexpr(has_multiply_method<Matrix>::value) {
68
- out.noalias() = *mat * rhs;
69
- } else {
70
- mat->multiply(rhs, out);
71
- }
72
-
229
+ void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& out) const {
230
+ wrapped_multiply(mat, rhs, work, out);
73
231
  double beta = rhs.dot(*center);
74
232
  for (auto& o : out) {
75
233
  o -= beta;
@@ -81,19 +239,14 @@ struct Centered {
81
239
  * @tparam Right An `Eigen::VectorXd` or equivalent expression.
82
240
  *
83
241
  * @param[in] rhs The right-hand side of the matrix product.
84
- * This should be a vector or have only one column.
242
+ * @param work The return value of `adjoint_workspace()`.
243
+ * This can be reused across multiple `adjoint_multiply()` calls.
85
244
  * @param[out] out The output vector to store the matrix product.
86
- *
87
- * @return `out` is filled with the product of the transpose of this matrix and `rhs`.
245
+ * This is filled with the product of the transpose of this matrix and `rhs`.
88
246
  */
89
247
  template<class Right>
90
- void adjoint_multiply(const Right& rhs, Eigen::VectorXd& out) const {
91
- if constexpr(has_adjoint_multiply_method<Matrix>::value) {
92
- out.noalias() = mat->adjoint() * rhs;
93
- } else {
94
- mat->adjoint_multiply(rhs, out);
95
- }
96
-
248
+ void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& out) const {
249
+ wrapped_adjoint_multiply(mat, rhs, work, out);
97
250
  double beta = rhs.sum();
98
251
  out -= beta * (*center);
99
252
  return;
@@ -104,23 +257,13 @@ struct Centered {
104
257
  * where the centering has been explicitly applied.
105
258
  */
106
259
  Eigen::MatrixXd realize() const {
107
- auto subtractor = [&](Eigen::MatrixXd& m) -> void {
108
- for (Eigen::Index c = 0; c < m.cols(); ++c) {
109
- for (Eigen::Index r = 0; r < m.rows(); ++r) {
110
- m(r, c) -= (*center)[c];
111
- }
260
+ Eigen::MatrixXd output = wrapped_realize(mat);
261
+ for (Eigen::Index c = 0; c < output.cols(); ++c) {
262
+ for (Eigen::Index r = 0; r < output.rows(); ++r) {
263
+ output(r, c) -= (*center)[c];
112
264
  }
113
- };
114
-
115
- if constexpr(has_realize_method<Matrix>::value) {
116
- Eigen::MatrixXd output = mat->realize();
117
- subtractor(output);
118
- return output;
119
- } else {
120
- Eigen::MatrixXd output(*mat);
121
- subtractor(output);
122
- return output;
123
265
  }
266
+ return output;
124
267
  }
125
268
 
126
269
  private:
@@ -155,22 +298,61 @@ struct Scaled {
155
298
  */
156
299
  auto cols() const { return mat->cols(); }
157
300
 
301
+ public:
302
+ /**
303
+ * @brief Workspace type for `multiply()`.
304
+ */
305
+ struct Workspace {
306
+ /**
307
+ * @cond
308
+ */
309
+ Workspace(size_t n, WrappedWorkspace<Matrix> c) : product(n), child(std::move(c)) {}
310
+ Eigen::VectorXd product;
311
+ WrappedWorkspace<Matrix> child;
312
+ /**
313
+ * @endcond
314
+ */
315
+ };
316
+
317
+ /**
318
+ * @return Workspace for use in `multiply()`.
319
+ */
320
+ Workspace workspace() const {
321
+ return Workspace(mat->cols(), wrapped_workspace(mat));
322
+ }
323
+
324
+ /**
325
+ * Workspace type for `adjoint_multiply()`.
326
+ * Currently, this is just an alias for the adjoint workspace type of the underlying matrix.
327
+ */
328
+ typedef WrappedAdjointWorkspace<Matrix> AdjointWorkspace;
329
+
330
+ /**
331
+ * @return Workspace for use in `adjoint_multiply()`.
332
+ */
333
+ AdjointWorkspace adjoint_workspace() const {
334
+ return wrapped_adjoint_workspace(mat);
335
+ }
336
+
337
+ public:
158
338
  /**
159
339
  * @tparam Right An `Eigen::VectorXd` or equivalent expression.
160
340
  *
161
341
  * @param[in] rhs The right-hand side of the matrix product.
162
342
  * This should be a vector or have only one column.
343
+ * @param work The return value of `workspace()`.
344
+ * This can be reused across multiple `multiply()` calls.
163
345
  * @param[out] out The output vector to store the matrix product.
164
- *
165
- * @return `out` is filled with the product of this matrix and `rhs`.
346
+ * This is filled with the product of this matrix and `rhs`.
166
347
  */
167
348
  template<class Right>
168
- void multiply(const Right& rhs, Eigen::VectorXd& out) const {
169
- if constexpr(has_multiply_method<Matrix>::value) {
170
- out.noalias() = *mat * rhs.cwiseQuotient(*scale);
171
- } else {
172
- mat->multiply(rhs.cwiseQuotient(*scale), out);
173
- }
349
+ void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& out) const {
350
+ // We store the result here, because the underlying matrix's multiply()
351
+ // might need to access rhs/scale multiple times, especially if it's
352
+ // parallelized. Better to pay the cost of accessing a separate memory
353
+ // space than computing the quotient repeatedly.
354
+ work.product = rhs.cwiseQuotient(*scale);
355
+ wrapped_multiply(mat, work.product, work.child, out);
174
356
  return;
175
357
  }
176
358
 
@@ -179,17 +361,14 @@ struct Scaled {
179
361
  *
180
362
  * @param[in] rhs The right-hand side of the matrix product.
181
363
  * This should be a vector or have only one column.
364
+ * @param work The return value of `adjoint_workspace()`.
365
+ * This can be reused across multiple `adjoint_multiply()` calls.
182
366
  * @param[out] out The output vector to store the matrix product.
183
- *
184
- * @return `out` is filled with the product of the transpose of this matrix and `rhs`.
367
+ * This is filled with the product of the transpose of this matrix and `rhs`.
185
368
  */
186
369
  template<class Right>
187
- void adjoint_multiply(const Right& rhs, Eigen::VectorXd& out) const {
188
- if constexpr(has_adjoint_multiply_method<Matrix>::value) {
189
- out.noalias() = mat->adjoint() * rhs;
190
- } else {
191
- mat->adjoint_multiply(rhs, out);
192
- }
370
+ void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& out) const {
371
+ wrapped_adjoint_multiply(mat, rhs, work, out);
193
372
  out.noalias() = out.cwiseQuotient(*scale);
194
373
  return;
195
374
  }
@@ -199,23 +378,13 @@ struct Scaled {
199
378
  * where the scaling has been explicitly applied.
200
379
  */
201
380
  Eigen::MatrixXd realize() const {
202
- auto scaler = [&](Eigen::MatrixXd& m) -> void {
203
- for (Eigen::Index c = 0; c < m.cols(); ++c) {
204
- for (Eigen::Index r = 0; r < m.rows(); ++r) {
205
- m(r, c) /= (*scale)[c];
206
- }
381
+ Eigen::MatrixXd output = wrapped_realize(mat);
382
+ for (Eigen::Index c = 0; c < output.cols(); ++c) {
383
+ for (Eigen::Index r = 0; r < output.rows(); ++r) {
384
+ output(r, c) /= (*scale)[c];
207
385
  }
208
- };
209
-
210
- if constexpr(has_realize_method<Matrix>::value) {
211
- Eigen::MatrixXd output = mat->realize();
212
- scaler(output);
213
- return output;
214
- } else {
215
- Eigen::MatrixXd output(*mat);
216
- scaler(output);
217
- return output;
218
386
  }
387
+ return output;
219
388
  }
220
389
 
221
390
  private:
@@ -23,7 +23,7 @@ struct Details {
23
23
  /**
24
24
  * @cond
25
25
  */
26
- Details() {}
26
+ Details() : iterations(0), status(0) {}
27
27
 
28
28
  Details(int it, int st) : sizes(0), withinss(0), iterations(it), status(st) {}
29
29
 
@@ -106,13 +106,19 @@ public:
106
106
  */
107
107
  struct Defaults {
108
108
  /**
109
- * See `HartiganWong::set_max_iterations()`.
109
+ * See `set_max_iterations()` for more details.
110
110
  */
111
111
  static constexpr int max_iterations = 10;
112
+
113
+ /**
114
+ * See `set_num_threads()` for more details.
115
+ */
116
+ static constexpr int num_threads = 1;
112
117
  };
113
118
 
114
119
  private:
115
120
  int maxiter = Defaults::max_iterations;
121
+ int nthreads = Defaults::num_threads;
116
122
 
117
123
  public:
118
124
  /**
@@ -126,6 +132,16 @@ public:
126
132
  return *this;
127
133
  }
128
134
 
135
+ /**
136
+ * @param n Number of threads to use.
137
+ *
138
+ * @return A reference to this `HartiganWong` object.
139
+ */
140
+ HartiganWong& set_num_threads(int n = Defaults::num_threads) {
141
+ nthreads = n;
142
+ return *this;
143
+ }
144
+
129
145
  public:
130
146
  Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
131
147
  num_dim = ndim;
@@ -160,8 +176,13 @@ public:
160
176
  /* For each point I, find its two closest centres, IC1(I) and
161
177
  * IC2(I). Assign it to IC1(I).
162
178
  */
163
- #pragma omp parallel for
179
+ #ifndef KMEANS_CUSTOM_PARALLEL
180
+ #pragma omp parallel for num_threads(nthreads)
164
181
  for (INDEX_t obs = 0; obs < num_obs; ++obs) {
182
+ #else
183
+ KMEANS_CUSTOM_PARALLEL(num_obs, [&](INDEX_t first, INDEX_t last) -> void {
184
+ for (INDEX_t obs = first; obs < last; ++obs) {
185
+ #endif
165
186
  auto& best = ic1[obs];
166
187
  best = 0;
167
188
  DATA_t best_dist = squared_distance_from_cluster(obs, best);
@@ -186,7 +207,12 @@ public:
186
207
  }
187
208
  }
188
209
  }
210
+ #ifndef KMEANS_CUSTOM_PARALLEL
211
+ }
212
+ #else
189
213
  }
214
+ }, nthreads);
215
+ #endif
190
216
 
191
217
  /* Update cluster centres to be the average of points contained
192
218
  * within them.
@@ -44,6 +44,11 @@ public:
44
44
  * See `set_seed()` for more details.
45
45
  */
46
46
  static constexpr uint64_t seed = 6523u;
47
+
48
+ /**
49
+ * See `set_num_threads()` for more details.
50
+ */
51
+ static constexpr int num_threads = 1;
47
52
  };
48
53
 
49
54
  /**
@@ -55,8 +60,20 @@ public:
55
60
  seed = s;
56
61
  return *this;
57
62
  }
63
+
64
+ /**
65
+ * @param n Number of threads to use.
66
+ *
67
+ * @return A reference to this `InitializeKmeansPP` object.
68
+ */
69
+ InitializeKmeansPP& set_num_threads(int n = Defaults::num_threads) {
70
+ nthreads = n;
71
+ return *this;
72
+ }
73
+
58
74
  private:
59
75
  uint64_t seed = Defaults::seed;
76
+ int nthreads = Defaults::num_threads;
60
77
 
61
78
  public:
62
79
  /**
@@ -74,8 +91,13 @@ public:
74
91
  if (!sofar.empty()) {
75
92
  auto last = sofar.back();
76
93
 
77
- #pragma omp parallel for
94
+ #ifndef KMEANS_CUSTOM_PARALLEL
95
+ #pragma omp parallel for num_threads(nthreads)
78
96
  for (INDEX_t obs = 0; obs < nobs; ++obs) {
97
+ #else
98
+ KMEANS_CUSTOM_PARALLEL(nobs, [&](INDEX_t first, INDEX_t end) -> void {
99
+ for (INDEX_t obs = first; obs < end; ++obs) {
100
+ #endif
79
101
  if (mindist[obs]) {
80
102
  const DATA_t* acopy = data + obs * ndim;
81
103
  const DATA_t* scopy = data + last * ndim;
@@ -88,7 +110,13 @@ public:
88
110
  mindist[obs] = r2;
89
111
  }
90
112
  }
113
+ #ifndef KMEANS_CUSTOM_PARALLEL
91
114
  }
115
+ #else
116
+ }
117
+ }, nthreads);
118
+ #endif
119
+
92
120
  } else {
93
121
  counter = nobs;
94
122
  }
@@ -40,10 +40,16 @@ public:
40
40
  * See `set_seed()` for more details.
41
41
  */
42
42
  static constexpr uint64_t seed = 5489u;
43
+
44
+ /**
45
+ * See `set_num_threads()` for more details.
46
+ */
47
+ static constexpr int num_threads = 1;
43
48
  };
44
49
 
45
50
  private:
46
51
  uint64_t seed = Defaults::seed;
52
+ int nthreads = Defaults::num_threads;
47
53
 
48
54
  public:
49
55
  /**
@@ -52,14 +58,27 @@ public:
52
58
  *
53
59
  * @return A reference to this `Kmeans` object.
54
60
  *
55
- * This seed is only used for the default `refiner` and `initializer` instances in `run()`.
56
- * Otherwise, the seed from individual instances is respected.
61
+ * This seed is only used for the default `initializer` instance in `run()`.
62
+ * Otherwise, if an `initializer` is explicitly passed to `run()`, its seed is respected.
57
63
  */
58
64
  Kmeans& set_seed(uint64_t s = 5489u) {
59
65
  seed = s;
60
66
  return *this;
61
67
  }
62
68
 
69
+ /**
70
+ * @param n Number of threads to use.
71
+ *
72
+ * @return A reference to this `Kmeans` object.
73
+ *
74
+ * This setting is only used for the default `refiner` and `initializer` instances in `run()`.
75
+ * Otherwise, if an `initializer` or `refiner` is explicitly passed to `run()`, the number of threads specified in the instance is respected.
76
+ */
77
+ Kmeans& set_num_threads(int n = Defaults::num_threads) {
78
+ nthreads = n;
79
+ return *this;
80
+ }
81
+
63
82
  public:
64
83
  /**
65
84
  * @param ndim Number of dimensions.
@@ -88,6 +107,7 @@ public:
88
107
  {
89
108
  if (initializer == NULL) {
90
109
  InitializeKmeansPP<DATA_t, CLUSTER_t, INDEX_t> init;
110
+ init.set_seed(seed).set_num_threads(nthreads);
91
111
  ncenters = init.run(ndim, nobs, data, ncenters, centers, clusters);
92
112
  } else {
93
113
  ncenters = initializer->run(ndim, nobs, data, ncenters, centers, clusters);
@@ -95,6 +115,7 @@ public:
95
115
 
96
116
  if (refiner == NULL) {
97
117
  HartiganWong<DATA_t, CLUSTER_t, INDEX_t> hw;
118
+ hw.set_num_threads(nthreads);
98
119
  return hw.run(ndim, nobs, data, ncenters, centers, clusters);
99
120
  } else {
100
121
  return refiner->run(ndim, nobs, data, ncenters, centers, clusters);
@@ -110,6 +131,8 @@ public:
110
131
  * @cond
111
132
  */
112
133
  Results(int ndim, INDEX_t nobs, CLUSTER_t ncenters) : centers(ndim * ncenters), clusters(nobs) {}
134
+
135
+ Results() {}
113
136
  /**
114
137
  * @endcond
115
138
  */
@@ -48,13 +48,19 @@ public:
48
48
  */
49
49
  struct Defaults {
50
50
  /**
51
- * See `Lloyd::set_max_iterations()`.
51
+ * See `set_max_iterations()` for more details.
52
52
  */
53
53
  static constexpr int max_iterations = 10;
54
+
55
+ /**
56
+ * See `set_num_threads()` for more details.
57
+ */
58
+ static constexpr int num_threads = 1;
54
59
  };
55
60
 
56
61
  private:
57
62
  int maxiter = Defaults::max_iterations;
63
+ int nthreads = Defaults::num_threads;
58
64
 
59
65
  public:
60
66
  /**
@@ -68,6 +74,16 @@ public:
68
74
  return *this;
69
75
  }
70
76
 
77
+ /**
78
+ * @param n Number of threads to use.
79
+ *
80
+ * @return A reference to this `HartiganWong` object.
81
+ */
82
+ Lloyd& set_num_threads(int n = Defaults::num_threads) {
83
+ nthreads = n;
84
+ return *this;
85
+ }
86
+
71
87
  public:
72
88
  Details<DATA_t, INDEX_t> run(int ndim, INDEX_t nobs, const DATA_t* data, CLUSTER_t ncenters, DATA_t* centers, CLUSTER_t* clusters) {
73
89
  if (is_edge_case(nobs, ncenters)) {
@@ -83,10 +99,21 @@ public:
83
99
  // Note that we move the `updated` check outside of this loop
84
100
  // so that, in the future, this is more easily parallelized.
85
101
  QuickSearch<DATA_t, CLUSTER_t> index(ndim, ncenters, centers);
86
- #pragma omp parallel for
102
+
103
+ #ifndef KMEANS_CUSTOM_PARALLEL
104
+ #pragma omp parallel for num_threads(nthreads)
87
105
  for (INDEX_t obs = 0; obs < nobs; ++obs) {
106
+ #else
107
+ KMEANS_CUSTOM_PARALLEL(nobs, [&](INDEX_t first, INDEX_t last) -> void {
108
+ for (INDEX_t obs = first; obs < last; ++obs) {
109
+ #endif
88
110
  copy[obs] = index.find(data + obs * ndim);
111
+ #ifndef KMEANS_CUSTOM_PARALLEL
112
+ }
113
+ #else
89
114
  }
115
+ }, nthreads);
116
+ #endif
90
117
 
91
118
  bool updated = false;
92
119
  for (INDEX_t obs = 0; obs < nobs; ++obs) {