mlx-cluster 0.0.4__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {mlx_cluster-0.0.4/mlx_cluster.egg-info → mlx_cluster-0.0.5}/PKG-INFO +35 -17
  2. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/README.md +22 -9
  3. mlx_cluster-0.0.5/bindings.cpp +81 -0
  4. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/mlx_cluster/mlx_cluster.metallib +0 -0
  5. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5/mlx_cluster.egg-info}/PKG-INFO +35 -17
  6. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/mlx_cluster.egg-info/SOURCES.txt +0 -3
  7. mlx_cluster-0.0.5/mlx_cluster.egg-info/requires.txt +12 -0
  8. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/pyproject.toml +20 -13
  9. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/random_walks/BiasedRandomWalk.cpp +24 -30
  10. mlx_cluster-0.0.5/random_walks/BiasedRandomWalk.h +65 -0
  11. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/random_walks/RandomWalk.cpp +42 -37
  12. mlx_cluster-0.0.5/random_walks/RandomWalk.h +62 -0
  13. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/setup.py +1 -1
  14. mlx_cluster-0.0.5/tests/test_random_walk.py +72 -0
  15. mlx_cluster-0.0.5/tests/test_rejection_sampling.py +62 -0
  16. mlx_cluster-0.0.4/bindings.cpp +0 -65
  17. mlx_cluster-0.0.4/mlx_cluster/_ext.cpython-311-darwin.so +0 -0
  18. mlx_cluster-0.0.4/mlx_cluster/libmlx.dylib +0 -0
  19. mlx_cluster-0.0.4/mlx_cluster/libmlx_cluster.dylib +0 -0
  20. mlx_cluster-0.0.4/mlx_cluster.egg-info/requires.txt +0 -9
  21. mlx_cluster-0.0.4/random_walks/BiasedRandomWalk.h +0 -66
  22. mlx_cluster-0.0.4/random_walks/RandomWalk.h +0 -63
  23. mlx_cluster-0.0.4/tests/test_random_walk.py +0 -38
  24. mlx_cluster-0.0.4/tests/test_rejection_sampling.py +0 -35
  25. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/CMakeLists.txt +0 -0
  26. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/LICENSE +0 -0
  27. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/MANIFEST.in +0 -0
  28. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/mlx_cluster/__init__.py +0 -0
  29. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/mlx_cluster.egg-info/dependency_links.txt +0 -0
  30. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/mlx_cluster.egg-info/not-zip-safe +0 -0
  31. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/mlx_cluster.egg-info/top_level.txt +0 -0
  32. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/random_walks/random_walk.metal +0 -0
  33. {mlx_cluster-0.0.4 → mlx_cluster-0.0.5}/setup.cfg +0 -0
@@ -1,7 +1,7 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: mlx_cluster
- Version: 0.0.4
- Summary: C++ and Metal extensions for MLX CTC Loss
+ Version: 0.0.5
+ Summary: C++ extension for generating random graphs
  Author-email: Vinay Pandya <vinayharshadpandya27@gmail.com>
  Project-URL: Homepage, https://github.com/vinayhpandya/mlx_cluster
  Project-URL: Issues, https://github.com/vinayhpandya/mlx_cluster/Issues
@@ -9,17 +9,22 @@ Classifier: Development Status :: 3 - Alpha
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: C++
  Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
+ Classifier: Operating System :: MacOS
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Provides-Extra: dev
  Provides-Extra: test
- Requires-Dist: mlx_graphs==0.0.7; extra == "test"
- Requires-Dist: torch==2.2.0; extra == "test"
- Requires-Dist: mlx>=0.17.0; extra == "test"
+ Requires-Dist: mlx-graphs>=0.0.8; extra == "test"
+ Requires-Dist: torch>=2.2.0; extra == "test"
+ Requires-Dist: mlx>=0.26.0; extra == "test"
  Requires-Dist: pytest==7.4.4; extra == "test"
- Requires-Dist: scipy==1.12.0; extra == "test"
+ Requires-Dist: scipy>=1.13.0; extra == "test"
+ Requires-Dist: requests==2.31.0; extra == "test"
+ Requires-Dist: fsspec[http]==2024.2.0; extra == "test"
+ Requires-Dist: tqdm==4.66.1; extra == "test"
+ Dynamic: license-file
+ Dynamic: requires-python

  # mlx_cluster

@@ -52,24 +57,37 @@ for testing purposes you need to have `mlx-graphs` and `torch_geometric` instal


  ```
- from mlx_graphs.utils.sorting import sort_edge_index
- from mlx_graphs.loaders import Dataloader
- from mlx_graphs_extension import random_walk
+ # Can also use mlx for generating starting indices
+ import torch
+ from torch.utils.data import DataLoader
+
+ loader = DataLoader(range(2708), batch_size=2000)
+ start_indices = next(iter(loader))


+ from mlx_graphs.datasets import PlanetoidDataset
+ from mlx_graphs.utils.sorting import sort_edge_index
+ from torch.utils.data import DataLoader
+ from mlx_cluster import random_walk
+
  cora_dataset = PlanetoidDataset(name="cora", base_dir="~")
- start = mx.arange(0, 1000)
- start_time = time.time()
+ # For some reason int_64t and int_32t are not compatible
  edge_index = cora_dataset.graphs[0].edge_index.astype(mx.int64)
- num_nodes = cora_dataset.graphs[0].num_nodes
+
+ # Convert edge index into a CSR matrix
  sorted_edge_index = sort_edge_index(edge_index=edge_index)
  row_mlx = sorted_edge_index[0][0]
  col_mlx = sorted_edge_index[0][1]
- unique_vals, counts_mlx = np.unique(np.array(row_mlx, copy=False), return_counts=True)
+ _, counts_mlx = np.unique(np.array(row_mlx, copy=False), return_counts=True)
  cum_sum_mlx = counts_mlx.cumsum()
- rand = mx.random.uniform(shape=[start.shape[0], 100])
  row_ptr_mlx = mx.concatenate([mx.array([0]), mx.array(cum_sum_mlx)])
- random_walk(row_ptr_mlx, col_mlx, start, rand, 1000, stream = mx.gpu)
+ start_indices = mx.array(start_indices.numpy())
+
+ rand_data = mx.random.uniform(shape=[start_indices.shape[0], 5])
+
+ node_sequence = random_walk(
+     row_ptr_mlx, col_mlx, start_indices, rand_data, 5, stream=mx.cpu
+ )
  ```

  ## TODO
@@ -29,24 +29,37 @@ for testing purposes you need to have `mlx-graphs` and `torch_geometric` instal


  ```
- from mlx_graphs.utils.sorting import sort_edge_index
- from mlx_graphs.loaders import Dataloader
- from mlx_graphs_extension import random_walk
+ # Can also use mlx for generating starting indices
+ import torch
+ from torch.utils.data import DataLoader
+
+ loader = DataLoader(range(2708), batch_size=2000)
+ start_indices = next(iter(loader))


+ from mlx_graphs.datasets import PlanetoidDataset
+ from mlx_graphs.utils.sorting import sort_edge_index
+ from torch.utils.data import DataLoader
+ from mlx_cluster import random_walk
+
  cora_dataset = PlanetoidDataset(name="cora", base_dir="~")
- start = mx.arange(0, 1000)
- start_time = time.time()
+ # For some reason int_64t and int_32t are not compatible
  edge_index = cora_dataset.graphs[0].edge_index.astype(mx.int64)
- num_nodes = cora_dataset.graphs[0].num_nodes
+
+ # Convert edge index into a CSR matrix
  sorted_edge_index = sort_edge_index(edge_index=edge_index)
  row_mlx = sorted_edge_index[0][0]
  col_mlx = sorted_edge_index[0][1]
- unique_vals, counts_mlx = np.unique(np.array(row_mlx, copy=False), return_counts=True)
+ _, counts_mlx = np.unique(np.array(row_mlx, copy=False), return_counts=True)
  cum_sum_mlx = counts_mlx.cumsum()
- rand = mx.random.uniform(shape=[start.shape[0], 100])
  row_ptr_mlx = mx.concatenate([mx.array([0]), mx.array(cum_sum_mlx)])
- random_walk(row_ptr_mlx, col_mlx, start, rand, 1000, stream = mx.gpu)
+ start_indices = mx.array(start_indices.numpy())
+
+ rand_data = mx.random.uniform(shape=[start_indices.shape[0], 5])
+
+ node_sequence = random_walk(
+     row_ptr_mlx, col_mlx, start_indices, rand_data, 5, stream=mx.cpu
+ )
  ```

  ## TODO
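For reference, the added README snippet above can be assembled into a single runnable sketch; this assumes mlx, mlx-graphs, numpy, and torch are installed, and that `random_walk` returns a `(nodes, edges)` pair as in the new bindings.

```
# Sketch assembled from the README lines added in 0.0.5 (not part of the package itself)
import mlx.core as mx
import numpy as np
from torch.utils.data import DataLoader

from mlx_graphs.datasets import PlanetoidDataset
from mlx_graphs.utils.sorting import sort_edge_index
from mlx_cluster import random_walk

# Starting indices (an mx.arange would work just as well)
loader = DataLoader(range(2708), batch_size=2000)
start_indices = mx.array(next(iter(loader)).numpy())

# Cora edge index as int64, converted to CSR (rowptr, col)
cora = PlanetoidDataset(name="cora", base_dir="~")
edge_index = cora.graphs[0].edge_index.astype(mx.int64)
sorted_edge_index = sort_edge_index(edge_index=edge_index)
row_mlx, col_mlx = sorted_edge_index[0][0], sorted_edge_index[0][1]
_, counts = np.unique(np.array(row_mlx, copy=False), return_counts=True)
row_ptr_mlx = mx.concatenate([mx.array([0]), mx.array(counts.cumsum())])

# One uniform random number per (start node, step)
rand_data = mx.random.uniform(shape=[start_indices.shape[0], 5])

# nodes has shape (num_starts, walk_length + 1), edges has shape (num_starts, walk_length)
nodes, edges = random_walk(
    row_ptr_mlx, col_mlx, start_indices, rand_data, 5, stream=mx.cpu
)
```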
@@ -0,0 +1,81 @@
+ #include <nanobind/nanobind.h>
+ #include <nanobind/stl/variant.h>
+ #include <random_walks/RandomWalk.h>
+ #include <random_walks/BiasedRandomWalk.h>
+
+ namespace nb = nanobind;
+ using namespace nb::literals;
+ using namespace mlx::core;
+
+ NB_MODULE(_ext, m){
+
+ m.def(
+ "random_walk",
+ [](const mx::array& rowptr,
+ const mx::array& col,
+ const mx::array& start,
+ const mx::array& rand,
+ int walk_length,
+ nb::object stream = nb::none()) {
+
+ // call the real C++ implementation
+ auto outs = mlx_random_walk::random_walk(
+ rowptr, col, start, rand, walk_length,
+ stream.is_none() ? mx::StreamOrDevice{}
+ : nb::cast<mx::StreamOrDevice>(stream));
+
+ // vector -> tuple (move avoids a copy)
+ return nb::make_tuple(std::move(outs[0]), std::move(outs[1]));
+ },
+ "rowptr"_a, "col"_a, "start"_a, "rand"_a, "walk_length"_a,
+ nb::kw_only(), "stream"_a = nb::none(),
+ R"(
+ Uniform random walks.
+
+ Returns:
+ (nodes, edges) tuple of arrays
+ )",
+ nb::rv_policy::move);
+
+ m.def(
+ "rejection_sampling",
+ [](const mx::array& rowptr,
+ const mx::array& col,
+ const mx::array& start,
+ int walk_length,
+ float p,
+ float q,
+ nb::object stream = nb::none()
+ ){
+ auto outs = mlx_biased_random_walk::rejection_sampling(
+ rowptr, col, start, walk_length, p, q,
+ stream.is_none() ? mx::StreamOrDevice{}
+ : nb::cast<mx::StreamOrDevice>(stream));
+ return nb::make_tuple(std::move(outs[0]), std::move(outs[1]));
+ },
+ "rowptr"_a,
+ "col"_a,
+ "start"_a,
+ "walk_length"_a,
+ "p"_a,
+ "q"_a,
+ nb::kw_only(), "stream"_a = nb::none(),
+ R"(
+ Sample nodes from the graph by sampling neighbors based
+ on probablity p and q
+
+ Args:
+ rowptr (array): rowptr of graph in csr format.
+ col (array): edges in csr format.
+ start (array): starting node of graph from which
+ biased sampling will be performed.
+ walk_length (int) : walk length of random graph
+ p : Likelihood of immediately revisiting a node in the walk.
+ q : Control parameter to interpolate between
+ breadth-first strategy and depth-first strategy
+
+ Returns:
+ (nodes, edges) tuple of arrays
+ )",
+ nb::rv_policy::move);
+ }
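A minimal call sketch for the two rebound entry points, using a hypothetical toy CSR graph rather than Cora; both now return a `(nodes, edges)` tuple, and `stream` is keyword-only.

```
# Hypothetical toy example (not from the package docs): a 3-node triangle in CSR form
import mlx.core as mx
from mlx_cluster import random_walk, rejection_sampling

rowptr = mx.array([0, 2, 4, 6], dtype=mx.int64)   # node i's neighbors live in col[rowptr[i]:rowptr[i+1]]
col = mx.array([1, 2, 0, 2, 0, 1], dtype=mx.int64)
start = mx.array([0, 1, 2], dtype=mx.int64)

walk_length = 4
rand = mx.random.uniform(shape=[start.shape[0], walk_length])

# Uniform walks: nodes is (num_starts, walk_length + 1), edges is (num_starts, walk_length)
nodes, edges = random_walk(rowptr, col, start, rand, walk_length, stream=mx.cpu)

# Biased (node2vec-style) walks: p is the return parameter, q the in-out parameter
nodes_b, edges_b = rejection_sampling(rowptr, col, start, walk_length, 1.0, 3.0, stream=mx.cpu)
```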
@@ -1,7 +1,7 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: mlx_cluster
- Version: 0.0.4
- Summary: C++ and Metal extensions for MLX CTC Loss
+ Version: 0.0.5
+ Summary: C++ extension for generating random graphs
  Author-email: Vinay Pandya <vinayharshadpandya27@gmail.com>
  Project-URL: Homepage, https://github.com/vinayhpandya/mlx_cluster
  Project-URL: Issues, https://github.com/vinayhpandya/mlx_cluster/Issues
@@ -9,17 +9,22 @@ Classifier: Development Status :: 3 - Alpha
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: C++
  Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
+ Classifier: Operating System :: MacOS
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Provides-Extra: dev
  Provides-Extra: test
- Requires-Dist: mlx_graphs==0.0.7; extra == "test"
- Requires-Dist: torch==2.2.0; extra == "test"
- Requires-Dist: mlx>=0.17.0; extra == "test"
+ Requires-Dist: mlx-graphs>=0.0.8; extra == "test"
+ Requires-Dist: torch>=2.2.0; extra == "test"
+ Requires-Dist: mlx>=0.26.0; extra == "test"
  Requires-Dist: pytest==7.4.4; extra == "test"
- Requires-Dist: scipy==1.12.0; extra == "test"
+ Requires-Dist: scipy>=1.13.0; extra == "test"
+ Requires-Dist: requests==2.31.0; extra == "test"
+ Requires-Dist: fsspec[http]==2024.2.0; extra == "test"
+ Requires-Dist: tqdm==4.66.1; extra == "test"
+ Dynamic: license-file
+ Dynamic: requires-python

  # mlx_cluster

@@ -52,24 +57,37 @@ for testing purposes you need to have `mlx-graphs` and `torch_geometric` instal


  ```
- from mlx_graphs.utils.sorting import sort_edge_index
- from mlx_graphs.loaders import Dataloader
- from mlx_graphs_extension import random_walk
+ # Can also use mlx for generating starting indices
+ import torch
+ from torch.utils.data import DataLoader
+
+ loader = DataLoader(range(2708), batch_size=2000)
+ start_indices = next(iter(loader))


+ from mlx_graphs.datasets import PlanetoidDataset
+ from mlx_graphs.utils.sorting import sort_edge_index
+ from torch.utils.data import DataLoader
+ from mlx_cluster import random_walk
+
  cora_dataset = PlanetoidDataset(name="cora", base_dir="~")
- start = mx.arange(0, 1000)
- start_time = time.time()
+ # For some reason int_64t and int_32t are not compatible
  edge_index = cora_dataset.graphs[0].edge_index.astype(mx.int64)
- num_nodes = cora_dataset.graphs[0].num_nodes
+
+ # Convert edge index into a CSR matrix
  sorted_edge_index = sort_edge_index(edge_index=edge_index)
  row_mlx = sorted_edge_index[0][0]
  col_mlx = sorted_edge_index[0][1]
- unique_vals, counts_mlx = np.unique(np.array(row_mlx, copy=False), return_counts=True)
+ _, counts_mlx = np.unique(np.array(row_mlx, copy=False), return_counts=True)
  cum_sum_mlx = counts_mlx.cumsum()
- rand = mx.random.uniform(shape=[start.shape[0], 100])
  row_ptr_mlx = mx.concatenate([mx.array([0]), mx.array(cum_sum_mlx)])
- random_walk(row_ptr_mlx, col_mlx, start, rand, 1000, stream = mx.gpu)
+ start_indices = mx.array(start_indices.numpy())
+
+ rand_data = mx.random.uniform(shape=[start_indices.shape[0], 5])
+
+ node_sequence = random_walk(
+     row_ptr_mlx, col_mlx, start_indices, rand_data, 5, stream=mx.cpu
+ )
  ```

  ## TODO
@@ -6,9 +6,6 @@ bindings.cpp
  pyproject.toml
  setup.py
  mlx_cluster/__init__.py
- mlx_cluster/_ext.cpython-311-darwin.so
- mlx_cluster/libmlx.dylib
- mlx_cluster/libmlx_cluster.dylib
  mlx_cluster/mlx_cluster.metallib
  mlx_cluster.egg-info/PKG-INFO
  mlx_cluster.egg-info/SOURCES.txt
@@ -0,0 +1,12 @@
+
+ [dev]
+
+ [test]
+ mlx-graphs>=0.0.8
+ torch>=2.2.0
+ mlx>=0.26.0
+ pytest==7.4.4
+ scipy>=1.13.0
+ requests==2.31.0
+ fsspec[http]==2024.2.0
+ tqdm==4.66.1
@@ -1,41 +1,48 @@
  [project]
  name = "mlx_cluster"
- version = "0.0.4"
+ version = "0.0.5"
  authors = [
  { name = "Vinay Pandya", email = "vinayharshadpandya27@gmail.com" },
  ]
- description = "C++ and Metal extensions for MLX CTC Loss"
+ description = "C++ extension for generating random graphs"
  readme = "README.md"
- requires-python = ">=3.8"
+ requires-python = ">=3.10"
  classifiers = [
  "Development Status :: 3 - Alpha",
  "Programming Language :: Python :: 3",
  "Programming Language :: C++",
  "License :: OSI Approved :: MIT License",
- "Operating System :: OS Independent",
+ "Operating System :: MacOS",
  ]

  [project.optional-dependencies]
  dev = []
  test = [
- "mlx_graphs==0.0.7",
- "torch==2.2.0",
- "mlx>=0.17.0",
+ "mlx-graphs>=0.0.8",
+ "torch>=2.2.0",
+ "mlx>=0.26.0",
  "pytest==7.4.4",
- "scipy==1.12.0",
+ "scipy>=1.13.0",
+ "requests==2.31.0",
+ "fsspec[http]==2024.2.0",
+ "tqdm==4.66.1",
  ]
+
  [project.urls]
  Homepage = "https://github.com/vinayhpandya/mlx_cluster"
  Issues = "https://github.com/vinayhpandya/mlx_cluster/Issues"

+ [tool.pytest.ini_options]
+ addopts = "-ra"
+ markers = [
+ "slow: marks tests that download data, compile kernels, or are otherwise time-consuming (deselect with -m 'not slow')",
+ ]

  [build-system]
  requires = [
+ "nanobind==2.4.0",
  "setuptools>=42",
  "cmake>=3.24",
- "mlx==0.18.0",
- "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4",
+ "mlx>=0.26.0",
  ]
-
-
- build-backend = "setuptools.build_meta"
+ build-backend = "setuptools.build_meta"
@@ -15,7 +15,8 @@
  #endif
  #include "random_walks/BiasedRandomWalk.h"

- namespace mlx::core {
+
+ namespace mlx_biased_random_walk {

  bool inline is_neighbor(const int64_t *rowptr, const int64_t *col, int64_t v,
  int64_t w) {
@@ -27,21 +28,20 @@ namespace mlx::core {
  return false;
  }

- void BiasedRandomWalk::eval_cpu(const std::vector<array>& inputs, std::vector<array>& outputs) {
+ void BiasedRandomWalk::eval_cpu(const std::vector<mx::array>& inputs, std::vector<mx::array>& outputs) {
  auto& rowptr = inputs[0];
  auto& col = inputs[1];
  auto& start = inputs[2];
  auto& rand = inputs[3];
  int numel = start.size();
-
+ std::cout<<"Inside biased random walk"<<std::endl;
  // Initialize outputs
  assert(outputs.size() == 2);
  // Allocate memory for outputs if not already allocated
- outputs[0].set_data(allocator::malloc_or_wait(numel*(walk_length_+1)*sizeof(int64_t)));
- outputs[1].set_data(allocator::malloc_or_wait(numel*walk_length_*sizeof(int64_t)));
+ outputs[0].set_data(mx::allocator::malloc(numel*(walk_length_+1)*sizeof(int64_t)));
+ outputs[1].set_data(mx::allocator::malloc(numel*walk_length_*sizeof(int64_t)));
  auto& n_out = outputs[0];
  auto& e_out = outputs[1];
-
  auto* n_out_ptr = n_out.data<int64_t>();
  auto* e_out_ptr = e_out.data<int64_t>();
  auto* start_values = start.data<int64_t>();
@@ -53,7 +53,7 @@ namespace mlx::core {
  double prob_0 = 1. / p_ / max_prob;
  double prob_1 = 1. / max_prob;
  double prob_2 = 1. / q_ / max_prob;
-
+
  for (int64_t n = 0; n < numel; n++) {
  int64_t t = start_values[n], v, x, e_cur, row_start, row_end;
  n_out_ptr[n * (walk_length_ + 1)] = t;
@@ -91,7 +91,6 @@ namespace mlx::core {
  break;
  }
  }
-
  n_out_ptr[n * (walk_length_ + 1) + (l + 1)] = x;
  e_out_ptr[n * walk_length_ + l] = e_cur;
  t = v;
@@ -101,9 +100,9 @@ namespace mlx::core {

  };

- std::vector<array> BiasedRandomWalk::jvp(
- const std::vector<array>& primals,
- const std::vector<array>& tangents,
+ std::vector<mx::array> BiasedRandomWalk::jvp(
+ const std::vector<mx::array>& primals,
+ const std::vector<mx::array>& tangents,
  const std::vector<int>& argnums)
  {
  // Random walk is not differentiable, so we return zero tangents
@@ -121,8 +120,8 @@ namespace mlx::core {
  // int numel = start.size();

  // assert(outputs.size() == 2);
- // outputs[0].set_data(allocator::malloc_or_wait(numel * (walk_length_ + 1) * sizeof(int64_t)));
- // outputs[1].set_data(allocator::malloc_or_wait(numel * walk_length_ * sizeof(int64_t)));
+ // outputs[0].set_data(allocator::malloc(numel * (walk_length_ + 1) * sizeof(int64_t)));
+ // outputs[1].set_data(allocator::malloc(numel * walk_length_ * sizeof(int64_t)));
  // std::cout<<"after setting data"<<std::endl;
  // auto& s = stream();
  // auto& d = metal::device(s.device);
@@ -148,47 +147,42 @@ namespace mlx::core {
  // }
  // #endif
  void BiasedRandomWalk::eval_gpu(
- const std::vector<array>& inputs, std::vector<array>& outputs
+ const std::vector<mx::array>& inputs, std::vector<mx::array>& outputs
  )
  {
  throw std::runtime_error("Random walk has no GPU implementation.");
  }
- std::vector<array> BiasedRandomWalk::vjp(
- const std::vector<array>& primals,
- const std::vector<array>& cotangents,
+ std::vector<mx::array> BiasedRandomWalk::vjp(
+ const std::vector<mx::array>& primals,
+ const std::vector<mx::array>& cotangents,
  const std::vector<int>& argnums,
- const std::vector<array>& outputs)
+ const std::vector<mx::array>& outputs)
  {
  // Random walk is not differentiable, so we return zero gradients
  throw std::runtime_error("Random walk has no JVP implementation.");
  }

- std::pair<std::vector<array>, std::vector<int>> BiasedRandomWalk::vmap(
- const std::vector<array>& inputs,
+ std::pair<std::vector<mx::array>, std::vector<int>> BiasedRandomWalk::vmap(
+ const std::vector<mx::array>& inputs,
  const std::vector<int>& axes)
  {
  throw std::runtime_error("vmap not implemented for biasedRandomWalk");
  }

- bool BiasedRandomWalk::is_equivalent(const Primitive& other) const
- {
- throw std::runtime_error("biased Random walk has no GPU implementation.");
- }
-
- std::vector<std::vector<int>> BiasedRandomWalk::output_shapes(const std::vector<array>& inputs)
+ bool BiasedRandomWalk::is_equivalent(const mx::Primitive& other) const
  {
  throw std::runtime_error("biased Random walk has no GPU implementation.");
  }

- array rejection_sampling(const array& rowptr, const array& col, const array& start, int walk_length, const double p,
- const double q, StreamOrDevice s)
+ std::vector<mx::array> rejection_sampling(const mx::array& rowptr, const mx::array& col, const mx::array& start, int walk_length, const double p,
+ const double q, mx::StreamOrDevice s)
  {
  int nodes = start.size();
  auto primitive = std::make_shared<BiasedRandomWalk>(to_stream(s), walk_length, p, q);
- return array::make_arrays({{nodes,walk_length+1},{nodes, walk_length}},
+ return mx::array::make_arrays({{nodes,walk_length+1},{nodes, walk_length}},
  {rowptr.dtype(), rowptr.dtype()},
  primitive,
  {rowptr, col, start}
- )[0];
+ );
  }
  }
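The `prob_0` / `prob_1` / `prob_2` values above follow the usual node2vec rejection-sampling scheme; the following is a rough Python rendering of that acceptance rule, offered as an interpretation rather than a transcript of the C++ loop.

```
import random

def accept_candidate(x, t, t_neighbors, p, q):
    """Accept or reject candidate x as the next step, where t is the previously
    visited node and t_neighbors are t's neighbors (x is a neighbor of the
    current node). Mirrors prob_0 = 1/p/max_prob, prob_1 = 1/max_prob,
    prob_2 = 1/q/max_prob from the source above."""
    max_prob = max(1.0 / p, 1.0, 1.0 / q)
    if x == t:                  # walking straight back to the previous node
        threshold = (1.0 / p) / max_prob
    elif x in t_neighbors:      # staying at distance 1 from the previous node
        threshold = 1.0 / max_prob
    else:                       # moving further away (BFS/DFS trade-off via q)
        threshold = (1.0 / q) / max_prob
    return random.random() < threshold
```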
@@ -0,0 +1,65 @@
+ #pragma once
+
+ #include <mlx/array.h>
+ #include <mlx/ops.h>
+ #include <mlx/primitives.h>
+
+ namespace mx = mlx::core;
+ namespace mlx_biased_random_walk{
+
+ class BiasedRandomWalk : public mx::Primitive {
+ public:
+ BiasedRandomWalk(mx::Stream stream, int walk_length, double p, double q)
+ : mx::Primitive(stream), walk_length_(walk_length), p_(p), q_(q) {}
+ void eval_cpu(const std::vector<mx::array>& inputs, std::vector<mx::array>& outputs)
+ override;
+ void eval_gpu(const std::vector<mx::array>& inputs, std::vector<mx::array>& outputs)
+ override;
+
+ /** The Jacobian-vector product. */
+ std::vector<mx::array> jvp(
+ const std::vector<mx::array>& primals,
+ const std::vector<mx::array>& tangents,
+ const std::vector<int>& argnums) override;
+
+ /** The vector-Jacobian product. */
+ std::vector<mx::array> vjp(
+ const std::vector<mx::array>& primals,
+ const std::vector<mx::array>& cotangents,
+ const std::vector<int>& argnums,
+ const std::vector<mx::array>& outputs) override;
+
+ /**
+ * The primitive must know how to vectorize itself across
+ * the given axes. The output is a pair containing the array
+ * representing the vectorized computation and the axis which
+ * corresponds to the output vectorized dimension.
+ */
+ std::pair<std::vector<mx::array>, std::vector<int>> vmap(
+ const std::vector<mx::array>& inputs,
+ const std::vector<int>& axes) override;
+
+ /** Print the primitive. */
+ virtual const char* name() const override {
+ return "biased random walk implementation";
+ }
+
+ /** Equivalence check **/
+ bool is_equivalent(const mx::Primitive& other) const override;
+
+ private:
+ int walk_length_;
+ double p_;
+ double q_;
+
+ };
+
+ std::vector<mx::array> rejection_sampling(const mx::array& rowptr,
+ const mx::array& col,
+ const mx::array& start,
+ int walk_length,
+ const double p,
+ const double q,
+ mx::StreamOrDevice s = {});
+
+ };
@@ -1,7 +1,7 @@
  #include <cassert>
  #include <iostream>
  #include <sstream>
-
+ #include <dlfcn.h>
  #include "mlx/backend/common/copy.h"
  #include "mlx/backend/common/utils.h"
  #include "mlx/utils.h"
@@ -14,19 +14,31 @@
  #endif
  #include "random_walks/RandomWalk.h"

- namespace mlx::core {
- void RandomWalk::eval_cpu(const std::vector<array>& inputs, std::vector<array>& outputs) {
+
+ namespace mlx_random_walk {
+ std::string current_binary_dir() {
+ static std::string binary_dir = []() {
+ Dl_info info;
+ if (!dladdr(reinterpret_cast<void*>(&current_binary_dir), &info)) {
+ throw std::runtime_error("Unable to get current binary dir.");
+ }
+ return std::filesystem::path(info.dli_fname).parent_path().string();
+ }();
+ return binary_dir;
+ }
+
+ void RandomWalk::eval_cpu(const std::vector<mx::array>& inputs, std::vector<mx::array>& outputs) {
  auto& rowptr = inputs[0];
  auto& col = inputs[1];
  auto& start = inputs[2];
  auto& rand = inputs[3];
  int numel = start.size();
-
+ std::cout<<"Its really inside cpu"<<std::endl;
  // Initialize outputs
  assert(outputs.size() == 2);
  // Allocate memory for outputs if not already allocated
- outputs[0].set_data(allocator::malloc_or_wait(numel*(walk_length_+1)*sizeof(int64_t)));
- outputs[1].set_data(allocator::malloc_or_wait(numel*walk_length_*sizeof(int64_t)));
+ outputs[0].set_data(mx::allocator::malloc(numel*(walk_length_+1)*sizeof(int64_t)));
+ outputs[1].set_data(mx::allocator::malloc(numel*walk_length_*sizeof(int64_t)));
  auto& n_out = outputs[0];
  auto& e_out = outputs[1];

@@ -37,7 +49,6 @@ namespace mlx::core {
  auto* col_values = col.data<int64_t>();
  auto* rand_values = rand.data<float>();

- std::cout<<"After evaluating outputs"<<std::endl;
  for (int64_t n = 0; n < numel; n++) {
  int64_t n_cur = start_values[n];
  n_out_ptr[n * (walk_length_ + 1)] = n_cur;
@@ -61,9 +72,9 @@ namespace mlx::core {

  };

- std::vector<array> RandomWalk::jvp(
- const std::vector<array>& primals,
- const std::vector<array>& tangents,
+ std::vector<mx::array> RandomWalk::jvp(
+ const std::vector<mx::array>& primals,
+ const std::vector<mx::array>& tangents,
  const std::vector<int>& argnums)
  {
  // Random walk is not differentiable, so we return zero tangents
@@ -71,8 +82,8 @@ namespace mlx::core {
  }
  #ifdef _METAL_
  void RandomWalk::eval_gpu(
- const std::vector<array>& inputs,
- std::vector<array>& outputs
+ const std::vector<mx::array>& inputs,
+ std::vector<mx::array>& outputs
  ){
  auto& rowptr = inputs[0];
  auto& col = inputs[1];
@@ -81,17 +92,16 @@ void RandomWalk::eval_gpu(
  int numel = start.size();

  assert(outputs.size() == 2);
- outputs[0].set_data(allocator::malloc_or_wait(numel * (walk_length_ + 1) * sizeof(int64_t)));
- outputs[1].set_data(allocator::malloc_or_wait(numel * walk_length_ * sizeof(int64_t)));
- std::cout<<"after setting data"<<std::endl;
+ outputs[0].set_data(mx::allocator::malloc(numel * (walk_length_ + 1) * sizeof(int64_t)));
+ outputs[1].set_data(mx::allocator::malloc(numel * walk_length_ * sizeof(int64_t)));
  auto& s = stream();
- auto& d = metal::device(s.device);
-
- d.register_library("mlx_cluster");
- auto kernel = d.get_kernel("random_walk", "mlx_cluster");
+ auto& d = mx::metal::device(s.device);
+ std::cout<<"Its really inside gpu"<<std::endl;
+ auto lib = d.get_library("mlx_cluster", current_binary_dir());
+ auto kernel = d.get_kernel("random_walk", lib);

  auto& compute_encoder = d.get_command_encoder(s.index);
- compute_encoder->setComputePipelineState(kernel);
+ compute_encoder.set_compute_pipeline_state(kernel);

  compute_encoder.set_input_array(rowptr, 0);
  compute_encoder.set_input_array(col, 1);
@@ -99,51 +109,46 @@ void RandomWalk::eval_gpu(
  compute_encoder.set_input_array(rand, 3);
  compute_encoder.set_output_array(outputs[0], 4);
  compute_encoder.set_output_array(outputs[1], 5);
- compute_encoder->setBytes(&walk_length_, sizeof(int32), 6);
+ compute_encoder.set_bytes(&walk_length_, sizeof(walk_length_), 6);

  MTL::Size grid_size = MTL::Size(numel, 1, 1);
  MTL::Size thread_group_size = MTL::Size(kernel->maxTotalThreadsPerThreadgroup(), 1, 1);

- compute_encoder.dispatchThreads(grid_size, thread_group_size);
+ compute_encoder.dispatch_threads(grid_size, thread_group_size);
  }
  #endif

- std::vector<array> RandomWalk::vjp(
- const std::vector<array>& primals,
- const std::vector<array>& cotangents,
+ std::vector<mx::array> RandomWalk::vjp(
+ const std::vector<mx::array>& primals,
+ const std::vector<mx::array>& cotangents,
  const std::vector<int>& argnums,
- const std::vector<array>& outputs)
+ const std::vector<mx::array>& outputs)
  {
  // Random walk is not differentiable, so we return zero gradients
  throw std::runtime_error("Random walk has no GPU implementation.");
  }

- std::pair<std::vector<array>, std::vector<int>> RandomWalk::vmap(
- const std::vector<array>& inputs,
+ std::pair<std::vector<mx::array>, std::vector<int>> RandomWalk::vmap(
+ const std::vector<mx::array>& inputs,
  const std::vector<int>& axes)
  {
  throw std::runtime_error("vmap not implemented for RandomWalk");
  }

- bool RandomWalk::is_equivalent(const Primitive& other) const
- {
- throw std::runtime_error("Random walk has no GPU implementation.");
- }
-
- std::vector<std::vector<int>> RandomWalk::output_shapes(const std::vector<array>& inputs)
+ bool RandomWalk::is_equivalent(const mx::Primitive& other) const
  {
  throw std::runtime_error("Random walk has no GPU implementation.");
  }

- array random_walk(const array& rowptr, const array& col, const array& start, const array& rand, int walk_length, StreamOrDevice s)
+ std::vector<mx::array> random_walk(const mx::array& rowptr, const mx::array& col, const mx::array& start, const mx::array& rand, int walk_length, mx::StreamOrDevice s)
  {
  std::cout<<"Inside random walk"<<std::endl;
  int nodes = start.size();
  auto primitive = std::make_shared<RandomWalk>(walk_length, to_stream(s));
- return array::make_arrays({{nodes,walk_length+1},{nodes, walk_length}},
+ return mx::array::make_arrays({{nodes,walk_length+1},{nodes, walk_length}},
  {start.dtype(), start.dtype()},
  primitive,
  {rowptr, col, start, rand}
- )[0];
+ );
  }
  }
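For intuition about what the uniform walk computes over the CSR inputs, here is a small NumPy sketch under the same `rowptr`/`col` conventions and output shapes; it is an illustration, not the extension's code path.

```
# Illustrative NumPy sketch of a uniform CSR random walk (assumption, not the kernel)
import numpy as np

def uniform_random_walk(rowptr, col, start, rand):
    """rand has shape (num_starts, walk_length); each entry in [0, 1) picks a neighbor."""
    num_starts, walk_length = rand.shape
    nodes = np.empty((num_starts, walk_length + 1), dtype=np.int64)
    nodes[:, 0] = start
    for n in range(num_starts):
        cur = start[n]
        for l in range(walk_length):
            row_start, row_end = rowptr[cur], rowptr[cur + 1]
            degree = row_end - row_start
            if degree > 0:  # isolated nodes simply stay in place
                cur = col[row_start + int(rand[n, l] * degree)]
            nodes[n, l + 1] = cur
    return nodes
```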
@@ -0,0 +1,62 @@
+ #pragma once
+
+ #include <mlx/array.h>
+ #include <mlx/ops.h>
+ #include <mlx/primitives.h>
+
+ namespace mx = mlx::core;
+ namespace mlx_random_walk{
+
+ class RandomWalk : public mx::Primitive {
+ public:
+ explicit RandomWalk(int walk_length, mx::Stream stream):
+ mx::Primitive(stream), walk_length_(walk_length) {};
+ void eval_cpu(const std::vector<mx::array>& inputs, std::vector<mx::array>& outputs)
+ override;
+ void eval_gpu(const std::vector<mx::array>& inputs, std::vector<mx::array>& outputs)
+ override;
+
+ /** The Jacobian-vector product. */
+ std::vector<mx::array> jvp(
+ const std::vector<mx::array>& primals,
+ const std::vector<mx::array>& tangents,
+ const std::vector<int>& argnums) override;
+
+ /** The vector-Jacobian product. */
+ std::vector<mx::array> vjp(
+ const std::vector<mx::array>& primals,
+ const std::vector<mx::array>& cotangents,
+ const std::vector<int>& argnums,
+ const std::vector<mx::array>& outputs) override;
+
+ /**
+ * The primitive must know how to vectorize itself across
+ * the given axes. The output is a pair containing the array
+ * representing the vectorized computation and the axis which
+ * corresponds to the output vectorized dimension.
+ */
+ std::pair<std::vector<mx::array>, std::vector<int>> vmap(
+ const std::vector<mx::array>& inputs,
+ const std::vector<int>& axes) override;
+
+ /** Print the primitive. */
+ virtual const char* name() const override {
+ return "Random walk implementation";
+ }
+
+ /** Equivalence check **/
+ bool is_equivalent(const mx::Primitive& other) const override;
+
+ private:
+ int walk_length_;
+
+ };
+
+ std::vector<mx::array> random_walk(const mx::array& rowptr,
+ const mx::array& col,
+ const mx::array& start,
+ const mx::array& rand,
+ int walk_length,
+ mx::StreamOrDevice s = {});
+
+ };
@@ -4,7 +4,7 @@ from mlx import extension
  if __name__ == "__main__":
  setup(
  name="mlx_cluster",
- version="0.0.4",
+ version="0.0.5",
  description="Sample C++ and Metal extensions for MLX primitives.",
  ext_modules=[extension.CMakeExtension("mlx_cluster._ext")],
  cmdclass={"build_ext": extension.CMakeBuild},
@@ -0,0 +1,72 @@
+ import mlx.core as mx
+ import numpy as np
+ import time
+
+ # Torch dataset
+ import torch
+ from torch.utils.data import DataLoader
+
+ loader = DataLoader(range(2708), batch_size=2000)
+ start_indices = next(iter(loader))
+
+
+ from mlx_graphs.datasets import PlanetoidDataset
+ from mlx_graphs.utils.sorting import sort_edge_index
+ from torch.utils.data import DataLoader
+ from mlx_cluster import random_walk
+ import pytest
+ import time
+
+ @pytest.mark.slow # give download/compile plenty of time on CI
+ def test_random_walk(tmp_path):
+ """
+ Runs 1 000 random walks of length 10 on the Cora graph and checks:
+ 1. output tensor shape == (num_start_nodes, walk_length + 1)
+ 2. all returned node indices are valid ( < num_nodes )
+ """
+
+ # ---------- Dataset (downloaded to the temp dir) ----------
+ data_dir = tmp_path / "mlx_datasets"
+ cora = PlanetoidDataset(name="cora", base_dir=data_dir)
+
+ edge_index = cora.graphs[0].edge_index.astype(mx.int64)
+
+ # CSR conversion
+ sorted_edge_index = sort_edge_index(edge_index=edge_index)
+ row = sorted_edge_index[0][0]
+ col = sorted_edge_index[0][1]
+ _, counts = np.unique(np.array(row, copy=False), return_counts=True)
+ row_ptr = mx.concatenate([mx.array([0]), mx.array(counts.cumsum())])
+
+ # pick 1 000 random start nodes
+ num_starts = 1_000
+ rng = np.random.default_rng(42)
+ start_idx = mx.array(rng.integers(low=0, high=row.max().item() + 1,
+ size=num_starts, dtype=np.int64))
+
+ # random numbers for the kernel (shape [num_starts, walk_length])
+ walk_len = 10
+ rand_data = mx.random.uniform(shape=[num_starts, walk_len])
+
+ # ---------- Warm-up ----------
+ mx.eval(row_ptr, col, start_idx, rand_data)
+
+ # ---------- Run kernel ----------
+ t0 = time.time()
+ node_seq = random_walk(row_ptr, col, start_idx, rand_data,
+ walk_len, stream=mx.cpu)
+ elapsed = time.time() - t0
+ print(f"Random-walk kernel took {elapsed:.3f} s")
+ print("Node sequence is ", node_seq[0])
+ # ---------- Assertions ----------
+ assert node_seq[0].shape == (num_starts, walk_len + 1)
+
+ # num_nodes = cora.graphs[0].num_nodes
+ # assert (node_seq < num_nodes).all().item(), \
+ # "Random walk produced invalid node indices"
+ t0 = time.time()
+ node_seq_gpu = random_walk(row_ptr, col, start_idx, rand_data,
+ walk_len, stream=mx.gpu)
+ elapsed = time.time() - t0
+ print(f"Random-walk kernel on gpu took {elapsed:.3f} s")
+ print("Node sequence is ", node_seq_gpu[0])
@@ -0,0 +1,62 @@
+ import mlx.core as mx
+ import numpy as np
+ import time
+ import pytest
+
+ # Torch dataset
+ import torch
+ from torch.utils.data import DataLoader
+
+ loader = DataLoader(range(2708), batch_size=2000)
+ start_indices = next(iter(loader))
+ # random_walks = torch.ops.torch_cluster.random_walk(
+ # row_ptr, col, start_indices, 5, 1.0, 3.0
+ # )
+
+ from mlx_graphs.datasets import PlanetoidDataset
+ from mlx_graphs.utils.sorting import sort_edge_index
+ from torch.utils.data import DataLoader
+ from mlx_cluster import rejection_sampling
+
+ @pytest.mark.slow # give download/compile plenty of time on CI
+ def test_random_walk(tmp_path):
+ """
+ Runs 1 000 random walks of length 10 on the Cora graph and checks:
+ 1. output tensor shape == (num_start_nodes, walk_length + 1)
+ 2. all returned node indices are valid ( < num_nodes )
+ """
+
+ # ---------- Dataset (downloaded to the temp dir) ----------
+ data_dir = tmp_path / "mlx_datasets"
+ cora = PlanetoidDataset(name="cora", base_dir=data_dir)
+
+ edge_index = cora.graphs[0].edge_index.astype(mx.int64)
+
+ # CSR conversion
+ sorted_edge_index = sort_edge_index(edge_index=edge_index)
+ row = sorted_edge_index[0][0]
+ col = sorted_edge_index[0][1]
+ _, counts = np.unique(np.array(row, copy=False), return_counts=True)
+ row_ptr = mx.concatenate([mx.array([0]), mx.array(counts.cumsum())])
+
+ # pick 1 000 random start nodes
+ num_starts = 1_000
+ rng = np.random.default_rng(42)
+ start_idx = mx.array(rng.integers(low=0, high=row.max().item() + 1,
+ size=num_starts, dtype=np.int64))
+
+ # random numbers for the kernel (shape [num_starts, walk_length])
+ walk_len = 10
+ rand_data = mx.random.uniform(shape=[num_starts, walk_len])
+
+ # ---------- Warm-up ----------
+ mx.eval(row_ptr, col, start_idx, rand_data)
+
+ # ---------- Run kernel ----------
+ t0 = time.time()
+ node_seq = rejection_sampling(row_ptr, col, start_idx, walk_len, 1.0, 3.0, stream=mx.cpu)
+ elapsed = time.time() - t0
+ print(f"Random-walk kernel took {elapsed:.3f} s")
+ print("Node sequence is ", node_seq)
+ # ---------- Assertions ----------
+ assert node_seq[0].shape == (num_starts, walk_len + 1)
@@ -1,65 +0,0 @@
- #include <nanobind/nanobind.h>
- #include <nanobind/stl/variant.h>
- #include <random_walks/RandomWalk.h>
- #include <random_walks/BiasedRandomWalk.h>
-
- namespace nb = nanobind;
- using namespace nb::literals;
- using namespace mlx::core;
-
- NB_MODULE(_ext, m){
-
- m.def(
- "random_walk",
- &random_walk,
- "rowptr"_a,
- "col"_a,
- "start"_a,
- "rand"_a,
- "walk_length"_a,
- nb::kw_only(),
- "stream"_a = nb::none(),
- R"(
- uniformly sample a graph
-
-
- Args:
- rowptr (array): rowptr of graph in csr format.
- col (array): edges in csr format.
- walk_length (int) : walk length of random graph
-
- Returns:
- array: consisting of nodes visited on random walk
- )");
-
- m.def(
- "rejection_sampling",
- &rejection_sampling,
- "rowptr"_a,
- "col"_a,
- "start"_a,
- "walk_length"_a,
- "p"_a,
- "q"_a,
- nb::kw_only(),
- "stream"_a = nb::none(),
- R"(
- Sample nodes from the graph by sampling neighbors based
- on probablity p and q
-
-
- Args:
- rowptr (array): rowptr of graph in csr format.
- col (array): edges in csr format.
- start (array): starting node of graph from which
- biased sampling will be performed.
- walk_length (int) : walk length of random graph
- p : Likelihood of immediately revisiting a node in the walk.
- q : Control parameter to interpolate between
- breadth-first strategy and depth-first strategy
-
- Returns:
- array: consisting of nodes visited on random walk
- )");
- }
-
Binary file
@@ -1,9 +0,0 @@
-
- [dev]
-
- [test]
- mlx_graphs==0.0.7
- torch==2.2.0
- mlx>=0.17.0
- pytest==7.4.4
- scipy==1.12.0
@@ -1,66 +0,0 @@
- #pragma once
-
- #include <mlx/array.h>
- #include <mlx/ops.h>
- #include <mlx/primitives.h>
-
- namespace mlx::core{
-
- class BiasedRandomWalk : public Primitive {
- public:
- BiasedRandomWalk(Stream stream, int walk_length, double p, double q)
- : Primitive(stream), walk_length_(walk_length), p_(p), q_(q) {}
- void eval_cpu(const std::vector<array>& inputs, std::vector<array>& outputs)
- override;
- void eval_gpu(const std::vector<array>& inputs, std::vector<array>& outputs)
- override;
-
- /** The Jacobian-vector product. */
- std::vector<array> jvp(
- const std::vector<array>& primals,
- const std::vector<array>& tangents,
- const std::vector<int>& argnums) override;
-
- /** The vector-Jacobian product. */
- std::vector<array> vjp(
- const std::vector<array>& primals,
- const std::vector<array>& cotangents,
- const std::vector<int>& argnums,
- const std::vector<array>& outputs) override;
-
- /**
- * The primitive must know how to vectorize itself across
- * the given axes. The output is a pair containing the array
- * representing the vectorized computation and the axis which
- * corresponds to the output vectorized dimension.
- */
- std::pair<std::vector<array>, std::vector<int>> vmap(
- const std::vector<array>& inputs,
- const std::vector<int>& axes) override;
-
- /** Print the primitive. */
- void print(std::ostream& os) override {
- os << "biased random walk implementation";
- }
-
- /** Equivalence check **/
- bool is_equivalent(const Primitive& other) const override;
-
- std::vector<std::vector<int>> output_shapes(const std::vector<array>& inputs) override;
-
- private:
- int walk_length_;
- double p_;
- double q_;
-
- };
-
- array rejection_sampling(const array& rowptr,
- const array& col,
- const array& start,
- int walk_length,
- const double p,
- const double q,
- StreamOrDevice s = {});
-
- };
@@ -1,63 +0,0 @@
- #pragma once
-
- #include <mlx/array.h>
- #include <mlx/ops.h>
- #include <mlx/primitives.h>
-
- namespace mlx::core{
-
- class RandomWalk : public Primitive {
- public:
- explicit RandomWalk(int walk_length, Stream stream):
- Primitive(stream), walk_length_(walk_length) {};
- void eval_cpu(const std::vector<array>& inputs, std::vector<array>& outputs)
- override;
- void eval_gpu(const std::vector<array>& inputs, std::vector<array>& outputs)
- override;
-
- /** The Jacobian-vector product. */
- std::vector<array> jvp(
- const std::vector<array>& primals,
- const std::vector<array>& tangents,
- const std::vector<int>& argnums) override;
-
- /** The vector-Jacobian product. */
- std::vector<array> vjp(
- const std::vector<array>& primals,
- const std::vector<array>& cotangents,
- const std::vector<int>& argnums,
- const std::vector<array>& outputs) override;
-
- /**
- * The primitive must know how to vectorize itself across
- * the given axes. The output is a pair containing the array
- * representing the vectorized computation and the axis which
- * corresponds to the output vectorized dimension.
- */
- std::pair<std::vector<array>, std::vector<int>> vmap(
- const std::vector<array>& inputs,
- const std::vector<int>& axes) override;
-
- /** Print the primitive. */
- void print(std::ostream& os) override {
- os << "Random walk implementation";
- }
-
- /** Equivalence check **/
- bool is_equivalent(const Primitive& other) const override;
-
- std::vector<std::vector<int>> output_shapes(const std::vector<array>& inputs) override;
-
- private:
- int walk_length_;
-
- };
-
- array random_walk(const array& rowptr,
- const array& col,
- const array& start,
- const array& rand,
- int walk_length,
- StreamOrDevice s = {});
-
- };
@@ -1,38 +0,0 @@
- import mlx.core as mx
- import numpy as np
- import time
-
- # Torch dataset
- import torch
- from torch.utils.data import DataLoader
-
- loader = DataLoader(range(2708), batch_size=2000)
- start_indices = next(iter(loader))
-
-
- from mlx_graphs.datasets import PlanetoidDataset
- from mlx_graphs.utils.sorting import sort_edge_index
- from torch.utils.data import DataLoader
- from mlx_cluster import random_walk
-
- cora_dataset = PlanetoidDataset(name="cora", base_dir="~")
- # For some reason int_64t and int_32t are not compatible
- edge_index = cora_dataset.graphs[0].edge_index.astype(mx.int64)
-
- # Convert edge index into a CSR matrix
- sorted_edge_index = sort_edge_index(edge_index=edge_index)
- row_mlx = sorted_edge_index[0][0]
- col_mlx = sorted_edge_index[0][1]
- _, counts_mlx = np.unique(np.array(row_mlx, copy=False), return_counts=True)
- cum_sum_mlx = counts_mlx.cumsum()
- row_ptr_mlx = mx.concatenate([mx.array([0]), mx.array(cum_sum_mlx)])
- start_indices = mx.array(start_indices.numpy())
-
- rand_data = mx.random.uniform(shape=[start_indices.shape[0], 5])
- start_time = time.time()
-
- node_sequence = random_walk(
- row_ptr_mlx, col_mlx, start_indices, rand_data, 5, stream=mx.cpu
- )
- print("Time taken to complete 1000 random walks : ", time.time() - start_time)
- print("MLX random walks are", node_sequence)
@@ -1,35 +0,0 @@
- import mlx.core as mx
- import numpy as np
- import time
-
- # Torch dataset
- import torch
- from torch.utils.data import DataLoader
-
- loader = DataLoader(range(2708), batch_size=2000)
- start_indices = next(iter(loader))
- # random_walks = torch.ops.torch_cluster.random_walk(
- # row_ptr, col, start_indices, 5, 1.0, 3.0
- # )
-
- from mlx_graphs.datasets import PlanetoidDataset
- from mlx_graphs.utils.sorting import sort_edge_index
- from torch.utils.data import DataLoader
- from mlx_cluster import rejection_sampling
-
- cora_dataset = PlanetoidDataset(name="cora", base_dir="~")
- edge_index = cora_dataset.graphs[0].edge_index.astype(mx.int64)
- sorted_edge_index = sort_edge_index(edge_index=edge_index)
- row_mlx = sorted_edge_index[0][0]
- col_mlx = sorted_edge_index[0][1]
- _, counts_mlx = np.unique(np.array(row_mlx, copy=False), return_counts=True)
- cum_sum_mlx = counts_mlx.cumsum()
- row_ptr_mlx = mx.concatenate([mx.array([0]), mx.array(cum_sum_mlx)])
- start_indices = mx.array(start_indices.numpy())
- rand_data = mx.random.uniform(shape=[start_indices.shape[0], 5])
- start_time = time.time()
- node_sequence = rejection_sampling(
- row_ptr_mlx, col_mlx, start_indices, 5, 1.0, 3.0, stream=mx.cpu
- )
- print("Time taken to complete 1000 random walks : ", time.time() - start_time)
- print(node_sequence)
File without changes
File without changes
File without changes
File without changes