graphzero 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. graphzero-0.1.0/.github/workflows/release.yml +69 -0
  2. graphzero-0.1.0/.gitignore +10 -0
  3. graphzero-0.1.0/CMakeLists.txt +25 -0
  4. graphzero-0.1.0/CODE-DOCS.md +225 -0
  5. graphzero-0.1.0/LICENSE +21 -0
  6. graphzero-0.1.0/PKG-INFO +136 -0
  7. graphzero-0.1.0/README.md +126 -0
  8. graphzero-0.1.0/benchmark/benchmark_papers100M_Pyg.py +57 -0
  9. graphzero-0.1.0/benchmark/benchmark_papers100M_gz.py +93 -0
  10. graphzero-0.1.0/benchmark/example_code.py +122 -0
  11. graphzero-0.1.0/benchmark/extract_edges_paper100M.py +83 -0
  12. graphzero-0.1.0/benchmark/glconvert.py +7 -0
  13. graphzero-0.1.0/benchmark/how_to.txt +39 -0
  14. graphzero-0.1.0/benchmark/images/Screenshot 2025-12-27 171432.png +0 -0
  15. graphzero-0.1.0/benchmark/images/examplecode.png +0 -0
  16. graphzero-0.1.0/benchmark/images/graphzero.png +0 -0
  17. graphzero-0.1.0/benchmark/images/gz_bench.png +0 -0
  18. graphzero-0.1.0/benchmark/images/py_crash.png +0 -0
  19. graphzero-0.1.0/dummy.csv +5 -0
  20. graphzero-0.1.0/generateGraph.cpp +18 -0
  21. graphzero-0.1.0/graphzero/__init__.py +12 -0
  22. graphzero-0.1.0/graphzero/graphzero.pyi +112 -0
  23. graphzero-0.1.0/main.cpp +48 -0
  24. graphzero-0.1.0/pyproject.toml +20 -0
  25. graphzero-0.1.0/src/AliasTable.hpp +144 -0
  26. graphzero-0.1.0/src/CSR.hpp +99 -0
  27. graphzero-0.1.0/src/Graphzero.hpp +242 -0
  28. graphzero-0.1.0/src/MemoryMap.hpp +138 -0
  29. graphzero-0.1.0/src/ThreadLocalRNG.hpp +35 -0
  30. graphzero-0.1.0/src/bindings.cpp +206 -0
  31. graphzero-0.1.0/src/csrFilegen.hpp +400 -0
  32. graphzero-0.1.0/tests/dataloader_test.py +65 -0
  33. graphzero-0.1.0/tests/dummy.csv +5 -0
  34. graphzero-0.1.0/tests/test.py +55 -0
@@ -0,0 +1,69 @@
1
+ name: Build and Publish Wheels
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+ workflow_dispatch: # Allows manual triggering for testing
7
+
8
+ jobs:
9
+ build_wheels:
10
+ name: Build wheels on ${{ matrix.os }}
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ matrix:
14
+ os: [ubuntu-latest, windows-latest, macos-latest]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Build wheels
20
+ uses: pypa/cibuildwheel@v2.16.5
21
+ env:
22
+ # SKIP: Old Python, PyPy, Musl (Alpine), and 32-bit Linux
23
+ CIBW_SKIP: "cp36-* cp37-* cp38-* pp* *musllinux* *i686*"
24
+
25
+ # LINUX: Force C++17 (compatible with manylinux2014)
26
+ CIBW_ENVIRONMENT_LINUX: "CXXFLAGS='-std=c++17'"
27
+
28
+ # MACOS: Install OpenMP and configure paths
29
+ CIBW_BEFORE_ALL_MACOS: "brew install libomp"
30
+ CIBW_ENVIRONMENT_MACOS: >
31
+ CFLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include"
32
+ CXXFLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include"
33
+ LDFLAGS="-L$(brew --prefix libomp)/lib -lomp"
34
+ OpenMP_ROOT="$(brew --prefix libomp)"
35
+ # MACOS: Prevent cross-compilation errors (build arm64 on arm64 runner)
36
+ CIBW_ARCHS_MACOS: "auto"
37
+
38
+ - uses: actions/upload-artifact@v4
39
+ with:
40
+ name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
41
+ path: ./wheelhouse/*.whl
42
+
43
+ build_sdist:
44
+ name: Build source distribution
45
+ runs-on: ubuntu-latest
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+ - name: Build sdist
49
+ run: pipx run build --sdist
50
+ - uses: actions/upload-artifact@v4
51
+ with:
52
+ name: cibw-sdist
53
+ path: dist/*.tar.gz
54
+
55
+ publish_to_pypi:
56
+ needs: [build_wheels, build_sdist]
57
+ runs-on: ubuntu-latest
58
+ steps:
59
+ - uses: actions/download-artifact@v4
60
+ with:
61
+ pattern: cibw-*
62
+ path: dist
63
+ merge-multiple: true
64
+
65
+ - name: Publish to PyPI
66
+ uses: pypa/gh-action-pypi-publish@release/v1
67
+ with:
68
+ user: __token__
69
+ password: ${{ secrets.PYPI_PASSWORD }}
@@ -0,0 +1,10 @@
1
+ .vscode/
2
+ venv/
3
+ build/
4
+ benchmark/dataset
5
+
6
+ *.exe
7
+ *.gl
8
+ *.zip
9
+ *.npz
10
+ benchmark/*.csv
@@ -0,0 +1,25 @@
1
+ cmake_minimum_required(VERSION 3.15)
2
+ project(graphzero)
3
+
4
+ # 1. Setup Dependencies
5
+ find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED)
6
+ find_package(nanobind CONFIG REQUIRED)
7
+ find_package(OpenMP)
8
+
9
+ # 2. Define the Module (Must match the Python import name!)
10
+ nanobind_add_module(graphzero
11
+ src/bindings.cpp
12
+ )
13
+
14
+ # 3. Compiler Settings
15
+ target_include_directories(graphzero PRIVATE src)
16
+ target_compile_features(graphzero PUBLIC cxx_std_20)
17
+ target_compile_definitions(graphzero PRIVATE NOMINMAX)
18
+
19
+ # Link OpenMP if found (Essential for your Day 20 speedup)
20
+ if(OpenMP_CXX_FOUND)
21
+ target_link_libraries(graphzero PRIVATE OpenMP::OpenMP_CXX)
22
+ endif()
23
+
24
+ # 4. Installation (Critical for pip)
25
+ install(TARGETS graphzero LIBRARY DESTINATION .)
@@ -0,0 +1,225 @@
1
+ # GraphZero API Reference πŸ“˜
2
+
3
+ This document details the Python API exposed by the `graphzero` C++ engine.
4
+
5
+ ## πŸ“¦ Core Class: `Graph`
6
+
7
+ The main entry point for interacting with the graph.
8
+
9
+ ```python
10
+ import graphzero as gz
11
+ g = gz.Graph("path/to/graph.gl")
12
+
13
+ ```
14
+
15
+ ### Properties
16
+
17
+ | Property | Type | Description |
18
+ | --- | --- | --- |
19
+ | `g.num_nodes` | `int` | Total number of nodes in the graph. |
20
+ | `g.num_edges` | `int` | Total number of edges (directed). |
21
+
22
+ ### Methods
23
+
24
+ #### `get_degree(node_id: int) -> int`
25
+
26
+ Returns the out-degree (number of neighbors) for a specific node.
27
+
28
+ * **Usage:** checking if a node is a dead-end before walking.
29
+
30
+ #### `get_neighbours(node_id: int) -> numpy.ndarray`
31
+
32
+ Returns a **1-D numpy ndarray** of neighbour node IDs (dtype: `np.int64`). This is returned from the C++ layer as a fast zero-copy buffer and can be used directly with NumPy/PyTorch.
33
+
34
+ * **Notes:**
35
+ - The binding uses the British spelling `get_neighbours` (this is the function name exposed in the Python API).
36
+ - For very high-degree nodes prefer `sample_neighbours` or `batch_random_fanout` to avoid copying large arrays.
37
+
38
+ ---
39
+
40
+ ### 🎲 Sampling Methods (The Engine)
41
+
42
+ These functions use OpenMP multithreading on the C++ side and release the GIL to fully saturate CPU/disk bandwidth. All batch functions return a **NumPy ndarray** of dtype `np.int64`.
43
+
44
+ #### `batch_random_walk_uniform(start_nodes: List[int], walk_length: int) -> numpy.ndarray`
45
+
46
+ **The Speed King.** Performs unbiased uniform random walks.
47
+
48
+ * **Return shape & dtype:** `ndarray` with shape `(len(start_nodes), walk_length)` and dtype `np.int64`.
49
+ * **Algorithm:** At every step, pick a neighbour uniformly at random.
50
+ * **Use Case:** DeepWalk, uniform walk baselines, and fast data generation for training.
51
+
52
+ #### `batch_random_walk(start_nodes: List[int], walk_length: int, p: float = 1.0, q: float = 1.0) -> numpy.ndarray`
53
+
54
+ **The Biased Walker.** Performs Node2Vec-style 2nd-order random walks.
55
+
56
+ * **Arguments:**
57
+ - `p` (Return parameter): Low = keeps walk local (BFS-like).
58
+ - `q` (In-out parameter): Low = explores far away (DFS-like).
59
+ * **Return shape & dtype:** `ndarray` with shape `(len(start_nodes), walk_length)` and dtype `np.int64`.
60
+ * **Performance:** Slower than uniform walks due to additional transition calculations.
61
+
62
+ #### `batch_random_fanout(start_nodes: List[int], K: int) -> numpy.ndarray`
63
+
64
+ Performs uniform neighbor *fanout* sampling for a batch of start nodes (useful for GNN neighbour sampling).
65
+
66
+ * **Behavior:** For each start node returns `K` sampled neighbour IDs (using reservoir sampling / uniform sampling without replacement where possible).
67
+ * **Return shape & dtype:** `ndarray` with shape `(len(start_nodes), K)`, dtype `np.int64`.
68
+
69
+ #### `sample_neighbours(start_node: int, K: int) -> numpy.ndarray`
70
+
71
+ Performs uniform neighbour sampling for a single node using **reservoir sampling**.
72
+
73
+ * **Behavior:** Returns up to `K` neighbour IDs sampled uniformly at random. If the node degree <= `K`, all neighbours are returned.
74
+ * **Return shape & dtype:** 1-D `ndarray` of length `<= K`, dtype `np.int64`.
75
+
76
+ ## πŸ› οΈ Utilities
77
+
78
+ #### `gz.convert_csv_to_gl(input_csv: str, output_bin: str, directed: bool)`
79
+
80
+ Converts a raw Edge List CSV into the optimized GraphLite binary format (`.gl`).
81
+
82
+ * **Input CSV Format:** Two columns (Source, Destination). Headers are ignored if they exist.
83
+ * **Process:** 1. **Pass 1:** Scans file to count degrees (Memory: Low).
84
+ 2. **Allocation:** Creates the `.gl` file and `mmaps` it.
85
+ 3. **Pass 2:** Reads CSV again and places edges into the correct memory buckets.
86
+ * **Note:** This process handles graphs larger than RAM.
87
+
88
+
89
+ # 🧠 Example: Training Node2Vec with PyTorch
90
+
91
+ This script demonstrates how to use `GraphZero` to train a real Node2Vec model.
92
+ Since `GraphZero` handles the **Data Loading** (the bottleneck), the GPU can focus entirely on **Training** (the math).
93
+
94
+ **File:** `train_node2vec.py`
95
+
96
+ ```python
97
+ import torch
98
+ import torch.nn as nn
99
+ import torch.optim as optim
100
+ import graphzero as gz
101
+ import numpy as np
102
+ from torch.utils.data import DataLoader, Dataset
103
+
104
+ # --- CONFIGURATION ---
105
+ GRAPH_PATH = "papers100M.gl" # The beast
106
+ EMBEDDING_DIM = 128
107
+ WALK_LENGTH = 20
108
+ WALKS_PER_EPOCH = 100_000 # Number of starts per batch
109
+ BATCH_SIZE = 1024
110
+ EPOCHS = 5
111
+
112
+ print(f"Initializing GraphZero Engine on {GRAPH_PATH}...")
113
+ g = gz.Graph(GRAPH_PATH)
114
+ print(f" Nodes: {g.num_nodes:,} | Edges: {g.num_edges:,}")
115
+
116
+ # --- 1. THE DATASET (Powered by GraphZero) ---
117
+ class GraphZeroWalkDataset(Dataset):
118
+ """
119
+ Generates random walks on-the-fly using C++ engine.
120
+ """
121
+ def __init__(self, graph_engine, num_walks, walk_len):
122
+ self.g = graph_engine
123
+ self.num_walks = num_walks
124
+ self.walk_len = walk_len
125
+
126
+ def __len__(self):
127
+ # In a real scenario, this might be num_nodes
128
+ # For this demo, we define an arbitrary epoch size
129
+ return self.num_walks
130
+
131
+ def __getitem__(self, idx):
132
+ # We don't generate single walks (too slow).
133
+ # We let the DataLoader batch them, then call C++ in the collate_fn.
134
+ # So we just return a random start node here.
135
+ return np.random.randint(0, self.g.num_nodes)
136
+
137
+ # --- 2. CUSTOM COLLATE FUNCTION (The Secret Sauce) ---
138
+ def collate_walks(batch_start_nodes):
139
+ """
140
+ This is where the magic happens.
141
+ Instead of Python looping, we give the whole batch of start nodes
142
+ to C++ and get back the massive walk matrix instantly.
143
+ """
144
+ # 1. Convert batch to list of uint64 for C++
145
+ start_nodes = [int(x) for x in batch_start_nodes]
146
+
147
+ # 2. Call C++ Engine (Releases GIL, runs OpenMP)
148
+ # Result is a flat list: [walk1_step1, walk1_step2... walk2_step1...]
149
+ flat_walks = g.batch_random_walk_uniform(start_nodes, WALK_LENGTH)
150
+
151
+ # 3. Reshape for PyTorch (Batch Size, Walk Length)
152
+ walks_tensor = torch.tensor(flat_walks, dtype=torch.long)
153
+ walks_tensor = walks_tensor.view(len(start_nodes), WALK_LENGTH)
154
+
155
+ return walks_tensor
156
+
157
+ # --- CONFIGURATION ADJUSTMENT ---
158
+ # We map 204M nodes -> 1M unique embeddings to save RAM
159
+ HASH_SIZE = 1_000_000
160
+ # RAM Usage: 1M * 128 * 4 bytes = ~512 MB (Very safe)
161
+
162
+ # --- 3. THE MODEL (Hashed Skip-Gram) ---
163
+ class Node2Vec(nn.Module):
164
+ def __init__(self, num_nodes, embed_dim):
165
+ super().__init__()
166
+ # INSTEAD OF: self.in_embed = nn.Embedding(num_nodes, embed_dim)
167
+ # WE USE:
168
+ self.in_embed = nn.Embedding(HASH_SIZE, embed_dim)
169
+ self.out_embed = nn.Embedding(HASH_SIZE, embed_dim)
170
+
171
+ def forward(self, target, context):
172
+ # Hashing Trick: Map massive ID -> Small ID
173
+ # In a real app, you'd use a better hash, but modulo is fine for a demo
174
+ t_hashed = target % HASH_SIZE
175
+ c_hashed = context % HASH_SIZE
176
+
177
+ v_in = self.in_embed(t_hashed)
178
+ v_out = self.out_embed(c_hashed)
179
+
180
+ return torch.sum(v_in * v_out, dim=1)
181
+
182
+ # --- 4. TRAINING LOOP ---
183
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
184
+ model = Node2Vec(g.num_nodes, EMBEDDING_DIM).to(device)
185
+ optimizer = optim.Adam(model.parameters(), lr=0.01)
186
+
187
+ # PyTorch DataLoader wraps our C++ engine
188
+ loader = DataLoader(
189
+ GraphZeroWalkDataset(g, WALKS_PER_EPOCH, WALK_LENGTH),
190
+ batch_size=BATCH_SIZE,
191
+ collate_fn=collate_walks, # <--- Connects PyTorch to GraphZero
192
+ num_workers=0 # Windows needs 0, Linux can use more
193
+ )
194
+
195
+ print("\nStarting Training...")
196
+
197
+ for epoch in range(EPOCHS):
198
+ total_loss = 0
199
+
200
+ for batch_walks in loader:
201
+ # batch_walks shape: [1024, 20]
202
+ batch_walks = batch_walks.to(device)
203
+
204
+ # Simple Positive Pair generation: (Current, Next)
205
+ # Real implementations use sliding windows, simplified here for brevity
206
+ target = batch_walks[:, :-1].flatten()
207
+ context = batch_walks[:, 1:].flatten()
208
+
209
+ optimizer.zero_grad()
210
+ loss = -model(target, context).mean() # Dummy loss for demo
211
+ loss.backward()
212
+ optimizer.step()
213
+
214
+ total_loss += loss.item()
215
+
216
+ print(f"Epoch {epoch+1}/{EPOCHS} | Avg Loss: {total_loss/len(loader):.4f}")
217
+
218
+ print("βœ… Training Complete.")
219
+
220
+ ```
221
+
222
+ This example showcases how `GraphZero` can be seamlessly integrated into a PyTorch training loop, allowing for efficient data loading and processing of massive graphs. The C++ engine handles the heavy lifting of random walk generation, freeing up Python to focus on model training.
223
+ here is the screenshot of the output when running the script:
224
+
225
+ ![Training Output](benchmark/images/examplecode.png)
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Krish
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,136 @@
1
+ Metadata-Version: 2.2
2
+ Name: graphzero
3
+ Version: 0.1.0
4
+ Summary: High-performance Zero-Copy Graph Engine
5
+ Author-Email: Krish <krishsingaria2005@gmail.com>
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: numpy
8
+ Requires-Dist: torch
9
+ Description-Content-Type: text/markdown
10
+
11
+ # GraphZero
12
+
13
+ **High-Performance, Zero-Copy Graph Engine for Massive Datasets on Consumer Hardware.**
14
+
15
+ GraphZero is a C++ graph processing engine with lightweight Python bindings designed to solve the **"Memory Wall"** in Graph Neural Networks (GNNs). It allows you to load and sample **100 Million+ node graphs** (like `ogbn-papers100M`) on a standard 16GB RAM laptopβ€”something standard libraries like PyTorch Geometric (PyG) or DGL cannot do.
16
+
17
+
18
+ ## ⚑ The Problem
19
+
20
+ GNN datasets can be massive. `ogbn-papers100M` contains **111 Million nodes** and **1.6 Billion edges**.
21
+
22
+ * **Standard approach (PyG/NetworkX):** Tries to load the entire graph structure into **RAM**.
23
+ * **The Result:** `MemoryError` (OOM) on consumer hardware. You need 64GB+ **RAM** servers just to *load* the data.
24
+
25
+ ## πŸ› οΈ The Solution:
26
+
27
+ ![GraphZero Architecture](benchmark/images/graphzero.png)
28
+
29
+ GraphZero abandons the "Load-to-RAM" model. Instead, it uses a custom **Zero-Copy Architecture**:
30
+
31
+ * **Memory Mapping (`mmap`):** The graph stays on disk. The OS only loads the specific "hot" pages needed for computation into RAM.
32
+ * **Compressed CSR:** A custom binary format (`.gl`) that compresses raw edges by **~60%** (30GB CSV → 13GB Binary).
33
+ * **Parallel Sampling:** OpenMP-accelerated random walks that saturate NVMe SSD throughput.
34
+
35
+ ## πŸ† Benchmarks: GraphZero vs. PyTorch Geometric
36
+
37
+ **Task:** Load `ogbn-papers100M` (56GB Raw) and perform random walks.
38
+ **Hardware:** Windows Laptop (16GB RAM, NVMe SSD).
39
+
40
+ | Metric | GraphZero (v0.1) | PyTorch Geometric |
41
+ | --- | --- | --- |
42
+ | **Load Time** | **0.000000 s** ⚑ | **FAILED** (Crash) ❌ |
43
+ | **Peak RAM Usage** | **~5.1 GB** (OS Cache) | **>24.1 GB** (Required) |
44
+ | **Throughput** | **1,264,000 steps/s** | N/A |
45
+ | **Status** | βœ… **Success** | ❌ **OOM Error** |
46
+
47
+ ### Proof of Performance
48
+
49
+ > *Left: GraphZero loading instantly and utilizing OS Page Cache. Right: PyG crashing with `Unable to allocate 24.1 GiB`.*
50
+
51
+ <p float="left">
52
+ <img src="benchmark/images/gz_bench.png" width="45%" />
53
+ <img src="benchmark/images/py_crash.png" width="45%" />
54
+ </p>
55
+
56
+ ---
57
+
58
+ ## πŸ“¦ Installation
59
+
60
+ GraphZero is available on PyPI (Pre-Alpha):
61
+
62
+ ```bash
63
+ pip install graphzero
64
+
65
+ ```
66
+
67
+ *Requirements: Python 3.8+, C++17 Compiler (MSVC/GCC), OpenMP.*
68
+
69
+ ---
70
+
71
+ ## πŸš€ Quick Start
72
+
73
+ ### 1. Convert Your Data
74
+
75
+ GraphZero uses a high-efficiency binary format (`.gl`). Convert your generic CSV edges list once.
76
+
77
+ ```python
78
+ import graphzero as gz
79
+
80
+ # Converts raw CSV (src, dst) to memory-mapped binary
81
+ # Handles 100M+ edges easily on minimal RAM
82
+ gz.convert_csv_to_gl(
83
+ input_csv="dataset/edges.csv",
84
+ output_bin="graph.gl",
85
+ directed=True
86
+ )
87
+
88
+ ```
89
+
90
+ ### 2. High-Speed Sampling
91
+
92
+ Once converted, the graph is instantly accessible.
93
+
94
+ ```python
95
+ import graphzero as gz
96
+ import numpy as np
97
+
98
+ # 1. Zero-Copy Load (Instant)
99
+ g = gz.Graph("graph.gl")
100
+
101
+ # 2. Define Start Nodes (e.g., 1000 random nodes)
102
+ start_nodes = np.random.randint(0, g.num_nodes, 1000).astype(np.uint64)
103
+
104
+ # 3. Parallel Random Walk (node2vec / DeepWalk style)
105
+ # Returns: List of walks (flat or list-of-lists)
106
+ walks = g.batch_random_walk_uniform(
107
+ start_nodes=start_nodes,
108
+ walk_length=10
109
+ )
110
+
111
+ print(f"Generated {len(walks)} steps instantly.")
112
+
113
+ ```
114
+
115
+
116
+ ## βš™οΈ Under the Hood
117
+
118
+ GraphZero is built for **Systems & GNN** enthusiasts.
119
+
120
+ * **Core:** C++20 with `nanobind` for Python bindings.
121
+ * **Parallelism:** Uses `#pragma omp` with thread-local RNGs to prevent false sharing and lock contention.
122
+ * **IO:** Direct `CreateFileMapping` (Windows) and `mmap` (Linux) calls with alignment optimization (4KB/2MB pages).
123
+
124
+
125
+ ## πŸ—ΊοΈ Roadmap
126
+
127
+ * **v0.1 (Current):** Topology-only support. Uniform Random Walks.
128
+ * **v0.2:** Columnar Feature Store (mmap support for Node Features ).
129
+ * **v0.3:** Weighted Edges & SIMD (AVX2) Neighbor Intersection.
130
+ * **v0.4:** Dynamic Updates (LSM-Tree based mutable graphs).
131
+ * **v0.5:** Pinned Memory Allocator for faster CPU → GPU transfer.
132
+
133
+
134
+ ## πŸ“„ License
135
+
136
+ MIT License. Created by **Krish Singaria** (IIT Mandi).
@@ -0,0 +1,126 @@
1
+ # GraphZero
2
+
3
+ **High-Performance, Zero-Copy Graph Engine for Massive Datasets on Consumer Hardware.**
4
+
5
+ GraphZero is a C++ graph processing engine with lightweight Python bindings designed to solve the **"Memory Wall"** in Graph Neural Networks (GNNs). It allows you to load and sample **100 Million+ node graphs** (like `ogbn-papers100M`) on a standard 16GB RAM laptopβ€”something standard libraries like PyTorch Geometric (PyG) or DGL cannot do.
6
+
7
+
8
+ ## ⚑ The Problem
9
+
10
+ GNN datasets can be massive. `ogbn-papers100M` contains **111 Million nodes** and **1.6 Billion edges**.
11
+
12
+ * **Standard approach (PyG/NetworkX):** Tries to load the entire graph structure into **RAM**.
13
+ * **The Result:** `MemoryError` (OOM) on consumer hardware. You need 64GB+ **RAM** servers just to *load* the data.
14
+
15
+ ## πŸ› οΈ The Solution:
16
+
17
+ ![GraphZero Architecture](benchmark/images/graphzero.png)
18
+
19
+ GraphZero abandons the "Load-to-RAM" model. Instead, it uses a custom **Zero-Copy Architecture**:
20
+
21
+ * **Memory Mapping (`mmap`):** The graph stays on disk. The OS only loads the specific "hot" pages needed for computation into RAM.
22
+ * **Compressed CSR:** A custom binary format (`.gl`) that compresses raw edges by **~60%** (30GB CSV → 13GB Binary).
23
+ * **Parallel Sampling:** OpenMP-accelerated random walks that saturate NVMe SSD throughput.
24
+
25
+ ## πŸ† Benchmarks: GraphZero vs. PyTorch Geometric
26
+
27
+ **Task:** Load `ogbn-papers100M` (56GB Raw) and perform random walks.
28
+ **Hardware:** Windows Laptop (16GB RAM, NVMe SSD).
29
+
30
+ | Metric | GraphZero (v0.1) | PyTorch Geometric |
31
+ | --- | --- | --- |
32
+ | **Load Time** | **0.000000 s** ⚑ | **FAILED** (Crash) ❌ |
33
+ | **Peak RAM Usage** | **~5.1 GB** (OS Cache) | **>24.1 GB** (Required) |
34
+ | **Throughput** | **1,264,000 steps/s** | N/A |
35
+ | **Status** | βœ… **Success** | ❌ **OOM Error** |
36
+
37
+ ### Proof of Performance
38
+
39
+ > *Left: GraphZero loading instantly and utilizing OS Page Cache. Right: PyG crashing with `Unable to allocate 24.1 GiB`.*
40
+
41
+ <p float="left">
42
+ <img src="benchmark/images/gz_bench.png" width="45%" />
43
+ <img src="benchmark/images/py_crash.png" width="45%" />
44
+ </p>
45
+
46
+ ---
47
+
48
+ ## πŸ“¦ Installation
49
+
50
+ GraphZero is available on PyPI (Pre-Alpha):
51
+
52
+ ```bash
53
+ pip install graphzero
54
+
55
+ ```
56
+
57
+ *Requirements: Python 3.8+, C++17 Compiler (MSVC/GCC), OpenMP.*
58
+
59
+ ---
60
+
61
+ ## πŸš€ Quick Start
62
+
63
+ ### 1. Convert Your Data
64
+
65
+ GraphZero uses a high-efficiency binary format (`.gl`). Convert your generic CSV edges list once.
66
+
67
+ ```python
68
+ import graphzero as gz
69
+
70
+ # Converts raw CSV (src, dst) to memory-mapped binary
71
+ # Handles 100M+ edges easily on minimal RAM
72
+ gz.convert_csv_to_gl(
73
+ input_csv="dataset/edges.csv",
74
+ output_bin="graph.gl",
75
+ directed=True
76
+ )
77
+
78
+ ```
79
+
80
+ ### 2. High-Speed Sampling
81
+
82
+ Once converted, the graph is instantly accessible.
83
+
84
+ ```python
85
+ import graphzero as gz
86
+ import numpy as np
87
+
88
+ # 1. Zero-Copy Load (Instant)
89
+ g = gz.Graph("graph.gl")
90
+
91
+ # 2. Define Start Nodes (e.g., 1000 random nodes)
92
+ start_nodes = np.random.randint(0, g.num_nodes, 1000).astype(np.uint64)
93
+
94
+ # 3. Parallel Random Walk (node2vec / DeepWalk style)
95
+ # Returns: List of walks (flat or list-of-lists)
96
+ walks = g.batch_random_walk_uniform(
97
+ start_nodes=start_nodes,
98
+ walk_length=10
99
+ )
100
+
101
+ print(f"Generated {len(walks)} steps instantly.")
102
+
103
+ ```
104
+
105
+
106
+ ## βš™οΈ Under the Hood
107
+
108
+ GraphZero is built for **Systems & GNN** enthusiasts.
109
+
110
+ * **Core:** C++20 with `nanobind` for Python bindings.
111
+ * **Parallelism:** Uses `#pragma omp` with thread-local RNGs to prevent false sharing and lock contention.
112
+ * **IO:** Direct `CreateFileMapping` (Windows) and `mmap` (Linux) calls with alignment optimization (4KB/2MB pages).
113
+
114
+
115
+ ## πŸ—ΊοΈ Roadmap
116
+
117
+ * **v0.1 (Current):** Topology-only support. Uniform Random Walks.
118
+ * **v0.2:** Columnar Feature Store (mmap support for Node Features ).
119
+ * **v0.3:** Weighted Edges & SIMD (AVX2) Neighbor Intersection.
120
+ * **v0.4:** Dynamic Updates (LSM-Tree based mutable graphs).
121
+ * **v0.5:** Pinned Memory Allocator for faster CPU → GPU transfer.
122
+
123
+
124
+ ## πŸ“„ License
125
+
126
+ MIT License. Created by **Krish Singaria** (IIT Mandi).
@@ -0,0 +1,57 @@
1
+ import time
2
+ import psutil
3
+ import os
4
+ import torch
5
+ from ogb.nodeproppred import PygNodePropPredDataset
6
+
7
+ print("==================================================")
8
+ print(" PYG BENCHMARK: The 'Control' Experiment")
9
+ print(" Warning: This script may freeze your laptop.")
10
+ print("==================================================")
11
+
12
+ process = psutil.Process(os.getpid())
13
+ def get_ram_usage():
14
+ return process.memory_info().rss / (1024 ** 3)
15
+
16
+ print(f"Initial RAM: {get_ram_usage():.4f} GB")
17
+
18
+ # 1. Measure Loading Time
19
+ print("\n[Step 1] Attempting to load ogbn-papers100M with PyG...")
20
+ t0 = time.time()
21
+
22
+ try:
23
+ # This tries to load the processed .pt file into RAM
24
+ # If this line finishes in < 60 seconds, I will be shocked.
25
+ dataset = PygNodePropPredDataset(name='ogbn-papers100M',root='dataset')
26
+ data = dataset[0] # The actual graph object
27
+
28
+ t_load = time.time() - t0
29
+ print(f"βœ… Loaded! (Miraculously)")
30
+ print(f"⏱️ Load Time: {t_load:.4f} s")
31
+ print(f"πŸ’Ύ RAM Usage: {get_ram_usage():.4f} GB")
32
+
33
+ except Exception as e:
34
+ print(f"\n❌ CRASHED as expected: {e}")
35
+ print(f"πŸ’Ύ RAM at Crash: {get_ram_usage():.4f} GB")
36
+ exit(1)
37
+
38
+ # 2. Random Walk Benchmark (If we survived loading)
39
+ print("\n[Step 2] Attempting Random Walks (ClusterGCN style)...")
40
+ # PyG doesn't have a direct "random walk" sampler on CPU that is easy to invoke
41
+ # without a DataLoader, so we will just try to access the edge_index
42
+ # to simulate 'touching' the memory.
43
+
44
+ try:
45
+ t0 = time.time()
46
+ # Simulate reading 1M random edges
47
+ num_edges = data.edge_index.shape[1]
48
+ indices = torch.randint(0, num_edges, (1_000_000,))
49
+
50
+ # Force read
51
+ subset = data.edge_index[:, indices]
52
+
53
+ t_bench = time.time() - t0
54
+ print(f"βœ… Access Test Complete in {t_bench:.4f} s")
55
+
56
+ except Exception as e:
57
+ print(f"❌ Failed during access: {e}")