graphzero 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphzero-0.1.0/.github/workflows/release.yml +69 -0
- graphzero-0.1.0/.gitignore +10 -0
- graphzero-0.1.0/CMakeLists.txt +25 -0
- graphzero-0.1.0/CODE-DOCS.md +225 -0
- graphzero-0.1.0/LICENSE +21 -0
- graphzero-0.1.0/PKG-INFO +136 -0
- graphzero-0.1.0/README.md +126 -0
- graphzero-0.1.0/benchmark/benchmark_papers100M_Pyg.py +57 -0
- graphzero-0.1.0/benchmark/benchmark_papers100M_gz.py +93 -0
- graphzero-0.1.0/benchmark/example_code.py +122 -0
- graphzero-0.1.0/benchmark/extract_edges_paper100M.py +83 -0
- graphzero-0.1.0/benchmark/glconvert.py +7 -0
- graphzero-0.1.0/benchmark/how_to.txt +39 -0
- graphzero-0.1.0/benchmark/images/Screenshot 2025-12-27 171432.png +0 -0
- graphzero-0.1.0/benchmark/images/examplecode.png +0 -0
- graphzero-0.1.0/benchmark/images/graphzero.png +0 -0
- graphzero-0.1.0/benchmark/images/gz_bench.png +0 -0
- graphzero-0.1.0/benchmark/images/py_crash.png +0 -0
- graphzero-0.1.0/dummy.csv +5 -0
- graphzero-0.1.0/generateGraph.cpp +18 -0
- graphzero-0.1.0/graphzero/__init__.py +12 -0
- graphzero-0.1.0/graphzero/graphzero.pyi +112 -0
- graphzero-0.1.0/main.cpp +48 -0
- graphzero-0.1.0/pyproject.toml +20 -0
- graphzero-0.1.0/src/AliasTable.hpp +144 -0
- graphzero-0.1.0/src/CSR.hpp +99 -0
- graphzero-0.1.0/src/Graphzero.hpp +242 -0
- graphzero-0.1.0/src/MemoryMap.hpp +138 -0
- graphzero-0.1.0/src/ThreadLocalRNG.hpp +35 -0
- graphzero-0.1.0/src/bindings.cpp +206 -0
- graphzero-0.1.0/src/csrFilegen.hpp +400 -0
- graphzero-0.1.0/tests/dataloader_test.py +65 -0
- graphzero-0.1.0/tests/dummy.csv +5 -0
- graphzero-0.1.0/tests/test.py +55 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
name: Build and Publish Wheels
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch: # Allows manual triggering for testing
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build_wheels:
|
|
10
|
+
name: Build wheels on ${{ matrix.os }}
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Build wheels
|
|
20
|
+
uses: pypa/cibuildwheel@v2.16.5
|
|
21
|
+
env:
|
|
22
|
+
# SKIP: Old Python, PyPy, Musl (Alpine), and 32-bit Linux
|
|
23
|
+
CIBW_SKIP: "cp36-* cp37-* cp38-* pp* *musllinux* *i686*"
|
|
24
|
+
|
|
25
|
+
# LINUX: Force C++17 (compatible with manylinux2014)
|
|
26
|
+
CIBW_ENVIRONMENT_LINUX: "CXXFLAGS='-std=c++17'"
|
|
27
|
+
|
|
28
|
+
# MACOS: Install OpenMP and configure paths
|
|
29
|
+
CIBW_BEFORE_ALL_MACOS: "brew install libomp"
|
|
30
|
+
CIBW_ENVIRONMENT_MACOS: >
|
|
31
|
+
CFLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include"
|
|
32
|
+
CXXFLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include"
|
|
33
|
+
LDFLAGS="-L$(brew --prefix libomp)/lib -lomp"
|
|
34
|
+
OpenMP_ROOT="$(brew --prefix libomp)"
|
|
35
|
+
# MACOS: Prevent cross-compilation errors (build arm64 on arm64 runner)
|
|
36
|
+
CIBW_ARCHS_MACOS: "auto"
|
|
37
|
+
|
|
38
|
+
- uses: actions/upload-artifact@v4
|
|
39
|
+
with:
|
|
40
|
+
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
|
|
41
|
+
path: ./wheelhouse/*.whl
|
|
42
|
+
|
|
43
|
+
build_sdist:
|
|
44
|
+
name: Build source distribution
|
|
45
|
+
runs-on: ubuntu-latest
|
|
46
|
+
steps:
|
|
47
|
+
- uses: actions/checkout@v4
|
|
48
|
+
- name: Build sdist
|
|
49
|
+
run: pipx run build --sdist
|
|
50
|
+
- uses: actions/upload-artifact@v4
|
|
51
|
+
with:
|
|
52
|
+
name: cibw-sdist
|
|
53
|
+
path: dist/*.tar.gz
|
|
54
|
+
|
|
55
|
+
publish_to_pypi:
|
|
56
|
+
needs: [build_wheels, build_sdist]
|
|
57
|
+
runs-on: ubuntu-latest
|
|
58
|
+
steps:
|
|
59
|
+
- uses: actions/download-artifact@v4
|
|
60
|
+
with:
|
|
61
|
+
pattern: cibw-*
|
|
62
|
+
path: dist
|
|
63
|
+
merge-multiple: true
|
|
64
|
+
|
|
65
|
+
- name: Publish to PyPI
|
|
66
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
67
|
+
with:
|
|
68
|
+
user: __token__
|
|
69
|
+
password: ${{ secrets.PYPI_PASSWORD }}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.15)
|
|
2
|
+
project(graphzero)
|
|
3
|
+
|
|
4
|
+
# 1. Setup Dependencies
|
|
5
|
+
find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED)
|
|
6
|
+
find_package(nanobind CONFIG REQUIRED)
|
|
7
|
+
find_package(OpenMP)
|
|
8
|
+
|
|
9
|
+
# 2. Define the Module (Must match the Python import name!)
|
|
10
|
+
nanobind_add_module(graphzero
|
|
11
|
+
src/bindings.cpp
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# 3. Compiler Settings
|
|
15
|
+
target_include_directories(graphzero PRIVATE src)
|
|
16
|
+
target_compile_features(graphzero PUBLIC cxx_std_20)
|
|
17
|
+
target_compile_definitions(graphzero PRIVATE NOMINMAX)
|
|
18
|
+
|
|
19
|
+
# Link OpenMP if found (Essential for your Day 20 speedup)
|
|
20
|
+
if(OpenMP_CXX_FOUND)
|
|
21
|
+
target_link_libraries(graphzero PRIVATE OpenMP::OpenMP_CXX)
|
|
22
|
+
endif()
|
|
23
|
+
|
|
24
|
+
# 4. Installation (Critical for pip)
|
|
25
|
+
install(TARGETS graphzero LIBRARY DESTINATION .)
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# GraphZero API Reference
|
|
2
|
+
|
|
3
|
+
This document details the Python API exposed by the `graphzero` C++ engine.
|
|
4
|
+
|
|
5
|
+
## 📦 Core Class: `Graph`
|
|
6
|
+
|
|
7
|
+
The main entry point for interacting with the graph.
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
import graphzero as gz
|
|
11
|
+
g = gz.Graph("path/to/graph.gl")
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### Properties
|
|
16
|
+
|
|
17
|
+
| Property | Type | Description |
|
|
18
|
+
| --- | --- | --- |
|
|
19
|
+
| `g.num_nodes` | `int` | Total number of nodes in the graph. |
|
|
20
|
+
| `g.num_edges` | `int` | Total number of edges (directed). |
|
|
21
|
+
|
|
22
|
+
### Methods
|
|
23
|
+
|
|
24
|
+
#### `get_degree(node_id: int) -> int`
|
|
25
|
+
|
|
26
|
+
Returns the out-degree (number of neighbors) for a specific node.
|
|
27
|
+
|
|
28
|
+
* **Usage:** checking if a node is a dead-end before walking.
|
|
29
|
+
|
|
30
|
+
#### `get_neighbours(node_id: int) -> numpy.ndarray`
|
|
31
|
+
|
|
32
|
+
Returns a **1-D numpy ndarray** of neighbour node IDs (dtype: `np.int64`). This is returned from the C++ layer as a fast zero-copy buffer and can be used directly with NumPy/PyTorch.
|
|
33
|
+
|
|
34
|
+
* **Notes:**
|
|
35
|
+
- The binding uses the British spelling `get_neighbours` (this is the function name exposed in the Python API).
|
|
36
|
+
- For very high-degree nodes prefer `sample_neighbours` or `batch_random_fanout` to avoid copying large arrays.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
### 🎲 Sampling Methods (The Engine)
|
|
41
|
+
|
|
42
|
+
These functions use OpenMP multithreading on the C++ side and release the GIL to fully saturate CPU/disk bandwidth. All batch functions return a **NumPy ndarray** of dtype `np.int64`.
|
|
43
|
+
|
|
44
|
+
#### `batch_random_walk_uniform(start_nodes: List[int], walk_length: int) -> numpy.ndarray`
|
|
45
|
+
|
|
46
|
+
**The Speed King.** Performs unbiased uniform random walks.
|
|
47
|
+
|
|
48
|
+
* **Return shape & dtype:** `ndarray` with shape `(len(start_nodes), walk_length)` and dtype `np.int64`.
|
|
49
|
+
* **Algorithm:** At every step, pick a neighbour uniformly at random.
|
|
50
|
+
* **Use Case:** DeepWalk, uniform walk baselines, and fast data generation for training.
|
|
51
|
+
|
|
52
|
+
#### `batch_random_walk(start_nodes: List[int], walk_length: int, p: float = 1.0, q: float = 1.0) -> numpy.ndarray`
|
|
53
|
+
|
|
54
|
+
**The Biased Walker.** Performs Node2Vec-style 2nd-order random walks.
|
|
55
|
+
|
|
56
|
+
* **Arguments:**
|
|
57
|
+
- `p` (Return parameter): Low = keeps walk local (BFS-like).
|
|
58
|
+
- `q` (In-out parameter): Low = explores far away (DFS-like).
|
|
59
|
+
* **Return shape & dtype:** `ndarray` with shape `(len(start_nodes), walk_length)` and dtype `np.int64`.
|
|
60
|
+
* **Performance:** Slower than uniform walks due to additional transition calculations.
|
|
61
|
+
|
|
62
|
+
#### `batch_random_fanout(start_nodes: List[int], K: int) -> numpy.ndarray`
|
|
63
|
+
|
|
64
|
+
Performs uniform neighbor *fanout* sampling for a batch of start nodes (useful for GNN neighbour sampling).
|
|
65
|
+
|
|
66
|
+
* **Behavior:** For each start node returns `K` sampled neighbour IDs (using reservoir sampling / uniform sampling without replacement where possible).
|
|
67
|
+
* **Return shape & dtype:** `ndarray` with shape `(len(start_nodes), K)`, dtype `np.int64`.
|
|
68
|
+
|
|
69
|
+
#### `sample_neighbours(start_node: int, K: int) -> numpy.ndarray`
|
|
70
|
+
|
|
71
|
+
Performs uniform neighbour sampling for a single node using **reservoir sampling**.
|
|
72
|
+
|
|
73
|
+
* **Behavior:** Returns up to `K` neighbour IDs sampled uniformly at random. If the node degree <= `K`, all neighbours are returned.
|
|
74
|
+
* **Return shape & dtype:** 1-D `ndarray` of length `<= K`, dtype `np.int64`.
|
|
75
|
+
|
|
76
|
+
## 🛠️ Utilities
|
|
77
|
+
|
|
78
|
+
#### `gz.convert_csv_to_gl(input_csv: str, output_bin: str, directed: bool)`
|
|
79
|
+
|
|
80
|
+
Converts a raw Edge List CSV into the optimized GraphLite binary format (`.gl`).
|
|
81
|
+
|
|
82
|
+
* **Input CSV Format:** Two columns (Source, Destination). Headers are ignored if they exist.
|
|
83
|
+
* **Process:** 1. **Pass 1:** Scans file to count degrees (Memory: Low).
|
|
84
|
+
2. **Allocation:** Creates the `.gl` file and `mmaps` it.
|
|
85
|
+
3. **Pass 2:** Reads CSV again and places edges into the correct memory buckets.
|
|
86
|
+
* **Note:** This process handles graphs larger than RAM.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# 🧠 Example: Training Node2Vec with PyTorch
|
|
90
|
+
|
|
91
|
+
This script demonstrates how to use `GraphZero` to train a real Node2Vec model.
|
|
92
|
+
Since `GraphZero` handles the **Data Loading** (the bottleneck), the GPU can focus entirely on **Training** (the math).
|
|
93
|
+
|
|
94
|
+
**File:** `train_node2vec.py`
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
import torch
|
|
98
|
+
import torch.nn as nn
|
|
99
|
+
import torch.optim as optim
|
|
100
|
+
import graphzero as gz
|
|
101
|
+
import numpy as np
|
|
102
|
+
from torch.utils.data import DataLoader, Dataset
|
|
103
|
+
|
|
104
|
+
# --- CONFIGURATION ---
|
|
105
|
+
GRAPH_PATH = "papers100M.gl" # The beast
|
|
106
|
+
EMBEDDING_DIM = 128
|
|
107
|
+
WALK_LENGTH = 20
|
|
108
|
+
WALKS_PER_EPOCH = 100_000 # Number of walk start nodes per epoch
|
|
109
|
+
BATCH_SIZE = 1024
|
|
110
|
+
EPOCHS = 5
|
|
111
|
+
|
|
112
|
+
print(f"Initializing GraphZero Engine on {GRAPH_PATH}...")
|
|
113
|
+
g = gz.Graph(GRAPH_PATH)
|
|
114
|
+
print(f" Nodes: {g.num_nodes:,} | Edges: {g.num_edges:,}")
|
|
115
|
+
|
|
116
|
+
# --- 1. THE DATASET (Powered by GraphZero) ---
|
|
117
|
+
class GraphZeroWalkDataset(Dataset):
|
|
118
|
+
"""
|
|
119
|
+
Generates random walks on-the-fly using C++ engine.
|
|
120
|
+
"""
|
|
121
|
+
def __init__(self, graph_engine, num_walks, walk_len):
|
|
122
|
+
self.g = graph_engine
|
|
123
|
+
self.num_walks = num_walks
|
|
124
|
+
self.walk_len = walk_len
|
|
125
|
+
|
|
126
|
+
def __len__(self):
|
|
127
|
+
# In a real scenario, this might be num_nodes
|
|
128
|
+
# For this demo, we define an arbitrary epoch size
|
|
129
|
+
return self.num_walks
|
|
130
|
+
|
|
131
|
+
def __getitem__(self, idx):
|
|
132
|
+
# We don't generate single walks (too slow).
|
|
133
|
+
# We let the DataLoader batch them, then call C++ in the collate_fn.
|
|
134
|
+
# So we just return a random start node here.
|
|
135
|
+
return np.random.randint(0, self.g.num_nodes)
|
|
136
|
+
|
|
137
|
+
# --- 2. CUSTOM COLLATE FUNCTION (The Secret Sauce) ---
|
|
138
|
+
def collate_walks(batch_start_nodes):
|
|
139
|
+
"""
|
|
140
|
+
This is where the magic happens.
|
|
141
|
+
Instead of Python looping, we give the whole batch of start nodes
|
|
142
|
+
to C++ and get back the massive walk matrix instantly.
|
|
143
|
+
"""
|
|
144
|
+
# 1. Convert batch to list of uint64 for C++
|
|
145
|
+
start_nodes = [int(x) for x in batch_start_nodes]
|
|
146
|
+
|
|
147
|
+
# 2. Call C++ Engine (Releases GIL, runs OpenMP)
|
|
148
|
+
# Result is a flat list: [walk1_step1, walk1_step2... walk2_step1...]
|
|
149
|
+
flat_walks = g.batch_random_walk_uniform(start_nodes, WALK_LENGTH)
|
|
150
|
+
|
|
151
|
+
# 3. Reshape for PyTorch (Batch Size, Walk Length)
|
|
152
|
+
walks_tensor = torch.tensor(flat_walks, dtype=torch.long)
|
|
153
|
+
walks_tensor = walks_tensor.view(len(start_nodes), WALK_LENGTH)
|
|
154
|
+
|
|
155
|
+
return walks_tensor
|
|
156
|
+
|
|
157
|
+
# --- CONFIGURATION ADJUSTMENT ---
|
|
158
|
+
# We map 204M nodes -> 1M unique embeddings to save RAM
|
|
159
|
+
HASH_SIZE = 1_000_000
|
|
160
|
+
# RAM Usage: 1M * 128 * 4 bytes = ~512 MB per embedding table (Very safe)
|
|
161
|
+
|
|
162
|
+
# --- 3. THE MODEL (Hashed Skip-Gram) ---
|
|
163
|
+
class Node2Vec(nn.Module):
|
|
164
|
+
def __init__(self, num_nodes, embed_dim):
|
|
165
|
+
super().__init__()
|
|
166
|
+
# INSTEAD OF: self.in_embed = nn.Embedding(num_nodes, embed_dim)
|
|
167
|
+
# WE USE:
|
|
168
|
+
self.in_embed = nn.Embedding(HASH_SIZE, embed_dim)
|
|
169
|
+
self.out_embed = nn.Embedding(HASH_SIZE, embed_dim)
|
|
170
|
+
|
|
171
|
+
def forward(self, target, context):
|
|
172
|
+
# Hashing Trick: Map massive ID -> Small ID
|
|
173
|
+
# In a real app, you'd use a better hash, but modulo is fine for a demo
|
|
174
|
+
t_hashed = target % HASH_SIZE
|
|
175
|
+
c_hashed = context % HASH_SIZE
|
|
176
|
+
|
|
177
|
+
v_in = self.in_embed(t_hashed)
|
|
178
|
+
v_out = self.out_embed(c_hashed)
|
|
179
|
+
|
|
180
|
+
return torch.sum(v_in * v_out, dim=1)
|
|
181
|
+
|
|
182
|
+
# --- 4. TRAINING LOOP ---
|
|
183
|
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
184
|
+
model = Node2Vec(g.num_nodes, EMBEDDING_DIM).to(device)
|
|
185
|
+
optimizer = optim.Adam(model.parameters(), lr=0.01)
|
|
186
|
+
|
|
187
|
+
# PyTorch DataLoader wraps our C++ engine
|
|
188
|
+
loader = DataLoader(
|
|
189
|
+
GraphZeroWalkDataset(g, WALKS_PER_EPOCH, WALK_LENGTH),
|
|
190
|
+
batch_size=BATCH_SIZE,
|
|
191
|
+
collate_fn=collate_walks, # <--- Connects PyTorch to GraphZero
|
|
192
|
+
num_workers=0 # Windows needs 0, Linux can use more
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
print("\nStarting Training...")
|
|
196
|
+
|
|
197
|
+
for epoch in range(EPOCHS):
|
|
198
|
+
total_loss = 0
|
|
199
|
+
|
|
200
|
+
for batch_walks in loader:
|
|
201
|
+
# batch_walks shape: [1024, 20]
|
|
202
|
+
batch_walks = batch_walks.to(device)
|
|
203
|
+
|
|
204
|
+
# Simple Positive Pair generation: (Current, Next)
|
|
205
|
+
# Real implementations use sliding windows, simplified here for brevity
|
|
206
|
+
target = batch_walks[:, :-1].flatten()
|
|
207
|
+
context = batch_walks[:, 1:].flatten()
|
|
208
|
+
|
|
209
|
+
optimizer.zero_grad()
|
|
210
|
+
loss = -model(target, context).mean() # Dummy loss for demo
|
|
211
|
+
loss.backward()
|
|
212
|
+
optimizer.step()
|
|
213
|
+
|
|
214
|
+
total_loss += loss.item()
|
|
215
|
+
|
|
216
|
+
print(f"Epoch {epoch+1}/{EPOCHS} | Avg Loss: {total_loss/len(loader):.4f}")
|
|
217
|
+
|
|
218
|
+
print("✅ Training Complete.")
|
|
219
|
+
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
This example showcases how `GraphZero` can be seamlessly integrated into a PyTorch training loop, allowing for efficient data loading and processing of massive graphs. The C++ engine handles the heavy lifting of random walk generation, freeing up Python to focus on model training.
|
|
223
|
+
Here is a screenshot of the output from running the script:
|
|
224
|
+
|
|
225
|
+

|
graphzero-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Krish
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
graphzero-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: graphzero
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: High-performance Zero-Copy Graph Engine
|
|
5
|
+
Author-Email: Krish <krishsingaria2005@gmail.com>
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Requires-Dist: numpy
|
|
8
|
+
Requires-Dist: torch
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# GraphZero
|
|
12
|
+
|
|
13
|
+
**High-Performance, Zero-Copy Graph Engine for Massive Datasets on Consumer Hardware.**
|
|
14
|
+
|
|
15
|
+
GraphZero is a C++ graph processing engine with lightweight Python bindings designed to solve the **"Memory Wall"** in Graph Neural Networks (GNNs). It allows you to load and sample **100 Million+ node graphs** (like `ogbn-papers100M`) on a standard 16GB RAM laptop—something standard libraries like PyTorch Geometric (PyG) or DGL cannot do.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## ⚡ The Problem
|
|
19
|
+
|
|
20
|
+
GNN datasets can be massive. `ogbn-papers100M` contains **111 Million nodes** and **1.6 Billion edges**.
|
|
21
|
+
|
|
22
|
+
* **Standard approach (PyG/NetworkX):** Tries to load the entire graph structure into **RAM**.
|
|
23
|
+
* **The Result:** `MemoryError` (OOM) on consumer hardware. You need 64GB+ **RAM** servers just to *load* the data.
|
|
24
|
+
|
|
25
|
+
## 🛠️ The Solution:
|
|
26
|
+
|
|
27
|
+

|
|
28
|
+
|
|
29
|
+
GraphZero abandons the "Load-to-RAM" model. Instead, it uses a custom **Zero-Copy Architecture**:
|
|
30
|
+
|
|
31
|
+
* **Memory Mapping (`mmap`):** The graph stays on disk. The OS only loads the specific "hot" pages needed for computation into RAM.
|
|
32
|
+
* **Compressed CSR:** A custom binary format (`.gl`) that compresses raw edges by **~60%** (30GB CSV → 13GB Binary).
|
|
33
|
+
* **Parallel Sampling:** OpenMP-accelerated random walks that saturate NVMe SSD throughput.
|
|
34
|
+
|
|
35
|
+
## 📊 Benchmarks: GraphZero vs. PyTorch Geometric
|
|
36
|
+
|
|
37
|
+
**Task:** Load `ogbn-papers100M` (56GB Raw) and perform random walks.
|
|
38
|
+
**Hardware:** Windows Laptop (16GB RAM, NVMe SSD).
|
|
39
|
+
|
|
40
|
+
| Metric | GraphZero (v0.1) | PyTorch Geometric |
|
|
41
|
+
| --- | --- | --- |
|
|
42
|
+
| **Load Time** | **0.000000 s** ⚡ | **FAILED** (Crash) ❌ |
|
|
43
|
+
| **Peak RAM Usage** | **~5.1 GB** (OS Cache) | **>24.1 GB** (Required) |
|
|
44
|
+
| **Throughput** | **1,264,000 steps/s** | N/A |
|
|
45
|
+
| **Status** | ✅ **Success** | ❌ **OOM Error** |
|
|
46
|
+
|
|
47
|
+
### Proof of Performance
|
|
48
|
+
|
|
49
|
+
> *Left: GraphZero loading instantly and utilizing OS Page Cache. Right: PyG crashing with `Unable to allocate 24.1 GiB`.*
|
|
50
|
+
|
|
51
|
+
<p float="left">
|
|
52
|
+
<img src="benchmark/images/gz_bench.png" width="45%" />
|
|
53
|
+
<img src="benchmark/images/py_crash.png" width="45%" />
|
|
54
|
+
</p>
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## 📦 Installation
|
|
59
|
+
|
|
60
|
+
GraphZero is available on PyPI (Pre-Alpha):
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install graphzero
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
*Requirements: Python 3.8+, C++20 Compiler (MSVC/GCC), OpenMP.*
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## 🚀 Quick Start
|
|
72
|
+
|
|
73
|
+
### 1. Convert Your Data
|
|
74
|
+
|
|
75
|
+
GraphZero uses a high-efficiency binary format (`.gl`). Convert your generic CSV edges list once.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import graphzero as gz
|
|
79
|
+
|
|
80
|
+
# Converts raw CSV (src, dst) to memory-mapped binary
|
|
81
|
+
# Handles 100M+ edges easily on minimal RAM
|
|
82
|
+
gz.convert_csv_to_gl(
|
|
83
|
+
input_csv="dataset/edges.csv",
|
|
84
|
+
output_bin="graph.gl",
|
|
85
|
+
directed=True
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### 2. High-Speed Sampling
|
|
91
|
+
|
|
92
|
+
Once converted, the graph is instantly accessible.
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
import graphzero as gz
|
|
96
|
+
import numpy as np
|
|
97
|
+
|
|
98
|
+
# 1. Zero-Copy Load (Instant)
|
|
99
|
+
g = gz.Graph("graph.gl")
|
|
100
|
+
|
|
101
|
+
# 2. Define Start Nodes (e.g., 1000 random nodes)
|
|
102
|
+
start_nodes = np.random.randint(0, g.num_nodes, 1000).astype(np.uint64)
|
|
103
|
+
|
|
104
|
+
# 3. Parallel Random Walk (node2vec / DeepWalk style)
|
|
105
|
+
# Returns: List of walks (flat or list-of-lists)
|
|
106
|
+
walks = g.batch_random_walk_uniform(
|
|
107
|
+
start_nodes=start_nodes,
|
|
108
|
+
walk_length=10
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
print(f"Generated {len(walks)} steps instantly.")
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
## ⚙️ Under the Hood
|
|
117
|
+
|
|
118
|
+
GraphZero is built for **Systems & GNN** enthusiasts.
|
|
119
|
+
|
|
120
|
+
* **Core:** C++20 with `nanobind` for Python bindings.
|
|
121
|
+
* **Parallelism:** Uses `#pragma omp` with thread-local RNGs to prevent false sharing and lock contention.
|
|
122
|
+
* **IO:** Direct `CreateFileMapping` (Windows) and `mmap` (Linux) calls with alignment optimization (4KB/2MB pages).
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
## 🗺️ Roadmap
|
|
126
|
+
|
|
127
|
+
* **v0.1 (Current):** Topology-only support. Uniform Random Walks.
|
|
128
|
+
* **v0.2:** Columnar Feature Store (mmap support for Node Features).
|
|
129
|
+
* **v0.3:** Weighted Edges & SIMD (AVX2) Neighbor Intersection.
|
|
130
|
+
* **v0.4:** Dynamic Updates (LSM-Tree based mutable graphs).
|
|
131
|
+
* **v0.5:** Pinned Memory Allocator for faster CPU → GPU transfer.
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
## 📜 License
|
|
135
|
+
|
|
136
|
+
MIT License. Created by **Krish Singaria** (IIT Mandi).
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# GraphZero
|
|
2
|
+
|
|
3
|
+
**High-Performance, Zero-Copy Graph Engine for Massive Datasets on Consumer Hardware.**
|
|
4
|
+
|
|
5
|
+
GraphZero is a C++ graph processing engine with lightweight Python bindings designed to solve the **"Memory Wall"** in Graph Neural Networks (GNNs). It allows you to load and sample **100 Million+ node graphs** (like `ogbn-papers100M`) on a standard 16GB RAM laptop—something standard libraries like PyTorch Geometric (PyG) or DGL cannot do.
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
## ⚡ The Problem
|
|
9
|
+
|
|
10
|
+
GNN datasets can be massive. `ogbn-papers100M` contains **111 Million nodes** and **1.6 Billion edges**.
|
|
11
|
+
|
|
12
|
+
* **Standard approach (PyG/NetworkX):** Tries to load the entire graph structure into **RAM**.
|
|
13
|
+
* **The Result:** `MemoryError` (OOM) on consumer hardware. You need 64GB+ **RAM** servers just to *load* the data.
|
|
14
|
+
|
|
15
|
+
## 🛠️ The Solution:
|
|
16
|
+
|
|
17
|
+

|
|
18
|
+
|
|
19
|
+
GraphZero abandons the "Load-to-RAM" model. Instead, it uses a custom **Zero-Copy Architecture**:
|
|
20
|
+
|
|
21
|
+
* **Memory Mapping (`mmap`):** The graph stays on disk. The OS only loads the specific "hot" pages needed for computation into RAM.
|
|
22
|
+
* **Compressed CSR:** A custom binary format (`.gl`) that compresses raw edges by **~60%** (30GB CSV → 13GB Binary).
|
|
23
|
+
* **Parallel Sampling:** OpenMP-accelerated random walks that saturate NVMe SSD throughput.
|
|
24
|
+
|
|
25
|
+
## 📊 Benchmarks: GraphZero vs. PyTorch Geometric
|
|
26
|
+
|
|
27
|
+
**Task:** Load `ogbn-papers100M` (56GB Raw) and perform random walks.
|
|
28
|
+
**Hardware:** Windows Laptop (16GB RAM, NVMe SSD).
|
|
29
|
+
|
|
30
|
+
| Metric | GraphZero (v0.1) | PyTorch Geometric |
|
|
31
|
+
| --- | --- | --- |
|
|
32
|
+
| **Load Time** | **0.000000 s** ⚡ | **FAILED** (Crash) ❌ |
|
|
33
|
+
| **Peak RAM Usage** | **~5.1 GB** (OS Cache) | **>24.1 GB** (Required) |
|
|
34
|
+
| **Throughput** | **1,264,000 steps/s** | N/A |
|
|
35
|
+
| **Status** | ✅ **Success** | ❌ **OOM Error** |
|
|
36
|
+
|
|
37
|
+
### Proof of Performance
|
|
38
|
+
|
|
39
|
+
> *Left: GraphZero loading instantly and utilizing OS Page Cache. Right: PyG crashing with `Unable to allocate 24.1 GiB`.*
|
|
40
|
+
|
|
41
|
+
<p float="left">
|
|
42
|
+
<img src="benchmark/images/gz_bench.png" width="45%" />
|
|
43
|
+
<img src="benchmark/images/py_crash.png" width="45%" />
|
|
44
|
+
</p>
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## 📦 Installation
|
|
49
|
+
|
|
50
|
+
GraphZero is available on PyPI (Pre-Alpha):
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install graphzero
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
*Requirements: Python 3.8+, C++20 Compiler (MSVC/GCC), OpenMP.*
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## 🚀 Quick Start
|
|
62
|
+
|
|
63
|
+
### 1. Convert Your Data
|
|
64
|
+
|
|
65
|
+
GraphZero uses a high-efficiency binary format (`.gl`). Convert your generic CSV edges list once.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import graphzero as gz
|
|
69
|
+
|
|
70
|
+
# Converts raw CSV (src, dst) to memory-mapped binary
|
|
71
|
+
# Handles 100M+ edges easily on minimal RAM
|
|
72
|
+
gz.convert_csv_to_gl(
|
|
73
|
+
input_csv="dataset/edges.csv",
|
|
74
|
+
output_bin="graph.gl",
|
|
75
|
+
directed=True
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 2. High-Speed Sampling
|
|
81
|
+
|
|
82
|
+
Once converted, the graph is instantly accessible.
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import graphzero as gz
|
|
86
|
+
import numpy as np
|
|
87
|
+
|
|
88
|
+
# 1. Zero-Copy Load (Instant)
|
|
89
|
+
g = gz.Graph("graph.gl")
|
|
90
|
+
|
|
91
|
+
# 2. Define Start Nodes (e.g., 1000 random nodes)
|
|
92
|
+
start_nodes = np.random.randint(0, g.num_nodes, 1000).astype(np.uint64)
|
|
93
|
+
|
|
94
|
+
# 3. Parallel Random Walk (node2vec / DeepWalk style)
|
|
95
|
+
# Returns: List of walks (flat or list-of-lists)
|
|
96
|
+
walks = g.batch_random_walk_uniform(
|
|
97
|
+
start_nodes=start_nodes,
|
|
98
|
+
walk_length=10
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
print(f"Generated {len(walks)} steps instantly.")
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
## ⚙️ Under the Hood
|
|
107
|
+
|
|
108
|
+
GraphZero is built for **Systems & GNN** enthusiasts.
|
|
109
|
+
|
|
110
|
+
* **Core:** C++20 with `nanobind` for Python bindings.
|
|
111
|
+
* **Parallelism:** Uses `#pragma omp` with thread-local RNGs to prevent false sharing and lock contention.
|
|
112
|
+
* **IO:** Direct `CreateFileMapping` (Windows) and `mmap` (Linux) calls with alignment optimization (4KB/2MB pages).
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
## 🗺️ Roadmap
|
|
116
|
+
|
|
117
|
+
* **v0.1 (Current):** Topology-only support. Uniform Random Walks.
|
|
118
|
+
* **v0.2:** Columnar Feature Store (mmap support for Node Features).
|
|
119
|
+
* **v0.3:** Weighted Edges & SIMD (AVX2) Neighbor Intersection.
|
|
120
|
+
* **v0.4:** Dynamic Updates (LSM-Tree based mutable graphs).
|
|
121
|
+
* **v0.5:** Pinned Memory Allocator for faster CPU → GPU transfer.
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
## 📜 License
|
|
125
|
+
|
|
126
|
+
MIT License. Created by **Krish Singaria** (IIT Mandi).
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import psutil
|
|
3
|
+
import os
|
|
4
|
+
import torch
|
|
5
|
+
from ogb.nodeproppred import PygNodePropPredDataset
|
|
6
|
+
|
|
7
|
+
print("==================================================")
|
|
8
|
+
print(" PYG BENCHMARK: The 'Control' Experiment")
|
|
9
|
+
print(" Warning: This script may freeze your laptop.")
|
|
10
|
+
print("==================================================")
|
|
11
|
+
|
|
12
|
+
process = psutil.Process(os.getpid())
|
|
13
|
+
def get_ram_usage():
|
|
14
|
+
return process.memory_info().rss / (1024 ** 3)
|
|
15
|
+
|
|
16
|
+
print(f"Initial RAM: {get_ram_usage():.4f} GB")
|
|
17
|
+
|
|
18
|
+
# 1. Measure Loading Time
|
|
19
|
+
print("\n[Step 1] Attempting to load ogbn-papers100M with PyG...")
|
|
20
|
+
t0 = time.time()
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
# This tries to load the processed .pt file into RAM
|
|
24
|
+
# If this line finishes in < 60 seconds, I will be shocked.
|
|
25
|
+
dataset = PygNodePropPredDataset(name='ogbn-papers100M',root='dataset')
|
|
26
|
+
data = dataset[0] # The actual graph object
|
|
27
|
+
|
|
28
|
+
t_load = time.time() - t0
|
|
29
|
+
print(f"β
Loaded! (Miraculously)")
|
|
30
|
+
print(f"β±οΈ Load Time: {t_load:.4f} s")
|
|
31
|
+
print(f"πΎ RAM Usage: {get_ram_usage():.4f} GB")
|
|
32
|
+
|
|
33
|
+
except Exception as e:
|
|
34
|
+
print(f"\nβ CRASHED as expected: {e}")
|
|
35
|
+
print(f"πΎ RAM at Crash: {get_ram_usage():.4f} GB")
|
|
36
|
+
exit(1)
|
|
37
|
+
|
|
38
|
+
# 2. Random Walk Benchmark (If we survived loading)
|
|
39
|
+
print("\n[Step 2] Attempting Random Walks (ClusterGCN style)...")
|
|
40
|
+
# PyG doesn't have a direct "random walk" sampler on CPU that is easy to invoke
|
|
41
|
+
# without a DataLoader, so we will just try to access the edge_index
|
|
42
|
+
# to simulate 'touching' the memory.
|
|
43
|
+
|
|
44
|
+
try:
|
|
45
|
+
t0 = time.time()
|
|
46
|
+
# Simulate reading 1M random edges
|
|
47
|
+
num_edges = data.edge_index.shape[1]
|
|
48
|
+
indices = torch.randint(0, num_edges, (1_000_000,))
|
|
49
|
+
|
|
50
|
+
# Force read
|
|
51
|
+
subset = data.edge_index[:, indices]
|
|
52
|
+
|
|
53
|
+
t_bench = time.time() - t0
|
|
54
|
+
print(f"β
Access Test Complete in {t_bench:.4f} s")
|
|
55
|
+
|
|
56
|
+
except Exception as e:
|
|
57
|
+
print(f"β Failed during access: {e}")
|