blink-gpu 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blink_gpu-0.1.0/PKG-INFO +315 -0
- blink_gpu-0.1.0/README.md +258 -0
- blink_gpu-0.1.0/blink/__init__.py +30 -0
- blink_gpu-0.1.0/blink/__main__.py +117 -0
- blink_gpu-0.1.0/blink/_analyzer.py +98 -0
- blink_gpu-0.1.0/blink/_predictor.py +164 -0
- blink_gpu-0.1.0/blink/_version.py +1 -0
- blink_gpu-0.1.0/blink/py.typed +1 -0
- blink_gpu-0.1.0/blink_gpu.egg-info/PKG-INFO +315 -0
- blink_gpu-0.1.0/blink_gpu.egg-info/SOURCES.txt +18 -0
- blink_gpu-0.1.0/blink_gpu.egg-info/dependency_links.txt +1 -0
- blink_gpu-0.1.0/blink_gpu.egg-info/entry_points.txt +3 -0
- blink_gpu-0.1.0/blink_gpu.egg-info/requires.txt +39 -0
- blink_gpu-0.1.0/blink_gpu.egg-info/top_level.txt +1 -0
- blink_gpu-0.1.0/pyproject.toml +127 -0
- blink_gpu-0.1.0/setup.cfg +4 -0
- blink_gpu-0.1.0/tests/test_diverse_models.py +190 -0
- blink_gpu-0.1.0/tests/test_gnn_scaling.py +41 -0
- blink_gpu-0.1.0/tests/test_predictors.py +59 -0
- blink_gpu-0.1.0/tests/test_profiler.py +42 -0
blink_gpu-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: blink-gpu
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Predict GPU execution time & memory for PyTorch models — without running them.
|
|
5
|
+
Author-email: Aniket Mishra <aniket@blink-gpu.dev>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Aniketxmishra/Blink_Main
|
|
8
|
+
Project-URL: Documentation, https://github.com/Aniketxmishra/Blink_Main#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/Aniketxmishra/Blink_Main.git
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/Aniketxmishra/Blink_Main/issues
|
|
11
|
+
Keywords: gpu,performance,prediction,pytorch,neural-network,profiling,machine-learning,explainability
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: System :: Hardware
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: numpy>=1.24
|
|
25
|
+
Requires-Dist: pandas>=2.0
|
|
26
|
+
Requires-Dist: scikit-learn>=1.3
|
|
27
|
+
Requires-Dist: xgboost>=2.0
|
|
28
|
+
Requires-Dist: joblib>=1.3
|
|
29
|
+
Requires-Dist: thop>=0.1.1
|
|
30
|
+
Provides-Extra: full
|
|
31
|
+
Requires-Dist: optuna; extra == "full"
|
|
32
|
+
Requires-Dist: lightgbm; extra == "full"
|
|
33
|
+
Requires-Dist: pynvml; extra == "full"
|
|
34
|
+
Requires-Dist: shap>=0.44; extra == "full"
|
|
35
|
+
Requires-Dist: streamlit>=1.30; extra == "full"
|
|
36
|
+
Requires-Dist: plotly>=5.18; extra == "full"
|
|
37
|
+
Requires-Dist: matplotlib>=3.8; extra == "full"
|
|
38
|
+
Requires-Dist: seaborn>=0.13; extra == "full"
|
|
39
|
+
Provides-Extra: api
|
|
40
|
+
Requires-Dist: fastapi>=0.110; extra == "api"
|
|
41
|
+
Requires-Dist: uvicorn[standard]>=0.27; extra == "api"
|
|
42
|
+
Requires-Dist: python-multipart; extra == "api"
|
|
43
|
+
Requires-Dist: httpx; extra == "api"
|
|
44
|
+
Provides-Extra: gnn
|
|
45
|
+
Requires-Dist: torch-geometric; extra == "gnn"
|
|
46
|
+
Provides-Extra: explain
|
|
47
|
+
Requires-Dist: shap>=0.44; extra == "explain"
|
|
48
|
+
Provides-Extra: dev
|
|
49
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
50
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
51
|
+
Requires-Dist: ruff; extra == "dev"
|
|
52
|
+
Requires-Dist: black; extra == "dev"
|
|
53
|
+
Requires-Dist: mypy; extra == "dev"
|
|
54
|
+
Requires-Dist: pre-commit; extra == "dev"
|
|
55
|
+
Provides-Extra: all
|
|
56
|
+
Requires-Dist: blink-gpu[api,dev,explain,full,gnn]; extra == "all"
|
|
57
|
+
|
|
58
|
+
# Blink
|
|
59
|
+
> **GPU Performance Predictor for Deep Learning Models**
|
|
60
|
+
|
|
61
|
+
Blink predicts **execution time** and **memory usage** of PyTorch neural networks on GPU without actually running them. It combines classical ML (XGBoost, Random Forest) with a Graph Neural Network (GNN) that encodes the computational graph of any model architecture.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## ๐ Table of Contents
|
|
66
|
+
- [Overview](#overview)
|
|
67
|
+
- [Architecture](#architecture)
|
|
68
|
+
- [Project Structure](#project-structure)
|
|
69
|
+
- [Installation](#installation)
|
|
70
|
+
- [Usage](#usage)
|
|
71
|
+
- [Data Pipeline](#data-pipeline)
|
|
72
|
+
- [Model Performance](#model-performance)
|
|
73
|
+
- [Dashboard](#dashboard)
|
|
74
|
+
- [Paper Reproducibility](#paper-reproducibility)
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Overview
|
|
79
|
+
|
|
80
|
+
Given a PyTorch model and a batch size, Blink answers:
|
|
81
|
+
- *How long will a forward pass take on this GPU?*
|
|
82
|
+
- *How much GPU memory will it consume?*
|
|
83
|
+
|
|
84
|
+
This is useful for:
|
|
85
|
+
- **Batch size optimization** before deployment
|
|
86
|
+
- **Hardware cost estimation** for training runs
|
|
87
|
+
- **NAS (Neural Architecture Search)** — filtering architectures by predicted cost
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## Architecture
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
PyTorch Model
|
|
95
|
+
โ
|
|
96
|
+
โผ
|
|
97
|
+
โโโโโโโโโโโโโโโโโโโโโโโ
|
|
98
|
+
โ Feature Extractor โ โ layer counts, FLOPs, params, depth, width, skip connections
|
|
99
|
+
โ + GNN Extractor โ โ graph-based architecture encoding (ArchitectureGNN)
|
|
100
|
+
โโโโโโโโโโโฌโโโโโโโโโโโโ
|
|
101
|
+
โ
|
|
102
|
+
โผ
|
|
103
|
+
โโโโโโโโโโโโโโโโโโโโโโโ
|
|
104
|
+
โ Prediction Models โ
|
|
105
|
+
โ โโโโโโโโโโโโโโโโโ โ
|
|
106
|
+
โ ยท XGBoost (tuned) โ โ main predictor (best MAPE)
|
|
107
|
+
โ ยท Random Forest โ โ ensemble comparison
|
|
108
|
+
โ ยท GNN Predictor โ โ graph-native, generalizes across architectures
|
|
109
|
+
โ ยท Linear / Ridge โ โ baselines
|
|
110
|
+
โโโโโโโโโโโฌโโโโโโโโโโโโ
|
|
111
|
+
โ
|
|
112
|
+
โผ
|
|
113
|
+
Predicted: execution_time_ms, memory_mb
|
|
114
|
+
+ Uncertainty bounds (lower / upper)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Project Structure
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
Blink/
|
|
123
|
+
โโโ dashboard.py # ๐ฅ๏ธ Main Streamlit web app (run this)
|
|
124
|
+
โโโ prediction_api.py # ๐ Flask REST API
|
|
125
|
+
โ
|
|
126
|
+
โโโ โโ Core ML Modules โโ
|
|
127
|
+
โ โโโ model_profiler.py # GPU profiler (CUDA events)
|
|
128
|
+
โ โโโ feature_extractor.py # Static feature extraction from nn.Module
|
|
129
|
+
โ โโโ gnn_extractor.py # GNN-based graph feature extraction
|
|
130
|
+
โ โโโ gnn_model.py # ArchitectureGNN model definition (PyG)
|
|
131
|
+
โ โโโ prediction_model.py # Train XGBoost / RF / Linear models
|
|
132
|
+
โ โโโ train_gnn.py # Train the GNN predictor
|
|
133
|
+
โ โโโ train_memory_model.py# Train memory prediction model
|
|
134
|
+
โ โโโ gpu_predictor.py # Inference class with caching & batch support
|
|
135
|
+
โ โโโ model_analyser.py # Model complexity analysis utilities
|
|
136
|
+
โ โโโ advanced_features.py # Extended feature engineering
|
|
137
|
+
โ โโโ dynamic_predictor.py # Dynamic / online prediction
|
|
138
|
+
โ โโโ gpu_info.py # GPU metadata (pynvml)
|
|
139
|
+
โ โโโ workload_scheduler.py# Batch workload scheduler
|
|
140
|
+
โ โโโ performance_monitor.py
|
|
141
|
+
โ
|
|
142
|
+
โโโ scripts/ # ๐ฌ Experiment & data scripts
|
|
143
|
+
โ โโโ collect_data.py # Profile CNN/Transformer/custom models โ data/raw/
|
|
144
|
+
โ โโโ enhance_dataset.py # Augment dataset (more batch sizes / models)
|
|
145
|
+
โ โโโ diverse_architectures.py # Profile diverse arch families
|
|
146
|
+
โ โโโ ablation_study.py # 5-condition ablation (Table II in paper)
|
|
147
|
+
โ โโโ generate_paper_figures.py # Reproduce all paper figures
|
|
148
|
+
โ โโโ generate_paper_tables.py # Reproduce paper tables
|
|
149
|
+
โ
|
|
150
|
+
โโโ tests/ # โ
Test suite
|
|
151
|
+
โ โโโ test_diverse_models.py
|
|
152
|
+
โ โโโ test_predictors.py
|
|
153
|
+
โ โโโ test_profiler.py
|
|
154
|
+
โ โโโ test_gnn_scaling.py
|
|
155
|
+
โ โโโ evaluate_gnn_vs_xgb.py
|
|
156
|
+
โ
|
|
157
|
+
โโโ data/
|
|
158
|
+
โ โโโ raw/ # Raw profiling CSVs (gitignored)
|
|
159
|
+
โ โโโ processed/ # Feature-engineered CSVs
|
|
160
|
+
โ โโโ enriched/ # Final training-ready dataset
|
|
161
|
+
โ โโโ feedback_log.csv # Online feedback loop log
|
|
162
|
+
โ
|
|
163
|
+
โโโ models/ # Serialized model artifacts (gitignored)
|
|
164
|
+
โ โโโ xgboost_(tuned)_model.joblib
|
|
165
|
+
โ โโโ random_forest_model.joblib
|
|
166
|
+
โ โโโ gnn_predictor.pth
|
|
167
|
+
โ โโโ memory_model.joblib
|
|
168
|
+
โ โโโ ...
|
|
169
|
+
โ
|
|
170
|
+
โโโ results/
|
|
171
|
+
โ โโโ figures/ # Paper figures (PNG)
|
|
172
|
+
โ โโโ ablation_study_table.csv
|
|
173
|
+
โ โโโ gnn_scaling_table.csv
|
|
174
|
+
โ โโโ ...
|
|
175
|
+
โ
|
|
176
|
+
โโโ templates/index.html # HTML template for web interface
|
|
177
|
+
โโโ legacy/ # Archived / superseded scripts
|
|
178
|
+
โโโ requirements.txt
|
|
179
|
+
โโโ .gitignore
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## Installation
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
# 1. Clone the repo
|
|
188
|
+
git clone <your-repo-url>
|
|
189
|
+
cd Blink
|
|
190
|
+
|
|
191
|
+
# 2. Create a virtual environment
|
|
192
|
+
python -m venv venv
|
|
193
|
+
venv\Scripts\activate # Windows
|
|
194
|
+
# source venv/bin/activate # Linux/macOS
|
|
195
|
+
|
|
196
|
+
# 3. Install dependencies
|
|
197
|
+
pip install -r requirements.txt
|
|
198
|
+
|
|
199
|
+
# 4. Install PyTorch Geometric (match your CUDA version)
|
|
200
|
+
# See: https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html
|
|
201
|
+
pip install torch-geometric
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
**Requirements:** NVIDIA GPU with CUDA, Python ≥ 3.9
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Usage
|
|
209
|
+
|
|
210
|
+
### 1. Launch the Dashboard
|
|
211
|
+
```bash
|
|
212
|
+
streamlit run dashboard.py
|
|
213
|
+
```
|
|
214
|
+
Features: live model prediction, batch size optimizer, model comparison, performance monitor.
|
|
215
|
+
|
|
216
|
+
### 2. Collect Profiling Data
|
|
217
|
+
```bash
|
|
218
|
+
python scripts/collect_data.py --batch-sizes 1 4 16 32 64
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### 3. Train Prediction Models
|
|
222
|
+
```bash
|
|
223
|
+
# Train XGBoost / RF / Linear baseline models
|
|
224
|
+
python prediction_model.py
|
|
225
|
+
|
|
226
|
+
# Train GNN predictor
|
|
227
|
+
python train_gnn.py
|
|
228
|
+
|
|
229
|
+
# Train memory model
|
|
230
|
+
python train_memory_model.py
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### 4. Run Ablation Study
|
|
234
|
+
```bash
|
|
235
|
+
python scripts/ablation_study.py
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### 5. Predict via Python API
|
|
239
|
+
```python
|
|
240
|
+
from gpu_predictor import GPUPredictor
|
|
241
|
+
import torchvision.models as models
|
|
242
|
+
|
|
243
|
+
predictor = GPUPredictor()
|
|
244
|
+
model = models.resnet50(pretrained=False)
|
|
245
|
+
result = predictor.predict_for_custom_model(model, batch_size=16)
|
|
246
|
+
print(result)
|
|
247
|
+
# {'execution_time_ms': 12.4, 'memory_mb': 1820, 'confidence_lower': 11.1, ...}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Data Pipeline
|
|
253
|
+
|
|
254
|
+
```
|
|
255
|
+
collect_data.py
|
|
256
|
+
โโโถ data/raw/*.csv (GPU profiling measurements)
|
|
257
|
+
โ
|
|
258
|
+
โผ
|
|
259
|
+
feature_extractor.py
|
|
260
|
+
โโโถ data/processed/*.csv (static model features)
|
|
261
|
+
โ
|
|
262
|
+
โผ
|
|
263
|
+
enhance_dataset.py
|
|
264
|
+
โโโถ data/enriched/*.csv (augmented, training-ready)
|
|
265
|
+
โ
|
|
266
|
+
โผ
|
|
267
|
+
prediction_model.py / train_gnn.py
|
|
268
|
+
โโโถ models/ (trained predictors)
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Model Performance
|
|
274
|
+
|
|
275
|
+
Results on held-out test set (20% split):
|
|
276
|
+
|
|
277
|
+
| Model | Exec Time MAPE | Memory MAPE | Notes |
|
|
278
|
+
|---|---|---|---|
|
|
279
|
+
| XGBoost (tuned) | ~8% | ~6% | Best overall |
|
|
280
|
+
| Random Forest | ~11% | ~9% | Robust baseline |
|
|
281
|
+
| GNN Predictor | ~10% | ~8% | Best on unseen architectures |
|
|
282
|
+
| Linear Regression | ~22% | ~19% | Baseline |
|
|
283
|
+
|
|
284
|
+
*(Full ablation study results: `results/ablation_study_table.csv`)*
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## Dashboard
|
|
289
|
+
|
|
290
|
+
The Streamlit dashboard (`dashboard.py`) provides:
|
|
291
|
+
|
|
292
|
+
| Tab | Description |
|
|
293
|
+
|---|---|
|
|
294
|
+
| ๐ฏ Prediction | Predict execution time & memory for standard or custom models |
|
|
295
|
+
| โก Batch Optimizer | Find optimal batch size within a memory budget |
|
|
296
|
+
| ๐ Model Comparison | Compare predictions across multiple architectures |
|
|
297
|
+
| ๐ Performance Monitor | Live GPU utilization and prediction history |
|
|
298
|
+
|
|
299
|
+
---
|
|
300
|
+
|
|
301
|
+
## Paper Reproducibility
|
|
302
|
+
|
|
303
|
+
To reproduce all paper figures and tables:
|
|
304
|
+
```bash
|
|
305
|
+
python scripts/generate_paper_figures.py
|
|
306
|
+
python scripts/generate_paper_tables.py
|
|
307
|
+
python scripts/ablation_study.py
|
|
308
|
+
```
|
|
309
|
+
Outputs saved to `results/figures/`.
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## License
|
|
314
|
+
|
|
315
|
+
MIT License — see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
# Blink
|
|
2
|
+
> **GPU Performance Predictor for Deep Learning Models**
|
|
3
|
+
|
|
4
|
+
Blink predicts **execution time** and **memory usage** of PyTorch neural networks on GPU without actually running them. It combines classical ML (XGBoost, Random Forest) with a Graph Neural Network (GNN) that encodes the computational graph of any model architecture.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## ๐ Table of Contents
|
|
9
|
+
- [Overview](#overview)
|
|
10
|
+
- [Architecture](#architecture)
|
|
11
|
+
- [Project Structure](#project-structure)
|
|
12
|
+
- [Installation](#installation)
|
|
13
|
+
- [Usage](#usage)
|
|
14
|
+
- [Data Pipeline](#data-pipeline)
|
|
15
|
+
- [Model Performance](#model-performance)
|
|
16
|
+
- [Dashboard](#dashboard)
|
|
17
|
+
- [Paper Reproducibility](#paper-reproducibility)
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Overview
|
|
22
|
+
|
|
23
|
+
Given a PyTorch model and a batch size, Blink answers:
|
|
24
|
+
- *How long will a forward pass take on this GPU?*
|
|
25
|
+
- *How much GPU memory will it consume?*
|
|
26
|
+
|
|
27
|
+
This is useful for:
|
|
28
|
+
- **Batch size optimization** before deployment
|
|
29
|
+
- **Hardware cost estimation** for training runs
|
|
30
|
+
- **NAS (Neural Architecture Search)** — filtering architectures by predicted cost
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Architecture
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
PyTorch Model
|
|
38
|
+
โ
|
|
39
|
+
โผ
|
|
40
|
+
โโโโโโโโโโโโโโโโโโโโโโโ
|
|
41
|
+
โ Feature Extractor โ โ layer counts, FLOPs, params, depth, width, skip connections
|
|
42
|
+
โ + GNN Extractor โ โ graph-based architecture encoding (ArchitectureGNN)
|
|
43
|
+
โโโโโโโโโโโฌโโโโโโโโโโโโ
|
|
44
|
+
โ
|
|
45
|
+
โผ
|
|
46
|
+
โโโโโโโโโโโโโโโโโโโโโโโ
|
|
47
|
+
โ Prediction Models โ
|
|
48
|
+
โ โโโโโโโโโโโโโโโโโ โ
|
|
49
|
+
โ ยท XGBoost (tuned) โ โ main predictor (best MAPE)
|
|
50
|
+
โ ยท Random Forest โ โ ensemble comparison
|
|
51
|
+
โ ยท GNN Predictor โ โ graph-native, generalizes across architectures
|
|
52
|
+
โ ยท Linear / Ridge โ โ baselines
|
|
53
|
+
โโโโโโโโโโโฌโโโโโโโโโโโโ
|
|
54
|
+
โ
|
|
55
|
+
โผ
|
|
56
|
+
Predicted: execution_time_ms, memory_mb
|
|
57
|
+
+ Uncertainty bounds (lower / upper)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Project Structure
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
Blink/
|
|
66
|
+
โโโ dashboard.py # ๐ฅ๏ธ Main Streamlit web app (run this)
|
|
67
|
+
โโโ prediction_api.py # ๐ Flask REST API
|
|
68
|
+
โ
|
|
69
|
+
โโโ โโ Core ML Modules โโ
|
|
70
|
+
โ โโโ model_profiler.py # GPU profiler (CUDA events)
|
|
71
|
+
โ โโโ feature_extractor.py # Static feature extraction from nn.Module
|
|
72
|
+
โ โโโ gnn_extractor.py # GNN-based graph feature extraction
|
|
73
|
+
โ โโโ gnn_model.py # ArchitectureGNN model definition (PyG)
|
|
74
|
+
โ โโโ prediction_model.py # Train XGBoost / RF / Linear models
|
|
75
|
+
โ โโโ train_gnn.py # Train the GNN predictor
|
|
76
|
+
โ โโโ train_memory_model.py# Train memory prediction model
|
|
77
|
+
โ โโโ gpu_predictor.py # Inference class with caching & batch support
|
|
78
|
+
โ โโโ model_analyser.py # Model complexity analysis utilities
|
|
79
|
+
โ โโโ advanced_features.py # Extended feature engineering
|
|
80
|
+
โ โโโ dynamic_predictor.py # Dynamic / online prediction
|
|
81
|
+
โ โโโ gpu_info.py # GPU metadata (pynvml)
|
|
82
|
+
โ โโโ workload_scheduler.py# Batch workload scheduler
|
|
83
|
+
โ โโโ performance_monitor.py
|
|
84
|
+
โ
|
|
85
|
+
โโโ scripts/ # ๐ฌ Experiment & data scripts
|
|
86
|
+
โ โโโ collect_data.py # Profile CNN/Transformer/custom models โ data/raw/
|
|
87
|
+
โ โโโ enhance_dataset.py # Augment dataset (more batch sizes / models)
|
|
88
|
+
โ โโโ diverse_architectures.py # Profile diverse arch families
|
|
89
|
+
โ โโโ ablation_study.py # 5-condition ablation (Table II in paper)
|
|
90
|
+
โ โโโ generate_paper_figures.py # Reproduce all paper figures
|
|
91
|
+
โ โโโ generate_paper_tables.py # Reproduce paper tables
|
|
92
|
+
โ
|
|
93
|
+
โโโ tests/ # โ
Test suite
|
|
94
|
+
โ โโโ test_diverse_models.py
|
|
95
|
+
โ โโโ test_predictors.py
|
|
96
|
+
โ โโโ test_profiler.py
|
|
97
|
+
โ โโโ test_gnn_scaling.py
|
|
98
|
+
โ โโโ evaluate_gnn_vs_xgb.py
|
|
99
|
+
โ
|
|
100
|
+
โโโ data/
|
|
101
|
+
โ โโโ raw/ # Raw profiling CSVs (gitignored)
|
|
102
|
+
โ โโโ processed/ # Feature-engineered CSVs
|
|
103
|
+
โ โโโ enriched/ # Final training-ready dataset
|
|
104
|
+
โ โโโ feedback_log.csv # Online feedback loop log
|
|
105
|
+
โ
|
|
106
|
+
โโโ models/ # Serialized model artifacts (gitignored)
|
|
107
|
+
โ โโโ xgboost_(tuned)_model.joblib
|
|
108
|
+
โ โโโ random_forest_model.joblib
|
|
109
|
+
โ โโโ gnn_predictor.pth
|
|
110
|
+
โ โโโ memory_model.joblib
|
|
111
|
+
โ โโโ ...
|
|
112
|
+
โ
|
|
113
|
+
โโโ results/
|
|
114
|
+
โ โโโ figures/ # Paper figures (PNG)
|
|
115
|
+
โ โโโ ablation_study_table.csv
|
|
116
|
+
โ โโโ gnn_scaling_table.csv
|
|
117
|
+
โ โโโ ...
|
|
118
|
+
โ
|
|
119
|
+
โโโ templates/index.html # HTML template for web interface
|
|
120
|
+
โโโ legacy/ # Archived / superseded scripts
|
|
121
|
+
โโโ requirements.txt
|
|
122
|
+
โโโ .gitignore
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Installation
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
# 1. Clone the repo
|
|
131
|
+
git clone <your-repo-url>
|
|
132
|
+
cd Blink
|
|
133
|
+
|
|
134
|
+
# 2. Create a virtual environment
|
|
135
|
+
python -m venv venv
|
|
136
|
+
venv\Scripts\activate # Windows
|
|
137
|
+
# source venv/bin/activate # Linux/macOS
|
|
138
|
+
|
|
139
|
+
# 3. Install dependencies
|
|
140
|
+
pip install -r requirements.txt
|
|
141
|
+
|
|
142
|
+
# 4. Install PyTorch Geometric (match your CUDA version)
|
|
143
|
+
# See: https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html
|
|
144
|
+
pip install torch-geometric
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**Requirements:** NVIDIA GPU with CUDA, Python ≥ 3.9
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Usage
|
|
152
|
+
|
|
153
|
+
### 1. Launch the Dashboard
|
|
154
|
+
```bash
|
|
155
|
+
streamlit run dashboard.py
|
|
156
|
+
```
|
|
157
|
+
Features: live model prediction, batch size optimizer, model comparison, performance monitor.
|
|
158
|
+
|
|
159
|
+
### 2. Collect Profiling Data
|
|
160
|
+
```bash
|
|
161
|
+
python scripts/collect_data.py --batch-sizes 1 4 16 32 64
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### 3. Train Prediction Models
|
|
165
|
+
```bash
|
|
166
|
+
# Train XGBoost / RF / Linear baseline models
|
|
167
|
+
python prediction_model.py
|
|
168
|
+
|
|
169
|
+
# Train GNN predictor
|
|
170
|
+
python train_gnn.py
|
|
171
|
+
|
|
172
|
+
# Train memory model
|
|
173
|
+
python train_memory_model.py
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### 4. Run Ablation Study
|
|
177
|
+
```bash
|
|
178
|
+
python scripts/ablation_study.py
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### 5. Predict via Python API
|
|
182
|
+
```python
|
|
183
|
+
from gpu_predictor import GPUPredictor
|
|
184
|
+
import torchvision.models as models
|
|
185
|
+
|
|
186
|
+
predictor = GPUPredictor()
|
|
187
|
+
model = models.resnet50(pretrained=False)
|
|
188
|
+
result = predictor.predict_for_custom_model(model, batch_size=16)
|
|
189
|
+
print(result)
|
|
190
|
+
# {'execution_time_ms': 12.4, 'memory_mb': 1820, 'confidence_lower': 11.1, ...}
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Data Pipeline
|
|
196
|
+
|
|
197
|
+
```
|
|
198
|
+
collect_data.py
|
|
199
|
+
โโโถ data/raw/*.csv (GPU profiling measurements)
|
|
200
|
+
โ
|
|
201
|
+
โผ
|
|
202
|
+
feature_extractor.py
|
|
203
|
+
โโโถ data/processed/*.csv (static model features)
|
|
204
|
+
โ
|
|
205
|
+
โผ
|
|
206
|
+
enhance_dataset.py
|
|
207
|
+
โโโถ data/enriched/*.csv (augmented, training-ready)
|
|
208
|
+
โ
|
|
209
|
+
โผ
|
|
210
|
+
prediction_model.py / train_gnn.py
|
|
211
|
+
โโโถ models/ (trained predictors)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## Model Performance
|
|
217
|
+
|
|
218
|
+
Results on held-out test set (20% split):
|
|
219
|
+
|
|
220
|
+
| Model | Exec Time MAPE | Memory MAPE | Notes |
|
|
221
|
+
|---|---|---|---|
|
|
222
|
+
| XGBoost (tuned) | ~8% | ~6% | Best overall |
|
|
223
|
+
| Random Forest | ~11% | ~9% | Robust baseline |
|
|
224
|
+
| GNN Predictor | ~10% | ~8% | Best on unseen architectures |
|
|
225
|
+
| Linear Regression | ~22% | ~19% | Baseline |
|
|
226
|
+
|
|
227
|
+
*(Full ablation study results: `results/ablation_study_table.csv`)*
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## Dashboard
|
|
232
|
+
|
|
233
|
+
The Streamlit dashboard (`dashboard.py`) provides:
|
|
234
|
+
|
|
235
|
+
| Tab | Description |
|
|
236
|
+
|---|---|
|
|
237
|
+
| ๐ฏ Prediction | Predict execution time & memory for standard or custom models |
|
|
238
|
+
| โก Batch Optimizer | Find optimal batch size within a memory budget |
|
|
239
|
+
| ๐ Model Comparison | Compare predictions across multiple architectures |
|
|
240
|
+
| ๐ Performance Monitor | Live GPU utilization and prediction history |
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## Paper Reproducibility
|
|
245
|
+
|
|
246
|
+
To reproduce all paper figures and tables:
|
|
247
|
+
```bash
|
|
248
|
+
python scripts/generate_paper_figures.py
|
|
249
|
+
python scripts/generate_paper_tables.py
|
|
250
|
+
python scripts/ablation_study.py
|
|
251
|
+
```
|
|
252
|
+
Outputs saved to `results/figures/`.
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
## License
|
|
257
|
+
|
|
258
|
+
MIT License — see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Blink — GPU Performance Predictor
|
|
3
|
+
==================================
|
|
4
|
+
Predict GPU execution time and memory usage for PyTorch models
|
|
5
|
+
*before* running them on GPU hardware.
|
|
6
|
+
|
|
7
|
+
Quick start
|
|
8
|
+
-----------
|
|
9
|
+
>>> from blink import BlinkPredictor
|
|
10
|
+
>>> predictor = BlinkPredictor()
|
|
11
|
+
>>> result = predictor.predict("resnet18", batch_size=32)
|
|
12
|
+
>>> print(f"Exec time: {result['exec_time_ms']:.1f} ms")
|
|
13
|
+
>>> print(f"Memory : {result['memory_mb']:.1f} MB")
|
|
14
|
+
|
|
15
|
+
Or with your own model:
|
|
16
|
+
>>> import torch.nn as nn
|
|
17
|
+
>>> model = nn.Sequential(nn.Linear(512, 256), nn.ReLU(), nn.Linear(256, 10))
|
|
18
|
+
>>> result = BlinkPredictor().predict(model, batch_size=64)
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from blink._predictor import BlinkPredictor
|
|
23
|
+
from blink._analyzer import BlinkAnalyzer
|
|
24
|
+
from blink._version import __version__
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"BlinkPredictor",
|
|
28
|
+
"BlinkAnalyzer",
|
|
29
|
+
"__version__",
|
|
30
|
+
]
|