mlwheels-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlwheels-0.1.0/PKG-INFO +84 -0
- mlwheels-0.1.0/README.md +63 -0
- mlwheels-0.1.0/mlwheels/__init__.py +7 -0
- mlwheels-0.1.0/mlwheels/cli.py +81 -0
- mlwheels-0.1.0/mlwheels/detector.py +207 -0
- mlwheels-0.1.0/mlwheels.egg-info/PKG-INFO +84 -0
- mlwheels-0.1.0/mlwheels.egg-info/SOURCES.txt +10 -0
- mlwheels-0.1.0/mlwheels.egg-info/dependency_links.txt +1 -0
- mlwheels-0.1.0/mlwheels.egg-info/entry_points.txt +2 -0
- mlwheels-0.1.0/mlwheels.egg-info/top_level.txt +1 -0
- mlwheels-0.1.0/pyproject.toml +34 -0
- mlwheels-0.1.0/setup.cfg +4 -0
mlwheels-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,84 @@
Metadata-Version: 2.4
Name: mlwheels
Version: 0.1.0
Summary: Auto-detect and install pre-built wheels for Flash Attention & vLLM
License: MIT
Project-URL: Homepage, https://rs545837.github.io/Flash-Attn-wheels/
Project-URL: Repository, https://github.com/rs545837/Flash-Attn-wheels
Keywords: flash-attention,vllm,cuda,pytorch,wheels
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.8
Description-Content-Type: text/markdown

# Pre-Built Wheels

Pre-built wheels for Flash Attention & vLLM. Skip the compilation.

## Quick Install

Auto-detect your environment and install the right wheel:

```bash
pip install mlwheels

# Install Flash Attention
mlwheels flash-attn

# Install vLLM
mlwheels vllm

# Just detect environment (no install)
mlwheels --detect
```

## Features

- Search and filter by CUDA, Python, PyTorch, and Platform
- One-click copy for `pip` and `uv` install commands
- Direct download links
- Flash Attention 2 & 3 support
- vLLM wheels for multiple CUDA versions

## Supported Configurations

### Flash Attention 2
- CUDA: 11.8, 12.1, 12.2, 12.3, 12.4, 12.6
- PyTorch: 2.0 - 2.10
- Python: 3.8 - 3.12
- Platforms: Linux x86_64, Linux ARM64, Windows

### Flash Attention 3
- CUDA: 12.6, 12.8, 12.9, 13.0
- PyTorch: 2.8 - 2.10
- Python: 3.10 - 3.12
- Platforms: Linux x86_64, Linux ARM64, Windows

### vLLM
- CUDA: 11.8, 12.1, 12.4, 12.6, 12.8, 12.9, 13.0, CPU
- Python: 3.8+
- Platforms: Linux x86_64, Linux ARM64

## Sources

**Flash Attention**
- [flashattn.dev](https://flashattn.dev/)
- [Flash Attention 3 Wheels](https://windreamer.github.io/flash-attention3-wheels/)
- [mjun0812/flash-attention-prebuild-wheels](https://github.com/mjun0812/flash-attention-prebuild-wheels)
- [Dao-AILab/flash-attention](https://github.com/Dao-AILab/flash-attention)

**vLLM**
- [vLLM GitHub Releases](https://github.com/vllm-project/vllm/releases)
- [vLLM Documentation](https://docs.vllm.ai/)

## License

MIT
mlwheels-0.1.0/README.md
ADDED
@@ -0,0 +1,63 @@
# Pre-Built Wheels

Pre-built wheels for Flash Attention & vLLM. Skip the compilation.

## Quick Install

Auto-detect your environment and install the right wheel:

```bash
pip install mlwheels

# Install Flash Attention
mlwheels flash-attn

# Install vLLM
mlwheels vllm

# Just detect environment (no install)
mlwheels --detect
```

## Features

- Search and filter by CUDA, Python, PyTorch, and Platform
- One-click copy for `pip` and `uv` install commands
- Direct download links
- Flash Attention 2 & 3 support
- vLLM wheels for multiple CUDA versions

## Supported Configurations

### Flash Attention 2
- CUDA: 11.8, 12.1, 12.2, 12.3, 12.4, 12.6
- PyTorch: 2.0 - 2.10
- Python: 3.8 - 3.12
- Platforms: Linux x86_64, Linux ARM64, Windows

### Flash Attention 3
- CUDA: 12.6, 12.8, 12.9, 13.0
- PyTorch: 2.8 - 2.10
- Python: 3.10 - 3.12
- Platforms: Linux x86_64, Linux ARM64, Windows

### vLLM
- CUDA: 11.8, 12.1, 12.4, 12.6, 12.8, 12.9, 13.0, CPU
- Python: 3.8+
- Platforms: Linux x86_64, Linux ARM64

## Sources

**Flash Attention**
- [flashattn.dev](https://flashattn.dev/)
- [Flash Attention 3 Wheels](https://windreamer.github.io/flash-attention3-wheels/)
- [mjun0812/flash-attention-prebuild-wheels](https://github.com/mjun0812/flash-attention-prebuild-wheels)
- [Dao-AILab/flash-attention](https://github.com/Dao-AILab/flash-attention)

**vLLM**
- [vLLM GitHub Releases](https://github.com/vllm-project/vllm/releases)
- [vLLM Documentation](https://docs.vllm.ai/)

## License

MIT
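A minimal sketch (not part of the package, assuming mlwheels is installed) of what `mlwheels flash-attn` boils down to, using the `detect_environment` and `get_wheel_url` helpers defined in `mlwheels/detector.py` further down in this diff:

```python
# Sketch only: resolve a prebuilt wheel URL for the current environment and
# hand it to pip, mirroring what mlwheels.detector.install_wheel does.
import subprocess
import sys

from mlwheels.detector import detect_environment, get_wheel_url

env = detect_environment()                # {"python": ..., "torch": ..., "cuda": ...}
wheel = get_wheel_url("flash-attn", env)  # None if no prebuilt wheel matches

if wheel is None:
    sys.exit("no matching flash-attn wheel for this environment")

# Equivalent to copying the `pip install <url>` command from the website.
subprocess.run([sys.executable, "-m", "pip", "install", wheel["url"]], check=True)
```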
mlwheels-0.1.0/mlwheels/cli.py
ADDED
@@ -0,0 +1,81 @@
"""CLI for mlwheels."""

import argparse
import sys
from .detector import detect_environment, get_wheel_url, install_wheel, get_platform


def main():
    parser = argparse.ArgumentParser(
        description="Auto-detect and install pre-built wheels for Flash Attention & vLLM"
    )
    parser.add_argument(
        "library",
        nargs="?",
        choices=["flash-attn", "vllm"],
        help="Library to install (flash-attn or vllm)"
    )
    parser.add_argument(
        "--detect", "-d",
        action="store_true",
        help="Only detect environment, don't install"
    )
    parser.add_argument(
        "--dry-run", "-n",
        action="store_true",
        help="Show what would be installed without installing"
    )
    parser.add_argument(
        "--url", "-u",
        action="store_true",
        help="Only print the wheel URL"
    )

    args = parser.parse_args()

    env = detect_environment()
    platform = get_platform()

    if args.detect or not args.library:
        print("Detected environment:")
        print(f"  Python: {env['python']}")
        print(f"  PyTorch: {env['torch'] or 'not installed'}")
        print(f"  CUDA: {env['cuda'] or 'not detected'}")
        print(f"  Platform: {platform or 'unknown'}")

        if not args.library:
            print("\nRecommended wheels:")
            for lib in ["flash-attn", "vllm"]:
                wheel = get_wheel_url(lib, env)
                if wheel:
                    print(f"\n  {lib} {wheel['version']}:")
                    print(f"    pip install {wheel['url']}")
                else:
                    print(f"\n  {lib}: no matching wheel found")

            print("\nTo install, run:")
            print("  mlwheels flash-attn")
            print("  mlwheels vllm")
        return 0

    if args.library:
        wheel = get_wheel_url(args.library, env)

        if args.url:
            if wheel:
                print(wheel["url"])
                return 0
            else:
                print("No matching wheel found", file=sys.stderr)
                return 1

        if args.dry_run:
            return 0 if install_wheel(args.library, dry_run=True) else 1

        return 0 if install_wheel(args.library) else 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
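As a rough illustration of the CLI wiring above (a sketch rather than project code), calling `main()` with a patched `sys.argv` behaves like running the `mlwheels` console script:

```python
# Sketch: exercise mlwheels.cli.main() as if `mlwheels --detect` had been run.
import sys

from mlwheels.cli import main

sys.argv = ["mlwheels", "--detect"]   # detect-only mode, nothing is installed
exit_code = main()                    # prints Python/PyTorch/CUDA/platform and suggested wheels
print("exit code:", exit_code)        # 0 on success
```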
mlwheels-0.1.0/mlwheels/detector.py
ADDED
@@ -0,0 +1,207 @@
"""Environment detection and wheel matching."""

import subprocess
import sys
import re


def get_python_version():
    """Get Python version as string (e.g., '3.10')."""
    return f"{sys.version_info.major}.{sys.version_info.minor}"


def get_torch_version():
    """Get PyTorch version if installed."""
    try:
        import torch
        # Extract major.minor (e.g., "2.5" from "2.5.1+cu124")
        version = torch.__version__
        match = re.match(r"(\d+\.\d+)", version)
        return match.group(1) if match else None
    except ImportError:
        return None


def get_cuda_version():
    """Get CUDA version from PyTorch or nvidia-smi."""
    # Try PyTorch first
    try:
        import torch
        if torch.cuda.is_available():
            # Get CUDA version from PyTorch build
            cuda_version = torch.version.cuda
            if cuda_version:
                # Extract major.minor (e.g., "12.4" from "12.4")
                match = re.match(r"(\d+\.\d+)", cuda_version)
                return match.group(1) if match else None
    except ImportError:
        pass

    # Fallback to nvidia-smi
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"],
            capture_output=True, text=True, timeout=5
        )
        if result.returncode == 0:
            # nvidia-smi doesn't directly give CUDA version, try nvcc
            nvcc_result = subprocess.run(
                ["nvcc", "--version"],
                capture_output=True, text=True, timeout=5
            )
            if nvcc_result.returncode == 0:
                match = re.search(r"release (\d+\.\d+)", nvcc_result.stdout)
                if match:
                    return match.group(1)
    except (FileNotFoundError, subprocess.TimeoutExpired):
        pass

    return None


def detect_environment():
    """Detect current environment."""
    return {
        "python": get_python_version(),
        "torch": get_torch_version(),
        "cuda": get_cuda_version(),
    }


def get_platform():
    """Get platform identifier."""
    import platform
    system = platform.system().lower()
    machine = platform.machine().lower()

    if system == "linux":
        if machine in ("x86_64", "amd64"):
            return "linux_x86_64"
        elif machine in ("aarch64", "arm64"):
            return "linux_aarch64"
    elif system == "windows":
        return "win_amd64"
    elif system == "darwin":
        return "macos"

    return None


# Base URLs for wheels
WHEEL_SOURCES = {
    "flash-attn": {
        "base": "https://github.com/Dao-AILab/flash-attention/releases/download",
        "alt": "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download",
    },
    "vllm": {
        "base": "https://github.com/vllm-project/vllm/releases/download",
    }
}

# Known wheel configurations (subset - full list at flashattn.dev)
FLASH_ATTN_WHEELS = {
    # Format: (cuda, torch, python, platform): version
    ("12.4", "2.5", "3.10", "linux_x86_64"): "2.7.4",
    ("12.4", "2.5", "3.11", "linux_x86_64"): "2.7.4",
    ("12.4", "2.5", "3.12", "linux_x86_64"): "2.7.4",
    ("12.6", "2.6", "3.10", "linux_x86_64"): "2.7.4",
    ("12.6", "2.6", "3.11", "linux_x86_64"): "2.7.4",
    ("12.6", "2.6", "3.12", "linux_x86_64"): "2.7.4",
    ("12.1", "2.4", "3.10", "linux_x86_64"): "2.6.3",
    ("12.1", "2.4", "3.11", "linux_x86_64"): "2.6.3",
    ("11.8", "2.3", "3.10", "linux_x86_64"): "2.6.3",
    ("11.8", "2.3", "3.11", "linux_x86_64"): "2.6.3",
}

VLLM_WHEELS = {
    # Format: (cuda, python, platform): (version, url)
    ("12.6", "3.10", "linux_x86_64"): ("0.15.0", "https://github.com/vllm-project/vllm/releases/download/v0.15.0/vllm-0.15.0-cp38-abi3-manylinux1_x86_64.whl"),
    ("12.6", "3.11", "linux_x86_64"): ("0.15.0", "https://github.com/vllm-project/vllm/releases/download/v0.15.0/vllm-0.15.0-cp38-abi3-manylinux1_x86_64.whl"),
    ("12.6", "3.12", "linux_x86_64"): ("0.15.0", "https://github.com/vllm-project/vllm/releases/download/v0.15.0/vllm-0.15.0-cp38-abi3-manylinux1_x86_64.whl"),
    ("12.4", "3.10", "linux_x86_64"): ("0.15.0", "https://github.com/vllm-project/vllm/releases/download/v0.15.0/vllm-0.15.0+cu124-cp38-abi3-manylinux1_x86_64.whl"),
    ("12.4", "3.11", "linux_x86_64"): ("0.15.0", "https://github.com/vllm-project/vllm/releases/download/v0.15.0/vllm-0.15.0+cu124-cp38-abi3-manylinux1_x86_64.whl"),
    ("12.1", "3.10", "linux_x86_64"): ("0.15.0", "https://github.com/vllm-project/vllm/releases/download/v0.15.0/vllm-0.15.0+cu121-cp38-abi3-manylinux1_x86_64.whl"),
    ("12.1", "3.11", "linux_x86_64"): ("0.15.0", "https://github.com/vllm-project/vllm/releases/download/v0.15.0/vllm-0.15.0+cu121-cp38-abi3-manylinux1_x86_64.whl"),
    ("11.8", "3.10", "linux_x86_64"): ("0.15.0", "https://github.com/vllm-project/vllm/releases/download/v0.15.0/vllm-0.15.0+cu118-cp38-abi3-manylinux1_x86_64.whl"),
}


def find_closest_cuda(target_cuda, available_versions):
    """Find the closest CUDA version that's <= target."""
    if not target_cuda:
        return None

    target = float(target_cuda)
    available = sorted([float(v) for v in available_versions], reverse=True)

    for v in available:
        if v <= target:
            return str(v) if v == int(v) else f"{v:.1f}"

    return None


def get_wheel_url(library="flash-attn", env=None):
    """Get the best matching wheel URL for the current environment."""
    if env is None:
        env = detect_environment()

    platform = get_platform()
    python = env.get("python")
    torch = env.get("torch")
    cuda = env.get("cuda")

    if library == "vllm":
        # Find matching vLLM wheel
        available_cuda = list(set(k[0] for k in VLLM_WHEELS.keys()))
        matched_cuda = find_closest_cuda(cuda, available_cuda) or "12.6"

        for py in [python, "3.11", "3.10"]:
            key = (matched_cuda, py, platform)
            if key in VLLM_WHEELS:
                version, url = VLLM_WHEELS[key]
                return {"url": url, "version": version, "cuda": matched_cuda, "python": py}

        return None

    else:  # flash-attn
        available_cuda = list(set(k[0] for k in FLASH_ATTN_WHEELS.keys()))
        matched_cuda = find_closest_cuda(cuda, available_cuda)

        if not matched_cuda or not torch:
            return None

        for py in [python, "3.11", "3.10"]:
            key = (matched_cuda, torch, py, platform)
            if key in FLASH_ATTN_WHEELS:
                version = FLASH_ATTN_WHEELS[key]
                # Construct URL
                url = f"https://github.com/Dao-AILab/flash-attention/releases/download/v{version}/flash_attn-{version}+cu{matched_cuda.replace('.', '')}torch{torch}-cp{py.replace('.', '')}-cp{py.replace('.', '')}-{platform}.whl"
                return {"url": url, "version": version, "cuda": matched_cuda, "torch": torch, "python": py}

        return None


def install_wheel(library="flash-attn", dry_run=False):
    """Install the matching wheel."""
    env = detect_environment()
    wheel = get_wheel_url(library, env)

    if not wheel:
        print(f"No matching {library} wheel found for your environment:")
        print(f"  Python: {env.get('python')}")
        print(f"  PyTorch: {env.get('torch') or 'not installed'}")
        print(f"  CUDA: {env.get('cuda') or 'not detected'}")
        print(f"\nVisit https://rs545837.github.io/Flash-Attn-wheels/ to find a compatible wheel.")
        return False

    cmd = f"pip install {wheel['url']}"

    if dry_run:
        print(f"Would install {library} {wheel['version']}:")
        print(f"  {cmd}")
        return True

    print(f"Installing {library} {wheel['version']}...")
    result = subprocess.run(cmd.split(), capture_output=False)
    return result.returncode == 0
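A short usage sketch of the matching logic above, assuming the package is importable; the values in the comments are examples, not guaranteed lookup results:

```python
# Sketch: how detector.py's tables are consulted for the current machine.
from mlwheels.detector import (
    FLASH_ATTN_WHEELS,
    detect_environment,
    find_closest_cuda,
    get_platform,
    get_wheel_url,
)

env = detect_environment()
print(env, get_platform())   # e.g. {'python': '3.11', 'torch': '2.5', 'cuda': '12.4'} linux_x86_64

# CUDA matching rounds down to the nearest table entry, e.g. 12.5 -> 12.4.
cudas = {key[0] for key in FLASH_ATTN_WHEELS}
print(find_closest_cuda("12.5", cudas))

wheel = get_wheel_url("flash-attn", env)
if wheel:
    print(wheel["version"], wheel["url"])   # dict also carries the matched cuda/torch/python
```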
mlwheels-0.1.0/mlwheels.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,84 @@
[84 added lines; content identical to mlwheels-0.1.0/PKG-INFO above]
mlwheels-0.1.0/mlwheels.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,10 @@
README.md
pyproject.toml
mlwheels/__init__.py
mlwheels/cli.py
mlwheels/detector.py
mlwheels.egg-info/PKG-INFO
mlwheels.egg-info/SOURCES.txt
mlwheels.egg-info/dependency_links.txt
mlwheels.egg-info/entry_points.txt
mlwheels.egg-info/top_level.txt
mlwheels-0.1.0/mlwheels.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@

mlwheels-0.1.0/mlwheels.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
mlwheels
mlwheels-0.1.0/pyproject.toml
ADDED
@@ -0,0 +1,34 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "mlwheels"
version = "0.1.0"
description = "Auto-detect and install pre-built wheels for Flash Attention & vLLM"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.8"
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
keywords = ["flash-attention", "vllm", "cuda", "pytorch", "wheels"]

[project.scripts]
mlwheels = "mlwheels.cli:main"

[project.urls]
Homepage = "https://rs545837.github.io/Flash-Attn-wheels/"
Repository = "https://github.com/rs545837/Flash-Attn-wheels"

[tool.setuptools.packages.find]
include = ["mlwheels*"]
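The `[project.scripts]` entry above is what exposes the `mlwheels` command; conceptually, the generated launcher is equivalent to this small script (a sketch, not a file in the package):

```python
# Sketch of the console-script behaviour declared in pyproject.toml:
# `mlwheels = "mlwheels.cli:main"` runs main() and uses its return value as the exit code.
import sys

from mlwheels.cli import main

sys.exit(main())
```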
mlwheels-0.1.0/setup.cfg
ADDED