rocmate 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rocmate-0.1.0/.claude/settings.local.json +21 -0
- rocmate-0.1.0/.github/workflows/ci.yml +29 -0
- rocmate-0.1.0/.gitignore +17 -0
- rocmate-0.1.0/CONTRIBUTING.md +55 -0
- rocmate-0.1.0/LICENSE +21 -0
- rocmate-0.1.0/PKG-INFO +99 -0
- rocmate-0.1.0/PLAN.md +252 -0
- rocmate-0.1.0/README.md +69 -0
- rocmate-0.1.0/configs/tools/axolotl.yaml +55 -0
- rocmate-0.1.0/configs/tools/comfyui.yaml +50 -0
- rocmate-0.1.0/configs/tools/exllamav2.yaml +52 -0
- rocmate-0.1.0/configs/tools/faster-whisper.yaml +35 -0
- rocmate-0.1.0/configs/tools/llama-cpp.yaml +50 -0
- rocmate-0.1.0/configs/tools/ollama.yaml +66 -0
- rocmate-0.1.0/configs/tools/stable-diffusion-webui.yaml +49 -0
- rocmate-0.1.0/configs/tools/vllm.yaml +60 -0
- rocmate-0.1.0/pyproject.toml +61 -0
- rocmate-0.1.0/src/rocmate/__init__.py +2 -0
- rocmate-0.1.0/src/rocmate/cli.py +142 -0
- rocmate-0.1.0/src/rocmate/configs.py +98 -0
- rocmate-0.1.0/src/rocmate/doctor.py +221 -0
- rocmate-0.1.0/src/rocmate/fixer.py +85 -0
- rocmate-0.1.0/src/rocmate/gpu.py +172 -0
- rocmate-0.1.0/src/rocmate/install.py +147 -0
- rocmate-0.1.0/tests/test_basic.py +34 -0
- rocmate-0.1.0/tests/test_cli.py +233 -0
- rocmate-0.1.0/tests/test_configs.py +222 -0
- rocmate-0.1.0/tests/test_doctor.py +256 -0
- rocmate-0.1.0/tests/test_fixer.py +154 -0
- rocmate-0.1.0/tests/test_gpu.py +307 -0
- rocmate-0.1.0/tests/test_install.py +180 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"PowerShell(Get-ChildItem -Path \"C:\\\\Users\\\\tonde\\\\PycharmProjects\\\\RocMate\" -Recurse -Force | Select-Object -Property FullName, PSIsContainer | Sort-Object FullName | ForEach-Object { if \\($_.PSIsContainer\\) { Write-Host \"[DIR] $\\($_.FullName\\)\" } else { Write-Host \"[FILE] $\\($_.FullName\\)\" } })",
|
|
5
|
+
"Bash(python -m pytest tests/ -v)",
|
|
6
|
+
"Bash(python -m pytest tests/ -v -p no:cacheprovider)",
|
|
7
|
+
"Bash(pip install *)",
|
|
8
|
+
"Bash(python -m build)",
|
|
9
|
+
"Bash(curl -s -o /dev/null -w \"%{http_code}\" https://pypi.org/pypi/rocmate/json)",
|
|
10
|
+
"Bash(python -m pytest tests/test_gpu.py -v)",
|
|
11
|
+
"Bash(python -m pytest tests/ -v --tb=no -q)",
|
|
12
|
+
"Bash(python -m pytest tests/ -q --tb=no)",
|
|
13
|
+
"Bash(python -m pytest tests/test_cli.py::TestDoctorToolFlag -v --tb=short)",
|
|
14
|
+
"Bash(python -m pytest tests/ -q --tb=short)",
|
|
15
|
+
"Bash(python -m pytest tests/test_configs.py -q --tb=no)",
|
|
16
|
+
"Bash(python -m pytest tests/test_doctor.py -q --tb=no)",
|
|
17
|
+
"Bash(python -m pytest tests/test_install.py -q --tb=short)",
|
|
18
|
+
"Bash(python -m pytest tests/test_cli.py::TestInstallCommand -q --tb=line)"
|
|
19
|
+
]
|
|
20
|
+
}
|
|
21
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: ${{ matrix.python-version }}
|
|
20
|
+
- name: Install
|
|
21
|
+
run: |
|
|
22
|
+
python -m pip install --upgrade pip
|
|
23
|
+
pip install -e ".[dev]"
|
|
24
|
+
- name: Lint
|
|
25
|
+
run: |
|
|
26
|
+
ruff check src tests
|
|
27
|
+
- name: Test
|
|
28
|
+
run: |
|
|
29
|
+
pytest
|
rocmate-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Contributing to rocmate
|
|
2
|
+
|
|
3
|
+
Thanks for your interest! `rocmate` lives or dies by community-contributed configs — every working setup on a new AMD chip helps someone else avoid a rabbit hole.
|
|
4
|
+
|
|
5
|
+
## How to contribute
|
|
6
|
+
|
|
7
|
+
### Adding a tool config (most valuable contribution)
|
|
8
|
+
|
|
9
|
+
1. Copy `configs/tools/ollama.yaml` as a template.
|
|
10
|
+
2. Fill in the tool name, description, and homepage.
|
|
11
|
+
3. For each AMD chip you've personally tested, add an entry under `chips:` with:
|
|
12
|
+
- `status`: `tested`, `partial`, or `broken`
|
|
13
|
+
- `notes`: what you observed (ROCm version, model sizes, quirks)
|
|
14
|
+
- `env_vars`: any ENV vars needed
|
|
15
|
+
- `install_hints`: a short, copy-pasteable install sequence
|
|
16
|
+
4. Add yourself to the contributors list in the README.
|
|
17
|
+
5. Open a PR with `[config]` in the title.
|
|
18
|
+
|
|
19
|
+
### Improving an existing config
|
|
20
|
+
|
|
21
|
+
If you tested a chip that's marked `partial` and it actually works fine, please update the status with notes on what changed (ROCm version, kernel, driver).
|
|
22
|
+
|
|
23
|
+
### Reporting a broken setup
|
|
24
|
+
|
|
25
|
+
Open an issue with:
|
|
26
|
+
- Your chip (`rocminfo | grep Name`)
|
|
27
|
+
- ROCm version
|
|
28
|
+
- The tool and its version
|
|
29
|
+
- The exact error
|
|
30
|
+
- What you tried
|
|
31
|
+
|
|
32
|
+
### Code contributions
|
|
33
|
+
|
|
34
|
+
For CLI or library changes, please open an issue first to discuss the approach. Tests required for new functionality.
|
|
35
|
+
|
|
36
|
+
## Development setup
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
git clone https://github.com/tonde/rocmate
|
|
40
|
+
cd rocmate
|
|
41
|
+
python -m venv venv && source venv/bin/activate
|
|
42
|
+
pip install -e ".[dev]"
|
|
43
|
+
pytest
|
|
44
|
+
ruff check src tests
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Code style
|
|
48
|
+
|
|
49
|
+
- Python 3.11+, type hints everywhere
|
|
50
|
+
- `ruff` for linting
|
|
51
|
+
- Keep CLI output friendly and copy-pasteable
|
|
52
|
+
|
|
53
|
+
## Code of Conduct
|
|
54
|
+
|
|
55
|
+
Be kind. Be specific. Don't gatekeep AMD hardware.
|
rocmate-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ben
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
rocmate-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rocmate
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Curated AMD GPU compatibility index and CLI for AI workloads
|
|
5
|
+
Project-URL: Homepage, https://github.com/tonde/rocmate
|
|
6
|
+
Project-URL: Issues, https://github.com/tonde/rocmate/issues
|
|
7
|
+
Author-email: Benjamin Faeuster <benjamin.faeuster@web.de>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: ai,amd,gpu,llm,ollama,rocm
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: GPU
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: System :: Hardware
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Requires-Dist: pydantic>=2.6
|
|
21
|
+
Requires-Dist: pyyaml>=6.0
|
|
22
|
+
Requires-Dist: rich>=13.7
|
|
23
|
+
Requires-Dist: typer>=0.12
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# rocmate
|
|
32
|
+
|
|
33
|
+
> Get AMD GPUs running with AI tools — without the rabbit hole.
|
|
34
|
+
|
|
35
|
+
`rocmate` is a curated compatibility index and CLI for running modern AI workloads on AMD GPUs via ROCm. It tells you what works on your specific card, what to set, and what to avoid — based on configurations that real users have tested.
|
|
36
|
+
|
|
37
|
+
## Why?
|
|
38
|
+
|
|
39
|
+
AMD GPUs offer great VRAM-per-dollar (especially the RX 7900 XTX with 24 GB), but getting Ollama, ComfyUI, faster-whisper, or axolotl to actually use the GPU still involves hunting through blog posts, GitHub issues, and Discord threads. Information is scattered, often outdated, and rarely specific to your chip generation (gfx1030 vs gfx1100 vs gfx1201).
|
|
40
|
+
|
|
41
|
+
`rocmate` consolidates this knowledge into one place — version-controlled, testable, community-maintained.
|
|
42
|
+
|
|
43
|
+
## Quickstart
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# Install (Python 3.11+)
|
|
47
|
+
pipx install rocmate
|
|
48
|
+
|
|
49
|
+
# Check your system
|
|
50
|
+
rocmate doctor
|
|
51
|
+
|
|
52
|
+
# Show the tested config for a tool
|
|
53
|
+
rocmate show ollama
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Example output:
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
$ rocmate doctor
|
|
60
|
+
✓ GPU detected: AMD Radeon RX 7900 XTX (gfx1100)
|
|
61
|
+
✓ ROCm 6.3.1 installed
|
|
62
|
+
✗ User not in 'render' group
|
|
63
|
+
→ sudo usermod -aG render $USER && newgrp render
|
|
64
|
+
⚠ HSA_OVERRIDE_GFX_VERSION not set
|
|
65
|
+
→ export HSA_OVERRIDE_GFX_VERSION=11.0.0
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Supported tools (v0.1.0)
|
|
69
|
+
|
|
70
|
+
| Tool | gfx1100 (RX 7900 XT/XTX) | gfx1030 (RX 6800/6900) | gfx1201 (RX 9070) |
|
|
71
|
+
|------|:-:|:-:|:-:|
|
|
72
|
+
| Ollama | ✅ | ✅ | 🟡 |
|
|
73
|
+
| faster-whisper | ✅ | 🟡 | — |
|
|
74
|
+
| ComfyUI | ✅ | 🟡 | — |
|
|
75
|
+
|
|
76
|
+
✅ tested · 🟡 partial / workarounds needed · ❌ not working · — not yet tested
|
|
77
|
+
|
|
78
|
+
## Status
|
|
79
|
+
|
|
80
|
+
Early-stage. Currently maintained by [@tonde](https://github.com/tonde) on an RX 7900 XTX. Contributions for other AMD chips are very welcome — see [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
81
|
+
|
|
82
|
+
## Roadmap
|
|
83
|
+
|
|
84
|
+
- [x] `rocmate doctor` — system diagnostic
|
|
85
|
+
- [x] `rocmate show <tool>` — display tested config
|
|
86
|
+
- [ ] Windows / HIP SDK support (`doctor` + `show`)
|
|
87
|
+
- [ ] `rocmate install <tool>` — automated installer with correct ENV
|
|
88
|
+
- [ ] `rocmate doctor --fix` — auto-setup for ROCm/HIP, groups, and ENV vars
|
|
89
|
+
- [ ] Web-facing compatibility matrix
|
|
90
|
+
|
|
91
|
+
## Non-goals
|
|
92
|
+
|
|
93
|
+
- Not a replacement for ROCm, Ollama, or any inference engine
|
|
94
|
+
- Not a fork of upstream tools — only configs and glue
|
|
95
|
+
- Not a benchmarking tool
|
|
96
|
+
|
|
97
|
+
## License
|
|
98
|
+
|
|
99
|
+
MIT — see [LICENSE](LICENSE).
|
rocmate-0.1.0/PLAN.md
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# rocmate — Projektplan
|
|
2
|
+
|
|
3
|
+
> Persönliches Planungsdokument. Nicht für die Öffentlichkeit gedacht.
|
|
4
|
+
> Das öffentliche README liegt in `README.md`.
|
|
5
|
+
|
|
6
|
+
## TL;DR
|
|
7
|
+
|
|
8
|
+
`rocmate` ist ein **kuratierter Kompatibilitäts-Index + CLI-Tool** für AMD-GPUs und AI-Workloads. Es löst das Problem, dass Informationen darüber, was auf welcher AMD-Karte mit welcher ROCm-Version läuft, über hunderte Blogposts, GitHub-Issues und Discord-Threads verstreut sind.
|
|
9
|
+
|
|
10
|
+
**Zielnutzer:** Alle, die lokale AI auf AMD-Hardware laufen wollen — Indie-Devs, Homelabber, Datenschutz-bewusste Nutzer, Studenten, kleine Teams. Plattform: Linux primär, Windows ab v0.2.
|
|
11
|
+
|
|
12
|
+
**Mein Vorteil:** Ich bin selbst der Zielnutzer. Jede Friktion, die ich erlebe, ist ein Feature für andere.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Warum dieses Projekt?
|
|
17
|
+
|
|
18
|
+
### Das Problem
|
|
19
|
+
|
|
20
|
+
AMD-GPUs sind preislich attraktiv für AI:
|
|
21
|
+
- RX 7900 XTX: 24 GB VRAM für deutlich weniger als eine RTX 4090
|
|
22
|
+
- Wachsender Markt durch Ryzen AI APUs und neue RX 9000-Serie
|
|
23
|
+
- Aber: 90 % der Nutzer bleiben am Setup hängen
|
|
24
|
+
|
|
25
|
+
Bestehende Quellen:
|
|
26
|
+
- **AMDs offizielles AI-Bundle** (Januar 2026): nur für RX 7700+ und Ryzen AI 300/400, nur ein paar Tools
|
|
27
|
+
- **Ollama-Docs**: listen nur Ollama selbst, keine anderen Tools
|
|
28
|
+
- **Blogposts**: veralten in 3 Monaten, oft nicht chip-spezifisch
|
|
29
|
+
- **GitHub-Issues**: fragmentiert, schwer durchsuchbar
|
|
30
|
+
|
|
31
|
+
→ Keine **konsolidierte, getestete, versionierte** Quelle, die sagt: "Hier ist die Config, die auf gfx1100 mit ROCm 6.3 für ComfyUI funktioniert."
|
|
32
|
+
|
|
33
|
+
### Warum gerade jetzt
|
|
34
|
+
|
|
35
|
+
- ROCm 6.x ist endlich brauchbar gereift
|
|
36
|
+
- PyTorch ROCm-Wheels sind offiziell und stabil
|
|
37
|
+
- AMD pusht massiv (Ryzen AI Max, RX 9070)
|
|
38
|
+
- NVIDIA-Tax wird vielen zu viel
|
|
39
|
+
- "Run local AI" wird Mainstream (privacy, keine Subscription)
|
|
40
|
+
|
|
41
|
+
### Warum ich
|
|
42
|
+
|
|
43
|
+
- Hab selbst RX 7900 XTX, hab den Schmerz durch
|
|
44
|
+
- Indie-Dev-Mindset (von Kaleo-App bis PoE-Overlay): bau funktionierende Tools
|
|
45
|
+
- Python-Stack passt zu meiner Komfortzone
|
|
46
|
+
- Neuro-sama-Projekt zwingt mich sowieso, halb diese Configs zu kuratieren
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Vision
|
|
51
|
+
|
|
52
|
+
### Was es ist
|
|
53
|
+
|
|
54
|
+
- Versions-kontrollierte YAML-Configs pro Tool × pro Chip
|
|
55
|
+
- CLI für Diagnostik (`doctor`) und Lookup (`show`)
|
|
56
|
+
- Community-driven: jeder PR mit getesteter Config ist Gold
|
|
57
|
+
- Klare Status-Labels: ✅ tested · 🟡 partial · ❌ broken
|
|
58
|
+
|
|
59
|
+
### Was es **nicht** ist
|
|
60
|
+
|
|
61
|
+
- Kein Fork von ROCm
|
|
62
|
+
- Kein eigener Inferenz-Server
|
|
63
|
+
- Kein Ersatz für Ollama/ComfyUI/etc.
|
|
64
|
+
- Kein Windows-first Tool (Linux primär, Windows später)
|
|
65
|
+
- Kein Benchmarking-Service (in v0.1 — vielleicht später als Submodul)
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Roadmap
|
|
70
|
+
|
|
71
|
+
### v0.1.0 — MVP (Launch-fähig)
|
|
72
|
+
|
|
73
|
+
**Status: Code steht, Configs für 3 Tools auf 4 Chips drin.**
|
|
74
|
+
|
|
75
|
+
- [x] CLI mit `doctor`, `show`, `list`
|
|
76
|
+
- [x] GPU-Detection via `rocminfo`
|
|
77
|
+
- [x] Pydantic-Validierung der YAMLs
|
|
78
|
+
- [x] Configs für Ollama, faster-whisper, ComfyUI
|
|
79
|
+
- [x] Chip-Coverage: gfx1100, gfx1030, gfx1201, gfx1034
|
|
80
|
+
- [x] Tests + GitHub Actions CI
|
|
81
|
+
- [x] MIT License + Contributing Guide
|
|
82
|
+
- [ ] **Auf eigenem RX 7900 XTX laufen lassen, Bugs fixen**
|
|
83
|
+
- [ ] **Naming final entscheiden** (rocmate vs gfx-toolkit vs rocforge)
|
|
84
|
+
- [ ] **PyPI-Name claimen + publishen**
|
|
85
|
+
- [ ] **GitHub Repo public + README mit echtem Screenshot**
|
|
86
|
+
|
|
87
|
+
**Geschätzter Aufwand:** 1 Wochenende für Finalisierung.
|
|
88
|
+
|
|
89
|
+
### v0.2.0 — Breite Tool-Coverage + Windows CLI
|
|
90
|
+
|
|
91
|
+
- [ ] Configs hinzufügen: vLLM, axolotl, Piper TTS, Coqui TTS, Stable Diffusion WebUI, llama.cpp, exllama
|
|
92
|
+
- [ ] Configs für gfx1101, gfx1102 (RX 7700/7800 XT, RX 7600) inkl. Windows-Install-Hints
|
|
93
|
+
- [ ] Bessere `doctor`-Checks: Docker-GPU-Passthrough, Vulkan-Verfügbarkeit
|
|
94
|
+
- [ ] `rocmate doctor --tool <name>` — tool-spezifische Checks
|
|
95
|
+
- [ ] **Windows-Support für `doctor` und `show`**: GPU-Detection via `hipinfo` (primär) + WMI-Name→GFX-Lookup-Tabelle (Fallback), plattformbedingte Checks statt Linux-Gruppen
|
|
96
|
+
|
|
97
|
+
**Geschätzter Aufwand:** 2-3 Wochenenden.
|
|
98
|
+
|
|
99
|
+
### v0.3.0 — Auto-Install
|
|
100
|
+
|
|
101
|
+
- [ ] `rocmate install <tool>` — installiert Tool mit korrekten ENV-Vars und Pip-Indexen
|
|
102
|
+
- [ ] Dry-run-Modus (`--dry-run`)
|
|
103
|
+
- [ ] Docker-Compose-Snippets als Output-Option
|
|
104
|
+
- [ ] Rollback-Funktion (Snapshot von ENV vor Install)
|
|
105
|
+
|
|
106
|
+
**Hier wird's heikel.** Auto-Installer machen viele Annahmen. Lieber lange `--dry-run` als Default, dann erst opt-in für echte Installation.
|
|
107
|
+
|
|
108
|
+
### v0.4.0 — doctor --fix (System-Setup)
|
|
109
|
+
|
|
110
|
+
Ziel: `rocmate doctor` diagnostiziert, `rocmate doctor --fix` repariert — plattformübergreifend, chip-aware.
|
|
111
|
+
|
|
112
|
+
- [ ] `rocmate doctor --fix` — opt-in Setup-Modus, fragt vor jeder Aktion nach Bestätigung
|
|
113
|
+
- [ ] Stufe 1 (sicher): ENV-Vars setzen, Shell-Profile patchen
|
|
114
|
+
- [ ] Stufe 2 (braucht sudo/admin): Linux-Gruppen (`render`, `video`), Windows-HIP-Pfade
|
|
115
|
+
- [ ] Stufe 3 (riskant, explizit opt-in): ROCm/HIP SDK installieren — nur wenn Datenbasis breit und korrekt genug ist
|
|
116
|
+
- [ ] Plattformunabhängig: Linux (apt/pacman/dnf-Detection) + Windows (winget/manuell)
|
|
117
|
+
- [ ] `--dry-run` als Default für Stufe 3, `--yes` zum Übersteuern
|
|
118
|
+
|
|
119
|
+
**Voraussetzung:** Datenbasis aus v0.2/v0.3 muss solide sein, bevor hier automatisch installiert wird.
|
|
120
|
+
|
|
121
|
+
### v0.5.0 — Web Matrix
|
|
122
|
+
|
|
123
|
+
- [ ] Statische Website (Hugo/Astro) generiert aus YAMLs
|
|
124
|
+
- [ ] Filterbare Tabelle: "Welcher Chip × welches Tool"
|
|
125
|
+
- [ ] Pro-Chip-Seiten mit allen unterstützten Tools
|
|
126
|
+
- [ ] Pro-Tool-Seiten mit allen Chips
|
|
127
|
+
- [ ] Domain registrieren (.dev?)
|
|
128
|
+
|
|
129
|
+
### v1.0.0 — Stable
|
|
130
|
+
|
|
131
|
+
- [ ] Stabilisierte API
|
|
132
|
+
- [ ] Migration-Guide für Config-Schema-Änderungen
|
|
133
|
+
- [ ] Maintenance-Mode-Docs (was tun bei neuer ROCm-Version)
|
|
134
|
+
|
|
135
|
+
### Möglicherweise nie / sehr später
|
|
136
|
+
|
|
137
|
+
- Benchmarking-Submodul (siehe `rocm-bench`-Idee — eigenes Projekt wäre besser)
|
|
138
|
+
- Ryzen AI / NPU-Unterstützung (XDNA-Tooling zu unreif)
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Scope-Disziplin
|
|
143
|
+
|
|
144
|
+
**Das größte Risiko ist Feature-Creep.** Konkrete Versuchungen, denen ich widerstehen muss:
|
|
145
|
+
|
|
146
|
+
| Verlockung | Warum es schlecht ist |
|
|
147
|
+
|---|---|
|
|
148
|
+
| Sofort `install`-Befehl bauen | Datenbasis muss zuerst breit/korrekt sein, sonst installiere ich Müll |
|
|
149
|
+
| Eigenes Benchmark-System dranschrauben | Sprengt Scope, verdient eigenes Projekt |
|
|
150
|
+
| Windows-Support in v0.1 anfassen | Datenbasis und Linux-Pfad müssen zuerst stabil sein — Windows kommt in v0.2 |
|
|
151
|
+
| GUI bauen | Niemand braucht GUI für CLI-Tool, das man 2× pro Setup nutzt |
|
|
152
|
+
| Mit AMD-Hardware-Sponsoring locken lassen | Ja zu Hardware-Samples, nein zu Abhängigkeit |
|
|
153
|
+
| Eigene Inferenz-Engine bauen | Wir sind Klebemittel, nicht Konkurrent |
|
|
154
|
+
|
|
155
|
+
**Goldene Regel:** Wenn ein Feature die Frage "Welche Config funktioniert auf meinem Chip?" nicht direkter beantwortet, gehört es nicht in v0.x.
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Launch-Strategie
|
|
160
|
+
|
|
161
|
+
### Pre-Launch (vor v0.1.0 public)
|
|
162
|
+
|
|
163
|
+
1. **Eigenes Setup damit dokumentieren** — Ollama, faster-whisper, ComfyUI auf der 7900 XTX neu aufsetzen mit `rocmate show` als Guide. Findet Bugs.
|
|
164
|
+
2. **Screenshots** von `doctor` und `show ollama` für README
|
|
165
|
+
3. **Naming + Domain prüfen**
|
|
166
|
+
4. **PyPI-Name claimen**
|
|
167
|
+
|
|
168
|
+
### Launch-Tag
|
|
169
|
+
|
|
170
|
+
**Reihenfolge wichtig:** klein anfangen, dann größer.
|
|
171
|
+
|
|
172
|
+
1. **r/ROCm** (kleinste, freundlichste Community) — als "feedback wanted"
|
|
173
|
+
2. **r/Amd** (allgemein) — Fokus auf "billiger als NVIDIA für lokale AI"
|
|
174
|
+
3. **r/LocalLLaMA** (größte Tech-Community) — Fokus auf "Ollama-Setup auf AMD endlich nicht mehr nervig"
|
|
175
|
+
4. **HackerNews** — "Show HN: rocmate – Curated AMD GPU compatibility for AI tools"
|
|
176
|
+
|
|
177
|
+
**Nicht alles am selben Tag.** Reddit-Posts auf 3-4 Tage verteilen, HN am Wochenende (mehr Traffic).
|
|
178
|
+
|
|
179
|
+
### Posting-Stil
|
|
180
|
+
|
|
181
|
+
**Nicht:** "Mein neues Projekt!"
|
|
182
|
+
**Sondern:** "Ich war frustriert, dass es keine konsolidierte Quelle für AMD-AI-Configs gibt, also habe ich angefangen, eine zu bauen. Hier ist v0.1.0 — ich brauche eure Configs für die Chips, die ich nicht habe."
|
|
183
|
+
|
|
184
|
+
**Klar machen:**
|
|
185
|
+
- Was funktioniert (3 Tools auf 4 Chips)
|
|
186
|
+
- Was fehlt (alles andere)
|
|
187
|
+
- Wie man beiträgt (PR-Template, 5-Min-Aufwand)
|
|
188
|
+
- Was es **nicht** ist (kein Fork, kein Installer-Magic)
|
|
189
|
+
|
|
190
|
+
### Erfolgs-Indikatoren
|
|
191
|
+
|
|
192
|
+
**Realistisch nach 2 Wochen:**
|
|
193
|
+
- 50-200 GitHub-Stars
|
|
194
|
+
- 1-3 externe PRs mit Configs
|
|
195
|
+
- 5-20 Issues mit "läuft nicht auf gfx10XX"
|
|
196
|
+
- 100-500 PyPI-Downloads
|
|
197
|
+
|
|
198
|
+
**Wenn nach 2 Wochen <20 Stars:** Naming/Positionierung überdenken, nicht aufgeben.
|
|
199
|
+
|
|
200
|
+
**Wenn nach 2 Monaten <5 externe PRs:** Contribution-Hürde senken (template-driven Issue → auto-PR?).
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Tech-Entscheidungen
|
|
205
|
+
|
|
206
|
+
| Entscheidung | Begründung |
|
|
207
|
+
|---|---|
|
|
208
|
+
| Python 3.11+ | Passt zu meinem Stack, einfache CLI-Sprache, alle Zielnutzer haben Python |
|
|
209
|
+
| Typer (statt argparse/click) | Modern, Type-Hints, schöne Auto-Help |
|
|
210
|
+
| Rich für Output | Tabellen + Farben kostenlos, professioneller Look |
|
|
211
|
+
| Pydantic für YAML-Validierung | Fehler früh fangen, Config-Schema dokumentiert sich selbst |
|
|
212
|
+
| YAML statt TOML/JSON für Configs | Multi-Line-Strings für Notes, Kommentare möglich, Community-freundlicher |
|
|
213
|
+
| Hatchling als Build-Backend | Modern, schnell, sauber mit `force-include` für YAMLs |
|
|
214
|
+
| MIT-Lizenz | Maximal permissive, keine Reibung für Contributors |
|
|
215
|
+
| uv für Dev-Setup | Schnell, modern — aber pip-fallback dokumentieren |
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Risiken
|
|
220
|
+
|
|
221
|
+
| Risiko | Wahrscheinlichkeit | Gegenmaßnahme |
|
|
222
|
+
|---|---|---|
|
|
223
|
+
| AMD baut selbst was Vergleichbares | Mittel | Wir bleiben das vendor-neutrale Open-Source-Tool, AMD bewirbt nur eigene Tools |
|
|
224
|
+
| ROCm-Versionen brechen Configs alle 6 Monate | Hoch | Configs versionieren, alte ROCm-Versionen markieren |
|
|
225
|
+
| Niemand contributed | Hoch | Beitragen muss trivial sein (5 Min, ein YAML-File), explizit nach gfx10XX-Chips fragen |
|
|
226
|
+
| Ich verliere Interesse nach 3 Monaten | Mittel | Scope eng halten, keine versprochenen Features die zur Pflicht werden |
|
|
227
|
+
| Naming-Konflikt mit existierender Library | Niedrig | PyPI prüfen vor Launch |
|
|
228
|
+
| AMD Markenrechts-Stress | Niedrig | Neutraler Name (nicht "Radeon-X"), Disclaimer im README |
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## Persönliche Erinnerungen
|
|
233
|
+
|
|
234
|
+
- **Nicht alles selbst dokumentieren wollen.** Ein Issue-Template "Config für gfx10XX einreichen" ist mehr wert als 20 Stunden eigene Recherche zu Chips, die ich nicht besitze.
|
|
235
|
+
- **Code-Qualität ist Marketing.** Sauberer Code + Tests + CI signalisiert "diese Maintainerin meint es ernst" — und das zieht Contributors.
|
|
236
|
+
- **Antworte auf jedes Issue innerhalb 48h** in den ersten 4 Wochen. Danach kann's lockerer werden, aber der frühe Eindruck zählt.
|
|
237
|
+
- **Kein Burn-out.** Wenn ein Wochenende keine Lust → kein Wochenende. Open Source ist Marathon.
|
|
238
|
+
- **Spaß steht im Vordergrund.** Das Projekt sollte sich gut anfühlen, sonst überlebt es nicht.
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Offene Fragen (vor Launch klären)
|
|
243
|
+
|
|
244
|
+
- [ ] Endgültiger Name: `rocmate` / `gfx-toolkit` / `rocforge` / `amdgpu-ai` / ?
|
|
245
|
+
- [ ] Domain registrieren? (rocmate.dev / .io / kein Web in v0.1?)
|
|
246
|
+
- [ ] Eigene GitHub-Org (`rocmate-project`) oder unter persönlichem Account?
|
|
247
|
+
- [ ] Issue-Templates wie genau strukturieren?
|
|
248
|
+
- [ ] Discord/Matrix für Community oder erstmal nur GitHub Discussions?
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
*Stand: Initial. Wird mit dem Projekt mitwachsen.*
|
rocmate-0.1.0/README.md
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# rocmate
|
|
2
|
+
|
|
3
|
+
> Get AMD GPUs running with AI tools — without the rabbit hole.
|
|
4
|
+
|
|
5
|
+
`rocmate` is a curated compatibility index and CLI for running modern AI workloads on AMD GPUs via ROCm. It tells you what works on your specific card, what to set, and what to avoid — based on configurations that real users have tested.
|
|
6
|
+
|
|
7
|
+
## Why?
|
|
8
|
+
|
|
9
|
+
AMD GPUs offer great VRAM-per-dollar (especially the RX 7900 XTX with 24 GB), but getting Ollama, ComfyUI, faster-whisper, or axolotl to actually use the GPU still involves hunting through blog posts, GitHub issues, and Discord threads. Information is scattered, often outdated, and rarely specific to your chip generation (gfx1030 vs gfx1100 vs gfx1201).
|
|
10
|
+
|
|
11
|
+
`rocmate` consolidates this knowledge into one place — version-controlled, testable, community-maintained.
|
|
12
|
+
|
|
13
|
+
## Quickstart
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Install (Python 3.11+)
|
|
17
|
+
pipx install rocmate
|
|
18
|
+
|
|
19
|
+
# Check your system
|
|
20
|
+
rocmate doctor
|
|
21
|
+
|
|
22
|
+
# Show the tested config for a tool
|
|
23
|
+
rocmate show ollama
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Example output:
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
$ rocmate doctor
|
|
30
|
+
✓ GPU detected: AMD Radeon RX 7900 XTX (gfx1100)
|
|
31
|
+
✓ ROCm 6.3.1 installed
|
|
32
|
+
✗ User not in 'render' group
|
|
33
|
+
→ sudo usermod -aG render $USER && newgrp render
|
|
34
|
+
⚠ HSA_OVERRIDE_GFX_VERSION not set
|
|
35
|
+
→ export HSA_OVERRIDE_GFX_VERSION=11.0.0
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Supported tools (v0.1.0)
|
|
39
|
+
|
|
40
|
+
| Tool | gfx1100 (RX 7900 XT/XTX) | gfx1030 (RX 6800/6900) | gfx1201 (RX 9070) |
|
|
41
|
+
|------|:-:|:-:|:-:|
|
|
42
|
+
| Ollama | ✅ | ✅ | 🟡 |
|
|
43
|
+
| faster-whisper | ✅ | 🟡 | — |
|
|
44
|
+
| ComfyUI | ✅ | 🟡 | — |
|
|
45
|
+
|
|
46
|
+
✅ tested · 🟡 partial / workarounds needed · ❌ not working · — not yet tested
|
|
47
|
+
|
|
48
|
+
## Status
|
|
49
|
+
|
|
50
|
+
Early-stage. Currently maintained by [@tonde](https://github.com/tonde) on an RX 7900 XTX. Contributions for other AMD chips are very welcome — see [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
51
|
+
|
|
52
|
+
## Roadmap
|
|
53
|
+
|
|
54
|
+
- [x] `rocmate doctor` — system diagnostic
|
|
55
|
+
- [x] `rocmate show <tool>` — display tested config
|
|
56
|
+
- [ ] Windows / HIP SDK support (`doctor` + `show`)
|
|
57
|
+
- [ ] `rocmate install <tool>` — automated installer with correct ENV
|
|
58
|
+
- [ ] `rocmate doctor --fix` — auto-setup for ROCm/HIP, groups, and ENV vars
|
|
59
|
+
- [ ] Web-facing compatibility matrix
|
|
60
|
+
|
|
61
|
+
## Non-goals
|
|
62
|
+
|
|
63
|
+
- Not a replacement for ROCm, Ollama, or any inference engine
|
|
64
|
+
- Not a fork of upstream tools — only configs and glue
|
|
65
|
+
- Not a benchmarking tool
|
|
66
|
+
|
|
67
|
+
## License
|
|
68
|
+
|
|
69
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
name: Axolotl
|
|
2
|
+
description: Fine-tuning framework for LLMs supporting LoRA, QLoRA, and full fine-tuning.
|
|
3
|
+
homepage: https://github.com/axolotl-ai-cloud/axolotl
|
|
4
|
+
|
|
5
|
+
chips:
|
|
6
|
+
gfx1100:
|
|
7
|
+
status: tested
|
|
8
|
+
tested_on_rocm: "6.2"
|
|
9
|
+
notes: >
|
|
10
|
+
RX 7900 XTX (24 GB) handles QLoRA fine-tuning of 7B–13B models comfortably.
|
|
11
|
+
Flash-attention 2 works via ROCm CK (install separately). bitsandbytes ROCm fork
|
|
12
|
+
required for quantized training.
|
|
13
|
+
env_vars:
|
|
14
|
+
HSA_OVERRIDE_GFX_VERSION: "11.0.0"
|
|
15
|
+
PYTORCH_HIP_ALLOC_CONF: "expandable_segments:True"
|
|
16
|
+
install_hints:
|
|
17
|
+
- "git clone https://github.com/axolotl-ai-cloud/axolotl && cd axolotl"
|
|
18
|
+
- "pip install torch --index-url https://download.pytorch.org/whl/rocm6.2"
|
|
19
|
+
- "pip install packaging ninja && pip install flash-attn --no-build-isolation"
|
|
20
|
+
- "pip install -e '.[deepspeed]'"
|
|
21
|
+
- "accelerate launch -m axolotl.cli.train examples/llama-3/qlora.yml"
|
|
22
|
+
|
|
23
|
+
gfx1101:
|
|
24
|
+
status: tested
|
|
25
|
+
tested_on_rocm: "6.2"
|
|
26
|
+
notes: RX 7800 XT / RX 7700 XT — QLoRA of 7B models fits in 16 GB. Same setup as gfx1100.
|
|
27
|
+
env_vars:
|
|
28
|
+
HSA_OVERRIDE_GFX_VERSION: "11.0.0"
|
|
29
|
+
PYTORCH_HIP_ALLOC_CONF: "expandable_segments:True"
|
|
30
|
+
install_hints:
|
|
31
|
+
- "Same install as gfx1100. Use gradient_checkpointing: true in your YAML for memory savings."
|
|
32
|
+
|
|
33
|
+
gfx1030:
|
|
34
|
+
status: partial
|
|
35
|
+
tested_on_rocm: "6.2"
|
|
36
|
+
notes: >
|
|
37
|
+
RX 6800/6900 — QLoRA of 7B works. Full fine-tuning requires gradient checkpointing.
|
|
38
|
+
Flash-attention fallback path is slower than RDNA3.
|
|
39
|
+
env_vars: {}
|
|
40
|
+
install_hints:
|
|
41
|
+
- "Same install as gfx1100."
|
|
42
|
+
- "Add gradient_checkpointing: true and micro_batch_size: 1 to your config."
|
|
43
|
+
|
|
44
|
+
gfx1102:
|
|
45
|
+
status: partial
|
|
46
|
+
tested_on_rocm: "6.2"
|
|
47
|
+
notes: >
|
|
48
|
+
RX 7600 (8 GB) — QLoRA of 7B is tight. Use per_device_train_batch_size: 1 and
|
|
49
|
+
gradient_checkpointing. Offload optimizer states to CPU if OOM.
|
|
50
|
+
env_vars:
|
|
51
|
+
HSA_OVERRIDE_GFX_VERSION: "11.0.0"
|
|
52
|
+
PYTORCH_HIP_ALLOC_CONF: "expandable_segments:True"
|
|
53
|
+
install_hints:
|
|
54
|
+
- "Same install as gfx1100."
|
|
55
|
+
- "Config: micro_batch_size: 1, gradient_checkpointing: true, optimizer: adamw_8bit"
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
name: ComfyUI
|
|
2
|
+
description: Node-based UI for Stable Diffusion and other image generation models.
|
|
3
|
+
homepage: https://github.com/comfyanonymous/ComfyUI
|
|
4
|
+
|
|
5
|
+
chips:
|
|
6
|
+
gfx1100:
|
|
7
|
+
status: tested
|
|
8
|
+
tested_on_rocm: "6.2"
|
|
9
|
+
notes: >
|
|
10
|
+
Works well on RX 7900 XTX with PyTorch ROCm 6.2+. SDXL runs comfortably in 24 GB VRAM.
|
|
11
|
+
Flux.1 also works but requires careful memory management.
|
|
12
|
+
env_vars:
|
|
13
|
+
HSA_OVERRIDE_GFX_VERSION: "11.0.0"
|
|
14
|
+
PYTORCH_HIP_ALLOC_CONF: "expandable_segments:True"
|
|
15
|
+
install_hints:
|
|
16
|
+
- "Linux: git clone https://github.com/comfyanonymous/ComfyUI && cd ComfyUI"
|
|
17
|
+
- "python -m venv venv && source venv/bin/activate"
|
|
18
|
+
- "pip install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.2"
|
|
19
|
+
- "pip install -r requirements.txt && python main.py --listen"
|
|
20
|
+
- "Windows (HIP SDK): pip install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.2"
|
|
21
|
+
|
|
22
|
+
gfx1101:
|
|
23
|
+
status: tested
|
|
24
|
+
tested_on_rocm: "6.2"
|
|
25
|
+
notes: RX 7800 XT / RX 7700 XT — same setup as gfx1100. 16 GB VRAM limits Flux.1; SDXL and SD 1.5 run well.
|
|
26
|
+
env_vars:
|
|
27
|
+
HSA_OVERRIDE_GFX_VERSION: "11.0.0"
|
|
28
|
+
PYTORCH_HIP_ALLOC_CONF: "expandable_segments:True"
|
|
29
|
+
install_hints:
|
|
30
|
+
- "Same install as gfx1100."
|
|
31
|
+
|
|
32
|
+
gfx1102:
|
|
33
|
+
status: partial
|
|
34
|
+
tested_on_rocm: "6.2"
|
|
35
|
+
notes: RX 7600 — 8 GB VRAM is tight. SD 1.5 works; SDXL requires --lowvram flag and is slow.
|
|
36
|
+
env_vars:
|
|
37
|
+
HSA_OVERRIDE_GFX_VERSION: "11.0.0"
|
|
38
|
+
PYTORCH_HIP_ALLOC_CONF: "expandable_segments:True"
|
|
39
|
+
install_hints:
|
|
40
|
+
- "Same install as gfx1100."
|
|
41
|
+
- "Launch with: python main.py --lowvram --listen"
|
|
42
|
+
|
|
43
|
+
gfx1030:
|
|
44
|
+
status: partial
|
|
45
|
+
tested_on_rocm: "6.2"
|
|
46
|
+
notes: SDXL is slow; SD 1.5 works fine. Memory pressure with larger models.
|
|
47
|
+
env_vars:
|
|
48
|
+
HSA_OVERRIDE_GFX_VERSION: "10.3.0"
|
|
49
|
+
install_hints:
|
|
50
|
+
- "Same as gfx1100 install; expect ~3-4x slower SDXL generation."
|