perfectrag 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perfectrag/__init__.py +3 -0
- perfectrag/cli.py +196 -0
- perfectrag/hardware.py +203 -0
- perfectrag/mcp_registry.py +101 -0
- perfectrag/recipes.py +187 -0
- perfectrag/scaffolder.py +62 -0
- perfectrag/skills.py +55 -0
- perfectrag/templates/__init__.py +0 -0
- perfectrag/templates/_shared/__init__.py +0 -0
- perfectrag/templates/_shared/skills/__init__.py +0 -0
- perfectrag/templates/_shared/skills/code-rag/SKILL.md +17 -0
- perfectrag/templates/_shared/skills/legal-rag/SKILL.md +17 -0
- perfectrag/templates/_shared/skills/medical-rag/SKILL.md +17 -0
- perfectrag/templates/_shared/skills/research-rag/SKILL.md +17 -0
- perfectrag/templates/custom-naive-rag/.env.jinja +19 -0
- perfectrag/templates/custom-naive-rag/.gitignore.jinja +4 -0
- perfectrag/templates/custom-naive-rag/README.md.jinja +55 -0
- perfectrag/templates/custom-naive-rag/__init__.py +0 -0
- perfectrag/templates/custom-naive-rag/app/Dockerfile.jinja +14 -0
- perfectrag/templates/custom-naive-rag/app/main.py.jinja +151 -0
- perfectrag/templates/custom-naive-rag/app/requirements.txt.jinja +12 -0
- perfectrag/templates/custom-naive-rag/copier.yml +14 -0
- perfectrag/templates/custom-naive-rag/data/.gitkeep +0 -0
- perfectrag/templates/custom-naive-rag/docker-compose.yml.jinja +65 -0
- perfectrag/templates/custom-naive-rag/mcp.yaml.jinja +5 -0
- perfectrag/templates/custom-naive-rag/skills/.gitkeep +0 -0
- perfectrag/templates/dify-stack/.env.jinja +15 -0
- perfectrag/templates/dify-stack/.gitignore.jinja +2 -0
- perfectrag/templates/dify-stack/README.md.jinja +46 -0
- perfectrag/templates/dify-stack/__init__.py +0 -0
- perfectrag/templates/dify-stack/copier.yml +11 -0
- perfectrag/templates/dify-stack/docker-compose.yml.jinja +128 -0
- perfectrag/templates/dify-stack/mcp.yaml.jinja +4 -0
- perfectrag/templates/dify-stack/skills/.gitkeep +0 -0
- perfectrag/templates/lightrag-stack/.env.jinja +28 -0
- perfectrag/templates/lightrag-stack/.gitignore.jinja +4 -0
- perfectrag/templates/lightrag-stack/README.md.jinja +58 -0
- perfectrag/templates/lightrag-stack/__init__.py +0 -0
- perfectrag/templates/lightrag-stack/copier.yml +11 -0
- perfectrag/templates/lightrag-stack/docker-compose.yml.jinja +54 -0
- perfectrag/templates/lightrag-stack/inputs/.gitkeep +0 -0
- perfectrag/templates/lightrag-stack/mcp.yaml.jinja +4 -0
- perfectrag/templates/lightrag-stack/rag_storage/.gitkeep +0 -0
- perfectrag/templates/lightrag-stack/skills/.gitkeep +0 -0
- perfectrag/templates/ragflow-stack/.env.jinja +32 -0
- perfectrag/templates/ragflow-stack/.gitignore.jinja +2 -0
- perfectrag/templates/ragflow-stack/README.md.jinja +52 -0
- perfectrag/templates/ragflow-stack/__init__.py +0 -0
- perfectrag/templates/ragflow-stack/copier.yml +11 -0
- perfectrag/templates/ragflow-stack/docker-compose.yml.jinja +90 -0
- perfectrag/templates/ragflow-stack/mcp.yaml.jinja +6 -0
- perfectrag/templates/ragflow-stack/skills/.gitkeep +0 -0
- perfectrag/wizard.py +77 -0
- perfectrag-0.1.0.dist-info/METADATA +196 -0
- perfectrag-0.1.0.dist-info/RECORD +57 -0
- perfectrag-0.1.0.dist-info/WHEEL +4 -0
- perfectrag-0.1.0.dist-info/entry_points.txt +2 -0
perfectrag/__init__.py
ADDED
perfectrag/cli.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""perfectrag CLI — entrypoint wired via pyproject [project.scripts]."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
import yaml
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.panel import Panel
|
|
12
|
+
from rich.table import Table
|
|
13
|
+
|
|
14
|
+
from perfectrag import hardware, recipes, scaffolder, wizard
|
|
15
|
+
from perfectrag.mcp_registry import REGISTRY, add_mcp_to_project
|
|
16
|
+
from perfectrag.skills import add_skill_to_project, list_bundled_skills
|
|
17
|
+
|
|
18
|
+
# Switch stdout/stderr to UTF-8 so legacy Windows consoles (cp1252 cannot encode
# many non-ASCII characters) do not fail when the CLI prints them.
for stream in (sys.stdout, sys.stderr):
    if not hasattr(stream, "reconfigure"):
        continue
    try:
        stream.reconfigure(encoding="utf-8", errors="replace")
    except (AttributeError, OSError):
        # Best effort: keep whatever encoding the stream already has.
        pass

# Root Typer application; the commands below register themselves on it.
app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="perfectRAG - dynamic RAG framework scaffolder",
)
# Shared Rich console used by every command and helper in this module.
console = Console(legacy_windows=False)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# `init` command: detect hardware, collect answers (wizard or YAML file), derive
# a recipe, optionally override its template, then scaffold the project.
#
#   project_dir  - directory to generate the project into.
#   answers_file - YAML mapping whose keys match the `recipes.Answers` fields;
#                  when given, the interactive wizard is skipped.
#   template     - force a specific bundled template instead of the recommended one.
#   force        - allow scaffolding into an existing, non-empty directory.
#   dry_run      - print the recipe and exit with status 0 without writing files.
#
# Exits with status 1 on an unreadable/invalid answers file, an unknown
# template, or an unusable target directory.
@app.command()
def init(
    project_dir: Path = typer.Argument(Path("./my-rag"), help="Thư mục project sẽ sinh"),
    answers_file: Path | None = typer.Option(
        None, "--answers-file", "-a",
        help="YAML chứa answers (bỏ qua wizard interactive, dùng cho CI/test)",
    ),
    template: str | None = typer.Option(
        None, "--template", "-t",
        help="Override template gợi ý (custom-naive-rag, ragflow-stack, lightrag-stack, dify-stack)",
    ),
    force: bool = typer.Option(False, "--force", help="Ghi đè nếu project_dir đã có"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Preview recipe, không scaffold"),
) -> None:
    # Typer shows this docstring as the command help, so it is kept verbatim.
    """Chạy wizard → scaffold project RAG hoàn chỉnh."""
    hw = hardware.detect()
    _show_hardware(hw)

    if answers_file is None:
        answers = wizard.run_wizard()
    else:
        # Report a bad answers file as a CLI error instead of a raw traceback.
        # markup=False keeps brackets inside exception text from being parsed
        # as Rich markup.
        try:
            raw = yaml.safe_load(answers_file.read_text(encoding="utf-8"))
        except (OSError, yaml.YAMLError) as exc:
            console.print(
                f"Cannot read answers file {answers_file}: {exc}",
                style="red", markup=False,
            )
            raise typer.Exit(1) from exc
        if not isinstance(raw, dict):
            # An empty file loads as None; a list or scalar cannot be splatted.
            console.print(
                f"Answers file {answers_file} must contain a YAML mapping.",
                style="red", markup=False,
            )
            raise typer.Exit(1)
        try:
            answers = recipes.Answers(**raw)
        except TypeError as exc:
            # Missing, unexpected or non-string keys.
            console.print(
                f"Invalid answers in {answers_file}: {exc}",
                style="red", markup=False,
            )
            raise typer.Exit(1) from exc

    recipe = recipes.recommend(answers, hw)
    if template is not None:
        known_templates = scaffolder.available_templates()
        if template not in known_templates:
            console.print(f"[red]Template '{template}' không tồn tại. Có sẵn: "
                          f"{', '.join(known_templates.keys())}[/red]")
            raise typer.Exit(1)
        recipe.template = template
        recipe.notes.append(f"Template được override bằng --template={template}")
    _show_recipe(recipe)

    if dry_run:
        console.print("[yellow]--dry-run, không scaffold.[/yellow]")
        raise typer.Exit(0)

    if project_dir.exists() and not project_dir.is_dir():
        # iterdir() below would raise NotADirectoryError on a regular file.
        console.print(
            f"{project_dir} exists and is not a directory.",
            style="red", markup=False,
        )
        raise typer.Exit(1)

    if project_dir.exists() and any(project_dir.iterdir()) and not force:
        console.print(f"[red]Thư mục {project_dir} đã tồn tại và không rỗng. Dùng --force để ghi đè.[/red]")
        raise typer.Exit(1)

    scaffolder.render(recipe, hw, answers, project_dir, force=force)
    console.print(
        Panel.fit(
            f"[green]Done![/green]\n\n"
            f"cd {project_dir}\n"
            f"docker compose up -d\n\n"
            f"Edit [cyan]mcp.yaml[/cyan] to add tools, [cyan]skills/[/cyan] to add skills.",
            title="Next steps",
        )
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# `add` command: splice a registry MCP server into <project>/mcp.yaml or add a
# bundled skill to <project>/skills/.
#
#   kind        - "mcp" or "skill"; anything else exits with status 1.
#   name        - MCP server name or skill name.
#   project_dir - root of a previously generated project.
@app.command("add")
def add_cmd(
    kind: str = typer.Argument(..., help="'mcp' hoặc 'skill'"),
    name: str = typer.Argument(..., help="Tên MCP server / skill"),
    project_dir: Path = typer.Option(Path("."), "--project", "-p", help="Project dir"),
) -> None:
    # Typer shows this docstring as the command help, so it is kept verbatim.
    """Add MCP server hoặc skill vào project đã sinh."""
    if kind == "mcp":
        try:
            add_mcp_to_project(name, project_dir)
        except KeyError as exc:
            # add_mcp_to_project raises KeyError for names missing from the
            # registry. Print its message (args[0], since str(KeyError) adds
            # quotes) instead of letting a traceback reach the user.
            message = exc.args[0] if exc.args else f"Unknown MCP: {name}"
            console.print(str(message), style="red", markup=False)
            raise typer.Exit(1) from exc
        console.print(f"[green]Added MCP '{name}' vào {project_dir}/mcp.yaml[/green]")
    elif kind == "skill":
        add_skill_to_project(name, project_dir)
        console.print(f"[green]Added skill '{name}' vào {project_dir}/skills/[/green]")
    else:
        console.print(f"[red]Unknown kind: {kind}. Dùng 'mcp' hoặc 'skill'.[/red]")
        raise typer.Exit(1)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# `list` command: print one of the bundled catalogues as a table.
@app.command("list")
def list_cmd(
    what: str = typer.Argument(..., help="'templates', 'mcp', 'skills'"),
) -> None:
    # Typer shows this docstring as the command help, so it is kept verbatim.
    """Liệt kê templates / MCP servers / skills có sẵn."""
    printers = {
        "templates": _list_templates,
        "mcp": _list_mcp,
        "skills": _list_skills,
    }
    printer = printers.get(what)
    if printer is None:
        console.print(f"[red]Unknown: {what}. Dùng 'templates', 'mcp' hoặc 'skills'.[/red]")
        raise typer.Exit(1)
    printer()
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# `hw` command: run hardware detection and print the result, nothing else.
@app.command()
def hw() -> None:
    # Typer shows this docstring as the command help, so it is kept verbatim.
    """Chỉ detect + show hardware, không làm gì khác."""
    profile = hardware.detect()
    _show_hardware(profile)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# --- helpers ---
|
|
129
|
+
|
|
130
|
+
def _show_hardware(hw: hardware.HardwareProfile) -> None:
    """Print the detected hardware profile as a header-less two-column table.

    The CUDA row is only included when ``hw.cuda_version`` is truthy; the
    tier row always comes last.
    """
    rows = [
        ("OS / arch", f"{hw.os} ({hw.arch})"),
        ("CPU", f"{hw.cpu_model} — {hw.cpu_cores} cores"),
        ("RAM", f"{hw.ram_gb} GB"),
        ("Disk free", f"{hw.disk_free_gb} GB"),
        ("GPU", f"{hw.gpu_vendor} / {hw.gpu_name or '—'}"),
        ("VRAM", f"{hw.vram_gb} GB"),
    ]
    if hw.cuda_version:
        rows.append(("CUDA", hw.cuda_version))
    rows.append(("Tier", f"[bold]{hw.tier}[/bold]"))

    table = Table(title="Detected hardware", show_header=False)
    table.add_column("Field", style="cyan")
    table.add_column("Value")
    for label, value in rows:
        table.add_row(label, value)
    console.print(table)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _show_recipe(recipe: recipes.Recipe) -> None:
    """Print the recommended recipe as a table, followed by its notes.

    Fixed component rows come first, then one ``extras.<key>`` row per entry
    of ``recipe.extras``. Each note is printed on its own line after the table.
    """
    table = Table(title=f"Recommended recipe → template: [bold]{recipe.template}[/bold]")
    table.add_column("Component", style="cyan")
    table.add_column("Choice")

    fixed_rows = (
        ("LLM", f"{recipe.llm_model} (via {recipe.llm_runtime})"),
        ("Embedding", recipe.embedding_model),
        ("Reranker", recipe.reranker or "—"),
        ("Vector DB", recipe.vector_db),
        ("Doc parser", recipe.doc_parser),
        ("Chunk", f"{recipe.chunk_strategy} / {recipe.chunk_size} tokens"),
        ("GPU enabled", "yes" if recipe.gpu_enabled else "no"),
        ("VRAM cap", f"{recipe.vram_cap_gb} GB"),
    )
    extra_rows = [(f"extras.{key}", str(value)) for key, value in recipe.extras.items()]
    for label, value in (*fixed_rows, *extra_rows):
        table.add_row(label, value)
    console.print(table)

    for note in recipe.notes:
        console.print(f"[yellow]![/yellow] {note}")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _list_templates() -> None:
    """Print every bundled template name with its description."""
    table = Table(title="Templates bundled")
    table.add_column("Name", style="cyan")
    table.add_column("Description")
    # `scaffolder` is already imported at module level; no local import needed.
    for template_name, description in scaffolder.available_templates().items():
        table.add_row(template_name, description)
    console.print(table)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _list_mcp() -> None:
    """Print the MCP registry: name, description and required env variables."""
    table = Table(title="MCP registry")
    for header, options in (
        ("Name", {"style": "cyan"}),
        ("Description", {}),
        ("Requires env", {}),
    ):
        table.add_column(header, **options)

    for server_name, info in REGISTRY.items():
        # An em dash stands in for servers that need no environment variables.
        required_env = ", ".join(info.get("env", [])) or "—"
        table.add_row(server_name, info["description"], required_env)
    console.print(table)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _list_skills() -> None:
    """Print every bundled skill name with its description."""
    table = Table(title="Bundled skills")
    table.add_column("Name", style="cyan")
    table.add_column("Description")
    bundled = list_bundled_skills()
    for skill_name, description in bundled.items():
        table.add_row(skill_name, description)
    console.print(table)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# Allow running this module directly as a script in addition to the installed
# console entry point.
if __name__ == "__main__":
    app()
|
perfectrag/hardware.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Cross-platform hardware detection for recipe selection.
|
|
2
|
+
|
|
3
|
+
Detects CPU, RAM, disk, and GPU (NVIDIA / AMD ROCm / Apple Silicon / none) so the
|
|
4
|
+
recipe engine can pick an appropriate stack tier.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import platform
|
|
11
|
+
import shutil
|
|
12
|
+
import subprocess
|
|
13
|
+
from dataclasses import asdict, dataclass
|
|
14
|
+
from typing import Literal
|
|
15
|
+
|
|
16
|
+
import psutil
|
|
17
|
+
|
|
18
|
+
GpuVendor = Literal["nvidia", "amd", "apple", "none"]


@dataclass(frozen=True)
class HardwareProfile:
    """Immutable snapshot of the machine's hardware.

    Sizes are whole gigabytes. ``gpu_name`` and ``cuda_version`` are ``None``
    when they were not detected.
    """

    os: str
    arch: str
    cpu_model: str
    cpu_cores: int
    ram_gb: int
    disk_free_gb: int
    gpu_vendor: GpuVendor
    gpu_name: str | None
    vram_gb: int
    cuda_version: str | None

    @property
    def tier(self) -> str:
        """Coarse tier used by the recipe engine.

        Returns one of ``"cpu"``, ``"apple-low"``, ``"apple-high"``,
        ``"gpu-8gb"``, ``"gpu-12gb"`` or ``"gpu-24gb"``.
        """
        vendor = self.gpu_vendor
        if vendor == "none":
            return "cpu"
        if vendor == "apple":
            # Apple Silicon uses unified memory; treat RAM as the VRAM budget.
            return "apple-high" if self.ram_gb >= 24 else "apple-low"
        # Highest threshold first; note the "gpu-8gb" tier starts at 6 GB.
        for minimum_gb, label in ((24, "gpu-24gb"), (12, "gpu-12gb"), (6, "gpu-8gb")):
            if self.vram_gb >= minimum_gb:
                return label
        # A GPU was found but it has too little VRAM to be worth targeting.
        return "cpu"

    def as_dict(self) -> dict:
        """Return the dataclass fields as a plain dict (``tier`` is not included)."""
        return asdict(self)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _run(cmd: list[str], timeout: int = 5) -> str | None:
    """Run *cmd* and return its stdout as text, or ``None`` on any failure.

    Failure means the executable could not be started, the call exceeded
    *timeout* seconds, or the process exited with a non-zero status.
    """
    try:
        completed = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout, check=False
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        return None
    return completed.stdout if completed.returncode == 0 else None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _detect_cpu_model() -> str:
    """Return a CPU model string.

    Uses ``cpuinfo``'s ``brand_raw`` when that package can be imported and
    queried; otherwise falls back to ``platform.processor()`` and finally to
    ``"unknown"``.
    """
    try:
        from cpuinfo import get_cpu_info

        info = get_cpu_info()
        fallback = platform.processor() or "unknown"
        return info.get("brand_raw", fallback)
    except Exception:
        # cpuinfo is treated as optional: any failure uses the stdlib answer.
        return platform.processor() or "unknown"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _detect_nvidia() -> tuple[str, int, str | None] | None:
    """Return (gpu_name, vram_gb, cuda_version) for the first NVIDIA GPU, else None.

    NVML (``pynvml``) is tried first. If importing or querying it fails for any
    reason, ``nvidia-smi`` output is parsed instead; that path cannot report a
    CUDA version, so the third element is ``None`` there.

    VRAM is rounded to the nearest whole GiB rather than truncated. Truncation
    turns a reported total just under 24 or 12 GiB into 23 or 11, which drops
    the card below the ``vram_gb >= 24`` / ``>= 12`` thresholds in
    ``HardwareProfile.tier``.
    # NOTE(review): assumes drivers commonly report slightly less than the
    # nominal capacity — confirm against real NVML / nvidia-smi output.
    """
    try:
        import pynvml

        pynvml.nvmlInit()
        try:
            if pynvml.nvmlDeviceGetCount() == 0:
                return None
            handle = pynvml.nvmlDeviceGetHandleByIndex(0)
            name = pynvml.nvmlDeviceGetName(handle)
            if isinstance(name, bytes):
                # Some pynvml versions return bytes rather than str.
                name = name.decode()
            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
            vram_gb = round(mem.total / (1024**3))
            try:
                # Integer encoded as major * 1000 + minor * 10.
                cuda = pynvml.nvmlSystemGetCudaDriverVersion_v2()
                cuda_str = f"{cuda // 1000}.{(cuda % 1000) // 10}"
            except Exception:
                cuda_str = None
            return name, vram_gb, cuda_str
        finally:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                # Shutdown failure must not mask the detection result.
                pass
    except Exception:
        # Fallback: parse nvidia-smi
        smi = _run(["nvidia-smi", "--query-gpu=name,memory.total,driver_version",
                    "--format=csv,noheader,nounits"])
        if not smi:
            return None
        lines = smi.strip().splitlines()
        if not lines:
            # Whitespace-only output: previously an IndexError escaped here.
            return None
        parts = [p.strip() for p in lines[0].split(",")]
        if len(parts) < 2:
            return None
        name = parts[0]
        try:
            # With `nounits`, memory.total is a bare MiB figure.
            vram_gb = round(int(parts[1]) / 1024)
        except ValueError:
            vram_gb = 0
        return name, vram_gb, None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _detect_amd() -> tuple[str, int] | None:
    """Return (gpu_name, vram_gb) for the first card reported by rocm-smi, else None.

    ``None`` is returned when ``rocm-smi`` is not on PATH, fails, or prints
    something that is not a JSON object. VRAM is rounded to the nearest whole
    GiB instead of truncated, so a total just under a tier threshold
    (24 / 12 / 6 in ``HardwareProfile.tier``) is not pushed down a tier.
    """
    if not shutil.which("rocm-smi"):
        return None
    out = _run(["rocm-smi", "--showproductname", "--showmeminfo", "vram", "--json"])
    if not out:
        return None
    try:
        data = json.loads(out)
    except json.JSONDecodeError:
        return None
    if not isinstance(data, dict):
        # Valid JSON of an unexpected shape; `.items()` would raise.
        return None
    for info in data.values():
        if not isinstance(info, dict):
            continue
        name = info.get("Card series") or info.get("Card model") or "AMD GPU"
        vram_bytes_str = info.get("VRAM Total Memory (B)")
        try:
            vram_gb = round(int(vram_bytes_str) / (1024**3)) if vram_bytes_str else 0
        except (TypeError, ValueError):
            vram_gb = 0
        # Only the first dict-valued entry is reported.
        return str(name), vram_gb
    return None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _detect_apple() -> tuple[str, int] | None:
    """Return (chip_name, unified_memory_gb) on Apple Silicon macOS, else None.

    Both values come from ``sysctl``. A missing chip name becomes
    ``"Apple Silicon"``; a missing or unparsable memory size becomes ``0``.
    """
    if platform.system() != "Darwin" or platform.machine() not in ("arm64", "aarch64"):
        return None

    brand = _run(["sysctl", "-n", "machdep.cpu.brand_string"])
    memsize = _run(["sysctl", "-n", "hw.memsize"])

    chip_name = (brand or "Apple Silicon").strip()
    if memsize:
        try:
            # hw.memsize is a byte count.
            ram_gb = int(memsize.strip()) // (1024**3)
        except ValueError:
            ram_gb = 0
    else:
        ram_gb = 0
    return chip_name, ram_gb
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def detect() -> HardwareProfile:
    """Detect the current machine's hardware profile.

    GPU vendors are probed in the order NVIDIA, AMD, Apple; the first probe
    that returns a result wins and later probes are not run. With no match
    the profile reports vendor ``"none"``, no GPU name and 0 GB of VRAM.
    """
    os_name = platform.system()
    arch = platform.machine()
    # Prefer physical cores, then logical cores, then assume a single core.
    cpu_cores = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
    ram_gb = int(psutil.virtual_memory().total / (1024**3))
    # Free space on the filesystem that holds the current working directory.
    disk_free_gb = int(psutil.disk_usage(".").free / (1024**3))
    cpu_model = _detect_cpu_model()

    gpu_vendor: GpuVendor = "none"
    gpu_name: str | None = None
    vram_gb = 0
    cuda_version: str | None = None

    nvidia = _detect_nvidia()
    amd = _detect_amd() if nvidia is None else None
    apple = _detect_apple() if nvidia is None and amd is None else None

    if nvidia is not None:
        gpu_vendor = "nvidia"
        gpu_name, vram_gb, cuda_version = nvidia
    elif amd is not None:
        gpu_vendor = "amd"
        gpu_name, vram_gb = amd
    elif apple is not None:
        gpu_vendor = "apple"
        gpu_name, vram_gb = apple  # unified memory

    return HardwareProfile(
        os=os_name,
        arch=arch,
        cpu_model=cpu_model,
        cpu_cores=cpu_cores,
        ram_gb=ram_gb,
        disk_free_gb=disk_free_gb,
        gpu_vendor=gpu_vendor,
        gpu_name=gpu_name,
        vram_gb=vram_gb,
        cuda_version=cuda_version,
    )
|
perfectrag/mcp_registry.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Registry of well-known MCP servers + helper to splice them into a project's mcp.yaml.
|
|
2
|
+
|
|
3
|
+
mcp.yaml format mirrors the Claude Code / Cursor / Claude Desktop MCP config so that
|
|
4
|
+
configs are portable: each server has `command`, `args`, optional `env`, and transport.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
import yaml
|
|
12
|
+
|
|
13
|
+
# Catalogue of known MCP servers, keyed by the name accepted by `perfectrag add mcp`.
# Each entry holds:
#   description - text shown by `perfectrag list mcp`
#   command     - executable that launches the server
#   args        - argument list passed to that command
#   env         - names of environment variables the server needs; each one is
#                 written into mcp.yaml as a "${NAME}" placeholder
REGISTRY: dict[str, dict] = {
    "filesystem": {
        "description": "Đọc/ghi file trong thư mục được whitelist (read-only by default)",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-filesystem", "${PWD}/data"],
        "env": [],
    },
    "fetch": {
        "description": "Fetch URL và convert sang markdown cho LLM",
        "command": "uvx",
        "args": ["mcp-server-fetch"],
        "env": [],
    },
    "tavily": {
        "description": "Tavily web search (cần TAVILY_API_KEY)",
        "command": "npx",
        "args": ["-y", "mcp-tavily"],
        "env": ["TAVILY_API_KEY"],
    },
    "brave-search": {
        "description": "Brave web search (cần BRAVE_API_KEY)",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-brave-search"],
        "env": ["BRAVE_API_KEY"],
    },
    "postgres": {
        "description": "Query Postgres read-only (cần POSTGRES_URL)",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-postgres", "${POSTGRES_URL}"],
        "env": ["POSTGRES_URL"],
    },
    "sqlite": {
        "description": "Query SQLite database local",
        "command": "uvx",
        "args": ["mcp-server-sqlite", "--db-path", "${PWD}/data/app.db"],
        "env": [],
    },
    "github": {
        "description": "GitHub repos / issues / PRs (cần GITHUB_PERSONAL_ACCESS_TOKEN)",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-github"],
        "env": ["GITHUB_PERSONAL_ACCESS_TOKEN"],
    },
    "memory": {
        "description": "Persistent knowledge-graph memory cho agent",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-memory"],
        "env": [],
    },
    "sequential-thinking": {
        "description": "Chain-of-thought structured reasoning tool",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"],
        "env": [],
    },
    "qdrant": {
        "description": "Qdrant vector-db MCP server (read/write vectors)",
        "command": "uvx",
        "args": ["mcp-server-qdrant", "--qdrant-url", "${QDRANT_URL:-http://localhost:6333}"],
        "env": [],
    },
}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _mcp_path(project_dir: Path) -> Path:
    """Return the location of ``mcp.yaml`` inside *project_dir*."""
    return project_dir.joinpath("mcp.yaml")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _load(path: Path) -> dict:
    """Load an ``mcp.yaml`` file and guarantee it has a ``servers`` mapping.

    Args:
        path: Location of the YAML file; it does not have to exist.

    Returns:
        The parsed document, with ``data["servers"]`` always a dict. A missing
        or empty file yields ``{"servers": {}}``.

    Raises:
        ValueError: If the document root or its ``servers`` entry is present
            but is not a mapping.
    """
    if not path.exists():
        return {"servers": {}}
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    if not isinstance(data, dict):
        raise ValueError(f"{path} must contain a YAML mapping at the top level")
    servers = data.get("servers")
    if servers is None:
        # Covers both a missing key and a bare `servers:` line, which parses
        # as None; setdefault() would leave that None in place.
        data["servers"] = {}
    elif not isinstance(servers, dict):
        raise ValueError(f"'servers' in {path} must be a mapping")
    return data
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def add_mcp_to_project(name: str, project_dir: Path) -> None:
    """Write the registry entry *name* into ``<project_dir>/mcp.yaml``.

    An existing entry with the same name is overwritten. The file and its
    parent directories are created when missing. Required environment
    variables are stored as ``${NAME}`` placeholders, not as values.

    Raises:
        KeyError: If *name* is not a key of ``REGISTRY``.
    """
    if name not in REGISTRY:
        raise KeyError(f"Không biết MCP '{name}'. Chạy `perfectrag list mcp` để xem.")

    spec = REGISTRY[name]
    entry = {"command": spec["command"], "args": spec["args"]}
    if spec["env"]:
        # The `env` key is left out entirely for servers that need none.
        entry["env"] = {var: "${" + var + "}" for var in spec["env"]}

    target = _mcp_path(project_dir)
    document = _load(target)
    document["servers"][name] = entry

    target.parent.mkdir(parents=True, exist_ok=True)
    # sort_keys=False keeps the insertion order of servers and their fields.
    target.write_text(yaml.safe_dump(document, sort_keys=False), encoding="utf-8")
|
perfectrag/recipes.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""Decision matrix: (user answers + hardware) -> concrete techstack recipe.
|
|
2
|
+
|
|
3
|
+
The mapping is opinionated but documented. Users can override any field in the
|
|
4
|
+
wizard's final confirmation step.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Literal
|
|
11
|
+
|
|
12
|
+
from perfectrag.hardware import HardwareProfile
|
|
13
|
+
|
|
14
|
+
# Closed sets of values accepted for the corresponding `Answers` fields.
UseCase = Literal["qa_docs", "graphrag", "multimodal", "code_rag", "agent_workflow"]
Privacy = Literal["fully_local", "hybrid_api"]
CorpusSize = Literal["small", "medium", "large"]  # <10k, 10k-1M, >1M
UserScale = Literal["solo", "team", "production"]


@dataclass(frozen=True)
class Answers:
    """User requirements that drive recipe selection.

    Instances are immutable. The Literal annotations are not checked at
    runtime, so callers must supply valid values themselves.
    """

    use_case: UseCase  # primary routing key for template selection
    modality: list[str]  # ["text", "tables", "images", "code"]
    privacy: Privacy
    multi_hop: bool  # True routes to the GraphRAG template like use_case "graphrag"
    corpus_size: CorpusSize  # selects the vector database
    user_scale: UserScale  # "production" may switch the LLM runtime to vLLM
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Recipe:
    """Concrete tech-stack choice produced by the recommendation engine.

    The instance is mutable, so a caller can adjust fields such as
    ``template`` or append to ``notes`` after it has been built.
    """

    template: str
    llm_model: str
    llm_runtime: Literal["ollama", "vllm", "llamacpp"]
    embedding_model: str
    reranker: str | None
    vector_db: Literal["qdrant", "milvus", "chroma", "lancedb", "pgvector"]
    doc_parser: Literal["docling", "markitdown", "unstructured", "llamaparse"]
    chunk_strategy: Literal["recursive", "semantic", "late"]
    chunk_size: int
    gpu_enabled: bool
    vram_cap_gb: int
    notes: list[str] = field(default_factory=list)
    extras: dict = field(default_factory=dict)

    def as_template_vars(self, hw: HardwareProfile, answers: Answers) -> dict:
        """Flatten to vars Copier/Jinja can consume.

        Returns a dict with three keys: ``"recipe"`` (every field of this
        recipe except ``notes``), ``"hw"`` (the result of ``hw.as_dict()``)
        and ``"answers"`` (the six answer fields).
        """
        recipe_keys = (
            "template",
            "llm_model",
            "llm_runtime",
            "embedding_model",
            "reranker",
            "vector_db",
            "doc_parser",
            "chunk_strategy",
            "chunk_size",
            "gpu_enabled",
            "vram_cap_gb",
            "extras",
        )
        answer_keys = (
            "use_case",
            "modality",
            "privacy",
            "multi_hop",
            "corpus_size",
            "user_scale",
        )
        return {
            "recipe": {key: getattr(self, key) for key in recipe_keys},
            "hw": hw.as_dict(),
            "answers": {key: getattr(answers, key) for key in answer_keys},
        }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# --- Model tables by hardware tier ---

# Hardware tier -> (LLM model tag, LLM runtime). Keys are the values that
# HardwareProfile.tier can return.
_LLM_BY_TIER = {
    "cpu": ("qwen2.5:3b-instruct-q4_K_M", "llamacpp"),
    "apple-low": ("qwen2.5:7b-instruct-q4_K_M", "ollama"),
    "apple-high": ("qwen2.5:14b-instruct-q4_K_M", "ollama"),
    "gpu-8gb": ("qwen2.5:7b-instruct-q5_K_M", "ollama"),
    "gpu-12gb": ("qwen2.5:14b-instruct-q4_K_M", "ollama"),
    "gpu-24gb": ("qwen2.5:32b-instruct-q4_K_M", "vllm"),
}

# Hardware tier -> embedding model. Only the "cpu" tier uses a different model.
_EMBED_BY_TIER = {
    "cpu": "nomic-embed-text",
    "apple-low": "BAAI/bge-m3",
    "apple-high": "BAAI/bge-m3",
    "gpu-8gb": "BAAI/bge-m3",
    "gpu-12gb": "BAAI/bge-m3",
    "gpu-24gb": "BAAI/bge-m3",
}

# Answers.corpus_size -> vector database.
_VECTORDB_BY_CORPUS = {
    "small": "chroma",
    "medium": "qdrant",
    "large": "milvus",
}
|
|
100
|
+
|
|
101
|
+
# Ordered (required modalities, parser) rules; the first rule whose required
# set is non-empty and fully present in the request wins.
_PARSER_BY_MODALITY = [
    ({"images", "tables"}, "docling"),
    ({"tables"}, "docling"),
    ({"images"}, "docling"),
    (set(), "markitdown"),
]


def _pick_parser(modality: list[str]) -> str:
    """Return the document parser for the requested modalities.

    Rules with an empty requirement set are skipped, so ``"markitdown"`` is
    reached only through the final default.
    """
    requested = set(modality)
    return next(
        (
            parser
            for required, parser in _PARSER_BY_MODALITY
            if required and required <= requested
        ),
        "markitdown",
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _pick_template(answers: Answers, tier: str) -> tuple[str, list[str]]:
    """Choose the project template and collect hardware warnings.

    Routing rules, in priority order:
      1. use case "graphrag" or multi-hop questions -> "lightrag-stack"
      2. use case "agent_workflow"                  -> "dify-stack"
      3. use case "multimodal" or "code_rag"        -> "ragflow-stack"
      4. anything else -> "custom-naive-rag" on the "cpu" tier, otherwise
         "ragflow-stack"

    Returns:
        ``(template_name, notes)``; ``notes`` is a fresh list holding at most
        one warning about the hardware being weak for the chosen template.
    """
    warnings: list[str] = []
    use_case = answers.use_case

    if use_case == "graphrag" or answers.multi_hop:
        chosen = "lightrag-stack"
        if tier in ("cpu", "apple-low"):
            warnings.append(
                "GraphRAG yêu cầu LLM mạnh; hardware hiện tại có thể chạy chậm. "
                "Cân nhắc upgrade hoặc giảm corpus."
            )
    elif use_case == "agent_workflow":
        chosen = "dify-stack"
    elif use_case == "multimodal":
        chosen = "ragflow-stack"
        if tier in ("cpu", "apple-low", "gpu-8gb"):
            warnings.append("Multimodal RAG cần VRAM cao; có thể giảm chất lượng hình ảnh.")
    elif use_case == "code_rag":
        chosen = "ragflow-stack"
    else:
        # qa_docs default
        chosen = "custom-naive-rag" if tier == "cpu" else "ragflow-stack"

    return chosen, warnings
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def recommend(answers: Answers, hw: HardwareProfile) -> Recipe:
    """Turn user answers plus detected hardware into a concrete recipe.

    Args:
        answers: The user's requirements.
        hw: The detected hardware profile; its ``tier`` selects the models.

    Returns:
        A new ``Recipe``. Chunking is always "recursive" with a size of 512.

    Raises:
        KeyError: If ``hw.tier`` or ``answers.corpus_size`` is not a key of
            the lookup tables.
    """
    tier = hw.tier
    llm_model, llm_runtime = _LLM_BY_TIER[tier]
    embed = _EMBED_BY_TIER[tier]
    vector_db = _VECTORDB_BY_CORPUS[answers.corpus_size]
    parser = _pick_parser(answers.modality)
    template, notes = _pick_template(answers, tier)

    # Production scale → prefer vLLM if GPU has room
    if answers.user_scale == "production" and tier in ("gpu-12gb", "gpu-24gb"):
        llm_runtime = "vllm"

    # Naive template uses smaller vector db / no reranker for simplicity
    reranker: str | None
    if template == "custom-naive-rag":
        vector_db = "qdrant"  # bundled in template
        reranker = None
    else:
        reranker = "jinaai/jina-reranker-v2-base-multilingual"

    gpu_enabled = hw.gpu_vendor in ("nvidia", "amd", "apple")

    # The tier was chosen from vram_gb for both NVIDIA and AMD, so the cap
    # uses the same number for both. Without a usable VRAM figure (Apple, no
    # GPU, or a failed VRAM query) fall back to half of system RAM, at least 4 GB.
    if hw.gpu_vendor in ("nvidia", "amd") and hw.vram_gb > 0:
        vram_cap = hw.vram_gb
    else:
        vram_cap = max(hw.ram_gb // 2, 4)

    wants_graph = answers.use_case == "graphrag" or answers.multi_hop
    extras: dict = {
        "enable_graphrag": wants_graph,
        "enable_hybrid_search": answers.use_case in ("qa_docs", "code_rag"),
    }

    return Recipe(
        template=template,
        llm_model=llm_model,
        llm_runtime=llm_runtime,
        embedding_model=embed,
        reranker=reranker,
        vector_db=vector_db,
        doc_parser=parser,
        chunk_strategy="recursive",
        chunk_size=512,
        gpu_enabled=gpu_enabled,
        vram_cap_gb=vram_cap,
        notes=notes,
        extras=extras,
    )
|