perfectrag 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. perfectrag/__init__.py +3 -0
  2. perfectrag/cli.py +196 -0
  3. perfectrag/hardware.py +203 -0
  4. perfectrag/mcp_registry.py +101 -0
  5. perfectrag/recipes.py +187 -0
  6. perfectrag/scaffolder.py +62 -0
  7. perfectrag/skills.py +55 -0
  8. perfectrag/templates/__init__.py +0 -0
  9. perfectrag/templates/_shared/__init__.py +0 -0
  10. perfectrag/templates/_shared/skills/__init__.py +0 -0
  11. perfectrag/templates/_shared/skills/code-rag/SKILL.md +17 -0
  12. perfectrag/templates/_shared/skills/legal-rag/SKILL.md +17 -0
  13. perfectrag/templates/_shared/skills/medical-rag/SKILL.md +17 -0
  14. perfectrag/templates/_shared/skills/research-rag/SKILL.md +17 -0
  15. perfectrag/templates/custom-naive-rag/.env.jinja +19 -0
  16. perfectrag/templates/custom-naive-rag/.gitignore.jinja +4 -0
  17. perfectrag/templates/custom-naive-rag/README.md.jinja +55 -0
  18. perfectrag/templates/custom-naive-rag/__init__.py +0 -0
  19. perfectrag/templates/custom-naive-rag/app/Dockerfile.jinja +14 -0
  20. perfectrag/templates/custom-naive-rag/app/main.py.jinja +151 -0
  21. perfectrag/templates/custom-naive-rag/app/requirements.txt.jinja +12 -0
  22. perfectrag/templates/custom-naive-rag/copier.yml +14 -0
  23. perfectrag/templates/custom-naive-rag/data/.gitkeep +0 -0
  24. perfectrag/templates/custom-naive-rag/docker-compose.yml.jinja +65 -0
  25. perfectrag/templates/custom-naive-rag/mcp.yaml.jinja +5 -0
  26. perfectrag/templates/custom-naive-rag/skills/.gitkeep +0 -0
  27. perfectrag/templates/dify-stack/.env.jinja +15 -0
  28. perfectrag/templates/dify-stack/.gitignore.jinja +2 -0
  29. perfectrag/templates/dify-stack/README.md.jinja +46 -0
  30. perfectrag/templates/dify-stack/__init__.py +0 -0
  31. perfectrag/templates/dify-stack/copier.yml +11 -0
  32. perfectrag/templates/dify-stack/docker-compose.yml.jinja +128 -0
  33. perfectrag/templates/dify-stack/mcp.yaml.jinja +4 -0
  34. perfectrag/templates/dify-stack/skills/.gitkeep +0 -0
  35. perfectrag/templates/lightrag-stack/.env.jinja +28 -0
  36. perfectrag/templates/lightrag-stack/.gitignore.jinja +4 -0
  37. perfectrag/templates/lightrag-stack/README.md.jinja +58 -0
  38. perfectrag/templates/lightrag-stack/__init__.py +0 -0
  39. perfectrag/templates/lightrag-stack/copier.yml +11 -0
  40. perfectrag/templates/lightrag-stack/docker-compose.yml.jinja +54 -0
  41. perfectrag/templates/lightrag-stack/inputs/.gitkeep +0 -0
  42. perfectrag/templates/lightrag-stack/mcp.yaml.jinja +4 -0
  43. perfectrag/templates/lightrag-stack/rag_storage/.gitkeep +0 -0
  44. perfectrag/templates/lightrag-stack/skills/.gitkeep +0 -0
  45. perfectrag/templates/ragflow-stack/.env.jinja +32 -0
  46. perfectrag/templates/ragflow-stack/.gitignore.jinja +2 -0
  47. perfectrag/templates/ragflow-stack/README.md.jinja +52 -0
  48. perfectrag/templates/ragflow-stack/__init__.py +0 -0
  49. perfectrag/templates/ragflow-stack/copier.yml +11 -0
  50. perfectrag/templates/ragflow-stack/docker-compose.yml.jinja +90 -0
  51. perfectrag/templates/ragflow-stack/mcp.yaml.jinja +6 -0
  52. perfectrag/templates/ragflow-stack/skills/.gitkeep +0 -0
  53. perfectrag/wizard.py +77 -0
  54. perfectrag-0.1.0.dist-info/METADATA +196 -0
  55. perfectrag-0.1.0.dist-info/RECORD +57 -0
  56. perfectrag-0.1.0.dist-info/WHEEL +4 -0
  57. perfectrag-0.1.0.dist-info/entry_points.txt +2 -0
perfectrag/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """perfectRAG — dynamic RAG framework scaffolder."""
2
+
3
+ __version__ = "0.1.0"
perfectrag/cli.py ADDED
@@ -0,0 +1,196 @@
1
+ """perfectrag CLI — entrypoint wired via pyproject [project.scripts]."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import typer
9
+ import yaml
10
+ from rich.console import Console
11
+ from rich.panel import Panel
12
+ from rich.table import Table
13
+
14
+ from perfectrag import hardware, recipes, scaffolder, wizard
15
+ from perfectrag.mcp_registry import REGISTRY, add_mcp_to_project
16
+ from perfectrag.skills import add_skill_to_project, list_bundled_skills
17
+
18
# Legacy Windows consoles default to cp1252, which cannot encode many
# non-ASCII characters; switch both standard streams to UTF-8 (best effort).
for stream in (sys.stdout, sys.stderr):
    if not hasattr(stream, "reconfigure"):
        continue
    try:
        stream.reconfigure(encoding="utf-8", errors="replace")
    except (AttributeError, OSError):
        # Best effort only: keep whatever encoding the stream already has.
        continue

# Root Typer application: shows help when invoked without arguments and
# does not register shell-completion options.
app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="perfectRAG - dynamic RAG framework scaffolder",
)
# Shared Rich console used by every command and helper in this module.
console = Console(legacy_windows=False)
32
+
33
+
34
@app.command()
def init(
    project_dir: Path = typer.Argument(Path("./my-rag"), help="Thư mục project sẽ sinh"),
    answers_file: Path | None = typer.Option(
        None, "--answers-file", "-a",
        help="YAML chứa answers (bỏ qua wizard interactive, dùng cho CI/test)",
    ),
    template: str | None = typer.Option(
        None, "--template", "-t",
        help="Override template gợi ý (custom-naive-rag, ragflow-stack, lightrag-stack, dify-stack)",
    ),
    force: bool = typer.Option(False, "--force", help="Ghi đè nếu project_dir đã có"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Preview recipe, không scaffold"),
) -> None:
    """Chạy wizard → scaffold project RAG hoàn chỉnh."""
    # NOTE: Typer renders the docstring above as this command's --help text,
    # so it is kept verbatim; developer documentation lives in comments.
    #
    # Flow: detect hardware -> collect answers (YAML file or interactive
    # wizard) -> compute a recipe -> optionally override the template ->
    # show the recipe -> scaffold unless --dry-run.
    # Exits with code 1 on an unreadable/invalid answers file, an unknown
    # template, or an unusable target directory; code 0 after a dry run.
    from rich.markup import escape  # keep error text from being parsed as markup

    hw = hardware.detect()
    _show_hardware(hw)

    if answers_file is not None:
        try:
            raw = yaml.safe_load(answers_file.read_text(encoding="utf-8"))
        except (OSError, yaml.YAMLError) as exc:
            console.print(
                f"[red]Cannot read answers file {escape(str(answers_file))}: "
                f"{escape(str(exc))}[/red]"
            )
            raise typer.Exit(1) from exc
        # An empty file loads as None and a list/scalar cannot be splatted
        # into Answers(**raw); report both instead of crashing.
        if not isinstance(raw, dict):
            console.print(
                f"[red]Answers file {escape(str(answers_file))} must contain a YAML mapping.[/red]"
            )
            raise typer.Exit(1)
        try:
            answers = recipes.Answers(**raw)
        except TypeError as exc:
            # Raised by the dataclass constructor for missing or unknown keys.
            console.print(
                f"[red]Invalid answers in {escape(str(answers_file))}: {escape(str(exc))}[/red]"
            )
            raise typer.Exit(1) from exc
    else:
        answers = wizard.run_wizard()

    recipe = recipes.recommend(answers, hw)
    if template is not None:
        known_templates = scaffolder.available_templates()
        if template not in known_templates:
            console.print(f"[red]Template '{template}' không tồn tại. Có sẵn: "
                          f"{', '.join(known_templates.keys())}[/red]")
            raise typer.Exit(1)
        recipe.template = template
        recipe.notes.append(f"Template được override bằng --template={template}")
    _show_recipe(recipe)

    if dry_run:
        console.print("[yellow]--dry-run, không scaffold.[/yellow]")
        raise typer.Exit(0)

    if project_dir.exists():
        # iterdir() raises NotADirectoryError on a regular file, so reject
        # that case explicitly before probing for existing content.
        if not project_dir.is_dir():
            console.print(f"[red]{project_dir} exists and is not a directory.[/red]")
            raise typer.Exit(1)
        if any(project_dir.iterdir()) and not force:
            console.print(f"[red]Thư mục {project_dir} đã tồn tại và không rỗng. Dùng --force để ghi đè.[/red]")
            raise typer.Exit(1)

    scaffolder.render(recipe, hw, answers, project_dir, force=force)
    console.print(
        Panel.fit(
            f"[green]Done![/green]\n\n"
            f"cd {project_dir}\n"
            f"docker compose up -d\n\n"
            f"Edit [cyan]mcp.yaml[/cyan] to add tools, [cyan]skills/[/cyan] to add skills.",
            title="Next steps",
        )
    )
86
+
87
+
88
@app.command("add")
def add_cmd(
    kind: str = typer.Argument(..., help="'mcp' hoặc 'skill'"),
    name: str = typer.Argument(..., help="Tên MCP server / skill"),
    project_dir: Path = typer.Option(Path("."), "--project", "-p", help="Project dir"),
) -> None:
    """Add MCP server hoặc skill vào project đã sinh."""
    # NOTE: Typer renders the docstring above as this command's --help text,
    # so it is kept verbatim.
    #
    # Dispatches on `kind`; exits with code 1 for an unknown kind or an MCP
    # server name that is not in the registry.
    if kind == "mcp":
        try:
            add_mcp_to_project(name, project_dir)
        except KeyError as exc:
            # add_mcp_to_project raises KeyError(<message>) for unknown
            # names. str(KeyError) wraps the message in quotes, so read
            # args[0] to print it cleanly instead of a traceback.
            from rich.markup import escape

            message = exc.args[0] if exc.args else name
            console.print(f"[red]{escape(str(message))}[/red]")
            raise typer.Exit(1) from exc
        console.print(f"[green]Added MCP '{name}' vào {project_dir}/mcp.yaml[/green]")
    elif kind == "skill":
        add_skill_to_project(name, project_dir)
        console.print(f"[green]Added skill '{name}' vào {project_dir}/skills/[/green]")
    else:
        console.print(f"[red]Unknown kind: {kind}. Dùng 'mcp' hoặc 'skill'.[/red]")
        raise typer.Exit(1)
104
+
105
+
106
@app.command("list")
def list_cmd(
    what: str = typer.Argument(..., help="'templates', 'mcp', 'skills'"),
) -> None:
    """Liệt kê templates / MCP servers / skills có sẵn."""
    # NOTE: Typer renders the docstring above as this command's --help text,
    # so it is kept verbatim.
    #
    # Look the listing helper up by name; anything else is a usage error.
    printers = {
        "templates": _list_templates,
        "mcp": _list_mcp,
        "skills": _list_skills,
    }
    printer = printers.get(what)
    if printer is None:
        console.print(f"[red]Unknown: {what}. Dùng 'templates', 'mcp' hoặc 'skills'.[/red]")
        raise typer.Exit(1)
    printer()
120
+
121
+
122
@app.command()
def hw() -> None:
    """Chỉ detect + show hardware, không làm gì khác."""
    # NOTE: Typer renders the docstring above as this command's --help text,
    # so it is kept verbatim. The command only detects and prints hardware.
    profile = hardware.detect()
    _show_hardware(profile)
126
+
127
+
128
+ # --- helpers ---
129
+
130
def _show_hardware(hw: hardware.HardwareProfile) -> None:
    """Print the detected hardware profile as a headerless two-column table.

    The CUDA row is included only when a CUDA version was detected; the
    tier row is always last.
    """
    rows = [
        ("OS / arch", f"{hw.os} ({hw.arch})"),
        ("CPU", f"{hw.cpu_model} — {hw.cpu_cores} cores"),
        ("RAM", f"{hw.ram_gb} GB"),
        ("Disk free", f"{hw.disk_free_gb} GB"),
        ("GPU", f"{hw.gpu_vendor} / {hw.gpu_name or '—'}"),
        ("VRAM", f"{hw.vram_gb} GB"),
    ]
    if hw.cuda_version:
        rows.append(("CUDA", hw.cuda_version))
    rows.append(("Tier", f"[bold]{hw.tier}[/bold]"))

    table = Table(title="Detected hardware", show_header=False)
    table.add_column("Field", style="cyan")
    table.add_column("Value")
    for label, value in rows:
        table.add_row(label, value)
    console.print(table)
144
+
145
+
146
def _show_recipe(recipe: recipes.Recipe) -> None:
    """Print the recommended recipe as a table, followed by its notes.

    Fixed component rows come first, then one ``extras.<key>`` row per entry
    in ``recipe.extras``; each note is printed on its own line after the
    table.
    """
    rows = [
        ("LLM", f"{recipe.llm_model} (via {recipe.llm_runtime})"),
        ("Embedding", recipe.embedding_model),
        ("Reranker", recipe.reranker or "—"),
        ("Vector DB", recipe.vector_db),
        ("Doc parser", recipe.doc_parser),
        ("Chunk", f"{recipe.chunk_strategy} / {recipe.chunk_size} tokens"),
        ("GPU enabled", "yes" if recipe.gpu_enabled else "no"),
        ("VRAM cap", f"{recipe.vram_cap_gb} GB"),
    ]
    rows.extend((f"extras.{k}", str(v)) for k, v in recipe.extras.items())

    table = Table(title=f"Recommended recipe → template: [bold]{recipe.template}[/bold]")
    table.add_column("Component", style="cyan")
    table.add_column("Choice")
    for component, choice in rows:
        table.add_row(component, choice)
    console.print(table)

    for note in recipe.notes:
        console.print(f"[yellow]![/yellow] {note}")
163
+
164
+
165
def _list_templates() -> None:
    """Print a name/description table of the bundled templates."""
    table = Table(title="Templates bundled")
    table.add_column("Name", style="cyan")
    table.add_column("Description")
    # `scaffolder` is already imported at module level; the mapping it
    # returns is iterated as (name, description) pairs.
    for template_name, description in scaffolder.available_templates().items():
        table.add_row(template_name, description)
    console.print(table)
174
+
175
+
176
def _list_mcp() -> None:
    """Print the MCP registry: name, description and required env vars."""
    table = Table(title="MCP registry")
    for header, column_style in (("Name", "cyan"), ("Description", None), ("Requires env", None)):
        if column_style is None:
            table.add_column(header)
        else:
            table.add_column(header, style=column_style)
    for server_name, spec in REGISTRY.items():
        # An em dash stands in for servers that need no environment variables.
        required_env = ", ".join(spec.get("env", [])) or "—"
        table.add_row(server_name, spec["description"], required_env)
    console.print(table)
184
+
185
+
186
def _list_skills() -> None:
    """Print a name/description table of the bundled skills."""
    table = Table(title="Bundled skills")
    table.add_column("Name", style="cyan")
    table.add_column("Description")
    for skill_name, description in list_bundled_skills().items():
        table.add_row(skill_name, description)
    console.print(table)
193
+
194
+
195
# Run the Typer app when this file is executed directly as a script.
if __name__ == "__main__":
    app()
perfectrag/hardware.py ADDED
@@ -0,0 +1,203 @@
1
+ """Cross-platform hardware detection for recipe selection.
2
+
3
+ Detects CPU, RAM, disk, and GPU (NVIDIA / AMD ROCm / Apple Silicon / none) so the
4
+ recipe engine can pick an appropriate stack tier.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import platform
11
+ import shutil
12
+ import subprocess
13
+ from dataclasses import asdict, dataclass
14
+ from typing import Literal
15
+
16
+ import psutil
17
+
18
# Closed set of GPU vendor tags; "none" means no supported GPU was found.
GpuVendor = Literal["nvidia", "amd", "apple", "none"]


@dataclass(frozen=True)
class HardwareProfile:
    """Immutable snapshot of the machine's hardware used for recipe selection."""

    os: str
    arch: str
    cpu_model: str
    cpu_cores: int
    ram_gb: int
    disk_free_gb: int
    gpu_vendor: GpuVendor
    gpu_name: str | None
    vram_gb: int
    cuda_version: str | None

    @property
    def tier(self) -> str:
        """Return the coarse hardware tier used by the recipe engine.

        One of ``"cpu"``, ``"apple-low"``, ``"apple-high"``, ``"gpu-8gb"``,
        ``"gpu-12gb"`` or ``"gpu-24gb"``.
        """
        vendor = self.gpu_vendor
        if vendor == "none":
            return "cpu"
        if vendor == "apple":
            # Apple Silicon uses unified memory, so RAM is the VRAM budget.
            return "apple-high" if self.ram_gb >= 24 else "apple-low"
        # Discrete GPUs: first matching VRAM floor wins, largest first.
        for floor_gb, label in ((24, "gpu-24gb"), (12, "gpu-12gb"), (6, "gpu-8gb")):
            if self.vram_gb >= floor_gb:
                return label
        # A GPU with less than 6 GB of VRAM is treated like a CPU-only box.
        return "cpu"

    def as_dict(self) -> dict:
        """Return the profile's fields as a plain dict (via ``asdict``)."""
        return asdict(self)
54
+
55
+
56
def _run(cmd: list[str], timeout: int = 5) -> str | None:
    """Run *cmd* and return its stdout, or None on any failure.

    Failure means: the executable is missing, the call timed out, another
    OSError occurred, or the process exited with a non-zero status.
    """
    try:
        completed = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout, check=False
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        return None
    return completed.stdout if completed.returncode == 0 else None
66
+
67
+
68
def _detect_cpu_model() -> str:
    """Return a CPU model string.

    Prefers ``brand_raw`` from the optional ``cpuinfo`` package; on any
    failure (including the package being absent) falls back to
    ``platform.processor()``, and finally to ``"unknown"``.
    """
    try:
        from cpuinfo import get_cpu_info

        info = get_cpu_info()
        default = platform.processor() or "unknown"
        return info.get("brand_raw", default)
    except Exception:
        # Deliberately broad: any cpuinfo failure uses the stdlib fallback.
        return platform.processor() or "unknown"
75
+
76
+
77
def _detect_nvidia() -> tuple[str, int, str | None] | None:
    """Return ``(gpu_name, vram_gb, cuda_version)`` for the first NVIDIA GPU.

    NVML (``pynvml``) is tried first. If NVML reports zero devices the result
    is None with no fallback. If NVML is unavailable or raises, the function
    falls back to parsing ``nvidia-smi``; that path cannot determine the CUDA
    version, so the third element is None there.

    Returns None when no NVIDIA GPU can be detected.
    """
    try:
        import pynvml

        pynvml.nvmlInit()
        try:
            if pynvml.nvmlDeviceGetCount() == 0:
                return None
            handle = pynvml.nvmlDeviceGetHandleByIndex(0)
            name = pynvml.nvmlDeviceGetName(handle)
            # The name may come back as bytes; normalise it to str.
            if isinstance(name, bytes):
                name = name.decode()
            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
            vram_gb = int(mem.total / (1024**3))
            try:
                # The integer is decoded as major = value // 1000 and
                # minor = (value % 1000) // 10.
                cuda = pynvml.nvmlSystemGetCudaDriverVersion_v2()
                cuda_str = f"{cuda // 1000}.{(cuda % 1000) // 10}"
            except Exception:
                cuda_str = None
            return name, vram_gb, cuda_str
        finally:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                # Shutdown failures must not mask the detection result.
                pass
    except Exception:
        # Deliberately broad: any NVML problem falls back to nvidia-smi.
        smi = _run(["nvidia-smi", "--query-gpu=name,memory.total,driver_version",
                    "--format=csv,noheader,nounits"])
        # Guard against empty or whitespace-only output, which previously
        # raised IndexError on splitlines()[0].
        lines = smi.strip().splitlines() if smi else []
        if not lines:
            return None
        parts = [p.strip() for p in lines[0].split(",")]
        if len(parts) < 2:
            return None
        name = parts[0]
        try:
            # memory.total is treated as megabytes here.
            vram_gb = int(parts[1]) // 1024
        except ValueError:
            vram_gb = 0
        # The driver_version column is queried but not used.
        return name, vram_gb, None
121
+
122
+
123
def _detect_amd() -> tuple[str, int] | None:
    """Return ``(gpu_name, vram_gb)`` for the first AMD card via rocm-smi.

    Returns None when ``rocm-smi`` is not on PATH, the command fails, its
    output is not a JSON object, or no per-card object is present.
    ``vram_gb`` is 0 when the VRAM field is missing or not an integer.
    """
    if not shutil.which("rocm-smi"):
        return None
    out = _run(["rocm-smi", "--showproductname", "--showmeminfo", "vram", "--json"])
    if not out:
        return None
    try:
        data = json.loads(out)
    except json.JSONDecodeError:
        return None
    # A non-object top level previously raised AttributeError on .items().
    if not isinstance(data, dict):
        return None
    for info in data.values():
        if not isinstance(info, dict):
            continue
        name = info.get("Card series") or info.get("Card model") or "AMD GPU"
        vram_bytes_str = info.get("VRAM Total Memory (B)")
        try:
            vram_gb = int(vram_bytes_str) // (1024**3) if vram_bytes_str else 0
        except (TypeError, ValueError):
            vram_gb = 0
        # Only the first card entry is reported.
        return str(name), vram_gb
    return None
145
+
146
+
147
def _detect_apple() -> tuple[str, int] | None:
    """Return ``(chip_name, unified_memory_gb)`` on Apple Silicon macOS.

    Returns None unless the platform is Darwin on an arm64/aarch64 machine.
    The chip name defaults to ``"Apple Silicon"`` and the memory size to 0
    when the corresponding ``sysctl`` query fails or cannot be parsed.
    """
    on_apple_silicon = (
        platform.system() == "Darwin"
        and platform.machine() in ("arm64", "aarch64")
    )
    if not on_apple_silicon:
        return None

    chip = _run(["sysctl", "-n", "machdep.cpu.brand_string"])
    mem = _run(["sysctl", "-n", "hw.memsize"])
    chip_name = (chip or "Apple Silicon").strip()

    ram_gb = 0
    if mem:
        try:
            ram_gb = int(mem.strip()) // (1024**3)
        except ValueError:
            ram_gb = 0
    return chip_name, ram_gb
161
+
162
+
163
def detect() -> HardwareProfile:
    """Detect the current machine's hardware profile.

    GPU probing runs in priority order NVIDIA, then AMD, then Apple Silicon,
    and stops at the first probe that reports a device. With no GPU the
    vendor is ``"none"`` and VRAM is 0.
    """
    gib = 1024**3

    os_name = platform.system()
    arch = platform.machine()
    # Prefer physical cores, then logical cores, then assume one.
    cpu_cores = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
    ram_gb = int(psutil.virtual_memory().total / gib)
    # Free space is measured on the filesystem of the working directory.
    disk_free_gb = int(psutil.disk_usage(".").free / gib)
    cpu_model = _detect_cpu_model()

    gpu_vendor: GpuVendor = "none"
    gpu_name: str | None = None
    vram_gb = 0
    cuda_version: str | None = None

    if (nvidia := _detect_nvidia()) is not None:
        gpu_vendor = "nvidia"
        gpu_name, vram_gb, cuda_version = nvidia
    elif (amd := _detect_amd()) is not None:
        gpu_vendor = "amd"
        gpu_name, vram_gb = amd
    elif (apple := _detect_apple()) is not None:
        gpu_vendor = "apple"
        gpu_name, vram_gb = apple  # unified memory

    return HardwareProfile(
        os=os_name,
        arch=arch,
        cpu_model=cpu_model,
        cpu_cores=cpu_cores,
        ram_gb=ram_gb,
        disk_free_gb=disk_free_gb,
        gpu_vendor=gpu_vendor,
        gpu_name=gpu_name,
        vram_gb=vram_gb,
        cuda_version=cuda_version,
    )
perfectrag/mcp_registry.py ADDED
@@ -0,0 +1,101 @@
1
+ """Registry of well-known MCP servers + helper to splice them into a project's mcp.yaml.
2
+
3
+ mcp.yaml format mirrors the Claude Code / Cursor / Claude Desktop MCP config so that
4
+ configs are portable: each server has `command`, `args`, optional `env`, and transport.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
+ import yaml
12
+
13
# Known MCP servers, keyed by the name users pass on the command line.
# Each entry carries:
#   description - human-readable text (user-facing, shown in listings)
#   command     - executable used to launch the server
#   args        - argument list passed to that executable
#   env         - names of required environment variables; add_mcp_to_project
#                 writes each one as a "${NAME}" placeholder
REGISTRY: dict[str, dict] = {
    "filesystem": {
        "description": "Đọc/ghi file trong thư mục được whitelist (read-only by default)",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-filesystem", "${PWD}/data"],
        "env": [],
    },
    "fetch": {
        "description": "Fetch URL và convert sang markdown cho LLM",
        "command": "uvx",
        "args": ["mcp-server-fetch"],
        "env": [],
    },
    "tavily": {
        "description": "Tavily web search (cần TAVILY_API_KEY)",
        "command": "npx",
        "args": ["-y", "mcp-tavily"],
        "env": ["TAVILY_API_KEY"],
    },
    "brave-search": {
        "description": "Brave web search (cần BRAVE_API_KEY)",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-brave-search"],
        "env": ["BRAVE_API_KEY"],
    },
    "postgres": {
        "description": "Query Postgres read-only (cần POSTGRES_URL)",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-postgres", "${POSTGRES_URL}"],
        "env": ["POSTGRES_URL"],
    },
    "sqlite": {
        "description": "Query SQLite database local",
        "command": "uvx",
        "args": ["mcp-server-sqlite", "--db-path", "${PWD}/data/app.db"],
        "env": [],
    },
    "github": {
        "description": "GitHub repos / issues / PRs (cần GITHUB_PERSONAL_ACCESS_TOKEN)",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-github"],
        "env": ["GITHUB_PERSONAL_ACCESS_TOKEN"],
    },
    "memory": {
        "description": "Persistent knowledge-graph memory cho agent",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-memory"],
        "env": [],
    },
    "sequential-thinking": {
        "description": "Chain-of-thought structured reasoning tool",
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"],
        "env": [],
    },
    "qdrant": {
        "description": "Qdrant vector-db MCP server (read/write vectors)",
        "command": "uvx",
        "args": ["mcp-server-qdrant", "--qdrant-url", "${QDRANT_URL:-http://localhost:6333}"],
        "env": [],
    },
}
75
+
76
+
77
def _mcp_path(project_dir: Path) -> Path:
    """Return the location of ``mcp.yaml`` inside *project_dir*."""
    return project_dir.joinpath("mcp.yaml")
79
+
80
+
81
def _load(path: Path) -> dict:
    """Load the MCP config at *path*, guaranteeing a ``servers`` mapping.

    Returns ``{"servers": {}}`` when the file does not exist. An empty file
    or a ``servers:`` key with no value is normalised to an empty mapping.

    Raises:
        ValueError: if the file's top level, or its ``servers`` entry, is
            not a YAML mapping.
    """
    if not path.exists():
        return {"servers": {}}
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    if not isinstance(data, dict):
        raise ValueError(f"{path} must contain a YAML mapping at the top level")
    servers = data.get("servers")
    if servers is None:
        # Covers both a missing key and an explicit null. setdefault() would
        # leave the null in place and break the later item assignment.
        data["servers"] = {}
    elif not isinstance(servers, dict):
        raise ValueError(f"'servers' in {path} must be a YAML mapping")
    return data
87
+
88
+
89
def add_mcp_to_project(name: str, project_dir: Path) -> None:
    """Add (or overwrite) the registry server *name* in the project's mcp.yaml.

    The parent directory is created if needed and the whole file is
    rewritten with key order preserved.

    Raises:
        KeyError: if *name* is not a key of ``REGISTRY``.
    """
    if name not in REGISTRY:
        raise KeyError(f"Không biết MCP '{name}'. Chạy `perfectrag list mcp` để xem.")
    spec = REGISTRY[name]
    target = _mcp_path(project_dir)
    config = _load(target)

    entry = {"command": spec["command"], "args": spec["args"]}
    if spec["env"]:
        # Required variables are written as "${NAME}" placeholders, not values.
        entry["env"] = {k: f"${{{k}}}" for k in spec["env"]}
    config["servers"][name] = entry

    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(yaml.safe_dump(config, sort_keys=False), encoding="utf-8")
perfectrag/recipes.py ADDED
@@ -0,0 +1,187 @@
1
+ """Decision matrix: (user answers + hardware) -> concrete techstack recipe.
2
+
3
+ The mapping is opinionated but documented. Users can override any field in the
4
+ wizard's final confirmation step.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import Literal
11
+
12
+ from perfectrag.hardware import HardwareProfile
13
+
14
# Accepted answer values, expressed as Literal aliases for type checkers.
# The dataclass below does not itself validate values against them.
UseCase = Literal["qa_docs", "graphrag", "multimodal", "code_rag", "agent_workflow"]
Privacy = Literal["fully_local", "hybrid_api"]
CorpusSize = Literal["small", "medium", "large"]  # <10k, 10k-1M, >1M
UserScale = Literal["solo", "team", "production"]


@dataclass(frozen=True)
class Answers:
    """Immutable record of the user's answers that drive recipe selection."""

    use_case: UseCase
    modality: list[str]  # ["text", "tables", "images", "code"]
    privacy: Privacy
    multi_hop: bool
    corpus_size: CorpusSize
    user_scale: UserScale
28
+
29
+
30
@dataclass
class Recipe:
    """Concrete tech-stack choice produced by the recommendation step."""

    template: str
    llm_model: str
    llm_runtime: Literal["ollama", "vllm", "llamacpp"]
    embedding_model: str
    reranker: str | None
    vector_db: Literal["qdrant", "milvus", "chroma", "lancedb", "pgvector"]
    doc_parser: Literal["docling", "markitdown", "unstructured", "llamaparse"]
    chunk_strategy: Literal["recursive", "semantic", "late"]
    chunk_size: int
    gpu_enabled: bool
    vram_cap_gb: int
    notes: list[str] = field(default_factory=list)
    extras: dict = field(default_factory=dict)

    def as_template_vars(self, hw: HardwareProfile, answers: Answers) -> dict:
        """Flatten the recipe, hardware and answers into template variables.

        Returns a dict with three keys: ``"recipe"`` (every recipe field
        except ``notes``), ``"hw"`` (``hw.as_dict()``) and ``"answers"``
        (the six answer fields). Values are passed through by reference.
        """
        recipe_keys = (
            "template",
            "llm_model",
            "llm_runtime",
            "embedding_model",
            "reranker",
            "vector_db",
            "doc_parser",
            "chunk_strategy",
            "chunk_size",
            "gpu_enabled",
            "vram_cap_gb",
            "extras",
        )
        answer_keys = (
            "use_case",
            "modality",
            "privacy",
            "multi_hop",
            "corpus_size",
            "user_scale",
        )
        return {
            "recipe": {key: getattr(self, key) for key in recipe_keys},
            "hw": hw.as_dict(),
            "answers": {key: getattr(answers, key) for key in answer_keys},
        }
73
+
74
+
75
# --- Model tables by hardware tier ---

# Hardware tier -> (LLM model tag, default runtime). recommend() may still
# switch the runtime to vLLM for production use on the larger GPU tiers.
_LLM_BY_TIER = {
    "cpu": ("qwen2.5:3b-instruct-q4_K_M", "llamacpp"),
    "apple-low": ("qwen2.5:7b-instruct-q4_K_M", "ollama"),
    "apple-high": ("qwen2.5:14b-instruct-q4_K_M", "ollama"),
    "gpu-8gb": ("qwen2.5:7b-instruct-q5_K_M", "ollama"),
    "gpu-12gb": ("qwen2.5:14b-instruct-q4_K_M", "ollama"),
    "gpu-24gb": ("qwen2.5:32b-instruct-q4_K_M", "vllm"),
}

# Hardware tier -> embedding model; only the CPU tier differs.
_EMBED_BY_TIER = {
    "cpu": "nomic-embed-text",
    "apple-low": "BAAI/bge-m3",
    "apple-high": "BAAI/bge-m3",
    "gpu-8gb": "BAAI/bge-m3",
    "gpu-12gb": "BAAI/bge-m3",
    "gpu-24gb": "BAAI/bge-m3",
}

# Corpus size answer -> vector database.
_VECTORDB_BY_CORPUS = {
    "small": "chroma",
    "medium": "qdrant",
    "large": "milvus",
}
100
+
101
# Ordered (required modalities, parser) rules; the first rule whose
# non-empty requirement is contained in the requested modalities wins.
_PARSER_BY_MODALITY = [
    ({"images", "tables"}, "docling"),
    ({"tables"}, "docling"),
    ({"images"}, "docling"),
    (set(), "markitdown"),
]


def _pick_parser(modality: list[str]) -> str:
    """Return the document parser for the requested modalities.

    Rules with an empty requirement are skipped, so ``"markitdown"`` is
    reached only through the default.
    """
    requested = set(modality)
    matches = (
        parser
        for needed, parser in _PARSER_BY_MODALITY
        if needed and needed.issubset(requested)
    )
    return next(matches, "markitdown")
115
+
116
+
117
def _pick_template(answers: Answers, tier: str) -> tuple[str, list[str]]:
    """Choose the project template and collect advisory notes.

    Routing priority: GraphRAG / multi-hop, then agent workflow, then
    multimodal, then code RAG, and finally the plain document-QA default
    (the naive template on the CPU tier, RAGFlow otherwise).

    Returns:
        ``(template_name, notes)``; notes holds at most one warning about
        hardware that may be too weak for the chosen stack.
    """
    notes: list[str] = []
    use_case = answers.use_case

    # Hard routing rules (in priority order)
    if use_case == "graphrag" or answers.multi_hop:
        if tier in ("cpu", "apple-low"):
            notes.append(
                "GraphRAG yêu cầu LLM mạnh; hardware hiện tại có thể chạy chậm. "
                "Cân nhắc upgrade hoặc giảm corpus."
            )
        template = "lightrag-stack"
    elif use_case == "agent_workflow":
        template = "dify-stack"
    elif use_case == "multimodal":
        if tier in ("cpu", "apple-low", "gpu-8gb"):
            notes.append("Multimodal RAG cần VRAM cao; có thể giảm chất lượng hình ảnh.")
        template = "ragflow-stack"
    elif use_case == "code_rag":
        template = "ragflow-stack"
    else:
        # qa_docs default
        template = "custom-naive-rag" if tier == "cpu" else "ragflow-stack"
    return template, notes
144
+
145
+
146
def recommend(answers: Answers, hw: HardwareProfile) -> Recipe:
    """Map the user's answers and detected hardware to a concrete recipe.

    Model, embedding and vector-db choices come from the lookup tables keyed
    by hardware tier and corpus size; the template comes from
    ``_pick_template``. Chunking is always recursive with size 512.

    Raises:
        KeyError: if the hardware tier or ``answers.corpus_size`` is not a
            key of the corresponding lookup table.
    """
    tier = hw.tier
    llm_model, llm_runtime = _LLM_BY_TIER[tier]
    embed = _EMBED_BY_TIER[tier]
    vector_db = _VECTORDB_BY_CORPUS[answers.corpus_size]
    parser = _pick_parser(answers.modality)
    template, notes = _pick_template(answers, tier)

    # Production scale -> prefer vLLM if the GPU has room.
    wants_vllm = answers.user_scale == "production" and tier in ("gpu-12gb", "gpu-24gb")
    if wants_vllm:
        llm_runtime = "vllm"

    # The naive template bundles Qdrant and skips the reranker for simplicity.
    is_naive = template == "custom-naive-rag"
    if is_naive:
        vector_db = "qdrant"  # bundled in template
    reranker: str | None = None if is_naive else "jinaai/jina-reranker-v2-base-multilingual"

    gpu_enabled = hw.gpu_vendor in ("nvidia", "amd", "apple")
    # NOTE(review): only NVIDIA uses the detected VRAM; every other vendor,
    # AMD included, gets half of system RAM with a 4 GB floor. Confirm that
    # this is intended for discrete AMD cards.
    if hw.gpu_vendor == "nvidia":
        vram_cap = hw.vram_gb
    else:
        vram_cap = max(hw.ram_gb // 2, 4)

    extras: dict = {
        "enable_graphrag": answers.use_case == "graphrag" or answers.multi_hop,
        "enable_hybrid_search": answers.use_case in ("qa_docs", "code_rag"),
    }

    return Recipe(
        template=template,
        llm_model=llm_model,
        llm_runtime=llm_runtime,
        embedding_model=embed,
        reranker=reranker,
        vector_db=vector_db,
        doc_parser=parser,
        chunk_strategy="recursive",
        chunk_size=512,
        gpu_enabled=gpu_enabled,
        vram_cap_gb=vram_cap,
        notes=notes,
        extras=extras,
    )