npm - ltcai - Versions diffs - 0.1.8 → 0.1.11 - Mend

ltcai 0.1.8 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

package/README.md +141 -289
package/docs/CHANGELOG.md +227 -0
package/docs/architecture.md +121 -0
package/docs/mcp-tools.md +116 -0
package/docs/privacy.md +74 -0
package/docs/public-deploy.md +137 -0
package/docs/security-model.md +121 -0
package/knowledge_graph.py +18 -5
package/ltcai_cli.py +2 -2
package/package.json +1 -1
package/server.py +1140 -280
package/skills/SKILL_TEMPLATE.md +61 -29
package/skills/code_review/SKILL.md +28 -0
package/skills/code_review/examples.md +59 -0
package/skills/code_review/risk.json +9 -0
package/skills/code_review/schema.json +65 -0
package/skills/data_analysis/SKILL.md +28 -0
package/skills/data_analysis/examples.md +62 -0
package/skills/data_analysis/risk.json +9 -0
package/skills/data_analysis/schema.json +61 -0
package/skills/file_edit/SKILL.md +33 -0
package/skills/file_edit/examples.md +45 -0
package/skills/file_edit/risk.json +9 -0
package/skills/file_edit/schema.json +60 -0
package/skills/summarize_document/SKILL.md +68 -0
package/skills/summarize_document/examples.md +65 -0
package/skills/summarize_document/risk.json +9 -0
package/skills/summarize_document/schema.json +71 -0
package/skills/web_search/SKILL.md +28 -0
package/skills/web_search/examples.md +61 -0
package/skills/web_search/risk.json +9 -0
package/skills/web_search/schema.json +62 -0
package/tests/integration/__pycache__/__init__.cpython-314.pyc +0 -0
package/tests/integration/__pycache__/test_api.cpython-314-pytest-9.0.3.pyc +0 -0
package/tests/unit/__pycache__/test_security.cpython-314-pytest-9.0.3.pyc +0 -0
package/tests/unit/__pycache__/test_tools.cpython-314-pytest-9.0.3.pyc +0 -0
package/tests/unit/test_security.py +125 -0
package/tests/unit/test_tools.py +194 -1
package/tools.py +264 -4

package/server.py CHANGED

@@ -29,7 +29,8 @@ try:
 except Exception as e:
     print(f"⚠️ MLX Metal context unavailable: {e}")
     mx = None
-from typing import AsyncIterator, Optional, List, Dict
+from enum import Enum
+from typing import AsyncIterator, Optional, List, Dict, TypedDict
 import uvicorn
 from fastapi import FastAPI, File, HTTPException, Request, Cookie, UploadFile
@@ -65,12 +66,14 @@ from tools import (
     read_document,
     deploy_project,
     desktop_bridge_status,
+    edit_file,
     ensure_agent_root,
     execute_tool,
     git_diff,
     git_log,
     git_show,
     git_status,
+    grep,
     inspect_html,
     knowledge_save,
     knowledge_search,
@@ -87,6 +90,8 @@ from tools import (
     read_file,
     run_command,
     search_files,
+    todo_read,
+    todo_write,
     workspace_tree,
     write_file,
 )
@@ -99,19 +104,15 @@ except Exception:
 from datetime import datetime
 def detect_language(text: str) -> str:
-    """Detect language: 'ko' (Korean), 'zh' (Chinese), or 'en' (English)."""
+    """Detect language: 'ko' (Korean) or 'en' (English)."""
     total = max(len(text), 1)
     ko = sum(1 for c in text if '가' <= c <= '힣')
-    zh = sum(1 for c in text if '一' <= c <= '鿿')
     if ko / total > 0.05:
         return "ko"
-    if zh / total > 0.05:
-        return "zh"
     return "en"
 _LANG_HINT = {
     "ko": "Respond in Korean (한국어로 답변하세요).",
-    "zh": "Respond in Chinese (用中文回答).",
     "en": "Respond in English.",
 }
@@ -217,17 +218,25 @@ def verify_password(password: str, hashed: str) -> bool:
         return False
 def verify_and_migrate_password(email: str, plain: str, stored: str, users: Dict) -> bool:
-    """평문 비밀번호를 투명하게 해시로 마이그레이션."""
+    """평문 비밀번호를 투명하게 해시로 마이그레이션. 마이그레이션 발생 시 audit log 남김."""
     if ":" in stored and len(stored) > 64:
         return verify_password(plain, stored)
     if plain == stored:
         users[email]["password"] = hash_password(plain)
         save_users(users)
+        try:
+            append_audit_event("password_migrated_from_plaintext", user_email=email)
+        except Exception as e:
+            logging.warning("audit log failed on password migration: %s", e)
+        logging.info("Migrated plaintext password to bcrypt hash for %s", email)
         return True
     return False
 # ── Session store (file-backed, survives restarts) ────────────────────────────
-_SESSION_TTL = 60 * 60 * 24 * 7  # 7 days
+# 24-hour TTL with sliding-window refresh — every authenticated request bumps
+# created_at, so an active user stays logged in while idle sessions auto-expire.
+_SESSION_TTL = 60 * 60 * 24  # 24 hours
+_SESSION_REFRESH_THRESHOLD = 60 * 15  # only persist if >15 min since last bump (write amplification guard)
 _sessions_lock = threading.Lock()
 def _sessions_file() -> Path:
@@ -239,15 +248,15 @@ def _load_sessions() -> Dict[str, tuple]:
         if f.exists():
             raw = json.loads(f.read_text())
             return {k: tuple(v) for k, v in raw.items()}
-    except Exception:
-        pass
+    except Exception as e:
+        logging.warning("_load_sessions failed (starting empty): %s", e)
     return {}
 def _persist_sessions(sessions: Dict[str, tuple]) -> None:
     try:
         _sessions_file().write_text(json.dumps({k: list(v) for k, v in sessions.items()}, ensure_ascii=False))
-    except Exception:
-        pass
+    except Exception as e:
+        logging.warning("_persist_sessions failed: %s", e)
 _sessions: Dict[str, tuple] = _load_sessions()
@@ -259,15 +268,21 @@ def create_session(email: str) -> str:
     return token
 def get_session_email(token: str) -> Optional[str]:
+    """Return email for a valid session, sliding the expiry forward on activity."""
+    now = time.time()
     with _sessions_lock:
         entry = _sessions.get(token)
         if entry is None:
             return None
         email, created_at = entry
-        if time.time() - created_at > _SESSION_TTL:
+        if now - created_at > _SESSION_TTL:
             _sessions.pop(token, None)
             _persist_sessions(_sessions)
             return None
+        # Sliding refresh: only update if the timestamp drifted enough to be worth a disk write
+        if now - created_at > _SESSION_REFRESH_THRESHOLD:
+            _sessions[token] = (email, now)
+            _persist_sessions(_sessions)
         return email
 def invalidate_session(token: str) -> None:
@@ -628,7 +643,8 @@ def load_vpc_config() -> Dict:
         with open(VPC_FILE, "r", encoding="utf-8") as f:
             stored = json.load(f)
         return {**DEFAULT_VPC_CONFIG, **stored}
-    except Exception:
+    except Exception as e:
+        logging.warning("load_vpc_config failed (using defaults): %s", e)
         return DEFAULT_VPC_CONFIG.copy()
 def save_vpc_config(config: Dict):
@@ -645,7 +661,8 @@ def load_mcp_installs() -> Dict:
         if "installed" not in data:
             data["installed"] = {}
         return data
-    except Exception:
+    except Exception as e:
+        logging.warning("load_mcp_installs failed: %s", e)
         return {"installed": {}, "updated_at": None}
 def save_mcp_installs(data: Dict):
@@ -1048,6 +1065,71 @@ def require_user(request: Request) -> str:
         raise HTTPException(status_code=401, detail="인증이 필요합니다.")
     return email or ""
+# ── Rate limiting ─────────────────────────────────────────────────────────────
+# Per-user token bucket. Disabled when LATTICEAI_RATE_LIMIT=0 (default: enabled).
+_RATE_LIMIT_ENABLED = os.getenv("LATTICEAI_RATE_LIMIT", "1") != "0"
+_rate_buckets: Dict[str, Dict[str, float]] = {}
+_rate_lock = threading.Lock()
+# (capacity, refill_per_second) per endpoint family
+_RATE_LIMITS = {
+    "chat":   (30, 0.5),   # 30 burst, 30/min sustained
+    "agent":  (10, 0.1),   # 10 burst, 6/min sustained (agent is expensive)
+    "upload": (20, 0.2),   # 20 burst, 12/min sustained
+}
+def enforce_rate_limit(email: str, bucket_key: str) -> None:
+    """Raise HTTP 429 if user exceeds the bucket. No-op when disabled or unauth'd."""
+    if not _RATE_LIMIT_ENABLED or not email:
+        return
+    cap, refill = _RATE_LIMITS.get(bucket_key, (60, 1.0))
+    key = f"{email}:{bucket_key}"
+    now = time.time()
+    with _rate_lock:
+        bucket = _rate_buckets.get(key)
+        if bucket is None:
+            _rate_buckets[key] = {"tokens": cap - 1, "ts": now}
+            return
+        elapsed = now - bucket["ts"]
+        bucket["tokens"] = min(cap, bucket["tokens"] + elapsed * refill)
+        bucket["ts"] = now
+        if bucket["tokens"] < 1:
+            retry_after = max(1, int((1 - bucket["tokens"]) / refill))
+            raise HTTPException(
+                status_code=429,
+                detail=f"Rate limit exceeded for {bucket_key}. Retry after {retry_after}s.",
+                headers={"Retry-After": str(retry_after)},
+            )
+        bucket["tokens"] -= 1
+# ── File magic-number validation ──────────────────────────────────────────────
+# Map of extension → list of byte-prefix signatures (any-match). Files without
+# distinctive magic (.txt, .md, .csv) skip the check.
+_FILE_MAGIC: Dict[str, List[bytes]] = {
+    ".pdf":  [b"%PDF-"],
+    ".docx": [b"PK\x03\x04"],
+    ".xlsx": [b"PK\x03\x04"],
+    ".pptx": [b"PK\x03\x04"],
+    ".zip":  [b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"],
+    ".png":  [b"\x89PNG\r\n\x1a\n"],
+    ".jpg":  [b"\xff\xd8\xff"],
+    ".jpeg": [b"\xff\xd8\xff"],
+    ".gif":  [b"GIF87a", b"GIF89a"],
+}
+def _bytes_match_extension(data: bytes, ext: str) -> bool:
+    """Return True if the file bytes match the claimed extension (or extension has no magic)."""
+    ext = (ext or "").lower()
+    signatures = _FILE_MAGIC.get(ext)
+    if not signatures:
+        return True  # text-like formats — no reliable magic
+    head = data[:16]
+    return any(head.startswith(sig) for sig in signatures)
 def require_admin(request: Request) -> tuple[str, Dict]:
     users = load_users()
     token = _extract_bearer_token(request)
@@ -1414,18 +1496,31 @@ async def unload_idle_models_loop() -> None:
         except Exception as e:
             logging.warning("Idle model unload failed: %s", e)
+def _spawn(coro, *, name: str):
+    """Fire-and-forget asyncio task that logs exceptions instead of swallowing them."""
+    task = asyncio.create_task(coro, name=name)
+    def _on_done(t: asyncio.Task) -> None:
+        if t.cancelled():
+            return
+        exc = t.exception()
+        if exc is not None:
+            logging.warning("background task '%s' failed: %s", name, exc)
+    task.add_done_callback(_on_done)
+    return task
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     try:
         print(f"🧭 Lattice AI mode: {APP_MODE}")
         if ENABLE_TELEGRAM:
             from telegram_bot import run_bot
-            asyncio.create_task(run_bot())
+            _spawn(run_bot(), name="telegram_bot")
             print("🚀 Telegram Bot Bridge activated!")
         else:
             print("⏭️ Telegram Bot Bridge disabled for this mode.")
-        asyncio.create_task(unload_idle_models_loop())
-        asyncio.create_task(autoload_default_model())
+        _spawn(unload_idle_models_loop(), name="unload_idle_models")
+        _spawn(autoload_default_model(), name="autoload_default_model")
     except Exception as e:
         print(f"⚠️ Startup sequence failed: {e}")
     try:
@@ -1491,7 +1586,7 @@ async def login(req: UserLogin):
         "is_admin": role == "admin",
         "token": token,
     })
-    response.set_cookie(key="session_token", value=token, httponly=True, samesite="lax", max_age=60 * 60 * 24 * 7)
+    response.set_cookie(key="session_token", value=token, httponly=True, samesite="lax", max_age=_SESSION_TTL)
     return response
 @app.get("/auth/sso/config")
@@ -1884,12 +1979,48 @@ class AgentRequest(BaseModel):
     message: str
     conversation_id: Optional[str] = None
     source: Optional[str] = None
-    max_steps: int = 6
+    max_steps: int = 25
     temperature: float = 0.1
     user_email: Optional[str] = None
     user_nickname: Optional[str] = None
+class AgentEvalRequest(BaseModel):
+    skill: str
+    case_id: Optional[str] = None
+class AgentState(str, Enum):
+    IDLE             = "IDLE"
+    PLANNING         = "PLANNING"
+    WAITING_APPROVAL = "WAITING_APPROVAL"
+    EXECUTING        = "EXECUTING"
+    VERIFYING        = "VERIFYING"
+    FAILED           = "FAILED"
+    ROLLBACK         = "ROLLBACK"
+    DONE             = "DONE"
+# Terminal states — the agent loop exits when reaching one of these
+AGENT_TERMINAL_STATES = frozenset({AgentState.DONE, AgentState.FAILED})
+class AgentRunContext:
+    """Mutable state carrier passed through all agent phases."""
+    __slots__ = ("state", "plan", "transcript", "retry_count",
+                 "state_history", "corrections", "final_message", "rollback_log")
+    def __init__(self) -> None:
+        self.state:         AgentState = AgentState.IDLE
+        self.plan:          dict       = {}
+        self.transcript:    list       = []
+        self.retry_count:   int        = 0
+        self.state_history: list       = []
+        self.corrections:   list       = []
+        self.final_message: str        = ""
+        self.rollback_log:  list       = []
 class ToolPathRequest(BaseModel):
     path: str = "."
@@ -1915,6 +2046,33 @@ class ToolSearchFilesRequest(BaseModel):
     max_results: int = 20
+class ToolReadFileRequest(BaseModel):
+    path: str
+    offset: int = 0
+    limit: int = 0
+    line_numbers: bool = True
+class ToolEditFileRequest(BaseModel):
+    path: str
+    old_string: str
+    new_string: str
+    replace_all: bool = False
+class ToolGrepRequest(BaseModel):
+    pattern: str
+    path: str = "."
+    glob: Optional[str] = None
+    max_results: int = 50
+    case_insensitive: bool = False
+    context_lines: int = 0
+class ToolTodoWriteRequest(BaseModel):
+    todos: List[Dict] = []
 class ToolWorkspaceTreeRequest(BaseModel):
     path: str = "."
     max_depth: int = 3
@@ -2349,11 +2507,28 @@ def install_engine(engine: str) -> Dict:
         "installed": engine_installed(engine),
     }
     if engine == "ollama" and completed.returncode == 0 and shutil.which("ollama"):
+        # Skip if already running to avoid orphan daemons.
+        already_up = False
         try:
-            subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-            result["daemon_started"] = True
+            probe = subprocess.run(["ollama", "list"], capture_output=True, timeout=2, check=False)
+            already_up = probe.returncode == 0
         except Exception:
-            result["daemon_started"] = False
+            already_up = False
+        if already_up:
+            result["daemon_started"] = "already_running"
+        else:
+            try:
+                # Detach so the daemon survives this request but doesn't become our zombie.
+                subprocess.Popen(
+                    ["ollama", "serve"],
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                    start_new_session=True,
+                )
+                result["daemon_started"] = True
+            except Exception as e:
+                logging.warning("ollama serve spawn failed: %s", e)
+                result["daemon_started"] = False
     return result
 CLOUD_VERIFY_CACHE: Dict[str, Dict] = {}
@@ -2623,6 +2798,7 @@ async def unload_all_models(request: Request):
 @app.post("/chat")
 async def chat(req: ChatRequest, request: Request):
     current_user = require_user(request)
+    enforce_rate_limit(current_user, "chat")
     img_len = len(req.image_data) if req.image_data else 0
     print(
         f"🧪 /chat request: stream={req.stream} image_data_len={img_len} "
@@ -2966,130 +3142,446 @@ async def _stream_chat(req: ChatRequest, context: str = "", image_data: str = No
 # ── Local Computer Agent ──────────────────────────────────────────────────────
-AGENT_SYSTEM_PROMPT = """You are Lattice AI Agent, a local computer-use coding assistant.
-You have full access to the local filesystem via local_list / local_read / local_write tools.
-Use read_file / write_file for paths inside the agent workspace (relative paths).
-Use local_read / local_write for any absolute path on the system (e.g. ~/Downloads, ~/Desktop).
+# ── Agent Role Prompts (Planner / Executor / Critic / Memory Updater) ─────────
+_TOOL_CATALOG_BRIEF = """
+FILESYSTEM  : list_dir  workspace_tree  read_file  write_file  edit_file  grep  search_files  inspect_html  preview_url
+PLANNING    : todo_read  todo_write
+PROJECT     : run_command  build_project  deploy_project  create_web_project
+GIT (read)  : git_status  git_diff  git_log  git_show
+LOCAL FS    : local_list  local_read  local_write  read_document
+DOCS        : create_docx  create_xlsx  create_pptx  create_pdf
+KNOWLEDGE   : knowledge_save  knowledge_search  knowledge_tree
+COMPUTER    : computer_screenshot  computer_open_app  computer_open_url  computer_click  computer_type  computer_key
+MISC        : network_status  clear_history  final
+"""
-Available actions:
-- list_dir: {"action":"list_dir","args":{"path":"."}}
-- workspace_tree: {"action":"workspace_tree","args":{"path":".","max_depth":3}}
-- read_file: {"action":"read_file","args":{"path":"relative/path.txt"}}
-- write_file: {"action":"write_file","args":{"path":"relative/path.txt","content":"complete file content"}}
-- search_files: {"action":"search_files","args":{"query":"text","path":".","max_results":20}}
-- clear_history: {"action":"clear_history","args":{"keep_last":0}}
-- inspect_html: {"action":"inspect_html","args":{"path":"index.html"}}
-- preview_url: {"action":"preview_url","args":{"path":"index.html"}}
-- create_docx: {"action":"create_docx","args":{"title":"title","body":"paragraphs","filename":"document.docx"}}
-- create_xlsx: {"action":"create_xlsx","args":{"rows":[["A","B"],[1,2]],"filename":"spreadsheet.xlsx","sheet_name":"Sheet1"}}
-- create_pptx: {"action":"create_pptx","args":{"title":"title","slides":[{"title":"Slide","bullets":["point"]}],"filename":"presentation.pptx"}}
-- create_pdf: {"action":"create_pdf","args":{"title":"title","body":"paragraphs","filename":"document.pdf"}}
-- create_web_project: {"action":"create_web_project","args":{"path":"my_app","framework":"react","template":"vite"}} — scaffold a runnable web app project
-- local_list: {"action":"local_list","args":{"path":"/Users/username/Downloads"}} — lists any local folder (UI will request user permission first)
-- local_read: {"action":"local_read","args":{"path":"/Users/username/Documents/note.txt"}} — reads any local file (UI will request user permission first)
-- local_write: {"action":"local_write","args":{"path":"/Users/username/Desktop/output.txt","content":"..."}} — writes any local file (UI will request user permission first)
-- read_document: {"action":"read_document","args":{"path":"/absolute/path/to/file.pdf"}} — extract text from PDF, DOCX, XLSX, PPTX, TXT, MD, CSV
-- computer_screenshot: {"action":"computer_screenshot","args":{}} — capture current screen as base64 PNG
-- computer_open_app: {"action":"computer_open_app","args":{"app":"Google Chrome"}} — open or focus a Mac app
-- computer_open_url: {"action":"computer_open_url","args":{"url":"https://example.com","app":"Google Chrome"}} — open URL in app
-- computer_click: {"action":"computer_click","args":{"x":500,"y":300,"button":"left","double":false}}
-- computer_type: {"action":"computer_type","args":{"text":"hello"}}
-- computer_key: {"action":"computer_key","args":{"key":"command+c"}} — e.g. return, escape, tab, command+v
-- computer_scroll: {"action":"computer_scroll","args":{"x":500,"y":300,"direction":"down","clicks":3}}
-- computer_move: {"action":"computer_move","args":{"x":500,"y":300}}
-- computer_drag: {"action":"computer_drag","args":{"x1":100,"y1":100,"x2":500,"y2":500}}
-- computer_status: {"action":"computer_status","args":{}} — check if Computer Use is available
-- chrome_status: {"action":"chrome_status","args":{}}
-- computer_use_status: {"action":"computer_use_status","args":{}}
-- knowledge_save: {"action":"knowledge_save","args":{"folder":"30_Projects","title":"short title","content":"note"}}
-- knowledge_search: {"action":"knowledge_search","args":{"query":"keyword","max_results":5}}
-- knowledge_tree: {"action":"knowledge_tree","args":{}}
-- obsidian_save: {"action":"obsidian_save","args":{"folder":"30_Projects","title":"short title","content":"note"}}
-- obsidian_search: {"action":"obsidian_search","args":{"query":"keyword","max_results":5}}
-- obsidian_tree: {"action":"obsidian_tree","args":{}}
-- git_status: {"action":"git_status","args":{}}
-- git_diff: {"action":"git_diff","args":{"path":"optional/relative/path"}}
-- git_log: {"action":"git_log","args":{"max_count":5}}
-- git_show: {"action":"git_show","args":{"revision":"HEAD"}}
-- network_status: {"action":"network_status","args":{}} — get current local/private IP, public IP, hostname, and Wi-Fi info
-- run_command: {"action":"run_command","args":{"command":"python3 app.py","cwd":"."}}
-- build_project: {"action":"build_project","args":{"cwd":".","script":"build"}}
-- deploy_project: {"action":"deploy_project","args":{"cwd":".","script":"deploy"}}
-- final: {"action":"final","message":"short Korean summary of what you did"}
+PLANNER_PROMPT = """You are the PLANNER role in Lattice AI's multi-role agent harness.
+Your ONLY job: analyze the request and produce a structured execution plan.
+You do NOT call tools or write code.
+Respond with exactly ONE JSON object (no markdown, no fences):
+{
+  "action": "plan",
+  "state": "PLANNING",
+  "goal": "one-sentence goal in the user's language",
+  "steps": [
+    {"id": 1, "description": "what this step does", "action": "expected_tool", "purpose": "why needed"}
+  ],
+  "requires_approval": true,
+  "rollback_strategy": "git",
+  "estimated_steps": 3
+}
+Rules:
+- requires_approval = true if ANY step uses write/exec tools (edit_file, write_file, run_command, etc.)
+- rollback_strategy = "git" if steps modify existing files; "none" otherwise
+- Keep steps realistic: 2-4 for simple tasks, up to 10 for complex ones
+- Do NOT specify full tool args — that is the Executor's job
+Available tools:""" + _TOOL_CATALOG_BRIEF
+EXECUTOR_PROMPT = """You are the EXECUTOR role in Lattice AI's multi-role agent harness.
+You have a plan from the Planner. Execute it step by step using exactly one tool per response.
+You think and act like a senior software engineer:
+- Read (read_file, grep) BEFORE editing — never guess at file contents
+- Prefer edit_file over write_file for existing files
+- Keep changes small and precise
+- Verify after changes with build_project or run_command
+Respond with exactly ONE JSON object per step:
+{"thoughts": "what you learned / why this next action", "action": "tool_name", "args": {...}}
+When the task is fully done AND a tool result in this run confirms it:
+{"thoughts": "verified", "action": "final", "message": "한국어로 무엇을 했고 어디서 검증했는지 요약"}
+ANTI-PATTERNS (will halt the loop):
+- Editing without reading first → read_file + grep BEFORE edit_file
+- Repeating the same action+args → check the transcript
+- Claiming done without a verification tool result in transcript
+- Hallucinating imports or file paths that were never confirmed by a tool result
+Available tools:""" + _TOOL_CATALOG_BRIEF
+CRITIC_PROMPT = """You are the CRITIC / REVIEWER role in Lattice AI's multi-role agent harness.
+Review the execution transcript and determine whether the goal was achieved.
+Respond with exactly ONE JSON object:
+{
+  "action": "verdict",
+  "state": "VERIFYING",
+  "verdict": "PASS",
+  "reason": "why you think it passed or failed (cite specific tool results)",
+  "corrections": [],
+  "confidence": 0.95,
+  "next_state": "DONE"
+}
+verdict: "PASS" | "FAIL"
+next_state:
+  "DONE"      — task succeeded; finish
+  "EXECUTING" — task failed but corrections can fix it (use corrections field for retry)
+  "ROLLBACK"  — task failed AND file changes should be undone
+Criteria for PASS: a tool result in the transcript explicitly confirms success.
+Be strict. Claiming done without evidence = FAIL."""
+MEMORY_UPDATER_PROMPT = """You are the MEMORY UPDATER role in Lattice AI's multi-role agent harness.
+After a completed task, extract reusable learnings.
+Respond with exactly ONE JSON object:
+{
+  "action": "memory",
+  "state": "DONE",
+  "learnings": ["one concise fact about this codebase or task"],
+  "artifacts": ["relative/path/to/created_or_modified_file"],
+  "save_to_knowledge": false
+}
 Rules:
-- Respond with exactly one JSON object. No markdown, no code fences, no extra text.
-- Use relative paths only.
-- Create complete files, not fragments.
-- Prefer simple, verifiable steps.
-- Use inspect_html and preview_url for generated web UI.
-- Use build_project when the user asks to build, compile, typecheck, or run a package build script.
-- Use deploy_project when the user asks to deploy, preview, release, or package installers (pkg/exe) and package.json defines that script (e.g. package, dist, make, build:pkg, build:exe).
-- If the user asks for app/service/web creation, prefer create_web_project first, then edit files with write_file/read_file and verify with build_project or run_command.
-- If the user asks for installer outputs (.pkg/.exe), set up packaging config (for example Electron/electron-builder or equivalent), create package scripts in package.json, then run deploy_project for installer scripts.
-- If .exe cannot be built on current OS/toolchain, still generate the full packaging config and scripts for Windows and report the exact missing prerequisite.
-- Do not claim you cannot build or deploy. If a script, token, or platform config is missing, inspect the workspace and explain the exact missing piece.
-- Use knowledge tools when the user asks to remember, search memory, or organize project context.
-- Use run_command for local inspection, tests, and short development commands after files are written.
-- For data analysis tasks, read the provided files first (read_document/local_read), compute with run_command when needed, and return concrete findings plus output artifact paths when created.
-- Use clear_history when the user asks to forget, clear, delete, reset, or speed up chat history.
-- Git is read-only: status, diff, log, and show only. Never commit, push, pull, fetch, clone, reset, or checkout.
-- If the user asks for something unsafe or outside the workspace, explain the limitation with final.
-- IMPORTANT: When user asks to create any document (docx, pdf, xlsx, pptx, word, excel, powerpoint, 문서, 파일, 엑셀, 파워포인트, PPT, 피피티), ALWAYS use the appropriate create_* action immediately with full, rich content. Never say you cannot create files.
+- max 5 learnings, one sentence each
+- save_to_knowledge = true only if learnings are genuinely useful across future sessions
+- artifacts = files the Executor actually created or modified (from transcript)
+"""
+# Keep backward-compat alias used by any existing callers
+AGENT_SYSTEM_PROMPT = EXECUTOR_PROMPT
+# Marker: the old monolithic prompt was replaced by 4-role prompts above.
+# Legacy variable kept so Telegram bot / VS Code extension still work.
+_ORIGINAL_MONOLITHIC_PROMPT_NOTE = """You are Lattice AI Agent — a local, professional-grade coding assistant.
+You have full access to a sandboxed workspace and (with user approval) the wider filesystem.
+You think and work like a senior software engineer, not like an autocompleter.
+================================================================================
+HOW A PROFESSIONAL DEVELOPER THINKS — your operating loop
+================================================================================
+Every multi-step task follows four phases. Skipping phases is the #1 cause of bad
+output. Do not skip them.
+1) DISCOVER (read first, then act)
+   - Map the territory before changing it. Use workspace_tree, list_dir, grep,
+     and read_file BEFORE writing or editing anything.
+   - When the user names a file/feature/function, locate it (grep) and read the
+     surrounding code BEFORE proposing a change.
+   - Read package.json, pyproject.toml, requirements.txt, tsconfig.json, and
+     other config files before assuming a library/version/tool is available.
+   - Never guess at APIs, imports, file paths, function signatures, or types.
+     If you don't know, look it up with grep/read_file. Hallucinated code is
+     the worst possible output.
+2) PLAN (write the plan down)
+   - For any task with 3+ distinct steps, call todo_write FIRST with a concrete
+     checklist (3–10 items). Keep exactly one item in_progress at a time.
+   - The plan should describe WHAT will change and HOW you'll verify it works,
+     not vague intentions ("look at code", "fix bugs"). Bad plans produce bad code.
+   - Update the todo list (todo_write again) as items complete or new ones emerge.
+3) IMPLEMENT (small, precise diffs)
+   - Prefer edit_file over write_file when modifying existing files. edit_file
+     requires exact byte-level old_string match — read the file first and copy
+     the surrounding context verbatim. This forces correctness.
+   - Use write_file only for brand-new files or when fully rewriting a file you
+     understand end-to-end.
+   - Keep diffs as small as the task requires. Don't refactor "while you're
+     there." Don't add abstractions for hypothetical future needs.
+   - Code quality:
+       * No new comments unless the WHY is non-obvious (a subtle invariant, a
+         workaround for a specific bug, behavior that would surprise a reader).
+         Never write comments that just restate what the code does.
+       * No backward-compat shims, no dead code, no unused imports/variables.
+       * No defensive try/except around code that can't fail. Trust internal
+         contracts; validate only at system boundaries (user input, network).
+       * Match the surrounding code's style (indent, quotes, naming).
+4) VERIFY (prove it works before claiming done)
+   - After code changes, RUN something that confirms correctness:
+       * build_project for build/typecheck/test scripts
+       * run_command for python/node scripts and tests
+       * inspect_html + preview_url for generated UI
+   - If verification fails, treat the failure as the new task. Diagnose root
+     cause; do not paper over it (no try/except shortcuts, no --no-verify, no
+     disabling tests). Re-enter Discover phase if needed.
+   - Never claim a task is "complete," "saved," "fixed," "working," or
+     "deployed" unless a tool result in this same agent run confirms it.
+================================================================================
+RESPONSE FORMAT (strict)
+================================================================================
+Respond with exactly ONE JSON object per step. No markdown, no code fences, no
+extra prose. Include a short `thoughts` field that records your current reasoning
+(what you just learned, what you'll do next, why). The user does not see it
+directly — it exists so you can plan across steps.
+  {"thoughts": "Need to read App.tsx before editing the import. Workspace tree
+   confirms only one App.tsx exists.",
+   "action": "read_file",
+   "args": {"path": "src/App.tsx"}}
+When the task is fully complete AND verified:
+  {"thoughts": "Build passed, file written, ready to summarize.",
+   "action": "final",
+   "message": "한국어로 간결하게 무엇을 만들었고 어디서 검증했는지 요약."}
+If you cannot proceed (missing tool, blocked path, ambiguous user intent), use
+`final` and clearly state the blocker and the smallest next step the user can
+take to unblock it. Do NOT loop on the same failing action.
+================================================================================
+TOOL CATALOG
+================================================================================
+Filesystem (workspace, relative paths):
+  list_dir        {"path":"."}
+  workspace_tree  {"path":".", "max_depth":3}
+  read_file       {"path":"src/App.tsx", "offset":0, "limit":0, "line_numbers":true}
+                  — returns numbered view + total_lines. Use offset/limit for big files.
+  write_file      {"path":"new_file.py", "content":"..."}   — new files / full rewrites
+  edit_file       {"path":"existing.py", "old_string":"exact text", "new_string":"new text",
+                   "replace_all":false}
+                  — preferred for existing files. old_string MUST appear once
+                    (unless replace_all=true). Include enough surrounding context
+                    to make it unique.
+  grep            {"pattern":"regex", "path":".", "glob":"*.py", "max_results":50,
+                   "case_insensitive":false, "context_lines":2}
+                  — regex search across the codebase. Use this before assuming a
+                    symbol exists.
+  search_files    {"query":"substring", "path":".", "max_results":20}   — legacy substring search
+  inspect_html    {"path":"index.html"}
+  preview_url     {"path":"index.html"}
+Planning:
+  todo_read       {}
+  todo_write      {"todos":[{"id":"1","content":"...","status":"pending"}]}
+                  — status ∈ pending|in_progress|completed.
+                    Use proactively for any task with 3+ steps.
+Project ops:
+  run_command     {"command":"python3 app.py", "cwd":"."}
+                  — allowed binaries: pwd ls find cat sed head tail wc rg python python3 node npm npx
+                  — git is NOT allowed here; use the git_* tools below (read-only).
+  build_project   {"cwd":".", "script":"build"}    — also: compile, typecheck, test
+  deploy_project  {"cwd":".", "script":"deploy"}   — also: preview, release, package, dist, make, build:pkg, build:exe
+  create_web_project {"path":"my_app", "framework":"react", "template":"vite"}
+Git (read-only):
+  git_status, git_diff, git_log, git_show
+  — Never commit/push/pull/fetch/clone/reset/checkout. Lattice agent does not author git history.
+Local filesystem (outside workspace; UI prompts user for approval):
+  local_list      {"path":"/Users/.../Downloads"}
+  local_read      {"path":"/abs/path/file.txt"}
+  local_write     {"path":"/abs/path/file.txt", "content":"..."}
+  read_document   {"path":"/abs/path/report.pdf"}   — PDF, DOCX, XLSX, PPTX, TXT, MD, CSV
+Document generation (written to workspace generated_* folders):
+  create_docx     {"title":"...", "body":"...", "filename":"doc.docx"}
+  create_xlsx     {"rows":[["A","B"],[1,2]], "filename":"sheet.xlsx", "sheet_name":"Sheet1"}
+  create_pptx     {"title":"...", "slides":[{"title":"...","bullets":["..."]}], "filename":"deck.pptx"}
+  create_pdf      {"title":"...", "body":"...", "filename":"doc.pdf"}
+Knowledge / memory (Obsidian-compatible Markdown vault):
+  knowledge_save  {"folder":"30_Projects", "title":"...", "content":"..."}
+  knowledge_search {"query":"...", "max_results":5}
+  knowledge_tree  {}
+  obsidian_save / obsidian_search / obsidian_tree  — same as knowledge_*, with vault URIs
+Computer use (macOS desktop control, requires Accessibility permission):
+  computer_screenshot, computer_open_app, computer_open_url, computer_click,
+  computer_type, computer_key, computer_scroll, computer_move, computer_drag,
+  computer_status, chrome_status, computer_use_status
+  — Use screenshot to ground state; click/type to interact. Verify with another screenshot.
+Misc:
+  network_status  {}
+  clear_history   {"keep_last":0}
+  final           {"message":"..."}
+================================================================================
+DOMAIN RULES (keep in mind)
+================================================================================
+- Frontend: don't assume Tailwind/framer-motion/TypeScript exist. Read
+  package.json first. If a dependency is missing, either add it explicitly to
+  package.json (and create the config files it needs) or pick a simpler stack
+  that already works.
+- Installers (.pkg/.exe): set up the packaging config (e.g. electron-builder)
+  with full scripts in package.json, then run deploy_project. If the current
+  OS/toolchain can't produce the artifact, still generate complete config and
+  state the exact missing prerequisite — do not say "I can't."
+- Data analysis: read the data files (read_document/local_read), compute with
+  run_command, report concrete findings plus output artifact paths.
+- Document requests (docx/xlsx/pptx/pdf, 문서/엑셀/PPT/피피티/파워포인트): call
+  the matching create_* action immediately with rich, complete content. Never
+  say you cannot create files.
+- Korean/English: answer in the language the user used; default to Korean
+  if mixed or ambiguous.
+================================================================================
+ANTI-PATTERNS (will be flagged by the orchestrator)
+================================================================================
+- Editing without reading first → use read_file + grep before edit_file.
+- Repeating the same action with the same args → the loop will halt you.
+- Claiming "done" without a verification tool result in the transcript.
+- Adding new dependencies without updating package.json / requirements.txt.
+- Producing fragments when the user asked for a complete file or runnable app.
+- Stuffing speculative features beyond the user's actual request.
+- Decorative placeholder URLs / fake data when real data is available.
 """
-_FILE_CREATE_ACTIONS = {"create_docx", "create_xlsx", "create_pptx", "create_pdf", "write_file", "create_web_project"}
+# Actions checked by the loop guard in _phase_execute: repeating one of these with
+# identical args right after it produced a result stops the execute loop.
+_FILE_CREATE_ACTIONS = {"create_docx", "create_xlsx", "create_pptx", "create_pdf", "write_file", "edit_file", "create_web_project"}
 # Harness risk level per tool action.
 # low    — read-only, no side effects
 # medium — write/create files or knowledge entries
 # high   — execute commands, control computer, write to arbitrary FS paths
-_TOOL_RISK: Dict[str, str] = {
-    # read-only workspace tools
-    "list_dir": "low", "workspace_tree": "low", "read_file": "low",
-    "search_files": "low", "inspect_html": "low",
-    # read-only local FS
-    "local_list": "low", "local_read": "low",
-    # read-only git
-    "git_status": "low", "git_log": "low", "git_diff": "low", "git_show": "low",
-    # read-only knowledge / computer
-    "knowledge_search": "low", "knowledge_tree": "low",
-    "obsidian_search": "low", "obsidian_tree": "low",
-    "computer_screenshot": "low", "computer_status": "low",
-    # write workspace
-    "write_file": "medium", "create_web_project": "medium",
-    "create_docx": "medium", "create_xlsx": "medium",
-    "create_pptx": "medium", "create_pdf": "medium",
-    # write knowledge
-    "knowledge_save": "medium", "obsidian_save": "medium",
-    # write local FS (arbitrary path — treated as medium; blocked from system roots below)
-    "local_write": "medium",
-    # preview
-    "preview_url": "medium",
-    # execute commands
-    "run_command": "high",
-    # computer control
-    "computer_click": "high", "computer_type": "high", "computer_key": "high",
-    "computer_scroll": "high", "computer_drag": "high", "computer_move": "high",
-    "computer_open_app": "high", "computer_open_url": "high",
+class ToolPolicy(TypedDict):
+    """Governance record for one tool action (the value type of TOOL_GOVERNANCE)."""
+    risk: str          # "read" | "write" | "exec" | "destructive"
+    destructive: bool  # True = data loss possible, no auto-undo
+    shell: bool        # True = spawns a subprocess
+    network: bool      # True = makes external network calls
+    auto_approve: bool # True = agent may call without human confirmation
+    sandbox: str       # "workspace" | "home" | "system"
+    rollback: str      # "none" | "backup" | "git"
+def _policy_factory(risk, *, destructive=False, shell=False, network=False,
+                    auto_approve=False, default_sandbox="workspace"):
+    """Build a ToolPolicy constructor for one fixed risk profile.
+    The returned callable keeps the (s, sb, ro) signature used by the rows of
+    TOOL_GOVERNANCE: `s` is the tool name (accepted only so each row is
+    self-describing; it is not stored), `sb` overrides the sandbox and `ro`
+    the rollback strategy.
+    """
+    def make(s, sb=default_sandbox, ro="none"):
+        # Key order is kept stable because policies are copied into the transcript.
+        return ToolPolicy(
+            risk=risk, destructive=destructive, shell=shell, network=network,
+            auto_approve=auto_approve, sandbox=sb, rollback=ro,
+        )
+    return make
+# Shorthand constructors, one per risk profile used in TOOL_GOVERNANCE.
+_R  = _policy_factory("read", auto_approve=True)                      # read, in-process
+_RS = _policy_factory("read", shell=True, auto_approve=True)          # read, spawns a subprocess
+_RN = _policy_factory("read", shell=True, network=True, auto_approve=True, default_sandbox="system")
+_W  = _policy_factory("write")                                        # write, in-process
+_E  = _policy_factory("exec", shell=True)                             # exec via subprocess
+_EN = _policy_factory("exec", shell=True, network=True)               # exec with network access
+_EC = _policy_factory("exec", default_sandbox="system")               # exec without subprocess, system sandbox
+_D  = _policy_factory("destructive", destructive=True, shell=True)    # destructive
+# Governance table: one ToolPolicy per tool action the agent can request.
+# Actions missing from this table fall back to _TOOL_GOVERNANCE_DEFAULT.
+TOOL_GOVERNANCE: Dict[str, ToolPolicy] = {
+    # ── read-only / workspace ──────────────────────────────────────────────────
+    "list_dir":           _R("list_dir"),
+    "workspace_tree":     _R("workspace_tree"),
+    "read_file":          _R("read_file"),
+    "search_files":       _R("search_files"),
+    "grep":               _R("grep"),
+    "inspect_html":       _R("inspect_html"),
+    "todo_read":          _R("todo_read"),
+    # ── read-only / home FS ───────────────────────────────────────────────────
+    "local_list":         _R("local_list",  sb="home"),
+    "local_read":         _R("local_read",  sb="home"),
+    # read_document is listed in the tool catalog next to local_read; without an
+    # entry here it would be classified by the "write" default policy.
+    "read_document":      _R("read_document", sb="home"),
+    # ── read-only / git (spawns subprocess, read-only) ───────────────────────
+    "git_status":         _RS("git_status"),
+    "git_diff":           _RS("git_diff"),
+    "git_log":            _RS("git_log"),
+    "git_show":           _RS("git_show"),
+    # ── read-only / knowledge ─────────────────────────────────────────────────
+    "knowledge_search":   _R("knowledge_search", sb="home"),
+    "knowledge_tree":     _R("knowledge_tree",   sb="home"),
+    "obsidian_search":    _R("obsidian_search",  sb="home"),
+    "obsidian_tree":      _R("obsidian_tree",    sb="home"),
+    # ── read-only / system ────────────────────────────────────────────────────
+    "computer_screenshot":_R("computer_screenshot", sb="system"),
+    "computer_status":    _R("computer_status",     sb="system"),
+    "chrome_status":      _R("chrome_status",       sb="system"),
+    "computer_use_status":_R("computer_use_status", sb="system"),
+    "network_status":     _RN("network_status"),
+    # ── write / workspace ─────────────────────────────────────────────────────
+    "write_file":         _W("write_file",       ro="git"),
+    "edit_file":          _W("edit_file",        ro="git"),
+    "create_web_project": _W("create_web_project"),
+    "create_docx":        _W("create_docx"),
+    "create_xlsx":        _W("create_xlsx"),
+    "create_pptx":        _W("create_pptx"),
+    "create_pdf":         _W("create_pdf"),
+    "preview_url":        _W("preview_url"),
+    "todo_write":         _W("todo_write"),
+    # ── write / home FS ───────────────────────────────────────────────────────
+    "knowledge_save":     _W("knowledge_save",  sb="home"),
+    "obsidian_save":      _W("obsidian_save",   sb="home"),
+    "local_write":        _W("local_write",     sb="home"),
+    # ── exec / workspace ──────────────────────────────────────────────────────
+    "run_command":        _E("run_command"),
+    "build_project":      _E("build_project"),
+    # ── exec / network ────────────────────────────────────────────────────────
+    "deploy_project":     _EN("deploy_project"),
+    # ── exec / computer use (system-level input injection) ───────────────────
+    "computer_click":     _EC("computer_click"),
+    "computer_type":      _EC("computer_type"),
+    "computer_key":       _EC("computer_key"),
+    "computer_scroll":    _EC("computer_scroll"),
+    "computer_drag":      _EC("computer_drag"),
+    "computer_move":      _EC("computer_move"),
+    "computer_open_app":  _EC("computer_open_app"),
+    # Spelled out because no shorthand combines exec + network without a subprocess.
+    "computer_open_url":  ToolPolicy(risk="exec", destructive=False, shell=False, network=True,  auto_approve=False, sandbox="system",    rollback="none"),
 }
-# Paths that local_write must never target (system-level protection)
+# Policy applied by _agent_policy to any action that has no TOOL_GOVERNANCE entry:
+# classified as a workspace write that is not auto-approved.
+_TOOL_GOVERNANCE_DEFAULT = ToolPolicy(
+    risk="write", destructive=False, shell=False, network=False,
+    auto_approve=False, sandbox="workspace", rollback="none",
+)
+# Paths that local_write / local_list must never target
+# NOTE(review): the only check visible in this part of the file is for local_write
+# (in _agent_policy) — confirm where local_list is checked against these prefixes.
+# Each entry ends with "/", so prefix matching does not catch siblings such as "/usrlocal".
 _LOCAL_WRITE_BLOCKED_PREFIXES = (
     "/etc/", "/usr/", "/bin/", "/sbin/", "/System/", "/private/etc/",
     "/Library/LaunchDaemons/", "/Library/LaunchAgents/",
 )
+# Backward-compat: map policy risk → legacy low/medium/high string
+# ("exec" and "destructive" both collapse to "high").
+_RISK_LEVEL_MAP = {"read": "low", "write": "medium", "exec": "high", "destructive": "high"}
-def _agent_risk(action_name: str, args: dict) -> str:
-    """Return risk level for an action, upgrading local_write to 'high' for system paths."""
-    risk = _TOOL_RISK.get(action_name, "medium")
+def _agent_policy(action_name: str, args: dict) -> ToolPolicy:
+    """Return the full governance policy for an action.
+    Unknown actions get _TOOL_GOVERNANCE_DEFAULT. A local_write whose path falls
+    under one of _LOCAL_WRITE_BLOCKED_PREFIXES is upgraded to a destructive,
+    system-sandbox policy. The path is checked both as given and in lexically
+    normalised form, so "/etc", "//etc/hosts" and "/tmp/../etc/hosts" are
+    caught as well. Symlinks are not resolved.
+    """
+    import posixpath  # function-scope import; the blocked prefixes are POSIX paths
+    policy = TOOL_GOVERNANCE.get(action_name, _TOOL_GOVERNANCE_DEFAULT)
     if action_name == "local_write":
-        path = str(args.get("path", ""))
-        if any(path.startswith(p) for p in _LOCAL_WRITE_BLOCKED_PREFIXES):
-            risk = "high"
-    return risk
+        raw_path = str(args.get("path", ""))
+        normalized = posixpath.normpath(raw_path)
+        # normpath keeps exactly two leading slashes (a POSIX special case); fold them.
+        if normalized.startswith("//"):
+            normalized = normalized[1:]
+        # The trailing "/" lets a bare directory such as "/etc" match the "/etc/" prefix.
+        candidates = (raw_path, normalized + "/")
+        if any(c.startswith(_LOCAL_WRITE_BLOCKED_PREFIXES) for c in candidates):
+            policy = ToolPolicy(
+                risk="destructive", destructive=True, shell=False, network=False,
+                auto_approve=False, sandbox="system", rollback="none",
+            )
+    return policy
+def _agent_risk(action_name: str, args: dict) -> str:
+    """Translate an action's governance risk into the legacy low/medium/high label.
+    Kept so transcripts keep their older risk format; unmapped risks read as "medium".
+    """
+    governance_risk = _agent_policy(action_name, args)["risk"]
+    return _RISK_LEVEL_MAP.get(governance_risk, "medium")
+# ── Tool Permission Layer ─────────────────────────────────────────────────────
+# A compact, public-facing view of each tool's authorization profile, derived
+# from TOOL_GOVERNANCE. Designed for client UIs / approval dialogs that don't
+# need the full 7-dimensional governance object.
+#
+# Example (values as derived for run_command):
+#   { "tool": "run_command", "risk": "high", "requires_approval": true, "network": false }
+class ToolPermission(TypedDict):
+    """Simplified authorization view of one tool, as built by get_tool_permission."""
+    tool: str                 # tool/action name the view was requested for
+    risk: str                 # "low" | "medium" | "high"
+    requires_approval: bool   # inverse of governance.auto_approve
+    network: bool             # tool makes external network calls
+def get_tool_permission(name: str, args: Optional[dict] = None) -> ToolPermission:
+    """Build the compact permission view for the tool called `name`.
+    Pass `args` when the call's arguments can change the outcome (for example a
+    local_write aimed at /etc); leave it out for a static catalog listing.
+    """
+    call_args = args if args else {}
+    governance = _agent_policy(name, call_args)
+    legacy_risk = _RISK_LEVEL_MAP.get(governance["risk"], "medium")
+    needs_approval = not governance["auto_approve"]
+    return ToolPermission(
+        tool=name,
+        risk=legacy_risk,
+        requires_approval=needs_approval,
+        network=governance["network"],
+    )
+def list_tool_permissions() -> list:
+    """List the permission view of every tool in TOOL_GOVERNANCE, ordered by tool name."""
+    ordered_names = sorted(TOOL_GOVERNANCE)
+    return list(map(get_tool_permission, ordered_names))
 def _collect_created_files(transcript: list) -> list:
@@ -3138,141 +3630,420 @@ def _extract_agent_action(raw: str) -> Dict:
     return action
-@app.post("/agent")
-async def agent(req: AgentRequest, request: Request):
-    """Natural-language local agent loop for Telegram and future clients."""
-    current_user = require_user(request)
-    if not router.current_model_id:
-        raise HTTPException(status_code=400, detail="No model loaded. Call /models/load first.")
+# ── Agent State Machine — Phase Functions ─────────────────────────────────────
-    ensure_agent_root()
-    transcript = []
-    max_steps = max(1, min(req.max_steps, 10))
-    lang = detect_language(req.message)
-    lang_hint = _LANG_HINT[lang]
+async def _phase_plan(
+    ctx: AgentRunContext, req: AgentRequest, router, lang_hint: str, current_user: str,
+) -> None:
+    """PLAN: ask the Planner role for a plan and record it on the run context.
+    Stores the parsed plan in ctx.plan, appends a PLANNING entry to ctx.transcript
+    and moves the run to WAITING_APPROVAL. When the reply cannot be parsed, a
+    minimal plan with no steps is used, with the user's message as its goal.
+    """
+    # Defaults shared by the fallback plan and the transcript entry (same key order).
+    field_defaults = {
+        "goal": req.message, "steps": [],
+        "requires_approval": False, "rollback_strategy": "none", "estimated_steps": 1,
+    }
+    planner_context = (
+        f"{PLANNER_PROMPT}\n\n[LANGUAGE HINT: {lang_hint}]\n"
+        f"Workspace root: {AGENT_ROOT}\n\nUser request: {req.message}"
+    )
+    reply = await router.generate(
+        message="Produce a JSON execution plan for this request.",
+        context=planner_context, max_tokens=1024, temperature=0.1,
+    )
+    try:
+        parsed_plan = _extract_agent_action(str(reply))
+    except ValueError:
+        parsed_plan = {"action": "plan", "state": "PLAN", **field_defaults}
+    ctx.plan = parsed_plan
+    record = {"state": AgentState.PLANNING.value}
+    for key, fallback in field_defaults.items():
+        record[key] = parsed_plan.get(key, fallback)
+    ctx.transcript.append(record)
+    ctx.state = AgentState.WAITING_APPROVAL
+def _phase_approval(ctx: AgentRunContext, current_user: str) -> None:
+    """APPROVAL: Check governance, log decision, auto-approve (future: UI prompt).
+    Collects the planned actions that are not auto-approvable, records them in the
+    transcript and the audit log, then moves the run to EXECUTING. The decision is
+    always "auto_approved"; `requires_approval` is recorded but does not block.
+    """
+    auto_approve_tools = {name for name, p in TOOL_GOVERNANCE.items() if p["auto_approve"]}
+    # The plan is model output: tolerate a missing, null or non-list "steps" value.
+    steps = ctx.plan.get("steps")
+    if not isinstance(steps, list):
+        steps = []
+    # A malformed step counts as action None, the same as a step without "action".
+    planned = [s.get("action") if isinstance(s, dict) else None for s in steps]
+    # Non-string actions are never auto-approvable (this also avoids hashing them).
+    non_auto = [a for a in planned if not isinstance(a, str) or a not in auto_approve_tools]
+    requires = ctx.plan.get("requires_approval", False) or bool(non_auto)
+    ctx.transcript.append({
+        "state": AgentState.WAITING_APPROVAL.value,
+        "requires_approval": requires,
+        "non_auto_approve_steps": non_auto,
+        "decision": "auto_approved",
+    })
+    append_audit_event(
+        "agent_approval", user_email=current_user,
+        requires_approval=requires, non_auto_steps=non_auto, decision="auto_approved",
+    )
+    ctx.state = AgentState.EXECUTING
+async def _phase_execute(
+    ctx: AgentRunContext, req: AgentRequest, router, lang_hint: str,
+    current_user: str, max_steps: int,
+) -> None:
+    """EXECUTE: Executor role calls tools one at a time until final or budget exhausted.
+    Each iteration asks the model for one action, applies the loop guard and the
+    governance policy, runs the tool and appends the outcome to ctx.transcript.
+    The phase always ends by moving the run to VERIFYING.
+    """
+    # EXECUTING entries already in the transcript (earlier attempts) count against the budget.
+    exec_count = sum(1 for s in ctx.transcript if s.get("state") == AgentState.EXECUTING.value)
+    budget = max(1, max_steps - exec_count)
+    for _ in range(budget):
+        corrections_hint = (
+            "\n\nCritic corrections from previous attempt:\n"
+            + "\n".join(f"- {c}" for c in ctx.corrections)
+        ) if ctx.corrections else ""
-    for step in range(max_steps):
-        recent_context = build_recent_chat_context(conversation_id=req.conversation_id)
         context = (
-            f"{AGENT_SYSTEM_PROMPT}\n\n"
-            f"[LANGUAGE: {lang_hint}]\n\n"
+            f"{EXECUTOR_PROMPT}\n\n"
+            f"[LANGUAGE HINT: {lang_hint}]\n"
             f"Workspace root: {AGENT_ROOT}\n\n"
-            f"Recent conversation:\n{recent_context or '(none)'}\n\n"
-            f"User request:\n{req.message}\n\n"
-            f"Previous tool results:\n{json.dumps(transcript, ensure_ascii=False, indent=2)}"
+            f"PLAN:\n{json.dumps(ctx.plan, ensure_ascii=False)}\n\n"
+            f"Recent conversation:\n{build_recent_chat_context(conversation_id=req.conversation_id) or '(none)'}\n\n"
+            f"User request: {req.message}{corrections_hint}\n\n"
+            f"Execution transcript:\n{json.dumps(ctx.transcript, ensure_ascii=False, indent=2)}"
         )
         raw = await router.generate(
-            message="Choose the next agent action.",
-            context=context,
-            max_tokens=4096,
-            temperature=req.temperature,
+            message="Execute the next step.",
+            context=context, max_tokens=4096, temperature=req.temperature,
         )
         try:
             action = _extract_agent_action(str(raw))
         except ValueError as exc:
-            transcript.append({"step": step + 1, "action": "parse_error", "raw": str(raw), "error": str(exc)})
-            message = "작업 계획을 안정적으로 해석하지 못해 자동 실행을 중단했습니다. 요청을 더 짧고 구체적으로 다시 시도해 주세요."
-            save_to_history("user", req.message, source=req.source or "web", conversation_id=req.conversation_id)
-            save_to_history("assistant", message, source=req.source or "web", conversation_id=req.conversation_id)
-            created_files = _collect_created_files(transcript)
-            return {
-                "status": "ok",
-                "response": message,
-                "workspace": str(AGENT_ROOT),
-                "steps": transcript,
-                "created_files": created_files,
-            }
+            # An unparseable reply ends the loop; only the first 400 chars are kept.
+            ctx.transcript.append({
+                "state": AgentState.EXECUTING.value, "action": "parse_error",
+                "raw": str(raw)[:400], "error": str(exc),
+            })
+            break
+        name     = action.get("action")
+        thoughts = str(action.get("thoughts") or "")[:600]
+        args     = action.get("args") or {}
+        if not isinstance(args, dict):
+            # Model output is untrusted: a non-object "args" would break the
+            # .get()/.items() calls below, so treat it as "no arguments".
+            args = {}
-        name = action.get("action")
         if name == "final":
-            message = action.get("message", "작업을 완료했습니다.")
-            save_to_history("user", req.message, source=req.source or "web", conversation_id=req.conversation_id)
-            save_to_history("assistant", message, source=req.source or "web", conversation_id=req.conversation_id)
-            created_files = _collect_created_files(transcript)
-            return {"status": "ok", "response": message, "workspace": str(AGENT_ROOT), "steps": transcript, "created_files": created_files}
-        # Prevent repeated file/project creation loops with identical action+args.
-        last_step = transcript[-1] if transcript else None
-        current_args = action.get("args") or {}
+            ctx.final_message = action.get("message", "작업을 완료했습니다.")
+            ctx.transcript.append({
+                "state": AgentState.EXECUTING.value, "action": "final", "thoughts": thoughts,
+            })
+            ctx.state = AgentState.VERIFYING
+            return
+        # Loop guard: stop when a file-creating action repeats the previous
+        # EXECUTING entry's action and args after that entry produced a result.
+        exec_steps = [s for s in ctx.transcript if s.get("state") == AgentState.EXECUTING.value]
+        last = exec_steps[-1] if exec_steps else None
         if (
-            name in _FILE_CREATE_ACTIONS
-            and last_step
-            and last_step.get("action") == name
-            and (last_step.get("args") or {}) == current_args
-            and "result" in last_step
+            name in _FILE_CREATE_ACTIONS and last
+            and last.get("action") == name
+            and (last.get("args") or {}) == args
+            and "result" in last
         ):
-            message = "요청한 파일 생성을 이미 완료해서 반복 실행을 중단했습니다."
-            save_to_history("user", req.message, source=req.source or "web", conversation_id=req.conversation_id)
-            save_to_history("assistant", message, source=req.source or "web", conversation_id=req.conversation_id)
-            created_files = _collect_created_files(transcript)
-            return {"status": "ok", "response": message, "workspace": str(AGENT_ROOT), "steps": transcript, "created_files": created_files}
+            ctx.transcript.append({
+                "state": AgentState.EXECUTING.value, "action": name,
+                "error": "LOOP_DETECTED: identical action+args repeated — halted.",
+            })
+            break
         if name == "clear_history":
-            result = clear_history(current_args.get("keep_last", 0))
-            append_audit_event(
-                "history_delete",
-                user_email=current_user,
-                source=req.source or "agent",
-                keep_last=current_args.get("keep_last", 0),
-                removed=result.get("removed", 0),
-                kept=result.get("kept", 0),
-            )
-            transcript.append({"step": step + 1, "action": name, "args": current_args, "result": result})
+            # clear_history is handled before the governance lookup, so it is
+            # audited here explicitly; deleting history must leave a trace.
+            result = clear_history(args.get("keep_last", 0))
+            append_audit_event(
+                "history_delete",
+                user_email=current_user,
+                source=req.source or "agent",
+                keep_last=args.get("keep_last", 0),
+                removed=result.get("removed", 0),
+                kept=result.get("kept", 0),
+            )
+            ctx.transcript.append({
+                "state": AgentState.EXECUTING.value, "action": name,
+                "thoughts": thoughts, "args": args, "result": result,
+            })
             continue
-        risk = _agent_risk(name, current_args)
+        policy = _agent_policy(name, args)
+        risk   = _RISK_LEVEL_MAP.get(policy["risk"], "medium")
-        # Block system-path local_write even if the LLM tries it
-        if name == "local_write":
-            path = str(current_args.get("path", ""))
-            if any(path.startswith(p) for p in _LOCAL_WRITE_BLOCKED_PREFIXES):
-                transcript.append({
-                    "step": step + 1, "action": name, "args": current_args,
-                    "risk": "high", "error": f"BLOCKED: writing to system path is not allowed: {path}",
-                })
-                append_audit_event(
-                    "agent_blocked", user_email=current_user, source=req.source or "agent",
-                    action=name, path=path, reason="system_path",
-                )
-                continue
+        # Destructive policies are never executed: record the block, audit it, move on.
+        if policy["risk"] == "destructive":
+            ctx.transcript.append({
+                "state": AgentState.EXECUTING.value, "action": name,
+                "thoughts": thoughts, "args": args, "risk": risk,
+                "governance": dict(policy),
+                "error": f"BLOCKED: destructive action '{name}' not permitted in agent mode.",
+            })
+            append_audit_event(
+                "agent_blocked", user_email=current_user, source=req.source or "agent",
+                action=name, reason="destructive", governance=dict(policy),
+            )
+            continue
-        # Audit medium/high actions before execution
-        if risk in ("medium", "high"):
+        # Audit every action that is not auto-approved before running it;
+        # the "content" argument is left out of the audit record.
+        if not policy["auto_approve"]:
             append_audit_event(
                 "agent_exec", user_email=current_user, source=req.source or "agent",
-                step=step + 1, action=name, risk=risk,
-                args={k: v for k, v in (current_args or {}).items() if k != "content"},
+                state=AgentState.EXECUTING.value, action=name, risk=risk,
+                shell=policy["shell"], network=policy["network"],
+                destructive=policy["destructive"], sandbox=policy["sandbox"],
+                rollback=policy["rollback"],
+                args={k: v for k, v in args.items() if k != "content"},
             )
         try:
-            result = execute_tool(name, current_args)
-            transcript.append({"step": step + 1, "action": name, "args": current_args, "risk": risk, "result": result})
+            result = execute_tool(name, args)
+            ctx.transcript.append({
+                "state": AgentState.EXECUTING.value, "action": name,
+                "thoughts": thoughts, "args": args,
+                "risk": risk, "governance": dict(policy), "result": result,
+            })
         except (ToolError, KeyError, TypeError) as exc:
-            transcript.append({"step": step + 1, "action": name, "args": current_args, "risk": risk, "error": str(exc)})
+            # Tool failures are recorded and the loop goes on to the next step.
+            ctx.transcript.append({
+                "state": AgentState.EXECUTING.value, "action": name,
+                "thoughts": thoughts, "args": args,
+                "risk": risk, "governance": dict(policy), "error": str(exc),
+            })
+    ctx.state = AgentState.VERIFYING
-    summary_context = (
-        f"{AGENT_SYSTEM_PROMPT}\n\n"
-        f"Recent conversation:\n{build_recent_chat_context(conversation_id=req.conversation_id) or '(none)'}\n\n"
-        f"User request:\n{req.message}\n\n"
-        f"Tool transcript:\n{json.dumps(transcript, ensure_ascii=False, indent=2)}"
+async def _phase_verify(
+    ctx: AgentRunContext, req: AgentRequest, router, lang_hint: str, current_user: str,
+    max_retry: int = 3,
+) -> None:
+    """VERIFYING: Critic role evaluates transcript → DONE / EXECUTING (retry) / ROLLBACK / FAILED.
+    Sends the whole transcript to the model with CRITIC_PROMPT, records the verdict
+    in ctx.transcript, stores its corrections on ctx and sets the next ctx.state.
+    A retry is granted at most `max_retry` times; after that the run is FAILED.
+    """
+    context = (
+        f"{CRITIC_PROMPT}\n\n"
+        f"[LANGUAGE HINT: {lang_hint}]\n\n"
+        f"Original request: {req.message}\n"
+        f"Plan goal: {ctx.plan.get('goal', req.message)}\n\n"
+        f"Full transcript:\n{json.dumps(ctx.transcript, ensure_ascii=False, indent=2)}"
     )
-    summary = await router.generate(
-        message='Return only {"action":"final","message":"..."} summarizing the current result in Korean.',
-        context=summary_context,
-        max_tokens=1024,
-        temperature=0.1,
+    raw = await router.generate(
+        message="Review the execution transcript and return your verdict JSON.",
+        context=context, max_tokens=512, temperature=0.1,
     )
     try:
-        final_action = _extract_agent_action(str(summary))
-        message = final_action.get("message", str(summary))
+        verdict = _extract_agent_action(str(raw))
     except ValueError:
-        message = str(summary)
+        # NOTE(review): an unparseable critic reply is treated as PASS/DONE (fail-open).
+        verdict = {"action": "verdict", "verdict": "PASS", "next_state": "DONE",
+                   "reason": "Critic parse failed — assuming pass.", "corrections": [], "confidence": 0.7}
+    # Kept on ctx so the next execute attempt can show them to the executor.
+    ctx.corrections = verdict.get("corrections", [])
+    # Normalize legacy verdict next_state strings to current AgentState names
+    # NOTE(review): a verdict without "next_state" defaults to DONE, so the branch
+    # below ends the run as DONE even when "verdict" is not PASS — confirm intended.
+    raw_next = verdict.get("next_state", "DONE")
+    next_s = {"COMPLETE": "DONE", "RETRY": "EXECUTING"}.get(raw_next, raw_next)
+    ctx.transcript.append({
+        "state": AgentState.VERIFYING.value,
+        "verdict":     verdict.get("verdict", "PASS"),
+        "reason":      verdict.get("reason", ""),
+        "corrections": ctx.corrections,
+        "confidence":  verdict.get("confidence", 0.9),
+        "next_state":  next_s,
+    })
+    if verdict.get("verdict") == "PASS" or next_s == "DONE":
+        # Keep the executor's final message if there is one; otherwise use the critic's reason.
+        if not ctx.final_message:
+            ctx.final_message = verdict.get("reason", "작업이 완료되었습니다.")
+        ctx.state = AgentState.DONE
+    elif next_s == "ROLLBACK":
+        ctx.state = AgentState.ROLLBACK
+    elif next_s == "EXECUTING":
+        if ctx.retry_count >= max_retry:
+            ctx.final_message = (
+                f"최대 재시도({max_retry}회) 초과로 작업을 종료했습니다. "
+                f"마지막 비판: {verdict.get('reason', '(없음)')}"
+            )
+            ctx.state = AgentState.FAILED
+        else:
+            ctx.retry_count += 1
+            # Marker entry for the retry; it carries the EXECUTING state, so
+            # _phase_execute counts it when computing its remaining budget.
+            ctx.transcript.append({
+                "state": AgentState.EXECUTING.value,
+                "retry_attempt": ctx.retry_count,
+                "corrections": ctx.corrections,
+            })
+            ctx.state = AgentState.EXECUTING
+    else:
+        # Any other next_state value is unrecognized and ends the run as FAILED.
+        ctx.final_message = verdict.get("reason", "검증자가 인식되지 않은 다음 상태를 반환했습니다.")
+        ctx.state = AgentState.FAILED
+def _phase_rollback(ctx: AgentRunContext, current_user: str) -> None:
+    """ROLLBACK: attempt `git checkout -- <path>` for each edited file, then end as FAILED.
+    Only successful EXECUTING steps whose governance says rollback="git" are
+    considered. Each path is restored once, even if it was edited several times.
+    The outcome per path is appended to the transcript and written to the audit log.
+    """
+    import subprocess as _sp
+    rolled: list = []
+    seen_paths: set = set()
+    for step in ctx.transcript:
+        if step.get("state") != AgentState.EXECUTING.value:
+            continue
+        gov = step.get("governance", {})
+        if gov.get("rollback") != "git":
+            continue
+        result = step.get("result", {})
+        if not (isinstance(result, dict) and result.get("success")):
+            continue
+        raw_path = result.get("path") or (step.get("args") or {}).get("path", "")
+        if not raw_path:
+            continue
+        path = str(raw_path)
+        # A repeated checkout of the same file adds nothing but duplicate report rows.
+        if path in seen_paths:
+            continue
+        seen_paths.add(path)
+        try:
+            r = _sp.run(
+                ["git", "checkout", "--", path], cwd=str(AGENT_ROOT),
+                capture_output=True, text=True, timeout=10,
+            )
+            rolled.append({"path": path, "ok": r.returncode == 0, "stderr": r.stderr[:200]})
+        except Exception as exc:
+            # Best-effort: a missing git binary or a timeout is reported, not raised.
+            rolled.append({"path": path, "ok": False, "error": str(exc)})
+    ctx.transcript.append({"state": AgentState.ROLLBACK.value, "rolled_back": rolled})
+    recovered = [r["path"] for r in rolled if r.get("ok")]
+    ctx.final_message = (
+        f"실행 실패로 롤백했습니다. 복구 파일: {recovered}"
+        if recovered
+        else "롤백을 시도했으나 복구할 파일이 없거나 git이 초기화되지 않았습니다."
+    )
+    append_audit_event("agent_rollback", user_email=current_user, rolled_back=rolled)
+    # Rollback is a recovery from a failed verification — terminal state is FAILED
+    ctx.state = AgentState.FAILED
+async def _phase_memory_update(
+    ctx: AgentRunContext, req: AgentRequest, router, current_user: str,
+) -> None:
+    """Background: Memory Updater role extracts learnings once a run has ended.
+
+    Sends the request message and the last five transcript entries to
+    ``router.generate`` under ``MEMORY_UPDATER_PROMPT``, parses the reply with
+    ``_extract_agent_action`` and, when the reply sets ``save_to_knowledge``
+    and carries ``learnings``, stores them through ``tools.knowledge_save``.
+
+    This is a best-effort task: any failure is logged and never propagated,
+    so it cannot affect the response that scheduled it.
+
+    Args:
+        ctx: Run context whose ``transcript`` is summarised.
+        req: Original agent request; ``req.message`` is used for the prompt
+            and the saved note's title.
+        router: Model router exposing an awaitable ``generate``.
+        current_user: Unused here; kept for signature parity with the other phases.
+    """
+    try:
+        # Built inside the try block: json.dumps raises on transcript entries
+        # that are not JSON-serialisable, and nobody awaits this task.
+        context = (
+            f"{MEMORY_UPDATER_PROMPT}\n\n"
+            f"Completed task: {req.message}\n\n"
+            f"Last 5 transcript steps:\n{json.dumps(ctx.transcript[-5:], ensure_ascii=False)}"
+        )
+        raw = await router.generate(
+            message="Extract learnings from this completed task.",
+            context=context, max_tokens=256, temperature=0.1,
+        )
+        mem = _extract_agent_action(str(raw))
+        if not isinstance(mem, dict):
+            return
+        learnings = mem.get("learnings")
+        if not (mem.get("save_to_knowledge") and learnings):
+            return
+        # A bare string would otherwise be joined character by character.
+        if isinstance(learnings, str):
+            learnings = [learnings]
+        from tools import knowledge_save
+        knowledge_save(
+            "\n".join(str(item) for item in learnings),
+            folder="30_Projects",
+            title=f"Agent: {req.message[:60]}",
+        )
+    except Exception as exc:
+        # Deliberately broad: this runs detached from the request, so log and stop.
+        logging.warning("agent memory update failed: %s", exc)
+# ── Eval harness ──────────────────────────────────────────────────────────────
+@app.post("/agent/eval")
+async def agent_eval(req: AgentEvalRequest, request: Request):
+    """Run a skill's eval cases from schema.json and return pass/fail per case.
+
+    ``req.skill`` names a directory under ``skills/`` next to this file and
+    ``req.case_id`` optionally narrows the run to a single case. Each case's
+    ``input`` is passed to ``execute_tool`` under the schema's ``action``
+    (defaulting to the skill name). A case passes when its ``pass_criteria``
+    mentions ``success == true`` / ``success == false`` and the result agrees;
+    cases with any other criteria are reported as passed and need manual
+    review. A case whose tool call raises is reported as failed with ``error``.
+
+    Returns:
+        A dict with ``skill``, ``action``, ``total``, ``passed``, ``failed``
+        and the per-case ``results`` (or a zero-count dict with ``message``
+        when no case matches).
+
+    Raises:
+        HTTPException: 404 when the skill does not resolve to a directory
+            inside ``skills/`` or has no schema.json; 500 when schema.json
+            cannot be read or is not a JSON object.
+    """
+    require_user(request)
+    skills_root = Path(__file__).resolve().parent / "skills"
+    skill_dir = (skills_root / req.skill).resolve()
+    schema_path = skill_dir / "schema.json"
+    # req.skill is client-supplied: reject names such as "../x" or absolute
+    # paths that resolve outside skills/ before reading anything from disk.
+    if skills_root not in skill_dir.parents or not schema_path.is_file():
+        raise HTTPException(404, detail=f"Skill '{req.skill}' not found or missing schema.json")
+    try:
+        schema = json.loads(schema_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as exc:
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        raise HTTPException(
+            500, detail=f"Skill '{req.skill}' has an unreadable schema.json: {exc}"
+        ) from exc
+    if not isinstance(schema, dict):
+        raise HTTPException(500, detail=f"Skill '{req.skill}' schema.json must be a JSON object")
+    eval_cases = schema.get("evals", [])
+    if req.case_id:
+        eval_cases = [c for c in eval_cases if c.get("id") == req.case_id]
+    if not eval_cases:
+        return {"skill": req.skill, "total": 0, "passed": 0, "failed": 0, "results": [],
+                "message": "No eval cases defined in schema.json"}
+    action_name = schema.get("action", req.skill)
+    results = []
+    for case in eval_cases:
+        case_id = case.get("id", "?")
+        try:
+            result   = execute_tool(action_name, case.get("input", {}))
+            criteria = case.get("pass_criteria", "")
+            if "success == true" in criteria:
+                passed = result.get("success") is True
+            elif "success == false" in criteria:
+                passed = result.get("success") is False
+            else:
+                passed = True  # manual review required
+            results.append({"id": case_id, "description": case.get("description", ""),
+                            "passed": passed, "result": result, "pass_criteria": criteria})
+        except Exception as exc:
+            # A raising tool call is a failed case, not a failed request.
+            results.append({"id": case_id, "description": case.get("description", ""),
+                            "passed": False, "error": str(exc),
+                            "pass_criteria": case.get("pass_criteria", "")})
+    n_passed = sum(1 for r in results if r.get("passed") is True)
+    return {
+        "skill": req.skill, "action": action_name,
+        "total": len(results), "passed": n_passed, "failed": len(results) - n_passed,
+        "results": results,
+    }
+@app.post("/agent")
+async def agent(req: AgentRequest, request: Request):
+    """Natural-language local agent.
+    State machine:
+        IDLE → PLANNING → WAITING_APPROVAL → EXECUTING → VERIFYING
+                                       ↓                     ↓
+                                     FAILED       DONE | EXECUTING(retry) | ROLLBACK
+                                                                                  ↓
+                                                                               FAILED
+
+    Drives an ``AgentRunContext`` through the phase handlers until its state
+    is one of ``AGENT_TERMINAL_STATES``, then stores the user message and the
+    final reply in the chat history.
+
+    Returns:
+        A dict with ``status`` ("ok" only when the final state is DONE,
+        otherwise "failed"), ``response``, ``workspace``, ``steps`` (the
+        transcript), ``state_history``, ``final_state`` and ``created_files``.
+
+    Raises:
+        HTTPException: 400 when the router has no model loaded.
+    """
+    current_user = require_user(request)
+    enforce_rate_limit(current_user, "agent")
+    if not router.current_model_id:
+        raise HTTPException(status_code=400, detail="No model loaded. Call /models/load first.")
+    ensure_agent_root()
+    lang = detect_language(req.message)
+    lang_hint = _LANG_HINT[lang]
+    # Clamp the client-supplied step budget to the range 1..50.
+    max_steps = max(1, min(req.max_steps, 50))
+    max_retry = 3
+    ctx = AgentRunContext()
+    # Each iteration dispatches on ctx.state; the phase handlers receive ctx
+    # and are responsible for moving it to the next state.
+    while ctx.state not in AGENT_TERMINAL_STATES:
+        ctx.state_history.append(ctx.state.value)
+        # Hard guard against infinite state loops
+        if len(ctx.state_history) > 200:
+            ctx.final_message = "에이전트 상태 머신이 최대 반복(200)에 도달해 중단했습니다."
+            ctx.state = AgentState.FAILED
+            break
+        if ctx.state == AgentState.IDLE:
+            ctx.state = AgentState.PLANNING
+        elif ctx.state == AgentState.PLANNING:
+            await _phase_plan(ctx, req, router, lang_hint, current_user)
+        elif ctx.state == AgentState.WAITING_APPROVAL:
+            _phase_approval(ctx, current_user)
+        elif ctx.state == AgentState.EXECUTING:
+            await _phase_execute(ctx, req, router, lang_hint, current_user, max_steps)
+        elif ctx.state == AgentState.VERIFYING:
+            await _phase_verify(ctx, req, router, lang_hint, current_user, max_retry)
+        elif ctx.state == AgentState.ROLLBACK:
+            _phase_rollback(ctx, current_user)
+        else:
+            # Unknown, non-terminal state: stop the run instead of spinning.
+            ctx.state = AgentState.FAILED
+    # Record terminal state in history for clients
+    ctx.state_history.append(ctx.state.value)
+    # Fire-and-forget memory update — does not block the response
+    # NOTE(review): this is scheduled for every terminal state (FAILED too) and
+    # the returned task is not stored anywhere; asyncio keeps only a weak
+    # reference to tasks, so confirm both points are intended.
+    asyncio.create_task(_phase_memory_update(ctx, req, router, current_user))
+    message = ctx.final_message or "작업을 완료했습니다."
     save_to_history("user", req.message, source=req.source or "web", conversation_id=req.conversation_id)
     save_to_history("assistant", message, source=req.source or "web", conversation_id=req.conversation_id)
-    created_files = _collect_created_files(transcript)
-    return {"status": "ok", "response": message, "workspace": str(AGENT_ROOT), "steps": transcript, "created_files": created_files}
+    created_files = _collect_created_files(ctx.transcript)
+    return {
+        "status": "ok" if ctx.state == AgentState.DONE else "failed",
+        "response": message,
+        "workspace": str(AGENT_ROOT),
+        "steps": ctx.transcript,
+        "state_history": ctx.state_history,
+        "final_state": ctx.state.value,
+        "created_files": created_files,
+    }
 # ── Direct Tool API ───────────────────────────────────────────────────────────
@@ -3297,9 +4068,13 @@ async def tools_workspace_tree(req: ToolWorkspaceTreeRequest, request: Request):
 @app.post("/tools/read_file")
-async def tools_read_file(req: ToolPathRequest, request: Request):
+async def tools_read_file(req: ToolReadFileRequest, request: Request):
+    """Call ``read_file`` with the request's path, offset, limit and line_numbers.
+
+    Returns ``{"status": "ok", "workspace": ..., "result": ...}``; a
+    ``ToolError`` from the tool is reported as HTTP 400 with its message.
+    """
     require_user(request)
-    return _tool_response(read_file, req.path)
+    try:
+        return {"status": "ok", "workspace": str(AGENT_ROOT),
+                "result": read_file(req.path, offset=req.offset, limit=req.limit, line_numbers=req.line_numbers)}
+    except ToolError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
 @app.post("/tools/write_file")
@@ -3308,12 +4083,51 @@ async def tools_write_file(req: ToolWriteFileRequest, request: Request):
     return _tool_response(write_file, req.path, req.content)
+@app.post("/tools/edit_file")
+async def tools_edit_file(req: ToolEditFileRequest, request: Request):
+    """Call ``edit_file`` to replace ``old_string`` with ``new_string`` in ``path``.
+
+    ``replace_all`` is forwarded to the tool unchanged. Returns
+    ``{"status": "ok", "workspace": ..., "result": ...}``; a ``ToolError``
+    from the tool is reported as HTTP 400 with its message.
+    """
+    require_user(request)
+    try:
+        return {"status": "ok", "workspace": str(AGENT_ROOT),
+                "result": edit_file(req.path, req.old_string, req.new_string, replace_all=req.replace_all)}
+    except ToolError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
 @app.post("/tools/search_files")
 async def tools_search_files(req: ToolSearchFilesRequest, request: Request):
+    """Run ``search_files(query, path, max_results)`` through ``_tool_response``."""
     require_user(request)
     return _tool_response(search_files, req.query, req.path, req.max_results)
+@app.post("/tools/grep")
+async def tools_grep(req: ToolGrepRequest, request: Request):
+    """Call ``grep`` with the request's pattern and search options.
+
+    Forwards ``path``, ``glob``, ``max_results``, ``case_insensitive`` and
+    ``context_lines`` as keyword arguments. Returns
+    ``{"status": "ok", "workspace": ..., "result": ...}``; a ``ToolError``
+    from the tool is reported as HTTP 400 with its message.
+    """
+    require_user(request)
+    try:
+        return {"status": "ok", "workspace": str(AGENT_ROOT),
+                "result": grep(
+                    req.pattern,
+                    path=req.path,
+                    glob=req.glob,
+                    max_results=req.max_results,
+                    case_insensitive=req.case_insensitive,
+                    context_lines=req.context_lines,
+                )}
+    except ToolError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+@app.post("/tools/todo_read")
+async def tools_todo_read(request: Request):
+    """Run ``todo_read`` (no arguments) through ``_tool_response``."""
+    require_user(request)
+    return _tool_response(todo_read)
+@app.post("/tools/todo_write")
+async def tools_todo_write(req: ToolTodoWriteRequest, request: Request):
+    """Run ``todo_write(req.todos)`` through ``_tool_response``."""
+    require_user(request)
+    return _tool_response(todo_write, req.todos)
 @app.post("/tools/clear_history")
 async def tools_clear_history(req: ToolClearHistoryRequest, request: Request):
     current_user = require_user(request)
@@ -3378,21 +4192,28 @@ async def tools_pdf_pages(path: str, request: Request):
     target = Path(path).expanduser().resolve()
     if not target.exists() or not target.is_file():
         raise HTTPException(status_code=404, detail="File not found")
+    import fitz  # PyMuPDF
+    doc = None
     try:
-        import fitz  # PyMuPDF
         doc = fitz.open(str(target))
+        total = len(doc)
         pages = []
         for i, page in enumerate(doc):
             if i >= 20:  # 최대 20페이지
                 break
-            mat = fitz.Matrix(1.5, 1.5)  # 1.5x 해상도
+            mat = fitz.Matrix(1.5, 1.5)
             pix = page.get_pixmap(matrix=mat)
             b64 = base64.b64encode(pix.tobytes("png")).decode()
             pages.append({"page": i + 1, "b64": b64})
-        doc.close()
-        return {"total": len(doc), "pages": pages}
+        return {"total": total, "pages": pages}
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"PDF 렌더링 실패: {e}")
+    finally:
+        if doc is not None:
+            try:
+                doc.close()
+            except Exception as e:
+                logging.warning("fitz doc close failed: %s", e)
 @app.get("/tools/download")
@@ -3416,6 +4237,7 @@ async def tools_download(path: str, request: Request):
 @app.post("/upload/document")
 async def upload_document(request: Request, file: UploadFile = File(...)):
     current_user = require_user(request)
+    enforce_rate_limit(current_user, "upload")
     """Upload a document and extract text (PDF, DOCX, XLSX, PPTX, TXT, MD, CSV)."""
     suffix = Path(file.filename or "upload").suffix.lower()
     allowed = {".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".csv"}
@@ -3424,6 +4246,9 @@ async def upload_document(request: Request, file: UploadFile = File(...)):
     contents = await file.read()
     if len(contents) > 10 * 1024 * 1024:
         raise HTTPException(status_code=400, detail="파일이 너무 큽니다. 최대 10MB.")
+    # MIME sniff — verify the bytes actually match the claimed extension (cheap header check)
+    if not _bytes_match_extension(contents, suffix):
+        raise HTTPException(status_code=400, detail=f"파일 내용이 확장자({suffix})와 일치하지 않습니다.")
     with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
         tmp.write(contents)
         tmp_path = tmp.name
@@ -3879,61 +4704,96 @@ async def tools_deploy_project(req: ToolScriptRequest, request: Request):
     return _tool_response(deploy_project, req.cwd, req.script)
+# Tool name -> one-line human-readable description. The /mcp/tools endpoint
+# iterates this mapping in insertion order and attaches permission and
+# governance data to each entry.
+_MCP_TOOL_DESCRIPTIONS: Dict[str, str] = {
+    "list_dir":              "List files in the agent workspace.",
+    "workspace_tree":        "Return a recursive workspace tree.",
+    "read_file":             "Read a UTF-8 file from the workspace with optional line numbers and offset/limit slicing.",
+    "write_file":            "Write a UTF-8 file inside the workspace (new files / full rewrites).",
+    "edit_file":             "Precise diff-style edit: replace exact old_string with new_string. Requires unique match unless replace_all=true.",
+    "search_files":          "Substring search in text files (legacy).",
+    "grep":                  "Regex search across the workspace with line numbers and optional context.",
+    "todo_read":             "Read the agent's persistent TODO list for the current workspace.",
+    "todo_write":            "Replace the agent's TODO list (id, content, status: pending/in_progress/completed).",
+    "clear_history":         "Clear chat history to reduce context and speed up responses.",
+    "inspect_html":          "Inspect local HTML structure and assets.",
+    "preview_url":           "Return a server URL for a workspace file.",
+    "create_docx":           "Create a Word DOCX document in the agent workspace.",
+    "create_xlsx":           "Create an XLSX spreadsheet in the agent workspace.",
+    "create_pptx":           "Create a PPTX presentation deck in the agent workspace.",
+    "create_pdf":            "Create a PDF document in the agent workspace.",
+    "local_list":            "List any local folder (requires user permission via UI).",
+    "local_read":            "Read any local file (requires user permission via UI).",
+    "local_write":           "Write any local file (requires user permission via UI).",
+    "read_document":         "Extract text from PDF, DOCX, XLSX, PPTX, TXT, MD, CSV files.",
+    "computer_screenshot":   "Capture the current Mac screen as base64 PNG.",
+    "computer_open_app":     "Open or focus a Mac app, e.g. Google Chrome.",
+    "computer_open_url":     "Open a URL in a Mac app, e.g. Google Chrome.",
+    "computer_click":        "Click at screen coordinates (x, y).",
+    "computer_type":         "Type text at the current focus position.",
+    "computer_key":          "Press a keyboard key or shortcut (e.g. 'command+c').",
+    "computer_scroll":       "Scroll at screen coordinates.",
+    "computer_move":         "Move the mouse to screen coordinates.",
+    "computer_drag":         "Drag from (x1,y1) to (x2,y2).",
+    "computer_status":       "Check if Mac Computer Use (pyautogui) is available.",
+    "chrome_status":         "Report Chrome desktop bridge availability.",
+    "computer_use_status":   "Report Mac Computer Use bridge availability.",
+    "knowledge_save":        "Save a note into the local knowledge garden.",
+    "knowledge_search":      "Search the local knowledge garden.",
+    "knowledge_tree":        "List local knowledge garden markdown files.",
+    "knowledge_graph_ingest":"Ingest a message, AI answer, or connector event into the SQLite knowledge graph.",
+    "knowledge_graph_search":"Search graph nodes, summaries, and JSON metadata.",
+    "knowledge_graph_graph": "Return Obsidian-style graph nodes and edges.",
+    "knowledge_graph_context":"Return compact graph-backed RAG context for a prompt.",
+    "obsidian_save":         "Save a note into the Obsidian-compatible memory vault.",
+    "obsidian_search":       "Search the Obsidian-compatible memory vault.",
+    "obsidian_tree":         "List Obsidian memory vault markdown files.",
+    "git_status":            "Read-only local git status inside the workspace.",
+    "git_diff":              "Read-only local git diff inside the workspace.",
+    "git_log":               "Read-only local git log inside the workspace.",
+    "git_show":              "Read-only local git show --stat inside the workspace.",
+    "network_status":        "Get current local/private IP, public IP, hostname, and Wi-Fi info.",
+    "run_command":           "Run an allowlisted local command inside the workspace.",
+    "build_project":         "Run an allowlisted package.json build/compile/typecheck/test script to verify changes actually work.",
+    "deploy_project":        "Run an allowlisted package.json deploy/preview/release/package installer script (pkg/exe).",
+}
+@app.get("/tools/permissions")
+async def tools_permissions(request: Request):
+    """Compact tool permission view (tool / risk / requires_approval / network).
+    A simpler authorization-layer summary derived from TOOL_GOVERNANCE.
+    Use /mcp/tools for the full 7-dimensional governance object.
+
+    Returns:
+        ``{"status": "ok", "permissions": ...}`` where ``permissions`` is
+        whatever ``list_tool_permissions()`` returns.
+    """
+    require_user(request)
+    return {"status": "ok", "permissions": list_tool_permissions()}
 @app.get("/mcp/tools")
 async def mcp_tools():
+    """List the MCP registry items and every described tool with its governance.
+
+    For each entry of ``_MCP_TOOL_DESCRIPTIONS`` the response carries the
+    name, description, ``get_tool_permission(name)`` and the seven governance
+    fields (risk, destructive, shell, network, auto_approve, sandbox,
+    rollback). Tools missing from ``TOOL_GOVERNANCE`` use
+    ``_TOOL_GOVERNANCE_DEFAULT``.
+
+    NOTE(review): unlike /tools/permissions, this handler takes no request
+    and does not call ``require_user`` — confirm it is meant to be public.
+    """
     installed = load_mcp_installs().get("installed", {})
+    tools = []
+    for name, description in _MCP_TOOL_DESCRIPTIONS.items():
+        # Fall back to the default policy for tools without an explicit entry.
+        policy = TOOL_GOVERNANCE.get(name, _TOOL_GOVERNANCE_DEFAULT)
+        tools.append({
+            "name": name,
+            "description": description,
+            "permission": get_tool_permission(name),
+            "governance": {
+                "risk":         policy["risk"],
+                "destructive":  policy["destructive"],
+                "shell":        policy["shell"],
+                "network":      policy["network"],
+                "auto_approve": policy["auto_approve"],
+                "sandbox":      policy["sandbox"],
+                "rollback":     policy["rollback"],
+            },
+        })
     return {
         "status": "ok",
         "workspace": str(AGENT_ROOT),
         "installed_mcps": [mcp_public_item(item, installed) for item in MCP_REGISTRY],
-        "tools": [
-            {"name": "list_dir", "description": "List files in the agent workspace."},
-            {"name": "workspace_tree", "description": "Return a recursive workspace tree."},
-            {"name": "read_file", "description": "Read a UTF-8 file from the workspace."},
-            {"name": "write_file", "description": "Write a UTF-8 file inside the workspace."},
-            {"name": "search_files", "description": "Search text files inside the workspace."},
-            {"name": "clear_history", "description": "Clear chat history to reduce context and speed up responses."},
-            {"name": "inspect_html", "description": "Inspect local HTML structure and assets."},
-            {"name": "preview_url", "description": "Return a server URL for a workspace file."},
-            {"name": "create_docx", "description": "Create a Word DOCX document in the agent workspace."},
-            {"name": "create_xlsx", "description": "Create an XLSX spreadsheet in the agent workspace."},
-            {"name": "create_pptx", "description": "Create a PPTX presentation deck in the agent workspace."},
-            {"name": "create_pdf", "description": "Create a PDF document in the agent workspace."},
-            {"name": "local_list", "description": "List any local folder (requires user permission via UI)."},
-            {"name": "local_read", "description": "Read any local file (requires user permission via UI)."},
-            {"name": "local_write", "description": "Write any local file (requires user permission via UI)."},
-            {"name": "read_document", "description": "Extract text from PDF, DOCX, XLSX, PPTX, TXT, MD, CSV files."},
-            {"name": "computer_screenshot", "description": "Capture the current Mac screen as base64 PNG."},
-            {"name": "computer_open_app", "description": "Open or focus a Mac app, e.g. Google Chrome."},
-            {"name": "computer_open_url", "description": "Open a URL in a Mac app, e.g. Google Chrome."},
-            {"name": "computer_click", "description": "Click at screen coordinates (x, y)."},
-            {"name": "computer_type", "description": "Type text at the current focus position."},
-            {"name": "computer_key", "description": "Press a keyboard key or shortcut (e.g. 'command+c')."},
-            {"name": "computer_scroll", "description": "Scroll at screen coordinates."},
-            {"name": "computer_move", "description": "Move the mouse to screen coordinates."},
-            {"name": "computer_drag", "description": "Drag from (x1,y1) to (x2,y2)."},
-            {"name": "computer_status", "description": "Check if Mac Computer Use (pyautogui) is available."},
-            {"name": "chrome_status", "description": "Report Chrome desktop bridge availability."},
-            {"name": "computer_use_status", "description": "Report Mac Computer Use bridge availability."},
-            {"name": "knowledge_save", "description": "Save a note into the local knowledge garden."},
-            {"name": "knowledge_search", "description": "Search the local knowledge garden."},
-            {"name": "knowledge_tree", "description": "List local knowledge garden markdown files."},
-            {"name": "knowledge_graph_ingest", "description": "Ingest a message, AI answer, or connector event into the SQLite knowledge graph."},
-            {"name": "knowledge_graph_search", "description": "Search graph nodes, summaries, and JSON metadata."},
-            {"name": "knowledge_graph_graph", "description": "Return Obsidian-style graph nodes and edges."},
-            {"name": "knowledge_graph_context", "description": "Return compact graph-backed RAG context for a prompt."},
-            {"name": "obsidian_save", "description": "Save a note into the Obsidian-compatible memory vault."},
-            {"name": "obsidian_search", "description": "Search the Obsidian-compatible memory vault."},
-            {"name": "obsidian_tree", "description": "List Obsidian memory vault markdown files."},
-            {"name": "git_status", "description": "Read-only local git status inside the workspace."},
-            {"name": "git_diff", "description": "Read-only local git diff inside the workspace."},
-            {"name": "git_log", "description": "Read-only local git log inside the workspace."},
-            {"name": "git_show", "description": "Read-only local git show --stat inside the workspace."},
-            {"name": "network_status", "description": "Get current local/private IP, public IP, hostname, and Wi-Fi info."},
-            {"name": "run_command", "description": "Run an allowlisted local command inside the workspace."},
-            {"name": "build_project", "description": "Run an allowlisted package.json build/compile/typecheck/test script."},
-            {"name": "deploy_project", "description": "Run an allowlisted package.json deploy/preview/release/package installer script (pkg/exe)."},
-        ],
+        "tools": tools,
     }