ltcai 0.1.8 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +141 -289
  2. package/docs/CHANGELOG.md +227 -0
  3. package/docs/architecture.md +121 -0
  4. package/docs/mcp-tools.md +116 -0
  5. package/docs/privacy.md +74 -0
  6. package/docs/public-deploy.md +137 -0
  7. package/docs/security-model.md +121 -0
  8. package/knowledge_graph.py +18 -5
  9. package/ltcai_cli.py +2 -2
  10. package/package.json +1 -1
  11. package/server.py +1140 -280
  12. package/skills/SKILL_TEMPLATE.md +61 -29
  13. package/skills/code_review/SKILL.md +28 -0
  14. package/skills/code_review/examples.md +59 -0
  15. package/skills/code_review/risk.json +9 -0
  16. package/skills/code_review/schema.json +65 -0
  17. package/skills/data_analysis/SKILL.md +28 -0
  18. package/skills/data_analysis/examples.md +62 -0
  19. package/skills/data_analysis/risk.json +9 -0
  20. package/skills/data_analysis/schema.json +61 -0
  21. package/skills/file_edit/SKILL.md +33 -0
  22. package/skills/file_edit/examples.md +45 -0
  23. package/skills/file_edit/risk.json +9 -0
  24. package/skills/file_edit/schema.json +60 -0
  25. package/skills/summarize_document/SKILL.md +68 -0
  26. package/skills/summarize_document/examples.md +65 -0
  27. package/skills/summarize_document/risk.json +9 -0
  28. package/skills/summarize_document/schema.json +71 -0
  29. package/skills/web_search/SKILL.md +28 -0
  30. package/skills/web_search/examples.md +61 -0
  31. package/skills/web_search/risk.json +9 -0
  32. package/skills/web_search/schema.json +62 -0
  33. package/tests/integration/__pycache__/__init__.cpython-314.pyc +0 -0
  34. package/tests/integration/__pycache__/test_api.cpython-314-pytest-9.0.3.pyc +0 -0
  35. package/tests/unit/__pycache__/test_security.cpython-314-pytest-9.0.3.pyc +0 -0
  36. package/tests/unit/__pycache__/test_tools.cpython-314-pytest-9.0.3.pyc +0 -0
  37. package/tests/unit/test_security.py +125 -0
  38. package/tests/unit/test_tools.py +194 -1
  39. package/tools.py +264 -4
package/server.py CHANGED
@@ -29,7 +29,8 @@ try:
29
29
  except Exception as e:
30
30
  print(f"⚠️ MLX Metal context unavailable: {e}")
31
31
  mx = None
32
- from typing import AsyncIterator, Optional, List, Dict
32
+ from enum import Enum
33
+ from typing import AsyncIterator, Optional, List, Dict, TypedDict
33
34
 
34
35
  import uvicorn
35
36
  from fastapi import FastAPI, File, HTTPException, Request, Cookie, UploadFile
@@ -65,12 +66,14 @@ from tools import (
65
66
  read_document,
66
67
  deploy_project,
67
68
  desktop_bridge_status,
69
+ edit_file,
68
70
  ensure_agent_root,
69
71
  execute_tool,
70
72
  git_diff,
71
73
  git_log,
72
74
  git_show,
73
75
  git_status,
76
+ grep,
74
77
  inspect_html,
75
78
  knowledge_save,
76
79
  knowledge_search,
@@ -87,6 +90,8 @@ from tools import (
87
90
  read_file,
88
91
  run_command,
89
92
  search_files,
93
+ todo_read,
94
+ todo_write,
90
95
  workspace_tree,
91
96
  write_file,
92
97
  )
@@ -99,19 +104,15 @@ except Exception:
99
104
  from datetime import datetime
100
105
 
101
106
  def detect_language(text: str) -> str:
102
- """Detect language: 'ko' (Korean), 'zh' (Chinese), or 'en' (English)."""
107
+ """Detect language: 'ko' (Korean) or 'en' (English)."""
103
108
  total = max(len(text), 1)
104
109
  ko = sum(1 for c in text if '가' <= c <= '힣')
105
- zh = sum(1 for c in text if '一' <= c <= '鿿')
106
110
  if ko / total > 0.05:
107
111
  return "ko"
108
- if zh / total > 0.05:
109
- return "zh"
110
112
  return "en"
111
113
 
112
114
  _LANG_HINT = {
113
115
  "ko": "Respond in Korean (한국어로 답변하세요).",
114
- "zh": "Respond in Chinese (用中文回答).",
115
116
  "en": "Respond in English.",
116
117
  }
117
118
 
@@ -217,17 +218,25 @@ def verify_password(password: str, hashed: str) -> bool:
217
218
  return False
218
219
 
219
220
  def verify_and_migrate_password(email: str, plain: str, stored: str, users: Dict) -> bool:
220
- """평문 비밀번호를 투명하게 해시로 마이그레이션."""
221
+ """평문 비밀번호를 투명하게 해시로 마이그레이션. 마이그레이션 발생 시 audit log 남김."""
221
222
  if ":" in stored and len(stored) > 64:
222
223
  return verify_password(plain, stored)
223
224
  if plain == stored:
224
225
  users[email]["password"] = hash_password(plain)
225
226
  save_users(users)
227
+ try:
228
+ append_audit_event("password_migrated_from_plaintext", user_email=email)
229
+ except Exception as e:
230
+ logging.warning("audit log failed on password migration: %s", e)
231
+ logging.info("Migrated plaintext password to bcrypt hash for %s", email)
226
232
  return True
227
233
  return False
228
234
 
229
235
  # ── Session store (file-backed, survives restarts) ────────────────────────────
230
- _SESSION_TTL = 60 * 60 * 24 * 7 # 7 days
236
+ # 24-hour TTL with sliding-window refresh: every authenticated request bumps
237
+ # created_at, so an active user stays logged in while idle sessions auto-expire.
238
+ _SESSION_TTL = 60 * 60 * 24 # 24 hours
239
+ _SESSION_REFRESH_THRESHOLD = 60 * 15 # only persist if >15 min since last bump (write amplification guard)
231
240
  _sessions_lock = threading.Lock()
232
241
 
233
242
  def _sessions_file() -> Path:
@@ -239,15 +248,15 @@ def _load_sessions() -> Dict[str, tuple]:
239
248
  if f.exists():
240
249
  raw = json.loads(f.read_text())
241
250
  return {k: tuple(v) for k, v in raw.items()}
242
- except Exception:
243
- pass
251
+ except Exception as e:
252
+ logging.warning("_load_sessions failed (starting empty): %s", e)
244
253
  return {}
245
254
 
246
255
  def _persist_sessions(sessions: Dict[str, tuple]) -> None:
247
256
  try:
248
257
  _sessions_file().write_text(json.dumps({k: list(v) for k, v in sessions.items()}, ensure_ascii=False))
249
- except Exception:
250
- pass
258
+ except Exception as e:
259
+ logging.warning("_persist_sessions failed: %s", e)
251
260
 
252
261
  _sessions: Dict[str, tuple] = _load_sessions()
253
262
 
@@ -259,15 +268,21 @@ def create_session(email: str) -> str:
259
268
  return token
260
269
 
261
270
  def get_session_email(token: str) -> Optional[str]:
271
+ """Return email for a valid session, sliding the expiry forward on activity."""
272
+ now = time.time()
262
273
  with _sessions_lock:
263
274
  entry = _sessions.get(token)
264
275
  if entry is None:
265
276
  return None
266
277
  email, created_at = entry
267
- if time.time() - created_at > _SESSION_TTL:
278
+ if now - created_at > _SESSION_TTL:
268
279
  _sessions.pop(token, None)
269
280
  _persist_sessions(_sessions)
270
281
  return None
282
+ # Sliding refresh: only update if the timestamp drifted enough to be worth a disk write
283
+ if now - created_at > _SESSION_REFRESH_THRESHOLD:
284
+ _sessions[token] = (email, now)
285
+ _persist_sessions(_sessions)
271
286
  return email
272
287
 
273
288
  def invalidate_session(token: str) -> None:
@@ -628,7 +643,8 @@ def load_vpc_config() -> Dict:
628
643
  with open(VPC_FILE, "r", encoding="utf-8") as f:
629
644
  stored = json.load(f)
630
645
  return {**DEFAULT_VPC_CONFIG, **stored}
631
- except Exception:
646
+ except Exception as e:
647
+ logging.warning("load_vpc_config failed (using defaults): %s", e)
632
648
  return DEFAULT_VPC_CONFIG.copy()
633
649
 
634
650
  def save_vpc_config(config: Dict):
@@ -645,7 +661,8 @@ def load_mcp_installs() -> Dict:
645
661
  if "installed" not in data:
646
662
  data["installed"] = {}
647
663
  return data
648
- except Exception:
664
+ except Exception as e:
665
+ logging.warning("load_mcp_installs failed: %s", e)
649
666
  return {"installed": {}, "updated_at": None}
650
667
 
651
668
  def save_mcp_installs(data: Dict):
@@ -1048,6 +1065,71 @@ def require_user(request: Request) -> str:
1048
1065
  raise HTTPException(status_code=401, detail="인증이 필요합니다.")
1049
1066
  return email or ""
1050
1067
 
1068
+
1069
# ── Rate limiting ─────────────────────────────────────────────────────────────
# Per-user token bucket. Disabled when LATTICEAI_RATE_LIMIT=0 (default: enabled).
_RATE_LIMIT_ENABLED = os.getenv("LATTICEAI_RATE_LIMIT", "1") != "0"
# Bucket state keyed by "<email>:<bucket_key>"; each value holds the current
# token count ("tokens") and the monotonic timestamp of the last update ("ts").
_rate_buckets: Dict[str, Dict[str, float]] = {}
_rate_lock = threading.Lock()

# (capacity, refill_per_second) per endpoint family
_RATE_LIMITS = {
    "chat": (30, 0.5),    # 30 burst, 30/min sustained
    "agent": (10, 0.1),   # 10 burst, 6/min sustained (agent is expensive)
    "upload": (20, 0.2),  # 20 burst, 12/min sustained
}


def _retry_after_seconds(tokens: float, refill: float) -> int:
    """Return the whole seconds (at least 1) until the bucket holds one token.

    The value is rounded UP: truncating would advertise a wait after which the
    bucket is still short of a full token, so a well-behaved client retrying
    exactly on time would be rejected again.
    """
    import math  # local import: keeps this block independent of the file header

    return max(1, math.ceil((1 - tokens) / refill))


def enforce_rate_limit(email: str, bucket_key: str) -> None:
    """Consume one token from the caller's bucket, or reject the request.

    No-op when rate limiting is disabled or ``email`` is empty (unauthenticated).

    Args:
        email: Identity the bucket is keyed on.
        bucket_key: Endpoint family; looked up in ``_RATE_LIMITS`` and falling
            back to a capacity of 60 with a refill of 1 token/second.

    Raises:
        HTTPException: status 429 with a ``Retry-After`` header when the bucket
            holds less than one token.
    """
    if not _RATE_LIMIT_ENABLED or not email:
        return
    cap, refill = _RATE_LIMITS.get(bucket_key, (60, 1.0))
    key = f"{email}:{bucket_key}"
    # Monotonic clock: a wall-clock step backwards must not produce a negative
    # elapsed time and silently drain tokens. Buckets live in memory only, so
    # the timestamps never need to be comparable across processes.
    now = time.monotonic()
    with _rate_lock:
        bucket = _rate_buckets.get(key)
        if bucket is None:
            # First request for this key: start full and charge this request.
            _rate_buckets[key] = {"tokens": cap - 1, "ts": now}
            return
        elapsed = now - bucket["ts"]
        bucket["tokens"] = min(cap, bucket["tokens"] + elapsed * refill)
        bucket["ts"] = now
        if bucket["tokens"] < 1:
            retry_after = _retry_after_seconds(bucket["tokens"], refill)
            raise HTTPException(
                status_code=429,
                detail=f"Rate limit exceeded for {bucket_key}. Retry after {retry_after}s.",
                headers={"Retry-After": str(retry_after)},
            )
        bucket["tokens"] -= 1
1106
+
1107
+
1108
# ── File magic-number validation ──────────────────────────────────────────────
# Map of extension → list of byte-prefix signatures (any-match). Files without
# distinctive magic (.txt, .md, .csv) skip the check.
_FILE_MAGIC: Dict[str, List[bytes]] = {
    ".pdf": [b"%PDF-"],
    ".docx": [b"PK\x03\x04"],
    ".xlsx": [b"PK\x03\x04"],
    ".pptx": [b"PK\x03\x04"],
    ".zip": [b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"],
    ".png": [b"\x89PNG\r\n\x1a\n"],
    ".jpg": [b"\xff\xd8\xff"],
    ".jpeg": [b"\xff\xd8\xff"],
    ".gif": [b"GIF87a", b"GIF89a"],
}


def _bytes_match_extension(data: bytes, ext: str) -> bool:
    """Return True if ``data`` starts with a signature known for ``ext``.

    Extensions with no entry in ``_FILE_MAGIC`` (including an empty or ``None``
    extension) are accepted without inspection.

    Args:
        data: Leading bytes of the file (only the first 16 are examined).
        ext: Claimed extension. Case, surrounding whitespace and a missing
            leading dot are normalised, so ``"PDF"``, ``" .pdf "`` and
            ``".pdf"`` are all checked against the same signatures.

    Returns:
        True when the bytes match, or when the extension has no signatures.
    """
    ext = (ext or "").strip().lower()
    # Without this, "pdf" would miss the table lookup and be waved through
    # as if it were a format with no magic number.
    if ext and not ext.startswith("."):
        ext = f".{ext}"
    signatures = _FILE_MAGIC.get(ext)
    if not signatures:
        return True  # text-like formats — no reliable magic
    return data[:16].startswith(tuple(signatures))
1132
+
1051
1133
  def require_admin(request: Request) -> tuple[str, Dict]:
1052
1134
  users = load_users()
1053
1135
  token = _extract_bearer_token(request)
@@ -1414,18 +1496,31 @@ async def unload_idle_models_loop() -> None:
1414
1496
  except Exception as e:
1415
1497
  logging.warning("Idle model unload failed: %s", e)
1416
1498
 
1499
# Strong references to in-flight background tasks. The event loop keeps only
# weak references to tasks, so a task whose handle the caller discards could
# otherwise be garbage-collected before it finishes.
_BACKGROUND_TASKS: set[asyncio.Task] = set()


def _spawn(coro, *, name: str):
    """Start ``coro`` as a fire-and-forget task that logs failures.

    The task is kept alive in ``_BACKGROUND_TASKS`` until it completes, so
    callers may safely ignore the return value.

    Args:
        coro: Coroutine object to schedule on the running event loop.
        name: Task name, also used in the failure log message.

    Returns:
        The created ``asyncio.Task``.
    """
    task = asyncio.create_task(coro, name=name)
    _BACKGROUND_TASKS.add(task)

    def _on_done(t: asyncio.Task) -> None:
        _BACKGROUND_TASKS.discard(t)
        if t.cancelled():
            return
        # Retrieving the exception also marks it as handled, which suppresses
        # asyncio's "Task exception was never retrieved" warning.
        exc = t.exception()
        if exc is not None:
            logging.warning(
                "background task '%s' failed: %s", name, exc, exc_info=exc
            )

    task.add_done_callback(_on_done)
    return task
1510
+
1511
+
1417
1512
  @asynccontextmanager
1418
1513
  async def lifespan(app: FastAPI):
1419
1514
  try:
1420
1515
  print(f"🧭 Lattice AI mode: {APP_MODE}")
1421
1516
  if ENABLE_TELEGRAM:
1422
1517
  from telegram_bot import run_bot
1423
- asyncio.create_task(run_bot())
1518
+ _spawn(run_bot(), name="telegram_bot")
1424
1519
  print("🚀 Telegram Bot Bridge activated!")
1425
1520
  else:
1426
1521
  print("⏭️ Telegram Bot Bridge disabled for this mode.")
1427
- asyncio.create_task(unload_idle_models_loop())
1428
- asyncio.create_task(autoload_default_model())
1522
+ _spawn(unload_idle_models_loop(), name="unload_idle_models")
1523
+ _spawn(autoload_default_model(), name="autoload_default_model")
1429
1524
  except Exception as e:
1430
1525
  print(f"⚠️ Startup sequence failed: {e}")
1431
1526
  try:
@@ -1491,7 +1586,7 @@ async def login(req: UserLogin):
1491
1586
  "is_admin": role == "admin",
1492
1587
  "token": token,
1493
1588
  })
1494
- response.set_cookie(key="session_token", value=token, httponly=True, samesite="lax", max_age=60 * 60 * 24 * 7)
1589
+ response.set_cookie(key="session_token", value=token, httponly=True, samesite="lax", max_age=_SESSION_TTL)
1495
1590
  return response
1496
1591
 
1497
1592
  @app.get("/auth/sso/config")
@@ -1884,12 +1979,48 @@ class AgentRequest(BaseModel):
1884
1979
  message: str
1885
1980
  conversation_id: Optional[str] = None
1886
1981
  source: Optional[str] = None
1887
- max_steps: int = 6
1982
+ max_steps: int = 25
1888
1983
  temperature: float = 0.1
1889
1984
  user_email: Optional[str] = None
1890
1985
  user_nickname: Optional[str] = None
1891
1986
 
1892
1987
 
1988
+ class AgentEvalRequest(BaseModel):
1989
+ skill: str
1990
+ case_id: Optional[str] = None
1991
+
1992
+
1993
class AgentState(str, Enum):
    """Phases an agent run can be in; every member's value equals its name."""

    IDLE = "IDLE"
    PLANNING = "PLANNING"
    WAITING_APPROVAL = "WAITING_APPROVAL"
    EXECUTING = "EXECUTING"
    VERIFYING = "VERIFYING"
    FAILED = "FAILED"
    ROLLBACK = "ROLLBACK"
    DONE = "DONE"


# States in which the agent loop stops iterating.
AGENT_TERMINAL_STATES = frozenset((AgentState.DONE, AgentState.FAILED))


class AgentRunContext:
    """Per-run mutable record handed from one agent phase to the next.

    Instances are slotted (no ``__dict__``); every field starts empty or
    zeroed and begins in ``AgentState.IDLE``.
    """

    __slots__ = (
        "state",
        "plan",
        "transcript",
        "retry_count",
        "state_history",
        "corrections",
        "final_message",
        "rollback_log",
    )

    def __init__(self) -> None:
        # Scalars
        self.state: AgentState = AgentState.IDLE
        self.retry_count: int = 0
        self.final_message: str = ""
        # Containers: a fresh object per instance, never shared between runs
        self.plan: dict = dict()
        self.transcript: list = list()
        self.state_history: list = list()
        self.corrections: list = list()
        self.rollback_log: list = list()
2022
+
2023
+
1893
2024
  class ToolPathRequest(BaseModel):
1894
2025
  path: str = "."
1895
2026
 
@@ -1915,6 +2046,33 @@ class ToolSearchFilesRequest(BaseModel):
1915
2046
  max_results: int = 20
1916
2047
 
1917
2048
 
2049
+ class ToolReadFileRequest(BaseModel):
2050
+ path: str
2051
+ offset: int = 0
2052
+ limit: int = 0
2053
+ line_numbers: bool = True
2054
+
2055
+
2056
+ class ToolEditFileRequest(BaseModel):
2057
+ path: str
2058
+ old_string: str
2059
+ new_string: str
2060
+ replace_all: bool = False
2061
+
2062
+
2063
+ class ToolGrepRequest(BaseModel):
2064
+ pattern: str
2065
+ path: str = "."
2066
+ glob: Optional[str] = None
2067
+ max_results: int = 50
2068
+ case_insensitive: bool = False
2069
+ context_lines: int = 0
2070
+
2071
+
2072
+ class ToolTodoWriteRequest(BaseModel):
2073
+ todos: List[Dict] = []
2074
+
2075
+
1918
2076
  class ToolWorkspaceTreeRequest(BaseModel):
1919
2077
  path: str = "."
1920
2078
  max_depth: int = 3
@@ -2349,11 +2507,28 @@ def install_engine(engine: str) -> Dict:
2349
2507
  "installed": engine_installed(engine),
2350
2508
  }
2351
2509
  if engine == "ollama" and completed.returncode == 0 and shutil.which("ollama"):
2510
+ # Skip if already running to avoid orphan daemons.
2511
+ already_up = False
2352
2512
  try:
2353
- subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
2354
- result["daemon_started"] = True
2513
+ probe = subprocess.run(["ollama", "list"], capture_output=True, timeout=2, check=False)
2514
+ already_up = probe.returncode == 0
2355
2515
  except Exception:
2356
- result["daemon_started"] = False
2516
+ already_up = False
2517
+ if already_up:
2518
+ result["daemon_started"] = "already_running"
2519
+ else:
2520
+ try:
2521
+ # Detach so the daemon survives this request but doesn't become our zombie.
2522
+ subprocess.Popen(
2523
+ ["ollama", "serve"],
2524
+ stdout=subprocess.DEVNULL,
2525
+ stderr=subprocess.DEVNULL,
2526
+ start_new_session=True,
2527
+ )
2528
+ result["daemon_started"] = True
2529
+ except Exception as e:
2530
+ logging.warning("ollama serve spawn failed: %s", e)
2531
+ result["daemon_started"] = False
2357
2532
  return result
2358
2533
 
2359
2534
  CLOUD_VERIFY_CACHE: Dict[str, Dict] = {}
@@ -2623,6 +2798,7 @@ async def unload_all_models(request: Request):
2623
2798
  @app.post("/chat")
2624
2799
  async def chat(req: ChatRequest, request: Request):
2625
2800
  current_user = require_user(request)
2801
+ enforce_rate_limit(current_user, "chat")
2626
2802
  img_len = len(req.image_data) if req.image_data else 0
2627
2803
  print(
2628
2804
  f"🧪 /chat request: stream={req.stream} image_data_len={img_len} "
@@ -2966,130 +3142,446 @@ async def _stream_chat(req: ChatRequest, context: str = "", image_data: str = No
2966
3142
 
2967
3143
  # ── Local Computer Agent ──────────────────────────────────────────────────────
2968
3144
 
2969
- AGENT_SYSTEM_PROMPT = """You are Lattice AI Agent, a local computer-use coding assistant.
2970
- You have full access to the local filesystem via local_list / local_read / local_write tools.
2971
- Use read_file / write_file for paths inside the agent workspace (relative paths).
2972
- Use local_read / local_write for any absolute path on the system (e.g. ~/Downloads, ~/Desktop).
3145
+ # ── Agent Role Prompts (Planner / Executor / Critic / Memory Updater) ─────────
3146
+
3147
+ _TOOL_CATALOG_BRIEF = """
3148
+ FILESYSTEM : list_dir workspace_tree read_file write_file edit_file grep search_files inspect_html preview_url
3149
+ PLANNING : todo_read todo_write
3150
+ PROJECT : run_command build_project deploy_project create_web_project
3151
+ GIT (read) : git_status git_diff git_log git_show
3152
+ LOCAL FS : local_list local_read local_write read_document
3153
+ DOCS : create_docx create_xlsx create_pptx create_pdf
3154
+ KNOWLEDGE : knowledge_save knowledge_search knowledge_tree
3155
+ COMPUTER : computer_screenshot computer_open_app computer_open_url computer_click computer_type computer_key
3156
+ MISC : network_status clear_history final
3157
+ """
2973
3158
 
2974
- Available actions:
2975
- - list_dir: {"action":"list_dir","args":{"path":"."}}
2976
- - workspace_tree: {"action":"workspace_tree","args":{"path":".","max_depth":3}}
2977
- - read_file: {"action":"read_file","args":{"path":"relative/path.txt"}}
2978
- - write_file: {"action":"write_file","args":{"path":"relative/path.txt","content":"complete file content"}}
2979
- - search_files: {"action":"search_files","args":{"query":"text","path":".","max_results":20}}
2980
- - clear_history: {"action":"clear_history","args":{"keep_last":0}}
2981
- - inspect_html: {"action":"inspect_html","args":{"path":"index.html"}}
2982
- - preview_url: {"action":"preview_url","args":{"path":"index.html"}}
2983
- - create_docx: {"action":"create_docx","args":{"title":"title","body":"paragraphs","filename":"document.docx"}}
2984
- - create_xlsx: {"action":"create_xlsx","args":{"rows":[["A","B"],[1,2]],"filename":"spreadsheet.xlsx","sheet_name":"Sheet1"}}
2985
- - create_pptx: {"action":"create_pptx","args":{"title":"title","slides":[{"title":"Slide","bullets":["point"]}],"filename":"presentation.pptx"}}
2986
- - create_pdf: {"action":"create_pdf","args":{"title":"title","body":"paragraphs","filename":"document.pdf"}}
2987
- - create_web_project: {"action":"create_web_project","args":{"path":"my_app","framework":"react","template":"vite"}} — scaffold a runnable web app project
2988
- - local_list: {"action":"local_list","args":{"path":"/Users/username/Downloads"}} — lists any local folder (UI will request user permission first)
2989
- - local_read: {"action":"local_read","args":{"path":"/Users/username/Documents/note.txt"}} — reads any local file (UI will request user permission first)
2990
- - local_write: {"action":"local_write","args":{"path":"/Users/username/Desktop/output.txt","content":"..."}} — writes any local file (UI will request user permission first)
2991
- - read_document: {"action":"read_document","args":{"path":"/absolute/path/to/file.pdf"}} — extract text from PDF, DOCX, XLSX, PPTX, TXT, MD, CSV
2992
- - computer_screenshot: {"action":"computer_screenshot","args":{}} capture current screen as base64 PNG
2993
- - computer_open_app: {"action":"computer_open_app","args":{"app":"Google Chrome"}} open or focus a Mac app
2994
- - computer_open_url: {"action":"computer_open_url","args":{"url":"https://example.com","app":"Google Chrome"}} open URL in app
2995
- - computer_click: {"action":"computer_click","args":{"x":500,"y":300,"button":"left","double":false}}
2996
- - computer_type: {"action":"computer_type","args":{"text":"hello"}}
2997
- - computer_key: {"action":"computer_key","args":{"key":"command+c"}} — e.g. return, escape, tab, command+v
2998
- - computer_scroll: {"action":"computer_scroll","args":{"x":500,"y":300,"direction":"down","clicks":3}}
2999
- - computer_move: {"action":"computer_move","args":{"x":500,"y":300}}
3000
- - computer_drag: {"action":"computer_drag","args":{"x1":100,"y1":100,"x2":500,"y2":500}}
3001
- - computer_status: {"action":"computer_status","args":{}} — check if Computer Use is available
3002
- - chrome_status: {"action":"chrome_status","args":{}}
3003
- - computer_use_status: {"action":"computer_use_status","args":{}}
3004
- - knowledge_save: {"action":"knowledge_save","args":{"folder":"30_Projects","title":"short title","content":"note"}}
3005
- - knowledge_search: {"action":"knowledge_search","args":{"query":"keyword","max_results":5}}
3006
- - knowledge_tree: {"action":"knowledge_tree","args":{}}
3007
- - obsidian_save: {"action":"obsidian_save","args":{"folder":"30_Projects","title":"short title","content":"note"}}
3008
- - obsidian_search: {"action":"obsidian_search","args":{"query":"keyword","max_results":5}}
3009
- - obsidian_tree: {"action":"obsidian_tree","args":{}}
3010
- - git_status: {"action":"git_status","args":{}}
3011
- - git_diff: {"action":"git_diff","args":{"path":"optional/relative/path"}}
3012
- - git_log: {"action":"git_log","args":{"max_count":5}}
3013
- - git_show: {"action":"git_show","args":{"revision":"HEAD"}}
3014
- - network_status: {"action":"network_status","args":{}} get current local/private IP, public IP, hostname, and Wi-Fi info
3015
- - run_command: {"action":"run_command","args":{"command":"python3 app.py","cwd":"."}}
3016
- - build_project: {"action":"build_project","args":{"cwd":".","script":"build"}}
3017
- - deploy_project: {"action":"deploy_project","args":{"cwd":".","script":"deploy"}}
3018
- - final: {"action":"final","message":"short Korean summary of what you did"}
3159
+ PLANNER_PROMPT = """You are the PLANNER role in Lattice AI's multi-role agent harness.
3160
+ Your ONLY job: analyze the request and produce a structured execution plan.
3161
+ You do NOT call tools or write code.
3162
+
3163
+ Respond with exactly ONE JSON object (no markdown, no fences):
3164
+ {
3165
+ "action": "plan",
3166
+ "state": "PLANNING",
3167
+ "goal": "one-sentence goal in the user's language",
3168
+ "steps": [
3169
+ {"id": 1, "description": "what this step does", "action": "expected_tool", "purpose": "why needed"}
3170
+ ],
3171
+ "requires_approval": true,
3172
+ "rollback_strategy": "git",
3173
+ "estimated_steps": 3
3174
+ }
3175
+
3176
+ Rules:
3177
+ - requires_approval = true if ANY step uses write/exec tools (edit_file, write_file, run_command, etc.)
3178
+ - rollback_strategy = "git" if steps modify existing files; "none" otherwise
3179
+ - Keep steps realistic: 2-4 for simple tasks, up to 10 for complex ones
3180
+ - Do NOT specify full tool args — that is the Executor's job
3181
+
3182
+ Available tools:""" + _TOOL_CATALOG_BRIEF
3183
+
3184
+ EXECUTOR_PROMPT = """You are the EXECUTOR role in Lattice AI's multi-role agent harness.
3185
+ You have a plan from the Planner. Execute it step by step using exactly one tool per response.
3186
+
3187
+ You think and act like a senior software engineer:
3188
+ - Read (read_file, grep) BEFORE editing — never guess at file contents
3189
+ - Prefer edit_file over write_file for existing files
3190
+ - Keep changes small and precise
3191
+ - Verify after changes with build_project or run_command
3192
+
3193
+ Respond with exactly ONE JSON object per step:
3194
+ {"thoughts": "what you learned / why this next action", "action": "tool_name", "args": {...}}
3195
+
3196
+ When the task is fully done AND a tool result in this run confirms it:
3197
+ {"thoughts": "verified", "action": "final", "message": "한국어로 무엇을 했고 어디서 검증했는지 요약"}
3198
+
3199
+ ANTI-PATTERNS (will halt the loop):
3200
+ - Editing without reading first → read_file + grep BEFORE edit_file
3201
+ - Repeating the same action+args → check the transcript
3202
+ - Claiming done without a verification tool result in transcript
3203
+ - Hallucinating imports or file paths that were never confirmed by a tool result
3204
+
3205
+ Available tools:""" + _TOOL_CATALOG_BRIEF
3206
+
3207
+ CRITIC_PROMPT = """You are the CRITIC / REVIEWER role in Lattice AI's multi-role agent harness.
3208
+ Review the execution transcript and determine whether the goal was achieved.
3209
+
3210
+ Respond with exactly ONE JSON object:
3211
+ {
3212
+ "action": "verdict",
3213
+ "state": "VERIFYING",
3214
+ "verdict": "PASS",
3215
+ "reason": "why you think it passed or failed (cite specific tool results)",
3216
+ "corrections": [],
3217
+ "confidence": 0.95,
3218
+ "next_state": "DONE"
3219
+ }
3220
+
3221
+ verdict: "PASS" | "FAIL"
3222
+ next_state:
3223
+ "DONE" — task succeeded; finish
3224
+ "EXECUTING" — task failed but corrections can fix it (use corrections field for retry)
3225
+ "ROLLBACK" — task failed AND file changes should be undone
3226
+
3227
+ Criteria for PASS: a tool result in the transcript explicitly confirms success.
3228
+ Be strict. Claiming done without evidence = FAIL."""
3229
+
3230
+ MEMORY_UPDATER_PROMPT = """You are the MEMORY UPDATER role in Lattice AI's multi-role agent harness.
3231
+ After a completed task, extract reusable learnings.
3232
+
3233
+ Respond with exactly ONE JSON object:
3234
+ {
3235
+ "action": "memory",
3236
+ "state": "DONE",
3237
+ "learnings": ["one concise fact about this codebase or task"],
3238
+ "artifacts": ["relative/path/to/created_or_modified_file"],
3239
+ "save_to_knowledge": false
3240
+ }
3019
3241
 
3020
3242
  Rules:
3021
- - Respond with exactly one JSON object. No markdown, no code fences, no extra text.
3022
- - Use relative paths only.
3023
- - Create complete files, not fragments.
3024
- - Prefer simple, verifiable steps.
3025
- - Use inspect_html and preview_url for generated web UI.
3026
- - Use build_project when the user asks to build, compile, typecheck, or run a package build script.
3027
- - Use deploy_project when the user asks to deploy, preview, release, or package installers (pkg/exe) and package.json defines that script (e.g. package, dist, make, build:pkg, build:exe).
3028
- - If the user asks for app/service/web creation, prefer create_web_project first, then edit files with write_file/read_file and verify with build_project or run_command.
3029
- - If the user asks for installer outputs (.pkg/.exe), set up packaging config (for example Electron/electron-builder or equivalent), create package scripts in package.json, then run deploy_project for installer scripts.
3030
- - If .exe cannot be built on current OS/toolchain, still generate the full packaging config and scripts for Windows and report the exact missing prerequisite.
3031
- - Do not claim you cannot build or deploy. If a script, token, or platform config is missing, inspect the workspace and explain the exact missing piece.
3032
- - Use knowledge tools when the user asks to remember, search memory, or organize project context.
3033
- - Use run_command for local inspection, tests, and short development commands after files are written.
3034
- - For data analysis tasks, read the provided files first (read_document/local_read), compute with run_command when needed, and return concrete findings plus output artifact paths when created.
3035
- - Use clear_history when the user asks to forget, clear, delete, reset, or speed up chat history.
3036
- - Git is read-only: status, diff, log, and show only. Never commit, push, pull, fetch, clone, reset, or checkout.
3037
- - If the user asks for something unsafe or outside the workspace, explain the limitation with final.
3038
- - IMPORTANT: When user asks to create any document (docx, pdf, xlsx, pptx, word, excel, powerpoint, 문서, 파일, 엑셀, 파워포인트, PPT, 피피티), ALWAYS use the appropriate create_* action immediately with full, rich content. Never say you cannot create files.
3243
+ - max 5 learnings, one sentence each
3244
+ - save_to_knowledge = true only if learnings are genuinely useful across future sessions
3245
+ - artifacts = files the Executor actually created or modified (from transcript)
3246
+ """
3247
+
3248
+ # Keep backward-compat alias used by any existing callers
3249
+ AGENT_SYSTEM_PROMPT = EXECUTOR_PROMPT
3250
+
3251
+ # Marker: the old monolithic prompt was replaced by 4-role prompts above.
3252
+ # Legacy variable kept so Telegram bot / VS Code extension still work.
3253
+
3254
+ _ORIGINAL_MONOLITHIC_PROMPT_NOTE = """You are Lattice AI Agent a local, professional-grade coding assistant.
3255
+ You have full access to a sandboxed workspace and (with user approval) the wider filesystem.
3256
+ You think and work like a senior software engineer, not like an autocompleter.
3257
+
3258
+ ================================================================================
3259
+ HOW A PROFESSIONAL DEVELOPER THINKS your operating loop
3260
+ ================================================================================
3261
+ Every multi-step task follows four phases. Skipping phases is the #1 cause of bad
3262
+ output. Do not skip them.
3263
+
3264
+ 1) DISCOVER (read first, then act)
3265
+ - Map the territory before changing it. Use workspace_tree, list_dir, grep,
3266
+ and read_file BEFORE writing or editing anything.
3267
+ - When the user names a file/feature/function, locate it (grep) and read the
3268
+ surrounding code BEFORE proposing a change.
3269
+ - Read package.json, pyproject.toml, requirements.txt, tsconfig.json, and
3270
+ other config files before assuming a library/version/tool is available.
3271
+ - Never guess at APIs, imports, file paths, function signatures, or types.
3272
+ If you don't know, look it up with grep/read_file. Hallucinated code is
3273
+ the worst possible output.
3274
+
3275
+ 2) PLAN (write the plan down)
3276
+ - For any task with 3+ distinct steps, call todo_write FIRST with a concrete
3277
+ checklist (3–10 items). Keep exactly one item in_progress at a time.
3278
+ - The plan should describe WHAT will change and HOW you'll verify it works,
3279
+ not vague intentions ("look at code", "fix bugs"). Bad plans produce bad code.
3280
+ - Update the todo list (todo_write again) as items complete or new ones emerge.
3281
+
3282
+ 3) IMPLEMENT (small, precise diffs)
3283
+ - Prefer edit_file over write_file when modifying existing files. edit_file
3284
+ requires exact byte-level old_string match — read the file first and copy
3285
+ the surrounding context verbatim. This forces correctness.
3286
+ - Use write_file only for brand-new files or when fully rewriting a file you
3287
+ understand end-to-end.
3288
+ - Keep diffs as small as the task requires. Don't refactor "while you're
3289
+ there." Don't add abstractions for hypothetical future needs.
3290
+ - Code quality:
3291
+ * No new comments unless the WHY is non-obvious (a subtle invariant, a
3292
+ workaround for a specific bug, behavior that would surprise a reader).
3293
+ Never write comments that just restate what the code does.
3294
+ * No backward-compat shims, no dead code, no unused imports/variables.
3295
+ * No defensive try/except around code that can't fail. Trust internal
3296
+ contracts; validate only at system boundaries (user input, network).
3297
+ * Match the surrounding code's style (indent, quotes, naming).
3298
+
3299
+ 4) VERIFY (prove it works before claiming done)
3300
+ - After code changes, RUN something that confirms correctness:
3301
+ * build_project for build/typecheck/test scripts
3302
+ * run_command for python/node scripts and tests
3303
+ * inspect_html + preview_url for generated UI
3304
+ - If verification fails, treat the failure as the new task. Diagnose root
3305
+ cause; do not paper over it (no try/except shortcuts, no --no-verify, no
3306
+ disabling tests). Re-enter Discover phase if needed.
3307
+ - Never claim a task is "complete," "saved," "fixed," "working," or
3308
+ "deployed" unless a tool result in this same agent run confirms it.
3309
+
3310
+ ================================================================================
3311
+ RESPONSE FORMAT (strict)
3312
+ ================================================================================
3313
+ Respond with exactly ONE JSON object per step. No markdown, no code fences, no
3314
+ extra prose. Include a short `thoughts` field that records your current reasoning
3315
+ (what you just learned, what you'll do next, why). The user does not see it
3316
+ directly — it exists so you can plan across steps.
3317
+
3318
+ {"thoughts": "Need to read App.tsx before editing the import. Workspace tree
3319
+ confirms only one App.tsx exists.",
3320
+ "action": "read_file",
3321
+ "args": {"path": "src/App.tsx"}}
3322
+
3323
+ When the task is fully complete AND verified:
3324
+ {"thoughts": "Build passed, file written, ready to summarize.",
3325
+ "action": "final",
3326
+ "message": "한국어로 간결하게 무엇을 만들었고 어디서 검증했는지 요약."}
3327
+
3328
+ If you cannot proceed (missing tool, blocked path, ambiguous user intent), use
3329
+ `final` and clearly state the blocker and the smallest next step the user can
3330
+ take to unblock it. Do NOT loop on the same failing action.
3331
+
3332
+ ================================================================================
3333
+ TOOL CATALOG
3334
+ ================================================================================
3335
+ Filesystem (workspace, relative paths):
3336
+ list_dir {"path":"."}
3337
+ workspace_tree {"path":".", "max_depth":3}
3338
+ read_file {"path":"src/App.tsx", "offset":0, "limit":0, "line_numbers":true}
3339
+ — returns numbered view + total_lines. Use offset/limit for big files.
3340
+ write_file {"path":"new_file.py", "content":"..."} — new files / full rewrites
3341
+ edit_file {"path":"existing.py", "old_string":"exact text", "new_string":"new text",
3342
+ "replace_all":false}
3343
+ — preferred for existing files. old_string MUST appear once
3344
+ (unless replace_all=true). Include enough surrounding context
3345
+ to make it unique.
3346
+ grep {"pattern":"regex", "path":".", "glob":"*.py", "max_results":50,
3347
+ "case_insensitive":false, "context_lines":2}
3348
+ — regex search across the codebase. Use this before assuming a
3349
+ symbol exists.
3350
+ search_files {"query":"substring", "path":".", "max_results":20} — legacy substring search
3351
+ inspect_html {"path":"index.html"}
3352
+ preview_url {"path":"index.html"}
3353
+
3354
+ Planning:
3355
+ todo_read {}
3356
+ todo_write {"todos":[{"id":"1","content":"...","status":"pending"}]}
3357
+ — status ∈ pending|in_progress|completed.
3358
+ Use proactively for any task with 3+ steps.
3359
+
3360
+ Project ops:
3361
+ run_command {"command":"python3 app.py", "cwd":"."}
3362
+ — allowed binaries: pwd ls find cat sed head tail wc rg python python3 node npm npx
3363
+ — git is NOT allowed here; use the git_* tools below (read-only).
3364
+ build_project {"cwd":".", "script":"build"} — also: compile, typecheck, test
3365
+ deploy_project {"cwd":".", "script":"deploy"} — also: preview, release, package, dist, make, build:pkg, build:exe
3366
+ create_web_project {"path":"my_app", "framework":"react", "template":"vite"}
3367
+
3368
+ Git (read-only):
3369
+ git_status, git_diff, git_log, git_show
3370
+ — Never commit/push/pull/fetch/clone/reset/checkout. Lattice agent does not author git history.
3371
+
3372
+ Local filesystem (outside workspace; UI prompts user for approval):
3373
+ local_list {"path":"/Users/.../Downloads"}
3374
+ local_read {"path":"/abs/path/file.txt"}
3375
+ local_write {"path":"/abs/path/file.txt", "content":"..."}
3376
+ read_document {"path":"/abs/path/report.pdf"} — PDF, DOCX, XLSX, PPTX, TXT, MD, CSV
3377
+
3378
+ Document generation (written to workspace generated_* folders):
3379
+ create_docx {"title":"...", "body":"...", "filename":"doc.docx"}
3380
+ create_xlsx {"rows":[["A","B"],[1,2]], "filename":"sheet.xlsx", "sheet_name":"Sheet1"}
3381
+ create_pptx {"title":"...", "slides":[{"title":"...","bullets":["..."]}], "filename":"deck.pptx"}
3382
+ create_pdf {"title":"...", "body":"...", "filename":"doc.pdf"}
3383
+
3384
+ Knowledge / memory (Obsidian-compatible Markdown vault):
3385
+ knowledge_save {"folder":"30_Projects", "title":"...", "content":"..."}
3386
+ knowledge_search {"query":"...", "max_results":5}
3387
+ knowledge_tree {}
3388
+ obsidian_save / obsidian_search / obsidian_tree — same as knowledge_*, with vault URIs
3389
+
3390
+ Computer use (macOS desktop control, requires Accessibility permission):
3391
+ computer_screenshot, computer_open_app, computer_open_url, computer_click,
3392
+ computer_type, computer_key, computer_scroll, computer_move, computer_drag,
3393
+ computer_status, chrome_status, computer_use_status
3394
+ — Use screenshot to ground state; click/type to interact. Verify with another screenshot.
3395
+
3396
+ Misc:
3397
+ network_status {}
3398
+ clear_history {"keep_last":0}
3399
+ final {"message":"..."}
3400
+
3401
+ ================================================================================
3402
+ DOMAIN RULES (keep in mind)
3403
+ ================================================================================
3404
+ - Frontend: don't assume Tailwind/framer-motion/TypeScript exist. Read
3405
+ package.json first. If a dependency is missing, either add it explicitly to
3406
+ package.json (and create the config files it needs) or pick a simpler stack
3407
+ that already works.
3408
+ - Installers (.pkg/.exe): set up the packaging config (e.g. electron-builder)
3409
+ with full scripts in package.json, then run deploy_project. If the current
3410
+ OS/toolchain can't produce the artifact, still generate complete config and
3411
+ state the exact missing prerequisite — do not say "I can't."
3412
+ - Data analysis: read the data files (read_document/local_read), compute with
3413
+ run_command, report concrete findings plus output artifact paths.
3414
+ - Document requests (docx/xlsx/pptx/pdf, 문서/엑셀/PPT/피피티/파워포인트): call
3415
+ the matching create_* action immediately with rich, complete content. Never
3416
+ say you cannot create files.
3417
+ - Korean/English: answer in the language the user used; default to Korean
3418
+ if mixed or ambiguous.
3419
+
3420
+ ================================================================================
3421
+ ANTI-PATTERNS (will be flagged by the orchestrator)
3422
+ ================================================================================
3423
+ - Editing without reading first → use read_file + grep before edit_file.
3424
+ - Repeating the same action with the same args → the loop will halt you.
3425
+ - Claiming "done" without a verification tool result in the transcript.
3426
+ - Adding new dependencies without updating package.json / requirements.txt.
3427
+ - Producing fragments when the user asked for a complete file or runnable app.
3428
+ - Stuffing speculative features beyond the user's actual request.
3429
+ - Decorative placeholder URLs / fake data when real data is available.
3039
3430
  """
3040
3431
 
3041
3432
 
3042
- _FILE_CREATE_ACTIONS = {"create_docx", "create_xlsx", "create_pptx", "create_pdf", "write_file", "create_web_project"}
3433
+ _FILE_CREATE_ACTIONS = {"create_docx", "create_xlsx", "create_pptx", "create_pdf", "write_file", "edit_file", "create_web_project"}
3043
3434
 
3044
3435
  # Harness risk level per tool action.
3045
3436
  # low — read-only, no side effects
3046
3437
  # medium — write/create files or knowledge entries
3047
3438
  # high — execute commands, control computer, write to arbitrary FS paths
3048
- _TOOL_RISK: Dict[str, str] = {
3049
- # read-only workspace tools
3050
- "list_dir": "low", "workspace_tree": "low", "read_file": "low",
3051
- "search_files": "low", "inspect_html": "low",
3052
- # read-only local FS
3053
- "local_list": "low", "local_read": "low",
3054
- # read-only git
3055
- "git_status": "low", "git_log": "low", "git_diff": "low", "git_show": "low",
3056
- # read-only knowledge / computer
3057
- "knowledge_search": "low", "knowledge_tree": "low",
3058
- "obsidian_search": "low", "obsidian_tree": "low",
3059
- "computer_screenshot": "low", "computer_status": "low",
3060
- # write workspace
3061
- "write_file": "medium", "create_web_project": "medium",
3062
- "create_docx": "medium", "create_xlsx": "medium",
3063
- "create_pptx": "medium", "create_pdf": "medium",
3064
- # write knowledge
3065
- "knowledge_save": "medium", "obsidian_save": "medium",
3066
- # write local FS (arbitrary path — treated as medium; blocked from system roots below)
3067
- "local_write": "medium",
3068
- # preview
3069
- "preview_url": "medium",
3070
- # execute commands
3071
- "run_command": "high",
3072
- # computer control
3073
- "computer_click": "high", "computer_type": "high", "computer_key": "high",
3074
- "computer_scroll": "high", "computer_drag": "high", "computer_move": "high",
3075
- "computer_open_app": "high", "computer_open_url": "high",
3439
class ToolPolicy(TypedDict):
    """Governance record describing how the agent harness treats one tool."""

    risk: str           # "read" | "write" | "exec" | "destructive"
    destructive: bool   # True = data loss possible, no auto-undo
    shell: bool         # True = spawns a subprocess
    network: bool       # True = makes external network calls
    auto_approve: bool  # True = agent may call without human confirmation
    sandbox: str        # "workspace" | "home" | "system"
    rollback: str       # "none" | "backup" | "git"


def _policy_factory(
    risk: str,
    *,
    shell: bool,
    network: bool,
    auto_approve: bool,
    destructive: bool = False,
    default_sandbox: str = "workspace",
):
    """Return a constructor for ToolPolicy records sharing one risk profile.

    The returned callable has the signature ``(s, sb=default_sandbox, ro="none")``:
    ``s`` is the tool name (accepted only so call sites read naturally; it is
    not stored in the policy), ``sb`` is the sandbox and ``ro`` the rollback
    strategy.
    """

    def build(s, sb=default_sandbox, ro="none") -> ToolPolicy:
        return ToolPolicy(
            risk=risk, destructive=destructive, shell=shell, network=network,
            auto_approve=auto_approve, sandbox=sb, rollback=ro,
        )

    return build


# One constructor per risk profile used in the table below.
_R = _policy_factory("read", shell=False, network=False, auto_approve=True)
_RS = _policy_factory("read", shell=True, network=False, auto_approve=True)
_RN = _policy_factory("read", shell=True, network=True, auto_approve=True, default_sandbox="system")
_W = _policy_factory("write", shell=False, network=False, auto_approve=False)
_E = _policy_factory("exec", shell=True, network=False, auto_approve=False)
_EN = _policy_factory("exec", shell=True, network=True, auto_approve=False)
_EC = _policy_factory("exec", shell=False, network=False, auto_approve=False, default_sandbox="system")
_D = _policy_factory("destructive", shell=True, network=False, auto_approve=False, destructive=True)

TOOL_GOVERNANCE: Dict[str, ToolPolicy] = {
    # ── read-only / workspace ──────────────────────────────────────────────────
    "list_dir": _R("list_dir"),
    "workspace_tree": _R("workspace_tree"),
    "read_file": _R("read_file"),
    "search_files": _R("search_files"),
    "grep": _R("grep"),
    "inspect_html": _R("inspect_html"),
    "todo_read": _R("todo_read"),
    # ── read-only / home FS ───────────────────────────────────────────────────
    "local_list": _R("local_list", sb="home"),
    "local_read": _R("local_read", sb="home"),
    # ── read-only / git (spawns subprocess, read-only) ───────────────────────
    "git_status": _RS("git_status"),
    "git_diff": _RS("git_diff"),
    "git_log": _RS("git_log"),
    "git_show": _RS("git_show"),
    # ── read-only / knowledge ─────────────────────────────────────────────────
    "knowledge_search": _R("knowledge_search", sb="home"),
    "knowledge_tree": _R("knowledge_tree", sb="home"),
    "obsidian_search": _R("obsidian_search", sb="home"),
    "obsidian_tree": _R("obsidian_tree", sb="home"),
    # ── read-only / system ────────────────────────────────────────────────────
    "computer_screenshot": _R("computer_screenshot", sb="system"),
    "computer_status": _R("computer_status", sb="system"),
    "chrome_status": _R("chrome_status", sb="system"),
    "computer_use_status": _R("computer_use_status", sb="system"),
    "network_status": _RN("network_status"),
    # ── write / workspace ─────────────────────────────────────────────────────
    "write_file": _W("write_file", ro="git"),
    "edit_file": _W("edit_file", ro="git"),
    "create_web_project": _W("create_web_project"),
    "create_docx": _W("create_docx"),
    "create_xlsx": _W("create_xlsx"),
    "create_pptx": _W("create_pptx"),
    "create_pdf": _W("create_pdf"),
    "preview_url": _W("preview_url"),
    "todo_write": _W("todo_write"),
    # ── write / home FS ───────────────────────────────────────────────────────
    "knowledge_save": _W("knowledge_save", sb="home"),
    "obsidian_save": _W("obsidian_save", sb="home"),
    "local_write": _W("local_write", sb="home"),
    # ── exec / workspace ──────────────────────────────────────────────────────
    "run_command": _E("run_command"),
    "build_project": _E("build_project"),
    # ── exec / network ────────────────────────────────────────────────────────
    "deploy_project": _EN("deploy_project"),
    # ── exec / computer use (system-level input injection) ───────────────────
    "computer_click": _EC("computer_click"),
    "computer_type": _EC("computer_type"),
    "computer_key": _EC("computer_key"),
    "computer_scroll": _EC("computer_scroll"),
    "computer_drag": _EC("computer_drag"),
    "computer_move": _EC("computer_move"),
    "computer_open_app": _EC("computer_open_app"),
    # Same profile as the other computer-use tools, except that it reaches the network.
    "computer_open_url": ToolPolicy(
        risk="exec", destructive=False, shell=False, network=True,
        auto_approve=False, sandbox="system", rollback="none",
    ),
}
3077
3515
 
3078
- # Paths that local_write must never target (system-level protection)
3516
# Policy applied to any action name that has no entry in TOOL_GOVERNANCE.
_TOOL_GOVERNANCE_DEFAULT = ToolPolicy(
    risk="write",
    destructive=False,
    shell=False,
    network=False,
    auto_approve=False,
    sandbox="workspace",
    rollback="none",
)

# Path prefixes that local_write must never target.
# NOTE(review): the original comment also names local_list — confirm where that is enforced.
_LOCAL_WRITE_BLOCKED_PREFIXES = (
    "/etc/",
    "/usr/",
    "/bin/",
    "/sbin/",
    "/System/",
    "/private/etc/",
    "/Library/LaunchDaemons/",
    "/Library/LaunchAgents/",
)

# Backward-compat: map policy risk → legacy low/medium/high string
_RISK_LEVEL_MAP = dict(read="low", write="medium", exec="high", destructive="high")
3084
3529
 
3085
- def _agent_risk(action_name: str, args: dict) -> str:
3086
- """Return risk level for an action, upgrading local_write to 'high' for system paths."""
3087
- risk = _TOOL_RISK.get(action_name, "medium")
3530
+
3531
def _agent_policy(action_name: str, args: dict) -> ToolPolicy:
    """Return the full governance policy for an action.

    Looks the action up in TOOL_GOVERNANCE, falling back to
    _TOOL_GOVERNANCE_DEFAULT for unknown names. A ``local_write`` whose target
    lies under one of _LOCAL_WRITE_BLOCKED_PREFIXES is upgraded to a
    destructive, system-sandbox policy.

    The path comes from model output, so it is matched both verbatim and in
    normalised form: ``//etc/passwd``, ``/tmp/../etc/passwd`` and a bare
    ``/etc`` are all treated as system paths. Symlinks are not resolved.
    """
    # Function-scope import keeps this block self-contained; the prefixes are
    # POSIX paths, so posixpath is used regardless of the host platform.
    import posixpath

    policy = TOOL_GOVERNANCE.get(action_name, _TOOL_GOVERNANCE_DEFAULT)
    if action_name != "local_write":
        return policy

    raw_path = str(args.get("path", ""))
    normalized = posixpath.normpath(raw_path)
    # POSIX normpath deliberately preserves exactly two leading slashes.
    if normalized.startswith("//"):
        normalized = "/" + normalized.lstrip("/")

    # The trailing "/" lets a protected directory itself ("/etc") match "/etc/".
    candidates = (raw_path, normalized + "/")
    if any(c.startswith(_LOCAL_WRITE_BLOCKED_PREFIXES) for c in candidates):
        return ToolPolicy(
            risk="destructive", destructive=True, shell=False, network=False,
            auto_approve=False, sandbox="system", rollback="none",
        )
    return policy
3545
+
3546
+
3547
def _agent_risk(action_name: str, args: dict) -> str:
    """Translate an action's governance risk into the legacy low/medium/high label.

    Kept so transcripts written in the older format stay comparable; a risk
    value missing from _RISK_LEVEL_MAP is reported as "medium".
    """
    policy_risk = _agent_policy(action_name, args)["risk"]
    return _RISK_LEVEL_MAP.get(policy_risk, "medium")
3550
+
3551
+
3552
+ # ── Tool Permission Layer ─────────────────────────────────────────────────────
3553
+ # A compact, public-facing view of each tool's authorization profile, derived
3554
+ # from TOOL_GOVERNANCE. Designed for client UIs / approval dialogs that don't
3555
+ # need the full 7-dimensional governance object.
3556
+ #
3557
+ # Example:
3558
+ # { "tool": "run_command", "risk": "high", "requires_approval": true, "network": false }
3559
+
3560
class ToolPermission(TypedDict):
    """Compact authorization summary of a single tool for client UIs."""

    # Name of the tool the summary describes.
    tool: str
    # Legacy risk label: "low" | "medium" | "high".
    risk: str
    # Inverse of governance.auto_approve.
    requires_approval: bool
    # Whether the tool makes external network calls.
    network: bool
3565
+
3566
+
3567
def get_tool_permission(name: str, args: Optional[dict] = None) -> ToolPermission:
    """Build the simplified permission view for one tool.

    Pass `args` when the verdict may depend on the call itself (for example a
    local_write aimed at /etc is escalated); leave it out for a static
    catalog listing.
    """
    policy = _agent_policy(name, args or {})
    legacy_risk = _RISK_LEVEL_MAP.get(policy["risk"], "medium")
    needs_approval = not policy["auto_approve"]
    return ToolPermission(
        tool=name,
        risk=legacy_risk,
        requires_approval=needs_approval,
        network=policy["network"],
    )
3580
+
3581
+
3582
def list_tool_permissions() -> list:
    """Return the permission view of every governed tool, ordered by tool name."""
    ordered_names = sorted(TOOL_GOVERNANCE)
    return list(map(get_tool_permission, ordered_names))
3093
3585
 
3094
3586
 
3095
3587
  def _collect_created_files(transcript: list) -> list:
@@ -3138,141 +3630,420 @@ def _extract_agent_action(raw: str) -> Dict:
3138
3630
  return action
3139
3631
 
3140
3632
 
3141
- @app.post("/agent")
3142
- async def agent(req: AgentRequest, request: Request):
3143
- """Natural-language local agent loop for Telegram and future clients."""
3144
- current_user = require_user(request)
3145
- if not router.current_model_id:
3146
- raise HTTPException(status_code=400, detail="No model loaded. Call /models/load first.")
3633
+ # ── Agent State Machine — Phase Functions ─────────────────────────────────────
3147
3634
 
3148
- ensure_agent_root()
3149
- transcript = []
3150
- max_steps = max(1, min(req.max_steps, 10))
3151
- lang = detect_language(req.message)
3152
- lang_hint = _LANG_HINT[lang]
3635
async def _phase_plan(
    ctx: AgentRunContext, req: AgentRequest, router, lang_hint: str, current_user: str,
) -> None:
    """PLAN phase: ask the planner role for a structured plan and record it.

    The plan is stored on ``ctx.plan``, a PLANNING entry is appended to the
    transcript, and the run moves to WAITING_APPROVAL. When the model output
    cannot be parsed, a minimal plan with no steps is used instead.
    """
    prompt_sections = [
        f"{PLANNER_PROMPT}\n\n",
        f"[LANGUAGE HINT: {lang_hint}]\n",
        f"Workspace root: {AGENT_ROOT}\n\n",
        f"User request: {req.message}",
    ]
    planner_output = await router.generate(
        message="Produce a JSON execution plan for this request.",
        context="".join(prompt_sections),
        max_tokens=1024,
        temperature=0.1,
    )

    try:
        plan = _extract_agent_action(str(planner_output))
    except ValueError:
        # Unparseable planner output: fall back to an empty single-step plan.
        plan = {
            "action": "plan",
            "state": "PLAN",
            "goal": req.message,
            "steps": [],
            "requires_approval": False,
            "rollback_strategy": "none",
            "estimated_steps": 1,
        }
    ctx.plan = plan

    # Mirror the plan's headline fields into the transcript, with defaults.
    entry = {"state": AgentState.PLANNING.value}
    field_defaults = (
        ("goal", req.message),
        ("steps", []),
        ("requires_approval", False),
        ("rollback_strategy", "none"),
        ("estimated_steps", 1),
    )
    for key, fallback in field_defaults:
        entry[key] = plan.get(key, fallback)
    ctx.transcript.append(entry)

    ctx.state = AgentState.WAITING_APPROVAL
3667
+
3668
+
3669
def _phase_approval(ctx: AgentRunContext, current_user: str) -> None:
    """APPROVAL phase: record which planned steps would need human sign-off.

    Every planned step whose action is not marked ``auto_approve`` in
    TOOL_GOVERNANCE is listed as non-auto. The decision written to the
    transcript and the audit log is always "auto_approved" (there is no
    interactive prompt yet), after which the run moves to EXECUTING.

    The plan is model output, so a ``steps`` value that is not a list, or a
    step that is not a dict, is tolerated instead of raising.
    """
    auto_approve_tools = {name for name, p in TOOL_GOVERNANCE.items() if p["auto_approve"]}

    raw_steps = ctx.plan.get("steps")
    steps = raw_steps if isinstance(raw_steps, list) else []

    non_auto = []
    for step in steps:
        action_name = step.get("action") if isinstance(step, dict) else None
        # Non-string names (missing, or an unhashable value) can never be auto-approved.
        if not isinstance(action_name, str) or action_name not in auto_approve_tools:
            non_auto.append(action_name)

    requires = bool(ctx.plan.get("requires_approval", False)) or bool(non_auto)

    ctx.transcript.append({
        "state": AgentState.WAITING_APPROVAL.value,
        "requires_approval": requires,
        "non_auto_approve_steps": non_auto,
        "decision": "auto_approved",
    })
    append_audit_event(
        "agent_approval", user_email=current_user,
        requires_approval=requires, non_auto_steps=non_auto, decision="auto_approved",
    )
    ctx.state = AgentState.EXECUTING
3687
+
3688
+
3689
async def _phase_execute(
    ctx: AgentRunContext, req: AgentRequest, router, lang_hint: str,
    current_user: str, max_steps: int,
) -> None:
    """EXECUTE phase: the executor role calls tools one at a time.

    The loop runs until the model emits ``final``, the output cannot be
    parsed, an identical file-creating action is repeated, or the step budget
    is used up. The budget is ``max_steps`` minus the EXECUTING entries
    already in the transcript, with a minimum of one. Every outcome is
    appended to ``ctx.transcript`` and the run always leaves in VERIFYING.
    """
    executing = AgentState.EXECUTING.value
    exec_count = sum(1 for s in ctx.transcript if s.get("state") == executing)
    budget = max(1, max_steps - exec_count)

    for _ in range(budget):
        corrections_hint = (
            "\n\nCritic corrections from previous attempt:\n"
            + "\n".join(f"- {c}" for c in ctx.corrections)
        ) if ctx.corrections else ""

        context = (
            f"{EXECUTOR_PROMPT}\n\n"
            f"[LANGUAGE HINT: {lang_hint}]\n"
            f"Workspace root: {AGENT_ROOT}\n\n"
            f"PLAN:\n{json.dumps(ctx.plan, ensure_ascii=False)}\n\n"
            f"Recent conversation:\n{build_recent_chat_context(conversation_id=req.conversation_id) or '(none)'}\n\n"
            f"User request: {req.message}{corrections_hint}\n\n"
            f"Execution transcript:\n{json.dumps(ctx.transcript, ensure_ascii=False, indent=2)}"
        )
        raw = await router.generate(
            message="Execute the next step.",
            context=context, max_tokens=4096, temperature=req.temperature,
        )
        try:
            action = _extract_agent_action(str(raw))
        except ValueError as exc:
            ctx.transcript.append({
                "state": executing, "action": "parse_error",
                "raw": str(raw)[:400], "error": str(exc),
            })
            break

        name = action.get("action")
        thoughts = str(action.get("thoughts") or "")[:600]

        if name == "final":
            ctx.final_message = action.get("message", "작업을 완료했습니다.")
            ctx.transcript.append({
                "state": executing, "action": "final", "thoughts": thoughts,
            })
            ctx.state = AgentState.VERIFYING
            return

        args = action.get("args") or {}
        if not isinstance(args, dict):
            # Model output is untrusted: a non-object "args" would otherwise
            # raise AttributeError further down. Record it so the model can correct itself.
            ctx.transcript.append({
                "state": executing, "action": name, "thoughts": thoughts,
                "error": "INVALID_ARGS: 'args' must be a JSON object.",
            })
            continue

        # Loop guard: stop when the previous executed step was the same
        # file-creating action with the same args and it already produced a result.
        exec_steps = [s for s in ctx.transcript if s.get("state") == executing]
        last = exec_steps[-1] if exec_steps else None
        if (
            name in _FILE_CREATE_ACTIONS and last
            and last.get("action") == name
            and (last.get("args") or {}) == args
            and "result" in last
        ):
            ctx.transcript.append({
                "state": executing, "action": name,
                "error": "LOOP_DETECTED: identical action+args repeated — halted.",
            })
            break

        if name == "clear_history":
            result = clear_history(args.get("keep_last", 0))
            # History deletion is handled outside TOOL_GOVERNANCE, so it is
            # audited explicitly here, as the previous agent loop did.
            append_audit_event(
                "history_delete",
                user_email=current_user,
                source=req.source or "agent",
                keep_last=args.get("keep_last", 0),
                removed=result.get("removed", 0),
                kept=result.get("kept", 0),
            )
            ctx.transcript.append({
                "state": executing, "action": name,
                "thoughts": thoughts, "args": args, "result": result,
            })
            continue

        policy = _agent_policy(name, args)
        risk = _RISK_LEVEL_MAP.get(policy["risk"], "medium")

        if policy["risk"] == "destructive":
            ctx.transcript.append({
                "state": executing, "action": name,
                "thoughts": thoughts, "args": args, "risk": risk,
                "governance": dict(policy),
                "error": f"BLOCKED: destructive action '{name}' not permitted in agent mode.",
            })
            append_audit_event(
                "agent_blocked", user_email=current_user, source=req.source or "agent",
                action=name, reason="destructive", governance=dict(policy),
            )
            continue

        # Audit every action that is not auto-approved before running it;
        # "content" is left out of the logged args.
        if not policy["auto_approve"]:
            append_audit_event(
                "agent_exec", user_email=current_user, source=req.source or "agent",
                state=executing, action=name, risk=risk,
                shell=policy["shell"], network=policy["network"],
                destructive=policy["destructive"], sandbox=policy["sandbox"],
                rollback=policy["rollback"],
                args={k: v for k, v in args.items() if k != "content"},
            )

        try:
            result = execute_tool(name, args)
            ctx.transcript.append({
                "state": executing, "action": name,
                "thoughts": thoughts, "args": args,
                "risk": risk, "governance": dict(policy), "result": result,
            })
        except (ToolError, KeyError, TypeError) as exc:
            ctx.transcript.append({
                "state": executing, "action": name,
                "thoughts": thoughts, "args": args,
                "risk": risk, "governance": dict(policy), "error": str(exc),
            })

    ctx.state = AgentState.VERIFYING
3801
+
3253
3802
 
3254
- summary_context = (
3255
- f"{AGENT_SYSTEM_PROMPT}\n\n"
3256
- f"Recent conversation:\n{build_recent_chat_context(conversation_id=req.conversation_id) or '(none)'}\n\n"
3257
- f"User request:\n{req.message}\n\n"
3258
- f"Tool transcript:\n{json.dumps(transcript, ensure_ascii=False, indent=2)}"
3803
async def _phase_verify(
    ctx: AgentRunContext, req: AgentRequest, router, lang_hint: str, current_user: str,
    max_retry: int = 3,
) -> None:
    """VERIFYING phase: the critic role judges the transcript and picks the next state.

    Outcomes: DONE, EXECUTING (retry, at most ``max_retry`` times), ROLLBACK
    or FAILED. The verdict is appended to ``ctx.transcript`` and its
    corrections are stored on ``ctx.corrections`` for the next execute pass.

    The verdict is model output, so it is normalised before use: verdict and
    next_state are compared case-insensitively, corrections are coerced to a
    list, and a non-PASS verdict that omits ``next_state`` ends in FAILED
    rather than defaulting to DONE.
    """
    context = (
        f"{CRITIC_PROMPT}\n\n"
        f"[LANGUAGE HINT: {lang_hint}]\n\n"
        f"Original request: {req.message}\n"
        f"Plan goal: {ctx.plan.get('goal', req.message)}\n\n"
        f"Full transcript:\n{json.dumps(ctx.transcript, ensure_ascii=False, indent=2)}"
    )
    raw = await router.generate(
        message="Review the execution transcript and return your verdict JSON.",
        context=context, max_tokens=512, temperature=0.1,
    )
    try:
        verdict = _extract_agent_action(str(raw))
    except ValueError:
        # Deliberate best-effort: an unreadable critic reply does not fail the run.
        verdict = {"action": "verdict", "verdict": "PASS", "next_state": "DONE",
                   "reason": "Critic parse failed — assuming pass.", "corrections": [], "confidence": 0.7}

    # Corrections are later joined line by line, so they must be a list.
    raw_corrections = verdict.get("corrections")
    if isinstance(raw_corrections, str):
        ctx.corrections = [raw_corrections] if raw_corrections.strip() else []
    elif isinstance(raw_corrections, (list, tuple)):
        ctx.corrections = list(raw_corrections)
    else:
        ctx.corrections = []

    verdict_label = str(verdict.get("verdict") or "").strip().upper()
    explicit_next = verdict.get("next_state")
    if explicit_next:
        raw_next = str(explicit_next).strip().upper()
    else:
        # No next_state given: only a missing or PASS verdict may default to DONE.
        raw_next = "DONE" if verdict_label in ("", "PASS") else "FAILED"
    # Normalize legacy verdict next_state strings to current AgentState names
    next_s = {"COMPLETE": "DONE", "RETRY": "EXECUTING"}.get(raw_next, raw_next)

    ctx.transcript.append({
        "state": AgentState.VERIFYING.value,
        "verdict": verdict.get("verdict", "PASS"),
        "reason": verdict.get("reason", ""),
        "corrections": ctx.corrections,
        "confidence": verdict.get("confidence", 0.9),
        "next_state": next_s,
    })

    if verdict_label == "PASS" or next_s == "DONE":
        if not ctx.final_message:
            ctx.final_message = verdict.get("reason", "작업이 완료되었습니다.")
        ctx.state = AgentState.DONE
    elif next_s == "ROLLBACK":
        ctx.state = AgentState.ROLLBACK
    elif next_s == "EXECUTING":
        if ctx.retry_count >= max_retry:
            ctx.final_message = (
                f"최대 재시도({max_retry}회) 초과로 작업을 종료했습니다. "
                f"마지막 비판: {verdict.get('reason', '(없음)')}"
            )
            ctx.state = AgentState.FAILED
        else:
            ctx.retry_count += 1
            # Marker entry so the transcript shows where a retry pass begins.
            ctx.transcript.append({
                "state": AgentState.EXECUTING.value,
                "retry_attempt": ctx.retry_count,
                "corrections": ctx.corrections,
            })
            ctx.state = AgentState.EXECUTING
    else:
        ctx.final_message = verdict.get("reason", "검증자가 인식되지 않은 다음 상태를 반환했습니다.")
        ctx.state = AgentState.FAILED
3863
+
3864
+
3865
def _phase_rollback(ctx: AgentRunContext, current_user: str) -> None:
    """ROLLBACK: run ``git checkout -- <path>`` for each edited file, then FAILED.

    Only transcript steps in the EXECUTING state whose governance declares
    ``rollback == "git"`` and whose result reports success are considered.
    Each distinct path is restored once, in first-seen order. The outcome is
    appended to the transcript, written to the audit log, and the run always
    ends in ``AgentState.FAILED``.

    Args:
        ctx: Run context; ``transcript`` is read and appended to, and
            ``final_message`` and ``state`` are set.
        current_user: Recorded as ``user_email`` on the audit event.
    """
    import subprocess as _sp

    rolled: list = []
    seen_paths: set = set()
    for step in ctx.transcript:
        if step.get("state") != AgentState.EXECUTING.value:
            continue
        gov = step.get("governance") or {}
        if gov.get("rollback") != "git":
            continue
        result = step.get("result", {})
        if not (isinstance(result, dict) and result.get("success")):
            continue
        path = result.get("path") or (step.get("args") or {}).get("path", "")
        if not path:
            continue
        path = str(path)
        if path in seen_paths:
            # One checkout already restores the file; repeating it would only
            # add duplicate entries to the report.
            continue
        seen_paths.add(path)
        try:
            # "--" ends option parsing, so a path starting with "-" cannot be
            # read by git as a flag.
            r = _sp.run(
                ["git", "checkout", "--", path], cwd=str(AGENT_ROOT),
                capture_output=True, text=True, timeout=10,
            )
            rolled.append({"path": path, "ok": r.returncode == 0, "stderr": r.stderr[:200]})
        except Exception as exc:
            # Best effort: a missing git binary, a timeout or a bad path is
            # recorded and must not stop the remaining files being restored.
            rolled.append({"path": path, "ok": False, "error": str(exc)})

    ctx.transcript.append({"state": AgentState.ROLLBACK.value, "rolled_back": rolled})
    recovered = [r["path"] for r in rolled if r.get("ok")]
    ctx.final_message = (
        f"실행 실패로 롤백했습니다. 복구 파일: {recovered}"
        if recovered
        else "롤백을 시도했으나 복구할 파일이 없거나 git이 초기화되지 않았습니다."
    )
    append_audit_event("agent_rollback", user_email=current_user, rolled_back=rolled)
    # Rollback is a recovery from a failed verification — terminal state is FAILED
    ctx.state = AgentState.FAILED
3900
+
3901
+
3902
async def _phase_memory_update(
    ctx: AgentRunContext, req: AgentRequest, router, current_user: str,
) -> None:
    """Background: Memory Updater role extracts learnings from a finished run.

    Sends ``MEMORY_UPDATER_PROMPT`` plus the last five transcript steps to
    ``router.generate``. When the parsed reply sets ``save_to_knowledge`` and
    carries ``learnings``, they are stored through ``tools.knowledge_save``.

    This coroutine never raises. Every failure is logged and dropped,
    including a failure to serialize the transcript.

    Args:
        ctx: Run context; only ``transcript`` is read.
        req: Agent request; ``message`` is used in the prompt and the note title.
        router: Object exposing an awaitable ``generate(message=..., context=...)``.
        current_user: Not used by this phase.
    """
    try:
        # Built inside the try: json.dumps raises on non-serializable transcript
        # entries, and this runs as a detached task with nobody to catch it.
        context = (
            f"{MEMORY_UPDATER_PROMPT}\n\n"
            f"Completed task: {req.message}\n\n"
            f"Last 5 transcript steps:\n{json.dumps(ctx.transcript[-5:], ensure_ascii=False)}"
        )
        raw = await router.generate(
            message="Extract learnings from this completed task.",
            context=context, max_tokens=256, temperature=0.1,
        )
        mem = _extract_agent_action(str(raw))
        if not isinstance(mem, dict):
            return
        learnings = mem.get("learnings")
        if not (mem.get("save_to_knowledge") and learnings):
            return
        if isinstance(learnings, str):
            # "\n".join on a bare string would put a newline between every character.
            learnings = [learnings]
        from tools import knowledge_save
        knowledge_save(
            "\n".join(str(item) for item in learnings),
            folder="30_Projects",
            title=f"Agent: {req.message[:60]}",
        )
    except ValueError as exc:
        # The model often replies without parseable JSON; not worth a warning.
        logging.debug("agent memory update: unparseable reply: %s", exc)
    except Exception as exc:
        # Best-effort by design, but leave a trace instead of failing silently.
        logging.warning("agent memory update failed: %s", exc)
3926
+
3927
+
3928
+ # ── Eval harness ──────────────────────────────────────────────────────────────
3929
+
3930
@app.post("/agent/eval")
async def agent_eval(req: AgentEvalRequest, request: Request):
    """Run a skill's eval cases from schema.json and return pass/fail per case.

    ``req.skill`` names a directory under ``skills/`` next to this file, and
    ``req.case_id`` optionally restricts the run to one case. Each case's
    ``input`` is passed to ``execute_tool`` under the schema's ``action``
    (default: the skill name). ``pass_criteria`` is matched only for the
    literal substrings ``success == true`` and ``success == false``; any other
    criteria are counted as passed and need manual review.

    Raises:
        HTTPException: 400 when the skill name does not resolve to a directory
            inside ``skills/``; 404 when ``schema.json`` is missing; 500 when
            ``schema.json`` cannot be read or parsed as a JSON object.
    """
    require_user(request)
    skills_root = (Path(__file__).resolve().parent / "skills").resolve()
    try:
        skill_dir = (skills_root / req.skill).resolve()
    except (OSError, ValueError) as exc:
        raise HTTPException(400, detail=f"Invalid skill name: {req.skill!r}") from exc
    # The skill name comes from the request body: refuse "..", absolute paths
    # and symlinks that would make us read a schema.json outside skills/.
    if skills_root not in skill_dir.parents:
        raise HTTPException(400, detail=f"Invalid skill name: {req.skill!r}")
    schema_path = skill_dir / "schema.json"
    if not schema_path.exists():
        raise HTTPException(404, detail=f"Skill '{req.skill}' not found or missing schema.json")

    try:
        schema = json.loads(schema_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        raise HTTPException(500, detail=f"Skill '{req.skill}' has an unreadable schema.json: {exc}") from exc
    if not isinstance(schema, dict):
        raise HTTPException(500, detail=f"Skill '{req.skill}' schema.json must contain a JSON object")

    eval_cases = schema.get("evals", [])
    if req.case_id:
        eval_cases = [c for c in eval_cases if c.get("id") == req.case_id]
    if not eval_cases:
        empty_reason = (
            f"No eval case with id '{req.case_id}' in schema.json"
            if req.case_id
            else "No eval cases defined in schema.json"
        )
        return {"skill": req.skill, "total": 0, "passed": 0, "failed": 0, "results": [],
                "message": empty_reason}

    action_name = schema.get("action", req.skill)
    results = []
    for case in eval_cases:
        case_id = case.get("id", "?")
        try:
            result = execute_tool(action_name, case.get("input", {}))
            criteria = case.get("pass_criteria", "")
            if "success == true" in criteria:
                passed = result.get("success") is True
            elif "success == false" in criteria:
                passed = result.get("success") is False
            else:
                passed = True  # manual review required
            results.append({"id": case_id, "description": case.get("description", ""),
                            "passed": passed, "result": result, "pass_criteria": criteria})
        except Exception as exc:
            # A tool that raises (or returns a non-dict) fails that case only;
            # the remaining cases still run.
            results.append({"id": case_id, "description": case.get("description", ""),
                            "passed": False, "error": str(exc),
                            "pass_criteria": case.get("pass_criteria", "")})

    n_passed = sum(1 for r in results if r.get("passed") is True)
    return {
        "skill": req.skill, "action": action_name,
        "total": len(results), "passed": n_passed, "failed": len(results) - n_passed,
        "results": results,
    }
3973
+
3974
+
3975
# Strong references to fire-and-forget tasks. The event loop keeps only weak
# references to tasks, so an otherwise unreferenced task may be garbage-collected
# before it finishes.
_AGENT_BACKGROUND_TASKS: set = set()


@app.post("/agent")
async def agent(req: AgentRequest, request: Request):
    """Natural-language local agent.

    State machine:
        IDLE → PLANNING → WAITING_APPROVAL → EXECUTING → VERIFYING
        VERIFYING → DONE | EXECUTING (retry) | ROLLBACK | FAILED
        ROLLBACK → FAILED

    Each state is handled by its ``_phase_*`` helper, which sets the next
    ``ctx.state``. The loop stops at a state in ``AGENT_TERMINAL_STATES`` or
    after 200 recorded states. An unrecognized state is turned into FAILED.

    Returns:
        A dict with ``status`` ("ok" only when the final state is DONE, else
        "failed"), ``response``, ``workspace``, ``steps``, ``state_history``,
        ``final_state`` and ``created_files``.

    Raises:
        HTTPException: 400 when no model is loaded.
    """
    current_user = require_user(request)
    enforce_rate_limit(current_user, "agent")
    if not router.current_model_id:
        raise HTTPException(status_code=400, detail="No model loaded. Call /models/load first.")

    ensure_agent_root()
    lang = detect_language(req.message)
    lang_hint = _LANG_HINT[lang]
    max_steps = max(1, min(req.max_steps, 50))
    max_retry = 3

    ctx = AgentRunContext()

    while ctx.state not in AGENT_TERMINAL_STATES:
        ctx.state_history.append(ctx.state.value)
        # Hard guard against infinite state loops
        if len(ctx.state_history) > 200:
            ctx.final_message = "에이전트 상태 머신이 최대 반복(200)에 도달해 중단했습니다."
            ctx.state = AgentState.FAILED
            break

        if ctx.state == AgentState.IDLE:
            ctx.state = AgentState.PLANNING

        elif ctx.state == AgentState.PLANNING:
            await _phase_plan(ctx, req, router, lang_hint, current_user)

        elif ctx.state == AgentState.WAITING_APPROVAL:
            _phase_approval(ctx, current_user)

        elif ctx.state == AgentState.EXECUTING:
            await _phase_execute(ctx, req, router, lang_hint, current_user, max_steps)

        elif ctx.state == AgentState.VERIFYING:
            await _phase_verify(ctx, req, router, lang_hint, current_user, max_retry)

        elif ctx.state == AgentState.ROLLBACK:
            _phase_rollback(ctx, current_user)

        else:
            ctx.state = AgentState.FAILED

    # Record terminal state in history for clients
    ctx.state_history.append(ctx.state.value)

    # Fire-and-forget memory update — does not block the response.
    # The task is held in a module-level set until it completes so it cannot
    # be garbage-collected mid-run; the done-callback drops the reference.
    memory_task = asyncio.create_task(_phase_memory_update(ctx, req, router, current_user))
    _AGENT_BACKGROUND_TASKS.add(memory_task)
    memory_task.add_done_callback(_AGENT_BACKGROUND_TASKS.discard)

    message = ctx.final_message or "작업을 완료했습니다."
    save_to_history("user", req.message, source=req.source or "web", conversation_id=req.conversation_id)
    save_to_history("assistant", message, source=req.source or "web", conversation_id=req.conversation_id)
    created_files = _collect_created_files(ctx.transcript)
    return {
        "status": "ok" if ctx.state == AgentState.DONE else "failed",
        "response": message,
        "workspace": str(AGENT_ROOT),
        "steps": ctx.transcript,
        "state_history": ctx.state_history,
        "final_state": ctx.state.value,
        "created_files": created_files,
    }
3276
4047
 
3277
4048
 
3278
4049
  # ── Direct Tool API ───────────────────────────────────────────────────────────
@@ -3297,9 +4068,13 @@ async def tools_workspace_tree(req: ToolWorkspaceTreeRequest, request: Request):
3297
4068
 
3298
4069
 
3299
4070
@app.post("/tools/read_file")
async def tools_read_file(req: ToolReadFileRequest, request: Request):
    """Call ``read_file`` with the request's path, offset, limit and line_numbers.

    A ``ToolError`` from the tool is returned to the client as HTTP 400.
    """
    require_user(request)
    try:
        payload = {
            "status": "ok",
            "workspace": str(AGENT_ROOT),
            "result": read_file(
                req.path,
                offset=req.offset,
                limit=req.limit,
                line_numbers=req.line_numbers,
            ),
        }
    except ToolError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return payload
3303
4078
 
3304
4079
 
3305
4080
  @app.post("/tools/write_file")
@@ -3308,12 +4083,51 @@ async def tools_write_file(req: ToolWriteFileRequest, request: Request):
3308
4083
  return _tool_response(write_file, req.path, req.content)
3309
4084
 
3310
4085
 
4086
@app.post("/tools/edit_file")
async def tools_edit_file(req: ToolEditFileRequest, request: Request):
    """Call ``edit_file`` to replace ``old_string`` with ``new_string`` in ``path``.

    A ``ToolError`` from the tool is returned to the client as HTTP 400.
    """
    require_user(request)
    try:
        payload = {
            "status": "ok",
            "workspace": str(AGENT_ROOT),
            "result": edit_file(
                req.path,
                req.old_string,
                req.new_string,
                replace_all=req.replace_all,
            ),
        }
    except ToolError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return payload
4094
+
4095
+
3311
4096
@app.post("/tools/search_files")
async def tools_search_files(req: ToolSearchFilesRequest, request: Request):
    """Run ``search_files`` through ``_tool_response`` with query, path and max_results."""
    require_user(request)
    search_args = (req.query, req.path, req.max_results)
    return _tool_response(search_files, *search_args)
3315
4100
 
3316
4101
 
4102
@app.post("/tools/grep")
async def tools_grep(req: ToolGrepRequest, request: Request):
    """Call ``grep`` with the request's pattern and search options.

    A ``ToolError`` from the tool is returned to the client as HTTP 400.
    """
    require_user(request)
    options = {
        "path": req.path,
        "glob": req.glob,
        "max_results": req.max_results,
        "case_insensitive": req.case_insensitive,
        "context_lines": req.context_lines,
    }
    try:
        payload = {
            "status": "ok",
            "workspace": str(AGENT_ROOT),
            "result": grep(req.pattern, **options),
        }
    except ToolError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return payload
4117
+
4118
+
4119
@app.post("/tools/todo_read")
async def tools_todo_read(request: Request):
    """Run ``todo_read`` through ``_tool_response`` for an authenticated caller."""
    require_user(request)
    response = _tool_response(todo_read)
    return response
4123
+
4124
+
4125
@app.post("/tools/todo_write")
async def tools_todo_write(req: ToolTodoWriteRequest, request: Request):
    """Run ``todo_write`` through ``_tool_response`` with the request's ``todos``."""
    require_user(request)
    response = _tool_response(todo_write, req.todos)
    return response
4129
+
4130
+
3317
4131
  @app.post("/tools/clear_history")
3318
4132
  async def tools_clear_history(req: ToolClearHistoryRequest, request: Request):
3319
4133
  current_user = require_user(request)
@@ -3378,21 +4192,28 @@ async def tools_pdf_pages(path: str, request: Request):
3378
4192
  target = Path(path).expanduser().resolve()
3379
4193
  if not target.exists() or not target.is_file():
3380
4194
  raise HTTPException(status_code=404, detail="File not found")
4195
+ import fitz # PyMuPDF
4196
+ doc = None
3381
4197
  try:
3382
- import fitz # PyMuPDF
3383
4198
  doc = fitz.open(str(target))
4199
+ total = len(doc)
3384
4200
  pages = []
3385
4201
  for i, page in enumerate(doc):
3386
4202
  if i >= 20: # 최대 20페이지
3387
4203
  break
3388
- mat = fitz.Matrix(1.5, 1.5) # 1.5x 해상도
4204
+ mat = fitz.Matrix(1.5, 1.5)
3389
4205
  pix = page.get_pixmap(matrix=mat)
3390
4206
  b64 = base64.b64encode(pix.tobytes("png")).decode()
3391
4207
  pages.append({"page": i + 1, "b64": b64})
3392
- doc.close()
3393
- return {"total": len(doc), "pages": pages}
4208
+ return {"total": total, "pages": pages}
3394
4209
  except Exception as e:
3395
4210
  raise HTTPException(status_code=500, detail=f"PDF 렌더링 실패: {e}")
4211
+ finally:
4212
+ if doc is not None:
4213
+ try:
4214
+ doc.close()
4215
+ except Exception as e:
4216
+ logging.warning("fitz doc close failed: %s", e)
3396
4217
 
3397
4218
 
3398
4219
  @app.get("/tools/download")
@@ -3416,6 +4237,7 @@ async def tools_download(path: str, request: Request):
3416
4237
  @app.post("/upload/document")
3417
4238
  async def upload_document(request: Request, file: UploadFile = File(...)):
3418
4239
  current_user = require_user(request)
4240
+ enforce_rate_limit(current_user, "upload")
3419
4241
  """Upload a document and extract text (PDF, DOCX, XLSX, PPTX, TXT, MD, CSV)."""
3420
4242
  suffix = Path(file.filename or "upload").suffix.lower()
3421
4243
  allowed = {".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".csv"}
@@ -3424,6 +4246,9 @@ async def upload_document(request: Request, file: UploadFile = File(...)):
3424
4246
  contents = await file.read()
3425
4247
  if len(contents) > 10 * 1024 * 1024:
3426
4248
  raise HTTPException(status_code=400, detail="파일이 너무 큽니다. 최대 10MB.")
4249
+ # MIME sniff — verify the bytes actually match the claimed extension (cheap header check)
4250
+ if not _bytes_match_extension(contents, suffix):
4251
+ raise HTTPException(status_code=400, detail=f"파일 내용이 확장자({suffix})와 일치하지 않습니다.")
3427
4252
  with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
3428
4253
  tmp.write(contents)
3429
4254
  tmp_path = tmp.name
@@ -3879,61 +4704,96 @@ async def tools_deploy_project(req: ToolScriptRequest, request: Request):
3879
4704
  return _tool_response(deploy_project, req.cwd, req.script)
3880
4705
 
3881
4706
 
4707
+ _MCP_TOOL_DESCRIPTIONS: Dict[str, str] = {
4708
+ "list_dir": "List files in the agent workspace.",
4709
+ "workspace_tree": "Return a recursive workspace tree.",
4710
+ "read_file": "Read a UTF-8 file from the workspace with optional line numbers and offset/limit slicing.",
4711
+ "write_file": "Write a UTF-8 file inside the workspace (new files / full rewrites).",
4712
+ "edit_file": "Precise diff-style edit: replace exact old_string with new_string. Requires unique match unless replace_all=true.",
4713
+ "search_files": "Substring search in text files (legacy).",
4714
+ "grep": "Regex search across the workspace with line numbers and optional context.",
4715
+ "todo_read": "Read the agent's persistent TODO list for the current workspace.",
4716
+ "todo_write": "Replace the agent's TODO list (id, content, status: pending/in_progress/completed).",
4717
+ "clear_history": "Clear chat history to reduce context and speed up responses.",
4718
+ "inspect_html": "Inspect local HTML structure and assets.",
4719
+ "preview_url": "Return a server URL for a workspace file.",
4720
+ "create_docx": "Create a Word DOCX document in the agent workspace.",
4721
+ "create_xlsx": "Create an XLSX spreadsheet in the agent workspace.",
4722
+ "create_pptx": "Create a PPTX presentation deck in the agent workspace.",
4723
+ "create_pdf": "Create a PDF document in the agent workspace.",
4724
+ "local_list": "List any local folder (requires user permission via UI).",
4725
+ "local_read": "Read any local file (requires user permission via UI).",
4726
+ "local_write": "Write any local file (requires user permission via UI).",
4727
+ "read_document": "Extract text from PDF, DOCX, XLSX, PPTX, TXT, MD, CSV files.",
4728
+ "computer_screenshot": "Capture the current Mac screen as base64 PNG.",
4729
+ "computer_open_app": "Open or focus a Mac app, e.g. Google Chrome.",
4730
+ "computer_open_url": "Open a URL in a Mac app, e.g. Google Chrome.",
4731
+ "computer_click": "Click at screen coordinates (x, y).",
4732
+ "computer_type": "Type text at the current focus position.",
4733
+ "computer_key": "Press a keyboard key or shortcut (e.g. 'command+c').",
4734
+ "computer_scroll": "Scroll at screen coordinates.",
4735
+ "computer_move": "Move the mouse to screen coordinates.",
4736
+ "computer_drag": "Drag from (x1,y1) to (x2,y2).",
4737
+ "computer_status": "Check if Mac Computer Use (pyautogui) is available.",
4738
+ "chrome_status": "Report Chrome desktop bridge availability.",
4739
+ "computer_use_status": "Report Mac Computer Use bridge availability.",
4740
+ "knowledge_save": "Save a note into the local knowledge garden.",
4741
+ "knowledge_search": "Search the local knowledge garden.",
4742
+ "knowledge_tree": "List local knowledge garden markdown files.",
4743
+ "knowledge_graph_ingest":"Ingest a message, AI answer, or connector event into the SQLite knowledge graph.",
4744
+ "knowledge_graph_search":"Search graph nodes, summaries, and JSON metadata.",
4745
+ "knowledge_graph_graph": "Return Obsidian-style graph nodes and edges.",
4746
+ "knowledge_graph_context":"Return compact graph-backed RAG context for a prompt.",
4747
+ "obsidian_save": "Save a note into the Obsidian-compatible memory vault.",
4748
+ "obsidian_search": "Search the Obsidian-compatible memory vault.",
4749
+ "obsidian_tree": "List Obsidian memory vault markdown files.",
4750
+ "git_status": "Read-only local git status inside the workspace.",
4751
+ "git_diff": "Read-only local git diff inside the workspace.",
4752
+ "git_log": "Read-only local git log inside the workspace.",
4753
+ "git_show": "Read-only local git show --stat inside the workspace.",
4754
+ "network_status": "Get current local/private IP, public IP, hostname, and Wi-Fi info.",
4755
+ "run_command": "Run an allowlisted local command inside the workspace.",
4756
+ "build_project": "Run an allowlisted package.json build/compile/typecheck/test script to verify changes actually work.",
4757
+ "deploy_project": "Run an allowlisted package.json deploy/preview/release/package installer script (pkg/exe).",
4758
+ }
4759
+
4760
+
4761
@app.get("/tools/permissions")
async def tools_permissions(request: Request):
    """Return the compact per-tool permission listing for an authenticated caller.

    The payload is whatever ``list_tool_permissions()`` produces, a simpler
    authorization-layer summary (tool / risk / requires_approval / network)
    derived from TOOL_GOVERNANCE. Use /mcp/tools for the full 7-dimensional
    governance object.
    """
    require_user(request)
    permissions = list_tool_permissions()
    return {"status": "ok", "permissions": permissions}
4770
+
4771
+
3882
4772
@app.get("/mcp/tools")
async def mcp_tools():
    """List installed MCPs and every described tool with its permission and governance.

    Each tool entry carries the description from ``_MCP_TOOL_DESCRIPTIONS``,
    the result of ``get_tool_permission`` and the seven governance fields taken
    from ``TOOL_GOVERNANCE`` (or ``_TOOL_GOVERNANCE_DEFAULT`` when the tool has
    no entry of its own).
    """
    governance_fields = (
        "risk", "destructive", "shell", "network",
        "auto_approve", "sandbox", "rollback",
    )

    def describe(tool_name, summary):
        # Build one catalog entry; key order matches the response contract.
        policy = TOOL_GOVERNANCE.get(tool_name, _TOOL_GOVERNANCE_DEFAULT)
        return {
            "name": tool_name,
            "description": summary,
            "permission": get_tool_permission(tool_name),
            "governance": {field: policy[field] for field in governance_fields},
        }

    installed = load_mcp_installs().get("installed", {})
    tools = [describe(name, text) for name, text in _MCP_TOOL_DESCRIPTIONS.items()]
    return {
        "status": "ok",
        "workspace": str(AGENT_ROOT),
        "installed_mcps": [mcp_public_item(item, installed) for item in MCP_REGISTRY],
        "tools": tools,
    }
3938
4798
 
3939
4799