npm - agentforge-multi - Versions diffs - 0.1.5 → 0.1.7 - Mend

agentforge-multi 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/agentforge +609 -77
package/package.json +1 -1

package/agentforge CHANGED Viewed

@@ -27,6 +27,7 @@ from collections import deque
 from pathlib import Path
 import requests as _requests
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from rich.console import Console
 from rich.layout import Layout
@@ -44,6 +45,8 @@ from prompt_toolkit.formatted_text import HTML
 # ── Constants ─────────────────────────────────────────────────────────────────
 CODEX_BIN = Path.home() / ".npm-global" / "bin" / "codex"
+KNOWLEDGE_DIR  = Path.home() / ".agentforge" / "knowledge"   # 영구 지식 저장소 루트
+SESSION_FILE   = Path.home() / ".agentforge" / "last_session.json"  # 세션 영속화
 DEFAULT_MAX_ITER = 5000
 WORKER_BUF_LINES = 60
 DEFAULT_WORKER_MODEL = "gpt-5.4"
@@ -57,19 +60,70 @@ console = Console()
 _last_session: dict | None = None        # {goal, history, eval_history, workdir}
 _interrupt_event = threading.Event()     # ESC 감지 플래그
+def _persist_session(session: dict) -> None:
+    """Save the session to SESSION_FILE as UTF-8 JSON (best effort).
+
+    ``worker_lines`` is display-only data for the TUI, so it is removed from
+    every history entry before saving; a ``saved_at`` timestamp is added.
+    The JSON is written to a sibling temp file and then renamed over
+    SESSION_FILE, so an interrupted write cannot leave a truncated file.
+    Every failure is swallowed on purpose: persistence must never break the
+    agent loop.
+    """
+    try:
+        SESSION_FILE.parent.mkdir(parents=True, exist_ok=True)
+        history_slim = [
+            {k: v for k, v in h.items() if k != 'worker_lines'}
+            for h in session.get('history', [])
+        ]
+        data = {
+            **session,
+            'history': history_slim,
+            'saved_at': time.strftime("%Y-%m-%dT%H:%M:%S"),
+        }
+        text = json.dumps(data, ensure_ascii=False, indent=2)
+        # ensure_ascii=False emits raw non-ASCII text, so the encoding must be
+        # explicit instead of depending on the platform locale.
+        tmp_file = SESSION_FILE.with_name(SESSION_FILE.name + ".tmp")
+        tmp_file.write_text(text, encoding="utf-8")
+        tmp_file.replace(SESSION_FILE)
+    except Exception:
+        # Deliberate best-effort: a failed save only costs the resume feature.
+        pass
+def _load_persisted_session() -> dict | None:
+    """Load the session saved by _persist_session, or None if unusable.
+
+    Returns None when the file is missing, unreadable, not valid UTF-8 JSON,
+    or not shaped like a session (a dict with a ``goal`` and a list of dict
+    history entries). ``worker_lines`` is dropped when saving, so it is
+    restored as an empty list on every history entry.
+    """
+    try:
+        data = json.loads(SESSION_FILE.read_text(encoding="utf-8"))
+    except Exception:
+        return None
+    # Reject anything that is not session-shaped so callers can index
+    # data['goal'] and data['history'] without further checks.
+    if not isinstance(data, dict) or 'goal' not in data:
+        return None
+    history = data.setdefault('history', [])
+    if not isinstance(history, list):
+        return None
+    for h in history:
+        if not isinstance(h, dict):
+            return None
+        h.setdefault('worker_lines', [])
+    return data
+_current_response = None                 # 현재 스트리밍 HTTP 응답 (즉시 끊기용)
+# HTTP 세션 — TCP+TLS 연결 재사용으로 매 호출 수십~수백 ms 절감
+_session = _requests.Session()
+_session.headers.update({"Content-Type": "application/json"})
+# 인증 헤더 캐시 — auth.json mtime이 바뀔 때만 재읽기
+_auth_cache: dict | None = None
+_auth_mtime: float = 0.0
 # ── ChatGPT backend API ────────────────────────────────────────────────────────
 def _get_auth_headers() -> dict:
-    """~/.codex/auth.json 에서 Bearer 헤더 + ChatGPT-Account-Id 반환."""
+    """~/.codex/auth.json 에서 Bearer 헤더 반환. mtime 기반 캐싱."""
+    global _auth_cache, _auth_mtime
     auth_file = Path.home() / ".codex" / "auth.json"
+    try:
+        mtime = auth_file.stat().st_mtime
+    except OSError:
+        return {}
+    if _auth_cache is not None and mtime == _auth_mtime:
+        return _auth_cache
     data = json.loads(auth_file.read_text())
     token = data["tokens"]["access_token"]
     account_id = data["tokens"].get("account_id", "")
-    return {
+    _auth_cache = {
         "Authorization": f"Bearer {token}",
         "Content-Type": "application/json",
         "ChatGPT-Account-Id": account_id,
     }
+    _auth_mtime = mtime
+    return _auth_cache
 WORKER_TOOLS = [
@@ -134,30 +188,58 @@ WORKER_TOOLS = [
 ]
+RESEARCH_TOOLS = WORKER_TOOLS + [
+    {
+        "type": "function", "name": "web_search",
+        "description": "Search the web using DuckDuckGo (or Brave if BRAVE_API_KEY set). Returns summaries and links.",
+        "parameters": {"type": "object",
+                       "properties": {"query": {"type": "string"}},
+                       "required": ["query"]},
+        "strict": False,
+    },
+    {
+        "type": "function", "name": "fetch_url",
+        "description": "Fetch and extract text content from a URL.",
+        "parameters": {"type": "object",
+                       "properties": {"url": {"type": "string"}},
+                       "required": ["url"]},
+        "strict": False,
+    },
+]
 def _iter_events(payload: dict):
     """
     ChatGPT backend-api/codex/responses 스트리밍 호출.
-    SSE 이벤트를 실시간으로 yield.  _interrupt_event가 set되면 조기 종료.
+    SSE 이벤트를 실시간으로 yield.  _interrupt_event가 set되면 즉시 연결 끊기.
     """
+    global _current_response
     headers = _get_auth_headers()
     try:
-        r = _requests.post(
+        r = _session.post(
             CHATGPT_RESPONSES_URL, headers=headers,
             json=payload, stream=True, timeout=300,
         )
+        _current_response = r
         r.raise_for_status()
-        for line in r.iter_lines():
-            if _interrupt_event.is_set():
-                break
-            if not line:
-                continue
-            decoded = line.decode("utf-8", errors="replace")
-            if decoded.startswith("data: "):
-                try:
-                    yield json.loads(decoded[6:])
-                except Exception:
-                    pass
+        try:
+            for line in r.iter_lines():
+                if _interrupt_event.is_set():
+                    r.close()
+                    break
+                if not line:
+                    continue
+                decoded = line.decode("utf-8", errors="replace")
+                if decoded.startswith("data: "):
+                    try:
+                        yield json.loads(decoded[6:])
+                    except Exception:
+                        pass
+        finally:
+            r.close()   # GeneratorExit(gen.close()) 시에도 HTTP 연결 즉시 반환
+            _current_response = None
     except Exception as e:
+        _current_response = None
         yield {"type": "_error", "message": str(e)}
@@ -195,6 +277,48 @@ def _execute_tool(name: str, args: dict, workdir: str) -> str:
             return "\n".join(
                 ("dir  " if p.is_dir() else "file ") + p.name for p in items
             )
+        elif name == "web_search":
+            query = args["query"]
+            brave_key = os.environ.get("BRAVE_API_KEY")
+            if brave_key:
+                r = _session.get(
+                    "https://api.search.brave.com/res/v1/web/search",
+                    params={"q": query, "count": 10},
+                    headers={"Accept": "application/json",
+                             "Accept-Encoding": "gzip",
+                             "X-Subscription-Token": brave_key},
+                    timeout=15,
+                )
+                items = r.json().get("web", {}).get("results", [])
+                return "\n".join(
+                    f"[{i+1}] {it['title']}\n    {it['url']}\n    {it.get('description','')}"
+                    for i, it in enumerate(items[:8])
+                ) or "(결과 없음)"
+            else:
+                r = _session.get(
+                    "https://html.duckduckgo.com/html/",
+                    params={"q": query},
+                    headers={"User-Agent": "Mozilla/5.0"},
+                    timeout=15,
+                )
+                snippets = re.findall(r'class="result__snippet">(.*?)</a>', r.text, re.S)
+                titles   = re.findall(r'class="result__a"[^>]*>(.*?)</a>', r.text, re.S)
+                urls     = re.findall(r'uddg=(https?[^&"]+)', r.text)
+                from urllib.parse import unquote
+                lines = []
+                for i, (t, u, s) in enumerate(zip(titles, urls, snippets)):
+                    t = re.sub(r'<[^>]+>', '', t).strip()
+                    s = re.sub(r'<[^>]+>', '', s).strip()
+                    lines.append(f"[{i+1}] {t}\n    {unquote(u)}\n    {s}")
+                    if i >= 7:
+                        break
+                return "\n".join(lines) or "(결과 없음)"
+        elif name == "fetch_url":
+            url = args["url"]
+            r = _session.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
+            text = re.sub(r'<[^>]+>', ' ', r.text)
+            text = re.sub(r'\s+', ' ', text).strip()
+            return text[:8000]
         else:
             return f"Unknown tool: {name}"
     except Exception as e:
@@ -203,23 +327,30 @@ def _execute_tool(name: str, args: dict, workdir: str) -> str:
 # ── Slash command autocomplete ────────────────────────────────────────────────
 SLASH_COMMANDS = [
-    ("/resume", "마지막 세션을 이어서 실행"),
-    ("/exit",   "agentforge 종료"),
+    ("/resume",          "마지막 세션 재개"),
+    ("/exit",            "종료"),
+    ("/mode code",       "코딩 모드로 전환"),
+    ("/mode research",   "연구 모드로 전환"),
+    ("/eval-every <N>",  "N번마다 Evaluator 실행"),
+    ("/status",          "현재 설정 확인"),
+    ("/help",            "커맨드 목록 표시"),
 ]
 class SlashCompleter(Completer):
+    """Completer that suggests the entries of SLASH_COMMANDS."""
     def get_completions(self, document, complete_event):
+        """Yield a Completion for each slash command matching the typed text.
+
+        A command matches when its full text starts with what was typed after
+        the slash, or when its first word starts with the first typed word.
+        Nothing is yielded unless the text before the cursor starts with '/'.
+        """
         text = document.text_before_cursor
         if text.startswith('/'):
-            typed = text.lstrip('/')
+            import html as _html  # escapes text such as '<N>' for the HTML markup below
+            typed = text[1:]  # strip leading /
+            # First typed word; '' when nothing (or only whitespace) follows
+            # the slash, so input like "/ " no longer raises IndexError.
+            typed_words = typed.split()
+            typed_head = typed_words[0] if typed_words else ''
             for cmd, desc in SLASH_COMMANDS:
-                name = cmd.lstrip('/')
-                if name.startswith(typed):
+                name = cmd[1:].split()[0]  # first word without /
+                full = cmd[1:]             # full command without /
+                if full.startswith(typed) or name.startswith(typed_head):
                     yield Completion(
                         cmd,
                         start_position=-len(text),
-                        display=HTML(f'<ansicyan>{cmd}</ansicyan>'),
-                        display_meta=HTML(f'<ansiwhite>{desc}</ansiwhite>'),
+                        display=HTML(f'<ansicyan>{_html.escape(cmd)}</ansicyan>'),
+                        display_meta=HTML(f'<ansiwhite>{_html.escape(desc)}</ansiwhite>'),
                     )
 PROMPT_STYLE = PtStyle.from_dict({
@@ -298,23 +429,276 @@ EVALUATOR_SYSTEM = textwrap.dedent("""\
     Do NOT write anything before the decision keyword.
 """).strip()
-def build_worker_prompt(goal: str, history: list) -> str:
+RESEARCH_WORKER_SYSTEM = textwrap.dedent("""\
+    You are an expert researcher. Your goal is to investigate a topic thoroughly.
+    - Use web_search to find relevant papers, articles, and data
+    - Use fetch_url to read full content of important pages
+    - Use read_file/write_file to organize findings into structured notes
+    - Synthesize information across multiple sources
+    - Stay on topic; do not drift from the research goal
+""").strip()
+RESEARCH_EVALUATOR_SYSTEM = textwrap.dedent("""\
+    You are a rigorous academic reviewer. Evaluate whether the research goal is achieved.
+    Respond with EXACTLY ONE first line: DONE, IMPROVE: <feedback>, or REDIRECT: <feedback>
+    DONE only if: sufficient sources found, content analyzed, findings written to file(s).
+    IMPROVE if: more sources needed, analysis incomplete, or notes missing.
+    REDIRECT if: wrong direction entirely.
+    When DONE, add Korean summary:
+    판단 이유: ...
+    결과물 위치: ...
+    결과 요약: ...
+""").strip()
+TOKEN_COMPRESS_THRESHOLD = 100_000  # 프롬프트 추정 토큰이 이 수를 넘으면 압축
+HISTORY_KEEP_RECENT = 4             # 압축 후 보존할 최근 항목 수 (상세 내용 유지)
+def _estimate_tokens(text: str) -> int:
+    """Estimate how many tokens ``text`` occupies.
+
+    Uses tiktoken's ``cl100k_base`` encoding when that works; if anything in
+    that path raises (for example tiktoken is not installed), falls back to
+    a character-count approximation of three characters per token.
+    """
+    try:
+        import tiktoken
+        encoder = tiktoken.get_encoding("cl100k_base")
+        token_count = len(encoder.encode(text))
+    except Exception:
+        # Rough blend: English ~4 chars/token, Korean ~1.5 chars/token.
+        token_count = len(text) // 3
+    return token_count
+# ── Knowledge Store (RAG) ────────────────────────────────────────────────────
+def _goal_to_slug(goal: str) -> str:
+    """Turn a goal string into a filesystem-safe folder name.
+
+    Characters other than word characters, Hangul syllables and whitespace
+    are removed, whitespace runs become single underscores, and the result
+    is cut to 72 characters. An empty result becomes "unnamed".
+    """
+    cleaned = re.sub(r'[^\w가-힣\s]', '', goal).strip()
+    underscored = re.sub(r'\s+', '_', cleaned)
+    truncated = underscored[:72]
+    return truncated if truncated else "unnamed"
+def _knowledge_path(goal: str) -> Path:
+    """Return the knowledge-store directory for ``goal``, creating it if needed.
+
+    The directory is KNOWLEDGE_DIR joined with the slug of the goal.
+    """
+    target = KNOWLEDGE_DIR / _goal_to_slug(goal)
+    target.mkdir(parents=True, exist_ok=True)
+    return target
+def _save_attempt(goal: str, record: dict) -> None:
+    """Append one attempt record as a JSON line to the goal's attempts.jsonl.
+
+    Best effort: any failure is swallowed so that a storage problem never
+    affects the agent itself.
+    NOTE(review): the original comment relied on appends being atomic on
+    Linux for thread safety; that is not enforced here. Confirm before
+    calling this from several threads.
+    """
+    try:
+        target = _knowledge_path(goal) / "attempts.jsonl"
+        serialized = json.dumps(record, ensure_ascii=False) + "\n"
+        with target.open("a", encoding="utf-8") as handle:
+            handle.write(serialized)
+    except Exception:
+        pass  # a failed save must not disturb the agent
+def _load_past_attempts(goal: str) -> list[dict]:
+    """Load every stored attempt record for ``goal``.
+
+    Reads attempts.jsonl from the goal's knowledge directory. Blank lines,
+    lines that are not valid JSON, and JSON values that are not objects are
+    skipped, so every returned item is a dict as the return type promises.
+    Returns an empty list when the file is missing or anything else fails.
+    """
+    try:
+        path = _knowledge_path(goal) / "attempts.jsonl"
+        if not path.exists():
+            return []
+        records: list[dict] = []
+        for line in path.read_text(encoding="utf-8").splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                parsed = json.loads(line)
+            except Exception:
+                continue  # tolerate a corrupt or half-written line
+            # A bare number/string/list is valid JSON but not an attempt
+            # record; consumers call .get() on each item.
+            if isinstance(parsed, dict):
+                records.append(parsed)
+        return records
+    except Exception:
+        return []
+def _retrieve_relevant(query: str, attempts: list[dict], top_k: int = 6) -> list[dict]:
+    """Return up to ``top_k`` past attempts ranked by relevance to ``query``.
+
+    Each attempt is represented by the lower-cased, whitespace-split text of
+    its feedback, worker_summary and decision. Ranking uses BM25
+    (rank_bm25.BM25Okapi); if anything in that path raises, including the
+    import, ranking falls back to the number of words shared with the query.
+    Ties keep the original order of ``attempts``.
+    """
+    if not attempts:
+        return []
+    def _tokens(attempt: dict) -> list[str]:
+        joined = f"{attempt.get('feedback','')} {attempt.get('worker_summary','')} {attempt.get('decision','')}"
+        return joined.lower().split()
+    query_tokens = query.lower().split()
+    try:
+        from rank_bm25 import BM25Okapi
+        scores = BM25Okapi([_tokens(a) for a in attempts]).get_scores(query_tokens)
+        order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
+        return [attempts[i] for i in order[:top_k]]
+    except Exception:
+        # Fallback: score by word overlap with the query.
+        wanted = set(query_tokens)
+        ranked = sorted(attempts, key=lambda a: len(wanted & set(_tokens(a))), reverse=True)
+        return ranked[:top_k]
+def _build_rag_section(past_attempts: list[dict], current_context: str) -> str:
+    """Build the "past attempts" warning section for the worker prompt.
+
+    Retrieves up to six past attempts relevant to ``current_context`` and
+    formats each as a badge line, followed by a "tried:" line when a worker
+    summary exists. Returns "" when there is nothing to show.
+    """
+    if not past_attempts:
+        return ""
+    relevant = _retrieve_relevant(current_context, past_attempts, top_k=6)
+    if not relevant:
+        return ""
+    badges = {"DONE": "✓", "IMPROVE": "▲", "REDIRECT": "↩"}
+    lines = [
+        "━" * 60,
+        "PAST ATTEMPTS FROM PREVIOUS SESSIONS — DO NOT REPEAT THESE:",
+    ]
+    for a in relevant:
+        # Records come from disk and may hold null or non-string values;
+        # coerce before slicing so a bad record cannot abort prompt building.
+        decision = str(a.get("decision") or "?")
+        feedback = str(a.get("feedback") or "")[:200]
+        summary  = str(a.get("worker_summary") or "")[:150]
+        ts       = str(a.get("timestamp") or "")[:10]
+        badge    = badges.get(decision, "?")
+        lines.append(f"  [{badge} {decision}] ({ts}) {feedback}")
+        if summary:
+            lines.append(f"    tried: {summary}")
+    lines.append("━" * 60)
+    return "\n".join(lines)
+# ── History Compression ───────────────────────────────────────────────────────
+COMPRESSOR_SYSTEM = textwrap.dedent("""\
+    You are a concise summarizer for an AI agent's work log.
+    Given a list of past iterations (what the agent tried and what the evaluator said),
+    produce a compact summary that preserves:
+    - Every approach that was tried (even failed ones)
+    - Why each approach was rejected or approved
+    - The current state of the work (what exists, what doesn't)
+    - Any important file paths or technical details
+    Write in plain English. Be dense but complete. Max 400 words.
+    Do NOT omit failed approaches — the agent must not repeat them.
+""").strip()
+def _call_compressor(entries: list[dict], goal: str, model: str | None) -> str:
+    """Summarize past history entries with the LLM.
+
+    Sends one line per entry (iteration, decision, clipped feedback and
+    worker summary) with COMPRESSOR_SYSTEM as instructions and collects the
+    streamed text. If the stream raises or yields no text, returns a plain
+    one-line-per-iteration listing instead, so the caller always gets a
+    non-LLM fallback rather than an exception from the streaming call.
+
+    ``model`` defaults to DEFAULT_EVAL_MODEL when None or empty.
+    """
+    def _clip(value, limit: int) -> str:
+        # Feedback/summary may be None; treat that as empty text.
+        return str(value or "")[:limit]
+    lines = [f"GOAL: {goal}", "", "ITERATIONS TO SUMMARIZE:"]
+    for h in entries:
+        lines.append(f"  Iter {h['iter']}: [{h['decision']}] {_clip(h.get('feedback'), 200)}")
+        ws = _clip(h.get('worker_summary'), 200)
+        if ws:
+            lines.append(f"    Worker did: {ws}")
+    prompt_text = "\n".join(lines)
+    model = model or DEFAULT_EVAL_MODEL
+    payload = {
+        "model": model,
+        "instructions": COMPRESSOR_SYSTEM,
+        "input": [{"role": "user", "content": prompt_text}],
+        "store": False,
+        "stream": True,
+    }
+    parts: list[str] = []
+    try:
+        gen = _iter_events(payload)
+        try:
+            for ev in gen:
+                if ev.get("type") == "response.output_text.delta":
+                    parts.append(ev.get("delta") or "")
+        finally:
+            gen.close()
+    except Exception:
+        # A partial summary could silently drop iterations; use the fallback.
+        parts = []
+    result = "".join(parts).strip()
+    if not result:
+        # Fallback: plain one-line listing of the original entries.
+        result = "\n".join(
+            f"Iter {h['iter']} [{h['decision']}]: {_clip(h.get('feedback'), 100)}"
+            for h in entries
+        )
+    return result
+def compress_history(history: list, goal: str, model: str | None) -> list:
+    """Replace the older part of ``history`` with one LLM-written summary entry.
+
+    The last HISTORY_KEEP_RECENT entries are kept verbatim. Of the older
+    entries, ordinary iterations are summarized via _call_compressor and
+    appended to the text of any earlier ``compressed_summary`` entries.
+    This function does not check any size threshold; the caller decides
+    when to compress.
+
+    Returns ``history`` unchanged when there is no ordinary entry to
+    compress, otherwise a new list ``[summary_entry] + recent entries``.
+    The summary's ``covers`` label starts where the earliest previous
+    summary started, because its text is carried over.
+    """
+    to_compress = history[:-HISTORY_KEEP_RECENT]
+    keep = history[-HISTORY_KEEP_RECENT:]
+    # Earlier summaries are carried over as text; only ordinary entries are
+    # sent to the LLM.
+    prev_summaries = []
+    normal_entries = []
+    for h in to_compress:
+        if h.get('type') == 'compressed_summary':
+            prev_summaries.append(h)
+        else:
+            normal_entries.append(h)
+    if not normal_entries:
+        return history  # nothing to compress
+    new_text = _call_compressor(normal_entries, goal, model)
+    # Keep every earlier summary, not just the last one seen.
+    prev_summary_text = "\n\n".join(s['content'] for s in prev_summaries)
+    if prev_summary_text:
+        combined = prev_summary_text + "\n\n--- Additional history ---\n" + new_text
+    else:
+        combined = new_text
+    first_iter = normal_entries[0]['iter']
+    if prev_summaries:
+        # The combined text also spans what the earlier summary covered, so
+        # the label must start at that summary's first iteration.
+        prev_start = str(prev_summaries[0].get('covers', '')).split('–', 1)[0]
+        if prev_start:
+            first_iter = prev_start
+    last_iter  = normal_entries[-1]['iter']
+    summary_entry = {
+        'type': 'compressed_summary',
+        'covers': f"{first_iter}–{last_iter}",
+        'content': combined,
+    }
+    return [summary_entry] + keep
+def build_worker_prompt(goal: str, history: list,
+                        past_attempts: list[dict] | None = None) -> str:
     lines = [f"GOAL: {goal}", ""]
+    # RAG 섹션: 현재 상황을 쿼리로 과거 시도 검색·삽입
+    if past_attempts:
+        current_ctx = " ".join(
+            f"{h.get('feedback','')} {h.get('worker_summary','')}"
+            for h in history[-3:] if h.get('type') != 'compressed_summary'
+        ) or goal
+        rag = _build_rag_section(past_attempts, current_ctx)
+        if rag:
+            lines.append(rag)
+            lines.append("")
     if not history:
         lines += [WORKER_SYSTEM, "", "Begin working on the goal above. Make concrete changes now."]
     else:
         lines.append("ITERATION HISTORY:")
         for h in history:
-            lines.append(f"  Iteration {h['iter']}:")
-            lines.append(f"    Your work: {h['worker_summary']}")
-            lines.append(f"    Evaluator: {h['decision']}" +
-                         (f" — {h['feedback']}" if h['feedback'] else ""))
+            if h.get('type') == 'compressed_summary':
+                lines.append(f"  [COMPRESSED SUMMARY — Iter {h['covers']}]")
+                for ln in h['content'].splitlines():
+                    lines.append(f"    {ln}")
+                lines.append("  ──────────────────────────────────────")
+            else:
+                lines.append(f"  Iteration {h['iter']}:")
+                lines.append(f"    Your work: {h['worker_summary']}")
+                lines.append(f"    Evaluator: {h['decision']}" +
+                             (f" — {h['feedback']}" if h['feedback'] else ""))
         lines.append("")
-        last = history[-1]
-        if last['decision'].upper() == 'IMPROVE':
-            lines.append(f"INSTRUCTION: Refine your previous work. Feedback: {last['feedback']}")
-        else:
-            lines.append(f"INSTRUCTION: Abandon previous approach. Try differently: {last['feedback']}")
+        # 마지막 실제 iteration entry 찾기
+        last = next((h for h in reversed(history) if h.get('type') != 'compressed_summary'), None)
+        if last:
+            if last['decision'].upper() == 'IMPROVE':
+                lines.append(f"INSTRUCTION: Refine your previous work. Feedback: {last['feedback']}")
+            else:
+                lines.append(f"INSTRUCTION: Abandon previous approach. Try differently: {last['feedback']}")
         lines.append("Make the changes now.")
     return "\n".join(lines)
@@ -422,6 +806,11 @@ def _esc_listener(stop: threading.Event):
                 ch = os.read(tty_fd, 1)
                 if ch == b'\x1b':
                     _interrupt_event.set()
+                    if _current_response:
+                        try:
+                            _current_response.close()
+                        except Exception:
+                            pass
                     break
     except Exception:
         pass
@@ -436,9 +825,13 @@ def _esc_listener(stop: threading.Event):
 # ── Agent Runners ─────────────────────────────────────────────────────────────
 def run_worker(prompt: str, workdir: str, model: str | None,
-               buf: deque, status_ref: list) -> tuple[str, int]:
-    """Worker: ChatGPT backend Responses API 직접 호출 + 도구 실행 루프."""
+               buf: deque, status_ref: list,
+               system_prompt: str | None = None,
+               tools: list | None = None) -> tuple[str, int]:
+    """Worker: ChatGPT backend Responses API 직접 호출 + 병렬 도구 실행 루프."""
     model = model or DEFAULT_WORKER_MODEL
+    system_prompt = system_prompt or WORKER_SYSTEM
+    tools = tools if tools is not None else WORKER_TOOLS
     status_ref[0] = "running"
     # 입력 히스토리 (user msg + function_call + function_call_output 누적)
@@ -460,9 +853,9 @@ def run_worker(prompt: str, workdir: str, model: str | None,
         payload = {
             "model": model,
-            "instructions": WORKER_SYSTEM,
+            "instructions": system_prompt,
             "input": input_history,
-            "tools": WORKER_TOOLS,
+            "tools": tools,
             "store": False,
             "stream": True,
         }
@@ -504,26 +897,33 @@ def run_worker(prompt: str, workdir: str, model: str | None,
         if not fc_items:
             break
-        # 도구 실행 및 히스토리에 추가
+        # 도구 병렬 실행
+        with ThreadPoolExecutor(max_workers=min(len(fc_items), 8)) as pool:
+            futures = {
+                pool.submit(
+                    _execute_tool,
+                    fc["name"],
+                    json.loads(fc["arguments"]) if fc["arguments"] else {},
+                    workdir,
+                ): fc
+                for fc in fc_items
+            }
+            results: dict[str, str] = {}
+            for fut in as_completed(futures):
+                fc = futures[fut]
+                try:
+                    results[fc["call_id"]] = fut.result()
+                except Exception as e:
+                    results[fc["call_id"]] = f"Error: {e}"
+        # call_id 순서 보장하며 히스토리 추가
         for fc in fc_items:
-            call_id = fc["call_id"]
-            name    = fc["name"]
-            raw_args = fc["arguments"]
-            try:
-                args = json.loads(raw_args)
-            except Exception:
-                args = {}
-            arg_preview = raw_args[:80]
-            buf.append(f"[cyan]▶ {name}({arg_preview})[/cyan]")
-            result = _execute_tool(name, args, workdir)
-            short = result[:300].replace('\n', ' ')
-            buf.append(f"[dim]{short}[/dim]")
-            # Responses API 형식 히스토리
-            input_history.append({"type": "function_call", "call_id": call_id,
-                                   "name": name, "arguments": raw_args})
-            input_history.append({"type": "function_call_output", "call_id": call_id,
+            result = results[fc["call_id"]]
+            buf.append(f"[cyan]▶ {fc['name']}({fc['arguments'][:80]})[/cyan]")
+            buf.append(f"[dim]{result[:300].replace(chr(10), ' ')}[/dim]")
+            input_history.append({"type": "function_call", "call_id": fc["call_id"],
+                                   "name": fc["name"], "arguments": fc["arguments"]})
+            input_history.append({"type": "function_call_output", "call_id": fc["call_id"],
                                    "output": result})
     # 잔여 line_buf flush
@@ -534,22 +934,43 @@ def run_worker(prompt: str, workdir: str, model: str | None,
     return "\n".join(all_text_parts), 0
-def run_evaluator(prompt: str, workdir: str, model: str | None) -> tuple[str, int]:
-    """Evaluator: ChatGPT backend Responses API, 도구 없이 텍스트만."""
+def run_evaluator(prompt: str, workdir: str, model: str | None,
+                  system_prompt: str | None = None) -> tuple[str, int]:
+    """Evaluator: ChatGPT backend Responses API, 도구 없이 텍스트만.
+    IMPROVE/REDIRECT는 첫 번째 완성 라인에서 결정 → 스트림 조기 종료.
+    DONE은 한국어 요약까지 필요하므로 전체 수신.
+    """
     model = model or DEFAULT_EVAL_MODEL
+    system_prompt = system_prompt or EVALUATOR_SYSTEM
     payload = {
         "model": model,
-        "instructions": EVALUATOR_SYSTEM,
+        "instructions": system_prompt,
         "input": [{"role": "user", "content": prompt}],
         "store": False,
         "stream": True,
     }
     parts: list[str] = []
-    for ev in _iter_events(payload):
-        if ev["type"] == "_error":
-            return f"[Evaluator error] {ev['message']}", 1
-        if ev["type"] == "response.output_text.delta":
-            parts.append(ev.get("delta", ""))
+    accumulated = ""
+    early_decided = False
+    gen = _iter_events(payload)
+    try:
+        for ev in gen:
+            if ev["type"] == "_error":
+                return f"[Evaluator error] {ev['message']}", 1
+            if ev["type"] == "response.output_text.delta":
+                delta = ev.get("delta", "")
+                parts.append(delta)
+                if not early_decided:
+                    accumulated += delta
+                    # 첫 줄이 완성되면 결정 파싱 시도
+                    if '\n' in accumulated:
+                        first_line = accumulated.split('\n')[0].strip()
+                        m = DECISION_RE.match(first_line)
+                        if m and m.group(1).upper() in ('IMPROVE', 'REDIRECT'):
+                            early_decided = True
+                            break  # 한국어 요약 불필요 → 스트림 종료
+    finally:
+        gen.close()
     return "".join(parts), 0
@@ -745,10 +1166,12 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
                    eval_model: str | None, max_iter: int,
                    layout: Layout, live: Live,
                    initial_history: list | None = None,
-                   initial_eval_history: list | None = None) -> str:
+                   initial_eval_history: list | None = None,
+                   mode: str = "code",
+                   eval_every: int = 1) -> str:
     """
     Worker + Evaluator 반복 루프.
-    반환: 'done' | 'max'
+    반환: 'done' | 'max' | 'interrupted'
     """
     global _last_session
     history = list(initial_history or [])
@@ -757,6 +1180,24 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
     worker_status = ["idle"]
     done = False
+    # 모드별 시스템 프롬프트 및 도구 선택
+    worker_sys   = RESEARCH_WORKER_SYSTEM if mode == "research" else WORKER_SYSTEM
+    eval_sys     = RESEARCH_EVALUATOR_SYSTEM if mode == "research" else EVALUATOR_SYSTEM
+    active_tools = RESEARCH_TOOLS if mode == "research" else WORKER_TOOLS
+    # 압축 상태 — 백그라운드 압축 스레드 결과 수신용
+    _compress_result: list[list] = []   # [new_history] 채워지면 완료
+    _compress_thread: threading.Thread | None = None
+    # ── 지식 저장소 로드 ─────────────────────────────────────────────────
+    past_attempts = _load_past_attempts(goal)
+    knowledge_dir = _knowledge_path(goal)
+    if past_attempts:
+        console.print(
+            f"[dim]📚 지식 저장소 로드: {knowledge_dir.name}/ "
+            f"({len(past_attempts)}개 과거 시도)[/dim]"
+        )
     # ESC 리스너 시작
     _interrupt_event.clear()
     _esc_stop = threading.Event()
@@ -782,12 +1223,13 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
             live.start()
         refresh("worker", iteration)
-        worker_prompt = build_worker_prompt(goal, history)
+        worker_prompt = build_worker_prompt(goal, history, past_attempts=past_attempts)
         worker_result = [None, None]
         def _worker():
             worker_result[0], worker_result[1] = run_worker(
-                worker_prompt, workdir, worker_model, worker_buf, worker_status)
+                worker_prompt, workdir, worker_model, worker_buf, worker_status,
+                system_prompt=worker_sys, tools=active_tools)
         t = threading.Thread(target=_worker, daemon=True)
         t.start()
@@ -804,6 +1246,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
             live.stop()
             _last_session = {"goal": goal, "history": history,
                              "eval_history": eval_history, "workdir": workdir}
+            _persist_session(_last_session)
             return _finish('interrupted')
         last_msg, _ = worker_result
@@ -821,11 +1264,16 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
             live.stop()
             _last_session = {"goal": goal, "history": history,
                              "eval_history": eval_history, "workdir": workdir}
+            _persist_session(_last_session)
             return _finish('interrupted')
-        refresh("evaluator", iteration)
-        eval_prompt = build_evaluator_prompt(goal, last_msg or "", iteration)
-        eval_msg, _ = run_evaluator(eval_prompt, workdir, eval_model)
+        if iteration % eval_every == 0 or iteration == max_iter:
+            refresh("evaluator", iteration)
+            eval_prompt = build_evaluator_prompt(goal, last_msg or "", iteration)
+            eval_msg, _ = run_evaluator(eval_prompt, workdir, eval_model,
+                                        system_prompt=eval_sys)
+        else:
+            eval_msg = "IMPROVE: (evaluation skipped)"
         decision, feedback = parse_decision(eval_msg)
         history[-1]['decision'] = decision
@@ -837,6 +1285,40 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
             'full_msg': eval_msg,
         })
+        # ── 지식 저장소에 attempt 기록 ───────────────────────────────────
+        attempt_record = {
+            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
+            "iter": iteration,
+            "decision": decision,
+            "feedback": feedback,
+            "worker_summary": worker_summary,
+            "goal": goal,
+        }
+        _save_attempt(goal, attempt_record)
+        past_attempts.append(attempt_record)  # 현재 세션 내 즉시 반영
+        # ── 이전 압축 결과 반영 ──────────────────────────────────────────
+        if _compress_thread and not _compress_thread.is_alive() and _compress_result:
+            history = _compress_result[0]
+            _compress_result.clear()
+            _compress_thread = None
+        # ── 프롬프트 토큰이 100k 초과 시 백그라운드 압축 시작 ────────────
+        prompt_tokens = _estimate_tokens(build_worker_prompt(goal, history, past_attempts=past_attempts))
+        if (prompt_tokens > TOKEN_COMPRESS_THRESHOLD
+                and (_compress_thread is None or not _compress_thread.is_alive())
+                and decision != 'DONE'):
+            _snap = list(history)  # 스냅샷 캡처
+            _res  = _compress_result
+            def _do_compress():
+                new_h = compress_history(_snap, goal, eval_model)
+                _res.clear()
+                _res.append(new_h)
+            _compress_thread = threading.Thread(target=_do_compress, daemon=True)
+            _compress_thread.start()
         if decision == 'DONE':
             done = True
             refresh("done", iteration)
@@ -846,6 +1328,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
                 "goal": goal, "history": history,
                 "eval_history": eval_history, "workdir": workdir,
             }
+            _persist_session(_last_session)
             # 완료 요약 출력
             console.print()
@@ -866,6 +1349,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
         "goal": goal, "history": history,
         "eval_history": eval_history, "workdir": workdir,
     }
+    _persist_session(_last_session)
     return _finish('max')
@@ -954,6 +1438,10 @@ def main():
                         help="Evaluator 모델")
     parser.add_argument("-n", "--max-iterations", type=int, default=DEFAULT_MAX_ITER,
                         metavar="N", help=f"최대 반복 횟수 (기본: {DEFAULT_MAX_ITER})")
+    parser.add_argument("--mode", choices=["code", "research"], default="code",
+                        help="실행 모드: code(기본) / research(웹 검색·분석)")
+    parser.add_argument("--eval-every", type=int, default=1, metavar="N",
+                        help="N번 반복마다 Evaluator 실행 (기본: 1, 즉 매번)")
     args = parser.parse_args()
     # ── auth 서브커맨드 ────────────────────────────────────────────────
@@ -1004,7 +1492,21 @@ def main():
                 "agentforge auth login 으로 나중에 로그인할 수 있습니다.[/dim]"
             )
-    console.print("[dim]명령을 입력하세요. /resume | /exit[/dim]")
+    current_mode = args.mode
+    eval_every   = args.eval_every
+    # 프로세스 시작 시 디스크에서 마지막 세션 복원
+    global _last_session
+    if _last_session is None:
+        _last_session = _load_persisted_session()
+        if _last_session:
+            saved_at = _last_session.get('saved_at', '알 수 없음')
+            console.print(
+                f"[dim]💾 이전 세션 복원됨: [cyan]{_last_session['goal'][:60]}[/cyan] "
+                f"({len(_last_session['history'])}회, {saved_at})[/dim]"
+            )
+    console.print(f"[dim]명령을 입력하세요. /help 로 커맨드 목록 확인. 모드: {current_mode}[/dim]")
     _completer = SlashCompleter()
@@ -1039,9 +1541,17 @@ def main():
                     continue
                 s = _last_session
                 prev_iters = len(s["history"])
+                saved_at = s.get('saved_at', '알 수 없음')
                 console.print(Rule("[cyan]세션 재개[/cyan]"))
-                console.print(f"[dim]목표:[/dim] {s['goal'][:80]}")
-                console.print(f"[dim]이전 반복:[/dim] {prev_iters}회  →  이어서 실행")
+                console.print(f"[dim]목표:[/dim]    [white]{s['goal'][:80]}[/white]")
+                console.print(f"[dim]반복:[/dim]    {prev_iters}회 완료")
+                console.print(f"[dim]저장:[/dim]    {saved_at}")
+                last_decision = next(
+                    (h['decision'] for h in reversed(s['history'])
+                     if h.get('decision') and h.get('type') != 'compressed_summary'),
+                    '없음'
+                )
+                console.print(f"[dim]마지막 판정:[/dim] {last_decision}")
                 console.print()
                 layout2 = make_layout()
                 layout2["header"].update(render_header(s["goal"], prev_iters, max_iter, "idle"))
@@ -1054,18 +1564,39 @@ def main():
                     max_iter, layout2, live2,
                     initial_history=s["history"],
                     initial_eval_history=s["eval_history"],
+                    mode=current_mode, eval_every=eval_every,
                 )
-                goal = s["goal"]
                 if outcome == 'interrupted':
-                    goal = _handle_interrupt(goal, workdir, args, max_iter)
+                    console.print("\n[yellow]⚠  중단됨. REPL로 돌아갑니다.[/yellow]")
                 elif outcome == 'max':
                     console.print(f"[red]{max_iter}번 반복 후에도 완료되지 않았습니다.[/red]")
                 if outcome != 'interrupted':
                     console.print("[dim]다음 명령을 입력하세요. /exit 로 종료.[/dim]")
+            elif cmd_name == 'mode':
+                if cmd_arg in ('code', 'research'):
+                    current_mode = cmd_arg
+                    console.print(f"[cyan]모드 변경: {current_mode}[/cyan]")
+                else:
+                    console.print("[red]사용법: /mode code  또는  /mode research[/red]")
+            elif cmd_name == 'eval-every':
+                try:
+                    eval_every = int(cmd_arg)
+                    console.print(f"[cyan]Evaluator: {eval_every}번마다 실행[/cyan]")
+                except ValueError:
+                    console.print("[red]숫자를 입력하세요. 예: /eval-every 3[/red]")
+            elif cmd_name == 'status':
+                console.print(f"모드: [cyan]{current_mode}[/cyan]  |  eval-every: [cyan]{eval_every}[/cyan]  |  dir: [cyan]{workdir}[/cyan]")
+            elif cmd_name == 'help':
+                for cmd, desc in SLASH_COMMANDS:
+                    console.print(f"  [cyan]{cmd:<25}[/cyan] {desc}")
             else:
                 console.print(f"[red]알 수 없는 커맨드: /{cmd_name}[/red]")
-                console.print("[dim]사용 가능: /resume  /exit[/dim]")
+                console.print("[dim]/help 로 커맨드 목록을 확인하세요.[/dim]")
         else:
             # 일반 텍스트 → 바로 Worker에게 목표로 전달
@@ -1078,9 +1609,10 @@ def main():
             outcome = run_agent_loop(
                 goal, workdir, args.worker_model, args.eval_model,
                 max_iter, layout2, live2,
+                mode=current_mode, eval_every=eval_every,
             )
             if outcome == 'interrupted':
-                goal = _handle_interrupt(goal, workdir, args, max_iter)
+                console.print("\n[yellow]⚠  중단됨. REPL로 돌아갑니다.[/yellow]")
             elif outcome == 'max':
                 console.print(f"[red]{max_iter}번 반복 후에도 완료되지 않았습니다.[/red]")
             if outcome != 'interrupted':

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentforge-multi",
-  "version": "0.1.5",
+  "version": "0.1.7",
   "description": "Multi-agent CLI: Worker + Evaluator agents collaborate in a loop to achieve your goal",
   "keywords": [
     "ai",