npm - agentforge-multi - Versions diffs - 0.1.6 → 0.1.7 - Mend

agentforge-multi 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/agentforge +396 -25
package/package.json +1 -1

package/agentforge CHANGED Viewed

@@ -45,6 +45,8 @@ from prompt_toolkit.formatted_text import HTML
 # ── Constants ─────────────────────────────────────────────────────────────────
 CODEX_BIN = Path.home() / ".npm-global" / "bin" / "codex"
+KNOWLEDGE_DIR  = Path.home() / ".agentforge" / "knowledge"   # root of the persistent knowledge store
+SESSION_FILE   = Path.home() / ".agentforge" / "last_session.json"  # session persistence file
 DEFAULT_MAX_ITER = 5000
 WORKER_BUF_LINES = 60
 DEFAULT_WORKER_MODEL = "gpt-5.4"
@@ -57,21 +59,71 @@ NOISE_RE = re.compile(r'^\s*([\-─═\s]+)?$')
 console = Console()
 _last_session: dict | None = None        # {goal, history, eval_history, workdir}
 _interrupt_event = threading.Event()     # ESC-detection flag
+def _persist_session(session: dict) -> None:
+    """Save the session to SESSION_FILE as JSON (best effort).
+
+    ``worker_lines`` is removed from every history entry because it is only
+    used for TUI display. A ``saved_at`` local timestamp is added to the
+    stored data. Any exception is swallowed, so a failed save is silent.
+    """
+    try:
+        SESSION_FILE.parent.mkdir(parents=True, exist_ok=True)
+        # worker_lines is display-only -> exclude it from the saved copy
+        history_slim = [
+            {k: v for k, v in h.items() if k != 'worker_lines'}
+            for h in session.get('history', [])
+        ]
+        # Keys listed after **session override the session's own values,
+        # so the slimmed history replaces the original one in the output.
+        data = {
+            **session,
+            'history': history_slim,
+            'saved_at': time.strftime("%Y-%m-%dT%H:%M:%S"),
+        }
+        # NOTE(review): write_text() gets no encoding= while the JSON is built
+        # with ensure_ascii=False, so the platform default encoding applies —
+        # consider encoding="utf-8" here and in the matching loader.
+        SESSION_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2))
+    except Exception:
+        pass
+def _load_persisted_session() -> dict | None:
+    """Load the session from SESSION_FILE.
+
+    Returns the parsed dict, or ``None`` when the file does not exist or
+    any exception occurs while reading or parsing it.
+    """
+    try:
+        if not SESSION_FILE.exists():
+            return None
+        # NOTE(review): read_text() uses the platform default encoding — confirm
+        # it matches the encoding used when the file was written.
+        data = json.loads(SESSION_FILE.read_text())
+        # Restore the worker_lines key on entries where it is missing
+        for h in data.get('history', []):
+            h.setdefault('worker_lines', [])
+        return data
+    except Exception:
+        return None
 _current_response = None                 # current streaming HTTP response (kept so it can be cut off immediately)
+# HTTP session — reuses TCP+TLS connections, saving tens to hundreds of ms per call
+# NOTE(review): the session-wide Content-Type header set below is also sent on the
+# _session.get() calls in _execute_tool unless overridden per request — confirm intended.
+_session = _requests.Session()
+_session.headers.update({"Content-Type": "application/json"})
+# Auth header cache — re-read only when the mtime of auth.json changes
+_auth_cache: dict | None = None
+_auth_mtime: float = 0.0
 # ── ChatGPT backend API ────────────────────────────────────────────────────────
 def _get_auth_headers() -> dict:
-    """~/.codex/auth.json 에서 Bearer 헤더 + ChatGPT-Account-Id 반환."""
+    """Return the Bearer auth headers read from ~/.codex/auth.json, cached by mtime.
+
+    The file is re-read only when its modification time differs from the
+    one recorded at the previous read. Returns an empty dict when the file
+    cannot be stat'ed. Errors while reading or parsing the file, or a
+    missing ``tokens``/``access_token`` key, are not caught here.
+    """
+    global _auth_cache, _auth_mtime
     auth_file = Path.home() / ".codex" / "auth.json"
+    try:
+        mtime = auth_file.stat().st_mtime
+    except OSError:
+        # stat failed (e.g. file missing) -> no auth headers at all
+        return {}
+    if _auth_cache is not None and mtime == _auth_mtime:
+        # Cache hit: the same dict object is handed to every caller
+        return _auth_cache
     data = json.loads(auth_file.read_text())
     token = data["tokens"]["access_token"]
     account_id = data["tokens"].get("account_id", "")
-    return {
+    _auth_cache = {
         "Authorization": f"Bearer {token}",
         "Content-Type": "application/json",
         "ChatGPT-Account-Id": account_id,
     }
+    _auth_mtime = mtime
+    return _auth_cache
 WORKER_TOOLS = [
@@ -164,7 +216,7 @@ def _iter_events(payload: dict):
     global _current_response
     headers = _get_auth_headers()
     try:
-        r = _requests.post(
+        r = _session.post(
             CHATGPT_RESPONSES_URL, headers=headers,
             json=payload, stream=True, timeout=300,
         )
@@ -184,6 +236,7 @@ def _iter_events(payload: dict):
                     except Exception:
                         pass
         finally:
+            r.close()   # GeneratorExit(gen.close()) 시에도 HTTP 연결 즉시 반환
             _current_response = None
     except Exception as e:
         _current_response = None
@@ -228,7 +281,7 @@ def _execute_tool(name: str, args: dict, workdir: str) -> str:
             query = args["query"]
             brave_key = os.environ.get("BRAVE_API_KEY")
             if brave_key:
-                r = _requests.get(
+                r = _session.get(
                     "https://api.search.brave.com/res/v1/web/search",
                     params={"q": query, "count": 10},
                     headers={"Accept": "application/json",
@@ -242,7 +295,7 @@ def _execute_tool(name: str, args: dict, workdir: str) -> str:
                     for i, it in enumerate(items[:8])
                 ) or "(결과 없음)"
             else:
-                r = _requests.get(
+                r = _session.get(
                     "https://html.duckduckgo.com/html/",
                     params={"q": query},
                     headers={"User-Agent": "Mozilla/5.0"},
@@ -262,7 +315,7 @@ def _execute_tool(name: str, args: dict, workdir: str) -> str:
                 return "\n".join(lines) or "(결과 없음)"
         elif name == "fetch_url":
             url = args["url"]
-            r = _requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
+            r = _session.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
             text = re.sub(r'<[^>]+>', ' ', r.text)
             text = re.sub(r'\s+', ' ', text).strip()
             return text[:8000]
@@ -400,23 +453,252 @@ RESEARCH_EVALUATOR_SYSTEM = textwrap.dedent("""\
 """).strip()
-def build_worker_prompt(goal: str, history: list) -> str:
+TOKEN_COMPRESS_THRESHOLD = 100_000  # compress once the estimated prompt tokens exceed this
+HISTORY_KEEP_RECENT = 4             # number of most recent entries kept in full detail after compression
+def _estimate_tokens(text: str) -> int:
+    """Estimate the token count of *text*.
+
+    Uses tiktoken's ``cl100k_base`` encoding when it can be imported and
+    used; on any exception falls back to a character-count approximation.
+    """
+    try:
+        import tiktoken
+        enc = tiktoken.get_encoding("cl100k_base")
+        return len(enc.encode(text))
+    except Exception:
+        # Blend of English (~4 chars/token) and Korean (~1.5 chars/token) -> ~3 chars/token
+        return len(text) // 3
+# ── Knowledge Store (RAG) ────────────────────────────────────────────────────
+def _goal_to_slug(goal: str) -> str:
+    """Turn a goal string into a filesystem-safe folder name (Hangul preserved).
+
+    Returns at most 72 characters, or ``"unnamed"`` when nothing is left
+    after cleaning.
+    """
+    slug = re.sub(r'[^\w가-힣\s]', '', goal)   # drop special characters; keep Hangul, letters, digits
+    slug = re.sub(r'\s+', '_', slug.strip())    # whitespace runs -> underscore
+    # NOTE(review): goals sharing the same first 72 cleaned characters map to the same folder.
+    return slug[:72] or "unnamed"
+def _knowledge_path(goal: str) -> Path:
+    """Return the knowledge-store directory for *goal*, creating it if missing.
+
+    The directory is ``KNOWLEDGE_DIR / _goal_to_slug(goal)``. Note that merely
+    calling this function creates the directory on disk.
+    """
+    p = KNOWLEDGE_DIR / _goal_to_slug(goal)
+    p.mkdir(parents=True, exist_ok=True)
+    return p
+def _save_attempt(goal: str, record: dict) -> None:
+    """Append one attempt record as a JSON line to the goal's ``attempts.jsonl``.
+
+    The original author relies on appends being atomic on Linux for thread
+    safety — NOTE(review): not verified here; confirm for large records and
+    other platforms. Any exception is swallowed.
+    """
+    try:
+        path = _knowledge_path(goal) / "attempts.jsonl"
+        line = json.dumps(record, ensure_ascii=False) + "\n"
+        with open(path, "a", encoding="utf-8") as f:
+            f.write(line)
+    except Exception:
+        pass  # a failed save must not affect the agent's operation
+def _load_past_attempts(goal: str) -> list[dict]:
+    """Load every recorded past attempt for *goal*.
+
+    Reads the goal's ``attempts.jsonl`` line by line; blank lines and lines
+    that fail to parse as JSON are skipped. Returns an empty list when the
+    file does not exist or any other exception occurs.
+    """
+    try:
+        # _knowledge_path() creates the goal directory as a side effect
+        path = _knowledge_path(goal) / "attempts.jsonl"
+        if not path.exists():
+            return []
+        records = []
+        for line in path.read_text(encoding="utf-8").splitlines():
+            line = line.strip()
+            if line:
+                try:
+                    records.append(json.loads(line))
+                except Exception:
+                    # tolerate a corrupt line instead of discarding the whole file
+                    pass
+        return records
+    except Exception:
+        return []
+def _retrieve_relevant(query: str, attempts: list[dict], top_k: int = 6) -> list[dict]:
+    """
+    Find the past attempts most relevant to *query* using BM25.
+    Falls back to a word-overlap score when rank_bm25 is unavailable
+    (or when the BM25 path raises any exception).
+
+    Returns up to *top_k* attempts ordered by descending score; an empty
+    list when *attempts* is empty. Low- or zero-scoring attempts are not
+    filtered out.
+    """
+    if not attempts:
+        return []
+    def _text(a: dict) -> str:
+        # Text that represents an attempt for scoring purposes
+        return f"{a.get('feedback','')} {a.get('worker_summary','')} {a.get('decision','')}"
+    try:
+        from rank_bm25 import BM25Okapi
+        # Tokenization is a plain lowercase whitespace split
+        corpus = [_text(a).lower().split() for a in attempts]
+        bm25 = BM25Okapi(corpus)
+        scores = bm25.get_scores(query.lower().split())
+        top_idx = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
+        return [attempts[i] for i in top_idx]
+    except Exception:
+        # Fallback: score by the number of words shared with the query
+        q_words = set(query.lower().split())
+        scored = []
+        for a in attempts:
+            words = set(_text(a).lower().split())
+            scored.append((len(q_words & words), a))
+        scored.sort(key=lambda x: x[0], reverse=True)
+        return [a for _, a in scored[:top_k]]
+def _build_rag_section(past_attempts: list[dict], current_context: str) -> str:
+    """
+    Retrieve the past attempts relevant to the current situation and build
+    the warning section that is inserted into the Worker prompt.
+
+    Returns an empty string when there are no past attempts or nothing is
+    retrieved; otherwise a multi-line block framed by rule lines.
+    """
+    if not past_attempts:
+        return ""
+    relevant = _retrieve_relevant(current_context, past_attempts, top_k=6)
+    if not relevant:
+        return ""
+    lines = [
+        "━" * 60,
+        "PAST ATTEMPTS FROM PREVIOUS SESSIONS — DO NOT REPEAT THESE:",
+    ]
+    for a in relevant:
+        decision = a.get("decision", "?")
+        # Fields are truncated to keep the section compact
+        feedback = a.get("feedback", "")[:200]
+        summary  = a.get("worker_summary", "")[:150]
+        ts       = a.get("timestamp", "")[:10]   # first 10 chars of the timestamp (the date part of the ISO format)
+        badge    = {"DONE": "✓", "IMPROVE": "▲", "REDIRECT": "↩"}.get(decision, "?")
+        lines.append(f"  [{badge} {decision}] ({ts}) {feedback}")
+        if summary:
+            lines.append(f"    tried: {summary}")
+    lines.append("━" * 60)
+    return "\n".join(lines)
+# ── History Compression ───────────────────────────────────────────────────────
+# System prompt passed as "instructions" in the history-compression request
+# built by _call_compressor.
+COMPRESSOR_SYSTEM = textwrap.dedent("""\
+    You are a concise summarizer for an AI agent's work log.
+    Given a list of past iterations (what the agent tried and what the evaluator said),
+    produce a compact summary that preserves:
+    - Every approach that was tried (even failed ones)
+    - Why each approach was rejected or approved
+    - The current state of the work (what exists, what doesn't)
+    - Any important file paths or technical details
+    Write in plain English. Be dense but complete. Max 400 words.
+    Do NOT omit failed approaches — the agent must not repeat them.
+""").strip()
+def _call_compressor(entries: list[dict], goal: str, model: str | None) -> str:
+    """Summarize past history entries with the LLM.
+
+    Each entry must provide ``iter`` and ``decision``; ``feedback`` and
+    ``worker_summary`` are optional and truncated to 200 characters in the
+    request. When the model returns no text, a one-line-per-entry listing
+    of the originals is returned instead. *model* defaults to
+    DEFAULT_EVAL_MODEL when falsy.
+    """
+    lines = [f"GOAL: {goal}", "", "ITERATIONS TO SUMMARIZE:"]
+    for h in entries:
+        lines.append(f"  Iter {h['iter']}: [{h['decision']}] {h.get('feedback','')[:200]}")
+        ws = h.get('worker_summary', '')
+        if ws:
+            lines.append(f"    Worker did: {ws[:200]}")
+    prompt_text = "\n".join(lines)
+    model = model or DEFAULT_EVAL_MODEL
+    payload = {
+        "model": model,
+        "instructions": COMPRESSOR_SYSTEM,
+        "input": [{"role": "user", "content": prompt_text}],
+        "store": False,
+        "stream": True,
+    }
+    parts: list[str] = []
+    gen = _iter_events(payload)
+    try:
+        # Only text deltas are collected; other event types (including
+        # "_error") are ignored here, which leads to the fallback below.
+        for ev in gen:
+            if ev["type"] == "response.output_text.delta":
+                parts.append(ev.get("delta", ""))
+    finally:
+        # Always close the generator so its cleanup code runs
+        gen.close()
+    result = "".join(parts).strip()
+    if not result:
+        # Fallback: one line per original entry
+        result = "\n".join(
+            f"Iter {h['iter']} [{h['decision']}]: {h.get('feedback','')[:100]}"
+            for h in entries
+        )
+    return result
+def compress_history(history: list, goal: str, model: str | None) -> list:
+    """
+    Replace older history entries with an LLM-written summary while keeping
+    the most recent HISTORY_KEEP_RECENT entries untouched.
+
+    This function performs no threshold check itself; deciding when to
+    compress is left to the caller.
+
+    Returns: a new history list (one summary entry + the recent entries),
+    or the input list unchanged when there is nothing to compress.
+    """
+    # An already-compressed summary entry, if present, is part of the
+    # range considered for re-compression
+    to_compress = history[:-HISTORY_KEEP_RECENT]
+    keep = history[-HISTORY_KEEP_RECENT:]
+    # The summary text is carried over as-is; only normal entries are summarized
+    prev_summary_text = ""
+    normal_entries = []
+    for h in to_compress:
+        if h.get('type') == 'compressed_summary':
+            # If several summaries are present, only the last one's text survives
+            prev_summary_text = h['content']
+        else:
+            normal_entries.append(h)
+    if not normal_entries:
+        return history  # nothing to compress
+    new_text = _call_compressor(normal_entries, goal, model)
+    if prev_summary_text:
+        combined = prev_summary_text + "\n\n--- Additional history ---\n" + new_text
+    else:
+        combined = new_text
+    # NOTE(review): 'covers' reflects only the newly summarized entries, not the
+    # iteration range of a carried-over previous summary — confirm intended.
+    first_iter = normal_entries[0]['iter']
+    last_iter  = normal_entries[-1]['iter']
+    summary_entry = {
+        'type': 'compressed_summary',
+        'covers': f"{first_iter}–{last_iter}",
+        'content': combined,
+    }
+    return [summary_entry] + keep
+def build_worker_prompt(goal: str, history: list,
+                        past_attempts: list[dict] | None = None) -> str:
+    """Build the Worker prompt text for the next iteration.
+
+    The prompt contains the goal, an optional section of relevant past
+    attempts (when *past_attempts* is non-empty), and either the initial
+    instructions (empty *history*) or the iteration history followed by an
+    instruction derived from the last non-summary entry.
+    """
     lines = [f"GOAL: {goal}", ""]
+    # RAG section: search past attempts using the current situation as the query and insert them
+    if past_attempts:
+        # Query = feedback and summaries of the last three entries (summary
+        # entries excluded); falls back to the goal when that text is empty
+        current_ctx = " ".join(
+            f"{h.get('feedback','')} {h.get('worker_summary','')}"
+            for h in history[-3:] if h.get('type') != 'compressed_summary'
+        ) or goal
+        rag = _build_rag_section(past_attempts, current_ctx)
+        if rag:
+            lines.append(rag)
+            lines.append("")
     if not history:
         lines += [WORKER_SYSTEM, "", "Begin working on the goal above. Make concrete changes now."]
     else:
         lines.append("ITERATION HISTORY:")
         for h in history:
-            lines.append(f"  Iteration {h['iter']}:")
-            lines.append(f"    Your work: {h['worker_summary']}")
-            lines.append(f"    Evaluator: {h['decision']}" +
-                         (f" — {h['feedback']}" if h['feedback'] else ""))
+            if h.get('type') == 'compressed_summary':
+                # Summary entries carry 'covers' and 'content' instead of per-iteration fields
+                lines.append(f"  [COMPRESSED SUMMARY — Iter {h['covers']}]")
+                for ln in h['content'].splitlines():
+                    lines.append(f"    {ln}")
+                lines.append("  ──────────────────────────────────────")
+            else:
+                lines.append(f"  Iteration {h['iter']}:")
+                lines.append(f"    Your work: {h['worker_summary']}")
+                lines.append(f"    Evaluator: {h['decision']}" +
+                             (f" — {h['feedback']}" if h['feedback'] else ""))
         lines.append("")
-        last = history[-1]
-        if last['decision'].upper() == 'IMPROVE':
-            lines.append(f"INSTRUCTION: Refine your previous work. Feedback: {last['feedback']}")
-        else:
-            lines.append(f"INSTRUCTION: Abandon previous approach. Try differently: {last['feedback']}")
+        # Find the last real iteration entry (skipping compressed summaries)
+        last = next((h for h in reversed(history) if h.get('type') != 'compressed_summary'), None)
+        if last:
+            if last['decision'].upper() == 'IMPROVE':
+                lines.append(f"INSTRUCTION: Refine your previous work. Feedback: {last['feedback']}")
+            else:
+                lines.append(f"INSTRUCTION: Abandon previous approach. Try differently: {last['feedback']}")
         lines.append("Make the changes now.")
     return "\n".join(lines)
@@ -654,7 +936,10 @@ def run_worker(prompt: str, workdir: str, model: str | None,
 def run_evaluator(prompt: str, workdir: str, model: str | None,
                   system_prompt: str | None = None) -> tuple[str, int]:
-    """Evaluator: ChatGPT backend Responses API, 도구 없이 텍스트만."""
+    """Evaluator: ChatGPT backend Responses API, 도구 없이 텍스트만.
+    IMPROVE/REDIRECT는 첫 번째 완성 라인에서 결정 → 스트림 조기 종료.
+    DONE은 한국어 요약까지 필요하므로 전체 수신.
+    """
     model = model or DEFAULT_EVAL_MODEL
     system_prompt = system_prompt or EVALUATOR_SYSTEM
     payload = {
@@ -665,11 +950,27 @@ def run_evaluator(prompt: str, workdir: str, model: str | None,
         "stream": True,
     }
     parts: list[str] = []
-    for ev in _iter_events(payload):
-        if ev["type"] == "_error":
-            return f"[Evaluator error] {ev['message']}", 1
-        if ev["type"] == "response.output_text.delta":
-            parts.append(ev.get("delta", ""))
+    accumulated = ""
+    early_decided = False
+    gen = _iter_events(payload)
+    try:
+        for ev in gen:
+            if ev["type"] == "_error":
+                return f"[Evaluator error] {ev['message']}", 1
+            if ev["type"] == "response.output_text.delta":
+                delta = ev.get("delta", "")
+                parts.append(delta)
+                if not early_decided:
+                    accumulated += delta
+                    # 첫 줄이 완성되면 결정 파싱 시도
+                    if '\n' in accumulated:
+                        first_line = accumulated.split('\n')[0].strip()
+                        m = DECISION_RE.match(first_line)
+                        if m and m.group(1).upper() in ('IMPROVE', 'REDIRECT'):
+                            early_decided = True
+                            break  # 한국어 요약 불필요 → 스트림 종료
+    finally:
+        gen.close()
     return "".join(parts), 0
@@ -884,6 +1185,19 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
     eval_sys     = RESEARCH_EVALUATOR_SYSTEM if mode == "research" else EVALUATOR_SYSTEM
     active_tools = RESEARCH_TOOLS if mode == "research" else WORKER_TOOLS
+    # 압축 상태 — 백그라운드 압축 스레드 결과 수신용
+    _compress_result: list[list] = []   # [new_history] 채워지면 완료
+    _compress_thread: threading.Thread | None = None
+    # ── 지식 저장소 로드 ─────────────────────────────────────────────────
+    past_attempts = _load_past_attempts(goal)
+    knowledge_dir = _knowledge_path(goal)
+    if past_attempts:
+        console.print(
+            f"[dim]📚 지식 저장소 로드: {knowledge_dir.name}/ "
+            f"({len(past_attempts)}개 과거 시도)[/dim]"
+        )
     # ESC 리스너 시작
     _interrupt_event.clear()
     _esc_stop = threading.Event()
@@ -909,7 +1223,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
             live.start()
         refresh("worker", iteration)
-        worker_prompt = build_worker_prompt(goal, history)
+        worker_prompt = build_worker_prompt(goal, history, past_attempts=past_attempts)
         worker_result = [None, None]
         def _worker():
@@ -932,6 +1246,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
             live.stop()
             _last_session = {"goal": goal, "history": history,
                              "eval_history": eval_history, "workdir": workdir}
+            _persist_session(_last_session)
             return _finish('interrupted')
         last_msg, _ = worker_result
@@ -949,6 +1264,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
             live.stop()
             _last_session = {"goal": goal, "history": history,
                              "eval_history": eval_history, "workdir": workdir}
+            _persist_session(_last_session)
             return _finish('interrupted')
         if iteration % eval_every == 0 or iteration == max_iter:
@@ -969,6 +1285,40 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
             'full_msg': eval_msg,
         })
+        # ── 지식 저장소에 attempt 기록 ───────────────────────────────────
+        attempt_record = {
+            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
+            "iter": iteration,
+            "decision": decision,
+            "feedback": feedback,
+            "worker_summary": worker_summary,
+            "goal": goal,
+        }
+        _save_attempt(goal, attempt_record)
+        past_attempts.append(attempt_record)  # 현재 세션 내 즉시 반영
+        # ── 이전 압축 결과 반영 ──────────────────────────────────────────
+        if _compress_thread and not _compress_thread.is_alive() and _compress_result:
+            history = _compress_result[0]
+            _compress_result.clear()
+            _compress_thread = None
+        # ── 프롬프트 토큰이 100k 초과 시 백그라운드 압축 시작 ────────────
+        prompt_tokens = _estimate_tokens(build_worker_prompt(goal, history, past_attempts=past_attempts))
+        if (prompt_tokens > TOKEN_COMPRESS_THRESHOLD
+                and (_compress_thread is None or not _compress_thread.is_alive())
+                and decision != 'DONE'):
+            _snap = list(history)  # 스냅샷 캡처
+            _res  = _compress_result
+            def _do_compress():
+                new_h = compress_history(_snap, goal, eval_model)
+                _res.clear()
+                _res.append(new_h)
+            _compress_thread = threading.Thread(target=_do_compress, daemon=True)
+            _compress_thread.start()
         if decision == 'DONE':
             done = True
             refresh("done", iteration)
@@ -978,6 +1328,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
                 "goal": goal, "history": history,
                 "eval_history": eval_history, "workdir": workdir,
             }
+            _persist_session(_last_session)
             # 완료 요약 출력
             console.print()
@@ -998,6 +1349,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
         "goal": goal, "history": history,
         "eval_history": eval_history, "workdir": workdir,
     }
+    _persist_session(_last_session)
     return _finish('max')
@@ -1143,6 +1495,17 @@ def main():
     current_mode = args.mode
     eval_every   = args.eval_every
+    # 프로세스 시작 시 디스크에서 마지막 세션 복원
+    global _last_session
+    if _last_session is None:
+        _last_session = _load_persisted_session()
+        if _last_session:
+            saved_at = _last_session.get('saved_at', '알 수 없음')
+            console.print(
+                f"[dim]💾 이전 세션 복원됨: [cyan]{_last_session['goal'][:60]}[/cyan] "
+                f"({len(_last_session['history'])}회, {saved_at})[/dim]"
+            )
     console.print(f"[dim]명령을 입력하세요. /help 로 커맨드 목록 확인. 모드: {current_mode}[/dim]")
     _completer = SlashCompleter()
@@ -1178,9 +1541,17 @@ def main():
                     continue
                 s = _last_session
                 prev_iters = len(s["history"])
+                saved_at = s.get('saved_at', '알 수 없음')
                 console.print(Rule("[cyan]세션 재개[/cyan]"))
-                console.print(f"[dim]목표:[/dim] {s['goal'][:80]}")
-                console.print(f"[dim]이전 반복:[/dim] {prev_iters}회  →  이어서 실행")
+                console.print(f"[dim]목표:[/dim]    [white]{s['goal'][:80]}[/white]")
+                console.print(f"[dim]반복:[/dim]    {prev_iters}회 완료")
+                console.print(f"[dim]저장:[/dim]    {saved_at}")
+                last_decision = next(
+                    (h['decision'] for h in reversed(s['history'])
+                     if h.get('decision') and h.get('type') != 'compressed_summary'),
+                    '없음'
+                )
+                console.print(f"[dim]마지막 판정:[/dim] {last_decision}")
                 console.print()
                 layout2 = make_layout()
                 layout2["header"].update(render_header(s["goal"], prev_iters, max_iter, "idle"))

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentforge-multi",
-  "version": "0.1.6",
+  "version": "0.1.7",
   "description": "Multi-agent CLI: Worker + Evaluator agents collaborate in a loop to achieve your goal",
   "keywords": [
     "ai",