agentforge-multi 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/agentforge +609 -77
  2. package/package.json +1 -1
package/agentforge CHANGED
@@ -27,6 +27,7 @@ from collections import deque
27
27
  from pathlib import Path
28
28
 
29
29
  import requests as _requests
30
+ from concurrent.futures import ThreadPoolExecutor, as_completed
30
31
 
31
32
  from rich.console import Console
32
33
  from rich.layout import Layout
@@ -44,6 +45,8 @@ from prompt_toolkit.formatted_text import HTML
44
45
  # ── Constants ─────────────────────────────────────────────────────────────────
45
46
 
46
47
  CODEX_BIN = Path.home() / ".npm-global" / "bin" / "codex"
48
+ KNOWLEDGE_DIR = Path.home() / ".agentforge" / "knowledge" # 영구 지식 저장소 루트
49
+ SESSION_FILE = Path.home() / ".agentforge" / "last_session.json" # 세션 영속화
47
50
  DEFAULT_MAX_ITER = 5000
48
51
  WORKER_BUF_LINES = 60
49
52
  DEFAULT_WORKER_MODEL = "gpt-5.4"
@@ -57,19 +60,70 @@ console = Console()
57
60
  _last_session: dict | None = None # {goal, history, eval_history, workdir}
58
61
  _interrupt_event = threading.Event() # ESC 감지 플래그
59
62
 
63
+
64
+ def _persist_session(session: dict) -> None:
65
+ """세션을 디스크에 저장. worker_lines는 TUI 표시용이라 제외."""
66
+ try:
67
+ SESSION_FILE.parent.mkdir(parents=True, exist_ok=True)
68
+ # worker_lines는 디스플레이 전용 → 저장 제외
69
+ history_slim = [
70
+ {k: v for k, v in h.items() if k != 'worker_lines'}
71
+ for h in session.get('history', [])
72
+ ]
73
+ data = {
74
+ **session,
75
+ 'history': history_slim,
76
+ 'saved_at': time.strftime("%Y-%m-%dT%H:%M:%S"),
77
+ }
78
+ SESSION_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2))
79
+ except Exception:
80
+ pass
81
+
82
+
83
+ def _load_persisted_session() -> dict | None:
84
+ """디스크에서 세션 로드. 실패 시 None."""
85
+ try:
86
+ if not SESSION_FILE.exists():
87
+ return None
88
+ data = json.loads(SESSION_FILE.read_text())
89
+ # worker_lines 누락된 항목 복원
90
+ for h in data.get('history', []):
91
+ h.setdefault('worker_lines', [])
92
+ return data
93
+ except Exception:
94
+ return None
95
+ _current_response = None # 현재 스트리밍 HTTP 응답 (즉시 끊기용)
96
+
97
+ # HTTP 세션 — TCP+TLS 연결 재사용으로 매 호출 수십~수백 ms 절감
98
+ _session = _requests.Session()
99
+ _session.headers.update({"Content-Type": "application/json"})
100
+
101
+ # 인증 헤더 캐시 — auth.json mtime이 바뀔 때만 재읽기
102
+ _auth_cache: dict | None = None
103
+ _auth_mtime: float = 0.0
104
+
60
105
  # ── ChatGPT backend API ────────────────────────────────────────────────────────
61
106
 
62
107
  def _get_auth_headers() -> dict:
63
- """~/.codex/auth.json 에서 Bearer 헤더 + ChatGPT-Account-Id 반환."""
108
+ """~/.codex/auth.json 에서 Bearer 헤더 반환. mtime 기반 캐싱."""
109
+ global _auth_cache, _auth_mtime
64
110
  auth_file = Path.home() / ".codex" / "auth.json"
111
+ try:
112
+ mtime = auth_file.stat().st_mtime
113
+ except OSError:
114
+ return {}
115
+ if _auth_cache is not None and mtime == _auth_mtime:
116
+ return _auth_cache
65
117
  data = json.loads(auth_file.read_text())
66
118
  token = data["tokens"]["access_token"]
67
119
  account_id = data["tokens"].get("account_id", "")
68
- return {
120
+ _auth_cache = {
69
121
  "Authorization": f"Bearer {token}",
70
122
  "Content-Type": "application/json",
71
123
  "ChatGPT-Account-Id": account_id,
72
124
  }
125
+ _auth_mtime = mtime
126
+ return _auth_cache
73
127
 
74
128
 
75
129
  WORKER_TOOLS = [
@@ -134,30 +188,58 @@ WORKER_TOOLS = [
134
188
  ]
135
189
 
136
190
 
191
+ RESEARCH_TOOLS = WORKER_TOOLS + [
192
+ {
193
+ "type": "function", "name": "web_search",
194
+ "description": "Search the web using DuckDuckGo (or Brave if BRAVE_API_KEY set). Returns summaries and links.",
195
+ "parameters": {"type": "object",
196
+ "properties": {"query": {"type": "string"}},
197
+ "required": ["query"]},
198
+ "strict": False,
199
+ },
200
+ {
201
+ "type": "function", "name": "fetch_url",
202
+ "description": "Fetch and extract text content from a URL.",
203
+ "parameters": {"type": "object",
204
+ "properties": {"url": {"type": "string"}},
205
+ "required": ["url"]},
206
+ "strict": False,
207
+ },
208
+ ]
209
+
210
+
137
211
  def _iter_events(payload: dict):
138
212
  """
139
213
  ChatGPT backend-api/codex/responses 스트리밍 호출.
140
- SSE 이벤트를 실시간으로 yield. _interrupt_event가 set되면 조기 종료.
214
+ SSE 이벤트를 실시간으로 yield. _interrupt_event가 set되면 즉시 연결 끊기.
141
215
  """
216
+ global _current_response
142
217
  headers = _get_auth_headers()
143
218
  try:
144
- r = _requests.post(
219
+ r = _session.post(
145
220
  CHATGPT_RESPONSES_URL, headers=headers,
146
221
  json=payload, stream=True, timeout=300,
147
222
  )
223
+ _current_response = r
148
224
  r.raise_for_status()
149
- for line in r.iter_lines():
150
- if _interrupt_event.is_set():
151
- break
152
- if not line:
153
- continue
154
- decoded = line.decode("utf-8", errors="replace")
155
- if decoded.startswith("data: "):
156
- try:
157
- yield json.loads(decoded[6:])
158
- except Exception:
159
- pass
225
+ try:
226
+ for line in r.iter_lines():
227
+ if _interrupt_event.is_set():
228
+ r.close()
229
+ break
230
+ if not line:
231
+ continue
232
+ decoded = line.decode("utf-8", errors="replace")
233
+ if decoded.startswith("data: "):
234
+ try:
235
+ yield json.loads(decoded[6:])
236
+ except Exception:
237
+ pass
238
+ finally:
239
+ r.close() # GeneratorExit(gen.close()) 시에도 HTTP 연결 즉시 반환
240
+ _current_response = None
160
241
  except Exception as e:
242
+ _current_response = None
161
243
  yield {"type": "_error", "message": str(e)}
162
244
 
163
245
 
@@ -195,6 +277,48 @@ def _execute_tool(name: str, args: dict, workdir: str) -> str:
195
277
  return "\n".join(
196
278
  ("dir " if p.is_dir() else "file ") + p.name for p in items
197
279
  )
280
+ elif name == "web_search":
281
+ query = args["query"]
282
+ brave_key = os.environ.get("BRAVE_API_KEY")
283
+ if brave_key:
284
+ r = _session.get(
285
+ "https://api.search.brave.com/res/v1/web/search",
286
+ params={"q": query, "count": 10},
287
+ headers={"Accept": "application/json",
288
+ "Accept-Encoding": "gzip",
289
+ "X-Subscription-Token": brave_key},
290
+ timeout=15,
291
+ )
292
+ items = r.json().get("web", {}).get("results", [])
293
+ return "\n".join(
294
+ f"[{i+1}] {it['title']}\n {it['url']}\n {it.get('description','')}"
295
+ for i, it in enumerate(items[:8])
296
+ ) or "(결과 없음)"
297
+ else:
298
+ r = _session.get(
299
+ "https://html.duckduckgo.com/html/",
300
+ params={"q": query},
301
+ headers={"User-Agent": "Mozilla/5.0"},
302
+ timeout=15,
303
+ )
304
+ snippets = re.findall(r'class="result__snippet">(.*?)</a>', r.text, re.S)
305
+ titles = re.findall(r'class="result__a"[^>]*>(.*?)</a>', r.text, re.S)
306
+ urls = re.findall(r'uddg=(https?[^&"]+)', r.text)
307
+ from urllib.parse import unquote
308
+ lines = []
309
+ for i, (t, u, s) in enumerate(zip(titles, urls, snippets)):
310
+ t = re.sub(r'<[^>]+>', '', t).strip()
311
+ s = re.sub(r'<[^>]+>', '', s).strip()
312
+ lines.append(f"[{i+1}] {t}\n {unquote(u)}\n {s}")
313
+ if i >= 7:
314
+ break
315
+ return "\n".join(lines) or "(결과 없음)"
316
+ elif name == "fetch_url":
317
+ url = args["url"]
318
+ r = _session.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
319
+ text = re.sub(r'<[^>]+>', ' ', r.text)
320
+ text = re.sub(r'\s+', ' ', text).strip()
321
+ return text[:8000]
198
322
  else:
199
323
  return f"Unknown tool: {name}"
200
324
  except Exception as e:
@@ -203,23 +327,30 @@ def _execute_tool(name: str, args: dict, workdir: str) -> str:
203
327
  # ── Slash command autocomplete ────────────────────────────────────────────────
204
328
 
205
329
  SLASH_COMMANDS = [
206
- ("/resume", "마지막 세션을 이어서 실행"),
207
- ("/exit", "agentforge 종료"),
330
+ ("/resume", "마지막 세션 재개"),
331
+ ("/exit", "종료"),
332
+ ("/mode code", "코딩 모드로 전환"),
333
+ ("/mode research", "연구 모드로 전환"),
334
+ ("/eval-every <N>", "N번마다 Evaluator 실행"),
335
+ ("/status", "현재 설정 확인"),
336
+ ("/help", "커맨드 목록 표시"),
208
337
  ]
209
338
 
210
339
  class SlashCompleter(Completer):
211
340
  def get_completions(self, document, complete_event):
212
341
  text = document.text_before_cursor
213
342
  if text.startswith('/'):
214
- typed = text.lstrip('/')
343
+ typed = text[1:] # strip leading /
215
344
  for cmd, desc in SLASH_COMMANDS:
216
- name = cmd.lstrip('/')
217
- if name.startswith(typed):
345
+ name = cmd[1:].split()[0] # first word without /
346
+ full = cmd[1:] # full command without /
347
+ if full.startswith(typed) or name.startswith(typed.split()[0] if typed else ''):
348
+ import html as _html
218
349
  yield Completion(
219
350
  cmd,
220
351
  start_position=-len(text),
221
- display=HTML(f'<ansicyan>{cmd}</ansicyan>'),
222
- display_meta=HTML(f'<ansiwhite>{desc}</ansiwhite>'),
352
+ display=HTML(f'<ansicyan>{_html.escape(cmd)}</ansicyan>'),
353
+ display_meta=HTML(f'<ansiwhite>{_html.escape(desc)}</ansiwhite>'),
223
354
  )
224
355
 
225
356
  PROMPT_STYLE = PtStyle.from_dict({
@@ -298,23 +429,276 @@ EVALUATOR_SYSTEM = textwrap.dedent("""\
298
429
  Do NOT write anything before the decision keyword.
299
430
  """).strip()
300
431
 
301
- def build_worker_prompt(goal: str, history: list) -> str:
432
+ RESEARCH_WORKER_SYSTEM = textwrap.dedent("""\
433
+ You are an expert researcher. Your goal is to investigate a topic thoroughly.
434
+ - Use web_search to find relevant papers, articles, and data
435
+ - Use fetch_url to read full content of important pages
436
+ - Use read_file/write_file to organize findings into structured notes
437
+ - Synthesize information across multiple sources
438
+ - Stay on topic; do not drift from the research goal
439
+ """).strip()
440
+
441
+ RESEARCH_EVALUATOR_SYSTEM = textwrap.dedent("""\
442
+ You are a rigorous academic reviewer. Evaluate whether the research goal is achieved.
443
+ Respond with EXACTLY ONE first line: DONE, IMPROVE: <feedback>, or REDIRECT: <feedback>
444
+
445
+ DONE only if: sufficient sources found, content analyzed, findings written to file(s).
446
+ IMPROVE if: more sources needed, analysis incomplete, or notes missing.
447
+ REDIRECT if: wrong direction entirely.
448
+
449
+ When DONE, add Korean summary:
450
+ 판단 이유: ...
451
+ 결과물 위치: ...
452
+ 결과 요약: ...
453
+ """).strip()
454
+
455
+
456
+ TOKEN_COMPRESS_THRESHOLD = 100_000 # 프롬프트 추정 토큰이 이 수를 넘으면 압축
457
+ HISTORY_KEEP_RECENT = 4 # 압축 후 보존할 최근 항목 수 (상세 내용 유지)
458
+
459
+
460
+ def _estimate_tokens(text: str) -> int:
461
+ """토큰 수 추정. tiktoken 있으면 정확하게, 없으면 글자 수 기반 근사."""
462
+ try:
463
+ import tiktoken
464
+ enc = tiktoken.get_encoding("cl100k_base")
465
+ return len(enc.encode(text))
466
+ except Exception:
467
+ # 영문 ~4자/토큰, 한글 ~1.5자/토큰 혼합 근사 → 3자/토큰
468
+ return len(text) // 3
469
+
470
+ # ── Knowledge Store (RAG) ────────────────────────────────────────────────────
471
+
472
+ def _goal_to_slug(goal: str) -> str:
473
+ """목표 문자열 → 파일시스템 안전 폴더명 (한글 보존)."""
474
+ slug = re.sub(r'[^\w가-힣\s]', '', goal) # 특수문자 제거, 한글·영문·숫자 보존
475
+ slug = re.sub(r'\s+', '_', slug.strip()) # 공백 → 언더스코어
476
+ return slug[:72] or "unnamed"
477
+
478
+
479
+ def _knowledge_path(goal: str) -> Path:
480
+ """목표별 지식 저장소 디렉토리 경로 반환 (없으면 생성)."""
481
+ p = KNOWLEDGE_DIR / _goal_to_slug(goal)
482
+ p.mkdir(parents=True, exist_ok=True)
483
+ return p
484
+
485
+
486
+ def _save_attempt(goal: str, record: dict) -> None:
487
+ """시도 기록을 JSONL에 append (스레드 안전: Linux append는 원자적)."""
488
+ try:
489
+ path = _knowledge_path(goal) / "attempts.jsonl"
490
+ line = json.dumps(record, ensure_ascii=False) + "\n"
491
+ with open(path, "a", encoding="utf-8") as f:
492
+ f.write(line)
493
+ except Exception:
494
+ pass # 저장 실패해도 에이전트 동작에 영향 없도록
495
+
496
+
497
def _parse_attempt_lines(text: str) -> list[dict]:
    """Parse JSONL text into attempt records, skipping unusable lines."""
    records = []
    # Split on "\n" only. json.dumps(..., ensure_ascii=False) leaves U+2028,
    # U+2029 and U+0085 unescaped inside strings, and str.splitlines() treats
    # them as line breaks, which would cut such a record in half.
    for raw in text.split("\n"):
        raw = raw.strip()
        if not raw:
            continue
        try:
            record = json.loads(raw)
        except ValueError:
            continue  # one corrupt line must not discard the others
        # Consumers call .get() on every record, so keep JSON objects only.
        if isinstance(record, dict):
            records.append(record)
    return records


def _load_past_attempts(goal: str) -> list[dict]:
    """Load every stored attempt record for ``goal``.

    Reads ``attempts.jsonl`` from the directory returned by
    ``_knowledge_path(goal)``.

    Returns:
        The parsed records in file order, or an empty list when the file is
        missing or cannot be read.
    """
    try:
        path = _knowledge_path(goal) / "attempts.jsonl"
        return _parse_attempt_lines(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing file, I/O failure, or undecodable bytes.
        return []
514
+
515
+
516
def _attempt_text(attempt: dict, key: str) -> str:
    """Return ``attempt[key]`` as text; missing or null values become ''."""
    value = attempt.get(key)
    return "" if value is None else str(value)


def _retrieve_relevant(query: str, attempts: list[dict], top_k: int = 6) -> list[dict]:
    """Return up to ``top_k`` past attempts ranked by similarity to ``query``.

    Each attempt is represented by its ``feedback``, ``worker_summary`` and
    ``decision`` fields, lower-cased and split on whitespace. Ranking uses
    BM25 when ``rank_bm25`` can be imported, otherwise the number of words
    shared with the query. Ties keep their original order.

    Args:
        query: Free text describing the current situation.
        attempts: Stored attempt records; entries that are not dicts are
            ignored.
        top_k: Maximum number of records to return; values <= 0 yield [].
    """
    usable = [a for a in (attempts or []) if isinstance(a, dict)]
    if not usable or top_k <= 0:
        return []

    def _words(attempt: dict) -> list[str]:
        joined = " ".join(
            _attempt_text(attempt, key)
            for key in ("feedback", "worker_summary", "decision")
        )
        return joined.lower().split()

    query_words = query.lower().split()
    try:
        from rank_bm25 import BM25Okapi
        scores = BM25Okapi([_words(a) for a in usable]).get_scores(query_words)
        order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        return [usable[i] for i in order[:top_k]]
    except Exception:
        # Deliberately broad: rank_bm25 is optional, and any failure inside
        # it should degrade to the overlap score rather than abort.
        wanted = set(query_words)
        scored = [(len(wanted & set(_words(a))), a) for a in usable]
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [a for _, a in scored[:top_k]]


def _build_rag_section(past_attempts: list[dict], current_context: str) -> str:
    """Build the "past attempts" warning section for the worker prompt.

    Picks the attempts most relevant to ``current_context`` and lists each
    one's decision, date, and truncated feedback and summary between two
    rule lines.

    Returns:
        The section text, or '' when there is nothing to show.
    """
    if not past_attempts:
        return ""

    relevant = _retrieve_relevant(current_context, past_attempts, top_k=6)
    if not relevant:
        return ""

    badges = {"DONE": "✓", "IMPROVE": "▲", "REDIRECT": "↩"}
    lines = [
        "━" * 60,
        "PAST ATTEMPTS FROM PREVIOUS SESSIONS — DO NOT REPEAT THESE:",
    ]
    for a in relevant:
        # Stored records may hold JSON null for any field; coerce to text
        # before slicing so one bad record cannot break prompt building.
        decision = _attempt_text(a, "decision") or "?"
        feedback = _attempt_text(a, "feedback")[:200]
        summary = _attempt_text(a, "worker_summary")[:150]
        ts = _attempt_text(a, "timestamp")[:10]  # date part of the ISO stamp
        badge = badges.get(decision, "?")
        lines.append(f" [{badge} {decision}] ({ts}) {feedback}")
        if summary:
            lines.append(f" tried: {summary}")
    lines.append("━" * 60)
    return "\n".join(lines)
572
+
573
+
574
+ # ── History Compression ───────────────────────────────────────────────────────
575
+
576
+ COMPRESSOR_SYSTEM = textwrap.dedent("""\
577
+ You are a concise summarizer for an AI agent's work log.
578
+ Given a list of past iterations (what the agent tried and what the evaluator said),
579
+ produce a compact summary that preserves:
580
+ - Every approach that was tried (even failed ones)
581
+ - Why each approach was rejected or approved
582
+ - The current state of the work (what exists, what doesn't)
583
+ - Any important file paths or technical details
584
+
585
+ Write in plain English. Be dense but complete. Max 400 words.
586
+ Do NOT omit failed approaches — the agent must not repeat them.
587
+ """).strip()
588
+
589
+
590
def _call_compressor(entries: list[dict], goal: str, model: str | None) -> str:
    """Summarise older history entries with the LLM.

    Args:
        entries: History entries to summarise. ``iter``, ``decision``,
            ``feedback`` and ``worker_summary`` are read when present.
        goal: The overall goal, included in the prompt for context.
        model: Model name; falls back to ``DEFAULT_EVAL_MODEL`` when falsy.

    Returns:
        The model's summary, or a deterministic one-line-per-iteration list
        when the call fails or produces no text.
    """
    def _clip(entry: dict, key: str, limit: int) -> str:
        # Fields may be missing or None (e.g. an entry saved before it was
        # evaluated); coerce to text before slicing.
        return str(entry.get(key) or '')[:limit]

    lines = [f"GOAL: {goal}", "", "ITERATIONS TO SUMMARIZE:"]
    for h in entries:
        lines.append(
            f" Iter {h.get('iter', '?')}: [{h.get('decision', '?')}] "
            f"{_clip(h, 'feedback', 200)}"
        )
        ws = _clip(h, 'worker_summary', 200)
        if ws:
            lines.append(f" Worker did: {ws}")
    prompt_text = "\n".join(lines)

    model = model or DEFAULT_EVAL_MODEL
    payload = {
        "model": model,
        "instructions": COMPRESSOR_SYSTEM,
        "input": [{"role": "user", "content": prompt_text}],
        "store": False,
        "stream": True,
    }
    parts: list[str] = []
    gen = _iter_events(payload)
    try:
        for ev in gen:
            kind = ev.get("type")
            if kind == "_error":
                # A summary cut off by an error would silently drop
                # iterations; discard it and use the fallback list instead.
                parts.clear()
                break
            if kind == "response.output_text.delta":
                parts.append(ev.get("delta", ""))
    finally:
        gen.close()  # release the HTTP connection held by the generator

    result = "".join(parts).strip()
    if not result:
        # Fallback: one line per original entry.
        result = "\n".join(
            f"Iter {h.get('iter', '?')} [{h.get('decision', '?')}]: "
            f"{_clip(h, 'feedback', 100)}"
            for h in entries
        )
    return result
624
+
625
+
626
def compress_history(history: list, goal: str, model: str | None) -> list:
    """Replace older history entries with a single LLM-written summary.

    The most recent ``HISTORY_KEEP_RECENT`` entries are kept verbatim.
    Everything before them is summarised through ``_call_compressor``. A
    summary entry already present in that older part is carried over as
    text, not summarised again.

    Args:
        history: Iteration entries, possibly including earlier
            ``compressed_summary`` entries.
        goal: The overall goal, passed to the summariser.
        model: Model name for the summariser (``None`` uses its default).

    Returns:
        ``[summary_entry] + recent_entries``, or ``history`` itself when
        there is nothing to compress.
    """
    if HISTORY_KEEP_RECENT > 0:
        to_compress = history[:-HISTORY_KEEP_RECENT]
        keep = history[-HISTORY_KEEP_RECENT:]
    else:
        # history[:-0] is empty, so a zero setting needs its own branch.
        to_compress, keep = list(history), []

    prev_texts: list[str] = []
    prev_start = None
    normal_entries = []
    for h in to_compress:
        if h.get('type') == 'compressed_summary':
            text = h.get('content', '')
            if text:
                prev_texts.append(text)
            if prev_start is None:
                # 'covers' is stored as "<first>–<last>"; keep its start so
                # the merged summary reports the full range it describes.
                prev_start = str(h.get('covers', '')).split('–', 1)[0].strip() or None
        else:
            normal_entries.append(h)

    if not normal_entries:
        return history  # nothing to compress

    new_text = _call_compressor(normal_entries, goal, model)
    combined = "\n\n--- Additional history ---\n".join([*prev_texts, new_text])

    first_iter = normal_entries[0].get('iter', '?')
    last_iter = normal_entries[-1].get('iter', '?')
    start = first_iter if prev_start is None else prev_start
    summary_entry = {
        'type': 'compressed_summary',
        'covers': f"{start}–{last_iter}",
        'content': combined,
    }
    return [summary_entry] + keep
662
+
663
+
664
+ def build_worker_prompt(goal: str, history: list,
665
+ past_attempts: list[dict] | None = None) -> str:
302
666
  lines = [f"GOAL: {goal}", ""]
667
+
668
+ # RAG 섹션: 현재 상황을 쿼리로 과거 시도 검색·삽입
669
+ if past_attempts:
670
+ current_ctx = " ".join(
671
+ f"{h.get('feedback','')} {h.get('worker_summary','')}"
672
+ for h in history[-3:] if h.get('type') != 'compressed_summary'
673
+ ) or goal
674
+ rag = _build_rag_section(past_attempts, current_ctx)
675
+ if rag:
676
+ lines.append(rag)
677
+ lines.append("")
678
+
303
679
  if not history:
304
680
  lines += [WORKER_SYSTEM, "", "Begin working on the goal above. Make concrete changes now."]
305
681
  else:
306
682
  lines.append("ITERATION HISTORY:")
307
683
  for h in history:
308
- lines.append(f" Iteration {h['iter']}:")
309
- lines.append(f" Your work: {h['worker_summary']}")
310
- lines.append(f" Evaluator: {h['decision']}" +
311
- (f"{h['feedback']}" if h['feedback'] else ""))
684
+ if h.get('type') == 'compressed_summary':
685
+ lines.append(f" [COMPRESSED SUMMARY — Iter {h['covers']}]")
686
+ for ln in h['content'].splitlines():
687
+ lines.append(f" {ln}")
688
+ lines.append(" ──────────────────────────────────────")
689
+ else:
690
+ lines.append(f" Iteration {h['iter']}:")
691
+ lines.append(f" Your work: {h['worker_summary']}")
692
+ lines.append(f" Evaluator: {h['decision']}" +
693
+ (f" — {h['feedback']}" if h['feedback'] else ""))
312
694
  lines.append("")
313
- last = history[-1]
314
- if last['decision'].upper() == 'IMPROVE':
315
- lines.append(f"INSTRUCTION: Refine your previous work. Feedback: {last['feedback']}")
316
- else:
317
- lines.append(f"INSTRUCTION: Abandon previous approach. Try differently: {last['feedback']}")
695
+ # 마지막 실제 iteration entry 찾기
696
+ last = next((h for h in reversed(history) if h.get('type') != 'compressed_summary'), None)
697
+ if last:
698
+ if last['decision'].upper() == 'IMPROVE':
699
+ lines.append(f"INSTRUCTION: Refine your previous work. Feedback: {last['feedback']}")
700
+ else:
701
+ lines.append(f"INSTRUCTION: Abandon previous approach. Try differently: {last['feedback']}")
318
702
  lines.append("Make the changes now.")
319
703
  return "\n".join(lines)
320
704
 
@@ -422,6 +806,11 @@ def _esc_listener(stop: threading.Event):
422
806
  ch = os.read(tty_fd, 1)
423
807
  if ch == b'\x1b':
424
808
  _interrupt_event.set()
809
+ if _current_response:
810
+ try:
811
+ _current_response.close()
812
+ except Exception:
813
+ pass
425
814
  break
426
815
  except Exception:
427
816
  pass
@@ -436,9 +825,13 @@ def _esc_listener(stop: threading.Event):
436
825
  # ── Agent Runners ─────────────────────────────────────────────────────────────
437
826
 
438
827
  def run_worker(prompt: str, workdir: str, model: str | None,
439
- buf: deque, status_ref: list) -> tuple[str, int]:
440
- """Worker: ChatGPT backend Responses API 직접 호출 + 도구 실행 루프."""
828
+ buf: deque, status_ref: list,
829
+ system_prompt: str | None = None,
830
+ tools: list | None = None) -> tuple[str, int]:
831
+ """Worker: ChatGPT backend Responses API 직접 호출 + 병렬 도구 실행 루프."""
441
832
  model = model or DEFAULT_WORKER_MODEL
833
+ system_prompt = system_prompt or WORKER_SYSTEM
834
+ tools = tools if tools is not None else WORKER_TOOLS
442
835
  status_ref[0] = "running"
443
836
 
444
837
  # 입력 히스토리 (user msg + function_call + function_call_output 누적)
@@ -460,9 +853,9 @@ def run_worker(prompt: str, workdir: str, model: str | None,
460
853
 
461
854
  payload = {
462
855
  "model": model,
463
- "instructions": WORKER_SYSTEM,
856
+ "instructions": system_prompt,
464
857
  "input": input_history,
465
- "tools": WORKER_TOOLS,
858
+ "tools": tools,
466
859
  "store": False,
467
860
  "stream": True,
468
861
  }
@@ -504,26 +897,33 @@ def run_worker(prompt: str, workdir: str, model: str | None,
504
897
  if not fc_items:
505
898
  break
506
899
 
507
- # 도구 실행 및 히스토리에 추가
900
+ # 도구 병렬 실행
901
+ with ThreadPoolExecutor(max_workers=min(len(fc_items), 8)) as pool:
902
+ futures = {
903
+ pool.submit(
904
+ _execute_tool,
905
+ fc["name"],
906
+ json.loads(fc["arguments"]) if fc["arguments"] else {},
907
+ workdir,
908
+ ): fc
909
+ for fc in fc_items
910
+ }
911
+ results: dict[str, str] = {}
912
+ for fut in as_completed(futures):
913
+ fc = futures[fut]
914
+ try:
915
+ results[fc["call_id"]] = fut.result()
916
+ except Exception as e:
917
+ results[fc["call_id"]] = f"Error: {e}"
918
+
919
+ # call_id 순서 보장하며 히스토리 추가
508
920
  for fc in fc_items:
509
- call_id = fc["call_id"]
510
- name = fc["name"]
511
- raw_args = fc["arguments"]
512
- try:
513
- args = json.loads(raw_args)
514
- except Exception:
515
- args = {}
516
-
517
- arg_preview = raw_args[:80]
518
- buf.append(f"[cyan]▶ {name}({arg_preview})[/cyan]")
519
- result = _execute_tool(name, args, workdir)
520
- short = result[:300].replace('\n', ' ')
521
- buf.append(f"[dim]{short}[/dim]")
522
-
523
- # Responses API 형식 히스토리
524
- input_history.append({"type": "function_call", "call_id": call_id,
525
- "name": name, "arguments": raw_args})
526
- input_history.append({"type": "function_call_output", "call_id": call_id,
921
+ result = results[fc["call_id"]]
922
+ buf.append(f"[cyan]▶ {fc['name']}({fc['arguments'][:80]})[/cyan]")
923
+ buf.append(f"[dim]{result[:300].replace(chr(10), ' ')}[/dim]")
924
+ input_history.append({"type": "function_call", "call_id": fc["call_id"],
925
+ "name": fc["name"], "arguments": fc["arguments"]})
926
+ input_history.append({"type": "function_call_output", "call_id": fc["call_id"],
527
927
  "output": result})
528
928
 
529
929
  # 잔여 line_buf flush
@@ -534,22 +934,43 @@ def run_worker(prompt: str, workdir: str, model: str | None,
534
934
  return "\n".join(all_text_parts), 0
535
935
 
536
936
 
537
- def run_evaluator(prompt: str, workdir: str, model: str | None) -> tuple[str, int]:
538
- """Evaluator: ChatGPT backend Responses API, 도구 없이 텍스트만."""
937
+ def run_evaluator(prompt: str, workdir: str, model: str | None,
938
+ system_prompt: str | None = None) -> tuple[str, int]:
939
+ """Evaluator: ChatGPT backend Responses API, 도구 없이 텍스트만.
940
+ IMPROVE/REDIRECT는 첫 번째 완성 라인에서 결정 → 스트림 조기 종료.
941
+ DONE은 한국어 요약까지 필요하므로 전체 수신.
942
+ """
539
943
  model = model or DEFAULT_EVAL_MODEL
944
+ system_prompt = system_prompt or EVALUATOR_SYSTEM
540
945
  payload = {
541
946
  "model": model,
542
- "instructions": EVALUATOR_SYSTEM,
947
+ "instructions": system_prompt,
543
948
  "input": [{"role": "user", "content": prompt}],
544
949
  "store": False,
545
950
  "stream": True,
546
951
  }
547
952
  parts: list[str] = []
548
- for ev in _iter_events(payload):
549
- if ev["type"] == "_error":
550
- return f"[Evaluator error] {ev['message']}", 1
551
- if ev["type"] == "response.output_text.delta":
552
- parts.append(ev.get("delta", ""))
953
+ accumulated = ""
954
+ early_decided = False
955
+ gen = _iter_events(payload)
956
+ try:
957
+ for ev in gen:
958
+ if ev["type"] == "_error":
959
+ return f"[Evaluator error] {ev['message']}", 1
960
+ if ev["type"] == "response.output_text.delta":
961
+ delta = ev.get("delta", "")
962
+ parts.append(delta)
963
+ if not early_decided:
964
+ accumulated += delta
965
+ # 첫 줄이 완성되면 결정 파싱 시도
966
+ if '\n' in accumulated:
967
+ first_line = accumulated.split('\n')[0].strip()
968
+ m = DECISION_RE.match(first_line)
969
+ if m and m.group(1).upper() in ('IMPROVE', 'REDIRECT'):
970
+ early_decided = True
971
+ break # 한국어 요약 불필요 → 스트림 종료
972
+ finally:
973
+ gen.close()
553
974
  return "".join(parts), 0
554
975
 
555
976
 
@@ -745,10 +1166,12 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
745
1166
  eval_model: str | None, max_iter: int,
746
1167
  layout: Layout, live: Live,
747
1168
  initial_history: list | None = None,
748
- initial_eval_history: list | None = None) -> str:
1169
+ initial_eval_history: list | None = None,
1170
+ mode: str = "code",
1171
+ eval_every: int = 1) -> str:
749
1172
  """
750
1173
  Worker + Evaluator 반복 루프.
751
- 반환: 'done' | 'max'
1174
+ 반환: 'done' | 'max' | 'interrupted'
752
1175
  """
753
1176
  global _last_session
754
1177
  history = list(initial_history or [])
@@ -757,6 +1180,24 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
757
1180
  worker_status = ["idle"]
758
1181
  done = False
759
1182
 
1183
+ # 모드별 시스템 프롬프트 및 도구 선택
1184
+ worker_sys = RESEARCH_WORKER_SYSTEM if mode == "research" else WORKER_SYSTEM
1185
+ eval_sys = RESEARCH_EVALUATOR_SYSTEM if mode == "research" else EVALUATOR_SYSTEM
1186
+ active_tools = RESEARCH_TOOLS if mode == "research" else WORKER_TOOLS
1187
+
1188
+ # 압축 상태 — 백그라운드 압축 스레드 결과 수신용
1189
+ _compress_result: list[list] = [] # [new_history] 채워지면 완료
1190
+ _compress_thread: threading.Thread | None = None
1191
+
1192
+ # ── 지식 저장소 로드 ─────────────────────────────────────────────────
1193
+ past_attempts = _load_past_attempts(goal)
1194
+ knowledge_dir = _knowledge_path(goal)
1195
+ if past_attempts:
1196
+ console.print(
1197
+ f"[dim]📚 지식 저장소 로드: {knowledge_dir.name}/ "
1198
+ f"({len(past_attempts)}개 과거 시도)[/dim]"
1199
+ )
1200
+
760
1201
  # ESC 리스너 시작
761
1202
  _interrupt_event.clear()
762
1203
  _esc_stop = threading.Event()
@@ -782,12 +1223,13 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
782
1223
  live.start()
783
1224
  refresh("worker", iteration)
784
1225
 
785
- worker_prompt = build_worker_prompt(goal, history)
1226
+ worker_prompt = build_worker_prompt(goal, history, past_attempts=past_attempts)
786
1227
  worker_result = [None, None]
787
1228
 
788
1229
  def _worker():
789
1230
  worker_result[0], worker_result[1] = run_worker(
790
- worker_prompt, workdir, worker_model, worker_buf, worker_status)
1231
+ worker_prompt, workdir, worker_model, worker_buf, worker_status,
1232
+ system_prompt=worker_sys, tools=active_tools)
791
1233
 
792
1234
  t = threading.Thread(target=_worker, daemon=True)
793
1235
  t.start()
@@ -804,6 +1246,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
804
1246
  live.stop()
805
1247
  _last_session = {"goal": goal, "history": history,
806
1248
  "eval_history": eval_history, "workdir": workdir}
1249
+ _persist_session(_last_session)
807
1250
  return _finish('interrupted')
808
1251
 
809
1252
  last_msg, _ = worker_result
@@ -821,11 +1264,16 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
821
1264
  live.stop()
822
1265
  _last_session = {"goal": goal, "history": history,
823
1266
  "eval_history": eval_history, "workdir": workdir}
1267
+ _persist_session(_last_session)
824
1268
  return _finish('interrupted')
825
1269
 
826
- refresh("evaluator", iteration)
827
- eval_prompt = build_evaluator_prompt(goal, last_msg or "", iteration)
828
- eval_msg, _ = run_evaluator(eval_prompt, workdir, eval_model)
1270
+ if iteration % eval_every == 0 or iteration == max_iter:
1271
+ refresh("evaluator", iteration)
1272
+ eval_prompt = build_evaluator_prompt(goal, last_msg or "", iteration)
1273
+ eval_msg, _ = run_evaluator(eval_prompt, workdir, eval_model,
1274
+ system_prompt=eval_sys)
1275
+ else:
1276
+ eval_msg = "IMPROVE: (evaluation skipped)"
829
1277
 
830
1278
  decision, feedback = parse_decision(eval_msg)
831
1279
  history[-1]['decision'] = decision
@@ -837,6 +1285,40 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
837
1285
  'full_msg': eval_msg,
838
1286
  })
839
1287
 
1288
+ # ── 지식 저장소에 attempt 기록 ───────────────────────────────────
1289
+ attempt_record = {
1290
+ "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
1291
+ "iter": iteration,
1292
+ "decision": decision,
1293
+ "feedback": feedback,
1294
+ "worker_summary": worker_summary,
1295
+ "goal": goal,
1296
+ }
1297
+ _save_attempt(goal, attempt_record)
1298
+ past_attempts.append(attempt_record) # 현재 세션 내 즉시 반영
1299
+
1300
+ # ── 이전 압축 결과 반영 ──────────────────────────────────────────
1301
+ if _compress_thread and not _compress_thread.is_alive() and _compress_result:
1302
+ history = _compress_result[0]
1303
+ _compress_result.clear()
1304
+ _compress_thread = None
1305
+
1306
+ # ── 프롬프트 토큰이 100k 초과 시 백그라운드 압축 시작 ────────────
1307
+ prompt_tokens = _estimate_tokens(build_worker_prompt(goal, history, past_attempts=past_attempts))
1308
+ if (prompt_tokens > TOKEN_COMPRESS_THRESHOLD
1309
+ and (_compress_thread is None or not _compress_thread.is_alive())
1310
+ and decision != 'DONE'):
1311
+ _snap = list(history) # 스냅샷 캡처
1312
+ _res = _compress_result
1313
+
1314
+ def _do_compress():
1315
+ new_h = compress_history(_snap, goal, eval_model)
1316
+ _res.clear()
1317
+ _res.append(new_h)
1318
+
1319
+ _compress_thread = threading.Thread(target=_do_compress, daemon=True)
1320
+ _compress_thread.start()
1321
+
840
1322
  if decision == 'DONE':
841
1323
  done = True
842
1324
  refresh("done", iteration)
@@ -846,6 +1328,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
846
1328
  "goal": goal, "history": history,
847
1329
  "eval_history": eval_history, "workdir": workdir,
848
1330
  }
1331
+ _persist_session(_last_session)
849
1332
 
850
1333
  # 완료 요약 출력
851
1334
  console.print()
@@ -866,6 +1349,7 @@ def run_agent_loop(goal: str, workdir: str, worker_model: str | None,
866
1349
  "goal": goal, "history": history,
867
1350
  "eval_history": eval_history, "workdir": workdir,
868
1351
  }
1352
+ _persist_session(_last_session)
869
1353
  return _finish('max')
870
1354
 
871
1355
 
@@ -954,6 +1438,10 @@ def main():
954
1438
  help="Evaluator 모델")
955
1439
  parser.add_argument("-n", "--max-iterations", type=int, default=DEFAULT_MAX_ITER,
956
1440
  metavar="N", help=f"최대 반복 횟수 (기본: {DEFAULT_MAX_ITER})")
1441
+ parser.add_argument("--mode", choices=["code", "research"], default="code",
1442
+ help="실행 모드: code(기본) / research(웹 검색·분석)")
1443
+ parser.add_argument("--eval-every", type=int, default=1, metavar="N",
1444
+ help="N번 반복마다 Evaluator 실행 (기본: 1, 즉 매번)")
957
1445
  args = parser.parse_args()
958
1446
 
959
1447
  # ── auth 서브커맨드 ────────────────────────────────────────────────
@@ -1004,7 +1492,21 @@ def main():
1004
1492
  "agentforge auth login 으로 나중에 로그인할 수 있습니다.[/dim]"
1005
1493
  )
1006
1494
 
1007
- console.print("[dim]명령을 입력하세요. /resume | /exit[/dim]")
1495
+ current_mode = args.mode
1496
+ eval_every = args.eval_every
1497
+
1498
+ # 프로세스 시작 시 디스크에서 마지막 세션 복원
1499
+ global _last_session
1500
+ if _last_session is None:
1501
+ _last_session = _load_persisted_session()
1502
+ if _last_session:
1503
+ saved_at = _last_session.get('saved_at', '알 수 없음')
1504
+ console.print(
1505
+ f"[dim]💾 이전 세션 복원됨: [cyan]{_last_session['goal'][:60]}[/cyan] "
1506
+ f"({len(_last_session['history'])}회, {saved_at})[/dim]"
1507
+ )
1508
+
1509
+ console.print(f"[dim]명령을 입력하세요. /help 로 커맨드 목록 확인. 모드: {current_mode}[/dim]")
1008
1510
 
1009
1511
  _completer = SlashCompleter()
1010
1512
 
@@ -1039,9 +1541,17 @@ def main():
1039
1541
  continue
1040
1542
  s = _last_session
1041
1543
  prev_iters = len(s["history"])
1544
+ saved_at = s.get('saved_at', '알 수 없음')
1042
1545
  console.print(Rule("[cyan]세션 재개[/cyan]"))
1043
- console.print(f"[dim]목표:[/dim] {s['goal'][:80]}")
1044
- console.print(f"[dim]이전 반복:[/dim] {prev_iters}회 → 이어서 실행")
1546
+ console.print(f"[dim]목표:[/dim] [white]{s['goal'][:80]}[/white]")
1547
+ console.print(f"[dim]반복:[/dim] {prev_iters}회 완료")
1548
+ console.print(f"[dim]저장:[/dim] {saved_at}")
1549
+ last_decision = next(
1550
+ (h['decision'] for h in reversed(s['history'])
1551
+ if h.get('decision') and h.get('type') != 'compressed_summary'),
1552
+ '없음'
1553
+ )
1554
+ console.print(f"[dim]마지막 판정:[/dim] {last_decision}")
1045
1555
  console.print()
1046
1556
  layout2 = make_layout()
1047
1557
  layout2["header"].update(render_header(s["goal"], prev_iters, max_iter, "idle"))
@@ -1054,18 +1564,39 @@ def main():
1054
1564
  max_iter, layout2, live2,
1055
1565
  initial_history=s["history"],
1056
1566
  initial_eval_history=s["eval_history"],
1567
+ mode=current_mode, eval_every=eval_every,
1057
1568
  )
1058
- goal = s["goal"]
1059
1569
  if outcome == 'interrupted':
1060
- goal = _handle_interrupt(goal, workdir, args, max_iter)
1570
+ console.print("\n[yellow]⚠ 중단됨. REPL로 돌아갑니다.[/yellow]")
1061
1571
  elif outcome == 'max':
1062
1572
  console.print(f"[red]{max_iter}번 반복 후에도 완료되지 않았습니다.[/red]")
1063
1573
  if outcome != 'interrupted':
1064
1574
  console.print("[dim]다음 명령을 입력하세요. /exit 로 종료.[/dim]")
1065
1575
 
1576
+ elif cmd_name == 'mode':
1577
+ if cmd_arg in ('code', 'research'):
1578
+ current_mode = cmd_arg
1579
+ console.print(f"[cyan]모드 변경: {current_mode}[/cyan]")
1580
+ else:
1581
+ console.print("[red]사용법: /mode code 또는 /mode research[/red]")
1582
+
1583
+ elif cmd_name == 'eval-every':
1584
+ try:
1585
+ eval_every = int(cmd_arg)
1586
+ console.print(f"[cyan]Evaluator: {eval_every}번마다 실행[/cyan]")
1587
+ except ValueError:
1588
+ console.print("[red]숫자를 입력하세요. 예: /eval-every 3[/red]")
1589
+
1590
+ elif cmd_name == 'status':
1591
+ console.print(f"모드: [cyan]{current_mode}[/cyan] | eval-every: [cyan]{eval_every}[/cyan] | dir: [cyan]{workdir}[/cyan]")
1592
+
1593
+ elif cmd_name == 'help':
1594
+ for cmd, desc in SLASH_COMMANDS:
1595
+ console.print(f" [cyan]{cmd:<25}[/cyan] {desc}")
1596
+
1066
1597
  else:
1067
1598
  console.print(f"[red]알 수 없는 커맨드: /{cmd_name}[/red]")
1068
- console.print("[dim]사용 가능: /resume /exit[/dim]")
1599
+ console.print("[dim]/help 커맨드 목록을 확인하세요.[/dim]")
1069
1600
 
1070
1601
  else:
1071
1602
  # 일반 텍스트 → 바로 Worker에게 목표로 전달
@@ -1078,9 +1609,10 @@ def main():
1078
1609
  outcome = run_agent_loop(
1079
1610
  goal, workdir, args.worker_model, args.eval_model,
1080
1611
  max_iter, layout2, live2,
1612
+ mode=current_mode, eval_every=eval_every,
1081
1613
  )
1082
1614
  if outcome == 'interrupted':
1083
- goal = _handle_interrupt(goal, workdir, args, max_iter)
1615
+ console.print("\n[yellow]⚠ 중단됨. REPL로 돌아갑니다.[/yellow]")
1084
1616
  elif outcome == 'max':
1085
1617
  console.print(f"[red]{max_iter}번 반복 후에도 완료되지 않았습니다.[/red]")
1086
1618
  if outcome != 'interrupted':
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentforge-multi",
3
- "version": "0.1.5",
3
+ "version": "0.1.7",
4
4
  "description": "Multi-agent CLI: Worker + Evaluator agents collaborate in a loop to achieve your goal",
5
5
  "keywords": [
6
6
  "ai",