clouds-coder 0.1.0__tar.gz → 2026.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import base64
5
5
  from collections import deque
6
6
  import csv
7
7
  import difflib
8
+ import errno
8
9
  import html
9
10
  import hashlib
10
11
  import hmac
@@ -35,7 +36,7 @@ from pathlib import Path, PurePosixPath
35
36
  from urllib.error import HTTPError, URLError
36
37
  from urllib.parse import parse_qs, unquote, urlparse
37
38
  from urllib.request import Request, urlopen
38
- APP_VERSION = "0.1.0"
39
+ APP_VERSION = "0.1.1"
39
40
  DEFAULT_OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
40
41
  DEFAULT_OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5-coder:7b")
41
42
  WORKDIR = Path(os.getenv("AGENT_WORKDIR", os.getcwd())).resolve()
@@ -71,6 +72,14 @@ DEFAULT_TIMEOUT_SECONDS = max(
71
72
  DEFAULT_REQUEST_TIMEOUT = DEFAULT_TIMEOUT_SECONDS
72
73
  AUTO_CONTINUE_BUDGET_DEFAULT = 30
73
74
  AGENT_MAX_OUTPUT_TOKENS = 2200
75
+ WATCHDOG_INTENT_NO_TOOL_THRESHOLD = 2
76
+ WATCHDOG_REPEAT_NO_TOOL_THRESHOLD = 2
77
+ WATCHDOG_STATE_STALL_THRESHOLD = 6
78
+ WATCHDOG_CONTEXT_STALL_THRESHOLD = 2
79
+ WATCHDOG_REPEAT_SIMILARITY_THRESHOLD = 0.85
80
+ WATCHDOG_CONTEXT_NEAR_RATIO = 0.92
81
+ WATCHDOG_MAX_DECOMPOSE_STEPS = 12
82
+ WATCHDOG_STEP_MAX_ATTEMPTS = 2
74
83
  EMPTY_ACTION_MIN_CONTENT_CHARS = 5
75
84
  EMPTY_ACTION_WAKEUP_RETRY_LIMIT = 2
76
85
  THINKING_BUDGET_FORCE_RATIO = 0.85
@@ -98,6 +107,22 @@ LIVE_INPUT_WEIGHT_BASE_DELAYED = 0.35
98
107
  LIVE_INPUT_WEIGHT_BASE_NORMAL = 0.65
99
108
  LIVE_INPUT_WEIGHT_STEP_DELAYED = 0.30
100
109
  LIVE_INPUT_WEIGHT_STEP_NORMAL = 0.20
110
+ SOCKET_NOISE_LINE_PATTERNS = (
111
+ re.compile(r"\bwinerror\s*10038\b", re.IGNORECASE),
112
+ re.compile(r"\bwsaenotsock\b", re.IGNORECASE),
113
+ re.compile(r"\bsocket\s+closed\s+benignly\b", re.IGNORECASE),
114
+ re.compile(r"\bbenign\s+socket\s+error\b", re.IGNORECASE),
115
+ )
116
+ BENIGN_SOCKET_DEBUG_LOG_ENABLED = str(os.getenv("AGENT_DEBUG_SOCKET_LOG", "") or "").strip().lower() in {
117
+ "1",
118
+ "true",
119
+ "yes",
120
+ "on",
121
+ "debug",
122
+ }
123
+ BENIGN_SOCKET_LOG_INTERVAL_SECONDS = 30.0
124
+ FINAL_SUMMARY_MIN_CHARS = 80
125
+ FINAL_SUMMARY_STRICT_MIN_CHARS = 120
101
126
  RUNTIME_CONTROL_HINT_PREFIXES = (
102
127
  "<reminder>",
103
128
  "<todo-rescue>",
@@ -170,6 +195,7 @@ TASK_PROFILE_TYPES = (
170
195
  )
171
196
  TASK_LEVEL_CHOICES = (1, 2, 3, 4, 5)
172
197
  TASK_SCALE_PREFERENCES = ("fast", "balanced", "thorough")
198
+ SEMANTIC_CONFIDENCE_CHOICES = ("high", "medium", "low")
173
199
  TASK_LEVEL_POLICIES: dict[int, dict] = {
174
200
  1: {
175
201
  "name": "simple_direct_answer",
@@ -712,6 +738,37 @@ def model_language_instruction(lang: str) -> str:
712
738
  )
713
739
 
714
740
 
741
def _detect_os_shell_instruction() -> str:
    """Pick the shell-environment paragraph for the agent system prompt.

    The host OS is read from ``platform.system()``. ``"Windows"`` and
    ``"Darwin"`` get dedicated notes; every other value falls back to the
    Linux/POSIX note.

    Returns:
        A single-paragraph instruction string describing the shell.
    """
    import platform as _platform

    windows_note = (
        "Shell environment: Windows (cmd.exe via shell=True). "
        "IMPORTANT — use Windows-native commands only: "
        "use 'dir' (not 'ls'), 'type' (not 'cat'), 'del' (not 'rm'), "
        "'move' (not 'mv'), 'copy' (not 'cp'), 'findstr' (not 'grep'), "
        "'where' (not 'which'), 'echo %VAR%' (not 'echo $VAR'). "
        "To list files recursively use 'dir /s /b'. "
        "Path separator is backslash (\\). "
        "Do NOT use POSIX paths like /workspace, /tmp, /usr, ~/... — they do not exist. "
        "Working directory is already set; use relative paths or the absolute session root shown above."
    )
    macos_note = (
        "Shell environment: macOS (bash/zsh). "
        "Standard POSIX commands are available (ls, cat, grep, find, etc.). "
        "Package manager is 'brew'. "
        "Do NOT assume Linux-specific paths like /proc or /etc/os-release exist. "
        "Use relative paths or the absolute session root shown above."
    )
    # Default for Linux and any other platform name.
    posix_note = (
        "Shell environment: Linux (bash). "
        "Standard POSIX commands are available (ls, cat, grep, find, etc.). "
        "Use relative paths or the absolute session root shown above."
    )

    notes_by_system = {"Windows": windows_note, "Darwin": macos_note}
    return notes_by_system.get(_platform.system(), posix_note)
771
+
715
772
  def resolve_web_ui_dir_path(raw: str, base_dir: Path | None = None) -> Path:
716
773
  txt = str(raw or "").strip()
717
774
  if not txt:
@@ -896,6 +953,103 @@ def guess_ext_from_mime(mime: str, fallback: str = ".bin") -> str:
896
953
  def now_ts() -> float:
897
954
  return time.time()
898
955
 
956
+
957
+ _benign_socket_log_lock = threading.Lock()
958
+ _benign_socket_log_state: dict[str, dict[str, float | int]] = {}
959
+
960
+
961
def filter_runtime_noise_lines(text: str) -> tuple[str, int]:
    """Remove lines matching any pattern in ``SOCKET_NOISE_LINE_PATTERNS``.

    Args:
        text: Raw text to clean; falsy values are treated as empty.

    Returns:
        ``(cleaned, dropped)``: the surviving lines re-joined with ``"\\n"``
        and stripped of surrounding whitespace, plus the number of lines
        that were removed.
    """
    raw = str(text or "")
    if not raw:
        return "", 0

    rows = raw.splitlines()
    survivors = [
        row
        for row in rows
        if not any(pattern.search(row) for pattern in SOCKET_NOISE_LINE_PATTERNS)
    ]
    return "\n".join(survivors).strip(), len(rows) - len(survivors)
974
+
975
+
976
+ def is_benign_socket_error(exc: BaseException | None) -> bool:
977
+ if exc is None:
978
+ return False
979
+ if isinstance(exc, (BrokenPipeError, ConnectionResetError, ConnectionAbortedError, TimeoutError)):
980
+ return True
981
+ if not isinstance(exc, OSError):
982
+ return False
983
+ winerror = int(getattr(exc, "winerror", 0) or 0)
984
+ if winerror in {10038, 10053, 10054, 10057, 10093}: # 10093 = WSANOTINITIALISED (selector on pipe)
985
+ return True
986
+ err = int(getattr(exc, "errno", 0) or 0)
987
+ benign_errno = {
988
+ int(getattr(errno, "EPIPE", 32)),
989
+ int(getattr(errno, "ECONNRESET", 104)),
990
+ int(getattr(errno, "ECONNABORTED", 103)),
991
+ int(getattr(errno, "ENOTCONN", 107)),
992
+ int(getattr(errno, "EBADF", 9)),
993
+ }
994
+ return err in benign_errno
995
+
996
+
997
+ def _socket_error_code(exc: BaseException | None) -> str:
998
+ if not isinstance(exc, OSError):
999
+ return str(type(exc).__name__ if exc is not None else "unknown")
1000
+ winerror = int(getattr(exc, "winerror", 0) or 0)
1001
+ if winerror > 0:
1002
+ return f"winerror:{winerror}"
1003
+ err = int(getattr(exc, "errno", 0) or 0)
1004
+ if err > 0:
1005
+ return f"errno:{err}"
1006
+ return str(type(exc).__name__ if exc is not None else "OSError")
1007
+
1008
+
1009
def _log_benign_socket_error_limited(exc: BaseException | None, where: str = ""):
    """Print a rate-limited debug line to stderr for a benign socket error.

    Does nothing unless ``BENIGN_SOCKET_DEBUG_LOG_ENABLED`` is true. Messages
    are bucketed by ``"<where>|<error code>"``; each bucket emits at most once
    per ``BENIGN_SOCKET_LOG_INTERVAL_SECONDS`` and reports how many
    occurrences were suppressed since its previous emission.

    Args:
        exc: The exception being reported.
        where: Label for the call site; defaults to ``"runtime"`` when empty.
    """
    if not BENIGN_SOCKET_DEBUG_LOG_ENABLED:
        return

    code = _socket_error_code(exc)
    location = str(where or "runtime").strip()
    bucket = f"{location}|{code}"
    timestamp = now_ts()
    skipped_since_last = 0
    emit = True

    def _last_seen(item) -> float:
        # Non-dict entries sort first so they are evicted before real ones.
        entry = item[1]
        if isinstance(entry, dict):
            return float(entry.get("last_ts", 0.0))
        return float(0.0)

    with _benign_socket_log_lock:
        entry = _benign_socket_log_state.get(bucket)
        if not isinstance(entry, dict):
            # First occurrence for this bucket: record it and emit.
            _benign_socket_log_state[bucket] = {"last_ts": timestamp, "suppressed": 0}
        elif timestamp - float(entry.get("last_ts", 0.0) or 0.0) >= BENIGN_SOCKET_LOG_INTERVAL_SECONDS:
            # Interval elapsed: emit and report what was held back meanwhile.
            skipped_since_last = int(entry.get("suppressed", 0) or 0)
            entry["last_ts"] = timestamp
            entry["suppressed"] = 0
        else:
            entry["suppressed"] = int(entry.get("suppressed", 0) or 0) + 1
            emit = False

        # Bound the table: above 512 buckets, drop the 128 least recently emitted.
        if len(_benign_socket_log_state) > 512:
            oldest = sorted(_benign_socket_log_state.items(), key=_last_seen)[:128]
            for stale_key, _ in oldest:
                _benign_socket_log_state.pop(stale_key, None)

    if not emit:
        return
    message = f"[web-agent][debug] benign socket error {code} at {location}"
    if skipped_since_last > 0:
        message = f"{message} (+{skipped_since_last} suppressed)"
    print(message, file=sys.stderr)
1044
+
1045
+
1046
def swallow_benign_socket_error(exc: BaseException | None, where: str = "") -> bool:
    """Report whether *exc* may be ignored, logging it (rate-limited) if so.

    Args:
        exc: The exception to classify.
        where: Call-site label forwarded to the rate-limited logger.

    Returns:
        ``True`` when ``is_benign_socket_error(exc)`` holds, else ``False``.
    """
    benign = is_benign_socket_error(exc)
    if benign:
        _log_benign_socket_error_limited(exc, where)
    return benign
1051
+
1052
+
899
1053
  def normalize_timeout_seconds(
900
1054
  raw: object,
901
1055
  *,
@@ -1185,8 +1339,9 @@ def normalize_work_text(text: object, status: str = "") -> str:
1185
1339
  flags=re.IGNORECASE,
1186
1340
  )
1187
1341
  if status:
1342
+ status_pattern = re.escape(status).replace("_", r"[_\-\s]?")
1188
1343
  s = re.sub(
1189
- rf"\s*[—-]\s*{re.escape(status).replace('_', '[_\\-\\s]?')}\s*$",
1344
+ rf"\s*[—-]\s*{status_pattern}\s*$",
1190
1345
  "",
1191
1346
  s,
1192
1347
  flags=re.IGNORECASE,
@@ -8503,11 +8658,14 @@ class SessionState:
8503
8658
  "for compact reasoning and fast handoffs. "
8504
8659
  "Budget controls thought depth only and must not be used as an early-stop user-facing reason."
8505
8660
  )
8661
+ html_block = f"{html_hint}\n\n" if html_hint else ""
8662
+ research_block = f"{research_hint}\n\n" if research_hint else ""
8506
8663
  return (
8507
8664
  f"You are a coding agent running in isolated session workspace {self.files_root}. "
8508
8665
  f"Session absolute writable root is {self.files_root}. "
8509
8666
  "For file tools, prefer relative paths like hello.txt; runtime will map them to the absolute session root. "
8510
8667
  "The '/workspace/...' form is only a virtual alias for path arguments; never create OS-level /workspace in shell. "
8668
+ f"{_detect_os_shell_instruction()} "
8511
8669
  "Use tools to inspect files, execute commands, and edit code safely. "
8512
8670
  f"{route_hint}"
8513
8671
  f"{budget_hint} "
@@ -8531,8 +8689,8 @@ class SessionState:
8531
8689
  f"Current context upper bound is ~{self.context_token_upper_bound} tokens; keep steps compact to stay under this limit. "
8532
8690
  "When user asks to modify uploaded content, prioritize files under the uploaded workspace paths.\n\n"
8533
8691
  "If user asks to generate image/audio/video, use generate_media when active model capability supports it.\n\n"
8534
- f"{(html_hint + '\n\n') if html_hint else ''}"
8535
- f"{(research_hint + '\n\n') if research_hint else ''}"
8692
+ f"{html_block}"
8693
+ f"{research_block}"
8536
8694
  f"{model_language_instruction(self.ui_language)}\n\n"
8537
8695
  f"Uploaded files context:\n{uploads_ctx}\n\n"
8538
8696
  f"Available skills:\n{self.skills.descriptions()}"
@@ -8555,6 +8713,19 @@ class SessionState:
8555
8713
  "used_percent": used_pct,
8556
8714
  }
8557
8715
 
8716
def _apply_auto_compact_if_needed(self, reason: str = "auto") -> bool:
    """Run ``_auto_compact`` when the context budget is exhausted.

    ``_microcompact`` is always invoked first. ``_auto_compact(reason)`` then
    runs only if the ``used`` metric has reached ``limit`` and at least 0.8
    seconds have passed since ``self.last_compact_ts``.

    Args:
        reason: Label forwarded to ``_auto_compact``.

    Returns:
        ``True`` if ``_auto_compact`` was called, ``False`` otherwise.
    """
    self._microcompact()

    budget = self._context_budget_metrics()
    tokens_used = int(budget.get("used", 0) or 0)
    # Clamp to 1 so a missing or zero limit still compares sanely.
    token_limit = max(1, int(budget.get("limit", 0) or 0))
    if tokens_used < token_limit:
        return False

    # Debounce: skip when a compaction happened less than 0.8s ago.
    seconds_since_last = now_ts() - float(self.last_compact_ts or 0.0)
    if seconds_since_last < 0.8:
        return False

    self._auto_compact(reason)
    return True
8728
+
8558
8729
  def _estimate_output_tokens(self, text: str, thinking_text: str = "", tool_calls: list | None = None) -> int:
8559
8730
  t_main = len(str(text or "")) // 4
8560
8731
  t_think = len(str(thinking_text or "")) // 4
@@ -10978,9 +11149,18 @@ class SessionState:
10978
11149
  "重构",
10979
11150
  "设计",
10980
11151
  "构建",
11152
+ "架构",
11153
+ "内核",
11154
+ "框架",
11155
+ "死循环",
11156
+ "状态机",
11157
+ "调度",
10981
11158
  "后端",
10982
11159
  "前端",
10983
11160
  "自动化",
11161
+ "agentbus",
11162
+ "watchdog",
11163
+ "decomposition",
10984
11164
  "workflow",
10985
11165
  "architecture",
10986
11166
  "build",
@@ -11099,7 +11279,10 @@ class SessionState:
11099
11279
  return {
11100
11280
  "task_type": "general",
11101
11281
  "complexity": "simple",
11102
- "direct_objective": "Provide the most direct useful response with minimal orchestration.",
11282
+ "direct_objective": (
11283
+ "Provide the most direct useful response with minimal orchestration, "
11284
+ "anchored to the current project context and user goal."
11285
+ ),
11103
11286
  "recommended_agents": ["developer"],
11104
11287
  "round_budget": 3,
11105
11288
  "reason": "default lightweight profile",
@@ -11664,87 +11847,233 @@ class SessionState:
11664
11847
  del target[:overflow]
11665
11848
 
11666
11849
  def _merge_output_text() -> str:
11667
- out_text = out_buf.decode("utf-8", errors="replace")
11668
- err_text = err_buf.decode("utf-8", errors="replace")
11850
+ # On Windows, cmd.exe outputs in the system OEM codepage (e.g. cp936/GBK),
11851
+ # not UTF-8. Detect and use the correct encoding for decoding.
11852
+ if os.name == "nt":
11853
+ try:
11854
+ import locale as _lc
11855
+ enc = _lc.getpreferredencoding(False) or "utf-8"
11856
+ except Exception:
11857
+ enc = "utf-8"
11858
+ else:
11859
+ enc = "utf-8"
11860
+ out_text = out_buf.decode(enc, errors="replace")
11861
+ err_text = err_buf.decode(enc, errors="replace")
11669
11862
  return (out_text + err_text).strip()
11670
11863
 
11671
- try:
11672
- proc = subprocess.Popen(
11673
- command,
11674
- shell=True,
11675
- cwd=cwd,
11676
- stdout=subprocess.PIPE,
11677
- stderr=subprocess.PIPE,
11678
- text=False,
11679
- bufsize=0,
11680
- start_new_session=(os.name == "posix"),
11681
- )
11682
- with selectors.DefaultSelector() as sel:
11683
- if proc.stdout is not None:
11684
- try:
11685
- os.set_blocking(proc.stdout.fileno(), False)
11686
- except Exception:
11687
- pass
11688
- sel.register(proc.stdout, selectors.EVENT_READ, data="stdout")
11689
- if proc.stderr is not None:
11864
+ def _collect_with_reader_threads(proc: subprocess.Popen):
11865
+ nonlocal next_progress_emit
11866
+ reader_threads: list[threading.Thread] = []
11867
+ io_queue: queue.Queue = queue.Queue()
11868
+ active_readers: set[str] = set()
11869
+
11870
+ def _spawn_reader(label: str, stream):
11871
+ if stream is None:
11872
+ return
11873
+ active_readers.add(label)
11874
+ # Selector fallback may leave PIPE FDs in non-blocking mode.
11875
+ # Reader threads expect blocking reads to avoid early EOF/pipe close.
11876
+ try:
11877
+ os.set_blocking(stream.fileno(), True)
11878
+ except Exception:
11879
+ pass
11880
+
11881
+ def _reader():
11690
11882
  try:
11691
- os.set_blocking(proc.stderr.fileno(), False)
11692
- except Exception:
11693
- pass
11694
- sel.register(proc.stderr, selectors.EVENT_READ, data="stderr")
11695
- while True:
11696
- now = time.time()
11697
- elapsed = now - start
11698
- if self.cancel_requested:
11699
- _stop_process(proc)
11700
- meta["error"] = "Error: interrupted by user"
11701
- meta["exit_code"] = -130
11702
- elif timeout > 0 and elapsed >= timeout:
11703
- _stop_process(proc)
11704
- meta["error"] = f"Error: timeout ({timeout}s)"
11705
- meta["exit_code"] = -1
11706
- events = sel.select(timeout=0.12)
11707
- for key, _ in events:
11708
- stream = key.fileobj
11883
+ while True:
11884
+ try:
11885
+ chunk = stream.read(65536)
11886
+ except Exception:
11887
+ break
11888
+ if chunk is None:
11889
+ time.sleep(0.01)
11890
+ continue
11891
+ if not chunk:
11892
+ break
11893
+ io_queue.put((label, chunk))
11894
+ finally:
11709
11895
  try:
11710
- chunk = os.read(stream.fileno(), 65536)
11711
- except BlockingIOError:
11712
- continue
11896
+ stream.close()
11713
11897
  except Exception:
11714
- chunk = b""
11715
- if not chunk:
11898
+ pass
11899
+ io_queue.put((label, None))
11900
+
11901
+ th = threading.Thread(target=_reader, daemon=True)
11902
+ th.start()
11903
+ reader_threads.append(th)
11904
+
11905
+ _spawn_reader("stdout", proc.stdout)
11906
+ _spawn_reader("stderr", proc.stderr)
11907
+
11908
+ while True:
11909
+ now = time.time()
11910
+ elapsed = now - start
11911
+ if (not meta.get("error")) and self.cancel_requested:
11912
+ _stop_process(proc)
11913
+ meta["error"] = "Error: interrupted by user"
11914
+ meta["exit_code"] = -130
11915
+ elif (not meta.get("error")) and timeout > 0 and elapsed >= timeout:
11916
+ _stop_process(proc)
11917
+ meta["error"] = f"Error: timeout ({timeout}s)"
11918
+ meta["exit_code"] = -1
11919
+ try:
11920
+ label, chunk = io_queue.get(timeout=0.12)
11921
+ if chunk is None:
11922
+ active_readers.discard(str(label))
11923
+ elif str(label) == "stderr":
11924
+ _append_capture(err_buf, chunk)
11925
+ else:
11926
+ _append_capture(out_buf, chunk)
11927
+ except queue.Empty:
11928
+ pass
11929
+ while True:
11930
+ try:
11931
+ label, chunk = io_queue.get_nowait()
11932
+ except queue.Empty:
11933
+ break
11934
+ if chunk is None:
11935
+ active_readers.discard(str(label))
11936
+ elif str(label) == "stderr":
11937
+ _append_capture(err_buf, chunk)
11938
+ else:
11939
+ _append_capture(out_buf, chunk)
11940
+ if now >= next_progress_emit:
11941
+ self._emit_transient(
11942
+ "status",
11943
+ {
11944
+ "summary": (
11945
+ f"bash running ({int(elapsed)}s, "
11946
+ f"captured={len(out_buf) + len(err_buf)}B)"
11947
+ )
11948
+ },
11949
+ )
11950
+ next_progress_emit = now + 0.8
11951
+ if (proc.poll() is not None) and (not active_readers) and io_queue.empty():
11952
+ break
11953
+
11954
+ for th in reader_threads:
11955
+ try:
11956
+ th.join(timeout=0.8)
11957
+ except Exception:
11958
+ pass
11959
+ while True:
11960
+ try:
11961
+ label, chunk = io_queue.get_nowait()
11962
+ except queue.Empty:
11963
+ break
11964
+ if chunk is None:
11965
+ continue
11966
+ if str(label) == "stderr":
11967
+ _append_capture(err_buf, chunk)
11968
+ else:
11969
+ _append_capture(out_buf, chunk)
11970
+ merged_raw = _merge_output_text()
11971
+ merged, _ = filter_runtime_noise_lines(merged_raw)
11972
+ if meta.get("error"):
11973
+ meta["output"] = trim(merged or str(meta["error"]))
11974
+ else:
11975
+ meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
11976
+ meta["output"] = trim(merged or "(no output)")
11977
+
11978
+ try:
11979
+ popen_kwargs = {
11980
+ "shell": True,
11981
+ "cwd": cwd,
11982
+ "stdout": subprocess.PIPE,
11983
+ "stderr": subprocess.PIPE,
11984
+ "text": False,
11985
+ "bufsize": 0,
11986
+ "start_new_session": (os.name == "posix"),
11987
+ }
11988
+ if os.name == "nt":
11989
+ create_group = int(getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0) or 0)
11990
+ if create_group > 0:
11991
+ popen_kwargs["creationflags"] = create_group
11992
+ proc = subprocess.Popen(command, **popen_kwargs)
11993
+ if os.name == "nt":
11994
+ # Windows: read PIPE output via blocking reader threads + queue.
11995
+ _collect_with_reader_threads(proc)
11996
+ else:
11997
+ try:
11998
+ with selectors.DefaultSelector() as sel:
11999
+ if proc.stdout is not None:
11716
12000
  try:
11717
- sel.unregister(stream)
12001
+ os.set_blocking(proc.stdout.fileno(), False)
11718
12002
  except Exception:
11719
12003
  pass
11720
- continue
11721
- if key.data == "stderr":
11722
- _append_capture(err_buf, chunk)
11723
- else:
11724
- _append_capture(out_buf, chunk)
11725
- if now >= next_progress_emit:
11726
- self._emit_transient(
11727
- "status",
11728
- {
11729
- "summary": (
11730
- f"bash running ({int(elapsed)}s, "
11731
- f"captured={len(out_buf) + len(err_buf)}B)"
12004
+ sel.register(proc.stdout, selectors.EVENT_READ, data="stdout")
12005
+ if proc.stderr is not None:
12006
+ try:
12007
+ os.set_blocking(proc.stderr.fileno(), False)
12008
+ except Exception:
12009
+ pass
12010
+ sel.register(proc.stderr, selectors.EVENT_READ, data="stderr")
12011
+ while True:
12012
+ now = time.time()
12013
+ elapsed = now - start
12014
+ if self.cancel_requested:
12015
+ _stop_process(proc)
12016
+ meta["error"] = "Error: interrupted by user"
12017
+ meta["exit_code"] = -130
12018
+ elif timeout > 0 and elapsed >= timeout:
12019
+ _stop_process(proc)
12020
+ meta["error"] = f"Error: timeout ({timeout}s)"
12021
+ meta["exit_code"] = -1
12022
+ events = sel.select(timeout=0.12)
12023
+ for key, _ in events:
12024
+ stream = key.fileobj
12025
+ try:
12026
+ chunk = os.read(stream.fileno(), 65536)
12027
+ except BlockingIOError:
12028
+ continue
12029
+ except Exception:
12030
+ chunk = b""
12031
+ if not chunk:
12032
+ try:
12033
+ sel.unregister(stream)
12034
+ except Exception:
12035
+ pass
12036
+ continue
12037
+ if key.data == "stderr":
12038
+ _append_capture(err_buf, chunk)
12039
+ else:
12040
+ _append_capture(out_buf, chunk)
12041
+ if now >= next_progress_emit:
12042
+ self._emit_transient(
12043
+ "status",
12044
+ {
12045
+ "summary": (
12046
+ f"bash running ({int(elapsed)}s, "
12047
+ f"captured={len(out_buf) + len(err_buf)}B)"
12048
+ )
12049
+ },
11732
12050
  )
11733
- },
11734
- )
11735
- next_progress_emit = now + 0.8
11736
- if (proc.poll() is not None) and (not sel.get_map()):
11737
- break
11738
- merged = _merge_output_text()
11739
- if meta.get("error"):
11740
- meta["output"] = trim(merged or str(meta["error"]))
11741
- else:
11742
- meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
11743
- meta["output"] = trim(merged or "(no output)")
12051
+ next_progress_emit = now + 0.8
12052
+ if (proc.poll() is not None) and (not sel.get_map()):
12053
+ break
12054
+ merged_raw = _merge_output_text()
12055
+ merged, _ = filter_runtime_noise_lines(merged_raw)
12056
+ if meta.get("error"):
12057
+ meta["output"] = trim(merged or str(meta["error"]))
12058
+ else:
12059
+ meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
12060
+ meta["output"] = trim(merged or "(no output)")
12061
+ except Exception as exc:
12062
+ # Some platforms may reject selector registration for PIPEs.
12063
+ # On Windows, also catch any OSError (e.g. WinError 10093 WSANOTINITIALISED).
12064
+ if is_benign_socket_error(exc) or isinstance(exc, ValueError) or (os.name == "nt" and isinstance(exc, OSError)):
12065
+ _collect_with_reader_threads(proc)
12066
+ else:
12067
+ raise
11744
12068
  except Exception as exc:
11745
- meta["error"] = f"Error: {exc}"
11746
- meta["output"] = meta["error"]
11747
- meta["exit_code"] = -1
12069
+ # On Windows, WinError 10038 (WSAENOTSOCK) can surface here when
12070
+ # selector-based I/O is used with pipe FDs. Fall back to thread-based reading.
12071
+ if proc is not None and is_benign_socket_error(exc):
12072
+ _collect_with_reader_threads(proc)
12073
+ else:
12074
+ meta["error"] = f"Error: {exc}"
12075
+ meta["output"] = meta["error"]
12076
+ meta["exit_code"] = -1
11748
12077
  meta["duration_ms"] = int((time.time() - start) * 1000)
11749
12078
  after = self._git_status_map(cwd)
11750
12079
  meta["changed_files"] = self._status_delta(before, after) if before or after else []
@@ -12275,6 +12604,35 @@ class SessionState:
12275
12604
  return trim(text.replace("\n", " "), 220)
12276
12605
  return "current task"
12277
12606
 
12607
def _compose_default_direct_objective(self, base_objective: str, goal: str, task_type: str) -> str:
    """Build the default ``direct_objective`` text for a task profile.

    The result is ``base_objective`` followed by a "project anchor" sentence
    and a task-type-specific postfix, capped at 800 characters via ``trim``.
    The anchor lists up to three distinct file paths found in the goal text,
    or the goal itself when no path is found, or a generic repository note
    when the goal is empty.

    Args:
        base_objective: Leading objective sentence (passed through ``trim`` at 520).
        goal: User goal; passed through ``strip_thinking_content``
            (presumably removes model "thinking" segments — confirm against
            its definition), flattened to one line and trimmed to 220.
        task_type: ``"simple_qa"`` selects the answer-directly postfix; any
            other value selects the execution-first postfix.

    Returns:
        The composed objective string.
    """
    base = trim(str(base_objective or "").strip(), 520)
    goal_clean = trim(strip_thinking_content(str(goal or "")).replace("\n", " ").strip(), 220)
    # The trailing \b makes the extension match whole: without it the ordered
    # alternation stops at the first shorter alternative, so "config.json"
    # was captured as "config.js", "App.tsx" as "App.ts", "main.cpp" as "main.c".
    path_hits = re.findall(
        r"(?:[A-Za-z0-9_.-]+/)*[A-Za-z0-9_.-]+\.(?:py|js|ts|tsx|jsx|java|go|rs|md|json|yaml|yml|toml|ini|sh|html|css|c|cpp|h)\b",
        goal_clean,
    )
    # Keep the first three distinct paths, in order of appearance.
    uniq_paths: list[str] = []
    for item in path_hits:
        one = trim(str(item or "").strip(), 80)
        if one and one not in uniq_paths:
            uniq_paths.append(one)
        if len(uniq_paths) >= 3:
            break
    if uniq_paths:
        anchor = f" Project anchors: {', '.join(uniq_paths)}."
    elif goal_clean:
        anchor = f" Project anchor: {goal_clean}."
    else:
        anchor = " Project anchor: current repository context."
    if task_type == "simple_qa":
        postfix = " Keep orchestration lightweight and answer directly with project-aware specifics."
    else:
        postfix = (
            " Keep orchestration lightweight and execution-first. "
            "Use bounded creativity for ambiguous details while preserving existing architecture and constraints."
        )
    return trim(f"{base}{anchor}{postfix}", 800)
12635
+
12278
12636
  def _normalize_task_profile(self, goal: str, raw: object) -> dict:
12279
12637
  base = self._infer_task_profile(goal)
12280
12638
  src = raw if isinstance(raw, dict) else {}
@@ -12287,13 +12645,22 @@ class SessionState:
12287
12645
  complexity = str(src.get("complexity", base.get("complexity", "simple")) or "").strip().lower()
12288
12646
  if complexity not in TASK_COMPLEXITY_LEVELS:
12289
12647
  complexity = str(base.get("complexity", "simple"))
12290
- direct_objective = (
12291
- trim(
12292
- str(src.get("direct_objective", base.get("direct_objective", "")) or "").strip(),
12293
- 800,
12648
+ src_direct_objective = trim(str(src.get("direct_objective", "") or "").strip(), 800)
12649
+ legacy_objectives = {
12650
+ "Provide the most direct useful response with minimal orchestration.",
12651
+ (
12652
+ "Provide the most direct useful response with minimal orchestration, "
12653
+ "anchored to the current project context and user goal."
12654
+ ),
12655
+ }
12656
+ if src_direct_objective and src_direct_objective not in legacy_objectives:
12657
+ direct_objective = src_direct_objective
12658
+ else:
12659
+ direct_objective = self._compose_default_direct_objective(
12660
+ str(base.get("direct_objective", "")),
12661
+ goal,
12662
+ task_type,
12294
12663
  )
12295
- or str(base.get("direct_objective", ""))
12296
- )
12297
12664
  rec_raw = src.get("recommended_agents", base.get("recommended_agents", []))
12298
12665
  recommended: list[str] = []
12299
12666
  if isinstance(rec_raw, list):
@@ -12594,67 +12961,767 @@ class SessionState:
12594
12961
  key = str(raw or "").strip().upper()
12595
12962
  return key if key in BLACKBOARD_STATUSES else "INITIALIZING"
12596
12963
 
12597
- def _new_blackboard(self, goal: str = "") -> dict:
12598
- profile = self._normalize_task_profile(goal, {})
12599
- progress = "done" if str(profile.get("task_type", "") or "") == "simple_qa" and not str(goal or "").strip() else "initializing"
12964
+ def _new_watchdog_state(self) -> dict:
12600
12965
  return {
12601
- "version": 1,
12602
- "updated_at": float(now_ts()),
12603
- "original_goal": trim(str(goal or "").strip(), 4000),
12604
- "research_notes": [],
12605
- "code_artifacts": {},
12606
- "execution_logs": [],
12607
- "review_feedback": [],
12608
- "conversation_history": [],
12609
- "status": "INITIALIZING",
12610
- "approval": {
12611
- "approved": False,
12612
- "by": "",
12613
- "note": "",
12614
- "ts": 0.0,
12615
- },
12616
- "manager_cycles": 0,
12617
- "active_agent": "",
12618
- "last_delegate": {
12619
- "target": "",
12620
- "instruction": "",
12621
- "reason": "",
12622
- "source": "",
12623
- "is_mandatory": False,
12624
- "ts": 0.0,
12625
- },
12626
- "task_profile": profile,
12627
- "manager_judgement": {
12628
- "task_type": str(profile.get("task_type", "general")),
12629
- "complexity": str(profile.get("complexity", "simple")),
12630
- "scale_preference": str(profile.get("scale_preference", "balanced") or "balanced"),
12631
- "progress": progress,
12632
- "remaining_rounds": (
12633
- -1
12634
- if int(profile.get("round_budget", 0) or 0) <= 0
12635
- else int(profile.get("round_budget", 1) or 1)
12636
- ),
12637
- "updated_at": float(now_ts()),
12638
- },
12639
- "last_worker_reply": {
12640
- "role": "",
12641
- "text": "",
12642
- "ts": 0.0,
12643
- },
12966
+ "intent_no_tool_streak": 0,
12967
+ "repeat_no_tool_streak": 0,
12968
+ "state_unchanged_streak": 0,
12969
+ "last_no_tool_text": "",
12970
+ "last_no_tool_hash": "",
12971
+ "last_state_fp": "",
12972
+ "trigger_count": 0,
12973
+ "last_trigger_reason": "",
12974
+ "last_trigger_ts": 0.0,
12644
12975
  }
12645
12976
 
12646
- def _normalize_blackboard(self, raw: object) -> dict:
12977
def _normalize_watchdog_state(self, raw: object) -> dict:
    """Coerce an arbitrary value into a well-formed watchdog record.

    Args:
        raw: Candidate watchdog state; anything that is not a dict is
            treated as empty.

    Returns:
        A fresh watchdog dict whose counters are non-negative ints, whose
        text fields are stripped and length-capped via ``trim``, and whose
        trigger timestamp is a float.
    """
    source = raw if isinstance(raw, dict) else {}
    state = self._new_watchdog_state()

    # Counters: clamp to >= 0.
    for key in ("intent_no_tool_streak", "repeat_no_tool_streak", "state_unchanged_streak"):
        state[key] = max(0, int(source.get(key, 0) or 0))

    # Text markers: strip, then cap to a per-field length.
    for key, cap in (("last_no_tool_text", 1200), ("last_no_tool_hash", 80), ("last_state_fp", 120)):
        state[key] = trim(str(source.get(key, "") or "").strip(), cap)

    state["trigger_count"] = max(0, int(source.get("trigger_count", 0) or 0))
    state["last_trigger_reason"] = trim(str(source.get("last_trigger_reason", "") or "").strip(), 200)
    state["last_trigger_ts"] = float(source.get("last_trigger_ts", 0.0) or 0.0)
    return state
12990
+
12991
+ def _new_decomposition_queue_state(self) -> dict:
12992
+ return {
12993
+ "active": False,
12994
+ "trigger_reason": "",
12995
+ "created_at": 0.0,
12996
+ "cursor": 0,
12997
+ "steps": [],
12998
+ "last_error": "",
12999
+ "snapshot": "",
13000
+ "decomposer_output": "",
13001
+ }
13002
+
13003
def _watchdog_normalize_steps(self, rows: object) -> list[dict]:
    """Normalize raw decomposition rows into executor step records.

    Only the first ``WATCHDOG_MAX_DECOMPOSE_STEPS`` rows are considered.
    Rows that are not dicts, or that carry no instruction text under any
    of the ``description`` / ``instruction`` / ``content`` / ``task``
    keys, are dropped.

    Args:
        rows: Candidate step list (for example parsed model output or a
            persisted queue). Anything that is not a list yields ``[]``.

    Returns:
        A list of dicts with the keys ``step``, ``target``,
        ``action_type``, ``instruction``, ``attempts``, ``status`` and
        ``updated_at``.
    """
    if not isinstance(rows, list):
        return []

    def _infer_target(action_type: str, instruction: str, fallback: str = "developer") -> str:
        # Keyword routing used only when the row names no usable target.
        raw = self._sanitize_agent_role(fallback) or "developer"
        low = f"{action_type} {instruction}".lower()
        if any(tok in low for tok in ("review", "verify", "validate", "test", "qa", "检查", "验证", "评审", "審查")):
            return "reviewer"
        if any(tok in low for tok in ("research", "inspect", "analy", "explore", "investigate", "分析", "调研", "調研", "探索")):
            return "explorer"
        return raw

    def _as_int(value: object, default: int) -> int:
        # Rows may come from model output, so numeric fields can hold
        # arbitrary junk; fall back instead of raising.
        try:
            return int(value or default)
        except (TypeError, ValueError, OverflowError):
            return default

    out: list[dict] = []
    for idx, row in enumerate(rows[:WATCHDOG_MAX_DECOMPOSE_STEPS]):
        if not isinstance(row, dict):
            continue
        instruction = trim(
            str(
                row.get("description", "")
                or row.get("instruction", "")
                or row.get("content", "")
                or row.get("task", "")
                or ""
            ).strip(),
            900,
        )
        if not instruction:
            continue
        action_type = trim(str(row.get("action_type", "") or "").strip(), 80)
        target = self._sanitize_agent_role(
            row.get("target", row.get("owner", row.get("role", row.get("agent", ""))))
        )
        target = target or _infer_target(action_type, instruction)
        if target == "developer" and "incremental" not in instruction.lower():
            # Nudge developer steps towards small edits unless the step
            # already mentions incremental work.
            instruction = trim(
                (
                    f"{instruction}\n"
                    "Use incremental edits (append/targeted replace) instead of full-file overwrite unless unavoidable."
                ),
                1000,
            )
        out.append(
            {
                "step": max(1, _as_int(row.get("step", idx + 1), idx + 1)),
                "target": target,
                "action_type": action_type or "execute",
                "instruction": instruction,
                "attempts": max(0, _as_int(row.get("attempts", 0), 0)),
                "status": trim(str(row.get("status", "pending") or "pending").strip().lower(), 20) or "pending",
                "updated_at": float(now_ts()),
            }
        )
    # The input was already sliced to the cap, so no further trimming is needed.
    return out
13063
+
13064
def _normalize_decomposition_queue_state(self, raw: object) -> dict:
    """Coerce an arbitrary value into a well-formed decomposition queue.

    Args:
        raw: Candidate queue state; anything that is not a dict is treated
            as empty.

    Returns:
        A fresh queue dict with typed, length-capped fields and normalized
        steps. The queue is forced inactive when the cursor is at or past
        the end of the step list (which includes an empty list).
    """
    source = raw if isinstance(raw, dict) else {}

    def _text(key: str, cap: int) -> str:
        return trim(str(source.get(key, "") or "").strip(), cap)

    queue = self._new_decomposition_queue_state()
    queue["active"] = bool(source.get("active", False))
    queue["trigger_reason"] = _text("trigger_reason", 200)
    queue["created_at"] = float(source.get("created_at", 0.0) or 0.0)
    queue["cursor"] = max(0, int(source.get("cursor", 0) or 0))
    queue["last_error"] = _text("last_error", 400)
    queue["snapshot"] = _text("snapshot", 4000)
    queue["decomposer_output"] = _text("decomposer_output", 2000)
    queue["steps"] = self._watchdog_normalize_steps(source.get("steps", []))
    if queue["cursor"] >= len(queue["steps"]):
        # Nothing left to run, so the queue cannot be active.
        queue["active"] = False
    return queue
13078
+
13079
def _watchdog_state_fingerprint(self, board: dict | None = None) -> str:
    """Hash the coarse progress-relevant shape of the blackboard.

    The digest covers status, a goal prefix, the active and delegated
    roles, the sizes of the research/artifact/execution/review
    collections, the approval flag, and the task type and complexity.

    Args:
        board: Blackboard to fingerprint; when it is not a dict the
            session's own blackboard is used.

    Returns:
        SHA-1 hex digest of the canonical (sorted-key, compact) JSON form.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    profile = self._ensure_blackboard_task_profile(bb)

    fields: dict = {}
    fields["status"] = self._normalize_blackboard_status(bb.get("status", "INITIALIZING"))
    fields["goal"] = trim(str(bb.get("original_goal", "") or "").strip(), 400)
    fields["active_agent"] = self._sanitize_agent_role(bb.get("active_agent", ""))
    last_delegate = bb.get("last_delegate", {}) or {}
    fields["delegate"] = self._sanitize_agent_role(last_delegate.get("target", ""))
    fields["research_count"] = len(bb.get("research_notes", []) or [])
    fields["artifact_count"] = len(bb.get("code_artifacts", {}) or {})
    fields["exec_count"] = len(bb.get("execution_logs", []) or [])
    fields["review_count"] = len(bb.get("review_feedback", []) or [])
    approval = bb.get("approval", {}) or {}
    fields["approved"] = bool(approval.get("approved", False))
    fields["task_type"] = str(profile.get("task_type", "general") or "general")
    fields["complexity"] = str(profile.get("complexity", "simple") or "simple")

    # sort_keys makes the serialised form independent of insertion order.
    canonical = json.dumps(fields, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    return hashlib.sha1(canonical.encode("utf-8")).hexdigest()
13097
+
13098
+ def _watchdog_extract_json_array(self, text: str) -> list[dict]:
13099
+ raw = str(text or "").strip()
13100
+ if not raw:
13101
+ return []
13102
+ probe_candidates: list[str] = [raw]
13103
+ fence = re.findall(r"```(?:json)?\s*([\s\S]*?)```", raw, flags=re.IGNORECASE)
13104
+ probe_candidates.extend([str(x or "").strip() for x in fence if str(x or "").strip()])
13105
+ first = raw.find("[")
13106
+ last = raw.rfind("]")
13107
+ if first >= 0 and last > first:
13108
+ probe_candidates.append(raw[first : last + 1].strip())
13109
+ for candidate in probe_candidates:
13110
+ try:
13111
+ parsed = json.loads(candidate)
13112
+ except Exception:
13113
+ continue
13114
+ if isinstance(parsed, list):
13115
+ return [dict(x) for x in parsed if isinstance(x, dict)]
13116
+ return []
13117
+
13118
def _watchdog_intent_without_action(self, text: str) -> bool:
    """Report whether a reply announces work without claiming any was done.

    The check is a case-insensitive substring match on the text left after
    ``strip_thinking_content``: any completed-action marker vetoes the
    match; otherwise any intent marker confirms it.

    Args:
        text: Worker reply text; falsy values are treated as empty.

    Returns:
        ``True`` only when an intent marker is present and no action
        marker is; ``False`` for empty text.
    """
    cleaned = strip_thinking_content(str(text or "")).strip()
    if not cleaned:
        return False
    haystack = cleaned.lower()

    # Phrases that claim something already happened; these take priority.
    done_phrases = (
        "wrote",
        "edited",
        "executed",
        "called",
        "ran ",
        "已完成",
        "已执行",
        "已執行",
        "已调用",
        "已調用",
        "完成了",
        "执行了",
        "執行了",
        "调用了",
        "調用了",
    )
    for phrase in done_phrases:
        if phrase in haystack:
            return False

    # Phrases that only promise future work.
    promise_phrases = (
        "i will",
        "i'm going to",
        "next step",
        "plan to",
        "let me",
        "我将",
        "我會",
        "我会",
        "下一步",
        "接下来",
        "接下來",
        "计划",
        "計劃",
        "准备",
        "準備",
    )
    for phrase in promise_phrases:
        if phrase in haystack:
            return True
    return False
13160
+
13161
def _watchdog_similarity(self, a: str, b: str) -> float:
    """Score how alike two replies are.

    Both inputs are passed through ``strip_thinking_content``, stripped
    and capped at 1800 characters before comparison.

    Returns:
        ``difflib.SequenceMatcher`` ratio as a float, or ``0.0`` when
        either cleaned text is empty.
    """
    left, right = (
        trim(strip_thinking_content(str(side or "")).strip(), 1800)
        for side in (a, b)
    )
    if left and right:
        return float(difflib.SequenceMatcher(None, left, right).ratio())
    return 0.0
13167
+
13168
def _watchdog_context_near_limit(self) -> bool:
    """Report whether estimated token usage is close to the context cap.

    The cap is ``self.context_token_upper_bound`` (falling back to
    ``TOKEN_THRESHOLD``), floored at 1. A failing token estimate is
    treated as zero usage.

    Returns:
        ``True`` when usage reaches ``WATCHDOG_CONTEXT_NEAR_RATIO`` of the
        cap (after integer truncation).
    """
    ceiling = max(1, int(self.context_token_upper_bound or TOKEN_THRESHOLD))
    try:
        consumed = int(self._estimate_tokens())
    except Exception:
        # Best effort: an unavailable estimate must not break the watchdog.
        consumed = 0
    near_mark = int(ceiling * WATCHDOG_CONTEXT_NEAR_RATIO)
    return bool(consumed >= near_mark)
13175
+
13176
def _watchdog_snapshot_payload(self, board: dict, reason: str, role: str, step: dict | None = None) -> str:
    """Serialise a compact blackboard snapshot for the decomposer prompt.

    The snapshot holds the objective, trigger reason, active role, status,
    a task-profile summary, the latest worker step, up to six code
    artifacts (ordered by ``updated_at``, newest first) and the last four
    execution-log and review-feedback entries.

    Args:
        board: Blackboard to summarise; when it is not a dict the
            session's own blackboard is used.
        reason: Why the watchdog fired.
        role: Role that was active when it fired.
        step: Latest worker step, if any; non-dict values are ignored.

    Returns:
        Indented JSON text capped at 6000 characters.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    profile = self._ensure_blackboard_task_profile(bb)
    step_row = step if isinstance(step, dict) else {}
    code_rows = sorted(
        list((bb.get("code_artifacts", {}) or {}).items()),
        key=lambda item: float((item[1] or {}).get("updated_at", 0.0) if isinstance(item[1], dict) else 0.0),
        reverse=True,
    )

    def _summary(item: object) -> str:
        # Mirror the sort key's isinstance guard so a malformed artifact
        # entry yields an empty summary instead of an AttributeError.
        if not isinstance(item, dict):
            return ""
        return trim(str(item.get("summary", "") or "").strip(), 200)

    payload = {
        "objective": trim(str(bb.get("original_goal", "") or "").strip(), 1800),
        "trigger_reason": trim(str(reason or "").strip(), 200),
        "active_role": self._sanitize_agent_role(role),
        "status": self._normalize_blackboard_status(bb.get("status", "INITIALIZING")),
        "task_profile": {
            "task_type": str(profile.get("task_type", "general") or "general"),
            "complexity": str(profile.get("complexity", "simple") or "simple"),
            "direct_objective": trim(str(profile.get("direct_objective", "") or "").strip(), 600),
        },
        "latest_worker_step": {
            "status": str(step_row.get("status", "") or ""),
            "text": trim(str(step_row.get("text", "") or "").strip(), 600),
        },
        "code_artifacts": [
            {"path": str(path), "summary": _summary(item)}
            for path, item in code_rows[:6]
        ],
        "recent_execution_logs": [
            trim(str(row.get("content", "") or "").strip(), 220)
            for row in (bb.get("execution_logs", []) or [])[-4:]
            if isinstance(row, dict)
        ],
        "recent_review_feedback": [
            trim(str(row.get("content", "") or "").strip(), 220)
            for row in (bb.get("review_feedback", []) or [])[-4:]
            if isinstance(row, dict)
        ],
    }
    return trim(json_dumps(payload, indent=2), 6000)
13217
+
13218
def _watchdog_fallback_steps(self, board: dict, reason: str) -> list[dict]:
    """Produce the canned research / implement / validate plan.

    Args:
        board: Blackboard supplying the task profile and original goal.
        reason: Watchdog trigger reason, quoted in the research step.

    Returns:
        The three canned steps after ``_watchdog_normalize_steps``.
    """
    profile = self._ensure_blackboard_task_profile(board)
    objective = trim(str(profile.get("direct_objective", "") or "").strip(), 280)
    if not objective:
        # No direct objective in the profile: fall back to the original goal.
        objective = trim(str(board.get("original_goal", "") or "").strip(), 280)

    research_text = (
        "Analyze the latest blocker quickly and write concrete constraints to blackboard "
        f"(trigger={trim(reason, 120)})."
    )
    implement_text = (
        "Implement one incremental fix for the current objective and provide verifiable tool output. "
        f"Objective: {objective}"
    )
    validate_text = (
        "Run one validation pass, provide pass/fix verdict with evidence, and handoff summary request if needed."
    )
    plan = (
        ("research", "explorer", research_text),
        ("implement", "developer", implement_text),
        ("validate", "reviewer", validate_text),
    )
    raw_rows = [
        {"step": number, "action_type": action, "target": owner, "description": text}
        for number, (action, owner, text) in enumerate(plan, start=1)
    ]
    return self._watchdog_normalize_steps(raw_rows)
13253
+
13254
def _watchdog_decompose_steps(self, board: dict, reason: str, *, pinned_selection: str) -> tuple[list[dict], str, str]:
    """Ask the model to split the objective into executor micro-steps.

    A tool-less, non-thinking chat request is sent with a blackboard
    snapshot. The reply is parsed as a JSON array and normalized. If the
    request fails, or no usable step survives normalisation, the canned
    fallback plan is used instead.

    Args:
        board: Blackboard supplying the goal and snapshot data.
        reason: Watchdog trigger reason, included in the prompt.
        pinned_selection: Model selection forwarded to the chat call.

    Returns:
        ``(steps, snapshot, raw_text)``: the normalized steps, the
        snapshot text sent to the model, and the raw reply (or a
        ``decomposer-error: ...`` marker) capped at 2000 characters.
    """
    snapshot = self._watchdog_snapshot_payload(board, reason, str(board.get("active_agent", "") or ""), None)
    objective = trim(str(board.get("original_goal", "") or "").strip(), 1600)
    system_prompt = (
        "You are a task decomposer. Your only job is to split OBJECTIVE into executable micro-steps. "
        "Return strict JSON array only: "
        "[{\"step\":1,\"action_type\":\"...\",\"target\":\"explorer|developer|reviewer\",\"description\":\"...\"}]. "
        "No markdown, no prose, no code fence."
    )
    # The advertised step limit comes from the same constant that
    # normalisation enforces, so prompt and cap cannot drift apart.
    user_prompt = (
        f"OBJECTIVE:\n{objective}\n\n"
        f"TRIGGER:\n{trim(reason, 220)}\n\n"
        "SNAPSHOT:\n"
        f"{snapshot}\n\n"
        "Rules: keep steps module-level (not line-by-line), use incremental edits, "
        f"and keep total steps <= {int(WATCHDOG_MAX_DECOMPOSE_STEPS)}."
    )
    raw_text = ""
    parsed_steps: list[dict] = []
    try:
        rsp = self._chat_with_same_model_retry(
            [{"role": "user", "content": user_prompt, "ts": now_ts()}],
            tools=None,
            system=system_prompt,
            max_tokens=1200,
            think=False,
            stream_thinking=False,
            pinned_selection=pinned_selection,
            context_label="watchdog decomposer",
            retries=max(1, min(2, int(MODEL_OUTPUT_RETRY_TIMES))),
        )
        raw_text = str(rsp.get("content") or "")
        parsed_steps = self._watchdog_extract_json_array(raw_text)
    except Exception as exc:
        # Best-effort boundary: any failure degrades to the fallback plan,
        # with the error text recorded in place of the model output.
        raw_text = f"decomposer-error: {trim(str(exc), 220)}"
        parsed_steps = []
    normalized = self._watchdog_normalize_steps(parsed_steps)
    if not normalized:
        normalized = self._watchdog_fallback_steps(board, reason)
    return normalized, snapshot, trim(raw_text, 2000)
13294
+
13295
def _watchdog_activate_decomposition(
    self,
    board: dict,
    *,
    reason: str,
    role: str,
    step: dict | None,
    pinned_selection: str,
) -> bool:
    """Switch the session into queued-executor mode.

    Does nothing when a queue is already active. Otherwise it obtains
    steps from the decomposer, installs a new active queue, bumps the
    watchdog trigger bookkeeping, resets the two no-tool streaks, records
    a history entry and emits a status event.

    Args:
        board: Blackboard to update.
        reason: Trigger reason stored on the queue and watchdog.
        role: Role that was active; used in the history entry.
        step: Accepted for interface symmetry; not read by this method.
        pinned_selection: Model selection forwarded to the decomposer.

    Returns:
        ``True`` when a new queue was installed, ``False`` otherwise.
    """
    existing = self._normalize_decomposition_queue_state(board.get("decomposition_queue", {}))
    if existing.get("active", False):
        return False

    steps, snapshot, raw_text = self._watchdog_decompose_steps(
        board,
        reason,
        pinned_selection=pinned_selection,
    )
    if not steps:
        return False

    reason_text = trim(str(reason or "").strip(), 200)
    queue = {
        "active": True,
        "trigger_reason": reason_text,
        "created_at": float(now_ts()),
        "cursor": 0,
        "steps": steps,
        "last_error": "",
        "snapshot": trim(snapshot, 4000),
        "decomposer_output": trim(raw_text, 2000),
    }

    watchdog = self._normalize_watchdog_state(board.get("watchdog", {}))
    watchdog["trigger_count"] = max(0, int(watchdog.get("trigger_count", 0) or 0)) + 1
    watchdog["last_trigger_reason"] = trim(str(reason or "").strip(), 200)
    watchdog["last_trigger_ts"] = float(now_ts())
    # The streaks that caused the trigger start over once the queue takes control.
    watchdog["intent_no_tool_streak"] = 0
    watchdog["repeat_no_tool_streak"] = 0

    board["watchdog"] = watchdog
    board["decomposition_queue"] = queue
    self.blackboard = board
    self._blackboard_touch()

    history_line = (
        "watchdog triggered decomposition "
        f"(reason={reason}, role={self._sanitize_agent_role(role)}, "
        f"steps={len(steps)})"
    )
    self._blackboard_history("manager", trim(history_line, 520))

    status_line = (
        "watchdog triggered; switched to stateless executor queue "
        f"(reason={trim(reason, 90)}, steps={len(steps)})"
    )
    self._emit("status", {"summary": status_line})
    return True
13355
+
13356
def _watchdog_pick_executor_route(self, board: dict | None = None) -> tuple[dict, dict] | None:
    """Select the next queued step and build its delegation arguments.

    Steps whose status is ``done`` or ``skipped`` are passed over. If
    every step has been passed, the queue is deactivated and persisted.

    Args:
        board: Blackboard to read; when it is not a dict the session's
            own blackboard is used.

    Returns:
        ``(args, meta)`` for the step to run, or ``None`` when the queue
        is inactive, empty or exhausted.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
    if not dq.get("active", False):
        return None
    steps = list(dq.get("steps", []) or [])
    if not steps:
        return None

    total = len(steps)
    finished = {"done", "skipped"}
    cursor = max(0, int(dq.get("cursor", 0) or 0))
    # Advance past steps that are already settled.
    while cursor < total and str((steps[cursor] or {}).get("status", "") or "").strip().lower() in finished:
        cursor += 1

    if cursor >= total:
        dq["active"] = False
        dq["cursor"] = total
        bb["decomposition_queue"] = dq
        self.blackboard = bb
        self._blackboard_touch()
        return None

    dq["cursor"] = cursor
    step_row = steps[cursor] if isinstance(steps[cursor], dict) else {}
    target = self._sanitize_agent_role(step_row.get("target", "")) or "developer"
    action_type = trim(str(step_row.get("action_type", "execute") or "execute").strip(), 80) or "execute"
    step_instruction = trim(str(step_row.get("instruction", "") or "").strip(), 900)
    trigger_reason = trim(str(dq.get("trigger_reason", "") or "").strip(), 180)
    current = cursor + 1  # 1-based position for display

    profile = self._ensure_blackboard_task_profile(bb)
    task_level = int(profile.get("task_level", self.runtime_task_level or 3) or 3)
    if task_level not in TASK_LEVEL_CHOICES:
        task_level = 3

    instruction_text = (
        f"Executor mode (stateless) step {current}/{total}. "
        f"trigger={trigger_reason or 'watchdog'}; action_type={action_type}.\n"
        f"{step_instruction}\n"
        "Rules: execute one concrete tool call now, keep scope narrow, "
        "and update blackboard evidence immediately."
    )
    args = {
        "target": target,
        "instruction": trim(instruction_text, 1200),
        "task_level": int(task_level),
        "task_type": trim(str(profile.get("task_type", "general") or "general"), 40),
        "complexity": trim(str(profile.get("complexity", "simple") or "simple"), 20),
        "scale_preference": trim(str(profile.get("scale_preference", "balanced") or "balanced"), 20),
        "judgement": trim(f"watchdog-executor-step-{current}/{total}", 200),
        "round_budget": int(profile.get("round_budget", self.runtime_round_budget or self.max_agent_rounds) or 0),
        "direct_objective": trim(str(profile.get("direct_objective", self.runtime_direct_objective or "") or ""), 800),
        "execution_mode": normalize_execution_mode(
            profile.get("execution_mode", self._effective_execution_mode()),
            default=self._effective_execution_mode(),
        ),
        "participants": profile.get("participants", self.runtime_participants),
        "assigned_expert": profile.get("assigned_expert", self.runtime_assigned_expert or "developer"),
        "requires_user_confirmation": bool(profile.get("requires_user_confirmation", False)),
        "is_mandatory": True,
        "executor_mode": True,
    }

    # Persist the advanced cursor only after the arguments were built.
    bb["decomposition_queue"] = dq
    self.blackboard = bb
    self._blackboard_touch()

    meta = {
        "trigger_reason": trigger_reason,
        "cursor": current,
        "total": total,
        "target": target,
        "action_type": action_type,
    }
    return args, meta
13432
+
13433
def _watchdog_mark_step_progress(self, board: dict, role: str, step: dict | None) -> dict:
    """Record the outcome of a worker turn against the current queue step.

    The step counts as successful when the turn reported ``tools`` with at
    least one OK tool result, or when an explorer/reviewer replied without
    tools but with at least 120 characters of text. A failing step is
    retried until ``WATCHDOG_STEP_MAX_ATTEMPTS`` is reached, then skipped.
    Nothing is recorded when the queue is inactive or the acting role is
    not the step's target.

    Args:
        board: Blackboard to update; when it is not a dict the session's
            own blackboard is used.
        role: Role that just acted.
        step: That role's step result, if any.

    Returns:
        ``{"queue_active": bool, "step_advanced": bool}``.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
    was_active = bool(dq.get("active", False))
    out = {"queue_active": was_active, "step_advanced": False}

    def _commit() -> None:
        # Write the (possibly mutated) queue back onto the session board.
        bb["decomposition_queue"] = dq
        self.blackboard = bb

    if not was_active:
        _commit()
        return out

    rows = list(dq.get("steps", []) or [])
    cursor = max(0, int(dq.get("cursor", 0) or 0))
    if cursor >= len(rows):
        dq["active"] = False
        dq["cursor"] = len(rows)
        _commit()
        return {"queue_active": False, "step_advanced": False}

    current = rows[cursor] if isinstance(rows[cursor], dict) else {}
    target = self._sanitize_agent_role(current.get("target", "")) or "developer"
    role_key = self._sanitize_agent_role(role)
    if target != role_key:
        # A different role acted; the current step is untouched.
        _commit()
        return out

    step_row = step or {}
    status = str(step_row.get("status", "") or "").strip().lower()
    text = trim(strip_thinking_content(str(step_row.get("text", "") or "").strip()), 1200)
    raw_results = step_row.get("tool_results")
    tool_results = raw_results if isinstance(raw_results, list) else []
    has_ok_tool = any(isinstance(row, dict) and bool(row.get("ok", False)) for row in tool_results)

    tool_success = status == "tools" and has_ok_tool
    prose_success = status == "no-tools" and role_key in {"explorer", "reviewer"} and len(text) >= 120
    success = bool(tool_success or prose_success)

    attempts = max(0, int(current.get("attempts", 0) or 0)) + 1
    current["attempts"] = attempts
    current["updated_at"] = float(now_ts())

    if success:
        current["status"] = "done"
        dq["cursor"] = cursor + 1
        out["step_advanced"] = True
        dq["last_error"] = ""
    elif status in {"no-tools", "tools", "skip"}:
        if attempts >= int(WATCHDOG_STEP_MAX_ATTEMPTS):
            current["status"] = "skipped"
            dq["cursor"] = cursor + 1
            out["step_advanced"] = True
            dq["last_error"] = trim(
                f"step {cursor + 1} skipped after {attempts} attempts ({status})",
                300,
            )
        else:
            current["status"] = "retry"
            dq["last_error"] = trim(
                f"step {cursor + 1} retry pending ({status})",
                300,
            )

    rows[cursor] = current
    dq["steps"] = rows
    if int(dq.get("cursor", 0) or 0) >= len(rows):
        dq["active"] = False
        out["queue_active"] = False
        self._emit("status", {"summary": "stateless executor queue drained; returning to normal manager routing"})
    else:
        out["queue_active"] = bool(dq.get("active", False))
    _commit()
    return out
13497
+
13498
def _watchdog_process_worker_step(
    self,
    board: dict,
    *,
    role: str,
    step: dict,
    state_changed: bool,
    pinned_selection: str,
) -> dict:
    """Update watchdog streaks after a worker turn and trigger if stalled.

    The method updates the streak counters from the step's status and
    text, records progress on any active queue step, and then, only while
    no queue is active, checks the trigger conditions in priority order.
    A trigger is acted on only if at least one second has passed since
    the previous one.

    Args:
        board: Blackboard to update; when it is not a dict the session's
            own blackboard is used.
        role: Role that just acted.
        step: That role's step result.
        state_changed: Whether the blackboard fingerprint changed.
        pinned_selection: Model selection forwarded to the decomposer.

    Returns:
        Dict with ``triggered``, ``trigger_reason``, ``queue_active`` and
        ``step_advanced``.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    wd = self._normalize_watchdog_state(bb.get("watchdog", {}))
    dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
    step_row = step or {}
    status = str(step_row.get("status", "") or "").strip().lower()
    text = trim(strip_thinking_content(str(step_row.get("text", "") or "").strip()), 1200)

    def _streak(name: str) -> int:
        # Current non-negative value of a watchdog counter.
        return max(0, int(wd.get(name, 0) or 0))

    wd["last_state_fp"] = self._watchdog_state_fingerprint(bb)
    wd["state_unchanged_streak"] = 0 if state_changed else _streak("state_unchanged_streak") + 1

    if status == "tools":
        # A tool call clears all no-tool tracking.
        wd["intent_no_tool_streak"] = 0
        wd["repeat_no_tool_streak"] = 0
        wd["last_no_tool_text"] = ""
        wd["last_no_tool_hash"] = ""
    elif status == "no-tools":
        if self._watchdog_intent_without_action(text):
            wd["intent_no_tool_streak"] = _streak("intent_no_tool_streak") + 1
        else:
            wd["intent_no_tool_streak"] = max(0, _streak("intent_no_tool_streak") - 1)
        previous_text = str(wd.get("last_no_tool_text", "") or "")
        likeness = self._watchdog_similarity(previous_text, text)
        if likeness >= float(WATCHDOG_REPEAT_SIMILARITY_THRESHOLD):
            wd["repeat_no_tool_streak"] = _streak("repeat_no_tool_streak") + 1
        else:
            wd["repeat_no_tool_streak"] = 0
        wd["last_no_tool_text"] = text
        wd["last_no_tool_hash"] = hashlib.sha1(text.encode("utf-8")).hexdigest() if text else ""
    else:
        # Any other status decays both streaks by one.
        wd["intent_no_tool_streak"] = max(0, _streak("intent_no_tool_streak") - 1)
        wd["repeat_no_tool_streak"] = max(0, _streak("repeat_no_tool_streak") - 1)

    bb["watchdog"] = wd
    bb["decomposition_queue"] = dq
    self.blackboard = bb

    progress_row = self._watchdog_mark_step_progress(bb, role, step)
    bb = self._ensure_blackboard()
    dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))

    trigger_reason = ""
    if not dq.get("active", False):
        stalled = _streak("state_unchanged_streak")
        if _streak("intent_no_tool_streak") >= int(WATCHDOG_INTENT_NO_TOOL_THRESHOLD):
            trigger_reason = "intent-without-tool-call"
        elif _streak("repeat_no_tool_streak") >= int(WATCHDOG_REPEAT_NO_TOOL_THRESHOLD):
            trigger_reason = "repeated-no-tool-reply"
        elif self._watchdog_context_near_limit() and stalled >= int(WATCHDOG_CONTEXT_STALL_THRESHOLD):
            trigger_reason = "context-threshold-no-state-change"
        elif status in {"no-tools", "skip"} and stalled >= int(WATCHDOG_STATE_STALL_THRESHOLD):
            trigger_reason = "state-unchanged-stall"

    triggered = False
    if trigger_reason:
        try:
            last_trigger_ts = float(wd.get("last_trigger_ts", 0.0) or 0.0)
        except Exception:
            last_trigger_ts = 0.0
        # Debounce: ignore triggers less than a second after the last one.
        if now_ts() - last_trigger_ts >= 1.0:
            triggered = self._watchdog_activate_decomposition(
                bb,
                reason=trigger_reason,
                role=role,
                step=step,
                pinned_selection=pinned_selection,
            )

    bb = self._ensure_blackboard()
    bb["watchdog"] = self._normalize_watchdog_state(bb.get("watchdog", wd))
    bb["decomposition_queue"] = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", dq))
    self.blackboard = bb
    self._blackboard_touch()
    return {
        "triggered": bool(triggered),
        "trigger_reason": trigger_reason,
        "queue_active": bool((bb.get("decomposition_queue", {}) or {}).get("active", False)),
        "step_advanced": bool(progress_row.get("step_advanced", False)),
    }
13585
+
13586
def _watchdog_execute_queue_step(self, *, pinned_selection: str) -> dict:
    """Run one step of the executor queue.

    The method picks the next queued step, injects its instruction as a
    mandatory manager instruction, sets the matching blackboard status,
    runs the worker turn, feeds the result to the blackboard and the
    watchdog, and handles a finish signal from the worker.

    Args:
        pinned_selection: Model selection forwarded to the worker turn
            and the watchdog.

    Returns:
        A dict with ``executed``, ``queue_active``, ``stop_run`` and
        ``interrupted``. When a step was executed it also carries
        ``role``, ``status``, ``wd_event``, ``trigger_reason`` and
        ``finish_gate_reason``.
    """
    board = self._ensure_blackboard()
    pick = self._watchdog_pick_executor_route(board)
    if not pick:
        idle_queue = self._normalize_decomposition_queue_state(board.get("decomposition_queue", {}))
        return {
            "executed": False,
            "queue_active": bool(idle_queue.get("active", False)),
            "stop_run": False,
            "interrupted": False,
        }

    queue_args, meta = pick
    arg_map = queue_args or {}
    role = self._sanitize_agent_role(arg_map.get("target", "")) or "developer"
    instruction = trim(str(arg_map.get("instruction", "") or "").strip(), 1200)
    if not instruction:
        instruction = (
            "Executor mode step: call one concrete tool now, keep scope narrow, and update blackboard evidence."
        )
    self._inject_manager_instruction(role, instruction, is_mandatory=True, executor_mode=True)

    # Reflect the acting role in the blackboard status (other roles leave it unchanged).
    status_by_role = {"explorer": "RESEARCHING", "developer": "CODING", "reviewer": "REVIEWING"}
    if role in status_by_role:
        self._blackboard_set_status(status_by_role[role])

    fp_before = self._watchdog_state_fingerprint(self._ensure_blackboard())
    turn_result = self._multi_agent_turn(
        role,
        pinned_selection=pinned_selection,
        media_inputs_round=None,
    )
    safe_step = turn_result if isinstance(turn_result, dict) else {}
    self._blackboard_update_from_worker_step(role, safe_step)
    board_after = self._ensure_blackboard()
    fp_after = self._watchdog_state_fingerprint(board_after)
    wd_event = self._watchdog_process_worker_step(
        board_after,
        role=role,
        step=safe_step,
        state_changed=bool(fp_after != fp_before),
        pinned_selection=pinned_selection,
    )

    status = str(safe_step.get("status", "") or "").strip().lower()
    interrupted = bool(status == "interrupted")
    stop_run = False
    finish_gate_reason = ""
    if status == "tools" and bool(safe_step.get("stop_due_to_finish", False)):
        note = f"{self._agent_display_name(role)} signaled finish via tool."
        # Approval note should come from finish tool payload sync; avoid overwriting with generic text here.
        can_finish_now, finish_gate_reason = self._can_auto_finish_from_approval(
            self._ensure_blackboard(),
            latest_user_ts=self._latest_user_message_ts(),
        )
        if can_finish_now:
            self._mark_all_done_silently(note)
            stop_run = True
        else:
            deferred_line = (
                f"executor finish deferred by gate ({finish_gate_reason}); "
                "continue watchdog queue"
            )
            self._emit("status", {"summary": deferred_line})

    final_queue = self._normalize_decomposition_queue_state(
        self._ensure_blackboard().get("decomposition_queue", {})
    )
    return {
        "executed": True,
        "queue_active": bool(final_queue.get("active", False)),
        "stop_run": bool(stop_run),
        "interrupted": bool(interrupted),
        "role": role,
        "status": status,
        "wd_event": wd_event,
        "trigger_reason": trim(str(meta.get("trigger_reason", "") or "").strip(), 120),
        "finish_gate_reason": finish_gate_reason,
    }
13659
+
13660
def _new_blackboard(self, goal: str = "") -> dict:
    """Build a fresh blackboard for a goal.

    Args:
        goal: The user's goal text; may be empty.

    Returns:
        A new blackboard dict in ``INITIALIZING`` status with empty
        collections, a normalized task profile, a derived manager
        judgement, and pristine watchdog and decomposition-queue state.
    """
    profile = self._normalize_task_profile(goal, {})
    goal_text = str(goal or "").strip()

    # A simple-QA profile with no goal text has nothing left to do.
    blank_simple_qa = str(profile.get("task_type", "") or "") == "simple_qa" and not goal_text
    progress = "done" if blank_simple_qa else "initializing"

    # A non-positive round budget is reported as -1 remaining rounds.
    if int(profile.get("round_budget", 0) or 0) <= 0:
        remaining_rounds = -1
    else:
        remaining_rounds = int(profile.get("round_budget", 1) or 1)

    board: dict = {
        "version": 1,
        "updated_at": float(now_ts()),
        "original_goal": trim(goal_text, 4000),
        "research_notes": [],
        "code_artifacts": {},
        "execution_logs": [],
        "review_feedback": [],
        "conversation_history": [],
        "status": "INITIALIZING",
        "approval": {"approved": False, "by": "", "note": "", "ts": 0.0},
        "manager_cycles": 0,
        "manager_summary_attempts": 0,
        "active_agent": "",
        "last_delegate": {
            "target": "",
            "instruction": "",
            "reason": "",
            "source": "",
            "is_mandatory": False,
            "ts": 0.0,
        },
        "task_profile": profile,
    }
    board["manager_judgement"] = {
        "task_type": str(profile.get("task_type", "general")),
        "complexity": str(profile.get("complexity", "simple")),
        "scale_preference": str(profile.get("scale_preference", "balanced") or "balanced"),
        "progress": progress,
        "remaining_rounds": remaining_rounds,
        "updated_at": float(now_ts()),
    }
    board["last_worker_reply"] = {"role": "", "text": "", "ts": 0.0}
    board["watchdog"] = self._new_watchdog_state()
    board["decomposition_queue"] = self._new_decomposition_queue_state()
    return board
13711
+
13712
+ def _normalize_blackboard(self, raw: object) -> dict:
13713
+ src = raw if isinstance(raw, dict) else {}
13714
+ board = self._new_blackboard(str(src.get("original_goal", "") or ""))
13715
+ try:
13716
+ board["version"] = int(src.get("version", 1) or 1)
13717
+ except Exception:
13718
+ board["version"] = 1
13719
+ board["updated_at"] = float(src.get("updated_at", now_ts()) or now_ts())
13720
+ board["status"] = self._normalize_blackboard_status(src.get("status", board["status"]))
13721
+ board["manager_cycles"] = max(0, int(src.get("manager_cycles", 0) or 0))
13722
+ board["manager_summary_attempts"] = max(0, int(src.get("manager_summary_attempts", 0) or 0))
13723
+ board["active_agent"] = self._sanitize_agent_role(src.get("active_agent", ""))
13724
+ raw_delegate = src.get("last_delegate", {})
12658
13725
  if isinstance(raw_delegate, dict):
12659
13726
  board["last_delegate"] = {
12660
13727
  "target": str(raw_delegate.get("target", "") or "").strip().lower(),
@@ -12789,6 +13856,10 @@ class SessionState:
12789
13856
  "change_count": max(1, int(item.get("change_count", 1) or 1)),
12790
13857
  }
12791
13858
  board["code_artifacts"] = artifacts
13859
+ board["watchdog"] = self._normalize_watchdog_state(src.get("watchdog", {}))
13860
+ board["decomposition_queue"] = self._normalize_decomposition_queue_state(
13861
+ src.get("decomposition_queue", {})
13862
+ )
12792
13863
  return board
12793
13864
 
12794
13865
  def _ensure_blackboard(self) -> dict:
@@ -13170,6 +14241,11 @@ class SessionState:
13170
14241
  goal = trim(str(board.get("original_goal", "") or "").strip(), 1800)
13171
14242
  status = self._normalize_blackboard_status(board.get("status", "INITIALIZING"))
13172
14243
  delegate = board.get("last_delegate", {}) if isinstance(board.get("last_delegate"), dict) else {}
14244
+ watchdog = board.get("watchdog", {}) if isinstance(board.get("watchdog"), dict) else {}
14245
+ dq = board.get("decomposition_queue", {}) if isinstance(board.get("decomposition_queue"), dict) else {}
14246
+ dq_steps = dq.get("steps", []) if isinstance(dq.get("steps"), list) else []
14247
+ dq_cursor = max(0, int(dq.get("cursor", 0) or 0))
14248
+ dq_total = len(dq_steps)
13173
14249
  lines = [
13174
14250
  "## Blackboard State",
13175
14251
  f"- status: {status}",
@@ -13191,6 +14267,20 @@ class SessionState:
13191
14267
  ),
13192
14268
  f"- active_agent: {board.get('active_agent', '') or '(none)'}",
13193
14269
  f"- manager_cycles: {int(board.get('manager_cycles', 0) or 0)}",
14270
+ f"- manager_summary_attempts: {int(board.get('manager_summary_attempts', 0) or 0)}",
14271
+ (
14272
+ "- watchdog: "
14273
+ f"intent_no_tool={int(watchdog.get('intent_no_tool_streak', 0) or 0)}, "
14274
+ f"repeat_no_tool={int(watchdog.get('repeat_no_tool_streak', 0) or 0)}, "
14275
+ f"state_unchanged={int(watchdog.get('state_unchanged_streak', 0) or 0)}, "
14276
+ f"trigger_count={int(watchdog.get('trigger_count', 0) or 0)}"
14277
+ ),
14278
+ (
14279
+ "- decomposition_queue: "
14280
+ f"active={bool(dq.get('active', False))}, "
14281
+ f"cursor={dq_cursor}, total={dq_total}, "
14282
+ f"trigger_reason={trim(str(dq.get('trigger_reason', '') or ''), 140)}"
14283
+ ),
13194
14284
  (
13195
14285
  "- manager_judgement: "
13196
14286
  f"{trim(str(judgement.get('progress', 'initializing') or ''), 40)}"
@@ -13207,6 +14297,7 @@ class SessionState:
13207
14297
  f"- code_artifacts: {len(board.get('code_artifacts', {}) or {})}",
13208
14298
  f"- execution_logs: {len(board.get('execution_logs', []) or [])}",
13209
14299
  f"- review_feedback: {len(board.get('review_feedback', []) or [])}",
14300
+ f"- collaboration_history: {len(board.get('conversation_history', []) or [])}",
13210
14301
  ]
13211
14302
  approval = board.get("approval", {}) if isinstance(board.get("approval"), dict) else {}
13212
14303
  if bool(approval.get("approved", False)):
@@ -13235,6 +14326,7 @@ class SessionState:
13235
14326
  lines.append(f"- [{actor or 'agent'}] {txt}")
13236
14327
 
13237
14328
  _render_tail("Recent Research Notes", board.get("research_notes", []))
14329
+ _render_tail("Recent Collaboration History", board.get("conversation_history", []))
13238
14330
  art = board.get("code_artifacts", {})
13239
14331
  lines.append("\n### Recent Code Artifacts")
13240
14332
  if isinstance(art, dict) and art:
@@ -13277,6 +14369,7 @@ class SessionState:
13277
14369
  "assigned_expert": {"type": "string", "enum": list(AGENT_ROLES)},
13278
14370
  "requires_user_confirmation": {"type": "boolean"},
13279
14371
  "is_mandatory": {"type": "boolean"},
14372
+ "executor_mode": {"type": "boolean"},
13280
14373
  },
13281
14374
  ["target", "instruction"],
13282
14375
  )
@@ -13295,6 +14388,8 @@ class SessionState:
13295
14388
  "task_type": {"type": "string"},
13296
14389
  "complexity": {"type": "string", "enum": list(TASK_COMPLEXITY_LEVELS)},
13297
14390
  "scale_preference": {"type": "string", "enum": list(TASK_SCALE_PREFERENCES)},
14391
+ "semantic_confidence": {"type": "string", "enum": list(SEMANTIC_CONFIDENCE_CHOICES)},
14392
+ "low_confidence_reason": {"type": "string"},
13298
14393
  "inherit_previous_state": {"type": "boolean"},
13299
14394
  "judgement": {"type": "string"},
13300
14395
  "round_budget": {"type": "integer"},
@@ -13318,6 +14413,68 @@ class SessionState:
13318
14413
  yes_tokens = ("继续", "确认", "开始", "执行", "同意", "go ahead", "proceed", "continue", "yes")
13319
14414
  return any(tok in low for tok in yes_tokens)
13320
14415
 
14416
def _normalize_semantic_confidence(self, raw: object, *, default: str = "medium") -> str:
    """Coerce *raw* into one of ``SEMANTIC_CONFIDENCE_CHOICES``.

    The value is stringified, stripped and lower-cased before the membership
    check.  When it is not a recognised choice, *default* is returned if it
    is itself a recognised choice; otherwise the literal ``"medium"`` is used.
    """
    candidate = str(raw or "").strip().lower()
    # Preference order: the cleaned input first, then the caller's default.
    for choice in (candidate, default):
        if choice in SEMANTIC_CONFIDENCE_CHOICES:
            return choice
    return "medium"
14421
+
14422
def _merge_task_decision_for_low_confidence(self, llm_row: dict, fallback_row: dict) -> dict:
    """Overlay usable fields of a low-confidence manager decision on a fallback row.

    Starts from a shallow copy of *fallback_row* and, field by field, copies
    values from *llm_row* only when they pass the same validation the code
    applies here (membership in the relevant choice set, non-empty after
    trimming, positive budget, ...).  Boolean flags are only ever switched
    on, never off.  The result is always tagged with
    ``source="manager-low-confidence+fallback"`` and carries a normalised
    ``semantic_confidence`` (defaulting to ``"low"``) plus a trimmed
    ``low_confidence_reason``.

    A non-dict *llm_row* is treated as an empty decision.
    """
    result = dict(fallback_row or {})
    decision = llm_row if isinstance(llm_row, dict) else {}

    def _text(key: str, limit: int, *, lower: bool = False) -> str:
        # Stringify, strip, optionally lower-case, then cap the length.
        cleaned = str(decision.get(key, "") or "").strip()
        if lower:
            cleaned = cleaned.lower()
        return trim(cleaned, limit)

    def _int_field(key: str) -> int:
        # Unparseable numbers degrade to 0, which fails the checks below.
        try:
            return int(decision.get(key, 0) or 0)
        except Exception:
            return 0

    if bool(decision.get("inherit_previous_state", False)):
        result["inherit_previous_state"] = True

    level = _int_field("level")
    if level in TASK_LEVEL_CHOICES:
        result["level"] = int(level)

    # Enum-like string fields: accept only values from the allowed set.
    enum_fields = (
        ("task_type", 40, TASK_PROFILE_TYPES),
        ("complexity", 20, TASK_COMPLEXITY_LEVELS),
        ("scale_preference", 20, TASK_SCALE_PREFERENCES),
    )
    for key, limit, allowed in enum_fields:
        cleaned_value = _text(key, limit, lower=True)
        if cleaned_value in allowed:
            result[key] = cleaned_value

    exec_mode = normalize_execution_mode(decision.get("execution_mode", ""), default="")
    if exec_mode in EXECUTION_MODE_CHOICES:
        result["execution_mode"] = exec_mode

    expert = self._sanitize_agent_role(decision.get("assigned_expert", ""))
    if expert:
        result["assigned_expert"] = expert

    # Participants: sanitised, de-duplicated in first-seen order, capped at 3.
    roles: list[str] = []
    listed = decision.get("participants", [])
    if isinstance(listed, list):
        for entry in listed:
            sanitized = self._sanitize_agent_role(entry)
            if sanitized and sanitized not in roles:
                roles.append(sanitized)
    if roles:
        result["participants"] = roles[:3]

    requested_budget = _int_field("round_budget")
    if requested_budget > 0:
        # Clamp into [1, session round ceiling].
        ceiling = int(self.max_agent_rounds or MAX_AGENT_ROUNDS)
        result["round_budget"] = int(max(1, min(ceiling, int(requested_budget))))

    if bool(decision.get("requires_user_confirmation", False)):
        result["requires_user_confirmation"] = True

    for key, limit in (("direct_objective", 800), ("judgement", 200)):
        snippet = _text(key, limit)
        if snippet:
            result[key] = snippet

    result["semantic_confidence"] = self._normalize_semantic_confidence(
        decision.get("semantic_confidence", "low"),
        default="low",
    )
    result["low_confidence_reason"] = _text("low_confidence_reason", 220)
    result["source"] = "manager-low-confidence+fallback"
    return result
14477
+
13321
14478
  def _fallback_task_level_decision(self, goal_text: str) -> dict:
13322
14479
  profile = self._infer_task_profile(goal_text)
13323
14480
  task_type = str(profile.get("task_type", "general") or "general")
@@ -13421,6 +14578,8 @@ class SessionState:
13421
14578
  "participants": list(inherited_participants),
13422
14579
  "assigned_expert": inherited_assigned,
13423
14580
  "requires_user_confirmation": bool(inherited_requires_confirmation if inherited_level == 5 else False),
14581
+ "semantic_confidence": "low",
14582
+ "low_confidence_reason": "rule fallback inherited previous runtime state",
13424
14583
  "source": "fallback",
13425
14584
  }
13426
14585
  level = 3
@@ -13457,6 +14616,8 @@ class SessionState:
13457
14616
  "participants": participants,
13458
14617
  "assigned_expert": assigned,
13459
14618
  "requires_user_confirmation": bool(requires_confirmation),
14619
+ "semantic_confidence": "low",
14620
+ "low_confidence_reason": "rule fallback classification",
13460
14621
  "source": "fallback",
13461
14622
  }
13462
14623
 
@@ -13479,7 +14640,9 @@ class SessionState:
13479
14640
  "If user clearly indicates speed vs completeness preference, that preference has higher priority than your default strategy. "
13480
14641
  "Budgets are internal efficiency controls to reduce overthinking and idle loops; "
13481
14642
  "they must not be treated as a user-visible early-stop reason. "
13482
- "Output exactly one classify_task_level tool call with concise judgement and inherit_previous_state. "
14643
+ "Output exactly one classify_task_level tool call with concise judgement, inherit_previous_state, "
14644
+ "and semantic_confidence(high|medium|low). "
14645
+ "Use low confidence only when semantic ambiguity is substantial, then set low_confidence_reason briefly. "
13483
14646
  f"{model_language_instruction(self.ui_language)}"
13484
14647
  )
13485
14648
 
@@ -13577,16 +14740,55 @@ class SessionState:
13577
14740
  participants = normalized_participants[:3] or [assigned]
13578
14741
  if assigned not in participants:
13579
14742
  assigned = participants[0]
14743
+ semantic_confidence = self._normalize_semantic_confidence(
14744
+ row.get("semantic_confidence", "medium"),
14745
+ default="medium",
14746
+ )
14747
+ decision_source = trim(str(row.get("source", "") or "").strip().lower(), 80)
14748
+ low_confidence_mode = bool(
14749
+ str(semantic_confidence or "medium") == "low"
14750
+ or decision_source.startswith("fallback")
14751
+ or "low-confidence" in decision_source
14752
+ )
14753
+ if low_confidence_mode:
14754
+ rule_profile = self._infer_task_profile(goal_text)
14755
+ fallback_task_type = str(rule_profile.get("task_type", "general") or "general")
14756
+ fallback_complexity = str(rule_profile.get("complexity", "simple") or "simple")
14757
+ fallback_objective = trim(str(rule_profile.get("direct_objective", "") or ""), 800)
14758
+ else:
14759
+ board_now = self._ensure_blackboard()
14760
+ board_profile = board_now.get("task_profile", {}) if isinstance(board_now.get("task_profile"), dict) else {}
14761
+ fallback_task_type = trim(
14762
+ str(self.runtime_task_type or board_profile.get("task_type", "general") or "general"),
14763
+ 40,
14764
+ )
14765
+ if fallback_task_type not in TASK_PROFILE_TYPES:
14766
+ fallback_task_type = "general"
14767
+ fallback_complexity = trim(
14768
+ str(self.runtime_task_complexity or board_profile.get("complexity", "simple") or "simple"),
14769
+ 20,
14770
+ )
14771
+ if fallback_complexity not in TASK_COMPLEXITY_LEVELS:
14772
+ fallback_complexity = "simple"
14773
+ fallback_objective = trim(
14774
+ str(self.runtime_direct_objective or board_profile.get("direct_objective", "") or "").strip(),
14775
+ 800,
14776
+ )
14777
+ if not fallback_objective:
14778
+ fallback_objective = (
14779
+ "Proceed with direct semantic objective and concrete progress for the current request."
14780
+ )
13580
14781
  task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
13581
14782
  if task_type not in TASK_PROFILE_TYPES:
13582
- task_type = str(self._infer_task_profile(goal_text).get("task_type", "general"))
14783
+ task_type = fallback_task_type
13583
14784
  complexity = trim(str(row.get("complexity", "") or "").strip().lower(), 20)
13584
14785
  if complexity not in TASK_COMPLEXITY_LEVELS:
13585
- complexity = str(self._infer_task_profile(goal_text).get("complexity", "simple"))
14786
+ complexity = fallback_complexity
14787
+ low_confidence_reason = trim(str(row.get("low_confidence_reason", "") or "").strip(), 220)
13586
14788
  judgement = trim(str(row.get("judgement", "") or "").strip(), 200) or "manager classified task level"
13587
14789
  objective = trim(str(row.get("direct_objective", "") or "").strip(), 800)
13588
14790
  if not objective:
13589
- objective = trim(str(self._infer_task_profile(goal_text).get("direct_objective", "") or ""), 800)
14791
+ objective = fallback_objective
13590
14792
  self.runtime_task_level = int(level)
13591
14793
  self.runtime_execution_mode = mode
13592
14794
  self.runtime_assigned_expert = assigned
@@ -13614,6 +14816,8 @@ class SessionState:
13614
14816
  profile["direct_objective"] = objective
13615
14817
  profile["round_budget"] = int(round_budget)
13616
14818
  profile["inherit_previous_state"] = bool(inherit_previous_state)
14819
+ profile["semantic_confidence"] = semantic_confidence
14820
+ profile["low_confidence_reason"] = low_confidence_reason
13617
14821
  profile["recommended_agents"] = list(participants)
13618
14822
  profile["reason"] = trim(str(row.get("judgement", "") or row.get("source", "manager")), 400)
13619
14823
  profile["updated_at"] = float(now_ts())
@@ -13629,6 +14833,8 @@ class SessionState:
13629
14833
  "execution_mode": mode,
13630
14834
  "participants": list(participants),
13631
14835
  "assigned_expert": assigned,
14836
+ "semantic_confidence": semantic_confidence,
14837
+ "low_confidence_reason": low_confidence_reason,
13632
14838
  "updated_at": float(now_ts()),
13633
14839
  }
13634
14840
  board["active_agent"] = assigned if mode == EXECUTION_MODE_SINGLE else ""
@@ -13641,7 +14847,8 @@ class SessionState:
13641
14847
  "summary": (
13642
14848
  f"manager classified: L{level} "
13643
14849
  f"mode={mode} scale={scale_preference} participants={','.join(participants)} "
13644
- f"expert={assigned} budget={'unlimited' if int(round_budget) <= 0 else int(round_budget)}"
14850
+ f"expert={assigned} budget={'unlimited' if int(round_budget) <= 0 else int(round_budget)} "
14851
+ f"confidence={semantic_confidence}"
13645
14852
  )
13646
14853
  },
13647
14854
  )
@@ -13699,10 +14906,20 @@ class SessionState:
13699
14906
  row.get("inherit_previous_state", False),
13700
14907
  default=False,
13701
14908
  )
14909
+ row["semantic_confidence"] = self._normalize_semantic_confidence(
14910
+ row.get("semantic_confidence", "medium"),
14911
+ default="medium",
14912
+ )
14913
+ if str(row.get("semantic_confidence", "medium")) == "low":
14914
+ fallback_row = self._fallback_task_level_decision(goal_text)
14915
+ merged = self._merge_task_decision_for_low_confidence(row, fallback_row)
14916
+ return merged
13702
14917
  row["source"] = "manager"
13703
14918
  return row
13704
14919
  row = self._fallback_task_level_decision(goal_text)
13705
- row["source"] = "fallback"
14920
+ row["source"] = "fallback-no-toolcall"
14921
+ row["semantic_confidence"] = "low"
14922
+ row["low_confidence_reason"] = "manager classifier returned no valid tool call"
13706
14923
  return row
13707
14924
 
13708
14925
  def _refresh_runtime_task_policy(
@@ -13774,6 +14991,8 @@ class SessionState:
13774
14991
  "Never use budget as an early-stop reason shown to user before task completion. "
13775
14992
  "Decision policy: missing facts/API -> explorer; implementation/update -> developer; "
13776
14993
  "verification/gap check -> reviewer; only choose finish when review is approved and no blocking logs remain. "
14994
+ "Prefer Manager+AgentBus co-management: when fresh agentbus handoff is available and aligned, "
14995
+ "follow that handoff to reduce orchestration latency instead of re-planning from scratch. "
13777
14996
  "If finish is blocked by missing final summary after review approval, instruct Reviewer to hand off Explorer "
13778
14997
  "via agentbus (intent=final_summary_request) instead of silently ending. "
13779
14998
  f"Current task level={level or '-'}, mode={mode}, scale_preference={scale_preference}, participants={participant_text}, "
@@ -13785,6 +15004,139 @@ class SessionState:
13785
15004
  f"{model_language_instruction(self.ui_language)}"
13786
15005
  )
13787
15006
 
15007
def _manager_pick_agentbus_fast_route(
    self,
    board: dict | None = None,
    *,
    max_age_seconds: float = 240.0,
) -> tuple[dict, dict] | None:
    """Pick the best recent agent-bus message to relay as the next route.

    Returns ``None`` unless the execution mode resolves to
    ``EXECUTION_MODE_SYNC``, the task type is not ``"simple_qa"`` and the
    blackboard approval is not already granted.  Otherwise the last 72 bus
    envelopes are scanned; an envelope qualifies when it

    * is a dict with a positive ``ts`` newer than the last delegate's ``ts``
      (when one is recorded) and no older than the age window
      (*max_age_seconds*, floored at 15 seconds),
    * has distinct, valid ``from``/``to`` roles with ``to`` among the
      profile's participants, and
    * has a non-empty payload.

    Qualifying envelopes are scored by intent plus a small per-target bonus;
    the highest score wins, with the newer timestamp breaking ties.

    Returns:
        ``(args, meta)`` where ``args`` is a route dict built from the
        winning envelope and the task profile, and ``meta`` describes the
        envelope (id, from, to, intent, age in seconds); or ``None`` when
        nothing qualifies.
    """
    blackboard = board if isinstance(board, dict) else self._ensure_blackboard()
    profile = self._ensure_blackboard_task_profile(blackboard)
    mode = normalize_execution_mode(
        profile.get("execution_mode", self._effective_execution_mode()),
        default=self._effective_execution_mode(),
    )
    if mode != EXECUTION_MODE_SYNC:
        return None
    if str(profile.get("task_type", "general") or "general") == "simple_qa":
        return None

    approval = blackboard.get("approval")
    if not isinstance(approval, dict):
        approval = {}
    if bool(approval.get("approved", False)):
        return None

    def _safe_ts(holder: dict) -> float:
        # Missing or unparseable timestamps collapse to 0.0.
        try:
            return float(holder.get("ts", 0.0) or 0.0)
        except Exception:
            return 0.0

    last_delegate = blackboard.get("last_delegate")
    if not isinstance(last_delegate, dict):
        last_delegate = {}
    last_delegate_ts = _safe_ts(last_delegate)

    now_tick = float(now_ts())
    max_age = max(15.0, float(max_age_seconds or 240.0))

    listed = profile.get("participants")
    if not isinstance(listed, list):
        listed = []
    participants_norm = [
        role for role in (self._sanitize_agent_role(entry) for entry in listed) if role
    ]
    if not participants_norm:
        participants_norm = [
            self._sanitize_agent_role(profile.get("assigned_expert", "developer")) or "developer"
        ]

    top_intents = {
        "final_summary_request",
        "review_request",
        "fix_request",
        "execute_plan",
        "requestresearch_support",
    }
    handoff_intents = {"handoff", "implementation_request", "verify_request", "tip"}
    chatter_intents = {"message", "info"}

    def _intent_rank(intent: str) -> int:
        # Exact-match tiers are checked before the keyword tier, so e.g.
        # "verify_request" stays at 4 even though other tiers exist.
        key = str(intent or "").strip().lower()
        if key in top_intents:
            return 6
        if key in handoff_intents:
            return 4
        if any(word in key for word in ("summary", "review", "fix")):
            return 5
        if key in chatter_intents:
            return 2
        return 3

    candidates: list[tuple[int, float, dict]] = []
    for envelope in list(self.agent_bus_messages)[-72:]:
        if not isinstance(envelope, dict):
            continue
        stamp = _safe_ts(envelope)
        if stamp <= 0.0:
            continue
        # Skip anything at or before the last delegate, and anything too old.
        predates_delegate = last_delegate_ts > 0.0 and stamp + 1e-6 <= last_delegate_ts
        if predates_delegate or (now_tick - stamp) > max_age:
            continue
        sender = self._sanitize_agent_role(envelope.get("from", ""))
        receiver = self._sanitize_agent_role(envelope.get("to", ""))
        if not (sender and receiver) or sender == receiver:
            continue
        if receiver not in participants_norm:
            continue
        intent = trim(str(envelope.get("intent", "") or "").strip().lower(), 80)
        payload = trim(str(envelope.get("payload", "") or "").strip(), 1200)
        if not payload:
            continue
        envelope_id = trim(str(envelope.get("id", "") or "").strip(), 80)

        score = _intent_rank(intent)
        if receiver == "developer":
            score += 1
        elif receiver == "explorer":
            if "summary" in intent:
                score += 2
        elif receiver == "reviewer":
            if "review" in intent or "verify" in intent:
                score += 1

        record = {
            "env_id": envelope_id,
            "from": sender,
            "to": receiver,
            "intent": intent or "message",
            "payload": payload,
            "ts": stamp,
        }
        candidates.append((int(score), float(stamp), record))

    if not candidates:
        return None

    # Highest score first, newest timestamp as tie-breaker (stable sort).
    ranked = sorted(candidates, key=lambda item: (item[0], item[1]), reverse=True)
    best = ranked[0][2]

    assigned_expert = self._sanitize_agent_role(profile.get("assigned_expert", "developer")) or "developer"
    task_level = int(profile.get("task_level", self.runtime_task_level or 3) or 3)
    if task_level not in TASK_LEVEL_CHOICES:
        task_level = 3
    round_budget = int(profile.get("round_budget", self.runtime_round_budget or self.max_agent_rounds) or 0)

    relay_note = (
        f"agentbus relay {best.get('from','?')}->{best.get('to','?')} "
        f"intent={best.get('intent','message')} id={best.get('env_id','-')}"
    )
    args = {
        "target": best.get("to", assigned_expert),
        "instruction": trim(str(best.get("payload", "") or ""), 1200),
        "task_level": int(task_level),
        "task_type": trim(str(profile.get("task_type", self.runtime_task_type or "general") or "general"), 40),
        "complexity": trim(str(profile.get("complexity", self.runtime_task_complexity or "simple") or "simple"), 20),
        "scale_preference": trim(
            str(profile.get("scale_preference", self.runtime_scale_preference or "balanced") or "balanced"),
            20,
        ),
        "judgement": trim(relay_note, 200),
        "round_budget": int(round_budget),
        "direct_objective": trim(str(profile.get("direct_objective", self.runtime_direct_objective or "") or ""), 800),
        "execution_mode": mode,
        "participants": list(participants_norm),
        "assigned_expert": assigned_expert,
        "requires_user_confirmation": bool(profile.get("requires_user_confirmation", False)),
        "is_mandatory": bool(best.get("to", "") in {"developer", "explorer"}),
    }
    meta = {
        "env_id": best.get("env_id", ""),
        "from": best.get("from", ""),
        "to": best.get("to", ""),
        "intent": best.get("intent", "message"),
        "age_sec": max(0.0, now_tick - float(best.get("ts", now_tick) or now_tick)),
    }
    return args, meta
15139
+
13788
15140
  def _manager_fallback_route(self) -> dict:
13789
15141
  board = self._ensure_blackboard()
13790
15142
  latest_user_ts = self._latest_user_message_ts()
@@ -13809,6 +15161,31 @@ class SessionState:
13809
15161
  )
13810
15162
  has_error_log = self._manager_has_error_log(board)
13811
15163
  feedback_pass = self._manager_feedback_passed_from_blackboard(board)
15164
+ cycles = int(board.get("manager_cycles", 0) or 0)
15165
+ summary_attempts = int(board.get("manager_summary_attempts", 0) or 0)
15166
+ max_budget = max(1, int(getattr(self, "max_agent_rounds", MAX_AGENT_ROUNDS) or MAX_AGENT_ROUNDS))
15167
+ if cycles >= max_budget:
15168
+ self._emit("status", {"summary": "Max cycles reached; forcing finish."})
15169
+ return {
15170
+ "target": "finish",
15171
+ "instruction": (
15172
+ "Maximum cycles reached. Generate final summary immediately based on current "
15173
+ "blackboard state and terminate."
15174
+ ),
15175
+ "reason": "forced-finish-budget-exhausted",
15176
+ "source": "fallback",
15177
+ }
15178
+ if finish_gate_reason == "reviewer-summary-missing" and summary_attempts >= 2:
15179
+ self._emit("status", {"summary": "Summary generation attempted; forcing finish now."})
15180
+ return {
15181
+ "target": "finish",
15182
+ "instruction": (
15183
+ "Final summary generation was requested in previous round. Compile final report "
15184
+ "from current blackboard evidence now and finish."
15185
+ ),
15186
+ "reason": "summary-generation-timeout-finish",
15187
+ "source": "fallback",
15188
+ }
13812
15189
  if progress == "done" and can_finish_from_approval:
13813
15190
  return {
13814
15191
  "target": "finish",
@@ -13832,16 +15209,32 @@ class SessionState:
13832
15209
  "source": "fallback",
13833
15210
  }
13834
15211
  if finish_gate_reason == "reviewer-summary-missing":
15212
+ next_attempt = summary_attempts + 1
15213
+ board["manager_summary_attempts"] = next_attempt
15214
+ self.blackboard = board
15215
+ if next_attempt >= 2:
15216
+ return {
15217
+ "target": "explorer",
15218
+ "instruction": (
15219
+ "Reviewer summary is still missing. Read blackboard sections "
15220
+ "(code_artifacts, execution_logs, review_feedback, status) and write one structured "
15221
+ "final summary to blackboard (changes, validation evidence, residual risks/next steps). "
15222
+ "Do not call finish tool in this step."
15223
+ ),
15224
+ "reason": "approval-missing-summary-handoff-explorer",
15225
+ "source": "fallback",
15226
+ "is_mandatory": True,
15227
+ }
13835
15228
  return {
13836
15229
  "target": "reviewer",
13837
15230
  "instruction": (
13838
- "Do not finish yet. Use agentbus handoff to Explorer (intent=final_summary_request), "
13839
- "then ensure final wrap-up summary is produced from blackboard evidence "
13840
- "(implemented outputs, validation evidence, remaining risks/next steps), and finish."
15231
+ "Review approved but final summary required. First call read_from_blackboard for "
15232
+ "code_artifacts/execution_logs/review_feedback/status, then call finish_task with summary "
15233
+ "including changes, validation evidence, and residual risks/next steps."
13841
15234
  ),
13842
- "reason": "approval-missing-reviewer-summary",
15235
+ "reason": "approval-missing-reviewer-summary-request",
13843
15236
  "source": "fallback",
13844
- "is_mandatory": False,
15237
+ "is_mandatory": True,
13845
15238
  }
13846
15239
  if finish_gate_reason == "blocking-error-log":
13847
15240
  return {
@@ -13894,15 +15287,17 @@ class SessionState:
13894
15287
  }
13895
15288
  if feedback_pass and (not can_finish_from_approval):
13896
15289
  if finish_gate_reason == "reviewer-summary-missing":
15290
+ board["manager_summary_attempts"] = summary_attempts + 1
15291
+ self.blackboard = board
13897
15292
  return {
13898
15293
  "target": "reviewer",
13899
15294
  "instruction": (
13900
- "Do not finish yet. Use agentbus to hand off Explorer for final summary, "
13901
- "then confirm summary includes changed files, validation evidence, and residual risks/next steps."
15295
+ "Quick review passed but final summary is missing. Produce concise final summary "
15296
+ "covering changed files and validation evidence, then finish."
13902
15297
  ),
13903
- "reason": "simple-code-summary-missing",
15298
+ "reason": "simple-code-summary-request",
13904
15299
  "source": "fallback",
13905
- "is_mandatory": False,
15300
+ "is_mandatory": True,
13906
15301
  }
13907
15302
  return {
13908
15303
  "target": "reviewer",
@@ -13925,15 +15320,17 @@ class SessionState:
13925
15320
  }
13926
15321
  if feedback_pass and code_count > 0 and (not can_finish_from_approval):
13927
15322
  if finish_gate_reason == "reviewer-summary-missing":
15323
+ board["manager_summary_attempts"] = summary_attempts + 1
15324
+ self.blackboard = board
13928
15325
  return {
13929
15326
  "target": "reviewer",
13930
15327
  "instruction": (
13931
- "Do not finish yet. Use agentbus handoff to Explorer for final summary, "
13932
- "then verify final summary covers what changed, validation evidence, and residual risks/next steps."
15328
+ "Review passed but final summary is still missing. Produce final summary covering "
15329
+ "what changed, validation evidence, and residual risks/next steps, then finish."
13933
15330
  ),
13934
- "reason": "feedback-pass-summary-missing",
15331
+ "reason": "feedback-pass-summary-request",
13935
15332
  "source": "fallback",
13936
- "is_mandatory": False,
15333
+ "is_mandatory": True,
13937
15334
  }
13938
15335
  return {
13939
15336
  "target": "reviewer",
@@ -13976,6 +15373,17 @@ class SessionState:
13976
15373
  "reason": "need-review",
13977
15374
  "source": "fallback",
13978
15375
  }
15376
+ if cycles > 10 and (code_count > 0 or research_count > 0):
15377
+ self._emit("status", {"summary": "Fallback default with progress; forcing finish."})
15378
+ return {
15379
+ "target": "finish",
15380
+ "instruction": (
15381
+ "Task has produced outputs but no explicit completion condition met. "
15382
+ "Generate final summary of current progress and finish."
15383
+ ),
15384
+ "reason": "forced-finish-fallback-progress",
15385
+ "source": "fallback",
15386
+ }
13979
15387
  return {
13980
15388
  "target": "developer",
13981
15389
  "instruction": "Continue implementation and produce concrete file/tool changes.",
@@ -13985,14 +15393,23 @@ class SessionState:
13985
15393
 
13986
15394
  def _manager_apply_anti_stall(self, route: dict) -> dict:
13987
15395
  row = dict(route or {})
15396
+ if bool(row.get("executor_mode", False)):
15397
+ return row
13988
15398
  if str(row.get("task_type", "") or "").strip().lower() == "simple_qa":
13989
15399
  return row
13990
15400
  target = str(row.get("target", "") or "").strip().lower()
13991
15401
  if target not in AGENT_ROLES:
13992
15402
  return row
13993
- recent = [str(x.get("target", "") or "").strip().lower() for x in self.manager_routes[-2:]]
13994
- if len(recent) == 2 and recent[0] == target and recent[1] == target:
15403
+ recent = [str(x.get("target", "") or "").strip().lower() for x in self.manager_routes[-4:]]
15404
+ if len(recent) >= 2 and recent[-1] == target and recent[-2] == target:
13995
15405
  board = self._ensure_blackboard()
15406
+ low_reason = str(row.get("reason", "") or "").strip().lower()
15407
+ if "summary" in low_reason and len(board.get("code_artifacts", {}) or {}) > 0:
15408
+ row["target"] = "finish"
15409
+ row["instruction"] = "Anti-stall: summary generation loop detected, forcing finish."
15410
+ row["reason"] = f"{row.get('reason', '')}|anti-stall-summary-loop-finish"
15411
+ row["source"] = "anti-stall"
15412
+ return row
13996
15413
  if target != "reviewer" and len(board.get("code_artifacts", {}) or {}) > 0:
13997
15414
  row["target"] = "reviewer"
13998
15415
  row["instruction"] = "Parallel-check current changes and provide immediate fix/pass guidance."
@@ -14006,10 +15423,20 @@ class SessionState:
14006
15423
  row["instruction"] = "Run a focused search/read step to unblock the next coding move."
14007
15424
  row["reason"] = f"{row.get('reason', '')}|anti-stall->explorer"
14008
15425
  row["source"] = "anti-stall"
15426
+ return row
15427
+ if len(recent) == 4 and recent[0] == recent[2] and recent[1] == recent[3] and recent[0] != recent[1]:
15428
+ board = self._ensure_blackboard()
15429
+ if len(board.get("code_artifacts", {}) or {}) > 0:
15430
+ row["target"] = "finish"
15431
+ row["instruction"] = "Oscillation detected with existing outputs; finish now."
15432
+ row["reason"] = f"{row.get('reason', '')}|anti-stall-oscillation-finish"
15433
+ row["source"] = "anti-stall"
15434
+ return row
14009
15435
  return row
14010
15436
 
14011
15437
  def _manager_apply_task_policy(self, route: dict) -> dict:
14012
15438
  row = dict(route or {})
15439
+ executor_mode_flag = _to_bool_like(row.get("executor_mode", False), default=False)
14013
15440
  board = self._ensure_blackboard()
14014
15441
  latest_user_ts = self._latest_user_message_ts()
14015
15442
  self._invalidate_stale_approval_if_needed(
@@ -14058,7 +15485,13 @@ class SessionState:
14058
15485
  if target not in MANAGER_ROUTE_TARGETS:
14059
15486
  target = assigned_expert if mode == EXECUTION_MODE_SINGLE else "developer"
14060
15487
  if target in AGENT_ROLES and target not in participants:
14061
- target = participants[0]
15488
+ if executor_mode_flag:
15489
+ if len(participants) < 3:
15490
+ participants.append(target)
15491
+ else:
15492
+ participants[-1] = target
15493
+ else:
15494
+ target = participants[0]
14062
15495
  instruction = trim(str(row.get("instruction", "") or "").strip(), 1200)
14063
15496
  if not instruction:
14064
15497
  instruction = "Proceed with one concrete next step and report evidence."
@@ -14109,13 +15542,53 @@ class SessionState:
14109
15542
  board,
14110
15543
  latest_user_ts=latest_user_ts,
14111
15544
  )
15545
+ board_status = self._normalize_blackboard_status(board.get("status", "INITIALIZING"))
15546
+ code_count = len(board.get("code_artifacts", {}) or {})
15547
+ research_count = len(board.get("research_notes", []) or [])
15548
+ feedback_pass = self._manager_feedback_passed_from_blackboard(board)
15549
+ summary_attempts = int(board.get("manager_summary_attempts", 0) or 0)
15550
+ force_finish_override = False
14112
15551
  if bool((board.get("approval", {}) or {}).get("approved", False)) and can_finish_from_approval:
14113
15552
  target = "finish"
14114
15553
  if not instruction:
14115
15554
  instruction = "Review already approved; finish now."
14116
15555
  if target == "finish" and (not can_finish_from_approval):
14117
15556
  if finish_gate_reason == "reviewer-summary-missing":
14118
- target = "reviewer"
15557
+ if summary_attempts >= 2:
15558
+ force_finish_override = True
15559
+ target = "finish"
15560
+ instruction = (
15561
+ "Summary generation was attempted in previous cycle. Compile final report from "
15562
+ "available blackboard data and finish now."
15563
+ )
15564
+ row["reason"] = "forced-finish-summary-max-retry"
15565
+ row["source"] = "policy"
15566
+ self._emit("status", {"summary": "Summary retry limit reached; forcing finish."})
15567
+ elif summary_attempts >= 1:
15568
+ board["manager_summary_attempts"] = summary_attempts + 1
15569
+ self.blackboard = board
15570
+ target = "explorer"
15571
+ instruction = (
15572
+ "Reviewer summary is still missing. Read blackboard sections "
15573
+ "(code_artifacts, execution_logs, review_feedback, status) and write one structured "
15574
+ "final summary to blackboard: changes, validation evidence, residual risks/next steps. "
15575
+ "Do not call finish tool in this step."
15576
+ )
15577
+ row["reason"] = "finish-blocked-summary-handoff-explorer"
15578
+ row["source"] = "policy"
15579
+ self._emit("status", {"summary": "Reviewer summary missing; handoff to explorer synthesis."})
15580
+ else:
15581
+ board["manager_summary_attempts"] = summary_attempts + 1
15582
+ self.blackboard = board
15583
+ target = "reviewer"
15584
+ instruction = (
15585
+ "Generate final summary report from blackboard evidence. First call read_from_blackboard "
15586
+ "(code_artifacts, execution_logs, review_feedback, status), then call finish_task.summary "
15587
+ "including changes, validation evidence, and residual risks/next steps."
15588
+ )
15589
+ row["reason"] = "finish-blocked-summary-request"
15590
+ row["source"] = "policy"
15591
+ self._emit("status", {"summary": "Requesting final summary generation before finish."})
14119
15592
  elif mode == EXECUTION_MODE_SINGLE:
14120
15593
  target = assigned_expert
14121
15594
  else:
@@ -14133,29 +15606,58 @@ class SessionState:
14133
15606
  "Continue execution for updated requirements and produce concrete progress now."
14134
15607
  )
14135
15608
  elif finish_gate_reason == "reviewer-summary-missing":
14136
- instruction = (
14137
- "Do not finish yet. Reviewer must hand off Explorer via agentbus "
14138
- "(intent=final_summary_request), then ensure final summary is produced from blackboard evidence "
14139
- "(implemented outputs, validation evidence, residual risks/next steps)."
14140
- )
15609
+ if not (force_finish_override and target == "finish"):
15610
+ instruction = (
15611
+ "Do not finish yet. Generate final summary first (changes, validation evidence, "
15612
+ "residual risks/next steps), then finish."
15613
+ )
14141
15614
  elif finish_gate_reason == "blocking-error-log":
14142
15615
  instruction = (
14143
15616
  "Do not finish yet. Latest execution logs still contain blocking errors. "
14144
15617
  "Resolve errors and provide verifiable evidence."
14145
15618
  )
14146
15619
  else:
14147
- instruction = (
14148
- "Do not finish yet. Completion requires fresh reviewer approval for the current user request. "
14149
- "Continue with one concrete step and update blackboard."
14150
- )
14151
- self._emit(
14152
- "status",
14153
- {
14154
- "summary": (
14155
- f"manager finish blocked ({finish_gate_reason}); rerouted to {target}"
15620
+ has_outputs = bool(code_count > 0 or research_count > 0)
15621
+ if board_status == "COMPLETED" and has_outputs:
15622
+ force_finish_override = True
15623
+ target = "finish"
15624
+ instruction = (
15625
+ "Task is already in COMPLETED state with concrete outputs. "
15626
+ "Generate final summary from blackboard (changes, validation evidence, residual "
15627
+ "risks/next steps) and finish now."
14156
15628
  )
14157
- },
14158
- )
15629
+ row["reason"] = "finish-blocked-completed-auto-summary-close"
15630
+ row["source"] = "policy"
15631
+ self._emit(
15632
+ "status",
15633
+ {"summary": "Completion gate unresolved but board is COMPLETED; auto-closing with final summary."},
15634
+ )
15635
+ elif feedback_pass and has_outputs:
15636
+ force_finish_override = True
15637
+ target = "finish"
15638
+ instruction = (
15639
+ "Reviewer feedback already passed with concrete outputs. "
15640
+ "Generate final summary and finish now."
15641
+ )
15642
+ row["reason"] = "finish-blocked-feedback-pass-auto-close"
15643
+ row["source"] = "policy"
15644
+ else:
15645
+ instruction = (
15646
+ "Do not finish yet. Completion requires fresh reviewer approval for the current user request. "
15647
+ "Continue with one concrete step and update blackboard."
15648
+ )
15649
+ if finish_gate_reason != "reviewer-summary-missing":
15650
+ self._emit(
15651
+ "status",
15652
+ {
15653
+ "summary": (
15654
+ f"manager finish blocked ({finish_gate_reason}); rerouted to {target}"
15655
+ )
15656
+ },
15657
+ )
15658
+ if target not in {"finish", "reviewer"} and finish_gate_reason != "reviewer-summary-missing":
15659
+ board["manager_summary_attempts"] = 0
15660
+ self.blackboard = board
14159
15661
  if target != "finish" and objective:
14160
15662
  low_instruction = instruction.lower()
14161
15663
  low_objective = objective.lower()
@@ -14166,12 +15668,13 @@ class SessionState:
14166
15668
  has_mandatory_field = isinstance(row, dict) and ("is_mandatory" in row)
14167
15669
  is_mandatory = _to_bool_like(row.get("is_mandatory", False), default=False) if has_mandatory_field else False
14168
15670
  if finish_gate_reason == "reviewer-summary-missing" and target == "reviewer":
14169
- is_mandatory = False
15671
+ is_mandatory = True
14170
15672
  has_mandatory_field = True
14171
15673
  if (not has_mandatory_field) and target in AGENT_ROLES and task_type != "simple_qa":
14172
15674
  is_mandatory = True
14173
15675
  if target == "finish":
14174
15676
  is_mandatory = False
15677
+ executor_mode_flag = False
14175
15678
  row.update(
14176
15679
  {
14177
15680
  "target": target,
@@ -14186,6 +15689,7 @@ class SessionState:
14186
15689
  "participants": list(participants),
14187
15690
  "assigned_expert": assigned_expert,
14188
15691
  "is_mandatory": bool(is_mandatory),
15692
+ "executor_mode": bool(executor_mode_flag and target in AGENT_ROLES),
14189
15693
  "requires_user_confirmation": bool(
14190
15694
  row.get(
14191
15695
  "requires_user_confirmation",
@@ -14230,6 +15734,7 @@ class SessionState:
14230
15734
  "assigned_expert": trim(str(args.get("assigned_expert", "") or "").strip().lower(), 20),
14231
15735
  "requires_user_confirmation": bool(args.get("requires_user_confirmation", False)),
14232
15736
  "is_mandatory": _to_bool_like(args.get("is_mandatory", False), default=False),
15737
+ "executor_mode": _to_bool_like(args.get("executor_mode", False), default=False),
14233
15738
  "round_budget": args.get("round_budget", 0),
14234
15739
  "reason": trim(str(text or "").strip(), 600),
14235
15740
  "source": "tool",
@@ -14258,6 +15763,7 @@ class SessionState:
14258
15763
  objective, _ = self._split_language_policy_from_text(objective_raw, max_len=800)
14259
15764
  instruction, _ = self._split_language_policy_from_text(instruction_raw, max_len=1200)
14260
15765
  is_mandatory = bool(row.get("is_mandatory", False))
15766
+ is_executor = bool(row.get("executor_mode", False))
14261
15767
  round_budget = int(row.get("round_budget", 0) or 0)
14262
15768
  remaining = int(row.get("remaining_rounds", -1) or -1)
14263
15769
  budget_text = "unlimited" if round_budget <= 0 else str(round_budget)
@@ -14266,7 +15772,11 @@ class SessionState:
14266
15772
  lines = [
14267
15773
  f"Manager -> {target_label}",
14268
15774
  f"L{task_level if task_level in TASK_LEVEL_CHOICES else '-'} | {mode} | {task_type}/{complexity} | scale={scale}",
14269
- f"mandatory={'yes' if is_mandatory else 'no'} | budget={budget_text} | remaining={remaining_text}",
15775
+ (
15776
+ f"mandatory={'yes' if is_mandatory else 'no'}"
15777
+ f" | executor={'yes' if is_executor else 'no'}"
15778
+ f" | budget={budget_text} | remaining={remaining_text}"
15779
+ ),
14270
15780
  ]
14271
15781
  if objective:
14272
15782
  lines.append(f"objective: {objective}")
@@ -14282,6 +15792,7 @@ class SessionState:
14282
15792
  "complexity": complexity,
14283
15793
  "scale_preference": scale,
14284
15794
  "is_mandatory": is_mandatory,
15795
+ "executor_mode": is_executor,
14285
15796
  "round_budget": round_budget,
14286
15797
  "remaining_rounds": remaining,
14287
15798
  "direct_objective": objective,
@@ -14304,94 +15815,222 @@ class SessionState:
14304
15815
  ):
14305
15816
  board = self._ensure_blackboard()
14306
15817
  board["manager_cycles"] = int(board.get("manager_cycles", 0) or 0) + 1
14307
- prompt = (
14308
- "Read the blackboard and delegate one next short timeslice. "
14309
- "Return only one route_to_next_agent call.\n\n"
14310
- f"{self._blackboard_read_state_markdown(max_items=6)}"
14311
- )
14312
- self.manager_context.append({"role": "user", "content": prompt, "ts": now_ts()})
14313
- self.manager_context = self.manager_context[-400:]
14314
- with self.lock:
14315
- self.current_phase = "manager:model-call"
14316
- self.current_tool_name = ""
14317
- self.active_agent_role = "manager"
14318
- response = self._chat_with_same_model_retry(
14319
- self.manager_context,
14320
- tools=self._manager_route_tools(),
14321
- system=self._manager_system_prompt(),
14322
- max_tokens=600,
14323
- think=False,
14324
- stream_thinking=False,
14325
- on_thinking_chunk=self._append_live_thinking,
14326
- pinned_selection=pinned_selection,
14327
- context_label="manager turn",
14328
- retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
14329
- media_inputs=media_inputs_round,
14330
- )
14331
- text = str(response.get("content") or "")
14332
- tool_calls = response.get("tool_calls", [])
14333
- text, text_filter_meta = self._sanitize_assistant_text_for_runtime(text, tool_calls)
14334
- if bool(text_filter_meta.get("filtered", False)) and str(text_filter_meta.get("reason", "")) == "oversized_raw_toolcall":
14335
- self._inject_toolcall_overflow_hint("manager")
14336
- assistant = {"role": "assistant", "content": text, "ts": now_ts()}
14337
- if tool_calls:
14338
- assistant["tool_calls"] = [
15818
+ text = ""
15819
+ tool_calls: list[dict] = []
15820
+ used_watchdog_executor = False
15821
+ watchdog_meta: dict = {}
15822
+ watchdog_pick = self._watchdog_pick_executor_route(board)
15823
+ used_agentbus_fast = False
15824
+ fast_meta: dict = {}
15825
+ if watchdog_pick:
15826
+ used_watchdog_executor = True
15827
+ queue_args, watchdog_meta = watchdog_pick
15828
+ with self.lock:
15829
+ self.current_phase = "manager:watchdog-executor-route"
15830
+ self.current_tool_name = ""
15831
+ self.active_agent_role = "manager"
15832
+ text = trim(
15833
+ (
15834
+ "watchdog executor route "
15835
+ f"step={int(watchdog_meta.get('cursor', 0) or 0)}/{int(watchdog_meta.get('total', 0) or 0)} "
15836
+ f"target={watchdog_meta.get('target', '?')} "
15837
+ f"trigger={watchdog_meta.get('trigger_reason', '') or '?'}"
15838
+ ),
15839
+ 600,
15840
+ )
15841
+ tool_calls = [
14339
15842
  {
14340
- "id": tc["id"],
15843
+ "id": make_id("tc"),
14341
15844
  "type": "function",
14342
15845
  "function": {
14343
- "name": tc["function"]["name"],
14344
- "arguments": json_dumps(tc["function"]["arguments"]),
15846
+ "name": "route_to_next_agent",
15847
+ "arguments": dict(queue_args or {}),
14345
15848
  },
14346
15849
  }
14347
- for tc in tool_calls
14348
15850
  ]
14349
- self.manager_context.append(assistant)
14350
- self.manager_context = self.manager_context[-400:]
14351
- route_only_tool_calls = False
14352
- if isinstance(tool_calls, list) and tool_calls:
14353
- tool_names = [
14354
- str(tc.get("function", {}).get("name", "") or "").strip().lower()
14355
- for tc in tool_calls
14356
- if isinstance(tc, dict)
14357
- ]
14358
- if tool_names and all(name in {"route_to_next_agent", "routetonext_agent"} for name in tool_names):
14359
- route_only_tool_calls = True
14360
- emit_text = str(text or "").strip()
14361
- if not emit_text and tool_calls and (not route_only_tool_calls):
14362
- emit_text = f"[tool calls] {', '.join(str(tc.get('function', {}).get('name', '?')) for tc in tool_calls)}"
14363
- if emit_text:
14364
- manager_message = {
14365
- "role": "assistant",
14366
- "content": emit_text,
14367
- "ts": assistant["ts"],
14368
- "agent_role": "manager",
14369
- }
14370
- if "tool_calls" in assistant and (not route_only_tool_calls):
14371
- manager_message["tool_calls"] = assistant["tool_calls"]
14372
- self.messages.append(manager_message)
14373
- self.messages = self.messages[-400:]
14374
- elif "tool_calls" in assistant and (not route_only_tool_calls):
14375
- manager_message = {
14376
- "role": "assistant",
14377
- "content": "",
14378
- "ts": assistant["ts"],
14379
- "agent_role": "manager",
14380
- "tool_calls": assistant["tool_calls"],
14381
- }
14382
- self.messages.append(manager_message)
14383
- self.messages = self.messages[-400:]
14384
- if emit_text:
15851
+ self.manager_context.append(
15852
+ {
15853
+ "role": "system",
15854
+ "content": (
15855
+ "[manager-watchdog-route] "
15856
+ f"{trim(str(text or ''), 500)}"
15857
+ ),
15858
+ "ts": now_ts(),
15859
+ }
15860
+ )
15861
+ self.manager_context = self.manager_context[-400:]
14385
15862
  self._emit(
14386
- "message",
15863
+ "status",
14387
15864
  {
14388
- "role": "assistant",
14389
- "agent_role": "manager",
14390
- "text": emit_text,
14391
- "summary": "Manager response",
15865
+ "summary": (
15866
+ "manager watchdog executor active "
15867
+ f"(step={int(watchdog_meta.get('cursor', 0) or 0)}/"
15868
+ f"{int(watchdog_meta.get('total', 0) or 0)}, "
15869
+ f"target={watchdog_meta.get('target', '?')}, "
15870
+ f"trigger={trim(str(watchdog_meta.get('trigger_reason', '') or ''), 80)})"
15871
+ )
14392
15872
  },
14393
15873
  )
15874
+ else:
15875
+ fast_pick = self._manager_pick_agentbus_fast_route(board)
15876
+ if fast_pick:
15877
+ used_agentbus_fast = True
15878
+ fast_args, fast_meta = fast_pick
15879
+ with self.lock:
15880
+ self.current_phase = "manager:agentbus-fast-route"
15881
+ self.current_tool_name = ""
15882
+ self.active_agent_role = "manager"
15883
+ text = trim(
15884
+ (
15885
+ "agentbus fast-route "
15886
+ f"{fast_meta.get('from', '?')}->{fast_meta.get('to', '?')} "
15887
+ f"intent={fast_meta.get('intent', 'message')} id={fast_meta.get('env_id', '-')}"
15888
+ ),
15889
+ 600,
15890
+ )
15891
+ tool_calls = [
15892
+ {
15893
+ "id": make_id("tc"),
15894
+ "type": "function",
15895
+ "function": {
15896
+ "name": "route_to_next_agent",
15897
+ "arguments": dict(fast_args or {}),
15898
+ },
15899
+ }
15900
+ ]
15901
+ self.manager_context.append(
15902
+ {
15903
+ "role": "system",
15904
+ "content": (
15905
+ "[manager-fast-route] "
15906
+ f"{trim(str(text or ''), 500)}"
15907
+ ),
15908
+ "ts": now_ts(),
15909
+ }
15910
+ )
15911
+ self.manager_context = self.manager_context[-400:]
15912
+ self._emit(
15913
+ "status",
15914
+ {
15915
+ "summary": (
15916
+ "manager fast-route via agentbus "
15917
+ f"({fast_meta.get('from', '?')}->{fast_meta.get('to', '?')}, "
15918
+ f"intent={fast_meta.get('intent', 'message')}, "
15919
+ f"age={float(fast_meta.get('age_sec', 0.0) or 0.0):.1f}s)"
15920
+ )
15921
+ },
15922
+ )
15923
+ else:
15924
+ prompt = (
15925
+ "Read the blackboard and delegate one next short timeslice. "
15926
+ "Return only one route_to_next_agent call.\n\n"
15927
+ f"{self._blackboard_read_state_markdown(max_items=6)}"
15928
+ )
15929
+ self.manager_context.append({"role": "user", "content": prompt, "ts": now_ts()})
15930
+ self.manager_context = self.manager_context[-400:]
15931
+ with self.lock:
15932
+ self.current_phase = "manager:model-call"
15933
+ self.current_tool_name = ""
15934
+ self.active_agent_role = "manager"
15935
+ response = self._chat_with_same_model_retry(
15936
+ self.manager_context,
15937
+ tools=self._manager_route_tools(),
15938
+ system=self._manager_system_prompt(),
15939
+ max_tokens=600,
15940
+ think=False,
15941
+ stream_thinking=False,
15942
+ on_thinking_chunk=self._append_live_thinking,
15943
+ pinned_selection=pinned_selection,
15944
+ context_label="manager turn",
15945
+ retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
15946
+ media_inputs=media_inputs_round,
15947
+ )
15948
+ text = str(response.get("content") or "")
15949
+ tool_calls = response.get("tool_calls", [])
15950
+ text, text_filter_meta = self._sanitize_assistant_text_for_runtime(text, tool_calls)
15951
+ if bool(text_filter_meta.get("filtered", False)) and str(text_filter_meta.get("reason", "")) == "oversized_raw_toolcall":
15952
+ self._inject_toolcall_overflow_hint("manager")
15953
+ assistant = {"role": "assistant", "content": text, "ts": now_ts()}
15954
+ if tool_calls:
15955
+ assistant["tool_calls"] = [
15956
+ {
15957
+ "id": tc["id"],
15958
+ "type": "function",
15959
+ "function": {
15960
+ "name": tc["function"]["name"],
15961
+ "arguments": json_dumps(tc["function"]["arguments"]),
15962
+ },
15963
+ }
15964
+ for tc in tool_calls
15965
+ ]
15966
+ self.manager_context.append(assistant)
15967
+ self.manager_context = self.manager_context[-400:]
15968
+ route_only_tool_calls = False
15969
+ if isinstance(tool_calls, list) and tool_calls:
15970
+ tool_names = [
15971
+ str(tc.get("function", {}).get("name", "") or "").strip().lower()
15972
+ for tc in tool_calls
15973
+ if isinstance(tc, dict)
15974
+ ]
15975
+ if tool_names and all(name in {"route_to_next_agent", "routetonext_agent"} for name in tool_names):
15976
+ route_only_tool_calls = True
15977
+ emit_text = str(text or "").strip()
15978
+ if not emit_text and tool_calls and (not route_only_tool_calls):
15979
+ emit_text = f"[tool calls] {', '.join(str(tc.get('function', {}).get('name', '?')) for tc in tool_calls)}"
15980
+ if emit_text:
15981
+ manager_message = {
15982
+ "role": "assistant",
15983
+ "content": emit_text,
15984
+ "ts": assistant["ts"],
15985
+ "agent_role": "manager",
15986
+ }
15987
+ if "tool_calls" in assistant and (not route_only_tool_calls):
15988
+ manager_message["tool_calls"] = assistant["tool_calls"]
15989
+ self.messages.append(manager_message)
15990
+ self.messages = self.messages[-400:]
15991
+ elif "tool_calls" in assistant and (not route_only_tool_calls):
15992
+ manager_message = {
15993
+ "role": "assistant",
15994
+ "content": "",
15995
+ "ts": assistant["ts"],
15996
+ "agent_role": "manager",
15997
+ "tool_calls": assistant["tool_calls"],
15998
+ }
15999
+ self.messages.append(manager_message)
16000
+ self.messages = self.messages[-400:]
16001
+ if emit_text:
16002
+ self._emit(
16003
+ "message",
16004
+ {
16005
+ "role": "assistant",
16006
+ "agent_role": "manager",
16007
+ "text": emit_text,
16008
+ "summary": "Manager response",
16009
+ },
16010
+ )
14394
16011
  route = self._manager_route_from_response(text, tool_calls)
16012
+ if used_watchdog_executor:
16013
+ route["source"] = "watchdog-executor"
16014
+ route["reason"] = trim(
16015
+ (
16016
+ f"watchdog executor step {int(watchdog_meta.get('cursor', 0) or 0)}/"
16017
+ f"{int(watchdog_meta.get('total', 0) or 0)} "
16018
+ f"target={watchdog_meta.get('target', '?')} "
16019
+ f"trigger={watchdog_meta.get('trigger_reason', '')}"
16020
+ ),
16021
+ 600,
16022
+ )
16023
+ route["executor_mode"] = True
16024
+ route["is_mandatory"] = True
16025
+ if used_agentbus_fast:
16026
+ route["source"] = "agentbus-fast"
16027
+ route["reason"] = trim(
16028
+ (
16029
+ f"agentbus relay {fast_meta.get('from', '?')}->{fast_meta.get('to', '?')} "
16030
+ f"intent={fast_meta.get('intent', 'message')} id={fast_meta.get('env_id', '-')}"
16031
+ ),
16032
+ 600,
16033
+ )
14395
16034
  active_profile = self._ensure_blackboard_task_profile(board)
14396
16035
  target = str(route.get("target", "") or "").strip().lower()
14397
16036
  instruction = trim(str(route.get("instruction", "") or "").strip(), 1200)
@@ -14457,6 +16096,7 @@ class SessionState:
14457
16096
  "participants": list(participants),
14458
16097
  "assigned_expert": assigned_expert,
14459
16098
  "is_mandatory": bool(route.get("is_mandatory", False)),
16099
+ "executor_mode": bool(route.get("executor_mode", False)),
14460
16100
  "requires_user_confirmation": bool(route.get("requires_user_confirmation", False)),
14461
16101
  "round_budget": int(round_budget),
14462
16102
  "remaining_rounds": int(remaining_rounds),
@@ -14469,6 +16109,7 @@ class SessionState:
14469
16109
  profile["participants"] = list(participants)
14470
16110
  profile["assigned_expert"] = assigned_expert
14471
16111
  profile["is_mandatory"] = bool(route_row.get("is_mandatory", False))
16112
+ profile["executor_mode"] = bool(route_row.get("executor_mode", False))
14472
16113
  profile["requires_user_confirmation"] = bool(route_row.get("requires_user_confirmation", False))
14473
16114
  if task_type in TASK_PROFILE_TYPES:
14474
16115
  profile["task_type"] = task_type
@@ -14503,6 +16144,7 @@ class SessionState:
14503
16144
  "participants": list(participants),
14504
16145
  "assigned_expert": assigned_expert,
14505
16146
  "is_mandatory": bool(route_row.get("is_mandatory", False)),
16147
+ "executor_mode": bool(route_row.get("executor_mode", False)),
14506
16148
  "remaining_rounds": int(remaining_rounds),
14507
16149
  "updated_at": float(now_ts()),
14508
16150
  }
@@ -14574,10 +16216,40 @@ class SessionState:
14574
16216
  )
14575
16217
  return route_row
14576
16218
 
14577
- def _inject_manager_instruction(self, role: str, instruction: str, is_mandatory: bool = False):
16219
+ def _inject_manager_instruction(
16220
+ self,
16221
+ role: str,
16222
+ instruction: str,
16223
+ is_mandatory: bool = False,
16224
+ executor_mode: bool = False,
16225
+ ):
14578
16226
  role_key = self._sanitize_agent_role(role)
14579
16227
  if not role_key:
14580
16228
  return
16229
+ if bool(executor_mode):
16230
+ executor_seed = {
16231
+ "role": "system",
16232
+ "content": self._apply_agent_language_policy(
16233
+ (
16234
+ "Executor mode is enabled by watchdog. You are stateless for this step: "
16235
+ "ignore old conversational plans, execute only the delegated step, call concrete tools, "
16236
+ "and write verifiable evidence to blackboard."
16237
+ ),
16238
+ max_len=800,
16239
+ ),
16240
+ "ts": now_ts(),
16241
+ "agent_role": role_key,
16242
+ }
16243
+ self.contexts[role_key] = [executor_seed]
16244
+ self._emit(
16245
+ "status",
16246
+ {
16247
+ "summary": (
16248
+ f"executor hot-swap: reset {self._agent_display_name(role_key)} context "
16249
+ "for stateless execution"
16250
+ )
16251
+ },
16252
+ )
14581
16253
  instruction_with_policy = self._apply_agent_language_policy(
14582
16254
  trim(str(instruction or "").strip(), 1400),
14583
16255
  max_len=1400,
@@ -14597,14 +16269,30 @@ class SessionState:
14597
16269
  if bool(is_mandatory)
14598
16270
  else ""
14599
16271
  )
16272
+ executor_note = (
16273
+ (
16274
+ "STATELESS EXECUTOR: do not re-plan globally; "
16275
+ "complete only this delegated step and return concrete tool evidence."
16276
+ )
16277
+ if bool(executor_mode)
16278
+ else ""
16279
+ )
16280
+ collaboration_note = (
16281
+ "COLLABORATION PREFERENCE: if your current step needs another specialty, "
16282
+ "use ask_colleague immediately with explicit intent and concise payload; "
16283
+ "do not wait for another manager cycle."
16284
+ )
14600
16285
  board_md = self._blackboard_read_state_markdown(max_items=5)
14601
16286
  payload = (
14602
16287
  "<manager-delegate>\n"
14603
16288
  f"target={role_key}\n"
14604
16289
  f"is_mandatory={bool(is_mandatory)}\n"
16290
+ f"executor_mode={bool(executor_mode)}\n"
14605
16291
  f"instruction={instruction_text}\n"
14606
16292
  f"language_policy={language_note}\n"
14607
16293
  f"{mandatory_note}\n"
16294
+ f"{executor_note}\n"
16295
+ f"{collaboration_note}\n"
14608
16296
  "</manager-delegate>\n"
14609
16297
  "<blackboard-state>\n"
14610
16298
  f"{trim(board_md, 6000)}\n"
@@ -14655,7 +16343,9 @@ class SessionState:
14655
16343
  return
14656
16344
  name = str(item.get("name", "") or "").strip()
14657
16345
  args = item.get("args", {}) if isinstance(item.get("args"), dict) else {}
14658
- output = trim(str(item.get("output", "") or "").strip(), BLACKBOARD_MAX_TEXT)
16346
+ output_raw = trim(str(item.get("output", "") or "").strip(), BLACKBOARD_MAX_TEXT)
16347
+ output_clean, _ = filter_runtime_noise_lines(output_raw)
16348
+ output = trim(output_clean, BLACKBOARD_MAX_TEXT)
14659
16349
  ok = bool(item.get("ok", False))
14660
16350
  if name in {"write_file", "edit_file"}:
14661
16351
  rel_path = str(args.get("path", "") or "").strip()
@@ -14679,10 +16369,17 @@ class SessionState:
14679
16369
  if role_key == "explorer":
14680
16370
  self._blackboard_set_status("RESEARCHING")
14681
16371
  elif name in {"finish_task", "finish_current_task", "mark_done"} and ok:
16372
+ summary_arg = trim(str(args.get("summary", "") or "").strip(), BLACKBOARD_MAX_TEXT)
16373
+ if summary_arg:
16374
+ if role_key == "reviewer":
16375
+ self._blackboard_append_section("review_feedback", role_key, f"final_summary\n{summary_arg}")
16376
+ elif role_key == "explorer":
16377
+ self._blackboard_append_section("research_notes", role_key, f"final_summary\n{summary_arg}")
14682
16378
  if role_key == "reviewer":
14683
16379
  gate_ok, gate_reason = self._reviewer_approval_log_gate()
14684
16380
  if gate_ok:
14685
- self._blackboard_mark_approved(output or "finish tool acknowledged", role_key)
16381
+ approval_note = summary_arg or output or "finish tool acknowledged"
16382
+ self._blackboard_mark_approved(approval_note, role_key)
14686
16383
  else:
14687
16384
  self._blackboard_append_section(
14688
16385
  "review_feedback",
@@ -14694,7 +16391,8 @@ class SessionState:
14694
16391
  )
14695
16392
  self._emit("status", {"summary": f"reviewer finish blocked: {gate_reason}"})
14696
16393
  else:
14697
- self._blackboard_mark_approved(output or "finish tool acknowledged", role_key)
16394
+ approval_note = summary_arg or output or "finish tool acknowledged"
16395
+ self._blackboard_mark_approved(approval_note, role_key)
14698
16396
  if not ok and output:
14699
16397
  self._blackboard_append_section(
14700
16398
  "execution_logs",
@@ -14797,25 +16495,151 @@ class SessionState:
14797
16495
  policy_text = trim("\n".join(policy_lines).strip(), 1200)
14798
16496
  return clean_text, policy_text
14799
16497
 
14800
- def _reviewer_final_summary_ready(self, board: dict | None = None) -> bool:
14801
- def _text_good(text: str) -> bool:
14802
- clean = strip_thinking_content(str(text or "")).strip()
14803
- if not clean:
14804
- return False
14805
- if len(clean) >= 60:
14806
- return True
14807
- low = clean.lower()
14808
- tokens = (
14809
- "summary",
14810
- "final",
14811
- "结论",
14812
- "总结",
16498
+ def _final_summary_quality(self, text: str) -> dict:
16499
+ clean = strip_thinking_content(str(text or "")).strip()
16500
+ low = clean.lower()
16501
+ chars = len(clean)
16502
+ category_tokens = {
16503
+ "changes": (
16504
+ "changed",
16505
+ "changes",
16506
+ "change",
16507
+ "modified",
16508
+ "implemented",
16509
+ "implementation",
16510
+ "files",
16511
+ "diff",
16512
+ "patch",
16513
+ "改动",
16514
+ "变更",
16515
+ "修改",
16516
+ "实现",
16517
+ "文件",
16518
+ ),
16519
+ "validation": (
16520
+ "test",
16521
+ "tests",
16522
+ "pytest",
16523
+ "validation",
16524
+ "verified",
16525
+ "verify",
16526
+ "check",
16527
+ "checks",
16528
+ "evidence",
16529
+ "pass",
16530
+ "passed",
16531
+ "验证",
16532
+ "测试",
14813
16533
  "通过",
16534
+ "证据",
16535
+ "日志",
16536
+ ),
16537
+ "risks": (
16538
+ "risk",
16539
+ "risks",
16540
+ "residual",
16541
+ "next step",
16542
+ "next steps",
16543
+ "follow-up",
16544
+ "todo",
16545
+ "limitation",
16546
+ "known issue",
16547
+ "caveat",
14814
16548
  "风险",
16549
+ "残留",
16550
+ "后续",
16551
+ "下一步",
16552
+ "待办",
16553
+ "限制",
16554
+ "已知问题",
14815
16555
  "建议",
14816
- "完成",
14817
- )
14818
- return any(tok in low for tok in tokens)
16556
+ ),
16557
+ }
16558
+ hits: dict[str, bool] = {}
16559
+ for cat, words in category_tokens.items():
16560
+ matched = any(tok in low for tok in words)
16561
+ hits[cat] = bool(matched)
16562
+ covered = sum(1 for v in hits.values() if bool(v))
16563
+ ok = bool(chars >= FINAL_SUMMARY_MIN_CHARS and (covered >= 2 or (covered >= 1 and chars >= 220)))
16564
+ strict_ok = bool(chars >= FINAL_SUMMARY_STRICT_MIN_CHARS and covered >= 2)
16565
+ return {
16566
+ "clean": clean,
16567
+ "chars": int(chars),
16568
+ "covered": int(covered),
16569
+ "hits": hits,
16570
+ "ok": bool(ok),
16571
+ "strict_ok": bool(strict_ok),
16572
+ }
16573
+
16574
+ def _final_summary_sufficient(self, text: str, *, strict: bool = False) -> bool:
16575
+ verdict = self._final_summary_quality(text)
16576
+ return bool(verdict.get("strict_ok" if strict else "ok", False))
16577
+
16578
+ def _finish_requires_structured_summary(self, role: str, tool_name: str) -> bool:
16579
+ role_key = self._sanitize_agent_role(role)
16580
+ if tool_name not in {"finish_task", "finish_current_task", "mark_done"}:
16581
+ return False
16582
+ if not role_key:
16583
+ return False
16584
+ bb = self._ensure_blackboard()
16585
+ profile = self._ensure_blackboard_task_profile(bb)
16586
+ task_type = str(profile.get("task_type", "general") or "general")
16587
+ if task_type == "simple_qa":
16588
+ return False
16589
+ delegate = bb.get("last_delegate", {}) if isinstance(bb.get("last_delegate"), dict) else {}
16590
+ delegate_target = self._sanitize_agent_role(delegate.get("target", ""))
16591
+ delegate_reason = str(delegate.get("reason", "") or "").strip().lower()
16592
+ delegate_instruction = str(delegate.get("instruction", "") or "").strip().lower()
16593
+ summary_markers = (
16594
+ "summary",
16595
+ "wrap-up",
16596
+ "final report",
16597
+ "最终总结",
16598
+ "总结",
16599
+ "收尾",
16600
+ )
16601
+ if delegate_target == role_key and any(tok in delegate_reason or tok in delegate_instruction for tok in summary_markers):
16602
+ return True
16603
+ return bool(role_key == "reviewer" and self._is_multi_agent_mode())
16604
+
16605
+ def _recent_agent_used_tools(
16606
+ self,
16607
+ role: str,
16608
+ tool_names: set[str],
16609
+ *,
16610
+ lookback: int = 20,
16611
+ max_age_seconds: float = 300.0,
16612
+ ) -> bool:
16613
+ role_key = self._sanitize_agent_role(role)
16614
+ if not role_key or not tool_names:
16615
+ return False
16616
+ names = {str(x or "").strip() for x in tool_names if str(x or "").strip()}
16617
+ if not names:
16618
+ return False
16619
+ now_tick = now_ts()
16620
+ ctx = self._agent_context(role_key)
16621
+ rows = ctx[-max(1, int(lookback)) :] if isinstance(ctx, list) else []
16622
+ for row in reversed(rows):
16623
+ if not isinstance(row, dict):
16624
+ continue
16625
+ if str(row.get("role", "") or "").strip().lower() != "tool":
16626
+ continue
16627
+ name = str(row.get("name", "") or "").strip()
16628
+ if name not in names:
16629
+ continue
16630
+ try:
16631
+ ts = float(row.get("ts", 0.0) or 0.0)
16632
+ except Exception:
16633
+ ts = 0.0
16634
+ if ts <= 0.0:
16635
+ return True
16636
+ if (now_tick - ts) <= float(max_age_seconds):
16637
+ return True
16638
+ return False
16639
+
16640
+ def _reviewer_final_summary_ready(self, board: dict | None = None) -> bool:
16641
+ def _text_good(text: str) -> bool:
16642
+ return self._final_summary_sufficient(text, strict=True)
14819
16643
 
14820
16644
  bb = board if isinstance(board, dict) else self._ensure_blackboard()
14821
16645
  approval = bb.get("approval", {}) if isinstance(bb.get("approval"), dict) else {}
@@ -15099,6 +16923,16 @@ class SessionState:
15099
16923
  "intent": envelope["intent"],
15100
16924
  },
15101
16925
  )
16926
+ self._blackboard_history(
16927
+ src,
16928
+ trim(
16929
+ (
16930
+ f"agentbus {src}->{dst} ({envelope['intent']}, id={envelope['id']}): "
16931
+ f"{trim(envelope['payload'], 320)}"
16932
+ ),
16933
+ 520,
16934
+ ),
16935
+ )
15102
16936
  return envelope
15103
16937
 
15104
16938
  def _agent_role_system_prompt(self, role: str) -> str:
@@ -15111,6 +16945,7 @@ class SessionState:
15111
16945
  f"Session absolute writable root is {self.files_root}. "
15112
16946
  "Use relative file paths (for example hello.txt); runtime maps them to session absolute paths. "
15113
16947
  "If '/workspace/...' appears, treat it as a virtual alias only; never create OS-level /workspace in shell. "
16948
+ f"{_detect_os_shell_instruction()} "
15114
16949
  "You must stay within your role boundary and use only provided tools. "
15115
16950
  "Use read_from_blackboard/write_to_blackboard to keep the shared state accurate. "
15116
16951
  "When communicating with other agents, use ask_colleague with structured intent/content. "
@@ -15123,6 +16958,7 @@ class SessionState:
15123
16958
  + "Role objective: analyze user goals, inspect codebase, and produce actionable research notes. "
15124
16959
  + "Prefer read/search/check commands; avoid direct large code modifications. "
15125
16960
  + "When new evidence appears, write concise research updates to blackboard and hand off actionable insights. "
16961
+ + "Proactively use ask_colleague when your findings can unblock developer/reviewer immediately. "
15126
16962
  + "If reviewer sends final_summary_request, produce final wrap-up summary from blackboard evidence and finish."
15127
16963
  )
15128
16964
  if role_key == "reviewer":
@@ -15130,14 +16966,19 @@ class SessionState:
15130
16966
  base
15131
16967
  + "Role objective: verify developer output against goal, run checks/tests, and issue pass/fix decisions. "
15132
16968
  + "If gaps remain, send fix_request to developer with concrete failure evidence and write review_feedback to blackboard. "
15133
- + "If task is complete, write approval evidence and hand off final summary to Explorer "
15134
- + "(via ask_colleague intent=final_summary_request) before ending the task."
16969
+ + "If manager requests final summary, first call read_from_blackboard "
16970
+ + "(sections: code_artifacts, execution_logs, review_feedback, status), then generate a structured summary "
16971
+ + "covering changes, validation evidence, and residual risks/next steps. "
16972
+ + "When finishing, pass this summary in finish_task.summary; empty or vague summary is invalid. "
16973
+ + "If you cannot produce summary from current evidence, hand off Explorer via ask_colleague "
16974
+ + "intent=final_summary_request with explicit missing evidence."
15135
16975
  )
15136
16976
  return (
15137
16977
  base
15138
16978
  + "Role objective: implement code changes based on explorer/reviewer inputs. "
15139
16979
  + "Perform concrete file edits and command execution. "
15140
16980
  + "Continuously record progress and blockers to blackboard. "
16981
+ + "When blocked or uncertain, immediately call ask_colleague to explorer/reviewer with focused intent. "
15141
16982
  + "When implementation batch is ready, send review_request to reviewer via ask_colleague."
15142
16983
  )
15143
16984
 
@@ -15958,6 +17799,37 @@ class SessionState:
15958
17799
  return self._todo_write_rescue(args)
15959
17800
  if name in {"finish_task", "finish_current_task", "mark_done"}:
15960
17801
  summary = trim(str(args.get("summary", "") or "").strip(), 400)
17802
+ if role_key == "explorer":
17803
+ bb = self._ensure_blackboard()
17804
+ delegate = bb.get("last_delegate", {}) if isinstance(bb.get("last_delegate"), dict) else {}
17805
+ delegate_target = self._sanitize_agent_role(delegate.get("target", ""))
17806
+ delegate_reason = str(delegate.get("reason", "") or "").strip().lower()
17807
+ if delegate_target == "explorer" and "summary-handoff" in delegate_reason:
17808
+ return (
17809
+ "Error: explorer summary handoff step must not call finish tool. "
17810
+ "Write structured summary to blackboard first, then wait for manager close."
17811
+ )
17812
+ if self._finish_requires_structured_summary(role_key, name):
17813
+ if role_key == "reviewer" and not self._recent_agent_used_tools(
17814
+ role_key,
17815
+ {"read_from_blackboard"},
17816
+ lookback=24,
17817
+ max_age_seconds=420.0,
17818
+ ):
17819
+ return (
17820
+ "Error: reviewer finalization requires blackboard evidence read. "
17821
+ "Call read_from_blackboard first (sections: code_artifacts, execution_logs, "
17822
+ "review_feedback, status), then call finish_task with structured summary."
17823
+ )
17824
+ if not self._final_summary_sufficient(summary, strict=True):
17825
+ return (
17826
+ "Error: structured final summary is required before finish. "
17827
+ "Provide finish_task.summary with: "
17828
+ "(1) changes/files touched, "
17829
+ "(2) validation evidence (tests/commands/results), "
17830
+ "(3) residual risks or next steps. "
17831
+ "If evidence is missing, read_from_blackboard first or ask Explorer for final_summary_request."
17832
+ )
15961
17833
  if name == "finish_task":
15962
17834
  todo_mark = self.todo.complete_all_open(summary)
15963
17835
  else:
@@ -16036,6 +17908,7 @@ class SessionState:
16036
17908
  if guard_error:
16037
17909
  return guard_error
16038
17910
  out = self.bg.run(args["command"], int(args.get("timeout", 120)))
17911
+ out_filtered, _ = filter_runtime_noise_lines(str(out or ""))
16039
17912
  self._emit(
16040
17913
  "command",
16041
17914
  {
@@ -16045,7 +17918,7 @@ class SessionState:
16045
17918
  "summary": f"background_run: {args['command'][:80]}",
16046
17919
  },
16047
17920
  )
16048
- return out
17921
+ return trim(out_filtered or "(no output)")
16049
17922
  if name == "check_background":
16050
17923
  return self.bg.check(args.get("task_id"))
16051
17924
  if name == "task_create":
@@ -16087,13 +17960,30 @@ class SessionState:
16087
17960
  if section == "original_goal":
16088
17961
  return trim(str(board.get("original_goal", "") or "").strip(), 4000) or "(empty)"
16089
17962
  if section == "status":
17963
+ wd = board.get("watchdog", {}) if isinstance(board.get("watchdog"), dict) else {}
17964
+ dq = board.get("decomposition_queue", {}) if isinstance(board.get("decomposition_queue"), dict) else {}
16090
17965
  return json_dumps(
16091
17966
  {
16092
17967
  "status": board.get("status", "INITIALIZING"),
16093
17968
  "active_agent": board.get("active_agent", ""),
16094
17969
  "manager_cycles": int(board.get("manager_cycles", 0) or 0),
17970
+ "manager_summary_attempts": int(board.get("manager_summary_attempts", 0) or 0),
16095
17971
  "approval": board.get("approval", {}),
16096
17972
  "last_delegate": board.get("last_delegate", {}),
17973
+ "watchdog": {
17974
+ "intent_no_tool_streak": int(wd.get("intent_no_tool_streak", 0) or 0),
17975
+ "repeat_no_tool_streak": int(wd.get("repeat_no_tool_streak", 0) or 0),
17976
+ "state_unchanged_streak": int(wd.get("state_unchanged_streak", 0) or 0),
17977
+ "trigger_count": int(wd.get("trigger_count", 0) or 0),
17978
+ "last_trigger_reason": trim(str(wd.get("last_trigger_reason", "") or "").strip(), 160),
17979
+ },
17980
+ "decomposition_queue": {
17981
+ "active": bool(dq.get("active", False)),
17982
+ "trigger_reason": trim(str(dq.get("trigger_reason", "") or "").strip(), 160),
17983
+ "cursor": int(dq.get("cursor", 0) or 0),
17984
+ "total": len(dq.get("steps", []) or []),
17985
+ "last_error": trim(str(dq.get("last_error", "") or "").strip(), 220),
17986
+ },
16097
17987
  },
16098
17988
  indent=2,
16099
17989
  )
@@ -16559,6 +18449,15 @@ class SessionState:
16559
18449
  output = self._dispatch_tool(name, args, agent_role=role_key)
16560
18450
  except Exception as exc:
16561
18451
  output = f"Error: {exc}"
18452
+ raw_output = str(output or "")
18453
+ filtered_output, filtered_rows = filter_runtime_noise_lines(raw_output)
18454
+ if filtered_rows > 0:
18455
+ if filtered_output:
18456
+ output = filtered_output
18457
+ elif raw_output.startswith("Error:"):
18458
+ output = "Error: runtime socket noise filtered"
18459
+ else:
18460
+ output = "(no output)"
16562
18461
  self._append_agent_context_message(
16563
18462
  role_key,
16564
18463
  {
@@ -16775,6 +18674,7 @@ class SessionState:
16775
18674
  if self.cancel_requested:
16776
18675
  self._emit("status", {"summary": "run interrupted"})
16777
18676
  break
18677
+ self._apply_auto_compact_if_needed("auto:multi-sync")
16778
18678
  with self.lock:
16779
18679
  self.agent_round_index = int(self.agent_round_index) + 1
16780
18680
  self.current_phase = "manager:dispatch"
@@ -16817,6 +18717,7 @@ class SessionState:
16817
18717
  role,
16818
18718
  instruction,
16819
18719
  is_mandatory=bool(route.get("is_mandatory", False)),
18720
+ executor_mode=bool(route.get("executor_mode", False)),
16820
18721
  )
16821
18722
  if role == "explorer":
16822
18723
  self._blackboard_set_status("RESEARCHING")
@@ -16830,13 +18731,26 @@ class SessionState:
16830
18731
  media_inputs_pool=media_inputs_pool,
16831
18732
  media_seen_ts_by_role=media_seen_ts_by_role,
16832
18733
  )
18734
+ board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
16833
18735
  step = self._multi_agent_turn(
16834
18736
  role,
16835
18737
  pinned_selection=pinned_selection,
16836
18738
  media_inputs_round=role_media_inputs,
16837
18739
  )
16838
18740
  self._blackboard_update_from_worker_step(role, step)
18741
+ board_after = self._ensure_blackboard()
18742
+ board_after_fp = self._watchdog_state_fingerprint(board_after)
18743
+ wd_event = self._watchdog_process_worker_step(
18744
+ board_after,
18745
+ role=role,
18746
+ step=step if isinstance(step, dict) else {},
18747
+ state_changed=bool(board_after_fp != board_before_fp),
18748
+ pinned_selection=pinned_selection,
18749
+ )
16839
18750
  status = str(step.get("status", "") or "")
18751
+ if bool(wd_event.get("triggered", False)):
18752
+ idle_counts[role] = 0
18753
+ continue
16840
18754
  if status == "interrupted":
16841
18755
  break
16842
18756
  if status == "skip":
@@ -16845,7 +18759,7 @@ class SessionState:
16845
18759
  idle_counts[role] = 0
16846
18760
  if bool(step.get("stop_due_to_finish", False)):
16847
18761
  note = f"{self._agent_display_name(role)} signaled finish via tool."
16848
- self._blackboard_mark_approved(note, role)
18762
+ # Approval note should come from finish tool payload sync; avoid overwriting with generic text here.
16849
18763
  can_finish_now, finish_gate_reason = self._can_auto_finish_from_approval(
16850
18764
  self._ensure_blackboard(),
16851
18765
  latest_user_ts=self._latest_user_message_ts(),
@@ -16857,7 +18771,7 @@ class SessionState:
16857
18771
  {
16858
18772
  "summary": (
16859
18773
  "reviewer finish deferred: final summary missing; "
16860
- "handoff to explorer via agentbus and continue"
18774
+ "manager will reroute to explorer summary synthesis"
16861
18775
  )
16862
18776
  },
16863
18777
  )
@@ -16961,6 +18875,7 @@ class SessionState:
16961
18875
  if self.cancel_requested:
16962
18876
  self._emit("status", {"summary": "run interrupted"})
16963
18877
  break
18878
+ self._apply_auto_compact_if_needed("auto:multi-seq")
16964
18879
  with self.lock:
16965
18880
  self.agent_round_index = int(self.agent_round_index) + 1
16966
18881
  latest_user_ts = self._latest_user_message_ts()
@@ -16972,6 +18887,28 @@ class SessionState:
16972
18887
  media_inputs=media_inputs_pool,
16973
18888
  roles=role_order,
16974
18889
  )
18890
+ dq = self._normalize_decomposition_queue_state(
18891
+ self._ensure_blackboard().get("decomposition_queue", {})
18892
+ )
18893
+ if bool(dq.get("active", False)):
18894
+ queue_exec = self._watchdog_execute_queue_step(
18895
+ pinned_selection=pinned_selection,
18896
+ )
18897
+ if bool(queue_exec.get("interrupted", False)):
18898
+ break
18899
+ if bool(queue_exec.get("stop_run", False)):
18900
+ self._emit("status", {"summary": "watchdog executor completed task; run paused"})
18901
+ break
18902
+ if not bool(queue_exec.get("executed", False)):
18903
+ if bool(queue_exec.get("queue_active", False)):
18904
+ self._emit(
18905
+ "status",
18906
+ {"summary": "watchdog queue active but no executable step; pausing to avoid deadlock"},
18907
+ )
18908
+ break
18909
+ continue
18910
+ idle_counts[str(queue_exec.get("role", "") or "developer")] = 0
18911
+ continue
16975
18912
  role = current_role if mode == EXECUTION_MODE_SEQUENTIAL else role_order[sync_index % len(role_order)]
16976
18913
  role_media_inputs = self._resolve_role_multimodal_payload(
16977
18914
  role=role,
@@ -16979,12 +18916,27 @@ class SessionState:
16979
18916
  media_inputs_pool=media_inputs_pool,
16980
18917
  media_seen_ts_by_role=media_seen_ts_by_role,
16981
18918
  )
18919
+ board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
16982
18920
  step = self._multi_agent_turn(
16983
18921
  role,
16984
18922
  pinned_selection=pinned_selection,
16985
18923
  media_inputs_round=role_media_inputs,
16986
18924
  )
16987
- status = str(step.get("status", "") or "")
18925
+ safe_step = step if isinstance(step, dict) else {}
18926
+ self._blackboard_update_from_worker_step(role, safe_step)
18927
+ board_after = self._ensure_blackboard()
18928
+ board_after_fp = self._watchdog_state_fingerprint(board_after)
18929
+ wd_event = self._watchdog_process_worker_step(
18930
+ board_after,
18931
+ role=role,
18932
+ step=safe_step,
18933
+ state_changed=bool(board_after_fp != board_before_fp),
18934
+ pinned_selection=pinned_selection,
18935
+ )
18936
+ if bool(wd_event.get("triggered", False)):
18937
+ idle_counts[role] = 0
18938
+ continue
18939
+ status = str(safe_step.get("status", "") or "")
16988
18940
  if status == "interrupted":
16989
18941
  break
16990
18942
  if status == "skip":
@@ -16998,7 +18950,7 @@ class SessionState:
16998
18950
  continue
16999
18951
  if status == "tools":
17000
18952
  idle_counts[role] = 0
17001
- if bool(step.get("stop_due_to_finish", False)):
18953
+ if bool(safe_step.get("stop_due_to_finish", False)):
17002
18954
  self._emit("status", {"summary": "finish tool called; run paused and awaiting user instruction"})
17003
18955
  break
17004
18956
  if mode == EXECUTION_MODE_SEQUENTIAL:
@@ -17016,7 +18968,7 @@ class SessionState:
17016
18968
  idle_counts[role] = int(idle_counts.get(role, 0) or 0) + 1
17017
18969
  should_stop, next_role = self._multi_agent_no_tool_transition(
17018
18970
  role,
17019
- str(step.get("text", "") or ""),
18971
+ str(safe_step.get("text", "") or ""),
17020
18972
  mode=mode,
17021
18973
  idle_counts=idle_counts,
17022
18974
  )
@@ -17163,9 +19115,7 @@ class SessionState:
17163
19115
  },
17164
19116
  )
17165
19117
  break
17166
- self._microcompact()
17167
- if self._estimate_tokens() > self.context_token_upper_bound:
17168
- self._auto_compact("auto")
19118
+ self._apply_auto_compact_if_needed("auto")
17169
19119
  notifs = self.bg.drain()
17170
19120
  if notifs:
17171
19121
  text = "\n".join(f"[bg:{n['task_id']}] {n['status']}: {n['result']}" for n in notifs)
@@ -17197,6 +19147,32 @@ class SessionState:
17197
19147
  self._seed_multi_agent_contexts_if_needed(self.runtime_reclassify_goal or "")
17198
19148
  self._multi_agent_worker(pinned_selection=pinned_selection)
17199
19149
  return
19150
+ dq = self._normalize_decomposition_queue_state(
19151
+ self._ensure_blackboard().get("decomposition_queue", {})
19152
+ )
19153
+ if bool(dq.get("active", False)):
19154
+ queue_exec = self._watchdog_execute_queue_step(
19155
+ pinned_selection=pinned_selection,
19156
+ )
19157
+ if bool(queue_exec.get("interrupted", False)):
19158
+ self._emit("status", {"summary": "run interrupted"})
19159
+ break
19160
+ if bool(queue_exec.get("stop_run", False)):
19161
+ self._emit("status", {"summary": "watchdog executor completed task; run paused"})
19162
+ break
19163
+ if not bool(queue_exec.get("executed", False)):
19164
+ if bool(queue_exec.get("queue_active", False)):
19165
+ self._emit(
19166
+ "status",
19167
+ {"summary": "watchdog queue active but no executable step; pausing to avoid deadlock"},
19168
+ )
19169
+ break
19170
+ continue
19171
+ no_tool_rounds = 0
19172
+ arbiter_planning_rounds = 0
19173
+ fault_counter = 0
19174
+ last_fault_reason = ""
19175
+ continue
17200
19176
  latest_user_ts = self._latest_user_message_ts()
17201
19177
  media_inputs_round = None
17202
19178
  if latest_user_ts > media_last_user_ts:
@@ -17392,6 +19368,32 @@ class SessionState:
17392
19368
  arbiter_planning_rounds = 0
17393
19369
  self._emit("status", {"summary": "waiting for user input: assistant asked for a decision"})
17394
19370
  break
19371
+ wd_event = self._watchdog_process_worker_step(
19372
+ self._ensure_blackboard(),
19373
+ role=single_role,
19374
+ step={
19375
+ "status": "no-tools",
19376
+ "text": decision_probe,
19377
+ "tool_results": [],
19378
+ },
19379
+ state_changed=False,
19380
+ pinned_selection=pinned_selection,
19381
+ )
19382
+ if bool(wd_event.get("triggered", False)):
19383
+ no_tool_rounds = 0
19384
+ arbiter_planning_rounds = 0
19385
+ fault_counter = 0
19386
+ last_fault_reason = ""
19387
+ self._emit(
19388
+ "status",
19389
+ {
19390
+ "summary": (
19391
+ "watchdog triggered in single-agent planner mode; "
19392
+ "switching to stateless executor queue"
19393
+ )
19394
+ },
19395
+ )
19396
+ continue
17395
19397
  clean_decision_probe = strip_thinking_content(decision_probe).strip()
17396
19398
  if bool(self.arbiter_enabled) and len(clean_decision_probe) >= int(ARBITER_TRIGGER_MIN_CONTENT_CHARS):
17397
19399
  arbiter_decision = self._call_arbiter_llm(clean_decision_probe, thinking_text)
@@ -17645,6 +19647,8 @@ class SessionState:
17645
19647
  stop_due_to_finish_task = False
17646
19648
  hard_break_reason = ""
17647
19649
  interrupted_in_tools = False
19650
+ single_round_tool_results: list[dict] = []
19651
+ single_watchdog_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
17648
19652
  round_tool_fp = self._tool_calls_fingerprint(tool_calls)
17649
19653
  for tc in tool_calls:
17650
19654
  if self.cancel_requested:
@@ -17800,6 +19804,15 @@ class SessionState:
17800
19804
  output = self._dispatch_tool(name, args)
17801
19805
  except Exception as exc:
17802
19806
  output = f"Error: {exc}"
19807
+ raw_output = str(output or "")
19808
+ filtered_output, filtered_rows = filter_runtime_noise_lines(raw_output)
19809
+ if filtered_rows > 0:
19810
+ if filtered_output:
19811
+ output = filtered_output
19812
+ elif raw_output.startswith("Error:"):
19813
+ output = "Error: runtime socket noise filtered"
19814
+ else:
19815
+ output = "(no output)"
17803
19816
  tool_key = str(dispatched_name or name).strip() or str(name or "").strip() or "unknown-tool"
17804
19817
  if str(output).startswith("Error"):
17805
19818
  round_error_count += 1
@@ -17827,6 +19840,14 @@ class SessionState:
17827
19840
  if dispatched_name in {"finish_task", "finish_current_task", "mark_done"}:
17828
19841
  stop_due_to_finish_task = True
17829
19842
  self.messages.append({"role": "tool", "tool_call_id": tc["id"], "name": name, "content": trim(output), "ts": now_ts()})
19843
+ single_round_tool_results.append(
19844
+ {
19845
+ "name": dispatched_name or name,
19846
+ "args": args if isinstance(args, dict) else {},
19847
+ "output": trim(str(output or ""), 3000),
19848
+ "ok": not str(output).startswith("Error:"),
19849
+ }
19850
+ )
17830
19851
  self._emit("tool_result", {"name": name, "result": trim(output, 500), "summary": f"tool done: {name}"})
17831
19852
  if int(tool_error_streaks.get(tool_key, 0) or 0) >= HARD_BREAK_TOOL_ERROR_THRESHOLD:
17832
19853
  stop_due_to_hard_break = True
@@ -17855,6 +19876,18 @@ class SessionState:
17855
19876
  self.current_phase = "post-tools"
17856
19877
  if interrupted_in_tools:
17857
19878
  break
19879
+ single_watchdog_after_board = self._ensure_blackboard()
19880
+ single_watchdog_after_fp = self._watchdog_state_fingerprint(single_watchdog_after_board)
19881
+ self._watchdog_process_worker_step(
19882
+ single_watchdog_after_board,
19883
+ role=single_role,
19884
+ step={
19885
+ "status": "tools",
19886
+ "tool_results": single_round_tool_results,
19887
+ },
19888
+ state_changed=bool(single_watchdog_after_fp != single_watchdog_before_fp),
19889
+ pinned_selection=pinned_selection,
19890
+ )
17858
19891
  if stop_due_to_hard_break:
17859
19892
  note = (
17860
19893
  "Execution paused after repeated tool/recovery failures. "
@@ -19242,7 +21275,6 @@ window.MathJax={
19242
21275
  }
19243
21276
  };
19244
21277
  </script>
19245
- <script defer src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
19246
21278
  </head>
19247
21279
  <body>
19248
21280
  <div class="bg-layer"></div>
@@ -19497,7 +21529,7 @@ main{display:grid;grid-template-columns:minmax(220px,260px) minmax(520px,920px)
19497
21529
  .upload-list{margin-top:6px;border:1px solid var(--line);border-radius:10px;background:#fff;max-height:88px;overflow:auto;padding:6px}
19498
21530
  .row{display:flex;gap:8px;margin-top:8px;flex-wrap:wrap}
19499
21531
  .ctx-live{margin-left:auto;display:flex;align-items:center;gap:8px;padding:8px 10px;border:1px solid #d6deea;border-radius:999px;background:#f8fbff;min-width:250px}
19500
- .ctx-live-dot{width:8px;height:8px;border-radius:50%;background:#13b8a6;box-shadow:0 0 0 rgba(19,184,166,.45);animation:ctxPulse 1.6s ease-in-out infinite}
21532
+ .ctx-live-dot{width:8px;height:8px;border-radius:50%;background:#13b8a6;box-shadow:0 0 0 rgba(19,184,166,.45)}
19501
21533
  .ctx-live-bar{position:relative;display:inline-block;width:84px;height:6px;border-radius:999px;background:#e5edf8;overflow:hidden}
19502
21534
  .ctx-live-fill{display:block;height:100%;width:0%;background:linear-gradient(90deg,#13b8a6,#1f6feb);transition:width .24s ease,background .24s ease}
19503
21535
  .ctx-live.warn .ctx-live-dot{background:#e1a400}
@@ -19588,10 +21620,10 @@ APP_JS = """const S={sessions:[],activeId:null,snap:null,es:null,esId:'',skills:
19588
21620
  const MD_CACHE=new Map();
19589
21621
  const MD_CACHE_MAX=420;
19590
21622
  const STATIC_UI=((new URLSearchParams(location.search)).get('static_ui')==='1');
19591
- const SNAPSHOT_DELAY_VISIBLE_MS=120;
19592
- const SNAPSHOT_DELAY_HIDDEN_MS=1200;
19593
- const SESSION_POLL_VISIBLE_MS=12000;
19594
- const SESSION_POLL_HIDDEN_MS=30000;
21623
+ const SNAPSHOT_DELAY_VISIBLE_MS=300;
21624
+ const SNAPSHOT_DELAY_HIDDEN_MS=2400;
21625
+ const SESSION_POLL_VISIBLE_MS=30000;
21626
+ const SESSION_POLL_HIDDEN_MS=60000;
19595
21627
  const PANEL_SCROLL_ACTIVE_MS=1100;
19596
21628
  const CHAT_SCROLL_ACTIVE_MS=420;
19597
21629
  const CHAT_SCROLL_LOCK_MS=1200;
@@ -19608,10 +21640,10 @@ const DELTA_MAX_OPERATIONS=220;
19608
21640
  const DELTA_MAX_UPLOADS=40;
19609
21641
  const DELTA_WATCHDOG_INTERVAL_MS=1800;
19610
21642
  const DELTA_WATCHDOG_STALL_MS=9000;
19611
- const MARKDOWN_WORKER_MIN_CHARS=2200;
21643
+ const MARKDOWN_WORKER_MIN_CHARS=800;
19612
21644
  const MARKDOWN_WORKER_MAX_PENDING=96;
19613
21645
  const MARKDOWN_WORKER_REQ_TTL_MS=45000;
19614
- const CHAT_VIRT={heights:Object.create(null),heightVersion:0,avgHeight:140,overscanPx:900,maxCacheKeys:2800,poolByKind:Object.create(null),poolSize:0,poolMax:420};
21646
+ const CHAT_VIRT={heights:Object.create(null),heightVersion:0,avgHeight:140,overscanPx:400,maxCacheKeys:1200,poolByKind:Object.create(null),poolSize:0,poolMax:180};
19615
21647
  const RENDER_EVT_TYPES=new Set(['render_frame','render_bridge']);
19616
21648
  const RENDER_QUEUE_MAX=140;
19617
21649
  const RENDER_META_MIN_INTERVAL_MS=180;
@@ -20190,7 +22222,15 @@ function _mathRunTypeset(root,key=''){
20190
22222
  const run=(retry)=>{
20191
22223
  const mj=window.MathJax;
20192
22224
  if(!mj||typeof mj.typesetPromise!=='function'){
20193
- if(retry<10)setTimeout(()=>run(retry+1),180);
22225
+ // Lazy-load MathJax on first actual math demand
22226
+ if(!window._mjaxLoading){
22227
+ window._mjaxLoading=true;
22228
+ const s=document.createElement('script');
22229
+ s.src='https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js';
22230
+ s.async=true;
22231
+ document.head.appendChild(s);
22232
+ }
22233
+ if(retry<20)setTimeout(()=>run(retry+1),200);
20194
22234
  return;
20195
22235
  }
20196
22236
  if(root._mathPending)return;
@@ -23630,16 +25670,35 @@ Use this skill when tasks match this flow pattern and reusable execution is need
23630
25670
  return self.model_catalog()
23631
25671
 
23632
25672
class AgentHTTPServer(ThreadingHTTPServer):
    """Threading HTTP server that carries the application context.

    Socket errors raised while handling, shutting down or closing a request
    are passed to ``swallow_benign_socket_error`` (defined elsewhere in this
    file; presumably it recognises harmless client disconnects and returns a
    truthy value for them -- confirm against its definition).  Errors it does
    not claim are propagated unchanged.
    """

    # socketserver.ThreadingMixIn knobs: run request threads as daemons and do
    # not wait for them in server_close().
    daemon_threads = True
    block_on_close = False

    def __init__(self, addr: tuple[str, int], handler, app: AppContext):
        """Bind to *addr* with *handler* and expose *app* as ``self.app``."""
        super().__init__(addr, handler)
        self.app = app

    def handle_error(self, request, client_address):
        """Suppress benign socket errors; defer everything else to the base class."""
        exc = sys.exc_info()[1]
        if not swallow_benign_socket_error(exc, "agent-http.handle_error"):
            return super().handle_error(request, client_address)
        return None

    def shutdown_request(self, request):
        """Shut the request down, ignoring benign ``OSError`` from the socket."""
        try:
            super().shutdown_request(request)
        except OSError as exc:
            if not swallow_benign_socket_error(exc, "agent-http.shutdown_request"):
                raise

    def close_request(self, request):
        """Close the request, ignoring benign ``OSError`` from the socket."""
        try:
            super().close_request(request)
        except OSError as exc:
            if not swallow_benign_socket_error(exc, "agent-http.close_request"):
                raise
25701
+
23643
25702
  class Handler(BaseHTTPRequestHandler):
23644
25703
  protocol_version = "HTTP/1.1"
23645
25704
  server_version = f"StandaloneWebAgent/{APP_VERSION}"
@@ -23650,8 +25709,10 @@ class Handler(BaseHTTPRequestHandler):
23650
25709
def handle(self):
    """Run the base request loop, suppressing benign socket errors.

    Any exception escaping ``super().handle()`` is offered to
    ``swallow_benign_socket_error`` with the tag ``"handler.handle"``; it is
    re-raised unless that helper returns a truthy value.
    """
    try:
        super().handle()
    except Exception as exc:
        if not swallow_benign_socket_error(exc, "handler.handle"):
            raise
23655
25716
 
23656
25717
  @property
23657
25718
  def app(self) -> AppContext:
@@ -23678,48 +25739,70 @@ class Handler(BaseHTTPRequestHandler):
23678
25739
 
23679
25740
def _send_json(self, obj: object, status: int = 200):
    """Serialise *obj* with ``json_dumps`` and send it as a UTF-8 JSON response.

    Args:
        obj: Value to serialise.
        status: HTTP status code for the response.

    Errors raised while writing are re-raised unless
    ``swallow_benign_socket_error`` claims them (tag ``"handler.send_json"``).
    Serialisation happens before the guarded section, so encoding errors
    always propagate.
    """
    payload = json_dumps(obj).encode("utf-8")
    header_rows = (
        ("Content-Type", "application/json; charset=utf-8"),
        ("Content-Length", str(len(payload))),
        ("Cache-Control", "no-store"),
    )
    try:
        self.send_response(status)
        for key, value in header_rows:
            self.send_header(key, value)
        self.end_headers()
        self.wfile.write(payload)
    except Exception as exc:
        if not swallow_benign_socket_error(exc, "handler.send_json"):
            raise
23687
25753
 
23688
25754
  def _send_text(self, text: str, content_type: str = "text/plain; charset=utf-8", status: int = 200):
23689
25755
  body = text.encode("utf-8")
23690
- self.send_response(status)
23691
- self.send_header("Content-Type", content_type)
23692
- self.send_header("Content-Length", str(len(body)))
23693
- self.send_header("Cache-Control", "no-store")
23694
- self.send_header("Pragma", "no-cache")
23695
- self.send_header("Expires", "0")
23696
- self.end_headers()
23697
- self.wfile.write(body)
25756
+ try:
25757
+ self.send_response(status)
25758
+ self.send_header("Content-Type", content_type)
25759
+ self.send_header("Content-Length", str(len(body)))
25760
+ self.send_header("Cache-Control", "no-store")
25761
+ self.send_header("Pragma", "no-cache")
25762
+ self.send_header("Expires", "0")
25763
+ self.end_headers()
25764
+ self.wfile.write(body)
25765
+ except Exception as exc:
25766
+ if swallow_benign_socket_error(exc, "handler.send_text"):
25767
+ return
25768
+ raise
23698
25769
 
23699
25770
  def _send_bytes(self, data: bytes, content_type: str, filename: str):
23700
- self.send_response(200)
23701
- self.send_header("Content-Type", content_type)
23702
- self.send_header("Content-Disposition", f'attachment; filename="{filename}"')
23703
- self.send_header("Content-Length", str(len(data)))
23704
- self.end_headers()
23705
- self.wfile.write(data)
25771
+ try:
25772
+ self.send_response(200)
25773
+ self.send_header("Content-Type", content_type)
25774
+ self.send_header("Content-Disposition", f'attachment; filename="{filename}"')
25775
+ self.send_header("Content-Length", str(len(data)))
25776
+ self.end_headers()
25777
+ self.wfile.write(data)
25778
+ except Exception as exc:
25779
+ if swallow_benign_socket_error(exc, "handler.send_bytes"):
25780
+ return
25781
+ raise
23706
25782
 
23707
25783
  def _send_inline_bytes(self, data: bytes, content_type: str, status: int = 200):
23708
- self.send_response(status)
23709
- self.send_header("Content-Type", content_type)
23710
- self.send_header("Content-Length", str(len(data)))
23711
- self.send_header("Content-Disposition", "inline")
23712
- self.send_header("Cache-Control", "no-store")
23713
- self.end_headers()
23714
- self.wfile.write(data)
25784
+ try:
25785
+ self.send_response(status)
25786
+ self.send_header("Content-Type", content_type)
25787
+ self.send_header("Content-Length", str(len(data)))
25788
+ self.send_header("Content-Disposition", "inline")
25789
+ self.send_header("Cache-Control", "no-store")
25790
+ self.end_headers()
25791
+ self.wfile.write(data)
25792
+ except Exception as exc:
25793
+ if swallow_benign_socket_error(exc, "handler.send_inline_bytes"):
25794
+ return
25795
+ raise
23715
25796
 
23716
25797
  def _sse_write(self, payload: bytes) -> bool:
23717
25798
  try:
23718
25799
  self.wfile.write(payload)
23719
25800
  self.wfile.flush()
23720
25801
  return True
23721
- except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError, TimeoutError, OSError):
23722
- return False
25802
+ except Exception as exc:
25803
+ if swallow_benign_socket_error(exc, "handler.sse_write"):
25804
+ return False
25805
+ raise
23723
25806
 
23724
25807
  def do_GET(self):
23725
25808
  parsed_url = urlparse(self.path)
@@ -24137,12 +26220,17 @@ class Handler(BaseHTTPRequestHandler):
24137
26220
  return self._send_json({"ok": True})
24138
26221
 
24139
26222
  def _stream_events(self, sess: SessionState):
24140
- self.send_response(HTTPStatus.OK)
24141
- self.send_header("Content-Type", "text/event-stream; charset=utf-8")
24142
- self.send_header("Cache-Control", "no-cache")
24143
- self.send_header("Connection", "keep-alive")
24144
- self.send_header("X-Accel-Buffering", "no")
24145
- self.end_headers()
26223
+ try:
26224
+ self.send_response(HTTPStatus.OK)
26225
+ self.send_header("Content-Type", "text/event-stream; charset=utf-8")
26226
+ self.send_header("Cache-Control", "no-cache")
26227
+ self.send_header("Connection", "keep-alive")
26228
+ self.send_header("X-Accel-Buffering", "no")
26229
+ self.end_headers()
26230
+ except Exception as exc:
26231
+ if swallow_benign_socket_error(exc, "handler.stream_events.headers"):
26232
+ return
26233
+ raise
24146
26234
  sub = sess.events.subscribe()
24147
26235
  try:
24148
26236
  hello = (
@@ -24162,8 +26250,9 @@ class Handler(BaseHTTPRequestHandler):
24162
26250
  chunk = f": ping {int(now_ts())}\n\n".encode("utf-8")
24163
26251
  if not self._sse_write(chunk):
24164
26252
  break
24165
- except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
24166
- pass
26253
+ except Exception as exc:
26254
+ if not swallow_benign_socket_error(exc, "handler.stream_events.loop"):
26255
+ raise
24167
26256
  finally:
24168
26257
  sess.events.unsubscribe(sub)
24169
26258
 
@@ -24177,8 +26266,10 @@ class SkillsHandler(BaseHTTPRequestHandler):
24177
26266
  def handle(self):
24178
26267
  try:
24179
26268
  super().handle()
24180
- except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, TimeoutError):
24181
- return
26269
+ except Exception as exc:
26270
+ if swallow_benign_socket_error(exc, "skills-handler.handle"):
26271
+ return
26272
+ raise
24182
26273
 
24183
26274
  @property
24184
26275
  def app(self) -> AppContext:
@@ -24205,23 +26296,33 @@ class SkillsHandler(BaseHTTPRequestHandler):
24205
26296
 
24206
26297
  def _send_json(self, obj: object, status: int = 200):
24207
26298
  body = json_dumps(obj).encode("utf-8")
24208
- self.send_response(status)
24209
- self.send_header("Content-Type", "application/json; charset=utf-8")
24210
- self.send_header("Content-Length", str(len(body)))
24211
- self.send_header("Cache-Control", "no-store")
24212
- self.end_headers()
24213
- self.wfile.write(body)
26299
+ try:
26300
+ self.send_response(status)
26301
+ self.send_header("Content-Type", "application/json; charset=utf-8")
26302
+ self.send_header("Content-Length", str(len(body)))
26303
+ self.send_header("Cache-Control", "no-store")
26304
+ self.end_headers()
26305
+ self.wfile.write(body)
26306
+ except Exception as exc:
26307
+ if swallow_benign_socket_error(exc, "skills-handler.send_json"):
26308
+ return
26309
+ raise
24214
26310
 
24215
26311
  def _send_text(self, text: str, content_type: str = "text/plain; charset=utf-8", status: int = 200):
24216
26312
  body = text.encode("utf-8")
24217
- self.send_response(status)
24218
- self.send_header("Content-Type", content_type)
24219
- self.send_header("Content-Length", str(len(body)))
24220
- self.send_header("Cache-Control", "no-store")
24221
- self.send_header("Pragma", "no-cache")
24222
- self.send_header("Expires", "0")
24223
- self.end_headers()
24224
- self.wfile.write(body)
26313
+ try:
26314
+ self.send_response(status)
26315
+ self.send_header("Content-Type", content_type)
26316
+ self.send_header("Content-Length", str(len(body)))
26317
+ self.send_header("Cache-Control", "no-store")
26318
+ self.send_header("Pragma", "no-cache")
26319
+ self.send_header("Expires", "0")
26320
+ self.end_headers()
26321
+ self.wfile.write(body)
26322
+ except Exception as exc:
26323
+ if swallow_benign_socket_error(exc, "skills-handler.send_text"):
26324
+ return
26325
+ raise
24225
26326
 
24226
26327
  def do_GET(self):
24227
26328
  parsed_url = urlparse(self.path)
@@ -24833,7 +26934,15 @@ def main():
24833
26934
  elif int(skills_port) != int(args.port):
24834
26935
  try:
24835
26936
  skills_server = AgentHTTPServer((args.host, skills_port), SkillsHandler, app)
24836
- skills_thread = threading.Thread(target=skills_server.serve_forever, daemon=True)
26937
+
26938
+ def _skills_serve_loop():
26939
+ try:
26940
+ skills_server.serve_forever()
26941
+ except OSError as exc:
26942
+ if not swallow_benign_socket_error(exc, "skills-server.serve_forever"):
26943
+ raise
26944
+
26945
+ skills_thread = threading.Thread(target=_skills_serve_loop, daemon=True)
24837
26946
  skills_thread.start()
24838
26947
  setattr(app, "skills_ui_enabled", True)
24839
26948
  except Exception as exc:
@@ -24944,6 +27053,12 @@ def main():
24944
27053
  server.serve_forever()
24945
27054
  except KeyboardInterrupt:
24946
27055
  print("\n[web-agent] shutting down")
27056
+ except OSError as exc:
27057
+ if swallow_benign_socket_error(exc, "main.serve_forever"):
27058
+ if BENIGN_SOCKET_DEBUG_LOG_ENABLED:
27059
+ print(f"\n[web-agent][debug] socket closed benignly ({trim(str(exc), 180)}), shutting down")
27060
+ else:
27061
+ raise
24947
27062
  finally:
24948
27063
  try:
24949
27064
  persist_report = app.persist_all_sessions(include_running=True, lock_timeout=0.6)