clouds-coder 2026.3.7__tar.gz → 2026.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import base64
5
5
  from collections import deque
6
6
  import csv
7
7
  import difflib
8
+ import errno
8
9
  import html
9
10
  import hashlib
10
11
  import hmac
@@ -35,7 +36,7 @@ from pathlib import Path, PurePosixPath
35
36
  from urllib.error import HTTPError, URLError
36
37
  from urllib.parse import parse_qs, unquote, urlparse
37
38
  from urllib.request import Request, urlopen
38
- APP_VERSION = "2026.03.07"
39
+ APP_VERSION = "0.1.1"
39
40
  DEFAULT_OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
40
41
  DEFAULT_OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5-coder:7b")
41
42
  WORKDIR = Path(os.getenv("AGENT_WORKDIR", os.getcwd())).resolve()
@@ -71,6 +72,14 @@ DEFAULT_TIMEOUT_SECONDS = max(
71
72
  DEFAULT_REQUEST_TIMEOUT = DEFAULT_TIMEOUT_SECONDS
72
73
  AUTO_CONTINUE_BUDGET_DEFAULT = 30
73
74
  AGENT_MAX_OUTPUT_TOKENS = 2200
75
+ WATCHDOG_INTENT_NO_TOOL_THRESHOLD = 2
76
+ WATCHDOG_REPEAT_NO_TOOL_THRESHOLD = 2
77
+ WATCHDOG_STATE_STALL_THRESHOLD = 6
78
+ WATCHDOG_CONTEXT_STALL_THRESHOLD = 2
79
+ WATCHDOG_REPEAT_SIMILARITY_THRESHOLD = 0.85
80
+ WATCHDOG_CONTEXT_NEAR_RATIO = 0.92
81
+ WATCHDOG_MAX_DECOMPOSE_STEPS = 12
82
+ WATCHDOG_STEP_MAX_ATTEMPTS = 2
74
83
  EMPTY_ACTION_MIN_CONTENT_CHARS = 5
75
84
  EMPTY_ACTION_WAKEUP_RETRY_LIMIT = 2
76
85
  THINKING_BUDGET_FORCE_RATIO = 0.85
@@ -98,6 +107,22 @@ LIVE_INPUT_WEIGHT_BASE_DELAYED = 0.35
98
107
  LIVE_INPUT_WEIGHT_BASE_NORMAL = 0.65
99
108
  LIVE_INPUT_WEIGHT_STEP_DELAYED = 0.30
100
109
  LIVE_INPUT_WEIGHT_STEP_NORMAL = 0.20
110
+ SOCKET_NOISE_LINE_PATTERNS = (
111
+ re.compile(r"\bwinerror\s*10038\b", re.IGNORECASE),
112
+ re.compile(r"\bwsaenotsock\b", re.IGNORECASE),
113
+ re.compile(r"\bsocket\s+closed\s+benignly\b", re.IGNORECASE),
114
+ re.compile(r"\bbenign\s+socket\s+error\b", re.IGNORECASE),
115
+ )
116
+ BENIGN_SOCKET_DEBUG_LOG_ENABLED = str(os.getenv("AGENT_DEBUG_SOCKET_LOG", "") or "").strip().lower() in {
117
+ "1",
118
+ "true",
119
+ "yes",
120
+ "on",
121
+ "debug",
122
+ }
123
+ BENIGN_SOCKET_LOG_INTERVAL_SECONDS = 30.0
124
+ FINAL_SUMMARY_MIN_CHARS = 80
125
+ FINAL_SUMMARY_STRICT_MIN_CHARS = 120
101
126
  RUNTIME_CONTROL_HINT_PREFIXES = (
102
127
  "<reminder>",
103
128
  "<todo-rescue>",
@@ -170,6 +195,7 @@ TASK_PROFILE_TYPES = (
170
195
  )
171
196
  TASK_LEVEL_CHOICES = (1, 2, 3, 4, 5)
172
197
  TASK_SCALE_PREFERENCES = ("fast", "balanced", "thorough")
198
+ SEMANTIC_CONFIDENCE_CHOICES = ("high", "medium", "low")
173
199
  TASK_LEVEL_POLICIES: dict[int, dict] = {
174
200
  1: {
175
201
  "name": "simple_direct_answer",
@@ -712,6 +738,37 @@ def model_language_instruction(lang: str) -> str:
712
738
  )
713
739
 
714
740
 
741
+ def _detect_os_shell_instruction() -> str:
742
+ """Return a shell environment note for the agent system prompt based on the host OS."""
743
+ import platform as _platform
744
+ _sys = _platform.system()
745
+ if _sys == "Windows":
746
+ return (
747
+ "Shell environment: Windows (cmd.exe via shell=True). "
748
+ "IMPORTANT — use Windows-native commands only: "
749
+ "use 'dir' (not 'ls'), 'type' (not 'cat'), 'del' (not 'rm'), "
750
+ "'move' (not 'mv'), 'copy' (not 'cp'), 'findstr' (not 'grep'), "
751
+ "'where' (not 'which'), 'echo %VAR%' (not 'echo $VAR'). "
752
+ "To list files recursively use 'dir /s /b'. "
753
+ "Path separator is backslash (\\). "
754
+ "Do NOT use POSIX paths like /workspace, /tmp, /usr, ~/... — they do not exist. "
755
+ "Working directory is already set; use relative paths or the absolute session root shown above."
756
+ )
757
+ if _sys == "Darwin":
758
+ return (
759
+ "Shell environment: macOS (bash/zsh). "
760
+ "Standard POSIX commands are available (ls, cat, grep, find, etc.). "
761
+ "Package manager is 'brew'. "
762
+ "Do NOT assume Linux-specific paths like /proc or /etc/os-release exist. "
763
+ "Use relative paths or the absolute session root shown above."
764
+ )
765
+ # Linux / other POSIX
766
+ return (
767
+ "Shell environment: Linux (bash). "
768
+ "Standard POSIX commands are available (ls, cat, grep, find, etc.). "
769
+ "Use relative paths or the absolute session root shown above."
770
+ )
771
+
715
772
  def resolve_web_ui_dir_path(raw: str, base_dir: Path | None = None) -> Path:
716
773
  txt = str(raw or "").strip()
717
774
  if not txt:
@@ -896,6 +953,103 @@ def guess_ext_from_mime(mime: str, fallback: str = ".bin") -> str:
896
953
  def now_ts() -> float:
897
954
  return time.time()
898
955
 
956
+
957
+ _benign_socket_log_lock = threading.Lock()
958
+ _benign_socket_log_state: dict[str, dict[str, float | int]] = {}
959
+
960
+
961
+ def filter_runtime_noise_lines(text: str) -> tuple[str, int]:
962
+ raw = str(text or "")
963
+ if not raw:
964
+ return "", 0
965
+ kept: list[str] = []
966
+ dropped = 0
967
+ for line in raw.splitlines():
968
+ row = str(line or "")
969
+ if any(p.search(row) for p in SOCKET_NOISE_LINE_PATTERNS):
970
+ dropped += 1
971
+ continue
972
+ kept.append(row)
973
+ return "\n".join(kept).strip(), int(dropped)
974
+
975
+
976
+ def is_benign_socket_error(exc: BaseException | None) -> bool:
977
+ if exc is None:
978
+ return False
979
+ if isinstance(exc, (BrokenPipeError, ConnectionResetError, ConnectionAbortedError, TimeoutError)):
980
+ return True
981
+ if not isinstance(exc, OSError):
982
+ return False
983
+ winerror = int(getattr(exc, "winerror", 0) or 0)
984
+ if winerror in {10038, 10053, 10054, 10057, 10093}: # 10093 = WSANOTINITIALISED (selector on pipe)
985
+ return True
986
+ err = int(getattr(exc, "errno", 0) or 0)
987
+ benign_errno = {
988
+ int(getattr(errno, "EPIPE", 32)),
989
+ int(getattr(errno, "ECONNRESET", 104)),
990
+ int(getattr(errno, "ECONNABORTED", 103)),
991
+ int(getattr(errno, "ENOTCONN", 107)),
992
+ int(getattr(errno, "EBADF", 9)),
993
+ }
994
+ return err in benign_errno
995
+
996
+
997
+ def _socket_error_code(exc: BaseException | None) -> str:
998
+ if not isinstance(exc, OSError):
999
+ return str(type(exc).__name__ if exc is not None else "unknown")
1000
+ winerror = int(getattr(exc, "winerror", 0) or 0)
1001
+ if winerror > 0:
1002
+ return f"winerror:{winerror}"
1003
+ err = int(getattr(exc, "errno", 0) or 0)
1004
+ if err > 0:
1005
+ return f"errno:{err}"
1006
+ return str(type(exc).__name__ if exc is not None else "OSError")
1007
+
1008
+
1009
+ def _log_benign_socket_error_limited(exc: BaseException | None, where: str = ""):
1010
+ if not BENIGN_SOCKET_DEBUG_LOG_ENABLED:
1011
+ return
1012
+ code = _socket_error_code(exc)
1013
+ location = str(where or "runtime").strip()
1014
+ key = f"{location}|{code}"
1015
+ now = now_ts()
1016
+ suppressed = 0
1017
+ should_emit = False
1018
+ with _benign_socket_log_lock:
1019
+ row = _benign_socket_log_state.get(key)
1020
+ if not isinstance(row, dict):
1021
+ _benign_socket_log_state[key] = {"last_ts": now, "suppressed": 0}
1022
+ should_emit = True
1023
+ else:
1024
+ last_ts = float(row.get("last_ts", 0.0) or 0.0)
1025
+ if now - last_ts >= BENIGN_SOCKET_LOG_INTERVAL_SECONDS:
1026
+ suppressed = int(row.get("suppressed", 0) or 0)
1027
+ row["last_ts"] = now
1028
+ row["suppressed"] = 0
1029
+ should_emit = True
1030
+ else:
1031
+ row["suppressed"] = int(row.get("suppressed", 0) or 0) + 1
1032
+ if len(_benign_socket_log_state) > 512:
1033
+ stale = sorted(
1034
+ _benign_socket_log_state.items(),
1035
+ key=lambda item: float((item[1] or {}).get("last_ts", 0.0) if isinstance(item[1], dict) else 0.0),
1036
+ )[:128]
1037
+ for dead_key, _ in stale:
1038
+ _benign_socket_log_state.pop(dead_key, None)
1039
+ if should_emit:
1040
+ msg = f"[web-agent][debug] benign socket error {code} at {location}"
1041
+ if suppressed > 0:
1042
+ msg = f"{msg} (+{suppressed} suppressed)"
1043
+ print(msg, file=sys.stderr)
1044
+
1045
+
1046
+ def swallow_benign_socket_error(exc: BaseException | None, where: str = "") -> bool:
1047
+ if not is_benign_socket_error(exc):
1048
+ return False
1049
+ _log_benign_socket_error_limited(exc, where)
1050
+ return True
1051
+
1052
+
899
1053
  def normalize_timeout_seconds(
900
1054
  raw: object,
901
1055
  *,
@@ -8511,6 +8665,7 @@ class SessionState:
8511
8665
  f"Session absolute writable root is {self.files_root}. "
8512
8666
  "For file tools, prefer relative paths like hello.txt; runtime will map them to the absolute session root. "
8513
8667
  "The '/workspace/...' form is only a virtual alias for path arguments; never create OS-level /workspace in shell. "
8668
+ f"{_detect_os_shell_instruction()} "
8514
8669
  "Use tools to inspect files, execute commands, and edit code safely. "
8515
8670
  f"{route_hint}"
8516
8671
  f"{budget_hint} "
@@ -8558,6 +8713,19 @@ class SessionState:
8558
8713
  "used_percent": used_pct,
8559
8714
  }
8560
8715
 
8716
+ def _apply_auto_compact_if_needed(self, reason: str = "auto") -> bool:
8717
+ self._microcompact()
8718
+ metrics = self._context_budget_metrics()
8719
+ used = int(metrics.get("used", 0) or 0)
8720
+ limit = max(1, int(metrics.get("limit", 0) or 0))
8721
+ if used < limit:
8722
+ return False
8723
+ now_tick = now_ts()
8724
+ if (now_tick - float(self.last_compact_ts or 0.0)) < 0.8:
8725
+ return False
8726
+ self._auto_compact(reason)
8727
+ return True
8728
+
8561
8729
  def _estimate_output_tokens(self, text: str, thinking_text: str = "", tool_calls: list | None = None) -> int:
8562
8730
  t_main = len(str(text or "")) // 4
8563
8731
  t_think = len(str(thinking_text or "")) // 4
@@ -10981,9 +11149,18 @@ class SessionState:
10981
11149
  "重构",
10982
11150
  "设计",
10983
11151
  "构建",
11152
+ "架构",
11153
+ "内核",
11154
+ "框架",
11155
+ "死循环",
11156
+ "状态机",
11157
+ "调度",
10984
11158
  "后端",
10985
11159
  "前端",
10986
11160
  "自动化",
11161
+ "agentbus",
11162
+ "watchdog",
11163
+ "decomposition",
10987
11164
  "workflow",
10988
11165
  "architecture",
10989
11166
  "build",
@@ -11102,7 +11279,10 @@ class SessionState:
11102
11279
  return {
11103
11280
  "task_type": "general",
11104
11281
  "complexity": "simple",
11105
- "direct_objective": "Provide the most direct useful response with minimal orchestration.",
11282
+ "direct_objective": (
11283
+ "Provide the most direct useful response with minimal orchestration, "
11284
+ "anchored to the current project context and user goal."
11285
+ ),
11106
11286
  "recommended_agents": ["developer"],
11107
11287
  "round_budget": 3,
11108
11288
  "reason": "default lightweight profile",
@@ -11667,159 +11847,233 @@ class SessionState:
11667
11847
  del target[:overflow]
11668
11848
 
11669
11849
  def _merge_output_text() -> str:
11670
- out_text = out_buf.decode("utf-8", errors="replace")
11671
- err_text = err_buf.decode("utf-8", errors="replace")
11850
+ # On Windows, cmd.exe outputs in the system OEM codepage (e.g. cp936/GBK),
11851
+ # not UTF-8. Detect and use the correct encoding for decoding.
11852
+ if os.name == "nt":
11853
+ try:
11854
+ import locale as _lc
11855
+ enc = _lc.getpreferredencoding(False) or "utf-8"
11856
+ except Exception:
11857
+ enc = "utf-8"
11858
+ else:
11859
+ enc = "utf-8"
11860
+ out_text = out_buf.decode(enc, errors="replace")
11861
+ err_text = err_buf.decode(enc, errors="replace")
11672
11862
  return (out_text + err_text).strip()
11673
11863
 
11674
- try:
11675
- popen_kwargs = {
11676
- "shell": True,
11677
- "cwd": cwd,
11678
- "stdout": subprocess.PIPE,
11679
- "stderr": subprocess.PIPE,
11680
- "text": False,
11681
- "bufsize": 0,
11682
- "start_new_session": (os.name == "posix"),
11683
- }
11684
- if os.name == "nt":
11685
- create_group = int(getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0) or 0)
11686
- if create_group > 0:
11687
- popen_kwargs["creationflags"] = create_group
11688
- proc = subprocess.Popen(command, **popen_kwargs)
11689
- if os.name == "nt":
11690
- # Windows pipe handles are not selector-friendly; use reader threads.
11691
- reader_threads: list[threading.Thread] = []
11864
+ def _collect_with_reader_threads(proc: subprocess.Popen):
11865
+ nonlocal next_progress_emit
11866
+ reader_threads: list[threading.Thread] = []
11867
+ io_queue: queue.Queue = queue.Queue()
11868
+ active_readers: set[str] = set()
11692
11869
 
11693
- def _spawn_reader(stream, target: bytearray):
11694
- if stream is None:
11695
- return
11870
+ def _spawn_reader(label: str, stream):
11871
+ if stream is None:
11872
+ return
11873
+ active_readers.add(label)
11874
+ # Selector fallback may leave PIPE FDs in non-blocking mode.
11875
+ # Reader threads expect blocking reads to avoid early EOF/pipe close.
11876
+ try:
11877
+ os.set_blocking(stream.fileno(), True)
11878
+ except Exception:
11879
+ pass
11696
11880
 
11697
- def _reader():
11881
+ def _reader():
11882
+ try:
11698
11883
  while True:
11699
11884
  try:
11700
11885
  chunk = stream.read(65536)
11701
11886
  except Exception:
11702
11887
  break
11888
+ if chunk is None:
11889
+ time.sleep(0.01)
11890
+ continue
11703
11891
  if not chunk:
11704
11892
  break
11705
- _append_capture(target, chunk)
11893
+ io_queue.put((label, chunk))
11894
+ finally:
11895
+ try:
11896
+ stream.close()
11897
+ except Exception:
11898
+ pass
11899
+ io_queue.put((label, None))
11706
11900
 
11707
- th = threading.Thread(target=_reader, daemon=True)
11708
- th.start()
11709
- reader_threads.append(th)
11901
+ th = threading.Thread(target=_reader, daemon=True)
11902
+ th.start()
11903
+ reader_threads.append(th)
11710
11904
 
11711
- _spawn_reader(proc.stdout, out_buf)
11712
- _spawn_reader(proc.stderr, err_buf)
11905
+ _spawn_reader("stdout", proc.stdout)
11906
+ _spawn_reader("stderr", proc.stderr)
11713
11907
 
11908
+ while True:
11909
+ now = time.time()
11910
+ elapsed = now - start
11911
+ if (not meta.get("error")) and self.cancel_requested:
11912
+ _stop_process(proc)
11913
+ meta["error"] = "Error: interrupted by user"
11914
+ meta["exit_code"] = -130
11915
+ elif (not meta.get("error")) and timeout > 0 and elapsed >= timeout:
11916
+ _stop_process(proc)
11917
+ meta["error"] = f"Error: timeout ({timeout}s)"
11918
+ meta["exit_code"] = -1
11919
+ try:
11920
+ label, chunk = io_queue.get(timeout=0.12)
11921
+ if chunk is None:
11922
+ active_readers.discard(str(label))
11923
+ elif str(label) == "stderr":
11924
+ _append_capture(err_buf, chunk)
11925
+ else:
11926
+ _append_capture(out_buf, chunk)
11927
+ except queue.Empty:
11928
+ pass
11714
11929
  while True:
11715
- now = time.time()
11716
- elapsed = now - start
11717
- if (not meta.get("error")) and self.cancel_requested:
11718
- _stop_process(proc)
11719
- meta["error"] = "Error: interrupted by user"
11720
- meta["exit_code"] = -130
11721
- elif (not meta.get("error")) and timeout > 0 and elapsed >= timeout:
11722
- _stop_process(proc)
11723
- meta["error"] = f"Error: timeout ({timeout}s)"
11724
- meta["exit_code"] = -1
11725
- if now >= next_progress_emit:
11726
- self._emit_transient(
11727
- "status",
11728
- {
11729
- "summary": (
11730
- f"bash running ({int(elapsed)}s, "
11731
- f"captured={len(out_buf) + len(err_buf)}B)"
11732
- )
11733
- },
11734
- )
11735
- next_progress_emit = now + 0.8
11736
- if proc.poll() is not None:
11930
+ try:
11931
+ label, chunk = io_queue.get_nowait()
11932
+ except queue.Empty:
11737
11933
  break
11738
- time.sleep(0.12)
11934
+ if chunk is None:
11935
+ active_readers.discard(str(label))
11936
+ elif str(label) == "stderr":
11937
+ _append_capture(err_buf, chunk)
11938
+ else:
11939
+ _append_capture(out_buf, chunk)
11940
+ if now >= next_progress_emit:
11941
+ self._emit_transient(
11942
+ "status",
11943
+ {
11944
+ "summary": (
11945
+ f"bash running ({int(elapsed)}s, "
11946
+ f"captured={len(out_buf) + len(err_buf)}B)"
11947
+ )
11948
+ },
11949
+ )
11950
+ next_progress_emit = now + 0.8
11951
+ if (proc.poll() is not None) and (not active_readers) and io_queue.empty():
11952
+ break
11953
+
11954
+ for th in reader_threads:
11739
11955
  try:
11740
- extra_out, extra_err = proc.communicate(timeout=0.8)
11956
+ th.join(timeout=0.8)
11741
11957
  except Exception:
11742
- extra_out, extra_err = b"", b""
11743
- _append_capture(out_buf, extra_out or b"")
11744
- _append_capture(err_buf, extra_err or b"")
11745
- for th in reader_threads:
11746
- try:
11747
- th.join(timeout=0.8)
11748
- except Exception:
11749
- pass
11750
- merged = _merge_output_text()
11751
- if meta.get("error"):
11752
- meta["output"] = trim(merged or str(meta["error"]))
11958
+ pass
11959
+ while True:
11960
+ try:
11961
+ label, chunk = io_queue.get_nowait()
11962
+ except queue.Empty:
11963
+ break
11964
+ if chunk is None:
11965
+ continue
11966
+ if str(label) == "stderr":
11967
+ _append_capture(err_buf, chunk)
11753
11968
  else:
11754
- meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
11755
- meta["output"] = trim(merged or "(no output)")
11969
+ _append_capture(out_buf, chunk)
11970
+ merged_raw = _merge_output_text()
11971
+ merged, _ = filter_runtime_noise_lines(merged_raw)
11972
+ if meta.get("error"):
11973
+ meta["output"] = trim(merged or str(meta["error"]))
11756
11974
  else:
11757
- with selectors.DefaultSelector() as sel:
11758
- if proc.stdout is not None:
11759
- try:
11760
- os.set_blocking(proc.stdout.fileno(), False)
11761
- except Exception:
11762
- pass
11763
- sel.register(proc.stdout, selectors.EVENT_READ, data="stdout")
11764
- if proc.stderr is not None:
11765
- try:
11766
- os.set_blocking(proc.stderr.fileno(), False)
11767
- except Exception:
11768
- pass
11769
- sel.register(proc.stderr, selectors.EVENT_READ, data="stderr")
11770
- while True:
11771
- now = time.time()
11772
- elapsed = now - start
11773
- if self.cancel_requested:
11774
- _stop_process(proc)
11775
- meta["error"] = "Error: interrupted by user"
11776
- meta["exit_code"] = -130
11777
- elif timeout > 0 and elapsed >= timeout:
11778
- _stop_process(proc)
11779
- meta["error"] = f"Error: timeout ({timeout}s)"
11780
- meta["exit_code"] = -1
11781
- events = sel.select(timeout=0.12)
11782
- for key, _ in events:
11783
- stream = key.fileobj
11975
+ meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
11976
+ meta["output"] = trim(merged or "(no output)")
11977
+
11978
+ try:
11979
+ popen_kwargs = {
11980
+ "shell": True,
11981
+ "cwd": cwd,
11982
+ "stdout": subprocess.PIPE,
11983
+ "stderr": subprocess.PIPE,
11984
+ "text": False,
11985
+ "bufsize": 0,
11986
+ "start_new_session": (os.name == "posix"),
11987
+ }
11988
+ if os.name == "nt":
11989
+ create_group = int(getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0) or 0)
11990
+ if create_group > 0:
11991
+ popen_kwargs["creationflags"] = create_group
11992
+ proc = subprocess.Popen(command, **popen_kwargs)
11993
+ if os.name == "nt":
11994
+ # Windows: read PIPE output via blocking reader threads + queue.
11995
+ _collect_with_reader_threads(proc)
11996
+ else:
11997
+ try:
11998
+ with selectors.DefaultSelector() as sel:
11999
+ if proc.stdout is not None:
11784
12000
  try:
11785
- chunk = os.read(stream.fileno(), 65536)
11786
- except BlockingIOError:
11787
- continue
12001
+ os.set_blocking(proc.stdout.fileno(), False)
11788
12002
  except Exception:
11789
- chunk = b""
11790
- if not chunk:
12003
+ pass
12004
+ sel.register(proc.stdout, selectors.EVENT_READ, data="stdout")
12005
+ if proc.stderr is not None:
12006
+ try:
12007
+ os.set_blocking(proc.stderr.fileno(), False)
12008
+ except Exception:
12009
+ pass
12010
+ sel.register(proc.stderr, selectors.EVENT_READ, data="stderr")
12011
+ while True:
12012
+ now = time.time()
12013
+ elapsed = now - start
12014
+ if self.cancel_requested:
12015
+ _stop_process(proc)
12016
+ meta["error"] = "Error: interrupted by user"
12017
+ meta["exit_code"] = -130
12018
+ elif timeout > 0 and elapsed >= timeout:
12019
+ _stop_process(proc)
12020
+ meta["error"] = f"Error: timeout ({timeout}s)"
12021
+ meta["exit_code"] = -1
12022
+ events = sel.select(timeout=0.12)
12023
+ for key, _ in events:
12024
+ stream = key.fileobj
11791
12025
  try:
11792
- sel.unregister(stream)
12026
+ chunk = os.read(stream.fileno(), 65536)
12027
+ except BlockingIOError:
12028
+ continue
11793
12029
  except Exception:
11794
- pass
11795
- continue
11796
- if key.data == "stderr":
11797
- _append_capture(err_buf, chunk)
11798
- else:
11799
- _append_capture(out_buf, chunk)
11800
- if now >= next_progress_emit:
11801
- self._emit_transient(
11802
- "status",
11803
- {
11804
- "summary": (
11805
- f"bash running ({int(elapsed)}s, "
11806
- f"captured={len(out_buf) + len(err_buf)}B)"
11807
- )
11808
- },
11809
- )
11810
- next_progress_emit = now + 0.8
11811
- if (proc.poll() is not None) and (not sel.get_map()):
11812
- break
11813
- merged = _merge_output_text()
11814
- if meta.get("error"):
11815
- meta["output"] = trim(merged or str(meta["error"]))
12030
+ chunk = b""
12031
+ if not chunk:
12032
+ try:
12033
+ sel.unregister(stream)
12034
+ except Exception:
12035
+ pass
12036
+ continue
12037
+ if key.data == "stderr":
12038
+ _append_capture(err_buf, chunk)
12039
+ else:
12040
+ _append_capture(out_buf, chunk)
12041
+ if now >= next_progress_emit:
12042
+ self._emit_transient(
12043
+ "status",
12044
+ {
12045
+ "summary": (
12046
+ f"bash running ({int(elapsed)}s, "
12047
+ f"captured={len(out_buf) + len(err_buf)}B)"
12048
+ )
12049
+ },
12050
+ )
12051
+ next_progress_emit = now + 0.8
12052
+ if (proc.poll() is not None) and (not sel.get_map()):
12053
+ break
12054
+ merged_raw = _merge_output_text()
12055
+ merged, _ = filter_runtime_noise_lines(merged_raw)
12056
+ if meta.get("error"):
12057
+ meta["output"] = trim(merged or str(meta["error"]))
12058
+ else:
12059
+ meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
12060
+ meta["output"] = trim(merged or "(no output)")
12061
+ except Exception as exc:
12062
+ # Some platforms may reject selector registration for PIPEs.
12063
+ # On Windows, also catch any OSError (e.g. WinError 10093 WSANOTINITIALISED).
12064
+ if is_benign_socket_error(exc) or isinstance(exc, ValueError) or (os.name == "nt" and isinstance(exc, OSError)):
12065
+ _collect_with_reader_threads(proc)
11816
12066
  else:
11817
- meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
11818
- meta["output"] = trim(merged or "(no output)")
12067
+ raise
11819
12068
  except Exception as exc:
11820
- meta["error"] = f"Error: {exc}"
11821
- meta["output"] = meta["error"]
11822
- meta["exit_code"] = -1
12069
+ # On Windows, WinError 10038 (WSAENOTSOCK) can surface here when
12070
+ # selector-based I/O is used with pipe FDs. Fall back to thread-based reading.
12071
+ if proc is not None and is_benign_socket_error(exc):
12072
+ _collect_with_reader_threads(proc)
12073
+ else:
12074
+ meta["error"] = f"Error: {exc}"
12075
+ meta["output"] = meta["error"]
12076
+ meta["exit_code"] = -1
11823
12077
  meta["duration_ms"] = int((time.time() - start) * 1000)
11824
12078
  after = self._git_status_map(cwd)
11825
12079
  meta["changed_files"] = self._status_delta(before, after) if before or after else []
@@ -12350,6 +12604,35 @@ class SessionState:
12350
12604
  return trim(text.replace("\n", " "), 220)
12351
12605
  return "current task"
12352
12606
 
12607
+ def _compose_default_direct_objective(self, base_objective: str, goal: str, task_type: str) -> str:
12608
+ base = trim(str(base_objective or "").strip(), 520)
12609
+ goal_clean = trim(strip_thinking_content(str(goal or "")).replace("\n", " ").strip(), 220)
12610
+ path_hits = re.findall(
12611
+ r"(?:[A-Za-z0-9_.-]+/)*[A-Za-z0-9_.-]+\.(?:py|js|ts|tsx|jsx|java|go|rs|md|json|yaml|yml|toml|ini|sh|html|css|c|cpp|h)",
12612
+ goal_clean,
12613
+ )
12614
+ uniq_paths: list[str] = []
12615
+ for item in path_hits:
12616
+ one = trim(str(item or "").strip(), 80)
12617
+ if one and one not in uniq_paths:
12618
+ uniq_paths.append(one)
12619
+ if len(uniq_paths) >= 3:
12620
+ break
12621
+ if uniq_paths:
12622
+ anchor = f" Project anchors: {', '.join(uniq_paths)}."
12623
+ elif goal_clean:
12624
+ anchor = f" Project anchor: {goal_clean}."
12625
+ else:
12626
+ anchor = " Project anchor: current repository context."
12627
+ if task_type == "simple_qa":
12628
+ postfix = " Keep orchestration lightweight and answer directly with project-aware specifics."
12629
+ else:
12630
+ postfix = (
12631
+ " Keep orchestration lightweight and execution-first. "
12632
+ "Use bounded creativity for ambiguous details while preserving existing architecture and constraints."
12633
+ )
12634
+ return trim(f"{base}{anchor}{postfix}", 800)
12635
+
12353
12636
  def _normalize_task_profile(self, goal: str, raw: object) -> dict:
12354
12637
  base = self._infer_task_profile(goal)
12355
12638
  src = raw if isinstance(raw, dict) else {}
@@ -12362,13 +12645,22 @@ class SessionState:
12362
12645
  complexity = str(src.get("complexity", base.get("complexity", "simple")) or "").strip().lower()
12363
12646
  if complexity not in TASK_COMPLEXITY_LEVELS:
12364
12647
  complexity = str(base.get("complexity", "simple"))
12365
- direct_objective = (
12366
- trim(
12367
- str(src.get("direct_objective", base.get("direct_objective", "")) or "").strip(),
12368
- 800,
12648
+ src_direct_objective = trim(str(src.get("direct_objective", "") or "").strip(), 800)
12649
+ legacy_objectives = {
12650
+ "Provide the most direct useful response with minimal orchestration.",
12651
+ (
12652
+ "Provide the most direct useful response with minimal orchestration, "
12653
+ "anchored to the current project context and user goal."
12654
+ ),
12655
+ }
12656
+ if src_direct_objective and src_direct_objective not in legacy_objectives:
12657
+ direct_objective = src_direct_objective
12658
+ else:
12659
+ direct_objective = self._compose_default_direct_objective(
12660
+ str(base.get("direct_objective", "")),
12661
+ goal,
12662
+ task_type,
12369
12663
  )
12370
- or str(base.get("direct_objective", ""))
12371
- )
12372
12664
  rec_raw = src.get("recommended_agents", base.get("recommended_agents", []))
12373
12665
  recommended: list[str] = []
12374
12666
  if isinstance(rec_raw, list):
@@ -12669,95 +12961,793 @@ class SessionState:
12669
12961
  key = str(raw or "").strip().upper()
12670
12962
  return key if key in BLACKBOARD_STATUSES else "INITIALIZING"
12671
12963
 
12672
- def _new_blackboard(self, goal: str = "") -> dict:
12673
- profile = self._normalize_task_profile(goal, {})
12674
- progress = "done" if str(profile.get("task_type", "") or "") == "simple_qa" and not str(goal or "").strip() else "initializing"
12964
+ def _new_watchdog_state(self) -> dict:
12675
12965
  return {
12676
- "version": 1,
12677
- "updated_at": float(now_ts()),
12678
- "original_goal": trim(str(goal or "").strip(), 4000),
12679
- "research_notes": [],
12680
- "code_artifacts": {},
12681
- "execution_logs": [],
12682
- "review_feedback": [],
12683
- "conversation_history": [],
12684
- "status": "INITIALIZING",
12685
- "approval": {
12686
- "approved": False,
12687
- "by": "",
12688
- "note": "",
12689
- "ts": 0.0,
12690
- },
12691
- "manager_cycles": 0,
12692
- "manager_summary_attempts": 0,
12693
- "active_agent": "",
12694
- "last_delegate": {
12695
- "target": "",
12696
- "instruction": "",
12697
- "reason": "",
12698
- "source": "",
12699
- "is_mandatory": False,
12700
- "ts": 0.0,
12701
- },
12702
- "task_profile": profile,
12703
- "manager_judgement": {
12704
- "task_type": str(profile.get("task_type", "general")),
12705
- "complexity": str(profile.get("complexity", "simple")),
12706
- "scale_preference": str(profile.get("scale_preference", "balanced") or "balanced"),
12707
- "progress": progress,
12708
- "remaining_rounds": (
12709
- -1
12710
- if int(profile.get("round_budget", 0) or 0) <= 0
12711
- else int(profile.get("round_budget", 1) or 1)
12712
- ),
12713
- "updated_at": float(now_ts()),
12714
- },
12715
- "last_worker_reply": {
12716
- "role": "",
12717
- "text": "",
12718
- "ts": 0.0,
12719
- },
12966
+ "intent_no_tool_streak": 0,
12967
+ "repeat_no_tool_streak": 0,
12968
+ "state_unchanged_streak": 0,
12969
+ "last_no_tool_text": "",
12970
+ "last_no_tool_hash": "",
12971
+ "last_state_fp": "",
12972
+ "trigger_count": 0,
12973
+ "last_trigger_reason": "",
12974
+ "last_trigger_ts": 0.0,
12720
12975
  }
12721
12976
 
12722
- def _normalize_blackboard(self, raw: object) -> dict:
12977
def _normalize_watchdog_state(self, raw: object) -> dict:
    """Coerce an arbitrary value into a well-formed watchdog state dict.

    Starts from ``_new_watchdog_state()`` and overlays the fields found in
    ``raw`` (ignored entirely unless it is a dict). Counters are clamped to
    be non-negative and text fields are stripped and length-limited via
    ``trim``.

    Values that cannot be interpreted as numbers (for example a corrupted
    persisted state) fall back to 0 / 0.0 instead of raising, so a single
    bad field cannot abort the caller.
    """
    src = raw if isinstance(raw, dict) else {}

    def _counter(key: str) -> int:
        # Non-numeric or non-finite values are treated as "no count yet".
        try:
            return max(0, int(src.get(key, 0) or 0))
        except (TypeError, ValueError, OverflowError):
            return 0

    def _text(key: str, limit: int) -> str:
        return trim(str(src.get(key, "") or "").strip(), limit)

    out = self._new_watchdog_state()
    out["intent_no_tool_streak"] = _counter("intent_no_tool_streak")
    out["repeat_no_tool_streak"] = _counter("repeat_no_tool_streak")
    out["state_unchanged_streak"] = _counter("state_unchanged_streak")
    out["last_no_tool_text"] = _text("last_no_tool_text", 1200)
    out["last_no_tool_hash"] = _text("last_no_tool_hash", 80)
    out["last_state_fp"] = _text("last_state_fp", 120)
    out["trigger_count"] = _counter("trigger_count")
    out["last_trigger_reason"] = _text("last_trigger_reason", 200)
    try:
        out["last_trigger_ts"] = float(src.get("last_trigger_ts", 0.0) or 0.0)
    except (TypeError, ValueError):
        out["last_trigger_ts"] = 0.0
    return out
12990
+
12991
+ def _new_decomposition_queue_state(self) -> dict:
12992
+ return {
12993
+ "active": False,
12994
+ "trigger_reason": "",
12995
+ "created_at": 0.0,
12996
+ "cursor": 0,
12997
+ "steps": [],
12998
+ "last_error": "",
12999
+ "snapshot": "",
13000
+ "decomposer_output": "",
13001
+ }
13002
+
13003
def _watchdog_normalize_steps(self, rows: object) -> list[dict]:
    """Normalize decomposition step rows into the queue's step schema.

    Only the first ``WATCHDOG_MAX_DECOMPOSE_STEPS`` entries of ``rows`` are
    considered. Non-dict rows and rows without any instruction text are
    dropped. Each surviving row becomes a dict with the keys ``step``,
    ``target``, ``action_type``, ``instruction``, ``attempts``, ``status``
    and ``updated_at``.

    The instruction is read from the first non-empty of ``description``,
    ``instruction``, ``content`` or ``task``. The target role is read from
    ``target``/``owner``/``role``/``agent`` and, when that does not yield a
    role, inferred from keywords in the action type and instruction.
    Developer steps that do not mention "incremental" get an extra line
    asking for incremental edits.

    Returns an empty list when ``rows`` is not a list or nothing survives.
    Malformed numeric fields (``step``, ``attempts``) fall back to defaults
    rather than raising, because rows may come straight from model output.
    """
    if not isinstance(rows, list):
        return []

    reviewer_tokens = ("review", "verify", "validate", "test", "qa", "检查", "验证", "评审", "審查")
    explorer_tokens = ("research", "inspect", "analy", "explore", "investigate", "分析", "调研", "調研", "探索")

    def _infer_target(action_type: str, instruction: str, fallback: str = "developer") -> str:
        # Keyword routing: review-like wording wins over research-like wording.
        default_role = self._sanitize_agent_role(fallback) or "developer"
        low = f"{action_type} {instruction}".lower()
        if any(tok in low for tok in reviewer_tokens):
            return "reviewer"
        if any(tok in low for tok in explorer_tokens):
            return "explorer"
        return default_role

    def _attempt_count(value: object) -> int:
        # A non-numeric attempts value must not discard the whole plan.
        try:
            return max(0, int(value or 0))
        except (TypeError, ValueError, OverflowError):
            return 0

    out: list[dict] = []
    for idx, row in enumerate(rows[:WATCHDOG_MAX_DECOMPOSE_STEPS]):
        if not isinstance(row, dict):
            continue
        instruction = trim(
            str(
                row.get("description", "")
                or row.get("instruction", "")
                or row.get("content", "")
                or row.get("task", "")
                or ""
            ).strip(),
            900,
        )
        if not instruction:
            continue
        action_type = trim(str(row.get("action_type", "") or "").strip(), 80)
        target = self._sanitize_agent_role(
            row.get("target", row.get("owner", row.get("role", row.get("agent", ""))))
        )
        target = target or _infer_target(action_type, instruction)
        if target == "developer" and "incremental" not in instruction.lower():
            instruction = trim(
                (
                    f"{instruction}\n"
                    "Use incremental edits (append/targeted replace) instead of full-file overwrite unless unavoidable."
                ),
                1000,
            )
        try:
            step_no = int(row.get("step", idx + 1) or (idx + 1))
        except (TypeError, ValueError, OverflowError):
            step_no = idx + 1
        out.append(
            {
                "step": max(1, step_no),
                "target": target,
                "action_type": action_type or "execute",
                "instruction": instruction,
                "attempts": _attempt_count(row.get("attempts", 0)),
                "status": trim(str(row.get("status", "pending") or "pending").strip().lower(), 20) or "pending",
                "updated_at": float(now_ts()),
            }
        )
    # The input was already capped above, so no further slicing is needed.
    return out
13063
+
13064
def _normalize_decomposition_queue_state(self, raw: object) -> dict:
    """Coerce an arbitrary value into a well-formed decomposition-queue dict.

    Starts from ``_new_decomposition_queue_state()`` and overlays fields from
    ``raw`` (ignored unless it is a dict). Text fields are stripped and
    length-limited, ``steps`` is passed through
    ``_watchdog_normalize_steps``, and the queue is forced inactive when the
    cursor is at or past the end of the step list.

    Non-numeric ``created_at`` / ``cursor`` values fall back to 0.0 / 0
    instead of raising.
    """
    src = raw if isinstance(raw, dict) else {}
    out = self._new_decomposition_queue_state()
    out["active"] = bool(src.get("active", False))
    out["trigger_reason"] = trim(str(src.get("trigger_reason", "") or "").strip(), 200)
    try:
        out["created_at"] = float(src.get("created_at", 0.0) or 0.0)
    except (TypeError, ValueError):
        out["created_at"] = 0.0
    try:
        out["cursor"] = max(0, int(src.get("cursor", 0) or 0))
    except (TypeError, ValueError, OverflowError):
        out["cursor"] = 0
    out["last_error"] = trim(str(src.get("last_error", "") or "").strip(), 400)
    out["snapshot"] = trim(str(src.get("snapshot", "") or "").strip(), 4000)
    out["decomposer_output"] = trim(str(src.get("decomposer_output", "") or "").strip(), 2000)
    out["steps"] = self._watchdog_normalize_steps(src.get("steps", []))
    # A cursor beyond the last step means there is nothing left to run.
    if out["cursor"] >= len(out["steps"]):
        out["active"] = False
    return out
13078
+
13079
def _watchdog_state_fingerprint(self, board: dict | None = None) -> str:
    """Return a SHA-1 hex digest summarising the coarse blackboard state.

    The digest covers status, a trimmed goal, the active and last-delegated
    roles, the sizes of the research/artifact/execution/review collections,
    the approval flag, and the task type and complexity. It is computed over
    a compact, key-sorted JSON dump of those values. When ``board`` is not a
    dict, ``self._ensure_blackboard()`` supplies it.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    profile = self._ensure_blackboard_task_profile(bb)

    def _size(key: str, empty: object) -> int:
        return len(bb.get(key, empty) or empty)

    last_delegate = bb.get("last_delegate", {}) or {}
    approval = bb.get("approval", {}) or {}
    payload = {}
    payload["status"] = self._normalize_blackboard_status(bb.get("status", "INITIALIZING"))
    payload["goal"] = trim(str(bb.get("original_goal", "") or "").strip(), 400)
    payload["active_agent"] = self._sanitize_agent_role(bb.get("active_agent", ""))
    payload["delegate"] = self._sanitize_agent_role(last_delegate.get("target", ""))
    payload["research_count"] = _size("research_notes", [])
    payload["artifact_count"] = _size("code_artifacts", {})
    payload["exec_count"] = _size("execution_logs", [])
    payload["review_count"] = _size("review_feedback", [])
    payload["approved"] = bool(approval.get("approved", False))
    payload["task_type"] = str(profile.get("task_type", "general") or "general")
    payload["complexity"] = str(profile.get("complexity", "simple") or "simple")
    encoded = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha1(encoded.encode("utf-8"))
    return digest.hexdigest()
13097
+
13098
+ def _watchdog_extract_json_array(self, text: str) -> list[dict]:
13099
+ raw = str(text or "").strip()
13100
+ if not raw:
13101
+ return []
13102
+ probe_candidates: list[str] = [raw]
13103
+ fence = re.findall(r"```(?:json)?\s*([\s\S]*?)```", raw, flags=re.IGNORECASE)
13104
+ probe_candidates.extend([str(x or "").strip() for x in fence if str(x or "").strip()])
13105
+ first = raw.find("[")
13106
+ last = raw.rfind("]")
13107
+ if first >= 0 and last > first:
13108
+ probe_candidates.append(raw[first : last + 1].strip())
13109
+ for candidate in probe_candidates:
13110
+ try:
13111
+ parsed = json.loads(candidate)
13112
+ except Exception:
13113
+ continue
13114
+ if isinstance(parsed, list):
13115
+ return [dict(x) for x in parsed if isinstance(x, dict)]
13116
+ return []
13117
+
13118
def _watchdog_intent_without_action(self, text: str) -> bool:
    """Report whether ``text`` announces a plan without claiming any action.

    The text is first passed through ``strip_thinking_content`` and
    lower-cased. If it contains any "already done" marker the answer is
    False; otherwise the answer is True exactly when it contains an
    "about to do" marker. Empty text yields False.
    """
    cleaned = strip_thinking_content(str(text or "")).strip()
    if not cleaned:
        return False
    lowered = cleaned.lower()

    # Phrases claiming that work was already performed.
    performed_markers = (
        "wrote",
        "edited",
        "executed",
        "called",
        "ran ",
        "已完成",
        "已执行",
        "已執行",
        "已调用",
        "已調用",
        "完成了",
        "执行了",
        "執行了",
        "调用了",
        "調用了",
    )
    for marker in performed_markers:
        if marker in lowered:
            return False

    # Phrases that only announce upcoming work.
    announced_markers = (
        "i will",
        "i'm going to",
        "next step",
        "plan to",
        "let me",
        "我将",
        "我會",
        "我会",
        "下一步",
        "接下来",
        "接下來",
        "计划",
        "計劃",
        "准备",
        "準備",
    )
    for marker in announced_markers:
        if marker in lowered:
            return True
    return False
13160
+
13161
def _watchdog_similarity(self, a: str, b: str) -> float:
    """Return the ``difflib.SequenceMatcher`` ratio of two replies.

    Both inputs go through ``strip_thinking_content``, are stripped, and are
    limited to 1800 characters via ``trim`` before comparison. If either one
    ends up empty the result is 0.0.
    """
    prepared = [
        trim(strip_thinking_content(str(value or "")).strip(), 1800)
        for value in (a, b)
    ]
    if not all(prepared):
        return 0.0
    first, second = prepared
    matcher = difflib.SequenceMatcher(None, first, second)
    return float(matcher.ratio())
13167
+
13168
def _watchdog_context_near_limit(self) -> bool:
    """Report whether estimated token usage has reached the near-limit mark.

    The limit is ``self.context_token_upper_bound`` (or ``TOKEN_THRESHOLD``
    when that is falsy), floored at 1; the mark is that limit scaled by
    ``WATCHDOG_CONTEXT_NEAR_RATIO`` and truncated to an int. A failure while
    estimating usage is treated as zero tokens used.
    """
    ceiling = max(1, int(self.context_token_upper_bound or TOKEN_THRESHOLD))
    cutoff = int(ceiling * WATCHDOG_CONTEXT_NEAR_RATIO)
    try:
        consumed = int(self._estimate_tokens())
    except Exception:
        # Best effort: an unavailable estimate must not trip the watchdog.
        consumed = 0
    return consumed >= cutoff
13175
+
13176
def _watchdog_snapshot_payload(self, board: dict, reason: str, role: str, step: dict | None = None) -> str:
    """Serialize a compact snapshot of the blackboard for the decomposer.

    The snapshot holds the objective, trigger reason, active role, status,
    a reduced task profile, the latest worker step's status/text, up to six
    code artifacts (ordered by ``updated_at``, newest first), and the last
    four execution-log and review-feedback entries. It is rendered with
    ``json_dumps(..., indent=2)`` and limited to 6000 characters via
    ``trim``.

    Artifact entries that are not dicts, or whose ``updated_at`` is not
    numeric, sort as timestamp 0.0 and contribute an empty summary instead
    of raising.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    profile = self._ensure_blackboard_task_profile(bb)
    step_row = step if isinstance(step, dict) else {}

    def _artifact_ts(entry: object) -> float:
        if not isinstance(entry, dict):
            return 0.0
        try:
            return float(entry.get("updated_at", 0.0) or 0.0)
        except (TypeError, ValueError):
            return 0.0

    def _artifact_summary(entry: object) -> str:
        # Mirror the sort key's tolerance for non-dict artifact entries.
        data = entry if isinstance(entry, dict) else {}
        return trim(str(data.get("summary", "") or "").strip(), 200)

    def _recent_contents(key: str) -> list[str]:
        return [
            trim(str((row or {}).get("content", "") or "").strip(), 220)
            for row in (bb.get(key, []) or [])[-4:]
            if isinstance(row, dict)
        ]

    code_rows = sorted(
        list((bb.get("code_artifacts", {}) or {}).items()),
        key=lambda item: _artifact_ts(item[1]),
        reverse=True,
    )
    payload = {
        "objective": trim(str(bb.get("original_goal", "") or "").strip(), 1800),
        "trigger_reason": trim(str(reason or "").strip(), 200),
        "active_role": self._sanitize_agent_role(role),
        "status": self._normalize_blackboard_status(bb.get("status", "INITIALIZING")),
        "task_profile": {
            "task_type": str(profile.get("task_type", "general") or "general"),
            "complexity": str(profile.get("complexity", "simple") or "simple"),
            "direct_objective": trim(str(profile.get("direct_objective", "") or "").strip(), 600),
        },
        "latest_worker_step": {
            "status": str(step_row.get("status", "") or ""),
            "text": trim(str(step_row.get("text", "") or "").strip(), 600),
        },
        "code_artifacts": [
            {"path": str(path), "summary": _artifact_summary(item)}
            for path, item in code_rows[:6]
        ],
        "recent_execution_logs": _recent_contents("execution_logs"),
        "recent_review_feedback": _recent_contents("review_feedback"),
    }
    return trim(json_dumps(payload, indent=2), 6000)
13217
+
13218
def _watchdog_fallback_steps(self, board: dict, reason: str) -> list[dict]:
    """Build the canned three-step plan used when decomposition yields nothing.

    The plan is research (explorer), implement (developer), validate
    (reviewer). The implement step embeds the task profile's direct
    objective, or the original goal when that is empty, limited to 280
    characters. The rows are returned after ``_watchdog_normalize_steps``.
    """
    profile = self._ensure_blackboard_task_profile(board)
    objective = trim(str(profile.get("direct_objective", "") or "").strip(), 280)
    if not objective:
        objective = trim(str(board.get("original_goal", "") or "").strip(), 280)

    research_text = (
        "Analyze the latest blocker quickly and write concrete constraints to blackboard "
        f"(trigger={trim(reason, 120)})."
    )
    implement_text = (
        "Implement one incremental fix for the current objective and provide verifiable tool output. "
        f"Objective: {objective}"
    )
    validate_text = (
        "Run one validation pass, provide pass/fix verdict with evidence, and handoff summary request if needed."
    )
    plan = (
        ("research", "explorer", research_text),
        ("implement", "developer", implement_text),
        ("validate", "reviewer", validate_text),
    )
    raw_rows = [
        {
            "step": number,
            "action_type": action,
            "target": owner,
            "description": description,
        }
        for number, (action, owner, description) in enumerate(plan, start=1)
    ]
    return self._watchdog_normalize_steps(raw_rows)
13253
+
13254
def _watchdog_decompose_steps(self, board: dict, reason: str, *, pinned_selection: str) -> tuple[list[dict], str, str]:
    """Ask the model to split the objective into executable micro-steps.

    Sends a snapshot of the blackboard plus the trigger reason to
    ``_chat_with_same_model_retry`` (no tools, thinking disabled) and parses
    a JSON array out of the reply. If the call fails or no usable steps are
    parsed, the canned plan from ``_watchdog_fallback_steps`` is used.

    Returns ``(steps, snapshot, raw_output)``, where ``raw_output`` is the
    model reply (or a ``decomposer-error: ...`` note) limited to 2000
    characters.
    """
    snapshot = self._watchdog_snapshot_payload(board, reason, str(board.get("active_agent", "") or ""), None)
    objective = trim(str(board.get("original_goal", "") or "").strip(), 1600)
    system_prompt = (
        "You are a task decomposer. Your only job is to split OBJECTIVE into executable micro-steps. "
        "Return strict JSON array only: "
        "[{\"step\":1,\"action_type\":\"...\",\"target\":\"explorer|developer|reviewer\",\"description\":\"...\"}]. "
        "No markdown, no prose, no code fence."
    )
    # Quote the same cap the step normalizer enforces, so the prompt cannot
    # drift from the limit if the constant is retuned.
    step_cap = int(WATCHDOG_MAX_DECOMPOSE_STEPS)
    user_prompt = (
        f"OBJECTIVE:\n{objective}\n\n"
        f"TRIGGER:\n{trim(reason, 220)}\n\n"
        "SNAPSHOT:\n"
        f"{snapshot}\n\n"
        "Rules: keep steps module-level (not line-by-line), use incremental edits, "
        f"and keep total steps <= {step_cap}."
    )
    raw_text = ""
    parsed_steps: list[dict] = []
    try:
        rsp = self._chat_with_same_model_retry(
            [{"role": "user", "content": user_prompt, "ts": now_ts()}],
            tools=None,
            system=system_prompt,
            max_tokens=1200,
            think=False,
            stream_thinking=False,
            pinned_selection=pinned_selection,
            context_label="watchdog decomposer",
            retries=max(1, min(2, int(MODEL_OUTPUT_RETRY_TIMES))),
        )
        raw_text = str(rsp.get("content") or "")
        parsed_steps = self._watchdog_extract_json_array(raw_text)
    except Exception as exc:
        # Best effort: any model/transport failure degrades to the fallback plan.
        raw_text = f"decomposer-error: {trim(str(exc), 220)}"
        parsed_steps = []
    normalized = self._watchdog_normalize_steps(parsed_steps)
    if not normalized:
        normalized = self._watchdog_fallback_steps(board, reason)
    return normalized, snapshot, trim(raw_text, 2000)
13294
+
13295
def _watchdog_activate_decomposition(
    self,
    board: dict,
    *,
    reason: str,
    role: str,
    step: dict | None,
    pinned_selection: str,
) -> bool:
    """Switch the blackboard into decomposition-queue mode.

    Does nothing and returns False when a queue is already active or when no
    steps could be produced. Otherwise it installs a new active queue,
    bumps the watchdog trigger bookkeeping, resets both no-tool streaks,
    records a manager history entry, emits a status event and returns True.

    ``step`` is accepted but not read by this method.
    """
    existing = self._normalize_decomposition_queue_state(board.get("decomposition_queue", {}))
    if existing.get("active", False):
        return False

    steps, snapshot, raw_text = self._watchdog_decompose_steps(
        board,
        reason,
        pinned_selection=pinned_selection,
    )
    if not steps:
        return False

    queue = {
        "active": True,
        "trigger_reason": trim(str(reason or "").strip(), 200),
        "created_at": float(now_ts()),
        "cursor": 0,
        "steps": steps,
        "last_error": "",
        "snapshot": trim(snapshot, 4000),
        "decomposer_output": trim(raw_text, 2000),
    }

    watchdog = self._normalize_watchdog_state(board.get("watchdog", {}))
    previous_triggers = max(0, int(watchdog.get("trigger_count", 0) or 0))
    watchdog["trigger_count"] = previous_triggers + 1
    watchdog["last_trigger_reason"] = trim(str(reason or "").strip(), 200)
    watchdog["last_trigger_ts"] = float(now_ts())
    # The queue takes over, so the stall streaks start again from zero.
    watchdog["intent_no_tool_streak"] = 0
    watchdog["repeat_no_tool_streak"] = 0

    board["watchdog"] = watchdog
    board["decomposition_queue"] = queue
    self.blackboard = board
    self._blackboard_touch()

    history_note = (
        "watchdog triggered decomposition "
        f"(reason={reason}, role={self._sanitize_agent_role(role)}, "
        f"steps={len(steps)})"
    )
    self._blackboard_history("manager", trim(history_note, 520))

    status_summary = (
        "watchdog triggered; switched to stateless executor queue "
        f"(reason={trim(reason, 90)}, steps={len(steps)})"
    )
    self._emit("status", {"summary": status_summary})
    return True
13355
+
13356
def _watchdog_pick_executor_route(self, board: dict | None = None) -> tuple[dict, dict] | None:
    """Pick the next runnable decomposition step and build its routing args.

    Returns None when the queue is inactive or has no steps, or when every
    remaining step is already "done"/"skipped" (in which case the queue is
    deactivated and stored back on the blackboard).

    Otherwise returns ``(args, meta)``: ``args`` is the delegation payload
    (target role, executor-mode instruction text and task-profile fields),
    and ``meta`` summarizes the trigger reason, the 1-based step position,
    the total step count, the target and the action type. The advanced
    cursor is written back to the blackboard before returning.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
    if not bool(dq.get("active", False)):
        return None
    steps = list(dq.get("steps", []) or [])
    if not steps:
        return None
    cursor = max(0, int(dq.get("cursor", 0) or 0))
    # Skip over steps that have already finished or been given up on.
    while cursor < len(steps):
        status = str((steps[cursor] or {}).get("status", "") or "").strip().lower()
        if status not in {"done", "skipped"}:
            break
        cursor += 1
    if cursor >= len(steps):
        # Nothing left to run: park the cursor at the end and deactivate.
        dq["active"] = False
        dq["cursor"] = len(steps)
        bb["decomposition_queue"] = dq
        self.blackboard = bb
        self._blackboard_touch()
        return None
    dq["cursor"] = cursor
    step_row = steps[cursor] if isinstance(steps[cursor], dict) else {}
    target = self._sanitize_agent_role(step_row.get("target", "")) or "developer"
    action_type = trim(str(step_row.get("action_type", "execute") or "execute").strip(), 80) or "execute"
    step_instruction = trim(str(step_row.get("instruction", "") or "").strip(), 900)
    trigger_reason = trim(str(dq.get("trigger_reason", "") or "").strip(), 180)
    total = len(steps)
    current = cursor + 1  # 1-based position for display
    profile = self._ensure_blackboard_task_profile(bb)
    task_level = int(profile.get("task_level", self.runtime_task_level or 3) or 3)
    if task_level not in TASK_LEVEL_CHOICES:
        task_level = 3
    args = {
        "target": target,
        "instruction": trim(
            (
                f"Executor mode (stateless) step {current}/{total}. "
                f"trigger={trigger_reason or 'watchdog'}; action_type={action_type}.\n"
                f"{step_instruction}\n"
                "Rules: execute one concrete tool call now, keep scope narrow, "
                "and update blackboard evidence immediately."
            ),
            1200,
        ),
        "task_level": int(task_level),
        "task_type": trim(str(profile.get("task_type", "general") or "general"), 40),
        "complexity": trim(str(profile.get("complexity", "simple") or "simple"), 20),
        "scale_preference": trim(str(profile.get("scale_preference", "balanced") or "balanced"), 20),
        "judgement": trim(
            f"watchdog-executor-step-{current}/{total}",
            200,
        ),
        "round_budget": int(profile.get("round_budget", self.runtime_round_budget or self.max_agent_rounds) or 0),
        "direct_objective": trim(str(profile.get("direct_objective", self.runtime_direct_objective or "") or ""), 800),
        "execution_mode": normalize_execution_mode(
            profile.get("execution_mode", self._effective_execution_mode()),
            default=self._effective_execution_mode(),
        ),
        "participants": profile.get("participants", self.runtime_participants),
        "assigned_expert": profile.get("assigned_expert", self.runtime_assigned_expert or "developer"),
        "requires_user_confirmation": bool(profile.get("requires_user_confirmation", False)),
        "is_mandatory": True,
        "executor_mode": True,
    }
    # Persist the (possibly advanced) cursor before handing the route back.
    bb["decomposition_queue"] = dq
    self.blackboard = bb
    self._blackboard_touch()
    meta = {
        "trigger_reason": trigger_reason,
        "cursor": current,
        "total": total,
        "target": target,
        "action_type": action_type,
    }
    return args, meta
13432
+
13433
def _watchdog_mark_step_progress(self, board: dict, role: str, step: dict | None) -> dict:
    """Record the outcome of a worker turn against the current queue step.

    Returns ``{"queue_active": bool, "step_advanced": bool}``. Nothing is
    recorded when the queue is inactive, the cursor is past the end, or
    ``role`` does not match the current step's target.

    A step succeeds when the turn status is "tools" with at least one tool
    result flagged ``ok``, or when an explorer/reviewer answers without tools
    using at least 120 characters of text. A non-successful
    "no-tools"/"tools"/"skip" turn marks the step "retry" until
    ``WATCHDOG_STEP_MAX_ATTEMPTS`` is reached, after which it is "skipped".
    When the cursor reaches the end the queue is deactivated and a status
    event is emitted.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
    out = {"queue_active": bool(dq.get("active", False)), "step_advanced": False}
    if not bool(dq.get("active", False)):
        bb["decomposition_queue"] = dq
        self.blackboard = bb
        return out
    rows = list(dq.get("steps", []) or [])
    cursor = max(0, int(dq.get("cursor", 0) or 0))
    if cursor >= len(rows):
        dq["active"] = False
        dq["cursor"] = len(rows)
        bb["decomposition_queue"] = dq
        self.blackboard = bb
        return {"queue_active": False, "step_advanced": False}
    current = rows[cursor] if isinstance(rows[cursor], dict) else {}
    target = self._sanitize_agent_role(current.get("target", "")) or "developer"
    role_key = self._sanitize_agent_role(role)
    # Only the role the step was assigned to can make progress on it.
    if target != role_key:
        bb["decomposition_queue"] = dq
        self.blackboard = bb
        return out
    status = str((step or {}).get("status", "") or "").strip().lower()
    text = trim(strip_thinking_content(str((step or {}).get("text", "") or "").strip()), 1200)
    tool_results = (step or {}).get("tool_results", []) if isinstance((step or {}).get("tool_results"), list) else []
    has_ok_tool = any(isinstance(row, dict) and bool(row.get("ok", False)) for row in tool_results)
    success = bool(status == "tools" and has_ok_tool)
    # Explorer/reviewer steps may also succeed on a substantial text-only answer.
    if (not success) and status == "no-tools" and role_key in {"explorer", "reviewer"} and len(text) >= 120:
        success = True
    attempts = max(0, int(current.get("attempts", 0) or 0)) + 1
    current["attempts"] = attempts
    current["updated_at"] = float(now_ts())
    if success:
        current["status"] = "done"
        dq["cursor"] = cursor + 1
        out["step_advanced"] = True
        dq["last_error"] = ""
    elif status in {"no-tools", "tools", "skip"}:
        if attempts >= int(WATCHDOG_STEP_MAX_ATTEMPTS):
            # Give up on this step so the queue cannot stall on it.
            current["status"] = "skipped"
            dq["cursor"] = cursor + 1
            out["step_advanced"] = True
            dq["last_error"] = trim(
                f"step {cursor + 1} skipped after {attempts} attempts ({status})",
                300,
            )
        else:
            current["status"] = "retry"
            dq["last_error"] = trim(
                f"step {cursor + 1} retry pending ({status})",
                300,
            )
    rows[cursor] = current
    dq["steps"] = rows
    if int(dq.get("cursor", 0) or 0) >= len(rows):
        dq["active"] = False
        out["queue_active"] = False
        self._emit("status", {"summary": "stateless executor queue drained; returning to normal manager routing"})
    else:
        out["queue_active"] = bool(dq.get("active", False))
    bb["decomposition_queue"] = dq
    self.blackboard = bb
    return out
13497
+
13498
def _watchdog_process_worker_step(
    self,
    board: dict,
    *,
    role: str,
    step: dict,
    state_changed: bool,
    pinned_selection: str,
) -> dict:
    """Update the watchdog counters after a worker turn and trigger if stalled.

    Updates the state-unchanged, intent-without-tool and repeated-reply
    streaks from the turn's status and text, records queue step progress via
    ``_watchdog_mark_step_progress``, and, when no queue is active, checks
    the streaks against the ``WATCHDOG_*`` thresholds. If a trigger reason
    is found and at least one second has passed since the last trigger,
    ``_watchdog_activate_decomposition`` is called.

    Returns a dict with ``triggered``, ``trigger_reason``, ``queue_active``
    and ``step_advanced``.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    wd = self._normalize_watchdog_state(bb.get("watchdog", {}))
    dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
    status = str((step or {}).get("status", "") or "").strip().lower()
    text = trim(strip_thinking_content(str((step or {}).get("text", "") or "").strip()), 1200)
    wd["last_state_fp"] = self._watchdog_state_fingerprint(bb)
    if state_changed:
        wd["state_unchanged_streak"] = 0
    else:
        wd["state_unchanged_streak"] = max(0, int(wd.get("state_unchanged_streak", 0) or 0)) + 1
    if status == "tools":
        # A tool call clears all no-tool bookkeeping.
        wd["intent_no_tool_streak"] = 0
        wd["repeat_no_tool_streak"] = 0
        wd["last_no_tool_text"] = ""
        wd["last_no_tool_hash"] = ""
    elif status == "no-tools":
        if self._watchdog_intent_without_action(text):
            wd["intent_no_tool_streak"] = max(0, int(wd.get("intent_no_tool_streak", 0) or 0)) + 1
        else:
            wd["intent_no_tool_streak"] = max(0, int(wd.get("intent_no_tool_streak", 0) or 0) - 1)
        prev_text = str(wd.get("last_no_tool_text", "") or "")
        sim = self._watchdog_similarity(prev_text, text)
        if sim >= float(WATCHDOG_REPEAT_SIMILARITY_THRESHOLD):
            wd["repeat_no_tool_streak"] = max(0, int(wd.get("repeat_no_tool_streak", 0) or 0)) + 1
        else:
            wd["repeat_no_tool_streak"] = 0
        wd["last_no_tool_text"] = text
        wd["last_no_tool_hash"] = hashlib.sha1(text.encode("utf-8")).hexdigest() if text else ""
    else:
        # Any other status decays both streaks by one.
        wd["intent_no_tool_streak"] = max(0, int(wd.get("intent_no_tool_streak", 0) or 0) - 1)
        wd["repeat_no_tool_streak"] = max(0, int(wd.get("repeat_no_tool_streak", 0) or 0) - 1)
    bb["watchdog"] = wd
    bb["decomposition_queue"] = dq
    self.blackboard = bb
    progress_row = self._watchdog_mark_step_progress(bb, role, step)
    # Re-read the board and queue after step progress was recorded.
    bb = self._ensure_blackboard()
    dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
    trigger_reason = ""
    if not bool(dq.get("active", False)):
        if int(wd.get("intent_no_tool_streak", 0) or 0) >= int(WATCHDOG_INTENT_NO_TOOL_THRESHOLD):
            trigger_reason = "intent-without-tool-call"
        elif int(wd.get("repeat_no_tool_streak", 0) or 0) >= int(WATCHDOG_REPEAT_NO_TOOL_THRESHOLD):
            trigger_reason = "repeated-no-tool-reply"
        elif (
            self._watchdog_context_near_limit()
            and int(wd.get("state_unchanged_streak", 0) or 0) >= int(WATCHDOG_CONTEXT_STALL_THRESHOLD)
        ):
            trigger_reason = "context-threshold-no-state-change"
        elif (
            status in {"no-tools", "skip"}
            and int(wd.get("state_unchanged_streak", 0) or 0) >= int(WATCHDOG_STATE_STALL_THRESHOLD)
        ):
            trigger_reason = "state-unchanged-stall"
    triggered = False
    if trigger_reason:
        try:
            last_trigger_ts = float(wd.get("last_trigger_ts", 0.0) or 0.0)
        except Exception:
            last_trigger_ts = 0.0
        # One-second cooldown between consecutive triggers.
        if now_ts() - last_trigger_ts >= 1.0:
            triggered = self._watchdog_activate_decomposition(
                bb,
                reason=trigger_reason,
                role=role,
                step=step,
                pinned_selection=pinned_selection,
            )
            bb = self._ensure_blackboard()
    bb["watchdog"] = self._normalize_watchdog_state(bb.get("watchdog", wd))
    bb["decomposition_queue"] = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", dq))
    self.blackboard = bb
    self._blackboard_touch()
    return {
        "triggered": bool(triggered),
        "trigger_reason": trigger_reason,
        "queue_active": bool((bb.get("decomposition_queue", {}) or {}).get("active", False)),
        "step_advanced": bool(progress_row.get("step_advanced", False)),
    }
13585
+
13586
def _watchdog_execute_queue_step(self, *, pinned_selection: str) -> dict:
    """Run one turn of the decomposition queue through the assigned worker.

    Picks the next step with ``_watchdog_pick_executor_route``; if there is
    none, returns ``executed=False`` with the current queue flag. Otherwise
    it injects the step as a mandatory executor-mode instruction, sets the
    blackboard status for explorer/developer/reviewer, runs
    ``_multi_agent_turn``, folds the result into the blackboard and feeds it
    to ``_watchdog_process_worker_step``.

    If the worker signalled finish through a tool, the run is stopped only
    when ``_can_auto_finish_from_approval`` allows it; otherwise a status
    event notes the deferral and the queue continues.

    Returns a dict with ``executed``, ``queue_active``, ``stop_run``,
    ``interrupted`` and, when a step ran, ``role``, ``status``,
    ``wd_event``, ``trigger_reason`` and ``finish_gate_reason``.
    """
    board = self._ensure_blackboard()
    pick = self._watchdog_pick_executor_route(board)
    if not pick:
        dq = self._normalize_decomposition_queue_state(board.get("decomposition_queue", {}))
        return {"executed": False, "queue_active": bool(dq.get("active", False)), "stop_run": False, "interrupted": False}
    queue_args, meta = pick
    role = self._sanitize_agent_role((queue_args or {}).get("target", "")) or "developer"
    instruction = trim(str((queue_args or {}).get("instruction", "") or "").strip(), 1200)
    if not instruction:
        instruction = (
            "Executor mode step: call one concrete tool now, keep scope narrow, and update blackboard evidence."
        )
    self._inject_manager_instruction(role, instruction, is_mandatory=True, executor_mode=True)
    if role == "explorer":
        self._blackboard_set_status("RESEARCHING")
    elif role == "developer":
        self._blackboard_set_status("CODING")
    elif role == "reviewer":
        self._blackboard_set_status("REVIEWING")
    # Fingerprint before/after the turn to tell whether the board changed.
    board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
    step = self._multi_agent_turn(
        role,
        pinned_selection=pinned_selection,
        media_inputs_round=None,
    )
    safe_step = step if isinstance(step, dict) else {}
    self._blackboard_update_from_worker_step(role, safe_step)
    board_after = self._ensure_blackboard()
    board_after_fp = self._watchdog_state_fingerprint(board_after)
    wd_event = self._watchdog_process_worker_step(
        board_after,
        role=role,
        step=safe_step,
        state_changed=bool(board_after_fp != board_before_fp),
        pinned_selection=pinned_selection,
    )
    status = str(safe_step.get("status", "") or "").strip().lower()
    interrupted = bool(status == "interrupted")
    stop_run = False
    finish_gate_reason = ""
    if status == "tools" and bool(safe_step.get("stop_due_to_finish", False)):
        note = f"{self._agent_display_name(role)} signaled finish via tool."
        # Approval note should come from finish tool payload sync; avoid overwriting with generic text here.
        can_finish_now, finish_gate_reason = self._can_auto_finish_from_approval(
            self._ensure_blackboard(),
            latest_user_ts=self._latest_user_message_ts(),
        )
        if can_finish_now:
            self._mark_all_done_silently(note)
            stop_run = True
        else:
            self._emit(
                "status",
                {
                    "summary": (
                        f"executor finish deferred by gate ({finish_gate_reason}); "
                        "continue watchdog queue"
                    )
                },
            )
    dq = self._normalize_decomposition_queue_state(self._ensure_blackboard().get("decomposition_queue", {}))
    return {
        "executed": True,
        "queue_active": bool(dq.get("active", False)),
        "stop_run": bool(stop_run),
        "interrupted": bool(interrupted),
        "role": role,
        "status": status,
        "wd_event": wd_event,
        "trigger_reason": trim(str(meta.get("trigger_reason", "") or "").strip(), 120),
        "finish_gate_reason": finish_gate_reason,
    }
13659
+
13660
def _new_blackboard(self, goal: str = "") -> dict:
    """Build a fresh blackboard dict for the given goal.

    Args:
        goal: The original user goal; stored stripped and trimmed to 4000
            characters. May be empty.

    Returns:
        A new blackboard with empty note/artifact/log collections, an
        unapproved ``approval`` record, an empty ``last_delegate`` record,
        status ``INITIALIZING``, and a ``manager_judgement`` seeded from the
        normalized task profile.
    """
    profile = self._normalize_task_profile(goal, {})

    # A simple-QA profile with no goal text starts out already "done".
    is_simple_qa = str(profile.get("task_type", "") or "") == "simple_qa"
    if is_simple_qa and not str(goal or "").strip():
        progress = "done"
    else:
        progress = "initializing"

    # Keys are inserted in a fixed order; the two timestamps are taken
    # separately, the board-level one first.
    board: dict = {"version": 1}
    board["updated_at"] = float(now_ts())
    board["original_goal"] = trim(str(goal or "").strip(), 4000)
    board["research_notes"] = []
    board["code_artifacts"] = {}
    board["execution_logs"] = []
    board["review_feedback"] = []
    board["conversation_history"] = []
    board["status"] = "INITIALIZING"
    board["approval"] = {
        "approved": False,
        "by": "",
        "note": "",
        "ts": 0.0,
    }
    board["manager_cycles"] = 0
    board["manager_summary_attempts"] = 0
    board["active_agent"] = ""
    board["last_delegate"] = {
        "target": "",
        "instruction": "",
        "reason": "",
        "source": "",
        "is_mandatory": False,
        "ts": 0.0,
    }
    board["task_profile"] = profile

    judgement: dict = {}
    judgement["task_type"] = str(profile.get("task_type", "general"))
    judgement["complexity"] = str(profile.get("complexity", "simple"))
    judgement["scale_preference"] = str(profile.get("scale_preference", "balanced") or "balanced")
    judgement["progress"] = progress
    # A non-positive round budget is recorded as -1 remaining rounds.
    round_budget = int(profile.get("round_budget", 0) or 0)
    judgement["remaining_rounds"] = -1 if round_budget <= 0 else round_budget
    judgement["updated_at"] = float(now_ts())
    board["manager_judgement"] = judgement

    board["last_worker_reply"] = {
        "role": "",
        "text": "",
        "ts": 0.0,
    }
    board["watchdog"] = self._new_watchdog_state()
    board["decomposition_queue"] = self._new_decomposition_queue_state()
    return board
13711
+
13712
+ def _normalize_blackboard(self, raw: object) -> dict:
13713
+ src = raw if isinstance(raw, dict) else {}
13714
+ board = self._new_blackboard(str(src.get("original_goal", "") or ""))
13715
+ try:
13716
+ board["version"] = int(src.get("version", 1) or 1)
13717
+ except Exception:
13718
+ board["version"] = 1
13719
+ board["updated_at"] = float(src.get("updated_at", now_ts()) or now_ts())
13720
+ board["status"] = self._normalize_blackboard_status(src.get("status", board["status"]))
13721
+ board["manager_cycles"] = max(0, int(src.get("manager_cycles", 0) or 0))
13722
+ board["manager_summary_attempts"] = max(0, int(src.get("manager_summary_attempts", 0) or 0))
13723
+ board["active_agent"] = self._sanitize_agent_role(src.get("active_agent", ""))
13724
+ raw_delegate = src.get("last_delegate", {})
13725
+ if isinstance(raw_delegate, dict):
13726
+ board["last_delegate"] = {
13727
+ "target": str(raw_delegate.get("target", "") or "").strip().lower(),
13728
+ "instruction": trim(str(raw_delegate.get("instruction", "") or "").strip(), 1200),
13729
+ "reason": trim(str(raw_delegate.get("reason", "") or "").strip(), 600),
13730
+ "source": trim(str(raw_delegate.get("source", "") or "").strip(), 40),
13731
+ "is_mandatory": _to_bool_like(raw_delegate.get("is_mandatory", False), default=False),
13732
+ "ts": float(raw_delegate.get("ts", 0.0) or 0.0),
13733
+ }
13734
+ raw_approval = src.get("approval", {})
13735
+ if isinstance(raw_approval, dict):
13736
+ board["approval"] = {
13737
+ "approved": bool(raw_approval.get("approved", False)),
13738
+ "by": trim(str(raw_approval.get("by", "") or "").strip(), 60),
13739
+ "note": trim(str(raw_approval.get("note", "") or "").strip(), 1000),
13740
+ "ts": float(raw_approval.get("ts", 0.0) or 0.0),
13741
+ }
13742
+ board["task_profile"] = self._normalize_task_profile(
13743
+ str(board.get("original_goal", "") or ""),
13744
+ src.get("task_profile", {}),
13745
+ )
13746
+ raw_judgement = src.get("manager_judgement", {})
13747
+ if isinstance(raw_judgement, dict):
13748
+ board["manager_judgement"] = {
13749
+ "task_type": trim(
13750
+ str(raw_judgement.get("task_type", board["task_profile"].get("task_type", "")) or "").strip(),
12761
13751
  40,
12762
13752
  ),
12763
13753
  "complexity": (
@@ -12866,6 +13856,10 @@ class SessionState:
12866
13856
  "change_count": max(1, int(item.get("change_count", 1) or 1)),
12867
13857
  }
12868
13858
  board["code_artifacts"] = artifacts
13859
+ board["watchdog"] = self._normalize_watchdog_state(src.get("watchdog", {}))
13860
+ board["decomposition_queue"] = self._normalize_decomposition_queue_state(
13861
+ src.get("decomposition_queue", {})
13862
+ )
12869
13863
  return board
12870
13864
 
12871
13865
  def _ensure_blackboard(self) -> dict:
@@ -13247,6 +14241,11 @@ class SessionState:
13247
14241
  goal = trim(str(board.get("original_goal", "") or "").strip(), 1800)
13248
14242
  status = self._normalize_blackboard_status(board.get("status", "INITIALIZING"))
13249
14243
  delegate = board.get("last_delegate", {}) if isinstance(board.get("last_delegate"), dict) else {}
14244
+ watchdog = board.get("watchdog", {}) if isinstance(board.get("watchdog"), dict) else {}
14245
+ dq = board.get("decomposition_queue", {}) if isinstance(board.get("decomposition_queue"), dict) else {}
14246
+ dq_steps = dq.get("steps", []) if isinstance(dq.get("steps"), list) else []
14247
+ dq_cursor = max(0, int(dq.get("cursor", 0) or 0))
14248
+ dq_total = len(dq_steps)
13250
14249
  lines = [
13251
14250
  "## Blackboard State",
13252
14251
  f"- status: {status}",
@@ -13269,6 +14268,19 @@ class SessionState:
13269
14268
  f"- active_agent: {board.get('active_agent', '') or '(none)'}",
13270
14269
  f"- manager_cycles: {int(board.get('manager_cycles', 0) or 0)}",
13271
14270
  f"- manager_summary_attempts: {int(board.get('manager_summary_attempts', 0) or 0)}",
14271
+ (
14272
+ "- watchdog: "
14273
+ f"intent_no_tool={int(watchdog.get('intent_no_tool_streak', 0) or 0)}, "
14274
+ f"repeat_no_tool={int(watchdog.get('repeat_no_tool_streak', 0) or 0)}, "
14275
+ f"state_unchanged={int(watchdog.get('state_unchanged_streak', 0) or 0)}, "
14276
+ f"trigger_count={int(watchdog.get('trigger_count', 0) or 0)}"
14277
+ ),
14278
+ (
14279
+ "- decomposition_queue: "
14280
+ f"active={bool(dq.get('active', False))}, "
14281
+ f"cursor={dq_cursor}, total={dq_total}, "
14282
+ f"trigger_reason={trim(str(dq.get('trigger_reason', '') or ''), 140)}"
14283
+ ),
13272
14284
  (
13273
14285
  "- manager_judgement: "
13274
14286
  f"{trim(str(judgement.get('progress', 'initializing') or ''), 40)}"
@@ -13357,6 +14369,7 @@ class SessionState:
13357
14369
  "assigned_expert": {"type": "string", "enum": list(AGENT_ROLES)},
13358
14370
  "requires_user_confirmation": {"type": "boolean"},
13359
14371
  "is_mandatory": {"type": "boolean"},
14372
+ "executor_mode": {"type": "boolean"},
13360
14373
  },
13361
14374
  ["target", "instruction"],
13362
14375
  )
@@ -13375,6 +14388,8 @@ class SessionState:
13375
14388
  "task_type": {"type": "string"},
13376
14389
  "complexity": {"type": "string", "enum": list(TASK_COMPLEXITY_LEVELS)},
13377
14390
  "scale_preference": {"type": "string", "enum": list(TASK_SCALE_PREFERENCES)},
14391
+ "semantic_confidence": {"type": "string", "enum": list(SEMANTIC_CONFIDENCE_CHOICES)},
14392
+ "low_confidence_reason": {"type": "string"},
13378
14393
  "inherit_previous_state": {"type": "boolean"},
13379
14394
  "judgement": {"type": "string"},
13380
14395
  "round_budget": {"type": "integer"},
@@ -13398,6 +14413,68 @@ class SessionState:
13398
14413
  yes_tokens = ("继续", "确认", "开始", "执行", "同意", "go ahead", "proceed", "continue", "yes")
13399
14414
  return any(tok in low for tok in yes_tokens)
13400
14415
 
14416
def _normalize_semantic_confidence(self, raw: object, *, default: str = "medium") -> str:
    """Coerce ``raw`` to one of ``SEMANTIC_CONFIDENCE_CHOICES``.

    Args:
        raw: Candidate value; it is stringified, stripped and lower-cased
            before being checked.
        default: Value used when ``raw`` is not a recognised choice.

    Returns:
        The normalized ``raw`` if it is a known choice, otherwise ``default``
        if that is a known choice, otherwise ``"medium"``.
    """
    candidate = str(raw or "").strip().lower()
    # First recognised value wins: the cleaned input, then the caller default.
    for option in (candidate, default):
        if option in SEMANTIC_CONFIDENCE_CHOICES:
            return option
    return "medium"
14421
+
14422
def _merge_task_decision_for_low_confidence(self, llm_row: dict, fallback_row: dict) -> dict:
    """Overlay valid fields of a low-confidence LLM decision onto a fallback.

    Starts from a shallow copy of ``fallback_row`` and replaces a field only
    when the corresponding value in ``llm_row`` passes its validation
    (known enum member, sanitized role, positive budget, non-empty text).

    Args:
        llm_row: Decision produced by the model; anything that is not a dict
            is treated as empty.
        fallback_row: Fallback decision used as the base; may be empty.

    Returns:
        The merged decision, always carrying ``semantic_confidence``,
        ``low_confidence_reason`` and
        ``source == "manager-low-confidence+fallback"``.
    """
    merged = dict(fallback_row or {})
    row = llm_row if isinstance(llm_row, dict) else {}

    # Boolean flags are only ever switched on by the LLM row, never off.
    if bool(row.get("inherit_previous_state", False)):
        merged["inherit_previous_state"] = True

    try:
        level = int(row.get("level", 0) or 0)
    except Exception:
        level = 0
    if level in TASK_LEVEL_CHOICES:
        merged["level"] = int(level)

    # Enum-like string fields: accepted only when the cleaned value is a
    # member of the allowed set.
    for key, max_len, allowed in (
        ("task_type", 40, TASK_PROFILE_TYPES),
        ("complexity", 20, TASK_COMPLEXITY_LEVELS),
        ("scale_preference", 20, TASK_SCALE_PREFERENCES),
    ):
        cleaned = trim(str(row.get(key, "") or "").strip().lower(), max_len)
        if cleaned in allowed:
            merged[key] = cleaned

    mode = normalize_execution_mode(row.get("execution_mode", ""), default="")
    if mode in EXECUTION_MODE_CHOICES:
        merged["execution_mode"] = mode

    assigned = self._sanitize_agent_role(row.get("assigned_expert", ""))
    if assigned:
        merged["assigned_expert"] = assigned

    # Collect sanitized roles in first-seen order without duplicates; at
    # most three are kept.
    participants: list[str] = []
    raw_participants = row.get("participants", [])
    if isinstance(raw_participants, list):
        for entry in raw_participants:
            role = self._sanitize_agent_role(entry)
            if not role or role in participants:
                continue
            participants.append(role)
    if participants:
        merged["participants"] = participants[:3]

    try:
        budget = int(row.get("round_budget", 0) or 0)
    except Exception:
        budget = 0
    if budget > 0:
        # Clamp the requested budget into [1, configured maximum rounds].
        ceiling = int(self.max_agent_rounds or MAX_AGENT_ROUNDS)
        merged["round_budget"] = int(max(1, min(ceiling, int(budget))))

    if bool(row.get("requires_user_confirmation", False)):
        merged["requires_user_confirmation"] = True

    # Free-text fields override the fallback only when non-empty.
    for key, max_len in (("direct_objective", 800), ("judgement", 200)):
        text = trim(str(row.get(key, "") or "").strip(), max_len)
        if text:
            merged[key] = text

    merged["semantic_confidence"] = self._normalize_semantic_confidence(
        row.get("semantic_confidence", "low"),
        default="low",
    )
    merged["low_confidence_reason"] = trim(
        str(row.get("low_confidence_reason", "") or "").strip(),
        220,
    )
    merged["source"] = "manager-low-confidence+fallback"
    return merged
14477
+
13401
14478
  def _fallback_task_level_decision(self, goal_text: str) -> dict:
13402
14479
  profile = self._infer_task_profile(goal_text)
13403
14480
  task_type = str(profile.get("task_type", "general") or "general")
@@ -13501,6 +14578,8 @@ class SessionState:
13501
14578
  "participants": list(inherited_participants),
13502
14579
  "assigned_expert": inherited_assigned,
13503
14580
  "requires_user_confirmation": bool(inherited_requires_confirmation if inherited_level == 5 else False),
14581
+ "semantic_confidence": "low",
14582
+ "low_confidence_reason": "rule fallback inherited previous runtime state",
13504
14583
  "source": "fallback",
13505
14584
  }
13506
14585
  level = 3
@@ -13537,6 +14616,8 @@ class SessionState:
13537
14616
  "participants": participants,
13538
14617
  "assigned_expert": assigned,
13539
14618
  "requires_user_confirmation": bool(requires_confirmation),
14619
+ "semantic_confidence": "low",
14620
+ "low_confidence_reason": "rule fallback classification",
13540
14621
  "source": "fallback",
13541
14622
  }
13542
14623
 
@@ -13559,7 +14640,9 @@ class SessionState:
13559
14640
  "If user clearly indicates speed vs completeness preference, that preference has higher priority than your default strategy. "
13560
14641
  "Budgets are internal efficiency controls to reduce overthinking and idle loops; "
13561
14642
  "they must not be treated as a user-visible early-stop reason. "
13562
- "Output exactly one classify_task_level tool call with concise judgement and inherit_previous_state. "
14643
+ "Output exactly one classify_task_level tool call with concise judgement, inherit_previous_state, "
14644
+ "and semantic_confidence(high|medium|low). "
14645
+ "Use low confidence only when semantic ambiguity is substantial, then set low_confidence_reason briefly. "
13563
14646
  f"{model_language_instruction(self.ui_language)}"
13564
14647
  )
13565
14648
 
@@ -13657,16 +14740,55 @@ class SessionState:
13657
14740
  participants = normalized_participants[:3] or [assigned]
13658
14741
  if assigned not in participants:
13659
14742
  assigned = participants[0]
14743
+ semantic_confidence = self._normalize_semantic_confidence(
14744
+ row.get("semantic_confidence", "medium"),
14745
+ default="medium",
14746
+ )
14747
+ decision_source = trim(str(row.get("source", "") or "").strip().lower(), 80)
14748
+ low_confidence_mode = bool(
14749
+ str(semantic_confidence or "medium") == "low"
14750
+ or decision_source.startswith("fallback")
14751
+ or "low-confidence" in decision_source
14752
+ )
14753
+ if low_confidence_mode:
14754
+ rule_profile = self._infer_task_profile(goal_text)
14755
+ fallback_task_type = str(rule_profile.get("task_type", "general") or "general")
14756
+ fallback_complexity = str(rule_profile.get("complexity", "simple") or "simple")
14757
+ fallback_objective = trim(str(rule_profile.get("direct_objective", "") or ""), 800)
14758
+ else:
14759
+ board_now = self._ensure_blackboard()
14760
+ board_profile = board_now.get("task_profile", {}) if isinstance(board_now.get("task_profile"), dict) else {}
14761
+ fallback_task_type = trim(
14762
+ str(self.runtime_task_type or board_profile.get("task_type", "general") or "general"),
14763
+ 40,
14764
+ )
14765
+ if fallback_task_type not in TASK_PROFILE_TYPES:
14766
+ fallback_task_type = "general"
14767
+ fallback_complexity = trim(
14768
+ str(self.runtime_task_complexity or board_profile.get("complexity", "simple") or "simple"),
14769
+ 20,
14770
+ )
14771
+ if fallback_complexity not in TASK_COMPLEXITY_LEVELS:
14772
+ fallback_complexity = "simple"
14773
+ fallback_objective = trim(
14774
+ str(self.runtime_direct_objective or board_profile.get("direct_objective", "") or "").strip(),
14775
+ 800,
14776
+ )
14777
+ if not fallback_objective:
14778
+ fallback_objective = (
14779
+ "Proceed with direct semantic objective and concrete progress for the current request."
14780
+ )
13660
14781
  task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
13661
14782
  if task_type not in TASK_PROFILE_TYPES:
13662
- task_type = str(self._infer_task_profile(goal_text).get("task_type", "general"))
14783
+ task_type = fallback_task_type
13663
14784
  complexity = trim(str(row.get("complexity", "") or "").strip().lower(), 20)
13664
14785
  if complexity not in TASK_COMPLEXITY_LEVELS:
13665
- complexity = str(self._infer_task_profile(goal_text).get("complexity", "simple"))
14786
+ complexity = fallback_complexity
14787
+ low_confidence_reason = trim(str(row.get("low_confidence_reason", "") or "").strip(), 220)
13666
14788
  judgement = trim(str(row.get("judgement", "") or "").strip(), 200) or "manager classified task level"
13667
14789
  objective = trim(str(row.get("direct_objective", "") or "").strip(), 800)
13668
14790
  if not objective:
13669
- objective = trim(str(self._infer_task_profile(goal_text).get("direct_objective", "") or ""), 800)
14791
+ objective = fallback_objective
13670
14792
  self.runtime_task_level = int(level)
13671
14793
  self.runtime_execution_mode = mode
13672
14794
  self.runtime_assigned_expert = assigned
@@ -13694,6 +14816,8 @@ class SessionState:
13694
14816
  profile["direct_objective"] = objective
13695
14817
  profile["round_budget"] = int(round_budget)
13696
14818
  profile["inherit_previous_state"] = bool(inherit_previous_state)
14819
+ profile["semantic_confidence"] = semantic_confidence
14820
+ profile["low_confidence_reason"] = low_confidence_reason
13697
14821
  profile["recommended_agents"] = list(participants)
13698
14822
  profile["reason"] = trim(str(row.get("judgement", "") or row.get("source", "manager")), 400)
13699
14823
  profile["updated_at"] = float(now_ts())
@@ -13709,6 +14833,8 @@ class SessionState:
13709
14833
  "execution_mode": mode,
13710
14834
  "participants": list(participants),
13711
14835
  "assigned_expert": assigned,
14836
+ "semantic_confidence": semantic_confidence,
14837
+ "low_confidence_reason": low_confidence_reason,
13712
14838
  "updated_at": float(now_ts()),
13713
14839
  }
13714
14840
  board["active_agent"] = assigned if mode == EXECUTION_MODE_SINGLE else ""
@@ -13721,7 +14847,8 @@ class SessionState:
13721
14847
  "summary": (
13722
14848
  f"manager classified: L{level} "
13723
14849
  f"mode={mode} scale={scale_preference} participants={','.join(participants)} "
13724
- f"expert={assigned} budget={'unlimited' if int(round_budget) <= 0 else int(round_budget)}"
14850
+ f"expert={assigned} budget={'unlimited' if int(round_budget) <= 0 else int(round_budget)} "
14851
+ f"confidence={semantic_confidence}"
13725
14852
  )
13726
14853
  },
13727
14854
  )
@@ -13779,10 +14906,20 @@ class SessionState:
13779
14906
  row.get("inherit_previous_state", False),
13780
14907
  default=False,
13781
14908
  )
14909
+ row["semantic_confidence"] = self._normalize_semantic_confidence(
14910
+ row.get("semantic_confidence", "medium"),
14911
+ default="medium",
14912
+ )
14913
+ if str(row.get("semantic_confidence", "medium")) == "low":
14914
+ fallback_row = self._fallback_task_level_decision(goal_text)
14915
+ merged = self._merge_task_decision_for_low_confidence(row, fallback_row)
14916
+ return merged
13782
14917
  row["source"] = "manager"
13783
14918
  return row
13784
14919
  row = self._fallback_task_level_decision(goal_text)
13785
- row["source"] = "fallback"
14920
+ row["source"] = "fallback-no-toolcall"
14921
+ row["semantic_confidence"] = "low"
14922
+ row["low_confidence_reason"] = "manager classifier returned no valid tool call"
13786
14923
  return row
13787
14924
 
13788
14925
  def _refresh_runtime_task_policy(
@@ -14038,7 +15175,7 @@ class SessionState:
14038
15175
  "reason": "forced-finish-budget-exhausted",
14039
15176
  "source": "fallback",
14040
15177
  }
14041
- if finish_gate_reason == "reviewer-summary-missing" and summary_attempts >= 1:
15178
+ if finish_gate_reason == "reviewer-summary-missing" and summary_attempts >= 2:
14042
15179
  self._emit("status", {"summary": "Summary generation attempted; forcing finish now."})
14043
15180
  return {
14044
15181
  "target": "finish",
@@ -14072,13 +15209,28 @@ class SessionState:
14072
15209
  "source": "fallback",
14073
15210
  }
14074
15211
  if finish_gate_reason == "reviewer-summary-missing":
14075
- board["manager_summary_attempts"] = summary_attempts + 1
15212
+ next_attempt = summary_attempts + 1
15213
+ board["manager_summary_attempts"] = next_attempt
14076
15214
  self.blackboard = board
15215
+ if next_attempt >= 2:
15216
+ return {
15217
+ "target": "explorer",
15218
+ "instruction": (
15219
+ "Reviewer summary is still missing. Read blackboard sections "
15220
+ "(code_artifacts, execution_logs, review_feedback, status) and write one structured "
15221
+ "final summary to blackboard (changes, validation evidence, residual risks/next steps). "
15222
+ "Do not call finish tool in this step."
15223
+ ),
15224
+ "reason": "approval-missing-summary-handoff-explorer",
15225
+ "source": "fallback",
15226
+ "is_mandatory": True,
15227
+ }
14077
15228
  return {
14078
15229
  "target": "reviewer",
14079
15230
  "instruction": (
14080
- "Review approved but final summary required. Write one final wrap-up summary from "
14081
- "blackboard evidence (changes, validation, residual risks/next steps), and then finish."
15231
+ "Review approved but final summary required. First call read_from_blackboard for "
15232
+ "code_artifacts/execution_logs/review_feedback/status, then call finish_task with summary "
15233
+ "including changes, validation evidence, and residual risks/next steps."
14082
15234
  ),
14083
15235
  "reason": "approval-missing-reviewer-summary-request",
14084
15236
  "source": "fallback",
@@ -14241,6 +15393,8 @@ class SessionState:
14241
15393
 
14242
15394
  def _manager_apply_anti_stall(self, route: dict) -> dict:
14243
15395
  row = dict(route or {})
15396
+ if bool(row.get("executor_mode", False)):
15397
+ return row
14244
15398
  if str(row.get("task_type", "") or "").strip().lower() == "simple_qa":
14245
15399
  return row
14246
15400
  target = str(row.get("target", "") or "").strip().lower()
@@ -14282,6 +15436,7 @@ class SessionState:
14282
15436
 
14283
15437
  def _manager_apply_task_policy(self, route: dict) -> dict:
14284
15438
  row = dict(route or {})
15439
+ executor_mode_flag = _to_bool_like(row.get("executor_mode", False), default=False)
14285
15440
  board = self._ensure_blackboard()
14286
15441
  latest_user_ts = self._latest_user_message_ts()
14287
15442
  self._invalidate_stale_approval_if_needed(
@@ -14330,7 +15485,13 @@ class SessionState:
14330
15485
  if target not in MANAGER_ROUTE_TARGETS:
14331
15486
  target = assigned_expert if mode == EXECUTION_MODE_SINGLE else "developer"
14332
15487
  if target in AGENT_ROLES and target not in participants:
14333
- target = participants[0]
15488
+ if executor_mode_flag:
15489
+ if len(participants) < 3:
15490
+ participants.append(target)
15491
+ else:
15492
+ participants[-1] = target
15493
+ else:
15494
+ target = participants[0]
14334
15495
  instruction = trim(str(row.get("instruction", "") or "").strip(), 1200)
14335
15496
  if not instruction:
14336
15497
  instruction = "Proceed with one concrete next step and report evidence."
@@ -14381,22 +15542,19 @@ class SessionState:
14381
15542
  board,
14382
15543
  latest_user_ts=latest_user_ts,
14383
15544
  )
15545
+ board_status = self._normalize_blackboard_status(board.get("status", "INITIALIZING"))
15546
+ code_count = len(board.get("code_artifacts", {}) or {})
15547
+ research_count = len(board.get("research_notes", []) or [])
15548
+ feedback_pass = self._manager_feedback_passed_from_blackboard(board)
14384
15549
  summary_attempts = int(board.get("manager_summary_attempts", 0) or 0)
14385
15550
  force_finish_override = False
14386
- if remaining == 0:
14387
- force_finish_override = True
14388
- target = "finish"
14389
- instruction = "Maximum rounds reached. Generate final summary and finish immediately."
14390
- row["reason"] = "forced-finish-budget"
14391
- row["source"] = "policy"
14392
- self._emit("status", {"summary": "Round budget exhausted; forcing finish."})
14393
15551
  if bool((board.get("approval", {}) or {}).get("approved", False)) and can_finish_from_approval:
14394
15552
  target = "finish"
14395
15553
  if not instruction:
14396
15554
  instruction = "Review already approved; finish now."
14397
- if target == "finish" and (not can_finish_from_approval) and (not force_finish_override):
15555
+ if target == "finish" and (not can_finish_from_approval):
14398
15556
  if finish_gate_reason == "reviewer-summary-missing":
14399
- if summary_attempts >= 1:
15557
+ if summary_attempts >= 2:
14400
15558
  force_finish_override = True
14401
15559
  target = "finish"
14402
15560
  instruction = (
@@ -14406,13 +15564,27 @@ class SessionState:
14406
15564
  row["reason"] = "forced-finish-summary-max-retry"
14407
15565
  row["source"] = "policy"
14408
15566
  self._emit("status", {"summary": "Summary retry limit reached; forcing finish."})
15567
+ elif summary_attempts >= 1:
15568
+ board["manager_summary_attempts"] = summary_attempts + 1
15569
+ self.blackboard = board
15570
+ target = "explorer"
15571
+ instruction = (
15572
+ "Reviewer summary is still missing. Read blackboard sections "
15573
+ "(code_artifacts, execution_logs, review_feedback, status) and write one structured "
15574
+ "final summary to blackboard: changes, validation evidence, residual risks/next steps. "
15575
+ "Do not call finish tool in this step."
15576
+ )
15577
+ row["reason"] = "finish-blocked-summary-handoff-explorer"
15578
+ row["source"] = "policy"
15579
+ self._emit("status", {"summary": "Reviewer summary missing; handoff to explorer synthesis."})
14409
15580
  else:
14410
15581
  board["manager_summary_attempts"] = summary_attempts + 1
14411
15582
  self.blackboard = board
14412
15583
  target = "reviewer"
14413
15584
  instruction = (
14414
- "Generate final summary report covering implemented outputs, validation evidence, "
14415
- "and residual risks/next steps. This is the final step before completion."
15585
+ "Generate final summary report from blackboard evidence. First call read_from_blackboard "
15586
+ "(code_artifacts, execution_logs, review_feedback, status), then call finish_task.summary "
15587
+ "including changes, validation evidence, and residual risks/next steps."
14416
15588
  )
14417
15589
  row["reason"] = "finish-blocked-summary-request"
14418
15590
  row["source"] = "policy"
@@ -14445,10 +15617,35 @@ class SessionState:
14445
15617
  "Resolve errors and provide verifiable evidence."
14446
15618
  )
14447
15619
  else:
14448
- instruction = (
14449
- "Do not finish yet. Completion requires fresh reviewer approval for the current user request. "
14450
- "Continue with one concrete step and update blackboard."
14451
- )
15620
+ has_outputs = bool(code_count > 0 or research_count > 0)
15621
+ if board_status == "COMPLETED" and has_outputs:
15622
+ force_finish_override = True
15623
+ target = "finish"
15624
+ instruction = (
15625
+ "Task is already in COMPLETED state with concrete outputs. "
15626
+ "Generate final summary from blackboard (changes, validation evidence, residual "
15627
+ "risks/next steps) and finish now."
15628
+ )
15629
+ row["reason"] = "finish-blocked-completed-auto-summary-close"
15630
+ row["source"] = "policy"
15631
+ self._emit(
15632
+ "status",
15633
+ {"summary": "Completion gate unresolved but board is COMPLETED; auto-closing with final summary."},
15634
+ )
15635
+ elif feedback_pass and has_outputs:
15636
+ force_finish_override = True
15637
+ target = "finish"
15638
+ instruction = (
15639
+ "Reviewer feedback already passed with concrete outputs. "
15640
+ "Generate final summary and finish now."
15641
+ )
15642
+ row["reason"] = "finish-blocked-feedback-pass-auto-close"
15643
+ row["source"] = "policy"
15644
+ else:
15645
+ instruction = (
15646
+ "Do not finish yet. Completion requires fresh reviewer approval for the current user request. "
15647
+ "Continue with one concrete step and update blackboard."
15648
+ )
14452
15649
  if finish_gate_reason != "reviewer-summary-missing":
14453
15650
  self._emit(
14454
15651
  "status",
@@ -14477,6 +15674,7 @@ class SessionState:
14477
15674
  is_mandatory = True
14478
15675
  if target == "finish":
14479
15676
  is_mandatory = False
15677
+ executor_mode_flag = False
14480
15678
  row.update(
14481
15679
  {
14482
15680
  "target": target,
@@ -14491,6 +15689,7 @@ class SessionState:
14491
15689
  "participants": list(participants),
14492
15690
  "assigned_expert": assigned_expert,
14493
15691
  "is_mandatory": bool(is_mandatory),
15692
+ "executor_mode": bool(executor_mode_flag and target in AGENT_ROLES),
14494
15693
  "requires_user_confirmation": bool(
14495
15694
  row.get(
14496
15695
  "requires_user_confirmation",
@@ -14535,6 +15734,7 @@ class SessionState:
14535
15734
  "assigned_expert": trim(str(args.get("assigned_expert", "") or "").strip().lower(), 20),
14536
15735
  "requires_user_confirmation": bool(args.get("requires_user_confirmation", False)),
14537
15736
  "is_mandatory": _to_bool_like(args.get("is_mandatory", False), default=False),
15737
+ "executor_mode": _to_bool_like(args.get("executor_mode", False), default=False),
14538
15738
  "round_budget": args.get("round_budget", 0),
14539
15739
  "reason": trim(str(text or "").strip(), 600),
14540
15740
  "source": "tool",
@@ -14563,6 +15763,7 @@ class SessionState:
14563
15763
  objective, _ = self._split_language_policy_from_text(objective_raw, max_len=800)
14564
15764
  instruction, _ = self._split_language_policy_from_text(instruction_raw, max_len=1200)
14565
15765
  is_mandatory = bool(row.get("is_mandatory", False))
15766
+ is_executor = bool(row.get("executor_mode", False))
14566
15767
  round_budget = int(row.get("round_budget", 0) or 0)
14567
15768
  remaining = int(row.get("remaining_rounds", -1) or -1)
14568
15769
  budget_text = "unlimited" if round_budget <= 0 else str(round_budget)
@@ -14571,7 +15772,11 @@ class SessionState:
14571
15772
  lines = [
14572
15773
  f"Manager -> {target_label}",
14573
15774
  f"L{task_level if task_level in TASK_LEVEL_CHOICES else '-'} | {mode} | {task_type}/{complexity} | scale={scale}",
14574
- f"mandatory={'yes' if is_mandatory else 'no'} | budget={budget_text} | remaining={remaining_text}",
15775
+ (
15776
+ f"mandatory={'yes' if is_mandatory else 'no'}"
15777
+ f" | executor={'yes' if is_executor else 'no'}"
15778
+ f" | budget={budget_text} | remaining={remaining_text}"
15779
+ ),
14575
15780
  ]
14576
15781
  if objective:
14577
15782
  lines.append(f"objective: {objective}")
@@ -14587,6 +15792,7 @@ class SessionState:
14587
15792
  "complexity": complexity,
14588
15793
  "scale_preference": scale,
14589
15794
  "is_mandatory": is_mandatory,
15795
+ "executor_mode": is_executor,
14590
15796
  "round_budget": round_budget,
14591
15797
  "remaining_rounds": remaining,
14592
15798
  "direct_objective": objective,
@@ -14611,21 +15817,24 @@ class SessionState:
14611
15817
  board["manager_cycles"] = int(board.get("manager_cycles", 0) or 0) + 1
14612
15818
  text = ""
14613
15819
  tool_calls: list[dict] = []
15820
+ used_watchdog_executor = False
15821
+ watchdog_meta: dict = {}
15822
+ watchdog_pick = self._watchdog_pick_executor_route(board)
14614
15823
  used_agentbus_fast = False
14615
15824
  fast_meta: dict = {}
14616
- fast_pick = self._manager_pick_agentbus_fast_route(board)
14617
- if fast_pick:
14618
- used_agentbus_fast = True
14619
- fast_args, fast_meta = fast_pick
15825
+ if watchdog_pick:
15826
+ used_watchdog_executor = True
15827
+ queue_args, watchdog_meta = watchdog_pick
14620
15828
  with self.lock:
14621
- self.current_phase = "manager:agentbus-fast-route"
15829
+ self.current_phase = "manager:watchdog-executor-route"
14622
15830
  self.current_tool_name = ""
14623
15831
  self.active_agent_role = "manager"
14624
15832
  text = trim(
14625
15833
  (
14626
- "agentbus fast-route "
14627
- f"{fast_meta.get('from', '?')}->{fast_meta.get('to', '?')} "
14628
- f"intent={fast_meta.get('intent', 'message')} id={fast_meta.get('env_id', '-')}"
15834
+ "watchdog executor route "
15835
+ f"step={int(watchdog_meta.get('cursor', 0) or 0)}/{int(watchdog_meta.get('total', 0) or 0)} "
15836
+ f"target={watchdog_meta.get('target', '?')} "
15837
+ f"trigger={watchdog_meta.get('trigger_reason', '') or '?'}"
14629
15838
  ),
14630
15839
  600,
14631
15840
  )
@@ -14635,7 +15844,7 @@ class SessionState:
14635
15844
  "type": "function",
14636
15845
  "function": {
14637
15846
  "name": "route_to_next_agent",
14638
- "arguments": dict(fast_args or {}),
15847
+ "arguments": dict(queue_args or {}),
14639
15848
  },
14640
15849
  }
14641
15850
  ]
@@ -14643,7 +15852,7 @@ class SessionState:
14643
15852
  {
14644
15853
  "role": "system",
14645
15854
  "content": (
14646
- "[manager-fast-route] "
15855
+ "[manager-watchdog-route] "
14647
15856
  f"{trim(str(text or ''), 500)}"
14648
15857
  ),
14649
15858
  "ts": now_ts(),
@@ -14654,102 +15863,165 @@ class SessionState:
14654
15863
  "status",
14655
15864
  {
14656
15865
  "summary": (
14657
- "manager fast-route via agentbus "
14658
- f"({fast_meta.get('from', '?')}->{fast_meta.get('to', '?')}, "
14659
- f"intent={fast_meta.get('intent', 'message')}, "
14660
- f"age={float(fast_meta.get('age_sec', 0.0) or 0.0):.1f}s)"
15866
+ "manager watchdog executor active "
15867
+ f"(step={int(watchdog_meta.get('cursor', 0) or 0)}/"
15868
+ f"{int(watchdog_meta.get('total', 0) or 0)}, "
15869
+ f"target={watchdog_meta.get('target', '?')}, "
15870
+ f"trigger={trim(str(watchdog_meta.get('trigger_reason', '') or ''), 80)})"
14661
15871
  )
14662
15872
  },
14663
15873
  )
14664
15874
  else:
14665
- prompt = (
14666
- "Read the blackboard and delegate one next short timeslice. "
14667
- "Return only one route_to_next_agent call.\n\n"
14668
- f"{self._blackboard_read_state_markdown(max_items=6)}"
14669
- )
14670
- self.manager_context.append({"role": "user", "content": prompt, "ts": now_ts()})
14671
- self.manager_context = self.manager_context[-400:]
14672
- with self.lock:
14673
- self.current_phase = "manager:model-call"
14674
- self.current_tool_name = ""
14675
- self.active_agent_role = "manager"
14676
- response = self._chat_with_same_model_retry(
14677
- self.manager_context,
14678
- tools=self._manager_route_tools(),
14679
- system=self._manager_system_prompt(),
14680
- max_tokens=600,
14681
- think=False,
14682
- stream_thinking=False,
14683
- on_thinking_chunk=self._append_live_thinking,
14684
- pinned_selection=pinned_selection,
14685
- context_label="manager turn",
14686
- retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
14687
- media_inputs=media_inputs_round,
14688
- )
14689
- text = str(response.get("content") or "")
14690
- tool_calls = response.get("tool_calls", [])
14691
- text, text_filter_meta = self._sanitize_assistant_text_for_runtime(text, tool_calls)
14692
- if bool(text_filter_meta.get("filtered", False)) and str(text_filter_meta.get("reason", "")) == "oversized_raw_toolcall":
14693
- self._inject_toolcall_overflow_hint("manager")
14694
- assistant = {"role": "assistant", "content": text, "ts": now_ts()}
14695
- if tool_calls:
14696
- assistant["tool_calls"] = [
15875
+ fast_pick = self._manager_pick_agentbus_fast_route(board)
15876
+ if fast_pick:
15877
+ used_agentbus_fast = True
15878
+ fast_args, fast_meta = fast_pick
15879
+ with self.lock:
15880
+ self.current_phase = "manager:agentbus-fast-route"
15881
+ self.current_tool_name = ""
15882
+ self.active_agent_role = "manager"
15883
+ text = trim(
15884
+ (
15885
+ "agentbus fast-route "
15886
+ f"{fast_meta.get('from', '?')}->{fast_meta.get('to', '?')} "
15887
+ f"intent={fast_meta.get('intent', 'message')} id={fast_meta.get('env_id', '-')}"
15888
+ ),
15889
+ 600,
15890
+ )
15891
+ tool_calls = [
14697
15892
  {
14698
- "id": tc["id"],
15893
+ "id": make_id("tc"),
14699
15894
  "type": "function",
14700
15895
  "function": {
14701
- "name": tc["function"]["name"],
14702
- "arguments": json_dumps(tc["function"]["arguments"]),
15896
+ "name": "route_to_next_agent",
15897
+ "arguments": dict(fast_args or {}),
14703
15898
  },
14704
15899
  }
14705
- for tc in tool_calls
14706
15900
  ]
14707
- self.manager_context.append(assistant)
14708
- self.manager_context = self.manager_context[-400:]
14709
- route_only_tool_calls = False
14710
- if isinstance(tool_calls, list) and tool_calls:
14711
- tool_names = [
14712
- str(tc.get("function", {}).get("name", "") or "").strip().lower()
14713
- for tc in tool_calls
14714
- if isinstance(tc, dict)
14715
- ]
14716
- if tool_names and all(name in {"route_to_next_agent", "routetonext_agent"} for name in tool_names):
14717
- route_only_tool_calls = True
14718
- emit_text = str(text or "").strip()
14719
- if not emit_text and tool_calls and (not route_only_tool_calls):
14720
- emit_text = f"[tool calls] {', '.join(str(tc.get('function', {}).get('name', '?')) for tc in tool_calls)}"
14721
- if emit_text:
14722
- manager_message = {
14723
- "role": "assistant",
14724
- "content": emit_text,
14725
- "ts": assistant["ts"],
14726
- "agent_role": "manager",
14727
- }
14728
- if "tool_calls" in assistant and (not route_only_tool_calls):
14729
- manager_message["tool_calls"] = assistant["tool_calls"]
14730
- self.messages.append(manager_message)
14731
- self.messages = self.messages[-400:]
14732
- elif "tool_calls" in assistant and (not route_only_tool_calls):
14733
- manager_message = {
14734
- "role": "assistant",
14735
- "content": "",
14736
- "ts": assistant["ts"],
14737
- "agent_role": "manager",
14738
- "tool_calls": assistant["tool_calls"],
14739
- }
14740
- self.messages.append(manager_message)
14741
- self.messages = self.messages[-400:]
14742
- if emit_text:
15901
+ self.manager_context.append(
15902
+ {
15903
+ "role": "system",
15904
+ "content": (
15905
+ "[manager-fast-route] "
15906
+ f"{trim(str(text or ''), 500)}"
15907
+ ),
15908
+ "ts": now_ts(),
15909
+ }
15910
+ )
15911
+ self.manager_context = self.manager_context[-400:]
14743
15912
  self._emit(
14744
- "message",
15913
+ "status",
14745
15914
  {
14746
- "role": "assistant",
14747
- "agent_role": "manager",
14748
- "text": emit_text,
14749
- "summary": "Manager response",
15915
+ "summary": (
15916
+ "manager fast-route via agentbus "
15917
+ f"({fast_meta.get('from', '?')}->{fast_meta.get('to', '?')}, "
15918
+ f"intent={fast_meta.get('intent', 'message')}, "
15919
+ f"age={float(fast_meta.get('age_sec', 0.0) or 0.0):.1f}s)"
15920
+ )
14750
15921
  },
14751
15922
  )
15923
+ else:
15924
+ prompt = (
15925
+ "Read the blackboard and delegate one next short timeslice. "
15926
+ "Return only one route_to_next_agent call.\n\n"
15927
+ f"{self._blackboard_read_state_markdown(max_items=6)}"
15928
+ )
15929
+ self.manager_context.append({"role": "user", "content": prompt, "ts": now_ts()})
15930
+ self.manager_context = self.manager_context[-400:]
15931
+ with self.lock:
15932
+ self.current_phase = "manager:model-call"
15933
+ self.current_tool_name = ""
15934
+ self.active_agent_role = "manager"
15935
+ response = self._chat_with_same_model_retry(
15936
+ self.manager_context,
15937
+ tools=self._manager_route_tools(),
15938
+ system=self._manager_system_prompt(),
15939
+ max_tokens=600,
15940
+ think=False,
15941
+ stream_thinking=False,
15942
+ on_thinking_chunk=self._append_live_thinking,
15943
+ pinned_selection=pinned_selection,
15944
+ context_label="manager turn",
15945
+ retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
15946
+ media_inputs=media_inputs_round,
15947
+ )
15948
+ text = str(response.get("content") or "")
15949
+ tool_calls = response.get("tool_calls", [])
15950
+ text, text_filter_meta = self._sanitize_assistant_text_for_runtime(text, tool_calls)
15951
+ if bool(text_filter_meta.get("filtered", False)) and str(text_filter_meta.get("reason", "")) == "oversized_raw_toolcall":
15952
+ self._inject_toolcall_overflow_hint("manager")
15953
+ assistant = {"role": "assistant", "content": text, "ts": now_ts()}
15954
+ if tool_calls:
15955
+ assistant["tool_calls"] = [
15956
+ {
15957
+ "id": tc["id"],
15958
+ "type": "function",
15959
+ "function": {
15960
+ "name": tc["function"]["name"],
15961
+ "arguments": json_dumps(tc["function"]["arguments"]),
15962
+ },
15963
+ }
15964
+ for tc in tool_calls
15965
+ ]
15966
+ self.manager_context.append(assistant)
15967
+ self.manager_context = self.manager_context[-400:]
15968
+ route_only_tool_calls = False
15969
+ if isinstance(tool_calls, list) and tool_calls:
15970
+ tool_names = [
15971
+ str(tc.get("function", {}).get("name", "") or "").strip().lower()
15972
+ for tc in tool_calls
15973
+ if isinstance(tc, dict)
15974
+ ]
15975
+ if tool_names and all(name in {"route_to_next_agent", "routetonext_agent"} for name in tool_names):
15976
+ route_only_tool_calls = True
15977
+ emit_text = str(text or "").strip()
15978
+ if not emit_text and tool_calls and (not route_only_tool_calls):
15979
+ emit_text = f"[tool calls] {', '.join(str(tc.get('function', {}).get('name', '?')) for tc in tool_calls)}"
15980
+ if emit_text:
15981
+ manager_message = {
15982
+ "role": "assistant",
15983
+ "content": emit_text,
15984
+ "ts": assistant["ts"],
15985
+ "agent_role": "manager",
15986
+ }
15987
+ if "tool_calls" in assistant and (not route_only_tool_calls):
15988
+ manager_message["tool_calls"] = assistant["tool_calls"]
15989
+ self.messages.append(manager_message)
15990
+ self.messages = self.messages[-400:]
15991
+ elif "tool_calls" in assistant and (not route_only_tool_calls):
15992
+ manager_message = {
15993
+ "role": "assistant",
15994
+ "content": "",
15995
+ "ts": assistant["ts"],
15996
+ "agent_role": "manager",
15997
+ "tool_calls": assistant["tool_calls"],
15998
+ }
15999
+ self.messages.append(manager_message)
16000
+ self.messages = self.messages[-400:]
16001
+ if emit_text:
16002
+ self._emit(
16003
+ "message",
16004
+ {
16005
+ "role": "assistant",
16006
+ "agent_role": "manager",
16007
+ "text": emit_text,
16008
+ "summary": "Manager response",
16009
+ },
16010
+ )
14752
16011
  route = self._manager_route_from_response(text, tool_calls)
16012
+ if used_watchdog_executor:
16013
+ route["source"] = "watchdog-executor"
16014
+ route["reason"] = trim(
16015
+ (
16016
+ f"watchdog executor step {int(watchdog_meta.get('cursor', 0) or 0)}/"
16017
+ f"{int(watchdog_meta.get('total', 0) or 0)} "
16018
+ f"target={watchdog_meta.get('target', '?')} "
16019
+ f"trigger={watchdog_meta.get('trigger_reason', '')}"
16020
+ ),
16021
+ 600,
16022
+ )
16023
+ route["executor_mode"] = True
16024
+ route["is_mandatory"] = True
14753
16025
  if used_agentbus_fast:
14754
16026
  route["source"] = "agentbus-fast"
14755
16027
  route["reason"] = trim(
@@ -14824,6 +16096,7 @@ class SessionState:
14824
16096
  "participants": list(participants),
14825
16097
  "assigned_expert": assigned_expert,
14826
16098
  "is_mandatory": bool(route.get("is_mandatory", False)),
16099
+ "executor_mode": bool(route.get("executor_mode", False)),
14827
16100
  "requires_user_confirmation": bool(route.get("requires_user_confirmation", False)),
14828
16101
  "round_budget": int(round_budget),
14829
16102
  "remaining_rounds": int(remaining_rounds),
@@ -14836,6 +16109,7 @@ class SessionState:
14836
16109
  profile["participants"] = list(participants)
14837
16110
  profile["assigned_expert"] = assigned_expert
14838
16111
  profile["is_mandatory"] = bool(route_row.get("is_mandatory", False))
16112
+ profile["executor_mode"] = bool(route_row.get("executor_mode", False))
14839
16113
  profile["requires_user_confirmation"] = bool(route_row.get("requires_user_confirmation", False))
14840
16114
  if task_type in TASK_PROFILE_TYPES:
14841
16115
  profile["task_type"] = task_type
@@ -14870,6 +16144,7 @@ class SessionState:
14870
16144
  "participants": list(participants),
14871
16145
  "assigned_expert": assigned_expert,
14872
16146
  "is_mandatory": bool(route_row.get("is_mandatory", False)),
16147
+ "executor_mode": bool(route_row.get("executor_mode", False)),
14873
16148
  "remaining_rounds": int(remaining_rounds),
14874
16149
  "updated_at": float(now_ts()),
14875
16150
  }
@@ -14941,10 +16216,40 @@ class SessionState:
14941
16216
  )
14942
16217
  return route_row
14943
16218
 
14944
- def _inject_manager_instruction(self, role: str, instruction: str, is_mandatory: bool = False):
16219
+ def _inject_manager_instruction(
16220
+ self,
16221
+ role: str,
16222
+ instruction: str,
16223
+ is_mandatory: bool = False,
16224
+ executor_mode: bool = False,
16225
+ ):
14945
16226
  role_key = self._sanitize_agent_role(role)
14946
16227
  if not role_key:
14947
16228
  return
16229
+ if bool(executor_mode):
16230
+ executor_seed = {
16231
+ "role": "system",
16232
+ "content": self._apply_agent_language_policy(
16233
+ (
16234
+ "Executor mode is enabled by watchdog. You are stateless for this step: "
16235
+ "ignore old conversational plans, execute only the delegated step, call concrete tools, "
16236
+ "and write verifiable evidence to blackboard."
16237
+ ),
16238
+ max_len=800,
16239
+ ),
16240
+ "ts": now_ts(),
16241
+ "agent_role": role_key,
16242
+ }
16243
+ self.contexts[role_key] = [executor_seed]
16244
+ self._emit(
16245
+ "status",
16246
+ {
16247
+ "summary": (
16248
+ f"executor hot-swap: reset {self._agent_display_name(role_key)} context "
16249
+ "for stateless execution"
16250
+ )
16251
+ },
16252
+ )
14948
16253
  instruction_with_policy = self._apply_agent_language_policy(
14949
16254
  trim(str(instruction or "").strip(), 1400),
14950
16255
  max_len=1400,
@@ -14964,6 +16269,14 @@ class SessionState:
14964
16269
  if bool(is_mandatory)
14965
16270
  else ""
14966
16271
  )
16272
+ executor_note = (
16273
+ (
16274
+ "STATELESS EXECUTOR: do not re-plan globally; "
16275
+ "complete only this delegated step and return concrete tool evidence."
16276
+ )
16277
+ if bool(executor_mode)
16278
+ else ""
16279
+ )
14967
16280
  collaboration_note = (
14968
16281
  "COLLABORATION PREFERENCE: if your current step needs another specialty, "
14969
16282
  "use ask_colleague immediately with explicit intent and concise payload; "
@@ -14974,9 +16287,11 @@ class SessionState:
14974
16287
  "<manager-delegate>\n"
14975
16288
  f"target={role_key}\n"
14976
16289
  f"is_mandatory={bool(is_mandatory)}\n"
16290
+ f"executor_mode={bool(executor_mode)}\n"
14977
16291
  f"instruction={instruction_text}\n"
14978
16292
  f"language_policy={language_note}\n"
14979
16293
  f"{mandatory_note}\n"
16294
+ f"{executor_note}\n"
14980
16295
  f"{collaboration_note}\n"
14981
16296
  "</manager-delegate>\n"
14982
16297
  "<blackboard-state>\n"
@@ -15028,7 +16343,9 @@ class SessionState:
15028
16343
  return
15029
16344
  name = str(item.get("name", "") or "").strip()
15030
16345
  args = item.get("args", {}) if isinstance(item.get("args"), dict) else {}
15031
- output = trim(str(item.get("output", "") or "").strip(), BLACKBOARD_MAX_TEXT)
16346
+ output_raw = trim(str(item.get("output", "") or "").strip(), BLACKBOARD_MAX_TEXT)
16347
+ output_clean, _ = filter_runtime_noise_lines(output_raw)
16348
+ output = trim(output_clean, BLACKBOARD_MAX_TEXT)
15032
16349
  ok = bool(item.get("ok", False))
15033
16350
  if name in {"write_file", "edit_file"}:
15034
16351
  rel_path = str(args.get("path", "") or "").strip()
@@ -15052,10 +16369,17 @@ class SessionState:
15052
16369
  if role_key == "explorer":
15053
16370
  self._blackboard_set_status("RESEARCHING")
15054
16371
  elif name in {"finish_task", "finish_current_task", "mark_done"} and ok:
16372
+ summary_arg = trim(str(args.get("summary", "") or "").strip(), BLACKBOARD_MAX_TEXT)
16373
+ if summary_arg:
16374
+ if role_key == "reviewer":
16375
+ self._blackboard_append_section("review_feedback", role_key, f"final_summary\n{summary_arg}")
16376
+ elif role_key == "explorer":
16377
+ self._blackboard_append_section("research_notes", role_key, f"final_summary\n{summary_arg}")
15055
16378
  if role_key == "reviewer":
15056
16379
  gate_ok, gate_reason = self._reviewer_approval_log_gate()
15057
16380
  if gate_ok:
15058
- self._blackboard_mark_approved(output or "finish tool acknowledged", role_key)
16381
+ approval_note = summary_arg or output or "finish tool acknowledged"
16382
+ self._blackboard_mark_approved(approval_note, role_key)
15059
16383
  else:
15060
16384
  self._blackboard_append_section(
15061
16385
  "review_feedback",
@@ -15067,7 +16391,8 @@ class SessionState:
15067
16391
  )
15068
16392
  self._emit("status", {"summary": f"reviewer finish blocked: {gate_reason}"})
15069
16393
  else:
15070
- self._blackboard_mark_approved(output or "finish tool acknowledged", role_key)
16394
+ approval_note = summary_arg or output or "finish tool acknowledged"
16395
+ self._blackboard_mark_approved(approval_note, role_key)
15071
16396
  if not ok and output:
15072
16397
  self._blackboard_append_section(
15073
16398
  "execution_logs",
@@ -15170,25 +16495,151 @@ class SessionState:
15170
16495
  policy_text = trim("\n".join(policy_lines).strip(), 1200)
15171
16496
  return clean_text, policy_text
15172
16497
 
15173
- def _reviewer_final_summary_ready(self, board: dict | None = None) -> bool:
15174
- def _text_good(text: str) -> bool:
15175
- clean = strip_thinking_content(str(text or "")).strip()
15176
- if not clean:
15177
- return False
15178
- if len(clean) >= 60:
15179
- return True
15180
- low = clean.lower()
15181
- tokens = (
15182
- "summary",
15183
- "final",
15184
- "结论",
15185
- "总结",
16498
def _final_summary_quality(self, text: str) -> dict:
    """Grade a candidate final summary by length and keyword coverage.

    The input is passed through ``strip_thinking_content`` and stripped of
    surrounding whitespace.  Its lowercase form is then searched (plain
    substring containment) for three keyword groups: ``changes``,
    ``validation`` and ``risks``.

    Returns a dict with:
      * ``clean``     - the cleaned text,
      * ``chars``     - length of the cleaned text,
      * ``covered``   - number of keyword groups with at least one match,
      * ``hits``      - per-group match flags,
      * ``ok``        - at least ``FINAL_SUMMARY_MIN_CHARS`` long and either
                        two groups covered, or one group covered with 220+
                        characters,
      * ``strict_ok`` - at least ``FINAL_SUMMARY_STRICT_MIN_CHARS`` long and
                        two or more groups covered.
    """
    cleaned = strip_thinking_content(str(text or "")).strip()
    lowered = cleaned.lower()
    length = len(cleaned)

    # Keyword groups; insertion order determines the key order of ``hits``.
    keyword_groups = {
        "changes": (
            "changed",
            "changes",
            "change",
            "modified",
            "implemented",
            "implementation",
            "files",
            "diff",
            "patch",
            "改动",
            "变更",
            "修改",
            "实现",
            "文件",
        ),
        "validation": (
            "test",
            "tests",
            "pytest",
            "validation",
            "verified",
            "verify",
            "check",
            "checks",
            "evidence",
            "pass",
            "passed",
            "验证",
            "测试",
            "通过",
            "证据",
            "日志",
        ),
        "risks": (
            "risk",
            "risks",
            "residual",
            "next step",
            "next steps",
            "follow-up",
            "todo",
            "limitation",
            "known issue",
            "caveat",
            "风险",
            "残留",
            "后续",
            "下一步",
            "待办",
            "限制",
            "已知问题",
            "建议",
        ),
    }

    coverage: dict[str, bool] = {
        group: any(word in lowered for word in words)
        for group, words in keyword_groups.items()
    }
    covered_groups = sum(1 for flag in coverage.values() if flag)

    # Lenient verdict: minimum length plus either broad coverage or a
    # single covered group backed by a longer text.
    if length >= FINAL_SUMMARY_MIN_CHARS:
        lenient = covered_groups >= 2 or (covered_groups >= 1 and length >= 220)
    else:
        lenient = False

    # Strict verdict: higher length floor and at least two groups covered.
    strict = length >= FINAL_SUMMARY_STRICT_MIN_CHARS and covered_groups >= 2

    return {
        "clean": cleaned,
        "chars": int(length),
        "covered": int(covered_groups),
        "hits": coverage,
        "ok": bool(lenient),
        "strict_ok": bool(strict),
    }
16573
+
16574
+ def _final_summary_sufficient(self, text: str, *, strict: bool = False) -> bool:
16575
+ verdict = self._final_summary_quality(text)
16576
+ return bool(verdict.get("strict_ok" if strict else "ok", False))
16577
+
16578
+ def _finish_requires_structured_summary(self, role: str, tool_name: str) -> bool:
16579
+ role_key = self._sanitize_agent_role(role)
16580
+ if tool_name not in {"finish_task", "finish_current_task", "mark_done"}:
16581
+ return False
16582
+ if not role_key:
16583
+ return False
16584
+ bb = self._ensure_blackboard()
16585
+ profile = self._ensure_blackboard_task_profile(bb)
16586
+ task_type = str(profile.get("task_type", "general") or "general")
16587
+ if task_type == "simple_qa":
16588
+ return False
16589
+ delegate = bb.get("last_delegate", {}) if isinstance(bb.get("last_delegate"), dict) else {}
16590
+ delegate_target = self._sanitize_agent_role(delegate.get("target", ""))
16591
+ delegate_reason = str(delegate.get("reason", "") or "").strip().lower()
16592
+ delegate_instruction = str(delegate.get("instruction", "") or "").strip().lower()
16593
+ summary_markers = (
16594
+ "summary",
16595
+ "wrap-up",
16596
+ "final report",
16597
+ "最终总结",
16598
+ "总结",
16599
+ "收尾",
16600
+ )
16601
+ if delegate_target == role_key and any(tok in delegate_reason or tok in delegate_instruction for tok in summary_markers):
16602
+ return True
16603
+ return bool(role_key == "reviewer" and self._is_multi_agent_mode())
16604
+
16605
+ def _recent_agent_used_tools(
16606
+ self,
16607
+ role: str,
16608
+ tool_names: set[str],
16609
+ *,
16610
+ lookback: int = 20,
16611
+ max_age_seconds: float = 300.0,
16612
+ ) -> bool:
16613
+ role_key = self._sanitize_agent_role(role)
16614
+ if not role_key or not tool_names:
16615
+ return False
16616
+ names = {str(x or "").strip() for x in tool_names if str(x or "").strip()}
16617
+ if not names:
16618
+ return False
16619
+ now_tick = now_ts()
16620
+ ctx = self._agent_context(role_key)
16621
+ rows = ctx[-max(1, int(lookback)) :] if isinstance(ctx, list) else []
16622
+ for row in reversed(rows):
16623
+ if not isinstance(row, dict):
16624
+ continue
16625
+ if str(row.get("role", "") or "").strip().lower() != "tool":
16626
+ continue
16627
+ name = str(row.get("name", "") or "").strip()
16628
+ if name not in names:
16629
+ continue
16630
+ try:
16631
+ ts = float(row.get("ts", 0.0) or 0.0)
16632
+ except Exception:
16633
+ ts = 0.0
16634
+ if ts <= 0.0:
16635
+ return True
16636
+ if (now_tick - ts) <= float(max_age_seconds):
16637
+ return True
16638
+ return False
16639
+
16640
+ def _reviewer_final_summary_ready(self, board: dict | None = None) -> bool:
16641
+ def _text_good(text: str) -> bool:
16642
+ return self._final_summary_sufficient(text, strict=True)
15192
16643
 
15193
16644
  bb = board if isinstance(board, dict) else self._ensure_blackboard()
15194
16645
  approval = bb.get("approval", {}) if isinstance(bb.get("approval"), dict) else {}
@@ -15494,6 +16945,7 @@ class SessionState:
15494
16945
  f"Session absolute writable root is {self.files_root}. "
15495
16946
  "Use relative file paths (for example hello.txt); runtime maps them to session absolute paths. "
15496
16947
  "If '/workspace/...' appears, treat it as a virtual alias only; never create OS-level /workspace in shell. "
16948
+ f"{_detect_os_shell_instruction()} "
15497
16949
  "You must stay within your role boundary and use only provided tools. "
15498
16950
  "Use read_from_blackboard/write_to_blackboard to keep the shared state accurate. "
15499
16951
  "When communicating with other agents, use ask_colleague with structured intent/content. "
@@ -15514,8 +16966,12 @@ class SessionState:
15514
16966
  base
15515
16967
  + "Role objective: verify developer output against goal, run checks/tests, and issue pass/fix decisions. "
15516
16968
  + "If gaps remain, send fix_request to developer with concrete failure evidence and write review_feedback to blackboard. "
15517
- + "If task is complete, write approval evidence and hand off final summary to Explorer "
15518
- + "(via ask_colleague intent=final_summary_request) before ending the task."
16969
+ + "If manager requests final summary, first call read_from_blackboard "
16970
+ + "(sections: code_artifacts, execution_logs, review_feedback, status), then generate a structured summary "
16971
+ + "covering changes, validation evidence, and residual risks/next steps. "
16972
+ + "When finishing, pass this summary in finish_task.summary; empty or vague summary is invalid. "
16973
+ + "If you cannot produce summary from current evidence, hand off Explorer via ask_colleague "
16974
+ + "intent=final_summary_request with explicit missing evidence."
15519
16975
  )
15520
16976
  return (
15521
16977
  base
@@ -16343,6 +17799,37 @@ class SessionState:
16343
17799
  return self._todo_write_rescue(args)
16344
17800
  if name in {"finish_task", "finish_current_task", "mark_done"}:
16345
17801
  summary = trim(str(args.get("summary", "") or "").strip(), 400)
17802
+ if role_key == "explorer":
17803
+ bb = self._ensure_blackboard()
17804
+ delegate = bb.get("last_delegate", {}) if isinstance(bb.get("last_delegate"), dict) else {}
17805
+ delegate_target = self._sanitize_agent_role(delegate.get("target", ""))
17806
+ delegate_reason = str(delegate.get("reason", "") or "").strip().lower()
17807
+ if delegate_target == "explorer" and "summary-handoff" in delegate_reason:
17808
+ return (
17809
+ "Error: explorer summary handoff step must not call finish tool. "
17810
+ "Write structured summary to blackboard first, then wait for manager close."
17811
+ )
17812
+ if self._finish_requires_structured_summary(role_key, name):
17813
+ if role_key == "reviewer" and not self._recent_agent_used_tools(
17814
+ role_key,
17815
+ {"read_from_blackboard"},
17816
+ lookback=24,
17817
+ max_age_seconds=420.0,
17818
+ ):
17819
+ return (
17820
+ "Error: reviewer finalization requires blackboard evidence read. "
17821
+ "Call read_from_blackboard first (sections: code_artifacts, execution_logs, "
17822
+ "review_feedback, status), then call finish_task with structured summary."
17823
+ )
17824
+ if not self._final_summary_sufficient(summary, strict=True):
17825
+ return (
17826
+ "Error: structured final summary is required before finish. "
17827
+ "Provide finish_task.summary with: "
17828
+ "(1) changes/files touched, "
17829
+ "(2) validation evidence (tests/commands/results), "
17830
+ "(3) residual risks or next steps. "
17831
+ "If evidence is missing, read_from_blackboard first or ask Explorer for final_summary_request."
17832
+ )
16346
17833
  if name == "finish_task":
16347
17834
  todo_mark = self.todo.complete_all_open(summary)
16348
17835
  else:
@@ -16421,6 +17908,7 @@ class SessionState:
16421
17908
  if guard_error:
16422
17909
  return guard_error
16423
17910
  out = self.bg.run(args["command"], int(args.get("timeout", 120)))
17911
+ out_filtered, _ = filter_runtime_noise_lines(str(out or ""))
16424
17912
  self._emit(
16425
17913
  "command",
16426
17914
  {
@@ -16430,7 +17918,7 @@ class SessionState:
16430
17918
  "summary": f"background_run: {args['command'][:80]}",
16431
17919
  },
16432
17920
  )
16433
- return out
17921
+ return trim(out_filtered or "(no output)")
16434
17922
  if name == "check_background":
16435
17923
  return self.bg.check(args.get("task_id"))
16436
17924
  if name == "task_create":
@@ -16472,6 +17960,8 @@ class SessionState:
16472
17960
  if section == "original_goal":
16473
17961
  return trim(str(board.get("original_goal", "") or "").strip(), 4000) or "(empty)"
16474
17962
  if section == "status":
17963
+ wd = board.get("watchdog", {}) if isinstance(board.get("watchdog"), dict) else {}
17964
+ dq = board.get("decomposition_queue", {}) if isinstance(board.get("decomposition_queue"), dict) else {}
16475
17965
  return json_dumps(
16476
17966
  {
16477
17967
  "status": board.get("status", "INITIALIZING"),
@@ -16480,6 +17970,20 @@ class SessionState:
16480
17970
  "manager_summary_attempts": int(board.get("manager_summary_attempts", 0) or 0),
16481
17971
  "approval": board.get("approval", {}),
16482
17972
  "last_delegate": board.get("last_delegate", {}),
17973
+ "watchdog": {
17974
+ "intent_no_tool_streak": int(wd.get("intent_no_tool_streak", 0) or 0),
17975
+ "repeat_no_tool_streak": int(wd.get("repeat_no_tool_streak", 0) or 0),
17976
+ "state_unchanged_streak": int(wd.get("state_unchanged_streak", 0) or 0),
17977
+ "trigger_count": int(wd.get("trigger_count", 0) or 0),
17978
+ "last_trigger_reason": trim(str(wd.get("last_trigger_reason", "") or "").strip(), 160),
17979
+ },
17980
+ "decomposition_queue": {
17981
+ "active": bool(dq.get("active", False)),
17982
+ "trigger_reason": trim(str(dq.get("trigger_reason", "") or "").strip(), 160),
17983
+ "cursor": int(dq.get("cursor", 0) or 0),
17984
+ "total": len(dq.get("steps", []) or []),
17985
+ "last_error": trim(str(dq.get("last_error", "") or "").strip(), 220),
17986
+ },
16483
17987
  },
16484
17988
  indent=2,
16485
17989
  )
@@ -16945,6 +18449,15 @@ class SessionState:
16945
18449
  output = self._dispatch_tool(name, args, agent_role=role_key)
16946
18450
  except Exception as exc:
16947
18451
  output = f"Error: {exc}"
18452
+ raw_output = str(output or "")
18453
+ filtered_output, filtered_rows = filter_runtime_noise_lines(raw_output)
18454
+ if filtered_rows > 0:
18455
+ if filtered_output:
18456
+ output = filtered_output
18457
+ elif raw_output.startswith("Error:"):
18458
+ output = "Error: runtime socket noise filtered"
18459
+ else:
18460
+ output = "(no output)"
16948
18461
  self._append_agent_context_message(
16949
18462
  role_key,
16950
18463
  {
@@ -17161,6 +18674,7 @@ class SessionState:
17161
18674
  if self.cancel_requested:
17162
18675
  self._emit("status", {"summary": "run interrupted"})
17163
18676
  break
18677
+ self._apply_auto_compact_if_needed("auto:multi-sync")
17164
18678
  with self.lock:
17165
18679
  self.agent_round_index = int(self.agent_round_index) + 1
17166
18680
  self.current_phase = "manager:dispatch"
@@ -17203,6 +18717,7 @@ class SessionState:
17203
18717
  role,
17204
18718
  instruction,
17205
18719
  is_mandatory=bool(route.get("is_mandatory", False)),
18720
+ executor_mode=bool(route.get("executor_mode", False)),
17206
18721
  )
17207
18722
  if role == "explorer":
17208
18723
  self._blackboard_set_status("RESEARCHING")
@@ -17216,13 +18731,26 @@ class SessionState:
17216
18731
  media_inputs_pool=media_inputs_pool,
17217
18732
  media_seen_ts_by_role=media_seen_ts_by_role,
17218
18733
  )
18734
+ board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
17219
18735
  step = self._multi_agent_turn(
17220
18736
  role,
17221
18737
  pinned_selection=pinned_selection,
17222
18738
  media_inputs_round=role_media_inputs,
17223
18739
  )
17224
18740
  self._blackboard_update_from_worker_step(role, step)
18741
+ board_after = self._ensure_blackboard()
18742
+ board_after_fp = self._watchdog_state_fingerprint(board_after)
18743
+ wd_event = self._watchdog_process_worker_step(
18744
+ board_after,
18745
+ role=role,
18746
+ step=step if isinstance(step, dict) else {},
18747
+ state_changed=bool(board_after_fp != board_before_fp),
18748
+ pinned_selection=pinned_selection,
18749
+ )
17225
18750
  status = str(step.get("status", "") or "")
18751
+ if bool(wd_event.get("triggered", False)):
18752
+ idle_counts[role] = 0
18753
+ continue
17226
18754
  if status == "interrupted":
17227
18755
  break
17228
18756
  if status == "skip":
@@ -17231,7 +18759,7 @@ class SessionState:
17231
18759
  idle_counts[role] = 0
17232
18760
  if bool(step.get("stop_due_to_finish", False)):
17233
18761
  note = f"{self._agent_display_name(role)} signaled finish via tool."
17234
- self._blackboard_mark_approved(note, role)
18762
+ # Approval note should come from finish tool payload sync; avoid overwriting with generic text here.
17235
18763
  can_finish_now, finish_gate_reason = self._can_auto_finish_from_approval(
17236
18764
  self._ensure_blackboard(),
17237
18765
  latest_user_ts=self._latest_user_message_ts(),
@@ -17243,7 +18771,7 @@ class SessionState:
17243
18771
  {
17244
18772
  "summary": (
17245
18773
  "reviewer finish deferred: final summary missing; "
17246
- "handoff to explorer via agentbus and continue"
18774
+ "manager will reroute to explorer summary synthesis"
17247
18775
  )
17248
18776
  },
17249
18777
  )
@@ -17347,6 +18875,7 @@ class SessionState:
17347
18875
  if self.cancel_requested:
17348
18876
  self._emit("status", {"summary": "run interrupted"})
17349
18877
  break
18878
+ self._apply_auto_compact_if_needed("auto:multi-seq")
17350
18879
  with self.lock:
17351
18880
  self.agent_round_index = int(self.agent_round_index) + 1
17352
18881
  latest_user_ts = self._latest_user_message_ts()
@@ -17358,6 +18887,28 @@ class SessionState:
17358
18887
  media_inputs=media_inputs_pool,
17359
18888
  roles=role_order,
17360
18889
  )
18890
+ dq = self._normalize_decomposition_queue_state(
18891
+ self._ensure_blackboard().get("decomposition_queue", {})
18892
+ )
18893
+ if bool(dq.get("active", False)):
18894
+ queue_exec = self._watchdog_execute_queue_step(
18895
+ pinned_selection=pinned_selection,
18896
+ )
18897
+ if bool(queue_exec.get("interrupted", False)):
18898
+ break
18899
+ if bool(queue_exec.get("stop_run", False)):
18900
+ self._emit("status", {"summary": "watchdog executor completed task; run paused"})
18901
+ break
18902
+ if not bool(queue_exec.get("executed", False)):
18903
+ if bool(queue_exec.get("queue_active", False)):
18904
+ self._emit(
18905
+ "status",
18906
+ {"summary": "watchdog queue active but no executable step; pausing to avoid deadlock"},
18907
+ )
18908
+ break
18909
+ continue
18910
+ idle_counts[str(queue_exec.get("role", "") or "developer")] = 0
18911
+ continue
17361
18912
  role = current_role if mode == EXECUTION_MODE_SEQUENTIAL else role_order[sync_index % len(role_order)]
17362
18913
  role_media_inputs = self._resolve_role_multimodal_payload(
17363
18914
  role=role,
@@ -17365,12 +18916,27 @@ class SessionState:
17365
18916
  media_inputs_pool=media_inputs_pool,
17366
18917
  media_seen_ts_by_role=media_seen_ts_by_role,
17367
18918
  )
18919
+ board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
17368
18920
  step = self._multi_agent_turn(
17369
18921
  role,
17370
18922
  pinned_selection=pinned_selection,
17371
18923
  media_inputs_round=role_media_inputs,
17372
18924
  )
17373
- status = str(step.get("status", "") or "")
18925
+ safe_step = step if isinstance(step, dict) else {}
18926
+ self._blackboard_update_from_worker_step(role, safe_step)
18927
+ board_after = self._ensure_blackboard()
18928
+ board_after_fp = self._watchdog_state_fingerprint(board_after)
18929
+ wd_event = self._watchdog_process_worker_step(
18930
+ board_after,
18931
+ role=role,
18932
+ step=safe_step,
18933
+ state_changed=bool(board_after_fp != board_before_fp),
18934
+ pinned_selection=pinned_selection,
18935
+ )
18936
+ if bool(wd_event.get("triggered", False)):
18937
+ idle_counts[role] = 0
18938
+ continue
18939
+ status = str(safe_step.get("status", "") or "")
17374
18940
  if status == "interrupted":
17375
18941
  break
17376
18942
  if status == "skip":
@@ -17384,7 +18950,7 @@ class SessionState:
17384
18950
  continue
17385
18951
  if status == "tools":
17386
18952
  idle_counts[role] = 0
17387
- if bool(step.get("stop_due_to_finish", False)):
18953
+ if bool(safe_step.get("stop_due_to_finish", False)):
17388
18954
  self._emit("status", {"summary": "finish tool called; run paused and awaiting user instruction"})
17389
18955
  break
17390
18956
  if mode == EXECUTION_MODE_SEQUENTIAL:
@@ -17402,7 +18968,7 @@ class SessionState:
17402
18968
  idle_counts[role] = int(idle_counts.get(role, 0) or 0) + 1
17403
18969
  should_stop, next_role = self._multi_agent_no_tool_transition(
17404
18970
  role,
17405
- str(step.get("text", "") or ""),
18971
+ str(safe_step.get("text", "") or ""),
17406
18972
  mode=mode,
17407
18973
  idle_counts=idle_counts,
17408
18974
  )
@@ -17549,9 +19115,7 @@ class SessionState:
17549
19115
  },
17550
19116
  )
17551
19117
  break
17552
- self._microcompact()
17553
- if self._estimate_tokens() > self.context_token_upper_bound:
17554
- self._auto_compact("auto")
19118
+ self._apply_auto_compact_if_needed("auto")
17555
19119
  notifs = self.bg.drain()
17556
19120
  if notifs:
17557
19121
  text = "\n".join(f"[bg:{n['task_id']}] {n['status']}: {n['result']}" for n in notifs)
@@ -17583,6 +19147,32 @@ class SessionState:
17583
19147
  self._seed_multi_agent_contexts_if_needed(self.runtime_reclassify_goal or "")
17584
19148
  self._multi_agent_worker(pinned_selection=pinned_selection)
17585
19149
  return
19150
+ dq = self._normalize_decomposition_queue_state(
19151
+ self._ensure_blackboard().get("decomposition_queue", {})
19152
+ )
19153
+ if bool(dq.get("active", False)):
19154
+ queue_exec = self._watchdog_execute_queue_step(
19155
+ pinned_selection=pinned_selection,
19156
+ )
19157
+ if bool(queue_exec.get("interrupted", False)):
19158
+ self._emit("status", {"summary": "run interrupted"})
19159
+ break
19160
+ if bool(queue_exec.get("stop_run", False)):
19161
+ self._emit("status", {"summary": "watchdog executor completed task; run paused"})
19162
+ break
19163
+ if not bool(queue_exec.get("executed", False)):
19164
+ if bool(queue_exec.get("queue_active", False)):
19165
+ self._emit(
19166
+ "status",
19167
+ {"summary": "watchdog queue active but no executable step; pausing to avoid deadlock"},
19168
+ )
19169
+ break
19170
+ continue
19171
+ no_tool_rounds = 0
19172
+ arbiter_planning_rounds = 0
19173
+ fault_counter = 0
19174
+ last_fault_reason = ""
19175
+ continue
17586
19176
  latest_user_ts = self._latest_user_message_ts()
17587
19177
  media_inputs_round = None
17588
19178
  if latest_user_ts > media_last_user_ts:
@@ -17778,6 +19368,32 @@ class SessionState:
17778
19368
  arbiter_planning_rounds = 0
17779
19369
  self._emit("status", {"summary": "waiting for user input: assistant asked for a decision"})
17780
19370
  break
19371
+ wd_event = self._watchdog_process_worker_step(
19372
+ self._ensure_blackboard(),
19373
+ role=single_role,
19374
+ step={
19375
+ "status": "no-tools",
19376
+ "text": decision_probe,
19377
+ "tool_results": [],
19378
+ },
19379
+ state_changed=False,
19380
+ pinned_selection=pinned_selection,
19381
+ )
19382
+ if bool(wd_event.get("triggered", False)):
19383
+ no_tool_rounds = 0
19384
+ arbiter_planning_rounds = 0
19385
+ fault_counter = 0
19386
+ last_fault_reason = ""
19387
+ self._emit(
19388
+ "status",
19389
+ {
19390
+ "summary": (
19391
+ "watchdog triggered in single-agent planner mode; "
19392
+ "switching to stateless executor queue"
19393
+ )
19394
+ },
19395
+ )
19396
+ continue
17781
19397
  clean_decision_probe = strip_thinking_content(decision_probe).strip()
17782
19398
  if bool(self.arbiter_enabled) and len(clean_decision_probe) >= int(ARBITER_TRIGGER_MIN_CONTENT_CHARS):
17783
19399
  arbiter_decision = self._call_arbiter_llm(clean_decision_probe, thinking_text)
@@ -18031,6 +19647,8 @@ class SessionState:
18031
19647
  stop_due_to_finish_task = False
18032
19648
  hard_break_reason = ""
18033
19649
  interrupted_in_tools = False
19650
+ single_round_tool_results: list[dict] = []
19651
+ single_watchdog_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
18034
19652
  round_tool_fp = self._tool_calls_fingerprint(tool_calls)
18035
19653
  for tc in tool_calls:
18036
19654
  if self.cancel_requested:
@@ -18186,6 +19804,15 @@ class SessionState:
18186
19804
  output = self._dispatch_tool(name, args)
18187
19805
  except Exception as exc:
18188
19806
  output = f"Error: {exc}"
19807
+ raw_output = str(output or "")
19808
+ filtered_output, filtered_rows = filter_runtime_noise_lines(raw_output)
19809
+ if filtered_rows > 0:
19810
+ if filtered_output:
19811
+ output = filtered_output
19812
+ elif raw_output.startswith("Error:"):
19813
+ output = "Error: runtime socket noise filtered"
19814
+ else:
19815
+ output = "(no output)"
18189
19816
  tool_key = str(dispatched_name or name).strip() or str(name or "").strip() or "unknown-tool"
18190
19817
  if str(output).startswith("Error"):
18191
19818
  round_error_count += 1
@@ -18213,6 +19840,14 @@ class SessionState:
18213
19840
  if dispatched_name in {"finish_task", "finish_current_task", "mark_done"}:
18214
19841
  stop_due_to_finish_task = True
18215
19842
  self.messages.append({"role": "tool", "tool_call_id": tc["id"], "name": name, "content": trim(output), "ts": now_ts()})
19843
+ single_round_tool_results.append(
19844
+ {
19845
+ "name": dispatched_name or name,
19846
+ "args": args if isinstance(args, dict) else {},
19847
+ "output": trim(str(output or ""), 3000),
19848
+ "ok": not str(output).startswith("Error:"),
19849
+ }
19850
+ )
18216
19851
  self._emit("tool_result", {"name": name, "result": trim(output, 500), "summary": f"tool done: {name}"})
18217
19852
  if int(tool_error_streaks.get(tool_key, 0) or 0) >= HARD_BREAK_TOOL_ERROR_THRESHOLD:
18218
19853
  stop_due_to_hard_break = True
@@ -18241,6 +19876,18 @@ class SessionState:
18241
19876
  self.current_phase = "post-tools"
18242
19877
  if interrupted_in_tools:
18243
19878
  break
19879
+ single_watchdog_after_board = self._ensure_blackboard()
19880
+ single_watchdog_after_fp = self._watchdog_state_fingerprint(single_watchdog_after_board)
19881
+ self._watchdog_process_worker_step(
19882
+ single_watchdog_after_board,
19883
+ role=single_role,
19884
+ step={
19885
+ "status": "tools",
19886
+ "tool_results": single_round_tool_results,
19887
+ },
19888
+ state_changed=bool(single_watchdog_after_fp != single_watchdog_before_fp),
19889
+ pinned_selection=pinned_selection,
19890
+ )
18244
19891
  if stop_due_to_hard_break:
18245
19892
  note = (
18246
19893
  "Execution paused after repeated tool/recovery failures. "
@@ -19628,7 +21275,6 @@ window.MathJax={
19628
21275
  }
19629
21276
  };
19630
21277
  </script>
19631
- <script defer src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
19632
21278
  </head>
19633
21279
  <body>
19634
21280
  <div class="bg-layer"></div>
@@ -19883,7 +21529,7 @@ main{display:grid;grid-template-columns:minmax(220px,260px) minmax(520px,920px)
19883
21529
  .upload-list{margin-top:6px;border:1px solid var(--line);border-radius:10px;background:#fff;max-height:88px;overflow:auto;padding:6px}
19884
21530
  .row{display:flex;gap:8px;margin-top:8px;flex-wrap:wrap}
19885
21531
  .ctx-live{margin-left:auto;display:flex;align-items:center;gap:8px;padding:8px 10px;border:1px solid #d6deea;border-radius:999px;background:#f8fbff;min-width:250px}
19886
- .ctx-live-dot{width:8px;height:8px;border-radius:50%;background:#13b8a6;box-shadow:0 0 0 rgba(19,184,166,.45);animation:ctxPulse 1.6s ease-in-out infinite}
21532
+ .ctx-live-dot{width:8px;height:8px;border-radius:50%;background:#13b8a6;box-shadow:0 0 0 rgba(19,184,166,.45)}
19887
21533
  .ctx-live-bar{position:relative;display:inline-block;width:84px;height:6px;border-radius:999px;background:#e5edf8;overflow:hidden}
19888
21534
  .ctx-live-fill{display:block;height:100%;width:0%;background:linear-gradient(90deg,#13b8a6,#1f6feb);transition:width .24s ease,background .24s ease}
19889
21535
  .ctx-live.warn .ctx-live-dot{background:#e1a400}
@@ -19974,10 +21620,10 @@ APP_JS = """const S={sessions:[],activeId:null,snap:null,es:null,esId:'',skills:
19974
21620
  const MD_CACHE=new Map();
19975
21621
  const MD_CACHE_MAX=420;
19976
21622
  const STATIC_UI=((new URLSearchParams(location.search)).get('static_ui')==='1');
19977
- const SNAPSHOT_DELAY_VISIBLE_MS=120;
19978
- const SNAPSHOT_DELAY_HIDDEN_MS=1200;
19979
- const SESSION_POLL_VISIBLE_MS=12000;
19980
- const SESSION_POLL_HIDDEN_MS=30000;
21623
+ const SNAPSHOT_DELAY_VISIBLE_MS=300;
21624
+ const SNAPSHOT_DELAY_HIDDEN_MS=2400;
21625
+ const SESSION_POLL_VISIBLE_MS=30000;
21626
+ const SESSION_POLL_HIDDEN_MS=60000;
19981
21627
  const PANEL_SCROLL_ACTIVE_MS=1100;
19982
21628
  const CHAT_SCROLL_ACTIVE_MS=420;
19983
21629
  const CHAT_SCROLL_LOCK_MS=1200;
@@ -19994,10 +21640,10 @@ const DELTA_MAX_OPERATIONS=220;
19994
21640
  const DELTA_MAX_UPLOADS=40;
19995
21641
  const DELTA_WATCHDOG_INTERVAL_MS=1800;
19996
21642
  const DELTA_WATCHDOG_STALL_MS=9000;
19997
- const MARKDOWN_WORKER_MIN_CHARS=2200;
21643
+ const MARKDOWN_WORKER_MIN_CHARS=800;
19998
21644
  const MARKDOWN_WORKER_MAX_PENDING=96;
19999
21645
  const MARKDOWN_WORKER_REQ_TTL_MS=45000;
20000
- const CHAT_VIRT={heights:Object.create(null),heightVersion:0,avgHeight:140,overscanPx:900,maxCacheKeys:2800,poolByKind:Object.create(null),poolSize:0,poolMax:420};
21646
+ const CHAT_VIRT={heights:Object.create(null),heightVersion:0,avgHeight:140,overscanPx:400,maxCacheKeys:1200,poolByKind:Object.create(null),poolSize:0,poolMax:180};
20001
21647
  const RENDER_EVT_TYPES=new Set(['render_frame','render_bridge']);
20002
21648
  const RENDER_QUEUE_MAX=140;
20003
21649
  const RENDER_META_MIN_INTERVAL_MS=180;
@@ -20576,7 +22222,15 @@ function _mathRunTypeset(root,key=''){
20576
22222
  const run=(retry)=>{
20577
22223
  const mj=window.MathJax;
20578
22224
  if(!mj||typeof mj.typesetPromise!=='function'){
20579
- if(retry<10)setTimeout(()=>run(retry+1),180);
22225
+ // Lazy-load MathJax on first actual math demand
22226
+ if(!window._mjaxLoading){
22227
+ window._mjaxLoading=true;
22228
+ const s=document.createElement('script');
22229
+ s.src='https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js';
22230
+ s.async=true;
22231
+ document.head.appendChild(s);
22232
+ }
22233
+ if(retry<20)setTimeout(()=>run(retry+1),200);
20580
22234
  return;
20581
22235
  }
20582
22236
  if(root._mathPending)return;
@@ -24016,16 +25670,35 @@ Use this skill when tasks match this flow pattern and reusable execution is need
24016
25670
  return self.model_catalog()
24017
25671
 
24018
25672
  class AgentHTTPServer(ThreadingHTTPServer):
25673
+ daemon_threads = True
25674
+ block_on_close = False
25675
+
24019
25676
  def __init__(self, addr: tuple[str, int], handler, app: AppContext):
24020
25677
  super().__init__(addr, handler)
24021
25678
  self.app = app
24022
25679
 
24023
25680
  def handle_error(self, request, client_address):
24024
25681
  _, exc, _ = sys.exc_info()
24025
- if isinstance(exc, (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, TimeoutError)):
25682
+ if swallow_benign_socket_error(exc, "agent-http.handle_error"):
24026
25683
  return
24027
25684
  return super().handle_error(request, client_address)
24028
25685
 
25686
+ def shutdown_request(self, request):
25687
+ try:
25688
+ super().shutdown_request(request)
25689
+ except OSError as exc:
25690
+ if swallow_benign_socket_error(exc, "agent-http.shutdown_request"):
25691
+ return
25692
+ raise
25693
+
25694
+ def close_request(self, request):
25695
+ try:
25696
+ super().close_request(request)
25697
+ except OSError as exc:
25698
+ if swallow_benign_socket_error(exc, "agent-http.close_request"):
25699
+ return
25700
+ raise
25701
+
24029
25702
  class Handler(BaseHTTPRequestHandler):
24030
25703
  protocol_version = "HTTP/1.1"
24031
25704
  server_version = f"StandaloneWebAgent/{APP_VERSION}"
@@ -24036,8 +25709,10 @@ class Handler(BaseHTTPRequestHandler):
24036
25709
  def handle(self):
24037
25710
  try:
24038
25711
  super().handle()
24039
- except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, TimeoutError):
24040
- return
25712
+ except Exception as exc:
25713
+ if swallow_benign_socket_error(exc, "handler.handle"):
25714
+ return
25715
+ raise
24041
25716
 
24042
25717
  @property
24043
25718
  def app(self) -> AppContext:
@@ -24064,48 +25739,70 @@ class Handler(BaseHTTPRequestHandler):
24064
25739
 
24065
25740
  def _send_json(self, obj: object, status: int = 200):
24066
25741
  body = json_dumps(obj).encode("utf-8")
24067
- self.send_response(status)
24068
- self.send_header("Content-Type", "application/json; charset=utf-8")
24069
- self.send_header("Content-Length", str(len(body)))
24070
- self.send_header("Cache-Control", "no-store")
24071
- self.end_headers()
24072
- self.wfile.write(body)
25742
+ try:
25743
+ self.send_response(status)
25744
+ self.send_header("Content-Type", "application/json; charset=utf-8")
25745
+ self.send_header("Content-Length", str(len(body)))
25746
+ self.send_header("Cache-Control", "no-store")
25747
+ self.end_headers()
25748
+ self.wfile.write(body)
25749
+ except Exception as exc:
25750
+ if swallow_benign_socket_error(exc, "handler.send_json"):
25751
+ return
25752
+ raise
24073
25753
 
24074
25754
  def _send_text(self, text: str, content_type: str = "text/plain; charset=utf-8", status: int = 200):
24075
25755
  body = text.encode("utf-8")
24076
- self.send_response(status)
24077
- self.send_header("Content-Type", content_type)
24078
- self.send_header("Content-Length", str(len(body)))
24079
- self.send_header("Cache-Control", "no-store")
24080
- self.send_header("Pragma", "no-cache")
24081
- self.send_header("Expires", "0")
24082
- self.end_headers()
24083
- self.wfile.write(body)
25756
+ try:
25757
+ self.send_response(status)
25758
+ self.send_header("Content-Type", content_type)
25759
+ self.send_header("Content-Length", str(len(body)))
25760
+ self.send_header("Cache-Control", "no-store")
25761
+ self.send_header("Pragma", "no-cache")
25762
+ self.send_header("Expires", "0")
25763
+ self.end_headers()
25764
+ self.wfile.write(body)
25765
+ except Exception as exc:
25766
+ if swallow_benign_socket_error(exc, "handler.send_text"):
25767
+ return
25768
+ raise
24084
25769
 
24085
25770
  def _send_bytes(self, data: bytes, content_type: str, filename: str):
24086
- self.send_response(200)
24087
- self.send_header("Content-Type", content_type)
24088
- self.send_header("Content-Disposition", f'attachment; filename="{filename}"')
24089
- self.send_header("Content-Length", str(len(data)))
24090
- self.end_headers()
24091
- self.wfile.write(data)
25771
+ try:
25772
+ self.send_response(200)
25773
+ self.send_header("Content-Type", content_type)
25774
+ self.send_header("Content-Disposition", f'attachment; filename="{filename}"')
25775
+ self.send_header("Content-Length", str(len(data)))
25776
+ self.end_headers()
25777
+ self.wfile.write(data)
25778
+ except Exception as exc:
25779
+ if swallow_benign_socket_error(exc, "handler.send_bytes"):
25780
+ return
25781
+ raise
24092
25782
 
24093
25783
  def _send_inline_bytes(self, data: bytes, content_type: str, status: int = 200):
24094
- self.send_response(status)
24095
- self.send_header("Content-Type", content_type)
24096
- self.send_header("Content-Length", str(len(data)))
24097
- self.send_header("Content-Disposition", "inline")
24098
- self.send_header("Cache-Control", "no-store")
24099
- self.end_headers()
24100
- self.wfile.write(data)
25784
+ try:
25785
+ self.send_response(status)
25786
+ self.send_header("Content-Type", content_type)
25787
+ self.send_header("Content-Length", str(len(data)))
25788
+ self.send_header("Content-Disposition", "inline")
25789
+ self.send_header("Cache-Control", "no-store")
25790
+ self.end_headers()
25791
+ self.wfile.write(data)
25792
+ except Exception as exc:
25793
+ if swallow_benign_socket_error(exc, "handler.send_inline_bytes"):
25794
+ return
25795
+ raise
24101
25796
 
24102
25797
  def _sse_write(self, payload: bytes) -> bool:
24103
25798
  try:
24104
25799
  self.wfile.write(payload)
24105
25800
  self.wfile.flush()
24106
25801
  return True
24107
- except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError, TimeoutError, OSError):
24108
- return False
25802
+ except Exception as exc:
25803
+ if swallow_benign_socket_error(exc, "handler.sse_write"):
25804
+ return False
25805
+ raise
24109
25806
 
24110
25807
  def do_GET(self):
24111
25808
  parsed_url = urlparse(self.path)
@@ -24523,12 +26220,17 @@ class Handler(BaseHTTPRequestHandler):
24523
26220
  return self._send_json({"ok": True})
24524
26221
 
24525
26222
  def _stream_events(self, sess: SessionState):
24526
- self.send_response(HTTPStatus.OK)
24527
- self.send_header("Content-Type", "text/event-stream; charset=utf-8")
24528
- self.send_header("Cache-Control", "no-cache")
24529
- self.send_header("Connection", "keep-alive")
24530
- self.send_header("X-Accel-Buffering", "no")
24531
- self.end_headers()
26223
+ try:
26224
+ self.send_response(HTTPStatus.OK)
26225
+ self.send_header("Content-Type", "text/event-stream; charset=utf-8")
26226
+ self.send_header("Cache-Control", "no-cache")
26227
+ self.send_header("Connection", "keep-alive")
26228
+ self.send_header("X-Accel-Buffering", "no")
26229
+ self.end_headers()
26230
+ except Exception as exc:
26231
+ if swallow_benign_socket_error(exc, "handler.stream_events.headers"):
26232
+ return
26233
+ raise
24532
26234
  sub = sess.events.subscribe()
24533
26235
  try:
24534
26236
  hello = (
@@ -24548,8 +26250,9 @@ class Handler(BaseHTTPRequestHandler):
24548
26250
  chunk = f": ping {int(now_ts())}\n\n".encode("utf-8")
24549
26251
  if not self._sse_write(chunk):
24550
26252
  break
24551
- except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
24552
- pass
26253
+ except Exception as exc:
26254
+ if not swallow_benign_socket_error(exc, "handler.stream_events.loop"):
26255
+ raise
24553
26256
  finally:
24554
26257
  sess.events.unsubscribe(sub)
24555
26258
 
@@ -24563,8 +26266,10 @@ class SkillsHandler(BaseHTTPRequestHandler):
24563
26266
  def handle(self):
24564
26267
  try:
24565
26268
  super().handle()
24566
- except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, TimeoutError):
24567
- return
26269
+ except Exception as exc:
26270
+ if swallow_benign_socket_error(exc, "skills-handler.handle"):
26271
+ return
26272
+ raise
24568
26273
 
24569
26274
  @property
24570
26275
  def app(self) -> AppContext:
@@ -24591,23 +26296,33 @@ class SkillsHandler(BaseHTTPRequestHandler):
24591
26296
 
24592
26297
  def _send_json(self, obj: object, status: int = 200):
24593
26298
  body = json_dumps(obj).encode("utf-8")
24594
- self.send_response(status)
24595
- self.send_header("Content-Type", "application/json; charset=utf-8")
24596
- self.send_header("Content-Length", str(len(body)))
24597
- self.send_header("Cache-Control", "no-store")
24598
- self.end_headers()
24599
- self.wfile.write(body)
26299
+ try:
26300
+ self.send_response(status)
26301
+ self.send_header("Content-Type", "application/json; charset=utf-8")
26302
+ self.send_header("Content-Length", str(len(body)))
26303
+ self.send_header("Cache-Control", "no-store")
26304
+ self.end_headers()
26305
+ self.wfile.write(body)
26306
+ except Exception as exc:
26307
+ if swallow_benign_socket_error(exc, "skills-handler.send_json"):
26308
+ return
26309
+ raise
24600
26310
 
24601
26311
  def _send_text(self, text: str, content_type: str = "text/plain; charset=utf-8", status: int = 200):
24602
26312
  body = text.encode("utf-8")
24603
- self.send_response(status)
24604
- self.send_header("Content-Type", content_type)
24605
- self.send_header("Content-Length", str(len(body)))
24606
- self.send_header("Cache-Control", "no-store")
24607
- self.send_header("Pragma", "no-cache")
24608
- self.send_header("Expires", "0")
24609
- self.end_headers()
24610
- self.wfile.write(body)
26313
+ try:
26314
+ self.send_response(status)
26315
+ self.send_header("Content-Type", content_type)
26316
+ self.send_header("Content-Length", str(len(body)))
26317
+ self.send_header("Cache-Control", "no-store")
26318
+ self.send_header("Pragma", "no-cache")
26319
+ self.send_header("Expires", "0")
26320
+ self.end_headers()
26321
+ self.wfile.write(body)
26322
+ except Exception as exc:
26323
+ if swallow_benign_socket_error(exc, "skills-handler.send_text"):
26324
+ return
26325
+ raise
24611
26326
 
24612
26327
  def do_GET(self):
24613
26328
  parsed_url = urlparse(self.path)
@@ -25219,7 +26934,15 @@ def main():
25219
26934
  elif int(skills_port) != int(args.port):
25220
26935
  try:
25221
26936
  skills_server = AgentHTTPServer((args.host, skills_port), SkillsHandler, app)
25222
- skills_thread = threading.Thread(target=skills_server.serve_forever, daemon=True)
26937
+
26938
+ def _skills_serve_loop():
26939
+ try:
26940
+ skills_server.serve_forever()
26941
+ except OSError as exc:
26942
+ if not swallow_benign_socket_error(exc, "skills-server.serve_forever"):
26943
+ raise
26944
+
26945
+ skills_thread = threading.Thread(target=_skills_serve_loop, daemon=True)
25223
26946
  skills_thread.start()
25224
26947
  setattr(app, "skills_ui_enabled", True)
25225
26948
  except Exception as exc:
@@ -25330,6 +27053,12 @@ def main():
25330
27053
  server.serve_forever()
25331
27054
  except KeyboardInterrupt:
25332
27055
  print("\n[web-agent] shutting down")
27056
+ except OSError as exc:
27057
+ if swallow_benign_socket_error(exc, "main.serve_forever"):
27058
+ if BENIGN_SOCKET_DEBUG_LOG_ENABLED:
27059
+ print(f"\n[web-agent][debug] socket closed benignly ({trim(str(exc), 180)}), shutting down")
27060
+ else:
27061
+ raise
25333
27062
  finally:
25334
27063
  try:
25335
27064
  persist_report = app.persist_all_sessions(include_running=True, lock_timeout=0.6)