PyPI - clouds-coder - Versions diffs - 2026.3.7__tar.gz → 2026.3.8__tar.gz - Mend

clouds-coder 2026.3.7.tar.gz → 2026.3.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{clouds_coder-2026.3.7 → clouds_coder-2026.3.8}/Clouds_Coder.py RENAMED Viewed

@@ -5,6 +5,7 @@ import base64
 from collections import deque
 import csv
 import difflib
+import errno
 import html
 import hashlib
 import hmac
@@ -35,7 +36,7 @@ from pathlib import Path, PurePosixPath
 from urllib.error import HTTPError, URLError
 from urllib.parse import parse_qs, unquote, urlparse
 from urllib.request import Request, urlopen
-APP_VERSION = "2026.03.07"
+APP_VERSION = "0.1.1"
 DEFAULT_OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
 DEFAULT_OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5-coder:7b")
 WORKDIR = Path(os.getenv("AGENT_WORKDIR", os.getcwd())).resolve()
@@ -71,6 +72,14 @@ DEFAULT_TIMEOUT_SECONDS = max(
 DEFAULT_REQUEST_TIMEOUT = DEFAULT_TIMEOUT_SECONDS
 AUTO_CONTINUE_BUDGET_DEFAULT = 30
 AGENT_MAX_OUTPUT_TOKENS = 2200
+WATCHDOG_INTENT_NO_TOOL_THRESHOLD = 2
+WATCHDOG_REPEAT_NO_TOOL_THRESHOLD = 2
+WATCHDOG_STATE_STALL_THRESHOLD = 6
+WATCHDOG_CONTEXT_STALL_THRESHOLD = 2
+WATCHDOG_REPEAT_SIMILARITY_THRESHOLD = 0.85
+WATCHDOG_CONTEXT_NEAR_RATIO = 0.92
+WATCHDOG_MAX_DECOMPOSE_STEPS = 12
+WATCHDOG_STEP_MAX_ATTEMPTS = 2
 EMPTY_ACTION_MIN_CONTENT_CHARS = 5
 EMPTY_ACTION_WAKEUP_RETRY_LIMIT = 2
 THINKING_BUDGET_FORCE_RATIO = 0.85
@@ -98,6 +107,22 @@ LIVE_INPUT_WEIGHT_BASE_DELAYED = 0.35
 LIVE_INPUT_WEIGHT_BASE_NORMAL = 0.65
 LIVE_INPUT_WEIGHT_STEP_DELAYED = 0.30
 LIVE_INPUT_WEIGHT_STEP_NORMAL = 0.20
+# Case-insensitive patterns identifying "noise" lines that
+# filter_runtime_noise_lines() removes from captured text. The last pattern also
+# matches the debug line printed by _log_benign_socket_error_limited().
+SOCKET_NOISE_LINE_PATTERNS = (
+    re.compile(r"\bwinerror\s*10038\b", re.IGNORECASE),
+    re.compile(r"\bwsaenotsock\b", re.IGNORECASE),
+    re.compile(r"\bsocket\s+closed\s+benignly\b", re.IGNORECASE),
+    re.compile(r"\bbenign\s+socket\s+error\b", re.IGNORECASE),
+)
+# True when the AGENT_DEBUG_SOCKET_LOG environment variable (stripped and
+# lower-cased) is one of the spellings below. Evaluated once at import time;
+# gates all output of _log_benign_socket_error_limited().
+BENIGN_SOCKET_DEBUG_LOG_ENABLED = str(os.getenv("AGENT_DEBUG_SOCKET_LOG", "") or "").strip().lower() in {
+    "1",
+    "true",
+    "yes",
+    "on",
+    "debug",
+}
+# Minimum gap, in seconds, between two printed debug lines for the same
+# "location|code" key; occurrences inside the gap are only counted.
+BENIGN_SOCKET_LOG_INTERVAL_SECONDS = 30.0
+FINAL_SUMMARY_MIN_CHARS = 80
+FINAL_SUMMARY_STRICT_MIN_CHARS = 120
 RUNTIME_CONTROL_HINT_PREFIXES = (
     "<reminder>",
     "<todo-rescue>",
@@ -170,6 +195,7 @@ TASK_PROFILE_TYPES = (
 )
 TASK_LEVEL_CHOICES = (1, 2, 3, 4, 5)
 TASK_SCALE_PREFERENCES = ("fast", "balanced", "thorough")
+SEMANTIC_CONFIDENCE_CHOICES = ("high", "medium", "low")
 TASK_LEVEL_POLICIES: dict[int, dict] = {
     1: {
         "name": "simple_direct_answer",
@@ -712,6 +738,37 @@ def model_language_instruction(lang: str) -> str:
     )
+def _detect_os_shell_instruction() -> str:
+    """Return a shell environment note for the agent system prompt based on the host OS."""
+    # Selection is driven by platform.system(): "Windows" and "Darwin" get a
+    # dedicated note; every other value falls through to the Linux/POSIX text.
+    # NOTE(review): the POSIX notes name bash/zsh, but commands in this file are
+    # launched with shell=True, which on POSIX runs /bin/sh -- confirm the wording.
+    # Imported locally under a private alias so no module-level name is added.
+    import platform as _platform
+    _sys = _platform.system()
+    if _sys == "Windows":
+        return (
+            "Shell environment: Windows (cmd.exe via shell=True). "
+            "IMPORTANT — use Windows-native commands only: "
+            "use 'dir' (not 'ls'), 'type' (not 'cat'), 'del' (not 'rm'), "
+            "'move' (not 'mv'), 'copy' (not 'cp'), 'findstr' (not 'grep'), "
+            "'where' (not 'which'), 'echo %VAR%' (not 'echo $VAR'). "
+            "To list files recursively use 'dir /s /b'. "
+            "Path separator is backslash (\\). "
+            "Do NOT use POSIX paths like /workspace, /tmp, /usr, ~/... — they do not exist. "
+            "Working directory is already set; use relative paths or the absolute session root shown above."
+        )
+    if _sys == "Darwin":
+        return (
+            "Shell environment: macOS (bash/zsh). "
+            "Standard POSIX commands are available (ls, cat, grep, find, etc.). "
+            "Package manager is 'brew'. "
+            "Do NOT assume Linux-specific paths like /proc or /etc/os-release exist. "
+            "Use relative paths or the absolute session root shown above."
+        )
+    # Linux / other POSIX
+    # (any platform.system() value other than "Windows"/"Darwin" is described as Linux)
+    return (
+        "Shell environment: Linux (bash). "
+        "Standard POSIX commands are available (ls, cat, grep, find, etc.). "
+        "Use relative paths or the absolute session root shown above."
+    )
 def resolve_web_ui_dir_path(raw: str, base_dir: Path | None = None) -> Path:
     txt = str(raw or "").strip()
     if not txt:
@@ -896,6 +953,103 @@ def guess_ext_from_mime(mime: str, fallback: str = ".bin") -> str:
 def now_ts() -> float:
     return time.time()
+# Guards every read/write of _benign_socket_log_state.
+_benign_socket_log_lock = threading.Lock()
+# Rate-limit bookkeeping for _log_benign_socket_error_limited():
+# "location|code" -> {"last_ts": time of last printed line, "suppressed": count
+# of occurrences skipped since then}.
+_benign_socket_log_state: dict[str, dict[str, float | int]] = {}
+def filter_runtime_noise_lines(text: str) -> tuple[str, int]:
+    # Drop every line of ``text`` that matches any SOCKET_NOISE_LINE_PATTERNS entry.
+    #
+    # Returns (filtered_text, dropped_count). Falsy input yields ("", 0).
+    # Surviving lines are re-joined with "\n" and the result is stripped, so the
+    # original line terminators and outer whitespace are not preserved.
+    raw = str(text or "")
+    if not raw:
+        return "", 0
+    kept: list[str] = []
+    dropped = 0
+    for line in raw.splitlines():
+        row = str(line or "")
+        # A single pattern hit anywhere in the line is enough to discard it.
+        if any(p.search(row) for p in SOCKET_NOISE_LINE_PATTERNS):
+            dropped += 1
+            continue
+        kept.append(row)
+    return "\n".join(kept).strip(), int(dropped)
+def is_benign_socket_error(exc: BaseException | None) -> bool:
+    # Classify ``exc`` as an ignorable connection/pipe error.
+    #
+    # True for BrokenPipeError, ConnectionResetError, ConnectionAbortedError and
+    # TimeoutError, and for any other OSError whose ``winerror`` or ``errno`` is in
+    # the sets below. False for None and for non-OSError exceptions.
+    if exc is None:
+        return False
+    if isinstance(exc, (BrokenPipeError, ConnectionResetError, ConnectionAbortedError, TimeoutError)):
+        return True
+    if not isinstance(exc, OSError):
+        return False
+    # Winsock codes: 10038 WSAENOTSOCK, 10053 WSAECONNABORTED,
+    # 10054 WSAECONNRESET, 10057 WSAENOTCONN.
+    winerror = int(getattr(exc, "winerror", 0) or 0)
+    if winerror in {10038, 10053, 10054, 10057, 10093}:  # 10093 = WSANOTINITIALISED (selector on pipe)
+        return True
+    err = int(getattr(exc, "errno", 0) or 0)
+    # The numeric defaults are used only if the errno module lacks the name;
+    # they are the Linux values for these constants.
+    benign_errno = {
+        int(getattr(errno, "EPIPE", 32)),
+        int(getattr(errno, "ECONNRESET", 104)),
+        int(getattr(errno, "ECONNABORTED", 103)),
+        int(getattr(errno, "ENOTCONN", 107)),
+        int(getattr(errno, "EBADF", 9)),
+    }
+    return err in benign_errno
+def _socket_error_code(exc: BaseException | None) -> str:
+    # Build a short label for ``exc`` used in rate-limit keys and debug output.
+    #
+    # Non-OSError: the exception class name, or "unknown" when ``exc`` is None.
+    # OSError: "winerror:N" if a positive winerror is set, else "errno:N" if a
+    # positive errno is set, else the class name.
+    if not isinstance(exc, OSError):
+        return str(type(exc).__name__ if exc is not None else "unknown")
+    winerror = int(getattr(exc, "winerror", 0) or 0)
+    if winerror > 0:
+        return f"winerror:{winerror}"
+    err = int(getattr(exc, "errno", 0) or 0)
+    if err > 0:
+        return f"errno:{err}"
+    # exc is an OSError instance here, so the "OSError" fallback is never taken.
+    return str(type(exc).__name__ if exc is not None else "OSError")
+def _log_benign_socket_error_limited(exc: BaseException | None, where: str = ""):
+    # Print a rate-limited debug line to stderr for a benign socket error.
+    #
+    # No-op unless BENIGN_SOCKET_DEBUG_LOG_ENABLED is true. Messages are keyed by
+    # "<where>|<error code>"; the first occurrence of a key is printed, later ones
+    # are printed at most once per BENIGN_SOCKET_LOG_INTERVAL_SECONDS with the
+    # number of occurrences skipped in between. ``where`` defaults to "runtime".
+    if not BENIGN_SOCKET_DEBUG_LOG_ENABLED:
+        return
+    code = _socket_error_code(exc)
+    location = str(where or "runtime").strip()
+    key = f"{location}|{code}"
+    now = now_ts()
+    suppressed = 0
+    should_emit = False
+    with _benign_socket_log_lock:
+        row = _benign_socket_log_state.get(key)
+        if not isinstance(row, dict):
+            # First time this key is seen: record it and print immediately.
+            _benign_socket_log_state[key] = {"last_ts": now, "suppressed": 0}
+            should_emit = True
+        else:
+            last_ts = float(row.get("last_ts", 0.0) or 0.0)
+            if now - last_ts >= BENIGN_SOCKET_LOG_INTERVAL_SECONDS:
+                # Interval elapsed: report the backlog count and reset it.
+                suppressed = int(row.get("suppressed", 0) or 0)
+                row["last_ts"] = now
+                row["suppressed"] = 0
+                should_emit = True
+            else:
+                row["suppressed"] = int(row.get("suppressed", 0) or 0) + 1
+        # Bound the table: past 512 keys, evict the 128 with the oldest last_ts.
+        if len(_benign_socket_log_state) > 512:
+            stale = sorted(
+                _benign_socket_log_state.items(),
+                key=lambda item: float((item[1] or {}).get("last_ts", 0.0) if isinstance(item[1], dict) else 0.0),
+            )[:128]
+            for dead_key, _ in stale:
+                _benign_socket_log_state.pop(dead_key, None)
+    # The print happens after the lock is released.
+    if should_emit:
+        msg = f"[web-agent][debug] benign socket error {code} at {location}"
+        if suppressed > 0:
+            msg = f"{msg} (+{suppressed} suppressed)"
+        print(msg, file=sys.stderr)
+def swallow_benign_socket_error(exc: BaseException | None, where: str = "") -> bool:
+    # Report whether ``exc`` is a benign socket error, logging it (rate-limited,
+    # debug-only) when it is. Returns True for benign errors and False otherwise.
+    # The function itself suppresses nothing; the caller acts on the result.
+    if not is_benign_socket_error(exc):
+        return False
+    _log_benign_socket_error_limited(exc, where)
+    return True
 def normalize_timeout_seconds(
     raw: object,
     *,
@@ -8511,6 +8665,7 @@ class SessionState:
             f"Session absolute writable root is {self.files_root}. "
             "For file tools, prefer relative paths like hello.txt; runtime will map them to the absolute session root. "
             "The '/workspace/...' form is only a virtual alias for path arguments; never create OS-level /workspace in shell. "
+            f"{_detect_os_shell_instruction()} "
             "Use tools to inspect files, execute commands, and edit code safely. "
             f"{route_hint}"
             f"{budget_hint} "
@@ -8558,6 +8713,19 @@ class SessionState:
             "used_percent": used_pct,
         }
+    def _apply_auto_compact_if_needed(self, reason: str = "auto") -> bool:
+        # Run self._microcompact() unconditionally, then call
+        # self._auto_compact(reason) only if the context budget is exhausted.
+        #
+        # Returns True when _auto_compact() was invoked, False otherwise.
+        self._microcompact()
+        metrics = self._context_budget_metrics()
+        used = int(metrics.get("used", 0) or 0)
+        # Clamp to >= 1: with a missing/zero limit, any non-zero usage counts as full.
+        limit = max(1, int(metrics.get("limit", 0) or 0))
+        if used < limit:
+            return False
+        now_tick = now_ts()
+        # Debounce: skip if the previous compaction was under 0.8 s ago.
+        # (presumably last_compact_ts is refreshed by _auto_compact -- verify)
+        if (now_tick - float(self.last_compact_ts or 0.0)) < 0.8:
+            return False
+        self._auto_compact(reason)
+        return True
     def _estimate_output_tokens(self, text: str, thinking_text: str = "", tool_calls: list | None = None) -> int:
         t_main = len(str(text or "")) // 4
         t_think = len(str(thinking_text or "")) // 4
@@ -10981,9 +11149,18 @@ class SessionState:
             "重构",
             "设计",
             "构建",
+            "架构",
+            "内核",
+            "框架",
+            "死循环",
+            "状态机",
+            "调度",
             "后端",
             "前端",
             "自动化",
+            "agentbus",
+            "watchdog",
+            "decomposition",
             "workflow",
             "architecture",
             "build",
@@ -11102,7 +11279,10 @@ class SessionState:
         return {
             "task_type": "general",
             "complexity": "simple",
-            "direct_objective": "Provide the most direct useful response with minimal orchestration.",
+            "direct_objective": (
+                "Provide the most direct useful response with minimal orchestration, "
+                "anchored to the current project context and user goal."
+            ),
             "recommended_agents": ["developer"],
             "round_budget": 3,
             "reason": "default lightweight profile",
@@ -11667,159 +11847,233 @@ class SessionState:
                 del target[:overflow]
         def _merge_output_text() -> str:
-            out_text = out_buf.decode("utf-8", errors="replace")
-            err_text = err_buf.decode("utf-8", errors="replace")
+            # On Windows, cmd.exe emits output in a legacy code page (e.g. cp936/GBK),
+            # not UTF-8.  Decode with locale.getpreferredencoding(False), i.e. the ANSI
+            # code page.  NOTE: this can differ from the console's OEM code page.
+            if os.name == "nt":
+                try:
+                    import locale as _lc
+                    enc = _lc.getpreferredencoding(False) or "utf-8"
+                except Exception:
+                    enc = "utf-8"
+            else:
+                enc = "utf-8"
+            out_text = out_buf.decode(enc, errors="replace")
+            err_text = err_buf.decode(enc, errors="replace")
             return (out_text + err_text).strip()
-        try:
-            popen_kwargs = {
-                "shell": True,
-                "cwd": cwd,
-                "stdout": subprocess.PIPE,
-                "stderr": subprocess.PIPE,
-                "text": False,
-                "bufsize": 0,
-                "start_new_session": (os.name == "posix"),
-            }
-            if os.name == "nt":
-                create_group = int(getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0) or 0)
-                if create_group > 0:
-                    popen_kwargs["creationflags"] = create_group
-            proc = subprocess.Popen(command, **popen_kwargs)
-            if os.name == "nt":
-                # Windows pipe handles are not selector-friendly; use reader threads.
-                reader_threads: list[threading.Thread] = []
+        def _collect_with_reader_threads(proc: subprocess.Popen):
+            nonlocal next_progress_emit
+            reader_threads: list[threading.Thread] = []
+            io_queue: queue.Queue = queue.Queue()
+            active_readers: set[str] = set()
-                def _spawn_reader(stream, target: bytearray):
-                    if stream is None:
-                        return
+            def _spawn_reader(label: str, stream):
+                if stream is None:
+                    return
+                active_readers.add(label)
+                # Selector fallback may leave PIPE FDs in non-blocking mode.
+                # Reader threads expect blocking reads to avoid early EOF/pipe close.
+                try:
+                    os.set_blocking(stream.fileno(), True)
+                except Exception:
+                    pass
-                    def _reader():
+                def _reader():
+                    try:
                         while True:
                             try:
                                 chunk = stream.read(65536)
                             except Exception:
                                 break
+                            if chunk is None:
+                                time.sleep(0.01)
+                                continue
                             if not chunk:
                                 break
-                            _append_capture(target, chunk)
+                            io_queue.put((label, chunk))
+                    finally:
+                        try:
+                            stream.close()
+                        except Exception:
+                            pass
+                        io_queue.put((label, None))
-                    th = threading.Thread(target=_reader, daemon=True)
-                    th.start()
-                    reader_threads.append(th)
+                th = threading.Thread(target=_reader, daemon=True)
+                th.start()
+                reader_threads.append(th)
-                _spawn_reader(proc.stdout, out_buf)
-                _spawn_reader(proc.stderr, err_buf)
+            _spawn_reader("stdout", proc.stdout)
+            _spawn_reader("stderr", proc.stderr)
+            while True:
+                now = time.time()
+                elapsed = now - start
+                if (not meta.get("error")) and self.cancel_requested:
+                    _stop_process(proc)
+                    meta["error"] = "Error: interrupted by user"
+                    meta["exit_code"] = -130
+                elif (not meta.get("error")) and timeout > 0 and elapsed >= timeout:
+                    _stop_process(proc)
+                    meta["error"] = f"Error: timeout ({timeout}s)"
+                    meta["exit_code"] = -1
+                try:
+                    label, chunk = io_queue.get(timeout=0.12)
+                    if chunk is None:
+                        active_readers.discard(str(label))
+                    elif str(label) == "stderr":
+                        _append_capture(err_buf, chunk)
+                    else:
+                        _append_capture(out_buf, chunk)
+                except queue.Empty:
+                    pass
                 while True:
-                    now = time.time()
-                    elapsed = now - start
-                    if (not meta.get("error")) and self.cancel_requested:
-                        _stop_process(proc)
-                        meta["error"] = "Error: interrupted by user"
-                        meta["exit_code"] = -130
-                    elif (not meta.get("error")) and timeout > 0 and elapsed >= timeout:
-                        _stop_process(proc)
-                        meta["error"] = f"Error: timeout ({timeout}s)"
-                        meta["exit_code"] = -1
-                    if now >= next_progress_emit:
-                        self._emit_transient(
-                            "status",
-                            {
-                                "summary": (
-                                    f"bash running ({int(elapsed)}s, "
-                                    f"captured={len(out_buf) + len(err_buf)}B)"
-                                )
-                            },
-                        )
-                        next_progress_emit = now + 0.8
-                    if proc.poll() is not None:
+                    try:
+                        label, chunk = io_queue.get_nowait()
+                    except queue.Empty:
                         break
-                    time.sleep(0.12)
+                    if chunk is None:
+                        active_readers.discard(str(label))
+                    elif str(label) == "stderr":
+                        _append_capture(err_buf, chunk)
+                    else:
+                        _append_capture(out_buf, chunk)
+                if now >= next_progress_emit:
+                    self._emit_transient(
+                        "status",
+                        {
+                            "summary": (
+                                f"bash running ({int(elapsed)}s, "
+                                f"captured={len(out_buf) + len(err_buf)}B)"
+                            )
+                        },
+                    )
+                    next_progress_emit = now + 0.8
+                if (proc.poll() is not None) and (not active_readers) and io_queue.empty():
+                    break
+            for th in reader_threads:
                 try:
-                    extra_out, extra_err = proc.communicate(timeout=0.8)
+                    th.join(timeout=0.8)
                 except Exception:
-                    extra_out, extra_err = b"", b""
-                _append_capture(out_buf, extra_out or b"")
-                _append_capture(err_buf, extra_err or b"")
-                for th in reader_threads:
-                    try:
-                        th.join(timeout=0.8)
-                    except Exception:
-                        pass
-                merged = _merge_output_text()
-                if meta.get("error"):
-                    meta["output"] = trim(merged or str(meta["error"]))
+                    pass
+            while True:
+                try:
+                    label, chunk = io_queue.get_nowait()
+                except queue.Empty:
+                    break
+                if chunk is None:
+                    continue
+                if str(label) == "stderr":
+                    _append_capture(err_buf, chunk)
                 else:
-                    meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
-                    meta["output"] = trim(merged or "(no output)")
+                    _append_capture(out_buf, chunk)
+            merged_raw = _merge_output_text()
+            merged, _ = filter_runtime_noise_lines(merged_raw)
+            if meta.get("error"):
+                meta["output"] = trim(merged or str(meta["error"]))
             else:
-                with selectors.DefaultSelector() as sel:
-                    if proc.stdout is not None:
-                        try:
-                            os.set_blocking(proc.stdout.fileno(), False)
-                        except Exception:
-                            pass
-                        sel.register(proc.stdout, selectors.EVENT_READ, data="stdout")
-                    if proc.stderr is not None:
-                        try:
-                            os.set_blocking(proc.stderr.fileno(), False)
-                        except Exception:
-                            pass
-                        sel.register(proc.stderr, selectors.EVENT_READ, data="stderr")
-                    while True:
-                        now = time.time()
-                        elapsed = now - start
-                        if self.cancel_requested:
-                            _stop_process(proc)
-                            meta["error"] = "Error: interrupted by user"
-                            meta["exit_code"] = -130
-                        elif timeout > 0 and elapsed >= timeout:
-                            _stop_process(proc)
-                            meta["error"] = f"Error: timeout ({timeout}s)"
-                            meta["exit_code"] = -1
-                        events = sel.select(timeout=0.12)
-                        for key, _ in events:
-                            stream = key.fileobj
+                meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
+                meta["output"] = trim(merged or "(no output)")
+        try:
+            popen_kwargs = {
+                "shell": True,
+                "cwd": cwd,
+                "stdout": subprocess.PIPE,
+                "stderr": subprocess.PIPE,
+                "text": False,
+                "bufsize": 0,
+                "start_new_session": (os.name == "posix"),
+            }
+            if os.name == "nt":
+                create_group = int(getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0) or 0)
+                if create_group > 0:
+                    popen_kwargs["creationflags"] = create_group
+            proc = subprocess.Popen(command, **popen_kwargs)
+            if os.name == "nt":
+                # Windows: read PIPE output via blocking reader threads + queue.
+                _collect_with_reader_threads(proc)
+            else:
+                try:
+                    with selectors.DefaultSelector() as sel:
+                        if proc.stdout is not None:
                             try:
-                                chunk = os.read(stream.fileno(), 65536)
-                            except BlockingIOError:
-                                continue
+                                os.set_blocking(proc.stdout.fileno(), False)
                             except Exception:
-                                chunk = b""
-                            if not chunk:
+                                pass
+                            sel.register(proc.stdout, selectors.EVENT_READ, data="stdout")
+                        if proc.stderr is not None:
+                            try:
+                                os.set_blocking(proc.stderr.fileno(), False)
+                            except Exception:
+                                pass
+                            sel.register(proc.stderr, selectors.EVENT_READ, data="stderr")
+                        while True:
+                            now = time.time()
+                            elapsed = now - start
+                            if self.cancel_requested:
+                                _stop_process(proc)
+                                meta["error"] = "Error: interrupted by user"
+                                meta["exit_code"] = -130
+                            elif timeout > 0 and elapsed >= timeout:
+                                _stop_process(proc)
+                                meta["error"] = f"Error: timeout ({timeout}s)"
+                                meta["exit_code"] = -1
+                            events = sel.select(timeout=0.12)
+                            for key, _ in events:
+                                stream = key.fileobj
                                 try:
-                                    sel.unregister(stream)
+                                    chunk = os.read(stream.fileno(), 65536)
+                                except BlockingIOError:
+                                    continue
                                 except Exception:
-                                    pass
-                                continue
-                            if key.data == "stderr":
-                                _append_capture(err_buf, chunk)
-                            else:
-                                _append_capture(out_buf, chunk)
-                        if now >= next_progress_emit:
-                            self._emit_transient(
-                                "status",
-                                {
-                                    "summary": (
-                                        f"bash running ({int(elapsed)}s, "
-                                        f"captured={len(out_buf) + len(err_buf)}B)"
-                                    )
-                                },
-                            )
-                            next_progress_emit = now + 0.8
-                        if (proc.poll() is not None) and (not sel.get_map()):
-                            break
-                    merged = _merge_output_text()
-                    if meta.get("error"):
-                        meta["output"] = trim(merged or str(meta["error"]))
+                                    chunk = b""
+                                if not chunk:
+                                    try:
+                                        sel.unregister(stream)
+                                    except Exception:
+                                        pass
+                                    continue
+                                if key.data == "stderr":
+                                    _append_capture(err_buf, chunk)
+                                else:
+                                    _append_capture(out_buf, chunk)
+                            if now >= next_progress_emit:
+                                self._emit_transient(
+                                    "status",
+                                    {
+                                        "summary": (
+                                            f"bash running ({int(elapsed)}s, "
+                                            f"captured={len(out_buf) + len(err_buf)}B)"
+                                        )
+                                    },
+                                )
+                                next_progress_emit = now + 0.8
+                            if (proc.poll() is not None) and (not sel.get_map()):
+                                break
+                        merged_raw = _merge_output_text()
+                        merged, _ = filter_runtime_noise_lines(merged_raw)
+                        if meta.get("error"):
+                            meta["output"] = trim(merged or str(meta["error"]))
+                        else:
+                            meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
+                            meta["output"] = trim(merged or "(no output)")
+                except Exception as exc:
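+                    # Some platforms may reject selector registration for PIPEs; fall back
+                    # to reader threads.  NOTE: this handler is in the non-Windows branch,
+                    # so the os.name == "nt" clause below (any OSError, e.g. WinError 10093
+                    # WSANOTINITIALISED) cannot match here.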
+                    if is_benign_socket_error(exc) or isinstance(exc, ValueError) or (os.name == "nt" and isinstance(exc, OSError)):
+                        _collect_with_reader_threads(proc)
                     else:
-                        meta["exit_code"] = int(proc.returncode if proc.returncode is not None else 0)
-                        meta["output"] = trim(merged or "(no output)")
+                        raise
         except Exception as exc:
-            meta["error"] = f"Error: {exc}"
-            meta["output"] = meta["error"]
-            meta["exit_code"] = -1
+            # On Windows, WinError 10038 (WSAENOTSOCK) can surface here when
+            # selector-based I/O is used with pipe FDs. Fall back to thread-based reading.
+            if proc is not None and is_benign_socket_error(exc):
+                _collect_with_reader_threads(proc)
+            else:
+                meta["error"] = f"Error: {exc}"
+                meta["output"] = meta["error"]
+                meta["exit_code"] = -1
         meta["duration_ms"] = int((time.time() - start) * 1000)
         after = self._git_status_map(cwd)
         meta["changed_files"] = self._status_delta(before, after) if before or after else []
@@ -12350,6 +12604,35 @@ class SessionState:
             return trim(text.replace("\n", " "), 220)
         return "current task"
+    def _compose_default_direct_objective(self, base_objective: str, goal: str, task_type: str) -> str:
+        base = trim(str(base_objective or "").strip(), 520)
+        goal_clean = trim(strip_thinking_content(str(goal or "")).replace("\n", " ").strip(), 220)
+        path_hits = re.findall(
+            r"(?:[A-Za-z0-9_.-]+/)*[A-Za-z0-9_.-]+\.(?:py|js|ts|tsx|jsx|java|go|rs|md|json|yaml|yml|toml|ini|sh|html|css|c|cpp|h)",
+            goal_clean,
+        )
+        uniq_paths: list[str] = []
+        for item in path_hits:
+            one = trim(str(item or "").strip(), 80)
+            if one and one not in uniq_paths:
+                uniq_paths.append(one)
+            if len(uniq_paths) >= 3:
+                break
+        if uniq_paths:
+            anchor = f" Project anchors: {', '.join(uniq_paths)}."
+        elif goal_clean:
+            anchor = f" Project anchor: {goal_clean}."
+        else:
+            anchor = " Project anchor: current repository context."
+        if task_type == "simple_qa":
+            postfix = " Keep orchestration lightweight and answer directly with project-aware specifics."
+        else:
+            postfix = (
+                " Keep orchestration lightweight and execution-first. "
+                "Use bounded creativity for ambiguous details while preserving existing architecture and constraints."
+            )
+        return trim(f"{base}{anchor}{postfix}", 800)
     def _normalize_task_profile(self, goal: str, raw: object) -> dict:
         base = self._infer_task_profile(goal)
         src = raw if isinstance(raw, dict) else {}
@@ -12362,13 +12645,22 @@ class SessionState:
         complexity = str(src.get("complexity", base.get("complexity", "simple")) or "").strip().lower()
         if complexity not in TASK_COMPLEXITY_LEVELS:
             complexity = str(base.get("complexity", "simple"))
-        direct_objective = (
-            trim(
-                str(src.get("direct_objective", base.get("direct_objective", "")) or "").strip(),
-                800,
+        src_direct_objective = trim(str(src.get("direct_objective", "") or "").strip(), 800)
+        legacy_objectives = {
+            "Provide the most direct useful response with minimal orchestration.",
+            (
+                "Provide the most direct useful response with minimal orchestration, "
+                "anchored to the current project context and user goal."
+            ),
+        }
+        if src_direct_objective and src_direct_objective not in legacy_objectives:
+            direct_objective = src_direct_objective
+        else:
+            direct_objective = self._compose_default_direct_objective(
+                str(base.get("direct_objective", "")),
+                goal,
+                task_type,
             )
-            or str(base.get("direct_objective", ""))
-        )
         rec_raw = src.get("recommended_agents", base.get("recommended_agents", []))
         recommended: list[str] = []
         if isinstance(rec_raw, list):
@@ -12669,95 +12961,793 @@ class SessionState:
         key = str(raw or "").strip().upper()
         return key if key in BLACKBOARD_STATUSES else "INITIALIZING"
-    def _new_blackboard(self, goal: str = "") -> dict:
-        profile = self._normalize_task_profile(goal, {})
-        progress = "done" if str(profile.get("task_type", "") or "") == "simple_qa" and not str(goal or "").strip() else "initializing"
+    def _new_watchdog_state(self) -> dict:
         return {
-            "version": 1,
-            "updated_at": float(now_ts()),
-            "original_goal": trim(str(goal or "").strip(), 4000),
-            "research_notes": [],
-            "code_artifacts": {},
-            "execution_logs": [],
-            "review_feedback": [],
-            "conversation_history": [],
-            "status": "INITIALIZING",
-            "approval": {
-                "approved": False,
-                "by": "",
-                "note": "",
-                "ts": 0.0,
-            },
-            "manager_cycles": 0,
-            "manager_summary_attempts": 0,
-            "active_agent": "",
-            "last_delegate": {
-                "target": "",
-                "instruction": "",
-                "reason": "",
-                "source": "",
-                "is_mandatory": False,
-                "ts": 0.0,
-            },
-            "task_profile": profile,
-            "manager_judgement": {
-                "task_type": str(profile.get("task_type", "general")),
-                "complexity": str(profile.get("complexity", "simple")),
-                "scale_preference": str(profile.get("scale_preference", "balanced") or "balanced"),
-                "progress": progress,
-                "remaining_rounds": (
-                    -1
-                    if int(profile.get("round_budget", 0) or 0) <= 0
-                    else int(profile.get("round_budget", 1) or 1)
-                ),
-                "updated_at": float(now_ts()),
-            },
-            "last_worker_reply": {
-                "role": "",
-                "text": "",
-                "ts": 0.0,
-            },
+            "intent_no_tool_streak": 0,
+            "repeat_no_tool_streak": 0,
+            "state_unchanged_streak": 0,
+            "last_no_tool_text": "",
+            "last_no_tool_hash": "",
+            "last_state_fp": "",
+            "trigger_count": 0,
+            "last_trigger_reason": "",
+            "last_trigger_ts": 0.0,
         }
-    def _normalize_blackboard(self, raw: object) -> dict:
+    def _normalize_watchdog_state(self, raw: object) -> dict:
         src = raw if isinstance(raw, dict) else {}
-        board = self._new_blackboard(str(src.get("original_goal", "") or ""))
-        try:
-            board["version"] = int(src.get("version", 1) or 1)
-        except Exception:
-            board["version"] = 1
-        board["updated_at"] = float(src.get("updated_at", now_ts()) or now_ts())
-        board["status"] = self._normalize_blackboard_status(src.get("status", board["status"]))
-        board["manager_cycles"] = max(0, int(src.get("manager_cycles", 0) or 0))
-        board["manager_summary_attempts"] = max(0, int(src.get("manager_summary_attempts", 0) or 0))
-        board["active_agent"] = self._sanitize_agent_role(src.get("active_agent", ""))
-        raw_delegate = src.get("last_delegate", {})
-        if isinstance(raw_delegate, dict):
-            board["last_delegate"] = {
-                "target": str(raw_delegate.get("target", "") or "").strip().lower(),
-                "instruction": trim(str(raw_delegate.get("instruction", "") or "").strip(), 1200),
-                "reason": trim(str(raw_delegate.get("reason", "") or "").strip(), 600),
-                "source": trim(str(raw_delegate.get("source", "") or "").strip(), 40),
-                "is_mandatory": _to_bool_like(raw_delegate.get("is_mandatory", False), default=False),
-                "ts": float(raw_delegate.get("ts", 0.0) or 0.0),
-            }
-        raw_approval = src.get("approval", {})
-        if isinstance(raw_approval, dict):
-            board["approval"] = {
-                "approved": bool(raw_approval.get("approved", False)),
-                "by": trim(str(raw_approval.get("by", "") or "").strip(), 60),
-                "note": trim(str(raw_approval.get("note", "") or "").strip(), 1000),
-                "ts": float(raw_approval.get("ts", 0.0) or 0.0),
-            }
-        board["task_profile"] = self._normalize_task_profile(
-            str(board.get("original_goal", "") or ""),
-            src.get("task_profile", {}),
-        )
-        raw_judgement = src.get("manager_judgement", {})
-        if isinstance(raw_judgement, dict):
-            board["manager_judgement"] = {
-                "task_type": trim(
-                    str(raw_judgement.get("task_type", board["task_profile"].get("task_type", "")) or "").strip(),
+        out = self._new_watchdog_state()
+        out["intent_no_tool_streak"] = max(0, int(src.get("intent_no_tool_streak", 0) or 0))
+        out["repeat_no_tool_streak"] = max(0, int(src.get("repeat_no_tool_streak", 0) or 0))
+        out["state_unchanged_streak"] = max(0, int(src.get("state_unchanged_streak", 0) or 0))
+        out["last_no_tool_text"] = trim(str(src.get("last_no_tool_text", "") or "").strip(), 1200)
+        out["last_no_tool_hash"] = trim(str(src.get("last_no_tool_hash", "") or "").strip(), 80)
+        out["last_state_fp"] = trim(str(src.get("last_state_fp", "") or "").strip(), 120)
+        out["trigger_count"] = max(0, int(src.get("trigger_count", 0) or 0))
+        out["last_trigger_reason"] = trim(str(src.get("last_trigger_reason", "") or "").strip(), 200)
+        out["last_trigger_ts"] = float(src.get("last_trigger_ts", 0.0) or 0.0)
+        return out
+    def _new_decomposition_queue_state(self) -> dict:
+        return {
+            "active": False,
+            "trigger_reason": "",
+            "created_at": 0.0,
+            "cursor": 0,
+            "steps": [],
+            "last_error": "",
+            "snapshot": "",
+            "decomposer_output": "",
+        }
+    def _watchdog_normalize_steps(self, rows: object) -> list[dict]:
+        if not isinstance(rows, list):
+            return []
+        out: list[dict] = []
+        def _infer_target(action_type: str, instruction: str, fallback: str = "developer") -> str:
+            raw = self._sanitize_agent_role(fallback) or "developer"
+            low = f"{action_type} {instruction}".lower()
+            if any(tok in low for tok in ("review", "verify", "validate", "test", "qa", "检查", "验证", "评审", "審查")):
+                return "reviewer"
+            if any(tok in low for tok in ("research", "inspect", "analy", "explore", "investigate", "分析", "调研", "調研", "探索")):
+                return "explorer"
+            return raw
+        for idx, row in enumerate(rows[:WATCHDOG_MAX_DECOMPOSE_STEPS]):
+            if not isinstance(row, dict):
+                continue
+            instruction = trim(
+                str(
+                    row.get("description", "")
+                    or row.get("instruction", "")
+                    or row.get("content", "")
+                    or row.get("task", "")
+                    or ""
+                ).strip(),
+                900,
+            )
+            if not instruction:
+                continue
+            action_type = trim(str(row.get("action_type", "") or "").strip(), 80)
+            target = self._sanitize_agent_role(
+                row.get("target", row.get("owner", row.get("role", row.get("agent", ""))))
+            )
+            target = target or _infer_target(action_type, instruction)
+            if target == "developer" and "incremental" not in instruction.lower():
+                instruction = trim(
+                    (
+                        f"{instruction}\n"
+                        "Use incremental edits (append/targeted replace) instead of full-file overwrite unless unavoidable."
+                    ),
+                    1000,
+                )
+            try:
+                step_no = int(row.get("step", idx + 1) or (idx + 1))
+            except Exception:
+                step_no = idx + 1
+            out.append(
+                {
+                    "step": max(1, step_no),
+                    "target": target,
+                    "action_type": action_type or "execute",
+                    "instruction": instruction,
+                    "attempts": max(0, int(row.get("attempts", 0) or 0)),
+                    "status": trim(str(row.get("status", "pending") or "pending").strip().lower(), 20) or "pending",
+                    "updated_at": float(now_ts()),
+                }
+            )
+        if not out:
+            return []
+        return out[:WATCHDOG_MAX_DECOMPOSE_STEPS]
+    def _normalize_decomposition_queue_state(self, raw: object) -> dict:
+        src = raw if isinstance(raw, dict) else {}
+        out = self._new_decomposition_queue_state()
+        out["active"] = bool(src.get("active", False))
+        out["trigger_reason"] = trim(str(src.get("trigger_reason", "") or "").strip(), 200)
+        out["created_at"] = float(src.get("created_at", 0.0) or 0.0)
+        out["cursor"] = max(0, int(src.get("cursor", 0) or 0))
+        out["last_error"] = trim(str(src.get("last_error", "") or "").strip(), 400)
+        out["snapshot"] = trim(str(src.get("snapshot", "") or "").strip(), 4000)
+        out["decomposer_output"] = trim(str(src.get("decomposer_output", "") or "").strip(), 2000)
+        out["steps"] = self._watchdog_normalize_steps(src.get("steps", []))
+        if out["cursor"] >= len(out["steps"]):
+            out["active"] = False
+        return out
+    def _watchdog_state_fingerprint(self, board: dict | None = None) -> str:
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        profile = self._ensure_blackboard_task_profile(bb)
+        payload = {
+            "status": self._normalize_blackboard_status(bb.get("status", "INITIALIZING")),
+            "goal": trim(str(bb.get("original_goal", "") or "").strip(), 400),
+            "active_agent": self._sanitize_agent_role(bb.get("active_agent", "")),
+            "delegate": self._sanitize_agent_role((bb.get("last_delegate", {}) or {}).get("target", "")),
+            "research_count": len(bb.get("research_notes", []) or []),
+            "artifact_count": len(bb.get("code_artifacts", {}) or {}),
+            "exec_count": len(bb.get("execution_logs", []) or []),
+            "review_count": len(bb.get("review_feedback", []) or []),
+            "approved": bool((bb.get("approval", {}) or {}).get("approved", False)),
+            "task_type": str(profile.get("task_type", "general") or "general"),
+            "complexity": str(profile.get("complexity", "simple") or "simple"),
+        }
+        raw = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
+        return hashlib.sha1(raw.encode("utf-8")).hexdigest()
+    def _watchdog_extract_json_array(self, text: str) -> list[dict]:
+        raw = str(text or "").strip()
+        if not raw:
+            return []
+        probe_candidates: list[str] = [raw]
+        fence = re.findall(r"```(?:json)?\s*([\s\S]*?)```", raw, flags=re.IGNORECASE)
+        probe_candidates.extend([str(x or "").strip() for x in fence if str(x or "").strip()])
+        first = raw.find("[")
+        last = raw.rfind("]")
+        if first >= 0 and last > first:
+            probe_candidates.append(raw[first : last + 1].strip())
+        for candidate in probe_candidates:
+            try:
+                parsed = json.loads(candidate)
+            except Exception:
+                continue
+            if isinstance(parsed, list):
+                return [dict(x) for x in parsed if isinstance(x, dict)]
+        return []
+    def _watchdog_intent_without_action(self, text: str) -> bool:
+        clean = strip_thinking_content(str(text or "")).strip()
+        if not clean:
+            return False
+        low = clean.lower()
+        intent_markers = (
+            "i will",
+            "i'm going to",
+            "next step",
+            "plan to",
+            "let me",
+            "我将",
+            "我會",
+            "我会",
+            "下一步",
+            "接下来",
+            "接下來",
+            "计划",
+            "計劃",
+            "准备",
+            "準備",
+        )
+        action_markers = (
+            "wrote",
+            "edited",
+            "executed",
+            "called",
+            "ran ",
+            "已完成",
+            "已执行",
+            "已執行",
+            "已调用",
+            "已調用",
+            "完成了",
+            "执行了",
+            "執行了",
+            "调用了",
+            "調用了",
+        )
+        if any(tok in low for tok in action_markers):
+            return False
+        return any(tok in low for tok in intent_markers)
+    def _watchdog_similarity(self, a: str, b: str) -> float:
+        left = trim(strip_thinking_content(str(a or "")).strip(), 1800)
+        right = trim(strip_thinking_content(str(b or "")).strip(), 1800)
+        if (not left) or (not right):
+            return 0.0
+        return float(difflib.SequenceMatcher(None, left, right).ratio())
+    def _watchdog_context_near_limit(self) -> bool:
+        limit = max(1, int(self.context_token_upper_bound or TOKEN_THRESHOLD))
+        try:
+            used = int(self._estimate_tokens())
+        except Exception:
+            used = 0
+        return bool(used >= int(limit * WATCHDOG_CONTEXT_NEAR_RATIO))
+    def _watchdog_snapshot_payload(self, board: dict, reason: str, role: str, step: dict | None = None) -> str:
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        profile = self._ensure_blackboard_task_profile(bb)
+        code_rows = sorted(
+            list((bb.get("code_artifacts", {}) or {}).items()),
+            key=lambda item: float((item[1] or {}).get("updated_at", 0.0) if isinstance(item[1], dict) else 0.0),
+            reverse=True,
+        )
+        payload = {
+            "objective": trim(str(bb.get("original_goal", "") or "").strip(), 1800),
+            "trigger_reason": trim(str(reason or "").strip(), 200),
+            "active_role": self._sanitize_agent_role(role),
+            "status": self._normalize_blackboard_status(bb.get("status", "INITIALIZING")),
+            "task_profile": {
+                "task_type": str(profile.get("task_type", "general") or "general"),
+                "complexity": str(profile.get("complexity", "simple") or "simple"),
+                "direct_objective": trim(str(profile.get("direct_objective", "") or "").strip(), 600),
+            },
+            "latest_worker_step": {
+                "status": str((step or {}).get("status", "") or ""),
+                "text": trim(str((step or {}).get("text", "") or "").strip(), 600),
+            },
+            "code_artifacts": [
+                {
+                    "path": str(path),
+                    "summary": trim(str((item or {}).get("summary", "") or "").strip(), 200),
+                }
+                for path, item in code_rows[:6]
+            ],
+            "recent_execution_logs": [
+                trim(str((row or {}).get("content", "") or "").strip(), 220)
+                for row in (bb.get("execution_logs", []) or [])[-4:]
+                if isinstance(row, dict)
+            ],
+            "recent_review_feedback": [
+                trim(str((row or {}).get("content", "") or "").strip(), 220)
+                for row in (bb.get("review_feedback", []) or [])[-4:]
+                if isinstance(row, dict)
+            ],
+        }
+        return trim(json_dumps(payload, indent=2), 6000)
+    def _watchdog_fallback_steps(self, board: dict, reason: str) -> list[dict]:
+        profile = self._ensure_blackboard_task_profile(board)
+        objective = trim(str(profile.get("direct_objective", "") or "").strip(), 280) or trim(
+            str(board.get("original_goal", "") or "").strip(),
+            280,
+        )
+        raw = [
+            {
+                "step": 1,
+                "action_type": "research",
+                "target": "explorer",
+                "description": (
+                    "Analyze the latest blocker quickly and write concrete constraints to blackboard "
+                    f"(trigger={trim(reason, 120)})."
+                ),
+            },
+            {
+                "step": 2,
+                "action_type": "implement",
+                "target": "developer",
+                "description": (
+                    "Implement one incremental fix for the current objective and provide verifiable tool output. "
+                    f"Objective: {objective}"
+                ),
+            },
+            {
+                "step": 3,
+                "action_type": "validate",
+                "target": "reviewer",
+                "description": (
+                    "Run one validation pass, provide pass/fix verdict with evidence, and handoff summary request if needed."
+                ),
+            },
+        ]
+        return self._watchdog_normalize_steps(raw)
+    def _watchdog_decompose_steps(self, board: dict, reason: str, *, pinned_selection: str) -> tuple[list[dict], str, str]:
+        snapshot = self._watchdog_snapshot_payload(board, reason, str(board.get("active_agent", "") or ""), None)
+        objective = trim(str(board.get("original_goal", "") or "").strip(), 1600)
+        system_prompt = (
+            "You are a task decomposer. Your only job is to split OBJECTIVE into executable micro-steps. "
+            "Return strict JSON array only: "
+            "[{\"step\":1,\"action_type\":\"...\",\"target\":\"explorer|developer|reviewer\",\"description\":\"...\"}]. "
+            "No markdown, no prose, no code fence."
+        )
+        user_prompt = (
+            f"OBJECTIVE:\n{objective}\n\n"
+            f"TRIGGER:\n{trim(reason, 220)}\n\n"
+            "SNAPSHOT:\n"
+            f"{snapshot}\n\n"
+            "Rules: keep steps module-level (not line-by-line), use incremental edits, "
+            "and keep total steps <= 12."
+        )
+        raw_text = ""
+        parsed_steps: list[dict] = []
+        try:
+            rsp = self._chat_with_same_model_retry(
+                [{"role": "user", "content": user_prompt, "ts": now_ts()}],
+                tools=None,
+                system=system_prompt,
+                max_tokens=1200,
+                think=False,
+                stream_thinking=False,
+                pinned_selection=pinned_selection,
+                context_label="watchdog decomposer",
+                retries=max(1, min(2, int(MODEL_OUTPUT_RETRY_TIMES))),
+            )
+            raw_text = str(rsp.get("content") or "")
+            parsed_steps = self._watchdog_extract_json_array(raw_text)
+        except Exception as exc:
+            raw_text = f"decomposer-error: {trim(str(exc), 220)}"
+            parsed_steps = []
+        normalized = self._watchdog_normalize_steps(parsed_steps)
+        if not normalized:
+            normalized = self._watchdog_fallback_steps(board, reason)
+        return normalized, snapshot, trim(raw_text, 2000)
+    def _watchdog_activate_decomposition(
+        self,
+        board: dict,
+        *,
+        reason: str,
+        role: str,
+        step: dict | None,
+        pinned_selection: str,
+    ) -> bool:
+        dq = self._normalize_decomposition_queue_state(board.get("decomposition_queue", {}))
+        if bool(dq.get("active", False)):
+            return False
+        steps, snapshot, raw_text = self._watchdog_decompose_steps(
+            board,
+            reason,
+            pinned_selection=pinned_selection,
+        )
+        if not steps:
+            return False
+        dq = {
+            "active": True,
+            "trigger_reason": trim(str(reason or "").strip(), 200),
+            "created_at": float(now_ts()),
+            "cursor": 0,
+            "steps": steps,
+            "last_error": "",
+            "snapshot": trim(snapshot, 4000),
+            "decomposer_output": trim(raw_text, 2000),
+        }
+        wd = self._normalize_watchdog_state(board.get("watchdog", {}))
+        wd["trigger_count"] = max(0, int(wd.get("trigger_count", 0) or 0)) + 1
+        wd["last_trigger_reason"] = trim(str(reason or "").strip(), 200)
+        wd["last_trigger_ts"] = float(now_ts())
+        wd["intent_no_tool_streak"] = 0
+        wd["repeat_no_tool_streak"] = 0
+        board["watchdog"] = wd
+        board["decomposition_queue"] = dq
+        self.blackboard = board
+        self._blackboard_touch()
+        self._blackboard_history(
+            "manager",
+            trim(
+                (
+                    "watchdog triggered decomposition "
+                    f"(reason={reason}, role={self._sanitize_agent_role(role)}, "
+                    f"steps={len(steps)})"
+                ),
+                520,
+            ),
+        )
+        self._emit(
+            "status",
+            {
+                "summary": (
+                    "watchdog triggered; switched to stateless executor queue "
+                    f"(reason={trim(reason, 90)}, steps={len(steps)})"
+                )
+            },
+        )
+        return True
+    def _watchdog_pick_executor_route(self, board: dict | None = None) -> tuple[dict, dict] | None:
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
+        if not bool(dq.get("active", False)):
+            return None
+        steps = list(dq.get("steps", []) or [])
+        if not steps:
+            return None
+        cursor = max(0, int(dq.get("cursor", 0) or 0))
+        while cursor < len(steps):
+            status = str((steps[cursor] or {}).get("status", "") or "").strip().lower()
+            if status not in {"done", "skipped"}:
+                break
+            cursor += 1
+        if cursor >= len(steps):
+            dq["active"] = False
+            dq["cursor"] = len(steps)
+            bb["decomposition_queue"] = dq
+            self.blackboard = bb
+            self._blackboard_touch()
+            return None
+        dq["cursor"] = cursor
+        step_row = steps[cursor] if isinstance(steps[cursor], dict) else {}
+        target = self._sanitize_agent_role(step_row.get("target", "")) or "developer"
+        action_type = trim(str(step_row.get("action_type", "execute") or "execute").strip(), 80) or "execute"
+        step_instruction = trim(str(step_row.get("instruction", "") or "").strip(), 900)
+        trigger_reason = trim(str(dq.get("trigger_reason", "") or "").strip(), 180)
+        total = len(steps)
+        current = cursor + 1
+        profile = self._ensure_blackboard_task_profile(bb)
+        task_level = int(profile.get("task_level", self.runtime_task_level or 3) or 3)
+        if task_level not in TASK_LEVEL_CHOICES:
+            task_level = 3
+        args = {
+            "target": target,
+            "instruction": trim(
+                (
+                    f"Executor mode (stateless) step {current}/{total}. "
+                    f"trigger={trigger_reason or 'watchdog'}; action_type={action_type}.\n"
+                    f"{step_instruction}\n"
+                    "Rules: execute one concrete tool call now, keep scope narrow, "
+                    "and update blackboard evidence immediately."
+                ),
+                1200,
+            ),
+            "task_level": int(task_level),
+            "task_type": trim(str(profile.get("task_type", "general") or "general"), 40),
+            "complexity": trim(str(profile.get("complexity", "simple") or "simple"), 20),
+            "scale_preference": trim(str(profile.get("scale_preference", "balanced") or "balanced"), 20),
+            "judgement": trim(
+                f"watchdog-executor-step-{current}/{total}",
+                200,
+            ),
+            "round_budget": int(profile.get("round_budget", self.runtime_round_budget or self.max_agent_rounds) or 0),
+            "direct_objective": trim(str(profile.get("direct_objective", self.runtime_direct_objective or "") or ""), 800),
+            "execution_mode": normalize_execution_mode(
+                profile.get("execution_mode", self._effective_execution_mode()),
+                default=self._effective_execution_mode(),
+            ),
+            "participants": profile.get("participants", self.runtime_participants),
+            "assigned_expert": profile.get("assigned_expert", self.runtime_assigned_expert or "developer"),
+            "requires_user_confirmation": bool(profile.get("requires_user_confirmation", False)),
+            "is_mandatory": True,
+            "executor_mode": True,
+        }
+        bb["decomposition_queue"] = dq
+        self.blackboard = bb
+        self._blackboard_touch()
+        meta = {
+            "trigger_reason": trigger_reason,
+            "cursor": current,
+            "total": total,
+            "target": target,
+            "action_type": action_type,
+        }
+        return args, meta
+    def _watchdog_mark_step_progress(self, board: dict, role: str, step: dict | None) -> dict:
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
+        out = {"queue_active": bool(dq.get("active", False)), "step_advanced": False}
+        if not bool(dq.get("active", False)):
+            bb["decomposition_queue"] = dq
+            self.blackboard = bb
+            return out
+        rows = list(dq.get("steps", []) or [])
+        cursor = max(0, int(dq.get("cursor", 0) or 0))
+        if cursor >= len(rows):
+            dq["active"] = False
+            dq["cursor"] = len(rows)
+            bb["decomposition_queue"] = dq
+            self.blackboard = bb
+            return {"queue_active": False, "step_advanced": False}
+        current = rows[cursor] if isinstance(rows[cursor], dict) else {}
+        target = self._sanitize_agent_role(current.get("target", "")) or "developer"
+        role_key = self._sanitize_agent_role(role)
+        if target != role_key:
+            bb["decomposition_queue"] = dq
+            self.blackboard = bb
+            return out
+        status = str((step or {}).get("status", "") or "").strip().lower()
+        text = trim(strip_thinking_content(str((step or {}).get("text", "") or "").strip()), 1200)
+        tool_results = (step or {}).get("tool_results", []) if isinstance((step or {}).get("tool_results"), list) else []
+        has_ok_tool = any(isinstance(row, dict) and bool(row.get("ok", False)) for row in tool_results)
+        success = bool(status == "tools" and has_ok_tool)
+        if (not success) and status == "no-tools" and role_key in {"explorer", "reviewer"} and len(text) >= 120:
+            success = True
+        attempts = max(0, int(current.get("attempts", 0) or 0)) + 1
+        current["attempts"] = attempts
+        current["updated_at"] = float(now_ts())
+        if success:
+            current["status"] = "done"
+            dq["cursor"] = cursor + 1
+            out["step_advanced"] = True
+            dq["last_error"] = ""
+        elif status in {"no-tools", "tools", "skip"}:
+            if attempts >= int(WATCHDOG_STEP_MAX_ATTEMPTS):
+                current["status"] = "skipped"
+                dq["cursor"] = cursor + 1
+                out["step_advanced"] = True
+                dq["last_error"] = trim(
+                    f"step {cursor + 1} skipped after {attempts} attempts ({status})",
+                    300,
+                )
+            else:
+                current["status"] = "retry"
+                dq["last_error"] = trim(
+                    f"step {cursor + 1} retry pending ({status})",
+                    300,
+                )
+        rows[cursor] = current
+        dq["steps"] = rows
+        if int(dq.get("cursor", 0) or 0) >= len(rows):
+            dq["active"] = False
+            out["queue_active"] = False
+            self._emit("status", {"summary": "stateless executor queue drained; returning to normal manager routing"})
+        else:
+            out["queue_active"] = bool(dq.get("active", False))
+        bb["decomposition_queue"] = dq
+        self.blackboard = bb
+        return out
+    def _watchdog_process_worker_step(
+        self,
+        board: dict,
+        *,
+        role: str,
+        step: dict,
+        state_changed: bool,
+        pinned_selection: str,
+    ) -> dict:
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        wd = self._normalize_watchdog_state(bb.get("watchdog", {}))
+        dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
+        status = str((step or {}).get("status", "") or "").strip().lower()
+        text = trim(strip_thinking_content(str((step or {}).get("text", "") or "").strip()), 1200)
+        wd["last_state_fp"] = self._watchdog_state_fingerprint(bb)
+        if state_changed:
+            wd["state_unchanged_streak"] = 0
+        else:
+            wd["state_unchanged_streak"] = max(0, int(wd.get("state_unchanged_streak", 0) or 0)) + 1
+        if status == "tools":
+            wd["intent_no_tool_streak"] = 0
+            wd["repeat_no_tool_streak"] = 0
+            wd["last_no_tool_text"] = ""
+            wd["last_no_tool_hash"] = ""
+        elif status == "no-tools":
+            if self._watchdog_intent_without_action(text):
+                wd["intent_no_tool_streak"] = max(0, int(wd.get("intent_no_tool_streak", 0) or 0)) + 1
+            else:
+                wd["intent_no_tool_streak"] = max(0, int(wd.get("intent_no_tool_streak", 0) or 0) - 1)
+            prev_text = str(wd.get("last_no_tool_text", "") or "")
+            sim = self._watchdog_similarity(prev_text, text)
+            if sim >= float(WATCHDOG_REPEAT_SIMILARITY_THRESHOLD):
+                wd["repeat_no_tool_streak"] = max(0, int(wd.get("repeat_no_tool_streak", 0) or 0)) + 1
+            else:
+                wd["repeat_no_tool_streak"] = 0
+            wd["last_no_tool_text"] = text
+            wd["last_no_tool_hash"] = hashlib.sha1(text.encode("utf-8")).hexdigest() if text else ""
+        else:
+            wd["intent_no_tool_streak"] = max(0, int(wd.get("intent_no_tool_streak", 0) or 0) - 1)
+            wd["repeat_no_tool_streak"] = max(0, int(wd.get("repeat_no_tool_streak", 0) or 0) - 1)
+        bb["watchdog"] = wd
+        bb["decomposition_queue"] = dq
+        self.blackboard = bb
+        progress_row = self._watchdog_mark_step_progress(bb, role, step)
+        bb = self._ensure_blackboard()
+        dq = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", {}))
+        trigger_reason = ""
+        if not bool(dq.get("active", False)):
+            if int(wd.get("intent_no_tool_streak", 0) or 0) >= int(WATCHDOG_INTENT_NO_TOOL_THRESHOLD):
+                trigger_reason = "intent-without-tool-call"
+            elif int(wd.get("repeat_no_tool_streak", 0) or 0) >= int(WATCHDOG_REPEAT_NO_TOOL_THRESHOLD):
+                trigger_reason = "repeated-no-tool-reply"
+            elif (
+                self._watchdog_context_near_limit()
+                and int(wd.get("state_unchanged_streak", 0) or 0) >= int(WATCHDOG_CONTEXT_STALL_THRESHOLD)
+            ):
+                trigger_reason = "context-threshold-no-state-change"
+            elif (
+                status in {"no-tools", "skip"}
+                and int(wd.get("state_unchanged_streak", 0) or 0) >= int(WATCHDOG_STATE_STALL_THRESHOLD)
+            ):
+                trigger_reason = "state-unchanged-stall"
+        triggered = False
+        if trigger_reason:
+            try:
+                last_trigger_ts = float(wd.get("last_trigger_ts", 0.0) or 0.0)
+            except Exception:
+                last_trigger_ts = 0.0
+            if now_ts() - last_trigger_ts >= 1.0:
+                triggered = self._watchdog_activate_decomposition(
+                    bb,
+                    reason=trigger_reason,
+                    role=role,
+                    step=step,
+                    pinned_selection=pinned_selection,
+                )
+                bb = self._ensure_blackboard()
+        bb["watchdog"] = self._normalize_watchdog_state(bb.get("watchdog", wd))
+        bb["decomposition_queue"] = self._normalize_decomposition_queue_state(bb.get("decomposition_queue", dq))
+        self.blackboard = bb
+        self._blackboard_touch()
+        return {
+            "triggered": bool(triggered),
+            "trigger_reason": trigger_reason,
+            "queue_active": bool((bb.get("decomposition_queue", {}) or {}).get("active", False)),
+            "step_advanced": bool(progress_row.get("step_advanced", False)),
+        }
+    def _watchdog_execute_queue_step(self, *, pinned_selection: str) -> dict:
+        """Run one executor-mode worker turn taken from the watchdog queue.
+
+        Asks ``_watchdog_pick_executor_route`` for the next route. When none is
+        returned, nothing runs and only the queue's ``active`` flag is reported.
+        Otherwise the target role receives a mandatory executor-mode
+        instruction, one ``_multi_agent_turn`` is executed, the blackboard is
+        updated from the step, and the step is fed back into
+        ``_watchdog_process_worker_step`` together with a flag telling whether
+        the blackboard fingerprint changed during the turn.
+
+        Args:
+            pinned_selection: Forwarded unchanged to ``_multi_agent_turn`` and
+                ``_watchdog_process_worker_step``.
+
+        Returns:
+            A dict that always has ``executed``, ``queue_active``, ``stop_run``
+            and ``interrupted``. When a step was executed it also carries
+            ``role``, ``status``, ``wd_event``, ``trigger_reason`` and
+            ``finish_gate_reason``.
+        """
+        board = self._ensure_blackboard()
+        pick = self._watchdog_pick_executor_route(board)
+        if not pick:
+            # No route was picked: report that nothing ran and whether the queue is still flagged active.
+            dq = self._normalize_decomposition_queue_state(board.get("decomposition_queue", {}))
+            return {"executed": False, "queue_active": bool(dq.get("active", False)), "stop_run": False, "interrupted": False}
+        queue_args, meta = pick
+        # An empty/invalid target falls back to the developer role.
+        role = self._sanitize_agent_role((queue_args or {}).get("target", "")) or "developer"
+        # Instruction text goes through trim(..., 1200) -- presumably a length cap; confirm against trim().
+        instruction = trim(str((queue_args or {}).get("instruction", "") or "").strip(), 1200)
+        if not instruction:
+            # Default instruction used when the queue entry supplies none.
+            instruction = (
+                "Executor mode step: call one concrete tool now, keep scope narrow, and update blackboard evidence."
+            )
+        self._inject_manager_instruction(role, instruction, is_mandatory=True, executor_mode=True)
+        # Set the blackboard status that matches the role; other roles leave the status untouched.
+        if role == "explorer":
+            self._blackboard_set_status("RESEARCHING")
+        elif role == "developer":
+            self._blackboard_set_status("CODING")
+        elif role == "reviewer":
+            self._blackboard_set_status("REVIEWING")
+        # Fingerprint taken before the turn; compared with the post-turn fingerprint below.
+        board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
+        step = self._multi_agent_turn(
+            role,
+            pinned_selection=pinned_selection,
+            media_inputs_round=None,
+        )
+        # A non-dict turn result is replaced by an empty dict before any key lookups.
+        safe_step = step if isinstance(step, dict) else {}
+        self._blackboard_update_from_worker_step(role, safe_step)
+        board_after = self._ensure_blackboard()
+        board_after_fp = self._watchdog_state_fingerprint(board_after)
+        wd_event = self._watchdog_process_worker_step(
+            board_after,
+            role=role,
+            step=safe_step,
+            state_changed=bool(board_after_fp != board_before_fp),
+            pinned_selection=pinned_selection,
+        )
+        status = str(safe_step.get("status", "") or "").strip().lower()
+        interrupted = bool(status == "interrupted")
+        stop_run = False
+        finish_gate_reason = ""
+        # A finish signalled through a tool call only stops the run when the approval gate allows it.
+        if status == "tools" and bool(safe_step.get("stop_due_to_finish", False)):
+            note = f"{self._agent_display_name(role)} signaled finish via tool."
+            # Approval note should come from finish tool payload sync; avoid overwriting with generic text here.
+            can_finish_now, finish_gate_reason = self._can_auto_finish_from_approval(
+                self._ensure_blackboard(),
+                latest_user_ts=self._latest_user_message_ts(),
+            )
+            if can_finish_now:
+                self._mark_all_done_silently(note)
+                stop_run = True
+            else:
+                # Gate refused the finish: emit a status event naming the gate reason and keep going.
+                self._emit(
+                    "status",
+                    {
+                        "summary": (
+                            f"executor finish deferred by gate ({finish_gate_reason}); "
+                            "continue watchdog queue"
+                        )
+                    },
+                )
+        # Re-read the queue state after the turn so queue_active reflects the current blackboard.
+        dq = self._normalize_decomposition_queue_state(self._ensure_blackboard().get("decomposition_queue", {}))
+        return {
+            "executed": True,
+            "queue_active": bool(dq.get("active", False)),
+            "stop_run": bool(stop_run),
+            "interrupted": bool(interrupted),
+            "role": role,
+            "status": status,
+            "wd_event": wd_event,
+            "trigger_reason": trim(str(meta.get("trigger_reason", "") or "").strip(), 120),
+            "finish_gate_reason": finish_gate_reason,
+        }
+    def _new_blackboard(self, goal: str = "") -> dict:
+        """Build a fresh blackboard dict seeded with ``goal``.
+
+        The task profile comes from ``_normalize_task_profile(goal, {})``. The
+        manager judgement copies its task_type / complexity / scale_preference
+        and derives ``remaining_rounds`` from ``round_budget`` (``-1`` when the
+        budget is zero or negative). Progress starts as ``"done"`` only for a
+        ``simple_qa`` profile with a blank goal, otherwise ``"initializing"``.
+        """
+        task_profile = self._normalize_task_profile(goal, {})
+        goal_text = str(goal or "").strip()
+        blank_simple_qa = str(task_profile.get("task_type", "") or "") == "simple_qa" and not goal_text
+        initial_progress = "done" if blank_simple_qa else "initializing"
+        budget = int(task_profile.get("round_budget", 0) or 0)
+        rounds_left = budget if budget > 0 else -1
+
+        # Keys are inserted in a fixed order so the resulting dict layout stays stable.
+        board = {"version": 1, "updated_at": float(now_ts())}
+        board["original_goal"] = trim(goal_text, 4000)
+        board["research_notes"] = []
+        board["code_artifacts"] = {}
+        board["execution_logs"] = []
+        board["review_feedback"] = []
+        board["conversation_history"] = []
+        board["status"] = "INITIALIZING"
+        board["approval"] = {"approved": False, "by": "", "note": "", "ts": 0.0}
+        board["manager_cycles"] = 0
+        board["manager_summary_attempts"] = 0
+        board["active_agent"] = ""
+        board["last_delegate"] = {
+            "target": "",
+            "instruction": "",
+            "reason": "",
+            "source": "",
+            "is_mandatory": False,
+            "ts": 0.0,
+        }
+        board["task_profile"] = task_profile
+        board["manager_judgement"] = {
+            "task_type": str(task_profile.get("task_type", "general")),
+            "complexity": str(task_profile.get("complexity", "simple")),
+            "scale_preference": str(task_profile.get("scale_preference", "balanced") or "balanced"),
+            "progress": initial_progress,
+            "remaining_rounds": rounds_left,
+            "updated_at": float(now_ts()),
+        }
+        board["last_worker_reply"] = {"role": "", "text": "", "ts": 0.0}
+        board["watchdog"] = self._new_watchdog_state()
+        board["decomposition_queue"] = self._new_decomposition_queue_state()
+        return board
+    def _normalize_blackboard(self, raw: object) -> dict:
+        src = raw if isinstance(raw, dict) else {}
+        board = self._new_blackboard(str(src.get("original_goal", "") or ""))
+        try:
+            board["version"] = int(src.get("version", 1) or 1)
+        except Exception:
+            board["version"] = 1
+        board["updated_at"] = float(src.get("updated_at", now_ts()) or now_ts())
+        board["status"] = self._normalize_blackboard_status(src.get("status", board["status"]))
+        board["manager_cycles"] = max(0, int(src.get("manager_cycles", 0) or 0))
+        board["manager_summary_attempts"] = max(0, int(src.get("manager_summary_attempts", 0) or 0))
+        board["active_agent"] = self._sanitize_agent_role(src.get("active_agent", ""))
+        raw_delegate = src.get("last_delegate", {})
+        if isinstance(raw_delegate, dict):
+            board["last_delegate"] = {
+                "target": str(raw_delegate.get("target", "") or "").strip().lower(),
+                "instruction": trim(str(raw_delegate.get("instruction", "") or "").strip(), 1200),
+                "reason": trim(str(raw_delegate.get("reason", "") or "").strip(), 600),
+                "source": trim(str(raw_delegate.get("source", "") or "").strip(), 40),
+                "is_mandatory": _to_bool_like(raw_delegate.get("is_mandatory", False), default=False),
+                "ts": float(raw_delegate.get("ts", 0.0) or 0.0),
+            }
+        raw_approval = src.get("approval", {})
+        if isinstance(raw_approval, dict):
+            board["approval"] = {
+                "approved": bool(raw_approval.get("approved", False)),
+                "by": trim(str(raw_approval.get("by", "") or "").strip(), 60),
+                "note": trim(str(raw_approval.get("note", "") or "").strip(), 1000),
+                "ts": float(raw_approval.get("ts", 0.0) or 0.0),
+            }
+        board["task_profile"] = self._normalize_task_profile(
+            str(board.get("original_goal", "") or ""),
+            src.get("task_profile", {}),
+        )
+        raw_judgement = src.get("manager_judgement", {})
+        if isinstance(raw_judgement, dict):
+            board["manager_judgement"] = {
+                "task_type": trim(
+                    str(raw_judgement.get("task_type", board["task_profile"].get("task_type", "")) or "").strip(),
                     40,
                 ),
                 "complexity": (
@@ -12866,6 +13856,10 @@ class SessionState:
                     "change_count": max(1, int(item.get("change_count", 1) or 1)),
                 }
         board["code_artifacts"] = artifacts
+        board["watchdog"] = self._normalize_watchdog_state(src.get("watchdog", {}))
+        board["decomposition_queue"] = self._normalize_decomposition_queue_state(
+            src.get("decomposition_queue", {})
+        )
         return board
     def _ensure_blackboard(self) -> dict:
@@ -13247,6 +14241,11 @@ class SessionState:
         goal = trim(str(board.get("original_goal", "") or "").strip(), 1800)
         status = self._normalize_blackboard_status(board.get("status", "INITIALIZING"))
         delegate = board.get("last_delegate", {}) if isinstance(board.get("last_delegate"), dict) else {}
+        watchdog = board.get("watchdog", {}) if isinstance(board.get("watchdog"), dict) else {}
+        dq = board.get("decomposition_queue", {}) if isinstance(board.get("decomposition_queue"), dict) else {}
+        dq_steps = dq.get("steps", []) if isinstance(dq.get("steps"), list) else []
+        dq_cursor = max(0, int(dq.get("cursor", 0) or 0))
+        dq_total = len(dq_steps)
         lines = [
             "## Blackboard State",
             f"- status: {status}",
@@ -13269,6 +14268,19 @@ class SessionState:
             f"- active_agent: {board.get('active_agent', '') or '(none)'}",
             f"- manager_cycles: {int(board.get('manager_cycles', 0) or 0)}",
             f"- manager_summary_attempts: {int(board.get('manager_summary_attempts', 0) or 0)}",
+            (
+                "- watchdog: "
+                f"intent_no_tool={int(watchdog.get('intent_no_tool_streak', 0) or 0)}, "
+                f"repeat_no_tool={int(watchdog.get('repeat_no_tool_streak', 0) or 0)}, "
+                f"state_unchanged={int(watchdog.get('state_unchanged_streak', 0) or 0)}, "
+                f"trigger_count={int(watchdog.get('trigger_count', 0) or 0)}"
+            ),
+            (
+                "- decomposition_queue: "
+                f"active={bool(dq.get('active', False))}, "
+                f"cursor={dq_cursor}, total={dq_total}, "
+                f"trigger_reason={trim(str(dq.get('trigger_reason', '') or ''), 140)}"
+            ),
             (
                 "- manager_judgement: "
                 f"{trim(str(judgement.get('progress', 'initializing') or ''), 40)}"
@@ -13357,6 +14369,7 @@ class SessionState:
                     "assigned_expert": {"type": "string", "enum": list(AGENT_ROLES)},
                     "requires_user_confirmation": {"type": "boolean"},
                     "is_mandatory": {"type": "boolean"},
+                    "executor_mode": {"type": "boolean"},
                 },
                 ["target", "instruction"],
             )
@@ -13375,6 +14388,8 @@ class SessionState:
                     "task_type": {"type": "string"},
                     "complexity": {"type": "string", "enum": list(TASK_COMPLEXITY_LEVELS)},
                     "scale_preference": {"type": "string", "enum": list(TASK_SCALE_PREFERENCES)},
+                    "semantic_confidence": {"type": "string", "enum": list(SEMANTIC_CONFIDENCE_CHOICES)},
+                    "low_confidence_reason": {"type": "string"},
                     "inherit_previous_state": {"type": "boolean"},
                     "judgement": {"type": "string"},
                     "round_budget": {"type": "integer"},
@@ -13398,6 +14413,68 @@ class SessionState:
         yes_tokens = ("继续", "确认", "开始", "执行", "同意", "go ahead", "proceed", "continue", "yes")
         return any(tok in low for tok in yes_tokens)
+    def _normalize_semantic_confidence(self, raw: object, *, default: str = "medium") -> str:
+        """Return a valid semantic-confidence label.
+
+        ``raw`` is stringified, stripped and lower-cased; if the result is not
+        in ``SEMANTIC_CONFIDENCE_CHOICES`` the ``default`` is used, and if that
+        is not a valid choice either, ``"medium"`` is returned.
+        """
+        candidate = str(raw or "").strip().lower()
+        # Try the cleaned input first, then the caller-supplied default.
+        for option in (candidate, default):
+            if option in SEMANTIC_CONFIDENCE_CHOICES:
+                return option
+        return "medium"
+    def _merge_task_decision_for_low_confidence(self, llm_row: dict, fallback_row: dict) -> dict:
+        """Overlay a low-confidence manager classification onto the rule fallback.
+
+        Starts from a shallow copy of ``fallback_row`` and copies over each
+        field of ``llm_row`` that passes its own validity check; missing or
+        invalid fields keep the fallback value. The two boolean flags
+        (``inherit_previous_state``, ``requires_user_confirmation``) can only be
+        switched on by the model row, never switched off.
+
+        Args:
+            llm_row: Classification produced by the manager model; a non-dict
+                value is treated as empty.
+            fallback_row: Rule-based classification used as the base.
+
+        Returns:
+            The merged decision, with ``semantic_confidence``,
+            ``low_confidence_reason`` and
+            ``source="manager-low-confidence+fallback"`` always set.
+        """
+        merged = dict(fallback_row or {})
+        row = llm_row if isinstance(llm_row, dict) else {}
+        # Model output may carry booleans as strings ("false", "0"); plain bool() would
+        # treat any non-empty string as True, so use the file's loose-boolean parser,
+        # as is done for is_mandatory / executor_mode elsewhere.
+        if _to_bool_like(row.get("inherit_previous_state", False), default=False):
+            merged["inherit_previous_state"] = True
+        try:
+            lvl = int(row.get("level", 0) or 0)
+        except Exception:
+            lvl = 0
+        if lvl in TASK_LEVEL_CHOICES:
+            merged["level"] = int(lvl)
+        task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
+        if task_type in TASK_PROFILE_TYPES:
+            merged["task_type"] = task_type
+        complexity = trim(str(row.get("complexity", "") or "").strip().lower(), 20)
+        if complexity in TASK_COMPLEXITY_LEVELS:
+            merged["complexity"] = complexity
+        scale = trim(str(row.get("scale_preference", "") or "").strip().lower(), 20)
+        if scale in TASK_SCALE_PREFERENCES:
+            merged["scale_preference"] = scale
+        mode = normalize_execution_mode(row.get("execution_mode", ""), default="")
+        if mode in EXECUTION_MODE_CHOICES:
+            merged["execution_mode"] = mode
+        assigned = self._sanitize_agent_role(row.get("assigned_expert", ""))
+        if assigned:
+            merged["assigned_expert"] = assigned
+        # Keep valid roles only, de-duplicated in first-seen order, at most three.
+        raw_participants = row.get("participants", [])
+        participants: list[str] = []
+        if isinstance(raw_participants, list):
+            for item in raw_participants:
+                role = self._sanitize_agent_role(item)
+                if role and role not in participants:
+                    participants.append(role)
+        if participants:
+            merged["participants"] = participants[:3]
+        try:
+            budget = int(row.get("round_budget", 0) or 0)
+        except Exception:
+            budget = 0
+        if budget > 0:
+            # Clamp the model's budget into [1, configured maximum rounds].
+            merged["round_budget"] = int(
+                max(1, min(int(self.max_agent_rounds or MAX_AGENT_ROUNDS), int(budget)))
+            )
+        if _to_bool_like(row.get("requires_user_confirmation", False), default=False):
+            merged["requires_user_confirmation"] = True
+        objective = trim(str(row.get("direct_objective", "") or "").strip(), 800)
+        if objective:
+            merged["direct_objective"] = objective
+        judgement = trim(str(row.get("judgement", "") or "").strip(), 200)
+        if judgement:
+            merged["judgement"] = judgement
+        merged["semantic_confidence"] = self._normalize_semantic_confidence(row.get("semantic_confidence", "low"), default="low")
+        merged["low_confidence_reason"] = trim(str(row.get("low_confidence_reason", "") or "").strip(), 220)
+        merged["source"] = "manager-low-confidence+fallback"
+        return merged
     def _fallback_task_level_decision(self, goal_text: str) -> dict:
         profile = self._infer_task_profile(goal_text)
         task_type = str(profile.get("task_type", "general") or "general")
@@ -13501,6 +14578,8 @@ class SessionState:
                 "participants": list(inherited_participants),
                 "assigned_expert": inherited_assigned,
                 "requires_user_confirmation": bool(inherited_requires_confirmation if inherited_level == 5 else False),
+                "semantic_confidence": "low",
+                "low_confidence_reason": "rule fallback inherited previous runtime state",
                 "source": "fallback",
             }
         level = 3
@@ -13537,6 +14616,8 @@ class SessionState:
             "participants": participants,
             "assigned_expert": assigned,
             "requires_user_confirmation": bool(requires_confirmation),
+            "semantic_confidence": "low",
+            "low_confidence_reason": "rule fallback classification",
             "source": "fallback",
         }
@@ -13559,7 +14640,9 @@ class SessionState:
             "If user clearly indicates speed vs completeness preference, that preference has higher priority than your default strategy. "
             "Budgets are internal efficiency controls to reduce overthinking and idle loops; "
             "they must not be treated as a user-visible early-stop reason. "
-            "Output exactly one classify_task_level tool call with concise judgement and inherit_previous_state. "
+            "Output exactly one classify_task_level tool call with concise judgement, inherit_previous_state, "
+            "and semantic_confidence(high|medium|low). "
+            "Use low confidence only when semantic ambiguity is substantial, then set low_confidence_reason briefly. "
             f"{model_language_instruction(self.ui_language)}"
         )
@@ -13657,16 +14740,55 @@ class SessionState:
         participants = normalized_participants[:3] or [assigned]
         if assigned not in participants:
             assigned = participants[0]
+        semantic_confidence = self._normalize_semantic_confidence(
+            row.get("semantic_confidence", "medium"),
+            default="medium",
+        )
+        decision_source = trim(str(row.get("source", "") or "").strip().lower(), 80)
+        low_confidence_mode = bool(
+            str(semantic_confidence or "medium") == "low"
+            or decision_source.startswith("fallback")
+            or "low-confidence" in decision_source
+        )
+        if low_confidence_mode:
+            rule_profile = self._infer_task_profile(goal_text)
+            fallback_task_type = str(rule_profile.get("task_type", "general") or "general")
+            fallback_complexity = str(rule_profile.get("complexity", "simple") or "simple")
+            fallback_objective = trim(str(rule_profile.get("direct_objective", "") or ""), 800)
+        else:
+            board_now = self._ensure_blackboard()
+            board_profile = board_now.get("task_profile", {}) if isinstance(board_now.get("task_profile"), dict) else {}
+            fallback_task_type = trim(
+                str(self.runtime_task_type or board_profile.get("task_type", "general") or "general"),
+                40,
+            )
+            if fallback_task_type not in TASK_PROFILE_TYPES:
+                fallback_task_type = "general"
+            fallback_complexity = trim(
+                str(self.runtime_task_complexity or board_profile.get("complexity", "simple") or "simple"),
+                20,
+            )
+            if fallback_complexity not in TASK_COMPLEXITY_LEVELS:
+                fallback_complexity = "simple"
+            fallback_objective = trim(
+                str(self.runtime_direct_objective or board_profile.get("direct_objective", "") or "").strip(),
+                800,
+            )
+            if not fallback_objective:
+                fallback_objective = (
+                    "Proceed with direct semantic objective and concrete progress for the current request."
+                )
         task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
         if task_type not in TASK_PROFILE_TYPES:
-            task_type = str(self._infer_task_profile(goal_text).get("task_type", "general"))
+            task_type = fallback_task_type
         complexity = trim(str(row.get("complexity", "") or "").strip().lower(), 20)
         if complexity not in TASK_COMPLEXITY_LEVELS:
-            complexity = str(self._infer_task_profile(goal_text).get("complexity", "simple"))
+            complexity = fallback_complexity
+        low_confidence_reason = trim(str(row.get("low_confidence_reason", "") or "").strip(), 220)
         judgement = trim(str(row.get("judgement", "") or "").strip(), 200) or "manager classified task level"
         objective = trim(str(row.get("direct_objective", "") or "").strip(), 800)
         if not objective:
-            objective = trim(str(self._infer_task_profile(goal_text).get("direct_objective", "") or ""), 800)
+            objective = fallback_objective
         self.runtime_task_level = int(level)
         self.runtime_execution_mode = mode
         self.runtime_assigned_expert = assigned
@@ -13694,6 +14816,8 @@ class SessionState:
         profile["direct_objective"] = objective
         profile["round_budget"] = int(round_budget)
         profile["inherit_previous_state"] = bool(inherit_previous_state)
+        profile["semantic_confidence"] = semantic_confidence
+        profile["low_confidence_reason"] = low_confidence_reason
         profile["recommended_agents"] = list(participants)
         profile["reason"] = trim(str(row.get("judgement", "") or row.get("source", "manager")), 400)
         profile["updated_at"] = float(now_ts())
@@ -13709,6 +14833,8 @@ class SessionState:
             "execution_mode": mode,
             "participants": list(participants),
             "assigned_expert": assigned,
+            "semantic_confidence": semantic_confidence,
+            "low_confidence_reason": low_confidence_reason,
             "updated_at": float(now_ts()),
         }
         board["active_agent"] = assigned if mode == EXECUTION_MODE_SINGLE else ""
@@ -13721,7 +14847,8 @@ class SessionState:
                 "summary": (
                     f"manager classified: L{level} "
                     f"mode={mode} scale={scale_preference} participants={','.join(participants)} "
-                    f"expert={assigned} budget={'unlimited' if int(round_budget) <= 0 else int(round_budget)}"
+                    f"expert={assigned} budget={'unlimited' if int(round_budget) <= 0 else int(round_budget)} "
+                    f"confidence={semantic_confidence}"
                 )
             },
         )
@@ -13779,10 +14906,20 @@ class SessionState:
                     row.get("inherit_previous_state", False),
                     default=False,
                 )
+                row["semantic_confidence"] = self._normalize_semantic_confidence(
+                    row.get("semantic_confidence", "medium"),
+                    default="medium",
+                )
+                if str(row.get("semantic_confidence", "medium")) == "low":
+                    fallback_row = self._fallback_task_level_decision(goal_text)
+                    merged = self._merge_task_decision_for_low_confidence(row, fallback_row)
+                    return merged
                 row["source"] = "manager"
                 return row
         row = self._fallback_task_level_decision(goal_text)
-        row["source"] = "fallback"
+        row["source"] = "fallback-no-toolcall"
+        row["semantic_confidence"] = "low"
+        row["low_confidence_reason"] = "manager classifier returned no valid tool call"
         return row
     def _refresh_runtime_task_policy(
@@ -14038,7 +15175,7 @@ class SessionState:
                 "reason": "forced-finish-budget-exhausted",
                 "source": "fallback",
             }
-        if finish_gate_reason == "reviewer-summary-missing" and summary_attempts >= 1:
+        if finish_gate_reason == "reviewer-summary-missing" and summary_attempts >= 2:
             self._emit("status", {"summary": "Summary generation attempted; forcing finish now."})
             return {
                 "target": "finish",
@@ -14072,13 +15209,28 @@ class SessionState:
                     "source": "fallback",
                 }
             if finish_gate_reason == "reviewer-summary-missing":
-                board["manager_summary_attempts"] = summary_attempts + 1
+                next_attempt = summary_attempts + 1
+                board["manager_summary_attempts"] = next_attempt
                 self.blackboard = board
+                if next_attempt >= 2:
+                    return {
+                        "target": "explorer",
+                        "instruction": (
+                            "Reviewer summary is still missing. Read blackboard sections "
+                            "(code_artifacts, execution_logs, review_feedback, status) and write one structured "
+                            "final summary to blackboard (changes, validation evidence, residual risks/next steps). "
+                            "Do not call finish tool in this step."
+                        ),
+                        "reason": "approval-missing-summary-handoff-explorer",
+                        "source": "fallback",
+                        "is_mandatory": True,
+                    }
                 return {
                     "target": "reviewer",
                     "instruction": (
-                        "Review approved but final summary required. Write one final wrap-up summary from "
-                        "blackboard evidence (changes, validation, residual risks/next steps), and then finish."
+                        "Review approved but final summary required. First call read_from_blackboard for "
+                        "code_artifacts/execution_logs/review_feedback/status, then call finish_task with summary "
+                        "including changes, validation evidence, and residual risks/next steps."
                     ),
                     "reason": "approval-missing-reviewer-summary-request",
                     "source": "fallback",
@@ -14241,6 +15393,8 @@ class SessionState:
     def _manager_apply_anti_stall(self, route: dict) -> dict:
         row = dict(route or {})
+        if bool(row.get("executor_mode", False)):
+            return row
         if str(row.get("task_type", "") or "").strip().lower() == "simple_qa":
             return row
         target = str(row.get("target", "") or "").strip().lower()
@@ -14282,6 +15436,7 @@ class SessionState:
     def _manager_apply_task_policy(self, route: dict) -> dict:
         row = dict(route or {})
+        executor_mode_flag = _to_bool_like(row.get("executor_mode", False), default=False)
         board = self._ensure_blackboard()
         latest_user_ts = self._latest_user_message_ts()
         self._invalidate_stale_approval_if_needed(
@@ -14330,7 +15485,13 @@ class SessionState:
         if target not in MANAGER_ROUTE_TARGETS:
             target = assigned_expert if mode == EXECUTION_MODE_SINGLE else "developer"
         if target in AGENT_ROLES and target not in participants:
-            target = participants[0]
+            if executor_mode_flag:
+                if len(participants) < 3:
+                    participants.append(target)
+                else:
+                    participants[-1] = target
+            else:
+                target = participants[0]
         instruction = trim(str(row.get("instruction", "") or "").strip(), 1200)
         if not instruction:
             instruction = "Proceed with one concrete next step and report evidence."
@@ -14381,22 +15542,19 @@ class SessionState:
             board,
             latest_user_ts=latest_user_ts,
         )
+        board_status = self._normalize_blackboard_status(board.get("status", "INITIALIZING"))
+        code_count = len(board.get("code_artifacts", {}) or {})
+        research_count = len(board.get("research_notes", []) or [])
+        feedback_pass = self._manager_feedback_passed_from_blackboard(board)
         summary_attempts = int(board.get("manager_summary_attempts", 0) or 0)
         force_finish_override = False
-        if remaining == 0:
-            force_finish_override = True
-            target = "finish"
-            instruction = "Maximum rounds reached. Generate final summary and finish immediately."
-            row["reason"] = "forced-finish-budget"
-            row["source"] = "policy"
-            self._emit("status", {"summary": "Round budget exhausted; forcing finish."})
         if bool((board.get("approval", {}) or {}).get("approved", False)) and can_finish_from_approval:
             target = "finish"
             if not instruction:
                 instruction = "Review already approved; finish now."
-        if target == "finish" and (not can_finish_from_approval) and (not force_finish_override):
+        if target == "finish" and (not can_finish_from_approval):
             if finish_gate_reason == "reviewer-summary-missing":
-                if summary_attempts >= 1:
+                if summary_attempts >= 2:
                     force_finish_override = True
                     target = "finish"
                     instruction = (
@@ -14406,13 +15564,27 @@ class SessionState:
                     row["reason"] = "forced-finish-summary-max-retry"
                     row["source"] = "policy"
                     self._emit("status", {"summary": "Summary retry limit reached; forcing finish."})
+                elif summary_attempts >= 1:
+                    board["manager_summary_attempts"] = summary_attempts + 1
+                    self.blackboard = board
+                    target = "explorer"
+                    instruction = (
+                        "Reviewer summary is still missing. Read blackboard sections "
+                        "(code_artifacts, execution_logs, review_feedback, status) and write one structured "
+                        "final summary to blackboard: changes, validation evidence, residual risks/next steps. "
+                        "Do not call finish tool in this step."
+                    )
+                    row["reason"] = "finish-blocked-summary-handoff-explorer"
+                    row["source"] = "policy"
+                    self._emit("status", {"summary": "Reviewer summary missing; handoff to explorer synthesis."})
                 else:
                     board["manager_summary_attempts"] = summary_attempts + 1
                     self.blackboard = board
                     target = "reviewer"
                     instruction = (
-                        "Generate final summary report covering implemented outputs, validation evidence, "
-                        "and residual risks/next steps. This is the final step before completion."
+                        "Generate final summary report from blackboard evidence. First call read_from_blackboard "
+                        "(code_artifacts, execution_logs, review_feedback, status), then call finish_task.summary "
+                        "including changes, validation evidence, and residual risks/next steps."
                     )
                     row["reason"] = "finish-blocked-summary-request"
                     row["source"] = "policy"
@@ -14445,10 +15617,35 @@ class SessionState:
                     "Resolve errors and provide verifiable evidence."
                 )
             else:
-                instruction = (
-                    "Do not finish yet. Completion requires fresh reviewer approval for the current user request. "
-                    "Continue with one concrete step and update blackboard."
-                )
+                has_outputs = bool(code_count > 0 or research_count > 0)
+                if board_status == "COMPLETED" and has_outputs:
+                    force_finish_override = True
+                    target = "finish"
+                    instruction = (
+                        "Task is already in COMPLETED state with concrete outputs. "
+                        "Generate final summary from blackboard (changes, validation evidence, residual "
+                        "risks/next steps) and finish now."
+                    )
+                    row["reason"] = "finish-blocked-completed-auto-summary-close"
+                    row["source"] = "policy"
+                    self._emit(
+                        "status",
+                        {"summary": "Completion gate unresolved but board is COMPLETED; auto-closing with final summary."},
+                    )
+                elif feedback_pass and has_outputs:
+                    force_finish_override = True
+                    target = "finish"
+                    instruction = (
+                        "Reviewer feedback already passed with concrete outputs. "
+                        "Generate final summary and finish now."
+                    )
+                    row["reason"] = "finish-blocked-feedback-pass-auto-close"
+                    row["source"] = "policy"
+                else:
+                    instruction = (
+                        "Do not finish yet. Completion requires fresh reviewer approval for the current user request. "
+                        "Continue with one concrete step and update blackboard."
+                    )
             if finish_gate_reason != "reviewer-summary-missing":
                 self._emit(
                     "status",
@@ -14477,6 +15674,7 @@ class SessionState:
             is_mandatory = True
         if target == "finish":
             is_mandatory = False
+            executor_mode_flag = False
         row.update(
             {
                 "target": target,
@@ -14491,6 +15689,7 @@ class SessionState:
                 "participants": list(participants),
                 "assigned_expert": assigned_expert,
                 "is_mandatory": bool(is_mandatory),
+                "executor_mode": bool(executor_mode_flag and target in AGENT_ROLES),
                 "requires_user_confirmation": bool(
                     row.get(
                         "requires_user_confirmation",
@@ -14535,6 +15734,7 @@ class SessionState:
                 "assigned_expert": trim(str(args.get("assigned_expert", "") or "").strip().lower(), 20),
                 "requires_user_confirmation": bool(args.get("requires_user_confirmation", False)),
                 "is_mandatory": _to_bool_like(args.get("is_mandatory", False), default=False),
+                "executor_mode": _to_bool_like(args.get("executor_mode", False), default=False),
                 "round_budget": args.get("round_budget", 0),
                 "reason": trim(str(text or "").strip(), 600),
                 "source": "tool",
@@ -14563,6 +15763,7 @@ class SessionState:
         objective, _ = self._split_language_policy_from_text(objective_raw, max_len=800)
         instruction, _ = self._split_language_policy_from_text(instruction_raw, max_len=1200)
         is_mandatory = bool(row.get("is_mandatory", False))
+        is_executor = bool(row.get("executor_mode", False))
         round_budget = int(row.get("round_budget", 0) or 0)
         remaining = int(row.get("remaining_rounds", -1) or -1)
         budget_text = "unlimited" if round_budget <= 0 else str(round_budget)
@@ -14571,7 +15772,11 @@ class SessionState:
         lines = [
             f"Manager -> {target_label}",
             f"L{task_level if task_level in TASK_LEVEL_CHOICES else '-'} | {mode} | {task_type}/{complexity} | scale={scale}",
-            f"mandatory={'yes' if is_mandatory else 'no'} | budget={budget_text} | remaining={remaining_text}",
+            (
+                f"mandatory={'yes' if is_mandatory else 'no'}"
+                f" | executor={'yes' if is_executor else 'no'}"
+                f" | budget={budget_text} | remaining={remaining_text}"
+            ),
         ]
         if objective:
             lines.append(f"objective: {objective}")
@@ -14587,6 +15792,7 @@ class SessionState:
             "complexity": complexity,
             "scale_preference": scale,
             "is_mandatory": is_mandatory,
+            "executor_mode": is_executor,
             "round_budget": round_budget,
             "remaining_rounds": remaining,
             "direct_objective": objective,
@@ -14611,21 +15817,24 @@ class SessionState:
         board["manager_cycles"] = int(board.get("manager_cycles", 0) or 0) + 1
         text = ""
         tool_calls: list[dict] = []
+        used_watchdog_executor = False
+        watchdog_meta: dict = {}
+        watchdog_pick = self._watchdog_pick_executor_route(board)
         used_agentbus_fast = False
         fast_meta: dict = {}
-        fast_pick = self._manager_pick_agentbus_fast_route(board)
-        if fast_pick:
-            used_agentbus_fast = True
-            fast_args, fast_meta = fast_pick
+        if watchdog_pick:
+            used_watchdog_executor = True
+            queue_args, watchdog_meta = watchdog_pick
             with self.lock:
-                self.current_phase = "manager:agentbus-fast-route"
+                self.current_phase = "manager:watchdog-executor-route"
                 self.current_tool_name = ""
                 self.active_agent_role = "manager"
             text = trim(
                 (
-                    "agentbus fast-route "
-                    f"{fast_meta.get('from', '?')}->{fast_meta.get('to', '?')} "
-                    f"intent={fast_meta.get('intent', 'message')} id={fast_meta.get('env_id', '-')}"
+                    "watchdog executor route "
+                    f"step={int(watchdog_meta.get('cursor', 0) or 0)}/{int(watchdog_meta.get('total', 0) or 0)} "
+                    f"target={watchdog_meta.get('target', '?')} "
+                    f"trigger={watchdog_meta.get('trigger_reason', '') or '?'}"
                 ),
                 600,
             )
@@ -14635,7 +15844,7 @@ class SessionState:
                     "type": "function",
                     "function": {
                         "name": "route_to_next_agent",
-                        "arguments": dict(fast_args or {}),
+                        "arguments": dict(queue_args or {}),
                     },
                 }
             ]
@@ -14643,7 +15852,7 @@ class SessionState:
                 {
                     "role": "system",
                     "content": (
-                        "[manager-fast-route] "
+                        "[manager-watchdog-route] "
                         f"{trim(str(text or ''), 500)}"
                     ),
                     "ts": now_ts(),
@@ -14654,102 +15863,165 @@ class SessionState:
                 "status",
                 {
                     "summary": (
-                        "manager fast-route via agentbus "
-                        f"({fast_meta.get('from', '?')}->{fast_meta.get('to', '?')}, "
-                        f"intent={fast_meta.get('intent', 'message')}, "
-                        f"age={float(fast_meta.get('age_sec', 0.0) or 0.0):.1f}s)"
+                        "manager watchdog executor active "
+                        f"(step={int(watchdog_meta.get('cursor', 0) or 0)}/"
+                        f"{int(watchdog_meta.get('total', 0) or 0)}, "
+                        f"target={watchdog_meta.get('target', '?')}, "
+                        f"trigger={trim(str(watchdog_meta.get('trigger_reason', '') or ''), 80)})"
                     )
                 },
             )
         else:
-            prompt = (
-                "Read the blackboard and delegate one next short timeslice. "
-                "Return only one route_to_next_agent call.\n\n"
-                f"{self._blackboard_read_state_markdown(max_items=6)}"
-            )
-            self.manager_context.append({"role": "user", "content": prompt, "ts": now_ts()})
-            self.manager_context = self.manager_context[-400:]
-            with self.lock:
-                self.current_phase = "manager:model-call"
-                self.current_tool_name = ""
-                self.active_agent_role = "manager"
-            response = self._chat_with_same_model_retry(
-                self.manager_context,
-                tools=self._manager_route_tools(),
-                system=self._manager_system_prompt(),
-                max_tokens=600,
-                think=False,
-                stream_thinking=False,
-                on_thinking_chunk=self._append_live_thinking,
-                pinned_selection=pinned_selection,
-                context_label="manager turn",
-                retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
-                media_inputs=media_inputs_round,
-            )
-            text = str(response.get("content") or "")
-            tool_calls = response.get("tool_calls", [])
-            text, text_filter_meta = self._sanitize_assistant_text_for_runtime(text, tool_calls)
-            if bool(text_filter_meta.get("filtered", False)) and str(text_filter_meta.get("reason", "")) == "oversized_raw_toolcall":
-                self._inject_toolcall_overflow_hint("manager")
-            assistant = {"role": "assistant", "content": text, "ts": now_ts()}
-            if tool_calls:
-                assistant["tool_calls"] = [
+            fast_pick = self._manager_pick_agentbus_fast_route(board)
+            if fast_pick:
+                used_agentbus_fast = True
+                fast_args, fast_meta = fast_pick
+                with self.lock:
+                    self.current_phase = "manager:agentbus-fast-route"
+                    self.current_tool_name = ""
+                    self.active_agent_role = "manager"
+                text = trim(
+                    (
+                        "agentbus fast-route "
+                        f"{fast_meta.get('from', '?')}->{fast_meta.get('to', '?')} "
+                        f"intent={fast_meta.get('intent', 'message')} id={fast_meta.get('env_id', '-')}"
+                    ),
+                    600,
+                )
+                tool_calls = [
                     {
-                        "id": tc["id"],
+                        "id": make_id("tc"),
                         "type": "function",
                         "function": {
-                            "name": tc["function"]["name"],
-                            "arguments": json_dumps(tc["function"]["arguments"]),
+                            "name": "route_to_next_agent",
+                            "arguments": dict(fast_args or {}),
                         },
                     }
-                    for tc in tool_calls
                 ]
-            self.manager_context.append(assistant)
-            self.manager_context = self.manager_context[-400:]
-            route_only_tool_calls = False
-            if isinstance(tool_calls, list) and tool_calls:
-                tool_names = [
-                    str(tc.get("function", {}).get("name", "") or "").strip().lower()
-                    for tc in tool_calls
-                    if isinstance(tc, dict)
-                ]
-                if tool_names and all(name in {"route_to_next_agent", "routetonext_agent"} for name in tool_names):
-                    route_only_tool_calls = True
-            emit_text = str(text or "").strip()
-            if not emit_text and tool_calls and (not route_only_tool_calls):
-                emit_text = f"[tool calls] {', '.join(str(tc.get('function', {}).get('name', '?')) for tc in tool_calls)}"
-            if emit_text:
-                manager_message = {
-                    "role": "assistant",
-                    "content": emit_text,
-                    "ts": assistant["ts"],
-                    "agent_role": "manager",
-                }
-                if "tool_calls" in assistant and (not route_only_tool_calls):
-                    manager_message["tool_calls"] = assistant["tool_calls"]
-                self.messages.append(manager_message)
-                self.messages = self.messages[-400:]
-            elif "tool_calls" in assistant and (not route_only_tool_calls):
-                manager_message = {
-                    "role": "assistant",
-                    "content": "",
-                    "ts": assistant["ts"],
-                    "agent_role": "manager",
-                    "tool_calls": assistant["tool_calls"],
-                }
-                self.messages.append(manager_message)
-                self.messages = self.messages[-400:]
-            if emit_text:
+                self.manager_context.append(
+                    {
+                        "role": "system",
+                        "content": (
+                            "[manager-fast-route] "
+                            f"{trim(str(text or ''), 500)}"
+                        ),
+                        "ts": now_ts(),
+                    }
+                )
+                self.manager_context = self.manager_context[-400:]
                 self._emit(
-                    "message",
+                    "status",
                     {
-                        "role": "assistant",
-                        "agent_role": "manager",
-                        "text": emit_text,
-                        "summary": "Manager response",
+                        "summary": (
+                            "manager fast-route via agentbus "
+                            f"({fast_meta.get('from', '?')}->{fast_meta.get('to', '?')}, "
+                            f"intent={fast_meta.get('intent', 'message')}, "
+                            f"age={float(fast_meta.get('age_sec', 0.0) or 0.0):.1f}s)"
+                        )
                     },
                 )
+            else:
+                prompt = (
+                    "Read the blackboard and delegate one next short timeslice. "
+                    "Return only one route_to_next_agent call.\n\n"
+                    f"{self._blackboard_read_state_markdown(max_items=6)}"
+                )
+                self.manager_context.append({"role": "user", "content": prompt, "ts": now_ts()})
+                self.manager_context = self.manager_context[-400:]
+                with self.lock:
+                    self.current_phase = "manager:model-call"
+                    self.current_tool_name = ""
+                    self.active_agent_role = "manager"
+                response = self._chat_with_same_model_retry(
+                    self.manager_context,
+                    tools=self._manager_route_tools(),
+                    system=self._manager_system_prompt(),
+                    max_tokens=600,
+                    think=False,
+                    stream_thinking=False,
+                    on_thinking_chunk=self._append_live_thinking,
+                    pinned_selection=pinned_selection,
+                    context_label="manager turn",
+                    retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
+                    media_inputs=media_inputs_round,
+                )
+                text = str(response.get("content") or "")
+                tool_calls = response.get("tool_calls", [])
+                text, text_filter_meta = self._sanitize_assistant_text_for_runtime(text, tool_calls)
+                if bool(text_filter_meta.get("filtered", False)) and str(text_filter_meta.get("reason", "")) == "oversized_raw_toolcall":
+                    self._inject_toolcall_overflow_hint("manager")
+                assistant = {"role": "assistant", "content": text, "ts": now_ts()}
+                if tool_calls:
+                    assistant["tool_calls"] = [
+                        {
+                            "id": tc["id"],
+                            "type": "function",
+                            "function": {
+                                "name": tc["function"]["name"],
+                                "arguments": json_dumps(tc["function"]["arguments"]),
+                            },
+                        }
+                        for tc in tool_calls
+                    ]
+                self.manager_context.append(assistant)
+                self.manager_context = self.manager_context[-400:]
+                route_only_tool_calls = False
+                if isinstance(tool_calls, list) and tool_calls:
+                    tool_names = [
+                        str(tc.get("function", {}).get("name", "") or "").strip().lower()
+                        for tc in tool_calls
+                        if isinstance(tc, dict)
+                    ]
+                    if tool_names and all(name in {"route_to_next_agent", "routetonext_agent"} for name in tool_names):
+                        route_only_tool_calls = True
+                emit_text = str(text or "").strip()
+                if not emit_text and tool_calls and (not route_only_tool_calls):
+                    emit_text = f"[tool calls] {', '.join(str(tc.get('function', {}).get('name', '?')) for tc in tool_calls)}"
+                if emit_text:
+                    manager_message = {
+                        "role": "assistant",
+                        "content": emit_text,
+                        "ts": assistant["ts"],
+                        "agent_role": "manager",
+                    }
+                    if "tool_calls" in assistant and (not route_only_tool_calls):
+                        manager_message["tool_calls"] = assistant["tool_calls"]
+                    self.messages.append(manager_message)
+                    self.messages = self.messages[-400:]
+                elif "tool_calls" in assistant and (not route_only_tool_calls):
+                    manager_message = {
+                        "role": "assistant",
+                        "content": "",
+                        "ts": assistant["ts"],
+                        "agent_role": "manager",
+                        "tool_calls": assistant["tool_calls"],
+                    }
+                    self.messages.append(manager_message)
+                    self.messages = self.messages[-400:]
+                if emit_text:
+                    self._emit(
+                        "message",
+                        {
+                            "role": "assistant",
+                            "agent_role": "manager",
+                            "text": emit_text,
+                            "summary": "Manager response",
+                        },
+                    )
         route = self._manager_route_from_response(text, tool_calls)
+        if used_watchdog_executor:
+            route["source"] = "watchdog-executor"
+            route["reason"] = trim(
+                (
+                    f"watchdog executor step {int(watchdog_meta.get('cursor', 0) or 0)}/"
+                    f"{int(watchdog_meta.get('total', 0) or 0)} "
+                    f"target={watchdog_meta.get('target', '?')} "
+                    f"trigger={watchdog_meta.get('trigger_reason', '')}"
+                ),
+                600,
+            )
+            route["executor_mode"] = True
+            route["is_mandatory"] = True
         if used_agentbus_fast:
             route["source"] = "agentbus-fast"
             route["reason"] = trim(
@@ -14824,6 +16096,7 @@ class SessionState:
             "participants": list(participants),
             "assigned_expert": assigned_expert,
             "is_mandatory": bool(route.get("is_mandatory", False)),
+            "executor_mode": bool(route.get("executor_mode", False)),
             "requires_user_confirmation": bool(route.get("requires_user_confirmation", False)),
             "round_budget": int(round_budget),
             "remaining_rounds": int(remaining_rounds),
@@ -14836,6 +16109,7 @@ class SessionState:
         profile["participants"] = list(participants)
         profile["assigned_expert"] = assigned_expert
         profile["is_mandatory"] = bool(route_row.get("is_mandatory", False))
+        profile["executor_mode"] = bool(route_row.get("executor_mode", False))
         profile["requires_user_confirmation"] = bool(route_row.get("requires_user_confirmation", False))
         if task_type in TASK_PROFILE_TYPES:
             profile["task_type"] = task_type
@@ -14870,6 +16144,7 @@ class SessionState:
             "participants": list(participants),
             "assigned_expert": assigned_expert,
             "is_mandatory": bool(route_row.get("is_mandatory", False)),
+            "executor_mode": bool(route_row.get("executor_mode", False)),
             "remaining_rounds": int(remaining_rounds),
             "updated_at": float(now_ts()),
         }
@@ -14941,10 +16216,40 @@ class SessionState:
         )
         return route_row
-    def _inject_manager_instruction(self, role: str, instruction: str, is_mandatory: bool = False):
+    def _inject_manager_instruction(
+        self,
+        role: str,
+        instruction: str,
+        is_mandatory: bool = False,
+        executor_mode: bool = False,
+    ):
         role_key = self._sanitize_agent_role(role)
         if not role_key:
             return
+        if bool(executor_mode):
+            executor_seed = {
+                "role": "system",
+                "content": self._apply_agent_language_policy(
+                    (
+                        "Executor mode is enabled by watchdog. You are stateless for this step: "
+                        "ignore old conversational plans, execute only the delegated step, call concrete tools, "
+                        "and write verifiable evidence to blackboard."
+                    ),
+                    max_len=800,
+                ),
+                "ts": now_ts(),
+                "agent_role": role_key,
+            }
+            self.contexts[role_key] = [executor_seed]
+            self._emit(
+                "status",
+                {
+                    "summary": (
+                        f"executor hot-swap: reset {self._agent_display_name(role_key)} context "
+                        "for stateless execution"
+                    )
+                },
+            )
         instruction_with_policy = self._apply_agent_language_policy(
             trim(str(instruction or "").strip(), 1400),
             max_len=1400,
@@ -14964,6 +16269,14 @@ class SessionState:
             if bool(is_mandatory)
             else ""
         )
+        executor_note = (
+            (
+                "STATELESS EXECUTOR: do not re-plan globally; "
+                "complete only this delegated step and return concrete tool evidence."
+            )
+            if bool(executor_mode)
+            else ""
+        )
         collaboration_note = (
             "COLLABORATION PREFERENCE: if your current step needs another specialty, "
             "use ask_colleague immediately with explicit intent and concise payload; "
@@ -14974,9 +16287,11 @@ class SessionState:
             "<manager-delegate>\n"
             f"target={role_key}\n"
             f"is_mandatory={bool(is_mandatory)}\n"
+            f"executor_mode={bool(executor_mode)}\n"
             f"instruction={instruction_text}\n"
             f"language_policy={language_note}\n"
             f"{mandatory_note}\n"
+            f"{executor_note}\n"
             f"{collaboration_note}\n"
             "</manager-delegate>\n"
             "<blackboard-state>\n"
@@ -15028,7 +16343,9 @@ class SessionState:
             return
         name = str(item.get("name", "") or "").strip()
         args = item.get("args", {}) if isinstance(item.get("args"), dict) else {}
-        output = trim(str(item.get("output", "") or "").strip(), BLACKBOARD_MAX_TEXT)
+        output_raw = trim(str(item.get("output", "") or "").strip(), BLACKBOARD_MAX_TEXT)
+        output_clean, _ = filter_runtime_noise_lines(output_raw)
+        output = trim(output_clean, BLACKBOARD_MAX_TEXT)
         ok = bool(item.get("ok", False))
         if name in {"write_file", "edit_file"}:
             rel_path = str(args.get("path", "") or "").strip()
@@ -15052,10 +16369,17 @@ class SessionState:
             if role_key == "explorer":
                 self._blackboard_set_status("RESEARCHING")
         elif name in {"finish_task", "finish_current_task", "mark_done"} and ok:
+            summary_arg = trim(str(args.get("summary", "") or "").strip(), BLACKBOARD_MAX_TEXT)
+            if summary_arg:
+                if role_key == "reviewer":
+                    self._blackboard_append_section("review_feedback", role_key, f"final_summary\n{summary_arg}")
+                elif role_key == "explorer":
+                    self._blackboard_append_section("research_notes", role_key, f"final_summary\n{summary_arg}")
             if role_key == "reviewer":
                 gate_ok, gate_reason = self._reviewer_approval_log_gate()
                 if gate_ok:
-                    self._blackboard_mark_approved(output or "finish tool acknowledged", role_key)
+                    approval_note = summary_arg or output or "finish tool acknowledged"
+                    self._blackboard_mark_approved(approval_note, role_key)
                 else:
                     self._blackboard_append_section(
                         "review_feedback",
@@ -15067,7 +16391,8 @@ class SessionState:
                     )
                     self._emit("status", {"summary": f"reviewer finish blocked: {gate_reason}"})
             else:
-                self._blackboard_mark_approved(output or "finish tool acknowledged", role_key)
+                approval_note = summary_arg or output or "finish tool acknowledged"
+                self._blackboard_mark_approved(approval_note, role_key)
         if not ok and output:
             self._blackboard_append_section(
                 "execution_logs",
@@ -15170,25 +16495,151 @@ class SessionState:
         policy_text = trim("\n".join(policy_lines).strip(), 1200)
         return clean_text, policy_text
-    def _reviewer_final_summary_ready(self, board: dict | None = None) -> bool:
-        def _text_good(text: str) -> bool:
-            clean = strip_thinking_content(str(text or "")).strip()
-            if not clean:
-                return False
-            if len(clean) >= 60:
-                return True
-            low = clean.lower()
-            tokens = (
-                "summary",
-                "final",
-                "结论",
-                "总结",
+    def _final_summary_quality(self, text: str) -> dict:
+        """Score a candidate final-summary text by length and topic coverage.
+
+        The text is passed through ``strip_thinking_content`` (presumably
+        removes model "thinking" markup -- confirm against its definition),
+        stripped, and lower-cased. Each of the three categories ("changes",
+        "validation", "risks") counts as covered when at least one of its
+        keywords occurs as a substring of the lower-cased text.
+
+        Returns a dict with keys:
+            clean: the stripped text that was evaluated.
+            chars: length of ``clean``.
+            covered: number of categories with at least one keyword hit.
+            hits: mapping of category name -> bool.
+            ok: lenient verdict (length and coverage thresholds below).
+            strict_ok: strict verdict (longer minimum, two categories).
+        """
+        clean = strip_thinking_content(str(text or "")).strip()
+        low = clean.lower()
+        chars = len(clean)
+        # Keyword lists (English and Chinese) per summary topic. Matching is a
+        # plain substring test, so short tokens such as "pass" or "test" also
+        # match inside longer words.
+        category_tokens = {
+            "changes": (
+                "changed",
+                "changes",
+                "change",
+                "modified",
+                "implemented",
+                "implementation",
+                "files",
+                "diff",
+                "patch",
+                "改动",
+                "变更",
+                "修改",
+                "实现",
+                "文件",
+            ),
+            "validation": (
+                "test",
+                "tests",
+                "pytest",
+                "validation",
+                "verified",
+                "verify",
+                "check",
+                "checks",
+                "evidence",
+                "pass",
+                "passed",
+                "验证",
+                "测试",
                 "通过",
+                "证据",
+                "日志",
+            ),
+            "risks": (
+                "risk",
+                "risks",
+                "residual",
+                "next step",
+                "next steps",
+                "follow-up",
+                "todo",
+                "limitation",
+                "known issue",
+                "caveat",
                 "风险",
+                "残留",
+                "后续",
+                "下一步",
+                "待办",
+                "限制",
+                "已知问题",
                 "建议",
-                "完成",
-            )
-            return any(tok in low for tok in tokens)
+            ),
+        }
+        # Record, per category, whether any keyword appears in the text.
+        hits: dict[str, bool] = {}
+        for cat, words in category_tokens.items():
+            matched = any(tok in low for tok in words)
+            hits[cat] = bool(matched)
+        covered = sum(1 for v in hits.values() if bool(v))
+        # Lenient: at least FINAL_SUMMARY_MIN_CHARS long and either two
+        # categories covered, or one category covered with >= 220 characters.
+        # Strict: at least FINAL_SUMMARY_STRICT_MIN_CHARS long and two
+        # categories covered. Both constants are defined outside this block.
+        ok = bool(chars >= FINAL_SUMMARY_MIN_CHARS and (covered >= 2 or (covered >= 1 and chars >= 220)))
+        strict_ok = bool(chars >= FINAL_SUMMARY_STRICT_MIN_CHARS and covered >= 2)
+        return {
+            "clean": clean,
+            "chars": int(chars),
+            "covered": int(covered),
+            "hits": hits,
+            "ok": bool(ok),
+            "strict_ok": bool(strict_ok),
+        }
+    def _final_summary_sufficient(self, text: str, *, strict: bool = False) -> bool:
+        """Return whether *text* passes the final-summary quality check.
+
+        Delegates to ``_final_summary_quality`` and reads its ``"strict_ok"``
+        flag when *strict* is truthy, otherwise its ``"ok"`` flag. A missing
+        flag is treated as ``False``.
+        """
+        quality = self._final_summary_quality(text)
+        if strict:
+            flag_key = "strict_ok"
+        else:
+            flag_key = "ok"
+        return bool(quality.get(flag_key, False))
+    def _finish_requires_structured_summary(self, role: str, tool_name: str) -> bool:
+        """Decide whether a finish-type tool call must carry a structured summary.
+
+        Returns ``False`` when *tool_name* is not one of the finish tools, when
+        *role* does not sanitize to a known role, or when the blackboard task
+        profile is ``"simple_qa"``. Returns ``True`` when the last delegate on
+        the blackboard targeted this role and its reason or instruction
+        mentions a summary marker. Otherwise the result is ``True`` only for
+        the reviewer role while multi-agent mode is active.
+        """
+        role_key = self._sanitize_agent_role(role)
+        finish_tools = {"finish_task", "finish_current_task", "mark_done"}
+        if tool_name not in finish_tools or not role_key:
+            return False
+        board = self._ensure_blackboard()
+        task_profile = self._ensure_blackboard_task_profile(board)
+        if str(task_profile.get("task_type", "general") or "general") == "simple_qa":
+            return False
+        # Fall back to an empty mapping when no usable delegate record exists.
+        last_delegate = board.get("last_delegate", {})
+        if not isinstance(last_delegate, dict):
+            last_delegate = {}
+        target_role = self._sanitize_agent_role(last_delegate.get("target", ""))
+        haystacks = (
+            str(last_delegate.get("reason", "") or "").strip().lower(),
+            str(last_delegate.get("instruction", "") or "").strip().lower(),
+        )
+        if target_role == role_key:
+            for marker in ("summary", "wrap-up", "final report", "最终总结", "总结", "收尾"):
+                if any(marker in hay for hay in haystacks):
+                    return True
+        if role_key != "reviewer":
+            return False
+        return bool(self._is_multi_agent_mode())
+    def _recent_agent_used_tools(
+        self,
+        role: str,
+        tool_names: set[str],
+        *,
+        lookback: int = 20,
+        max_age_seconds: float = 300.0,
+    ) -> bool:
+        """Report whether *role* recently produced a result from any of *tool_names*.
+
+        Scans the last *lookback* entries (at least one) of the role's context,
+        newest first, for dict rows whose ``role`` is ``"tool"`` and whose
+        ``name`` is in the normalized *tool_names*. A matching row counts when
+        its ``ts`` is missing, unparseable, or non-positive, or when it is no
+        older than *max_age_seconds* relative to ``now_ts()``.
+
+        Returns ``False`` when the role is not recognized, no usable tool
+        names are given, or no qualifying row is found.
+        """
+        role_key = self._sanitize_agent_role(role)
+        if not (role_key and tool_names):
+            return False
+        # Normalize the requested names, dropping blank entries.
+        wanted = set()
+        for raw in tool_names:
+            label = str(raw or "").strip()
+            if label:
+                wanted.add(label)
+        if not wanted:
+            return False
+        reference_ts = now_ts()
+        history = self._agent_context(role_key)
+        if isinstance(history, list):
+            window = history[-max(1, int(lookback)) :]
+        else:
+            window = []
+        for entry in window[::-1]:
+            if not isinstance(entry, dict) or str(entry.get("role", "") or "").strip().lower() != "tool":
+                continue
+            if str(entry.get("name", "") or "").strip() not in wanted:
+                continue
+            try:
+                stamp = float(entry.get("ts", 0.0) or 0.0)
+            except Exception:
+                stamp = 0.0
+            # Rows without a usable timestamp are accepted as recent.
+            if stamp <= 0.0 or (reference_ts - stamp) <= float(max_age_seconds):
+                return True
+        return False
+    def _reviewer_final_summary_ready(self, board: dict | None = None) -> bool:
+        def _text_good(text: str) -> bool:
+            return self._final_summary_sufficient(text, strict=True)
         bb = board if isinstance(board, dict) else self._ensure_blackboard()
         approval = bb.get("approval", {}) if isinstance(bb.get("approval"), dict) else {}
@@ -15494,6 +16945,7 @@ class SessionState:
             f"Session absolute writable root is {self.files_root}. "
             "Use relative file paths (for example hello.txt); runtime maps them to session absolute paths. "
             "If '/workspace/...' appears, treat it as a virtual alias only; never create OS-level /workspace in shell. "
+            f"{_detect_os_shell_instruction()} "
             "You must stay within your role boundary and use only provided tools. "
             "Use read_from_blackboard/write_to_blackboard to keep the shared state accurate. "
             "When communicating with other agents, use ask_colleague with structured intent/content. "
@@ -15514,8 +16966,12 @@ class SessionState:
                 base
                 + "Role objective: verify developer output against goal, run checks/tests, and issue pass/fix decisions. "
                 + "If gaps remain, send fix_request to developer with concrete failure evidence and write review_feedback to blackboard. "
-                + "If task is complete, write approval evidence and hand off final summary to Explorer "
-                + "(via ask_colleague intent=final_summary_request) before ending the task."
+                + "If manager requests final summary, first call read_from_blackboard "
+                + "(sections: code_artifacts, execution_logs, review_feedback, status), then generate a structured summary "
+                + "covering changes, validation evidence, and residual risks/next steps. "
+                + "When finishing, pass this summary in finish_task.summary; empty or vague summary is invalid. "
+                + "If you cannot produce summary from current evidence, hand off Explorer via ask_colleague "
+                + "intent=final_summary_request with explicit missing evidence."
             )
         return (
             base
@@ -16343,6 +17799,37 @@ class SessionState:
             return self._todo_write_rescue(args)
         if name in {"finish_task", "finish_current_task", "mark_done"}:
             summary = trim(str(args.get("summary", "") or "").strip(), 400)
+            if role_key == "explorer":
+                bb = self._ensure_blackboard()
+                delegate = bb.get("last_delegate", {}) if isinstance(bb.get("last_delegate"), dict) else {}
+                delegate_target = self._sanitize_agent_role(delegate.get("target", ""))
+                delegate_reason = str(delegate.get("reason", "") or "").strip().lower()
+                if delegate_target == "explorer" and "summary-handoff" in delegate_reason:
+                    return (
+                        "Error: explorer summary handoff step must not call finish tool. "
+                        "Write structured summary to blackboard first, then wait for manager close."
+                    )
+            if self._finish_requires_structured_summary(role_key, name):
+                if role_key == "reviewer" and not self._recent_agent_used_tools(
+                    role_key,
+                    {"read_from_blackboard"},
+                    lookback=24,
+                    max_age_seconds=420.0,
+                ):
+                    return (
+                        "Error: reviewer finalization requires blackboard evidence read. "
+                        "Call read_from_blackboard first (sections: code_artifacts, execution_logs, "
+                        "review_feedback, status), then call finish_task with structured summary."
+                    )
+                if not self._final_summary_sufficient(summary, strict=True):
+                    return (
+                        "Error: structured final summary is required before finish. "
+                        "Provide finish_task.summary with: "
+                        "(1) changes/files touched, "
+                        "(2) validation evidence (tests/commands/results), "
+                        "(3) residual risks or next steps. "
+                        "If evidence is missing, read_from_blackboard first or ask Explorer for final_summary_request."
+                    )
             if name == "finish_task":
                 todo_mark = self.todo.complete_all_open(summary)
             else:
@@ -16421,6 +17908,7 @@ class SessionState:
             if guard_error:
                 return guard_error
             out = self.bg.run(args["command"], int(args.get("timeout", 120)))
+            out_filtered, _ = filter_runtime_noise_lines(str(out or ""))
             self._emit(
                 "command",
                 {
@@ -16430,7 +17918,7 @@ class SessionState:
                     "summary": f"background_run: {args['command'][:80]}",
                 },
             )
-            return out
+            return trim(out_filtered or "(no output)")
         if name == "check_background":
             return self.bg.check(args.get("task_id"))
         if name == "task_create":
@@ -16472,6 +17960,8 @@ class SessionState:
             if section == "original_goal":
                 return trim(str(board.get("original_goal", "") or "").strip(), 4000) or "(empty)"
             if section == "status":
+                wd = board.get("watchdog", {}) if isinstance(board.get("watchdog"), dict) else {}
+                dq = board.get("decomposition_queue", {}) if isinstance(board.get("decomposition_queue"), dict) else {}
                 return json_dumps(
                     {
                         "status": board.get("status", "INITIALIZING"),
@@ -16480,6 +17970,20 @@ class SessionState:
                         "manager_summary_attempts": int(board.get("manager_summary_attempts", 0) or 0),
                         "approval": board.get("approval", {}),
                         "last_delegate": board.get("last_delegate", {}),
+                        "watchdog": {
+                            "intent_no_tool_streak": int(wd.get("intent_no_tool_streak", 0) or 0),
+                            "repeat_no_tool_streak": int(wd.get("repeat_no_tool_streak", 0) or 0),
+                            "state_unchanged_streak": int(wd.get("state_unchanged_streak", 0) or 0),
+                            "trigger_count": int(wd.get("trigger_count", 0) or 0),
+                            "last_trigger_reason": trim(str(wd.get("last_trigger_reason", "") or "").strip(), 160),
+                        },
+                        "decomposition_queue": {
+                            "active": bool(dq.get("active", False)),
+                            "trigger_reason": trim(str(dq.get("trigger_reason", "") or "").strip(), 160),
+                            "cursor": int(dq.get("cursor", 0) or 0),
+                            "total": len(dq.get("steps", []) or []),
+                            "last_error": trim(str(dq.get("last_error", "") or "").strip(), 220),
+                        },
                     },
                     indent=2,
                 )
@@ -16945,6 +18449,15 @@ class SessionState:
                         output = self._dispatch_tool(name, args, agent_role=role_key)
                     except Exception as exc:
                         output = f"Error: {exc}"
+            raw_output = str(output or "")
+            filtered_output, filtered_rows = filter_runtime_noise_lines(raw_output)
+            if filtered_rows > 0:
+                if filtered_output:
+                    output = filtered_output
+                elif raw_output.startswith("Error:"):
+                    output = "Error: runtime socket noise filtered"
+                else:
+                    output = "(no output)"
             self._append_agent_context_message(
                 role_key,
                 {
@@ -17161,6 +18674,7 @@ class SessionState:
             if self.cancel_requested:
                 self._emit("status", {"summary": "run interrupted"})
                 break
+            self._apply_auto_compact_if_needed("auto:multi-sync")
             with self.lock:
                 self.agent_round_index = int(self.agent_round_index) + 1
                 self.current_phase = "manager:dispatch"
@@ -17203,6 +18717,7 @@ class SessionState:
                 role,
                 instruction,
                 is_mandatory=bool(route.get("is_mandatory", False)),
+                executor_mode=bool(route.get("executor_mode", False)),
             )
             if role == "explorer":
                 self._blackboard_set_status("RESEARCHING")
@@ -17216,13 +18731,26 @@ class SessionState:
                 media_inputs_pool=media_inputs_pool,
                 media_seen_ts_by_role=media_seen_ts_by_role,
             )
+            board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
             step = self._multi_agent_turn(
                 role,
                 pinned_selection=pinned_selection,
                 media_inputs_round=role_media_inputs,
             )
             self._blackboard_update_from_worker_step(role, step)
+            board_after = self._ensure_blackboard()
+            board_after_fp = self._watchdog_state_fingerprint(board_after)
+            wd_event = self._watchdog_process_worker_step(
+                board_after,
+                role=role,
+                step=step if isinstance(step, dict) else {},
+                state_changed=bool(board_after_fp != board_before_fp),
+                pinned_selection=pinned_selection,
+            )
             status = str(step.get("status", "") or "")
+            if bool(wd_event.get("triggered", False)):
+                idle_counts[role] = 0
+                continue
             if status == "interrupted":
                 break
             if status == "skip":
@@ -17231,7 +18759,7 @@ class SessionState:
                 idle_counts[role] = 0
                 if bool(step.get("stop_due_to_finish", False)):
                     note = f"{self._agent_display_name(role)} signaled finish via tool."
-                    self._blackboard_mark_approved(note, role)
+                    # Approval note should come from finish tool payload sync; avoid overwriting with generic text here.
                     can_finish_now, finish_gate_reason = self._can_auto_finish_from_approval(
                         self._ensure_blackboard(),
                         latest_user_ts=self._latest_user_message_ts(),
@@ -17243,7 +18771,7 @@ class SessionState:
                                 {
                                     "summary": (
                                         "reviewer finish deferred: final summary missing; "
-                                        "handoff to explorer via agentbus and continue"
+                                        "manager will reroute to explorer summary synthesis"
                                     )
                                 },
                             )
@@ -17347,6 +18875,7 @@ class SessionState:
             if self.cancel_requested:
                 self._emit("status", {"summary": "run interrupted"})
                 break
+            self._apply_auto_compact_if_needed("auto:multi-seq")
             with self.lock:
                 self.agent_round_index = int(self.agent_round_index) + 1
             latest_user_ts = self._latest_user_message_ts()
@@ -17358,6 +18887,28 @@ class SessionState:
                     media_inputs=media_inputs_pool,
                     roles=role_order,
                 )
+            dq = self._normalize_decomposition_queue_state(
+                self._ensure_blackboard().get("decomposition_queue", {})
+            )
+            if bool(dq.get("active", False)):
+                queue_exec = self._watchdog_execute_queue_step(
+                    pinned_selection=pinned_selection,
+                )
+                if bool(queue_exec.get("interrupted", False)):
+                    break
+                if bool(queue_exec.get("stop_run", False)):
+                    self._emit("status", {"summary": "watchdog executor completed task; run paused"})
+                    break
+                if not bool(queue_exec.get("executed", False)):
+                    if bool(queue_exec.get("queue_active", False)):
+                        self._emit(
+                            "status",
+                            {"summary": "watchdog queue active but no executable step; pausing to avoid deadlock"},
+                        )
+                        break
+                    continue
+                idle_counts[str(queue_exec.get("role", "") or "developer")] = 0
+                continue
             role = current_role if mode == EXECUTION_MODE_SEQUENTIAL else role_order[sync_index % len(role_order)]
             role_media_inputs = self._resolve_role_multimodal_payload(
                 role=role,
@@ -17365,12 +18916,27 @@ class SessionState:
                 media_inputs_pool=media_inputs_pool,
                 media_seen_ts_by_role=media_seen_ts_by_role,
             )
+            board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
             step = self._multi_agent_turn(
                 role,
                 pinned_selection=pinned_selection,
                 media_inputs_round=role_media_inputs,
             )
-            status = str(step.get("status", "") or "")
+            safe_step = step if isinstance(step, dict) else {}
+            self._blackboard_update_from_worker_step(role, safe_step)
+            board_after = self._ensure_blackboard()
+            board_after_fp = self._watchdog_state_fingerprint(board_after)
+            wd_event = self._watchdog_process_worker_step(
+                board_after,
+                role=role,
+                step=safe_step,
+                state_changed=bool(board_after_fp != board_before_fp),
+                pinned_selection=pinned_selection,
+            )
+            if bool(wd_event.get("triggered", False)):
+                idle_counts[role] = 0
+                continue
+            status = str(safe_step.get("status", "") or "")
             if status == "interrupted":
                 break
             if status == "skip":
@@ -17384,7 +18950,7 @@ class SessionState:
                 continue
             if status == "tools":
                 idle_counts[role] = 0
-                if bool(step.get("stop_due_to_finish", False)):
+                if bool(safe_step.get("stop_due_to_finish", False)):
                     self._emit("status", {"summary": "finish tool called; run paused and awaiting user instruction"})
                     break
                 if mode == EXECUTION_MODE_SEQUENTIAL:
@@ -17402,7 +18968,7 @@ class SessionState:
                 idle_counts[role] = int(idle_counts.get(role, 0) or 0) + 1
                 should_stop, next_role = self._multi_agent_no_tool_transition(
                     role,
-                    str(step.get("text", "") or ""),
+                    str(safe_step.get("text", "") or ""),
                     mode=mode,
                     idle_counts=idle_counts,
                 )
@@ -17549,9 +19115,7 @@ class SessionState:
                             },
                         )
                         break
-                self._microcompact()
-                if self._estimate_tokens() > self.context_token_upper_bound:
-                    self._auto_compact("auto")
+                self._apply_auto_compact_if_needed("auto")
                 notifs = self.bg.drain()
                 if notifs:
                     text = "\n".join(f"[bg:{n['task_id']}] {n['status']}: {n['result']}" for n in notifs)
@@ -17583,6 +19147,32 @@ class SessionState:
                         self._seed_multi_agent_contexts_if_needed(self.runtime_reclassify_goal or "")
                         self._multi_agent_worker(pinned_selection=pinned_selection)
                         return
+                dq = self._normalize_decomposition_queue_state(
+                    self._ensure_blackboard().get("decomposition_queue", {})
+                )
+                if bool(dq.get("active", False)):
+                    queue_exec = self._watchdog_execute_queue_step(
+                        pinned_selection=pinned_selection,
+                    )
+                    if bool(queue_exec.get("interrupted", False)):
+                        self._emit("status", {"summary": "run interrupted"})
+                        break
+                    if bool(queue_exec.get("stop_run", False)):
+                        self._emit("status", {"summary": "watchdog executor completed task; run paused"})
+                        break
+                    if not bool(queue_exec.get("executed", False)):
+                        if bool(queue_exec.get("queue_active", False)):
+                            self._emit(
+                                "status",
+                                {"summary": "watchdog queue active but no executable step; pausing to avoid deadlock"},
+                            )
+                            break
+                        continue
+                    no_tool_rounds = 0
+                    arbiter_planning_rounds = 0
+                    fault_counter = 0
+                    last_fault_reason = ""
+                    continue
                 latest_user_ts = self._latest_user_message_ts()
                 media_inputs_round = None
                 if latest_user_ts > media_last_user_ts:
@@ -17778,6 +19368,32 @@ class SessionState:
                         arbiter_planning_rounds = 0
                         self._emit("status", {"summary": "waiting for user input: assistant asked for a decision"})
                         break
+                    wd_event = self._watchdog_process_worker_step(
+                        self._ensure_blackboard(),
+                        role=single_role,
+                        step={
+                            "status": "no-tools",
+                            "text": decision_probe,
+                            "tool_results": [],
+                        },
+                        state_changed=False,
+                        pinned_selection=pinned_selection,
+                    )
+                    if bool(wd_event.get("triggered", False)):
+                        no_tool_rounds = 0
+                        arbiter_planning_rounds = 0
+                        fault_counter = 0
+                        last_fault_reason = ""
+                        self._emit(
+                            "status",
+                            {
+                                "summary": (
+                                    "watchdog triggered in single-agent planner mode; "
+                                    "switching to stateless executor queue"
+                                )
+                            },
+                        )
+                        continue
                     clean_decision_probe = strip_thinking_content(decision_probe).strip()
                     if bool(self.arbiter_enabled) and len(clean_decision_probe) >= int(ARBITER_TRIGGER_MIN_CONTENT_CHARS):
                         arbiter_decision = self._call_arbiter_llm(clean_decision_probe, thinking_text)
@@ -18031,6 +19647,8 @@ class SessionState:
                 stop_due_to_finish_task = False
                 hard_break_reason = ""
                 interrupted_in_tools = False
+                single_round_tool_results: list[dict] = []
+                single_watchdog_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
                 round_tool_fp = self._tool_calls_fingerprint(tool_calls)
                 for tc in tool_calls:
                     if self.cancel_requested:
@@ -18186,6 +19804,15 @@ class SessionState:
                             output = self._dispatch_tool(name, args)
                         except Exception as exc:
                             output = f"Error: {exc}"
+                    raw_output = str(output or "")
+                    filtered_output, filtered_rows = filter_runtime_noise_lines(raw_output)
+                    if filtered_rows > 0:
+                        if filtered_output:
+                            output = filtered_output
+                        elif raw_output.startswith("Error:"):
+                            output = "Error: runtime socket noise filtered"
+                        else:
+                            output = "(no output)"
                     tool_key = str(dispatched_name or name).strip() or str(name or "").strip() or "unknown-tool"
                     if str(output).startswith("Error"):
                         round_error_count += 1
@@ -18213,6 +19840,14 @@ class SessionState:
                     if dispatched_name in {"finish_task", "finish_current_task", "mark_done"}:
                         stop_due_to_finish_task = True
                     self.messages.append({"role": "tool", "tool_call_id": tc["id"], "name": name, "content": trim(output), "ts": now_ts()})
+                    single_round_tool_results.append(
+                        {
+                            "name": dispatched_name or name,
+                            "args": args if isinstance(args, dict) else {},
+                            "output": trim(str(output or ""), 3000),
+                            "ok": not str(output).startswith("Error:"),
+                        }
+                    )
                     self._emit("tool_result", {"name": name, "result": trim(output, 500), "summary": f"tool done: {name}"})
                     if int(tool_error_streaks.get(tool_key, 0) or 0) >= HARD_BREAK_TOOL_ERROR_THRESHOLD:
                         stop_due_to_hard_break = True
@@ -18241,6 +19876,18 @@ class SessionState:
                     self.current_phase = "post-tools"
                 if interrupted_in_tools:
                     break
+                single_watchdog_after_board = self._ensure_blackboard()
+                single_watchdog_after_fp = self._watchdog_state_fingerprint(single_watchdog_after_board)
+                self._watchdog_process_worker_step(
+                    single_watchdog_after_board,
+                    role=single_role,
+                    step={
+                        "status": "tools",
+                        "tool_results": single_round_tool_results,
+                    },
+                    state_changed=bool(single_watchdog_after_fp != single_watchdog_before_fp),
+                    pinned_selection=pinned_selection,
+                )
                 if stop_due_to_hard_break:
                     note = (
                         "Execution paused after repeated tool/recovery failures. "
@@ -19628,7 +21275,6 @@ window.MathJax={
   }
 };
 </script>
-<script defer src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
 </head>
 <body>
 <div class="bg-layer"></div>
@@ -19883,7 +21529,7 @@ main{display:grid;grid-template-columns:minmax(220px,260px) minmax(520px,920px)
 .upload-list{margin-top:6px;border:1px solid var(--line);border-radius:10px;background:#fff;max-height:88px;overflow:auto;padding:6px}
 .row{display:flex;gap:8px;margin-top:8px;flex-wrap:wrap}
 .ctx-live{margin-left:auto;display:flex;align-items:center;gap:8px;padding:8px 10px;border:1px solid #d6deea;border-radius:999px;background:#f8fbff;min-width:250px}
-.ctx-live-dot{width:8px;height:8px;border-radius:50%;background:#13b8a6;box-shadow:0 0 0 rgba(19,184,166,.45);animation:ctxPulse 1.6s ease-in-out infinite}
+.ctx-live-dot{width:8px;height:8px;border-radius:50%;background:#13b8a6;box-shadow:0 0 0 rgba(19,184,166,.45)}
 .ctx-live-bar{position:relative;display:inline-block;width:84px;height:6px;border-radius:999px;background:#e5edf8;overflow:hidden}
 .ctx-live-fill{display:block;height:100%;width:0%;background:linear-gradient(90deg,#13b8a6,#1f6feb);transition:width .24s ease,background .24s ease}
 .ctx-live.warn .ctx-live-dot{background:#e1a400}
@@ -19974,10 +21620,10 @@ APP_JS = """const S={sessions:[],activeId:null,snap:null,es:null,esId:'',skills:
 const MD_CACHE=new Map();
 const MD_CACHE_MAX=420;
 const STATIC_UI=((new URLSearchParams(location.search)).get('static_ui')==='1');
-const SNAPSHOT_DELAY_VISIBLE_MS=120;
-const SNAPSHOT_DELAY_HIDDEN_MS=1200;
-const SESSION_POLL_VISIBLE_MS=12000;
-const SESSION_POLL_HIDDEN_MS=30000;
+const SNAPSHOT_DELAY_VISIBLE_MS=300;
+const SNAPSHOT_DELAY_HIDDEN_MS=2400;
+const SESSION_POLL_VISIBLE_MS=30000;
+const SESSION_POLL_HIDDEN_MS=60000;
 const PANEL_SCROLL_ACTIVE_MS=1100;
 const CHAT_SCROLL_ACTIVE_MS=420;
 const CHAT_SCROLL_LOCK_MS=1200;
@@ -19994,10 +21640,10 @@ const DELTA_MAX_OPERATIONS=220;
 const DELTA_MAX_UPLOADS=40;
 const DELTA_WATCHDOG_INTERVAL_MS=1800;
 const DELTA_WATCHDOG_STALL_MS=9000;
-const MARKDOWN_WORKER_MIN_CHARS=2200;
+const MARKDOWN_WORKER_MIN_CHARS=800;
 const MARKDOWN_WORKER_MAX_PENDING=96;
 const MARKDOWN_WORKER_REQ_TTL_MS=45000;
-const CHAT_VIRT={heights:Object.create(null),heightVersion:0,avgHeight:140,overscanPx:900,maxCacheKeys:2800,poolByKind:Object.create(null),poolSize:0,poolMax:420};
+const CHAT_VIRT={heights:Object.create(null),heightVersion:0,avgHeight:140,overscanPx:400,maxCacheKeys:1200,poolByKind:Object.create(null),poolSize:0,poolMax:180};
 const RENDER_EVT_TYPES=new Set(['render_frame','render_bridge']);
 const RENDER_QUEUE_MAX=140;
 const RENDER_META_MIN_INTERVAL_MS=180;
@@ -20576,7 +22222,15 @@ function _mathRunTypeset(root,key=''){
   const run=(retry)=>{
     const mj=window.MathJax;
     if(!mj||typeof mj.typesetPromise!=='function'){
-      if(retry<10)setTimeout(()=>run(retry+1),180);
+      // Lazy-load MathJax on first actual math demand
+      if(!window._mjaxLoading){
+        window._mjaxLoading=true;
+        const s=document.createElement('script');
+        s.src='https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js';
+        s.async=true;
+        document.head.appendChild(s);
+      }
+      if(retry<20)setTimeout(()=>run(retry+1),200);
       return;
     }
     if(root._mathPending)return;
@@ -24016,16 +25670,35 @@ Use this skill when tasks match this flow pattern and reusable execution is need
         return self.model_catalog()
 class AgentHTTPServer(ThreadingHTTPServer):
+    daemon_threads = True
+    block_on_close = False
     def __init__(self, addr: tuple[str, int], handler, app: AppContext):
         super().__init__(addr, handler)
         self.app = app
     def handle_error(self, request, client_address):
         _, exc, _ = sys.exc_info()
-        if isinstance(exc, (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, TimeoutError)):
+        if swallow_benign_socket_error(exc, "agent-http.handle_error"):
             return
         return super().handle_error(request, client_address)
+    def shutdown_request(self, request):
+        try:
+            super().shutdown_request(request)
+        except OSError as exc:
+            if swallow_benign_socket_error(exc, "agent-http.shutdown_request"):
+                return
+            raise
+    def close_request(self, request):
+        try:
+            super().close_request(request)
+        except OSError as exc:
+            if swallow_benign_socket_error(exc, "agent-http.close_request"):
+                return
+            raise
 class Handler(BaseHTTPRequestHandler):
     protocol_version = "HTTP/1.1"
     server_version = f"StandaloneWebAgent/{APP_VERSION}"
@@ -24036,8 +25709,10 @@ class Handler(BaseHTTPRequestHandler):
     def handle(self):
         try:
             super().handle()
-        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, TimeoutError):
-            return
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "handler.handle"):
+                return
+            raise
     @property
     def app(self) -> AppContext:
@@ -24064,48 +25739,70 @@ class Handler(BaseHTTPRequestHandler):
     def _send_json(self, obj: object, status: int = 200):
         body = json_dumps(obj).encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", "application/json; charset=utf-8")
-        self.send_header("Content-Length", str(len(body)))
-        self.send_header("Cache-Control", "no-store")
-        self.end_headers()
-        self.wfile.write(body)
+        try:
+            self.send_response(status)
+            self.send_header("Content-Type", "application/json; charset=utf-8")
+            self.send_header("Content-Length", str(len(body)))
+            self.send_header("Cache-Control", "no-store")
+            self.end_headers()
+            self.wfile.write(body)
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "handler.send_json"):
+                return
+            raise
     def _send_text(self, text: str, content_type: str = "text/plain; charset=utf-8", status: int = 200):
         body = text.encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", content_type)
-        self.send_header("Content-Length", str(len(body)))
-        self.send_header("Cache-Control", "no-store")
-        self.send_header("Pragma", "no-cache")
-        self.send_header("Expires", "0")
-        self.end_headers()
-        self.wfile.write(body)
+        try:
+            self.send_response(status)
+            self.send_header("Content-Type", content_type)
+            self.send_header("Content-Length", str(len(body)))
+            self.send_header("Cache-Control", "no-store")
+            self.send_header("Pragma", "no-cache")
+            self.send_header("Expires", "0")
+            self.end_headers()
+            self.wfile.write(body)
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "handler.send_text"):
+                return
+            raise
     def _send_bytes(self, data: bytes, content_type: str, filename: str):
-        self.send_response(200)
-        self.send_header("Content-Type", content_type)
-        self.send_header("Content-Disposition", f'attachment; filename="{filename}"')
-        self.send_header("Content-Length", str(len(data)))
-        self.end_headers()
-        self.wfile.write(data)
+        try:
+            self.send_response(200)
+            self.send_header("Content-Type", content_type)
+            self.send_header("Content-Disposition", f'attachment; filename="{filename}"')
+            self.send_header("Content-Length", str(len(data)))
+            self.end_headers()
+            self.wfile.write(data)
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "handler.send_bytes"):
+                return
+            raise
     def _send_inline_bytes(self, data: bytes, content_type: str, status: int = 200):
-        self.send_response(status)
-        self.send_header("Content-Type", content_type)
-        self.send_header("Content-Length", str(len(data)))
-        self.send_header("Content-Disposition", "inline")
-        self.send_header("Cache-Control", "no-store")
-        self.end_headers()
-        self.wfile.write(data)
+        try:
+            self.send_response(status)
+            self.send_header("Content-Type", content_type)
+            self.send_header("Content-Length", str(len(data)))
+            self.send_header("Content-Disposition", "inline")
+            self.send_header("Cache-Control", "no-store")
+            self.end_headers()
+            self.wfile.write(data)
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "handler.send_inline_bytes"):
+                return
+            raise
     def _sse_write(self, payload: bytes) -> bool:
         try:
             self.wfile.write(payload)
             self.wfile.flush()
             return True
-        except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError, TimeoutError, OSError):
-            return False
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "handler.sse_write"):
+                return False
+            raise
     def do_GET(self):
         parsed_url = urlparse(self.path)
@@ -24523,12 +26220,17 @@ class Handler(BaseHTTPRequestHandler):
         return self._send_json({"ok": True})
     def _stream_events(self, sess: SessionState):
-        self.send_response(HTTPStatus.OK)
-        self.send_header("Content-Type", "text/event-stream; charset=utf-8")
-        self.send_header("Cache-Control", "no-cache")
-        self.send_header("Connection", "keep-alive")
-        self.send_header("X-Accel-Buffering", "no")
-        self.end_headers()
+        try:
+            self.send_response(HTTPStatus.OK)
+            self.send_header("Content-Type", "text/event-stream; charset=utf-8")
+            self.send_header("Cache-Control", "no-cache")
+            self.send_header("Connection", "keep-alive")
+            self.send_header("X-Accel-Buffering", "no")
+            self.end_headers()
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "handler.stream_events.headers"):
+                return
+            raise
         sub = sess.events.subscribe()
         try:
             hello = (
@@ -24548,8 +26250,9 @@ class Handler(BaseHTTPRequestHandler):
                     chunk = f": ping {int(now_ts())}\n\n".encode("utf-8")
                 if not self._sse_write(chunk):
                     break
-        except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
-            pass
+        except Exception as exc:
+            if not swallow_benign_socket_error(exc, "handler.stream_events.loop"):
+                raise
         finally:
             sess.events.unsubscribe(sub)
@@ -24563,8 +26266,10 @@ class SkillsHandler(BaseHTTPRequestHandler):
     def handle(self):
         try:
             super().handle()
-        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, TimeoutError):
-            return
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "skills-handler.handle"):
+                return
+            raise
     @property
     def app(self) -> AppContext:
@@ -24591,23 +26296,33 @@ class SkillsHandler(BaseHTTPRequestHandler):
     def _send_json(self, obj: object, status: int = 200):
         body = json_dumps(obj).encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", "application/json; charset=utf-8")
-        self.send_header("Content-Length", str(len(body)))
-        self.send_header("Cache-Control", "no-store")
-        self.end_headers()
-        self.wfile.write(body)
+        try:
+            self.send_response(status)
+            self.send_header("Content-Type", "application/json; charset=utf-8")
+            self.send_header("Content-Length", str(len(body)))
+            self.send_header("Cache-Control", "no-store")
+            self.end_headers()
+            self.wfile.write(body)
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "skills-handler.send_json"):
+                return
+            raise
     def _send_text(self, text: str, content_type: str = "text/plain; charset=utf-8", status: int = 200):
         body = text.encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", content_type)
-        self.send_header("Content-Length", str(len(body)))
-        self.send_header("Cache-Control", "no-store")
-        self.send_header("Pragma", "no-cache")
-        self.send_header("Expires", "0")
-        self.end_headers()
-        self.wfile.write(body)
+        try:
+            self.send_response(status)
+            self.send_header("Content-Type", content_type)
+            self.send_header("Content-Length", str(len(body)))
+            self.send_header("Cache-Control", "no-store")
+            self.send_header("Pragma", "no-cache")
+            self.send_header("Expires", "0")
+            self.end_headers()
+            self.wfile.write(body)
+        except Exception as exc:
+            if swallow_benign_socket_error(exc, "skills-handler.send_text"):
+                return
+            raise
     def do_GET(self):
         parsed_url = urlparse(self.path)
@@ -25219,7 +26934,15 @@ def main():
     elif int(skills_port) != int(args.port):
         try:
             skills_server = AgentHTTPServer((args.host, skills_port), SkillsHandler, app)
-            skills_thread = threading.Thread(target=skills_server.serve_forever, daemon=True)
+            def _skills_serve_loop():
+                try:
+                    skills_server.serve_forever()
+                except OSError as exc:
+                    if not swallow_benign_socket_error(exc, "skills-server.serve_forever"):
+                        raise
+            skills_thread = threading.Thread(target=_skills_serve_loop, daemon=True)
             skills_thread.start()
             setattr(app, "skills_ui_enabled", True)
         except Exception as exc:
@@ -25330,6 +27053,12 @@ def main():
         server.serve_forever()
     except KeyboardInterrupt:
         print("\n[web-agent] shutting down")
+    except OSError as exc:
+        if swallow_benign_socket_error(exc, "main.serve_forever"):
+            if BENIGN_SOCKET_DEBUG_LOG_ENABLED:
+                print(f"\n[web-agent][debug] socket closed benignly ({trim(str(exc), 180)}), shutting down")
+        else:
+            raise
     finally:
         try:
             persist_report = app.persist_all_sessions(include_running=True, lock_timeout=0.6)

clouds-coder 2026.3.7__tar.gz → 2026.3.8__tar.gz

clouds-coder 2026.3.7tar.gz → 2026.3.8tar.gz