PyPI - clouds-coder - Versions diffs - 2026.4.2__tar.gz → 2026.4.5__tar.gz - Mend

clouds-coder 2026.4.2 (tar.gz) → 2026.4.5 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/Clouds_Coder.py RENAMED Viewed

@@ -25,6 +25,7 @@ import selectors
 import signal
 import shutil
 import shlex
+import ssl
 import socket
 import subprocess
 import sys
@@ -44,15 +45,48 @@ from pathlib import Path, PurePosixPath
 from urllib.error import HTTPError, URLError
 from urllib.parse import parse_qs, unquote, urlparse
 from urllib.request import Request, urlopen
+try:
+    import certifi as _certifi
+except Exception:
+    _certifi = None
 try:
     import yaml as _yaml
 except Exception:
     _yaml = None
+_URL_OPEN_ORIGINAL = urlopen
+_HTTP_SSL_CONTEXT = None
 APP_VERSION = "0.1.1"
 DEFAULT_OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
 DEFAULT_OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5-coder:7b")
 SCRIPT_DIR = Path(__file__).resolve().parent
+def _shared_http_ssl_context():
+    global _HTTP_SSL_CONTEXT
+    if _HTTP_SSL_CONTEXT is not None:
+        return _HTTP_SSL_CONTEXT
+    cafile = str(os.getenv("SSL_CERT_FILE", "") or "").strip()
+    if not cafile and _certifi is not None:
+        try:
+            cafile = str(_certifi.where() or "").strip()
+        except Exception:
+            cafile = ""
+    try:
+        ctx = ssl.create_default_context(cafile=cafile or None)
+    except Exception:
+        ctx = ssl.create_default_context()
+    _HTTP_SSL_CONTEXT = ctx
+    return ctx
+def urlopen(url, *args, **kwargs):
+    if "context" not in kwargs:
+        target = getattr(url, "full_url", url)
+        if str(target or "").strip().lower().startswith("https://"):
+            try:
+                kwargs["context"] = _shared_http_ssl_context()
+            except Exception:
+                pass
+    return _URL_OPEN_ORIGINAL(url, *args, **kwargs)
 def _resolve_default_agent_workdir() -> Path:
     raw = str(os.getenv("AGENT_WORKDIR", "") or "").strip()
     if raw:
@@ -158,7 +192,7 @@ REPEATED_TOOL_LOOP_THRESHOLD = 2
 BASH_READ_LOOP_THRESHOLD = 3
 HARD_BREAK_TOOL_ERROR_THRESHOLD = 20
 HARD_BREAK_RECOVERY_ROUND_THRESHOLD = 3
-FUSED_FAULT_BREAK_THRESHOLD = 3
+FUSED_FAULT_BREAK_THRESHOLD = 15
 STALL_SEVERITY_ESCALATION_THRESHOLD = 5
 STALL_SEVERITY_WEIGHT_BASH_READ_LOOP = 2
 STALL_SEVERITY_WEIGHT_REPEATED_TOOL = 3
@@ -182,6 +216,23 @@ DEFAULT_TIMEOUT_SECONDS = max(
     ),
 )
 DEFAULT_REQUEST_TIMEOUT = DEFAULT_TIMEOUT_SECONDS
+MIN_SHELL_COMMAND_TIMEOUT_SECONDS = 10
+MAX_SHELL_COMMAND_TIMEOUT_SECONDS = 86_400
+DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS = max(
+    MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+    min(
+        MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+        int(
+            str(
+                os.getenv(
+                    "AGENT_SHELL_COMMAND_TIMEOUT",
+                    os.getenv("AGENT_BASH_TIMEOUT", os.getenv("AGENT_COMMAND_TIMEOUT", "240")),
+                )
+                or "240"
+            )
+        ),
+    ),
+)
 AUTO_CONTINUE_BUDGET_DEFAULT = 30
 AGENT_MAX_OUTPUT_TOKENS = 16384
 OLLAMA_THINKING_TOOL_BUFFER = 4096
@@ -196,7 +247,7 @@ WATCHDOG_CONTEXT_NEAR_RATIO = 0.92
 WATCHDOG_MAX_DECOMPOSE_STEPS = 12
 WATCHDOG_STEP_MAX_ATTEMPTS = 2
 EMPTY_ACTION_MIN_CONTENT_CHARS = 5
-EMPTY_ACTION_WAKEUP_RETRY_LIMIT = 2
+EMPTY_ACTION_WAKEUP_RETRY_LIMIT = 5
 THINKING_BUDGET_FORCE_RATIO = 0.85
 # --- Tool timeout configuration ---
 _TOOL_TIMEOUT_MAP = {
@@ -347,7 +398,13 @@ BLACKBOARD_STATUSES = (
     "COMPLETED",
     "PAUSED",
 )
-TASK_COMPLEXITY_LEVELS = ("simple", "complex")
+TASK_COMPLEXITY_LEVELS = ("simple", "moderate", "complex", "expert")
+TASK_COMPLEXITY_RANKS = {
+    "simple": 1,
+    "moderate": 2,
+    "complex": 3,
+    "expert": 4,
+}
 TASK_PROFILE_TYPES = (
     "simple_qa",
     "simple_code",
@@ -384,7 +441,7 @@ TASK_LEVEL_POLICIES: dict[int, dict] = {
         "assigned_expert": "developer",
         "round_budget": 16,
         "requires_user_confirmation": False,
-        "complexity": "simple",
+        "complexity": "moderate",
     },
     4: {
         "name": "complex_collaboration",
@@ -402,7 +459,7 @@ TASK_LEVEL_POLICIES: dict[int, dict] = {
         "assigned_expert": "explorer",
         "round_budget": 0,  # 0 means unlimited by tier budget (still guarded by global safeguards).
         "requires_user_confirmation": True,
-        "complexity": "complex",
+        "complexity": "expert",
     },
 }
 MANAGER_ROUTE_TARGETS = ("explorer", "developer", "reviewer", "finish")
@@ -469,7 +526,7 @@ TASK_PHASE_ROUTING = {
 COMPLEXITY_KEYWORDS = (
     "简单", "复杂", "难", "容易", "快速", "详细", "深入",
     "l1", "l2", "l3", "l4", "l5",
-    "simple", "complex", "easy", "hard", "difficult",
+    "simple", "moderate", "medium", "complex", "expert", "easy", "hard", "difficult",
     "thorough", "quick", "fast", "lightweight", "heavy",
 )
 USER_COMPLEXITY_SIMPLE_TOKENS = (
@@ -477,12 +534,23 @@ USER_COMPLEXITY_SIMPLE_TOKENS = (
     "low", "simple", "easy", "quick", "fast", "lightweight", "basic", "minimal",
     "l1", "l2",
 )
+USER_COMPLEXITY_MODERATE_TOKENS = (
+    "中等复杂度", "中等难度", "适中", "平衡", "标准", "普通", "常规",
+    "medium", "mid", "moderate", "balanced", "standard", "normal",
+    "l3",
+)
 USER_COMPLEXITY_COMPLEX_TOKENS = (
-    "复杂", "深入", "详细", "高复杂度", "高难度", "中等复杂度", "中高复杂度",
-    "medium", "mid", "high", "complex", "hard", "difficult", "thorough", "detailed", "deep", "heavy",
-    "l3", "l4", "l5",
+    "复杂", "深入", "详细", "高复杂度", "高难度", "中高复杂度",
+    "high", "complex", "hard", "difficult", "thorough", "detailed", "deep", "heavy",
+    "l4",
+)
+USER_COMPLEXITY_EXPERT_TOKENS = (
+    "专家级", "系統級", "系统级", "生产级", "企業級", "企业级", "高风险", "超高复杂度",
+    "expert", "advanced", "system-level", "production-ready", "enterprise", "mission-critical",
+    "l5",
 )
 PLAN_MODE_EXPLORER_MAX_ROUNDS = 8
+PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS = 3
 # Reviewer debug mode
 REVIEWER_DEBUG_MODE_MAX_ROUNDS = 6
 REVIEWER_DEBUG_TOOL_ALLOWLIST = {
@@ -492,7 +560,7 @@ REVIEWER_DEBUG_TOOL_ALLOWLIST = {
 }
 EXPLORER_STALL_THRESHOLD = 3  # consecutive same-target delegations before forced switch
 DEVELOPER_EDIT_STALL_THRESHOLD = 3  # consecutive edit_file failures on same file before forced strategy change
-PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS = 6144
+PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS = 8192
 PLAN_MODE_MAX_OPTIONS = 3
 PLAN_FILE_RELATIVE_PATH = ".clouds_coder/plan.md"
 PLAN_BUBBLE_MAX_CHARS = 12_000
@@ -2014,6 +2082,55 @@ def extract_daily_session_limit_setting(raw: object) -> int | None:
     return None
+def extract_shell_command_timeout_setting(raw: object) -> int | None:
+    """Read shell/bash command timeout from config dict.
+    Accepted keys:
+      - shell_command_timeout
+      - shell_timeout
+      - bash_timeout
+      - command_timeout
+    Sections searched: top-level, then 'startup' / 'runtime' / 'shell' / 'tools' / 'execution'.
+    Returns a clamped positive integer, or None if no setting is present.
+    """
+    if not isinstance(raw, dict):
+        return None
+    def _parse_timeout(value: object) -> int | None:
+        if value is None or isinstance(value, bool):
+            return None
+        try:
+            text = str(value).strip()
+            if not text:
+                return None
+            return normalize_timeout_seconds(
+                text,
+                minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+                maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+                fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+            )
+        except Exception:
+            return None
+    keys = (
+        "shell_command_timeout",
+        "shell_timeout",
+        "bash_timeout",
+        "command_timeout",
+    )
+    for key in keys:
+        if key in raw:
+            return _parse_timeout(raw.get(key))
+    for section_key in ("startup", "runtime", "shell", "tools", "execution"):
+        section = raw.get(section_key)
+        if not isinstance(section, dict):
+            continue
+        for key in keys:
+            if key in section:
+                return _parse_timeout(section.get(key))
+    return None
 class SessionCreationLimitExceeded(RuntimeError):
     def __init__(self, status: dict):
         self.status = dict(status or {})
@@ -2986,19 +3103,243 @@ def decompress_text_blob(blob_b64: str) -> str:
     except Exception:
         return ""
+def normalize_embedded_newlines(text: object) -> str:
+    s = str(text or "")
+    if not s:
+        return ""
+    s = s.replace("\u2028", "\n").replace("\u2029", "\n")
+    s = s.replace("\r\n", "\n").replace("\r", "\n")
+    if "\\n" in s or "\\r" in s or "\\t" in s:
+        s = s.replace("\\r\\n", "\n").replace("\\n", "\n").replace("\\r", "\n").replace("\\t", "\t")
+    return s
+def _map_todo_status_token(token: str) -> str:
+    raw = str(token or "").strip().lower().replace("_", " ").replace("-", " ")
+    raw = re.sub(r"\s+", " ", raw)
+    return {
+        "pending": "pending",
+        "待处理": "pending",
+        "待處理": "pending",
+        "未着手": "pending",
+        "in progress": "in_progress",
+        "进行中": "in_progress",
+        "進行中": "in_progress",
+        "completed": "completed",
+        "已完成": "completed",
+        "完了": "completed",
+        "blocked": "pending",
+    }.get(raw, "")
+def split_todo_status_text(text: object) -> tuple[str, str]:
+    probe = normalize_embedded_newlines(text).strip()
+    if not probe:
+        return "", ""
+    status = ""
+    marker_prefix = r"(?:[-*•>]+\s*)?"
+    for _ in range(4):
+        before = probe
+        probe = re.sub(r"^\s+", "", probe)
+        matched = False
+        for row_status, pattern in (
+            (
+                "completed",
+                rf"^(?:{marker_prefix})(?:"
+                rf"\[x\]\s*"
+                rf")",
+            ),
+            (
+                "in_progress",
+                rf"^(?:{marker_prefix})(?:"
+                rf"\[>\]\s*"
+                rf")",
+            ),
+            (
+                "pending",
+                rf"^(?:{marker_prefix})(?:"
+                rf"\[\s*\]\s*"
+                rf")",
+            ),
+        ):
+            m = re.match(pattern, probe, flags=re.IGNORECASE)
+            if not m:
+                continue
+            status = row_status
+            probe = probe[m.end():].strip()
+            matched = True
+            break
+        if matched:
+            continue
+        m = re.match(
+            rf"^(?:{marker_prefix})"
+            rf"(pending|in[_\-\s]?progress|completed|blocked|"
+            rf"待处理|待處理|未着手|进行中|進行中|已完成|完了)"
+            rf"\s*[：:\-\]]\s*",
+            probe,
+            flags=re.IGNORECASE,
+        )
+        if m:
+            mapped = _map_todo_status_token(str(m.group(1) or ""))
+            if mapped:
+                status = mapped
+            probe = probe[m.end():].strip()
+            continue
+        if probe == before:
+            break
+    return status, probe.strip()
+def extract_todo_rows_from_text(
+    text: object,
+    *,
+    default_parent_step_id: str = "",
+    limit: int = 12,
+) -> list[dict]:
+    src = normalize_embedded_newlines(text)
+    if not src.strip():
+        return []
+    out: list[dict] = []
+    seen: set[tuple[str, str, str]] = set()
+    capped = max(1, min(40, int(limit or 12)))
+    parent_step_id = trim(str(default_parent_step_id or "").strip(), 20)
+    for raw_line in src.splitlines():
+        line = trim(str(raw_line or "").strip(), 600)
+        if not line:
+            continue
+        variants: list[str] = []
+        for candidate in (
+            line,
+            re.sub(r"^\s*(?:[-*•>]+\s*)+", "", line).strip(),
+            re.sub(r"^\s*\*\*([^*]+)\*\*\s*([：:])\s*", r"\1\2 ", line).strip(),
+            re.sub(r"^\s*(?:[-*•>]+\s*)*\*\*([^*]+)\*\*\s*([：:])\s*", r"\1\2 ", line).strip(),
+        ):
+            candidate = trim(str(candidate or "").strip(), 600)
+            if candidate and candidate not in variants:
+                variants.append(candidate)
+        matched = False
+        for candidate in variants:
+            status, content = split_todo_status_text(candidate)
+            if not status or not content:
+                continue
+            cleaned = normalize_work_text(content, status) or content
+            cleaned = trim(cleaned.strip(), 400)
+            if not cleaned:
+                continue
+            low = cleaned.lower()
+            if low in {
+                "todo",
+                "todos",
+                "task",
+                "tasks",
+                "subtask",
+                "subtasks",
+                "待办",
+                "待辦",
+                "子任务",
+                "子任務",
+            }:
+                continue
+            row = {"content": cleaned, "status": status}
+            if parent_step_id:
+                row["parent_step_id"] = parent_step_id
+            identity = (
+                status,
+                normalize_work_text(cleaned, status).strip().lower(),
+                parent_step_id,
+            )
+            if identity in seen:
+                matched = True
+                break
+            seen.add(identity)
+            out.append(row)
+            matched = True
+            break
+        if matched and len(out) >= capped:
+            break
+    return out
+def infer_todo_status_from_text(text: object, default: str = "pending") -> str:
+    status, content = split_todo_status_text(text)
+    if not content and not status:
+        return default
+    if status:
+        return status
+    return default
+def split_structured_todo_content(text: object, limit: int = 7) -> list[str]:
+    src = normalize_embedded_newlines(text).strip()
+    if not src:
+        return []
+    lines = [trim(str(line or "").strip(), 500) for line in src.split("\n")]
+    lines = [line for line in lines if line]
+    if len(lines) <= 1:
+        return [src]
+    major_re = re.compile(r"^(\d+)\.\s+(.+)$")
+    sub_re = re.compile(r"^(\d+)\.(\d+)\s+(.+)$")
+    bullet_re = re.compile(r"^(?:[-*•]\s+)(.+)$")
+    header_major = ""
+    m0 = major_re.match(lines[0])
+    if m0:
+        header_major = str(m0.group(1) or "")
+    picked: list[str] = []
+    for idx, line in enumerate(lines):
+        if idx == 0 and header_major:
+            continue
+        m_sub = sub_re.match(line)
+        if m_sub:
+            major = str(m_sub.group(1) or "")
+            if header_major and major != header_major:
+                if picked:
+                    break
+                continue
+            picked.append(f"{major}.{m_sub.group(2)} {trim(str(m_sub.group(3) or '').strip(), 420)}".strip())
+            continue
+        m_bullet = bullet_re.match(line)
+        if m_bullet and (header_major or picked):
+            picked.append(trim(str(m_bullet.group(1) or "").strip(), 420))
+            continue
+        if picked and re.match(r"^\d+\.\s+", line):
+            break
+    if not picked:
+        for line in lines:
+            m_sub = sub_re.match(line)
+            if m_sub:
+                picked.append(f"{m_sub.group(1)}.{m_sub.group(2)} {trim(str(m_sub.group(3) or '').strip(), 420)}".strip())
+                if len(picked) >= max(1, int(limit or 7)):
+                    break
+    if not picked:
+        return [src]
+    out: list[str] = []
+    seen: set[str] = set()
+    for line in picked:
+        key = re.sub(r"\s+", " ", str(line or "").strip()).lower()
+        if not key or key in seen:
+            continue
+        seen.add(key)
+        out.append(line)
+        if len(out) >= max(1, int(limit or 7)):
+            break
+    return out or [src]
 def normalize_work_text(text: object, status: str = "") -> str:
-    s = re.sub(r"\s+", " ", str(text or "")).strip()
+    parsed_status, parsed_content = split_todo_status_text(text)
+    s = re.sub(r"\s+", " ", parsed_content or normalize_embedded_newlines(text)).strip()
     if not s:
         return ""
-    s = re.sub(r"^\[[ x>\-]\]\s*", "", s, flags=re.IGNORECASE)
     s = re.sub(
-        r"^(pending|in[_\-\s]?progress|completed|done|blocked)\s*[·:\-\]]\s*",
+        r"^(pending|todo|in[_\-\s]?progress|doing|working|completed|done|finished|blocked|"
+        r"待处理|待處理|未着手|进行中|進行中|作業中|已完成|完成|完了)\s*[·：:\-\]]\s*",
         "",
         s,
         flags=re.IGNORECASE,
     )
-    if status:
-        status_pattern = re.escape(status).replace("_", r"[_\-\s]?")
+    status_key = _map_todo_status_token(status) or _map_todo_status_token(parsed_status) or str(status or "").strip().lower()
+    if status_key:
+        status_pattern = re.escape(status_key).replace("_", r"[_\-\s]?")
         s = re.sub(
             rf"\s*[—-]\s*{status_pattern}\s*$",
             "",
@@ -3429,6 +3770,12 @@ def infer_user_complexity_value(text: str) -> str:
     low = strip_thinking_content(str(text or "")).strip().lower()
     if not low:
         return ""
+    for token in USER_COMPLEXITY_EXPERT_TOKENS:
+        if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
+            return "expert"
+    for token in USER_COMPLEXITY_MODERATE_TOKENS:
+        if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
+            return "moderate"
     for token in USER_COMPLEXITY_SIMPLE_TOKENS:
         if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
             return "simple"
@@ -3437,6 +3784,53 @@ def infer_user_complexity_value(text: str) -> str:
             return "complex"
     return ""
+def normalize_task_complexity(raw: object, default: str = "simple") -> str:
+    value = str(raw or "").strip().lower()
+    aliases = {
+        "simple": "simple",
+        "low": "simple",
+        "basic": "simple",
+        "minimal": "simple",
+        "moderate": "moderate",
+        "medium": "moderate",
+        "mid": "moderate",
+        "balanced": "moderate",
+        "standard": "moderate",
+        "complex": "complex",
+        "high": "complex",
+        "hard": "complex",
+        "difficult": "complex",
+        "expert": "expert",
+        "advanced": "expert",
+        "system": "expert",
+        "system_level": "expert",
+        "production": "expert",
+    }
+    normalized = aliases.get(value, value)
+    if normalized in TASK_COMPLEXITY_LEVELS:
+        return normalized
+    fallback = str(default or "").strip().lower()
+    if not fallback:
+        return ""
+    return fallback if fallback in TASK_COMPLEXITY_LEVELS else "simple"
+def task_complexity_rank(raw: object, default: str = "simple") -> int:
+    return int(TASK_COMPLEXITY_RANKS.get(normalize_task_complexity(raw, default=default), 1))
+def task_complexity_at_least(raw: object, threshold: str) -> bool:
+    return task_complexity_rank(raw) >= task_complexity_rank(threshold)
+def max_task_complexity(*values: object, default: str = "simple") -> str:
+    best = normalize_task_complexity(default, default=default)
+    best_rank = task_complexity_rank(best, default=default)
+    for value in values:
+        cur = normalize_task_complexity(value, default=default)
+        cur_rank = task_complexity_rank(cur, default=default)
+        if cur_rank > best_rank:
+            best = cur
+            best_rank = cur_rank
+    return best
 def normalize_openai_compat_provider_name(raw: str) -> str:
     value = str(raw or "").strip().lower().replace("-", "_")
     aliases = {
@@ -5236,6 +5630,31 @@ class TodoManager:
     def update(self, items: list[dict]) -> str:
         if not isinstance(items, list):
             raise ValueError("items must be array")
+        expanded_items: list[dict] = []
+        for item in items:
+            if isinstance(item, str):
+                raw = {"content": item, "status": "pending"}
+            elif isinstance(item, dict):
+                raw = dict(item)
+            else:
+                try:
+                    raw = {"content": str(item).strip(), "status": "pending"}
+                except Exception:
+                    continue
+            raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
+            split_rows = split_structured_todo_content(raw_content, limit=7)
+            if len(split_rows) <= 1:
+                expanded_items.append(raw)
+                continue
+            base_status = str(raw.get("status", raw.get("state", "pending")) or "pending").strip().lower()
+            for split_idx, split_content in enumerate(split_rows):
+                split_raw = dict(raw)
+                split_raw["content"] = split_content
+                split_raw["status"] = infer_todo_status_from_text(
+                    split_content,
+                    default=(base_status if split_idx == 0 else "pending"),
+                )
+                expanded_items.append(split_raw)
         validated = []
         # Plan-step items (bb:proj: key) keep a single in_progress slot.
         # Worker/non-plan items allow one in_progress per owner so sync-mode agents
@@ -5251,23 +5670,21 @@ class TodoManager:
             "finish": "completed",
             "finished": "completed",
         }
-        for idx, item in enumerate(items):
-            if isinstance(item, str):
-                raw = {"content": item, "status": "pending"}
-            elif isinstance(item, dict):
-                raw = item
-            else:
-                raise ValueError(f"item {idx}: invalid type")
+        for idx, item in enumerate(expanded_items):
+            raw = item if isinstance(item, dict) else {"content": str(item or "").strip(), "status": "pending"}
             raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
+            inferred_status = infer_todo_status_from_text(raw_content, default="")
             content = normalize_work_text(raw_content)
             if not content:
                 content = raw_content
             if not content:
-                raise ValueError(f"item {idx}: content required")
+                continue  # Skip empty items instead of raising
             raw_status = str(raw.get("status", raw.get("state", "pending"))).strip().lower()
             status = status_alias.get(raw_status, raw_status or "pending")
+            if inferred_status and status in {"", "pending", "todo"}:
+                status = inferred_status
             if status not in {"pending", "in_progress", "completed"}:
-                status = "pending"
+                status = inferred_status or "pending"
             content = normalize_work_text(content, status) or content
             active_form = str(
                 raw.get(
@@ -7051,9 +7468,11 @@ Use this skill when:
 6. Report rewritten count, copied files, and unresolved URLs.
 ## Rules
+- Treat `./js_lib` and `/js_lib/...` as workspace lookup locations only, not final browser-facing URLs.
 - Keep `./js` per HTML location (do not hardcode global absolute paths).
 - Keep file names deterministic and safe (`[A-Za-z0-9._-]`).
 - Preserve existing relative local script paths if already offline-ready.
+- Final HTML must not point to `/js_lib/...`, `/assets/js_lib/...`, or other virtual asset aliases; copy first, then use plain relative paths.
 ## Output Contract
 Return:
@@ -12420,12 +12839,12 @@ TOOLS = [
     ),
     tool_def("write_file", "Write file content.", {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
     tool_def("edit_file", "Edit a file by replacing first match.", {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, ["path", "old_text", "new_text"]),
-    tool_def("TodoWrite", "Update todo list.", {"items": {"type": "array", "items": {"type": "object"}}}, ["items"]),
+    tool_def("TodoWrite", "Update todo list. Preferred format: objects with content/status/owner/parent_step_id. String fallback should use only '[ ] task', '[>] task', or '[x] task'.", {"items": {"type": "array", "items": {}}}, ["items"]),
     tool_def(
         "TodoWriteRescue",
-        "Fallback todo writer when TodoWrite keeps failing/repeating. Accepts simple string items and auto-normalizes schema.",
+        "Fallback todo writer. Preferred format: objects with content/status/owner/parent_step_id. String fallback should use only '[ ] task', '[>] task', or '[x] task'.",
         {
-            "items": {"type": "array", "items": {"type": "string"}},
+            "items": {"type": "array", "items": {}},
             "in_progress_index": {"type": "integer"},
         },
         ["items"],
@@ -12694,6 +13113,7 @@ class SessionState:
         context_limit_locked: bool = False,
         max_rounds: int = MAX_AGENT_ROUNDS,
         max_run_seconds: int = MAX_RUN_SECONDS,
+        shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
         auto_model_switch: bool = False,
         arbiter_enabled: bool = True,
         arbiter_model: str = "",
@@ -12822,6 +13242,7 @@ class SessionState:
         self.runtime_complexity_floor = ""
         self.runtime_task_level_floor = 0
         self.runtime_task_level_ceiling = 0  # 0 = no ceiling; set from plan risk on approval
+        self._todowrite_step_counter: dict[str, int] = {}  # Fix 5: track consecutive TodoWrite per step for loop detection
         self.runtime_scale_preference = "balanced"
         self.runtime_direct_objective = ""
         self.runtime_reclassify_goal = ""
@@ -12855,6 +13276,8 @@ class SessionState:
         self._cached_llm_complexity = ""
         self._cached_complexity_dimensions: dict = {}  # scope/steps/skill/output dimensions
         self._pending_media_inputs: list[dict] = []
+        self._pending_runtime_updates: list[dict] = []
+        self._deferred_runtime_sync_requested = False
         self.tool_retry_counts: dict[str, int] = {}
         self.last_auto_title_ts = 0.0
         self.live_thinking_text = ""
@@ -12903,6 +13326,12 @@ class SessionState:
             maximum=MAX_RUN_TIMEOUT_SECONDS,
             fallback=MAX_RUN_SECONDS,
         )
+        self.shell_command_timeout_seconds = normalize_timeout_seconds(
+            shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+            minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+            maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+            fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+        )
         self.truncation_count = 0
         self.last_truncation_ts = 0.0
         self.truncation_rescue_task_ids: list[int] = []
@@ -14345,9 +14774,9 @@ class SessionState:
         )
         if task_type in TASK_PROFILE_TYPES:
             self.runtime_task_type = task_type
-        complexity = trim(
-            str(profile.get("complexity", judgement.get("complexity", self.runtime_task_complexity or "")) or "").strip().lower(),
-            20,
+        complexity = normalize_task_complexity(
+            profile.get("complexity", judgement.get("complexity", self.runtime_task_complexity or "")),
+            default="simple",
         )
         if complexity in TASK_COMPLEXITY_LEVELS:
             self.runtime_task_complexity = complexity
@@ -14817,12 +15246,15 @@ class SessionState:
     def _current_plan_step_text(self, board: dict | None = None) -> str:
         row = self._current_plan_step_row(board)
-        return trim(str((row or {}).get("content", "") or "").strip(), 400)
+        content = normalize_embedded_newlines((row or {}).get("content", "") or "").strip()
+        if "\n" in content:
+            content = content.split("\n", 1)[0].strip()
+        return trim(content, 400)
     def _current_plan_step_full_text(self, board: dict | None = None, max_len: int = 1200) -> str:
         row = self._current_plan_step_row(board)
         return trim(
-            str((row or {}).get("full_content", "") or (row or {}).get("content", "") or "").strip(),
+            normalize_embedded_newlines((row or {}).get("full_content", "") or (row or {}).get("content", "") or "").strip(),
             max_len,
         )
@@ -15000,7 +15432,7 @@ class SessionState:
                 pass
         t = threading.Thread(target=_llm_match, daemon=True)
         t.start()
-        t.join(timeout=60.0)
+        t.join(timeout=5.0)
         if llm_result:
             matched_names = llm_result
             self._emit("status", {"summary": f"skill discovery (LLM task analysis): {matched_names} ({trigger})"})
@@ -15034,7 +15466,7 @@ class SessionState:
         # --- Path 3: Deferred LLM pickup if still running ---
         if not matched_names and t.is_alive():
             def _deferred_llm_pickup():
-                t.join(timeout=60.0)
+                t.join(timeout=8.0)
                 if llm_result and not self._loaded_skill_rows():
                     for name_str in llm_result[:3]:
                         try:
@@ -15431,7 +15863,6 @@ class SessionState:
             "ENGINEERING EXECUTION DISCIPLINE: "
             "For coding, bug-fix, architecture, integration, and testing work, proactively use the skill system when a matching skill exists. "
             "Do not wait for failure before calling list_skills/load_skill for debugging, API, frontend, parser, or recovery workflows. "
-            "Already-loaded skills appear as <loaded-skill> messages — use them directly without reloading. "
             "Use a root-cause-first loop: inspect the exact error or failing behavior, read the implicated file or path, form one concrete hypothesis, apply one bounded fix, then run at least one fix-and-verify cycle before declaring success. "
             "If read_file or bash reports a missing path, empty folder, or mismatched filename, stop repeating the same lookup. "
             "Reconcile the path against uploads, recent file paths, file explorer entries, and close workspace matches; then either open the closest candidate or create the intended target. "
@@ -15485,6 +15916,10 @@ class SessionState:
             f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
             f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS require) or pptxgen.bundle.js (browser). "
             f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
+            "IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
+            "If an HTML file uses any asset from js_lib, copy that file into a task-local relative asset folder "
+            "(for example './js/' or './assets/vendor/') next to the deliverable, then reference it with a plain relative path in HTML. "
+            "Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases inside final exported HTML. "
             f"Task level={runtime_level}, mode={runtime_mode}, "
             f"budget={'unlimited' if budget <= 0 else budget}. "
             f"Context limit ~{self.context_token_upper_bound} tokens. "
@@ -19868,11 +20303,17 @@ body{padding:18px}
             with self.lock:
                 if self.running:
                     config_delayed = True
-                    self._pending_media_inputs.append({
-                        "type": "deferred_config",
-                        "config": cfg_obj,
-                        "source": workspace_rel,
-                    })
+            if config_delayed:
+                self._queue_deferred_runtime_update(
+                    "llm_config",
+                    {"config": cfg_obj, "source": workspace_rel},
+                )
+                loaded_config = self.model_catalog()
+                if isinstance(loaded_config, dict):
+                    loaded_config["queued"] = True
+                    loaded_config["note"] = (
+                        "session is running; llm config queued and will apply after the current run finishes"
+                    )
             if not config_delayed:
                 loaded_config = self.load_llm_config(cfg_obj, source=workspace_rel)
             self._emit("config_applied", {
@@ -21038,7 +21479,7 @@ body{padding:18px}
         return any(x in t for x in markers)
     def _llm_classify_task_complexity(self, goal_text: str) -> str:
-        """LLM semantic pre-screening: classify task as simple/complex via 4-dimension analysis. 5s timeout."""
+        """LLM semantic pre-screening: classify task into 4 complexity bands via 4-dimension analysis. 5s timeout."""
         goal = trim(str(goal_text or ""), 400)
         if not goal or len(goal) < 6:
             return "simple"
@@ -21055,8 +21496,7 @@ body{padding:18px}
                         f"SKILL: does it need specialized tools, skills, research, or APIs?\n"
                         f"OUTPUT: what is expected (1=text answer, 2=single file, 3=system/multi-file)?\n\n"
                         f"Output exactly one line:\n"
-                        f"SCOPE:N STEPS:N SKILL:N OUTPUT:N VERDICT:SIMPLE|COMPLEX\n"
-                        f"(COMPLEX if any dimension >= 2)"
+                        f"SCOPE:N STEPS:N SKILL:N OUTPUT:N VERDICT:SIMPLE|MODERATE|COMPLEX|EXPERT"
                     )}],
                     system="/no_think\nAnalyze task dimensions. One line output only.",
                     max_tokens=40,
@@ -21071,8 +21511,26 @@ body{padding:18px}
                         dims[dim.lower()] = int(m.group(1))
                 if dims:
                     self._cached_complexity_dimensions = dims
-                if "COMPLEX" in answer:
+                    vals = [int(v) for v in dims.values()]
+                    max_dim = max(vals) if vals else 1
+                    count_ge2 = sum(1 for v in vals if int(v) >= 2)
+                    count_ge3 = sum(1 for v in vals if int(v) >= 3)
+                    if max_dim <= 1:
+                        result_box[0] = "simple"
+                    elif max_dim == 2:
+                        result_box[0] = "moderate"
+                    elif count_ge3 >= 2 or count_ge2 >= 4:
+                        result_box[0] = "expert"
+                    else:
+                        result_box[0] = "complex"
+                if "VERDICT:EXPERT" in answer:
+                    result_box[0] = "expert"
+                elif "VERDICT:COMPLEX" in answer:
                     result_box[0] = "complex"
+                elif "VERDICT:MODERATE" in answer:
+                    result_box[0] = "moderate"
+                elif "VERDICT:SIMPLE" in answer:
+                    result_box[0] = "simple"
             except Exception:
                 pass
         t = threading.Thread(target=_classify, daemon=True)
@@ -21085,9 +21543,9 @@ body{padding:18px}
         low = clean.lower()
         explicit_complexity = infer_user_complexity_value(clean)
         # Use cached LLM complexity result (set by _agent_worker entry point)
-        llm_complexity = str(getattr(self, '_cached_llm_complexity', '') or '')
-        nontrivial = self._looks_nontrivial_request(clean) or llm_complexity == "complex"
-        direct_question = self._looks_like_direct_question_request(clean) and llm_complexity != "complex"
+        llm_complexity = normalize_task_complexity(str(getattr(self, '_cached_llm_complexity', '') or ''), default="simple")
+        nontrivial = self._looks_nontrivial_request(clean) or task_complexity_at_least(llm_complexity, "moderate")
+        direct_question = self._looks_like_direct_question_request(clean) and (not task_complexity_at_least(llm_complexity, "moderate"))
         code_markers = [
             # 代码/编程
             "代码", "寫代碼", "写代码", "脚本", "模块", "函数", "class", "bug",
@@ -21124,6 +21582,7 @@ body{padding:18px}
         has_code_intent = any(x in low for x in code_markers)
         has_research_intent = any(x in low for x in research_markers)
         length = len(clean)
+        derived_complexity = max_task_complexity(explicit_complexity, llm_complexity, default="simple")
         if direct_question and (not nontrivial) and (not has_code_intent) and length <= 220:
             return {
                 "task_type": "simple_qa",
@@ -21154,7 +21613,11 @@ body{padding:18px}
         if has_research_intent and (not has_code_intent):
             return {
                 "task_type": "research",
-                "complexity": explicit_complexity or ("complex" if (nontrivial or length >= 280) else "simple"),
+                "complexity": explicit_complexity or max_task_complexity(
+                    derived_complexity,
+                    ("complex" if length >= 480 else "moderate" if (nontrivial or length >= 280) else "simple"),
+                    default="simple",
+                ),
                 "direct_objective": "Collect evidence first, then synthesize a concise actionable answer.",
                 "recommended_agents": ["explorer", "developer", "reviewer"],
                 "round_budget": 10 if (nontrivial or length >= 280) else 6,
@@ -21165,7 +21628,15 @@ body{padding:18px}
         if nontrivial or has_code_intent or length >= 280:
             return {
                 "task_type": "engineering",
-                "complexity": explicit_complexity or "complex",
+                "complexity": explicit_complexity or max_task_complexity(
+                    derived_complexity,
+                    (
+                        "expert"
+                        if ((has_code_intent and has_research_intent) or length >= 900)
+                        else "complex"
+                    ),
+                    default="moderate",
+                ),
                 "direct_objective": (
                     "Use blackboard collaboration to implement, validate, and converge with concrete outputs."
                 ),
@@ -21177,7 +21648,7 @@ body{padding:18px}
             }
         return {
             "task_type": "general",
-            "complexity": explicit_complexity or "simple",
+            "complexity": explicit_complexity or derived_complexity or "simple",
             "direct_objective": (
                 "Provide the most direct useful response with minimal orchestration, "
                 "anchored to the current project context and user goal."
@@ -21439,6 +21910,66 @@ body{padding:18px}
         model = str(profile.get("model", self.ollama.model) or self.ollama.model).strip()
         return f"{self.active_profile_id}::{model}"
+    def _queue_deferred_runtime_update(self, kind: str, payload: dict) -> int:
+        row = {
+            "kind": str(kind or "").strip().lower(),
+            "payload": dict(payload or {}),
+            "queued_at": float(now_ts()),
+        }
+        if not row["kind"]:
+            raise ValueError("deferred runtime update kind required")
+        with self.lock:
+            self._pending_runtime_updates.append(row)
+            self._pending_runtime_updates = self._pending_runtime_updates[-16:]
+            queued = len(self._pending_runtime_updates)
+            self.updated_at = now_ts()
+            self._persist()
+        return queued
+    def _apply_deferred_runtime_updates(self) -> list[str]:
+        """Drain the deferred runtime-update queue and apply each entry.
+
+        Returns ``[]`` without touching the queue while ``self.running`` is
+        set or when the queue is empty. Otherwise the queue is copied and
+        cleared under ``self.lock``; the entries are applied after the lock
+        has been released.
+
+        Returns:
+            One note per entry that was applied. An entry that raises is
+            reported through a ``status`` event and contributes no note.
+        """
+        with self.lock:
+            if self.running or not self._pending_runtime_updates:
+                return []
+            # Snapshot and clear in one critical section.
+            queued = list(self._pending_runtime_updates)
+            self._pending_runtime_updates = []
+            self.updated_at = now_ts()
+            self._persist()
+        applied_notes: list[str] = []
+        sync_needed = False
+        for item in queued:
+            kind = str(item.get("kind", "") or "").strip().lower()
+            payload = item.get("payload", {}) if isinstance(item.get("payload"), dict) else {}
+            # Each entry is applied in its own try block, so one failure does not stop the rest.
+            try:
+                if kind == "llm_config":
+                    source = str(payload.get("source", "") or "deferred-config").strip()
+                    config = payload.get("config", {})
+                    # An empty or non-dict config is skipped without a note or an event.
+                    if isinstance(config, dict) and config:
+                        self.load_llm_config(config, source=source)
+                        applied_notes.append(f"deferred llm config applied: {trim(source, 120)}")
+                        sync_needed = True
+                elif kind == "model_selection":
+                    selection = str(payload.get("selection", "") or "").strip()
+                    model_override = payload.get("model_override")
+                    # A non-string override is passed on as None.
+                    self.set_runtime_selection(
+                        selection,
+                        model_override if isinstance(model_override, str) else None,
+                    )
+                    applied_notes.append(f"deferred model switch applied: {trim(selection, 120)}")
+                    sync_needed = True
+                # Any other kind matches neither branch and is dropped silently.
+            except Exception as exc:
+                self._emit(
+                    "status",
+                    {
+                        "summary": (
+                            f"deferred runtime update failed ({kind or 'unknown'}): "
+                            f"{trim(str(exc), 180)}"
+                        )
+                    },
+                )
+        if sync_needed:
+            # Only the flag is raised here; its consumer is not visible in this hunk.
+            self._deferred_runtime_sync_requested = True
+        return applied_notes
     def _global_wait_timeout_seconds(self) -> int:
         raw = (
             self.max_run_seconds
@@ -21997,10 +22528,12 @@ body{padding:18px}
                     _stop_process(proc)
                     meta["error"] = "Error: interrupted by user"
                     meta["exit_code"] = -130
+                    break
                 elif (not meta.get("error")) and timeout > 0 and elapsed >= timeout:
                     _stop_process(proc)
                     meta["error"] = f"Error: timeout ({timeout}s)"
                     meta["exit_code"] = -1
+                    break
                 try:
                     label, chunk = io_queue.get(timeout=0.12)
                     if chunk is None:
@@ -22085,6 +22618,7 @@ body{padding:18px}
                 if create_group > 0:
                     popen_kwargs["creationflags"] = create_group
             proc = subprocess.Popen(effective_command, **popen_kwargs)
+            self._running_bash_proc = proc
             if os.name == "nt":
                 # Windows: read PIPE output via blocking reader threads + queue.
                 _collect_with_reader_threads(proc)
@@ -22110,10 +22644,12 @@ body{padding:18px}
                                 _stop_process(proc)
                                 meta["error"] = "Error: interrupted by user"
                                 meta["exit_code"] = -130
+                                break
                             elif timeout > 0 and elapsed >= timeout:
                                 _stop_process(proc)
                                 meta["error"] = f"Error: timeout ({timeout}s)"
                                 meta["exit_code"] = -1
+                                break
                             events = sel.select(timeout=0.12)
                             for key, _ in events:
                                 stream = key.fileobj
@@ -22171,6 +22707,8 @@ body{padding:18px}
                 meta["error"] = f"Error: {exc}"
                 meta["output"] = meta["error"]
                 meta["exit_code"] = -1
+        finally:
+            self._running_bash_proc = None
         meta["duration_ms"] = int((time.time() - start) * 1000)
         after = self._git_status_map(cwd)
         meta["changed_files"] = self._status_delta(before, after) if before or after else []
@@ -22181,11 +22719,19 @@ body{padding:18px}
                     str(meta.get("output") or meta.get("error") or "(no output)"),
                     cwd=cwd,
                 )
-            )
+        )
         return meta
+    def _shell_command_timeout(self) -> int:
+        return normalize_timeout_seconds(
+            getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
+            minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+            maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+            fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+        )
     def _run_bash(self, command: str) -> str:
-        return self._run_shell_meta(command, self.files_root, 120)["output"]
+        return self._run_shell_meta(command, self.files_root, self._shell_command_timeout())["output"]
     def _fuzzy_resolve_path(self, fp: Path) -> Path:
         """If fp doesn't exist, try stripping spaces from the filename to find a close match.
@@ -22883,10 +23429,10 @@ body{padding:18px}
             "3) scaffold semantic HTML; "
             "4) apply CSS tokens + responsive layout; "
             "5) wire JS state/data interactions; "
-            "6) localize external JS dependencies to ./js from ./js_lib; "
+            "6) localize external JS dependencies to a task-local relative folder such as ./js from ./js_lib, and rewrite final HTML to plain relative paths; "
             "7) run QA loop for desktop/mobile/a11y/performance and iterate. "
             f"Offline JS libs available now: {libs_hint}. "
-            "Final exported HTML should avoid unresolved CDN-only script src."
+            "Final exported HTML should avoid unresolved CDN-only script src and must not keep '/js_lib/...' or '/assets/js_lib/...' virtual URLs."
         )
     def _contains_any_keyword(self, text: str, keywords: tuple[str, ...]) -> bool:
@@ -23050,9 +23596,10 @@ body{padding:18px}
         ) or str(base.get("task_type", "general"))
         if task_type not in TASK_PROFILE_TYPES:
             task_type = str(base.get("task_type", "general"))
-        complexity = str(src.get("complexity", base.get("complexity", "simple")) or "").strip().lower()
-        if complexity not in TASK_COMPLEXITY_LEVELS:
-            complexity = str(base.get("complexity", "simple"))
+        complexity = normalize_task_complexity(
+            src.get("complexity", base.get("complexity", "simple")),
+            default=str(base.get("complexity", "simple") or "simple"),
+        )
         src_direct_objective = trim(str(src.get("direct_objective", "") or "").strip(), 800)
         legacy_objectives = {
             "Provide the most direct useful response with minimal orchestration.",
@@ -23089,9 +23636,9 @@ body{padding:18px}
             if raw_level not in TASK_LEVEL_CHOICES:
                 if task_type == "simple_qa":
                     raw_level = 1 if len(str(goal or "")) <= 180 else 2
-                elif task_type in {"simple_code", "research"} and complexity == "simple":
+                elif task_type in {"simple_code", "research"} and task_complexity_rank(complexity) <= task_complexity_rank("moderate"):
                     raw_level = 3
-                elif complexity == "complex":
+                elif task_complexity_at_least(complexity, "complex"):
                     raw_level = 4
                 else:
                     raw_level = 2
@@ -23180,7 +23727,7 @@ body{padding:18px}
         goal = str(bb.get("original_goal", "") or "")
         current = bb.get("task_profile", {})
         profile = self._normalize_task_profile(goal, {} if force else current)
-        if profile.get("complexity") == "simple":
+        if task_complexity_rank(profile.get("complexity", "simple")) < task_complexity_rank("complex"):
             logs = bb.get("execution_logs", []) if isinstance(bb.get("execution_logs"), list) else []
             tail = "\n".join(
                 str((row or {}).get("content", "") or "")
@@ -23284,10 +23831,16 @@ body{padding:18px}
         # Project todo gate: coding tasks must pass compile + test
         profile = self._ensure_blackboard_task_profile(bb)
         task_type = str(profile.get("task_type", "general") or "general")
+        exec_mode = normalize_execution_mode(
+            profile.get("execution_mode", self._effective_execution_mode()),
+            default=self._effective_execution_mode(),
+        )
         if task_type in ("simple_code", "engineering"):
             for todo in bb.get("project_todos", []):
                 if todo.get("category") in ("compile_test", "min_test") and todo.get("status") != "completed":
                     return False, f"project-todo-incomplete:{todo.get('category', '')}"
+        if exec_mode == EXECUTION_MODE_SYNC and not self._manager_feedback_passed_from_blackboard(bb):
+            return False, "sync-review-missing"
         return True, "ok"
     def _invalidate_stale_approval_if_needed(
@@ -23505,6 +24058,10 @@ body{padding:18px}
     def _watchdog_state_fingerprint(self, board: dict | None = None) -> str:
         bb = board if isinstance(board, dict) else self._ensure_blackboard()
         profile = self._ensure_blackboard_task_profile(bb)
+        step_snapshot = self._active_plan_progress_snapshot(bb)
+        last_reply = bb.get("last_worker_reply", {}) if isinstance(bb.get("last_worker_reply"), dict) else {}
+        last_reply_role = self._sanitize_agent_role(last_reply.get("role", ""))
+        last_reply_text = trim(str(last_reply.get("text", "") or "").strip(), 240)
         payload = {
             "status": self._normalize_blackboard_status(bb.get("status", "INITIALIZING")),
             "goal": trim(str(bb.get("original_goal", "") or "").strip(), 400),
@@ -23517,6 +24074,16 @@ body{padding:18px}
             "approved": bool((bb.get("approval", {}) or {}).get("approved", False)),
             "task_type": str(profile.get("task_type", "general") or "general"),
             "complexity": str(profile.get("complexity", "simple") or "simple"),
+            "plan_step_id": str(step_snapshot.get("step_id", "") or ""),
+            "plan_step_text": trim(str(step_snapshot.get("step_text", "") or "").strip(), 180),
+            "worker_todo_count": int(step_snapshot.get("worker_todo_count", 0) or 0),
+            "worker_todo_completed": int(step_snapshot.get("completed_count", 0) or 0),
+            "worker_todo_in_progress": int(step_snapshot.get("in_progress_count", 0) or 0),
+            "worker_todo_pending": int(step_snapshot.get("pending_count", 0) or 0),
+            "current_subtask": trim(str(step_snapshot.get("current_subtask", "") or "").strip(), 180),
+            "next_pending_subtask": trim(str(step_snapshot.get("next_pending_subtask", "") or "").strip(), 180),
+            "last_worker_reply_role": last_reply_role,
+            "last_worker_reply_text": last_reply_text,
         }
         raw = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
         return hashlib.sha1(raw.encode("utf-8")).hexdigest()
@@ -24161,6 +24728,7 @@ body{padding:18px}
                 "instruction": "",
                 "reason": "",
                 "source": "",
+                "progress_fp": "",
                 "is_mandatory": False,
                 "ts": 0.0,
             },
@@ -24216,6 +24784,7 @@ body{padding:18px}
                 "instruction": trim(str(raw_delegate.get("instruction", "") or "").strip(), 1200),
                 "reason": trim(str(raw_delegate.get("reason", "") or "").strip(), 600),
                 "source": trim(str(raw_delegate.get("source", "") or "").strip(), 40),
+                "progress_fp": trim(str(raw_delegate.get("progress_fp", "") or "").strip(), 80),
                 "is_mandatory": _to_bool_like(raw_delegate.get("is_mandatory", False), default=False),
                 "ts": float(raw_delegate.get("ts", 0.0) or 0.0),
             }
@@ -24353,8 +24922,8 @@ body{padding:18px}
             for pt in bb_src_todos[:40]:
                 if not isinstance(pt, dict):
                     continue
-                raw_content = trim(str(pt.get("content", "") or ""), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
-                raw_full = trim(str(pt.get("full_content", "") or ""), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
+                raw_content = trim(normalize_embedded_newlines(pt.get("content", "")), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
+                raw_full = trim(normalize_embedded_newlines(pt.get("full_content", "")), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
                 # Migration: if full_content is empty but content has sub-steps, auto-split
                 if not raw_full and raw_content and pt.get("category") == "plan_step":
                     normalized = _mid_re_norm.sub(r"\n\1", raw_content)
@@ -24369,6 +24938,7 @@ body{padding:18px}
                     "category": trim(str(pt.get("category", "") or ""), 40),
                     "plan_step_index": int(pt.get("plan_step_index", -1)) if pt.get("plan_step_index") is not None else -1,
                     "created_at": float(pt.get("created_at", 0.0) or 0.0),
+                    "activated_at": float(pt.get("activated_at", 0.0) or 0.0) if pt.get("activated_at") else None,
                     "completed_at": float(pt.get("completed_at", 0.0) or 0.0) if pt.get("completed_at") else None,
                     "completed_by": trim(str(pt.get("completed_by", "") or ""), 40),
                     "evidence": trim(str(pt.get("evidence", "") or ""), 200),
@@ -24766,7 +25336,8 @@ body{padding:18px}
         if not isinstance(fl, dict):
             return
         delegations = fl.get("repeated_delegations", [])
-        fp = hashlib.sha1(str(instruction or "").encode("utf-8")).hexdigest()[:12]
+        progress_fp = self._watchdog_state_fingerprint(bb)
+        fp = hashlib.sha1((str(instruction or "") + "|" + progress_fp).encode("utf-8")).hexdigest()[:12]
         for entry in delegations:
             if entry.get("instruction_hash") == fp and entry.get("target") == target:
                 entry["count"] = int(entry.get("count", 1) or 1) + 1
@@ -24779,6 +25350,7 @@ body{padding:18px}
             "target": trim(str(target or ""), 40),
             "instruction_hash": fp,
             "instruction_preview": trim(str(instruction or ""), 200),
+            "progress_fp": progress_fp,
             "count": 1,
             "first_round": int(getattr(self, "agent_round_index", 0) or 0),
             "last_round": int(getattr(self, "agent_round_index", 0) or 0),
@@ -25363,6 +25935,195 @@ body{padding:18px}
             return observed_signal or read_back or wrote_files
         return wrote_files or read_back or knowledge_signal or observed_signal
+    def _plan_step_activation_ts(self, plan_step: dict) -> float:
+        if not isinstance(plan_step, dict):
+            return 0.0
+        try:
+            activated = float(plan_step.get("activated_at", 0.0) or 0.0)
+        except Exception:
+            activated = 0.0
+        if activated > 0:
+            return activated
+        try:
+            return float(plan_step.get("created_at", 0.0) or 0.0)
+        except Exception:
+            return 0.0
+    def _plan_step_blackboard_signals(self, plan_step: dict, board: dict | None = None) -> dict:
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        step_id = trim(str((plan_step or {}).get("id", "") or ""), 20)
+        since_ts = self._plan_step_activation_ts(plan_step)
+        def _rows_since(rows: object) -> list[dict]:
+            out: list[dict] = []
+            if not isinstance(rows, list):
+                return out
+            for row in rows:
+                if not isinstance(row, dict):
+                    continue
+                txt = trim(str(row.get("content", "") or "").strip(), 1200)
+                if not txt:
+                    continue
+                try:
+                    ts = float(row.get("ts", 0.0) or 0.0)
+                except Exception:
+                    ts = 0.0
+                if since_ts > 0 and ts > 0 and ts + 1e-6 < since_ts:
+                    continue
+                out.append({"ts": ts, "content": txt, "actor": trim(str(row.get("actor", "") or ""), 40)})
+            return out
+        def _recent_excerpt(rows: list[dict], max_chars: int = 120) -> str:
+            if not rows:
+                return ""
+            return trim(str(rows[-1].get("content", "") or "").replace("\r\n", "\n"), max_chars)
+        negative_hints = ("error:", "failed", "failure", "traceback", "fatal error", "assertionerror", "exception")
+        compile_hints = ("compiled successfully", "build successful", "build succeeded", "syntax ok", "lint passed", "no issues found", "0 errors", "编译成功")
+        test_hints = ("test passed", "tests passed", "all tests passed", "0 failed", "100%", "ok", "success", "测试通过")
+        step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
+        step_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
+        filtered_entries: list[dict] = []
+        for entry in step_entries:
+            if not isinstance(entry, dict):
+                continue
+            try:
+                ts = float(entry.get("ts", 0.0) or 0.0)
+            except Exception:
+                ts = 0.0
+            if since_ts > 0 and ts > 0 and ts + 1e-6 < since_ts:
+                continue
+            filtered_entries.append(entry)
+        step_entries = filtered_entries
+        artifact_rows: list[dict] = []
+        raw_artifacts = bb.get("code_artifacts", {}) if isinstance(bb.get("code_artifacts"), dict) else {}
+        for path, meta in raw_artifacts.items():
+            if not isinstance(meta, dict):
+                continue
+            try:
+                ts = float(meta.get("updated_at", 0.0) or 0.0)
+            except Exception:
+                ts = 0.0
+            if since_ts > 0 and ts > 0 and ts + 1e-6 < since_ts:
+                continue
+            artifact_rows.append({
+                "path": trim(str(path or "").strip(), 240),
+                "summary": trim(str(meta.get("summary", "") or "").strip(), 200),
+                "updated_at": ts,
+            })
+        research_rows = _rows_since(bb.get("research_notes", []))
+        exec_rows = _rows_since(bb.get("execution_logs", []))
+        review_rows = _rows_since(bb.get("review_feedback", []))
+        file_ops = {
+            trim(str(entry.get("op", "") or "").strip(), 40)
+            for entry in step_entries
+            if isinstance(entry, dict)
+        }
+        has_write = any(op in {"write_file", "edit_file"} for op in file_ops) or bool(artifact_rows)
+        has_read = "read_file" in file_ops
+        def _has_positive(rows: list[dict], hints: tuple[str, ...]) -> bool:
+            for row in reversed(rows[-6:]):
+                low = str(row.get("content", "") or "").lower()
+                if not low or any(neg in low for neg in negative_hints):
+                    continue
+                if any(tok in low for tok in hints):
+                    return True
+            return False
+        def _has_observed(rows: list[dict]) -> bool:
+            for row in reversed(rows[-6:]):
+                low = str(row.get("content", "") or "").lower()
+                if low and not any(neg in low for neg in negative_hints):
+                    return True
+            return False
+        recent_files = [row.get("path", "") for row in artifact_rows[-4:] if row.get("path")]
+        if not recent_files:
+            recent_files = [
+                trim(str(entry.get("path", "") or "").strip(), 240)
+                for entry in step_entries[-4:]
+                if isinstance(entry, dict) and str(entry.get("path", "") or "").strip()
+            ]
+        return {
+            "since_ts": since_ts,
+            "has_write": has_write,
+            "has_read": has_read,
+            "has_research": bool(research_rows),
+            "has_exec": _has_observed(exec_rows),
+            "has_review": _has_observed(review_rows),
+            "has_compile_pass": _has_positive(exec_rows + review_rows, compile_hints),
+            "has_test_pass": _has_positive(exec_rows + review_rows, test_hints),
+            "recent_files": list(dict.fromkeys(recent_files))[-4:],
+            "recent_exec_excerpt": _recent_excerpt(exec_rows, 140),
+            "recent_review_excerpt": _recent_excerpt(review_rows, 140),
+            "recent_research_excerpt": _recent_excerpt(research_rows, 140),
+        }
+    def _plan_step_has_blackboard_evidence(self, plan_step: dict, board: dict | None = None) -> bool:
+        """Decide whether the blackboard holds enough evidence for a plan step.
+
+        The signal flags come from ``_plan_step_blackboard_signals``. Which
+        flags count depends on the step's phase (as reported by
+        ``_plan_step_phase_hint``) and on keywords found in the step text.
+        The branches are checked in order and the first match decides.
+
+        Returns:
+            ``False`` for a non-dict ``plan_step``; otherwise the truth value
+            of the flag combination for the matching branch.
+        """
+        if not isinstance(plan_step, dict):
+            return False
+        sig = self._plan_step_blackboard_signals(plan_step, board)
+        step_text = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "").lower()
+        phase = self._plan_step_phase_hint(step_text)
+        # Keyword checks are plain substring tests on the lower-cased step text.
+        wants_test = phase in ("test", "review") or any(
+            tok in step_text for tok in ("test", "pytest", "unit", "integration", "验证", "測試", "测试", "回归", "assert")
+        )
+        wants_runtime_validation = wants_test or phase == "implement" or any(
+            tok in step_text for tok in ("verify", "validation", "check", "lint", "build", "compile", "运行", "校验", "檢查")
+        )
+        if wants_test:
+            return sig["has_test_pass"] or sig["has_exec"] or sig["has_review"]
+        if phase == "implement":
+            # An implement step needs a write plus at least one corroborating signal.
+            return sig["has_write"] and (
+                sig["has_compile_pass"] or sig["has_test_pass"] or sig["has_exec"] or sig["has_read"] or sig["has_review"]
+            )
+        if phase in ("research", "design"):
+            return sig["has_research"] or sig["has_read"] or sig["has_exec"] or sig["has_write"]
+        # The test and implement cases have already returned above, so only
+        # the validation keywords can make this flag true at this point.
+        if wants_runtime_validation:
+            return sig["has_exec"] or sig["has_read"] or sig["has_write"] or sig["has_review"]
+        return sig["has_write"] or sig["has_read"] or sig["has_research"] or sig["has_exec"] or sig["has_review"]
+    def _step_has_accumulated_evidence(self, plan_step: dict, bb: dict | None = None) -> bool:
+        """Fix 3: Check if step has accumulated evidence across ALL turns (not just current turn).
+        Uses step_files registry + blackboard signals to detect writes/execution during step lifetime."""
+        if not isinstance(plan_step, dict):
+            return False
+        sig = self._plan_step_blackboard_signals(plan_step, bb)
+        return sig["has_write"] or sig["has_exec"] or sig["has_research"]
+    def _collect_accumulated_step_evidence(self, plan_step: dict, bb: dict | None = None) -> str:
+        """Fix 1 support: Collect evidence summary from accumulated step history (across all turns)."""
+        if not isinstance(plan_step, dict):
+            return ""
+        sig = self._plan_step_blackboard_signals(plan_step, bb)
+        parts: list[str] = []
+        if sig.get("recent_files"):
+            parts.append("files: " + ", ".join(sig["recent_files"][:4]))
+        if sig.get("recent_exec_excerpt"):
+            parts.append("exec: " + trim(sig["recent_exec_excerpt"], 80))
+        if sig.get("recent_research_excerpt"):
+            parts.append("research: " + trim(sig["recent_research_excerpt"], 80))
+        return trim("; ".join(parts) or "accumulated-step-evidence", 200)
+    def _collect_blackboard_step_evidence(self, plan_step: dict, board: dict | None = None) -> str:
+        sig = self._plan_step_blackboard_signals(plan_step, board)
+        parts: list[str] = []
+        if sig.get("recent_files"):
+            parts.append("files: " + ", ".join(sig["recent_files"][:3]))
+        if sig.get("recent_exec_excerpt"):
+            parts.append(f"logs: {sig['recent_exec_excerpt']}")
+        if sig.get("recent_review_excerpt"):
+            parts.append(f"review: {sig['recent_review_excerpt']}")
+        if sig.get("recent_research_excerpt"):
+            parts.append(f"notes: {sig['recent_research_excerpt']}")
+        return trim("; ".join(parts), 200)
     def _has_test_pass_evidence(self, board: dict | None = None) -> bool:
         bb = board if isinstance(board, dict) else self._ensure_blackboard()
         logs = bb.get("execution_logs", []) if isinstance(bb.get("execution_logs"), list) else []
@@ -25395,6 +26156,20 @@ body{padding:18px}
             if todo.get("status") == "completed":
                 continue
             cat = todo.get("category", "")
+            if cat == "plan_step" and todo.get("status") == "in_progress" and not todo.get("activated_at"):
+                step_idx = int(todo.get("plan_step_index", 0) or 0)
+                prior_done_ts = [
+                    float(t.get("completed_at", 0.0) or 0.0)
+                    for t in todos
+                    if t.get("category") == "plan_step"
+                    and int(t.get("plan_step_index", 0) or 0) < step_idx
+                    and t.get("completed_at")
+                ]
+                todo["activated_at"] = (
+                    max(prior_done_ts)
+                    if prior_done_ts
+                    else (float(todo.get("created_at", 0.0) or 0.0) or float(now_ts()))
+                )
             if cat == "setup" and (research_count > 0 or code_count > 0):
                 todo.update(
                     status="completed",
@@ -25446,11 +26221,14 @@ body{padding:18px}
                         if t.get("category") == "plan_step"
                     ):
                         todo["status"] = "in_progress"
+                        todo["activated_at"] = float(now_ts())
         if not any(t.get("status") == "in_progress" for t in todos):
             for t in todos:
                 if t.get("status") == "pending":
                     t["status"] = "in_progress"
+                    if not t.get("activated_at"):
+                        t["activated_at"] = float(now_ts())
                     break
         bb["project_todos"] = todos
@@ -25516,10 +26294,25 @@ body{padding:18px}
                 break
         if not current:
             return False
+        # Fix 5c: Reset TodoWrite loop counter on step advancement
+        try:
+            self._todowrite_step_counter.clear()
+        except Exception:
+            pass
         current["status"] = "completed"
         current["completed_at"] = float(now_ts())
         current["completed_by"] = actor
         current["evidence"] = trim(str(evidence or "").strip(), 200) or self._ui_text("step_completed_evidence")
+        # Clear single-mode validation gate flags for the completed step
+        try:
+            _completed_id = str(current.get("id", "") or "")
+            for _attr_name in (f"_smvg_{_completed_id}", f"_smvg_ts_{_completed_id}", f"_smvg_n_{_completed_id}", f"_sync_exec_gate_n_{_completed_id}", f"_sync_sv_ts_{_completed_id}"):
+                try:
+                    delattr(self, _attr_name)
+                except AttributeError:
+                    pass
+        except Exception:
+            pass
         # 推进 cursor，激活下一步
         cursor = int(bb.get("plan_step_cursor", 0) or 0)
         bb["plan_step_cursor"] = cursor + 1
@@ -25530,6 +26323,7 @@ body{padding:18px}
                 break
         if next_step:
             next_step["status"] = "in_progress"
+            next_step["activated_at"] = float(now_ts())
             step_idx = int(next_step.get("plan_step_index", 0) or 0) + 1
             total = int(bb.get("plan_step_total", len(todos)) or len(todos))
             self._emit("status", {
@@ -25635,27 +26429,90 @@ body{padding:18px}
             isinstance(r, dict) and r.get("ok", False) and str(r.get("name", "")) == "bash"
             for r in results
         )
-        validation_ok = self._tool_results_have_validation_evidence(current, results)
+        validation_ok_current = self._tool_results_have_validation_evidence(current, results)
+        validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
+        validation_ok = validation_ok_current or validation_ok_blackboard
+        bb_sig = self._plan_step_blackboard_signals(current, bb)
         phase_evidence = False
         if phase in ("research", "design") and validation_ok:
             phase_evidence = True
-        elif phase == "implement" and wrote_files and validation_ok:
+        elif phase == "implement" and (
+            (wrote_files and validation_ok_current)
+            or (bb_sig["has_write"] and validation_ok_blackboard)
+        ):
             phase_evidence = True
-        elif phase in ("test", "review") and ran_bash_ok and validation_ok:
+        elif phase in ("test", "review") and (
+            (ran_bash_ok and validation_ok_current)
+            or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
+        ):
             phase_evidence = True
+        todo_progress_signal = any(
+            isinstance(r, dict) and r.get("ok", False)
+            and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
+            for r in results
+        )
         # Advance when:
         # - Manager requested AND worker produced output, OR
         # - All subtasks completed AND worker produced output, OR
-        # - Phase heuristics confirm (write+bash for implement)
-        has_strong_evidence = validation_ok and worker_produced_output and (
-            manager_requested or subtasks_all_done or phase_evidence
-        )
+        # - Phase heuristics confirm BUT ONLY if no incomplete subtasks exist
+        # - Fix 3: All subtasks completed + accumulated step evidence (covers TodoWrite-only turns)
+        # CRITICAL: When subtasks exist, phase_evidence alone CANNOT bypass subtask completion.
+        _has_subtasks = bool(self._active_plan_worker_todo_rows(
+            str(current.get("id", "") or ""), role=""
+        ))
+        _phase_gate = phase_evidence and (subtasks_all_done or not _has_subtasks)
+        accumulated_evidence_path = (
+            subtasks_all_done
+            and todo_progress_signal
+            and self._step_has_accumulated_evidence(current, bb)
+        )
+        has_strong_evidence = (
+            validation_ok and (
+                (
+                    worker_produced_output
+                    and (manager_requested or subtasks_all_done or _phase_gate)
+                )
+                or (
+                    todo_progress_signal
+                    and subtasks_all_done
+                    and validation_ok_blackboard
+                )
+            )
+        ) or accumulated_evidence_path
         if has_strong_evidence:
+            # Sync mode exec gate: when all subtasks done for implement/test/deploy phases,
+            # require at least some execution evidence (bash/test/compile ran at any point).
+            # Manager-requested advancement has its own escape hatch after 10 blocks.
+            _exec_gate_needed = (
+                subtasks_all_done
+                and phase in ("implement", "test", "deploy")
+            )
+            if _exec_gate_needed:
+                # Require model's explicit <step-verified/> tag in agent_messages since step activation
+                _has_verified = self._check_step_verified_tag(current, messages=self.agent_messages)
+                if not _has_verified:
+                    _sync_n_flag = f"_sync_exec_gate_n_{str(current.get('id', '') or '')}"
+                    _sync_n = int(getattr(self, _sync_n_flag, 0))
+                    if _sync_n < 10:
+                        setattr(self, _sync_n_flag, _sync_n + 1)
+                        # No verified tag yet — push worker to evaluate and emit <step-verified/>
+                        self._inject_sync_mode_verification_hint(current, worker_step)
+                        return
+                    # After 10 blocks, allow advancement to prevent permanent stall
             evidence = self._collect_step_evidence(current, worker_step)
+            # Clear sync exec gate counter on successful advance
+            try:
+                _sync_clear = f"_sync_exec_gate_n_{str(current.get('id', '') or '')}"
+                if hasattr(self, _sync_clear):
+                    delattr(self, _sync_clear)
+            except Exception:
+                pass
             self._advance_plan_step(
                 evidence=evidence,
                 actor=str(route.get("target", "developer") or "developer"),
             )
+        else:
+            self._inject_rework_if_needed(current, worker_step)
     def _worker_step_has_evidence(self, step: dict) -> bool:
         """Check if worker step produced concrete tool outputs."""
@@ -25671,7 +26528,8 @@ body{padding:18px}
     def _step_subtasks_all_completed(self, plan_step: dict) -> bool:
         """Check if all worker subtasks linked to this plan step are completed.
-        Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking."""
+        Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking.
+        Fix 6: Also excludes 'next-step intent' items that were added alongside completed items."""
         step_id = str(plan_step.get("id", "") or "")
         if not step_id:
             return False
@@ -25711,7 +26569,293 @@ body{padding:18px}
                 relevant.append(r)
             if relevant:
                 worker_items = relevant
-        return all(str(r.get("status", "")).lower() == "completed" for r in worker_items)
+        # Fix 6: Exclude "next-step intent" pending items when all other items are completed.
+        # When the worker completes step N and creates step N+1 subtasks in the same TodoWrite call,
+        # the new pending items get parent_step_id of step N, blocking its advancement.
+        completed_items = [r for r in worker_items if str(r.get("status", "")).lower() == "completed"]
+        pending_items = [r for r in worker_items if str(r.get("status", "")).lower() != "completed"]
+        if completed_items and pending_items:
+            # Check if pending items are content-wise duplicates of completed items
+            # (indicating the worker re-sent the same items but some got stuck as pending)
+            completed_content = {
+                normalize_work_text(str(r.get("content", ""))).strip().lower()
+                for r in completed_items
+                if str(r.get("content", "") or "").strip()
+            }
+            truly_new_pending = [
+                r for r in pending_items
+                if normalize_work_text(str(r.get("content", ""))).strip().lower() not in completed_content
+            ]
+            # If all pending items are duplicates of completed items, they don't block
+            if not truly_new_pending:
+                worker_items = completed_items
+            # If there are truly new pending items but all original items are done,
+            # check if the new items match future plan step content
+            elif truly_new_pending and len(completed_items) >= 2:
+                bb = self._ensure_blackboard()
+                future_step_content = set()
+                found_current = False
+                for t in bb.get("project_todos", []):
+                    if not isinstance(t, dict) or t.get("category") != "plan_step":
+                        continue
+                    if str(t.get("id", "") or "") == step_id:
+                        found_current = True
+                        continue
+                    if found_current:
+                        fc = str(t.get("full_content", "") or t.get("content", "") or "").strip().lower()
+                        future_step_content.add(fc)
+                        for line in fc.split("\n"):
+                            sl = line.strip().lower()
+                            if sl:
+                                future_step_content.add(sl)
+                if future_step_content:
+                    _still_blocking = []
+                    for pi in truly_new_pending:
+                        pc = normalize_work_text(str(pi.get("content", ""))).strip().lower()
+                        # Check if this pending item's content appears in any future step
+                        is_future = any(pc in fsc or fsc in pc for fsc in future_step_content if len(fsc) > 4)
+                        if not is_future:
+                            _still_blocking.append(pi)
+                    if not _still_blocking:
+                        worker_items = completed_items
+        all_marked_done = all(str(r.get("status", "")).lower() == "completed" for r in worker_items)
+        if not all_marked_done:
+            return False
+        # Acceptance verification: check that each "completed" subtask has real evidence
+        # Don't just trust the model's TodoWrite status — verify against accumulated tool outputs
+        if worker_items:
+            bb = self._ensure_blackboard()
+            unverified = self._verify_subtasks_acceptance(worker_items, step_id, bb)
+            if unverified:
+                return False
+        return True
+    def _verify_subtasks_acceptance(self, subtasks: list[dict], step_id: str, bb: dict) -> list[str]:
+        """Verify each completed subtask has real evidence. Returns list of unverified subtask descriptions.
+        Checks step_files and execution_logs against what each subtask's content implies.
+
+        Args:
+            subtasks: todo rows to check; only each row's "content" text is read.
+            step_id: key used to look up this step's entries in bb["step_files"].
+            bb: blackboard dict; "step_files" and "execution_logs" are read.
+
+        Returns:
+            One human-readable message per subtask that failed a rule (a subtask
+            contributes at most one message because each failure ends its
+            iteration). An empty list means nothing was flagged.
+
+        The rules are keyword/regex heuristics applied in a fixed order; a subtask
+        that matches none of them is treated as verified.
+        """
+        import re
+        # Gather accumulated evidence for this step
+        step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
+        step_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
+        # Lower-cased paths of entries whose op is write_file or edit_file.
+        written_paths = set()
+        for entry in step_entries:
+            if isinstance(entry, dict) and str(entry.get("op", "")) in ("write_file", "edit_file"):
+                written_paths.add(str(entry.get("path", "") or "").strip().lower())
+        # Gather bash execution evidence
+        # NOTE(review): execution_logs is read for the whole blackboard, not filtered
+        # by step_id here, so output from earlier steps may count — confirm intended.
+        exec_logs = bb.get("execution_logs", [])
+        if not isinstance(exec_logs, list):
+            exec_logs = []
+        # Lower-cased "content" of the last 30 log entries (empty contents dropped).
+        bash_outputs_lower = []
+        for log in exec_logs[-30:]:
+            if isinstance(log, dict):
+                c = str(log.get("content", "") or "").lower()
+                if c:
+                    bash_outputs_lower.append(c)
+        all_bash_text = " ".join(bash_outputs_lower)
+        negative_hints = ("error:", "failed", "failure", "traceback", "fatal", "not found",
+                          "no such file", "command not found", "permission denied")
+        # True when any negative hint appears anywhere in the collected log text.
+        has_bash_failure = any(neg in all_bash_text for neg in negative_hints)
+        # Define acceptance patterns from subtask content
+        # Captures the first token that follows a creation verb plus whitespace.
+        # NOTE(review): that token is not necessarily a file name (e.g. the word
+        # after "implement"), which can flag a subtask that names no file.
+        _file_create_re = re.compile(
+            r"(?:创建|生成|编写|写入|create|write|generate|implement|scaffold)\s+(.+?)(?:\s|$|，|。|,|\()",
+            re.IGNORECASE,
+        )
+        # The keyword tuples below are matched as plain substrings of the
+        # lower-cased subtask text, not as whole words.
+        _run_test_kw = ("运行", "测试", "验证", "test", "pytest", "verify", "validate",
+                        "run", "check", "确认", "检查")
+        _compile_kw = ("编译", "构建", "compile", "build", "cmake", "make", "gcc", "gfortran")
+        _install_kw = ("安装", "install", "pip install", "npm install", "apt install")
+        unverified: list[str] = []
+        for st in subtasks:
+            content = str(st.get("content", "") or "").strip()
+            if not content:
+                continue
+            content_lower = content.lower()
+            # Rule 1: If subtask mentions creating a file, check it was actually written
+            m = _file_create_re.search(content)
+            if m:
+                target = m.group(1).strip().strip("\"'`").lower()
+                # Extract just filename from path-like strings
+                if "/" in target:
+                    target_parts = [p for p in target.split("/") if p.strip()]
+                    target_name = target_parts[-1] if target_parts else target
+                else:
+                    target_name = target
+                # Targets of two characters or fewer are ignored; the match against
+                # written paths is a substring test, not an exact comparison.
+                if target_name and len(target_name) > 2:
+                    found = any(target_name in wp for wp in written_paths)
+                    if not found:
+                        unverified.append(f"file not created: {target_name}")
+                        continue
+            # Rule 2: If subtask mentions testing/running/verifying, check bash was executed
+            if any(kw in content_lower for kw in _run_test_kw):
+                if not bash_outputs_lower:
+                    unverified.append(f"no bash execution for: {trim(content, 60)}")
+                    continue
+                # Check for test failures in recent bash output
+                if has_bash_failure and any(kw in content_lower for kw in ("test", "测试", "pytest")):
+                    # Only block if failure keywords appear near test-related content
+                    # Each `line` below is one whole log entry (of the last 10),
+                    # not an individual line of output.
+                    test_related_failures = any(
+                        ("test" in line or "pytest" in line or "assert" in line)
+                        and any(neg in line for neg in negative_hints)
+                        for line in bash_outputs_lower[-10:]
+                    )
+                    if test_related_failures:
+                        unverified.append(f"test failures detected for: {trim(content, 60)}")
+                        continue
+            # Rule 3: If subtask mentions compiling/building, check bash + no compile errors
+            if any(kw in content_lower for kw in _compile_kw):
+                if not bash_outputs_lower:
+                    unverified.append(f"no bash execution for compile: {trim(content, 60)}")
+                    continue
+                # As in Rule 2, `line` is a whole log entry from the last 10 entries.
+                compile_failures = any(
+                    any(neg in line for neg in ("error:", "failed", "failure"))
+                    and any(kw in line for kw in ("compil", "build", "cmake", "make", "link"))
+                    for line in bash_outputs_lower[-10:]
+                )
+                if compile_failures:
+                    unverified.append(f"compile failures for: {trim(content, 60)}")
+                    continue
+            # Rule 4: If subtask mentions installing, check bash was run
+            if any(kw in content_lower for kw in _install_kw):
+                if not bash_outputs_lower:
+                    unverified.append(f"no bash for install: {trim(content, 60)}")
+                    continue
+            # If none of the specific rules matched, the subtask is considered verified
+            # (generic subtasks like "design" or "analyze" don't need tool evidence)
+        return unverified
+    def _inject_rework_if_needed(self, plan_step: dict, worker_step: dict):
+        """Append a <step-rework> guidance bubble when completed subtasks fail acceptance.
+
+        Flow:
+          1. Collect the step's worker todo rows whose status is "completed".
+          2. Run the heuristic check (_verify_subtasks_acceptance); stop if it
+             reports nothing.
+          3. Run the LLM check (_llm_verify_subtask_acceptance); stop if it says
+             everything passed.
+          4. Unless a rework message for this step was injected less than 30.0
+             now_ts() units ago (presumably seconds), flip matching todo rows
+             back to "in_progress" and append a message listing up to five
+             failed items.
+
+        Args:
+            plan_step: plan-step row; "id" and "content" are read.
+            worker_step: not read by this method.
+
+        Returns None. Every exception is swallowed on purpose: this is a
+        best-effort nudge and must not break the caller.
+        """
+        try:
+            step_id = str(plan_step.get("id", "") or "")
+            if not step_id:
+                return
+            rows = self._active_plan_worker_todo_rows(step_id, role="")
+            completed_rows = [r for r in rows if str(r.get("status", "")).lower() == "completed"]
+            if not completed_rows:
+                return
+            bb = self._ensure_blackboard()
+            failures = self._verify_subtasks_acceptance(completed_rows, step_id, bb)
+            if not failures:
+                return
+            # LLM-based acceptance check: semantic analysis over heuristics
+            llm_verdict = self._llm_verify_subtask_acceptance(plan_step, completed_rows, bb)
+            if llm_verdict.get("all_passed", False):
+                return
+            # The LLM helper always returns a "rework_items" key (an empty list
+            # when the call or JSON parsing fails), so a dict.get default would
+            # never apply. Fall back to the heuristic failures when it is empty;
+            # `failures` is known to be non-empty at this point.
+            rework_items = llm_verdict.get("rework_items") or failures
+            # Rate-limit rework injection
+            _rework_key = f"_rework_injected_{step_id}"
+            _last_rework = getattr(self, _rework_key, 0.0)
+            if float(now_ts()) - float(_last_rework) < 30.0:
+                return
+            setattr(self, _rework_key, float(now_ts()))
+            step_label = trim(str(plan_step.get("content", "") or ""), 80)
+            rework_text = (
+                f"<step-rework>\n"
+                f"Step \"{step_label}\" acceptance check FAILED. "
+                f"The following subtasks were marked completed but did not pass verification:\n"
+            )
+            for i, item in enumerate(rework_items[:5]):
+                rework_text += f"  {i+1}. {trim(str(item), 120)}\n"
+            rework_text += (
+                "\nACTION REQUIRED: Fix these issues NOW before the step can advance.\n"
+                "- For missing files: create them with write_file\n"
+                "- For failed tests/builds: run the command again and fix errors\n"
+                "- For unverified installs: re-run the install command\n"
+                "After fixing, update TodoWrite to reflect the corrected state.\n"
+                "</step-rework>"
+            )
+            # Revert false "completed" status back to in_progress
+            _snap = self.todo.snapshot()
+            _modified = False
+            for row in _snap:
+                if not isinstance(row, dict):
+                    continue
+                if str(row.get("parent_step_id", "") or "") != step_id:
+                    continue
+                if str(row.get("status", "")).lower() != "completed":
+                    continue
+                rc = str(row.get("content", "") or "").strip().lower()
+                # A row is reverted when its first 20 characters, or any of its
+                # first three words longer than 3 characters, appear in a failure.
+                for fail in rework_items:
+                    fail_lower = str(fail).lower()
+                    if rc and (rc[:20] in fail_lower or any(w in fail_lower for w in rc.split()[:3] if len(w) > 3)):
+                        row["status"] = "in_progress"
+                        _modified = True
+                        break
+            if _modified:
+                try:
+                    self.todo.update(_snap)
+                except Exception:
+                    # Best effort: the guidance bubble is still sent below.
+                    pass
+            # In multi-agent mode address only the active agent; otherwise no
+            # explicit target roles are passed.
+            target_roles: tuple[str, ...] = ()
+            if self._is_multi_agent_mode():
+                active_role = str(bb.get("active_agent", "") or "developer")
+                if active_role:
+                    target_roles = (active_role,)
+            self._append_plan_guidance_bubble(
+                rework_text,
+                target_roles=target_roles,
+                summary=f"step rework: {len(rework_items)} items failed acceptance",
+            )
+        except Exception:
+            pass
+    def _llm_verify_subtask_acceptance(self, plan_step: dict, completed_subtasks: list[dict], bb: dict) -> dict:
+        """Use LLM semantic analysis to verify if subtasks are truly completed.
+
+        Builds a prompt from up to 8 subtasks, the step's recent step_files
+        entries and recent execution_logs excerpts, sends it through
+        ``self.ollama.chat`` and parses the first "{" .. last "}" span of the
+        reply as JSON.
+
+        Returns:
+            {"all_passed": bool, "rework_items": list[str]}. Both keys are always
+            present. On any error, or when the reply holds no JSON object, the
+            result is {"all_passed": False, "rework_items": []}.
+        """
+        try:
+            step_id = str(plan_step.get("id", "") or "")
+            step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
+            step_entries = step_files_raw.get(step_id, []) if step_id else []
+            files_summary = []
+            for entry in (step_entries[-15:] if isinstance(step_entries, list) else []):
+                if isinstance(entry, dict):
+                    files_summary.append(f"{entry.get('op','?')}: {entry.get('path','?')}")
+            exec_logs = bb.get("execution_logs", [])
+            recent_exec = []
+            for log in (exec_logs[-8:] if isinstance(exec_logs, list) else []):
+                if isinstance(log, dict):
+                    c = trim(str(log.get("content", "") or ""), 200)
+                    if c:
+                        recent_exec.append(c)
+            subtask_list = "\n".join(
+                f"- [{str(st.get('status','')).upper()}] {trim(str(st.get('content','') or ''), 120)}"
+                for st in completed_subtasks[:8]
+            )
+            prompt = (
+                "Analyze whether these subtasks are TRULY completed based on the evidence.\n\n"
+                f"SUBTASKS:\n{subtask_list}\n\n"
+                f"FILES CREATED/MODIFIED:\n{chr(10).join(files_summary[-10:]) or '(none)'}\n\n"
+                f"RECENT EXECUTION OUTPUT:\n{chr(10).join(recent_exec[-5:]) or '(none)'}\n\n"
+                "For each subtask, determine if it's genuinely done:\n"
+                "- File creation tasks: was the file actually created?\n"
+                "- Test/verify tasks: was a test/command actually run? Did it pass?\n"
+                "- Build/compile tasks: was compilation attempted? Any errors?\n"
+                "- Install tasks: was the install command run?\n\n"
+                "Reply ONLY as JSON: {\"all_passed\": true/false, \"rework_items\": [\"description of what failed\"]}\n"
+                "If all subtasks pass, return {\"all_passed\": true, \"rework_items\": []}"
+            )
+            resp = self.ollama.chat(
+                [{"role": "user", "content": prompt}],
+                system="You are a strict QA reviewer. Verify task completion against evidence. Reply ONLY valid JSON.",
+                max_tokens=300,
+                think=False,
+            )
+            import json
+            text = str(resp.get("text", "") or "").strip()
+            if "{" in text:
+                # Tolerate prose around the JSON object; a missing "}" raises and
+                # is handled by the outer except.
+                json_str = text[text.index("{"):text.rindex("}") + 1]
+                result = json.loads(json_str)
+                if isinstance(result, dict):
+                    # Normalize rework_items: models sometimes answer with a
+                    # single string or null. list("abc") would split a string
+                    # into characters, and list(None) would raise and throw
+                    # away an otherwise valid all_passed verdict.
+                    raw_items = result.get("rework_items", [])
+                    if isinstance(raw_items, str):
+                        raw_items = [raw_items]
+                    elif not isinstance(raw_items, (list, tuple)):
+                        raw_items = []
+                    rework_items = [
+                        str(item).strip()
+                        for item in raw_items
+                        if item is not None and str(item).strip()
+                    ]
+                    return {
+                        "all_passed": bool(result.get("all_passed", False)),
+                        "rework_items": rework_items,
+                    }
+        except Exception:
+            # Best effort: any failure is reported as "not passed, nothing listed".
+            pass
+        return {"all_passed": False, "rework_items": []}
     def _collect_step_evidence(self, plan_step: dict, worker_step: dict) -> str:
         """Collect evidence summary from worker step for plan step completion."""
@@ -25730,11 +26874,19 @@ body{padding:18px}
                 parts.append(f"bash: {cmd}" + (f" => {out}" if out else ""))
             elif name == "read_file":
                 path = str(r.get("args", {}).get("path", "") or "")
+                # Skip plan-infrastructure reads — not meaningful implementation evidence
+                _p = str(path)
+                if (_p.endswith("plan.md") and ".clouds_coder" in _p) or ".clouds_coder/skills_cache/" in _p:
+                    continue
                 out = self._tool_result_output_excerpt(r, 90)
                 parts.append(f"read: {path}" + (f" => {out}" if out else ""))
             elif name in ("write_to_blackboard", "query_code_library", "query_knowledge_library"):
                 out = self._tool_result_output_excerpt(r, 100)
                 parts.append(f"{name}" + (f": {out}" if out else ""))
+        if not parts:
+            bb_evidence = self._collect_blackboard_step_evidence(plan_step)
+            if bb_evidence:
+                return bb_evidence
         return trim("; ".join(parts) or "post-execution evidence", 200)
     def _get_active_plan_step(self, board: dict | None = None) -> dict | None:
@@ -25797,10 +26949,342 @@ body{padding:18px}
             return False
         return bool(self._active_plan_worker_todo_rows(step_id, role=role))
+    def _bridge_flat_todos_to_active_plan_step(
+        self,
+        rows: list[dict] | None,
+        board: dict | None = None,
+    ) -> tuple[list[dict], bool]:
+        """Attach un-parented todo rows to the currently active plan step.
+
+        Args:
+            rows: todo rows to bridge; non-dict entries are dropped once an
+                active step with an id has been found.
+            board: blackboard to use; when not a dict, ``_ensure_blackboard()``
+                supplies one.
+
+        Returns:
+            ``(rows, migrated)``. ``migrated`` is True only when at least one row
+            was given the active step's id as ``parent_step_id``. Nothing is
+            bridged (``migrated`` False) when there is no active step, the step
+            has no id, there are no dict rows, or any row already carries a
+            non-empty ``parent_step_id``.
+
+        Rows whose key starts with "bb:" are passed through unchanged. Rows with
+        empty content are dropped. The input rows are never mutated; copies are
+        returned.
+        """
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        step = self._get_active_plan_step(bb)
+        if not isinstance(step, dict):
+            return (list(rows or []), False)
+        step_id = trim(str(step.get("id", "") or ""), 20)
+        if not step_id:
+            return (list(rows or []), False)
+        snap = [dict(row) for row in (rows or []) if isinstance(row, dict)]
+        if not snap:
+            return (snap, False)
+        worker_owners = {"developer", "explorer", "reviewer"}
+        # Any row that already has a parent means the list is not "flat": leave
+        # it alone. This also covers rows already linked to the active step, so
+        # no separate check for that case is needed.
+        if any(str(row.get("parent_step_id", "") or "").strip() for row in snap):
+            return (snap, False)
+        owner_key = self._current_plan_worker_owner(bb)
+        bridged: list[dict] = []
+        migrated = False
+        for row in snap:
+            key = trim(str(row.get("key", "") or "").strip(), 120)
+            if key.startswith("bb:"):
+                bridged.append(dict(row))
+                continue
+            # Prefer the normalized text; fall back to the stripped raw content.
+            content = normalize_work_text(str(row.get("content", "") or "")) or str(row.get("content", "") or "").strip()
+            if not content:
+                continue
+            new_row = dict(row)
+            new_row["content"] = content
+            new_row["parent_step_id"] = step_id
+            owner = str(new_row.get("owner", "") or "").strip().lower()
+            # Rows not owned by a worker role are assigned to the current worker.
+            if owner not in worker_owners:
+                new_row["owner"] = owner_key
+            bridged.append(new_row)
+            migrated = True
+        return (bridged, migrated)
+    def _active_plan_progress_snapshot(self, board: dict | None = None) -> dict:
+        """Summarize worker-todo progress for the current plan step.
+
+        The returned dict always has the same keys: step id/index/text, the
+        number of expected subtasks, the number of worker todo rows, per-status
+        counts, the first in-progress subtask text, the first pending subtask
+        text, and the sorted list of sanitized owners. When there is no current
+        plan step row, every field holds its empty value.
+
+        Any status other than "completed" or "in_progress" counts as pending.
+        """
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        step = self._current_plan_step_row(bb)
+        if not isinstance(step, dict):
+            return {
+                "step_id": "",
+                "step_index": 0,
+                "step_text": "",
+                "expected_count": 0,
+                "worker_todo_count": 0,
+                "completed_count": 0,
+                "in_progress_count": 0,
+                "pending_count": 0,
+                "current_subtask": "",
+                "next_pending_subtask": "",
+                "owners": [],
+            }
+        step_id = trim(str(step.get("id", "") or ""), 20)
+        rows = self._active_plan_worker_todo_rows(step_id, role="") if step_id else []
+        expected = self._extract_plan_step_subtasks(step, limit=5)
+        tally = {"completed": 0, "in_progress": 0, "pending": 0}
+        # First non-empty content seen per non-completed bucket.
+        first_seen = {"in_progress": "", "pending": ""}
+        owners: set[str] = set()
+        for row in rows:
+            status = str(row.get("status", "pending") or "pending").strip().lower()
+            content = trim(str(row.get("content", "") or "").strip(), 220)
+            owner = self._sanitize_agent_role(row.get("owner", ""))
+            if owner:
+                owners.add(owner)
+            bucket = status if status in ("completed", "in_progress") else "pending"
+            tally[bucket] += 1
+            if bucket != "completed" and content and not first_seen[bucket]:
+                first_seen[bucket] = content
+        return {
+            "step_id": step_id,
+            "step_index": max(0, int(step.get("plan_step_index", 0) or 0)),
+            "step_text": self._current_plan_step_text(bb),
+            "expected_count": len(expected),
+            "worker_todo_count": len(rows),
+            "completed_count": tally["completed"],
+            "in_progress_count": tally["in_progress"],
+            "pending_count": tally["pending"],
+            "current_subtask": first_seen["in_progress"],
+            "next_pending_subtask": first_seen["pending"],
+            "owners": sorted(owners),
+        }
+    def _manager_worker_progress_capsule(self, role: str, step: dict, board: dict | None = None) -> str:  # Build a single-line "[worker-progress]" summary (fields joined by " | ") for one worker step.
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()  # a non-dict board falls back to self._ensure_blackboard()
+        role_key = self._sanitize_agent_role(role) or "developer"  # an empty sanitized role defaults to "developer"
+        safe_step = step if isinstance(step, dict) else {}  # tolerate a non-dict step
+        snapshot = self._active_plan_progress_snapshot(bb)
+        tool_results = safe_step.get("tool_results", []) or []
+        tool_names: list[str] = []
+        for item in tool_results:  # gather distinct, non-empty tool names in first-seen order
+            if not isinstance(item, dict):
+                continue
+            name = str(item.get("name", "") or "").strip()
+            if not name or name in tool_names:
+                continue
+            tool_names.append(name)
+            if len(tool_names) >= 5:  # report at most five tool names
+                break
+        parts = [
+            f"[worker-progress] owner={role_key}",
+            f"status={trim(str(safe_step.get('status', '') or ''), 40) or '-'}",  # "-" stands in for an empty status
+        ]
+        if tool_names:
+            parts.append("tools=" + ",".join(tool_names))
+        step_text = trim(str(snapshot.get("step_text", "") or ""), 180)
+        if step_text:
+            parts.append(f"step={step_text}")
+        todo_state = (  # counts are rendered in the order completed/in_progress/pending
+            f"todos={int(snapshot.get('completed_count', 0) or 0)}/"
+            f"{int(snapshot.get('in_progress_count', 0) or 0)}/"
+            f"{int(snapshot.get('pending_count', 0) or 0)}"
+        )
+        if int(snapshot.get("worker_todo_count", 0) or 0) > 0:
+            parts.append(todo_state)
+        elif int(snapshot.get("expected_count", 0) or 0) > 0:  # subtasks are expected but the snapshot has no worker todo rows
+            parts.append(f"todos=missing/{int(snapshot.get('expected_count', 0) or 0)}")
+        focus = trim(str(snapshot.get("current_subtask", "") or ""), 160)
+        if focus:
+            parts.append(f"focus={focus}")
+        elif str(snapshot.get("next_pending_subtask", "") or "").strip():  # no current subtask: report the next pending one instead
+            parts.append(f"next={trim(str(snapshot.get('next_pending_subtask', '') or ''), 160)}")
+        current_step = self._current_plan_step_row(bb)
+        if isinstance(current_step, dict):
+            evidence = self._collect_blackboard_step_evidence(current_step, bb)
+            if evidence:
+                parts.append(f"evidence={trim(evidence, 180)}")
+            if self._step_subtasks_all_completed(current_step) and self._plan_step_has_blackboard_evidence(current_step, bb):  # both checks must hold before the step is flagged ready
+                parts.append("acceptance=ready")
+        reply = bb.get("last_worker_reply", {}) if isinstance(bb.get("last_worker_reply"), dict) else {}
+        if self._sanitize_agent_role(reply.get("role", "")) == role_key:  # only quote the last reply when it came from this same role
+            reply_text = trim(str(reply.get("text", "") or "").strip(), 180)
+            if reply_text:
+                parts.append(f"reply={reply_text}")
+        return trim(" | ".join(parts), 1600)  # joined summary is passed through trim(..., 1600)
+    def _manager_recovery_route_for_repeated_delegate(self, route: dict, board: dict | None = None) -> dict:  # Return a rewritten copy of `route` tagged source="loop-recovery" and is_mandatory=True.
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        row = dict(route or {})  # shallow copy: the caller's route dict itself is not modified
+        step = self._current_plan_step_row(bb)
+        if not isinstance(step, dict):  # no current plan step: send a generic "continue" instruction to the developer
+            row["target"] = "developer"
+            row["instruction"] = (
+                "Recovery routing after repeated identical delegation. "
+                "Continue the current objective with one concrete tool action and write observable progress."
+            )
+            row["reason"] = trim(f"{row.get('reason', '')}|loop-recovery-no-plan-step", 600)  # appends a marker to the incoming reason
+            row["source"] = "loop-recovery"
+            row["is_mandatory"] = True
+            return row
+        snapshot = self._active_plan_progress_snapshot(bb)
+        step_text = trim(str(snapshot.get("step_text", "") or ""), 220)
+        full_text = self._current_plan_step_full_text(bb, max_len=600)
+        phase = self._plan_step_phase_hint(full_text)
+        expected_count = int(snapshot.get("expected_count", 0) or 0)
+        worker_todo_count = int(snapshot.get("worker_todo_count", 0) or 0)
+        current_subtask = trim(str(snapshot.get("current_subtask", "") or ""), 180)
+        next_pending = trim(str(snapshot.get("next_pending_subtask", "") or ""), 180)
+        subtasks_done = self._step_subtasks_all_completed(step)
+        has_evidence = self._plan_step_has_blackboard_evidence(step, bb)
+        reviewer_available = True  # assumed available unless a non-empty participants list says otherwise
+        profile = self._ensure_blackboard_task_profile(bb)
+        participants = profile.get("participants", []) if isinstance(profile.get("participants"), list) else []
+        participants_norm = [self._sanitize_agent_role(x) for x in participants]
+        participants_norm = [x for x in participants_norm if x]  # drop entries that sanitize to an empty role
+        if participants_norm:
+            reviewer_available = "reviewer" in participants_norm
+        if subtasks_done and has_evidence and reviewer_available:  # step looks finished: route to the reviewer for acceptance
+            row["target"] = "reviewer"
+            row["instruction"] = trim(
+                (
+                    "Recovery routing after repeated identical delegation. "
+                    f"Validate ONLY the current plan step: {step_text}. "
+                    "Worker subtasks are complete and blackboard evidence already exists. "
+                    "Run acceptance for this step only, record pass/fix with concrete evidence, and do not jump ahead."
+                ),
+                1200,
+            )
+            row["reason"] = trim(f"{row.get('reason', '')}|loop-recovery-acceptance", 600)
+            row["source"] = "loop-recovery"
+            row["is_mandatory"] = True
+            return row
+        default_owner = "explorer" if phase in {"research", "design"} else "developer"
+        owner = self._sanitize_agent_role(row.get("target", "")) or self._current_plan_worker_owner(bb)  # prefer the route's own target, else the plan's worker owner
+        if owner not in {"developer", "explorer"}:  # any other owner is replaced by the phase-based default
+            owner = default_owner
+        if expected_count > 0 and worker_todo_count == 0:  # expected subtasks exist but none were written yet
+            action_text = (
+                "First call TodoWrite for the current plan step and create the missing worker subtasks "
+                "before any more implementation."
+            )
+        elif current_subtask:
+            action_text = f"Continue ONLY the current in_progress subtask: {current_subtask}."
+        elif next_pending:
+            action_text = f"Resume the next pending subtask: {next_pending}."
+        else:
+            action_text = "Continue the current plan step with one concrete tool action."
+        evidence_text = ""
+        if has_evidence:  # evidence exists but the acceptance branch above was not taken
+            evidence_text = (
+                " Blackboard already contains partial evidence for this step; build on that work instead of restarting."
+            )
+        row["target"] = owner
+        row["instruction"] = trim(
+            (
+                "Recovery routing after repeated identical delegation. "
+                f"Stay on the current plan step: {step_text}. "
+                f"{action_text} "
+                "Do not branch to a different step or restate the whole plan. "
+                "After the subtask is finished, immediately call TodoWrite to mark it completed and move the next subtask to in_progress."
+                f"{evidence_text}"
+            ),
+            1200,
+        )
+        row["reason"] = trim(f"{row.get('reason', '')}|loop-recovery-execute", 600)
+        row["source"] = "loop-recovery"
+        row["is_mandatory"] = True
+        return row
+    def _todo_has_plan_steps(self, board: dict | None = None) -> bool:  # True when the board's "project_todos" list holds at least one dict with category == "plan_step".
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        todos = bb.get("project_todos", []) if isinstance(bb.get("project_todos"), list) else []  # a non-list value is treated as no todos
+        return any(
+            isinstance(todo, dict) and todo.get("category") == "plan_step"
+            for todo in todos
+        )
+    def _todo_worker_role_hint(self, role: str = "", board: dict | None = None) -> str:  # Resolve which worker role a todo update should be attributed to.
+        role_key = self._sanitize_agent_role(role)
+        if role_key in {"developer", "explorer", "reviewer"}:  # an explicit worker role is returned as-is
+            return role_key
+        return self._current_plan_worker_owner(board)  # otherwise defer to the plan's worker owner; `board` is forwarded unchanged (may be None)
+    def _todo_route_kind(self, role: str = "", board: dict | None = None) -> str:  # Classify todo handling as "plan_sync", "plan_single", "pure_sync" or "pure_single".
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        if self._todo_has_plan_steps(bb):  # plan steps present: the role no longer matters, only the agent mode
+            return "plan_sync" if self._is_multi_agent_mode() else "plan_single"
+        role_key = self._todo_worker_role_hint(role, bb)
+        if self._is_multi_agent_mode() and role_key in {"developer", "explorer", "reviewer"}:  # no plan, multi-agent mode, known worker role
+            return "pure_sync"
+        return "pure_single"
+    def _todo_row_kind(self, row: dict | None) -> str:  # Classify a todo row as "system", "plan_worker", "owner_worker" or "flat"; "" for a non-dict row.
+        if not isinstance(row, dict):
+            return ""
+        key = str(row.get("key", "") or "").strip()
+        if key.startswith("bb:"):  # a "bb:" key prefix marks the row as "system"; this check wins over owner/parent fields
+            return "system"
+        owner = str(row.get("owner", "") or "").strip().lower()
+        parent_step_id = str(row.get("parent_step_id", "") or "").strip()
+        if owner in {"developer", "explorer", "reviewer"} and parent_step_id:  # worker-owned row attached to a parent step
+            return "plan_worker"
+        if owner in {"developer", "explorer", "reviewer"}:  # worker-owned row with no parent step
+            return "owner_worker"
+        return "flat"  # everything else, including rows owned by other roles or by nobody
+    def _todo_route_rows(
+        self,
+        route_kind: str,
+        *,
+        rows: list[dict] | None = None,
+        role: str = "",
+        board: dict | None = None,
+    ) -> list[dict]:  # Return copies of the todo rows that belong to `route_kind`; unknown kinds yield [].
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        snap = [dict(row) for row in (rows if isinstance(rows, list) else self.todo.snapshot()) if isinstance(row, dict)]  # shallow-copy dict rows; falls back to self.todo.snapshot() when `rows` is not a list
+        if route_kind in {"plan_single", "plan_sync"}:
+            step = self._get_active_plan_step(bb)
+            step_id = trim(str((step or {}).get("id", "") or ""), 20)
+            if not step_id:  # without an active step id there is nothing to scope to
+                return []
+            return [
+                row for row in snap
+                if self._todo_row_kind(row) == "plan_worker"
+                and str(row.get("parent_step_id", "") or "").strip() == step_id  # NOTE(review): step_id went through trim(..., 20) but the row value did not — confirm ids never exceed that length
+            ]
+        if route_kind == "pure_sync":
+            role_key = self._todo_worker_role_hint(role, bb)
+            owner_rows = [row for row in snap if self._todo_row_kind(row) == "owner_worker"]
+            if role_key in {"developer", "explorer", "reviewer"}:
+                scoped = [
+                    row for row in owner_rows
+                    if str(row.get("owner", "") or "").strip().lower() == role_key
+                ]
+                if scoped:  # prefer this role's own rows when it has any
+                    return scoped
+            return owner_rows  # otherwise return every owner_worker row regardless of owner
+        if route_kind == "pure_single":
+            return [row for row in snap if self._todo_row_kind(row) == "flat"]
+        return []
     def _todo_runtime_has_worker_rows(self, role: str = "") -> bool:
-        if self._get_active_plan_step() is not None:
-            return self._active_plan_step_has_worker_todos(role=role)
-        return bool(self.todo.snapshot())
+        route_kind = self._todo_route_kind(role=role)  # new version: classify the route first instead of only checking for an active plan step
+        return bool(self._todo_route_rows(route_kind, role=role))  # True when that route currently has at least one matching todo row
+    def _merge_todo_signal_rows(self, items: list[dict], role: str = "", board: dict | None = None) -> str:  # Fill in missing owner/parent_step_id on incoming rows, then dispatch to the merge routine for the current route kind.
+        bb = board if isinstance(board, dict) else self._ensure_blackboard()
+        role_key = self._sanitize_agent_role(role)
+        route_kind = self._todo_route_kind(role=role_key, board=bb)
+        step = self._get_active_plan_step(bb) if route_kind in {"plan_single", "plan_sync"} else None  # the active step is only looked up for plan routes
+        step_id = trim(str((step or {}).get("id", "") or ""), 20)
+        normalized: list[dict] = []
+        for item in items or []:
+            if not isinstance(item, dict):  # non-dict entries (including plain strings) are dropped here
+                continue
+            row = dict(item)  # copy so the caller's dicts are not modified
+            if role_key in {"developer", "explorer", "reviewer"} and not str(row.get("owner", "") or "").strip():  # default a blank owner to the calling worker role
+                row["owner"] = role_key
+            if step_id and not str(row.get("parent_step_id", "") or "").strip():  # default a blank parent to the active plan step
+                row["parent_step_id"] = step_id
+            normalized.append(row)
+        if not normalized:
+            return self.todo.no_changes_text()
+        if route_kind in {"plan_single", "plan_sync"}:
+            return self._merge_plan_worker_todo_items(normalized, role=role_key)
+        if route_kind == "pure_sync":
+            return self._merge_owner_scoped_todo_items(normalized, role=role_key)
+        return self._merge_flat_todo_items(normalized, role=role_key)  # remaining case: "pure_single"
     def _plan_worker_todo_identity(self, row: dict | None) -> str:
         import re
@@ -25816,6 +27300,96 @@ body{padding:18px}
             return f"substep:{match.group(1)}"
         return f"text:{content}"
+    def _flat_todo_identity(self, row: dict | None) -> str:  # Derive a dedup identity string for a flat todo row: "system:<key>", "substep:<N.M>", "text:<content>" or "".
+        import re
+        if not isinstance(row, dict):
+            return ""
+        key = trim(str(row.get("key", "") or "").strip(), 120)
+        if key.startswith("bb:"):  # rows keyed "bb:..." are identified by their key, not their content
+            return f"system:{key}"
+        content = normalize_work_text(str(row.get("content", "") or "")) or str(row.get("content", "") or "")  # fall back to the raw content when normalization yields something falsy
+        content = re.sub(r"\s+", " ", content.strip().lower())  # collapse whitespace runs and lowercase before comparing
+        if not content:
+            return ""
+        match = re.match(r"^(\d+\.\d+)\b", content)  # a leading "digits.digits" prefix (e.g. "2.1") identifies the row by that number alone
+        if match:
+            return f"substep:{match.group(1)}"
+        return f"text:{content}"
+    def _merge_flat_todo_items(self, items: list[dict], role: str = "") -> str:  # Merge incoming flat todo items with existing flat rows by identity and pass the result to self.todo.update(); raises ValueError on a non-list or an item that is neither str nor dict.
+        if not isinstance(items, list):
+            raise ValueError("items must be array")
+        role_key = self._sanitize_agent_role(role)
+        existing = self.todo.snapshot()
+        route_existing = self._todo_route_rows("pure_single", rows=existing, role=role_key)  # existing rows classified as "flat"
+        existing_by_identity: dict[str, dict] = {}
+        preserved_system: list[dict] = []
+        for row in existing:  # existing "system" rows are always carried over
+            if self._todo_row_kind(row) != "system":
+                continue
+            preserved_system.append(dict(row))
+        for row in route_existing:  # index existing flat rows by identity
+            if not isinstance(row, dict):
+                continue
+            identity = self._flat_todo_identity(row)
+            if not identity:
+                continue
+            if identity not in existing_by_identity:  # the first row seen for an identity wins
+                existing_by_identity[identity] = dict(row)
+        status_alias = {  # informal status spellings mapped onto pending/in_progress/completed
+            "todo": "pending",
+            "doing": "in_progress",
+            "inprogress": "in_progress",
+            "in-progress": "in_progress",
+            "done": "completed",
+            "finish": "completed",
+            "finished": "completed",
+        }
+        passthrough_rows: list[dict] = []
+        merged_rows: list[dict] = []
+        seen_identities: set[str] = set()
+        for idx, item in enumerate(items):
+            if isinstance(item, str):  # a bare string is taken as the todo content
+                raw = {"content": item}
+            elif isinstance(item, dict):
+                raw = dict(item)
+            else:
+                raise ValueError(f"item {idx}: invalid type")
+            key = trim(str(raw.get("key", "") or "").strip(), 120)
+            if key.startswith("bb:"):  # incoming "bb:" rows skip normalization and are forwarded unchanged
+                passthrough_rows.append(raw)
+                continue
+            raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()  # NOTE(review): a "content" key holding None becomes "None" here, and the text/title fallbacks only apply when "content" is absent — confirm intended
+            content = normalize_work_text(raw_content) or raw_content
+            if not content:
+                continue
+            normalized: dict[str, object] = {"content": content}
+            raw_status = str(raw.get("status", raw.get("state", "")) or "").strip().lower()
+            if raw_status:
+                normalized["status"] = status_alias.get(raw_status, raw_status)  # statuses not in the alias table are kept as given (lowercased)
+            owner = str(raw.get("owner", "") or "").strip().lower()
+            if owner in {"manager", "explorer", "developer", "reviewer"}:
+                normalized["owner"] = owner
+            elif role_key == "manager" and owner == "":  # only the manager role is auto-assigned as owner of unowned rows
+                normalized["owner"] = role_key
+            active_form = str(raw.get("activeForm", raw.get("active_form", "")) or "").strip()
+            if active_form:
+                normalized["activeForm"] = active_form
+            identity = self._flat_todo_identity(normalized)
+            if not identity:
+                identity = f"ad-hoc:{idx}:{trim(content, 80)}"  # positional fallback identity so the row is still kept
+            merged = dict(existing_by_identity.get(identity, {}))  # start from the matching existing row, if any
+            if "activeForm" not in normalized:  # an update without activeForm clears any previously stored one
+                merged.pop("activeForm", None)
+            merged.update(normalized)
+            if identity in seen_identities:  # duplicate identity within this call: the first occurrence wins
+                continue
+            seen_identities.add(identity)
+            merged_rows.append(merged)
+        return self.todo.update(preserved_system + passthrough_rows + merged_rows)  # existing flat rows that no incoming item matched are not included in this list
     def _merge_plan_worker_todo_items(self, items: list[dict], role: str = "") -> str:
         if not isinstance(items, list):
             raise ValueError("items must be array")
@@ -25833,6 +27407,12 @@ body{padding:18px}
         for row in existing:
             if not isinstance(row, dict):
                 continue
+            row_kind = self._todo_row_kind(row)
+            if row_kind == "system":
+                preserved.append(dict(row))
+                continue
+            if row_kind != "plan_worker":
+                continue
             owner = str(row.get("owner", "") or "").strip().lower()
             row_step_id = trim(str(row.get("parent_step_id", "") or ""), 20)
             if owner in worker_owners and row_step_id == step_id:
@@ -25842,10 +27422,13 @@ body{padding:18px}
         merged_by_identity: dict[str, dict] = {}
         ordered_identities: list[str] = []
+        # Fix 2: Compute existing identities for next-step detection
+        _existing_identities: set[str] = set()
         for row in target_rows:
             identity = self._plan_worker_todo_identity(row)
             if not identity:
                 continue
+            _existing_identities.add(identity)
             if identity not in merged_by_identity:
                 merged_by_identity[identity] = dict(row)
                 ordered_identities.append(identity)
@@ -25883,12 +27466,52 @@ body{padding:18px}
             merged.update(row)
             merged["owner"] = str(merged.get("owner", "") or role_key).strip().lower() or role_key
             merged["parent_step_id"] = trim(str(merged.get("parent_step_id", "") or step_id), 20) or step_id
+            # Fix 2 support: Timestamp new items for next-step detection
+            if identity not in _existing_identities and "created_at" not in merged:
+                merged["created_at"] = float(now_ts())
+            if str(merged.get("status", "")).lower() == "completed" and "updated_at" not in merged:
+                merged["updated_at"] = float(now_ts())
             merged_by_identity[identity] = merged
             if identity not in ordered_identities:
                 ordered_identities.append(identity)
         merged_target_rows = [merged_by_identity[i] for i in ordered_identities if i in merged_by_identity]
-        final_rows = preserved + passthrough_rows + merged_target_rows
+        # Fix 4: Content-based deduplication to prevent duplicate subtasks from accumulating
+        _seen_content: set[str] = set()
+        _deduped_target: list[dict] = []
+        for row in merged_target_rows:
+            _ck = normalize_work_text(str(row.get("content", ""))).strip().lower()
+            if _ck in _seen_content:
+                continue
+            _seen_content.add(_ck)
+            _deduped_target.append(row)
+        merged_target_rows = _deduped_target
+        # Fix 2: Detect "next-step intent" — if all existing items are completed,
+        # new pending items that don't match existing identities are for the next step.
+        # Remove their parent_step_id so they don't block current step advancement.
+        _all_existing_done = (
+            bool(target_rows) and
+            all(str(r.get("status", "")).lower() == "completed" for r in target_rows)
+        )
+        if _all_existing_done:
+            for row in merged_target_rows:
+                _rid = self._plan_worker_todo_identity(row)
+                if (_rid and _rid not in _existing_identities
+                        and str(row.get("status", "")).lower() != "completed"):
+                    row.pop("parent_step_id", None)  # Not for current step
+        # Insert merged_target_rows right after the active plan step's bb: row in preserved,
+        # so subtasks appear nested under their parent step rather than at the list bottom.
+        _step_key = str(active_step.get("key", "") or "")
+        _insert_idx = len(preserved)  # fallback: append at end
+        if _step_key:
+            for _i, _r in enumerate(preserved):
+                if str(_r.get("key", "") or "") == _step_key:
+                    _insert_idx = _i + 1
+                    break
+        final_rows = preserved[:_insert_idx] + passthrough_rows + merged_target_rows + preserved[_insert_idx:]
         return self.todo.update(final_rows)
     def _merge_owner_scoped_todo_items(self, items: list[dict], role: str = "") -> str:
@@ -26115,7 +27738,7 @@ body{padding:18px}
         content = self._build_plan_todo_reminder_text(plan_step, missing_subtasks=missing_subtasks)
         if not content:
             return False
-        self.messages.append({"role": "user", "content": content, "ts": now_tick})
+        self._append_plan_guidance_bubble(content, summary="todo reminder")
         self.last_todo_reminder_ts = now_tick
         self.todo_reminder_count += 1
         self._emit(
@@ -26134,7 +27757,7 @@ body{padding:18px}
         if not isinstance(plan_step, dict):
             return []
-        raw = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
+        raw = normalize_embedded_newlines(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
         if not raw.strip():
             return []
         lines = [trim(str(line or "").strip(), 300) for line in raw.replace("\r\n", "\n").split("\n")]
@@ -26264,6 +27887,126 @@ body{padding:18px}
             self.todo.items = preserved + replacement
         return True
+    def _check_step_verified_tag(self, plan_step: dict, *, messages: list | None = None) -> bool:
+        """Return True if the agent has emitted <step-verified> in any assistant message
+        since this plan step was activated (i.e., after plan_step['activated_at']).
+        Pass messages=self.agent_messages for sync mode; defaults to self.messages."""
+        activated_at = float(plan_step.get("activated_at", 0.0) or 0.0)  # 0.0 when missing, which disables the cut-off below
+        msg_list = messages if messages is not None else self.messages
+        for msg in reversed(msg_list):  # scan newest-first
+            if not isinstance(msg, dict):
+                continue
+            msg_ts = float(msg.get("ts", 0.0) or 0.0)
+            # Stop once we reach messages predating step activation
+            if activated_at > 0 and msg_ts > 0 and msg_ts < activated_at:  # messages without a timestamp never trigger the stop
+                break
+            if msg.get("role") == "assistant":
+                content = str(msg.get("content", "") or "")
+                if "<step-verified" in content:  # substring test: matches "<step-verified>" and "<step-verified/>" alike
+                    return True
+        return False
+    def _single_mode_validation_gate(self, plan_step: dict, tool_results: list[dict]) -> bool:  # NOTE(review): `tool_results` is not read anywhere in this body
+        """Gate: after subtasks complete, require model to explicitly emit <step-verified/>
+        in a message since this step was activated. Research/design phases exempt.
+        Escape hatch: after 10 consecutive blocks, auto-pass to prevent permanent stall."""
+        step_id = str(plan_step.get("id", "") or "")
+        _flag = f"_smvg_{step_id}"  # per-step "already passed" marker, stored as a dynamic attribute on self
+        if getattr(self, _flag, False):
+            return True  # Already validated in a previous round
+        step_content = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "").lower()
+        phase = self._plan_step_phase_hint(step_content)
+        if phase in ("research", "design"):
+            setattr(self, _flag, True)
+            return True  # No verification needed for non-execution phases
+        # Escape hatch: after 10 consecutive blocks, unblock to prevent permanent stall
+        _n_flag = f"_smvg_n_{step_id}"  # per-step count of blocked calls, also a dynamic attribute on self
+        _n_blocked = int(getattr(self, _n_flag, 0))
+        if _n_blocked >= 10:
+            setattr(self, _flag, True)
+            return True
+        # Model must explicitly emit <step-verified/> after evaluating results
+        if self._check_step_verified_tag(plan_step):  # searches self.messages (no `messages` argument is passed)
+            setattr(self, _flag, True)
+            return True
+        # Gate blocked — increment counter and inject hint
+        setattr(self, _n_flag, _n_blocked + 1)
+        self._inject_single_mode_validation_hint(plan_step)
+        return False
+    def _inject_single_mode_validation_hint(self, plan_step: dict):
+        """Inject a hint (rate-limited 20s) instructing the model to emit <step-verified/>
+        after evaluating bash output against the step's acceptance criteria."""
+        step_id = str(plan_step.get("id", "") or "")
+        _ts_flag = f"_smvg_ts_{step_id}"  # per-step last-injection timestamp, stored as a dynamic attribute on self
+        _last_ts = float(getattr(self, _ts_flag, 0.0))
+        if float(now_ts()) - _last_ts < 20.0:
+            return
+        setattr(self, _ts_flag, float(now_ts()))  # NOTE(review): stamped before the duplicate check at the end, so a suppressed hint still restarts the 20s window
+        step_label = trim(str(plan_step.get("content", "") or ""), 80)
+        full_content = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
+        # Extract ACCEPTANCE criteria line if present
+        acceptance = ""
+        for line in full_content.splitlines():
+            if line.strip().upper().startswith("ACCEPTANCE:"):  # case-insensitive prefix match; only the first such line is used
+                acceptance = line.strip()[len("ACCEPTANCE:"):].strip()
+                break
+        phase = self._plan_step_phase_hint(full_content.lower())
+        if phase == "test":
+            action = "run the tests with bash and evaluate the results"
+        else:
+            action = "run the build/compile/run command with bash and evaluate the output"
+        accept_line = f"\nAcceptance criteria: {acceptance}" if acceptance else ""
+        msg = (
+            f"<verification-required>\n"
+            f"All subtasks for \"{step_label}\" are marked complete.{accept_line}\n"
+            f"Before this step can advance, you must:\n"
+            f"1. {action}\n"
+            f"2. Review the bash output and confirm it meets the acceptance criteria\n"
+            f"3. If it passes, emit exactly: <step-verified/>\n"
+            f"4. If it fails, fix the issue and retry — do NOT emit <step-verified/> until resolved\n"
+            f"</verification-required>"
+        )
+        _recent = self.messages[-5:]
+        if not any("<verification-required>" in str(m.get("content", "") or "") for m in _recent if isinstance(m, dict)):  # skip when one of the last five messages already carries the hint
+            self.messages.append({"role": "user", "content": msg, "ts": now_ts()})
+    def _inject_sync_mode_verification_hint(self, plan_step: dict, worker_step: dict):  # NOTE(review): `worker_step` is not read anywhere in this body
+        """Inject a verification hint into agent_messages (rate-limited 30s) for sync mode.
+        Instructs the worker to emit <step-verified/> after evaluating bash output."""
+        step_id = str(plan_step.get("id", "") or "")
+        _ts_flag = f"_sync_sv_ts_{step_id}"  # per-step last-injection timestamp, stored as a dynamic attribute on self
+        _last_ts = float(getattr(self, _ts_flag, 0.0))
+        if float(now_ts()) - _last_ts < 30.0:
+            return
+        setattr(self, _ts_flag, float(now_ts()))  # NOTE(review): stamped before the duplicate check at the end, so a suppressed hint still restarts the 30s window
+        step_label = trim(str(plan_step.get("content", "") or ""), 80)
+        full_content = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
+        acceptance = ""
+        for line in full_content.splitlines():
+            if line.strip().upper().startswith("ACCEPTANCE:"):  # case-insensitive prefix match; only the first such line is used
+                acceptance = line.strip()[len("ACCEPTANCE:"):].strip()
+                break
+        phase = self._plan_step_phase_hint(full_content.lower())
+        if phase == "test":
+            action = "run the tests with bash and evaluate results"
+        else:
+            action = "run the build/compile command with bash and evaluate the output"
+        accept_line = f"\nAcceptance criteria: {acceptance}" if acceptance else ""
+        msg = (
+            f"<verification-required>\n"
+            f"All subtasks for \"{step_label}\" are marked complete.{accept_line}\n"
+            f"Before this step can advance:\n"
+            f"1. {action}\n"
+            f"2. Review the output and confirm acceptance criteria are met\n"
+            f"3. If it passes, emit exactly: <step-verified/>\n"
+            f"4. If it fails, fix and retry — do NOT emit <step-verified/> until resolved\n"
+            f"</verification-required>"
+        )
+        _recent = self.agent_messages[-5:]
+        if not any("<verification-required>" in str(m.get("content", "") or "") for m in _recent if isinstance(m, dict)):  # skip when one of the last five agent messages already carries the hint
+            self.agent_messages.append({"role": "user", "content": msg, "ts": now_ts()})
     def _single_agent_plan_step_check(self, tool_results: list[dict]):
         """In single-agent mode, check if current plan step should be advanced based on tool results."""
         bb = self._ensure_blackboard()
@@ -26281,6 +28024,24 @@ body{padding:18px}
         if not current:
             self._sync_todos_from_blackboard(reason="single-agent-round")
             return
+        # When a new step is activated with no subtasks yet, require TodoWrite first
+        _cur_step_id = str(current.get("id", "") or "")
+        if _cur_step_id:
+            _existing_subs = self._active_plan_worker_todo_rows(_cur_step_id, role="")
+            if not _existing_subs:
+                _step_label_s = trim(str(current.get("content", "") or ""), 60)
+                _force_tw_msg = (
+                    f"<action-required>\n"
+                    f"Step \"{_step_label_s}\" has no subtasks yet. "
+                    f"Your FIRST action MUST be to call TodoWrite with "
+                    f"parent_step_id=\"{_cur_step_id}\" to create this step's subtasks "
+                    f"(e.g. N.1, N.2 ...) before executing any other work.\n"
+                    f"</action-required>"
+                )
+                _recent_msgs = self.messages[-4:]
+                if not any("<action-required>" in str(m.get("content", "") or "") for m in _recent_msgs if isinstance(m, dict)):
+                    self._append_plan_guidance_bubble(_force_tw_msg, summary="action required: create subtasks first")
+                return  # Wait for TodoWrite before doing other checks
         # Heuristic: check if tool results indicate step completion
         step_content = str(current.get("full_content", "") or current.get("content", "") or "").lower()
         phase = self._plan_step_phase_hint(step_content)
@@ -26292,23 +28053,57 @@ body{padding:18px}
             str(r.get("name", "")) == "bash" and r.get("ok", False)
             for r in tool_results
         )
-        validation_ok = self._tool_results_have_validation_evidence(current, tool_results)
+        validation_ok_current = self._tool_results_have_validation_evidence(current, tool_results)
+        validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
+        validation_ok = validation_ok_current or validation_ok_blackboard
+        bb_sig = self._plan_step_blackboard_signals(current, bb)
+        todo_progress_signal = any(
+            isinstance(r, dict) and r.get("ok", False)
+            and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
+            for r in tool_results
+        )
         # Auto-advance conditions:
         should_advance = False
+        _gate_blocked = False  # True when validation gate fired and blocked — no other path may advance
         # Priority 1: Check if worker subtasks are all completed (most reliable signal)
         subtasks_done = self._step_subtasks_all_completed(current)
-        if subtasks_done and validation_ok:
-            should_advance = True
-        # Priority 2: Phase-based heuristics (require observable evidence, not just file creation)
-        if not should_advance:
-            if phase in ("research", "design") and validation_ok:
-                should_advance = True
-            elif phase == "implement" and wrote_files and validation_ok:
-                should_advance = True
-            elif phase in ("test", "review") and ran_bash_ok and validation_ok:
-                should_advance = True
+        if subtasks_done:
+            # Validation gate always fires when subtasks are done — even if validation_ok is False.
+            # For research/design phases the gate passes immediately; for implement/test it requires
+            # a successful bash run. This ensures single mode proactively requests verification.
+            _gate_ok = self._single_mode_validation_gate(current, tool_results)
+            if _gate_ok:
+                if validation_ok:
+                    should_advance = True
+                elif todo_progress_signal and self._step_has_accumulated_evidence(current, bb):
+                    # Accumulated evidence path: subtasks done + TodoWrite progress + history
+                    should_advance = True
+            else:
+                _gate_blocked = True  # Gate blocked — disable ALL remaining advancement paths
+        # Priority 2: Phase-based heuristics — BUT gate by subtask completion when subtasks exist
+        # CRITICAL: A single write_file must NOT advance when 3+ subtasks remain
+        # Skipped when validation gate has blocked advancement (subtasks_done + gate failed)
+        if not should_advance and not _gate_blocked:
+            _has_subtasks_s = bool(self._active_plan_worker_todo_rows(
+                str(current.get("id", "") or ""), role=""
+            ))
+            _can_use_phase_heuristic = subtasks_done or not _has_subtasks_s
+            if _can_use_phase_heuristic:
+                if phase in ("research", "design") and validation_ok:
+                    should_advance = True
+                elif phase == "implement" and (
+                    (wrote_files and validation_ok_current)
+                    or (bb_sig["has_write"] and validation_ok_blackboard)
+                ):
+                    should_advance = True
+                elif phase in ("test", "review") and (
+                    (ran_bash_ok and validation_ok_current)
+                    or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
+                ):
+                    should_advance = True
         # Also check if the agent explicitly mentioned step completion
-        if not should_advance:
+        # Also blocked by validation gate when subtasks_done path was blocked
+        if not should_advance and not _gate_blocked:
             # Check last assistant message for step completion signals
             last_text = ""
             for msg in reversed(self.messages[-3:]):
@@ -26327,7 +28122,25 @@ body{padding:18px}
             except Exception:
                 pass
         else:
+            self._inject_rework_if_needed(current, {"tool_results": tool_results})
             self._sync_todos_from_blackboard(reason="single-agent-round")
+            if todo_progress_signal and not subtasks_done:
+                step_rows = self._active_plan_worker_todo_rows(str(current.get("id", "") or ""), role="")
+                next_row = next(
+                    (r for r in step_rows if str(r.get("status", "") or "").strip().lower() == "in_progress"),
+                    None,
+                )
+                focus_text = trim(str((next_row or {}).get("content", "") or "").strip(), 180)
+                if focus_text:
+                    focus_msg = (
+                        "<todo-focus>"
+                        f"Continue ONLY the current in_progress subtask: {focus_text}. "
+                        "Do not branch away from the active plan step."
+                        "</todo-focus>"
+                    )
+                    recent = self.messages[-6:]
+                    if not any(str(msg.get("content", "") or "").strip() == focus_msg for msg in recent if isinstance(msg, dict)):
+                        self._append_plan_guidance_bubble(focus_msg, summary="todo focus: continue current subtask")
     def _todo_project_rows_from_blackboard(self, board: dict | None = None) -> list[dict]:
         bb = board if isinstance(board, dict) else self._ensure_blackboard()
@@ -26337,7 +28150,9 @@ body{padding:18px}
         rows = []
         for todo in todos:
             s = todo.get("status", "pending")
-            c = todo.get("content", "")
+            c = normalize_embedded_newlines(todo.get("content", "") or "")
+            if str(todo.get("category", "") or "") == "plan_step" and "\n" in c:
+                c = c.split("\n", 1)[0].strip()
             ev = todo.get("evidence", "")
             af = {
                 "in_progress": self._ui_text("todo_working", content=c),
@@ -26350,12 +28165,9 @@ body{padding:18px}
         if bool(self.runtime_reclassify_required):
             return
         bb = board if isinstance(board, dict) else self._ensure_blackboard()
-        # In single mode, still sync plan_step todos if they exist
-        has_plan_steps = any(
-            isinstance(t, dict) and t.get("category") == "plan_step"
-            for t in (bb.get("project_todos", []) if isinstance(bb.get("project_todos"), list) else [])
-        )
-        if not self._is_multi_agent_mode() and not has_plan_steps:
+        route_kind = self._todo_route_kind(board=bb)
+        has_plan_steps = route_kind in {"plan_single", "plan_sync"}
+        if route_kind == "pure_single":
             return
         self._init_project_todos(bb)
         self._update_project_todo_status(bb)
@@ -26367,25 +28179,25 @@ body{padding:18px}
                 pass
         system_rows = self._todo_project_rows_from_blackboard(bb)
         existing = self.todo.snapshot()
+        bridged_flat_rows = False
         worker_rows: list[dict] = []
         non_system_rows: list[dict] = []
-        for row in existing:
-            if not isinstance(row, dict):
-                continue
-            key = str(row.get("key", "") or "").strip()
-            owner = str(row.get("owner", "") or "").strip().lower()
-            is_system_key = key.startswith(("bb:owner:", "bb:node:", "bb:proj:"))
-            # Preserve worker-owned items (from TodoWrite) separately
-            is_worker_item = owner in ("developer", "explorer", "reviewer") and not is_system_key
-            if is_worker_item:
-                worker_rows.append(dict(row))
-                continue
-            if is_system_key or owner == "manager":
-                continue
-            non_system_rows.append(dict(row))
-        if has_plan_steps:
-            worker_rows = [r for r in worker_rows if str(r.get("parent_step_id", "") or "").strip()]
-            non_system_rows = []
+        if route_kind == "plan_single":
+            worker_rows = self._todo_route_rows(route_kind, rows=existing, board=bb)
+            if not worker_rows:
+                flat_rows = self._todo_route_rows("pure_single", rows=existing, board=bb)
+                bridged_rows, bridged_flat_rows = self._bridge_flat_todos_to_active_plan_step(flat_rows, board=bb)
+                if bridged_flat_rows:
+                    worker_rows = self._todo_route_rows(route_kind, rows=bridged_rows, board=bb)
+        elif route_kind == "plan_sync":
+            worker_rows = self._todo_route_rows(route_kind, rows=existing, board=bb)
+        elif route_kind == "pure_sync":
+            worker_rows = self._todo_route_rows(
+                route_kind,
+                rows=existing,
+                role=self._todo_worker_role_hint(board=bb),
+                board=bb,
+            )
         # Smart trim: keep all active (in_progress/pending) system rows,
         # but only recent 3 completed system rows to save capacity for worker subtasks
         active_system = [r for r in system_rows if r.get("status") != "completed"]
@@ -26449,6 +28261,11 @@ body{padding:18px}
             todo_out = self.todo.update(merged)
         except Exception:
             return
+        if bridged_flat_rows:
+            self._emit(
+                "status",
+                {"summary": "flat todos attached to current plan step"},
+            )
         if todo_out != self.todo.no_changes_text() and reason:
             self._emit(
                 "status",
@@ -26781,7 +28598,7 @@ body{padding:18px}
         task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
         if task_type in TASK_PROFILE_TYPES:
             merged["task_type"] = task_type
-        complexity = trim(str(row.get("complexity", "") or "").strip().lower(), 20)
+        complexity = normalize_task_complexity(row.get("complexity", ""), default="")
         if complexity in TASK_COMPLEXITY_LEVELS:
             merged["complexity"] = complexity
         scale = trim(str(row.get("scale_preference", "") or "").strip().lower(), 20)
@@ -26826,7 +28643,7 @@ body{padding:18px}
     def _fallback_task_level_decision(self, goal_text: str) -> dict:
         profile = self._infer_task_profile(goal_text)
         task_type = str(profile.get("task_type", "general") or "general")
-        complexity = str(profile.get("complexity", "simple") or "simple")
+        complexity = normalize_task_complexity(profile.get("complexity", "simple"), default="simple")
         low = str(goal_text or "").lower()
         inherit_previous_state = False
         if bool(self.runtime_goal_reset_pending):
@@ -26945,9 +28762,9 @@ body{padding:18px}
         level = 3
         if task_type == "simple_qa":
             level = 1 if len(str(goal_text or "")) <= 180 else 2
-        elif complexity == "simple" and task_type in {"general"}:
+        elif task_complexity_rank(complexity) <= task_complexity_rank("simple") and task_type in {"general"}:
             level = 2
-        elif complexity == "simple":
+        elif task_complexity_rank(complexity) <= task_complexity_rank("moderate"):
             level = 3
         elif any(tok in low for tok in ("system-level", "系统级", "blackboard", "orchestrator", "内核", "基础设施")):
             level = 5
@@ -27009,7 +28826,9 @@ body{padding:18px}
             "SCALE PREFERENCE: Infer fast|balanced|thorough from user wording. "
             "User-stated preference overrides your default strategy. "
             "Budget controls internal depth/compactness, NOT early-stop messaging to user.\n\n"
-            "Output exactly one classify_task_level tool call with concise judgement, inherit_previous_state, "
+            "CRITICAL OUTPUT CONTRACT: You MUST output exactly one classify_task_level tool call and no plain-text answer. "
+            "A prose-only response is invalid and will be discarded.\n"
+            "The tool call must include concise judgement, inherit_previous_state, "
             "and semantic_confidence (high|medium|low). "
             "Use low confidence only when semantic ambiguity is substantial, then set low_confidence_reason briefly.\n"
             f"{model_language_instruction(self.ui_language)}"
@@ -27021,6 +28840,28 @@ body{padding:18px}
             )
         return base
+    def _extract_classify_task_level_row(self, response: dict | None) -> dict:
+        """Return the classify_task_level arguments carried by a model response.
+        Scans ``response["tool_calls"]`` in order and returns the arguments of the
+        first ``classify_task_level`` call whose arguments are a dict, or a string
+        that ``parse_tool_arguments_with_error`` parses into a dict. When no tool
+        call qualifies, ``response["content"]`` is parsed the same way and accepted
+        only if the result is a dict with a non-None ``level`` entry.
+        Returns a shallow copy of the arguments, or ``{}`` when ``response`` is not
+        a dict or nothing usable is found.
+        """
+        if not isinstance(response, dict):
+            return {}
+        tool_calls = response.get("tool_calls", [])
+        if not isinstance(tool_calls, list):
+            tool_calls = []
+        for tc in tool_calls:
+            fn = tc.get("function") if isinstance(tc, dict) else None
+            # Skip malformed entries (non-dict call, or a missing/None/non-dict
+            # "function" value) up front instead of failing on fn.get() below.
+            if not isinstance(fn, dict):
+                continue
+            if str(fn.get("name", "") or "").strip() != "classify_task_level":
+                continue
+            args = fn.get("arguments", {})
+            if isinstance(args, dict):
+                return dict(args)
+            if isinstance(args, str):
+                parsed, _ = parse_tool_arguments_with_error(args)
+                if isinstance(parsed, dict):
+                    return dict(parsed)
+        content = str(response.get("content", "") or "").strip()
+        if content:
+            # Fallback: try to read the plain-text content as the argument object;
+            # requiring "level" keeps unrelated parsed dicts from being accepted.
+            parsed, _ = parse_tool_arguments_with_error(content)
+            if isinstance(parsed, dict) and parsed.get("level") is not None:
+                return dict(parsed)
+        return {}
     def _skill_aware_reeval_task_level(
         self,
         goal_text: str,
@@ -27214,7 +29055,7 @@ body{padding:18px}
         if low_confidence_mode:
             rule_profile = self._infer_task_profile(goal_text)
             fallback_task_type = str(rule_profile.get("task_type", "general") or "general")
-            fallback_complexity = str(rule_profile.get("complexity", "simple") or "simple")
+            fallback_complexity = normalize_task_complexity(rule_profile.get("complexity", "simple"), default="simple")
             fallback_objective = trim(str(rule_profile.get("direct_objective", "") or ""), 800)
         else:
             board_now = self._ensure_blackboard()
@@ -27225,12 +29066,10 @@ body{padding:18px}
             )
             if fallback_task_type not in TASK_PROFILE_TYPES:
                 fallback_task_type = "general"
-            fallback_complexity = trim(
-                str(self.runtime_task_complexity or board_profile.get("complexity", "simple") or "simple"),
-                20,
+            fallback_complexity = normalize_task_complexity(
+                self.runtime_task_complexity or board_profile.get("complexity", "simple") or "simple",
+                default="simple",
             )
-            if fallback_complexity not in TASK_COMPLEXITY_LEVELS:
-                fallback_complexity = "simple"
             fallback_objective = trim(
                 str(self.runtime_direct_objective or board_profile.get("direct_objective", "") or "").strip(),
                 800,
@@ -27242,22 +29081,20 @@ body{padding:18px}
         task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
         if task_type not in TASK_PROFILE_TYPES:
             task_type = fallback_task_type
-        complexity = trim(str(row.get("complexity", "") or "").strip().lower(), 20)
-        if complexity not in TASK_COMPLEXITY_LEVELS:
-            complexity = fallback_complexity
+        complexity = normalize_task_complexity(row.get("complexity", ""), default=fallback_complexity)
         if explicit_complexity in TASK_COMPLEXITY_LEVELS:
-            complexity = explicit_complexity
+            complexity = normalize_task_complexity(explicit_complexity, default=fallback_complexity)
         elif preserve_existing_complexity and previous_complexity in TASK_COMPLEXITY_LEVELS:
-            complexity = previous_complexity
+            complexity = normalize_task_complexity(previous_complexity, default=fallback_complexity)
         low_confidence_reason = trim(str(row.get("low_confidence_reason", "") or "").strip(), 220)
         judgement = trim(str(row.get("judgement", "") or "").strip(), 200) or "manager classified task level"
         objective = trim(str(row.get("direct_objective", "") or "").strip(), 800)
         if not objective:
             objective = fallback_objective
         _prev_level_val = int(getattr(self, '_prev_applied_task_level', 0) or 0)
-        _complexity_floor = str(getattr(self, 'runtime_complexity_floor', '') or '').strip()
-        if _complexity_floor == "complex" and complexity == "simple":
-            complexity = "complex"
+        _complexity_floor = normalize_task_complexity(getattr(self, 'runtime_complexity_floor', '') or '', default="")
+        if _complexity_floor in TASK_COMPLEXITY_LEVELS and task_complexity_rank(complexity) < task_complexity_rank(_complexity_floor):
+            complexity = _complexity_floor
         self.runtime_task_level = int(level)
         self._prev_applied_task_level = int(level)
         self.runtime_execution_mode = mode
@@ -27449,34 +29286,50 @@ body{padding:18px}
             retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
             media_inputs=media_inputs_round,
         )
-        tool_calls = response.get("tool_calls", []) if isinstance(response, dict) else []
-        for tc in tool_calls or []:
-            fn = tc.get("function", {}) if isinstance(tc, dict) else {}
-            if str(fn.get("name", "") or "").strip() != "classify_task_level":
-                continue
-            args = fn.get("arguments", {}) if isinstance(fn, dict) else {}
-            if isinstance(args, dict):
-                row = dict(args)
-                row["inherit_previous_state"] = _to_bool_like(
-                    row.get("inherit_previous_state", False),
-                    default=False,
-                )
-                row["semantic_confidence"] = self._normalize_semantic_confidence(
-                    row.get("semantic_confidence", "medium"),
-                    default="medium",
-                )
-                if str(row.get("semantic_confidence", "medium")) == "low":
-                    # Skill-aware re-evaluation before falling back to keyword heuristic
-                    reeval_row = self._skill_aware_reeval_task_level(goal_text, row, pinned_selection)
-                    fallback_row = self._fallback_task_level_decision(goal_text)
-                    merged = self._merge_task_decision_for_low_confidence(reeval_row, fallback_row)
-                    return merged
-                row["source"] = "manager"
-                return row
+        row = self._extract_classify_task_level_row(response)
+        if not row:
+            repair_prompt = (
+                "Previous answer was invalid because it did not produce a valid classify_task_level tool call. "
+                "Retry now. Output exactly one classify_task_level tool call and no prose."
+            )
+            repair_response = self._chat_with_same_model_retry(
+                [
+                    {"role": "user", "content": prompt, "ts": now_ts()},
+                    {"role": "user", "content": repair_prompt, "ts": now_ts()},
+                ],
+                tools=self._manager_task_classify_tools(),
+                system=self._manager_classification_system_prompt(),
+                max_tokens=220,
+                think=False,
+                stream_thinking=False,
+                on_thinking_chunk=self._append_live_thinking,
+                pinned_selection=pinned_selection,
+                context_label="manager classify repair",
+                retries=1,
+                media_inputs=media_inputs_round,
+            )
+            row = self._extract_classify_task_level_row(repair_response)
+        if row:
+            row["inherit_previous_state"] = _to_bool_like(
+                row.get("inherit_previous_state", False),
+                default=False,
+            )
+            row["semantic_confidence"] = self._normalize_semantic_confidence(
+                row.get("semantic_confidence", "medium"),
+                default="medium",
+            )
+            if str(row.get("semantic_confidence", "medium")) == "low":
+                # Skill-aware re-evaluation before falling back to keyword heuristic
+                reeval_row = self._skill_aware_reeval_task_level(goal_text, row, pinned_selection)
+                fallback_row = self._fallback_task_level_decision(goal_text)
+                merged = self._merge_task_decision_for_low_confidence(reeval_row, fallback_row)
+                return merged
+            row["source"] = "manager"
+            return row
         row = self._fallback_task_level_decision(goal_text)
         row["source"] = "fallback-no-toolcall"
         row["semantic_confidence"] = "low"
-        row["low_confidence_reason"] = "manager classifier returned no valid tool call"
+        row["low_confidence_reason"] = "manager classifier returned no valid classify_task_level tool call"
         return row
     # ------------------------------------------------------------------
@@ -27923,6 +29776,7 @@ body{padding:18px}
                 "IMPORTANT: Previous fix attempts FAILED. You MUST change your approach — "
                 "do NOT repeat the same instruction. Include the exact error output in your delegation. "
             )
+        html_hint = self._html_frontend_boost_instruction()
         # Loaded skills constraint for manager
         skills_constraint = self._loaded_skills_prompt_hint(for_role="manager")
         bb_skills = board.get("loaded_skills", {})
@@ -27957,6 +29811,7 @@ body{padding:18px}
             f"{todo_route_note}"
             f"{phase_hint}"
             f"{failure_hint}"
+            f"{html_hint}"
             f"{skills_constraint}"
             f"Level={level}, mode={mode}, progress={progress}, "
             f"budget={'unlimited' if int(budget) <= 0 else int(budget)}, "
@@ -28282,7 +30137,7 @@ body{padding:18px}
                         "reason": "conclusive-reply-detected",
                         "source": "fallback",
                     }
-        if complexity == "simple" and task_type == "simple_code":
+        if task_complexity_rank(complexity) <= task_complexity_rank("moderate") and task_type == "simple_code":
             if has_error_log:
                 return {
                     "target": "developer",
@@ -28420,6 +30275,10 @@ body{padding:18px}
         task_type_low = str(row.get("task_type", "") or "").strip().lower()
         # 5a: Merge in-memory routes with persisted routes for detection
         bb_for_routes = self._ensure_blackboard()
+        current_progress_fp = self._watchdog_state_fingerprint(bb_for_routes)
+        last_delegate = bb_for_routes.get("last_delegate", {}) if isinstance(bb_for_routes.get("last_delegate"), dict) else {}
+        last_progress_fp = trim(str(last_delegate.get("progress_fp", "") or "").strip(), 80)
+        no_progress_since_last_delegate = bool(last_progress_fp and last_progress_fp == current_progress_fp)
         persisted_routes = bb_for_routes.get("persisted_manager_routes", [])
         if not isinstance(persisted_routes, list):
             persisted_routes = []
@@ -28431,22 +30290,16 @@ body{padding:18px}
                 if (
                     isinstance(deleg, dict)
                     and str(deleg.get("target", "") or "").strip().lower() == target
+                    and (
+                        not str(deleg.get("progress_fp", "") or "").strip()
+                        or str(deleg.get("progress_fp", "") or "").strip() == current_progress_fp
+                    )
                     and int(deleg.get("count", 0) or 0) >= 3
                 ):
-                    alt_targets = [r for r in ("reviewer", "developer", "explorer") if r != target]
-                    if len(bb_for_routes.get("code_artifacts", {}) or {}) > 0:
-                        row["target"] = "finish"
-                        row["instruction"] = (
-                            f"Anti-stall: delegation to '{target}' repeated {deleg.get('count')} times with same instruction. "
-                            "Forcing finish to break loop."
-                        )
-                    else:
-                        row["target"] = alt_targets[0] if alt_targets else "developer"
-                        row["instruction"] = (
-                            f"Anti-stall: delegation to '{target}' repeated {deleg.get('count')} times. "
-                            f"Switching to {row['target']} with fresh approach."
-                        )
-                    row["reason"] = f"{row.get('reason', '')}|anti-stall-repeated-delegation"
+                    if not no_progress_since_last_delegate:
+                        continue
+                    row = self._manager_recovery_route_for_repeated_delegate(row, board=bb_for_routes)
+                    row["reason"] = trim(f"{row.get('reason', '')}|anti-stall-repeated-delegation", 600)
                     row["source"] = "anti-stall"
                     return row
         if task_type_low in ("simple_code", "engineering") and target == "explorer":
@@ -28469,7 +30322,7 @@ body{padding:18px}
         if target not in AGENT_ROLES:
             return row
         recent = [str(x.get("target", "") or "").strip().lower() for x in merged_routes[-4:]]
-        if len(recent) >= 3 and recent[-1] == target and recent[-2] == target and recent[-3] == target:
+        if no_progress_since_last_delegate and len(recent) >= 3 and recent[-1] == target and recent[-2] == target and recent[-3] == target:
             board = bb_for_routes
             low_reason = str(row.get("reason", "") or "").strip().lower()
             if "summary" in low_reason and len(board.get("code_artifacts", {}) or {}) > 0:
@@ -28514,7 +30367,7 @@ body{padding:18px}
                     row["reason"] = f"{row.get('reason', '')}|anti-stall->developer-suggest"
             row["source"] = "anti-stall"
             return row
-        if len(recent) == 4 and recent[0] == recent[2] and recent[1] == recent[3] and recent[0] != recent[1]:
+        if no_progress_since_last_delegate and len(recent) == 4 and recent[0] == recent[2] and recent[1] == recent[3] and recent[0] != recent[1]:
             board = bb_for_routes
             if len(board.get("code_artifacts", {}) or {}) > 0:
                 row["target"] = "finish"
@@ -28595,9 +30448,7 @@ body{padding:18px}
         task_type = trim(str(row.get("task_type", default_type) or "").strip().lower(), 40) or default_type
         if task_type not in TASK_PROFILE_TYPES:
             task_type = default_type
-        complexity = trim(str(row.get("complexity", default_complexity) or "").strip().lower(), 20) or default_complexity
-        if complexity not in TASK_COMPLEXITY_LEVELS:
-            complexity = default_complexity
+        complexity = normalize_task_complexity(row.get("complexity", default_complexity) or default_complexity, default=default_complexity)
         scale_preference = trim(
             str(row.get("scale_preference", profile.get("scale_preference", self.runtime_scale_preference)) or "").strip().lower(),
             20,
@@ -29018,6 +30869,19 @@ body{padding:18px}
                         seen.add(low_tail)
                         keep_lines.append(tail)
                 continue
+            if low.startswith("tasks to complete:"):
+                continue
+            if re.match(r"^\d+(?:\.\d+)*[.)]\s+", s):
+                continue
+            if re.match(r"^[-*]\s+", s):
+                continue
+            if re.match(
+                r"(?i)^(mkdir\s+-p|run:|create directories:|create project|create directory|initialize project|cmake\b|python\s+-m\s+venv\b|npx\b)",
+                s,
+            ):
+                continue
+            if re.match(r"^(创建|初始化|运行|目录结构|项目根目录结构)[:：]?", s):
+                continue
             norm = re.sub(r"\s+", " ", s).strip().lower()
             if norm and norm not in seen:
                 seen.add(norm)
@@ -29375,6 +31239,7 @@ body{padding:18px}
             "instruction": instruction,
             "reason": trim(str(route.get("reason", "") or "").strip(), 600),
             "source": trim(str(route.get("source", "") or "").strip(), 40),
+            "progress_fp": self._watchdog_state_fingerprint(board),
             "task_level": int(task_level),
             "execution_mode": execution_mode,
             "task_type": task_type,
@@ -29459,8 +31324,9 @@ body{padding:18px}
             profile["task_type"] = task_type
         if complexity in TASK_COMPLEXITY_LEVELS:
             # Floor protection: if plan mode set a floor, do not allow downgrade
-            if self.runtime_complexity_floor == "complex" and complexity == "simple":
-                complexity = "complex"
+            _route_complexity_floor = normalize_task_complexity(self.runtime_complexity_floor, default="")
+            if _route_complexity_floor in TASK_COMPLEXITY_LEVELS and task_complexity_rank(complexity) < task_complexity_rank(_route_complexity_floor):
+                complexity = _route_complexity_floor
             profile["complexity"] = complexity
         profile["scale_preference"] = scale_preference if scale_preference in TASK_SCALE_PREFERENCES else "balanced"
         if objective:
@@ -29830,8 +31696,25 @@ body{padding:18px}
                     )
                     self._emit("status", {"summary": f"reviewer finish blocked: {gate_reason}"})
             else:
+                bb_finish = self._ensure_blackboard()
+                profile_finish = self._ensure_blackboard_task_profile(bb_finish)
+                exec_mode = normalize_execution_mode(
+                    profile_finish.get("execution_mode", self._effective_execution_mode()),
+                    default=self._effective_execution_mode(),
+                )
                 approval_note = summary_arg or output or "finish tool acknowledged"
-                self._blackboard_mark_approved(approval_note, role_key)
+                if exec_mode == EXECUTION_MODE_SYNC:
+                    self._blackboard_append_section(
+                        "execution_logs",
+                        role_key,
+                        (
+                            "finish requested but deferred: sync mode requires reviewer pass before approval.\n"
+                            f"summary: {approval_note}"
+                        ),
+                    )
+                    self._emit("status", {"summary": "finish deferred: sync mode requires reviewer approval"})
+                else:
+                    self._blackboard_mark_approved(approval_note, role_key)
         if not ok and output:
             self._blackboard_append_section(
                 "execution_logs",
@@ -29869,6 +31752,7 @@ body{padding:18px}
         role_key = self._sanitize_agent_role(role)
         status = str((step or {}).get("status", "") or "")
         text = trim(str((step or {}).get("text", "") or "").strip(), BLACKBOARD_MAX_TEXT)
+        tool_results = (step or {}).get("tool_results", []) if isinstance((step or {}).get("tool_results"), list) else []
         if role_key and text:
             board = self._ensure_blackboard()
             board["last_worker_reply"] = {
@@ -29889,7 +31773,28 @@ body{padding:18px}
                 self._blackboard_set_status("REVIEWING")
                 if self._reviewer_deems_done(text):
                     self._blackboard_mark_approved(text, role_key)
-        for item in (step or {}).get("tool_results", []) or []:
+        explicit_todo_write = any(
+            isinstance(item, dict) and str(item.get("name", "") or "") in {"TodoWrite", "TodoWriteRescue"}
+            for item in tool_results
+        )
+        if role_key and not explicit_todo_write:
+            source_text = text or self._latest_agent_assistant_text(role_key)
+            if re.search(r"(?m)^\s*(?:[-*•>]+\s*)?\[(?: |>|x)\]\s+\S", source_text or ""):
+                board = self._ensure_blackboard()
+                step_id = trim(str((self._get_active_plan_step(board) or {}).get("id", "") or ""), 20)
+                parsed_rows = extract_todo_rows_from_text(
+                    source_text,
+                    default_parent_step_id=step_id,
+                    limit=12,
+                )
+                if parsed_rows:
+                    merged = self._merge_todo_signal_rows(parsed_rows, role=role_key, board=board)
+                    if merged != self.todo.no_changes_text():
+                        self._emit(
+                            "status",
+                            {"summary": f"todo synced from canonical {role_key} text"},
+                        )
+        for item in tool_results:
             if isinstance(item, dict) and bool(item.get("bb_applied", False)):
                 continue
             self._blackboard_update_from_tool_result(role_key, item)
@@ -30490,6 +32395,7 @@ body{padding:18px}
         skills_block = self._skills_awareness_block(for_role=role_key)
         code_note = self._runtime_code_reference_prompt_block(max_chars=2600)
         engineering_note = self._engineering_execution_boost_instruction()
+        html_note = self._html_frontend_boost_instruction()
         plan_todo_note = self._plan_todo_discipline_prompt(role=role_key)
         base = (
             f"You are {self._agent_display_name(role_key)} in a multi-agent coding system. "
@@ -30498,10 +32404,15 @@ body{padding:18px}
             f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
             f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS) or pptxgen.bundle.js (browser). "
             f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
+            "IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
+            "If an HTML deliverable needs any asset from js_lib, copy it into a task-local relative asset folder "
+            "such as './js/' or './assets/vendor/' next to the deliverable, then reference it with a plain relative path. "
+            "Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases in final exported HTML. "
             "Use blackboard for shared state, ask_colleague for inter-agent communication. "
             "Keep outputs concise and action-oriented. "
             f"{code_note + ' ' if code_note else ''}"
             f"{engineering_note + ' ' if engineering_note else ''}"
+            f"{html_note + ' ' if html_note else ''}"
             f"{_detect_os_shell_instruction()} "
             f"{model_language_instruction(self.ui_language)} "
         )
@@ -30557,13 +32468,13 @@ body{padding:18px}
             "The skill's workflow, tools, and file structure OVERRIDE the plan's implementation "
             "approach — if the plan says 'use python-pptx' but the skill says 'use PptxGenJS', "
             "use PptxGenJS. The skill defines HOW to implement; the plan defines WHAT to do. "
-            "AUTONOMOUS SKILL LOADING: When starting a coding, debugging, or architecture task, "
-            "call list_skills to discover available skills, then load_skill to activate the most relevant ones. "
-            "Load skills BEFORE you start working, not after you're stuck. "
-            "Already-loaded skills appear as <loaded-skill> messages in your context — use them directly without reloading. "
             "TODO TRACKING (mandatory): "
             "When a plan step is active, follow the current todo subtask order instead of inventing a parallel path. "
             "After completing ONE subtask, call TodoWrite immediately — mark that subtask as 'completed' and move the next one to 'in_progress' before doing more work. "
+            "Prefer TodoWrite items as objects with explicit fields: "
+            "{content, status, owner?, parent_step_id?}. "
+            "If you must use strings, use ONLY canonical prefixes: '[ ]', '[>]', '[x]'. "
+            "Do not use emoji markers or free-form localized status labels in TodoWrite payloads. "
             "Do not silently batch multiple subtasks and do not delay todo updates until the end of the step. "
             "This manual update is critical because skill re-evaluation is triggered by actual todo progress. "
             "EDIT METHODOLOGY (follow strictly): "
@@ -30650,29 +32561,45 @@ body{padding:18px}
             )
     def _todo_write_rescue(self, args: dict) -> str:
+        """Rescue todo writer — accepts both strings and dicts, auto-normalizes.
+        FIXED: Now preserves status from incoming items (especially 'completed')
+        instead of resetting everything to 'pending'."""
         raw_items = args.get("items", [])
         if not isinstance(raw_items, list) or not raw_items:
             raise ValueError("items must be a non-empty array")
-        limited = raw_items[:7]
+        limited = raw_items[:12]  # Allow more items (was 7) — plans can have 5+ subtasks
         active_step = self._get_active_plan_step()
         active_step_id = trim(str((active_step or {}).get("id", "") or ""), 20)
         owner_hint = self._current_plan_worker_owner()
         clean_items = []
+        _status_alias = {
+            "todo": "pending", "doing": "in_progress", "inprogress": "in_progress",
+            "in-progress": "in_progress", "done": "completed", "finish": "completed",
+            "finished": "completed",
+        }
         for idx, item in enumerate(limited):
             if isinstance(item, dict):
                 content = str(item.get("content", item.get("text", item.get("title", "")))).strip()
                 owner = str(item.get("owner", "") or owner_hint).strip().lower()
                 parent_step_id = trim(str(item.get("parent_step_id", "") or active_step_id), 20)
+                # Preserve status from incoming dict (critical for subtask state tracking)
+                raw_status = str(item.get("status", item.get("state", "pending"))).strip().lower()
+                status = _status_alias.get(raw_status, raw_status)
+                if status not in {"pending", "in_progress", "completed"}:
+                    status = "pending"
             else:
                 content = str(item).strip()
                 owner = owner_hint
                 parent_step_id = active_step_id
+                parsed_status, parsed_content = split_todo_status_text(content)
+                status = parsed_status or "pending"
+                content = parsed_content or content
             content = normalize_work_text(content) or content
             if not content:
                 continue
             row = {
                 "content": content,
-                "status": "pending",
+                "status": status,
             }
             if owner in {"developer", "explorer", "reviewer"}:
                 row["owner"] = owner
@@ -30681,17 +32608,52 @@ body{padding:18px}
             clean_items.append(row)
         if not clean_items:
             raise ValueError("no valid todo item text")
-        in_progress_index = int(args.get("in_progress_index", 0) or 0)
-        if in_progress_index < 0 or in_progress_index >= len(clean_items):
-            in_progress_index = 0
-        clean_items[in_progress_index]["status"] = "in_progress"
-        if active_step is not None:
+        # Only apply in_progress_index if NO items already have in_progress status
+        has_in_progress = any(r["status"] == "in_progress" for r in clean_items)
+        if not has_in_progress:
+            in_progress_index = int(args.get("in_progress_index", 0) or 0)
+            if in_progress_index < 0 or in_progress_index >= len(clean_items):
+                in_progress_index = 0
+            # Only set in_progress on a pending item
+            for i, r in enumerate(clean_items):
+                if r["status"] == "pending":
+                    if i >= in_progress_index:
+                        r["status"] = "in_progress"
+                        break
+        route_kind = self._todo_route_kind(role=owner_hint)
+        if route_kind in {"plan_single", "plan_sync"}:
             return self._merge_plan_worker_todo_items(clean_items, role=owner_hint)
-        if self._is_multi_agent_mode() and owner_hint in {"developer", "explorer", "reviewer"}:
+        if route_kind == "pure_sync":
             return self._merge_owner_scoped_todo_items(clean_items, role=owner_hint)
         return self.todo.update(clean_items)
-    def _analyze_todo_result(self, tool_name: str, output: str) -> tuple[str, str]:
+    def _todo_progress_signature(self, rows: list[dict] | None = None) -> list[tuple[str, str, str, str]]:
+        items = rows if isinstance(rows, list) else self.todo.snapshot()
+        sig: list[tuple[str, str, str, str]] = []
+        for row in items:
+            if not isinstance(row, dict):
+                continue
+            sig.append(
+                (
+                    normalize_work_text(str(row.get("content", "") or "")).strip().lower(),
+                    str(row.get("status", "pending") or "pending").strip().lower(),
+                    str(row.get("owner", "") or "").strip().lower(),
+                    str(row.get("parent_step_id", "") or "").strip(),
+                )
+            )
+        return sig
+    def _todo_progress_changed(self, before_rows: list[dict] | None, after_rows: list[dict] | None) -> bool:
+        return self._todo_progress_signature(before_rows) != self._todo_progress_signature(after_rows)
+    def _analyze_todo_result(
+        self,
+        tool_name: str,
+        output: str,
+        *,
+        before_rows: list[dict] | None = None,
+        after_rows: list[dict] | None = None,
+    ) -> tuple[str, str]:
         txt = str(output or "").strip()
         low = txt.lower()
         has_worker_rows = self._todo_runtime_has_worker_rows()
@@ -31155,12 +33117,15 @@ body{padding:18px}
             except Exception:
                 token_decoded = token
             token_decoded = token_decoded.strip()
-            if token_decoded and token_decoded not in out:
-                out.append(token_decoded)
+            for piece in split_structured_todo_content(token_decoded, limit=7):
+                piece_text = str(piece or "").strip()
+                if piece_text and piece_text not in out:
+                    out.append(piece_text)
         if out:
             return out[:7]
         # Fallback: parse non-empty lines / bullets
-        for line in text.splitlines():
+        normalized_text = normalize_embedded_newlines(text)
+        for line in normalized_text.splitlines():
             s = line.strip().strip(",")
             s = re.sub(r"^[\-\*\d\.\)\s]+", "", s).strip()
             if not s:
@@ -31337,11 +33302,21 @@ body{padding:18px}
     def _dispatch_tool_inner(self, name: str, args: dict, role_key: str = "") -> str:
         """Inner tool dispatcher — all tool logic lives here."""
+        # Fix 5d: Reset TodoWrite loop counter on non-TodoWrite tool calls
+        if name not in ("TodoWrite", "TodoWriteRescue") and hasattr(self, '_todowrite_step_counter'):
+            try:
+                _rst_step = self._get_active_plan_step()
+                if isinstance(_rst_step, dict):
+                    _rst_id = str(_rst_step.get("id", "") or "")
+                    if _rst_id:
+                        self._todowrite_step_counter.pop(_rst_id, None)
+            except Exception:
+                pass
         if name == "bash":
             guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), self.files_root)
             if guard_error:
                 return guard_error
-            meta = self._run_shell_meta(args["command"], self.files_root, 120)
+            meta = self._run_shell_meta(args["command"], self.files_root, self._shell_command_timeout())
             self._emit(
                 "command",
                 {
@@ -31508,20 +33483,16 @@ body{padding:18px}
                 )
             return out
         if name == "TodoWrite":
-            # Protect plan_step todos: worker TodoWrite creates sub-items, not replacements
             bb = self._ensure_blackboard()
-            has_plan_steps = any(
-                t.get("category") == "plan_step"
-                for t in bb.get("project_todos", [])
-            )
-            if has_plan_steps:
+            route_kind = self._todo_route_kind(role=str(role_key or ""), board=bb)
+            if route_kind in {"plan_single", "plan_sync"}:
                 items = args.get("items", [])
                 if isinstance(items, list):
                     for item in items:
                         if isinstance(item, dict) and not item.get("key", "").startswith("bb:"):
                             item["owner"] = str(role_key or "developer")
                 result = self._merge_plan_worker_todo_items(items, role=str(role_key or "developer"))
-            elif self._is_multi_agent_mode() and role_key in {"developer", "explorer", "reviewer"}:
+            elif route_kind == "pure_sync":
                 items = args.get("items", [])
                 if isinstance(items, list):
                     for item in items:
@@ -31891,7 +33862,7 @@ body{padding:18px}
             guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), wt_path)
             if guard_error:
                 return guard_error
-            meta = self._run_shell_meta(args["command"], wt_path, 300)
+            meta = self._run_shell_meta(args["command"], wt_path, self._shell_command_timeout())
             self._emit(
                 "command",
                 {
@@ -32348,6 +34319,18 @@ body{padding:18px}
                 },
             )
             self._persist()
+        _proc = getattr(self, "_running_bash_proc", None)
+        if _proc is not None:
+            try:
+                if os.name == "posix":
+                    try:
+                        os.killpg(os.getpgid(_proc.pid), signal.SIGKILL)
+                    except Exception:
+                        _proc.kill()
+                else:
+                    _proc.kill()
+            except Exception:
+                pass
     def _reviewer_approval_log_gate(self, board: dict | None = None) -> tuple[bool, str]:
         bb = board if isinstance(board, dict) else self._ensure_blackboard()
@@ -32752,8 +34735,8 @@ body{padding:18px}
             isinstance(t, dict) and t.get("category") == "plan_step"
             for t in board.get("project_todos", [])
         )
-        _sync_complexity = str(profile.get("complexity", "simple") or "simple")
-        if not _sync_has_plan and _sync_complexity in ("moderate", "complex", "expert"):
+        _sync_complexity = normalize_task_complexity(profile.get("complexity", "simple"), default="simple")
+        if not _sync_has_plan and task_complexity_at_least(_sync_complexity, "moderate"):
             self.messages.append({
                 "role": "system",
                 "content": (
@@ -32860,29 +34843,59 @@ body{padding:18px}
                 self._mark_all_done_silently(note)
                 self._emit("status", {"summary": "manager decided finish; run paused"})
                 break
-            # Detect manager stuck: same instruction repeated N times → force advance + break
+            # Detect manager loop: same instruction repeated with unchanged progress.
             import hashlib as _hl_mgr
-            _cur_hash = _hl_mgr.sha1((target + "|" + instruction).encode("utf-8")).hexdigest()[:12]
+            _delegate_progress_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
+            _cur_hash = _hl_mgr.sha1((target + "|" + instruction + "|" + _delegate_progress_fp).encode("utf-8")).hexdigest()[:12]
             if _cur_hash == _prev_delegation_hash:
                 _repeat_delegation_count += 1
             else:
                 _repeat_delegation_count = 0
             _prev_delegation_hash = _cur_hash
-            if _repeat_delegation_count >= 15:
-                self._emit("status", {"summary": f"manager stuck: repeated identical delegation x{_repeat_delegation_count + 1}; forcing advance"})
+            if _repeat_delegation_count >= 3:
                 _bb_stuck = self._ensure_blackboard()
                 _stuck_step = next(
                     (t for t in _bb_stuck.get("project_todos", [])
                      if t.get("category") == "plan_step" and t.get("status") == "in_progress"),
                     None,
                 )
-                if _stuck_step:
-                    self._advance_plan_step(evidence="manager stuck: repeated delegation", actor="manager")
-                else:
-                    self._blackboard_mark_approved("manager stuck loop break", "manager")
-                    self._mark_all_done_silently("manager stuck: repeated delegation break")
-                    break
+                _step_note = trim(str((_stuck_step or {}).get("content", "") or ""), 200)
+                route = self._manager_recovery_route_for_repeated_delegate(route, board=_bb_stuck)
+                target = str(route.get("target", "") or "").strip().lower()
+                instruction = trim(str(route.get("instruction", "") or "").strip(), 1400)
                 _repeat_delegation_count = 0
+                _prev_delegation_hash = ""
+                self._emit(
+                    "status",
+                    {
+                        "summary": (
+                            f"manager loop recovery: repeated identical delegation under unchanged progress; "
+                            f"rerouting to {target}"
+                        )
+                    },
+                )
+                self._append_manager_context(
+                    {
+                        "role": "system",
+                        "content": (
+                            "[manager-loop-guard] Repeated identical delegation detected under unchanged progress. "
+                            "Do NOT mark the active step completed just because the owner was delegated repeatedly. "
+                            "Use a recovery route based on current step evidence and worker todo state."
+                            + (f" Active step: {_step_note}." if _step_note else "")
+                            + (f" Recovery target: {target}." if target else "")
+                        ),
+                        "ts": now_ts(),
+                    }
+                )
+                self._blackboard_append_section(
+                    "execution_logs",
+                    "manager",
+                    (
+                        "manager repeated identical delegation; applied recovery reroute"
+                        + (f"\nactive_step: {_step_note}" if _step_note else "")
+                        + (f"\nrecovery_target: {target}" if target else "")
+                    ),
+                )
             role = self._sanitize_agent_role(target) or "developer"
             self._inject_manager_instruction(
                 role,
@@ -32902,13 +34915,6 @@ body{padding:18px}
                 media_inputs_pool=media_inputs_pool,
                 media_seen_ts_by_role=media_seen_ts_by_role,
             )
-            # Sync-mode skill auto-discovery: same mechanism as plan mode's step-completed trigger.
-            # Runs on early rounds for developer/explorer. Uses goal_sig dedup — no re-loading if already loaded.
-            if role in ("developer", "explorer") and rounds_used <= 2:
-                try:
-                    self._refresh_loaded_skills_for_execution_focus(trigger=f"sync-worker-pre:{role}")
-                except Exception:
-                    pass
             board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
             step = self._multi_agent_turn(
                 role,
@@ -32918,49 +34924,24 @@ body{padding:18px}
             self._blackboard_update_from_worker_step(role, step)
             # Post-execution plan step advancement (replaces pre-execution advancement)
             self._post_execution_plan_step_check(route, step if isinstance(step, dict) else {})
-            # Sync-mode failure recovery: detect all-tools-failed and inject recovery hint + auto-load debugging skill
-            _step_dict = step if isinstance(step, dict) else {}
-            _step_results = _step_dict.get("tool_results", []) or []
-            if _step_results:
-                _sync_err_count = sum(1 for r in _step_results if isinstance(r, dict) and not r.get("ok", False))
-                _sync_ok_count = sum(1 for r in _step_results if isinstance(r, dict) and r.get("ok", False))
-                if _sync_err_count > 0 and _sync_ok_count == 0:
-                    # All tool calls failed in this worker turn — inject recovery guidance
-                    _failed_tools = [str(r.get("name", "")) for r in _step_results if isinstance(r, dict)][:4]
-                    _err_outputs = " | ".join(
-                        trim(str(r.get("output", "") or ""), 120)
-                        for r in _step_results if isinstance(r, dict) and not r.get("ok", False)
-                    )[:400]
-                    self._append_agent_context_message(
-                        role,
+            progress_capsule = self._manager_worker_progress_capsule(
+                role,
+                step if isinstance(step, dict) else {},
+                self._ensure_blackboard(),
+            )
+            if progress_capsule:
+                recent_mgr = self.manager_context[-4:] if isinstance(self.manager_context, list) else []
+                if not any(
+                    isinstance(msg, dict) and str(msg.get("content", "") or "").strip() == progress_capsule
+                    for msg in recent_mgr
+                ):
+                    self._append_manager_context(
                         {
-                            "role": "user",
-                            "content": (
-                                "<failure-recovery>"
-                                f"All tool calls failed in this turn ({', '.join(_failed_tools)}). "
-                                f"Errors: {_err_outputs}\n"
-                                "Before retrying, STOP and diagnose:\n"
-                                "1) If a debugging skill is available, call load_skill('systematic-debugging') and follow its workflow.\n"
-                                "2) Read the EXACT error message — identify the root cause, not just the symptom.\n"
-                                "3) Form ONE hypothesis about the cause before making any changes.\n"
-                                "4) Apply ONE targeted fix, then verify with a test/build command.\n"
-                                "5) If still blocked after 2 attempts, report the exact blocker to the user."
-                                "</failure-recovery>"
-                            ),
+                            "role": "system",
+                            "content": progress_capsule,
                             "ts": now_ts(),
-                            "agent_role": role,
-                        },
-                        mirror_to_global=False,
+                        }
                     )
-                    # Auto-load systematic-debugging if failure involves code errors
-                    _code_err_kw = ("bash", "compile", "syntax", "test", "build", "traceback", "error:")
-                    if any(kw in _err_outputs.lower() for kw in _code_err_kw):
-                        _bb_sk = self._ensure_blackboard().get("loaded_skills", {})
-                        if isinstance(_bb_sk, dict) and "systematic-debugging" not in _bb_sk:
-                            try:
-                                self._load_skill_with_cache("systematic-debugging", load_source="auto:sync-worker-failure")
-                            except Exception:
-                                pass
             # Fix 6b: Pure sync no-plan — read worker-done signal and notify manager
             _bb_sync = self._ensure_blackboard()
             if _bb_sync.pop("sync_worker_round_done", False):
@@ -33309,17 +35290,19 @@ body{padding:18px}
         bb["plan"]["phase"] = "synthesis"
         self.blackboard = bb
-        # Synthesis with retry (up to 2 attempts) + minimal fallback
+        # Synthesis with retry + model fallback + deterministic fallback
         proposal = None
-        for _synth_attempt in range(2):
+        for _synth_attempt in range(PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS):
             proposal = self._plan_mode_synthesize_proposal(pinned_selection)
             if proposal and proposal.get("options"):
                 break
-            if _synth_attempt == 0:
+            if _synth_attempt < (PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS - 1):
                 self._emit("status", {"summary": "plan-mode: synthesis retry"})
         if not proposal or not proposal.get("options"):
             # Last resort: minimal fallback with simpler prompt and higher token budget
             proposal = self._synthesis_minimal_fallback(pinned_selection)
+        if not proposal or not proposal.get("options"):
+            proposal = self._synthesis_programmatic_fallback()
         if not proposal or not proposal.get("options"):
             self._emit("status", {"summary": "plan-mode: synthesis failed, falling back to direct execution"})
             self.runtime_plan_mode_needed = False
@@ -33725,21 +35708,29 @@ body{padding:18px}
             f"- Option A: Direct workaround — bypass the blocker with an alternative method\n"
             f"- Option B: Different path — re-approach the goal from a completely different angle\n"
             f"- Option C: Minimal viable + user action items — do what's possible now, list what the user needs to do manually\n\n"
-            f"Call the submit_plan_proposal tool with:\n"
+            f"You MUST call the submit_plan_proposal tool exactly once with:\n"
             f"- context: brief failure analysis (what was tried, what failed, why)\n"
             f"- options: array of 3 options, each with id (A/B/C), title, summary, steps, pros, cons, risk\n"
-            f"- recommended: id of the recommended option\n\n"
+            f"- recommended: id of the recommended option\n"
+            f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n\n"
             f"{model_language_instruction(self.ui_language)}"
         )
         synthesis_ctx = [
-            {"role": "system", "content": "You are a recovery planner analyzing execution failures and proposing alternative approaches.", "ts": now_ts()},
+            {
+                "role": "system",
+                "content": (
+                    "You are a recovery planner analyzing execution failures and proposing alternative approaches. "
+                    "You MUST call submit_plan_proposal exactly once."
+                ),
+                "ts": now_ts(),
+            },
             {"role": "user", "content": synthesis_prompt, "ts": now_ts()},
         ]
         try:
             response = self._chat_with_same_model_retry(
                 synthesis_ctx,
                 tools=self._plan_mode_synthesis_tools(),
-                system="Generate a structured stall recovery plan. Use the submit_plan_proposal tool.",
+                system="Generate a structured stall recovery plan. You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
                 max_tokens=STALL_PLAN_SYNTHESIS_MAX_TOKENS,
                 think=False,
                 stream_thinking=False,
@@ -33748,12 +35739,33 @@ body{padding:18px}
                 context_label="stall-plan synthesis",
                 retries=MODEL_OUTPUT_RETRY_TIMES,
             )
-            tool_calls = response.get("tool_calls", [])
-            for tc in tool_calls:
-                if tc.get("function", {}).get("name") == "submit_plan_proposal":
-                    args = tc["function"].get("arguments", {})
-                    if isinstance(args, dict) and args.get("options"):
-                        return dict(args)
+            proposal = self._extract_plan_proposal_from_response(response)
+            if proposal.get("options"):
+                return proposal
+            repair_response = self._chat_with_same_model_retry(
+                synthesis_ctx + [
+                    {
+                        "role": "user",
+                        "content": (
+                            "Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
+                            "Retry now. Output exactly one submit_plan_proposal tool call and no prose."
+                        ),
+                        "ts": now_ts(),
+                    }
+                ],
+                tools=self._plan_mode_synthesis_tools(),
+                system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
+                max_tokens=STALL_PLAN_SYNTHESIS_MAX_TOKENS,
+                think=False,
+                stream_thinking=False,
+                on_thinking_chunk=self._append_live_thinking,
+                pinned_selection=pinned_selection,
+                context_label="stall-plan synthesis repair",
+                retries=1,
+            )
+            proposal = self._extract_plan_proposal_from_response(repair_response)
+            if proposal.get("options"):
+                return proposal
         except Exception as exc:
             self._emit("status", {"summary": f"stall plan synthesis error: {exc}"})
         return {}
@@ -33824,6 +35836,186 @@ body{padding:18px}
                 lines.append(f"- {trim(str(t), 100)}")
         return "\n".join(lines)
+    def _normalize_plan_proposal_option(self, raw: dict, *, fallback_id: str) -> dict | None:
+        """Coerce one raw plan option into the canonical option dict.
+
+        Args:
+            raw: Candidate option; anything that is not a dict is rejected.
+            fallback_id: Id used when ``raw`` carries no usable ``id``.
+
+        Returns:
+            A dict with ``id``, ``title``, ``summary``, ``steps``, ``pros``,
+            ``cons`` and ``risk``, or None when ``raw`` is not a dict or
+            yields no non-empty step.
+        """
+        if not isinstance(raw, dict):
+            return None
+
+        def _text(key: str, limit: int) -> str:
+            # Stripped, length-capped string view of an optional field.
+            return trim(str(raw.get(key, "") or "").strip(), limit)
+
+        opt_id = trim(str(raw.get("id", "") or fallback_id).strip().upper(), 8) or fallback_id
+        title = _text("title", 200)
+        summary = _text("summary", 600)
+        # Accept either a list of steps or one bare string; ignore anything else.
+        step_source = raw.get("steps", [])
+        if isinstance(step_source, str):
+            step_source = [step_source]
+        elif not isinstance(step_source, list):
+            step_source = []
+        cleaned = (normalize_embedded_newlines(str(entry or "")).strip() for entry in step_source)
+        steps: list[str] = [trim(entry, PLAN_STEP_FULL_CONTENT_MAX_CHARS) for entry in cleaned if entry]
+        pros = _text("pros", 400)
+        cons = _text("cons", 400)
+        risk = trim(str(raw.get("risk", "") or "").strip().lower(), 20)
+        if risk not in {"low", "medium", "high"}:
+            # Unknown or missing risk labels collapse to the middle value.
+            risk = "medium"
+        # Backfill a missing title from the summary's first line, then from the first step.
+        if summary and not title:
+            title = trim(summary.split("\n", 1)[0], 120)
+        if steps and not title:
+            title = trim(steps[0].split("\n", 1)[0], 120)
+        if steps and not summary:
+            summary = trim(steps[0], 300)
+        # An option without any step is rejected outright.
+        if not steps:
+            return None
+        return {
+            "id": opt_id,
+            "title": title or f"Option {opt_id}",
+            "summary": summary or title or f"Plan {opt_id}",
+            "steps": steps,
+            "pros": pros,
+            "cons": cons,
+            "risk": risk,
+        }
+    def _normalize_plan_proposal_payload(self, raw: object) -> dict:
+        """Normalize a raw plan proposal into ``context``/``options``/``recommended``.
+
+        Non-dict input is treated as an empty payload. ``options`` may be a list
+        or a single option dict; every entry is passed through
+        ``_normalize_plan_proposal_option`` and entries it rejects are dropped.
+        Only the first ``2 * PLAN_MODE_MAX_OPTIONS`` entries are examined and at
+        most ``PLAN_MODE_MAX_OPTIONS`` are kept, each with a unique id.
+
+        Returns:
+            ``{"context": str, "options": list[dict], "recommended": str}``.
+            ``recommended`` falls back to the first kept option's id when it
+            does not name a kept option, or to "" when no option survived.
+        """
+        src = raw if isinstance(raw, dict) else {}
+        context = trim(str(src.get("context", "") or "").strip(), 2000)
+        raw_options = src.get("options", [])
+        if isinstance(raw_options, dict):
+            raw_options = [raw_options]
+        if not isinstance(raw_options, list):
+            raw_options = []
+        option_ids = ("A", "B", "C")
+        options: list[dict] = []
+        seen_ids: set[str] = set()
+        for idx, item in enumerate(raw_options[: max(1, PLAN_MODE_MAX_OPTIONS * 2)]):
+            fallback_id = option_ids[min(idx, len(option_ids) - 1)]
+            normalized = self._normalize_plan_proposal_option(item, fallback_id=fallback_id)
+            if not normalized:
+                continue
+            opt_id = str(normalized.get("id", "") or "").strip().upper() or fallback_id
+            if opt_id in seen_ids:
+                # Clashing id: keep the historical positional guess when it is
+                # free, otherwise take the first letter that is still unused so
+                # a valid option is not dropped while an id remains available
+                # (e.g. ids "B", "B" or "A", "C", "A").
+                positional = option_ids[min(len(seen_ids), len(option_ids) - 1)]
+                opt_id = next(
+                    (cand for cand in (positional, *option_ids) if cand not in seen_ids),
+                    "",
+                )
+                if not opt_id:
+                    # Every known letter is already taken; drop the duplicate.
+                    continue
+                normalized["id"] = opt_id
+            seen_ids.add(opt_id)
+            options.append(normalized)
+            if len(options) >= PLAN_MODE_MAX_OPTIONS:
+                break
+        recommended = trim(str(src.get("recommended", "") or "").strip().upper(), 8)
+        valid_ids = {str(opt.get("id", "") or "").strip().upper() for opt in options}
+        if recommended not in valid_ids:
+            recommended = str(options[0].get("id", "A") or "A") if options else ""
+        return {
+            "context": context,
+            "options": options,
+            "recommended": recommended,
+        }
+    def _parse_plan_proposal_from_text(self, text: str) -> dict:
+        """Try to recover a plan proposal from free-form model text.
+
+        Candidates are tried in order: the whole text, the body of each fenced
+        code block (optionally tagged ``json``), and the span from the first
+        "{" to the last "}". Each candidate is parsed as-is and then in the
+        form returned by ``repair_truncated_json_object``. A top-level JSON
+        list is treated as the ``options`` array.
+
+        Returns:
+            The first normalized proposal that has at least one option,
+            otherwise an empty dict.
+        """
+        raw = str(text or "").strip()
+        if not raw:
+            return {}
+        fenced = (
+            str(block or "").strip()
+            for block in re.findall(r"```(?:json)?\s*([\s\S]*?)```", raw, flags=re.IGNORECASE)
+        )
+        candidates: list[str] = [raw, *(body for body in fenced if body)]
+        start, end = raw.find("{"), raw.rfind("}")
+        if 0 <= start < end:
+            candidates.append(raw[start : end + 1].strip())
+        for candidate in candidates:
+            # The repaired variant is computed up front, then both forms are probed.
+            for probe in (candidate, repair_truncated_json_object(candidate)):
+                if not probe:
+                    continue
+                try:
+                    parsed = json.loads(probe)
+                except Exception:
+                    continue
+                if isinstance(parsed, list):
+                    parsed = {"context": "", "options": parsed, "recommended": ""}
+                proposal = self._normalize_plan_proposal_payload(parsed)
+                if proposal.get("options"):
+                    return proposal
+        return {}
+    def _extract_plan_proposal_from_response(self, response: dict | None) -> dict:
+        """Pull a normalized plan proposal out of a chat response.
+
+        ``submit_plan_proposal`` tool calls are inspected first, in order.
+        Dict arguments are normalized directly; string arguments are decoded
+        with ``parse_tool_arguments_with_error`` (its second return value is
+        ignored). If no tool call yields an option, the response ``content``
+        is parsed as text instead.
+
+        Returns:
+            A normalized proposal, or an empty dict when ``response`` is not a
+            dict or nothing usable is found.
+        """
+        if not isinstance(response, dict):
+            return {}
+        tool_calls = response.get("tool_calls", [])
+        for call in tool_calls if isinstance(tool_calls, list) else []:
+            if not isinstance(call, dict):
+                continue
+            fn = call.get("function")
+            if not isinstance(fn, dict):
+                fn = {}
+            if str(fn.get("name", "") or "").strip() != "submit_plan_proposal":
+                continue
+            args = fn.get("arguments", {})
+            if isinstance(args, str):
+                args, _ = parse_tool_arguments_with_error(args)
+            elif not isinstance(args, dict):
+                # Arguments of any other type cannot carry a proposal.
+                continue
+            proposal = self._normalize_plan_proposal_payload(args)
+            if proposal.get("options"):
+                return proposal
+        return self._parse_plan_proposal_from_text(str(response.get("content", "") or ""))
+    def _synthesis_programmatic_fallback(self) -> dict:
+        """Build a single-option plan proposal without calling the model.
+
+        The proposal is assembled from the current goal text and up to the
+        first six blackboard findings (only the first three non-empty ones are
+        quoted in ``context``). A five-step template is used when any finding
+        text exists, otherwise a four-step template.
+
+        Returns:
+            The proposal after ``_normalize_plan_proposal_payload``.
+        """
+        bb = self._ensure_blackboard()
+        goal = trim(str(self.runtime_reclassify_goal or self._latest_user_goal_text() or ""), 1200)
+        plan = bb.get("plan")
+        findings = plan.get("findings", []) if isinstance(plan, dict) else []
+        if not isinstance(findings, list):
+            # A present-but-malformed "findings" value (None, dict, ...) must not
+            # crash this fallback; treat it as no findings.
+            findings = []
+        finding_lines: list[str] = []
+        for row in findings[:6]:
+            if not isinstance(row, dict):
+                continue
+            content = trim(str(row.get("content", "") or "").strip(), 280)
+            if content:
+                finding_lines.append(content)
+        context = trim(
+            (
+                "Fallback synthesis generated automatically from the user goal and current research findings. "
+                + (" | ".join(finding_lines[:3]) if finding_lines else goal)
+            ),
+            1800,
+        )
+        if finding_lines:
+            detailed_steps = [
+                "1. Review findings and lock scope\nUse the collected findings to define the exact execution boundary and required inputs.",
+                "2. Prepare files and dependencies\nCreate or align the necessary files, folders, and runtime prerequisites for the task.",
+                "3. Implement the main work\nExecute the core build/change/generation work for the requested output.",
+                "4. Validate with observable evidence\nRun a concrete check and confirm the expected output, exit code, or rendered result.",
+                "5. Generate delivery report\nSummarize what was built, how to run it, and the key outputs.",
+            ]
+        else:
+            detailed_steps = [
+                "1. Scope and constraints\nClarify the exact deliverable, inputs, and acceptance criteria for this task.",
+                "2. Core implementation\nBuild the main artifact for the request using the most direct workable path.",
+                "3. Verification\nRun at least one observable validation and capture the result.",
+                "4. Delivery report\nSummarize what was built, how to run it, and the key outputs.",
+            ]
+        proposal = {
+            "context": context,
+            "options": [
+                {
+                    "id": "A",
+                    "title": "Direct Execution Plan",
+                    "summary": trim(goal or "Execute the requested task with a direct, verifiable plan.", 240),
+                    "steps": detailed_steps,
+                    "pros": "Deterministic fallback that keeps plan-mode available even when model synthesis formatting is unstable.",
+                    "cons": "Less tailored than a fully synthesized multi-option proposal.",
+                    "risk": "medium",
+                }
+            ],
+            "recommended": "A",
+        }
+        return self._normalize_plan_proposal_payload(proposal)
     def _plan_mode_synthesize_proposal(self, pinned_selection: str) -> dict:
         bb = self._ensure_blackboard()
         plan_data = bb.get("plan", {})
@@ -33859,10 +36051,11 @@ body{padding:18px}
             f"## Research Findings\n{trim(findings_text, 6000)}\n\n"
             f"{skills_section}"
             f"## Instructions\n"
-            f"Call the submit_plan_proposal tool with:\n"
+            f"You MUST call the submit_plan_proposal tool exactly once with:\n"
             f"- context: brief background analysis\n"
             f"- options: array of 1-{PLAN_MODE_MAX_OPTIONS} options, each with id (A/B/C), title, summary, steps, pros, cons, risk\n"
-            f"- recommended: id of the recommended option\n\n"
+            f"- recommended: id of the recommended option\n"
+            f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n\n"
             f"STEP QUALITY REQUIREMENTS:\n"
             f"- Each step must be a concrete, actionable instruction (NOT vague like 'analyze reports')\n"
             f"- Include specific file paths (e.g., 'Read uploaded/IEDM_.parsed.md to extract key findings')\n"
@@ -33934,7 +36127,11 @@ body{padding:18px}
         response = self._chat_with_same_model_retry(
             synthesis_ctx,
             tools=self._plan_mode_synthesis_tools(),
-            system="Generate a structured plan proposal. Use the submit_plan_proposal tool.",
+            system=(
+                "Generate a structured plan proposal. "
+                "You MUST call submit_plan_proposal exactly once. "
+                "Do not answer with plain text."
+            ),
             max_tokens=PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS,
             think=False,
             stream_thinking=False,
@@ -33943,13 +36140,31 @@ body{padding:18px}
             context_label="plan-mode synthesis",
             retries=MODEL_OUTPUT_RETRY_TIMES,
         )
-        tool_calls = response.get("tool_calls", [])
-        for tc in tool_calls:
-            if tc.get("function", {}).get("name") == "submit_plan_proposal":
-                args = tc["function"].get("arguments", {})
-                if isinstance(args, dict) and args.get("options"):
-                    return dict(args)
-        return {}
+        proposal = self._extract_plan_proposal_from_response(response)
+        if proposal.get("options"):
+            return proposal
+        repair_response = self._chat_with_same_model_retry(
+            synthesis_ctx + [
+                {
+                    "role": "user",
+                    "content": (
+                        "Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
+                        "Retry now. Output exactly one submit_plan_proposal tool call and no prose."
+                    ),
+                    "ts": now_ts(),
+                }
+            ],
+            tools=self._plan_mode_synthesis_tools(),
+            system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
+            max_tokens=PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS,
+            think=False,
+            stream_thinking=False,
+            on_thinking_chunk=self._append_live_thinking,
+            pinned_selection=pinned_selection,
+            context_label="plan-mode synthesis repair",
+            retries=1,
+        )
+        return self._extract_plan_proposal_from_response(repair_response)
     def _synthesis_minimal_fallback(self, pinned_selection: str) -> dict:
         """Last-resort: ask model for a single simple plan with higher max_tokens."""
@@ -33961,33 +36176,63 @@ body{padding:18px}
             for f in (findings[:5] if isinstance(findings, list) else [])
         )
         prompt = (
-            f"Generate ONE simple plan for this task. Call submit_plan_proposal with exactly 1 option.\n\n"
+            f"Generate ONE simple plan for this task. You MUST call submit_plan_proposal with exactly 1 option.\n\n"
             f"Task: {goal}\n\nFindings: {trim(findings_text, 3000)}\n\n"
             f"Return a single option with id='A', title, summary, and 5-10 concrete steps.\n"
+            f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n"
             f"{model_language_instruction(self.ui_language)}"
         )
         ctx = [
-            {"role": "system", "content": "You must call submit_plan_proposal tool.", "ts": now_ts()},
+            {
+                "role": "system",
+                "content": (
+                    "You MUST call submit_plan_proposal exactly once. "
+                    "Do not answer with plain text."
+                ),
+                "ts": now_ts(),
+            },
             {"role": "user", "content": prompt, "ts": now_ts()},
         ]
         try:
             response = self._chat_with_same_model_retry(
                 ctx,
                 tools=self._plan_mode_synthesis_tools(),
-                system="Call submit_plan_proposal now.",
+                system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
                 max_tokens=6000,
                 think=False,
                 stream_thinking=False,
                 on_thinking_chunk=self._append_live_thinking,
                 pinned_selection=pinned_selection,
                 context_label="plan-mode minimal fallback",
-                retries=2,
+                retries=3,
+            )
+            proposal = self._extract_plan_proposal_from_response(response)
+            if proposal.get("options"):
+                return proposal
+            repair_response = self._chat_with_same_model_retry(
+                ctx + [
+                    {
+                        "role": "user",
+                        "content": (
+                            "Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
+                            "Retry now. Output exactly one submit_plan_proposal tool call and no prose."
+                        ),
+                        "ts": now_ts(),
+                    }
+                ],
+                tools=self._plan_mode_synthesis_tools(),
+                system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
+                max_tokens=6000,
+                think=False,
+                stream_thinking=False,
+                on_thinking_chunk=self._append_live_thinking,
+                pinned_selection=pinned_selection,
+                context_label="plan-mode minimal fallback repair",
+                retries=1,
             )
-            for tc in response.get("tool_calls", []):
-                if tc.get("function", {}).get("name") == "submit_plan_proposal":
-                    args = tc["function"].get("arguments", {})
-                    if isinstance(args, dict) and args.get("options"):
-                        return dict(args)
+            proposal = self._extract_plan_proposal_from_response(repair_response)
+            if proposal.get("options"):
+                return proposal
         except Exception:
             pass
         return {}
@@ -34071,7 +36316,7 @@ body{padding:18px}
         grouped_steps = self._group_plan_steps(raw_steps if isinstance(raw_steps, list) else [])
         plan_todos: list[dict] = []
         for i, step in enumerate(grouped_steps[:max(1, int(limit))]):
-            step_text = trim(str(step or "").strip(), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
+            step_text = trim(normalize_embedded_newlines(step).strip(), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
             if not step_text:
                 continue
             step_lines = step_text.split("\n")
@@ -34085,6 +36330,7 @@ body{padding:18px}
                     "category": "plan_step",
                     "plan_step_index": i,
                     "created_at": float(now_ts()),
+                    "activated_at": float(now_ts()) if not plan_todos else None,
                     "completed_at": None,
                     "completed_by": "",
                     "evidence": "",
@@ -34197,7 +36443,7 @@ body{padding:18px}
         _mid_re_exec = _re_exec.compile(r"(?<=\S)\s+(\d+\.\d+\s)")
         for t in plan_todos:
             idx = int(t.get("plan_step_index", 0) or 0) + 1
-            full = str(t.get("full_content", "") or t.get("content", "")).strip()
+            full = normalize_embedded_newlines(t.get("full_content", "") or t.get("content", "")).strip()
             # Normalize: split concatenated N.N sub-steps onto own lines
             full = _mid_re_exec.sub(r"\n\1", full)
             header = full.split("\n")[0] if "\n" in full else full
@@ -34359,7 +36605,7 @@ body{padding:18px}
         # Phase 0: Normalize — split mid-string N.N onto own lines
         normalized: list[str] = []
         for s in raw_steps:
-            text = str(s or "").strip()
+            text = normalize_embedded_newlines(s).strip()
             if not text:
                 continue
             fixed = mid_numbered_re.sub(r"\n\1", text)
@@ -34694,18 +36940,16 @@ body{padding:18px}
         chosen_title = trim(str(chosen.get("title", "") or choice_id).strip(), 800)
         chosen_summary = trim(str(chosen.get("summary", "") or "").strip(), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
         # Preserve current complexity unless the user explicitly changes it elsewhere.
-        _current_complexity = trim(
-            str(
-                self.runtime_task_complexity
-                or profile.get("complexity", judgement.get("complexity", ""))
-                or ""
-            ).strip().lower(),
-            20,
+        _current_complexity = normalize_task_complexity(
+            self.runtime_task_complexity
+            or profile.get("complexity", judgement.get("complexity", ""))
+            or "",
+            default="",
         )
         if _current_complexity in TASK_COMPLEXITY_LEVELS:
             self.runtime_task_complexity = _current_complexity
         else:
-            _current_complexity = trim(str(self.runtime_task_complexity or "").strip().lower(), 20)
+            _current_complexity = normalize_task_complexity(str(self.runtime_task_complexity or "").strip().lower(), default="")
         self.runtime_complexity_floor = str(_current_complexity or "complex")
         _plan_risk = self._resolve_plan_option_risk(chosen)
         try:
@@ -35007,13 +37251,6 @@ body{padding:18px}
                     self.agent_round_index = int(self.agent_round_index) + 1
                     self.current_phase = "model-call"
                     self.current_tool_name = ""
-                # Single-mode skill auto-discovery: same as plan mode. Runs on first 2 rounds only.
-                # Uses goal_sig dedup — if skills already loaded for this goal, no-op.
-                if int(self.agent_round_index) <= 2:
-                    try:
-                        self._refresh_loaded_skills_for_execution_focus(trigger="single-worker-pre")
-                    except Exception:
-                        pass
                 if level_budget > 0 and int(self.agent_round_index) > int(level_budget):
                     force_single_tool_rounds = max(force_single_tool_rounds, 2)
                     if not compact_budget_notified:
@@ -35264,7 +37501,7 @@ body{padding:18px}
                         )
                         continue
                     stop_note = (
-                        "模型连续多轮仅输出思考而无动作，自动执行已熔断停止（fault_counter>=3）。"
+                        "模型连续多轮仅输出思考而无动作，自动执行已熔断停止（fault_counter>=15）。"
                         "请尝试拆分任务，或切换更强的推理模型后继续。"
                     )
                     raise CircuitBreakerTriggered(stop_note)
@@ -35611,6 +37848,7 @@ body{padding:18px}
                         self.current_phase = f"tool:{name}"
                         self.current_tool_name = name
                     round_tool_names.append(name)
+                    todo_rows_before = self.todo.snapshot() if name in {"TodoWrite", "TodoWriteRescue"} else None
                     args = tc["function"]["arguments"]
                     args_error = str(tc.get("args_error", "") or "").strip()
                     raw_args = tc.get("raw_arguments")
@@ -35775,15 +38013,41 @@ body{padding:18px}
                         recovery_retry_rounds = 0
                     if dispatched_name in {"TodoWrite", "TodoWriteRescue"}:
                         todo_attempted = True
-                        state, reason = self._analyze_todo_result(dispatched_name, output)
+                        todo_rows_after = self.todo.snapshot()
+                        state, reason = self._analyze_todo_result(
+                            dispatched_name,
+                            output,
+                            before_rows=todo_rows_before,
+                            after_rows=todo_rows_after,
+                        )
                         if state == "ok":
                             used_todo = True
                             self.todo_write_issue_count = 0
                             self.todo_last_issue = ""
+                            self._emit(
+                                "status",
+                                {"summary": f"todo updated ({trim(reason, 100)})"},
+                            )
                         else:
                             self.todo_write_issue_count += 1
                             self.todo_last_issue = reason
-                            if self.todo_write_issue_count >= 2 and not self._todo_runtime_has_worker_rows(single_role):
+                            self._emit(
+                                "status",
+                                {
+                                    "summary": (
+                                        "todo update produced no progress "
+                                        f"({trim(reason, 100)})"
+                                    )
+                                },
+                            )
+                            repeat_no_progress = any(
+                                token in str(reason or "").lower()
+                                for token in ("repeated", "no progress", "without changing")
+                            )
+                            if self.todo_write_issue_count >= 2 and (
+                                not self._todo_runtime_has_worker_rows(single_role)
+                                or repeat_no_progress
+                            ):
                                 self._emit(
                                     "status",
                                     {
@@ -36118,6 +38382,22 @@ body{padding:18px}
                         self.rounds_without_todo += 1
                 else:
                     self.rounds_without_todo += 1
+                concrete_work_without_todo = (
+                    not used_todo
+                    and self._todo_runtime_has_worker_rows(single_role)
+                    and any(
+                        isinstance(r, dict)
+                        and r.get("ok", False)
+                        and str(r.get("name", "") or "") in {
+                            "write_file",
+                            "edit_file",
+                            "bash",
+                            "read_file",
+                            "write_to_blackboard",
+                        }
+                        for r in single_round_tool_results
+                    )
+                )
                 if (
                     todo_attempted
                     and not used_todo
@@ -36142,18 +38422,25 @@ body{padding:18px}
                 now_tick = now_ts()
                 can_remind = (now_tick - self.last_todo_reminder_ts) >= 20
                 if can_remind and self.todo_reminder_count < 2:
-                    if not self._todo_runtime_has_worker_rows(single_role) and self.rounds_without_todo >= 2:
-                        self.messages.append(
-                            {
-                                "role": "user",
-                                "content": "<reminder>Please call TodoWrite now to update the current subtask before continuing. If it fails/repeats, switch to TodoWriteRescue.</reminder>",
-                                "ts": now_tick,
-                            }
+                    if concrete_work_without_todo:
+                        self._append_plan_guidance_bubble(
+                            "<reminder>Update your todos now: finish the current subtask in TodoWrite before moving on.</reminder>",
+                            summary="todo reminder",
+                        )
+                        self.last_todo_reminder_ts = now_tick
+                        self.todo_reminder_count += 1
+                    elif not self._todo_runtime_has_worker_rows(single_role) and self.rounds_without_todo >= 2:
+                        self._append_plan_guidance_bubble(
+                            "<reminder>Please call TodoWrite now to update the current subtask before continuing. If it fails/repeats, switch to TodoWriteRescue.</reminder>",
+                            summary="todo reminder",
                         )
                         self.last_todo_reminder_ts = now_tick
                         self.todo_reminder_count += 1
                     elif self._todo_should_block_auto_continue("") and self.rounds_without_todo >= 4:
-                        self.messages.append({"role": "user", "content": "<reminder>Update your todos now: finish the current subtask in TodoWrite before moving on.</reminder>", "ts": now_tick})
+                        self._append_plan_guidance_bubble(
+                            "<reminder>Update your todos now: finish the current subtask in TodoWrite before moving on.</reminder>",
+                            summary="todo reminder",
+                        )
                         self.last_todo_reminder_ts = now_tick
                         self.todo_reminder_count += 1
                 if manual_compact:
@@ -36247,6 +38534,12 @@ body{padding:18px}
                 self._generate_run_completion_summary()
             except Exception:
                 pass
+            try:
+                _applied_runtime_updates = self._apply_deferred_runtime_updates()
+                for _note in _applied_runtime_updates[:6]:
+                    self._emit("status", {"summary": _note})
+            except Exception:
+                pass
             self._emit("status", {"summary": "run finished"})
             cb = self.run_finished_callback
             if cb:
@@ -36525,6 +38818,7 @@ body{padding:18px}
                 "live_run_notice_elapsed": round(float(self.live_run_notice_elapsed or 0.0), 1),
                 "max_agent_rounds": int(self.max_agent_rounds),
                 "max_run_seconds": int(self.max_run_seconds),
+                "shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
                 "auto_model_switch": bool(self.auto_model_switch),
                 "arbiter_enabled": bool(self.arbiter_enabled),
                 "arbiter_model": str(self.arbiter_model or ""),
@@ -36704,6 +38998,7 @@ class SessionManager:
         context_limit_locked: bool = False,
         max_rounds: int = MAX_AGENT_ROUNDS,
         max_run_seconds: int = MAX_RUN_SECONDS,
+        shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
         auto_model_switch: bool = False,
         arbiter_enabled: bool = True,
         arbiter_model: str = "",
@@ -36749,6 +39044,12 @@ class SessionManager:
             maximum=MAX_RUN_TIMEOUT_SECONDS,
             fallback=MAX_RUN_SECONDS,
         )
+        self.shell_command_timeout_seconds = normalize_timeout_seconds(
+            shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+            minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+            maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+            fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+        )
         self.auto_model_switch = bool(auto_model_switch)
         self.arbiter_enabled = bool(arbiter_enabled)
         self.arbiter_model = str(arbiter_model or "").strip()
@@ -37031,6 +39332,12 @@ class SessionManager:
         )
         sess.execution_mode = normalize_execution_mode(self.execution_mode, default=EXECUTION_MODE_SYNC)
         sess.single_advance_prompt_enhance = bool(self.single_advance_prompt_enhance)
+        sess.shell_command_timeout_seconds = normalize_timeout_seconds(
+            self.shell_command_timeout_seconds,
+            minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+            maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+            fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+        )
         sess._apply_active_profile()
         sess.updated_at = now_ts()
         sess._persist()
@@ -37091,6 +39398,7 @@ class SessionManager:
                 context_limit_locked=self.context_limit_locked,
                 max_rounds=self.max_rounds,
                 max_run_seconds=self.max_run_seconds,
+                shell_command_timeout_seconds=self.shell_command_timeout_seconds,
                 auto_model_switch=self.auto_model_switch,
                 arbiter_enabled=self.arbiter_enabled,
                 arbiter_model=self.arbiter_model,
@@ -37140,6 +39448,7 @@ class SessionManager:
                 context_limit_locked=self.context_limit_locked,
                 max_rounds=self.max_rounds,
                 max_run_seconds=self.max_run_seconds,
+                shell_command_timeout_seconds=self.shell_command_timeout_seconds,
                 auto_model_switch=self.auto_model_switch,
                 arbiter_enabled=self.arbiter_enabled,
                 arbiter_model=self.arbiter_model,
@@ -38188,7 +40497,7 @@ function renderLlmFields(provider){const container=E('llmFieldsContainer');if(!c
 async function scanOllamaModels(){const urlEl=E('llmF_ollama_url');const sel=E('llmF_ollama_model');const hint=E('ollamaScanHint');const baseUrl=(urlEl?.value||'').trim()||'http://127.0.0.1:11434';if(hint)hint.textContent=t('llm_scanning');try{const res=await fetch('/api/ollama/models?base_url='+encodeURIComponent(baseUrl));const data=await res.json();if(!data.ok||!data.models?.length){if(hint)hint.textContent=t('llm_scan_empty')+(data.error?' ('+data.error+')':'');return}if(sel){sel.innerHTML='';for(const m of data.models){const op=document.createElement('option');op.value=m;op.textContent=m;sel.appendChild(op)}}if(hint)hint.textContent=t('llm_scan_found').replace('{n}',String(data.models.length))}catch(err){if(hint)hint.textContent=t('llm_scan_error')+': '+(err.message||String(err))}}
 async function scanOpenAICompatModels(provider){const scanMap={openai_compat:{urlKey:'openai_url',modelKey:'openai_model',keyKey:'openai_key',defaultUrl:'https://api.openai.com/v1'},siliconflow:{urlKey:'siliconflow_url',modelKey:'siliconflow_model',keyKey:'siliconflow_key',defaultUrl:'https://api.siliconflow.cn/v1'},vllm:{urlKey:'vllm_url',modelKey:'vllm_model',keyKey:'vllm_key',defaultUrl:'http://localhost:8000/v1'},lmstudio:{urlKey:'lmstudio_url',modelKey:'lmstudio_model',keyKey:'lmstudio_key',defaultUrl:'http://localhost:1234/v1'},glm:{urlKey:'glm_url',modelKey:'glm_model',keyKey:'glm_key',defaultUrl:'https://open.bigmodel.cn/api/paas/v4'},kimi:{urlKey:'kimi_url',modelKey:'kimi_model',keyKey:'kimi_key',defaultUrl:'https://api.moonshot.cn/v1'},openrouter:{urlKey:'openrouter_url',modelKey:'openrouter_model',keyKey:'openrouter_key',defaultUrl:'https://openrouter.ai/api/v1'},custom_http:{urlKey:'custom_url',modelKey:'custom_model',keyKey:'custom_key',defaultUrl:''}};const normalizedProvider=String(provider||'openai_compat').trim()||'openai_compat';const meta=scanMap[normalizedProvider]||scanMap.openai_compat;const urlEl=E('llmF_'+meta.urlKey);const modelEl=E('llmF_'+meta.modelKey);const hint=E('localScanHint');const baseUrl=(urlEl?.value||'').trim()||meta.defaultUrl||'';const apiKey=(E('llmF_'+meta.keyKey)?.value||'').trim();if(hint)hint.textContent=t('llm_scanning');try{let url='/api/openai_compat/models?provider='+encodeURIComponent(normalizedProvider)+'&base_url='+encodeURIComponent(baseUrl);if(apiKey)url+='&api_key='+encodeURIComponent(apiKey);const res=await fetch(url);const data=await res.json();const models=Array.isArray(data.models)?data.models.filter(Boolean):[];if(!data.ok){if(hint)hint.textContent=t('llm_scan_error')+(data.error?' 
('+data.error+')':'');return}if(models.length){if(modelEl&&!String(modelEl.value||'').trim())modelEl.value=models[0];if(hint)hint.textContent=t('llm_scan_found').replace('{n}',String(models.length))+': '+models.slice(0,3).join(', ');return}if(data.reachable){if(hint)hint.textContent=t('llm_scan_reachable_manual')+(data.error?' ('+data.error+')':'');return}if(hint)hint.textContent=t('llm_scan_empty')+(data.error?' ('+data.error+')':'')}catch(err){if(hint)hint.textContent=t('llm_scan_error')+': '+(err.message||String(err))}}
 function collectLlmConfig(){const provider=E('llmProvider')?.value||'ollama';const config={provider:provider};if(provider==='ollama'){config.ollama_url=(E('llmF_ollama_url')?.value||'').trim()||'http://127.0.0.1:11434';config.ollama_model=E('llmF_ollama_model')?.value||''}else if(provider==='custom_http'){const fields=LLM_PROVIDER_FIELDS.custom_http;for(const f of fields){const el=E('llmF_'+f.key);if(!el)continue;if(f.type==='textarea'){config[f.key]=el.value.trim()}else if(f.key==='temperature'){const v=parseFloat(el.value);if(!isNaN(v))config[f.key]=v}else if(f.key==='request_timeout'){const v=parseInt(el.value,10);if(!isNaN(v)&&v>0)config[f.key]=v}else{config[f.key]=el.value.trim()}}}else{const fields=LLM_PROVIDER_FIELDS[provider]||[];for(const f of fields){const el=E('llmF_'+f.key);if(el){const raw=el.value.trim();config[f.key]=(provider!=='custom_http'&&f.type==='url')?(raw||String(f.placeholder||'').trim()):raw}}}config.thinking_stream=E('llmF_thinking_stream')?.value==='true';return config}
-async function submitLlmConfig(){if(!S.activeId){showError(t('select_session_first'));return}const config=collectLlmConfig();try{const payload={filename:'LLM.config.json',mime:'application/json',content_b64:btoa(unescape(encodeURIComponent(JSON.stringify(config,null,2))))};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'))}else{showError('')}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true});E('llmConfigModal').style.display='none'}catch(err){showError(err.message||String(err))}}
+async function submitLlmConfig(){if(!S.activeId){showError(t('select_session_first'));return}const config=collectLlmConfig();try{const payload={filename:'LLM.config.json',mime:'application/json',content_b64:btoa(unescape(encodeURIComponent(JSON.stringify(config,null,2))))};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});const note=String(out?.note||out?.model_catalog?.note||'').trim();if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'))}else if(note){showError(note)}else{showError('')}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true});E('llmConfigModal').style.display='none'}catch(err){showError(err.message||String(err))}}
 function openLlmConfigModal(){const modal=E('llmConfigModal');if(!modal)return;modal.style.display='flex';const prov=E('llmProvider');if(prov){renderLlmFields(prov.value)}}
 const COMPACT_AUTO_REFRESH_COUNT=3;
 const COMPACT_AUTO_REFRESH_INTERVAL_MS=260;
@@ -38761,7 +41070,7 @@ function feedSignature(snap){const feed=Array.isArray(snap?.conversation_feed)?s
 function boardsSignature(snap){return [snap?.running?1:0,snap?.agent_phase||'',Number(snap?.agent_round_index||0),Number(snap?.queued_user_inputs_count||0),Number(snap?.truncation_count||0),Number(snap?.live_truncation_attempts||0),Number(snap?.live_truncation_tokens||0),snap?.live_truncation_active?1:0,Number(snap?.context_tokens_estimate||0),Number(snap?.context_left_tokens||0),Number(snap?.context_left_percent||0),Number(snap?.render_bridge?.seq||0),(snap?.todos||[]).length,(snap?.tasks||[]).length,(snap?.activity||[]).length,(snap?.operations||[]).length,(snap?.uploads||[]).length].join('|')}
 function sessionsSignature(list){const rows=Array.isArray(list)?list:[];const sig=tailSig(rows,6,row=>`${String(row?.id||'')}:${row?.running?1:0}:${Number(row?.message_count||0)}:${Number(row?.updated_at||0)}`);const aid=String(S.activeId||'').trim();let activeSig='-';if(aid){const activeRow=rows.find(row=>String(row?.id||'')===aid);if(activeRow){activeSig=`${aid}:${activeRow?.running?1:0}:${Number(activeRow?.message_count||0)}:${Number(activeRow?.updated_at||0)}`}else{activeSig=`missing:${aid}`}}return `${rows.length}|active=${activeSig}|${sig}`}
 function _statInfinite(n){const v=Number(n);return(Number.isFinite(v)&&v>0)?String(v):'∞'}
-function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
+function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'shell_command_timeout_seconds'))S.config.shell_command_timeout_seconds=cfg.shell_command_timeout_seconds;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
 function renderStats(){const sessions=S.sessions.length;const running=S.sessions.filter(x=>x.running).length;const msgs=S.sessions.reduce((n,x)=>n+x.message_count,0);const model=S.config?.model||'-';const sched=(S.config&&typeof S.config.scheduler==='object')?S.config.scheduler:{};const quota=(S.config&&typeof S.config.session_creation_limit==='object')?S.config.session_creation_limit:{};const runningTotal=Math.max(0,Number(sched?.running_total||0));const maxTasks=Number(sched?.max_user||0);const globalTasks=`${runningTotal}/${_statInfinite(maxTasks)}`;const dailySessions=(quota&&quota.enabled)?`${Math.max(0,Number(quota.used||0))}/${Math.max(0,Number(quota.limit||0))}`:'∞';const compact=[[t('stat_sessions'),sessions],[t('stat_running'),running],[t('stat_messages'),msgs],[t('stat_global_tasks'),globalTasks],[t('stat_daily_sessions'),dailySessions]].map(([k,v])=>`<div class=\"stat compact\"><div class=\"k\">${esc(k)}</div><div class=\"v\">${esc(v)}</div></div>`).join('');const modelHtml=`<div class=\"stat model\"><div class=\"k\">${esc(t('stat_model'))}</div><div class=\"v\">${esc(model)}</div></div>`;E('topStats').innerHTML=`<div class=\"top-stats-primary\">${compact}</div><div class=\"top-stats-model\">${modelHtml}</div>`}
 function renderSessions(){const html=S.sessions.map(s=>`<div class=\"session-item${s.id===S.activeId?' active':''}\" data-id=\"${esc(s.id)}\"><div><strong>${esc(s.title)}</strong></div><div class=\"mono\">${s.running?t('running'):t('idle')} · ${s.message_count} msgs</div></div>`).join('');setPanelHtml('sessionList',html||`<div class=\"mono\">${esc(t('no_sessions'))}</div>`);for(const el of document.querySelectorAll('#sessionList .session-item')){el.onclick=()=>selectSession(el.getAttribute('data-id'))}}
 function _syncActiveSessionSummaryFromSnapshot(){const sid=String(S.activeId||'').trim();const snap=S.snap;if(!sid||!snap)return false;const rows=Array.isArray(S.sessions)?S.sessions.slice():[];let idx=rows.findIndex(row=>String(row?.id||'')===sid);const running=!!snap?.running;let updatedAt=Number(snap?.updated_at||0);if(!Number.isFinite(updatedAt)||updatedAt<=0){updatedAt=(Date.now()/1000)}let msgCount=Number(snap?.message_count);if(!Number.isFinite(msgCount)||msgCount<0){const arr=Array.isArray(snap?.messages)?snap.messages:[];let cnt=0;for(const row of arr){if(String(row?.role||'').trim()==='tool')continue;cnt+=1}msgCount=cnt}msgCount=Math.max(0,Math.floor(Number(msgCount)||0));const title=String(snap?.title||'').trim();if(idx<0){rows.push({id:sid,title:title||sid,running:running,updated_at:updatedAt,message_count:msgCount});idx=rows.length-1}else{const cur=rows[idx]||{};const next={...cur};let changed=false;if(!!cur.running!==running){next.running=running;changed=true}if(Number(cur.message_count||0)!==msgCount){next.message_count=msgCount;changed=true}if(Number(cur.updated_at||0)!==updatedAt){next.updated_at=updatedAt;changed=true}if(title&&String(cur.title||'')!==title){next.title=title;changed=true}if(!changed)return false;rows[idx]=next}rows.sort((a,b)=>Number(b?.updated_at||0)-Number(a?.updated_at||0));S.sessions=rows;return true}
@@ -40222,8 +42531,7 @@ function _chatVirtBuildMessageNode(m){
     const pillsHtml=pills.map(x=>`<span class=\"manager-delegate-pill\">${esc(String(x))}</span>`).join('');
     const routeHtml=`<div class=\"manager-delegate-route\"><span class=\"agent-bus-pill manager\">${esc(t('role_manager'))}</span><span class=\"agent-bus-arrow\">→</span><span class=\"agent-bus-pill${targetRole?(' '+targetRole):''}\">${esc(targetLabel)}</span></div>`;
     const objectiveHtml=(objective&&instruction&&objective.toLowerCase()===instruction.toLowerCase())?'':(objective?`<div class=\"manager-delegate-line\"><span>${esc(t('event_objective'))}</span><div>${esc(objective)}</div></div>`:'');
-    const instructionKey=`${String(m._vk||'')}:manager-instruction`;
-    const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div class=\"msg-md\">${renderMarkdownCached(instruction,instructionKey)}</div></div>`:'';
+    const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div>${esc(instruction)}</div></div>`:'';
     d.innerHTML=`${roleBadge}<div class=\"manager-delegate-card\"><div class=\"manager-delegate-head\">${esc(t('event_manager_delegate_title'))}</div>${routeHtml}<div class=\"manager-delegate-pills\">${pillsHtml}</div>${objectiveHtml}${instructionHtml}</div>`;
     return d;
   }
@@ -41214,7 +43522,7 @@ async function renameSession(){if(!S.activeId){showError(t('select_session_first
 async function deleteSession(){if(!S.activeId){showError(t('select_session_first'));return}const deletingId=S.activeId;const ok=confirm(t('delete_confirm'));if(!ok)return;await api('/api/sessions/'+S.activeId,{method:'DELETE'});if(S.previewBySession&&deletingId){delete S.previewBySession[deletingId]}if(S.fileExplorerBySession&&deletingId){delete S.fileExplorerBySession[deletingId]}S.activeId=null;S.snap=null;if(S.es)S.es.close();renderPreviewTabs();renderPreviewVisibility();renderActivePreview(false);await refreshSessions();if(S.sessions.length)await selectSession(S.sessions[0].id)}
 async function applyModel(){const sel=E('modelSelect');const btn=E('applyModelBtn');const model=sel?.value||'';if(!model){showError(t('no_model_selected'));return}if(S.staticMode&&S.frozen)resumeAutoUpdates();S.config=S.config||{};const prevModel=String(S.config.model||'');const prevSnapModel=String(S.snap?.model||'');const prevSnapCatalog=(S.snap&&typeof S.snap==='object')?S.snap.llm_model_catalog:undefined;try{S.config.model=model;if(S.snap&&typeof S.snap==='object'){S.snap.model=_modelNameFromSelection(model)||S.snap.model;if(!S.snap.llm_model_catalog||typeof S.snap.llm_model_catalog!=='object')S.snap.llm_model_catalog={};S.snap.llm_model_catalog.selected=model}renderModelControls();renderStats();if(S.snap)renderBoards();if(sel)sel.disabled=true;if(btn)btn.disabled=true;const path=S.activeId?('/api/sessions/'+S.activeId+'/config/model'):'/api/config/model';const changed=await api(path,{method:'POST',body:JSON.stringify({selection:model,model})});if(changed?.note)showError(changed.note);else showError('');if(!applyModelCatalog(changed)){const cat=await loadModelCatalog();if(!applyModelCatalog(cat)){S.config.model=String(changed?.selected||model||'').trim();renderModelControls()}}if(S.snap&&typeof S.snap==='object'){const selected=String(S.config?.model||model||'').trim();const modelName=_modelNameFromSelection(selected);if(modelName)S.snap.model=modelName;if(changed&&typeof changed==='object')S.snap.llm_model_catalog=changed;renderBoards()}scheduleSnapshot({forceFull:true,delayMs:40,allowWhenFrozen:true})}catch(err){S.config.model=prevModel;if(S.snap&&typeof S.snap==='object'){if(prevSnapModel)S.snap.model=prevSnapModel;if(prevSnapCatalog!==undefined)S.snap.llm_model_catalog=prevSnapCatalog;renderBoards()}renderModelControls();renderStats();showError(err.message||String(err))}finally{if(sel)sel.disabled=false;if(btn)btn.disabled=false}}
-async function uploadLlmConfigFile(file){try{if(!S.activeId){showError(t('select_session_first'));return}if(!file){return}const arr=await file.arrayBuffer();const payload={filename:'LLM.config.json',mime:file.type||'application/json',content_b64:ab2b64(arr)};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'));}else{showError('');const modal=E('llmConfigModal');if(modal)modal.style.display='none'}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true})}catch(err){showError(err.message||String(err))}}
+async function uploadLlmConfigFile(file){try{if(!S.activeId){showError(t('select_session_first'));return}if(!file){return}const arr=await file.arrayBuffer();const payload={filename:'LLM.config.json',mime:file.type||'application/json',content_b64:ab2b64(arr)};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});const note=String(out?.note||out?.model_catalog?.note||'').trim();if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'));}else{showError(note||'');const modal=E('llmConfigModal');if(modal)modal.style.display='none'}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true})}catch(err){showError(err.message||String(err))}}
 async function sendMessage(){showError('');const t=E('prompt').value.trim();if(!t||!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();E('prompt').value='';try{await waitForPendingUploads();await api('/api/sessions/'+S.activeId+'/message',{method:'POST',body:JSON.stringify({content:t})});S.lastDeltaTs=Date.now();if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:120,allowWhenFrozen:true})}}catch(err){showError(err.message)}}
 async function interruptRun(){if(!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();await api('/api/sessions/'+S.activeId+'/interrupt',{method:'POST'});S.lastDeltaTs=Date.now();if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:140,allowWhenFrozen:true})}}
 async function compactNow(){if(!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();await api('/api/sessions/'+S.activeId+'/compact',{method:'POST'});S.lastDeltaTs=Date.now();scheduleCompactRefreshBurst(COMPACT_AUTO_REFRESH_COUNT);if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:180,allowWhenFrozen:true})}}
@@ -48574,6 +50882,7 @@ class AppContext:
         context_limit_locked: bool = False,
         max_rounds: int = MAX_AGENT_ROUNDS,
         max_run_seconds: int = MAX_RUN_SECONDS,
+        shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
         auto_model_switch: bool = False,
         arbiter_enabled: bool = True,
         arbiter_model: str = "",
@@ -48594,7 +50903,7 @@ class AppContext:
         self.base_url = base_url
         self.model = model
         self.thinking = False
-        self.js_lib_root = offline_js_lib_root(SCRIPT_DIR)
+        self.js_lib_root = offline_js_lib_root(self.workspace)
         self.offline_js_summary: dict = {}
         try:
             self.offline_js_summary = load_offline_js_lib_index(self.js_lib_root)
@@ -48617,6 +50926,12 @@ class AppContext:
             maximum=MAX_RUN_TIMEOUT_SECONDS,
             fallback=MAX_RUN_SECONDS,
         )
+        self.shell_command_timeout_seconds = normalize_timeout_seconds(
+            shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+            minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+            maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+            fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+        )
         self.auto_model_switch = bool(auto_model_switch)
         self.arbiter_enabled = bool(arbiter_enabled)
         self.arbiter_model = str(arbiter_model or "").strip()
@@ -48785,6 +51100,7 @@ class AppContext:
             "show_upload_list": bool(getattr(self, "show_upload_list", False)),
             "ui_style": normalize_ui_style(getattr(self, "ui_style", DEFAULT_UI_STYLE)),
             "js_lib_download_enabled": bool(getattr(self, "js_lib_download_enabled", True)),
+            "shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
             "daily_session_limit_per_ip": int(getattr(self, "daily_session_limit_per_ip", 0) or 0),
             "daily_session_reset_hour": int(getattr(self, "daily_session_reset_hour", 8) or 8),
             "validation": dict(self.web_ui_validation or {}),
@@ -49894,6 +52210,14 @@ class AppContext:
         return started
     def _on_session_run_finished(self, user_id: str, session_id: str):
+        try:
+            mgr = self.manager_for_user(user_id)
+            sess = mgr.get(session_id)
+            if sess and bool(getattr(sess, "_deferred_runtime_sync_requested", False)):
+                mgr._sync_from_session(sess, apply_to_all=False)
+                sess._deferred_runtime_sync_requested = False
+        except Exception:
+            pass
         if not self.scheduler_limits_enabled():
             return
         started_rows: list[dict] = []
@@ -50028,6 +52352,7 @@ class AppContext:
                 self.context_limit_locked,
                 self.max_rounds,
                 self.max_run_seconds,
+                self.shell_command_timeout_seconds,
                 self.auto_model_switch,
                 self.arbiter_enabled,
                 self.arbiter_model,
@@ -51096,6 +53421,7 @@ class Handler(BaseHTTPRequestHandler):
                         "download_js_lib_enabled": bool(getattr(self.app, "js_lib_download_enabled", True)),
                         "request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
                         "run_timeout": int(mgr.max_run_seconds),
+                        "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
                     }
                 )
             model_cat = mgr.model_catalog()
@@ -51142,6 +53468,7 @@ class Handler(BaseHTTPRequestHandler):
                     "context_token_limit": int(mgr.context_token_limit),
                     "context_limit_locked": bool(mgr.context_limit_locked),
                     "run_timeout": int(mgr.max_run_seconds),
+                    "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
                     "auto_model_switch": bool(mgr.auto_model_switch),
                     "execution_mode": normalize_execution_mode(getattr(mgr, "execution_mode", EXECUTION_MODE_SYNC), default=EXECUTION_MODE_SYNC),
                     "execution_mode_choices": list(EXECUTION_MODE_CHOICES),
@@ -51194,7 +53521,7 @@ class Handler(BaseHTTPRequestHandler):
                         for hk, hv in probe_headers.items():
                             if str(hk or "").strip() and str(hv or "").strip():
                                 req.add_header(str(hk), str(hv))
-                        with urllib.request.urlopen(req, timeout=8) as resp:
+                        with urlopen(req, timeout=8) as resp:
                             body_text = resp.read().decode("utf-8", errors="replace")
                         reachable = True
                         try:
@@ -51251,7 +53578,7 @@ class Handler(BaseHTTPRequestHandler):
                         for hk, hv in probe_headers.items():
                             if str(hk or "").strip() and str(hv or "").strip():
                                 base_req.add_header(str(hk), str(hv))
-                        with urllib.request.urlopen(base_req, timeout=8):
+                        with urlopen(base_req, timeout=8):
                             pass
                         reachable = True
                     except urllib.error.HTTPError as exc:
@@ -51502,9 +53829,26 @@ class Handler(BaseHTTPRequestHandler):
             if not selection:
                 return self._send_json({"error": "selection required"}, status=400)
             model_override = payload.get("model_override")
+            if bool(getattr(sess, "running", False)):
+                try:
+                    sess._queue_deferred_runtime_update(
+                        "model_selection",
+                        {
+                            "selection": selection,
+                            "model_override": model_override if isinstance(model_override, str) else "",
+                        },
+                    )
+                except Exception as exc:
+                    return self._send_json({"error": str(exc)}, status=400)
+                queued = sess.model_catalog()
+                queued["queued"] = True
+                queued["note"] = (
+                    "session is running; model switch queued and will apply after the current run finishes"
+                )
+                return self._send_json(queued)
             try:
                 out = sess.set_runtime_selection(selection, model_override if isinstance(model_override, str) else None)
-                mgr._sync_from_session(sess, apply_to_all=True)
+                mgr._sync_from_session(sess, apply_to_all=False)
             except Exception as exc:
                 return self._send_json({"error": str(exc)}, status=400)
             return self._send_json(out)
@@ -51603,9 +53947,9 @@ class Handler(BaseHTTPRequestHandler):
             if len(raw) > 20 * 1024 * 1024:
                 return self._send_json({"error": "max upload size is 20MB"}, status=413)
             meta = sess.add_upload(filename, raw, mime)
-            if isinstance(meta.get("model_catalog"), dict):
+            if isinstance(meta.get("model_catalog"), dict) and not bool(meta.get("model_catalog", {}).get("queued")):
                 try:
-                    mgr._sync_from_session(sess, apply_to_all=True)
+                    mgr._sync_from_session(sess, apply_to_all=False)
                 except Exception:
                     pass
             return self._send_json(meta, status=201)
@@ -51699,16 +54043,16 @@ class Handler(BaseHTTPRequestHandler):
                     explicit_complexity = infer_user_complexity_value(
                         str(body.get("complexity", body.get("task_complexity", "")) or "")
                     )
-                    current_complexity = trim(
-                        str(getattr(sess, "runtime_task_complexity", "") or "").strip().lower(),
-                        20,
+                    current_complexity = normalize_task_complexity(
+                        getattr(sess, "runtime_task_complexity", "") or "",
+                        default="",
                     )
                     if explicit_complexity in TASK_COMPLEXITY_LEVELS:
-                        sess.runtime_task_complexity = explicit_complexity
+                        sess.runtime_task_complexity = normalize_task_complexity(explicit_complexity, default="")
                     elif current_complexity in TASK_COMPLEXITY_LEVELS:
                         sess.runtime_task_complexity = current_complexity
                     else:
-                        sess.runtime_task_complexity = str(policy.get("complexity", "simple"))
+                        sess.runtime_task_complexity = normalize_task_complexity(policy.get("complexity", "simple"), default="simple")
                     sess.runtime_scale_preference = "thorough" if level >= 4 else "balanced"
             return self._send_json({"task_level": level})
         return self._send_json({"error": "not found"}, status=404)
@@ -51901,6 +54245,7 @@ class SkillsHandler(BaseHTTPRequestHandler):
                     "show_upload_list": bool(getattr(self.app, "show_upload_list", False)),
                     "web_ui": web_ui_state,
                     "run_timeout": int(mgr.max_run_seconds),
+                    "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
                     "request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
                 }
             )
@@ -52332,6 +54677,25 @@ def main():
             f"(minimum {MIN_RUN_TIMEOUT_SECONDS}, model-active time excluded)"
         ),
     )
+    parser.add_argument(
+        "--shell_command_timeout",
+        "--shell-command-timeout",
+        "--bash_timeout",
+        "--bash-timeout",
+        "--command_timeout",
+        "--command-timeout",
+        dest="shell_command_timeout",
+        default=None,
+        type=int,
+        help=(
+            "Per-command shell/bash timeout in seconds "
+            f"(default {DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS}; allowed "
+            f"{MIN_SHELL_COMMAND_TIMEOUT_SECONDS}-{MAX_SHELL_COMMAND_TIMEOUT_SECONDS}). "
+            "Independent from the global run timeout. Also configurable via --config keys "
+            "shell_command_timeout / shell_timeout / bash_timeout / command_timeout and env "
+            "AGENT_SHELL_COMMAND_TIMEOUT / AGENT_BASH_TIMEOUT / AGENT_COMMAND_TIMEOUT."
+        ),
+    )
     parser.add_argument(
         "--live_input_delay_write",
         default=LIVE_INPUT_DELAY_WRITE_ROUNDS,
@@ -52481,9 +54845,10 @@ def main():
         default="",
         help=(
             "LLM config source (URL or local file path). "
-            "Also reads startup keys like show_upload_list, download_js_lib and "
+            "Also reads startup keys like show_upload_list, download_js_lib, shell_command_timeout and "
             "daily_session_limit (aliases: daily_sessions_per_ip / "
-            "max_daily_sessions_per_ip / session_daily_limit)."
+            "max_daily_sessions_per_ip / session_daily_limit; shell aliases: "
+            "shell_timeout / bash_timeout / command_timeout)."
         ),
     )
     parser.add_argument(
@@ -52618,6 +54983,7 @@ def main():
         arbiter_enabled=True,
         show_upload_list=None,
         download_js_lib=None,
+        shell_command_timeout=None,
     )
     args = parser.parse_args()
     ctx_limit_locked = any(str(arg).split("=", 1)[0] == "--ctx_limit" for arg in sys.argv[1:])
@@ -52647,6 +55013,7 @@ def main():
     )
     resolved_show_upload_list = False
     resolved_daily_session_limit_per_ip = 0
+    resolved_shell_command_timeout = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS
     external_config: dict = {}
     external_config_source = ""
     bootstrap_base_url = args.ollama_base_url
@@ -52673,6 +55040,14 @@ def main():
             external_daily_session_limit = extract_daily_session_limit_setting(external_config)
             if external_daily_session_limit is not None:
                 resolved_daily_session_limit_per_ip = int(external_daily_session_limit)
+            external_shell_command_timeout = extract_shell_command_timeout_setting(external_config)
+            if external_shell_command_timeout is not None:
+                resolved_shell_command_timeout = normalize_timeout_seconds(
+                    external_shell_command_timeout,
+                    minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+                    maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+                    fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+                )
             print(f"[web-agent] external config loaded: {external_config_source}")
         except Exception as exc:
             print(f"[web-agent] invalid --config: {exc}")
@@ -52686,9 +55061,25 @@ def main():
     web_ui_daily_session_limit = extract_daily_session_limit_setting(web_ui_config)
     if web_ui_daily_session_limit is not None:
         resolved_daily_session_limit_per_ip = int(web_ui_daily_session_limit)
+    web_ui_shell_command_timeout = extract_shell_command_timeout_setting(web_ui_config)
+    if web_ui_shell_command_timeout is not None:
+        resolved_shell_command_timeout = normalize_timeout_seconds(
+            web_ui_shell_command_timeout,
+            minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+            maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+            fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+        )
     cli_daily_session_limit = getattr(args, "daily_session_limit_per_ip", None)
     if cli_daily_session_limit is not None:
         resolved_daily_session_limit_per_ip = max(0, int(cli_daily_session_limit or 0))
+    cli_shell_command_timeout = getattr(args, "shell_command_timeout", None)
+    if cli_shell_command_timeout is not None:
+        resolved_shell_command_timeout = normalize_timeout_seconds(
+            cli_shell_command_timeout,
+            minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
+            maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
+            fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
+        )
     raw_ui_style = str(getattr(args, "ui_style", "") or "").strip()
     if not raw_ui_style:
         raw_ui_style = str(extract_ui_style_setting(external_config) or "").strip()
@@ -52743,6 +55134,7 @@ def main():
             f"[web-agent] run_timeout adjusted {requested_run_timeout}->{resolved_run_timeout} "
             f"(allowed range {MIN_RUN_TIMEOUT_SECONDS}-{MAX_RUN_TIMEOUT_SECONDS})"
         )
+    print(f"[web-agent] shell_command_timeout={int(resolved_shell_command_timeout)}s")
     requested_live_input_delay_write = int(args.live_input_delay_write if args.live_input_delay_write is not None else LIVE_INPUT_DELAY_WRITE_ROUNDS)
     resolved_live_input_delay_write = max(0, min(20, requested_live_input_delay_write))
     if resolved_live_input_delay_write != requested_live_input_delay_write:
@@ -52925,6 +55317,7 @@ def main():
         ctx_limit_locked,
         resolved_max_rounds,
         resolved_run_timeout,
+        resolved_shell_command_timeout,
         resolved_auto_model_switch,
         resolved_arbiter_enabled,
         resolved_arbiter_model,

clouds-coder 2026.4.2__tar.gz → 2026.4.5__tar.gz

clouds-coder 2026.4.2.tar.gz → 2026.4.5.tar.gz