clouds-coder 2026.4.2__tar.gz → 2026.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,7 @@ import selectors
25
25
  import signal
26
26
  import shutil
27
27
  import shlex
28
+ import ssl
28
29
  import socket
29
30
  import subprocess
30
31
  import sys
@@ -44,15 +45,48 @@ from pathlib import Path, PurePosixPath
44
45
  from urllib.error import HTTPError, URLError
45
46
  from urllib.parse import parse_qs, unquote, urlparse
46
47
  from urllib.request import Request, urlopen
48
+ try:
49
+ import certifi as _certifi
50
+ except Exception:
51
+ _certifi = None
47
52
  try:
48
53
  import yaml as _yaml
49
54
  except Exception:
50
55
  _yaml = None
56
+ _URL_OPEN_ORIGINAL = urlopen
57
+ _HTTP_SSL_CONTEXT = None
51
58
  APP_VERSION = "0.1.1"
52
59
  DEFAULT_OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
53
60
  DEFAULT_OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5-coder:7b")
54
61
  SCRIPT_DIR = Path(__file__).resolve().parent
55
62
 
63
def _shared_http_ssl_context():
    """Return the process-wide SSL context used for outgoing HTTPS requests.

    The context is built on first use and cached in the module-level
    ``_HTTP_SSL_CONTEXT``; later calls return the cached object.

    CA bundle candidates are tried in this order, and the first one that
    loads wins:

    1. the file named by the ``SSL_CERT_FILE`` environment variable,
    2. the bundle reported by ``certifi`` (when that package was importable),
    3. the interpreter/platform defaults (``ssl.create_default_context()``).

    Trying certifi *after* a failed ``SSL_CERT_FILE`` matters: a stale or
    mistyped environment value should not discard a usable certifi bundle.
    """
    global _HTTP_SSL_CONTEXT
    if _HTTP_SSL_CONTEXT is not None:
        return _HTTP_SSL_CONTEXT

    candidates = []
    env_cafile = str(os.getenv("SSL_CERT_FILE", "") or "").strip()
    if env_cafile:
        candidates.append(env_cafile)
    if _certifi is not None:
        try:
            certifi_cafile = str(_certifi.where() or "").strip()
        except Exception:
            # certifi is optional and third-party; any failure just means
            # "no bundle from certifi".
            certifi_cafile = ""
        if certifi_cafile and certifi_cafile not in candidates:
            candidates.append(certifi_cafile)

    ctx = None
    for cafile in candidates:
        try:
            ctx = ssl.create_default_context(cafile=cafile)
        except Exception:
            # Unreadable or malformed bundle: move on to the next candidate.
            continue
        break
    if ctx is None:
        ctx = ssl.create_default_context()

    _HTTP_SSL_CONTEXT = ctx
    return ctx
79
+
80
def urlopen(url, *args, **kwargs):
    """Open *url* like ``urllib.request.urlopen``, adding the shared SSL context.

    This shadows the imported ``urlopen`` and forwards every call to the
    saved original (``_URL_OPEN_ORIGINAL``). For ``https://`` targets it
    supplies ``context=_shared_http_ssl_context()`` unless the caller already
    controls TLS verification themselves.

    The caller is considered to control TLS when any of ``context``,
    ``cafile``, ``capath`` or ``cadefault`` is passed: on Python versions
    that still accept the legacy CA arguments, the standard library raises
    ``ValueError`` if they are combined with ``context``, so injecting one
    there would turn a valid call into a failure.

    *url* may be a string or a request object exposing ``full_url``.
    """
    tls_keys = ("context", "cafile", "capath", "cadefault")
    caller_controls_tls = any(key in kwargs for key in tls_keys)
    if not caller_controls_tls:
        target = getattr(url, "full_url", url)
        if str(target or "").strip().lower().startswith("https://"):
            try:
                kwargs["context"] = _shared_http_ssl_context()
            except Exception:
                # Best effort: if no context can be built, let the original
                # urlopen fall back to its own default verification.
                pass
    return _URL_OPEN_ORIGINAL(url, *args, **kwargs)
89
+
56
90
  def _resolve_default_agent_workdir() -> Path:
57
91
  raw = str(os.getenv("AGENT_WORKDIR", "") or "").strip()
58
92
  if raw:
@@ -158,7 +192,7 @@ REPEATED_TOOL_LOOP_THRESHOLD = 2
158
192
  BASH_READ_LOOP_THRESHOLD = 3
159
193
  HARD_BREAK_TOOL_ERROR_THRESHOLD = 20
160
194
  HARD_BREAK_RECOVERY_ROUND_THRESHOLD = 3
161
- FUSED_FAULT_BREAK_THRESHOLD = 3
195
+ FUSED_FAULT_BREAK_THRESHOLD = 15
162
196
  STALL_SEVERITY_ESCALATION_THRESHOLD = 5
163
197
  STALL_SEVERITY_WEIGHT_BASH_READ_LOOP = 2
164
198
  STALL_SEVERITY_WEIGHT_REPEATED_TOOL = 3
@@ -182,6 +216,23 @@ DEFAULT_TIMEOUT_SECONDS = max(
182
216
  ),
183
217
  )
184
218
  DEFAULT_REQUEST_TIMEOUT = DEFAULT_TIMEOUT_SECONDS
219
# Bounds, in seconds, applied to the shell/bash command timeout.
MIN_SHELL_COMMAND_TIMEOUT_SECONDS = 10
MAX_SHELL_COMMAND_TIMEOUT_SECONDS = 86_400


def _shell_command_timeout_from_env(fallback: int = 240) -> int:
    """Read the shell command timeout (seconds) from the environment.

    Lookup order: ``AGENT_SHELL_COMMAND_TIMEOUT``, then ``AGENT_BASH_TIMEOUT``,
    then ``AGENT_COMMAND_TIMEOUT``. An unset or empty value yields *fallback*.

    A value that is not a plain integer is parsed as a float and truncated;
    anything still unparsable (for example ``"4m"``) yields *fallback* instead
    of raising, so a bad environment value cannot break module import.
    """
    raw = os.getenv(
        "AGENT_SHELL_COMMAND_TIMEOUT",
        os.getenv("AGENT_BASH_TIMEOUT", os.getenv("AGENT_COMMAND_TIMEOUT", str(fallback))),
    )
    text = str(raw or fallback).strip()
    try:
        return int(text)
    except ValueError:
        pass
    try:
        return int(float(text))
    except (ValueError, OverflowError):
        return fallback


# Environment-provided default, clamped into [MIN, MAX].
DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS = max(
    MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
    min(MAX_SHELL_COMMAND_TIMEOUT_SECONDS, _shell_command_timeout_from_env()),
)
185
236
  AUTO_CONTINUE_BUDGET_DEFAULT = 30
186
237
  AGENT_MAX_OUTPUT_TOKENS = 16384
187
238
  OLLAMA_THINKING_TOOL_BUFFER = 4096
@@ -196,7 +247,7 @@ WATCHDOG_CONTEXT_NEAR_RATIO = 0.92
196
247
  WATCHDOG_MAX_DECOMPOSE_STEPS = 12
197
248
  WATCHDOG_STEP_MAX_ATTEMPTS = 2
198
249
  EMPTY_ACTION_MIN_CONTENT_CHARS = 5
199
- EMPTY_ACTION_WAKEUP_RETRY_LIMIT = 2
250
+ EMPTY_ACTION_WAKEUP_RETRY_LIMIT = 5
200
251
  THINKING_BUDGET_FORCE_RATIO = 0.85
201
252
  # --- Tool timeout configuration ---
202
253
  _TOOL_TIMEOUT_MAP = {
@@ -347,7 +398,13 @@ BLACKBOARD_STATUSES = (
347
398
  "COMPLETED",
348
399
  "PAUSED",
349
400
  )
350
- TASK_COMPLEXITY_LEVELS = ("simple", "complex")
401
# Complexity bands, ordered from least to most demanding.
TASK_COMPLEXITY_LEVELS = ("simple", "moderate", "complex", "expert")
# Rank of each band is its 1-based position in TASK_COMPLEXITY_LEVELS.
TASK_COMPLEXITY_RANKS = {
    level: rank for rank, level in enumerate(TASK_COMPLEXITY_LEVELS, start=1)
}
351
408
  TASK_PROFILE_TYPES = (
352
409
  "simple_qa",
353
410
  "simple_code",
@@ -384,7 +441,7 @@ TASK_LEVEL_POLICIES: dict[int, dict] = {
384
441
  "assigned_expert": "developer",
385
442
  "round_budget": 16,
386
443
  "requires_user_confirmation": False,
387
- "complexity": "simple",
444
+ "complexity": "moderate",
388
445
  },
389
446
  4: {
390
447
  "name": "complex_collaboration",
@@ -402,7 +459,7 @@ TASK_LEVEL_POLICIES: dict[int, dict] = {
402
459
  "assigned_expert": "explorer",
403
460
  "round_budget": 0, # 0 means unlimited by tier budget (still guarded by global safeguards).
404
461
  "requires_user_confirmation": True,
405
- "complexity": "complex",
462
+ "complexity": "expert",
406
463
  },
407
464
  }
408
465
  MANAGER_ROUTE_TARGETS = ("explorer", "developer", "reviewer", "finish")
@@ -469,7 +526,7 @@ TASK_PHASE_ROUTING = {
469
526
  COMPLEXITY_KEYWORDS = (
470
527
  "简单", "复杂", "难", "容易", "快速", "详细", "深入",
471
528
  "l1", "l2", "l3", "l4", "l5",
472
- "simple", "complex", "easy", "hard", "difficult",
529
+ "simple", "moderate", "medium", "complex", "expert", "easy", "hard", "difficult",
473
530
  "thorough", "quick", "fast", "lightweight", "heavy",
474
531
  )
475
532
  USER_COMPLEXITY_SIMPLE_TOKENS = (
@@ -477,12 +534,23 @@ USER_COMPLEXITY_SIMPLE_TOKENS = (
477
534
  "low", "simple", "easy", "quick", "fast", "lightweight", "basic", "minimal",
478
535
  "l1", "l2",
479
536
  )
537
+ USER_COMPLEXITY_MODERATE_TOKENS = (
538
+ "中等复杂度", "中等难度", "适中", "平衡", "标准", "普通", "常规",
539
+ "medium", "mid", "moderate", "balanced", "standard", "normal",
540
+ "l3",
541
+ )
480
542
  USER_COMPLEXITY_COMPLEX_TOKENS = (
481
- "复杂", "深入", "详细", "高复杂度", "高难度", "中等复杂度", "中高复杂度",
482
- "medium", "mid", "high", "complex", "hard", "difficult", "thorough", "detailed", "deep", "heavy",
483
- "l3", "l4", "l5",
543
+ "复杂", "深入", "详细", "高复杂度", "高难度", "中高复杂度",
544
+ "high", "complex", "hard", "difficult", "thorough", "detailed", "deep", "heavy",
545
+ "l4",
546
+ )
547
+ USER_COMPLEXITY_EXPERT_TOKENS = (
548
+ "专家级", "系統級", "系统级", "生产级", "企業級", "企业级", "高风险", "超高复杂度",
549
+ "expert", "advanced", "system-level", "production-ready", "enterprise", "mission-critical",
550
+ "l5",
484
551
  )
485
552
  PLAN_MODE_EXPLORER_MAX_ROUNDS = 8
553
+ PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS = 3
486
554
  # Reviewer debug mode
487
555
  REVIEWER_DEBUG_MODE_MAX_ROUNDS = 6
488
556
  REVIEWER_DEBUG_TOOL_ALLOWLIST = {
@@ -492,7 +560,7 @@ REVIEWER_DEBUG_TOOL_ALLOWLIST = {
492
560
  }
493
561
  EXPLORER_STALL_THRESHOLD = 3 # consecutive same-target delegations before forced switch
494
562
  DEVELOPER_EDIT_STALL_THRESHOLD = 3 # consecutive edit_file failures on same file before forced strategy change
495
- PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS = 6144
563
+ PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS = 8192
496
564
  PLAN_MODE_MAX_OPTIONS = 3
497
565
  PLAN_FILE_RELATIVE_PATH = ".clouds_coder/plan.md"
498
566
  PLAN_BUBBLE_MAX_CHARS = 12_000
@@ -2014,6 +2082,55 @@ def extract_daily_session_limit_setting(raw: object) -> int | None:
2014
2082
  return None
2015
2083
 
2016
2084
 
2085
def extract_shell_command_timeout_setting(raw: object) -> int | None:
    """Read shell/bash command timeout from config dict.

    Accepted keys (checked in this order):
    - shell_command_timeout
    - shell_timeout
    - bash_timeout
    - command_timeout
    Sections searched: top-level, then 'startup' / 'runtime' / 'shell' / 'tools' / 'execution'.

    The first key holding a usable value wins. A key that is present but
    unusable (None, a bool, or blank text) is skipped so that it cannot hide
    a valid value stored under a later alias or section.

    Returns the value as normalised by ``normalize_timeout_seconds`` with the
    shell-command minimum/maximum bounds, or None if ``raw`` is not a dict or
    no usable setting is present.
    """
    if not isinstance(raw, dict):
        return None

    def _parse_timeout(value: object) -> int | None:
        # bool is an int subclass; True/False are never a meaningful timeout.
        if value is None or isinstance(value, bool):
            return None
        try:
            text = str(value).strip()
            if not text:
                return None
            return normalize_timeout_seconds(
                text,
                minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
                maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
                fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
            )
        except Exception:
            return None

    keys = (
        "shell_command_timeout",
        "shell_timeout",
        "bash_timeout",
        "command_timeout",
    )
    section_names = ("startup", "runtime", "shell", "tools", "execution")
    # The top-level mapping is searched first, then each named sub-section.
    scopes = (raw, *(raw.get(name) for name in section_names))
    for scope in scopes:
        if not isinstance(scope, dict):
            continue
        for key in keys:
            if key not in scope:
                continue
            parsed = _parse_timeout(scope.get(key))
            if parsed is not None:
                return parsed
    return None
2132
+
2133
+
2017
2134
  class SessionCreationLimitExceeded(RuntimeError):
2018
2135
  def __init__(self, status: dict):
2019
2136
  self.status = dict(status or {})
@@ -2986,19 +3103,243 @@ def decompress_text_blob(blob_b64: str) -> str:
2986
3103
  except Exception:
2987
3104
  return ""
2988
3105
 
3106
def normalize_embedded_newlines(text: object) -> str:
    """Return *text* as a string whose line breaks are all plain LF.

    Falsy input yields an empty string. Two passes are applied in order:
    real line terminators (U+2028, U+2029, CRLF, lone CR) become LF, then
    literal two-character escape sequences (a backslash followed by ``n``,
    ``r`` or ``t``) are expanded to the control character they spell, with
    the escaped CR variants also folding to LF.
    """
    result = str(text or "")
    if not result:
        return ""
    real_terminators = (
        ("\u2028", "\n"),
        ("\u2029", "\n"),
        ("\r\n", "\n"),
        ("\r", "\n"),
    )
    # Longest escape first so an escaped CRLF collapses to a single LF.
    spelled_escapes = (
        ("\\r\\n", "\n"),
        ("\\n", "\n"),
        ("\\r", "\n"),
        ("\\t", "\t"),
    )
    for old, new in real_terminators + spelled_escapes:
        result = result.replace(old, new)
    return result
3115
+
3116
+
3117
def _map_todo_status_token(token: str) -> str:
    """Map a status label to ``pending`` / ``in_progress`` / ``completed``.

    The label is lower-cased, ``_`` and ``-`` are treated as spaces, and runs
    of whitespace are collapsed before lookup. English, Simplified/Traditional
    Chinese and Japanese labels are recognised; ``blocked`` maps to
    ``pending``. The unseparated spelling ``inprogress`` is accepted as well,
    because the status regexes in this module match it
    (``in[_\\-\\s]?progress``). Unknown labels return an empty string.
    """
    raw = str(token or "").strip().lower().replace("_", " ").replace("-", " ")
    raw = re.sub(r"\s+", " ", raw)
    return {
        "pending": "pending",
        "待处理": "pending",
        "待處理": "pending",
        "未着手": "pending",
        "in progress": "in_progress",
        "inprogress": "in_progress",
        "进行中": "in_progress",
        "進行中": "in_progress",
        "completed": "completed",
        "已完成": "completed",
        "完了": "completed",
        "blocked": "pending",
    }.get(raw, "")
3133
+
3134
+
3135
def split_todo_status_text(text: object) -> tuple[str, str]:
    """Split a todo line into ``(status, content)``.

    Status decorations are peeled off the front of the text, at most four
    times, so stacked prefixes such as ``- [x] completed: ...`` are handled.
    Two kinds of decoration are recognised, each optionally preceded by a
    list marker (``-``, ``*``, ``•``, ``>``):

    * checkboxes: ``[x]`` -> completed, ``[>]`` -> in_progress,
      ``[ ]`` -> pending;
    * a status word (English / Chinese / Japanese) followed by one of
      ``:``, full-width ``：``, ``-`` or ``]``.

    The last decoration removed decides the status. ``status`` is an empty
    string when no decoration was found; empty input returns ``("", "")``.
    """
    probe = normalize_embedded_newlines(text).strip()
    if not probe:
        return "", ""

    marker_prefix = r"(?:[-*•>]+\s*)?"
    checkbox_patterns = (
        ("completed", rf"^(?:{marker_prefix})(?:\[x\]\s*)"),
        ("in_progress", rf"^(?:{marker_prefix})(?:\[>\]\s*)"),
        ("pending", rf"^(?:{marker_prefix})(?:\[\s*\]\s*)"),
    )
    # The separator class accepts both the ASCII and the full-width colon;
    # CJK status labels are normally followed by the full-width form.
    label_pattern = (
        rf"^(?:{marker_prefix})"
        r"(pending|in[_\-\s]?progress|completed|blocked|"
        r"待处理|待處理|未着手|进行中|進行中|已完成|完了)"
        r"\s*[:：\-\]]\s*"
    )

    status = ""
    for _ in range(4):
        before = probe
        probe = probe.lstrip()
        for row_status, pattern in checkbox_patterns:
            m = re.match(pattern, probe, flags=re.IGNORECASE)
            if m:
                status = row_status
                probe = probe[m.end():].strip()
                break
        else:
            # No checkbox: try a spelled-out status label.
            m = re.match(label_pattern, probe, flags=re.IGNORECASE)
            mapped = _map_todo_status_token(str(m.group(1) or "")) if m else ""
            if mapped:
                status = mapped
                probe = probe[m.end():].strip()
            elif probe == before:
                # Nothing was consumed this round; further rounds cannot help.
                break
    return status, probe.strip()
3191
+
3192
+
3193
def extract_todo_rows_from_text(
    text: object,
    *,
    default_parent_step_id: str = "",
    limit: int = 12,
) -> list[dict]:
    """Extract todo rows (``{"content", "status"[, "parent_step_id"]}``) from free text.

    Each non-empty line is tried in up to four spellings: as written, with
    leading list markers removed, with a leading ``**label**:`` un-bolded,
    and with both transformations. The first spelling for which
    ``split_todo_status_text`` yields both a status and content produces the
    row. Lines whose cleaned content is only a generic heading word
    ("todo", "tasks", "待办", ...) are ignored, and rows with the same
    (status, normalised content, parent step) are emitted once.

    Args:
        text: Source text; embedded/escaped newlines are normalised first.
        default_parent_step_id: When non-empty, attached to every row as
            ``parent_step_id``.
        limit: Maximum number of rows, bounded to the range 1..40.

    Returns:
        The extracted rows in source order; empty when nothing matched.
    """
    src = normalize_embedded_newlines(text)
    if not src.strip():
        return []

    generic_headings = {
        "todo",
        "todos",
        "task",
        "tasks",
        "subtask",
        "subtasks",
        "待办",
        "待辦",
        "子任务",
        "子任務",
    }
    # The label separator accepts both the ASCII and the full-width colon.
    list_marker_re = r"^\s*(?:[-*•>]+\s*)+"
    bold_label_re = r"^\s*\*\*([^*]+)\*\*\s*([:：])\s*"
    marked_bold_label_re = r"^\s*(?:[-*•>]+\s*)*\*\*([^*]+)\*\*\s*([:：])\s*"

    out: list[dict] = []
    seen: set[tuple[str, str, str]] = set()
    capped = max(1, min(40, int(limit or 12)))
    parent_step_id = trim(str(default_parent_step_id or "").strip(), 20)

    for raw_line in src.splitlines():
        line = trim(str(raw_line or "").strip(), 600)
        if not line:
            continue

        variants: list[str] = []
        for candidate in (
            line,
            re.sub(list_marker_re, "", line).strip(),
            re.sub(bold_label_re, r"\1\2 ", line).strip(),
            re.sub(marked_bold_label_re, r"\1\2 ", line).strip(),
        ):
            candidate = trim(str(candidate or "").strip(), 600)
            if candidate and candidate not in variants:
                variants.append(candidate)

        matched = False
        for candidate in variants:
            status, content = split_todo_status_text(candidate)
            if not status or not content:
                continue
            cleaned = normalize_work_text(content, status) or content
            cleaned = trim(cleaned.strip(), 400)
            if not cleaned or cleaned.lower() in generic_headings:
                continue
            identity = (
                status,
                normalize_work_text(cleaned, status).strip().lower(),
                parent_step_id,
            )
            # A duplicate still counts as "this line was handled".
            matched = True
            if identity not in seen:
                row = {"content": cleaned, "status": status}
                if parent_step_id:
                    row["parent_step_id"] = parent_step_id
                seen.add(identity)
                out.append(row)
            break

        if matched and len(out) >= capped:
            break
    return out
3261
+
3262
+
3263
def infer_todo_status_from_text(text: object, default: str = "pending") -> str:
    """Return the status that ``split_todo_status_text`` finds in *text*.

    When the text carries no recognisable status decoration (including empty
    text), *default* is returned unchanged.
    """
    status, _content = split_todo_status_text(text)
    return status if status else default
3270
+
3271
+
3272
def split_structured_todo_content(text: object, limit: int = 7) -> list[str]:
    """Break one multi-line todo into its numbered or bulleted sub-items.

    Empty text returns ``[]``; text with a single non-empty line, or with no
    recognisable sub-items, is returned whole as ``[text]``.

    When the first line is a major heading (``N. title``) it is skipped and
    only ``N.M ...`` items of that same major number are collected; collection
    stops at the first sub-item of another major number, or at the next major
    heading, once something has been collected. Bullet lines (``-``, ``*``,
    ``•``) are collected when a heading was seen or an item already exists.
    If that pass finds nothing, every ``N.M ...`` line is taken regardless of
    its major number. The result is de-duplicated case-insensitively (with
    whitespace collapsed) and capped at *limit* entries (at least 1).
    """
    src = normalize_embedded_newlines(text).strip()
    if not src:
        return []
    lines = [
        row
        for row in (trim(str(part or "").strip(), 500) for part in src.split("\n"))
        if row
    ]
    if len(lines) <= 1:
        return [src]

    cap = max(1, int(limit or 7))
    major_re = re.compile(r"^(\d+)\.\s+(.+)$")
    sub_re = re.compile(r"^(\d+)\.(\d+)\s+(.+)$")
    bullet_re = re.compile(r"^(?:[-*•]\s+)(.+)$")

    def _render_sub(match) -> str:
        # Rebuild "major.minor text" with the text part length-limited.
        body = trim(str(match.group(3) or "").strip(), 420)
        return f"{match.group(1)}.{match.group(2)} {body}".strip()

    header = major_re.match(lines[0])
    header_major = str(header.group(1) or "") if header else ""

    picked: list[str] = []
    # The heading line itself is never an item.
    candidates = lines[1:] if header_major else lines
    for line in candidates:
        sub = sub_re.match(line)
        if sub:
            if header_major and str(sub.group(1) or "") != header_major:
                if picked:
                    break
                continue
            picked.append(_render_sub(sub))
            continue
        bullet = bullet_re.match(line)
        if bullet and (header_major or picked):
            picked.append(trim(str(bullet.group(1) or "").strip(), 420))
            continue
        if picked and re.match(r"^\d+\.\s+", line):
            break

    if not picked:
        # Fallback: accept any numbered sub-item, whatever its major number.
        for line in lines:
            sub = sub_re.match(line)
            if sub:
                picked.append(_render_sub(sub))
            if len(picked) >= cap:
                break
    if not picked:
        return [src]

    out: list[str] = []
    seen: set[str] = set()
    for line in picked:
        key = re.sub(r"\s+", " ", str(line or "").strip()).lower()
        if not key or key in seen:
            continue
        seen.add(key)
        out.append(line)
        if len(out) >= cap:
            break
    return out or [src]
3326
+
3327
+
2989
3328
  def normalize_work_text(text: object, status: str = "") -> str:
2990
- s = re.sub(r"\s+", " ", str(text or "")).strip()
3329
+ parsed_status, parsed_content = split_todo_status_text(text)
3330
+ s = re.sub(r"\s+", " ", parsed_content or normalize_embedded_newlines(text)).strip()
2991
3331
  if not s:
2992
3332
  return ""
2993
- s = re.sub(r"^\[[ x>\-]\]\s*", "", s, flags=re.IGNORECASE)
2994
3333
  s = re.sub(
2995
- r"^(pending|in[_\-\s]?progress|completed|done|blocked)\s*[·:\-\]]\s*",
3334
+ r"^(pending|todo|in[_\-\s]?progress|doing|working|completed|done|finished|blocked|"
3335
+ r"待处理|待處理|未着手|进行中|進行中|作業中|已完成|完成|完了)\s*[·::\-\]]\s*",
2996
3336
  "",
2997
3337
  s,
2998
3338
  flags=re.IGNORECASE,
2999
3339
  )
3000
- if status:
3001
- status_pattern = re.escape(status).replace("_", r"[_\-\s]?")
3340
+ status_key = _map_todo_status_token(status) or _map_todo_status_token(parsed_status) or str(status or "").strip().lower()
3341
+ if status_key:
3342
+ status_pattern = re.escape(status_key).replace("_", r"[_\-\s]?")
3002
3343
  s = re.sub(
3003
3344
  rf"\s*[—-]\s*{status_pattern}\s*$",
3004
3345
  "",
@@ -3429,6 +3770,12 @@ def infer_user_complexity_value(text: str) -> str:
3429
3770
  low = strip_thinking_content(str(text or "")).strip().lower()
3430
3771
  if not low:
3431
3772
  return ""
3773
+ for token in USER_COMPLEXITY_EXPERT_TOKENS:
3774
+ if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
3775
+ return "expert"
3776
+ for token in USER_COMPLEXITY_MODERATE_TOKENS:
3777
+ if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
3778
+ return "moderate"
3432
3779
  for token in USER_COMPLEXITY_SIMPLE_TOKENS:
3433
3780
  if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
3434
3781
  return "simple"
@@ -3437,6 +3784,53 @@ def infer_user_complexity_value(text: str) -> str:
3437
3784
  return "complex"
3438
3785
  return ""
3439
3786
 
3787
def normalize_task_complexity(raw: object, default: str = "simple") -> str:
    """Normalise a complexity label to one of ``TASK_COMPLEXITY_LEVELS``.

    The label is lower-cased, and hyphens and whitespace are folded to ``_``
    so that ``"system-level"`` and ``"System Level"`` hit the same alias as
    ``"system_level"``. Known aliases (``low``, ``medium``, ``high``,
    ``advanced``, ...) are then mapped to their band.

    Args:
        raw: Candidate label; any object, falsy values count as empty.
        default: Returned when *raw* is not recognised. An empty default
            yields an empty string; a default that is itself not a valid
            level yields ``"simple"``.

    Returns:
        The normalised level, or the fallback described above.
    """
    value = "_".join(str(raw or "").strip().lower().replace("-", " ").split())
    aliases = {
        "simple": "simple",
        "low": "simple",
        "basic": "simple",
        "minimal": "simple",
        "moderate": "moderate",
        "medium": "moderate",
        "mid": "moderate",
        "balanced": "moderate",
        "standard": "moderate",
        "complex": "complex",
        "high": "complex",
        "hard": "complex",
        "difficult": "complex",
        "expert": "expert",
        "advanced": "expert",
        "system": "expert",
        "system_level": "expert",
        "production": "expert",
    }
    normalized = aliases.get(value, value)
    if normalized in TASK_COMPLEXITY_LEVELS:
        return normalized
    fallback = str(default or "").strip().lower()
    if not fallback:
        return ""
    return fallback if fallback in TASK_COMPLEXITY_LEVELS else "simple"
3816
+
3817
def task_complexity_rank(raw: object, default: str = "simple") -> int:
    """Return the numeric rank of a complexity label.

    The label is first normalised with ``normalize_task_complexity`` (using
    *default* as its fallback); labels missing from ``TASK_COMPLEXITY_RANKS``
    rank as 1.
    """
    level = normalize_task_complexity(raw, default=default)
    rank = TASK_COMPLEXITY_RANKS.get(level, 1)
    return int(rank)
3819
+
3820
def task_complexity_at_least(raw: object, threshold: str) -> bool:
    """Tell whether *raw* ranks at or above *threshold* in complexity."""
    actual = task_complexity_rank(raw)
    required = task_complexity_rank(threshold)
    return actual >= required
3822
+
3823
def max_task_complexity(*values: object, default: str = "simple") -> str:
    """Return the highest-ranked complexity among *default* and *values*.

    Every candidate is normalised with ``normalize_task_complexity`` first.
    On equal rank the earliest candidate wins, with *default* considered
    before any of *values*.
    """
    candidates = [
        normalize_task_complexity(item, default=default)
        for item in (default, *values)
    ]
    # max() keeps the first of several equally ranked candidates.
    return max(
        candidates,
        key=lambda level: task_complexity_rank(level, default=default),
    )
3833
+
3440
3834
  def normalize_openai_compat_provider_name(raw: str) -> str:
3441
3835
  value = str(raw or "").strip().lower().replace("-", "_")
3442
3836
  aliases = {
@@ -5236,6 +5630,31 @@ class TodoManager:
5236
5630
  def update(self, items: list[dict]) -> str:
5237
5631
  if not isinstance(items, list):
5238
5632
  raise ValueError("items must be array")
5633
+ expanded_items: list[dict] = []
5634
+ for item in items:
5635
+ if isinstance(item, str):
5636
+ raw = {"content": item, "status": "pending"}
5637
+ elif isinstance(item, dict):
5638
+ raw = dict(item)
5639
+ else:
5640
+ try:
5641
+ raw = {"content": str(item).strip(), "status": "pending"}
5642
+ except Exception:
5643
+ continue
5644
+ raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
5645
+ split_rows = split_structured_todo_content(raw_content, limit=7)
5646
+ if len(split_rows) <= 1:
5647
+ expanded_items.append(raw)
5648
+ continue
5649
+ base_status = str(raw.get("status", raw.get("state", "pending")) or "pending").strip().lower()
5650
+ for split_idx, split_content in enumerate(split_rows):
5651
+ split_raw = dict(raw)
5652
+ split_raw["content"] = split_content
5653
+ split_raw["status"] = infer_todo_status_from_text(
5654
+ split_content,
5655
+ default=(base_status if split_idx == 0 else "pending"),
5656
+ )
5657
+ expanded_items.append(split_raw)
5239
5658
  validated = []
5240
5659
  # Plan-step items (bb:proj: key) keep a single in_progress slot.
5241
5660
  # Worker/non-plan items allow one in_progress per owner so sync-mode agents
@@ -5251,23 +5670,21 @@ class TodoManager:
5251
5670
  "finish": "completed",
5252
5671
  "finished": "completed",
5253
5672
  }
5254
- for idx, item in enumerate(items):
5255
- if isinstance(item, str):
5256
- raw = {"content": item, "status": "pending"}
5257
- elif isinstance(item, dict):
5258
- raw = item
5259
- else:
5260
- raise ValueError(f"item {idx}: invalid type")
5673
+ for idx, item in enumerate(expanded_items):
5674
+ raw = item if isinstance(item, dict) else {"content": str(item or "").strip(), "status": "pending"}
5261
5675
  raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
5676
+ inferred_status = infer_todo_status_from_text(raw_content, default="")
5262
5677
  content = normalize_work_text(raw_content)
5263
5678
  if not content:
5264
5679
  content = raw_content
5265
5680
  if not content:
5266
- raise ValueError(f"item {idx}: content required")
5681
+ continue # Skip empty items instead of raising
5267
5682
  raw_status = str(raw.get("status", raw.get("state", "pending"))).strip().lower()
5268
5683
  status = status_alias.get(raw_status, raw_status or "pending")
5684
+ if inferred_status and status in {"", "pending", "todo"}:
5685
+ status = inferred_status
5269
5686
  if status not in {"pending", "in_progress", "completed"}:
5270
- status = "pending"
5687
+ status = inferred_status or "pending"
5271
5688
  content = normalize_work_text(content, status) or content
5272
5689
  active_form = str(
5273
5690
  raw.get(
@@ -7051,9 +7468,11 @@ Use this skill when:
7051
7468
  6. Report rewritten count, copied files, and unresolved URLs.
7052
7469
 
7053
7470
  ## Rules
7471
+ - Treat `./js_lib` and `/js_lib/...` as workspace lookup locations only, not final browser-facing URLs.
7054
7472
  - Keep `./js` per HTML location (do not hardcode global absolute paths).
7055
7473
  - Keep file names deterministic and safe (`[A-Za-z0-9._-]`).
7056
7474
  - Preserve existing relative local script paths if already offline-ready.
7475
+ - Final HTML must not point to `/js_lib/...`, `/assets/js_lib/...`, or other virtual asset aliases; copy first, then use plain relative paths.
7057
7476
 
7058
7477
  ## Output Contract
7059
7478
  Return:
@@ -12420,12 +12839,12 @@ TOOLS = [
12420
12839
  ),
12421
12840
  tool_def("write_file", "Write file content.", {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
12422
12841
  tool_def("edit_file", "Edit a file by replacing first match.", {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, ["path", "old_text", "new_text"]),
12423
- tool_def("TodoWrite", "Update todo list.", {"items": {"type": "array", "items": {"type": "object"}}}, ["items"]),
12842
+ tool_def("TodoWrite", "Update todo list. Preferred format: objects with content/status/owner/parent_step_id. String fallback should use only '[ ] task', '[>] task', or '[x] task'.", {"items": {"type": "array", "items": {}}}, ["items"]),
12424
12843
  tool_def(
12425
12844
  "TodoWriteRescue",
12426
- "Fallback todo writer when TodoWrite keeps failing/repeating. Accepts simple string items and auto-normalizes schema.",
12845
+ "Fallback todo writer. Preferred format: objects with content/status/owner/parent_step_id. String fallback should use only '[ ] task', '[>] task', or '[x] task'.",
12427
12846
  {
12428
- "items": {"type": "array", "items": {"type": "string"}},
12847
+ "items": {"type": "array", "items": {}},
12429
12848
  "in_progress_index": {"type": "integer"},
12430
12849
  },
12431
12850
  ["items"],
@@ -12694,6 +13113,7 @@ class SessionState:
12694
13113
  context_limit_locked: bool = False,
12695
13114
  max_rounds: int = MAX_AGENT_ROUNDS,
12696
13115
  max_run_seconds: int = MAX_RUN_SECONDS,
13116
+ shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
12697
13117
  auto_model_switch: bool = False,
12698
13118
  arbiter_enabled: bool = True,
12699
13119
  arbiter_model: str = "",
@@ -12822,6 +13242,7 @@ class SessionState:
12822
13242
  self.runtime_complexity_floor = ""
12823
13243
  self.runtime_task_level_floor = 0
12824
13244
  self.runtime_task_level_ceiling = 0 # 0 = no ceiling; set from plan risk on approval
13245
+ self._todowrite_step_counter: dict[str, int] = {} # Fix 5: track consecutive TodoWrite per step for loop detection
12825
13246
  self.runtime_scale_preference = "balanced"
12826
13247
  self.runtime_direct_objective = ""
12827
13248
  self.runtime_reclassify_goal = ""
@@ -12855,6 +13276,8 @@ class SessionState:
12855
13276
  self._cached_llm_complexity = ""
12856
13277
  self._cached_complexity_dimensions: dict = {} # scope/steps/skill/output dimensions
12857
13278
  self._pending_media_inputs: list[dict] = []
13279
+ self._pending_runtime_updates: list[dict] = []
13280
+ self._deferred_runtime_sync_requested = False
12858
13281
  self.tool_retry_counts: dict[str, int] = {}
12859
13282
  self.last_auto_title_ts = 0.0
12860
13283
  self.live_thinking_text = ""
@@ -12903,6 +13326,12 @@ class SessionState:
12903
13326
  maximum=MAX_RUN_TIMEOUT_SECONDS,
12904
13327
  fallback=MAX_RUN_SECONDS,
12905
13328
  )
13329
+ self.shell_command_timeout_seconds = normalize_timeout_seconds(
13330
+ shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
13331
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
13332
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
13333
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
13334
+ )
12906
13335
  self.truncation_count = 0
12907
13336
  self.last_truncation_ts = 0.0
12908
13337
  self.truncation_rescue_task_ids: list[int] = []
@@ -14345,9 +14774,9 @@ class SessionState:
14345
14774
  )
14346
14775
  if task_type in TASK_PROFILE_TYPES:
14347
14776
  self.runtime_task_type = task_type
14348
- complexity = trim(
14349
- str(profile.get("complexity", judgement.get("complexity", self.runtime_task_complexity or "")) or "").strip().lower(),
14350
- 20,
14777
+ complexity = normalize_task_complexity(
14778
+ profile.get("complexity", judgement.get("complexity", self.runtime_task_complexity or "")),
14779
+ default="simple",
14351
14780
  )
14352
14781
  if complexity in TASK_COMPLEXITY_LEVELS:
14353
14782
  self.runtime_task_complexity = complexity
@@ -14817,12 +15246,15 @@ class SessionState:
14817
15246
 
14818
15247
  def _current_plan_step_text(self, board: dict | None = None) -> str:
14819
15248
  row = self._current_plan_step_row(board)
14820
- return trim(str((row or {}).get("content", "") or "").strip(), 400)
15249
+ content = normalize_embedded_newlines((row or {}).get("content", "") or "").strip()
15250
+ if "\n" in content:
15251
+ content = content.split("\n", 1)[0].strip()
15252
+ return trim(content, 400)
14821
15253
 
14822
15254
  def _current_plan_step_full_text(self, board: dict | None = None, max_len: int = 1200) -> str:
14823
15255
  row = self._current_plan_step_row(board)
14824
15256
  return trim(
14825
- str((row or {}).get("full_content", "") or (row or {}).get("content", "") or "").strip(),
15257
+ normalize_embedded_newlines((row or {}).get("full_content", "") or (row or {}).get("content", "") or "").strip(),
14826
15258
  max_len,
14827
15259
  )
14828
15260
 
@@ -15000,7 +15432,7 @@ class SessionState:
15000
15432
  pass
15001
15433
  t = threading.Thread(target=_llm_match, daemon=True)
15002
15434
  t.start()
15003
- t.join(timeout=60.0)
15435
+ t.join(timeout=5.0)
15004
15436
  if llm_result:
15005
15437
  matched_names = llm_result
15006
15438
  self._emit("status", {"summary": f"skill discovery (LLM task analysis): {matched_names} ({trigger})"})
@@ -15034,7 +15466,7 @@ class SessionState:
15034
15466
  # --- Path 3: Deferred LLM pickup if still running ---
15035
15467
  if not matched_names and t.is_alive():
15036
15468
  def _deferred_llm_pickup():
15037
- t.join(timeout=60.0)
15469
+ t.join(timeout=8.0)
15038
15470
  if llm_result and not self._loaded_skill_rows():
15039
15471
  for name_str in llm_result[:3]:
15040
15472
  try:
@@ -15431,7 +15863,6 @@ class SessionState:
15431
15863
  "ENGINEERING EXECUTION DISCIPLINE: "
15432
15864
  "For coding, bug-fix, architecture, integration, and testing work, proactively use the skill system when a matching skill exists. "
15433
15865
  "Do not wait for failure before calling list_skills/load_skill for debugging, API, frontend, parser, or recovery workflows. "
15434
- "Already-loaded skills appear as <loaded-skill> messages — use them directly without reloading. "
15435
15866
  "Use a root-cause-first loop: inspect the exact error or failing behavior, read the implicated file or path, form one concrete hypothesis, apply one bounded fix, then run at least one fix-and-verify cycle before declaring success. "
15436
15867
  "If read_file or bash reports a missing path, empty folder, or mismatched filename, stop repeating the same lookup. "
15437
15868
  "Reconcile the path against uploads, recent file paths, file explorer entries, and close workspace matches; then either open the closest candidate or create the intended target. "
@@ -15485,6 +15916,10 @@ class SessionState:
15485
15916
  f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
15486
15917
  f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS require) or pptxgen.bundle.js (browser). "
15487
15918
  f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
15919
+ "IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
15920
+ "If an HTML file uses any asset from js_lib, copy that file into a task-local relative asset folder "
15921
+ "(for example './js/' or './assets/vendor/') next to the deliverable, then reference it with a plain relative path in HTML. "
15922
+ "Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases inside final exported HTML. "
15488
15923
  f"Task level={runtime_level}, mode={runtime_mode}, "
15489
15924
  f"budget={'unlimited' if budget <= 0 else budget}. "
15490
15925
  f"Context limit ~{self.context_token_upper_bound} tokens. "
@@ -19868,11 +20303,17 @@ body{padding:18px}
19868
20303
  with self.lock:
19869
20304
  if self.running:
19870
20305
  config_delayed = True
19871
- self._pending_media_inputs.append({
19872
- "type": "deferred_config",
19873
- "config": cfg_obj,
19874
- "source": workspace_rel,
19875
- })
20306
+ if config_delayed:
20307
+ self._queue_deferred_runtime_update(
20308
+ "llm_config",
20309
+ {"config": cfg_obj, "source": workspace_rel},
20310
+ )
20311
+ loaded_config = self.model_catalog()
20312
+ if isinstance(loaded_config, dict):
20313
+ loaded_config["queued"] = True
20314
+ loaded_config["note"] = (
20315
+ "session is running; llm config queued and will apply after the current run finishes"
20316
+ )
19876
20317
  if not config_delayed:
19877
20318
  loaded_config = self.load_llm_config(cfg_obj, source=workspace_rel)
19878
20319
  self._emit("config_applied", {
@@ -21038,7 +21479,7 @@ body{padding:18px}
21038
21479
  return any(x in t for x in markers)
21039
21480
 
21040
21481
  def _llm_classify_task_complexity(self, goal_text: str) -> str:
21041
- """LLM semantic pre-screening: classify task as simple/complex via 4-dimension analysis. 5s timeout."""
21482
+ """LLM semantic pre-screening: classify task into 4 complexity bands via 4-dimension analysis. 5s timeout."""
21042
21483
  goal = trim(str(goal_text or ""), 400)
21043
21484
  if not goal or len(goal) < 6:
21044
21485
  return "simple"
@@ -21055,8 +21496,7 @@ body{padding:18px}
21055
21496
  f"SKILL: does it need specialized tools, skills, research, or APIs?\n"
21056
21497
  f"OUTPUT: what is expected (1=text answer, 2=single file, 3=system/multi-file)?\n\n"
21057
21498
  f"Output exactly one line:\n"
21058
- f"SCOPE:N STEPS:N SKILL:N OUTPUT:N VERDICT:SIMPLE|COMPLEX\n"
21059
- f"(COMPLEX if any dimension >= 2)"
21499
+ f"SCOPE:N STEPS:N SKILL:N OUTPUT:N VERDICT:SIMPLE|MODERATE|COMPLEX|EXPERT"
21060
21500
  )}],
21061
21501
  system="/no_think\nAnalyze task dimensions. One line output only.",
21062
21502
  max_tokens=40,
@@ -21071,8 +21511,26 @@ body{padding:18px}
21071
21511
  dims[dim.lower()] = int(m.group(1))
21072
21512
  if dims:
21073
21513
  self._cached_complexity_dimensions = dims
21074
- if "COMPLEX" in answer:
21514
+ vals = [int(v) for v in dims.values()]
21515
+ max_dim = max(vals) if vals else 1
21516
+ count_ge2 = sum(1 for v in vals if int(v) >= 2)
21517
+ count_ge3 = sum(1 for v in vals if int(v) >= 3)
21518
+ if max_dim <= 1:
21519
+ result_box[0] = "simple"
21520
+ elif max_dim == 2:
21521
+ result_box[0] = "moderate"
21522
+ elif count_ge3 >= 2 or count_ge2 >= 4:
21523
+ result_box[0] = "expert"
21524
+ else:
21525
+ result_box[0] = "complex"
21526
+ if "VERDICT:EXPERT" in answer:
21527
+ result_box[0] = "expert"
21528
+ elif "VERDICT:COMPLEX" in answer:
21075
21529
  result_box[0] = "complex"
21530
+ elif "VERDICT:MODERATE" in answer:
21531
+ result_box[0] = "moderate"
21532
+ elif "VERDICT:SIMPLE" in answer:
21533
+ result_box[0] = "simple"
21076
21534
  except Exception:
21077
21535
  pass
21078
21536
  t = threading.Thread(target=_classify, daemon=True)
@@ -21085,9 +21543,9 @@ body{padding:18px}
21085
21543
  low = clean.lower()
21086
21544
  explicit_complexity = infer_user_complexity_value(clean)
21087
21545
  # Use cached LLM complexity result (set by _agent_worker entry point)
21088
- llm_complexity = str(getattr(self, '_cached_llm_complexity', '') or '')
21089
- nontrivial = self._looks_nontrivial_request(clean) or llm_complexity == "complex"
21090
- direct_question = self._looks_like_direct_question_request(clean) and llm_complexity != "complex"
21546
+ llm_complexity = normalize_task_complexity(str(getattr(self, '_cached_llm_complexity', '') or ''), default="simple")
21547
+ nontrivial = self._looks_nontrivial_request(clean) or task_complexity_at_least(llm_complexity, "moderate")
21548
+ direct_question = self._looks_like_direct_question_request(clean) and (not task_complexity_at_least(llm_complexity, "moderate"))
21091
21549
  code_markers = [
21092
21550
  # 代码/编程
21093
21551
  "代码", "寫代碼", "写代码", "脚本", "模块", "函数", "class", "bug",
@@ -21124,6 +21582,7 @@ body{padding:18px}
21124
21582
  has_code_intent = any(x in low for x in code_markers)
21125
21583
  has_research_intent = any(x in low for x in research_markers)
21126
21584
  length = len(clean)
21585
+ derived_complexity = max_task_complexity(explicit_complexity, llm_complexity, default="simple")
21127
21586
  if direct_question and (not nontrivial) and (not has_code_intent) and length <= 220:
21128
21587
  return {
21129
21588
  "task_type": "simple_qa",
@@ -21154,7 +21613,11 @@ body{padding:18px}
21154
21613
  if has_research_intent and (not has_code_intent):
21155
21614
  return {
21156
21615
  "task_type": "research",
21157
- "complexity": explicit_complexity or ("complex" if (nontrivial or length >= 280) else "simple"),
21616
+ "complexity": explicit_complexity or max_task_complexity(
21617
+ derived_complexity,
21618
+ ("complex" if length >= 480 else "moderate" if (nontrivial or length >= 280) else "simple"),
21619
+ default="simple",
21620
+ ),
21158
21621
  "direct_objective": "Collect evidence first, then synthesize a concise actionable answer.",
21159
21622
  "recommended_agents": ["explorer", "developer", "reviewer"],
21160
21623
  "round_budget": 10 if (nontrivial or length >= 280) else 6,
@@ -21165,7 +21628,15 @@ body{padding:18px}
21165
21628
  if nontrivial or has_code_intent or length >= 280:
21166
21629
  return {
21167
21630
  "task_type": "engineering",
21168
- "complexity": explicit_complexity or "complex",
21631
+ "complexity": explicit_complexity or max_task_complexity(
21632
+ derived_complexity,
21633
+ (
21634
+ "expert"
21635
+ if ((has_code_intent and has_research_intent) or length >= 900)
21636
+ else "complex"
21637
+ ),
21638
+ default="moderate",
21639
+ ),
21169
21640
  "direct_objective": (
21170
21641
  "Use blackboard collaboration to implement, validate, and converge with concrete outputs."
21171
21642
  ),
@@ -21177,7 +21648,7 @@ body{padding:18px}
21177
21648
  }
21178
21649
  return {
21179
21650
  "task_type": "general",
21180
- "complexity": explicit_complexity or "simple",
21651
+ "complexity": explicit_complexity or derived_complexity or "simple",
21181
21652
  "direct_objective": (
21182
21653
  "Provide the most direct useful response with minimal orchestration, "
21183
21654
  "anchored to the current project context and user goal."
@@ -21439,6 +21910,66 @@ body{padding:18px}
21439
21910
  model = str(profile.get("model", self.ollama.model) or self.ollama.model).strip()
21440
21911
  return f"{self.active_profile_id}::{model}"
21441
21912
 
21913
def _queue_deferred_runtime_update(self, kind: str, payload: dict) -> int:
    """Append a runtime update to the deferred queue and return the queue length.

    Args:
        kind: Update category; it is stringified, stripped and lower-cased
            before being stored.
        payload: Data for the update; a shallow ``dict`` copy is stored
            (a falsy payload becomes an empty dict).

    Returns:
        Number of entries held in ``self._pending_runtime_updates`` after
        the append and truncation.

    Raises:
        ValueError: If ``kind`` normalises to an empty string.
    """
    entry = {
        "kind": str(kind or "").strip().lower(),
        "payload": dict(payload or {}),
        "queued_at": float(now_ts()),
    }
    if entry["kind"] == "":
        raise ValueError("deferred runtime update kind required")

    # NOTE(review): the diff rendering this block was recovered from lost its
    # indentation; the timestamp update and persist call are assumed to run
    # while the lock is held - confirm against the packaged source.
    with self.lock:
        backlog = self._pending_runtime_updates
        backlog.append(entry)
        # Only the 16 most recently queued updates are retained.
        self._pending_runtime_updates = backlog[-16:]
        queued = len(self._pending_runtime_updates)
        self.updated_at = now_ts()
        self._persist()
    return queued
21928
+
21929
+ def _apply_deferred_runtime_updates(self) -> list[str]:
21930
+ with self.lock:
21931
+ if self.running or not self._pending_runtime_updates:
21932
+ return []
21933
+ queued = list(self._pending_runtime_updates)
21934
+ self._pending_runtime_updates = []
21935
+ self.updated_at = now_ts()
21936
+ self._persist()
21937
+ applied_notes: list[str] = []
21938
+ sync_needed = False
21939
+ for item in queued:
21940
+ kind = str(item.get("kind", "") or "").strip().lower()
21941
+ payload = item.get("payload", {}) if isinstance(item.get("payload"), dict) else {}
21942
+ try:
21943
+ if kind == "llm_config":
21944
+ source = str(payload.get("source", "") or "deferred-config").strip()
21945
+ config = payload.get("config", {})
21946
+ if isinstance(config, dict) and config:
21947
+ self.load_llm_config(config, source=source)
21948
+ applied_notes.append(f"deferred llm config applied: {trim(source, 120)}")
21949
+ sync_needed = True
21950
+ elif kind == "model_selection":
21951
+ selection = str(payload.get("selection", "") or "").strip()
21952
+ model_override = payload.get("model_override")
21953
+ self.set_runtime_selection(
21954
+ selection,
21955
+ model_override if isinstance(model_override, str) else None,
21956
+ )
21957
+ applied_notes.append(f"deferred model switch applied: {trim(selection, 120)}")
21958
+ sync_needed = True
21959
+ except Exception as exc:
21960
+ self._emit(
21961
+ "status",
21962
+ {
21963
+ "summary": (
21964
+ f"deferred runtime update failed ({kind or 'unknown'}): "
21965
+ f"{trim(str(exc), 180)}"
21966
+ )
21967
+ },
21968
+ )
21969
+ if sync_needed:
21970
+ self._deferred_runtime_sync_requested = True
21971
+ return applied_notes
21972
+
21442
21973
  def _global_wait_timeout_seconds(self) -> int:
21443
21974
  raw = (
21444
21975
  self.max_run_seconds
@@ -21997,10 +22528,12 @@ body{padding:18px}
21997
22528
  _stop_process(proc)
21998
22529
  meta["error"] = "Error: interrupted by user"
21999
22530
  meta["exit_code"] = -130
22531
+ break
22000
22532
  elif (not meta.get("error")) and timeout > 0 and elapsed >= timeout:
22001
22533
  _stop_process(proc)
22002
22534
  meta["error"] = f"Error: timeout ({timeout}s)"
22003
22535
  meta["exit_code"] = -1
22536
+ break
22004
22537
  try:
22005
22538
  label, chunk = io_queue.get(timeout=0.12)
22006
22539
  if chunk is None:
@@ -22085,6 +22618,7 @@ body{padding:18px}
22085
22618
  if create_group > 0:
22086
22619
  popen_kwargs["creationflags"] = create_group
22087
22620
  proc = subprocess.Popen(effective_command, **popen_kwargs)
22621
+ self._running_bash_proc = proc
22088
22622
  if os.name == "nt":
22089
22623
  # Windows: read PIPE output via blocking reader threads + queue.
22090
22624
  _collect_with_reader_threads(proc)
@@ -22110,10 +22644,12 @@ body{padding:18px}
22110
22644
  _stop_process(proc)
22111
22645
  meta["error"] = "Error: interrupted by user"
22112
22646
  meta["exit_code"] = -130
22647
+ break
22113
22648
  elif timeout > 0 and elapsed >= timeout:
22114
22649
  _stop_process(proc)
22115
22650
  meta["error"] = f"Error: timeout ({timeout}s)"
22116
22651
  meta["exit_code"] = -1
22652
+ break
22117
22653
  events = sel.select(timeout=0.12)
22118
22654
  for key, _ in events:
22119
22655
  stream = key.fileobj
@@ -22171,6 +22707,8 @@ body{padding:18px}
22171
22707
  meta["error"] = f"Error: {exc}"
22172
22708
  meta["output"] = meta["error"]
22173
22709
  meta["exit_code"] = -1
22710
+ finally:
22711
+ self._running_bash_proc = None
22174
22712
  meta["duration_ms"] = int((time.time() - start) * 1000)
22175
22713
  after = self._git_status_map(cwd)
22176
22714
  meta["changed_files"] = self._status_delta(before, after) if before or after else []
@@ -22181,11 +22719,19 @@ body{padding:18px}
22181
22719
  str(meta.get("output") or meta.get("error") or "(no output)"),
22182
22720
  cwd=cwd,
22183
22721
  )
22184
- )
22722
+ )
22185
22723
  return meta
22186
22724
 
22725
def _shell_command_timeout(self) -> int:
    """Return the shell-command timeout, in seconds, for this session.

    Reads ``self.shell_command_timeout_seconds`` (falling back to
    ``DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS`` when the attribute is absent)
    and passes it through ``normalize_timeout_seconds`` with the module's
    minimum, maximum and fallback shell-timeout constants.
    """
    configured = getattr(
        self,
        "shell_command_timeout_seconds",
        DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
    )
    return normalize_timeout_seconds(
        configured,
        minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
        maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
        fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
    )
22732
+
22187
22733
  def _run_bash(self, command: str) -> str:
22188
- return self._run_shell_meta(command, self.files_root, 120)["output"]
22734
+ return self._run_shell_meta(command, self.files_root, self._shell_command_timeout())["output"]
22189
22735
 
22190
22736
  def _fuzzy_resolve_path(self, fp: Path) -> Path:
22191
22737
  """If fp doesn't exist, try stripping spaces from the filename to find a close match.
@@ -22883,10 +23429,10 @@ body{padding:18px}
22883
23429
  "3) scaffold semantic HTML; "
22884
23430
  "4) apply CSS tokens + responsive layout; "
22885
23431
  "5) wire JS state/data interactions; "
22886
- "6) localize external JS dependencies to ./js from ./js_lib; "
23432
+ "6) localize external JS dependencies to a task-local relative folder such as ./js from ./js_lib, and rewrite final HTML to plain relative paths; "
22887
23433
  "7) run QA loop for desktop/mobile/a11y/performance and iterate. "
22888
23434
  f"Offline JS libs available now: {libs_hint}. "
22889
- "Final exported HTML should avoid unresolved CDN-only script src."
23435
+ "Final exported HTML should avoid unresolved CDN-only script src and must not keep '/js_lib/...' or '/assets/js_lib/...' virtual URLs."
22890
23436
  )
22891
23437
 
22892
23438
  def _contains_any_keyword(self, text: str, keywords: tuple[str, ...]) -> bool:
@@ -23050,9 +23596,10 @@ body{padding:18px}
23050
23596
  ) or str(base.get("task_type", "general"))
23051
23597
  if task_type not in TASK_PROFILE_TYPES:
23052
23598
  task_type = str(base.get("task_type", "general"))
23053
- complexity = str(src.get("complexity", base.get("complexity", "simple")) or "").strip().lower()
23054
- if complexity not in TASK_COMPLEXITY_LEVELS:
23055
- complexity = str(base.get("complexity", "simple"))
23599
+ complexity = normalize_task_complexity(
23600
+ src.get("complexity", base.get("complexity", "simple")),
23601
+ default=str(base.get("complexity", "simple") or "simple"),
23602
+ )
23056
23603
  src_direct_objective = trim(str(src.get("direct_objective", "") or "").strip(), 800)
23057
23604
  legacy_objectives = {
23058
23605
  "Provide the most direct useful response with minimal orchestration.",
@@ -23089,9 +23636,9 @@ body{padding:18px}
23089
23636
  if raw_level not in TASK_LEVEL_CHOICES:
23090
23637
  if task_type == "simple_qa":
23091
23638
  raw_level = 1 if len(str(goal or "")) <= 180 else 2
23092
- elif task_type in {"simple_code", "research"} and complexity == "simple":
23639
+ elif task_type in {"simple_code", "research"} and task_complexity_rank(complexity) <= task_complexity_rank("moderate"):
23093
23640
  raw_level = 3
23094
- elif complexity == "complex":
23641
+ elif task_complexity_at_least(complexity, "complex"):
23095
23642
  raw_level = 4
23096
23643
  else:
23097
23644
  raw_level = 2
@@ -23180,7 +23727,7 @@ body{padding:18px}
23180
23727
  goal = str(bb.get("original_goal", "") or "")
23181
23728
  current = bb.get("task_profile", {})
23182
23729
  profile = self._normalize_task_profile(goal, {} if force else current)
23183
- if profile.get("complexity") == "simple":
23730
+ if task_complexity_rank(profile.get("complexity", "simple")) < task_complexity_rank("complex"):
23184
23731
  logs = bb.get("execution_logs", []) if isinstance(bb.get("execution_logs"), list) else []
23185
23732
  tail = "\n".join(
23186
23733
  str((row or {}).get("content", "") or "")
@@ -23284,10 +23831,16 @@ body{padding:18px}
23284
23831
  # Project todo gate: coding tasks must pass compile + test
23285
23832
  profile = self._ensure_blackboard_task_profile(bb)
23286
23833
  task_type = str(profile.get("task_type", "general") or "general")
23834
+ exec_mode = normalize_execution_mode(
23835
+ profile.get("execution_mode", self._effective_execution_mode()),
23836
+ default=self._effective_execution_mode(),
23837
+ )
23287
23838
  if task_type in ("simple_code", "engineering"):
23288
23839
  for todo in bb.get("project_todos", []):
23289
23840
  if todo.get("category") in ("compile_test", "min_test") and todo.get("status") != "completed":
23290
23841
  return False, f"project-todo-incomplete:{todo.get('category', '')}"
23842
+ if exec_mode == EXECUTION_MODE_SYNC and not self._manager_feedback_passed_from_blackboard(bb):
23843
+ return False, "sync-review-missing"
23291
23844
  return True, "ok"
23292
23845
 
23293
23846
  def _invalidate_stale_approval_if_needed(
@@ -23505,6 +24058,10 @@ body{padding:18px}
23505
24058
  def _watchdog_state_fingerprint(self, board: dict | None = None) -> str:
23506
24059
  bb = board if isinstance(board, dict) else self._ensure_blackboard()
23507
24060
  profile = self._ensure_blackboard_task_profile(bb)
24061
+ step_snapshot = self._active_plan_progress_snapshot(bb)
24062
+ last_reply = bb.get("last_worker_reply", {}) if isinstance(bb.get("last_worker_reply"), dict) else {}
24063
+ last_reply_role = self._sanitize_agent_role(last_reply.get("role", ""))
24064
+ last_reply_text = trim(str(last_reply.get("text", "") or "").strip(), 240)
23508
24065
  payload = {
23509
24066
  "status": self._normalize_blackboard_status(bb.get("status", "INITIALIZING")),
23510
24067
  "goal": trim(str(bb.get("original_goal", "") or "").strip(), 400),
@@ -23517,6 +24074,16 @@ body{padding:18px}
23517
24074
  "approved": bool((bb.get("approval", {}) or {}).get("approved", False)),
23518
24075
  "task_type": str(profile.get("task_type", "general") or "general"),
23519
24076
  "complexity": str(profile.get("complexity", "simple") or "simple"),
24077
+ "plan_step_id": str(step_snapshot.get("step_id", "") or ""),
24078
+ "plan_step_text": trim(str(step_snapshot.get("step_text", "") or "").strip(), 180),
24079
+ "worker_todo_count": int(step_snapshot.get("worker_todo_count", 0) or 0),
24080
+ "worker_todo_completed": int(step_snapshot.get("completed_count", 0) or 0),
24081
+ "worker_todo_in_progress": int(step_snapshot.get("in_progress_count", 0) or 0),
24082
+ "worker_todo_pending": int(step_snapshot.get("pending_count", 0) or 0),
24083
+ "current_subtask": trim(str(step_snapshot.get("current_subtask", "") or "").strip(), 180),
24084
+ "next_pending_subtask": trim(str(step_snapshot.get("next_pending_subtask", "") or "").strip(), 180),
24085
+ "last_worker_reply_role": last_reply_role,
24086
+ "last_worker_reply_text": last_reply_text,
23520
24087
  }
23521
24088
  raw = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
23522
24089
  return hashlib.sha1(raw.encode("utf-8")).hexdigest()
@@ -24161,6 +24728,7 @@ body{padding:18px}
24161
24728
  "instruction": "",
24162
24729
  "reason": "",
24163
24730
  "source": "",
24731
+ "progress_fp": "",
24164
24732
  "is_mandatory": False,
24165
24733
  "ts": 0.0,
24166
24734
  },
@@ -24216,6 +24784,7 @@ body{padding:18px}
24216
24784
  "instruction": trim(str(raw_delegate.get("instruction", "") or "").strip(), 1200),
24217
24785
  "reason": trim(str(raw_delegate.get("reason", "") or "").strip(), 600),
24218
24786
  "source": trim(str(raw_delegate.get("source", "") or "").strip(), 40),
24787
+ "progress_fp": trim(str(raw_delegate.get("progress_fp", "") or "").strip(), 80),
24219
24788
  "is_mandatory": _to_bool_like(raw_delegate.get("is_mandatory", False), default=False),
24220
24789
  "ts": float(raw_delegate.get("ts", 0.0) or 0.0),
24221
24790
  }
@@ -24353,8 +24922,8 @@ body{padding:18px}
24353
24922
  for pt in bb_src_todos[:40]:
24354
24923
  if not isinstance(pt, dict):
24355
24924
  continue
24356
- raw_content = trim(str(pt.get("content", "") or ""), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
24357
- raw_full = trim(str(pt.get("full_content", "") or ""), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
24925
+ raw_content = trim(normalize_embedded_newlines(pt.get("content", "")), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
24926
+ raw_full = trim(normalize_embedded_newlines(pt.get("full_content", "")), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
24358
24927
  # Migration: if full_content is empty but content has sub-steps, auto-split
24359
24928
  if not raw_full and raw_content and pt.get("category") == "plan_step":
24360
24929
  normalized = _mid_re_norm.sub(r"\n\1", raw_content)
@@ -24369,6 +24938,7 @@ body{padding:18px}
24369
24938
  "category": trim(str(pt.get("category", "") or ""), 40),
24370
24939
  "plan_step_index": int(pt.get("plan_step_index", -1)) if pt.get("plan_step_index") is not None else -1,
24371
24940
  "created_at": float(pt.get("created_at", 0.0) or 0.0),
24941
+ "activated_at": float(pt.get("activated_at", 0.0) or 0.0) if pt.get("activated_at") else None,
24372
24942
  "completed_at": float(pt.get("completed_at", 0.0) or 0.0) if pt.get("completed_at") else None,
24373
24943
  "completed_by": trim(str(pt.get("completed_by", "") or ""), 40),
24374
24944
  "evidence": trim(str(pt.get("evidence", "") or ""), 200),
@@ -24766,7 +25336,8 @@ body{padding:18px}
24766
25336
  if not isinstance(fl, dict):
24767
25337
  return
24768
25338
  delegations = fl.get("repeated_delegations", [])
24769
- fp = hashlib.sha1(str(instruction or "").encode("utf-8")).hexdigest()[:12]
25339
+ progress_fp = self._watchdog_state_fingerprint(bb)
25340
+ fp = hashlib.sha1((str(instruction or "") + "|" + progress_fp).encode("utf-8")).hexdigest()[:12]
24770
25341
  for entry in delegations:
24771
25342
  if entry.get("instruction_hash") == fp and entry.get("target") == target:
24772
25343
  entry["count"] = int(entry.get("count", 1) or 1) + 1
@@ -24779,6 +25350,7 @@ body{padding:18px}
24779
25350
  "target": trim(str(target or ""), 40),
24780
25351
  "instruction_hash": fp,
24781
25352
  "instruction_preview": trim(str(instruction or ""), 200),
25353
+ "progress_fp": progress_fp,
24782
25354
  "count": 1,
24783
25355
  "first_round": int(getattr(self, "agent_round_index", 0) or 0),
24784
25356
  "last_round": int(getattr(self, "agent_round_index", 0) or 0),
@@ -25363,6 +25935,195 @@ body{padding:18px}
25363
25935
  return observed_signal or read_back or wrote_files
25364
25936
  return wrote_files or read_back or knowledge_signal or observed_signal
25365
25937
 
25938
+ def _plan_step_activation_ts(self, plan_step: dict) -> float:
25939
+ if not isinstance(plan_step, dict):
25940
+ return 0.0
25941
+ try:
25942
+ activated = float(plan_step.get("activated_at", 0.0) or 0.0)
25943
+ except Exception:
25944
+ activated = 0.0
25945
+ if activated > 0:
25946
+ return activated
25947
+ try:
25948
+ return float(plan_step.get("created_at", 0.0) or 0.0)
25949
+ except Exception:
25950
+ return 0.0
25951
+
25952
def _plan_step_blackboard_signals(self, plan_step: dict, board: dict | None = None) -> dict:
    """Summarise the blackboard activity recorded for one plan step.

    Reads ``step_files`` (entries filed under the step id),
    ``code_artifacts``, ``research_notes``, ``execution_logs`` and
    ``review_feedback`` from the blackboard. Rows whose timestamp is known
    and older than the step's activation timestamp are ignored; rows
    without a usable timestamp are kept.

    Args:
        plan_step: Plan-step todo. Only its ``id`` and its activation /
            creation timestamps are used. A non-dict value is treated as
            a step with no id.
        board: Blackboard dict; when it is not a dict,
            ``self._ensure_blackboard()`` supplies one.

    Returns:
        Dict with ``since_ts``, the boolean flags ``has_write``,
        ``has_read``, ``has_research``, ``has_exec``, ``has_review``,
        ``has_compile_pass`` and ``has_test_pass``, ``recent_files`` (at
        most four de-duplicated paths) and the ``recent_exec_excerpt`` /
        ``recent_review_excerpt`` / ``recent_research_excerpt`` strings.
    """
    import re  # function-scope import keeps this block self-contained

    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    # A truthy non-dict step used to raise on ``.get``; treat it as "no id".
    step = plan_step if isinstance(plan_step, dict) else {}
    step_id = trim(str(step.get("id", "") or ""), 20)
    since_ts = self._plan_step_activation_ts(plan_step)

    negative_hints = ("error:", "failed", "failure", "traceback", "fatal error", "assertionerror", "exception")
    compile_hints = ("compiled successfully", "build successful", "build succeeded", "syntax ok", "lint passed", "no issues found", "0 errors", "编译成功")
    test_hints = ("test passed", "tests passed", "all tests passed", "0 failed", "100%", "ok", "success", "测试通过")

    # "0 failed" / "0 failures" are success summaries. Without removing them
    # before the negative scan, the "failed"/"failure" hints reject the row
    # and the "0 failed" positive hint can never fire.
    zero_count_re = re.compile(r"(?<![\w.])0\s+(?:failed|failures?)\b")

    def _coerce_ts(raw: object) -> float:
        try:
            return float(raw or 0.0)
        except Exception:
            return 0.0

    def _predates_step(ts: float) -> bool:
        # Only rows with a known timestamp can be judged stale.
        return since_ts > 0 and ts > 0 and ts + 1e-6 < since_ts

    def _rows_since(rows: object) -> list[dict]:
        kept: list[dict] = []
        if not isinstance(rows, list):
            return kept
        for row in rows:
            if not isinstance(row, dict):
                continue
            txt = trim(str(row.get("content", "") or "").strip(), 1200)
            if not txt:
                continue
            ts = _coerce_ts(row.get("ts", 0.0))
            if _predates_step(ts):
                continue
            kept.append({"ts": ts, "content": txt, "actor": trim(str(row.get("actor", "") or ""), 40)})
        return kept

    def _recent_excerpt(rows: list[dict], max_chars: int = 120) -> str:
        if not rows:
            return ""
        return trim(str(rows[-1].get("content", "") or "").replace("\r\n", "\n"), max_chars)

    def _is_negative(low: str) -> bool:
        scrubbed = zero_count_re.sub(" ", low)
        return any(neg in scrubbed for neg in negative_hints)

    def _hint_matches(low: str, hint: str) -> bool:
        # Bare ASCII words ("ok", "success", "100%") and hints that start
        # with a digit ("0 failed", "0 errors") must not begin in the middle
        # of a longer word or number, so "token" and "10 failed" no longer
        # count. Multi-word and non-ASCII hints keep plain substring
        # matching.
        anchored = hint.isascii() and (hint[:1].isdigit() or " " not in hint)
        if not anchored:
            return hint in low
        return re.search(r"(?<!\w)" + re.escape(hint), low) is not None

    def _has_positive(rows: list[dict], hints: tuple[str, ...]) -> bool:
        for row in reversed(rows[-6:]):
            low = str(row.get("content", "") or "").lower()
            if not low or _is_negative(low):
                continue
            if any(_hint_matches(low, tok) for tok in hints):
                return True
        return False

    def _has_observed(rows: list[dict]) -> bool:
        for row in reversed(rows[-6:]):
            low = str(row.get("content", "") or "").lower()
            if low and not _is_negative(low):
                return True
        return False

    # File operations registered for this step id.
    step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
    raw_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
    step_entries: list[dict] = [
        entry
        for entry in raw_entries
        if isinstance(entry, dict) and not _predates_step(_coerce_ts(entry.get("ts", 0.0)))
    ]

    # Code artifacts touched since activation (these are not keyed by step).
    artifact_rows: list[dict] = []
    raw_artifacts = bb.get("code_artifacts", {}) if isinstance(bb.get("code_artifacts"), dict) else {}
    for path, meta in raw_artifacts.items():
        if not isinstance(meta, dict):
            continue
        ts = _coerce_ts(meta.get("updated_at", 0.0))
        if _predates_step(ts):
            continue
        artifact_rows.append({
            "path": trim(str(path or "").strip(), 240),
            "summary": trim(str(meta.get("summary", "") or "").strip(), 200),
            "updated_at": ts,
        })

    research_rows = _rows_since(bb.get("research_notes", []))
    exec_rows = _rows_since(bb.get("execution_logs", []))
    review_rows = _rows_since(bb.get("review_feedback", []))

    file_ops = {trim(str(entry.get("op", "") or "").strip(), 40) for entry in step_entries}
    has_write = any(op in {"write_file", "edit_file"} for op in file_ops) or bool(artifact_rows)
    has_read = "read_file" in file_ops

    # Prefer artifact paths; fall back to the step's file-op entries.
    recent_files = [row.get("path", "") for row in artifact_rows[-4:] if row.get("path")]
    if not recent_files:
        recent_files = [
            trim(str(entry.get("path", "") or "").strip(), 240)
            for entry in step_entries[-4:]
            if str(entry.get("path", "") or "").strip()
        ]

    return {
        "since_ts": since_ts,
        "has_write": has_write,
        "has_read": has_read,
        "has_research": bool(research_rows),
        "has_exec": _has_observed(exec_rows),
        "has_review": _has_observed(review_rows),
        "has_compile_pass": _has_positive(exec_rows + review_rows, compile_hints),
        "has_test_pass": _has_positive(exec_rows + review_rows, test_hints),
        "recent_files": list(dict.fromkeys(recent_files))[-4:],
        "recent_exec_excerpt": _recent_excerpt(exec_rows, 140),
        "recent_review_excerpt": _recent_excerpt(review_rows, 140),
        "recent_research_excerpt": _recent_excerpt(research_rows, 140),
    }
26067
+
26068
+ def _plan_step_has_blackboard_evidence(self, plan_step: dict, board: dict | None = None) -> bool:
26069
+ if not isinstance(plan_step, dict):
26070
+ return False
26071
+ sig = self._plan_step_blackboard_signals(plan_step, board)
26072
+ step_text = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "").lower()
26073
+ phase = self._plan_step_phase_hint(step_text)
26074
+ wants_test = phase in ("test", "review") or any(
26075
+ tok in step_text for tok in ("test", "pytest", "unit", "integration", "验证", "測試", "测试", "回归", "assert")
26076
+ )
26077
+ wants_runtime_validation = wants_test or phase == "implement" or any(
26078
+ tok in step_text for tok in ("verify", "validation", "check", "lint", "build", "compile", "运行", "校验", "檢查")
26079
+ )
26080
+ if wants_test:
26081
+ return sig["has_test_pass"] or sig["has_exec"] or sig["has_review"]
26082
+ if phase == "implement":
26083
+ return sig["has_write"] and (
26084
+ sig["has_compile_pass"] or sig["has_test_pass"] or sig["has_exec"] or sig["has_read"] or sig["has_review"]
26085
+ )
26086
+ if phase in ("research", "design"):
26087
+ return sig["has_research"] or sig["has_read"] or sig["has_exec"] or sig["has_write"]
26088
+ if wants_runtime_validation:
26089
+ return sig["has_exec"] or sig["has_read"] or sig["has_write"] or sig["has_review"]
26090
+ return sig["has_write"] or sig["has_read"] or sig["has_research"] or sig["has_exec"] or sig["has_review"]
26091
+
26092
+ def _step_has_accumulated_evidence(self, plan_step: dict, bb: dict | None = None) -> bool:
26093
+ """Fix 3: Check if step has accumulated evidence across ALL turns (not just current turn).
26094
+ Uses step_files registry + blackboard signals to detect writes/execution during step lifetime."""
26095
+ if not isinstance(plan_step, dict):
26096
+ return False
26097
+ sig = self._plan_step_blackboard_signals(plan_step, bb)
26098
+ return sig["has_write"] or sig["has_exec"] or sig["has_research"]
26099
+
26100
+ def _collect_accumulated_step_evidence(self, plan_step: dict, bb: dict | None = None) -> str:
26101
+ """Fix 1 support: Collect evidence summary from accumulated step history (across all turns)."""
26102
+ if not isinstance(plan_step, dict):
26103
+ return ""
26104
+ sig = self._plan_step_blackboard_signals(plan_step, bb)
26105
+ parts: list[str] = []
26106
+ if sig.get("recent_files"):
26107
+ parts.append("files: " + ", ".join(sig["recent_files"][:4]))
26108
+ if sig.get("recent_exec_excerpt"):
26109
+ parts.append("exec: " + trim(sig["recent_exec_excerpt"], 80))
26110
+ if sig.get("recent_research_excerpt"):
26111
+ parts.append("research: " + trim(sig["recent_research_excerpt"], 80))
26112
+ return trim("; ".join(parts) or "accumulated-step-evidence", 200)
26113
+
26114
def _collect_blackboard_step_evidence(self, plan_step: dict, board: dict | None = None) -> str:
    """Render the step's blackboard signals as a one-line evidence string.

    Joins, with ``"; "``, up to three recent file paths plus the latest
    execution (``logs``), review (``review``) and research (``notes``)
    excerpts reported by ``self._plan_step_blackboard_signals``. Empty
    signals are left out. The result is passed through ``trim(..., 200)``.
    """
    signals = self._plan_step_blackboard_signals(plan_step, board)
    fragments: list[str] = []

    files = signals.get("recent_files")
    if files:
        fragments.append("files: " + ", ".join(files[:3]))

    labelled_excerpts = (
        ("logs", "recent_exec_excerpt"),
        ("review", "recent_review_excerpt"),
        ("notes", "recent_research_excerpt"),
    )
    for label, key in labelled_excerpts:
        excerpt = signals.get(key)
        if excerpt:
            fragments.append(f"{label}: {excerpt}")

    return trim("; ".join(fragments), 200)
26126
+
25366
26127
  def _has_test_pass_evidence(self, board: dict | None = None) -> bool:
25367
26128
  bb = board if isinstance(board, dict) else self._ensure_blackboard()
25368
26129
  logs = bb.get("execution_logs", []) if isinstance(bb.get("execution_logs"), list) else []
@@ -25395,6 +26156,20 @@ body{padding:18px}
25395
26156
  if todo.get("status") == "completed":
25396
26157
  continue
25397
26158
  cat = todo.get("category", "")
26159
+ if cat == "plan_step" and todo.get("status") == "in_progress" and not todo.get("activated_at"):
26160
+ step_idx = int(todo.get("plan_step_index", 0) or 0)
26161
+ prior_done_ts = [
26162
+ float(t.get("completed_at", 0.0) or 0.0)
26163
+ for t in todos
26164
+ if t.get("category") == "plan_step"
26165
+ and int(t.get("plan_step_index", 0) or 0) < step_idx
26166
+ and t.get("completed_at")
26167
+ ]
26168
+ todo["activated_at"] = (
26169
+ max(prior_done_ts)
26170
+ if prior_done_ts
26171
+ else (float(todo.get("created_at", 0.0) or 0.0) or float(now_ts()))
26172
+ )
25398
26173
  if cat == "setup" and (research_count > 0 or code_count > 0):
25399
26174
  todo.update(
25400
26175
  status="completed",
@@ -25446,11 +26221,14 @@ body{padding:18px}
25446
26221
  if t.get("category") == "plan_step"
25447
26222
  ):
25448
26223
  todo["status"] = "in_progress"
26224
+ todo["activated_at"] = float(now_ts())
25449
26225
 
25450
26226
  if not any(t.get("status") == "in_progress" for t in todos):
25451
26227
  for t in todos:
25452
26228
  if t.get("status") == "pending":
25453
26229
  t["status"] = "in_progress"
26230
+ if not t.get("activated_at"):
26231
+ t["activated_at"] = float(now_ts())
25454
26232
  break
25455
26233
 
25456
26234
  bb["project_todos"] = todos
@@ -25516,10 +26294,25 @@ body{padding:18px}
25516
26294
  break
25517
26295
  if not current:
25518
26296
  return False
26297
+ # Fix 5c: Reset TodoWrite loop counter on step advancement
26298
+ try:
26299
+ self._todowrite_step_counter.clear()
26300
+ except Exception:
26301
+ pass
25519
26302
  current["status"] = "completed"
25520
26303
  current["completed_at"] = float(now_ts())
25521
26304
  current["completed_by"] = actor
25522
26305
  current["evidence"] = trim(str(evidence or "").strip(), 200) or self._ui_text("step_completed_evidence")
26306
+ # Clear single-mode validation gate flags for the completed step
26307
+ try:
26308
+ _completed_id = str(current.get("id", "") or "")
26309
+ for _attr_name in (f"_smvg_{_completed_id}", f"_smvg_ts_{_completed_id}", f"_smvg_n_{_completed_id}", f"_sync_exec_gate_n_{_completed_id}", f"_sync_sv_ts_{_completed_id}"):
26310
+ try:
26311
+ delattr(self, _attr_name)
26312
+ except AttributeError:
26313
+ pass
26314
+ except Exception:
26315
+ pass
25523
26316
  # 推进 cursor,激活下一步
25524
26317
  cursor = int(bb.get("plan_step_cursor", 0) or 0)
25525
26318
  bb["plan_step_cursor"] = cursor + 1
@@ -25530,6 +26323,7 @@ body{padding:18px}
25530
26323
  break
25531
26324
  if next_step:
25532
26325
  next_step["status"] = "in_progress"
26326
+ next_step["activated_at"] = float(now_ts())
25533
26327
  step_idx = int(next_step.get("plan_step_index", 0) or 0) + 1
25534
26328
  total = int(bb.get("plan_step_total", len(todos)) or len(todos))
25535
26329
  self._emit("status", {
@@ -25635,27 +26429,90 @@ body{padding:18px}
25635
26429
  isinstance(r, dict) and r.get("ok", False) and str(r.get("name", "")) == "bash"
25636
26430
  for r in results
25637
26431
  )
25638
- validation_ok = self._tool_results_have_validation_evidence(current, results)
26432
+ validation_ok_current = self._tool_results_have_validation_evidence(current, results)
26433
+ validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
26434
+ validation_ok = validation_ok_current or validation_ok_blackboard
26435
+ bb_sig = self._plan_step_blackboard_signals(current, bb)
25639
26436
  phase_evidence = False
25640
26437
  if phase in ("research", "design") and validation_ok:
25641
26438
  phase_evidence = True
25642
- elif phase == "implement" and wrote_files and validation_ok:
26439
+ elif phase == "implement" and (
26440
+ (wrote_files and validation_ok_current)
26441
+ or (bb_sig["has_write"] and validation_ok_blackboard)
26442
+ ):
25643
26443
  phase_evidence = True
25644
- elif phase in ("test", "review") and ran_bash_ok and validation_ok:
26444
+ elif phase in ("test", "review") and (
26445
+ (ran_bash_ok and validation_ok_current)
26446
+ or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
26447
+ ):
25645
26448
  phase_evidence = True
26449
+ todo_progress_signal = any(
26450
+ isinstance(r, dict) and r.get("ok", False)
26451
+ and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
26452
+ for r in results
26453
+ )
25646
26454
  # Advance when:
25647
26455
  # - Manager requested AND worker produced output, OR
25648
26456
  # - All subtasks completed AND worker produced output, OR
25649
- # - Phase heuristics confirm (write+bash for implement)
25650
- has_strong_evidence = validation_ok and worker_produced_output and (
25651
- manager_requested or subtasks_all_done or phase_evidence
25652
- )
26457
+ # - Phase heuristics confirm BUT ONLY if no incomplete subtasks exist
26458
+ # - Fix 3: All subtasks completed + accumulated step evidence (covers TodoWrite-only turns)
26459
+ # CRITICAL: When subtasks exist, phase_evidence alone CANNOT bypass subtask completion.
26460
+ _has_subtasks = bool(self._active_plan_worker_todo_rows(
26461
+ str(current.get("id", "") or ""), role=""
26462
+ ))
26463
+ _phase_gate = phase_evidence and (subtasks_all_done or not _has_subtasks)
26464
+ accumulated_evidence_path = (
26465
+ subtasks_all_done
26466
+ and todo_progress_signal
26467
+ and self._step_has_accumulated_evidence(current, bb)
26468
+ )
26469
+ has_strong_evidence = (
26470
+ validation_ok and (
26471
+ (
26472
+ worker_produced_output
26473
+ and (manager_requested or subtasks_all_done or _phase_gate)
26474
+ )
26475
+ or (
26476
+ todo_progress_signal
26477
+ and subtasks_all_done
26478
+ and validation_ok_blackboard
26479
+ )
26480
+ )
26481
+ ) or accumulated_evidence_path
25653
26482
  if has_strong_evidence:
26483
+ # Sync mode exec gate: when all subtasks done for implement/test/deploy phases,
26484
+ # require at least some execution evidence (bash/test/compile ran at any point).
26485
+ # Manager-requested advancement has its own escape hatch after 10 blocks.
26486
+ _exec_gate_needed = (
26487
+ subtasks_all_done
26488
+ and phase in ("implement", "test", "deploy")
26489
+ )
26490
+ if _exec_gate_needed:
26491
+ # Require model's explicit <step-verified/> tag in agent_messages since step activation
26492
+ _has_verified = self._check_step_verified_tag(current, messages=self.agent_messages)
26493
+ if not _has_verified:
26494
+ _sync_n_flag = f"_sync_exec_gate_n_{str(current.get('id', '') or '')}"
26495
+ _sync_n = int(getattr(self, _sync_n_flag, 0))
26496
+ if _sync_n < 10:
26497
+ setattr(self, _sync_n_flag, _sync_n + 1)
26498
+ # No verified tag yet — push worker to evaluate and emit <step-verified/>
26499
+ self._inject_sync_mode_verification_hint(current, worker_step)
26500
+ return
26501
+ # After 10 blocks, allow advancement to prevent permanent stall
25654
26502
  evidence = self._collect_step_evidence(current, worker_step)
26503
+ # Clear sync exec gate counter on successful advance
26504
+ try:
26505
+ _sync_clear = f"_sync_exec_gate_n_{str(current.get('id', '') or '')}"
26506
+ if hasattr(self, _sync_clear):
26507
+ delattr(self, _sync_clear)
26508
+ except Exception:
26509
+ pass
25655
26510
  self._advance_plan_step(
25656
26511
  evidence=evidence,
25657
26512
  actor=str(route.get("target", "developer") or "developer"),
25658
26513
  )
26514
+ else:
26515
+ self._inject_rework_if_needed(current, worker_step)
25659
26516
 
25660
26517
  def _worker_step_has_evidence(self, step: dict) -> bool:
25661
26518
  """Check if worker step produced concrete tool outputs."""
@@ -25671,7 +26528,8 @@ body{padding:18px}
25671
26528
 
25672
26529
  def _step_subtasks_all_completed(self, plan_step: dict) -> bool:
25673
26530
  """Check if all worker subtasks linked to this plan step are completed.
25674
- Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking."""
26531
+ Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking.
26532
+ Fix 6: Also excludes 'next-step intent' items that were added alongside completed items."""
25675
26533
  step_id = str(plan_step.get("id", "") or "")
25676
26534
  if not step_id:
25677
26535
  return False
@@ -25711,7 +26569,293 @@ body{padding:18px}
25711
26569
  relevant.append(r)
25712
26570
  if relevant:
25713
26571
  worker_items = relevant
25714
- return all(str(r.get("status", "")).lower() == "completed" for r in worker_items)
26572
+ # Fix 6: Exclude "next-step intent" pending items when all other items are completed.
26573
+ # When the worker completes step N and creates step N+1 subtasks in the same TodoWrite call,
26574
+ # the new pending items get parent_step_id of step N, blocking its advancement.
26575
+ completed_items = [r for r in worker_items if str(r.get("status", "")).lower() == "completed"]
26576
+ pending_items = [r for r in worker_items if str(r.get("status", "")).lower() != "completed"]
26577
+ if completed_items and pending_items:
26578
+ # Check if pending items are content-wise duplicates of completed items
26579
+ # (indicating the worker re-sent the same items but some got stuck as pending)
26580
+ completed_content = {
26581
+ normalize_work_text(str(r.get("content", ""))).strip().lower()
26582
+ for r in completed_items
26583
+ if str(r.get("content", "") or "").strip()
26584
+ }
26585
+ truly_new_pending = [
26586
+ r for r in pending_items
26587
+ if normalize_work_text(str(r.get("content", ""))).strip().lower() not in completed_content
26588
+ ]
26589
+ # If all pending items are duplicates of completed items, they don't block
26590
+ if not truly_new_pending:
26591
+ worker_items = completed_items
26592
+ # If there are truly new pending items but all original items are done,
26593
+ # check if the new items match future plan step content
26594
+ elif truly_new_pending and len(completed_items) >= 2:
26595
+ bb = self._ensure_blackboard()
26596
+ future_step_content = set()
26597
+ found_current = False
26598
+ for t in bb.get("project_todos", []):
26599
+ if not isinstance(t, dict) or t.get("category") != "plan_step":
26600
+ continue
26601
+ if str(t.get("id", "") or "") == step_id:
26602
+ found_current = True
26603
+ continue
26604
+ if found_current:
26605
+ fc = str(t.get("full_content", "") or t.get("content", "") or "").strip().lower()
26606
+ future_step_content.add(fc)
26607
+ for line in fc.split("\n"):
26608
+ sl = line.strip().lower()
26609
+ if sl:
26610
+ future_step_content.add(sl)
26611
+ if future_step_content:
26612
+ _still_blocking = []
26613
+ for pi in truly_new_pending:
26614
+ pc = normalize_work_text(str(pi.get("content", ""))).strip().lower()
26615
+ # Check if this pending item's content appears in any future step
26616
+ is_future = any(pc in fsc or fsc in pc for fsc in future_step_content if len(fsc) > 4)
26617
+ if not is_future:
26618
+ _still_blocking.append(pi)
26619
+ if not _still_blocking:
26620
+ worker_items = completed_items
26621
+ all_marked_done = all(str(r.get("status", "")).lower() == "completed" for r in worker_items)
26622
+ if not all_marked_done:
26623
+ return False
26624
+ # Acceptance verification: check that each "completed" subtask has real evidence
26625
+ # Don't just trust the model's TodoWrite status — verify against accumulated tool outputs
26626
+ if worker_items:
26627
+ bb = self._ensure_blackboard()
26628
+ unverified = self._verify_subtasks_acceptance(worker_items, step_id, bb)
26629
+ if unverified:
26630
+ return False
26631
+ return True
26632
+
26633
+ def _verify_subtasks_acceptance(self, subtasks: list[dict], step_id: str, bb: dict) -> list[str]:
26634
+ """Verify each completed subtask has real evidence. Returns list of unverified subtask descriptions.
26635
+ Checks step_files and execution_logs against what each subtask's content implies."""
26636
+ import re
26637
+ # Gather accumulated evidence for this step
26638
+ step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
26639
+ step_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
26640
+ written_paths = set()
26641
+ for entry in step_entries:
26642
+ if isinstance(entry, dict) and str(entry.get("op", "")) in ("write_file", "edit_file"):
26643
+ written_paths.add(str(entry.get("path", "") or "").strip().lower())
26644
+ # Gather bash execution evidence
26645
+ exec_logs = bb.get("execution_logs", [])
26646
+ if not isinstance(exec_logs, list):
26647
+ exec_logs = []
26648
+ bash_outputs_lower = []
26649
+ for log in exec_logs[-30:]:
26650
+ if isinstance(log, dict):
26651
+ c = str(log.get("content", "") or "").lower()
26652
+ if c:
26653
+ bash_outputs_lower.append(c)
26654
+ all_bash_text = " ".join(bash_outputs_lower)
26655
+ negative_hints = ("error:", "failed", "failure", "traceback", "fatal", "not found",
26656
+ "no such file", "command not found", "permission denied")
26657
+ has_bash_failure = any(neg in all_bash_text for neg in negative_hints)
26658
+ # Define acceptance patterns from subtask content
26659
+ _file_create_re = re.compile(
26660
+ r"(?:创建|生成|编写|写入|create|write|generate|implement|scaffold)\s+(.+?)(?:\s|$|,|。|,|\()",
26661
+ re.IGNORECASE,
26662
+ )
26663
+ _run_test_kw = ("运行", "测试", "验证", "test", "pytest", "verify", "validate",
26664
+ "run", "check", "确认", "检查")
26665
+ _compile_kw = ("编译", "构建", "compile", "build", "cmake", "make", "gcc", "gfortran")
26666
+ _install_kw = ("安装", "install", "pip install", "npm install", "apt install")
26667
+ unverified: list[str] = []
26668
+ for st in subtasks:
26669
+ content = str(st.get("content", "") or "").strip()
26670
+ if not content:
26671
+ continue
26672
+ content_lower = content.lower()
26673
+ # Rule 1: If subtask mentions creating a file, check it was actually written
26674
+ m = _file_create_re.search(content)
26675
+ if m:
26676
+ target = m.group(1).strip().strip("\"'`").lower()
26677
+ # Extract just filename from path-like strings
26678
+ if "/" in target:
26679
+ target_parts = [p for p in target.split("/") if p.strip()]
26680
+ target_name = target_parts[-1] if target_parts else target
26681
+ else:
26682
+ target_name = target
26683
+ if target_name and len(target_name) > 2:
26684
+ found = any(target_name in wp for wp in written_paths)
26685
+ if not found:
26686
+ unverified.append(f"file not created: {target_name}")
26687
+ continue
26688
+ # Rule 2: If subtask mentions testing/running/verifying, check bash was executed
26689
+ if any(kw in content_lower for kw in _run_test_kw):
26690
+ if not bash_outputs_lower:
26691
+ unverified.append(f"no bash execution for: {trim(content, 60)}")
26692
+ continue
26693
+ # Check for test failures in recent bash output
26694
+ if has_bash_failure and any(kw in content_lower for kw in ("test", "测试", "pytest")):
26695
+ # Only block if failure keywords appear near test-related content
26696
+ test_related_failures = any(
26697
+ ("test" in line or "pytest" in line or "assert" in line)
26698
+ and any(neg in line for neg in negative_hints)
26699
+ for line in bash_outputs_lower[-10:]
26700
+ )
26701
+ if test_related_failures:
26702
+ unverified.append(f"test failures detected for: {trim(content, 60)}")
26703
+ continue
26704
+ # Rule 3: If subtask mentions compiling/building, check bash + no compile errors
26705
+ if any(kw in content_lower for kw in _compile_kw):
26706
+ if not bash_outputs_lower:
26707
+ unverified.append(f"no bash execution for compile: {trim(content, 60)}")
26708
+ continue
26709
+ compile_failures = any(
26710
+ any(neg in line for neg in ("error:", "failed", "failure"))
26711
+ and any(kw in line for kw in ("compil", "build", "cmake", "make", "link"))
26712
+ for line in bash_outputs_lower[-10:]
26713
+ )
26714
+ if compile_failures:
26715
+ unverified.append(f"compile failures for: {trim(content, 60)}")
26716
+ continue
26717
+ # Rule 4: If subtask mentions installing, check bash was run
26718
+ if any(kw in content_lower for kw in _install_kw):
26719
+ if not bash_outputs_lower:
26720
+ unverified.append(f"no bash for install: {trim(content, 60)}")
26721
+ continue
26722
+ # If none of the specific rules matched, the subtask is considered verified
26723
+ # (generic subtasks like "design" or "analyze" don't need tool evidence)
26724
+ return unverified
26725
+
26726
def _inject_rework_if_needed(self, plan_step: dict, worker_step: dict):
    """Push a rework instruction when "completed" subtasks fail acceptance.

    Flow:
      1. Collect the worker todo rows of ``plan_step`` that are marked completed.
      2. Run the heuristic check (``_verify_subtasks_acceptance``); stop if it
         reports nothing.
      3. Ask the LLM check (``_llm_verify_subtask_acceptance``) for a verdict;
         stop if it says everything passed.
      4. Rate-limit to one injection per step every 30 seconds.
      5. Flip matching todo rows back to ``in_progress`` and append a
         ``<step-rework>`` guidance bubble.

    Args:
        plan_step: The active plan-step row; only ``id`` and ``content`` are read.
        worker_step: Accepted for interface compatibility; not used here.

    Returns:
        None. The whole routine is best-effort: any exception is swallowed so
        a verification problem never interrupts the caller.
    """
    try:
        step_id = str(plan_step.get("id", "") or "")
        if not step_id:
            return
        rows = self._active_plan_worker_todo_rows(step_id, role="")
        completed_rows = [r for r in rows if str(r.get("status", "")).lower() == "completed"]
        if not completed_rows:
            return
        bb = self._ensure_blackboard()
        failures = self._verify_subtasks_acceptance(completed_rows, step_id, bb)
        if not failures:
            return
        # LLM-based acceptance check: semantic analysis over heuristics
        llm_verdict = self._llm_verify_subtask_acceptance(plan_step, completed_rows, bb)
        if llm_verdict.get("all_passed", False):
            return
        # The LLM helper always returns a "rework_items" key and yields an
        # empty list when the call fails or the reply is not parseable. Fall
        # back to the heuristic failures in that case; a plain
        # .get(key, failures) never triggers and would silently drop them.
        rework_items = list(llm_verdict.get("rework_items") or failures)

        # Rate-limit rework injection (per step, 30 s window)
        _rework_key = f"_rework_injected_{step_id}"
        _last_rework = getattr(self, _rework_key, 0.0)
        if float(now_ts()) - float(_last_rework) < 30.0:
            return
        setattr(self, _rework_key, float(now_ts()))

        step_label = trim(str(plan_step.get("content", "") or ""), 80)
        rework_text = (
            f"<step-rework>\n"
            f"Step \"{step_label}\" acceptance check FAILED. "
            f"The following subtasks were marked completed but did not pass verification:\n"
        )
        # Only the first five items are listed to keep the bubble short.
        for i, item in enumerate(rework_items[:5]):
            rework_text += f" {i+1}. {trim(str(item), 120)}\n"
        rework_text += (
            "\nACTION REQUIRED: Fix these issues NOW before the step can advance.\n"
            "- For missing files: create them with write_file\n"
            "- For failed tests/builds: run the command again and fix errors\n"
            "- For unverified installs: re-run the install command\n"
            "After fixing, update TodoWrite to reflect the corrected state.\n"
            "</step-rework>"
        )

        # Revert false "completed" status back to in_progress.
        # NOTE(review): assumes self.todo.snapshot() returns row dicts that may
        # be mutated and handed back to self.todo.update() -- confirm.
        _snap = self.todo.snapshot()
        _modified = False
        for row in _snap:
            if not isinstance(row, dict):
                continue
            if str(row.get("parent_step_id", "") or "") != step_id:
                continue
            if str(row.get("status", "")).lower() != "completed":
                continue
            rc = str(row.get("content", "") or "").strip().lower()
            for fail in rework_items:
                fail_lower = str(fail).lower()
                # Loose match: the row's first 20 chars, or any of its first
                # three words longer than 3 chars, appears in the failure text.
                if rc and (rc[:20] in fail_lower or any(w in fail_lower for w in rc.split()[:3] if len(w) > 3)):
                    row["status"] = "in_progress"
                    _modified = True
                    break
        if _modified:
            try:
                self.todo.update(_snap)
            except Exception:
                # Failing to persist the revert must not block the guidance bubble.
                pass

        target_roles: tuple[str, ...] = ()
        if self._is_multi_agent_mode():
            active_role = str(bb.get("active_agent", "") or "developer")
            if active_role:
                target_roles = (active_role,)
        self._append_plan_guidance_bubble(
            rework_text,
            target_roles=target_roles,
            summary=f"step rework: {len(rework_items)} items failed acceptance",
        )
    except Exception:
        # Deliberate best-effort: rework hints are advisory only.
        pass
26804
+
26805
+ def _llm_verify_subtask_acceptance(self, plan_step: dict, completed_subtasks: list[dict], bb: dict) -> dict:
26806
+ """Use LLM semantic analysis to verify if subtasks are truly completed.
26807
+ Returns {"all_passed": bool, "rework_items": list[str]}."""
26808
+ try:
26809
+ step_id = str(plan_step.get("id", "") or "")
26810
+ step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
26811
+ step_entries = step_files_raw.get(step_id, []) if step_id else []
26812
+ files_summary = []
26813
+ for entry in (step_entries[-15:] if isinstance(step_entries, list) else []):
26814
+ if isinstance(entry, dict):
26815
+ files_summary.append(f"{entry.get('op','?')}: {entry.get('path','?')}")
26816
+ exec_logs = bb.get("execution_logs", [])
26817
+ recent_exec = []
26818
+ for log in (exec_logs[-8:] if isinstance(exec_logs, list) else []):
26819
+ if isinstance(log, dict):
26820
+ c = trim(str(log.get("content", "") or ""), 200)
26821
+ if c:
26822
+ recent_exec.append(c)
26823
+ subtask_list = "\n".join(
26824
+ f"- [{str(st.get('status','')).upper()}] {trim(str(st.get('content','') or ''), 120)}"
26825
+ for st in completed_subtasks[:8]
26826
+ )
26827
+ prompt = (
26828
+ "Analyze whether these subtasks are TRULY completed based on the evidence.\n\n"
26829
+ f"SUBTASKS:\n{subtask_list}\n\n"
26830
+ f"FILES CREATED/MODIFIED:\n{chr(10).join(files_summary[-10:]) or '(none)'}\n\n"
26831
+ f"RECENT EXECUTION OUTPUT:\n{chr(10).join(recent_exec[-5:]) or '(none)'}\n\n"
26832
+ "For each subtask, determine if it's genuinely done:\n"
26833
+ "- File creation tasks: was the file actually created?\n"
26834
+ "- Test/verify tasks: was a test/command actually run? Did it pass?\n"
26835
+ "- Build/compile tasks: was compilation attempted? Any errors?\n"
26836
+ "- Install tasks: was the install command run?\n\n"
26837
+ "Reply ONLY as JSON: {\"all_passed\": true/false, \"rework_items\": [\"description of what failed\"]}\n"
26838
+ "If all subtasks pass, return {\"all_passed\": true, \"rework_items\": []}"
26839
+ )
26840
+ resp = self.ollama.chat(
26841
+ [{"role": "user", "content": prompt}],
26842
+ system="You are a strict QA reviewer. Verify task completion against evidence. Reply ONLY valid JSON.",
26843
+ max_tokens=300,
26844
+ think=False,
26845
+ )
26846
+ import json
26847
+ text = str(resp.get("text", "") or "").strip()
26848
+ if "{" in text:
26849
+ json_str = text[text.index("{"):text.rindex("}") + 1]
26850
+ result = json.loads(json_str)
26851
+ if isinstance(result, dict):
26852
+ return {
26853
+ "all_passed": bool(result.get("all_passed", False)),
26854
+ "rework_items": list(result.get("rework_items", [])),
26855
+ }
26856
+ except Exception:
26857
+ pass
26858
+ return {"all_passed": False, "rework_items": []}
25715
26859
 
25716
26860
  def _collect_step_evidence(self, plan_step: dict, worker_step: dict) -> str:
25717
26861
  """Collect evidence summary from worker step for plan step completion."""
@@ -25730,11 +26874,19 @@ body{padding:18px}
25730
26874
  parts.append(f"bash: {cmd}" + (f" => {out}" if out else ""))
25731
26875
  elif name == "read_file":
25732
26876
  path = str(r.get("args", {}).get("path", "") or "")
26877
+ # Skip plan-infrastructure reads — not meaningful implementation evidence
26878
+ _p = str(path)
26879
+ if (_p.endswith("plan.md") and ".clouds_coder" in _p) or ".clouds_coder/skills_cache/" in _p:
26880
+ continue
25733
26881
  out = self._tool_result_output_excerpt(r, 90)
25734
26882
  parts.append(f"read: {path}" + (f" => {out}" if out else ""))
25735
26883
  elif name in ("write_to_blackboard", "query_code_library", "query_knowledge_library"):
25736
26884
  out = self._tool_result_output_excerpt(r, 100)
25737
26885
  parts.append(f"{name}" + (f": {out}" if out else ""))
26886
+ if not parts:
26887
+ bb_evidence = self._collect_blackboard_step_evidence(plan_step)
26888
+ if bb_evidence:
26889
+ return bb_evidence
25738
26890
  return trim("; ".join(parts) or "post-execution evidence", 200)
25739
26891
 
25740
26892
  def _get_active_plan_step(self, board: dict | None = None) -> dict | None:
@@ -25797,10 +26949,342 @@ body{padding:18px}
25797
26949
  return False
25798
26950
  return bool(self._active_plan_worker_todo_rows(step_id, role=role))
25799
26951
 
26952
+ def _bridge_flat_todos_to_active_plan_step(
26953
+ self,
26954
+ rows: list[dict] | None,
26955
+ board: dict | None = None,
26956
+ ) -> tuple[list[dict], bool]:
26957
+ bb = board if isinstance(board, dict) else self._ensure_blackboard()
26958
+ step = self._get_active_plan_step(bb)
26959
+ if not isinstance(step, dict):
26960
+ return (list(rows or []), False)
26961
+ step_id = trim(str(step.get("id", "") or ""), 20)
26962
+ if not step_id:
26963
+ return (list(rows or []), False)
26964
+ snap = [dict(row) for row in (rows or []) if isinstance(row, dict)]
26965
+ if not snap:
26966
+ return (snap, False)
26967
+ worker_owners = {"developer", "explorer", "reviewer"}
26968
+ if any(str(row.get("parent_step_id", "") or "").strip() for row in snap):
26969
+ return (snap, False)
26970
+ if any(
26971
+ str(row.get("owner", "") or "").strip().lower() in worker_owners
26972
+ and str(row.get("parent_step_id", "") or "").strip() == step_id
26973
+ for row in snap
26974
+ ):
26975
+ return (snap, False)
26976
+ owner_key = self._current_plan_worker_owner(bb)
26977
+ bridged: list[dict] = []
26978
+ migrated = False
26979
+ for row in snap:
26980
+ key = trim(str(row.get("key", "") or "").strip(), 120)
26981
+ if key.startswith("bb:"):
26982
+ bridged.append(dict(row))
26983
+ continue
26984
+ content = normalize_work_text(str(row.get("content", "") or "")) or str(row.get("content", "") or "").strip()
26985
+ if not content:
26986
+ continue
26987
+ new_row = dict(row)
26988
+ new_row["content"] = content
26989
+ new_row["parent_step_id"] = step_id
26990
+ owner = str(new_row.get("owner", "") or "").strip().lower()
26991
+ if owner not in worker_owners:
26992
+ new_row["owner"] = owner_key
26993
+ bridged.append(new_row)
26994
+ migrated = True
26995
+ return (bridged, migrated)
26996
+
26997
+ def _active_plan_progress_snapshot(self, board: dict | None = None) -> dict:
26998
+ bb = board if isinstance(board, dict) else self._ensure_blackboard()
26999
+ step = self._current_plan_step_row(bb)
27000
+ if not isinstance(step, dict):
27001
+ return {
27002
+ "step_id": "",
27003
+ "step_index": 0,
27004
+ "step_text": "",
27005
+ "expected_count": 0,
27006
+ "worker_todo_count": 0,
27007
+ "completed_count": 0,
27008
+ "in_progress_count": 0,
27009
+ "pending_count": 0,
27010
+ "current_subtask": "",
27011
+ "next_pending_subtask": "",
27012
+ "owners": [],
27013
+ }
27014
+ step_id = trim(str(step.get("id", "") or ""), 20)
27015
+ rows = self._active_plan_worker_todo_rows(step_id, role="") if step_id else []
27016
+ expected = self._extract_plan_step_subtasks(step, limit=5)
27017
+ completed_count = 0
27018
+ in_progress_count = 0
27019
+ pending_count = 0
27020
+ current_subtask = ""
27021
+ next_pending_subtask = ""
27022
+ owners: set[str] = set()
27023
+ for row in rows:
27024
+ status = str(row.get("status", "pending") or "pending").strip().lower()
27025
+ content = trim(str(row.get("content", "") or "").strip(), 220)
27026
+ owner = self._sanitize_agent_role(row.get("owner", ""))
27027
+ if owner:
27028
+ owners.add(owner)
27029
+ if status == "completed":
27030
+ completed_count += 1
27031
+ elif status == "in_progress":
27032
+ in_progress_count += 1
27033
+ if content and not current_subtask:
27034
+ current_subtask = content
27035
+ else:
27036
+ pending_count += 1
27037
+ if content and not next_pending_subtask:
27038
+ next_pending_subtask = content
27039
+ return {
27040
+ "step_id": step_id,
27041
+ "step_index": max(0, int(step.get("plan_step_index", 0) or 0)),
27042
+ "step_text": self._current_plan_step_text(bb),
27043
+ "expected_count": len(expected),
27044
+ "worker_todo_count": len(rows),
27045
+ "completed_count": completed_count,
27046
+ "in_progress_count": in_progress_count,
27047
+ "pending_count": pending_count,
27048
+ "current_subtask": current_subtask,
27049
+ "next_pending_subtask": next_pending_subtask,
27050
+ "owners": sorted(owners),
27051
+ }
27052
+
27053
def _manager_worker_progress_capsule(self, role: str, step: dict, board: dict | None = None) -> str:
    """Build a one-line "[worker-progress]" summary for the manager.

    The capsule is a " | "-joined list of ``key=value`` fragments: owner,
    worker-step status, up to five distinct tool names, the current plan step
    text, todo counts (completed/in_progress/pending), the focused or next
    subtask, blackboard evidence, an ``acceptance=ready`` marker and the last
    worker reply when it belongs to the same role. The result is trimmed to
    1600 characters.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    role_key = self._sanitize_agent_role(role) or "developer"
    worker_step = step if isinstance(step, dict) else {}
    progress = self._active_plan_progress_snapshot(bb)

    def _count(field: str) -> int:
        return int(progress.get(field, 0) or 0)

    # Distinct tool names in first-seen order, capped at five.
    used_tools: list[str] = []
    for result in worker_step.get("tool_results", []) or []:
        if len(used_tools) >= 5:
            break
        if isinstance(result, dict):
            tool = str(result.get("name", "") or "").strip()
            if tool and tool not in used_tools:
                used_tools.append(tool)

    status_text = trim(str(worker_step.get('status', '') or ''), 40) or '-'
    fragments = [f"[worker-progress] owner={role_key}", f"status={status_text}"]
    if used_tools:
        fragments.append("tools=" + ",".join(used_tools))

    plan_text = trim(str(progress.get("step_text", "") or ""), 180)
    if plan_text:
        fragments.append(f"step={plan_text}")

    done, active, waiting = _count("completed_count"), _count("in_progress_count"), _count("pending_count")
    if _count("worker_todo_count") > 0:
        fragments.append(f"todos={done}/{active}/{waiting}")
    elif _count("expected_count") > 0:
        # Subtasks are expected but the worker has not created any todo rows.
        fragments.append(f"todos=missing/{_count('expected_count')}")

    in_focus = trim(str(progress.get("current_subtask", "") or ""), 160)
    upcoming = str(progress.get("next_pending_subtask", "") or "")
    if in_focus:
        fragments.append(f"focus={in_focus}")
    elif upcoming.strip():
        fragments.append(f"next={trim(upcoming, 160)}")

    plan_row = self._current_plan_step_row(bb)
    if isinstance(plan_row, dict):
        proof = self._collect_blackboard_step_evidence(plan_row, bb)
        if proof:
            fragments.append(f"evidence={trim(proof, 180)}")
        if self._step_subtasks_all_completed(plan_row) and self._plan_step_has_blackboard_evidence(plan_row, bb):
            fragments.append("acceptance=ready")

    last_reply = bb.get("last_worker_reply", {}) if isinstance(bb.get("last_worker_reply"), dict) else {}
    if self._sanitize_agent_role(last_reply.get("role", "")) == role_key:
        said = trim(str(last_reply.get("text", "") or "").strip(), 180)
        if said:
            fragments.append(f"reply={said}")

    return trim(" | ".join(fragments), 1600)
27105
+
27106
def _manager_recovery_route_for_repeated_delegate(self, route: dict, board: dict | None = None) -> dict:
    """Rewrite a route that keeps repeating the same delegation.

    Works on a copy of ``route`` and always marks it ``source="loop-recovery"``
    and ``is_mandatory=True``. Three outcomes:

    * no current plan step -> send to ``developer`` with a generic instruction;
    * subtasks done, blackboard evidence present and a reviewer available ->
      send to ``reviewer`` for acceptance of the current step;
    * otherwise -> send to ``developer``/``explorer`` with an instruction that
      names the concrete next action for the current step.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    row = dict(route or {})

    def _stamp(reason_suffix: str) -> dict:
        # Shared tail of every branch: annotate the reason and flag the route.
        row["reason"] = trim(f"{row.get('reason', '')}{reason_suffix}", 600)
        row["source"] = "loop-recovery"
        row["is_mandatory"] = True
        return row

    step = self._current_plan_step_row(bb)
    if not isinstance(step, dict):
        row["target"] = "developer"
        row["instruction"] = (
            "Recovery routing after repeated identical delegation. "
            "Continue the current objective with one concrete tool action and write observable progress."
        )
        return _stamp("|loop-recovery-no-plan-step")

    progress = self._active_plan_progress_snapshot(bb)
    step_text = trim(str(progress.get("step_text", "") or ""), 220)
    step_full_text = self._current_plan_step_full_text(bb, max_len=600)
    phase = self._plan_step_phase_hint(step_full_text)
    expected_total = int(progress.get("expected_count", 0) or 0)
    todo_total = int(progress.get("worker_todo_count", 0) or 0)
    active_subtask = trim(str(progress.get("current_subtask", "") or ""), 180)
    queued_subtask = trim(str(progress.get("next_pending_subtask", "") or ""), 180)
    subtasks_done = self._step_subtasks_all_completed(step)
    has_evidence = self._plan_step_has_blackboard_evidence(step, bb)

    # A reviewer is assumed available unless the task profile lists
    # participants and "reviewer" is not among them.
    profile = self._ensure_blackboard_task_profile(bb)
    listed = profile.get("participants", []) if isinstance(profile.get("participants"), list) else []
    roster = [name for name in [self._sanitize_agent_role(p) for p in listed] if name]
    reviewer_available = ("reviewer" in roster) if roster else True

    if subtasks_done and has_evidence and reviewer_available:
        row["target"] = "reviewer"
        row["instruction"] = trim(
            (
                "Recovery routing after repeated identical delegation. "
                f"Validate ONLY the current plan step: {step_text}. "
                "Worker subtasks are complete and blackboard evidence already exists. "
                "Run acceptance for this step only, record pass/fix with concrete evidence, and do not jump ahead."
            ),
            1200,
        )
        return _stamp("|loop-recovery-acceptance")

    # Execution branch: keep the requested worker when it is a doer role,
    # otherwise pick one from the step's phase.
    fallback_owner = "explorer" if phase in {"research", "design"} else "developer"
    owner = self._sanitize_agent_role(row.get("target", "")) or self._current_plan_worker_owner(bb)
    if owner not in {"developer", "explorer"}:
        owner = fallback_owner

    if expected_total > 0 and todo_total == 0:
        action_text = (
            "First call TodoWrite for the current plan step and create the missing worker subtasks "
            "before any more implementation."
        )
    elif active_subtask:
        action_text = f"Continue ONLY the current in_progress subtask: {active_subtask}."
    elif queued_subtask:
        action_text = f"Resume the next pending subtask: {queued_subtask}."
    else:
        action_text = "Continue the current plan step with one concrete tool action."

    evidence_text = (
        " Blackboard already contains partial evidence for this step; build on that work instead of restarting."
        if has_evidence
        else ""
    )

    row["target"] = owner
    row["instruction"] = trim(
        (
            "Recovery routing after repeated identical delegation. "
            f"Stay on the current plan step: {step_text}. "
            f"{action_text} "
            "Do not branch to a different step or restate the whole plan. "
            "After the subtask is finished, immediately call TodoWrite to mark it completed and move the next subtask to in_progress."
            f"{evidence_text}"
        ),
        1200,
    )
    return _stamp("|loop-recovery-execute")
27188
+
27189
+ def _todo_has_plan_steps(self, board: dict | None = None) -> bool:
27190
+ bb = board if isinstance(board, dict) else self._ensure_blackboard()
27191
+ todos = bb.get("project_todos", []) if isinstance(bb.get("project_todos"), list) else []
27192
+ return any(
27193
+ isinstance(todo, dict) and todo.get("category") == "plan_step"
27194
+ for todo in todos
27195
+ )
27196
+
27197
def _todo_worker_role_hint(self, role: str = "", board: dict | None = None) -> str:
    """Resolve the worker role that todo routing should be scoped to.

    Args:
        role: Caller-supplied role name; normalised with
            ``self._sanitize_agent_role``.
        board: Blackboard forwarded to ``self._current_plan_worker_owner``
            when ``role`` is not a worker role.

    Returns:
        The sanitised role when it is ``developer``, ``explorer`` or
        ``reviewer``; otherwise whatever ``_current_plan_worker_owner``
        returns for ``board``.
    """
    cleaned_role = self._sanitize_agent_role(role)
    is_worker = cleaned_role in {"developer", "explorer", "reviewer"}
    return cleaned_role if is_worker else self._current_plan_worker_owner(board)
27202
+
27203
def _todo_route_kind(self, role: str = "", board: dict | None = None) -> str:
    """Pick the todo routing mode for the current runtime state.

    Args:
        role: Role hint, resolved through ``self._todo_worker_role_hint``
            when the blackboard has no plan steps.
        board: Blackboard dict; non-dict values fall back to
            ``self._ensure_blackboard()``.

    Returns:
        ``"plan_sync"`` / ``"plan_single"`` when plan-step todos exist
        (multi-agent vs. single-agent mode), ``"pure_sync"`` when there are
        no plan steps but multi-agent mode is active with a worker role,
        and ``"pure_single"`` in every other case.
    """
    blackboard = board if isinstance(board, dict) else self._ensure_blackboard()
    if not self._todo_has_plan_steps(blackboard):
        hinted_role = self._todo_worker_role_hint(role, blackboard)
        is_sync_worker = (
            self._is_multi_agent_mode()
            and hinted_role in {"developer", "explorer", "reviewer"}
        )
        return "pure_sync" if is_sync_worker else "pure_single"
    if self._is_multi_agent_mode():
        return "plan_sync"
    return "plan_single"
27211
+
27212
+ def _todo_row_kind(self, row: dict | None) -> str:
27213
+ if not isinstance(row, dict):
27214
+ return ""
27215
+ key = str(row.get("key", "") or "").strip()
27216
+ if key.startswith("bb:"):
27217
+ return "system"
27218
+ owner = str(row.get("owner", "") or "").strip().lower()
27219
+ parent_step_id = str(row.get("parent_step_id", "") or "").strip()
27220
+ if owner in {"developer", "explorer", "reviewer"} and parent_step_id:
27221
+ return "plan_worker"
27222
+ if owner in {"developer", "explorer", "reviewer"}:
27223
+ return "owner_worker"
27224
+ return "flat"
27225
+
27226
+ def _todo_route_rows(
27227
+ self,
27228
+ route_kind: str,
27229
+ *,
27230
+ rows: list[dict] | None = None,
27231
+ role: str = "",
27232
+ board: dict | None = None,
27233
+ ) -> list[dict]:
27234
+ bb = board if isinstance(board, dict) else self._ensure_blackboard()
27235
+ snap = [dict(row) for row in (rows if isinstance(rows, list) else self.todo.snapshot()) if isinstance(row, dict)]
27236
+ if route_kind in {"plan_single", "plan_sync"}:
27237
+ step = self._get_active_plan_step(bb)
27238
+ step_id = trim(str((step or {}).get("id", "") or ""), 20)
27239
+ if not step_id:
27240
+ return []
27241
+ return [
27242
+ row for row in snap
27243
+ if self._todo_row_kind(row) == "plan_worker"
27244
+ and str(row.get("parent_step_id", "") or "").strip() == step_id
27245
+ ]
27246
+ if route_kind == "pure_sync":
27247
+ role_key = self._todo_worker_role_hint(role, bb)
27248
+ owner_rows = [row for row in snap if self._todo_row_kind(row) == "owner_worker"]
27249
+ if role_key in {"developer", "explorer", "reviewer"}:
27250
+ scoped = [
27251
+ row for row in owner_rows
27252
+ if str(row.get("owner", "") or "").strip().lower() == role_key
27253
+ ]
27254
+ if scoped:
27255
+ return scoped
27256
+ return owner_rows
27257
+ if route_kind == "pure_single":
27258
+ return [row for row in snap if self._todo_row_kind(row) == "flat"]
27259
+ return []
27260
+
25800
27261
  def _todo_runtime_has_worker_rows(self, role: str = "") -> bool:
25801
- if self._get_active_plan_step() is not None:
25802
- return self._active_plan_step_has_worker_todos(role=role)
25803
- return bool(self.todo.snapshot())
27262
+ route_kind = self._todo_route_kind(role=role)
27263
+ return bool(self._todo_route_rows(route_kind, role=role))
27264
+
27265
def _merge_todo_signal_rows(self, items: list[dict], role: str = "", board: dict | None = None) -> str:
    """Normalise incoming todo rows and hand them to the route-specific merger.

    Args:
        items: Candidate todo rows; non-dict entries are dropped.
        role: Acting role, sanitised with ``self._sanitize_agent_role``.
        board: Blackboard dict; non-dict values fall back to
            ``self._ensure_blackboard()``.

    Returns:
        The text returned by the selected merge routine, or
        ``self.todo.no_changes_text()`` when no usable row remains.
    """
    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    role_key = self._sanitize_agent_role(role)
    route_kind = self._todo_route_kind(role=role_key, board=bb)
    on_plan_route = route_kind in {"plan_single", "plan_sync"}
    active_step = self._get_active_plan_step(bb) if on_plan_route else None
    step_id = trim(str((active_step or {}).get("id", "") or ""), 20)
    stamp_owner = role_key in {"developer", "explorer", "reviewer"}

    def _is_blank(value) -> bool:
        return not str(value or "").strip()

    prepared: list[dict] = []
    for entry in items or []:
        if not isinstance(entry, dict):
            continue
        candidate = dict(entry)
        # Fill in owner / parent step only where the caller left them empty.
        if stamp_owner and _is_blank(candidate.get("owner", "")):
            candidate["owner"] = role_key
        if step_id and _is_blank(candidate.get("parent_step_id", "")):
            candidate["parent_step_id"] = step_id
        prepared.append(candidate)

    if not prepared:
        return self.todo.no_changes_text()
    if on_plan_route:
        return self._merge_plan_worker_todo_items(prepared, role=role_key)
    if route_kind == "pure_sync":
        return self._merge_owner_scoped_todo_items(prepared, role=role_key)
    return self._merge_flat_todo_items(prepared, role=role_key)
25804
27288
 
25805
27289
  def _plan_worker_todo_identity(self, row: dict | None) -> str:
25806
27290
  import re
@@ -25816,6 +27300,96 @@ body{padding:18px}
25816
27300
  return f"substep:{match.group(1)}"
25817
27301
  return f"text:{content}"
25818
27302
 
27303
+ def _flat_todo_identity(self, row: dict | None) -> str:
27304
+ import re
27305
+
27306
+ if not isinstance(row, dict):
27307
+ return ""
27308
+ key = trim(str(row.get("key", "") or "").strip(), 120)
27309
+ if key.startswith("bb:"):
27310
+ return f"system:{key}"
27311
+ content = normalize_work_text(str(row.get("content", "") or "")) or str(row.get("content", "") or "")
27312
+ content = re.sub(r"\s+", " ", content.strip().lower())
27313
+ if not content:
27314
+ return ""
27315
+ match = re.match(r"^(\d+\.\d+)\b", content)
27316
+ if match:
27317
+ return f"substep:{match.group(1)}"
27318
+ return f"text:{content}"
27319
+
27320
+ def _merge_flat_todo_items(self, items: list[dict], role: str = "") -> str:
27321
+ if not isinstance(items, list):
27322
+ raise ValueError("items must be array")
27323
+ role_key = self._sanitize_agent_role(role)
27324
+ existing = self.todo.snapshot()
27325
+ route_existing = self._todo_route_rows("pure_single", rows=existing, role=role_key)
27326
+ existing_by_identity: dict[str, dict] = {}
27327
+ preserved_system: list[dict] = []
27328
+ for row in existing:
27329
+ if self._todo_row_kind(row) != "system":
27330
+ continue
27331
+ preserved_system.append(dict(row))
27332
+ for row in route_existing:
27333
+ if not isinstance(row, dict):
27334
+ continue
27335
+ identity = self._flat_todo_identity(row)
27336
+ if not identity:
27337
+ continue
27338
+ if identity not in existing_by_identity:
27339
+ existing_by_identity[identity] = dict(row)
27340
+
27341
+ status_alias = {
27342
+ "todo": "pending",
27343
+ "doing": "in_progress",
27344
+ "inprogress": "in_progress",
27345
+ "in-progress": "in_progress",
27346
+ "done": "completed",
27347
+ "finish": "completed",
27348
+ "finished": "completed",
27349
+ }
27350
+ passthrough_rows: list[dict] = []
27351
+ merged_rows: list[dict] = []
27352
+ seen_identities: set[str] = set()
27353
+ for idx, item in enumerate(items):
27354
+ if isinstance(item, str):
27355
+ raw = {"content": item}
27356
+ elif isinstance(item, dict):
27357
+ raw = dict(item)
27358
+ else:
27359
+ raise ValueError(f"item {idx}: invalid type")
27360
+ key = trim(str(raw.get("key", "") or "").strip(), 120)
27361
+ if key.startswith("bb:"):
27362
+ passthrough_rows.append(raw)
27363
+ continue
27364
+ raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
27365
+ content = normalize_work_text(raw_content) or raw_content
27366
+ if not content:
27367
+ continue
27368
+ normalized: dict[str, object] = {"content": content}
27369
+ raw_status = str(raw.get("status", raw.get("state", "")) or "").strip().lower()
27370
+ if raw_status:
27371
+ normalized["status"] = status_alias.get(raw_status, raw_status)
27372
+ owner = str(raw.get("owner", "") or "").strip().lower()
27373
+ if owner in {"manager", "explorer", "developer", "reviewer"}:
27374
+ normalized["owner"] = owner
27375
+ elif role_key == "manager" and owner == "":
27376
+ normalized["owner"] = role_key
27377
+ active_form = str(raw.get("activeForm", raw.get("active_form", "")) or "").strip()
27378
+ if active_form:
27379
+ normalized["activeForm"] = active_form
27380
+ identity = self._flat_todo_identity(normalized)
27381
+ if not identity:
27382
+ identity = f"ad-hoc:{idx}:{trim(content, 80)}"
27383
+ merged = dict(existing_by_identity.get(identity, {}))
27384
+ if "activeForm" not in normalized:
27385
+ merged.pop("activeForm", None)
27386
+ merged.update(normalized)
27387
+ if identity in seen_identities:
27388
+ continue
27389
+ seen_identities.add(identity)
27390
+ merged_rows.append(merged)
27391
+ return self.todo.update(preserved_system + passthrough_rows + merged_rows)
27392
+
25819
27393
  def _merge_plan_worker_todo_items(self, items: list[dict], role: str = "") -> str:
25820
27394
  if not isinstance(items, list):
25821
27395
  raise ValueError("items must be array")
@@ -25833,6 +27407,12 @@ body{padding:18px}
25833
27407
  for row in existing:
25834
27408
  if not isinstance(row, dict):
25835
27409
  continue
27410
+ row_kind = self._todo_row_kind(row)
27411
+ if row_kind == "system":
27412
+ preserved.append(dict(row))
27413
+ continue
27414
+ if row_kind != "plan_worker":
27415
+ continue
25836
27416
  owner = str(row.get("owner", "") or "").strip().lower()
25837
27417
  row_step_id = trim(str(row.get("parent_step_id", "") or ""), 20)
25838
27418
  if owner in worker_owners and row_step_id == step_id:
@@ -25842,10 +27422,13 @@ body{padding:18px}
25842
27422
 
25843
27423
  merged_by_identity: dict[str, dict] = {}
25844
27424
  ordered_identities: list[str] = []
27425
+ # Fix 2: Compute existing identities for next-step detection
27426
+ _existing_identities: set[str] = set()
25845
27427
  for row in target_rows:
25846
27428
  identity = self._plan_worker_todo_identity(row)
25847
27429
  if not identity:
25848
27430
  continue
27431
+ _existing_identities.add(identity)
25849
27432
  if identity not in merged_by_identity:
25850
27433
  merged_by_identity[identity] = dict(row)
25851
27434
  ordered_identities.append(identity)
@@ -25883,12 +27466,52 @@ body{padding:18px}
25883
27466
  merged.update(row)
25884
27467
  merged["owner"] = str(merged.get("owner", "") or role_key).strip().lower() or role_key
25885
27468
  merged["parent_step_id"] = trim(str(merged.get("parent_step_id", "") or step_id), 20) or step_id
27469
+ # Fix 2 support: Timestamp new items for next-step detection
27470
+ if identity not in _existing_identities and "created_at" not in merged:
27471
+ merged["created_at"] = float(now_ts())
27472
+ if str(merged.get("status", "")).lower() == "completed" and "updated_at" not in merged:
27473
+ merged["updated_at"] = float(now_ts())
25886
27474
  merged_by_identity[identity] = merged
25887
27475
  if identity not in ordered_identities:
25888
27476
  ordered_identities.append(identity)
25889
27477
 
25890
27478
  merged_target_rows = [merged_by_identity[i] for i in ordered_identities if i in merged_by_identity]
25891
- final_rows = preserved + passthrough_rows + merged_target_rows
27479
+
27480
+ # Fix 4: Content-based deduplication to prevent duplicate subtasks from accumulating
27481
+ _seen_content: set[str] = set()
27482
+ _deduped_target: list[dict] = []
27483
+ for row in merged_target_rows:
27484
+ _ck = normalize_work_text(str(row.get("content", ""))).strip().lower()
27485
+ if _ck in _seen_content:
27486
+ continue
27487
+ _seen_content.add(_ck)
27488
+ _deduped_target.append(row)
27489
+ merged_target_rows = _deduped_target
27490
+
27491
+ # Fix 2: Detect "next-step intent" — if all existing items are completed,
27492
+ # new pending items that don't match existing identities are for the next step.
27493
+ # Remove their parent_step_id so they don't block current step advancement.
27494
+ _all_existing_done = (
27495
+ bool(target_rows) and
27496
+ all(str(r.get("status", "")).lower() == "completed" for r in target_rows)
27497
+ )
27498
+ if _all_existing_done:
27499
+ for row in merged_target_rows:
27500
+ _rid = self._plan_worker_todo_identity(row)
27501
+ if (_rid and _rid not in _existing_identities
27502
+ and str(row.get("status", "")).lower() != "completed"):
27503
+ row.pop("parent_step_id", None) # Not for current step
27504
+
27505
+ # Insert merged_target_rows right after the active plan step's bb: row in preserved,
27506
+ # so subtasks appear nested under their parent step rather than at the list bottom.
27507
+ _step_key = str(active_step.get("key", "") or "")
27508
+ _insert_idx = len(preserved) # fallback: append at end
27509
+ if _step_key:
27510
+ for _i, _r in enumerate(preserved):
27511
+ if str(_r.get("key", "") or "") == _step_key:
27512
+ _insert_idx = _i + 1
27513
+ break
27514
+ final_rows = preserved[:_insert_idx] + passthrough_rows + merged_target_rows + preserved[_insert_idx:]
25892
27515
  return self.todo.update(final_rows)
25893
27516
 
25894
27517
  def _merge_owner_scoped_todo_items(self, items: list[dict], role: str = "") -> str:
@@ -26115,7 +27738,7 @@ body{padding:18px}
26115
27738
  content = self._build_plan_todo_reminder_text(plan_step, missing_subtasks=missing_subtasks)
26116
27739
  if not content:
26117
27740
  return False
26118
- self.messages.append({"role": "user", "content": content, "ts": now_tick})
27741
+ self._append_plan_guidance_bubble(content, summary="todo reminder")
26119
27742
  self.last_todo_reminder_ts = now_tick
26120
27743
  self.todo_reminder_count += 1
26121
27744
  self._emit(
@@ -26134,7 +27757,7 @@ body{padding:18px}
26134
27757
 
26135
27758
  if not isinstance(plan_step, dict):
26136
27759
  return []
26137
- raw = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
27760
+ raw = normalize_embedded_newlines(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
26138
27761
  if not raw.strip():
26139
27762
  return []
26140
27763
  lines = [trim(str(line or "").strip(), 300) for line in raw.replace("\r\n", "\n").split("\n")]
@@ -26264,6 +27887,126 @@ body{padding:18px}
26264
27887
  self.todo.items = preserved + replacement
26265
27888
  return True
26266
27889
 
27890
+ def _check_step_verified_tag(self, plan_step: dict, *, messages: list | None = None) -> bool:
27891
+ """Return True if the agent has emitted <step-verified> in any assistant message
27892
+ since this plan step was activated (i.e., after plan_step['activated_at']).
27893
+ Pass messages=self.agent_messages for sync mode; defaults to self.messages."""
27894
+ activated_at = float(plan_step.get("activated_at", 0.0) or 0.0)
27895
+ msg_list = messages if messages is not None else self.messages
27896
+ for msg in reversed(msg_list):
27897
+ if not isinstance(msg, dict):
27898
+ continue
27899
+ msg_ts = float(msg.get("ts", 0.0) or 0.0)
27900
+ # Stop once we reach messages predating step activation
27901
+ if activated_at > 0 and msg_ts > 0 and msg_ts < activated_at:
27902
+ break
27903
+ if msg.get("role") == "assistant":
27904
+ content = str(msg.get("content", "") or "")
27905
+ if "<step-verified" in content:
27906
+ return True
27907
+ return False
27908
+
27909
+ def _single_mode_validation_gate(self, plan_step: dict, tool_results: list[dict]) -> bool:
27910
+ """Gate: after subtasks complete, require model to explicitly emit <step-verified/>
27911
+ in a message since this step was activated. Research/design phases exempt.
27912
+ Escape hatch: after 10 consecutive blocks, auto-pass to prevent permanent stall."""
27913
+ step_id = str(plan_step.get("id", "") or "")
27914
+ _flag = f"_smvg_{step_id}"
27915
+ if getattr(self, _flag, False):
27916
+ return True # Already validated in a previous round
27917
+ step_content = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "").lower()
27918
+ phase = self._plan_step_phase_hint(step_content)
27919
+ if phase in ("research", "design"):
27920
+ setattr(self, _flag, True)
27921
+ return True # No verification needed for non-execution phases
27922
+ # Escape hatch: after 10 consecutive blocks, unblock to prevent permanent stall
27923
+ _n_flag = f"_smvg_n_{step_id}"
27924
+ _n_blocked = int(getattr(self, _n_flag, 0))
27925
+ if _n_blocked >= 10:
27926
+ setattr(self, _flag, True)
27927
+ return True
27928
+ # Model must explicitly emit <step-verified/> after evaluating results
27929
+ if self._check_step_verified_tag(plan_step):
27930
+ setattr(self, _flag, True)
27931
+ return True
27932
+ # Gate blocked — increment counter and inject hint
27933
+ setattr(self, _n_flag, _n_blocked + 1)
27934
+ self._inject_single_mode_validation_hint(plan_step)
27935
+ return False
27936
+
27937
def _inject_single_mode_validation_hint(self, plan_step: dict):
    """Append a ``<verification-required>`` user message to ``self.messages``.

    The hint asks the model to run the relevant command, compare the output
    with the step's acceptance criteria and emit ``<step-verified/>``. It is
    rate-limited to once per 20 seconds per step (tracked in a
    ``_smvg_ts_<id>`` attribute) and is not appended when one of the last
    five messages already contains ``<verification-required>``.

    Args:
        plan_step: Active plan step dict (``id``, ``content``,
            ``full_content`` are read).
    """
    step_id = str(plan_step.get("id", "") or "")
    stamp_attr = f"_smvg_ts_{step_id}"
    previous_stamp = float(getattr(self, stamp_attr, 0.0))
    if float(now_ts()) - previous_stamp < 20.0:
        return
    setattr(self, stamp_attr, float(now_ts()))

    step_label = trim(str(plan_step.get("content", "") or ""), 80)
    full_content = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
    # First line starting with "ACCEPTANCE:" (case-insensitive), if any.
    acceptance = next(
        (
            text.strip()[len("ACCEPTANCE:"):].strip()
            for text in full_content.splitlines()
            if text.strip().upper().startswith("ACCEPTANCE:")
        ),
        "",
    )
    phase = self._plan_step_phase_hint(full_content.lower())
    action = (
        "run the tests with bash and evaluate the results"
        if phase == "test"
        else "run the build/compile/run command with bash and evaluate the output"
    )
    accept_line = f"\nAcceptance criteria: {acceptance}" if acceptance else ""
    hint = "\n".join(
        [
            "<verification-required>",
            f"All subtasks for \"{step_label}\" are marked complete.{accept_line}",
            "Before this step can advance, you must:",
            f"1. {action}",
            "2. Review the bash output and confirm it meets the acceptance criteria",
            "3. If it passes, emit exactly: <step-verified/>",
            "4. If it fails, fix the issue and retry — do NOT emit <step-verified/> until resolved",
            "</verification-required>",
        ]
    )
    for recent in self.messages[-5:]:
        if isinstance(recent, dict) and "<verification-required>" in str(recent.get("content", "") or ""):
            return
    self.messages.append({"role": "user", "content": hint, "ts": now_ts()})
27973
+
27974
def _inject_sync_mode_verification_hint(self, plan_step: dict, worker_step: dict):
    """Append a ``<verification-required>`` user message to ``self.agent_messages``.

    Sync-mode counterpart of the single-mode hint: it asks the worker to run
    the relevant command, check the acceptance criteria and emit
    ``<step-verified/>``. It is rate-limited to once per 30 seconds per step
    (tracked in a ``_sync_sv_ts_<id>`` attribute) and is not appended when
    one of the last five agent messages already contains
    ``<verification-required>``.

    Args:
        plan_step: Active plan step dict (``id``, ``content``,
            ``full_content`` are read).
        worker_step: Accepted for interface compatibility; not read here.
    """
    step_id = str(plan_step.get("id", "") or "")
    stamp_attr = f"_sync_sv_ts_{step_id}"
    previous_stamp = float(getattr(self, stamp_attr, 0.0))
    if float(now_ts()) - previous_stamp < 30.0:
        return
    setattr(self, stamp_attr, float(now_ts()))

    step_label = trim(str(plan_step.get("content", "") or ""), 80)
    full_content = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
    # First line starting with "ACCEPTANCE:" (case-insensitive), if any.
    acceptance = next(
        (
            text.strip()[len("ACCEPTANCE:"):].strip()
            for text in full_content.splitlines()
            if text.strip().upper().startswith("ACCEPTANCE:")
        ),
        "",
    )
    phase = self._plan_step_phase_hint(full_content.lower())
    action = (
        "run the tests with bash and evaluate results"
        if phase == "test"
        else "run the build/compile command with bash and evaluate the output"
    )
    accept_line = f"\nAcceptance criteria: {acceptance}" if acceptance else ""
    hint = "\n".join(
        [
            "<verification-required>",
            f"All subtasks for \"{step_label}\" are marked complete.{accept_line}",
            "Before this step can advance:",
            f"1. {action}",
            "2. Review the output and confirm acceptance criteria are met",
            "3. If it passes, emit exactly: <step-verified/>",
            "4. If it fails, fix and retry — do NOT emit <step-verified/> until resolved",
            "</verification-required>",
        ]
    )
    for recent in self.agent_messages[-5:]:
        if isinstance(recent, dict) and "<verification-required>" in str(recent.get("content", "") or ""):
            return
    self.agent_messages.append({"role": "user", "content": hint, "ts": now_ts()})
28009
+
26267
28010
  def _single_agent_plan_step_check(self, tool_results: list[dict]):
26268
28011
  """In single-agent mode, check if current plan step should be advanced based on tool results."""
26269
28012
  bb = self._ensure_blackboard()
@@ -26281,6 +28024,24 @@ body{padding:18px}
26281
28024
  if not current:
26282
28025
  self._sync_todos_from_blackboard(reason="single-agent-round")
26283
28026
  return
28027
+ # When a new step is activated with no subtasks yet, require TodoWrite first
28028
+ _cur_step_id = str(current.get("id", "") or "")
28029
+ if _cur_step_id:
28030
+ _existing_subs = self._active_plan_worker_todo_rows(_cur_step_id, role="")
28031
+ if not _existing_subs:
28032
+ _step_label_s = trim(str(current.get("content", "") or ""), 60)
28033
+ _force_tw_msg = (
28034
+ f"<action-required>\n"
28035
+ f"Step \"{_step_label_s}\" has no subtasks yet. "
28036
+ f"Your FIRST action MUST be to call TodoWrite with "
28037
+ f"parent_step_id=\"{_cur_step_id}\" to create this step's subtasks "
28038
+ f"(e.g. N.1, N.2 ...) before executing any other work.\n"
28039
+ f"</action-required>"
28040
+ )
28041
+ _recent_msgs = self.messages[-4:]
28042
+ if not any("<action-required>" in str(m.get("content", "") or "") for m in _recent_msgs if isinstance(m, dict)):
28043
+ self._append_plan_guidance_bubble(_force_tw_msg, summary="action required: create subtasks first")
28044
+ return # Wait for TodoWrite before doing other checks
26284
28045
  # Heuristic: check if tool results indicate step completion
26285
28046
  step_content = str(current.get("full_content", "") or current.get("content", "") or "").lower()
26286
28047
  phase = self._plan_step_phase_hint(step_content)
@@ -26292,23 +28053,57 @@ body{padding:18px}
26292
28053
  str(r.get("name", "")) == "bash" and r.get("ok", False)
26293
28054
  for r in tool_results
26294
28055
  )
26295
- validation_ok = self._tool_results_have_validation_evidence(current, tool_results)
28056
+ validation_ok_current = self._tool_results_have_validation_evidence(current, tool_results)
28057
+ validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
28058
+ validation_ok = validation_ok_current or validation_ok_blackboard
28059
+ bb_sig = self._plan_step_blackboard_signals(current, bb)
28060
+ todo_progress_signal = any(
28061
+ isinstance(r, dict) and r.get("ok", False)
28062
+ and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
28063
+ for r in tool_results
28064
+ )
26296
28065
  # Auto-advance conditions:
26297
28066
  should_advance = False
28067
+ _gate_blocked = False # True when validation gate fired and blocked — no other path may advance
26298
28068
  # Priority 1: Check if worker subtasks are all completed (most reliable signal)
26299
28069
  subtasks_done = self._step_subtasks_all_completed(current)
26300
- if subtasks_done and validation_ok:
26301
- should_advance = True
26302
- # Priority 2: Phase-based heuristics (require observable evidence, not just file creation)
26303
- if not should_advance:
26304
- if phase in ("research", "design") and validation_ok:
26305
- should_advance = True
26306
- elif phase == "implement" and wrote_files and validation_ok:
26307
- should_advance = True
26308
- elif phase in ("test", "review") and ran_bash_ok and validation_ok:
26309
- should_advance = True
28070
+ if subtasks_done:
28071
+ # Validation gate always fires when subtasks are done — even if validation_ok is False.
28072
+ # For research/design phases the gate passes immediately; for implement/test it requires
28073
+ # a successful bash run. This ensures single mode proactively requests verification.
28074
+ _gate_ok = self._single_mode_validation_gate(current, tool_results)
28075
+ if _gate_ok:
28076
+ if validation_ok:
28077
+ should_advance = True
28078
+ elif todo_progress_signal and self._step_has_accumulated_evidence(current, bb):
28079
+ # Accumulated evidence path: subtasks done + TodoWrite progress + history
28080
+ should_advance = True
28081
+ else:
28082
+ _gate_blocked = True # Gate blocked — disable ALL remaining advancement paths
28083
+ # Priority 2: Phase-based heuristics — BUT gate by subtask completion when subtasks exist
28084
+ # CRITICAL: A single write_file must NOT advance when 3+ subtasks remain
28085
+ # Skipped when validation gate has blocked advancement (subtasks_done + gate failed)
28086
+ if not should_advance and not _gate_blocked:
28087
+ _has_subtasks_s = bool(self._active_plan_worker_todo_rows(
28088
+ str(current.get("id", "") or ""), role=""
28089
+ ))
28090
+ _can_use_phase_heuristic = subtasks_done or not _has_subtasks_s
28091
+ if _can_use_phase_heuristic:
28092
+ if phase in ("research", "design") and validation_ok:
28093
+ should_advance = True
28094
+ elif phase == "implement" and (
28095
+ (wrote_files and validation_ok_current)
28096
+ or (bb_sig["has_write"] and validation_ok_blackboard)
28097
+ ):
28098
+ should_advance = True
28099
+ elif phase in ("test", "review") and (
28100
+ (ran_bash_ok and validation_ok_current)
28101
+ or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
28102
+ ):
28103
+ should_advance = True
26310
28104
  # Also check if the agent explicitly mentioned step completion
26311
- if not should_advance:
28105
+ # Also blocked by validation gate when subtasks_done path was blocked
28106
+ if not should_advance and not _gate_blocked:
26312
28107
  # Check last assistant message for step completion signals
26313
28108
  last_text = ""
26314
28109
  for msg in reversed(self.messages[-3:]):
@@ -26327,7 +28122,25 @@ body{padding:18px}
26327
28122
  except Exception:
26328
28123
  pass
26329
28124
  else:
28125
+ self._inject_rework_if_needed(current, {"tool_results": tool_results})
26330
28126
  self._sync_todos_from_blackboard(reason="single-agent-round")
28127
+ if todo_progress_signal and not subtasks_done:
28128
+ step_rows = self._active_plan_worker_todo_rows(str(current.get("id", "") or ""), role="")
28129
+ next_row = next(
28130
+ (r for r in step_rows if str(r.get("status", "") or "").strip().lower() == "in_progress"),
28131
+ None,
28132
+ )
28133
+ focus_text = trim(str((next_row or {}).get("content", "") or "").strip(), 180)
28134
+ if focus_text:
28135
+ focus_msg = (
28136
+ "<todo-focus>"
28137
+ f"Continue ONLY the current in_progress subtask: {focus_text}. "
28138
+ "Do not branch away from the active plan step."
28139
+ "</todo-focus>"
28140
+ )
28141
+ recent = self.messages[-6:]
28142
+ if not any(str(msg.get("content", "") or "").strip() == focus_msg for msg in recent if isinstance(msg, dict)):
28143
+ self._append_plan_guidance_bubble(focus_msg, summary="todo focus: continue current subtask")
26331
28144
 
26332
28145
  def _todo_project_rows_from_blackboard(self, board: dict | None = None) -> list[dict]:
26333
28146
  bb = board if isinstance(board, dict) else self._ensure_blackboard()
@@ -26337,7 +28150,9 @@ body{padding:18px}
26337
28150
  rows = []
26338
28151
  for todo in todos:
26339
28152
  s = todo.get("status", "pending")
26340
- c = todo.get("content", "")
28153
+ c = normalize_embedded_newlines(todo.get("content", "") or "")
28154
+ if str(todo.get("category", "") or "") == "plan_step" and "\n" in c:
28155
+ c = c.split("\n", 1)[0].strip()
26341
28156
  ev = todo.get("evidence", "")
26342
28157
  af = {
26343
28158
  "in_progress": self._ui_text("todo_working", content=c),
@@ -26350,12 +28165,9 @@ body{padding:18px}
26350
28165
  if bool(self.runtime_reclassify_required):
26351
28166
  return
26352
28167
  bb = board if isinstance(board, dict) else self._ensure_blackboard()
26353
- # In single mode, still sync plan_step todos if they exist
26354
- has_plan_steps = any(
26355
- isinstance(t, dict) and t.get("category") == "plan_step"
26356
- for t in (bb.get("project_todos", []) if isinstance(bb.get("project_todos"), list) else [])
26357
- )
26358
- if not self._is_multi_agent_mode() and not has_plan_steps:
28168
+ route_kind = self._todo_route_kind(board=bb)
28169
+ has_plan_steps = route_kind in {"plan_single", "plan_sync"}
28170
+ if route_kind == "pure_single":
26359
28171
  return
26360
28172
  self._init_project_todos(bb)
26361
28173
  self._update_project_todo_status(bb)
@@ -26367,25 +28179,25 @@ body{padding:18px}
26367
28179
  pass
26368
28180
  system_rows = self._todo_project_rows_from_blackboard(bb)
26369
28181
  existing = self.todo.snapshot()
28182
+ bridged_flat_rows = False
26370
28183
  worker_rows: list[dict] = []
26371
28184
  non_system_rows: list[dict] = []
26372
- for row in existing:
26373
- if not isinstance(row, dict):
26374
- continue
26375
- key = str(row.get("key", "") or "").strip()
26376
- owner = str(row.get("owner", "") or "").strip().lower()
26377
- is_system_key = key.startswith(("bb:owner:", "bb:node:", "bb:proj:"))
26378
- # Preserve worker-owned items (from TodoWrite) separately
26379
- is_worker_item = owner in ("developer", "explorer", "reviewer") and not is_system_key
26380
- if is_worker_item:
26381
- worker_rows.append(dict(row))
26382
- continue
26383
- if is_system_key or owner == "manager":
26384
- continue
26385
- non_system_rows.append(dict(row))
26386
- if has_plan_steps:
26387
- worker_rows = [r for r in worker_rows if str(r.get("parent_step_id", "") or "").strip()]
26388
- non_system_rows = []
28185
+ if route_kind == "plan_single":
28186
+ worker_rows = self._todo_route_rows(route_kind, rows=existing, board=bb)
28187
+ if not worker_rows:
28188
+ flat_rows = self._todo_route_rows("pure_single", rows=existing, board=bb)
28189
+ bridged_rows, bridged_flat_rows = self._bridge_flat_todos_to_active_plan_step(flat_rows, board=bb)
28190
+ if bridged_flat_rows:
28191
+ worker_rows = self._todo_route_rows(route_kind, rows=bridged_rows, board=bb)
28192
+ elif route_kind == "plan_sync":
28193
+ worker_rows = self._todo_route_rows(route_kind, rows=existing, board=bb)
28194
+ elif route_kind == "pure_sync":
28195
+ worker_rows = self._todo_route_rows(
28196
+ route_kind,
28197
+ rows=existing,
28198
+ role=self._todo_worker_role_hint(board=bb),
28199
+ board=bb,
28200
+ )
26389
28201
  # Smart trim: keep all active (in_progress/pending) system rows,
26390
28202
  # but only recent 3 completed system rows to save capacity for worker subtasks
26391
28203
  active_system = [r for r in system_rows if r.get("status") != "completed"]
@@ -26449,6 +28261,11 @@ body{padding:18px}
26449
28261
  todo_out = self.todo.update(merged)
26450
28262
  except Exception:
26451
28263
  return
28264
+ if bridged_flat_rows:
28265
+ self._emit(
28266
+ "status",
28267
+ {"summary": "flat todos attached to current plan step"},
28268
+ )
26452
28269
  if todo_out != self.todo.no_changes_text() and reason:
26453
28270
  self._emit(
26454
28271
  "status",
@@ -26781,7 +28598,7 @@ body{padding:18px}
26781
28598
  task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
26782
28599
  if task_type in TASK_PROFILE_TYPES:
26783
28600
  merged["task_type"] = task_type
26784
- complexity = trim(str(row.get("complexity", "") or "").strip().lower(), 20)
28601
+ complexity = normalize_task_complexity(row.get("complexity", ""), default="")
26785
28602
  if complexity in TASK_COMPLEXITY_LEVELS:
26786
28603
  merged["complexity"] = complexity
26787
28604
  scale = trim(str(row.get("scale_preference", "") or "").strip().lower(), 20)
@@ -26826,7 +28643,7 @@ body{padding:18px}
26826
28643
  def _fallback_task_level_decision(self, goal_text: str) -> dict:
26827
28644
  profile = self._infer_task_profile(goal_text)
26828
28645
  task_type = str(profile.get("task_type", "general") or "general")
26829
- complexity = str(profile.get("complexity", "simple") or "simple")
28646
+ complexity = normalize_task_complexity(profile.get("complexity", "simple"), default="simple")
26830
28647
  low = str(goal_text or "").lower()
26831
28648
  inherit_previous_state = False
26832
28649
  if bool(self.runtime_goal_reset_pending):
@@ -26945,9 +28762,9 @@ body{padding:18px}
26945
28762
  level = 3
26946
28763
  if task_type == "simple_qa":
26947
28764
  level = 1 if len(str(goal_text or "")) <= 180 else 2
26948
- elif complexity == "simple" and task_type in {"general"}:
28765
+ elif task_complexity_rank(complexity) <= task_complexity_rank("simple") and task_type in {"general"}:
26949
28766
  level = 2
26950
- elif complexity == "simple":
28767
+ elif task_complexity_rank(complexity) <= task_complexity_rank("moderate"):
26951
28768
  level = 3
26952
28769
  elif any(tok in low for tok in ("system-level", "系统级", "blackboard", "orchestrator", "内核", "基础设施")):
26953
28770
  level = 5
@@ -27009,7 +28826,9 @@ body{padding:18px}
27009
28826
  "SCALE PREFERENCE: Infer fast|balanced|thorough from user wording. "
27010
28827
  "User-stated preference overrides your default strategy. "
27011
28828
  "Budget controls internal depth/compactness, NOT early-stop messaging to user.\n\n"
27012
- "Output exactly one classify_task_level tool call with concise judgement, inherit_previous_state, "
28829
+ "CRITICAL OUTPUT CONTRACT: You MUST output exactly one classify_task_level tool call and no plain-text answer. "
28830
+ "A prose-only response is invalid and will be discarded.\n"
28831
+ "The tool call must include concise judgement, inherit_previous_state, "
27013
28832
  "and semantic_confidence (high|medium|low). "
27014
28833
  "Use low confidence only when semantic ambiguity is substantial, then set low_confidence_reason briefly.\n"
27015
28834
  f"{model_language_instruction(self.ui_language)}"
@@ -27021,6 +28840,28 @@ body{padding:18px}
27021
28840
  )
27022
28841
  return base
27023
28842
 
28843
+ def _extract_classify_task_level_row(self, response: dict | None) -> dict:
28844
+ if not isinstance(response, dict):
28845
+ return {}
28846
+ tool_calls = response.get("tool_calls", []) if isinstance(response.get("tool_calls", []), list) else []
28847
+ for tc in tool_calls:
28848
+ fn = tc.get("function", {}) if isinstance(tc, dict) else {}
28849
+ if str(fn.get("name", "") or "").strip() != "classify_task_level":
28850
+ continue
28851
+ args = fn.get("arguments", {}) if isinstance(fn, dict) else {}
28852
+ if isinstance(args, dict):
28853
+ return dict(args)
28854
+ if isinstance(args, str):
28855
+ parsed, _ = parse_tool_arguments_with_error(args)
28856
+ if isinstance(parsed, dict):
28857
+ return dict(parsed)
28858
+ content = str(response.get("content", "") or "").strip()
28859
+ if content:
28860
+ parsed, _ = parse_tool_arguments_with_error(content)
28861
+ if isinstance(parsed, dict) and parsed.get("level") is not None:
28862
+ return dict(parsed)
28863
+ return {}
28864
+
27024
28865
  def _skill_aware_reeval_task_level(
27025
28866
  self,
27026
28867
  goal_text: str,
@@ -27214,7 +29055,7 @@ body{padding:18px}
27214
29055
  if low_confidence_mode:
27215
29056
  rule_profile = self._infer_task_profile(goal_text)
27216
29057
  fallback_task_type = str(rule_profile.get("task_type", "general") or "general")
27217
- fallback_complexity = str(rule_profile.get("complexity", "simple") or "simple")
29058
+ fallback_complexity = normalize_task_complexity(rule_profile.get("complexity", "simple"), default="simple")
27218
29059
  fallback_objective = trim(str(rule_profile.get("direct_objective", "") or ""), 800)
27219
29060
  else:
27220
29061
  board_now = self._ensure_blackboard()
@@ -27225,12 +29066,10 @@ body{padding:18px}
27225
29066
  )
27226
29067
  if fallback_task_type not in TASK_PROFILE_TYPES:
27227
29068
  fallback_task_type = "general"
27228
- fallback_complexity = trim(
27229
- str(self.runtime_task_complexity or board_profile.get("complexity", "simple") or "simple"),
27230
- 20,
29069
+ fallback_complexity = normalize_task_complexity(
29070
+ self.runtime_task_complexity or board_profile.get("complexity", "simple") or "simple",
29071
+ default="simple",
27231
29072
  )
27232
- if fallback_complexity not in TASK_COMPLEXITY_LEVELS:
27233
- fallback_complexity = "simple"
27234
29073
  fallback_objective = trim(
27235
29074
  str(self.runtime_direct_objective or board_profile.get("direct_objective", "") or "").strip(),
27236
29075
  800,
@@ -27242,22 +29081,20 @@ body{padding:18px}
27242
29081
  task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
27243
29082
  if task_type not in TASK_PROFILE_TYPES:
27244
29083
  task_type = fallback_task_type
27245
- complexity = trim(str(row.get("complexity", "") or "").strip().lower(), 20)
27246
- if complexity not in TASK_COMPLEXITY_LEVELS:
27247
- complexity = fallback_complexity
29084
+ complexity = normalize_task_complexity(row.get("complexity", ""), default=fallback_complexity)
27248
29085
  if explicit_complexity in TASK_COMPLEXITY_LEVELS:
27249
- complexity = explicit_complexity
29086
+ complexity = normalize_task_complexity(explicit_complexity, default=fallback_complexity)
27250
29087
  elif preserve_existing_complexity and previous_complexity in TASK_COMPLEXITY_LEVELS:
27251
- complexity = previous_complexity
29088
+ complexity = normalize_task_complexity(previous_complexity, default=fallback_complexity)
27252
29089
  low_confidence_reason = trim(str(row.get("low_confidence_reason", "") or "").strip(), 220)
27253
29090
  judgement = trim(str(row.get("judgement", "") or "").strip(), 200) or "manager classified task level"
27254
29091
  objective = trim(str(row.get("direct_objective", "") or "").strip(), 800)
27255
29092
  if not objective:
27256
29093
  objective = fallback_objective
27257
29094
  _prev_level_val = int(getattr(self, '_prev_applied_task_level', 0) or 0)
27258
- _complexity_floor = str(getattr(self, 'runtime_complexity_floor', '') or '').strip()
27259
- if _complexity_floor == "complex" and complexity == "simple":
27260
- complexity = "complex"
29095
+ _complexity_floor = normalize_task_complexity(getattr(self, 'runtime_complexity_floor', '') or '', default="")
29096
+ if _complexity_floor in TASK_COMPLEXITY_LEVELS and task_complexity_rank(complexity) < task_complexity_rank(_complexity_floor):
29097
+ complexity = _complexity_floor
27261
29098
  self.runtime_task_level = int(level)
27262
29099
  self._prev_applied_task_level = int(level)
27263
29100
  self.runtime_execution_mode = mode
@@ -27449,34 +29286,50 @@ body{padding:18px}
27449
29286
  retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
27450
29287
  media_inputs=media_inputs_round,
27451
29288
  )
27452
- tool_calls = response.get("tool_calls", []) if isinstance(response, dict) else []
27453
- for tc in tool_calls or []:
27454
- fn = tc.get("function", {}) if isinstance(tc, dict) else {}
27455
- if str(fn.get("name", "") or "").strip() != "classify_task_level":
27456
- continue
27457
- args = fn.get("arguments", {}) if isinstance(fn, dict) else {}
27458
- if isinstance(args, dict):
27459
- row = dict(args)
27460
- row["inherit_previous_state"] = _to_bool_like(
27461
- row.get("inherit_previous_state", False),
27462
- default=False,
27463
- )
27464
- row["semantic_confidence"] = self._normalize_semantic_confidence(
27465
- row.get("semantic_confidence", "medium"),
27466
- default="medium",
27467
- )
27468
- if str(row.get("semantic_confidence", "medium")) == "low":
27469
- # Skill-aware re-evaluation before falling back to keyword heuristic
27470
- reeval_row = self._skill_aware_reeval_task_level(goal_text, row, pinned_selection)
27471
- fallback_row = self._fallback_task_level_decision(goal_text)
27472
- merged = self._merge_task_decision_for_low_confidence(reeval_row, fallback_row)
27473
- return merged
27474
- row["source"] = "manager"
27475
- return row
29289
+ row = self._extract_classify_task_level_row(response)
29290
+ if not row:
29291
+ repair_prompt = (
29292
+ "Previous answer was invalid because it did not produce a valid classify_task_level tool call. "
29293
+ "Retry now. Output exactly one classify_task_level tool call and no prose."
29294
+ )
29295
+ repair_response = self._chat_with_same_model_retry(
29296
+ [
29297
+ {"role": "user", "content": prompt, "ts": now_ts()},
29298
+ {"role": "user", "content": repair_prompt, "ts": now_ts()},
29299
+ ],
29300
+ tools=self._manager_task_classify_tools(),
29301
+ system=self._manager_classification_system_prompt(),
29302
+ max_tokens=220,
29303
+ think=False,
29304
+ stream_thinking=False,
29305
+ on_thinking_chunk=self._append_live_thinking,
29306
+ pinned_selection=pinned_selection,
29307
+ context_label="manager classify repair",
29308
+ retries=1,
29309
+ media_inputs=media_inputs_round,
29310
+ )
29311
+ row = self._extract_classify_task_level_row(repair_response)
29312
+ if row:
29313
+ row["inherit_previous_state"] = _to_bool_like(
29314
+ row.get("inherit_previous_state", False),
29315
+ default=False,
29316
+ )
29317
+ row["semantic_confidence"] = self._normalize_semantic_confidence(
29318
+ row.get("semantic_confidence", "medium"),
29319
+ default="medium",
29320
+ )
29321
+ if str(row.get("semantic_confidence", "medium")) == "low":
29322
+ # Skill-aware re-evaluation before falling back to keyword heuristic
29323
+ reeval_row = self._skill_aware_reeval_task_level(goal_text, row, pinned_selection)
29324
+ fallback_row = self._fallback_task_level_decision(goal_text)
29325
+ merged = self._merge_task_decision_for_low_confidence(reeval_row, fallback_row)
29326
+ return merged
29327
+ row["source"] = "manager"
29328
+ return row
27476
29329
  row = self._fallback_task_level_decision(goal_text)
27477
29330
  row["source"] = "fallback-no-toolcall"
27478
29331
  row["semantic_confidence"] = "low"
27479
- row["low_confidence_reason"] = "manager classifier returned no valid tool call"
29332
+ row["low_confidence_reason"] = "manager classifier returned no valid classify_task_level tool call"
27480
29333
  return row
27481
29334
 
27482
29335
  # ------------------------------------------------------------------
@@ -27923,6 +29776,7 @@ body{padding:18px}
27923
29776
  "IMPORTANT: Previous fix attempts FAILED. You MUST change your approach — "
27924
29777
  "do NOT repeat the same instruction. Include the exact error output in your delegation. "
27925
29778
  )
29779
+ html_hint = self._html_frontend_boost_instruction()
27926
29780
  # Loaded skills constraint for manager
27927
29781
  skills_constraint = self._loaded_skills_prompt_hint(for_role="manager")
27928
29782
  bb_skills = board.get("loaded_skills", {})
@@ -27957,6 +29811,7 @@ body{padding:18px}
27957
29811
  f"{todo_route_note}"
27958
29812
  f"{phase_hint}"
27959
29813
  f"{failure_hint}"
29814
+ f"{html_hint}"
27960
29815
  f"{skills_constraint}"
27961
29816
  f"Level={level}, mode={mode}, progress={progress}, "
27962
29817
  f"budget={'unlimited' if int(budget) <= 0 else int(budget)}, "
@@ -28282,7 +30137,7 @@ body{padding:18px}
28282
30137
  "reason": "conclusive-reply-detected",
28283
30138
  "source": "fallback",
28284
30139
  }
28285
- if complexity == "simple" and task_type == "simple_code":
30140
+ if task_complexity_rank(complexity) <= task_complexity_rank("moderate") and task_type == "simple_code":
28286
30141
  if has_error_log:
28287
30142
  return {
28288
30143
  "target": "developer",
@@ -28420,6 +30275,10 @@ body{padding:18px}
28420
30275
  task_type_low = str(row.get("task_type", "") or "").strip().lower()
28421
30276
  # 5a: Merge in-memory routes with persisted routes for detection
28422
30277
  bb_for_routes = self._ensure_blackboard()
30278
+ current_progress_fp = self._watchdog_state_fingerprint(bb_for_routes)
30279
+ last_delegate = bb_for_routes.get("last_delegate", {}) if isinstance(bb_for_routes.get("last_delegate"), dict) else {}
30280
+ last_progress_fp = trim(str(last_delegate.get("progress_fp", "") or "").strip(), 80)
30281
+ no_progress_since_last_delegate = bool(last_progress_fp and last_progress_fp == current_progress_fp)
28423
30282
  persisted_routes = bb_for_routes.get("persisted_manager_routes", [])
28424
30283
  if not isinstance(persisted_routes, list):
28425
30284
  persisted_routes = []
@@ -28431,22 +30290,16 @@ body{padding:18px}
28431
30290
  if (
28432
30291
  isinstance(deleg, dict)
28433
30292
  and str(deleg.get("target", "") or "").strip().lower() == target
30293
+ and (
30294
+ not str(deleg.get("progress_fp", "") or "").strip()
30295
+ or str(deleg.get("progress_fp", "") or "").strip() == current_progress_fp
30296
+ )
28434
30297
  and int(deleg.get("count", 0) or 0) >= 3
28435
30298
  ):
28436
- alt_targets = [r for r in ("reviewer", "developer", "explorer") if r != target]
28437
- if len(bb_for_routes.get("code_artifacts", {}) or {}) > 0:
28438
- row["target"] = "finish"
28439
- row["instruction"] = (
28440
- f"Anti-stall: delegation to '{target}' repeated {deleg.get('count')} times with same instruction. "
28441
- "Forcing finish to break loop."
28442
- )
28443
- else:
28444
- row["target"] = alt_targets[0] if alt_targets else "developer"
28445
- row["instruction"] = (
28446
- f"Anti-stall: delegation to '{target}' repeated {deleg.get('count')} times. "
28447
- f"Switching to {row['target']} with fresh approach."
28448
- )
28449
- row["reason"] = f"{row.get('reason', '')}|anti-stall-repeated-delegation"
30299
+ if not no_progress_since_last_delegate:
30300
+ continue
30301
+ row = self._manager_recovery_route_for_repeated_delegate(row, board=bb_for_routes)
30302
+ row["reason"] = trim(f"{row.get('reason', '')}|anti-stall-repeated-delegation", 600)
28450
30303
  row["source"] = "anti-stall"
28451
30304
  return row
28452
30305
  if task_type_low in ("simple_code", "engineering") and target == "explorer":
@@ -28469,7 +30322,7 @@ body{padding:18px}
28469
30322
  if target not in AGENT_ROLES:
28470
30323
  return row
28471
30324
  recent = [str(x.get("target", "") or "").strip().lower() for x in merged_routes[-4:]]
28472
- if len(recent) >= 3 and recent[-1] == target and recent[-2] == target and recent[-3] == target:
30325
+ if no_progress_since_last_delegate and len(recent) >= 3 and recent[-1] == target and recent[-2] == target and recent[-3] == target:
28473
30326
  board = bb_for_routes
28474
30327
  low_reason = str(row.get("reason", "") or "").strip().lower()
28475
30328
  if "summary" in low_reason and len(board.get("code_artifacts", {}) or {}) > 0:
@@ -28514,7 +30367,7 @@ body{padding:18px}
28514
30367
  row["reason"] = f"{row.get('reason', '')}|anti-stall->developer-suggest"
28515
30368
  row["source"] = "anti-stall"
28516
30369
  return row
28517
- if len(recent) == 4 and recent[0] == recent[2] and recent[1] == recent[3] and recent[0] != recent[1]:
30370
+ if no_progress_since_last_delegate and len(recent) == 4 and recent[0] == recent[2] and recent[1] == recent[3] and recent[0] != recent[1]:
28518
30371
  board = bb_for_routes
28519
30372
  if len(board.get("code_artifacts", {}) or {}) > 0:
28520
30373
  row["target"] = "finish"
@@ -28595,9 +30448,7 @@ body{padding:18px}
28595
30448
  task_type = trim(str(row.get("task_type", default_type) or "").strip().lower(), 40) or default_type
28596
30449
  if task_type not in TASK_PROFILE_TYPES:
28597
30450
  task_type = default_type
28598
- complexity = trim(str(row.get("complexity", default_complexity) or "").strip().lower(), 20) or default_complexity
28599
- if complexity not in TASK_COMPLEXITY_LEVELS:
28600
- complexity = default_complexity
30451
+ complexity = normalize_task_complexity(row.get("complexity", default_complexity) or default_complexity, default=default_complexity)
28601
30452
  scale_preference = trim(
28602
30453
  str(row.get("scale_preference", profile.get("scale_preference", self.runtime_scale_preference)) or "").strip().lower(),
28603
30454
  20,
@@ -29018,6 +30869,19 @@ body{padding:18px}
29018
30869
  seen.add(low_tail)
29019
30870
  keep_lines.append(tail)
29020
30871
  continue
30872
+ if low.startswith("tasks to complete:"):
30873
+ continue
30874
+ if re.match(r"^\d+(?:\.\d+)*[.)]\s+", s):
30875
+ continue
30876
+ if re.match(r"^[-*]\s+", s):
30877
+ continue
30878
+ if re.match(
30879
+ r"(?i)^(mkdir\s+-p|run:|create directories:|create project|create directory|initialize project|cmake\b|python\s+-m\s+venv\b|npx\b)",
30880
+ s,
30881
+ ):
30882
+ continue
30883
+ if re.match(r"^(创建|初始化|运行|目录结构|项目根目录结构)[::]?", s):
30884
+ continue
29021
30885
  norm = re.sub(r"\s+", " ", s).strip().lower()
29022
30886
  if norm and norm not in seen:
29023
30887
  seen.add(norm)
@@ -29375,6 +31239,7 @@ body{padding:18px}
29375
31239
  "instruction": instruction,
29376
31240
  "reason": trim(str(route.get("reason", "") or "").strip(), 600),
29377
31241
  "source": trim(str(route.get("source", "") or "").strip(), 40),
31242
+ "progress_fp": self._watchdog_state_fingerprint(board),
29378
31243
  "task_level": int(task_level),
29379
31244
  "execution_mode": execution_mode,
29380
31245
  "task_type": task_type,
@@ -29459,8 +31324,9 @@ body{padding:18px}
29459
31324
  profile["task_type"] = task_type
29460
31325
  if complexity in TASK_COMPLEXITY_LEVELS:
29461
31326
  # Floor protection: if plan mode set a floor, do not allow downgrade
29462
- if self.runtime_complexity_floor == "complex" and complexity == "simple":
29463
- complexity = "complex"
31327
+ _route_complexity_floor = normalize_task_complexity(self.runtime_complexity_floor, default="")
31328
+ if _route_complexity_floor in TASK_COMPLEXITY_LEVELS and task_complexity_rank(complexity) < task_complexity_rank(_route_complexity_floor):
31329
+ complexity = _route_complexity_floor
29464
31330
  profile["complexity"] = complexity
29465
31331
  profile["scale_preference"] = scale_preference if scale_preference in TASK_SCALE_PREFERENCES else "balanced"
29466
31332
  if objective:
@@ -29830,8 +31696,25 @@ body{padding:18px}
29830
31696
  )
29831
31697
  self._emit("status", {"summary": f"reviewer finish blocked: {gate_reason}"})
29832
31698
  else:
31699
+ bb_finish = self._ensure_blackboard()
31700
+ profile_finish = self._ensure_blackboard_task_profile(bb_finish)
31701
+ exec_mode = normalize_execution_mode(
31702
+ profile_finish.get("execution_mode", self._effective_execution_mode()),
31703
+ default=self._effective_execution_mode(),
31704
+ )
29833
31705
  approval_note = summary_arg or output or "finish tool acknowledged"
29834
- self._blackboard_mark_approved(approval_note, role_key)
31706
+ if exec_mode == EXECUTION_MODE_SYNC:
31707
+ self._blackboard_append_section(
31708
+ "execution_logs",
31709
+ role_key,
31710
+ (
31711
+ "finish requested but deferred: sync mode requires reviewer pass before approval.\n"
31712
+ f"summary: {approval_note}"
31713
+ ),
31714
+ )
31715
+ self._emit("status", {"summary": "finish deferred: sync mode requires reviewer approval"})
31716
+ else:
31717
+ self._blackboard_mark_approved(approval_note, role_key)
29835
31718
  if not ok and output:
29836
31719
  self._blackboard_append_section(
29837
31720
  "execution_logs",
@@ -29869,6 +31752,7 @@ body{padding:18px}
29869
31752
  role_key = self._sanitize_agent_role(role)
29870
31753
  status = str((step or {}).get("status", "") or "")
29871
31754
  text = trim(str((step or {}).get("text", "") or "").strip(), BLACKBOARD_MAX_TEXT)
31755
+ tool_results = (step or {}).get("tool_results", []) if isinstance((step or {}).get("tool_results"), list) else []
29872
31756
  if role_key and text:
29873
31757
  board = self._ensure_blackboard()
29874
31758
  board["last_worker_reply"] = {
@@ -29889,7 +31773,28 @@ body{padding:18px}
29889
31773
  self._blackboard_set_status("REVIEWING")
29890
31774
  if self._reviewer_deems_done(text):
29891
31775
  self._blackboard_mark_approved(text, role_key)
29892
- for item in (step or {}).get("tool_results", []) or []:
31776
+ explicit_todo_write = any(
31777
+ isinstance(item, dict) and str(item.get("name", "") or "") in {"TodoWrite", "TodoWriteRescue"}
31778
+ for item in tool_results
31779
+ )
31780
+ if role_key and not explicit_todo_write:
31781
+ source_text = text or self._latest_agent_assistant_text(role_key)
31782
+ if re.search(r"(?m)^\s*(?:[-*•>]+\s*)?\[(?: |>|x)\]\s+\S", source_text or ""):
31783
+ board = self._ensure_blackboard()
31784
+ step_id = trim(str((self._get_active_plan_step(board) or {}).get("id", "") or ""), 20)
31785
+ parsed_rows = extract_todo_rows_from_text(
31786
+ source_text,
31787
+ default_parent_step_id=step_id,
31788
+ limit=12,
31789
+ )
31790
+ if parsed_rows:
31791
+ merged = self._merge_todo_signal_rows(parsed_rows, role=role_key, board=board)
31792
+ if merged != self.todo.no_changes_text():
31793
+ self._emit(
31794
+ "status",
31795
+ {"summary": f"todo synced from canonical {role_key} text"},
31796
+ )
31797
+ for item in tool_results:
29893
31798
  if isinstance(item, dict) and bool(item.get("bb_applied", False)):
29894
31799
  continue
29895
31800
  self._blackboard_update_from_tool_result(role_key, item)
@@ -30490,6 +32395,7 @@ body{padding:18px}
30490
32395
  skills_block = self._skills_awareness_block(for_role=role_key)
30491
32396
  code_note = self._runtime_code_reference_prompt_block(max_chars=2600)
30492
32397
  engineering_note = self._engineering_execution_boost_instruction()
32398
+ html_note = self._html_frontend_boost_instruction()
30493
32399
  plan_todo_note = self._plan_todo_discipline_prompt(role=role_key)
30494
32400
  base = (
30495
32401
  f"You are {self._agent_display_name(role_key)} in a multi-agent coding system. "
@@ -30498,10 +32404,15 @@ body{padding:18px}
30498
32404
  f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
30499
32405
  f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS) or pptxgen.bundle.js (browser). "
30500
32406
  f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
32407
+ "IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
32408
+ "If an HTML deliverable needs any asset from js_lib, copy it into a task-local relative asset folder "
32409
+ "such as './js/' or './assets/vendor/' next to the deliverable, then reference it with a plain relative path. "
32410
+ "Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases in final exported HTML. "
30501
32411
  "Use blackboard for shared state, ask_colleague for inter-agent communication. "
30502
32412
  "Keep outputs concise and action-oriented. "
30503
32413
  f"{code_note + ' ' if code_note else ''}"
30504
32414
  f"{engineering_note + ' ' if engineering_note else ''}"
32415
+ f"{html_note + ' ' if html_note else ''}"
30505
32416
  f"{_detect_os_shell_instruction()} "
30506
32417
  f"{model_language_instruction(self.ui_language)} "
30507
32418
  )
@@ -30557,13 +32468,13 @@ body{padding:18px}
30557
32468
  "The skill's workflow, tools, and file structure OVERRIDE the plan's implementation "
30558
32469
  "approach — if the plan says 'use python-pptx' but the skill says 'use PptxGenJS', "
30559
32470
  "use PptxGenJS. The skill defines HOW to implement; the plan defines WHAT to do. "
30560
- "AUTONOMOUS SKILL LOADING: When starting a coding, debugging, or architecture task, "
30561
- "call list_skills to discover available skills, then load_skill to activate the most relevant ones. "
30562
- "Load skills BEFORE you start working, not after you're stuck. "
30563
- "Already-loaded skills appear as <loaded-skill> messages in your context — use them directly without reloading. "
30564
32471
  "TODO TRACKING (mandatory): "
30565
32472
  "When a plan step is active, follow the current todo subtask order instead of inventing a parallel path. "
30566
32473
  "After completing ONE subtask, call TodoWrite immediately — mark that subtask as 'completed' and move the next one to 'in_progress' before doing more work. "
32474
+ "Prefer TodoWrite items as objects with explicit fields: "
32475
+ "{content, status, owner?, parent_step_id?}. "
32476
+ "If you must use strings, use ONLY canonical prefixes: '[ ]', '[>]', '[x]'. "
32477
+ "Do not use emoji markers or free-form localized status labels in TodoWrite payloads. "
30567
32478
  "Do not silently batch multiple subtasks and do not delay todo updates until the end of the step. "
30568
32479
  "This manual update is critical because skill re-evaluation is triggered by actual todo progress. "
30569
32480
  "EDIT METHODOLOGY (follow strictly): "
@@ -30650,29 +32561,45 @@ body{padding:18px}
30650
32561
  )
30651
32562
 
30652
32563
  def _todo_write_rescue(self, args: dict) -> str:
32564
+ """Rescue todo writer — accepts both strings and dicts, auto-normalizes.
32565
+ FIXED: Now preserves status from incoming items (especially 'completed')
32566
+ instead of resetting everything to 'pending'."""
30653
32567
  raw_items = args.get("items", [])
30654
32568
  if not isinstance(raw_items, list) or not raw_items:
30655
32569
  raise ValueError("items must be a non-empty array")
30656
- limited = raw_items[:7]
32570
+ limited = raw_items[:12] # Allow more items (was 7) — plans can have 5+ subtasks
30657
32571
  active_step = self._get_active_plan_step()
30658
32572
  active_step_id = trim(str((active_step or {}).get("id", "") or ""), 20)
30659
32573
  owner_hint = self._current_plan_worker_owner()
30660
32574
  clean_items = []
32575
+ _status_alias = {
32576
+ "todo": "pending", "doing": "in_progress", "inprogress": "in_progress",
32577
+ "in-progress": "in_progress", "done": "completed", "finish": "completed",
32578
+ "finished": "completed",
32579
+ }
30661
32580
  for idx, item in enumerate(limited):
30662
32581
  if isinstance(item, dict):
30663
32582
  content = str(item.get("content", item.get("text", item.get("title", "")))).strip()
30664
32583
  owner = str(item.get("owner", "") or owner_hint).strip().lower()
30665
32584
  parent_step_id = trim(str(item.get("parent_step_id", "") or active_step_id), 20)
32585
+ # Preserve status from incoming dict (critical for subtask state tracking)
32586
+ raw_status = str(item.get("status", item.get("state", "pending"))).strip().lower()
32587
+ status = _status_alias.get(raw_status, raw_status)
32588
+ if status not in {"pending", "in_progress", "completed"}:
32589
+ status = "pending"
30666
32590
  else:
30667
32591
  content = str(item).strip()
30668
32592
  owner = owner_hint
30669
32593
  parent_step_id = active_step_id
32594
+ parsed_status, parsed_content = split_todo_status_text(content)
32595
+ status = parsed_status or "pending"
32596
+ content = parsed_content or content
30670
32597
  content = normalize_work_text(content) or content
30671
32598
  if not content:
30672
32599
  continue
30673
32600
  row = {
30674
32601
  "content": content,
30675
- "status": "pending",
32602
+ "status": status,
30676
32603
  }
30677
32604
  if owner in {"developer", "explorer", "reviewer"}:
30678
32605
  row["owner"] = owner
@@ -30681,17 +32608,52 @@ body{padding:18px}
30681
32608
  clean_items.append(row)
30682
32609
  if not clean_items:
30683
32610
  raise ValueError("no valid todo item text")
30684
- in_progress_index = int(args.get("in_progress_index", 0) or 0)
30685
- if in_progress_index < 0 or in_progress_index >= len(clean_items):
30686
- in_progress_index = 0
30687
- clean_items[in_progress_index]["status"] = "in_progress"
30688
- if active_step is not None:
32611
+ # Only apply in_progress_index if NO items already have in_progress status
32612
+ has_in_progress = any(r["status"] == "in_progress" for r in clean_items)
32613
+ if not has_in_progress:
32614
+ in_progress_index = int(args.get("in_progress_index", 0) or 0)
32615
+ if in_progress_index < 0 or in_progress_index >= len(clean_items):
32616
+ in_progress_index = 0
32617
+ # Only set in_progress on a pending item
32618
+ for i, r in enumerate(clean_items):
32619
+ if r["status"] == "pending":
32620
+ if i >= in_progress_index:
32621
+ r["status"] = "in_progress"
32622
+ break
32623
+ route_kind = self._todo_route_kind(role=owner_hint)
32624
+ if route_kind in {"plan_single", "plan_sync"}:
30689
32625
  return self._merge_plan_worker_todo_items(clean_items, role=owner_hint)
30690
- if self._is_multi_agent_mode() and owner_hint in {"developer", "explorer", "reviewer"}:
32626
+ if route_kind == "pure_sync":
30691
32627
  return self._merge_owner_scoped_todo_items(clean_items, role=owner_hint)
30692
32628
  return self.todo.update(clean_items)
30693
32629
 
30694
- def _analyze_todo_result(self, tool_name: str, output: str) -> tuple[str, str]:
32630
+ def _todo_progress_signature(self, rows: list[dict] | None = None) -> list[tuple[str, str, str, str]]:
32631
+ items = rows if isinstance(rows, list) else self.todo.snapshot()
32632
+ sig: list[tuple[str, str, str, str]] = []
32633
+ for row in items:
32634
+ if not isinstance(row, dict):
32635
+ continue
32636
+ sig.append(
32637
+ (
32638
+ normalize_work_text(str(row.get("content", "") or "")).strip().lower(),
32639
+ str(row.get("status", "pending") or "pending").strip().lower(),
32640
+ str(row.get("owner", "") or "").strip().lower(),
32641
+ str(row.get("parent_step_id", "") or "").strip(),
32642
+ )
32643
+ )
32644
+ return sig
32645
+
32646
+ def _todo_progress_changed(self, before_rows: list[dict] | None, after_rows: list[dict] | None) -> bool:
32647
+ return self._todo_progress_signature(before_rows) != self._todo_progress_signature(after_rows)
32648
+
32649
+ def _analyze_todo_result(
32650
+ self,
32651
+ tool_name: str,
32652
+ output: str,
32653
+ *,
32654
+ before_rows: list[dict] | None = None,
32655
+ after_rows: list[dict] | None = None,
32656
+ ) -> tuple[str, str]:
30695
32657
  txt = str(output or "").strip()
30696
32658
  low = txt.lower()
30697
32659
  has_worker_rows = self._todo_runtime_has_worker_rows()
@@ -31155,12 +33117,15 @@ body{padding:18px}
31155
33117
  except Exception:
31156
33118
  token_decoded = token
31157
33119
  token_decoded = token_decoded.strip()
31158
- if token_decoded and token_decoded not in out:
31159
- out.append(token_decoded)
33120
+ for piece in split_structured_todo_content(token_decoded, limit=7):
33121
+ piece_text = str(piece or "").strip()
33122
+ if piece_text and piece_text not in out:
33123
+ out.append(piece_text)
31160
33124
  if out:
31161
33125
  return out[:7]
31162
33126
  # Fallback: parse non-empty lines / bullets
31163
- for line in text.splitlines():
33127
+ normalized_text = normalize_embedded_newlines(text)
33128
+ for line in normalized_text.splitlines():
31164
33129
  s = line.strip().strip(",")
31165
33130
  s = re.sub(r"^[\-\*\d\.\)\s]+", "", s).strip()
31166
33131
  if not s:
@@ -31337,11 +33302,21 @@ body{padding:18px}
31337
33302
 
31338
33303
  def _dispatch_tool_inner(self, name: str, args: dict, role_key: str = "") -> str:
31339
33304
  """Inner tool dispatcher — all tool logic lives here."""
33305
+ # Fix 5d: Reset TodoWrite loop counter on non-TodoWrite tool calls
33306
+ if name not in ("TodoWrite", "TodoWriteRescue") and hasattr(self, '_todowrite_step_counter'):
33307
+ try:
33308
+ _rst_step = self._get_active_plan_step()
33309
+ if isinstance(_rst_step, dict):
33310
+ _rst_id = str(_rst_step.get("id", "") or "")
33311
+ if _rst_id:
33312
+ self._todowrite_step_counter.pop(_rst_id, None)
33313
+ except Exception:
33314
+ pass
31340
33315
  if name == "bash":
31341
33316
  guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), self.files_root)
31342
33317
  if guard_error:
31343
33318
  return guard_error
31344
- meta = self._run_shell_meta(args["command"], self.files_root, 120)
33319
+ meta = self._run_shell_meta(args["command"], self.files_root, self._shell_command_timeout())
31345
33320
  self._emit(
31346
33321
  "command",
31347
33322
  {
@@ -31508,20 +33483,16 @@ body{padding:18px}
31508
33483
  )
31509
33484
  return out
31510
33485
  if name == "TodoWrite":
31511
- # Protect plan_step todos: worker TodoWrite creates sub-items, not replacements
31512
33486
  bb = self._ensure_blackboard()
31513
- has_plan_steps = any(
31514
- t.get("category") == "plan_step"
31515
- for t in bb.get("project_todos", [])
31516
- )
31517
- if has_plan_steps:
33487
+ route_kind = self._todo_route_kind(role=str(role_key or ""), board=bb)
33488
+ if route_kind in {"plan_single", "plan_sync"}:
31518
33489
  items = args.get("items", [])
31519
33490
  if isinstance(items, list):
31520
33491
  for item in items:
31521
33492
  if isinstance(item, dict) and not item.get("key", "").startswith("bb:"):
31522
33493
  item["owner"] = str(role_key or "developer")
31523
33494
  result = self._merge_plan_worker_todo_items(items, role=str(role_key or "developer"))
31524
- elif self._is_multi_agent_mode() and role_key in {"developer", "explorer", "reviewer"}:
33495
+ elif route_kind == "pure_sync":
31525
33496
  items = args.get("items", [])
31526
33497
  if isinstance(items, list):
31527
33498
  for item in items:
@@ -31891,7 +33862,7 @@ body{padding:18px}
31891
33862
  guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), wt_path)
31892
33863
  if guard_error:
31893
33864
  return guard_error
31894
- meta = self._run_shell_meta(args["command"], wt_path, 300)
33865
+ meta = self._run_shell_meta(args["command"], wt_path, self._shell_command_timeout())
31895
33866
  self._emit(
31896
33867
  "command",
31897
33868
  {
@@ -32348,6 +34319,18 @@ body{padding:18px}
32348
34319
  },
32349
34320
  )
32350
34321
  self._persist()
34322
+ _proc = getattr(self, "_running_bash_proc", None)
34323
+ if _proc is not None:
34324
+ try:
34325
+ if os.name == "posix":
34326
+ try:
34327
+ os.killpg(os.getpgid(_proc.pid), signal.SIGKILL)
34328
+ except Exception:
34329
+ _proc.kill()
34330
+ else:
34331
+ _proc.kill()
34332
+ except Exception:
34333
+ pass
32351
34334
 
32352
34335
  def _reviewer_approval_log_gate(self, board: dict | None = None) -> tuple[bool, str]:
32353
34336
  bb = board if isinstance(board, dict) else self._ensure_blackboard()
@@ -32752,8 +34735,8 @@ body{padding:18px}
32752
34735
  isinstance(t, dict) and t.get("category") == "plan_step"
32753
34736
  for t in board.get("project_todos", [])
32754
34737
  )
32755
- _sync_complexity = str(profile.get("complexity", "simple") or "simple")
32756
- if not _sync_has_plan and _sync_complexity in ("moderate", "complex", "expert"):
34738
+ _sync_complexity = normalize_task_complexity(profile.get("complexity", "simple"), default="simple")
34739
+ if not _sync_has_plan and task_complexity_at_least(_sync_complexity, "moderate"):
32757
34740
  self.messages.append({
32758
34741
  "role": "system",
32759
34742
  "content": (
@@ -32860,29 +34843,59 @@ body{padding:18px}
32860
34843
  self._mark_all_done_silently(note)
32861
34844
  self._emit("status", {"summary": "manager decided finish; run paused"})
32862
34845
  break
32863
- # Detect manager stuck: same instruction repeated N times → force advance + break
34846
+ # Detect manager loop: same instruction repeated with unchanged progress.
32864
34847
  import hashlib as _hl_mgr
32865
- _cur_hash = _hl_mgr.sha1((target + "|" + instruction).encode("utf-8")).hexdigest()[:12]
34848
+ _delegate_progress_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
34849
+ _cur_hash = _hl_mgr.sha1((target + "|" + instruction + "|" + _delegate_progress_fp).encode("utf-8")).hexdigest()[:12]
32866
34850
  if _cur_hash == _prev_delegation_hash:
32867
34851
  _repeat_delegation_count += 1
32868
34852
  else:
32869
34853
  _repeat_delegation_count = 0
32870
34854
  _prev_delegation_hash = _cur_hash
32871
- if _repeat_delegation_count >= 15:
32872
- self._emit("status", {"summary": f"manager stuck: repeated identical delegation x{_repeat_delegation_count + 1}; forcing advance"})
34855
+ if _repeat_delegation_count >= 3:
32873
34856
  _bb_stuck = self._ensure_blackboard()
32874
34857
  _stuck_step = next(
32875
34858
  (t for t in _bb_stuck.get("project_todos", [])
32876
34859
  if t.get("category") == "plan_step" and t.get("status") == "in_progress"),
32877
34860
  None,
32878
34861
  )
32879
- if _stuck_step:
32880
- self._advance_plan_step(evidence="manager stuck: repeated delegation", actor="manager")
32881
- else:
32882
- self._blackboard_mark_approved("manager stuck loop break", "manager")
32883
- self._mark_all_done_silently("manager stuck: repeated delegation break")
32884
- break
34862
+ _step_note = trim(str((_stuck_step or {}).get("content", "") or ""), 200)
34863
+ route = self._manager_recovery_route_for_repeated_delegate(route, board=_bb_stuck)
34864
+ target = str(route.get("target", "") or "").strip().lower()
34865
+ instruction = trim(str(route.get("instruction", "") or "").strip(), 1400)
32885
34866
  _repeat_delegation_count = 0
34867
+ _prev_delegation_hash = ""
34868
+ self._emit(
34869
+ "status",
34870
+ {
34871
+ "summary": (
34872
+ f"manager loop recovery: repeated identical delegation under unchanged progress; "
34873
+ f"rerouting to {target}"
34874
+ )
34875
+ },
34876
+ )
34877
+ self._append_manager_context(
34878
+ {
34879
+ "role": "system",
34880
+ "content": (
34881
+ "[manager-loop-guard] Repeated identical delegation detected under unchanged progress. "
34882
+ "Do NOT mark the active step completed just because the owner was delegated repeatedly. "
34883
+ "Use a recovery route based on current step evidence and worker todo state."
34884
+ + (f" Active step: {_step_note}." if _step_note else "")
34885
+ + (f" Recovery target: {target}." if target else "")
34886
+ ),
34887
+ "ts": now_ts(),
34888
+ }
34889
+ )
34890
+ self._blackboard_append_section(
34891
+ "execution_logs",
34892
+ "manager",
34893
+ (
34894
+ "manager repeated identical delegation; applied recovery reroute"
34895
+ + (f"\nactive_step: {_step_note}" if _step_note else "")
34896
+ + (f"\nrecovery_target: {target}" if target else "")
34897
+ ),
34898
+ )
32886
34899
  role = self._sanitize_agent_role(target) or "developer"
32887
34900
  self._inject_manager_instruction(
32888
34901
  role,
@@ -32902,13 +34915,6 @@ body{padding:18px}
32902
34915
  media_inputs_pool=media_inputs_pool,
32903
34916
  media_seen_ts_by_role=media_seen_ts_by_role,
32904
34917
  )
32905
- # Sync-mode skill auto-discovery: same mechanism as plan mode's step-completed trigger.
32906
- # Runs on early rounds for developer/explorer. Uses goal_sig dedup — no re-loading if already loaded.
32907
- if role in ("developer", "explorer") and rounds_used <= 2:
32908
- try:
32909
- self._refresh_loaded_skills_for_execution_focus(trigger=f"sync-worker-pre:{role}")
32910
- except Exception:
32911
- pass
32912
34918
  board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
32913
34919
  step = self._multi_agent_turn(
32914
34920
  role,
@@ -32918,49 +34924,24 @@ body{padding:18px}
32918
34924
  self._blackboard_update_from_worker_step(role, step)
32919
34925
  # Post-execution plan step advancement (replaces pre-execution advancement)
32920
34926
  self._post_execution_plan_step_check(route, step if isinstance(step, dict) else {})
32921
- # Sync-mode failure recovery: detect all-tools-failed and inject recovery hint + auto-load debugging skill
32922
- _step_dict = step if isinstance(step, dict) else {}
32923
- _step_results = _step_dict.get("tool_results", []) or []
32924
- if _step_results:
32925
- _sync_err_count = sum(1 for r in _step_results if isinstance(r, dict) and not r.get("ok", False))
32926
- _sync_ok_count = sum(1 for r in _step_results if isinstance(r, dict) and r.get("ok", False))
32927
- if _sync_err_count > 0 and _sync_ok_count == 0:
32928
- # All tool calls failed in this worker turn — inject recovery guidance
32929
- _failed_tools = [str(r.get("name", "")) for r in _step_results if isinstance(r, dict)][:4]
32930
- _err_outputs = " | ".join(
32931
- trim(str(r.get("output", "") or ""), 120)
32932
- for r in _step_results if isinstance(r, dict) and not r.get("ok", False)
32933
- )[:400]
32934
- self._append_agent_context_message(
32935
- role,
34927
+ progress_capsule = self._manager_worker_progress_capsule(
34928
+ role,
34929
+ step if isinstance(step, dict) else {},
34930
+ self._ensure_blackboard(),
34931
+ )
34932
+ if progress_capsule:
34933
+ recent_mgr = self.manager_context[-4:] if isinstance(self.manager_context, list) else []
34934
+ if not any(
34935
+ isinstance(msg, dict) and str(msg.get("content", "") or "").strip() == progress_capsule
34936
+ for msg in recent_mgr
34937
+ ):
34938
+ self._append_manager_context(
32936
34939
  {
32937
- "role": "user",
32938
- "content": (
32939
- "<failure-recovery>"
32940
- f"All tool calls failed in this turn ({', '.join(_failed_tools)}). "
32941
- f"Errors: {_err_outputs}\n"
32942
- "Before retrying, STOP and diagnose:\n"
32943
- "1) If a debugging skill is available, call load_skill('systematic-debugging') and follow its workflow.\n"
32944
- "2) Read the EXACT error message — identify the root cause, not just the symptom.\n"
32945
- "3) Form ONE hypothesis about the cause before making any changes.\n"
32946
- "4) Apply ONE targeted fix, then verify with a test/build command.\n"
32947
- "5) If still blocked after 2 attempts, report the exact blocker to the user."
32948
- "</failure-recovery>"
32949
- ),
34940
+ "role": "system",
34941
+ "content": progress_capsule,
32950
34942
  "ts": now_ts(),
32951
- "agent_role": role,
32952
- },
32953
- mirror_to_global=False,
34943
+ }
32954
34944
  )
32955
- # Auto-load systematic-debugging if failure involves code errors
32956
- _code_err_kw = ("bash", "compile", "syntax", "test", "build", "traceback", "error:")
32957
- if any(kw in _err_outputs.lower() for kw in _code_err_kw):
32958
- _bb_sk = self._ensure_blackboard().get("loaded_skills", {})
32959
- if isinstance(_bb_sk, dict) and "systematic-debugging" not in _bb_sk:
32960
- try:
32961
- self._load_skill_with_cache("systematic-debugging", load_source="auto:sync-worker-failure")
32962
- except Exception:
32963
- pass
32964
34945
  # Fix 6b: Pure sync no-plan — read worker-done signal and notify manager
32965
34946
  _bb_sync = self._ensure_blackboard()
32966
34947
  if _bb_sync.pop("sync_worker_round_done", False):
@@ -33309,17 +35290,19 @@ body{padding:18px}
33309
35290
  bb["plan"]["phase"] = "synthesis"
33310
35291
  self.blackboard = bb
33311
35292
 
33312
- # Synthesis with retry (up to 2 attempts) + minimal fallback
35293
+ # Synthesis with retry + model fallback + deterministic fallback
33313
35294
  proposal = None
33314
- for _synth_attempt in range(2):
35295
+ for _synth_attempt in range(PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS):
33315
35296
  proposal = self._plan_mode_synthesize_proposal(pinned_selection)
33316
35297
  if proposal and proposal.get("options"):
33317
35298
  break
33318
- if _synth_attempt == 0:
35299
+ if _synth_attempt < (PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS - 1):
33319
35300
  self._emit("status", {"summary": "plan-mode: synthesis retry"})
33320
35301
  if not proposal or not proposal.get("options"):
33321
35302
  # Model-side fallback: minimal prompt with a higher token budget (a deterministic fallback follows if this also fails)
33322
35303
  proposal = self._synthesis_minimal_fallback(pinned_selection)
35304
+ if not proposal or not proposal.get("options"):
35305
+ proposal = self._synthesis_programmatic_fallback()
33323
35306
  if not proposal or not proposal.get("options"):
33324
35307
  self._emit("status", {"summary": "plan-mode: synthesis failed, falling back to direct execution"})
33325
35308
  self.runtime_plan_mode_needed = False
@@ -33725,21 +35708,29 @@ body{padding:18px}
33725
35708
  f"- Option A: Direct workaround — bypass the blocker with an alternative method\n"
33726
35709
  f"- Option B: Different path — re-approach the goal from a completely different angle\n"
33727
35710
  f"- Option C: Minimal viable + user action items — do what's possible now, list what the user needs to do manually\n\n"
33728
- f"Call the submit_plan_proposal tool with:\n"
35711
+ f"You MUST call the submit_plan_proposal tool exactly once with:\n"
33729
35712
  f"- context: brief failure analysis (what was tried, what failed, why)\n"
33730
35713
  f"- options: array of 3 options, each with id (A/B/C), title, summary, steps, pros, cons, risk\n"
33731
- f"- recommended: id of the recommended option\n\n"
35714
+ f"- recommended: id of the recommended option\n"
35715
+ f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n\n"
33732
35716
  f"{model_language_instruction(self.ui_language)}"
33733
35717
  )
33734
35718
  synthesis_ctx = [
33735
- {"role": "system", "content": "You are a recovery planner analyzing execution failures and proposing alternative approaches.", "ts": now_ts()},
35719
+ {
35720
+ "role": "system",
35721
+ "content": (
35722
+ "You are a recovery planner analyzing execution failures and proposing alternative approaches. "
35723
+ "You MUST call submit_plan_proposal exactly once."
35724
+ ),
35725
+ "ts": now_ts(),
35726
+ },
33736
35727
  {"role": "user", "content": synthesis_prompt, "ts": now_ts()},
33737
35728
  ]
33738
35729
  try:
33739
35730
  response = self._chat_with_same_model_retry(
33740
35731
  synthesis_ctx,
33741
35732
  tools=self._plan_mode_synthesis_tools(),
33742
- system="Generate a structured stall recovery plan. Use the submit_plan_proposal tool.",
35733
+ system="Generate a structured stall recovery plan. You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
33743
35734
  max_tokens=STALL_PLAN_SYNTHESIS_MAX_TOKENS,
33744
35735
  think=False,
33745
35736
  stream_thinking=False,
@@ -33748,12 +35739,33 @@ body{padding:18px}
33748
35739
  context_label="stall-plan synthesis",
33749
35740
  retries=MODEL_OUTPUT_RETRY_TIMES,
33750
35741
  )
33751
- tool_calls = response.get("tool_calls", [])
33752
- for tc in tool_calls:
33753
- if tc.get("function", {}).get("name") == "submit_plan_proposal":
33754
- args = tc["function"].get("arguments", {})
33755
- if isinstance(args, dict) and args.get("options"):
33756
- return dict(args)
35742
+ proposal = self._extract_plan_proposal_from_response(response)
35743
+ if proposal.get("options"):
35744
+ return proposal
35745
+ repair_response = self._chat_with_same_model_retry(
35746
+ synthesis_ctx + [
35747
+ {
35748
+ "role": "user",
35749
+ "content": (
35750
+ "Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
35751
+ "Retry now. Output exactly one submit_plan_proposal tool call and no prose."
35752
+ ),
35753
+ "ts": now_ts(),
35754
+ }
35755
+ ],
35756
+ tools=self._plan_mode_synthesis_tools(),
35757
+ system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
35758
+ max_tokens=STALL_PLAN_SYNTHESIS_MAX_TOKENS,
35759
+ think=False,
35760
+ stream_thinking=False,
35761
+ on_thinking_chunk=self._append_live_thinking,
35762
+ pinned_selection=pinned_selection,
35763
+ context_label="stall-plan synthesis repair",
35764
+ retries=1,
35765
+ )
35766
+ proposal = self._extract_plan_proposal_from_response(repair_response)
35767
+ if proposal.get("options"):
35768
+ return proposal
33757
35769
  except Exception as exc:
33758
35770
  self._emit("status", {"summary": f"stall plan synthesis error: {exc}"})
33759
35771
  return {}
@@ -33824,6 +35836,186 @@ body{padding:18px}
33824
35836
  lines.append(f"- {trim(str(t), 100)}")
33825
35837
  return "\n".join(lines)
33826
35838
 
35839
def _normalize_plan_proposal_option(self, raw: dict, *, fallback_id: str) -> dict | None:
    """Coerce one raw plan option into the canonical option dict.

    Args:
        raw: Candidate option mapping (typically model output).
        fallback_id: Id to use when ``raw`` carries no usable ``id``.

    Returns:
        A dict with ``id``, ``title``, ``summary``, ``steps``, ``pros``,
        ``cons`` and ``risk`` keys, or ``None`` when ``raw`` is not a dict or
        yields no non-empty step.
    """
    if not isinstance(raw, dict):
        return None

    def _text(key: str) -> str:
        # Missing/falsy values become "", everything else is stringified and stripped.
        return str(raw.get(key, "") or "").strip()

    opt_id = trim(str(raw.get("id", "") or fallback_id).strip().upper(), 8) or fallback_id
    title = trim(_text("title"), 200)
    summary = trim(_text("summary"), 600)

    # "steps" may arrive as a list of items or as one plain string.
    steps_raw = raw.get("steps", [])
    if isinstance(steps_raw, list):
        step_sources = (str(item or "") for item in steps_raw)
    elif isinstance(steps_raw, str):
        step_sources = iter((steps_raw,))
    else:
        step_sources = iter(())
    steps: list[str] = []
    for source in step_sources:
        cleaned = normalize_embedded_newlines(source).strip()
        if cleaned:
            steps.append(trim(cleaned, PLAN_STEP_FULL_CONTENT_MAX_CHARS))

    # An option without any step is unusable, whatever else it carries.
    if not steps:
        return None

    pros = trim(_text("pros"), 400)
    cons = trim(_text("cons"), 400)
    risk = trim(_text("risk").lower(), 20)
    risk = risk if risk in {"low", "medium", "high"} else "medium"

    # Derive a missing title from the summary's first line, then from the first step.
    if not title and summary:
        title = trim(summary.partition("\n")[0], 120)
    if not title:
        title = trim(steps[0].partition("\n")[0], 120)
    if not summary:
        summary = trim(steps[0], 300)

    return {
        "id": opt_id,
        "title": title or f"Option {opt_id}",
        "summary": summary or title or f"Plan {opt_id}",
        "steps": steps,
        "pros": pros,
        "cons": cons,
        "risk": risk,
    }
35878
+
35879
def _normalize_plan_proposal_payload(self, raw: object) -> dict:
    """Normalize a raw plan proposal into ``{"context", "options", "recommended"}``.

    Args:
        raw: Candidate proposal; anything that is not a dict is treated as empty.

    Returns:
        A dict whose ``options`` list holds at most ``PLAN_MODE_MAX_OPTIONS``
        normalized options with unique ids, and whose ``recommended`` is one of
        those ids (the first option's id when the requested one is unknown, or
        ``""`` when there are no options).
    """
    src = raw if isinstance(raw, dict) else {}
    context = trim(str(src.get("context", "") or "").strip(), 2000)

    raw_options = src.get("options", [])
    if isinstance(raw_options, dict):
        # A single option object is accepted in place of a list.
        raw_options = [raw_options]
    if not isinstance(raw_options, list):
        raw_options = []

    option_ids = ("A", "B", "C")
    last_slot = len(option_ids) - 1
    options: list[dict] = []
    seen_ids: set[str] = set()
    # Scan up to twice the option cap so invalid entries can be skipped.
    for idx, item in enumerate(raw_options[: max(1, PLAN_MODE_MAX_OPTIONS * 2)]):
        positional_id = option_ids[min(idx, last_slot)]
        normalized = self._normalize_plan_proposal_option(item, fallback_id=positional_id)
        if not normalized:
            continue
        opt_id = str(normalized.get("id", "") or "").strip().upper() or positional_id
        if opt_id in seen_ids:
            # Duplicate id: first try the slot implied by how many ids are taken.
            # If that slot is taken as well (e.g. ids "B", "B"), use the first
            # canonical id still free instead of dropping a valid option.
            candidate = option_ids[min(len(seen_ids), last_slot)]
            if candidate in seen_ids:
                candidate = next((cid for cid in option_ids if cid not in seen_ids), "")
            if not candidate:
                # Every canonical id is in use; this option cannot be given a unique id.
                continue
            opt_id = candidate
            normalized["id"] = opt_id
        seen_ids.add(opt_id)
        options.append(normalized)
        if len(options) >= PLAN_MODE_MAX_OPTIONS:
            break

    recommended = trim(str(src.get("recommended", "") or "").strip().upper(), 8)
    valid_ids = {str(opt.get("id", "") or "").strip().upper() for opt in options}
    if recommended not in valid_ids:
        recommended = str(options[0].get("id", "A") or "A") if options else ""
    return {
        "context": context,
        "options": options,
        "recommended": recommended,
    }
35916
+
35917
def _parse_plan_proposal_from_text(self, text: str) -> dict:
    """Try to recover a plan proposal from free-form model text.

    Candidates are tried in order: the whole text, each fenced code block, and
    the span from the first ``{`` to the last ``}``. Each candidate is parsed
    as-is and again after ``repair_truncated_json_object``.

    Returns:
        The first normalized proposal that contains options, else ``{}``.
    """
    raw = str(text or "").strip()
    if not raw:
        return {}

    candidates: list[str] = [raw]
    fenced_blocks = re.findall(r"```(?:json)?\s*([\s\S]*?)```", raw, flags=re.IGNORECASE)
    candidates.extend(
        stripped
        for stripped in (str(block or "").strip() for block in fenced_blocks)
        if stripped
    )
    first_brace, last_brace = raw.find("{"), raw.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(raw[first_brace : last_brace + 1].strip())

    for candidate in candidates:
        for probe in (candidate, repair_truncated_json_object(candidate)):
            if not probe:
                continue
            try:
                parsed = json.loads(probe)
            except Exception:
                continue
            # A bare JSON array is treated as the options list itself.
            payload = (
                {"context": "", "options": parsed, "recommended": ""}
                if isinstance(parsed, list)
                else parsed
            )
            proposal = self._normalize_plan_proposal_payload(payload)
            if proposal.get("options"):
                return proposal
    return {}
35946
+
35947
def _extract_plan_proposal_from_response(self, response: dict | None) -> dict:
    """Pull a normalized plan proposal out of a chat response.

    ``submit_plan_proposal`` tool calls are inspected first; their arguments
    may be a dict or a string that still needs parsing. If none yields options,
    the response ``content`` text is parsed instead.

    Returns:
        A normalized proposal, or ``{}`` when ``response`` is not a dict or
        nothing usable is found.
    """
    if not isinstance(response, dict):
        return {}

    tool_calls = response.get("tool_calls", [])
    for call in tool_calls if isinstance(tool_calls, list) else ():
        if not isinstance(call, dict):
            continue
        fn_raw = call.get("function")
        fn = fn_raw if isinstance(fn_raw, dict) else {}
        if str(fn.get("name", "") or "").strip() != "submit_plan_proposal":
            continue
        args = fn.get("arguments", {})
        if isinstance(args, dict):
            payload = args
        elif isinstance(args, str):
            payload, _ = parse_tool_arguments_with_error(args)
        else:
            # Arguments of any other type are ignored.
            continue
        proposal = self._normalize_plan_proposal_payload(payload)
        if proposal.get("options"):
            return proposal

    return self._parse_plan_proposal_from_text(str(response.get("content", "") or ""))
35969
+
35970
def _synthesis_programmatic_fallback(self) -> dict:
    """Build a single-option plan proposal without calling the model.

    The proposal is assembled from the current goal text and up to six research
    findings stored under ``blackboard["plan"]["findings"]``. A five-step
    template is used when findings exist, a four-step one otherwise.

    Returns:
        The proposal after ``_normalize_plan_proposal_payload``.
    """
    bb = self._ensure_blackboard()
    goal = trim(str(self.runtime_reclassify_goal or self._latest_user_goal_text() or ""), 1200)

    plan_data = bb.get("plan")
    findings = plan_data.get("findings", []) if isinstance(plan_data, dict) else []
    # Malformed findings (None, dict, ...) must not crash this fallback;
    # _synthesis_minimal_fallback applies the same list check.
    if not isinstance(findings, list):
        findings = []

    finding_lines: list[str] = []
    for row in findings[:6]:
        if not isinstance(row, dict):
            continue
        content = trim(str(row.get("content", "") or "").strip(), 280)
        if content:
            finding_lines.append(content)

    context = trim(
        (
            "Fallback synthesis generated automatically from the user goal and current research findings. "
            + (" | ".join(finding_lines[:3]) if finding_lines else goal)
        ),
        1800,
    )

    if finding_lines:
        detailed_steps = [
            "1. Review findings and lock scope\nUse the collected findings to define the exact execution boundary and required inputs.",
            "2. Prepare files and dependencies\nCreate or align the necessary files, folders, and runtime prerequisites for the task.",
            "3. Implement the main work\nExecute the core build/change/generation work for the requested output.",
            "4. Validate with observable evidence\nRun a concrete check and confirm the expected output, exit code, or rendered result.",
            "5. Generate delivery report\nSummarize what was built, how to run it, and the key outputs.",
        ]
    else:
        detailed_steps = [
            "1. Scope and constraints\nClarify the exact deliverable, inputs, and acceptance criteria for this task.",
            "2. Core implementation\nBuild the main artifact for the request using the most direct workable path.",
            "3. Verification\nRun at least one observable validation and capture the result.",
            "4. Delivery report\nSummarize what was built, how to run it, and the key outputs.",
        ]

    proposal = {
        "context": context,
        "options": [
            {
                "id": "A",
                "title": "Direct Execution Plan",
                "summary": trim(goal or "Execute the requested task with a direct, verifiable plan.", 240),
                "steps": detailed_steps,
                "pros": "Deterministic fallback that keeps plan-mode available even when model synthesis formatting is unstable.",
                "cons": "Less tailored than a fully synthesized multi-option proposal.",
                "risk": "medium",
            }
        ],
        "recommended": "A",
    }
    return self._normalize_plan_proposal_payload(proposal)
36018
+
33827
36019
  def _plan_mode_synthesize_proposal(self, pinned_selection: str) -> dict:
33828
36020
  bb = self._ensure_blackboard()
33829
36021
  plan_data = bb.get("plan", {})
@@ -33859,10 +36051,11 @@ body{padding:18px}
33859
36051
  f"## Research Findings\n{trim(findings_text, 6000)}\n\n"
33860
36052
  f"{skills_section}"
33861
36053
  f"## Instructions\n"
33862
- f"Call the submit_plan_proposal tool with:\n"
36054
+ f"You MUST call the submit_plan_proposal tool exactly once with:\n"
33863
36055
  f"- context: brief background analysis\n"
33864
36056
  f"- options: array of 1-{PLAN_MODE_MAX_OPTIONS} options, each with id (A/B/C), title, summary, steps, pros, cons, risk\n"
33865
- f"- recommended: id of the recommended option\n\n"
36057
+ f"- recommended: id of the recommended option\n"
36058
+ f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n\n"
33866
36059
  f"STEP QUALITY REQUIREMENTS:\n"
33867
36060
  f"- Each step must be a concrete, actionable instruction (NOT vague like 'analyze reports')\n"
33868
36061
  f"- Include specific file paths (e.g., 'Read uploaded/IEDM_.parsed.md to extract key findings')\n"
@@ -33934,7 +36127,11 @@ body{padding:18px}
33934
36127
  response = self._chat_with_same_model_retry(
33935
36128
  synthesis_ctx,
33936
36129
  tools=self._plan_mode_synthesis_tools(),
33937
- system="Generate a structured plan proposal. Use the submit_plan_proposal tool.",
36130
+ system=(
36131
+ "Generate a structured plan proposal. "
36132
+ "You MUST call submit_plan_proposal exactly once. "
36133
+ "Do not answer with plain text."
36134
+ ),
33938
36135
  max_tokens=PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS,
33939
36136
  think=False,
33940
36137
  stream_thinking=False,
@@ -33943,13 +36140,31 @@ body{padding:18px}
33943
36140
  context_label="plan-mode synthesis",
33944
36141
  retries=MODEL_OUTPUT_RETRY_TIMES,
33945
36142
  )
33946
- tool_calls = response.get("tool_calls", [])
33947
- for tc in tool_calls:
33948
- if tc.get("function", {}).get("name") == "submit_plan_proposal":
33949
- args = tc["function"].get("arguments", {})
33950
- if isinstance(args, dict) and args.get("options"):
33951
- return dict(args)
33952
- return {}
36143
+ proposal = self._extract_plan_proposal_from_response(response)
36144
+ if proposal.get("options"):
36145
+ return proposal
36146
+ repair_response = self._chat_with_same_model_retry(
36147
+ synthesis_ctx + [
36148
+ {
36149
+ "role": "user",
36150
+ "content": (
36151
+ "Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
36152
+ "Retry now. Output exactly one submit_plan_proposal tool call and no prose."
36153
+ ),
36154
+ "ts": now_ts(),
36155
+ }
36156
+ ],
36157
+ tools=self._plan_mode_synthesis_tools(),
36158
+ system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
36159
+ max_tokens=PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS,
36160
+ think=False,
36161
+ stream_thinking=False,
36162
+ on_thinking_chunk=self._append_live_thinking,
36163
+ pinned_selection=pinned_selection,
36164
+ context_label="plan-mode synthesis repair",
36165
+ retries=1,
36166
+ )
36167
+ return self._extract_plan_proposal_from_response(repair_response)
33953
36168
 
33954
36169
  def _synthesis_minimal_fallback(self, pinned_selection: str) -> dict:
33955
36170
  """Last-resort: ask model for a single simple plan with higher max_tokens."""
@@ -33961,33 +36176,63 @@ body{padding:18px}
33961
36176
  for f in (findings[:5] if isinstance(findings, list) else [])
33962
36177
  )
33963
36178
  prompt = (
33964
- f"Generate ONE simple plan for this task. Call submit_plan_proposal with exactly 1 option.\n\n"
36179
+ f"Generate ONE simple plan for this task. You MUST call submit_plan_proposal with exactly 1 option.\n\n"
33965
36180
  f"Task: {goal}\n\nFindings: {trim(findings_text, 3000)}\n\n"
33966
36181
  f"Return a single option with id='A', title, summary, and 5-10 concrete steps.\n"
36182
+ f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n"
33967
36183
  f"{model_language_instruction(self.ui_language)}"
33968
36184
  )
33969
36185
  ctx = [
33970
- {"role": "system", "content": "You must call submit_plan_proposal tool.", "ts": now_ts()},
36186
+ {
36187
+ "role": "system",
36188
+ "content": (
36189
+ "You MUST call submit_plan_proposal exactly once. "
36190
+ "Do not answer with plain text."
36191
+ ),
36192
+ "ts": now_ts(),
36193
+ },
33971
36194
  {"role": "user", "content": prompt, "ts": now_ts()},
33972
36195
  ]
33973
36196
  try:
33974
36197
  response = self._chat_with_same_model_retry(
33975
36198
  ctx,
33976
36199
  tools=self._plan_mode_synthesis_tools(),
33977
- system="Call submit_plan_proposal now.",
36200
+ system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
33978
36201
  max_tokens=6000,
33979
36202
  think=False,
33980
36203
  stream_thinking=False,
33981
36204
  on_thinking_chunk=self._append_live_thinking,
33982
36205
  pinned_selection=pinned_selection,
33983
36206
  context_label="plan-mode minimal fallback",
33984
- retries=2,
36207
+ retries=3,
36208
+ )
36209
+ proposal = self._extract_plan_proposal_from_response(response)
36210
+ if proposal.get("options"):
36211
+ return proposal
36212
+ repair_response = self._chat_with_same_model_retry(
36213
+ ctx + [
36214
+ {
36215
+ "role": "user",
36216
+ "content": (
36217
+ "Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
36218
+ "Retry now. Output exactly one submit_plan_proposal tool call and no prose."
36219
+ ),
36220
+ "ts": now_ts(),
36221
+ }
36222
+ ],
36223
+ tools=self._plan_mode_synthesis_tools(),
36224
+ system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
36225
+ max_tokens=6000,
36226
+ think=False,
36227
+ stream_thinking=False,
36228
+ on_thinking_chunk=self._append_live_thinking,
36229
+ pinned_selection=pinned_selection,
36230
+ context_label="plan-mode minimal fallback repair",
36231
+ retries=1,
33985
36232
  )
33986
- for tc in response.get("tool_calls", []):
33987
- if tc.get("function", {}).get("name") == "submit_plan_proposal":
33988
- args = tc["function"].get("arguments", {})
33989
- if isinstance(args, dict) and args.get("options"):
33990
- return dict(args)
36233
+ proposal = self._extract_plan_proposal_from_response(repair_response)
36234
+ if proposal.get("options"):
36235
+ return proposal
33991
36236
  except Exception:
33992
36237
  pass
33993
36238
  return {}
@@ -34071,7 +36316,7 @@ body{padding:18px}
34071
36316
  grouped_steps = self._group_plan_steps(raw_steps if isinstance(raw_steps, list) else [])
34072
36317
  plan_todos: list[dict] = []
34073
36318
  for i, step in enumerate(grouped_steps[:max(1, int(limit))]):
34074
- step_text = trim(str(step or "").strip(), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
36319
+ step_text = trim(normalize_embedded_newlines(step).strip(), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
34075
36320
  if not step_text:
34076
36321
  continue
34077
36322
  step_lines = step_text.split("\n")
@@ -34085,6 +36330,7 @@ body{padding:18px}
34085
36330
  "category": "plan_step",
34086
36331
  "plan_step_index": i,
34087
36332
  "created_at": float(now_ts()),
36333
+ "activated_at": float(now_ts()) if not plan_todos else None,
34088
36334
  "completed_at": None,
34089
36335
  "completed_by": "",
34090
36336
  "evidence": "",
@@ -34197,7 +36443,7 @@ body{padding:18px}
34197
36443
  _mid_re_exec = _re_exec.compile(r"(?<=\S)\s+(\d+\.\d+\s)")
34198
36444
  for t in plan_todos:
34199
36445
  idx = int(t.get("plan_step_index", 0) or 0) + 1
34200
- full = str(t.get("full_content", "") or t.get("content", "")).strip()
36446
+ full = normalize_embedded_newlines(t.get("full_content", "") or t.get("content", "")).strip()
34201
36447
  # Normalize: split concatenated N.N sub-steps onto own lines
34202
36448
  full = _mid_re_exec.sub(r"\n\1", full)
34203
36449
  header = full.split("\n")[0] if "\n" in full else full
@@ -34359,7 +36605,7 @@ body{padding:18px}
34359
36605
  # Phase 0: Normalize — split mid-string N.N onto own lines
34360
36606
  normalized: list[str] = []
34361
36607
  for s in raw_steps:
34362
- text = str(s or "").strip()
36608
+ text = normalize_embedded_newlines(s).strip()
34363
36609
  if not text:
34364
36610
  continue
34365
36611
  fixed = mid_numbered_re.sub(r"\n\1", text)
@@ -34694,18 +36940,16 @@ body{padding:18px}
34694
36940
  chosen_title = trim(str(chosen.get("title", "") or choice_id).strip(), 800)
34695
36941
  chosen_summary = trim(str(chosen.get("summary", "") or "").strip(), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
34696
36942
  # Preserve current complexity unless the user explicitly changes it elsewhere.
34697
- _current_complexity = trim(
34698
- str(
34699
- self.runtime_task_complexity
34700
- or profile.get("complexity", judgement.get("complexity", ""))
34701
- or ""
34702
- ).strip().lower(),
34703
- 20,
36943
+ _current_complexity = normalize_task_complexity(
36944
+ self.runtime_task_complexity
36945
+ or profile.get("complexity", judgement.get("complexity", ""))
36946
+ or "",
36947
+ default="",
34704
36948
  )
34705
36949
  if _current_complexity in TASK_COMPLEXITY_LEVELS:
34706
36950
  self.runtime_task_complexity = _current_complexity
34707
36951
  else:
34708
- _current_complexity = trim(str(self.runtime_task_complexity or "").strip().lower(), 20)
36952
+ _current_complexity = normalize_task_complexity(str(self.runtime_task_complexity or "").strip().lower(), default="")
34709
36953
  self.runtime_complexity_floor = str(_current_complexity or "complex")
34710
36954
  _plan_risk = self._resolve_plan_option_risk(chosen)
34711
36955
  try:
@@ -35007,13 +37251,6 @@ body{padding:18px}
35007
37251
  self.agent_round_index = int(self.agent_round_index) + 1
35008
37252
  self.current_phase = "model-call"
35009
37253
  self.current_tool_name = ""
35010
- # Single-mode skill auto-discovery: same as plan mode. Runs on first 2 rounds only.
35011
- # Uses goal_sig dedup — if skills already loaded for this goal, no-op.
35012
- if int(self.agent_round_index) <= 2:
35013
- try:
35014
- self._refresh_loaded_skills_for_execution_focus(trigger="single-worker-pre")
35015
- except Exception:
35016
- pass
35017
37254
  if level_budget > 0 and int(self.agent_round_index) > int(level_budget):
35018
37255
  force_single_tool_rounds = max(force_single_tool_rounds, 2)
35019
37256
  if not compact_budget_notified:
@@ -35264,7 +37501,7 @@ body{padding:18px}
35264
37501
  )
35265
37502
  continue
35266
37503
  stop_note = (
35267
- "模型连续多轮仅输出思考而无动作,自动执行已熔断停止(fault_counter>=3)。"
37504
+ "模型连续多轮仅输出思考而无动作,自动执行已熔断停止(fault_counter>=15)。"
35268
37505
  "请尝试拆分任务,或切换更强的推理模型后继续。"
35269
37506
  )
35270
37507
  raise CircuitBreakerTriggered(stop_note)
@@ -35611,6 +37848,7 @@ body{padding:18px}
35611
37848
  self.current_phase = f"tool:{name}"
35612
37849
  self.current_tool_name = name
35613
37850
  round_tool_names.append(name)
37851
+ todo_rows_before = self.todo.snapshot() if name in {"TodoWrite", "TodoWriteRescue"} else None
35614
37852
  args = tc["function"]["arguments"]
35615
37853
  args_error = str(tc.get("args_error", "") or "").strip()
35616
37854
  raw_args = tc.get("raw_arguments")
@@ -35775,15 +38013,41 @@ body{padding:18px}
35775
38013
  recovery_retry_rounds = 0
35776
38014
  if dispatched_name in {"TodoWrite", "TodoWriteRescue"}:
35777
38015
  todo_attempted = True
35778
- state, reason = self._analyze_todo_result(dispatched_name, output)
38016
+ todo_rows_after = self.todo.snapshot()
38017
+ state, reason = self._analyze_todo_result(
38018
+ dispatched_name,
38019
+ output,
38020
+ before_rows=todo_rows_before,
38021
+ after_rows=todo_rows_after,
38022
+ )
35779
38023
  if state == "ok":
35780
38024
  used_todo = True
35781
38025
  self.todo_write_issue_count = 0
35782
38026
  self.todo_last_issue = ""
38027
+ self._emit(
38028
+ "status",
38029
+ {"summary": f"todo updated ({trim(reason, 100)})"},
38030
+ )
35783
38031
  else:
35784
38032
  self.todo_write_issue_count += 1
35785
38033
  self.todo_last_issue = reason
35786
- if self.todo_write_issue_count >= 2 and not self._todo_runtime_has_worker_rows(single_role):
38034
+ self._emit(
38035
+ "status",
38036
+ {
38037
+ "summary": (
38038
+ "todo update produced no progress "
38039
+ f"({trim(reason, 100)})"
38040
+ )
38041
+ },
38042
+ )
38043
+ repeat_no_progress = any(
38044
+ token in str(reason or "").lower()
38045
+ for token in ("repeated", "no progress", "without changing")
38046
+ )
38047
+ if self.todo_write_issue_count >= 2 and (
38048
+ not self._todo_runtime_has_worker_rows(single_role)
38049
+ or repeat_no_progress
38050
+ ):
35787
38051
  self._emit(
35788
38052
  "status",
35789
38053
  {
@@ -36118,6 +38382,22 @@ body{padding:18px}
36118
38382
  self.rounds_without_todo += 1
36119
38383
  else:
36120
38384
  self.rounds_without_todo += 1
38385
+ concrete_work_without_todo = (
38386
+ not used_todo
38387
+ and self._todo_runtime_has_worker_rows(single_role)
38388
+ and any(
38389
+ isinstance(r, dict)
38390
+ and r.get("ok", False)
38391
+ and str(r.get("name", "") or "") in {
38392
+ "write_file",
38393
+ "edit_file",
38394
+ "bash",
38395
+ "read_file",
38396
+ "write_to_blackboard",
38397
+ }
38398
+ for r in single_round_tool_results
38399
+ )
38400
+ )
36121
38401
  if (
36122
38402
  todo_attempted
36123
38403
  and not used_todo
@@ -36142,18 +38422,25 @@ body{padding:18px}
36142
38422
  now_tick = now_ts()
36143
38423
  can_remind = (now_tick - self.last_todo_reminder_ts) >= 20
36144
38424
  if can_remind and self.todo_reminder_count < 2:
36145
- if not self._todo_runtime_has_worker_rows(single_role) and self.rounds_without_todo >= 2:
36146
- self.messages.append(
36147
- {
36148
- "role": "user",
36149
- "content": "<reminder>Please call TodoWrite now to update the current subtask before continuing. If it fails/repeats, switch to TodoWriteRescue.</reminder>",
36150
- "ts": now_tick,
36151
- }
38425
+ if concrete_work_without_todo:
38426
+ self._append_plan_guidance_bubble(
38427
+ "<reminder>Update your todos now: finish the current subtask in TodoWrite before moving on.</reminder>",
38428
+ summary="todo reminder",
38429
+ )
38430
+ self.last_todo_reminder_ts = now_tick
38431
+ self.todo_reminder_count += 1
38432
+ elif not self._todo_runtime_has_worker_rows(single_role) and self.rounds_without_todo >= 2:
38433
+ self._append_plan_guidance_bubble(
38434
+ "<reminder>Please call TodoWrite now to update the current subtask before continuing. If it fails/repeats, switch to TodoWriteRescue.</reminder>",
38435
+ summary="todo reminder",
36152
38436
  )
36153
38437
  self.last_todo_reminder_ts = now_tick
36154
38438
  self.todo_reminder_count += 1
36155
38439
  elif self._todo_should_block_auto_continue("") and self.rounds_without_todo >= 4:
36156
- self.messages.append({"role": "user", "content": "<reminder>Update your todos now: finish the current subtask in TodoWrite before moving on.</reminder>", "ts": now_tick})
38440
+ self._append_plan_guidance_bubble(
38441
+ "<reminder>Update your todos now: finish the current subtask in TodoWrite before moving on.</reminder>",
38442
+ summary="todo reminder",
38443
+ )
36157
38444
  self.last_todo_reminder_ts = now_tick
36158
38445
  self.todo_reminder_count += 1
36159
38446
  if manual_compact:
@@ -36247,6 +38534,12 @@ body{padding:18px}
36247
38534
  self._generate_run_completion_summary()
36248
38535
  except Exception:
36249
38536
  pass
38537
+ try:
38538
+ _applied_runtime_updates = self._apply_deferred_runtime_updates()
38539
+ for _note in _applied_runtime_updates[:6]:
38540
+ self._emit("status", {"summary": _note})
38541
+ except Exception:
38542
+ pass
36250
38543
  self._emit("status", {"summary": "run finished"})
36251
38544
  cb = self.run_finished_callback
36252
38545
  if cb:
@@ -36525,6 +38818,7 @@ body{padding:18px}
36525
38818
  "live_run_notice_elapsed": round(float(self.live_run_notice_elapsed or 0.0), 1),
36526
38819
  "max_agent_rounds": int(self.max_agent_rounds),
36527
38820
  "max_run_seconds": int(self.max_run_seconds),
38821
+ "shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
36528
38822
  "auto_model_switch": bool(self.auto_model_switch),
36529
38823
  "arbiter_enabled": bool(self.arbiter_enabled),
36530
38824
  "arbiter_model": str(self.arbiter_model or ""),
@@ -36704,6 +38998,7 @@ class SessionManager:
36704
38998
  context_limit_locked: bool = False,
36705
38999
  max_rounds: int = MAX_AGENT_ROUNDS,
36706
39000
  max_run_seconds: int = MAX_RUN_SECONDS,
39001
+ shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
36707
39002
  auto_model_switch: bool = False,
36708
39003
  arbiter_enabled: bool = True,
36709
39004
  arbiter_model: str = "",
@@ -36749,6 +39044,12 @@ class SessionManager:
36749
39044
  maximum=MAX_RUN_TIMEOUT_SECONDS,
36750
39045
  fallback=MAX_RUN_SECONDS,
36751
39046
  )
39047
+ self.shell_command_timeout_seconds = normalize_timeout_seconds(
39048
+ shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
39049
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
39050
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
39051
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
39052
+ )
36752
39053
  self.auto_model_switch = bool(auto_model_switch)
36753
39054
  self.arbiter_enabled = bool(arbiter_enabled)
36754
39055
  self.arbiter_model = str(arbiter_model or "").strip()
@@ -37031,6 +39332,12 @@ class SessionManager:
37031
39332
  )
37032
39333
  sess.execution_mode = normalize_execution_mode(self.execution_mode, default=EXECUTION_MODE_SYNC)
37033
39334
  sess.single_advance_prompt_enhance = bool(self.single_advance_prompt_enhance)
39335
+ sess.shell_command_timeout_seconds = normalize_timeout_seconds(
39336
+ self.shell_command_timeout_seconds,
39337
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
39338
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
39339
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
39340
+ )
37034
39341
  sess._apply_active_profile()
37035
39342
  sess.updated_at = now_ts()
37036
39343
  sess._persist()
@@ -37091,6 +39398,7 @@ class SessionManager:
37091
39398
  context_limit_locked=self.context_limit_locked,
37092
39399
  max_rounds=self.max_rounds,
37093
39400
  max_run_seconds=self.max_run_seconds,
39401
+ shell_command_timeout_seconds=self.shell_command_timeout_seconds,
37094
39402
  auto_model_switch=self.auto_model_switch,
37095
39403
  arbiter_enabled=self.arbiter_enabled,
37096
39404
  arbiter_model=self.arbiter_model,
@@ -37140,6 +39448,7 @@ class SessionManager:
37140
39448
  context_limit_locked=self.context_limit_locked,
37141
39449
  max_rounds=self.max_rounds,
37142
39450
  max_run_seconds=self.max_run_seconds,
39451
+ shell_command_timeout_seconds=self.shell_command_timeout_seconds,
37143
39452
  auto_model_switch=self.auto_model_switch,
37144
39453
  arbiter_enabled=self.arbiter_enabled,
37145
39454
  arbiter_model=self.arbiter_model,
@@ -38188,7 +40497,7 @@ function renderLlmFields(provider){const container=E('llmFieldsContainer');if(!c
38188
40497
  async function scanOllamaModels(){const urlEl=E('llmF_ollama_url');const sel=E('llmF_ollama_model');const hint=E('ollamaScanHint');const baseUrl=(urlEl?.value||'').trim()||'http://127.0.0.1:11434';if(hint)hint.textContent=t('llm_scanning');try{const res=await fetch('/api/ollama/models?base_url='+encodeURIComponent(baseUrl));const data=await res.json();if(!data.ok||!data.models?.length){if(hint)hint.textContent=t('llm_scan_empty')+(data.error?' ('+data.error+')':'');return}if(sel){sel.innerHTML='';for(const m of data.models){const op=document.createElement('option');op.value=m;op.textContent=m;sel.appendChild(op)}}if(hint)hint.textContent=t('llm_scan_found').replace('{n}',String(data.models.length))}catch(err){if(hint)hint.textContent=t('llm_scan_error')+': '+(err.message||String(err))}}
38189
40498
  async function scanOpenAICompatModels(provider){const scanMap={openai_compat:{urlKey:'openai_url',modelKey:'openai_model',keyKey:'openai_key',defaultUrl:'https://api.openai.com/v1'},siliconflow:{urlKey:'siliconflow_url',modelKey:'siliconflow_model',keyKey:'siliconflow_key',defaultUrl:'https://api.siliconflow.cn/v1'},vllm:{urlKey:'vllm_url',modelKey:'vllm_model',keyKey:'vllm_key',defaultUrl:'http://localhost:8000/v1'},lmstudio:{urlKey:'lmstudio_url',modelKey:'lmstudio_model',keyKey:'lmstudio_key',defaultUrl:'http://localhost:1234/v1'},glm:{urlKey:'glm_url',modelKey:'glm_model',keyKey:'glm_key',defaultUrl:'https://open.bigmodel.cn/api/paas/v4'},kimi:{urlKey:'kimi_url',modelKey:'kimi_model',keyKey:'kimi_key',defaultUrl:'https://api.moonshot.cn/v1'},openrouter:{urlKey:'openrouter_url',modelKey:'openrouter_model',keyKey:'openrouter_key',defaultUrl:'https://openrouter.ai/api/v1'},custom_http:{urlKey:'custom_url',modelKey:'custom_model',keyKey:'custom_key',defaultUrl:''}};const normalizedProvider=String(provider||'openai_compat').trim()||'openai_compat';const meta=scanMap[normalizedProvider]||scanMap.openai_compat;const urlEl=E('llmF_'+meta.urlKey);const modelEl=E('llmF_'+meta.modelKey);const hint=E('localScanHint');const baseUrl=(urlEl?.value||'').trim()||meta.defaultUrl||'';const apiKey=(E('llmF_'+meta.keyKey)?.value||'').trim();if(hint)hint.textContent=t('llm_scanning');try{let url='/api/openai_compat/models?provider='+encodeURIComponent(normalizedProvider)+'&base_url='+encodeURIComponent(baseUrl);if(apiKey)url+='&api_key='+encodeURIComponent(apiKey);const res=await fetch(url);const data=await res.json();const models=Array.isArray(data.models)?data.models.filter(Boolean):[];if(!data.ok){if(hint)hint.textContent=t('llm_scan_error')+(data.error?' 
('+data.error+')':'');return}if(models.length){if(modelEl&&!String(modelEl.value||'').trim())modelEl.value=models[0];if(hint)hint.textContent=t('llm_scan_found').replace('{n}',String(models.length))+': '+models.slice(0,3).join(', ');return}if(data.reachable){if(hint)hint.textContent=t('llm_scan_reachable_manual')+(data.error?' ('+data.error+')':'');return}if(hint)hint.textContent=t('llm_scan_empty')+(data.error?' ('+data.error+')':'')}catch(err){if(hint)hint.textContent=t('llm_scan_error')+': '+(err.message||String(err))}}
38190
40499
  function collectLlmConfig(){const provider=E('llmProvider')?.value||'ollama';const config={provider:provider};if(provider==='ollama'){config.ollama_url=(E('llmF_ollama_url')?.value||'').trim()||'http://127.0.0.1:11434';config.ollama_model=E('llmF_ollama_model')?.value||''}else if(provider==='custom_http'){const fields=LLM_PROVIDER_FIELDS.custom_http;for(const f of fields){const el=E('llmF_'+f.key);if(!el)continue;if(f.type==='textarea'){config[f.key]=el.value.trim()}else if(f.key==='temperature'){const v=parseFloat(el.value);if(!isNaN(v))config[f.key]=v}else if(f.key==='request_timeout'){const v=parseInt(el.value,10);if(!isNaN(v)&&v>0)config[f.key]=v}else{config[f.key]=el.value.trim()}}}else{const fields=LLM_PROVIDER_FIELDS[provider]||[];for(const f of fields){const el=E('llmF_'+f.key);if(el){const raw=el.value.trim();config[f.key]=(provider!=='custom_http'&&f.type==='url')?(raw||String(f.placeholder||'').trim()):raw}}}config.thinking_stream=E('llmF_thinking_stream')?.value==='true';return config}
38191
- async function submitLlmConfig(){if(!S.activeId){showError(t('select_session_first'));return}const config=collectLlmConfig();try{const payload={filename:'LLM.config.json',mime:'application/json',content_b64:btoa(unescape(encodeURIComponent(JSON.stringify(config,null,2))))};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'))}else{showError('')}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true});E('llmConfigModal').style.display='none'}catch(err){showError(err.message||String(err))}}
40500
+ async function submitLlmConfig(){if(!S.activeId){showError(t('select_session_first'));return}const config=collectLlmConfig();try{const payload={filename:'LLM.config.json',mime:'application/json',content_b64:btoa(unescape(encodeURIComponent(JSON.stringify(config,null,2))))};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});const note=String(out?.note||out?.model_catalog?.note||'').trim();if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'))}else if(note){showError(note)}else{showError('')}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true});E('llmConfigModal').style.display='none'}catch(err){showError(err.message||String(err))}}
38192
40501
  function openLlmConfigModal(){const modal=E('llmConfigModal');if(!modal)return;modal.style.display='flex';const prov=E('llmProvider');if(prov){renderLlmFields(prov.value)}}
38193
40502
  const COMPACT_AUTO_REFRESH_COUNT=3;
38194
40503
  const COMPACT_AUTO_REFRESH_INTERVAL_MS=260;
@@ -38761,7 +41070,7 @@ function feedSignature(snap){const feed=Array.isArray(snap?.conversation_feed)?s
38761
41070
  function boardsSignature(snap){return [snap?.running?1:0,snap?.agent_phase||'',Number(snap?.agent_round_index||0),Number(snap?.queued_user_inputs_count||0),Number(snap?.truncation_count||0),Number(snap?.live_truncation_attempts||0),Number(snap?.live_truncation_tokens||0),snap?.live_truncation_active?1:0,Number(snap?.context_tokens_estimate||0),Number(snap?.context_left_tokens||0),Number(snap?.context_left_percent||0),Number(snap?.render_bridge?.seq||0),(snap?.todos||[]).length,(snap?.tasks||[]).length,(snap?.activity||[]).length,(snap?.operations||[]).length,(snap?.uploads||[]).length].join('|')}
38762
41071
  function sessionsSignature(list){const rows=Array.isArray(list)?list:[];const sig=tailSig(rows,6,row=>`${String(row?.id||'')}:${row?.running?1:0}:${Number(row?.message_count||0)}:${Number(row?.updated_at||0)}`);const aid=String(S.activeId||'').trim();let activeSig='-';if(aid){const activeRow=rows.find(row=>String(row?.id||'')===aid);if(activeRow){activeSig=`${aid}:${activeRow?.running?1:0}:${Number(activeRow?.message_count||0)}:${Number(activeRow?.updated_at||0)}`}else{activeSig=`missing:${aid}`}}return `${rows.length}|active=${activeSig}|${sig}`}
38763
41072
  function _statInfinite(n){const v=Number(n);return(Number.isFinite(v)&&v>0)?String(v):'∞'}
38764
- function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
41073
+ function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'shell_command_timeout_seconds'))S.config.shell_command_timeout_seconds=cfg.shell_command_timeout_seconds;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
38765
41074
  function renderStats(){const sessions=S.sessions.length;const running=S.sessions.filter(x=>x.running).length;const msgs=S.sessions.reduce((n,x)=>n+x.message_count,0);const model=S.config?.model||'-';const sched=(S.config&&typeof S.config.scheduler==='object')?S.config.scheduler:{};const quota=(S.config&&typeof S.config.session_creation_limit==='object')?S.config.session_creation_limit:{};const runningTotal=Math.max(0,Number(sched?.running_total||0));const maxTasks=Number(sched?.max_user||0);const globalTasks=`${runningTotal}/${_statInfinite(maxTasks)}`;const dailySessions=(quota&&quota.enabled)?`${Math.max(0,Number(quota.used||0))}/${Math.max(0,Number(quota.limit||0))}`:'∞';const compact=[[t('stat_sessions'),sessions],[t('stat_running'),running],[t('stat_messages'),msgs],[t('stat_global_tasks'),globalTasks],[t('stat_daily_sessions'),dailySessions]].map(([k,v])=>`<div class=\"stat compact\"><div class=\"k\">${esc(k)}</div><div class=\"v\">${esc(v)}</div></div>`).join('');const modelHtml=`<div class=\"stat model\"><div class=\"k\">${esc(t('stat_model'))}</div><div class=\"v\">${esc(model)}</div></div>`;E('topStats').innerHTML=`<div class=\"top-stats-primary\">${compact}</div><div class=\"top-stats-model\">${modelHtml}</div>`}
38766
41075
  function renderSessions(){const html=S.sessions.map(s=>`<div class=\"session-item${s.id===S.activeId?' active':''}\" data-id=\"${esc(s.id)}\"><div><strong>${esc(s.title)}</strong></div><div class=\"mono\">${s.running?t('running'):t('idle')} · ${s.message_count} msgs</div></div>`).join('');setPanelHtml('sessionList',html||`<div class=\"mono\">${esc(t('no_sessions'))}</div>`);for(const el of document.querySelectorAll('#sessionList .session-item')){el.onclick=()=>selectSession(el.getAttribute('data-id'))}}
38767
41076
  function _syncActiveSessionSummaryFromSnapshot(){const sid=String(S.activeId||'').trim();const snap=S.snap;if(!sid||!snap)return false;const rows=Array.isArray(S.sessions)?S.sessions.slice():[];let idx=rows.findIndex(row=>String(row?.id||'')===sid);const running=!!snap?.running;let updatedAt=Number(snap?.updated_at||0);if(!Number.isFinite(updatedAt)||updatedAt<=0){updatedAt=(Date.now()/1000)}let msgCount=Number(snap?.message_count);if(!Number.isFinite(msgCount)||msgCount<0){const arr=Array.isArray(snap?.messages)?snap.messages:[];let cnt=0;for(const row of arr){if(String(row?.role||'').trim()==='tool')continue;cnt+=1}msgCount=cnt}msgCount=Math.max(0,Math.floor(Number(msgCount)||0));const title=String(snap?.title||'').trim();if(idx<0){rows.push({id:sid,title:title||sid,running:running,updated_at:updatedAt,message_count:msgCount});idx=rows.length-1}else{const cur=rows[idx]||{};const next={...cur};let changed=false;if(!!cur.running!==running){next.running=running;changed=true}if(Number(cur.message_count||0)!==msgCount){next.message_count=msgCount;changed=true}if(Number(cur.updated_at||0)!==updatedAt){next.updated_at=updatedAt;changed=true}if(title&&String(cur.title||'')!==title){next.title=title;changed=true}if(!changed)return false;rows[idx]=next}rows.sort((a,b)=>Number(b?.updated_at||0)-Number(a?.updated_at||0));S.sessions=rows;return true}
@@ -40222,8 +42531,7 @@ function _chatVirtBuildMessageNode(m){
40222
42531
  const pillsHtml=pills.map(x=>`<span class=\"manager-delegate-pill\">${esc(String(x))}</span>`).join('');
40223
42532
  const routeHtml=`<div class=\"manager-delegate-route\"><span class=\"agent-bus-pill manager\">${esc(t('role_manager'))}</span><span class=\"agent-bus-arrow\">→</span><span class=\"agent-bus-pill${targetRole?(' '+targetRole):''}\">${esc(targetLabel)}</span></div>`;
40224
42533
  const objectiveHtml=(objective&&instruction&&objective.toLowerCase()===instruction.toLowerCase())?'':(objective?`<div class=\"manager-delegate-line\"><span>${esc(t('event_objective'))}</span><div>${esc(objective)}</div></div>`:'');
40225
- const instructionKey=`${String(m._vk||'')}:manager-instruction`;
40226
- const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div class=\"msg-md\">${renderMarkdownCached(instruction,instructionKey)}</div></div>`:'';
42534
+ const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div>${esc(instruction)}</div></div>`:'';
40227
42535
  d.innerHTML=`${roleBadge}<div class=\"manager-delegate-card\"><div class=\"manager-delegate-head\">${esc(t('event_manager_delegate_title'))}</div>${routeHtml}<div class=\"manager-delegate-pills\">${pillsHtml}</div>${objectiveHtml}${instructionHtml}</div>`;
40228
42536
  return d;
40229
42537
  }
@@ -41214,7 +43522,7 @@ async function renameSession(){if(!S.activeId){showError(t('select_session_first
41214
43522
  async function deleteSession(){if(!S.activeId){showError(t('select_session_first'));return}const deletingId=S.activeId;const ok=confirm(t('delete_confirm'));if(!ok)return;await api('/api/sessions/'+S.activeId,{method:'DELETE'});if(S.previewBySession&&deletingId){delete S.previewBySession[deletingId]}if(S.fileExplorerBySession&&deletingId){delete S.fileExplorerBySession[deletingId]}S.activeId=null;S.snap=null;if(S.es)S.es.close();renderPreviewTabs();renderPreviewVisibility();renderActivePreview(false);await refreshSessions();if(S.sessions.length)await selectSession(S.sessions[0].id)}
41215
43523
  async function applyModel(){const sel=E('modelSelect');const btn=E('applyModelBtn');const model=sel?.value||'';if(!model){showError(t('no_model_selected'));return}if(S.staticMode&&S.frozen)resumeAutoUpdates();S.config=S.config||{};const prevModel=String(S.config.model||'');const prevSnapModel=String(S.snap?.model||'');const prevSnapCatalog=(S.snap&&typeof S.snap==='object')?S.snap.llm_model_catalog:undefined;try{S.config.model=model;if(S.snap&&typeof S.snap==='object'){S.snap.model=_modelNameFromSelection(model)||S.snap.model;if(!S.snap.llm_model_catalog||typeof S.snap.llm_model_catalog!=='object')S.snap.llm_model_catalog={};S.snap.llm_model_catalog.selected=model}renderModelControls();renderStats();if(S.snap)renderBoards();if(sel)sel.disabled=true;if(btn)btn.disabled=true;const path=S.activeId?('/api/sessions/'+S.activeId+'/config/model'):'/api/config/model';const changed=await api(path,{method:'POST',body:JSON.stringify({selection:model,model})});if(changed?.note)showError(changed.note);else showError('');if(!applyModelCatalog(changed)){const cat=await loadModelCatalog();if(!applyModelCatalog(cat)){S.config.model=String(changed?.selected||model||'').trim();renderModelControls()}}if(S.snap&&typeof S.snap==='object'){const selected=String(S.config?.model||model||'').trim();const modelName=_modelNameFromSelection(selected);if(modelName)S.snap.model=modelName;if(changed&&typeof changed==='object')S.snap.llm_model_catalog=changed;renderBoards()}scheduleSnapshot({forceFull:true,delayMs:40,allowWhenFrozen:true})}catch(err){S.config.model=prevModel;if(S.snap&&typeof S.snap==='object'){if(prevSnapModel)S.snap.model=prevSnapModel;if(prevSnapCatalog!==undefined)S.snap.llm_model_catalog=prevSnapCatalog;renderBoards()}renderModelControls();renderStats();showError(err.message||String(err))}finally{if(sel)sel.disabled=false;if(btn)btn.disabled=false}}
41216
43524
 
41217
- async function uploadLlmConfigFile(file){try{if(!S.activeId){showError(t('select_session_first'));return}if(!file){return}const arr=await file.arrayBuffer();const payload={filename:'LLM.config.json',mime:file.type||'application/json',content_b64:ab2b64(arr)};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'));}else{showError('');const modal=E('llmConfigModal');if(modal)modal.style.display='none'}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true})}catch(err){showError(err.message||String(err))}}
43525
+ async function uploadLlmConfigFile(file){try{if(!S.activeId){showError(t('select_session_first'));return}if(!file){return}const arr=await file.arrayBuffer();const payload={filename:'LLM.config.json',mime:file.type||'application/json',content_b64:ab2b64(arr)};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});const note=String(out?.note||out?.model_catalog?.note||'').trim();if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'));}else{showError(note||'');const modal=E('llmConfigModal');if(modal)modal.style.display='none'}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true})}catch(err){showError(err.message||String(err))}}
41218
43526
  async function sendMessage(){showError('');const t=E('prompt').value.trim();if(!t||!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();E('prompt').value='';try{await waitForPendingUploads();await api('/api/sessions/'+S.activeId+'/message',{method:'POST',body:JSON.stringify({content:t})});S.lastDeltaTs=Date.now();if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:120,allowWhenFrozen:true})}}catch(err){showError(err.message)}}
41219
43527
  async function interruptRun(){if(!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();await api('/api/sessions/'+S.activeId+'/interrupt',{method:'POST'});S.lastDeltaTs=Date.now();if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:140,allowWhenFrozen:true})}}
41220
43528
  async function compactNow(){if(!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();await api('/api/sessions/'+S.activeId+'/compact',{method:'POST'});S.lastDeltaTs=Date.now();scheduleCompactRefreshBurst(COMPACT_AUTO_REFRESH_COUNT);if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:180,allowWhenFrozen:true})}}
@@ -48574,6 +50882,7 @@ class AppContext:
48574
50882
  context_limit_locked: bool = False,
48575
50883
  max_rounds: int = MAX_AGENT_ROUNDS,
48576
50884
  max_run_seconds: int = MAX_RUN_SECONDS,
50885
+ shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
48577
50886
  auto_model_switch: bool = False,
48578
50887
  arbiter_enabled: bool = True,
48579
50888
  arbiter_model: str = "",
@@ -48594,7 +50903,7 @@ class AppContext:
48594
50903
  self.base_url = base_url
48595
50904
  self.model = model
48596
50905
  self.thinking = False
48597
- self.js_lib_root = offline_js_lib_root(SCRIPT_DIR)
50906
+ self.js_lib_root = offline_js_lib_root(self.workspace)
48598
50907
  self.offline_js_summary: dict = {}
48599
50908
  try:
48600
50909
  self.offline_js_summary = load_offline_js_lib_index(self.js_lib_root)
@@ -48617,6 +50926,12 @@ class AppContext:
48617
50926
  maximum=MAX_RUN_TIMEOUT_SECONDS,
48618
50927
  fallback=MAX_RUN_SECONDS,
48619
50928
  )
50929
+ self.shell_command_timeout_seconds = normalize_timeout_seconds(
50930
+ shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
50931
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
50932
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
50933
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
50934
+ )
48620
50935
  self.auto_model_switch = bool(auto_model_switch)
48621
50936
  self.arbiter_enabled = bool(arbiter_enabled)
48622
50937
  self.arbiter_model = str(arbiter_model or "").strip()
@@ -48785,6 +51100,7 @@ class AppContext:
48785
51100
  "show_upload_list": bool(getattr(self, "show_upload_list", False)),
48786
51101
  "ui_style": normalize_ui_style(getattr(self, "ui_style", DEFAULT_UI_STYLE)),
48787
51102
  "js_lib_download_enabled": bool(getattr(self, "js_lib_download_enabled", True)),
51103
+ "shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
48788
51104
  "daily_session_limit_per_ip": int(getattr(self, "daily_session_limit_per_ip", 0) or 0),
48789
51105
  "daily_session_reset_hour": int(getattr(self, "daily_session_reset_hour", 8) or 8),
48790
51106
  "validation": dict(self.web_ui_validation or {}),
@@ -49894,6 +52210,14 @@ class AppContext:
49894
52210
  return started
49895
52211
 
49896
52212
  def _on_session_run_finished(self, user_id: str, session_id: str):
52213
+ try:
52214
+ mgr = self.manager_for_user(user_id)
52215
+ sess = mgr.get(session_id)
52216
+ if sess and bool(getattr(sess, "_deferred_runtime_sync_requested", False)):
52217
+ mgr._sync_from_session(sess, apply_to_all=False)
52218
+ sess._deferred_runtime_sync_requested = False
52219
+ except Exception:
52220
+ pass
49897
52221
  if not self.scheduler_limits_enabled():
49898
52222
  return
49899
52223
  started_rows: list[dict] = []
@@ -50028,6 +52352,7 @@ class AppContext:
50028
52352
  self.context_limit_locked,
50029
52353
  self.max_rounds,
50030
52354
  self.max_run_seconds,
52355
+ self.shell_command_timeout_seconds,
50031
52356
  self.auto_model_switch,
50032
52357
  self.arbiter_enabled,
50033
52358
  self.arbiter_model,
@@ -51096,6 +53421,7 @@ class Handler(BaseHTTPRequestHandler):
51096
53421
  "download_js_lib_enabled": bool(getattr(self.app, "js_lib_download_enabled", True)),
51097
53422
  "request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
51098
53423
  "run_timeout": int(mgr.max_run_seconds),
53424
+ "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
51099
53425
  }
51100
53426
  )
51101
53427
  model_cat = mgr.model_catalog()
@@ -51142,6 +53468,7 @@ class Handler(BaseHTTPRequestHandler):
51142
53468
  "context_token_limit": int(mgr.context_token_limit),
51143
53469
  "context_limit_locked": bool(mgr.context_limit_locked),
51144
53470
  "run_timeout": int(mgr.max_run_seconds),
53471
+ "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
51145
53472
  "auto_model_switch": bool(mgr.auto_model_switch),
51146
53473
  "execution_mode": normalize_execution_mode(getattr(mgr, "execution_mode", EXECUTION_MODE_SYNC), default=EXECUTION_MODE_SYNC),
51147
53474
  "execution_mode_choices": list(EXECUTION_MODE_CHOICES),
@@ -51194,7 +53521,7 @@ class Handler(BaseHTTPRequestHandler):
51194
53521
  for hk, hv in probe_headers.items():
51195
53522
  if str(hk or "").strip() and str(hv or "").strip():
51196
53523
  req.add_header(str(hk), str(hv))
51197
- with urllib.request.urlopen(req, timeout=8) as resp:
53524
+ with urlopen(req, timeout=8) as resp:
51198
53525
  body_text = resp.read().decode("utf-8", errors="replace")
51199
53526
  reachable = True
51200
53527
  try:
@@ -51251,7 +53578,7 @@ class Handler(BaseHTTPRequestHandler):
51251
53578
  for hk, hv in probe_headers.items():
51252
53579
  if str(hk or "").strip() and str(hv or "").strip():
51253
53580
  base_req.add_header(str(hk), str(hv))
51254
- with urllib.request.urlopen(base_req, timeout=8):
53581
+ with urlopen(base_req, timeout=8):
51255
53582
  pass
51256
53583
  reachable = True
51257
53584
  except urllib.error.HTTPError as exc:
@@ -51502,9 +53829,26 @@ class Handler(BaseHTTPRequestHandler):
51502
53829
  if not selection:
51503
53830
  return self._send_json({"error": "selection required"}, status=400)
51504
53831
  model_override = payload.get("model_override")
53832
+ if bool(getattr(sess, "running", False)):
53833
+ try:
53834
+ sess._queue_deferred_runtime_update(
53835
+ "model_selection",
53836
+ {
53837
+ "selection": selection,
53838
+ "model_override": model_override if isinstance(model_override, str) else "",
53839
+ },
53840
+ )
53841
+ except Exception as exc:
53842
+ return self._send_json({"error": str(exc)}, status=400)
53843
+ queued = sess.model_catalog()
53844
+ queued["queued"] = True
53845
+ queued["note"] = (
53846
+ "session is running; model switch queued and will apply after the current run finishes"
53847
+ )
53848
+ return self._send_json(queued)
51505
53849
  try:
51506
53850
  out = sess.set_runtime_selection(selection, model_override if isinstance(model_override, str) else None)
51507
- mgr._sync_from_session(sess, apply_to_all=True)
53851
+ mgr._sync_from_session(sess, apply_to_all=False)
51508
53852
  except Exception as exc:
51509
53853
  return self._send_json({"error": str(exc)}, status=400)
51510
53854
  return self._send_json(out)
@@ -51603,9 +53947,9 @@ class Handler(BaseHTTPRequestHandler):
51603
53947
  if len(raw) > 20 * 1024 * 1024:
51604
53948
  return self._send_json({"error": "max upload size is 20MB"}, status=413)
51605
53949
  meta = sess.add_upload(filename, raw, mime)
51606
- if isinstance(meta.get("model_catalog"), dict):
53950
+ if isinstance(meta.get("model_catalog"), dict) and not bool(meta.get("model_catalog", {}).get("queued")):
51607
53951
  try:
51608
- mgr._sync_from_session(sess, apply_to_all=True)
53952
+ mgr._sync_from_session(sess, apply_to_all=False)
51609
53953
  except Exception:
51610
53954
  pass
51611
53955
  return self._send_json(meta, status=201)
@@ -51699,16 +54043,16 @@ class Handler(BaseHTTPRequestHandler):
51699
54043
  explicit_complexity = infer_user_complexity_value(
51700
54044
  str(body.get("complexity", body.get("task_complexity", "")) or "")
51701
54045
  )
51702
- current_complexity = trim(
51703
- str(getattr(sess, "runtime_task_complexity", "") or "").strip().lower(),
51704
- 20,
54046
+ current_complexity = normalize_task_complexity(
54047
+ getattr(sess, "runtime_task_complexity", "") or "",
54048
+ default="",
51705
54049
  )
51706
54050
  if explicit_complexity in TASK_COMPLEXITY_LEVELS:
51707
- sess.runtime_task_complexity = explicit_complexity
54051
+ sess.runtime_task_complexity = normalize_task_complexity(explicit_complexity, default="")
51708
54052
  elif current_complexity in TASK_COMPLEXITY_LEVELS:
51709
54053
  sess.runtime_task_complexity = current_complexity
51710
54054
  else:
51711
- sess.runtime_task_complexity = str(policy.get("complexity", "simple"))
54055
+ sess.runtime_task_complexity = normalize_task_complexity(policy.get("complexity", "simple"), default="simple")
51712
54056
  sess.runtime_scale_preference = "thorough" if level >= 4 else "balanced"
51713
54057
  return self._send_json({"task_level": level})
51714
54058
  return self._send_json({"error": "not found"}, status=404)
@@ -51901,6 +54245,7 @@ class SkillsHandler(BaseHTTPRequestHandler):
51901
54245
  "show_upload_list": bool(getattr(self.app, "show_upload_list", False)),
51902
54246
  "web_ui": web_ui_state,
51903
54247
  "run_timeout": int(mgr.max_run_seconds),
54248
+ "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
51904
54249
  "request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
51905
54250
  }
51906
54251
  )
@@ -52332,6 +54677,25 @@ def main():
52332
54677
  f"(minimum {MIN_RUN_TIMEOUT_SECONDS}, model-active time excluded)"
52333
54678
  ),
52334
54679
  )
54680
+ parser.add_argument(
54681
+ "--shell_command_timeout",
54682
+ "--shell-command-timeout",
54683
+ "--bash_timeout",
54684
+ "--bash-timeout",
54685
+ "--command_timeout",
54686
+ "--command-timeout",
54687
+ dest="shell_command_timeout",
54688
+ default=None,
54689
+ type=int,
54690
+ help=(
54691
+ "Per-command shell/bash timeout in seconds "
54692
+ f"(default {DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS}; allowed "
54693
+ f"{MIN_SHELL_COMMAND_TIMEOUT_SECONDS}-{MAX_SHELL_COMMAND_TIMEOUT_SECONDS}). "
54694
+ "Independent from the global run timeout. Also configurable via --config keys "
54695
+ "shell_command_timeout / shell_timeout / bash_timeout / command_timeout and env "
54696
+ "AGENT_SHELL_COMMAND_TIMEOUT / AGENT_BASH_TIMEOUT / AGENT_COMMAND_TIMEOUT."
54697
+ ),
54698
+ )
52335
54699
  parser.add_argument(
52336
54700
  "--live_input_delay_write",
52337
54701
  default=LIVE_INPUT_DELAY_WRITE_ROUNDS,
@@ -52481,9 +54845,10 @@ def main():
52481
54845
  default="",
52482
54846
  help=(
52483
54847
  "LLM config source (URL or local file path). "
52484
- "Also reads startup keys like show_upload_list, download_js_lib and "
54848
+ "Also reads startup keys like show_upload_list, download_js_lib, shell_command_timeout and "
52485
54849
  "daily_session_limit (aliases: daily_sessions_per_ip / "
52486
- "max_daily_sessions_per_ip / session_daily_limit)."
54850
+ "max_daily_sessions_per_ip / session_daily_limit; shell aliases: "
54851
+ "shell_timeout / bash_timeout / command_timeout)."
52487
54852
  ),
52488
54853
  )
52489
54854
  parser.add_argument(
@@ -52618,6 +54983,7 @@ def main():
52618
54983
  arbiter_enabled=True,
52619
54984
  show_upload_list=None,
52620
54985
  download_js_lib=None,
54986
+ shell_command_timeout=None,
52621
54987
  )
52622
54988
  args = parser.parse_args()
52623
54989
  ctx_limit_locked = any(str(arg).split("=", 1)[0] == "--ctx_limit" for arg in sys.argv[1:])
@@ -52647,6 +55013,7 @@ def main():
52647
55013
  )
52648
55014
  resolved_show_upload_list = False
52649
55015
  resolved_daily_session_limit_per_ip = 0
55016
+ resolved_shell_command_timeout = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS
52650
55017
  external_config: dict = {}
52651
55018
  external_config_source = ""
52652
55019
  bootstrap_base_url = args.ollama_base_url
@@ -52673,6 +55040,14 @@ def main():
52673
55040
  external_daily_session_limit = extract_daily_session_limit_setting(external_config)
52674
55041
  if external_daily_session_limit is not None:
52675
55042
  resolved_daily_session_limit_per_ip = int(external_daily_session_limit)
55043
+ external_shell_command_timeout = extract_shell_command_timeout_setting(external_config)
55044
+ if external_shell_command_timeout is not None:
55045
+ resolved_shell_command_timeout = normalize_timeout_seconds(
55046
+ external_shell_command_timeout,
55047
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
55048
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
55049
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
55050
+ )
52676
55051
  print(f"[web-agent] external config loaded: {external_config_source}")
52677
55052
  except Exception as exc:
52678
55053
  print(f"[web-agent] invalid --config: {exc}")
@@ -52686,9 +55061,25 @@ def main():
52686
55061
  web_ui_daily_session_limit = extract_daily_session_limit_setting(web_ui_config)
52687
55062
  if web_ui_daily_session_limit is not None:
52688
55063
  resolved_daily_session_limit_per_ip = int(web_ui_daily_session_limit)
55064
+ web_ui_shell_command_timeout = extract_shell_command_timeout_setting(web_ui_config)
55065
+ if web_ui_shell_command_timeout is not None:
55066
+ resolved_shell_command_timeout = normalize_timeout_seconds(
55067
+ web_ui_shell_command_timeout,
55068
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
55069
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
55070
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
55071
+ )
52689
55072
  cli_daily_session_limit = getattr(args, "daily_session_limit_per_ip", None)
52690
55073
  if cli_daily_session_limit is not None:
52691
55074
  resolved_daily_session_limit_per_ip = max(0, int(cli_daily_session_limit or 0))
55075
+ cli_shell_command_timeout = getattr(args, "shell_command_timeout", None)
55076
+ if cli_shell_command_timeout is not None:
55077
+ resolved_shell_command_timeout = normalize_timeout_seconds(
55078
+ cli_shell_command_timeout,
55079
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
55080
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
55081
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
55082
+ )
52692
55083
  raw_ui_style = str(getattr(args, "ui_style", "") or "").strip()
52693
55084
  if not raw_ui_style:
52694
55085
  raw_ui_style = str(extract_ui_style_setting(external_config) or "").strip()
@@ -52743,6 +55134,7 @@ def main():
52743
55134
  f"[web-agent] run_timeout adjusted {requested_run_timeout}->{resolved_run_timeout} "
52744
55135
  f"(allowed range {MIN_RUN_TIMEOUT_SECONDS}-{MAX_RUN_TIMEOUT_SECONDS})"
52745
55136
  )
55137
+ print(f"[web-agent] shell_command_timeout={int(resolved_shell_command_timeout)}s")
52746
55138
  requested_live_input_delay_write = int(args.live_input_delay_write if args.live_input_delay_write is not None else LIVE_INPUT_DELAY_WRITE_ROUNDS)
52747
55139
  resolved_live_input_delay_write = max(0, min(20, requested_live_input_delay_write))
52748
55140
  if resolved_live_input_delay_write != requested_live_input_delay_write:
@@ -52925,6 +55317,7 @@ def main():
52925
55317
  ctx_limit_locked,
52926
55318
  resolved_max_rounds,
52927
55319
  resolved_run_timeout,
55320
+ resolved_shell_command_timeout,
52928
55321
  resolved_auto_model_switch,
52929
55322
  resolved_arbiter_enabled,
52930
55323
  resolved_arbiter_model,