clouds-coder 2026.4.2__tar.gz → 2026.4.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,7 @@ import selectors
25
25
  import signal
26
26
  import shutil
27
27
  import shlex
28
+ import ssl
28
29
  import socket
29
30
  import subprocess
30
31
  import sys
@@ -44,15 +45,48 @@ from pathlib import Path, PurePosixPath
44
45
  from urllib.error import HTTPError, URLError
45
46
  from urllib.parse import parse_qs, unquote, urlparse
46
47
  from urllib.request import Request, urlopen
48
+ try:
49
+ import certifi as _certifi
50
+ except Exception:
51
+ _certifi = None
47
52
  try:
48
53
  import yaml as _yaml
49
54
  except Exception:
50
55
  _yaml = None
56
+ _URL_OPEN_ORIGINAL = urlopen
57
+ _HTTP_SSL_CONTEXT = None
51
58
  APP_VERSION = "0.1.1"
52
59
  DEFAULT_OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
53
60
  DEFAULT_OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5-coder:7b")
54
61
  SCRIPT_DIR = Path(__file__).resolve().parent
55
62
 
63
def _shared_http_ssl_context():
    """Return the process-wide SSL context used for outbound HTTPS requests.

    The context is built on first use and cached in ``_HTTP_SSL_CONTEXT``.
    CA bundle candidates are tried in this order:

    1. the path in the ``SSL_CERT_FILE`` environment variable,
    2. the bundle reported by ``certifi`` (when that optional import worked),
    3. the interpreter's default trust store.

    A candidate that cannot be loaded is skipped, so a stale ``SSL_CERT_FILE``
    no longer prevents the certifi bundle from being used.
    """
    global _HTTP_SSL_CONTEXT
    if _HTTP_SSL_CONTEXT is not None:
        return _HTTP_SSL_CONTEXT

    candidates = []
    env_cafile = str(os.getenv("SSL_CERT_FILE", "") or "").strip()
    if env_cafile:
        candidates.append(env_cafile)
    if _certifi is not None:
        try:
            certifi_cafile = str(_certifi.where() or "").strip()
        except Exception:
            # certifi is optional; a broken install must not block HTTPS.
            certifi_cafile = ""
        if certifi_cafile and certifi_cafile not in candidates:
            candidates.append(certifi_cafile)

    for cafile in candidates:
        try:
            ctx = ssl.create_default_context(cafile=cafile)
        except Exception:
            # Unreadable or malformed bundle: fall through to the next one.
            continue
        break
    else:
        # No usable explicit bundle; rely on the platform defaults.
        ctx = ssl.create_default_context()

    _HTTP_SSL_CONTEXT = ctx
    return ctx
79
+
80
def urlopen(url, *args, **kwargs):
    """Open ``url`` via the original ``urlopen``, defaulting HTTPS to the shared SSL context.

    Args:
        url: A URL string or a request object exposing ``full_url``.
        *args: Positional arguments forwarded unchanged.
        **kwargs: Keyword arguments forwarded unchanged, except that
            ``context`` is filled in for ``https://`` targets when the caller
            did not supply one.

    Returns:
        Whatever the wrapped ``urlopen`` returns.

    The shared context is NOT injected when the caller passes any of the
    legacy ``cafile`` / ``capath`` / ``cadefault`` options: on Python
    versions that still accept them, combining them with ``context`` raises
    ``ValueError``.
    """
    uses_legacy_ca_options = any(
        kwargs.get(name) for name in ("cafile", "capath", "cadefault")
    )
    if "context" not in kwargs and not uses_legacy_ca_options:
        target = getattr(url, "full_url", url)
        if str(target or "").strip().lower().startswith("https://"):
            try:
                kwargs["context"] = _shared_http_ssl_context()
            except Exception:
                # Best effort: fall back to urllib's own default handling.
                pass
    return _URL_OPEN_ORIGINAL(url, *args, **kwargs)
89
+
56
90
  def _resolve_default_agent_workdir() -> Path:
57
91
  raw = str(os.getenv("AGENT_WORKDIR", "") or "").strip()
58
92
  if raw:
@@ -182,6 +216,23 @@ DEFAULT_TIMEOUT_SECONDS = max(
182
216
  ),
183
217
  )
184
218
  DEFAULT_REQUEST_TIMEOUT = DEFAULT_TIMEOUT_SECONDS
219
# Bounds (in seconds) applied to every shell/bash command timeout.
MIN_SHELL_COMMAND_TIMEOUT_SECONDS = 10
MAX_SHELL_COMMAND_TIMEOUT_SECONDS = 86_400


def _shell_command_timeout_from_env(default: int = 240) -> int:
    """Read the shell command timeout from the environment.

    ``AGENT_SHELL_COMMAND_TIMEOUT`` wins when set, then ``AGENT_BASH_TIMEOUT``,
    then ``AGENT_COMMAND_TIMEOUT``. An empty or non-numeric value yields
    ``default`` instead of raising, so a typo in the environment cannot break
    module import.
    """
    raw = os.getenv(
        "AGENT_SHELL_COMMAND_TIMEOUT",
        os.getenv("AGENT_BASH_TIMEOUT", os.getenv("AGENT_COMMAND_TIMEOUT", str(default))),
    )
    try:
        return int(str(raw or default))
    except ValueError:
        return default


# Default timeout, clamped into [MIN, MAX].
DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS = max(
    MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
    min(MAX_SHELL_COMMAND_TIMEOUT_SECONDS, _shell_command_timeout_from_env()),
)
185
236
  AUTO_CONTINUE_BUDGET_DEFAULT = 30
186
237
  AGENT_MAX_OUTPUT_TOKENS = 16384
187
238
  OLLAMA_THINKING_TOOL_BUFFER = 4096
@@ -2014,6 +2065,55 @@ def extract_daily_session_limit_setting(raw: object) -> int | None:
2014
2065
  return None
2015
2066
 
2016
2067
 
2068
+ def extract_shell_command_timeout_setting(raw: object) -> int | None:
2069
+ """Read shell/bash command timeout from config dict.
2070
+
2071
+ Accepted keys:
2072
+ - shell_command_timeout
2073
+ - shell_timeout
2074
+ - bash_timeout
2075
+ - command_timeout
2076
+ Sections searched: top-level, then 'startup' / 'runtime' / 'shell' / 'tools' / 'execution'.
2077
+ Returns a clamped positive integer, or None if no setting is present.
2078
+ """
2079
+ if not isinstance(raw, dict):
2080
+ return None
2081
+
2082
+ def _parse_timeout(value: object) -> int | None:
2083
+ if value is None or isinstance(value, bool):
2084
+ return None
2085
+ try:
2086
+ text = str(value).strip()
2087
+ if not text:
2088
+ return None
2089
+ return normalize_timeout_seconds(
2090
+ text,
2091
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
2092
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
2093
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
2094
+ )
2095
+ except Exception:
2096
+ return None
2097
+
2098
+ keys = (
2099
+ "shell_command_timeout",
2100
+ "shell_timeout",
2101
+ "bash_timeout",
2102
+ "command_timeout",
2103
+ )
2104
+ for key in keys:
2105
+ if key in raw:
2106
+ return _parse_timeout(raw.get(key))
2107
+ for section_key in ("startup", "runtime", "shell", "tools", "execution"):
2108
+ section = raw.get(section_key)
2109
+ if not isinstance(section, dict):
2110
+ continue
2111
+ for key in keys:
2112
+ if key in section:
2113
+ return _parse_timeout(section.get(key))
2114
+ return None
2115
+
2116
+
2017
2117
  class SessionCreationLimitExceeded(RuntimeError):
2018
2118
  def __init__(self, status: dict):
2019
2119
  self.status = dict(status or {})
@@ -5257,13 +5357,17 @@ class TodoManager:
5257
5357
  elif isinstance(item, dict):
5258
5358
  raw = item
5259
5359
  else:
5260
- raise ValueError(f"item {idx}: invalid type")
5360
+ # Tolerant: convert to string instead of raising
5361
+ try:
5362
+ raw = {"content": str(item).strip(), "status": "pending"}
5363
+ except Exception:
5364
+ continue # Skip unparseable items
5261
5365
  raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
5262
5366
  content = normalize_work_text(raw_content)
5263
5367
  if not content:
5264
5368
  content = raw_content
5265
5369
  if not content:
5266
- raise ValueError(f"item {idx}: content required")
5370
+ continue # Skip empty items instead of raising
5267
5371
  raw_status = str(raw.get("status", raw.get("state", "pending"))).strip().lower()
5268
5372
  status = status_alias.get(raw_status, raw_status or "pending")
5269
5373
  if status not in {"pending", "in_progress", "completed"}:
@@ -7051,9 +7155,11 @@ Use this skill when:
7051
7155
  6. Report rewritten count, copied files, and unresolved URLs.
7052
7156
 
7053
7157
  ## Rules
7158
+ - Treat `./js_lib` and `/js_lib/...` as workspace lookup locations only, not final browser-facing URLs.
7054
7159
  - Keep `./js` per HTML location (do not hardcode global absolute paths).
7055
7160
  - Keep file names deterministic and safe (`[A-Za-z0-9._-]`).
7056
7161
  - Preserve existing relative local script paths if already offline-ready.
7162
+ - Final HTML must not point to `/js_lib/...`, `/assets/js_lib/...`, or other virtual asset aliases; copy first, then use plain relative paths.
7057
7163
 
7058
7164
  ## Output Contract
7059
7165
  Return:
@@ -12420,12 +12526,12 @@ TOOLS = [
12420
12526
  ),
12421
12527
  tool_def("write_file", "Write file content.", {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
12422
12528
  tool_def("edit_file", "Edit a file by replacing first match.", {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, ["path", "old_text", "new_text"]),
12423
- tool_def("TodoWrite", "Update todo list.", {"items": {"type": "array", "items": {"type": "object"}}}, ["items"]),
12529
+ tool_def("TodoWrite", "Update todo list. Items can be strings or objects with content/status/owner fields.", {"items": {"type": "array", "items": {}}}, ["items"]),
12424
12530
  tool_def(
12425
12531
  "TodoWriteRescue",
12426
- "Fallback todo writer when TodoWrite keeps failing/repeating. Accepts simple string items and auto-normalizes schema.",
12532
+ "Fallback todo writer. Accepts strings with status prefixes: '[x] task' or '✅ task' = completed, '[>] task' = in_progress, plain text = pending. Also accepts dicts with status field.",
12427
12533
  {
12428
- "items": {"type": "array", "items": {"type": "string"}},
12534
+ "items": {"type": "array", "items": {}},
12429
12535
  "in_progress_index": {"type": "integer"},
12430
12536
  },
12431
12537
  ["items"],
@@ -12694,6 +12800,7 @@ class SessionState:
12694
12800
  context_limit_locked: bool = False,
12695
12801
  max_rounds: int = MAX_AGENT_ROUNDS,
12696
12802
  max_run_seconds: int = MAX_RUN_SECONDS,
12803
+ shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
12697
12804
  auto_model_switch: bool = False,
12698
12805
  arbiter_enabled: bool = True,
12699
12806
  arbiter_model: str = "",
@@ -12822,6 +12929,7 @@ class SessionState:
12822
12929
  self.runtime_complexity_floor = ""
12823
12930
  self.runtime_task_level_floor = 0
12824
12931
  self.runtime_task_level_ceiling = 0 # 0 = no ceiling; set from plan risk on approval
12932
+ self._todowrite_step_counter: dict[str, int] = {} # Fix 5: track consecutive TodoWrite per step for loop detection
12825
12933
  self.runtime_scale_preference = "balanced"
12826
12934
  self.runtime_direct_objective = ""
12827
12935
  self.runtime_reclassify_goal = ""
@@ -12903,6 +13011,12 @@ class SessionState:
12903
13011
  maximum=MAX_RUN_TIMEOUT_SECONDS,
12904
13012
  fallback=MAX_RUN_SECONDS,
12905
13013
  )
13014
+ self.shell_command_timeout_seconds = normalize_timeout_seconds(
13015
+ shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
13016
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
13017
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
13018
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
13019
+ )
12906
13020
  self.truncation_count = 0
12907
13021
  self.last_truncation_ts = 0.0
12908
13022
  self.truncation_rescue_task_ids: list[int] = []
@@ -15000,7 +15114,7 @@ class SessionState:
15000
15114
  pass
15001
15115
  t = threading.Thread(target=_llm_match, daemon=True)
15002
15116
  t.start()
15003
- t.join(timeout=60.0)
15117
+ t.join(timeout=5.0)
15004
15118
  if llm_result:
15005
15119
  matched_names = llm_result
15006
15120
  self._emit("status", {"summary": f"skill discovery (LLM task analysis): {matched_names} ({trigger})"})
@@ -15034,7 +15148,7 @@ class SessionState:
15034
15148
  # --- Path 3: Deferred LLM pickup if still running ---
15035
15149
  if not matched_names and t.is_alive():
15036
15150
  def _deferred_llm_pickup():
15037
- t.join(timeout=60.0)
15151
+ t.join(timeout=8.0)
15038
15152
  if llm_result and not self._loaded_skill_rows():
15039
15153
  for name_str in llm_result[:3]:
15040
15154
  try:
@@ -15431,7 +15545,6 @@ class SessionState:
15431
15545
  "ENGINEERING EXECUTION DISCIPLINE: "
15432
15546
  "For coding, bug-fix, architecture, integration, and testing work, proactively use the skill system when a matching skill exists. "
15433
15547
  "Do not wait for failure before calling list_skills/load_skill for debugging, API, frontend, parser, or recovery workflows. "
15434
- "Already-loaded skills appear as <loaded-skill> messages — use them directly without reloading. "
15435
15548
  "Use a root-cause-first loop: inspect the exact error or failing behavior, read the implicated file or path, form one concrete hypothesis, apply one bounded fix, then run at least one fix-and-verify cycle before declaring success. "
15436
15549
  "If read_file or bash reports a missing path, empty folder, or mismatched filename, stop repeating the same lookup. "
15437
15550
  "Reconcile the path against uploads, recent file paths, file explorer entries, and close workspace matches; then either open the closest candidate or create the intended target. "
@@ -15485,6 +15598,10 @@ class SessionState:
15485
15598
  f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
15486
15599
  f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS require) or pptxgen.bundle.js (browser). "
15487
15600
  f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
15601
+ "IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
15602
+ "If an HTML file uses any asset from js_lib, copy that file into a task-local relative asset folder "
15603
+ "(for example './js/' or './assets/vendor/') next to the deliverable, then reference it with a plain relative path in HTML. "
15604
+ "Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases inside final exported HTML. "
15488
15605
  f"Task level={runtime_level}, mode={runtime_mode}, "
15489
15606
  f"budget={'unlimited' if budget <= 0 else budget}. "
15490
15607
  f"Context limit ~{self.context_token_upper_bound} tokens. "
@@ -22181,11 +22298,19 @@ body{padding:18px}
22181
22298
  str(meta.get("output") or meta.get("error") or "(no output)"),
22182
22299
  cwd=cwd,
22183
22300
  )
22184
- )
22301
+ )
22185
22302
  return meta
22186
22303
 
22304
def _shell_command_timeout(self) -> int:
    """Return this session's shell command timeout, clamped to the allowed range.

    Falls back to ``DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS`` when the instance
    has no ``shell_command_timeout_seconds`` attribute.
    """
    configured = getattr(
        self,
        "shell_command_timeout_seconds",
        DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
    )
    limits = {
        "minimum": MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
        "maximum": MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
        "fallback": DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
    }
    return normalize_timeout_seconds(configured, **limits)
22311
+
22187
22312
  def _run_bash(self, command: str) -> str:
22188
- return self._run_shell_meta(command, self.files_root, 120)["output"]
22313
+ return self._run_shell_meta(command, self.files_root, self._shell_command_timeout())["output"]
22189
22314
 
22190
22315
  def _fuzzy_resolve_path(self, fp: Path) -> Path:
22191
22316
  """If fp doesn't exist, try stripping spaces from the filename to find a close match.
@@ -22883,10 +23008,10 @@ body{padding:18px}
22883
23008
  "3) scaffold semantic HTML; "
22884
23009
  "4) apply CSS tokens + responsive layout; "
22885
23010
  "5) wire JS state/data interactions; "
22886
- "6) localize external JS dependencies to ./js from ./js_lib; "
23011
+ "6) localize external JS dependencies to a task-local relative folder such as ./js from ./js_lib, and rewrite final HTML to plain relative paths; "
22887
23012
  "7) run QA loop for desktop/mobile/a11y/performance and iterate. "
22888
23013
  f"Offline JS libs available now: {libs_hint}. "
22889
- "Final exported HTML should avoid unresolved CDN-only script src."
23014
+ "Final exported HTML should avoid unresolved CDN-only script src and must not keep '/js_lib/...' or '/assets/js_lib/...' virtual URLs."
22890
23015
  )
22891
23016
 
22892
23017
  def _contains_any_keyword(self, text: str, keywords: tuple[str, ...]) -> bool:
@@ -24369,6 +24494,7 @@ body{padding:18px}
24369
24494
  "category": trim(str(pt.get("category", "") or ""), 40),
24370
24495
  "plan_step_index": int(pt.get("plan_step_index", -1)) if pt.get("plan_step_index") is not None else -1,
24371
24496
  "created_at": float(pt.get("created_at", 0.0) or 0.0),
24497
+ "activated_at": float(pt.get("activated_at", 0.0) or 0.0) if pt.get("activated_at") else None,
24372
24498
  "completed_at": float(pt.get("completed_at", 0.0) or 0.0) if pt.get("completed_at") else None,
24373
24499
  "completed_by": trim(str(pt.get("completed_by", "") or ""), 40),
24374
24500
  "evidence": trim(str(pt.get("evidence", "") or ""), 200),
@@ -25363,6 +25489,195 @@ body{padding:18px}
25363
25489
  return observed_signal or read_back or wrote_files
25364
25490
  return wrote_files or read_back or knowledge_signal or observed_signal
25365
25491
 
25492
+ def _plan_step_activation_ts(self, plan_step: dict) -> float:
25493
+ if not isinstance(plan_step, dict):
25494
+ return 0.0
25495
+ try:
25496
+ activated = float(plan_step.get("activated_at", 0.0) or 0.0)
25497
+ except Exception:
25498
+ activated = 0.0
25499
+ if activated > 0:
25500
+ return activated
25501
+ try:
25502
+ return float(plan_step.get("created_at", 0.0) or 0.0)
25503
+ except Exception:
25504
+ return 0.0
25505
+
25506
def _plan_step_blackboard_signals(self, plan_step: dict, board: dict | None = None) -> dict:
    """Summarize blackboard activity recorded since a plan step became active.

    Args:
        plan_step: Plan-step row. Its ``id`` selects the ``step_files`` bucket
            and its activation timestamp (see ``_plan_step_activation_ts``)
            is the cut-off. A non-dict value is treated as an empty step.
        board: Blackboard dict to inspect. When it is not a dict, the result
            of ``self._ensure_blackboard()`` is used instead.

    Returns:
        A dict with the keys ``since_ts``, ``has_write``, ``has_read``,
        ``has_research``, ``has_exec``, ``has_review``, ``has_compile_pass``,
        ``has_test_pass``, ``recent_files``, ``recent_exec_excerpt``,
        ``recent_review_excerpt`` and ``recent_research_excerpt``.

    Entries carrying a positive timestamp older than the cut-off are ignored;
    entries without a usable timestamp are kept.
    """
    import re  # function-scope import keeps this block self-contained

    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    step = plan_step if isinstance(plan_step, dict) else {}
    step_id = trim(str(step.get("id", "") or ""), 20)
    since_ts = self._plan_step_activation_ts(plan_step)

    def _to_ts(value: object) -> float:
        try:
            return float(value or 0.0)
        except Exception:
            return 0.0

    def _is_stale(ts: float) -> bool:
        # Only a known, positive timestamp can prove an entry predates the step.
        return since_ts > 0 and ts > 0 and ts + 1e-6 < since_ts

    def _rows_since(rows: object) -> list[dict]:
        out: list[dict] = []
        if not isinstance(rows, list):
            return out
        for row in rows:
            if not isinstance(row, dict):
                continue
            txt = trim(str(row.get("content", "") or "").strip(), 1200)
            if not txt:
                continue
            ts = _to_ts(row.get("ts", 0.0))
            if _is_stale(ts):
                continue
            out.append({"ts": ts, "content": txt, "actor": trim(str(row.get("actor", "") or ""), 40)})
        return out

    def _recent_excerpt(rows: list[dict], max_chars: int = 120) -> str:
        if not rows:
            return ""
        return trim(str(rows[-1].get("content", "") or "").replace("\r\n", "\n"), max_chars)

    negative_hints = ("error:", "failed", "failure", "traceback", "fatal error", "assertionerror", "exception")
    compile_hints = ("compiled successfully", "build successful", "build succeeded", "syntax ok", "lint passed", "no issues found", "0 errors", "编译成功")
    test_hints = ("test passed", "tests passed", "all tests passed", "0 failed", "100%", "ok", "success", "测试通过")
    # "0 failed" / "0 failures" report success but contain a negative hint;
    # the lookbehind keeps "10 failed" and "1.0 failed" from being excused.
    zero_failure_count = re.compile(r"(?<![\d.])0 fail(?:ed|ures?)\b")

    def _is_negative(low: str) -> bool:
        scrubbed = zero_failure_count.sub(" ", low)
        return any(neg in scrubbed for neg in negative_hints)

    step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
    step_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
    step_entries = [
        entry
        for entry in step_entries
        if isinstance(entry, dict) and not _is_stale(_to_ts(entry.get("ts", 0.0)))
    ]

    artifact_rows: list[dict] = []
    raw_artifacts = bb.get("code_artifacts", {}) if isinstance(bb.get("code_artifacts"), dict) else {}
    for path, meta in raw_artifacts.items():
        if not isinstance(meta, dict):
            continue
        ts = _to_ts(meta.get("updated_at", 0.0))
        if _is_stale(ts):
            continue
        artifact_rows.append({
            "path": trim(str(path or "").strip(), 240),
            "summary": trim(str(meta.get("summary", "") or "").strip(), 200),
            "updated_at": ts,
        })

    research_rows = _rows_since(bb.get("research_notes", []))
    exec_rows = _rows_since(bb.get("execution_logs", []))
    review_rows = _rows_since(bb.get("review_feedback", []))

    file_ops = {trim(str(entry.get("op", "") or "").strip(), 40) for entry in step_entries}
    has_write = any(op in {"write_file", "edit_file"} for op in file_ops) or bool(artifact_rows)
    has_read = "read_file" in file_ops

    def _has_positive(rows: list[dict], hints: tuple[str, ...]) -> bool:
        # Only the six most recent rows are consulted, newest first.
        for row in reversed(rows[-6:]):
            low = str(row.get("content", "") or "").lower()
            if not low or _is_negative(low):
                continue
            if any(tok in low for tok in hints):
                return True
        return False

    def _has_observed(rows: list[dict]) -> bool:
        for row in reversed(rows[-6:]):
            low = str(row.get("content", "") or "").lower()
            if low and not _is_negative(low):
                return True
        return False

    recent_files = [row.get("path", "") for row in artifact_rows[-4:] if row.get("path")]
    if not recent_files:
        recent_files = [
            trim(str(entry.get("path", "") or "").strip(), 240)
            for entry in step_entries[-4:]
            if str(entry.get("path", "") or "").strip()
        ]

    return {
        "since_ts": since_ts,
        "has_write": has_write,
        "has_read": has_read,
        "has_research": bool(research_rows),
        "has_exec": _has_observed(exec_rows),
        "has_review": _has_observed(review_rows),
        "has_compile_pass": _has_positive(exec_rows + review_rows, compile_hints),
        "has_test_pass": _has_positive(exec_rows + review_rows, test_hints),
        # dict.fromkeys de-duplicates while preserving order.
        "recent_files": list(dict.fromkeys(recent_files))[-4:],
        "recent_exec_excerpt": _recent_excerpt(exec_rows, 140),
        "recent_review_excerpt": _recent_excerpt(review_rows, 140),
        "recent_research_excerpt": _recent_excerpt(research_rows, 140),
    }
25621
+
25622
+ def _plan_step_has_blackboard_evidence(self, plan_step: dict, board: dict | None = None) -> bool:
25623
+ if not isinstance(plan_step, dict):
25624
+ return False
25625
+ sig = self._plan_step_blackboard_signals(plan_step, board)
25626
+ step_text = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "").lower()
25627
+ phase = self._plan_step_phase_hint(step_text)
25628
+ wants_test = phase in ("test", "review") or any(
25629
+ tok in step_text for tok in ("test", "pytest", "unit", "integration", "验证", "測試", "测试", "回归", "assert")
25630
+ )
25631
+ wants_runtime_validation = wants_test or phase == "implement" or any(
25632
+ tok in step_text for tok in ("verify", "validation", "check", "lint", "build", "compile", "运行", "校验", "檢查")
25633
+ )
25634
+ if wants_test:
25635
+ return sig["has_test_pass"] or sig["has_exec"] or sig["has_review"]
25636
+ if phase == "implement":
25637
+ return sig["has_write"] and (
25638
+ sig["has_compile_pass"] or sig["has_test_pass"] or sig["has_exec"] or sig["has_read"] or sig["has_review"]
25639
+ )
25640
+ if phase in ("research", "design"):
25641
+ return sig["has_research"] or sig["has_read"] or sig["has_exec"] or sig["has_write"]
25642
+ if wants_runtime_validation:
25643
+ return sig["has_exec"] or sig["has_read"] or sig["has_write"] or sig["has_review"]
25644
+ return sig["has_write"] or sig["has_read"] or sig["has_research"] or sig["has_exec"] or sig["has_review"]
25645
+
25646
+ def _step_has_accumulated_evidence(self, plan_step: dict, bb: dict | None = None) -> bool:
25647
+ """Fix 3: Check if step has accumulated evidence across ALL turns (not just current turn).
25648
+ Uses step_files registry + blackboard signals to detect writes/execution during step lifetime."""
25649
+ if not isinstance(plan_step, dict):
25650
+ return False
25651
+ sig = self._plan_step_blackboard_signals(plan_step, bb)
25652
+ return sig["has_write"] or sig["has_exec"] or sig["has_research"]
25653
+
25654
def _collect_accumulated_step_evidence(self, plan_step: dict, bb: dict | None = None) -> str:
    """Build a short evidence summary from the step's blackboard signals.

    Joins up to four recent file paths plus trimmed execution and research
    excerpts with "; ". Returns "accumulated-step-evidence" when none of
    those are available, and "" for non-dict input. The result is trimmed to
    200 characters.
    """
    if not isinstance(plan_step, dict):
        return ""

    signals = self._plan_step_blackboard_signals(plan_step, bb)
    fragments: list[str] = []

    files = signals.get("recent_files")
    if files:
        fragments.append("files: " + ", ".join(signals["recent_files"][:4]))

    # Excerpts share one shape: label, then the text cut to 80 characters.
    for label, key in (("exec: ", "recent_exec_excerpt"), ("research: ", "recent_research_excerpt")):
        if signals.get(key):
            fragments.append(label + trim(signals[key], 80))

    summary = "; ".join(fragments)
    if not summary:
        summary = "accumulated-step-evidence"
    return trim(summary, 200)
25667
+
25668
def _collect_blackboard_step_evidence(self, plan_step: dict, board: dict | None = None) -> str:
    """Build a short evidence string from the step's blackboard signals.

    Joins up to three recent file paths and the latest execution, review and
    research excerpts (labelled "logs", "review" and "notes") with "; ", then
    trims the result to 200 characters. Yields an empty summary when no
    signal is present.
    """
    signals = self._plan_step_blackboard_signals(plan_step, board)
    fragments: list[str] = []

    if signals.get("recent_files"):
        fragments.append("files: " + ", ".join(signals["recent_files"][:3]))

    labelled_excerpts = (
        ("logs", "recent_exec_excerpt"),
        ("review", "recent_review_excerpt"),
        ("notes", "recent_research_excerpt"),
    )
    for label, key in labelled_excerpts:
        if signals.get(key):
            fragments.append(f"{label}: {signals[key]}")

    return trim("; ".join(fragments), 200)
25680
+
25366
25681
  def _has_test_pass_evidence(self, board: dict | None = None) -> bool:
25367
25682
  bb = board if isinstance(board, dict) else self._ensure_blackboard()
25368
25683
  logs = bb.get("execution_logs", []) if isinstance(bb.get("execution_logs"), list) else []
@@ -25395,6 +25710,20 @@ body{padding:18px}
25395
25710
  if todo.get("status") == "completed":
25396
25711
  continue
25397
25712
  cat = todo.get("category", "")
25713
+ if cat == "plan_step" and todo.get("status") == "in_progress" and not todo.get("activated_at"):
25714
+ step_idx = int(todo.get("plan_step_index", 0) or 0)
25715
+ prior_done_ts = [
25716
+ float(t.get("completed_at", 0.0) or 0.0)
25717
+ for t in todos
25718
+ if t.get("category") == "plan_step"
25719
+ and int(t.get("plan_step_index", 0) or 0) < step_idx
25720
+ and t.get("completed_at")
25721
+ ]
25722
+ todo["activated_at"] = (
25723
+ max(prior_done_ts)
25724
+ if prior_done_ts
25725
+ else (float(todo.get("created_at", 0.0) or 0.0) or float(now_ts()))
25726
+ )
25398
25727
  if cat == "setup" and (research_count > 0 or code_count > 0):
25399
25728
  todo.update(
25400
25729
  status="completed",
@@ -25446,11 +25775,14 @@ body{padding:18px}
25446
25775
  if t.get("category") == "plan_step"
25447
25776
  ):
25448
25777
  todo["status"] = "in_progress"
25778
+ todo["activated_at"] = float(now_ts())
25449
25779
 
25450
25780
  if not any(t.get("status") == "in_progress" for t in todos):
25451
25781
  for t in todos:
25452
25782
  if t.get("status") == "pending":
25453
25783
  t["status"] = "in_progress"
25784
+ if not t.get("activated_at"):
25785
+ t["activated_at"] = float(now_ts())
25454
25786
  break
25455
25787
 
25456
25788
  bb["project_todos"] = todos
@@ -25516,6 +25848,11 @@ body{padding:18px}
25516
25848
  break
25517
25849
  if not current:
25518
25850
  return False
25851
+ # Fix 5c: Reset TodoWrite loop counter on step advancement
25852
+ try:
25853
+ self._todowrite_step_counter.clear()
25854
+ except Exception:
25855
+ pass
25519
25856
  current["status"] = "completed"
25520
25857
  current["completed_at"] = float(now_ts())
25521
25858
  current["completed_by"] = actor
@@ -25530,6 +25867,7 @@ body{padding:18px}
25530
25867
  break
25531
25868
  if next_step:
25532
25869
  next_step["status"] = "in_progress"
25870
+ next_step["activated_at"] = float(now_ts())
25533
25871
  step_idx = int(next_step.get("plan_step_index", 0) or 0) + 1
25534
25872
  total = int(bb.get("plan_step_total", len(todos)) or len(todos))
25535
25873
  self._emit("status", {
@@ -25635,27 +25973,64 @@ body{padding:18px}
25635
25973
  isinstance(r, dict) and r.get("ok", False) and str(r.get("name", "")) == "bash"
25636
25974
  for r in results
25637
25975
  )
25638
- validation_ok = self._tool_results_have_validation_evidence(current, results)
25976
+ validation_ok_current = self._tool_results_have_validation_evidence(current, results)
25977
+ validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
25978
+ validation_ok = validation_ok_current or validation_ok_blackboard
25979
+ bb_sig = self._plan_step_blackboard_signals(current, bb)
25639
25980
  phase_evidence = False
25640
25981
  if phase in ("research", "design") and validation_ok:
25641
25982
  phase_evidence = True
25642
- elif phase == "implement" and wrote_files and validation_ok:
25983
+ elif phase == "implement" and (
25984
+ (wrote_files and validation_ok_current)
25985
+ or (bb_sig["has_write"] and validation_ok_blackboard)
25986
+ ):
25643
25987
  phase_evidence = True
25644
- elif phase in ("test", "review") and ran_bash_ok and validation_ok:
25988
+ elif phase in ("test", "review") and (
25989
+ (ran_bash_ok and validation_ok_current)
25990
+ or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
25991
+ ):
25645
25992
  phase_evidence = True
25993
+ todo_progress_signal = any(
25994
+ isinstance(r, dict) and r.get("ok", False)
25995
+ and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
25996
+ for r in results
25997
+ )
25646
25998
  # Advance when:
25647
25999
  # - Manager requested AND worker produced output, OR
25648
26000
  # - All subtasks completed AND worker produced output, OR
25649
- # - Phase heuristics confirm (write+bash for implement)
25650
- has_strong_evidence = validation_ok and worker_produced_output and (
25651
- manager_requested or subtasks_all_done or phase_evidence
25652
- )
26001
+ # - Phase heuristics confirm BUT ONLY if no incomplete subtasks exist
26002
+ # - Fix 3: All subtasks completed + accumulated step evidence (covers TodoWrite-only turns)
26003
+ # CRITICAL: When subtasks exist, phase_evidence alone CANNOT bypass subtask completion.
26004
+ _has_subtasks = bool(self._active_plan_worker_todo_rows(
26005
+ str(current.get("id", "") or ""), role=""
26006
+ ))
26007
+ _phase_gate = phase_evidence and (subtasks_all_done or not _has_subtasks)
26008
+ accumulated_evidence_path = (
26009
+ subtasks_all_done
26010
+ and todo_progress_signal
26011
+ and self._step_has_accumulated_evidence(current, bb)
26012
+ )
26013
+ has_strong_evidence = (
26014
+ validation_ok and (
26015
+ (
26016
+ worker_produced_output
26017
+ and (manager_requested or subtasks_all_done or _phase_gate)
26018
+ )
26019
+ or (
26020
+ todo_progress_signal
26021
+ and subtasks_all_done
26022
+ and validation_ok_blackboard
26023
+ )
26024
+ )
26025
+ ) or accumulated_evidence_path
25653
26026
  if has_strong_evidence:
25654
26027
  evidence = self._collect_step_evidence(current, worker_step)
25655
26028
  self._advance_plan_step(
25656
26029
  evidence=evidence,
25657
26030
  actor=str(route.get("target", "developer") or "developer"),
25658
26031
  )
26032
+ else:
26033
+ self._inject_rework_if_needed(current, worker_step)
25659
26034
 
25660
26035
  def _worker_step_has_evidence(self, step: dict) -> bool:
25661
26036
  """Check if worker step produced concrete tool outputs."""
@@ -25671,7 +26046,8 @@ body{padding:18px}
25671
26046
 
25672
26047
  def _step_subtasks_all_completed(self, plan_step: dict) -> bool:
25673
26048
  """Check if all worker subtasks linked to this plan step are completed.
25674
- Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking."""
26049
+ Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking.
26050
+ Fix 6: Also excludes 'next-step intent' items that were added alongside completed items."""
25675
26051
  step_id = str(plan_step.get("id", "") or "")
25676
26052
  if not step_id:
25677
26053
  return False
@@ -25711,7 +26087,293 @@ body{padding:18px}
25711
26087
  relevant.append(r)
25712
26088
  if relevant:
25713
26089
  worker_items = relevant
25714
- return all(str(r.get("status", "")).lower() == "completed" for r in worker_items)
26090
+ # Fix 6: Exclude "next-step intent" pending items when all other items are completed.
26091
+ # When the worker completes step N and creates step N+1 subtasks in the same TodoWrite call,
26092
+ # the new pending items get parent_step_id of step N, blocking its advancement.
26093
+ completed_items = [r for r in worker_items if str(r.get("status", "")).lower() == "completed"]
26094
+ pending_items = [r for r in worker_items if str(r.get("status", "")).lower() != "completed"]
26095
+ if completed_items and pending_items:
26096
+ # Check if pending items are content-wise duplicates of completed items
26097
+ # (indicating the worker re-sent the same items but some got stuck as pending)
26098
+ completed_content = {
26099
+ normalize_work_text(str(r.get("content", ""))).strip().lower()
26100
+ for r in completed_items
26101
+ if str(r.get("content", "") or "").strip()
26102
+ }
26103
+ truly_new_pending = [
26104
+ r for r in pending_items
26105
+ if normalize_work_text(str(r.get("content", ""))).strip().lower() not in completed_content
26106
+ ]
26107
+ # If all pending items are duplicates of completed items, they don't block
26108
+ if not truly_new_pending:
26109
+ worker_items = completed_items
26110
+ # If there are truly new pending items but all original items are done,
26111
+ # check if the new items match future plan step content
26112
+ elif truly_new_pending and len(completed_items) >= 2:
26113
+ bb = self._ensure_blackboard()
26114
+ future_step_content = set()
26115
+ found_current = False
26116
+ for t in bb.get("project_todos", []):
26117
+ if not isinstance(t, dict) or t.get("category") != "plan_step":
26118
+ continue
26119
+ if str(t.get("id", "") or "") == step_id:
26120
+ found_current = True
26121
+ continue
26122
+ if found_current:
26123
+ fc = str(t.get("full_content", "") or t.get("content", "") or "").strip().lower()
26124
+ future_step_content.add(fc)
26125
+ for line in fc.split("\n"):
26126
+ sl = line.strip().lower()
26127
+ if sl:
26128
+ future_step_content.add(sl)
26129
+ if future_step_content:
26130
+ _still_blocking = []
26131
+ for pi in truly_new_pending:
26132
+ pc = normalize_work_text(str(pi.get("content", ""))).strip().lower()
26133
+ # Check if this pending item's content appears in any future step
26134
+ is_future = any(pc in fsc or fsc in pc for fsc in future_step_content if len(fsc) > 4)
26135
+ if not is_future:
26136
+ _still_blocking.append(pi)
26137
+ if not _still_blocking:
26138
+ worker_items = completed_items
26139
+ all_marked_done = all(str(r.get("status", "")).lower() == "completed" for r in worker_items)
26140
+ if not all_marked_done:
26141
+ return False
26142
+ # Acceptance verification: check that each "completed" subtask has real evidence
26143
+ # Don't just trust the model's TodoWrite status — verify against accumulated tool outputs
26144
+ if worker_items:
26145
+ bb = self._ensure_blackboard()
26146
+ unverified = self._verify_subtasks_acceptance(worker_items, step_id, bb)
26147
+ if unverified:
26148
+ return False
26149
+ return True
26150
+
26151
+ def _verify_subtasks_acceptance(self, subtasks: list[dict], step_id: str, bb: dict) -> list[str]:
26152
+ """Verify each completed subtask has real evidence. Returns list of unverified subtask descriptions.
26153
+ Checks step_files and execution_logs against what each subtask's content implies."""
26154
+ import re
26155
+ # Gather accumulated evidence for this step
26156
+ step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
26157
+ step_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
26158
+ written_paths = set()
26159
+ for entry in step_entries:
26160
+ if isinstance(entry, dict) and str(entry.get("op", "")) in ("write_file", "edit_file"):
26161
+ written_paths.add(str(entry.get("path", "") or "").strip().lower())
26162
+ # Gather bash execution evidence
26163
+ exec_logs = bb.get("execution_logs", [])
26164
+ if not isinstance(exec_logs, list):
26165
+ exec_logs = []
26166
+ bash_outputs_lower = []
26167
+ for log in exec_logs[-30:]:
26168
+ if isinstance(log, dict):
26169
+ c = str(log.get("content", "") or "").lower()
26170
+ if c:
26171
+ bash_outputs_lower.append(c)
26172
+ all_bash_text = " ".join(bash_outputs_lower)
26173
+ negative_hints = ("error:", "failed", "failure", "traceback", "fatal", "not found",
26174
+ "no such file", "command not found", "permission denied")
26175
+ has_bash_failure = any(neg in all_bash_text for neg in negative_hints)
26176
+ # Define acceptance patterns from subtask content
26177
+ _file_create_re = re.compile(
26178
+ r"(?:创建|生成|编写|写入|create|write|generate|implement|scaffold)\s+(.+?)(?:\s|$|,|。|,|\()",
26179
+ re.IGNORECASE,
26180
+ )
26181
+ _run_test_kw = ("运行", "测试", "验证", "test", "pytest", "verify", "validate",
26182
+ "run", "check", "确认", "检查")
26183
+ _compile_kw = ("编译", "构建", "compile", "build", "cmake", "make", "gcc", "gfortran")
26184
+ _install_kw = ("安装", "install", "pip install", "npm install", "apt install")
26185
+ unverified: list[str] = []
26186
+ for st in subtasks:
26187
+ content = str(st.get("content", "") or "").strip()
26188
+ if not content:
26189
+ continue
26190
+ content_lower = content.lower()
26191
+ # Rule 1: If subtask mentions creating a file, check it was actually written
26192
+ m = _file_create_re.search(content)
26193
+ if m:
26194
+ target = m.group(1).strip().strip("\"'`").lower()
26195
+ # Extract just filename from path-like strings
26196
+ if "/" in target:
26197
+ target_parts = [p for p in target.split("/") if p.strip()]
26198
+ target_name = target_parts[-1] if target_parts else target
26199
+ else:
26200
+ target_name = target
26201
+ if target_name and len(target_name) > 2:
26202
+ found = any(target_name in wp for wp in written_paths)
26203
+ if not found:
26204
+ unverified.append(f"file not created: {target_name}")
26205
+ continue
26206
+ # Rule 2: If subtask mentions testing/running/verifying, check bash was executed
26207
+ if any(kw in content_lower for kw in _run_test_kw):
26208
+ if not bash_outputs_lower:
26209
+ unverified.append(f"no bash execution for: {trim(content, 60)}")
26210
+ continue
26211
+ # Check for test failures in recent bash output
26212
+ if has_bash_failure and any(kw in content_lower for kw in ("test", "测试", "pytest")):
26213
+ # Only block if failure keywords appear near test-related content
26214
+ test_related_failures = any(
26215
+ ("test" in line or "pytest" in line or "assert" in line)
26216
+ and any(neg in line for neg in negative_hints)
26217
+ for line in bash_outputs_lower[-10:]
26218
+ )
26219
+ if test_related_failures:
26220
+ unverified.append(f"test failures detected for: {trim(content, 60)}")
26221
+ continue
26222
+ # Rule 3: If subtask mentions compiling/building, check bash + no compile errors
26223
+ if any(kw in content_lower for kw in _compile_kw):
26224
+ if not bash_outputs_lower:
26225
+ unverified.append(f"no bash execution for compile: {trim(content, 60)}")
26226
+ continue
26227
+ compile_failures = any(
26228
+ any(neg in line for neg in ("error:", "failed", "failure"))
26229
+ and any(kw in line for kw in ("compil", "build", "cmake", "make", "link"))
26230
+ for line in bash_outputs_lower[-10:]
26231
+ )
26232
+ if compile_failures:
26233
+ unverified.append(f"compile failures for: {trim(content, 60)}")
26234
+ continue
26235
+ # Rule 4: If subtask mentions installing, check bash was run
26236
+ if any(kw in content_lower for kw in _install_kw):
26237
+ if not bash_outputs_lower:
26238
+ unverified.append(f"no bash for install: {trim(content, 60)}")
26239
+ continue
26240
+ # If none of the specific rules matched, the subtask is considered verified
26241
+ # (generic subtasks like "design" or "analyze" don't need tool evidence)
26242
+ return unverified
26243
+
26244
def _inject_rework_if_needed(self, plan_step: dict, worker_step: dict):
    """Inject a rework instruction when completed subtasks fail acceptance.

    Flow: collect the step's completed worker todo rows, run the heuristic
    acceptance check, then (if it reports failures and the rate limit allows)
    ask the LLM for a semantic verdict. When the verdict is not "all passed",
    matching todo rows are reverted from ``completed`` to ``in_progress`` and a
    ``<step-rework>`` guidance bubble is appended.

    Args:
        plan_step: Active plan step; its ``id`` and ``content`` are read.
        worker_step: Accepted for interface compatibility; not read here.

    Best-effort by design: any exception is swallowed so that a failure in this
    advisory path never breaks the calling round.
    """
    try:
        step_id = str(plan_step.get("id", "") or "")
        if not step_id:
            return
        rows = self._active_plan_worker_todo_rows(step_id, role="")
        completed_rows = [r for r in rows if str(r.get("status", "")).lower() == "completed"]
        if not completed_rows:
            return
        bb = self._ensure_blackboard()
        failures = self._verify_subtasks_acceptance(completed_rows, step_id, bb)
        if not failures:
            return

        # Rate-limit per step (30s). Checked BEFORE the LLM call so a throttled
        # round does not pay for a model request whose result would be discarded.
        rework_key = f"_rework_injected_{step_id}"
        last_rework = getattr(self, rework_key, 0.0)
        if float(now_ts()) - float(last_rework) < 30.0:
            return

        # LLM-based acceptance check: semantic analysis over heuristics.
        llm_verdict = self._llm_verify_subtask_acceptance(plan_step, completed_rows, bb)
        if llm_verdict.get("all_passed", False):
            return
        # The LLM helper always returns a "rework_items" key (empty on failure),
        # so fall back to the heuristic failures whenever that list is empty.
        rework_items = llm_verdict.get("rework_items") or failures
        if not rework_items:
            return
        setattr(self, rework_key, float(now_ts()))

        step_label = trim(str(plan_step.get("content", "") or ""), 80)
        rework_text = (
            f"<step-rework>\n"
            f"Step \"{step_label}\" acceptance check FAILED. "
            f"The following subtasks were marked completed but did not pass verification:\n"
        )
        for i, item in enumerate(rework_items[:5]):
            rework_text += f"  {i+1}. {trim(str(item), 120)}\n"
        rework_text += (
            "\nACTION REQUIRED: Fix these issues NOW before the step can advance.\n"
            "- For missing files: create them with write_file\n"
            "- For failed tests/builds: run the command again and fix errors\n"
            "- For unverified installs: re-run the install command\n"
            "After fixing, update TodoWrite to reflect the corrected state.\n"
            "</step-rework>"
        )

        # Revert falsely "completed" rows of this step back to in_progress.
        snapshot_rows = self.todo.snapshot()
        modified = False
        for row in snapshot_rows:
            if not isinstance(row, dict):
                continue
            if str(row.get("parent_step_id", "") or "") != step_id:
                continue
            if str(row.get("status", "")).lower() != "completed":
                continue
            rc = str(row.get("content", "") or "").strip().lower()
            if not rc:
                continue
            # Loose match: the row's first 20 chars, or any of its first three
            # words longer than 3 chars, appears in a failure description.
            leading_words = [w for w in rc.split()[:3] if len(w) > 3]
            for fail in rework_items:
                fail_lower = str(fail).lower()
                if rc[:20] in fail_lower or any(w in fail_lower for w in leading_words):
                    row["status"] = "in_progress"
                    modified = True
                    break
        if modified:
            try:
                self.todo.update(snapshot_rows)
            except Exception:
                # The guidance bubble below is still useful even if the revert fails.
                pass

        target_roles: tuple[str, ...] = ()
        if self._is_multi_agent_mode():
            active_role = str(bb.get("active_agent", "") or "developer")
            if active_role:
                target_roles = (active_role,)
        self._append_plan_guidance_bubble(
            rework_text,
            target_roles=target_roles,
            summary=f"step rework: {len(rework_items)} items failed acceptance",
        )
    except Exception:
        # Deliberate best-effort: rework injection is advisory only.
        pass
26322
+
26323
+ def _llm_verify_subtask_acceptance(self, plan_step: dict, completed_subtasks: list[dict], bb: dict) -> dict:
26324
+ """Use LLM semantic analysis to verify if subtasks are truly completed.
26325
+ Returns {"all_passed": bool, "rework_items": list[str]}."""
26326
+ try:
26327
+ step_id = str(plan_step.get("id", "") or "")
26328
+ step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
26329
+ step_entries = step_files_raw.get(step_id, []) if step_id else []
26330
+ files_summary = []
26331
+ for entry in (step_entries[-15:] if isinstance(step_entries, list) else []):
26332
+ if isinstance(entry, dict):
26333
+ files_summary.append(f"{entry.get('op','?')}: {entry.get('path','?')}")
26334
+ exec_logs = bb.get("execution_logs", [])
26335
+ recent_exec = []
26336
+ for log in (exec_logs[-8:] if isinstance(exec_logs, list) else []):
26337
+ if isinstance(log, dict):
26338
+ c = trim(str(log.get("content", "") or ""), 200)
26339
+ if c:
26340
+ recent_exec.append(c)
26341
+ subtask_list = "\n".join(
26342
+ f"- [{str(st.get('status','')).upper()}] {trim(str(st.get('content','') or ''), 120)}"
26343
+ for st in completed_subtasks[:8]
26344
+ )
26345
+ prompt = (
26346
+ "Analyze whether these subtasks are TRULY completed based on the evidence.\n\n"
26347
+ f"SUBTASKS:\n{subtask_list}\n\n"
26348
+ f"FILES CREATED/MODIFIED:\n{chr(10).join(files_summary[-10:]) or '(none)'}\n\n"
26349
+ f"RECENT EXECUTION OUTPUT:\n{chr(10).join(recent_exec[-5:]) or '(none)'}\n\n"
26350
+ "For each subtask, determine if it's genuinely done:\n"
26351
+ "- File creation tasks: was the file actually created?\n"
26352
+ "- Test/verify tasks: was a test/command actually run? Did it pass?\n"
26353
+ "- Build/compile tasks: was compilation attempted? Any errors?\n"
26354
+ "- Install tasks: was the install command run?\n\n"
26355
+ "Reply ONLY as JSON: {\"all_passed\": true/false, \"rework_items\": [\"description of what failed\"]}\n"
26356
+ "If all subtasks pass, return {\"all_passed\": true, \"rework_items\": []}"
26357
+ )
26358
+ resp = self.ollama.chat(
26359
+ [{"role": "user", "content": prompt}],
26360
+ system="You are a strict QA reviewer. Verify task completion against evidence. Reply ONLY valid JSON.",
26361
+ max_tokens=300,
26362
+ think=False,
26363
+ )
26364
+ import json
26365
+ text = str(resp.get("text", "") or "").strip()
26366
+ if "{" in text:
26367
+ json_str = text[text.index("{"):text.rindex("}") + 1]
26368
+ result = json.loads(json_str)
26369
+ if isinstance(result, dict):
26370
+ return {
26371
+ "all_passed": bool(result.get("all_passed", False)),
26372
+ "rework_items": list(result.get("rework_items", [])),
26373
+ }
26374
+ except Exception:
26375
+ pass
26376
+ return {"all_passed": False, "rework_items": []}
25715
26377
 
25716
26378
  def _collect_step_evidence(self, plan_step: dict, worker_step: dict) -> str:
25717
26379
  """Collect evidence summary from worker step for plan step completion."""
@@ -25735,6 +26397,10 @@ body{padding:18px}
25735
26397
  elif name in ("write_to_blackboard", "query_code_library", "query_knowledge_library"):
25736
26398
  out = self._tool_result_output_excerpt(r, 100)
25737
26399
  parts.append(f"{name}" + (f": {out}" if out else ""))
26400
+ if not parts:
26401
+ bb_evidence = self._collect_blackboard_step_evidence(plan_step)
26402
+ if bb_evidence:
26403
+ return bb_evidence
25738
26404
  return trim("; ".join(parts) or "post-execution evidence", 200)
25739
26405
 
25740
26406
  def _get_active_plan_step(self, board: dict | None = None) -> dict | None:
@@ -25842,10 +26508,13 @@ body{padding:18px}
25842
26508
 
25843
26509
  merged_by_identity: dict[str, dict] = {}
25844
26510
  ordered_identities: list[str] = []
26511
+ # Fix 2: Compute existing identities for next-step detection
26512
+ _existing_identities: set[str] = set()
25845
26513
  for row in target_rows:
25846
26514
  identity = self._plan_worker_todo_identity(row)
25847
26515
  if not identity:
25848
26516
  continue
26517
+ _existing_identities.add(identity)
25849
26518
  if identity not in merged_by_identity:
25850
26519
  merged_by_identity[identity] = dict(row)
25851
26520
  ordered_identities.append(identity)
@@ -25883,11 +26552,42 @@ body{padding:18px}
25883
26552
  merged.update(row)
25884
26553
  merged["owner"] = str(merged.get("owner", "") or role_key).strip().lower() or role_key
25885
26554
  merged["parent_step_id"] = trim(str(merged.get("parent_step_id", "") or step_id), 20) or step_id
26555
+ # Fix 2 support: Timestamp new items for next-step detection
26556
+ if identity not in _existing_identities and "created_at" not in merged:
26557
+ merged["created_at"] = float(now_ts())
26558
+ if str(merged.get("status", "")).lower() == "completed" and "updated_at" not in merged:
26559
+ merged["updated_at"] = float(now_ts())
25886
26560
  merged_by_identity[identity] = merged
25887
26561
  if identity not in ordered_identities:
25888
26562
  ordered_identities.append(identity)
25889
26563
 
25890
26564
  merged_target_rows = [merged_by_identity[i] for i in ordered_identities if i in merged_by_identity]
26565
+
26566
+ # Fix 4: Content-based deduplication to prevent duplicate subtasks from accumulating
26567
+ _seen_content: set[str] = set()
26568
+ _deduped_target: list[dict] = []
26569
+ for row in merged_target_rows:
26570
+ _ck = normalize_work_text(str(row.get("content", ""))).strip().lower()
26571
+ if _ck in _seen_content:
26572
+ continue
26573
+ _seen_content.add(_ck)
26574
+ _deduped_target.append(row)
26575
+ merged_target_rows = _deduped_target
26576
+
26577
+ # Fix 2: Detect "next-step intent" — if all existing items are completed,
26578
+ # new pending items that don't match existing identities are for the next step.
26579
+ # Remove their parent_step_id so they don't block current step advancement.
26580
+ _all_existing_done = (
26581
+ bool(target_rows) and
26582
+ all(str(r.get("status", "")).lower() == "completed" for r in target_rows)
26583
+ )
26584
+ if _all_existing_done:
26585
+ for row in merged_target_rows:
26586
+ _rid = self._plan_worker_todo_identity(row)
26587
+ if (_rid and _rid not in _existing_identities
26588
+ and str(row.get("status", "")).lower() != "completed"):
26589
+ row.pop("parent_step_id", None) # Not for current step
26590
+
25891
26591
  final_rows = preserved + passthrough_rows + merged_target_rows
25892
26592
  return self.todo.update(final_rows)
25893
26593
 
@@ -26292,21 +26992,46 @@ body{padding:18px}
26292
26992
  str(r.get("name", "")) == "bash" and r.get("ok", False)
26293
26993
  for r in tool_results
26294
26994
  )
26295
- validation_ok = self._tool_results_have_validation_evidence(current, tool_results)
26995
+ validation_ok_current = self._tool_results_have_validation_evidence(current, tool_results)
26996
+ validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
26997
+ validation_ok = validation_ok_current or validation_ok_blackboard
26998
+ bb_sig = self._plan_step_blackboard_signals(current, bb)
26296
26999
  # Auto-advance conditions:
26297
27000
  should_advance = False
26298
27001
  # Priority 1: Check if worker subtasks are all completed (most reliable signal)
26299
27002
  subtasks_done = self._step_subtasks_all_completed(current)
26300
27003
  if subtasks_done and validation_ok:
26301
27004
  should_advance = True
26302
- # Priority 2: Phase-based heuristics (require observable evidence, not just file creation)
26303
- if not should_advance:
26304
- if phase in ("research", "design") and validation_ok:
26305
- should_advance = True
26306
- elif phase == "implement" and wrote_files and validation_ok:
26307
- should_advance = True
26308
- elif phase in ("test", "review") and ran_bash_ok and validation_ok:
27005
+ # Fix 3 (single mode): Accumulated-evidence path — advance when subtasks are done and the step has accumulated evidence
27006
+ # Covers TodoWrite-only turns where validation_ok_current is False
27007
+ if not should_advance and subtasks_done:
27008
+ todo_progress_signal = any(
27009
+ isinstance(r, dict) and r.get("ok", False)
27010
+ and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
27011
+ for r in tool_results
27012
+ )
27013
+ if todo_progress_signal and self._step_has_accumulated_evidence(current, bb):
26309
27014
  should_advance = True
27015
+ # Priority 2: Phase-based heuristics — BUT gate by subtask completion when subtasks exist
27016
+ # CRITICAL: A single write_file must NOT advance when 3+ subtasks remain
27017
+ if not should_advance:
27018
+ _has_subtasks_s = bool(self._active_plan_worker_todo_rows(
27019
+ str(current.get("id", "") or ""), role=""
27020
+ ))
27021
+ _can_use_phase_heuristic = subtasks_done or not _has_subtasks_s
27022
+ if _can_use_phase_heuristic:
27023
+ if phase in ("research", "design") and validation_ok:
27024
+ should_advance = True
27025
+ elif phase == "implement" and (
27026
+ (wrote_files and validation_ok_current)
27027
+ or (bb_sig["has_write"] and validation_ok_blackboard)
27028
+ ):
27029
+ should_advance = True
27030
+ elif phase in ("test", "review") and (
27031
+ (ran_bash_ok and validation_ok_current)
27032
+ or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
27033
+ ):
27034
+ should_advance = True
26310
27035
  # Also check if the agent explicitly mentioned step completion
26311
27036
  if not should_advance:
26312
27037
  # Check last assistant message for step completion signals
@@ -26327,6 +27052,7 @@ body{padding:18px}
26327
27052
  except Exception:
26328
27053
  pass
26329
27054
  else:
27055
+ self._inject_rework_if_needed(current, {"tool_results": tool_results})
26330
27056
  self._sync_todos_from_blackboard(reason="single-agent-round")
26331
27057
 
26332
27058
  def _todo_project_rows_from_blackboard(self, board: dict | None = None) -> list[dict]:
@@ -27923,6 +28649,7 @@ body{padding:18px}
27923
28649
  "IMPORTANT: Previous fix attempts FAILED. You MUST change your approach — "
27924
28650
  "do NOT repeat the same instruction. Include the exact error output in your delegation. "
27925
28651
  )
28652
+ html_hint = self._html_frontend_boost_instruction()
27926
28653
  # Loaded skills constraint for manager
27927
28654
  skills_constraint = self._loaded_skills_prompt_hint(for_role="manager")
27928
28655
  bb_skills = board.get("loaded_skills", {})
@@ -27957,6 +28684,7 @@ body{padding:18px}
27957
28684
  f"{todo_route_note}"
27958
28685
  f"{phase_hint}"
27959
28686
  f"{failure_hint}"
28687
+ f"{html_hint}"
27960
28688
  f"{skills_constraint}"
27961
28689
  f"Level={level}, mode={mode}, progress={progress}, "
27962
28690
  f"budget={'unlimited' if int(budget) <= 0 else int(budget)}, "
@@ -29018,6 +29746,19 @@ body{padding:18px}
29018
29746
  seen.add(low_tail)
29019
29747
  keep_lines.append(tail)
29020
29748
  continue
29749
+ if low.startswith("tasks to complete:"):
29750
+ continue
29751
+ if re.match(r"^\d+(?:\.\d+)*[.)]\s+", s):
29752
+ continue
29753
+ if re.match(r"^[-*]\s+", s):
29754
+ continue
29755
+ if re.match(
29756
+ r"(?i)^(mkdir\s+-p|run:|create directories:|create project|create directory|initialize project|cmake\b|python\s+-m\s+venv\b|npx\b)",
29757
+ s,
29758
+ ):
29759
+ continue
29760
+ if re.match(r"^(创建|初始化|运行|目录结构|项目根目录结构)[::]?", s):
29761
+ continue
29021
29762
  norm = re.sub(r"\s+", " ", s).strip().lower()
29022
29763
  if norm and norm not in seen:
29023
29764
  seen.add(norm)
@@ -30490,6 +31231,7 @@ body{padding:18px}
30490
31231
  skills_block = self._skills_awareness_block(for_role=role_key)
30491
31232
  code_note = self._runtime_code_reference_prompt_block(max_chars=2600)
30492
31233
  engineering_note = self._engineering_execution_boost_instruction()
31234
+ html_note = self._html_frontend_boost_instruction()
30493
31235
  plan_todo_note = self._plan_todo_discipline_prompt(role=role_key)
30494
31236
  base = (
30495
31237
  f"You are {self._agent_display_name(role_key)} in a multi-agent coding system. "
@@ -30498,10 +31240,15 @@ body{padding:18px}
30498
31240
  f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
30499
31241
  f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS) or pptxgen.bundle.js (browser). "
30500
31242
  f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
31243
+ "IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
31244
+ "If an HTML deliverable needs any asset from js_lib, copy it into a task-local relative asset folder "
31245
+ "such as './js/' or './assets/vendor/' next to the deliverable, then reference it with a plain relative path. "
31246
+ "Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases in final exported HTML. "
30501
31247
  "Use blackboard for shared state, ask_colleague for inter-agent communication. "
30502
31248
  "Keep outputs concise and action-oriented. "
30503
31249
  f"{code_note + ' ' if code_note else ''}"
30504
31250
  f"{engineering_note + ' ' if engineering_note else ''}"
31251
+ f"{html_note + ' ' if html_note else ''}"
30505
31252
  f"{_detect_os_shell_instruction()} "
30506
31253
  f"{model_language_instruction(self.ui_language)} "
30507
31254
  )
@@ -30557,10 +31304,6 @@ body{padding:18px}
30557
31304
  "The skill's workflow, tools, and file structure OVERRIDE the plan's implementation "
30558
31305
  "approach — if the plan says 'use python-pptx' but the skill says 'use PptxGenJS', "
30559
31306
  "use PptxGenJS. The skill defines HOW to implement; the plan defines WHAT to do. "
30560
- "AUTONOMOUS SKILL LOADING: When starting a coding, debugging, or architecture task, "
30561
- "call list_skills to discover available skills, then load_skill to activate the most relevant ones. "
30562
- "Load skills BEFORE you start working, not after you're stuck. "
30563
- "Already-loaded skills appear as <loaded-skill> messages in your context — use them directly without reloading. "
30564
31307
  "TODO TRACKING (mandatory): "
30565
31308
  "When a plan step is active, follow the current todo subtask order instead of inventing a parallel path. "
30566
31309
  "After completing ONE subtask, call TodoWrite immediately — mark that subtask as 'completed' and move the next one to 'in_progress' before doing more work. "
@@ -30650,29 +31393,71 @@ body{padding:18px}
30650
31393
  )
30651
31394
 
30652
31395
  def _todo_write_rescue(self, args: dict) -> str:
31396
+ """Rescue todo writer — accepts both strings and dicts, auto-normalizes.
31397
+ FIXED: Now preserves status from incoming items (especially 'completed')
31398
+ instead of resetting everything to 'pending'."""
30653
31399
  raw_items = args.get("items", [])
30654
31400
  if not isinstance(raw_items, list) or not raw_items:
30655
31401
  raise ValueError("items must be a non-empty array")
30656
- limited = raw_items[:7]
31402
+ limited = raw_items[:12] # Allow more items (was 7) — plans can have 5+ subtasks
30657
31403
  active_step = self._get_active_plan_step()
30658
31404
  active_step_id = trim(str((active_step or {}).get("id", "") or ""), 20)
30659
31405
  owner_hint = self._current_plan_worker_owner()
30660
31406
  clean_items = []
31407
+ _status_alias = {
31408
+ "todo": "pending", "doing": "in_progress", "inprogress": "in_progress",
31409
+ "in-progress": "in_progress", "done": "completed", "finish": "completed",
31410
+ "finished": "completed",
31411
+ }
30661
31412
  for idx, item in enumerate(limited):
30662
31413
  if isinstance(item, dict):
30663
31414
  content = str(item.get("content", item.get("text", item.get("title", "")))).strip()
30664
31415
  owner = str(item.get("owner", "") or owner_hint).strip().lower()
30665
31416
  parent_step_id = trim(str(item.get("parent_step_id", "") or active_step_id), 20)
31417
+ # Preserve status from incoming dict (critical for subtask state tracking)
31418
+ raw_status = str(item.get("status", item.get("state", "pending"))).strip().lower()
31419
+ status = _status_alias.get(raw_status, raw_status)
31420
+ if status not in {"pending", "in_progress", "completed"}:
31421
+ status = "pending"
30666
31422
  else:
30667
31423
  content = str(item).strip()
30668
31424
  owner = owner_hint
30669
31425
  parent_step_id = active_step_id
31426
+ # Parse status from string prefix markers:
31427
+ # "✅ task" / "[x] task" / "[done] task" / "[completed] task" → completed
31428
+ # "▶ task" / "[>] task" / "[doing] task" / "[in_progress] task" → in_progress
31429
+ # "⬜ task" / "[ ] task" / "[pending] task" / "[todo] task" → pending
31430
+ import re as _re_status
31431
+ _prefix_m = _re_status.match(
31432
+ r'^(?:'
31433
+ r'[\u2705\u2611]\s*' # ✅ ☑
31434
+ r'|\[x\]\s*|\[done\]\s*|\[completed\]\s*|\[finish(?:ed)?\]\s*'
31435
+ r'|\(done\)\s*|\(completed\)\s*|\(x\)\s*'
31436
+ r')',
31437
+ content, _re_status.IGNORECASE
31438
+ )
31439
+ _prefix_ip = _re_status.match(
31440
+ r'^(?:'
31441
+ r'[\u25b6\u25ba\u27a1]\s*' # ▶ ► ➡
31442
+ r'|\[>\]\s*|\[doing\]\s*|\[in.?progress\]\s*'
31443
+ r'|\(doing\)\s*|\(in.?progress\)\s*'
31444
+ r')',
31445
+ content, _re_status.IGNORECASE
31446
+ )
31447
+ if _prefix_m:
31448
+ status = "completed"
31449
+ content = content[_prefix_m.end():].strip()
31450
+ elif _prefix_ip:
31451
+ status = "in_progress"
31452
+ content = content[_prefix_ip.end():].strip()
31453
+ else:
31454
+ status = "pending"
30670
31455
  content = normalize_work_text(content) or content
30671
31456
  if not content:
30672
31457
  continue
30673
31458
  row = {
30674
31459
  "content": content,
30675
- "status": "pending",
31460
+ "status": status,
30676
31461
  }
30677
31462
  if owner in {"developer", "explorer", "reviewer"}:
30678
31463
  row["owner"] = owner
@@ -30681,10 +31466,18 @@ body{padding:18px}
30681
31466
  clean_items.append(row)
30682
31467
  if not clean_items:
30683
31468
  raise ValueError("no valid todo item text")
30684
- in_progress_index = int(args.get("in_progress_index", 0) or 0)
30685
- if in_progress_index < 0 or in_progress_index >= len(clean_items):
30686
- in_progress_index = 0
30687
- clean_items[in_progress_index]["status"] = "in_progress"
31469
+ # Only apply in_progress_index if NO items already have in_progress status
31470
+ has_in_progress = any(r["status"] == "in_progress" for r in clean_items)
31471
+ if not has_in_progress:
31472
+ in_progress_index = int(args.get("in_progress_index", 0) or 0)
31473
+ if in_progress_index < 0 or in_progress_index >= len(clean_items):
31474
+ in_progress_index = 0
31475
+ # Only set in_progress on a pending item
31476
+ for i, r in enumerate(clean_items):
31477
+ if r["status"] == "pending":
31478
+ if i >= in_progress_index:
31479
+ r["status"] = "in_progress"
31480
+ break
30688
31481
  if active_step is not None:
30689
31482
  return self._merge_plan_worker_todo_items(clean_items, role=owner_hint)
30690
31483
  if self._is_multi_agent_mode() and owner_hint in {"developer", "explorer", "reviewer"}:
@@ -31337,11 +32130,21 @@ body{padding:18px}
31337
32130
 
31338
32131
  def _dispatch_tool_inner(self, name: str, args: dict, role_key: str = "") -> str:
31339
32132
  """Inner tool dispatcher — all tool logic lives here."""
32133
+ # Fix 5d: Reset TodoWrite loop counter on non-TodoWrite tool calls
32134
+ if name not in ("TodoWrite", "TodoWriteRescue") and hasattr(self, '_todowrite_step_counter'):
32135
+ try:
32136
+ _rst_step = self._get_active_plan_step()
32137
+ if isinstance(_rst_step, dict):
32138
+ _rst_id = str(_rst_step.get("id", "") or "")
32139
+ if _rst_id:
32140
+ self._todowrite_step_counter.pop(_rst_id, None)
32141
+ except Exception:
32142
+ pass
31340
32143
  if name == "bash":
31341
32144
  guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), self.files_root)
31342
32145
  if guard_error:
31343
32146
  return guard_error
31344
- meta = self._run_shell_meta(args["command"], self.files_root, 120)
32147
+ meta = self._run_shell_meta(args["command"], self.files_root, self._shell_command_timeout())
31345
32148
  self._emit(
31346
32149
  "command",
31347
32150
  {
@@ -31530,6 +32333,50 @@ body{padding:18px}
31530
32333
  result = self._merge_owner_scoped_todo_items(items, role=str(role_key))
31531
32334
  else:
31532
32335
  result = self.todo.update(args["items"])
32336
+ # Fix 1: Auto-advance plan step when all subtasks are completed
32337
+ # This handles the case where the worker's last turn only calls TodoWrite
32338
+ # and _post_execution_plan_step_check would miss it due to no "real" tool evidence
32339
+ if has_plan_steps:
32340
+ try:
32341
+ _as = self._get_active_plan_step()
32342
+ if isinstance(_as, dict):
32343
+ _as_id = str(_as.get("id", "") or "")
32344
+ if _as_id and self._step_subtasks_all_completed(_as):
32345
+ _acc_ev = self._collect_accumulated_step_evidence(_as)
32346
+ if _acc_ev and _acc_ev != "accumulated-step-evidence":
32347
+ # Has real evidence — auto-advance
32348
+ self._advance_plan_step(
32349
+ evidence=_acc_ev or "subtasks-all-completed",
32350
+ actor=str(role_key or "developer"),
32351
+ )
32352
+ elif self._step_has_accumulated_evidence(_as, bb):
32353
+ self._advance_plan_step(
32354
+ evidence="subtasks-all-completed",
32355
+ actor=str(role_key or "developer"),
32356
+ )
32357
+ except Exception:
32358
+ pass
32359
+ # Fix 5b: TodoWrite loop detection — force-advance after 3 consecutive calls
32360
+ if has_plan_steps:
32361
+ try:
32362
+ _as5 = self._get_active_plan_step()
32363
+ if isinstance(_as5, dict):
32364
+ _as5_id = str(_as5.get("id", "") or "")
32365
+ if _as5_id:
32366
+ if not hasattr(self, '_todowrite_step_counter'):
32367
+ self._todowrite_step_counter = {}
32368
+ cnt = self._todowrite_step_counter.get(_as5_id, 0) + 1
32369
+ self._todowrite_step_counter[_as5_id] = cnt
32370
+ if (cnt >= 3
32371
+ and self._step_subtasks_all_completed(_as5)
32372
+ and self._step_has_accumulated_evidence(_as5, bb)):
32373
+ # Force advance — worker is stuck in a loop AND step has real evidence
32374
+ self._advance_plan_step(
32375
+ evidence="force-advance:todowrite-loop-detected",
32376
+ actor=str(role_key or "developer"),
32377
+ )
32378
+ except Exception:
32379
+ pass
31533
32380
  # Step completion skill recheck: if any item just got marked completed, re-evaluate skills
31534
32381
  # This fires in ALL modes (single/sync/plan) when developer writes todos
31535
32382
  try:
@@ -31891,7 +32738,7 @@ body{padding:18px}
31891
32738
  guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), wt_path)
31892
32739
  if guard_error:
31893
32740
  return guard_error
31894
- meta = self._run_shell_meta(args["command"], wt_path, 300)
32741
+ meta = self._run_shell_meta(args["command"], wt_path, self._shell_command_timeout())
31895
32742
  self._emit(
31896
32743
  "command",
31897
32744
  {
@@ -32868,7 +33715,7 @@ body{padding:18px}
32868
33715
  else:
32869
33716
  _repeat_delegation_count = 0
32870
33717
  _prev_delegation_hash = _cur_hash
32871
- if _repeat_delegation_count >= 15:
33718
+ if _repeat_delegation_count >= 3:
32872
33719
  self._emit("status", {"summary": f"manager stuck: repeated identical delegation x{_repeat_delegation_count + 1}; forcing advance"})
32873
33720
  _bb_stuck = self._ensure_blackboard()
32874
33721
  _stuck_step = next(
@@ -32902,13 +33749,6 @@ body{padding:18px}
32902
33749
  media_inputs_pool=media_inputs_pool,
32903
33750
  media_seen_ts_by_role=media_seen_ts_by_role,
32904
33751
  )
32905
- # Sync-mode skill auto-discovery: same mechanism as plan mode's step-completed trigger.
32906
- # Runs on early rounds for developer/explorer. Uses goal_sig dedup — no re-loading if already loaded.
32907
- if role in ("developer", "explorer") and rounds_used <= 2:
32908
- try:
32909
- self._refresh_loaded_skills_for_execution_focus(trigger=f"sync-worker-pre:{role}")
32910
- except Exception:
32911
- pass
32912
33752
  board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
32913
33753
  step = self._multi_agent_turn(
32914
33754
  role,
@@ -32918,49 +33758,6 @@ body{padding:18px}
32918
33758
  self._blackboard_update_from_worker_step(role, step)
32919
33759
  # Post-execution plan step advancement (replaces pre-execution advancement)
32920
33760
  self._post_execution_plan_step_check(route, step if isinstance(step, dict) else {})
32921
- # Sync-mode failure recovery: detect all-tools-failed and inject recovery hint + auto-load debugging skill
32922
- _step_dict = step if isinstance(step, dict) else {}
32923
- _step_results = _step_dict.get("tool_results", []) or []
32924
- if _step_results:
32925
- _sync_err_count = sum(1 for r in _step_results if isinstance(r, dict) and not r.get("ok", False))
32926
- _sync_ok_count = sum(1 for r in _step_results if isinstance(r, dict) and r.get("ok", False))
32927
- if _sync_err_count > 0 and _sync_ok_count == 0:
32928
- # All tool calls failed in this worker turn — inject recovery guidance
32929
- _failed_tools = [str(r.get("name", "")) for r in _step_results if isinstance(r, dict)][:4]
32930
- _err_outputs = " | ".join(
32931
- trim(str(r.get("output", "") or ""), 120)
32932
- for r in _step_results if isinstance(r, dict) and not r.get("ok", False)
32933
- )[:400]
32934
- self._append_agent_context_message(
32935
- role,
32936
- {
32937
- "role": "user",
32938
- "content": (
32939
- "<failure-recovery>"
32940
- f"All tool calls failed in this turn ({', '.join(_failed_tools)}). "
32941
- f"Errors: {_err_outputs}\n"
32942
- "Before retrying, STOP and diagnose:\n"
32943
- "1) If a debugging skill is available, call load_skill('systematic-debugging') and follow its workflow.\n"
32944
- "2) Read the EXACT error message — identify the root cause, not just the symptom.\n"
32945
- "3) Form ONE hypothesis about the cause before making any changes.\n"
32946
- "4) Apply ONE targeted fix, then verify with a test/build command.\n"
32947
- "5) If still blocked after 2 attempts, report the exact blocker to the user."
32948
- "</failure-recovery>"
32949
- ),
32950
- "ts": now_ts(),
32951
- "agent_role": role,
32952
- },
32953
- mirror_to_global=False,
32954
- )
32955
- # Auto-load systematic-debugging if failure involves code errors
32956
- _code_err_kw = ("bash", "compile", "syntax", "test", "build", "traceback", "error:")
32957
- if any(kw in _err_outputs.lower() for kw in _code_err_kw):
32958
- _bb_sk = self._ensure_blackboard().get("loaded_skills", {})
32959
- if isinstance(_bb_sk, dict) and "systematic-debugging" not in _bb_sk:
32960
- try:
32961
- self._load_skill_with_cache("systematic-debugging", load_source="auto:sync-worker-failure")
32962
- except Exception:
32963
- pass
32964
33761
  # Fix 6b: Pure sync no-plan — read worker-done signal and notify manager
32965
33762
  _bb_sync = self._ensure_blackboard()
32966
33763
  if _bb_sync.pop("sync_worker_round_done", False):
@@ -34085,6 +34882,7 @@ body{padding:18px}
34085
34882
  "category": "plan_step",
34086
34883
  "plan_step_index": i,
34087
34884
  "created_at": float(now_ts()),
34885
+ "activated_at": float(now_ts()) if not plan_todos else None,
34088
34886
  "completed_at": None,
34089
34887
  "completed_by": "",
34090
34888
  "evidence": "",
@@ -35007,13 +35805,6 @@ body{padding:18px}
35007
35805
  self.agent_round_index = int(self.agent_round_index) + 1
35008
35806
  self.current_phase = "model-call"
35009
35807
  self.current_tool_name = ""
35010
- # Single-mode skill auto-discovery: same as plan mode. Runs on first 2 rounds only.
35011
- # Uses goal_sig dedup — if skills already loaded for this goal, no-op.
35012
- if int(self.agent_round_index) <= 2:
35013
- try:
35014
- self._refresh_loaded_skills_for_execution_focus(trigger="single-worker-pre")
35015
- except Exception:
35016
- pass
35017
35808
  if level_budget > 0 and int(self.agent_round_index) > int(level_budget):
35018
35809
  force_single_tool_rounds = max(force_single_tool_rounds, 2)
35019
35810
  if not compact_budget_notified:
@@ -36525,6 +37316,7 @@ body{padding:18px}
36525
37316
  "live_run_notice_elapsed": round(float(self.live_run_notice_elapsed or 0.0), 1),
36526
37317
  "max_agent_rounds": int(self.max_agent_rounds),
36527
37318
  "max_run_seconds": int(self.max_run_seconds),
37319
+ "shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
36528
37320
  "auto_model_switch": bool(self.auto_model_switch),
36529
37321
  "arbiter_enabled": bool(self.arbiter_enabled),
36530
37322
  "arbiter_model": str(self.arbiter_model or ""),
@@ -36704,6 +37496,7 @@ class SessionManager:
36704
37496
  context_limit_locked: bool = False,
36705
37497
  max_rounds: int = MAX_AGENT_ROUNDS,
36706
37498
  max_run_seconds: int = MAX_RUN_SECONDS,
37499
+ shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
36707
37500
  auto_model_switch: bool = False,
36708
37501
  arbiter_enabled: bool = True,
36709
37502
  arbiter_model: str = "",
@@ -36749,6 +37542,12 @@ class SessionManager:
36749
37542
  maximum=MAX_RUN_TIMEOUT_SECONDS,
36750
37543
  fallback=MAX_RUN_SECONDS,
36751
37544
  )
37545
+ self.shell_command_timeout_seconds = normalize_timeout_seconds(
37546
+ shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
37547
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
37548
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
37549
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
37550
+ )
36752
37551
  self.auto_model_switch = bool(auto_model_switch)
36753
37552
  self.arbiter_enabled = bool(arbiter_enabled)
36754
37553
  self.arbiter_model = str(arbiter_model or "").strip()
@@ -37031,6 +37830,12 @@ class SessionManager:
37031
37830
  )
37032
37831
  sess.execution_mode = normalize_execution_mode(self.execution_mode, default=EXECUTION_MODE_SYNC)
37033
37832
  sess.single_advance_prompt_enhance = bool(self.single_advance_prompt_enhance)
37833
+ sess.shell_command_timeout_seconds = normalize_timeout_seconds(
37834
+ self.shell_command_timeout_seconds,
37835
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
37836
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
37837
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
37838
+ )
37034
37839
  sess._apply_active_profile()
37035
37840
  sess.updated_at = now_ts()
37036
37841
  sess._persist()
@@ -37091,6 +37896,7 @@ class SessionManager:
37091
37896
  context_limit_locked=self.context_limit_locked,
37092
37897
  max_rounds=self.max_rounds,
37093
37898
  max_run_seconds=self.max_run_seconds,
37899
+ shell_command_timeout_seconds=self.shell_command_timeout_seconds,
37094
37900
  auto_model_switch=self.auto_model_switch,
37095
37901
  arbiter_enabled=self.arbiter_enabled,
37096
37902
  arbiter_model=self.arbiter_model,
@@ -37140,6 +37946,7 @@ class SessionManager:
37140
37946
  context_limit_locked=self.context_limit_locked,
37141
37947
  max_rounds=self.max_rounds,
37142
37948
  max_run_seconds=self.max_run_seconds,
37949
+ shell_command_timeout_seconds=self.shell_command_timeout_seconds,
37143
37950
  auto_model_switch=self.auto_model_switch,
37144
37951
  arbiter_enabled=self.arbiter_enabled,
37145
37952
  arbiter_model=self.arbiter_model,
@@ -38761,7 +39568,7 @@ function feedSignature(snap){const feed=Array.isArray(snap?.conversation_feed)?s
38761
39568
  function boardsSignature(snap){return [snap?.running?1:0,snap?.agent_phase||'',Number(snap?.agent_round_index||0),Number(snap?.queued_user_inputs_count||0),Number(snap?.truncation_count||0),Number(snap?.live_truncation_attempts||0),Number(snap?.live_truncation_tokens||0),snap?.live_truncation_active?1:0,Number(snap?.context_tokens_estimate||0),Number(snap?.context_left_tokens||0),Number(snap?.context_left_percent||0),Number(snap?.render_bridge?.seq||0),(snap?.todos||[]).length,(snap?.tasks||[]).length,(snap?.activity||[]).length,(snap?.operations||[]).length,(snap?.uploads||[]).length].join('|')}
38762
39569
  function sessionsSignature(list){const rows=Array.isArray(list)?list:[];const sig=tailSig(rows,6,row=>`${String(row?.id||'')}:${row?.running?1:0}:${Number(row?.message_count||0)}:${Number(row?.updated_at||0)}`);const aid=String(S.activeId||'').trim();let activeSig='-';if(aid){const activeRow=rows.find(row=>String(row?.id||'')===aid);if(activeRow){activeSig=`${aid}:${activeRow?.running?1:0}:${Number(activeRow?.message_count||0)}:${Number(activeRow?.updated_at||0)}`}else{activeSig=`missing:${aid}`}}return `${rows.length}|active=${activeSig}|${sig}`}
38763
39570
  function _statInfinite(n){const v=Number(n);return(Number.isFinite(v)&&v>0)?String(v):'∞'}
38764
- function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
39571
+ function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'shell_command_timeout_seconds'))S.config.shell_command_timeout_seconds=cfg.shell_command_timeout_seconds;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
38765
39572
  function renderStats(){const sessions=S.sessions.length;const running=S.sessions.filter(x=>x.running).length;const msgs=S.sessions.reduce((n,x)=>n+x.message_count,0);const model=S.config?.model||'-';const sched=(S.config&&typeof S.config.scheduler==='object')?S.config.scheduler:{};const quota=(S.config&&typeof S.config.session_creation_limit==='object')?S.config.session_creation_limit:{};const runningTotal=Math.max(0,Number(sched?.running_total||0));const maxTasks=Number(sched?.max_user||0);const globalTasks=`${runningTotal}/${_statInfinite(maxTasks)}`;const dailySessions=(quota&&quota.enabled)?`${Math.max(0,Number(quota.used||0))}/${Math.max(0,Number(quota.limit||0))}`:'∞';const compact=[[t('stat_sessions'),sessions],[t('stat_running'),running],[t('stat_messages'),msgs],[t('stat_global_tasks'),globalTasks],[t('stat_daily_sessions'),dailySessions]].map(([k,v])=>`<div class=\"stat compact\"><div class=\"k\">${esc(k)}</div><div class=\"v\">${esc(v)}</div></div>`).join('');const modelHtml=`<div class=\"stat model\"><div class=\"k\">${esc(t('stat_model'))}</div><div class=\"v\">${esc(model)}</div></div>`;E('topStats').innerHTML=`<div class=\"top-stats-primary\">${compact}</div><div class=\"top-stats-model\">${modelHtml}</div>`}
38766
39573
  function renderSessions(){const html=S.sessions.map(s=>`<div class=\"session-item${s.id===S.activeId?' active':''}\" data-id=\"${esc(s.id)}\"><div><strong>${esc(s.title)}</strong></div><div class=\"mono\">${s.running?t('running'):t('idle')} · ${s.message_count} msgs</div></div>`).join('');setPanelHtml('sessionList',html||`<div class=\"mono\">${esc(t('no_sessions'))}</div>`);for(const el of document.querySelectorAll('#sessionList .session-item')){el.onclick=()=>selectSession(el.getAttribute('data-id'))}}
38767
39574
  function _syncActiveSessionSummaryFromSnapshot(){const sid=String(S.activeId||'').trim();const snap=S.snap;if(!sid||!snap)return false;const rows=Array.isArray(S.sessions)?S.sessions.slice():[];let idx=rows.findIndex(row=>String(row?.id||'')===sid);const running=!!snap?.running;let updatedAt=Number(snap?.updated_at||0);if(!Number.isFinite(updatedAt)||updatedAt<=0){updatedAt=(Date.now()/1000)}let msgCount=Number(snap?.message_count);if(!Number.isFinite(msgCount)||msgCount<0){const arr=Array.isArray(snap?.messages)?snap.messages:[];let cnt=0;for(const row of arr){if(String(row?.role||'').trim()==='tool')continue;cnt+=1}msgCount=cnt}msgCount=Math.max(0,Math.floor(Number(msgCount)||0));const title=String(snap?.title||'').trim();if(idx<0){rows.push({id:sid,title:title||sid,running:running,updated_at:updatedAt,message_count:msgCount});idx=rows.length-1}else{const cur=rows[idx]||{};const next={...cur};let changed=false;if(!!cur.running!==running){next.running=running;changed=true}if(Number(cur.message_count||0)!==msgCount){next.message_count=msgCount;changed=true}if(Number(cur.updated_at||0)!==updatedAt){next.updated_at=updatedAt;changed=true}if(title&&String(cur.title||'')!==title){next.title=title;changed=true}if(!changed)return false;rows[idx]=next}rows.sort((a,b)=>Number(b?.updated_at||0)-Number(a?.updated_at||0));S.sessions=rows;return true}
@@ -40222,8 +41029,7 @@ function _chatVirtBuildMessageNode(m){
40222
41029
  const pillsHtml=pills.map(x=>`<span class=\"manager-delegate-pill\">${esc(String(x))}</span>`).join('');
40223
41030
  const routeHtml=`<div class=\"manager-delegate-route\"><span class=\"agent-bus-pill manager\">${esc(t('role_manager'))}</span><span class=\"agent-bus-arrow\">→</span><span class=\"agent-bus-pill${targetRole?(' '+targetRole):''}\">${esc(targetLabel)}</span></div>`;
40224
41031
  const objectiveHtml=(objective&&instruction&&objective.toLowerCase()===instruction.toLowerCase())?'':(objective?`<div class=\"manager-delegate-line\"><span>${esc(t('event_objective'))}</span><div>${esc(objective)}</div></div>`:'');
40225
- const instructionKey=`${String(m._vk||'')}:manager-instruction`;
40226
- const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div class=\"msg-md\">${renderMarkdownCached(instruction,instructionKey)}</div></div>`:'';
41032
+ const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div>${esc(instruction)}</div></div>`:'';
40227
41033
  d.innerHTML=`${roleBadge}<div class=\"manager-delegate-card\"><div class=\"manager-delegate-head\">${esc(t('event_manager_delegate_title'))}</div>${routeHtml}<div class=\"manager-delegate-pills\">${pillsHtml}</div>${objectiveHtml}${instructionHtml}</div>`;
40228
41034
  return d;
40229
41035
  }
@@ -48574,6 +49380,7 @@ class AppContext:
48574
49380
  context_limit_locked: bool = False,
48575
49381
  max_rounds: int = MAX_AGENT_ROUNDS,
48576
49382
  max_run_seconds: int = MAX_RUN_SECONDS,
49383
+ shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
48577
49384
  auto_model_switch: bool = False,
48578
49385
  arbiter_enabled: bool = True,
48579
49386
  arbiter_model: str = "",
@@ -48594,7 +49401,7 @@ class AppContext:
48594
49401
  self.base_url = base_url
48595
49402
  self.model = model
48596
49403
  self.thinking = False
48597
- self.js_lib_root = offline_js_lib_root(SCRIPT_DIR)
49404
+ self.js_lib_root = offline_js_lib_root(self.workspace)
48598
49405
  self.offline_js_summary: dict = {}
48599
49406
  try:
48600
49407
  self.offline_js_summary = load_offline_js_lib_index(self.js_lib_root)
@@ -48617,6 +49424,12 @@ class AppContext:
48617
49424
  maximum=MAX_RUN_TIMEOUT_SECONDS,
48618
49425
  fallback=MAX_RUN_SECONDS,
48619
49426
  )
49427
+ self.shell_command_timeout_seconds = normalize_timeout_seconds(
49428
+ shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
49429
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
49430
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
49431
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
49432
+ )
48620
49433
  self.auto_model_switch = bool(auto_model_switch)
48621
49434
  self.arbiter_enabled = bool(arbiter_enabled)
48622
49435
  self.arbiter_model = str(arbiter_model or "").strip()
@@ -48785,6 +49598,7 @@ class AppContext:
48785
49598
  "show_upload_list": bool(getattr(self, "show_upload_list", False)),
48786
49599
  "ui_style": normalize_ui_style(getattr(self, "ui_style", DEFAULT_UI_STYLE)),
48787
49600
  "js_lib_download_enabled": bool(getattr(self, "js_lib_download_enabled", True)),
49601
+ "shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
48788
49602
  "daily_session_limit_per_ip": int(getattr(self, "daily_session_limit_per_ip", 0) or 0),
48789
49603
  "daily_session_reset_hour": int(getattr(self, "daily_session_reset_hour", 8) or 8),
48790
49604
  "validation": dict(self.web_ui_validation or {}),
@@ -50028,6 +50842,7 @@ class AppContext:
50028
50842
  self.context_limit_locked,
50029
50843
  self.max_rounds,
50030
50844
  self.max_run_seconds,
50845
+ self.shell_command_timeout_seconds,
50031
50846
  self.auto_model_switch,
50032
50847
  self.arbiter_enabled,
50033
50848
  self.arbiter_model,
@@ -51096,6 +51911,7 @@ class Handler(BaseHTTPRequestHandler):
51096
51911
  "download_js_lib_enabled": bool(getattr(self.app, "js_lib_download_enabled", True)),
51097
51912
  "request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
51098
51913
  "run_timeout": int(mgr.max_run_seconds),
51914
+ "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
51099
51915
  }
51100
51916
  )
51101
51917
  model_cat = mgr.model_catalog()
@@ -51142,6 +51958,7 @@ class Handler(BaseHTTPRequestHandler):
51142
51958
  "context_token_limit": int(mgr.context_token_limit),
51143
51959
  "context_limit_locked": bool(mgr.context_limit_locked),
51144
51960
  "run_timeout": int(mgr.max_run_seconds),
51961
+ "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
51145
51962
  "auto_model_switch": bool(mgr.auto_model_switch),
51146
51963
  "execution_mode": normalize_execution_mode(getattr(mgr, "execution_mode", EXECUTION_MODE_SYNC), default=EXECUTION_MODE_SYNC),
51147
51964
  "execution_mode_choices": list(EXECUTION_MODE_CHOICES),
@@ -51194,7 +52011,7 @@ class Handler(BaseHTTPRequestHandler):
51194
52011
  for hk, hv in probe_headers.items():
51195
52012
  if str(hk or "").strip() and str(hv or "").strip():
51196
52013
  req.add_header(str(hk), str(hv))
51197
- with urllib.request.urlopen(req, timeout=8) as resp:
52014
+ with urlopen(req, timeout=8) as resp:
51198
52015
  body_text = resp.read().decode("utf-8", errors="replace")
51199
52016
  reachable = True
51200
52017
  try:
@@ -51251,7 +52068,7 @@ class Handler(BaseHTTPRequestHandler):
51251
52068
  for hk, hv in probe_headers.items():
51252
52069
  if str(hk or "").strip() and str(hv or "").strip():
51253
52070
  base_req.add_header(str(hk), str(hv))
51254
- with urllib.request.urlopen(base_req, timeout=8):
52071
+ with urlopen(base_req, timeout=8):
51255
52072
  pass
51256
52073
  reachable = True
51257
52074
  except urllib.error.HTTPError as exc:
@@ -51901,6 +52718,7 @@ class SkillsHandler(BaseHTTPRequestHandler):
51901
52718
  "show_upload_list": bool(getattr(self.app, "show_upload_list", False)),
51902
52719
  "web_ui": web_ui_state,
51903
52720
  "run_timeout": int(mgr.max_run_seconds),
52721
+ "shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
51904
52722
  "request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
51905
52723
  }
51906
52724
  )
@@ -52332,6 +53150,25 @@ def main():
52332
53150
  f"(minimum {MIN_RUN_TIMEOUT_SECONDS}, model-active time excluded)"
52333
53151
  ),
52334
53152
  )
53153
+ parser.add_argument(
53154
+ "--shell_command_timeout",
53155
+ "--shell-command-timeout",
53156
+ "--bash_timeout",
53157
+ "--bash-timeout",
53158
+ "--command_timeout",
53159
+ "--command-timeout",
53160
+ dest="shell_command_timeout",
53161
+ default=None,
53162
+ type=int,
53163
+ help=(
53164
+ "Per-command shell/bash timeout in seconds "
53165
+ f"(default {DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS}; allowed "
53166
+ f"{MIN_SHELL_COMMAND_TIMEOUT_SECONDS}-{MAX_SHELL_COMMAND_TIMEOUT_SECONDS}). "
53167
+ "Independent from the global run timeout. Also configurable via --config keys "
53168
+ "shell_command_timeout / shell_timeout / bash_timeout / command_timeout and env "
53169
+ "AGENT_SHELL_COMMAND_TIMEOUT / AGENT_BASH_TIMEOUT / AGENT_COMMAND_TIMEOUT."
53170
+ ),
53171
+ )
52335
53172
  parser.add_argument(
52336
53173
  "--live_input_delay_write",
52337
53174
  default=LIVE_INPUT_DELAY_WRITE_ROUNDS,
@@ -52481,9 +53318,10 @@ def main():
52481
53318
  default="",
52482
53319
  help=(
52483
53320
  "LLM config source (URL or local file path). "
52484
- "Also reads startup keys like show_upload_list, download_js_lib and "
53321
+ "Also reads startup keys like show_upload_list, download_js_lib, shell_command_timeout and "
52485
53322
  "daily_session_limit (aliases: daily_sessions_per_ip / "
52486
- "max_daily_sessions_per_ip / session_daily_limit)."
53323
+ "max_daily_sessions_per_ip / session_daily_limit; shell aliases: "
53324
+ "shell_timeout / bash_timeout / command_timeout)."
52487
53325
  ),
52488
53326
  )
52489
53327
  parser.add_argument(
@@ -52618,6 +53456,7 @@ def main():
52618
53456
  arbiter_enabled=True,
52619
53457
  show_upload_list=None,
52620
53458
  download_js_lib=None,
53459
+ shell_command_timeout=None,
52621
53460
  )
52622
53461
  args = parser.parse_args()
52623
53462
  ctx_limit_locked = any(str(arg).split("=", 1)[0] == "--ctx_limit" for arg in sys.argv[1:])
@@ -52647,6 +53486,7 @@ def main():
52647
53486
  )
52648
53487
  resolved_show_upload_list = False
52649
53488
  resolved_daily_session_limit_per_ip = 0
53489
+ resolved_shell_command_timeout = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS
52650
53490
  external_config: dict = {}
52651
53491
  external_config_source = ""
52652
53492
  bootstrap_base_url = args.ollama_base_url
@@ -52673,6 +53513,14 @@ def main():
52673
53513
  external_daily_session_limit = extract_daily_session_limit_setting(external_config)
52674
53514
  if external_daily_session_limit is not None:
52675
53515
  resolved_daily_session_limit_per_ip = int(external_daily_session_limit)
53516
+ external_shell_command_timeout = extract_shell_command_timeout_setting(external_config)
53517
+ if external_shell_command_timeout is not None:
53518
+ resolved_shell_command_timeout = normalize_timeout_seconds(
53519
+ external_shell_command_timeout,
53520
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
53521
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
53522
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
53523
+ )
52676
53524
  print(f"[web-agent] external config loaded: {external_config_source}")
52677
53525
  except Exception as exc:
52678
53526
  print(f"[web-agent] invalid --config: {exc}")
@@ -52686,9 +53534,25 @@ def main():
52686
53534
  web_ui_daily_session_limit = extract_daily_session_limit_setting(web_ui_config)
52687
53535
  if web_ui_daily_session_limit is not None:
52688
53536
  resolved_daily_session_limit_per_ip = int(web_ui_daily_session_limit)
53537
+ web_ui_shell_command_timeout = extract_shell_command_timeout_setting(web_ui_config)
53538
+ if web_ui_shell_command_timeout is not None:
53539
+ resolved_shell_command_timeout = normalize_timeout_seconds(
53540
+ web_ui_shell_command_timeout,
53541
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
53542
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
53543
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
53544
+ )
52689
53545
  cli_daily_session_limit = getattr(args, "daily_session_limit_per_ip", None)
52690
53546
  if cli_daily_session_limit is not None:
52691
53547
  resolved_daily_session_limit_per_ip = max(0, int(cli_daily_session_limit or 0))
53548
+ cli_shell_command_timeout = getattr(args, "shell_command_timeout", None)
53549
+ if cli_shell_command_timeout is not None:
53550
+ resolved_shell_command_timeout = normalize_timeout_seconds(
53551
+ cli_shell_command_timeout,
53552
+ minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
53553
+ maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
53554
+ fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
53555
+ )
52692
53556
  raw_ui_style = str(getattr(args, "ui_style", "") or "").strip()
52693
53557
  if not raw_ui_style:
52694
53558
  raw_ui_style = str(extract_ui_style_setting(external_config) or "").strip()
@@ -52743,6 +53607,7 @@ def main():
52743
53607
  f"[web-agent] run_timeout adjusted {requested_run_timeout}->{resolved_run_timeout} "
52744
53608
  f"(allowed range {MIN_RUN_TIMEOUT_SECONDS}-{MAX_RUN_TIMEOUT_SECONDS})"
52745
53609
  )
53610
+ print(f"[web-agent] shell_command_timeout={int(resolved_shell_command_timeout)}s")
52746
53611
  requested_live_input_delay_write = int(args.live_input_delay_write if args.live_input_delay_write is not None else LIVE_INPUT_DELAY_WRITE_ROUNDS)
52747
53612
  resolved_live_input_delay_write = max(0, min(20, requested_live_input_delay_write))
52748
53613
  if resolved_live_input_delay_write != requested_live_input_delay_write:
@@ -52925,6 +53790,7 @@ def main():
52925
53790
  ctx_limit_locked,
52926
53791
  resolved_max_rounds,
52927
53792
  resolved_run_timeout,
53793
+ resolved_shell_command_timeout,
52928
53794
  resolved_auto_model_switch,
52929
53795
  resolved_arbiter_enabled,
52930
53796
  resolved_arbiter_model,