clouds-coder 2026.4.2__tar.gz → 2026.4.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/Clouds_Coder.py +973 -107
- {clouds_coder-2026.4.2/clouds_coder.egg-info → clouds_coder-2026.4.2.1}/PKG-INFO +2 -1
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1/clouds_coder.egg-info}/PKG-INFO +2 -1
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/clouds_coder.egg-info/requires.txt +1 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/pyproject.toml +9 -1
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/LICENSE +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/README.md +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/clouds_coder.egg-info/SOURCES.txt +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/clouds_coder.egg-info/dependency_links.txt +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/clouds_coder.egg-info/entry_points.txt +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/clouds_coder.egg-info/top_level.txt +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/setup.cfg +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.2.1}/tests/test_smoke.py +0 -0
|
@@ -25,6 +25,7 @@ import selectors
|
|
|
25
25
|
import signal
|
|
26
26
|
import shutil
|
|
27
27
|
import shlex
|
|
28
|
+
import ssl
|
|
28
29
|
import socket
|
|
29
30
|
import subprocess
|
|
30
31
|
import sys
|
|
@@ -44,15 +45,48 @@ from pathlib import Path, PurePosixPath
|
|
|
44
45
|
from urllib.error import HTTPError, URLError
|
|
45
46
|
from urllib.parse import parse_qs, unquote, urlparse
|
|
46
47
|
from urllib.request import Request, urlopen
|
|
48
|
+
try:
|
|
49
|
+
import certifi as _certifi
|
|
50
|
+
except Exception:
|
|
51
|
+
_certifi = None
|
|
47
52
|
try:
|
|
48
53
|
import yaml as _yaml
|
|
49
54
|
except Exception:
|
|
50
55
|
_yaml = None
|
|
56
|
+
_URL_OPEN_ORIGINAL = urlopen
|
|
57
|
+
_HTTP_SSL_CONTEXT = None
|
|
51
58
|
APP_VERSION = "0.1.1"
|
|
52
59
|
DEFAULT_OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
|
|
53
60
|
DEFAULT_OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5-coder:7b")
|
|
54
61
|
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
55
62
|
|
|
63
|
+
def _shared_http_ssl_context():
    """Return the process-wide TLS context used for outgoing HTTPS requests.

    The context is built on first use and cached in the module-level
    ``_HTTP_SSL_CONTEXT``; later calls return the cached object.

    CA bundle candidates are tried in this order, first success wins:

    1. the file named by the ``SSL_CERT_FILE`` environment variable,
    2. the ``certifi`` bundle, when that optional package was importable,
    3. the platform default trust store (``ssl.create_default_context()``).

    Returns:
        ssl.SSLContext: the shared, verifying client context.
    """
    global _HTTP_SSL_CONTEXT
    if _HTTP_SSL_CONTEXT is not None:
        return _HTTP_SSL_CONTEXT

    candidates: list[str] = []
    env_cafile = str(os.getenv("SSL_CERT_FILE", "") or "").strip()
    if env_cafile:
        candidates.append(env_cafile)
    if _certifi is not None:
        try:
            bundled = str(_certifi.where() or "").strip()
        except Exception:
            # certifi is optional; a broken install must not block HTTPS.
            bundled = ""
        if bundled and bundled not in candidates:
            candidates.append(bundled)

    ctx = None
    for cafile in candidates:
        try:
            ctx = ssl.create_default_context(cafile=cafile)
            break
        except (OSError, ValueError):
            # Missing/unreadable/invalid bundle: try the next candidate
            # instead of jumping straight to the platform defaults.
            continue
    if ctx is None:
        ctx = ssl.create_default_context()

    _HTTP_SSL_CONTEXT = ctx
    return ctx
|
|
79
|
+
|
|
80
|
+
def urlopen(url, *args, **kwargs):
    """Wrap ``urllib.request.urlopen`` and supply the shared TLS context.

    For ``https://`` targets the shared context from
    ``_shared_http_ssl_context()`` is injected, unless the caller already
    made a TLS choice: either an explicit ``context`` keyword, or one of the
    legacy ``cafile`` / ``capath`` / ``cadefault`` keywords. urllib refuses
    calls that combine ``context`` with those legacy arguments, so injecting
    in that case would turn a working call into a ValueError.

    Args:
        url: a URL string or a ``urllib.request.Request`` (its ``full_url``
            attribute is inspected when present).
        *args, **kwargs: forwarded unchanged to the original ``urlopen``.

    Returns:
        Whatever the original ``urlopen`` returns.
    """
    caller_chose_tls = "context" in kwargs or any(
        kwargs.get(legacy) for legacy in ("cafile", "capath", "cadefault")
    )
    if not caller_chose_tls:
        target = getattr(url, "full_url", url)
        if str(target or "").strip().lower().startswith("https://"):
            try:
                kwargs["context"] = _shared_http_ssl_context()
            except Exception:
                # Best effort: fall back to urllib's own default TLS setup.
                pass
    return _URL_OPEN_ORIGINAL(url, *args, **kwargs)
|
|
89
|
+
|
|
56
90
|
def _resolve_default_agent_workdir() -> Path:
|
|
57
91
|
raw = str(os.getenv("AGENT_WORKDIR", "") or "").strip()
|
|
58
92
|
if raw:
|
|
@@ -182,6 +216,23 @@ DEFAULT_TIMEOUT_SECONDS = max(
|
|
|
182
216
|
),
|
|
183
217
|
)
|
|
184
218
|
DEFAULT_REQUEST_TIMEOUT = DEFAULT_TIMEOUT_SECONDS
|
|
219
|
+
# Bounds (in seconds) applied to every shell/bash command timeout.
MIN_SHELL_COMMAND_TIMEOUT_SECONDS = 10
MAX_SHELL_COMMAND_TIMEOUT_SECONDS = 86_400


def _shell_command_timeout_from_env(fallback: int = 240) -> int:
    """Read the shell command timeout from the environment.

    Variables are consulted in priority order: ``AGENT_SHELL_COMMAND_TIMEOUT``,
    then ``AGENT_BASH_TIMEOUT``, then ``AGENT_COMMAND_TIMEOUT``. A variable
    that is unset, blank, or not an integer is skipped rather than raising,
    so a typo in the environment cannot break module import.

    Args:
        fallback: value returned when no variable holds a usable integer.

    Returns:
        The first usable integer found, unclamped, or ``fallback``.
    """
    for env_name in (
        "AGENT_SHELL_COMMAND_TIMEOUT",
        "AGENT_BASH_TIMEOUT",
        "AGENT_COMMAND_TIMEOUT",
    ):
        text = str(os.getenv(env_name, "") or "").strip()
        if not text:
            continue
        try:
            return int(text)
        except ValueError:
            continue
    return fallback


# Environment-provided default, clamped into [MIN, MAX].
DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS = max(
    MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
    min(
        MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
        _shell_command_timeout_from_env(),
    ),
)
|
|
185
236
|
AUTO_CONTINUE_BUDGET_DEFAULT = 30
|
|
186
237
|
AGENT_MAX_OUTPUT_TOKENS = 16384
|
|
187
238
|
OLLAMA_THINKING_TOOL_BUFFER = 4096
|
|
@@ -2014,6 +2065,55 @@ def extract_daily_session_limit_setting(raw: object) -> int | None:
|
|
|
2014
2065
|
return None
|
|
2015
2066
|
|
|
2016
2067
|
|
|
2068
|
+
def extract_shell_command_timeout_setting(raw: object) -> int | None:
    """Read the shell/bash command timeout from a config dict.

    Accepted keys, in priority order:
    - shell_command_timeout
    - shell_timeout
    - bash_timeout
    - command_timeout

    Scopes searched, in order: the top level, then the 'startup', 'runtime',
    'shell', 'tools' and 'execution' sections (each only when it is a dict).

    A key whose value is None, blank or a boolean is treated as "not set"
    and the search continues, so a placeholder entry cannot hide a real
    value stored under a later key or section.

    Args:
        raw: parsed configuration; anything other than a dict yields None.

    Returns:
        The value produced by ``normalize_timeout_seconds`` for the first
        usable entry, or None when no usable entry exists.
    """
    if not isinstance(raw, dict):
        return None

    def _parse_timeout(value: object) -> int | None:
        # Booleans are ints in Python; reject them so `true` is not read as 1.
        if value is None or isinstance(value, bool):
            return None
        try:
            text = str(value).strip()
            if not text:
                return None
            # NOTE(review): normalize_timeout_seconds is defined elsewhere;
            # presumably it clamps to [minimum, maximum] and returns
            # `fallback` for unparseable text - confirm.
            return normalize_timeout_seconds(
                text,
                minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
                maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
                fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
            )
        except Exception:
            return None

    keys = (
        "shell_command_timeout",
        "shell_timeout",
        "bash_timeout",
        "command_timeout",
    )
    section_names = ("startup", "runtime", "shell", "tools", "execution")

    scopes: list[dict] = [raw]
    for section_name in section_names:
        section = raw.get(section_name)
        if isinstance(section, dict):
            scopes.append(section)

    for scope in scopes:
        for key in keys:
            if key not in scope:
                continue
            parsed = _parse_timeout(scope.get(key))
            if parsed is not None:
                return parsed
    return None
|
|
2115
|
+
|
|
2116
|
+
|
|
2017
2117
|
class SessionCreationLimitExceeded(RuntimeError):
|
|
2018
2118
|
def __init__(self, status: dict):
|
|
2019
2119
|
self.status = dict(status or {})
|
|
@@ -5257,13 +5357,17 @@ class TodoManager:
|
|
|
5257
5357
|
elif isinstance(item, dict):
|
|
5258
5358
|
raw = item
|
|
5259
5359
|
else:
|
|
5260
|
-
|
|
5360
|
+
# Tolerant: convert to string instead of raising
|
|
5361
|
+
try:
|
|
5362
|
+
raw = {"content": str(item).strip(), "status": "pending"}
|
|
5363
|
+
except Exception:
|
|
5364
|
+
continue # Skip unparseable items
|
|
5261
5365
|
raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
|
|
5262
5366
|
content = normalize_work_text(raw_content)
|
|
5263
5367
|
if not content:
|
|
5264
5368
|
content = raw_content
|
|
5265
5369
|
if not content:
|
|
5266
|
-
|
|
5370
|
+
continue # Skip empty items instead of raising
|
|
5267
5371
|
raw_status = str(raw.get("status", raw.get("state", "pending"))).strip().lower()
|
|
5268
5372
|
status = status_alias.get(raw_status, raw_status or "pending")
|
|
5269
5373
|
if status not in {"pending", "in_progress", "completed"}:
|
|
@@ -7051,9 +7155,11 @@ Use this skill when:
|
|
|
7051
7155
|
6. Report rewritten count, copied files, and unresolved URLs.
|
|
7052
7156
|
|
|
7053
7157
|
## Rules
|
|
7158
|
+
- Treat `./js_lib` and `/js_lib/...` as workspace lookup locations only, not final browser-facing URLs.
|
|
7054
7159
|
- Keep `./js` per HTML location (do not hardcode global absolute paths).
|
|
7055
7160
|
- Keep file names deterministic and safe (`[A-Za-z0-9._-]`).
|
|
7056
7161
|
- Preserve existing relative local script paths if already offline-ready.
|
|
7162
|
+
- Final HTML must not point to `/js_lib/...`, `/assets/js_lib/...`, or other virtual asset aliases; copy first, then use plain relative paths.
|
|
7057
7163
|
|
|
7058
7164
|
## Output Contract
|
|
7059
7165
|
Return:
|
|
@@ -12420,12 +12526,12 @@ TOOLS = [
|
|
|
12420
12526
|
),
|
|
12421
12527
|
tool_def("write_file", "Write file content.", {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
|
|
12422
12528
|
tool_def("edit_file", "Edit a file by replacing first match.", {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, ["path", "old_text", "new_text"]),
|
|
12423
|
-
tool_def("TodoWrite", "Update todo list.", {"items": {"type": "array", "items": {
|
|
12529
|
+
tool_def("TodoWrite", "Update todo list. Items can be strings or objects with content/status/owner fields.", {"items": {"type": "array", "items": {}}}, ["items"]),
|
|
12424
12530
|
tool_def(
|
|
12425
12531
|
"TodoWriteRescue",
|
|
12426
|
-
"Fallback todo writer
|
|
12532
|
+
"Fallback todo writer. Accepts strings with status prefixes: '[x] task' or '✅ task' = completed, '[>] task' = in_progress, plain text = pending. Also accepts dicts with status field.",
|
|
12427
12533
|
{
|
|
12428
|
-
"items": {"type": "array", "items": {
|
|
12534
|
+
"items": {"type": "array", "items": {}},
|
|
12429
12535
|
"in_progress_index": {"type": "integer"},
|
|
12430
12536
|
},
|
|
12431
12537
|
["items"],
|
|
@@ -12694,6 +12800,7 @@ class SessionState:
|
|
|
12694
12800
|
context_limit_locked: bool = False,
|
|
12695
12801
|
max_rounds: int = MAX_AGENT_ROUNDS,
|
|
12696
12802
|
max_run_seconds: int = MAX_RUN_SECONDS,
|
|
12803
|
+
shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
12697
12804
|
auto_model_switch: bool = False,
|
|
12698
12805
|
arbiter_enabled: bool = True,
|
|
12699
12806
|
arbiter_model: str = "",
|
|
@@ -12822,6 +12929,7 @@ class SessionState:
|
|
|
12822
12929
|
self.runtime_complexity_floor = ""
|
|
12823
12930
|
self.runtime_task_level_floor = 0
|
|
12824
12931
|
self.runtime_task_level_ceiling = 0 # 0 = no ceiling; set from plan risk on approval
|
|
12932
|
+
self._todowrite_step_counter: dict[str, int] = {} # Fix 5: track consecutive TodoWrite per step for loop detection
|
|
12825
12933
|
self.runtime_scale_preference = "balanced"
|
|
12826
12934
|
self.runtime_direct_objective = ""
|
|
12827
12935
|
self.runtime_reclassify_goal = ""
|
|
@@ -12903,6 +13011,12 @@ class SessionState:
|
|
|
12903
13011
|
maximum=MAX_RUN_TIMEOUT_SECONDS,
|
|
12904
13012
|
fallback=MAX_RUN_SECONDS,
|
|
12905
13013
|
)
|
|
13014
|
+
self.shell_command_timeout_seconds = normalize_timeout_seconds(
|
|
13015
|
+
shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
13016
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
13017
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
13018
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
13019
|
+
)
|
|
12906
13020
|
self.truncation_count = 0
|
|
12907
13021
|
self.last_truncation_ts = 0.0
|
|
12908
13022
|
self.truncation_rescue_task_ids: list[int] = []
|
|
@@ -15000,7 +15114,7 @@ class SessionState:
|
|
|
15000
15114
|
pass
|
|
15001
15115
|
t = threading.Thread(target=_llm_match, daemon=True)
|
|
15002
15116
|
t.start()
|
|
15003
|
-
t.join(timeout=
|
|
15117
|
+
t.join(timeout=5.0)
|
|
15004
15118
|
if llm_result:
|
|
15005
15119
|
matched_names = llm_result
|
|
15006
15120
|
self._emit("status", {"summary": f"skill discovery (LLM task analysis): {matched_names} ({trigger})"})
|
|
@@ -15034,7 +15148,7 @@ class SessionState:
|
|
|
15034
15148
|
# --- Path 3: Deferred LLM pickup if still running ---
|
|
15035
15149
|
if not matched_names and t.is_alive():
|
|
15036
15150
|
def _deferred_llm_pickup():
|
|
15037
|
-
t.join(timeout=
|
|
15151
|
+
t.join(timeout=8.0)
|
|
15038
15152
|
if llm_result and not self._loaded_skill_rows():
|
|
15039
15153
|
for name_str in llm_result[:3]:
|
|
15040
15154
|
try:
|
|
@@ -15431,7 +15545,6 @@ class SessionState:
|
|
|
15431
15545
|
"ENGINEERING EXECUTION DISCIPLINE: "
|
|
15432
15546
|
"For coding, bug-fix, architecture, integration, and testing work, proactively use the skill system when a matching skill exists. "
|
|
15433
15547
|
"Do not wait for failure before calling list_skills/load_skill for debugging, API, frontend, parser, or recovery workflows. "
|
|
15434
|
-
"Already-loaded skills appear as <loaded-skill> messages — use them directly without reloading. "
|
|
15435
15548
|
"Use a root-cause-first loop: inspect the exact error or failing behavior, read the implicated file or path, form one concrete hypothesis, apply one bounded fix, then run at least one fix-and-verify cycle before declaring success. "
|
|
15436
15549
|
"If read_file or bash reports a missing path, empty folder, or mismatched filename, stop repeating the same lookup. "
|
|
15437
15550
|
"Reconcile the path against uploads, recent file paths, file explorer entries, and close workspace matches; then either open the closest candidate or create the intended target. "
|
|
@@ -15485,6 +15598,10 @@ class SessionState:
|
|
|
15485
15598
|
f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
|
|
15486
15599
|
f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS require) or pptxgen.bundle.js (browser). "
|
|
15487
15600
|
f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
|
|
15601
|
+
"IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
|
|
15602
|
+
"If an HTML file uses any asset from js_lib, copy that file into a task-local relative asset folder "
|
|
15603
|
+
"(for example './js/' or './assets/vendor/') next to the deliverable, then reference it with a plain relative path in HTML. "
|
|
15604
|
+
"Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases inside final exported HTML. "
|
|
15488
15605
|
f"Task level={runtime_level}, mode={runtime_mode}, "
|
|
15489
15606
|
f"budget={'unlimited' if budget <= 0 else budget}. "
|
|
15490
15607
|
f"Context limit ~{self.context_token_upper_bound} tokens. "
|
|
@@ -22181,11 +22298,19 @@ body{padding:18px}
|
|
|
22181
22298
|
str(meta.get("output") or meta.get("error") or "(no output)"),
|
|
22182
22299
|
cwd=cwd,
|
|
22183
22300
|
)
|
|
22184
|
-
|
|
22301
|
+
)
|
|
22185
22302
|
return meta
|
|
22186
22303
|
|
|
22304
|
+
def _shell_command_timeout(self) -> int:
    """Return this session's shell command timeout in seconds.

    The configured ``shell_command_timeout_seconds`` attribute is read
    defensively (instances lacking it fall back to the module default) and
    passed through ``normalize_timeout_seconds`` with the shell MIN/MAX
    bounds on every call.
    """
    configured = getattr(
        self,
        "shell_command_timeout_seconds",
        DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
    )
    bounds = {
        "minimum": MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
        "maximum": MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
        "fallback": DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
    }
    return normalize_timeout_seconds(configured, **bounds)
|
|
22311
|
+
|
|
22187
22312
|
def _run_bash(self, command: str) -> str:
|
|
22188
|
-
return self._run_shell_meta(command, self.files_root,
|
|
22313
|
+
return self._run_shell_meta(command, self.files_root, self._shell_command_timeout())["output"]
|
|
22189
22314
|
|
|
22190
22315
|
def _fuzzy_resolve_path(self, fp: Path) -> Path:
|
|
22191
22316
|
"""If fp doesn't exist, try stripping spaces from the filename to find a close match.
|
|
@@ -22883,10 +23008,10 @@ body{padding:18px}
|
|
|
22883
23008
|
"3) scaffold semantic HTML; "
|
|
22884
23009
|
"4) apply CSS tokens + responsive layout; "
|
|
22885
23010
|
"5) wire JS state/data interactions; "
|
|
22886
|
-
"6) localize external JS dependencies to ./js from ./js_lib; "
|
|
23011
|
+
"6) localize external JS dependencies to a task-local relative folder such as ./js from ./js_lib, and rewrite final HTML to plain relative paths; "
|
|
22887
23012
|
"7) run QA loop for desktop/mobile/a11y/performance and iterate. "
|
|
22888
23013
|
f"Offline JS libs available now: {libs_hint}. "
|
|
22889
|
-
"Final exported HTML should avoid unresolved CDN-only script src."
|
|
23014
|
+
"Final exported HTML should avoid unresolved CDN-only script src and must not keep '/js_lib/...' or '/assets/js_lib/...' virtual URLs."
|
|
22890
23015
|
)
|
|
22891
23016
|
|
|
22892
23017
|
def _contains_any_keyword(self, text: str, keywords: tuple[str, ...]) -> bool:
|
|
@@ -24369,6 +24494,7 @@ body{padding:18px}
|
|
|
24369
24494
|
"category": trim(str(pt.get("category", "") or ""), 40),
|
|
24370
24495
|
"plan_step_index": int(pt.get("plan_step_index", -1)) if pt.get("plan_step_index") is not None else -1,
|
|
24371
24496
|
"created_at": float(pt.get("created_at", 0.0) or 0.0),
|
|
24497
|
+
"activated_at": float(pt.get("activated_at", 0.0) or 0.0) if pt.get("activated_at") else None,
|
|
24372
24498
|
"completed_at": float(pt.get("completed_at", 0.0) or 0.0) if pt.get("completed_at") else None,
|
|
24373
24499
|
"completed_by": trim(str(pt.get("completed_by", "") or ""), 40),
|
|
24374
24500
|
"evidence": trim(str(pt.get("evidence", "") or ""), 200),
|
|
@@ -25363,6 +25489,195 @@ body{padding:18px}
|
|
|
25363
25489
|
return observed_signal or read_back or wrote_files
|
|
25364
25490
|
return wrote_files or read_back or knowledge_signal or observed_signal
|
|
25365
25491
|
|
|
25492
|
+
def _plan_step_activation_ts(self, plan_step: dict) -> float:
|
|
25493
|
+
if not isinstance(plan_step, dict):
|
|
25494
|
+
return 0.0
|
|
25495
|
+
try:
|
|
25496
|
+
activated = float(plan_step.get("activated_at", 0.0) or 0.0)
|
|
25497
|
+
except Exception:
|
|
25498
|
+
activated = 0.0
|
|
25499
|
+
if activated > 0:
|
|
25500
|
+
return activated
|
|
25501
|
+
try:
|
|
25502
|
+
return float(plan_step.get("created_at", 0.0) or 0.0)
|
|
25503
|
+
except Exception:
|
|
25504
|
+
return 0.0
|
|
25505
|
+
|
|
25506
|
+
def _plan_step_blackboard_signals(self, plan_step: dict, board: dict | None = None) -> dict:
    """Summarise blackboard activity recorded since ``plan_step`` became active.

    Entries carrying a positive timestamp older than the step's activation
    time are ignored; entries without a usable timestamp are kept.

    Args:
        plan_step: plan-step todo row. Non-dict input is treated as an empty
            step (no id, no activation time) instead of raising.
        board: blackboard dict; when not a dict, ``self._ensure_blackboard()``
            supplies it.

    Returns:
        dict with keys ``since_ts``, ``has_write``, ``has_read``,
        ``has_research``, ``has_exec``, ``has_review``, ``has_compile_pass``,
        ``has_test_pass``, ``recent_files`` (at most 4 paths) and
        ``recent_exec_excerpt`` / ``recent_review_excerpt`` /
        ``recent_research_excerpt``.
    """
    import re  # local import: only the hint matching below needs it

    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    step = plan_step if isinstance(plan_step, dict) else {}
    step_id = trim(str(step.get("id", "") or ""), 20)
    since_ts = self._plan_step_activation_ts(step)

    def _as_ts(value: object) -> float:
        # Unparseable timestamps become 0.0, which means "unknown age".
        try:
            return float(value or 0.0)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def _predates_step(ts: float) -> bool:
        # 1e-6 tolerance keeps rows stamped at the activation instant.
        return since_ts > 0 and ts > 0 and ts + 1e-6 < since_ts

    def _rows_since(rows: object) -> list[dict]:
        out: list[dict] = []
        if not isinstance(rows, list):
            return out
        for row in rows:
            if not isinstance(row, dict):
                continue
            txt = trim(str(row.get("content", "") or "").strip(), 1200)
            if not txt:
                continue
            ts = _as_ts(row.get("ts", 0.0))
            if _predates_step(ts):
                continue
            out.append({"ts": ts, "content": txt, "actor": trim(str(row.get("actor", "") or ""), 40)})
        return out

    def _recent_excerpt(rows: list[dict], max_chars: int = 120) -> str:
        if not rows:
            return ""
        return trim(str(rows[-1].get("content", "") or "").replace("\r\n", "\n"), max_chars)

    negative_hints = ("error:", "failed", "failure", "traceback", "fatal error", "assertionerror", "exception")
    compile_hints = ("compiled successfully", "build successful", "build succeeded", "syntax ok", "lint passed", "no issues found", "0 errors", "编译成功")
    test_hints = ("test passed", "tests passed", "all tests passed", "0 failed", "100%", "ok", "success", "测试通过")

    # "0 failed" / "0 failures" / "0 errors" report success. They are removed
    # before the negative scan so that a summary such as "5 passed, 0 failed"
    # is not rejected because it contains the word "failed". The lookbehind
    # keeps "10 failed" negative.
    zero_count_re = re.compile(r"(?<![\d.,])0\s+(?:failed|failures?|errors?)\b")

    def _is_negative(low: str) -> bool:
        scrubbed = zero_count_re.sub(" ", low)
        return any(neg in scrubbed for neg in negative_hints)

    def _hint_pattern(tok: str):
        # ASCII hints must start on a word boundary ("10 errors" is not
        # "0 errors", "unsuccessful" is not "success"); very short words such
        # as "ok" must also end on one ("token", "okay" do not count).
        pattern = re.escape(tok)
        if tok[:1].isascii() and tok[:1].isalnum():
            pattern = r"(?<![0-9a-z])" + pattern
        if len(tok) <= 3 and tok.isascii() and tok.isalpha():
            pattern += r"(?![0-9a-z])"
        return re.compile(pattern)

    compile_patterns = tuple(_hint_pattern(tok) for tok in compile_hints)
    test_patterns = tuple(_hint_pattern(tok) for tok in test_hints)

    step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
    raw_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
    step_entries: list[dict] = [
        entry
        for entry in raw_entries
        if isinstance(entry, dict) and not _predates_step(_as_ts(entry.get("ts", 0.0)))
    ]

    artifact_rows: list[dict] = []
    raw_artifacts = bb.get("code_artifacts", {}) if isinstance(bb.get("code_artifacts"), dict) else {}
    for path, meta in raw_artifacts.items():
        if not isinstance(meta, dict):
            continue
        ts = _as_ts(meta.get("updated_at", 0.0))
        if _predates_step(ts):
            continue
        artifact_rows.append({
            "path": trim(str(path or "").strip(), 240),
            "summary": trim(str(meta.get("summary", "") or "").strip(), 200),
            "updated_at": ts,
        })

    research_rows = _rows_since(bb.get("research_notes", []))
    exec_rows = _rows_since(bb.get("execution_logs", []))
    review_rows = _rows_since(bb.get("review_feedback", []))

    file_ops = {trim(str(entry.get("op", "") or "").strip(), 40) for entry in step_entries}
    has_write = any(op in {"write_file", "edit_file"} for op in file_ops) or bool(artifact_rows)
    has_read = "read_file" in file_ops

    def _has_positive(rows: list[dict], patterns: tuple) -> bool:
        # Only the six most recent rows are considered, newest first.
        for row in reversed(rows[-6:]):
            low = str(row.get("content", "") or "").lower()
            if not low or _is_negative(low):
                continue
            if any(pattern.search(low) for pattern in patterns):
                return True
        return False

    def _has_observed(rows: list[dict]) -> bool:
        # True when any of the six most recent rows is non-empty and clean.
        for row in reversed(rows[-6:]):
            low = str(row.get("content", "") or "").lower()
            if low and not _is_negative(low):
                return True
        return False

    recent_files = [row.get("path", "") for row in artifact_rows[-4:] if row.get("path")]
    if not recent_files:
        recent_files = [
            trim(str(entry.get("path", "") or "").strip(), 240)
            for entry in step_entries[-4:]
            if str(entry.get("path", "") or "").strip()
        ]

    return {
        "since_ts": since_ts,
        "has_write": has_write,
        "has_read": has_read,
        "has_research": bool(research_rows),
        "has_exec": _has_observed(exec_rows),
        "has_review": _has_observed(review_rows),
        "has_compile_pass": _has_positive(exec_rows + review_rows, compile_patterns),
        "has_test_pass": _has_positive(exec_rows + review_rows, test_patterns),
        # dict.fromkeys de-duplicates while preserving first-seen order.
        "recent_files": list(dict.fromkeys(recent_files))[-4:],
        "recent_exec_excerpt": _recent_excerpt(exec_rows, 140),
        "recent_review_excerpt": _recent_excerpt(review_rows, 140),
        "recent_research_excerpt": _recent_excerpt(research_rows, 140),
    }
|
|
25621
|
+
|
|
25622
|
+
def _plan_step_has_blackboard_evidence(self, plan_step: dict, board: dict | None = None) -> bool:
    """Decide whether the blackboard holds enough evidence for ``plan_step``.

    The required signals depend on the step's phase (from
    ``self._plan_step_phase_hint``) and on keywords in its lower-cased
    text; the branches are tried in this order:

    - test/review phase or test keywords: a test pass, clean exec log or
      clean review entry;
    - implement phase: a write plus any compile/test/exec/read/review signal;
    - research/design phase: research, read, exec or write activity;
    - validation keywords: exec, read, write or review activity;
    - otherwise: any write/read/research/exec/review activity.

    Returns False for non-dict ``plan_step``.
    """
    if not isinstance(plan_step, dict):
        return False

    signals = self._plan_step_blackboard_signals(plan_step, board)
    description = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "").lower()
    phase = self._plan_step_phase_hint(description)

    def _mentions(words: tuple[str, ...]) -> bool:
        return any(word in description for word in words)

    def _any_signal(*names: str) -> bool:
        return any(signals[name] for name in names)

    test_words = ("test", "pytest", "unit", "integration", "验证", "測試", "测试", "回归", "assert")
    validation_words = ("verify", "validation", "check", "lint", "build", "compile", "运行", "校验", "檢查")

    if phase in ("test", "review") or _mentions(test_words):
        return _any_signal("has_test_pass", "has_exec", "has_review")
    if phase == "implement":
        return signals["has_write"] and _any_signal(
            "has_compile_pass", "has_test_pass", "has_exec", "has_read", "has_review"
        )
    if phase in ("research", "design"):
        return _any_signal("has_research", "has_read", "has_exec", "has_write")
    if _mentions(validation_words):
        return _any_signal("has_exec", "has_read", "has_write", "has_review")
    return _any_signal("has_write", "has_read", "has_research", "has_exec", "has_review")
|
|
25645
|
+
|
|
25646
|
+
def _step_has_accumulated_evidence(self, plan_step: dict, bb: dict | None = None) -> bool:
    """Fix 3: report whether the step gathered evidence over its whole lifetime.

    Looks at the blackboard signals collected since the step's activation
    (not only the current turn) and returns True when a write, a clean
    execution log or a research note is present. Non-dict steps yield False.
    """
    if isinstance(plan_step, dict):
        signals = self._plan_step_blackboard_signals(plan_step, bb)
        return any(signals[name] for name in ("has_write", "has_exec", "has_research"))
    return False
|
|
25653
|
+
|
|
25654
|
+
def _collect_accumulated_step_evidence(self, plan_step: dict, bb: dict | None = None) -> str:
    """Fix 1 support: build a short evidence summary from the step's history.

    Joins up to four recent file paths and the latest execution and
    research excerpts (each passed through ``trim(..., 80)``) with "; ".
    When nothing is available the placeholder "accumulated-step-evidence"
    is used. The result is passed through ``trim(..., 200)``. Non-dict
    steps yield "".
    """
    if not isinstance(plan_step, dict):
        return ""

    signals = self._plan_step_blackboard_signals(plan_step, bb)
    segments: list[str] = []

    files = signals.get("recent_files")
    if files:
        segments.append("files: " + ", ".join(files[:4]))

    for label, key in (("exec", "recent_exec_excerpt"), ("research", "recent_research_excerpt")):
        excerpt = signals.get(key)
        if excerpt:
            segments.append(label + ": " + trim(excerpt, 80))

    summary = "; ".join(segments) or "accumulated-step-evidence"
    return trim(summary, 200)
|
|
25667
|
+
|
|
25668
|
+
def _collect_blackboard_step_evidence(self, plan_step: dict, board: dict | None = None) -> str:
|
|
25669
|
+
sig = self._plan_step_blackboard_signals(plan_step, board)
|
|
25670
|
+
parts: list[str] = []
|
|
25671
|
+
if sig.get("recent_files"):
|
|
25672
|
+
parts.append("files: " + ", ".join(sig["recent_files"][:3]))
|
|
25673
|
+
if sig.get("recent_exec_excerpt"):
|
|
25674
|
+
parts.append(f"logs: {sig['recent_exec_excerpt']}")
|
|
25675
|
+
if sig.get("recent_review_excerpt"):
|
|
25676
|
+
parts.append(f"review: {sig['recent_review_excerpt']}")
|
|
25677
|
+
if sig.get("recent_research_excerpt"):
|
|
25678
|
+
parts.append(f"notes: {sig['recent_research_excerpt']}")
|
|
25679
|
+
return trim("; ".join(parts), 200)
|
|
25680
|
+
|
|
25366
25681
|
def _has_test_pass_evidence(self, board: dict | None = None) -> bool:
|
|
25367
25682
|
bb = board if isinstance(board, dict) else self._ensure_blackboard()
|
|
25368
25683
|
logs = bb.get("execution_logs", []) if isinstance(bb.get("execution_logs"), list) else []
|
|
@@ -25395,6 +25710,20 @@ body{padding:18px}
|
|
|
25395
25710
|
if todo.get("status") == "completed":
|
|
25396
25711
|
continue
|
|
25397
25712
|
cat = todo.get("category", "")
|
|
25713
|
+
if cat == "plan_step" and todo.get("status") == "in_progress" and not todo.get("activated_at"):
|
|
25714
|
+
step_idx = int(todo.get("plan_step_index", 0) or 0)
|
|
25715
|
+
prior_done_ts = [
|
|
25716
|
+
float(t.get("completed_at", 0.0) or 0.0)
|
|
25717
|
+
for t in todos
|
|
25718
|
+
if t.get("category") == "plan_step"
|
|
25719
|
+
and int(t.get("plan_step_index", 0) or 0) < step_idx
|
|
25720
|
+
and t.get("completed_at")
|
|
25721
|
+
]
|
|
25722
|
+
todo["activated_at"] = (
|
|
25723
|
+
max(prior_done_ts)
|
|
25724
|
+
if prior_done_ts
|
|
25725
|
+
else (float(todo.get("created_at", 0.0) or 0.0) or float(now_ts()))
|
|
25726
|
+
)
|
|
25398
25727
|
if cat == "setup" and (research_count > 0 or code_count > 0):
|
|
25399
25728
|
todo.update(
|
|
25400
25729
|
status="completed",
|
|
@@ -25446,11 +25775,14 @@ body{padding:18px}
|
|
|
25446
25775
|
if t.get("category") == "plan_step"
|
|
25447
25776
|
):
|
|
25448
25777
|
todo["status"] = "in_progress"
|
|
25778
|
+
todo["activated_at"] = float(now_ts())
|
|
25449
25779
|
|
|
25450
25780
|
if not any(t.get("status") == "in_progress" for t in todos):
|
|
25451
25781
|
for t in todos:
|
|
25452
25782
|
if t.get("status") == "pending":
|
|
25453
25783
|
t["status"] = "in_progress"
|
|
25784
|
+
if not t.get("activated_at"):
|
|
25785
|
+
t["activated_at"] = float(now_ts())
|
|
25454
25786
|
break
|
|
25455
25787
|
|
|
25456
25788
|
bb["project_todos"] = todos
|
|
@@ -25516,6 +25848,11 @@ body{padding:18px}
|
|
|
25516
25848
|
break
|
|
25517
25849
|
if not current:
|
|
25518
25850
|
return False
|
|
25851
|
+
# Fix 5c: Reset TodoWrite loop counter on step advancement
|
|
25852
|
+
try:
|
|
25853
|
+
self._todowrite_step_counter.clear()
|
|
25854
|
+
except Exception:
|
|
25855
|
+
pass
|
|
25519
25856
|
current["status"] = "completed"
|
|
25520
25857
|
current["completed_at"] = float(now_ts())
|
|
25521
25858
|
current["completed_by"] = actor
|
|
@@ -25530,6 +25867,7 @@ body{padding:18px}
|
|
|
25530
25867
|
break
|
|
25531
25868
|
if next_step:
|
|
25532
25869
|
next_step["status"] = "in_progress"
|
|
25870
|
+
next_step["activated_at"] = float(now_ts())
|
|
25533
25871
|
step_idx = int(next_step.get("plan_step_index", 0) or 0) + 1
|
|
25534
25872
|
total = int(bb.get("plan_step_total", len(todos)) or len(todos))
|
|
25535
25873
|
self._emit("status", {
|
|
@@ -25635,27 +25973,64 @@ body{padding:18px}
|
|
|
25635
25973
|
isinstance(r, dict) and r.get("ok", False) and str(r.get("name", "")) == "bash"
|
|
25636
25974
|
for r in results
|
|
25637
25975
|
)
|
|
25638
|
-
|
|
25976
|
+
validation_ok_current = self._tool_results_have_validation_evidence(current, results)
|
|
25977
|
+
validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
|
|
25978
|
+
validation_ok = validation_ok_current or validation_ok_blackboard
|
|
25979
|
+
bb_sig = self._plan_step_blackboard_signals(current, bb)
|
|
25639
25980
|
phase_evidence = False
|
|
25640
25981
|
if phase in ("research", "design") and validation_ok:
|
|
25641
25982
|
phase_evidence = True
|
|
25642
|
-
elif phase == "implement" and
|
|
25983
|
+
elif phase == "implement" and (
|
|
25984
|
+
(wrote_files and validation_ok_current)
|
|
25985
|
+
or (bb_sig["has_write"] and validation_ok_blackboard)
|
|
25986
|
+
):
|
|
25643
25987
|
phase_evidence = True
|
|
25644
|
-
elif phase in ("test", "review") and
|
|
25988
|
+
elif phase in ("test", "review") and (
|
|
25989
|
+
(ran_bash_ok and validation_ok_current)
|
|
25990
|
+
or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
|
|
25991
|
+
):
|
|
25645
25992
|
phase_evidence = True
|
|
25993
|
+
todo_progress_signal = any(
|
|
25994
|
+
isinstance(r, dict) and r.get("ok", False)
|
|
25995
|
+
and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
|
|
25996
|
+
for r in results
|
|
25997
|
+
)
|
|
25646
25998
|
# Advance when:
|
|
25647
25999
|
# - Manager requested AND worker produced output, OR
|
|
25648
26000
|
# - All subtasks completed AND worker produced output, OR
|
|
25649
|
-
# - Phase heuristics confirm
|
|
25650
|
-
|
|
25651
|
-
|
|
25652
|
-
|
|
26001
|
+
# - Phase heuristics confirm BUT ONLY if no incomplete subtasks exist
|
|
26002
|
+
# - Fix 3: All subtasks completed + accumulated step evidence (covers TodoWrite-only turns)
|
|
26003
|
+
# CRITICAL: When subtasks exist, phase_evidence alone CANNOT bypass subtask completion.
|
|
26004
|
+
_has_subtasks = bool(self._active_plan_worker_todo_rows(
|
|
26005
|
+
str(current.get("id", "") or ""), role=""
|
|
26006
|
+
))
|
|
26007
|
+
_phase_gate = phase_evidence and (subtasks_all_done or not _has_subtasks)
|
|
26008
|
+
accumulated_evidence_path = (
|
|
26009
|
+
subtasks_all_done
|
|
26010
|
+
and todo_progress_signal
|
|
26011
|
+
and self._step_has_accumulated_evidence(current, bb)
|
|
26012
|
+
)
|
|
26013
|
+
has_strong_evidence = (
|
|
26014
|
+
validation_ok and (
|
|
26015
|
+
(
|
|
26016
|
+
worker_produced_output
|
|
26017
|
+
and (manager_requested or subtasks_all_done or _phase_gate)
|
|
26018
|
+
)
|
|
26019
|
+
or (
|
|
26020
|
+
todo_progress_signal
|
|
26021
|
+
and subtasks_all_done
|
|
26022
|
+
and validation_ok_blackboard
|
|
26023
|
+
)
|
|
26024
|
+
)
|
|
26025
|
+
) or accumulated_evidence_path
|
|
25653
26026
|
if has_strong_evidence:
|
|
25654
26027
|
evidence = self._collect_step_evidence(current, worker_step)
|
|
25655
26028
|
self._advance_plan_step(
|
|
25656
26029
|
evidence=evidence,
|
|
25657
26030
|
actor=str(route.get("target", "developer") or "developer"),
|
|
25658
26031
|
)
|
|
26032
|
+
else:
|
|
26033
|
+
self._inject_rework_if_needed(current, worker_step)
|
|
25659
26034
|
|
|
25660
26035
|
def _worker_step_has_evidence(self, step: dict) -> bool:
|
|
25661
26036
|
"""Check if worker step produced concrete tool outputs."""
|
|
@@ -25671,7 +26046,8 @@ body{padding:18px}
|
|
|
25671
26046
|
|
|
25672
26047
|
def _step_subtasks_all_completed(self, plan_step: dict) -> bool:
|
|
25673
26048
|
"""Check if all worker subtasks linked to this plan step are completed.
|
|
25674
|
-
Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking.
|
|
26049
|
+
Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking.
|
|
26050
|
+
Fix 6: Also excludes 'next-step intent' items that were added alongside completed items."""
|
|
25675
26051
|
step_id = str(plan_step.get("id", "") or "")
|
|
25676
26052
|
if not step_id:
|
|
25677
26053
|
return False
|
|
@@ -25711,7 +26087,293 @@ body{padding:18px}
|
|
|
25711
26087
|
relevant.append(r)
|
|
25712
26088
|
if relevant:
|
|
25713
26089
|
worker_items = relevant
|
|
25714
|
-
|
|
26090
|
+
# Fix 6: Exclude "next-step intent" pending items when all other items are completed.
|
|
26091
|
+
# When the worker completes step N and creates step N+1 subtasks in the same TodoWrite call,
|
|
26092
|
+
# the new pending items get parent_step_id of step N, blocking its advancement.
|
|
26093
|
+
completed_items = [r for r in worker_items if str(r.get("status", "")).lower() == "completed"]
|
|
26094
|
+
pending_items = [r for r in worker_items if str(r.get("status", "")).lower() != "completed"]
|
|
26095
|
+
if completed_items and pending_items:
|
|
26096
|
+
# Check if pending items are content-wise duplicates of completed items
|
|
26097
|
+
# (indicating the worker re-sent the same items but some got stuck as pending)
|
|
26098
|
+
completed_content = {
|
|
26099
|
+
normalize_work_text(str(r.get("content", ""))).strip().lower()
|
|
26100
|
+
for r in completed_items
|
|
26101
|
+
if str(r.get("content", "") or "").strip()
|
|
26102
|
+
}
|
|
26103
|
+
truly_new_pending = [
|
|
26104
|
+
r for r in pending_items
|
|
26105
|
+
if normalize_work_text(str(r.get("content", ""))).strip().lower() not in completed_content
|
|
26106
|
+
]
|
|
26107
|
+
# If all pending items are duplicates of completed items, they don't block
|
|
26108
|
+
if not truly_new_pending:
|
|
26109
|
+
worker_items = completed_items
|
|
26110
|
+
# If there are truly new pending items but all original items are done,
|
|
26111
|
+
# check if the new items match future plan step content
|
|
26112
|
+
elif truly_new_pending and len(completed_items) >= 2:
|
|
26113
|
+
bb = self._ensure_blackboard()
|
|
26114
|
+
future_step_content = set()
|
|
26115
|
+
found_current = False
|
|
26116
|
+
for t in bb.get("project_todos", []):
|
|
26117
|
+
if not isinstance(t, dict) or t.get("category") != "plan_step":
|
|
26118
|
+
continue
|
|
26119
|
+
if str(t.get("id", "") or "") == step_id:
|
|
26120
|
+
found_current = True
|
|
26121
|
+
continue
|
|
26122
|
+
if found_current:
|
|
26123
|
+
fc = str(t.get("full_content", "") or t.get("content", "") or "").strip().lower()
|
|
26124
|
+
future_step_content.add(fc)
|
|
26125
|
+
for line in fc.split("\n"):
|
|
26126
|
+
sl = line.strip().lower()
|
|
26127
|
+
if sl:
|
|
26128
|
+
future_step_content.add(sl)
|
|
26129
|
+
if future_step_content:
|
|
26130
|
+
_still_blocking = []
|
|
26131
|
+
for pi in truly_new_pending:
|
|
26132
|
+
pc = normalize_work_text(str(pi.get("content", ""))).strip().lower()
|
|
26133
|
+
# Check if this pending item's content appears in any future step
|
|
26134
|
+
is_future = any(pc in fsc or fsc in pc for fsc in future_step_content if len(fsc) > 4)
|
|
26135
|
+
if not is_future:
|
|
26136
|
+
_still_blocking.append(pi)
|
|
26137
|
+
if not _still_blocking:
|
|
26138
|
+
worker_items = completed_items
|
|
26139
|
+
all_marked_done = all(str(r.get("status", "")).lower() == "completed" for r in worker_items)
|
|
26140
|
+
if not all_marked_done:
|
|
26141
|
+
return False
|
|
26142
|
+
# Acceptance verification: check that each "completed" subtask has real evidence
|
|
26143
|
+
# Don't just trust the model's TodoWrite status — verify against accumulated tool outputs
|
|
26144
|
+
if worker_items:
|
|
26145
|
+
bb = self._ensure_blackboard()
|
|
26146
|
+
unverified = self._verify_subtasks_acceptance(worker_items, step_id, bb)
|
|
26147
|
+
if unverified:
|
|
26148
|
+
return False
|
|
26149
|
+
return True
|
|
26150
|
+
|
|
26151
|
+
def _verify_subtasks_acceptance(self, subtasks: list[dict], step_id: str, bb: dict) -> list[str]:
    """Heuristically check that subtasks marked completed are backed by evidence.

    Evidence is read from the blackboard ``bb``:

    * ``bb["step_files"][step_id]`` -- entries whose ``op`` is ``write_file`` or
      ``edit_file`` contribute their ``path`` to the set of written paths.
    * ``bb["execution_logs"]`` -- the ``content`` of the last 30 entries is
      treated as execution output.

    Each subtask's ``content`` is matched against four keyword rules (file
    creation, run/test/verify, compile/build, install).  Subtasks that match
    no rule are accepted without evidence.

    Args:
        subtasks: Todo rows (dicts); only the ``content`` key is read.
        step_id: Plan step id used to select the ``step_files`` bucket.
        bb: Blackboard dict.

    Returns:
        One human-readable description per subtask that failed a rule; an
        empty list when nothing failed.
    """
    import re

    # Gather files written/edited for this step (lower-cased for matching).
    step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
    step_entries = (
        step_files_raw.get(step_id, [])
        if step_id and isinstance(step_files_raw.get(step_id), list)
        else []
    )
    written_paths = set()
    for entry in step_entries:
        if isinstance(entry, dict) and str(entry.get("op", "")) in ("write_file", "edit_file"):
            written_paths.add(str(entry.get("path", "") or "").strip().lower())

    # Gather recent execution output (lower-cased, newest last).
    exec_logs = bb.get("execution_logs", [])
    if not isinstance(exec_logs, list):
        exec_logs = []
    bash_outputs_lower = []
    for log in exec_logs[-30:]:
        if isinstance(log, dict):
            c = str(log.get("content", "") or "").lower()
            if c:
                bash_outputs_lower.append(c)
    all_bash_text = " ".join(bash_outputs_lower)
    negative_hints = ("error:", "failed", "failure", "traceback", "fatal", "not found",
                      "no such file", "command not found", "permission denied")
    has_bash_failure = any(neg in all_bash_text for neg in negative_hints)

    # Acceptance patterns derived from the subtask wording.
    _file_create_re = re.compile(
        r"(?:创建|生成|编写|写入|create|write|generate|implement|scaffold)\s+(.+?)(?:\s|$|,|。|,|\()",
        re.IGNORECASE,
    )
    _run_test_kw = ("运行", "测试", "验证", "test", "pytest", "verify", "validate",
                    "run", "check", "确认", "检查")
    _compile_kw = ("编译", "构建", "compile", "build", "cmake", "make", "gcc", "gfortran")
    _install_kw = ("安装", "install", "pip install", "npm install", "apt install")

    # Well-known file names that carry no extension.
    _bare_file_names = {"makefile", "dockerfile", "readme", "license", "jenkinsfile",
                        "procfile", "gemfile", "rakefile", "vagrantfile"}
    _extension_re = re.compile(r"\.[a-z0-9_+\-]{1,10}$")

    unverified: list[str] = []
    for st in subtasks:
        content = str(st.get("content", "") or "").strip()
        if not content:
            continue
        content_lower = content.lower()

        # Rule 1: the subtask names a file to create -> it must have been written.
        m = _file_create_re.search(content)
        if m:
            # Drop surrounding quotes and trailing sentence punctuation so that
            # "Create `main.py`." still resolves to "main.py".
            target = (
                m.group(1).strip().strip("\"'`")
                .rstrip(".,;:!?)。,;:")
                .strip("\"'`")
                .lower()
            )
            # Keep only the final path component of path-like targets.
            if "/" in target:
                target_parts = [p for p in target.split("/") if p.strip()]
                target_name = target_parts[-1] if target_parts else target
            else:
                target_name = target
            # Only enforce the rule when the captured word plausibly is a file.
            # Otherwise prose such as "Create the database schema" would be
            # reported as the missing file "the" and block the step forever.
            looks_like_file = (
                "/" in target
                or target_name in _bare_file_names
                or bool(_extension_re.search(target_name))
            )
            if looks_like_file and target_name and len(target_name) > 2:
                found = any(target_name in wp for wp in written_paths)
                if not found:
                    unverified.append(f"file not created: {target_name}")
                    continue

        # Rule 2: run/test/verify wording -> some execution output must exist.
        if any(kw in content_lower for kw in _run_test_kw):
            if not bash_outputs_lower:
                unverified.append(f"no bash execution for: {trim(content, 60)}")
                continue
            # For test subtasks, look for failures in the 10 newest log entries
            # that also mention tests (each entry is one whole log content).
            if has_bash_failure and any(kw in content_lower for kw in ("test", "测试", "pytest")):
                test_related_failures = any(
                    ("test" in entry or "pytest" in entry or "assert" in entry)
                    and any(neg in entry for neg in negative_hints)
                    for entry in bash_outputs_lower[-10:]
                )
                if test_related_failures:
                    unverified.append(f"test failures detected for: {trim(content, 60)}")
                    continue

        # Rule 3: compile/build wording -> execution output without build errors.
        if any(kw in content_lower for kw in _compile_kw):
            if not bash_outputs_lower:
                unverified.append(f"no bash execution for compile: {trim(content, 60)}")
                continue
            compile_failures = any(
                any(neg in entry for neg in ("error:", "failed", "failure"))
                and any(kw in entry for kw in ("compil", "build", "cmake", "make", "link"))
                for entry in bash_outputs_lower[-10:]
            )
            if compile_failures:
                unverified.append(f"compile failures for: {trim(content, 60)}")
                continue

        # Rule 4: install wording -> some execution output must exist.
        if any(kw in content_lower for kw in _install_kw):
            if not bash_outputs_lower:
                unverified.append(f"no bash for install: {trim(content, 60)}")
                continue

        # No rule objected: generic subtasks ("design", "analyze") need no tool evidence.
    return unverified
|
|
26243
|
+
|
|
26244
|
+
def _inject_rework_if_needed(self, plan_step: dict, worker_step: dict):
    """Inject a rework instruction when completed subtasks fail acceptance.

    Flow:
      1. Collect the step's worker todo rows whose status is ``completed``.
      2. Run the heuristic check (``_verify_subtasks_acceptance``); stop if it
         reports nothing.
      3. Ask the LLM reviewer (``_llm_verify_subtask_acceptance``); stop if it
         says everything passed.
      4. At most once every 30 seconds per step (timestamp kept in the
         dynamic attribute ``_rework_injected_<step_id>``), flip the matching
         todo rows back to ``in_progress`` and append a ``<step-rework>``
         guidance bubble.

    The whole routine is best-effort: any exception is swallowed so that a
    failure here never interrupts the caller.

    Args:
        plan_step: Active plan step; ``id`` and ``content`` are read.
        worker_step: Accepted for call-site symmetry; not read by this method.
    """
    try:
        step_id = str(plan_step.get("id", "") or "")
        if not step_id:
            return
        rows = self._active_plan_worker_todo_rows(step_id, role="")
        completed_rows = [r for r in rows if str(r.get("status", "")).lower() == "completed"]
        if not completed_rows:
            return
        bb = self._ensure_blackboard()
        failures = self._verify_subtasks_acceptance(completed_rows, step_id, bb)
        if not failures:
            return

        # LLM-based acceptance check: semantic analysis over heuristics.
        llm_verdict = self._llm_verify_subtask_acceptance(plan_step, completed_rows, bb)
        if llm_verdict.get("all_passed", False):
            return
        # The verdict always carries a "rework_items" key (an empty list when
        # the LLM call or JSON parsing failed), so a plain dict default would
        # never apply.  Fall back to the heuristic failures when it is empty.
        rework_items = llm_verdict.get("rework_items") or failures
        if not rework_items:
            return

        # Rate-limit rework injection per step.
        _rework_key = f"_rework_injected_{step_id}"
        _last_rework = getattr(self, _rework_key, 0.0)
        if float(now_ts()) - float(_last_rework) < 30.0:
            return
        setattr(self, _rework_key, float(now_ts()))

        step_label = trim(str(plan_step.get("content", "") or ""), 80)
        rework_text = (
            f"<step-rework>\n"
            f"Step \"{step_label}\" acceptance check FAILED. "
            f"The following subtasks were marked completed but did not pass verification:\n"
        )
        # Only the first five items are listed in the instruction.
        for i, item in enumerate(rework_items[:5]):
            rework_text += f" {i+1}. {trim(str(item), 120)}\n"
        rework_text += (
            "\nACTION REQUIRED: Fix these issues NOW before the step can advance.\n"
            "- For missing files: create them with write_file\n"
            "- For failed tests/builds: run the command again and fix errors\n"
            "- For unverified installs: re-run the install command\n"
            "After fixing, update TodoWrite to reflect the corrected state.\n"
            "</step-rework>"
        )

        # Revert false "completed" status back to in_progress for rows of this
        # step whose content loosely matches one of the rework items.
        _snap = self.todo.snapshot()
        _modified = False
        for row in _snap:
            if not isinstance(row, dict):
                continue
            if str(row.get("parent_step_id", "") or "") != step_id:
                continue
            if str(row.get("status", "")).lower() != "completed":
                continue
            rc = str(row.get("content", "") or "").strip().lower()
            for fail in rework_items:
                fail_lower = str(fail).lower()
                # Match on the first 20 characters, or on any of the first
                # three words longer than three characters.
                if rc and (rc[:20] in fail_lower or any(w in fail_lower for w in rc.split()[:3] if len(w) > 3)):
                    row["status"] = "in_progress"
                    _modified = True
                    break
        if _modified:
            try:
                self.todo.update(_snap)
            except Exception:
                # Best-effort: a rejected todo update must not stop the guidance below.
                pass

        # In multi-agent mode address the guidance to the active agent only.
        target_roles: tuple[str, ...] = ()
        if self._is_multi_agent_mode():
            active_role = str(bb.get("active_agent", "") or "developer")
            if active_role:
                target_roles = (active_role,)
        self._append_plan_guidance_bubble(
            rework_text,
            target_roles=target_roles,
            summary=f"step rework: {len(rework_items)} items failed acceptance",
        )
    except Exception:
        # Deliberate best-effort: rework injection must never break the caller.
        pass
|
|
26322
|
+
|
|
26323
|
+
def _llm_verify_subtask_acceptance(self, plan_step: dict, completed_subtasks: list[dict], bb: dict) -> dict:
    """Ask the LLM whether the given subtasks are genuinely completed.

    Builds a prompt from up to 8 subtasks, the step's recent file operations
    (``bb["step_files"][step_id]``) and recent execution output
    (``bb["execution_logs"]``), sends it through ``self.ollama.chat`` and
    parses the first ``{`` ... last ``}`` span of the reply as JSON.

    Args:
        plan_step: Plan step dict; only ``id`` is read.
        completed_subtasks: Todo rows to be judged (``status``/``content`` read).
        bb: Blackboard dict.

    Returns:
        ``{"all_passed": bool, "rework_items": list}``.  On any error, or when
        the reply holds no JSON object, the fail-closed default
        ``{"all_passed": False, "rework_items": []}`` is returned.
    """
    try:
        step_id = str(plan_step.get("id", "") or "")
        step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
        step_entries = step_files_raw.get(step_id, []) if step_id else []

        # Summarise the latest file operations recorded for this step.
        files_summary = []
        for entry in (step_entries[-15:] if isinstance(step_entries, list) else []):
            if isinstance(entry, dict):
                files_summary.append(f"{entry.get('op','?')}: {entry.get('path','?')}")

        # Summarise the latest execution output, truncated per entry.
        exec_logs = bb.get("execution_logs", [])
        recent_exec = []
        for log in (exec_logs[-8:] if isinstance(exec_logs, list) else []):
            if isinstance(log, dict):
                c = trim(str(log.get("content", "") or ""), 200)
                if c:
                    recent_exec.append(c)

        subtask_list = "\n".join(
            f"- [{str(st.get('status','')).upper()}] {trim(str(st.get('content','') or ''), 120)}"
            for st in completed_subtasks[:8]
        )
        prompt = (
            "Analyze whether these subtasks are TRULY completed based on the evidence.\n\n"
            f"SUBTASKS:\n{subtask_list}\n\n"
            f"FILES CREATED/MODIFIED:\n{chr(10).join(files_summary[-10:]) or '(none)'}\n\n"
            f"RECENT EXECUTION OUTPUT:\n{chr(10).join(recent_exec[-5:]) or '(none)'}\n\n"
            "For each subtask, determine if it's genuinely done:\n"
            "- File creation tasks: was the file actually created?\n"
            "- Test/verify tasks: was a test/command actually run? Did it pass?\n"
            "- Build/compile tasks: was compilation attempted? Any errors?\n"
            "- Install tasks: was the install command run?\n\n"
            "Reply ONLY as JSON: {\"all_passed\": true/false, \"rework_items\": [\"description of what failed\"]}\n"
            "If all subtasks pass, return {\"all_passed\": true, \"rework_items\": []}"
        )
        resp = self.ollama.chat(
            [{"role": "user", "content": prompt}],
            system="You are a strict QA reviewer. Verify task completion against evidence. Reply ONLY valid JSON.",
            max_tokens=300,
            think=False,
        )
        import json
        text = str(resp.get("text", "") or "").strip()
        if "{" in text:
            # Tolerate chatter around the JSON object by slicing the outermost braces.
            json_str = text[text.index("{"):text.rindex("}") + 1]
            result = json.loads(json_str)
            if isinstance(result, dict):
                # A model may emit the flag as a string; bool("false") would be True.
                passed = result.get("all_passed", False)
                if isinstance(passed, str):
                    passed = passed.strip().lower() in ("true", "yes", "1")
                # Normalise rework_items: a bare string becomes one item rather
                # than a list of characters, and null/other types become empty.
                raw_items = result.get("rework_items", [])
                if isinstance(raw_items, str):
                    items = [raw_items] if raw_items.strip() else []
                elif isinstance(raw_items, (list, tuple)):
                    items = list(raw_items)
                else:
                    items = []
                return {
                    "all_passed": bool(passed),
                    "rework_items": items,
                }
    except Exception:
        # Best-effort: any LLM/transport/parse error yields the fail-closed default below.
        pass
    return {"all_passed": False, "rework_items": []}
|
|
25715
26377
|
|
|
25716
26378
|
def _collect_step_evidence(self, plan_step: dict, worker_step: dict) -> str:
|
|
25717
26379
|
"""Collect evidence summary from worker step for plan step completion."""
|
|
@@ -25735,6 +26397,10 @@ body{padding:18px}
|
|
|
25735
26397
|
elif name in ("write_to_blackboard", "query_code_library", "query_knowledge_library"):
|
|
25736
26398
|
out = self._tool_result_output_excerpt(r, 100)
|
|
25737
26399
|
parts.append(f"{name}" + (f": {out}" if out else ""))
|
|
26400
|
+
if not parts:
|
|
26401
|
+
bb_evidence = self._collect_blackboard_step_evidence(plan_step)
|
|
26402
|
+
if bb_evidence:
|
|
26403
|
+
return bb_evidence
|
|
25738
26404
|
return trim("; ".join(parts) or "post-execution evidence", 200)
|
|
25739
26405
|
|
|
25740
26406
|
def _get_active_plan_step(self, board: dict | None = None) -> dict | None:
|
|
@@ -25842,10 +26508,13 @@ body{padding:18px}
|
|
|
25842
26508
|
|
|
25843
26509
|
merged_by_identity: dict[str, dict] = {}
|
|
25844
26510
|
ordered_identities: list[str] = []
|
|
26511
|
+
# Fix 2: Compute existing identities for next-step detection
|
|
26512
|
+
_existing_identities: set[str] = set()
|
|
25845
26513
|
for row in target_rows:
|
|
25846
26514
|
identity = self._plan_worker_todo_identity(row)
|
|
25847
26515
|
if not identity:
|
|
25848
26516
|
continue
|
|
26517
|
+
_existing_identities.add(identity)
|
|
25849
26518
|
if identity not in merged_by_identity:
|
|
25850
26519
|
merged_by_identity[identity] = dict(row)
|
|
25851
26520
|
ordered_identities.append(identity)
|
|
@@ -25883,11 +26552,42 @@ body{padding:18px}
|
|
|
25883
26552
|
merged.update(row)
|
|
25884
26553
|
merged["owner"] = str(merged.get("owner", "") or role_key).strip().lower() or role_key
|
|
25885
26554
|
merged["parent_step_id"] = trim(str(merged.get("parent_step_id", "") or step_id), 20) or step_id
|
|
26555
|
+
# Fix 2 support: Timestamp new items for next-step detection
|
|
26556
|
+
if identity not in _existing_identities and "created_at" not in merged:
|
|
26557
|
+
merged["created_at"] = float(now_ts())
|
|
26558
|
+
if str(merged.get("status", "")).lower() == "completed" and "updated_at" not in merged:
|
|
26559
|
+
merged["updated_at"] = float(now_ts())
|
|
25886
26560
|
merged_by_identity[identity] = merged
|
|
25887
26561
|
if identity not in ordered_identities:
|
|
25888
26562
|
ordered_identities.append(identity)
|
|
25889
26563
|
|
|
25890
26564
|
merged_target_rows = [merged_by_identity[i] for i in ordered_identities if i in merged_by_identity]
|
|
26565
|
+
|
|
26566
|
+
# Fix 4: Content-based deduplication to prevent duplicate subtasks from accumulating
|
|
26567
|
+
_seen_content: set[str] = set()
|
|
26568
|
+
_deduped_target: list[dict] = []
|
|
26569
|
+
for row in merged_target_rows:
|
|
26570
|
+
_ck = normalize_work_text(str(row.get("content", ""))).strip().lower()
|
|
26571
|
+
if _ck in _seen_content:
|
|
26572
|
+
continue
|
|
26573
|
+
_seen_content.add(_ck)
|
|
26574
|
+
_deduped_target.append(row)
|
|
26575
|
+
merged_target_rows = _deduped_target
|
|
26576
|
+
|
|
26577
|
+
# Fix 2: Detect "next-step intent" — if all existing items are completed,
|
|
26578
|
+
# new pending items that don't match existing identities are for the next step.
|
|
26579
|
+
# Remove their parent_step_id so they don't block current step advancement.
|
|
26580
|
+
_all_existing_done = (
|
|
26581
|
+
bool(target_rows) and
|
|
26582
|
+
all(str(r.get("status", "")).lower() == "completed" for r in target_rows)
|
|
26583
|
+
)
|
|
26584
|
+
if _all_existing_done:
|
|
26585
|
+
for row in merged_target_rows:
|
|
26586
|
+
_rid = self._plan_worker_todo_identity(row)
|
|
26587
|
+
if (_rid and _rid not in _existing_identities
|
|
26588
|
+
and str(row.get("status", "")).lower() != "completed"):
|
|
26589
|
+
row.pop("parent_step_id", None) # Not for current step
|
|
26590
|
+
|
|
25891
26591
|
final_rows = preserved + passthrough_rows + merged_target_rows
|
|
25892
26592
|
return self.todo.update(final_rows)
|
|
25893
26593
|
|
|
@@ -26292,21 +26992,46 @@ body{padding:18px}
|
|
|
26292
26992
|
str(r.get("name", "")) == "bash" and r.get("ok", False)
|
|
26293
26993
|
for r in tool_results
|
|
26294
26994
|
)
|
|
26295
|
-
|
|
26995
|
+
validation_ok_current = self._tool_results_have_validation_evidence(current, tool_results)
|
|
26996
|
+
validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
|
|
26997
|
+
validation_ok = validation_ok_current or validation_ok_blackboard
|
|
26998
|
+
bb_sig = self._plan_step_blackboard_signals(current, bb)
|
|
26296
26999
|
# Auto-advance conditions:
|
|
26297
27000
|
should_advance = False
|
|
26298
27001
|
# Priority 1: Check if worker subtasks are all completed (most reliable signal)
|
|
26299
27002
|
subtasks_done = self._step_subtasks_all_completed(current)
|
|
26300
27003
|
if subtasks_done and validation_ok:
|
|
26301
27004
|
should_advance = True
|
|
26302
|
-
#
|
|
26303
|
-
|
|
26304
|
-
|
|
26305
|
-
|
|
26306
|
-
|
|
26307
|
-
|
|
26308
|
-
|
|
27005
|
+
# Fix 3 (single mode): Accumulated evidence path — subtasks done + accumulated evidence
|
|
27006
|
+
# Covers TodoWrite-only turns where validation_ok_current is False
|
|
27007
|
+
if not should_advance and subtasks_done:
|
|
27008
|
+
todo_progress_signal = any(
|
|
27009
|
+
isinstance(r, dict) and r.get("ok", False)
|
|
27010
|
+
and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
|
|
27011
|
+
for r in tool_results
|
|
27012
|
+
)
|
|
27013
|
+
if todo_progress_signal and self._step_has_accumulated_evidence(current, bb):
|
|
26309
27014
|
should_advance = True
|
|
27015
|
+
# Priority 2: Phase-based heuristics — BUT gate by subtask completion when subtasks exist
|
|
27016
|
+
# CRITICAL: A single write_file must NOT advance when 3+ subtasks remain
|
|
27017
|
+
if not should_advance:
|
|
27018
|
+
_has_subtasks_s = bool(self._active_plan_worker_todo_rows(
|
|
27019
|
+
str(current.get("id", "") or ""), role=""
|
|
27020
|
+
))
|
|
27021
|
+
_can_use_phase_heuristic = subtasks_done or not _has_subtasks_s
|
|
27022
|
+
if _can_use_phase_heuristic:
|
|
27023
|
+
if phase in ("research", "design") and validation_ok:
|
|
27024
|
+
should_advance = True
|
|
27025
|
+
elif phase == "implement" and (
|
|
27026
|
+
(wrote_files and validation_ok_current)
|
|
27027
|
+
or (bb_sig["has_write"] and validation_ok_blackboard)
|
|
27028
|
+
):
|
|
27029
|
+
should_advance = True
|
|
27030
|
+
elif phase in ("test", "review") and (
|
|
27031
|
+
(ran_bash_ok and validation_ok_current)
|
|
27032
|
+
or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
|
|
27033
|
+
):
|
|
27034
|
+
should_advance = True
|
|
26310
27035
|
# Also check if the agent explicitly mentioned step completion
|
|
26311
27036
|
if not should_advance:
|
|
26312
27037
|
# Check last assistant message for step completion signals
|
|
@@ -26327,6 +27052,7 @@ body{padding:18px}
|
|
|
26327
27052
|
except Exception:
|
|
26328
27053
|
pass
|
|
26329
27054
|
else:
|
|
27055
|
+
self._inject_rework_if_needed(current, {"tool_results": tool_results})
|
|
26330
27056
|
self._sync_todos_from_blackboard(reason="single-agent-round")
|
|
26331
27057
|
|
|
26332
27058
|
def _todo_project_rows_from_blackboard(self, board: dict | None = None) -> list[dict]:
|
|
@@ -27923,6 +28649,7 @@ body{padding:18px}
|
|
|
27923
28649
|
"IMPORTANT: Previous fix attempts FAILED. You MUST change your approach — "
|
|
27924
28650
|
"do NOT repeat the same instruction. Include the exact error output in your delegation. "
|
|
27925
28651
|
)
|
|
28652
|
+
html_hint = self._html_frontend_boost_instruction()
|
|
27926
28653
|
# Loaded skills constraint for manager
|
|
27927
28654
|
skills_constraint = self._loaded_skills_prompt_hint(for_role="manager")
|
|
27928
28655
|
bb_skills = board.get("loaded_skills", {})
|
|
@@ -27957,6 +28684,7 @@ body{padding:18px}
|
|
|
27957
28684
|
f"{todo_route_note}"
|
|
27958
28685
|
f"{phase_hint}"
|
|
27959
28686
|
f"{failure_hint}"
|
|
28687
|
+
f"{html_hint}"
|
|
27960
28688
|
f"{skills_constraint}"
|
|
27961
28689
|
f"Level={level}, mode={mode}, progress={progress}, "
|
|
27962
28690
|
f"budget={'unlimited' if int(budget) <= 0 else int(budget)}, "
|
|
@@ -29018,6 +29746,19 @@ body{padding:18px}
|
|
|
29018
29746
|
seen.add(low_tail)
|
|
29019
29747
|
keep_lines.append(tail)
|
|
29020
29748
|
continue
|
|
29749
|
+
if low.startswith("tasks to complete:"):
|
|
29750
|
+
continue
|
|
29751
|
+
if re.match(r"^\d+(?:\.\d+)*[.)]\s+", s):
|
|
29752
|
+
continue
|
|
29753
|
+
if re.match(r"^[-*]\s+", s):
|
|
29754
|
+
continue
|
|
29755
|
+
if re.match(
|
|
29756
|
+
r"(?i)^(mkdir\s+-p|run:|create directories:|create project|create directory|initialize project|cmake\b|python\s+-m\s+venv\b|npx\b)",
|
|
29757
|
+
s,
|
|
29758
|
+
):
|
|
29759
|
+
continue
|
|
29760
|
+
if re.match(r"^(创建|初始化|运行|目录结构|项目根目录结构)[::]?", s):
|
|
29761
|
+
continue
|
|
29021
29762
|
norm = re.sub(r"\s+", " ", s).strip().lower()
|
|
29022
29763
|
if norm and norm not in seen:
|
|
29023
29764
|
seen.add(norm)
|
|
@@ -30490,6 +31231,7 @@ body{padding:18px}
|
|
|
30490
31231
|
skills_block = self._skills_awareness_block(for_role=role_key)
|
|
30491
31232
|
code_note = self._runtime_code_reference_prompt_block(max_chars=2600)
|
|
30492
31233
|
engineering_note = self._engineering_execution_boost_instruction()
|
|
31234
|
+
html_note = self._html_frontend_boost_instruction()
|
|
30493
31235
|
plan_todo_note = self._plan_todo_discipline_prompt(role=role_key)
|
|
30494
31236
|
base = (
|
|
30495
31237
|
f"You are {self._agent_display_name(role_key)} in a multi-agent coding system. "
|
|
@@ -30498,10 +31240,15 @@ body{padding:18px}
|
|
|
30498
31240
|
f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
|
|
30499
31241
|
f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS) or pptxgen.bundle.js (browser). "
|
|
30500
31242
|
f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
|
|
31243
|
+
"IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
|
|
31244
|
+
"If an HTML deliverable needs any asset from js_lib, copy it into a task-local relative asset folder "
|
|
31245
|
+
"such as './js/' or './assets/vendor/' next to the deliverable, then reference it with a plain relative path. "
|
|
31246
|
+
"Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases in final exported HTML. "
|
|
30501
31247
|
"Use blackboard for shared state, ask_colleague for inter-agent communication. "
|
|
30502
31248
|
"Keep outputs concise and action-oriented. "
|
|
30503
31249
|
f"{code_note + ' ' if code_note else ''}"
|
|
30504
31250
|
f"{engineering_note + ' ' if engineering_note else ''}"
|
|
31251
|
+
f"{html_note + ' ' if html_note else ''}"
|
|
30505
31252
|
f"{_detect_os_shell_instruction()} "
|
|
30506
31253
|
f"{model_language_instruction(self.ui_language)} "
|
|
30507
31254
|
)
|
|
@@ -30557,10 +31304,6 @@ body{padding:18px}
|
|
|
30557
31304
|
"The skill's workflow, tools, and file structure OVERRIDE the plan's implementation "
|
|
30558
31305
|
"approach — if the plan says 'use python-pptx' but the skill says 'use PptxGenJS', "
|
|
30559
31306
|
"use PptxGenJS. The skill defines HOW to implement; the plan defines WHAT to do. "
|
|
30560
|
-
"AUTONOMOUS SKILL LOADING: When starting a coding, debugging, or architecture task, "
|
|
30561
|
-
"call list_skills to discover available skills, then load_skill to activate the most relevant ones. "
|
|
30562
|
-
"Load skills BEFORE you start working, not after you're stuck. "
|
|
30563
|
-
"Already-loaded skills appear as <loaded-skill> messages in your context — use them directly without reloading. "
|
|
30564
31307
|
"TODO TRACKING (mandatory): "
|
|
30565
31308
|
"When a plan step is active, follow the current todo subtask order instead of inventing a parallel path. "
|
|
30566
31309
|
"After completing ONE subtask, call TodoWrite immediately — mark that subtask as 'completed' and move the next one to 'in_progress' before doing more work. "
|
|
@@ -30650,29 +31393,71 @@ body{padding:18px}
|
|
|
30650
31393
|
)
|
|
30651
31394
|
|
|
30652
31395
|
def _todo_write_rescue(self, args: dict) -> str:
|
|
31396
|
+
"""Rescue todo writer — accepts both strings and dicts, auto-normalizes.
|
|
31397
|
+
FIXED: Now preserves status from incoming items (especially 'completed')
|
|
31398
|
+
instead of resetting everything to 'pending'."""
|
|
30653
31399
|
raw_items = args.get("items", [])
|
|
30654
31400
|
if not isinstance(raw_items, list) or not raw_items:
|
|
30655
31401
|
raise ValueError("items must be a non-empty array")
|
|
30656
|
-
limited = raw_items[:7
|
|
31402
|
+
limited = raw_items[:12] # Allow more items (was 7) — plans can have 5+ subtasks
|
|
30657
31403
|
active_step = self._get_active_plan_step()
|
|
30658
31404
|
active_step_id = trim(str((active_step or {}).get("id", "") or ""), 20)
|
|
30659
31405
|
owner_hint = self._current_plan_worker_owner()
|
|
30660
31406
|
clean_items = []
|
|
31407
|
+
_status_alias = {
|
|
31408
|
+
"todo": "pending", "doing": "in_progress", "inprogress": "in_progress",
|
|
31409
|
+
"in-progress": "in_progress", "done": "completed", "finish": "completed",
|
|
31410
|
+
"finished": "completed",
|
|
31411
|
+
}
|
|
30661
31412
|
for idx, item in enumerate(limited):
|
|
30662
31413
|
if isinstance(item, dict):
|
|
30663
31414
|
content = str(item.get("content", item.get("text", item.get("title", "")))).strip()
|
|
30664
31415
|
owner = str(item.get("owner", "") or owner_hint).strip().lower()
|
|
30665
31416
|
parent_step_id = trim(str(item.get("parent_step_id", "") or active_step_id), 20)
|
|
31417
|
+
# Preserve status from incoming dict (critical for subtask state tracking)
|
|
31418
|
+
raw_status = str(item.get("status", item.get("state", "pending"))).strip().lower()
|
|
31419
|
+
status = _status_alias.get(raw_status, raw_status)
|
|
31420
|
+
if status not in {"pending", "in_progress", "completed"}:
|
|
31421
|
+
status = "pending"
|
|
30666
31422
|
else:
|
|
30667
31423
|
content = str(item).strip()
|
|
30668
31424
|
owner = owner_hint
|
|
30669
31425
|
parent_step_id = active_step_id
|
|
31426
|
+
# Parse status from string prefix markers:
|
|
31427
|
+
# "✅ task" / "[x] task" / "[done] task" / "[completed] task" → completed
|
|
31428
|
+
# "▶ task" / "[>] task" / "[doing] task" / "[in_progress] task" → in_progress
|
|
31429
|
+
# "⬜ task" / "[ ] task" / "[pending] task" / "[todo] task" → pending
|
|
31430
|
+
import re as _re_status
|
|
31431
|
+
_prefix_m = _re_status.match(
|
|
31432
|
+
r'^(?:'
|
|
31433
|
+
r'[\u2705\u2611]\s*' # ✅ ☑
|
|
31434
|
+
r'|\[x\]\s*|\[done\]\s*|\[completed\]\s*|\[finish(?:ed)?\]\s*'
|
|
31435
|
+
r'|\(done\)\s*|\(completed\)\s*|\(x\)\s*'
|
|
31436
|
+
r')',
|
|
31437
|
+
content, _re_status.IGNORECASE
|
|
31438
|
+
)
|
|
31439
|
+
_prefix_ip = _re_status.match(
|
|
31440
|
+
r'^(?:'
|
|
31441
|
+
r'[\u25b6\u25ba\u27a1]\s*' # ▶ ► ➡
|
|
31442
|
+
r'|\[>\]\s*|\[doing\]\s*|\[in.?progress\]\s*'
|
|
31443
|
+
r'|\(doing\)\s*|\(in.?progress\)\s*'
|
|
31444
|
+
r')',
|
|
31445
|
+
content, _re_status.IGNORECASE
|
|
31446
|
+
)
|
|
31447
|
+
if _prefix_m:
|
|
31448
|
+
status = "completed"
|
|
31449
|
+
content = content[_prefix_m.end():].strip()
|
|
31450
|
+
elif _prefix_ip:
|
|
31451
|
+
status = "in_progress"
|
|
31452
|
+
content = content[_prefix_ip.end():].strip()
|
|
31453
|
+
else:
|
|
31454
|
+
status = "pending"
|
|
30670
31455
|
content = normalize_work_text(content) or content
|
|
30671
31456
|
if not content:
|
|
30672
31457
|
continue
|
|
30673
31458
|
row = {
|
|
30674
31459
|
"content": content,
|
|
30675
|
-
"status":
|
|
31460
|
+
"status": status,
|
|
30676
31461
|
}
|
|
30677
31462
|
if owner in {"developer", "explorer", "reviewer"}:
|
|
30678
31463
|
row["owner"] = owner
|
|
@@ -30681,10 +31466,18 @@ body{padding:18px}
|
|
|
30681
31466
|
clean_items.append(row)
|
|
30682
31467
|
if not clean_items:
|
|
30683
31468
|
raise ValueError("no valid todo item text")
|
|
30684
|
-
|
|
30685
|
-
|
|
30686
|
-
|
|
30687
|
-
|
|
31469
|
+
# Only apply in_progress_index if NO items already have in_progress status
|
|
31470
|
+
has_in_progress = any(r["status"] == "in_progress" for r in clean_items)
|
|
31471
|
+
if not has_in_progress:
|
|
31472
|
+
in_progress_index = int(args.get("in_progress_index", 0) or 0)
|
|
31473
|
+
if in_progress_index < 0 or in_progress_index >= len(clean_items):
|
|
31474
|
+
in_progress_index = 0
|
|
31475
|
+
# Only set in_progress on a pending item
|
|
31476
|
+
for i, r in enumerate(clean_items):
|
|
31477
|
+
if r["status"] == "pending":
|
|
31478
|
+
if i >= in_progress_index:
|
|
31479
|
+
r["status"] = "in_progress"
|
|
31480
|
+
break
|
|
30688
31481
|
if active_step is not None:
|
|
30689
31482
|
return self._merge_plan_worker_todo_items(clean_items, role=owner_hint)
|
|
30690
31483
|
if self._is_multi_agent_mode() and owner_hint in {"developer", "explorer", "reviewer"}:
|
|
@@ -31337,11 +32130,21 @@ body{padding:18px}
|
|
|
31337
32130
|
|
|
31338
32131
|
def _dispatch_tool_inner(self, name: str, args: dict, role_key: str = "") -> str:
|
|
31339
32132
|
"""Inner tool dispatcher — all tool logic lives here."""
|
|
32133
|
+
# Fix 5d: Reset TodoWrite loop counter on non-TodoWrite tool calls
|
|
32134
|
+
if name not in ("TodoWrite", "TodoWriteRescue") and hasattr(self, '_todowrite_step_counter'):
|
|
32135
|
+
try:
|
|
32136
|
+
_rst_step = self._get_active_plan_step()
|
|
32137
|
+
if isinstance(_rst_step, dict):
|
|
32138
|
+
_rst_id = str(_rst_step.get("id", "") or "")
|
|
32139
|
+
if _rst_id:
|
|
32140
|
+
self._todowrite_step_counter.pop(_rst_id, None)
|
|
32141
|
+
except Exception:
|
|
32142
|
+
pass
|
|
31340
32143
|
if name == "bash":
|
|
31341
32144
|
guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), self.files_root)
|
|
31342
32145
|
if guard_error:
|
|
31343
32146
|
return guard_error
|
|
31344
|
-
meta = self._run_shell_meta(args["command"], self.files_root,
|
|
32147
|
+
meta = self._run_shell_meta(args["command"], self.files_root, self._shell_command_timeout())
|
|
31345
32148
|
self._emit(
|
|
31346
32149
|
"command",
|
|
31347
32150
|
{
|
|
@@ -31530,6 +32333,50 @@ body{padding:18px}
|
|
|
31530
32333
|
result = self._merge_owner_scoped_todo_items(items, role=str(role_key))
|
|
31531
32334
|
else:
|
|
31532
32335
|
result = self.todo.update(args["items"])
|
|
32336
|
+
# Fix 1: Auto-advance plan step when all subtasks are completed
|
|
32337
|
+
# This handles the case where the worker's last turn only calls TodoWrite
|
|
32338
|
+
# and _post_execution_plan_step_check would miss it due to no "real" tool evidence
|
|
32339
|
+
if has_plan_steps:
|
|
32340
|
+
try:
|
|
32341
|
+
_as = self._get_active_plan_step()
|
|
32342
|
+
if isinstance(_as, dict):
|
|
32343
|
+
_as_id = str(_as.get("id", "") or "")
|
|
32344
|
+
if _as_id and self._step_subtasks_all_completed(_as):
|
|
32345
|
+
_acc_ev = self._collect_accumulated_step_evidence(_as)
|
|
32346
|
+
if _acc_ev and _acc_ev != "accumulated-step-evidence":
|
|
32347
|
+
# Has real evidence — auto-advance
|
|
32348
|
+
self._advance_plan_step(
|
|
32349
|
+
evidence=_acc_ev or "subtasks-all-completed",
|
|
32350
|
+
actor=str(role_key or "developer"),
|
|
32351
|
+
)
|
|
32352
|
+
elif self._step_has_accumulated_evidence(_as, bb):
|
|
32353
|
+
self._advance_plan_step(
|
|
32354
|
+
evidence="subtasks-all-completed",
|
|
32355
|
+
actor=str(role_key or "developer"),
|
|
32356
|
+
)
|
|
32357
|
+
except Exception:
|
|
32358
|
+
pass
|
|
32359
|
+
# Fix 5b: TodoWrite loop detection — force-advance after 3 consecutive calls
|
|
32360
|
+
if has_plan_steps:
|
|
32361
|
+
try:
|
|
32362
|
+
_as5 = self._get_active_plan_step()
|
|
32363
|
+
if isinstance(_as5, dict):
|
|
32364
|
+
_as5_id = str(_as5.get("id", "") or "")
|
|
32365
|
+
if _as5_id:
|
|
32366
|
+
if not hasattr(self, '_todowrite_step_counter'):
|
|
32367
|
+
self._todowrite_step_counter = {}
|
|
32368
|
+
cnt = self._todowrite_step_counter.get(_as5_id, 0) + 1
|
|
32369
|
+
self._todowrite_step_counter[_as5_id] = cnt
|
|
32370
|
+
if (cnt >= 3
|
|
32371
|
+
and self._step_subtasks_all_completed(_as5)
|
|
32372
|
+
and self._step_has_accumulated_evidence(_as5, bb)):
|
|
32373
|
+
# Force advance — worker is stuck in a loop AND step has real evidence
|
|
32374
|
+
self._advance_plan_step(
|
|
32375
|
+
evidence="force-advance:todowrite-loop-detected",
|
|
32376
|
+
actor=str(role_key or "developer"),
|
|
32377
|
+
)
|
|
32378
|
+
except Exception:
|
|
32379
|
+
pass
|
|
31533
32380
|
# Step completion skill recheck: if any item just got marked completed, re-evaluate skills
|
|
31534
32381
|
# This fires in ALL modes (single/sync/plan) when developer writes todos
|
|
31535
32382
|
try:
|
|
@@ -31891,7 +32738,7 @@ body{padding:18px}
|
|
|
31891
32738
|
guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), wt_path)
|
|
31892
32739
|
if guard_error:
|
|
31893
32740
|
return guard_error
|
|
31894
|
-
meta = self._run_shell_meta(args["command"], wt_path,
|
|
32741
|
+
meta = self._run_shell_meta(args["command"], wt_path, self._shell_command_timeout())
|
|
31895
32742
|
self._emit(
|
|
31896
32743
|
"command",
|
|
31897
32744
|
{
|
|
@@ -32868,7 +33715,7 @@ body{padding:18px}
|
|
|
32868
33715
|
else:
|
|
32869
33716
|
_repeat_delegation_count = 0
|
|
32870
33717
|
_prev_delegation_hash = _cur_hash
|
|
32871
|
-
if _repeat_delegation_count >=
|
|
33718
|
+
if _repeat_delegation_count >= 3:
|
|
32872
33719
|
self._emit("status", {"summary": f"manager stuck: repeated identical delegation x{_repeat_delegation_count + 1}; forcing advance"})
|
|
32873
33720
|
_bb_stuck = self._ensure_blackboard()
|
|
32874
33721
|
_stuck_step = next(
|
|
@@ -32902,13 +33749,6 @@ body{padding:18px}
|
|
|
32902
33749
|
media_inputs_pool=media_inputs_pool,
|
|
32903
33750
|
media_seen_ts_by_role=media_seen_ts_by_role,
|
|
32904
33751
|
)
|
|
32905
|
-
# Sync-mode skill auto-discovery: same mechanism as plan mode's step-completed trigger.
|
|
32906
|
-
# Runs on early rounds for developer/explorer. Uses goal_sig dedup — no re-loading if already loaded.
|
|
32907
|
-
if role in ("developer", "explorer") and rounds_used <= 2:
|
|
32908
|
-
try:
|
|
32909
|
-
self._refresh_loaded_skills_for_execution_focus(trigger=f"sync-worker-pre:{role}")
|
|
32910
|
-
except Exception:
|
|
32911
|
-
pass
|
|
32912
33752
|
board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
|
|
32913
33753
|
step = self._multi_agent_turn(
|
|
32914
33754
|
role,
|
|
@@ -32918,49 +33758,6 @@ body{padding:18px}
|
|
|
32918
33758
|
self._blackboard_update_from_worker_step(role, step)
|
|
32919
33759
|
# Post-execution plan step advancement (replaces pre-execution advancement)
|
|
32920
33760
|
self._post_execution_plan_step_check(route, step if isinstance(step, dict) else {})
|
|
32921
|
-
# Sync-mode failure recovery: detect all-tools-failed and inject recovery hint + auto-load debugging skill
|
|
32922
|
-
_step_dict = step if isinstance(step, dict) else {}
|
|
32923
|
-
_step_results = _step_dict.get("tool_results", []) or []
|
|
32924
|
-
if _step_results:
|
|
32925
|
-
_sync_err_count = sum(1 for r in _step_results if isinstance(r, dict) and not r.get("ok", False))
|
|
32926
|
-
_sync_ok_count = sum(1 for r in _step_results if isinstance(r, dict) and r.get("ok", False))
|
|
32927
|
-
if _sync_err_count > 0 and _sync_ok_count == 0:
|
|
32928
|
-
# All tool calls failed in this worker turn — inject recovery guidance
|
|
32929
|
-
_failed_tools = [str(r.get("name", "")) for r in _step_results if isinstance(r, dict)][:4]
|
|
32930
|
-
_err_outputs = " | ".join(
|
|
32931
|
-
trim(str(r.get("output", "") or ""), 120)
|
|
32932
|
-
for r in _step_results if isinstance(r, dict) and not r.get("ok", False)
|
|
32933
|
-
)[:400]
|
|
32934
|
-
self._append_agent_context_message(
|
|
32935
|
-
role,
|
|
32936
|
-
{
|
|
32937
|
-
"role": "user",
|
|
32938
|
-
"content": (
|
|
32939
|
-
"<failure-recovery>"
|
|
32940
|
-
f"All tool calls failed in this turn ({', '.join(_failed_tools)}). "
|
|
32941
|
-
f"Errors: {_err_outputs}\n"
|
|
32942
|
-
"Before retrying, STOP and diagnose:\n"
|
|
32943
|
-
"1) If a debugging skill is available, call load_skill('systematic-debugging') and follow its workflow.\n"
|
|
32944
|
-
"2) Read the EXACT error message — identify the root cause, not just the symptom.\n"
|
|
32945
|
-
"3) Form ONE hypothesis about the cause before making any changes.\n"
|
|
32946
|
-
"4) Apply ONE targeted fix, then verify with a test/build command.\n"
|
|
32947
|
-
"5) If still blocked after 2 attempts, report the exact blocker to the user."
|
|
32948
|
-
"</failure-recovery>"
|
|
32949
|
-
),
|
|
32950
|
-
"ts": now_ts(),
|
|
32951
|
-
"agent_role": role,
|
|
32952
|
-
},
|
|
32953
|
-
mirror_to_global=False,
|
|
32954
|
-
)
|
|
32955
|
-
# Auto-load systematic-debugging if failure involves code errors
|
|
32956
|
-
_code_err_kw = ("bash", "compile", "syntax", "test", "build", "traceback", "error:")
|
|
32957
|
-
if any(kw in _err_outputs.lower() for kw in _code_err_kw):
|
|
32958
|
-
_bb_sk = self._ensure_blackboard().get("loaded_skills", {})
|
|
32959
|
-
if isinstance(_bb_sk, dict) and "systematic-debugging" not in _bb_sk:
|
|
32960
|
-
try:
|
|
32961
|
-
self._load_skill_with_cache("systematic-debugging", load_source="auto:sync-worker-failure")
|
|
32962
|
-
except Exception:
|
|
32963
|
-
pass
|
|
32964
33761
|
# Fix 6b: Pure sync no-plan — read worker-done signal and notify manager
|
|
32965
33762
|
_bb_sync = self._ensure_blackboard()
|
|
32966
33763
|
if _bb_sync.pop("sync_worker_round_done", False):
|
|
@@ -34085,6 +34882,7 @@ body{padding:18px}
|
|
|
34085
34882
|
"category": "plan_step",
|
|
34086
34883
|
"plan_step_index": i,
|
|
34087
34884
|
"created_at": float(now_ts()),
|
|
34885
|
+
"activated_at": float(now_ts()) if not plan_todos else None,
|
|
34088
34886
|
"completed_at": None,
|
|
34089
34887
|
"completed_by": "",
|
|
34090
34888
|
"evidence": "",
|
|
@@ -35007,13 +35805,6 @@ body{padding:18px}
|
|
|
35007
35805
|
self.agent_round_index = int(self.agent_round_index) + 1
|
|
35008
35806
|
self.current_phase = "model-call"
|
|
35009
35807
|
self.current_tool_name = ""
|
|
35010
|
-
# Single-mode skill auto-discovery: same as plan mode. Runs on first 2 rounds only.
|
|
35011
|
-
# Uses goal_sig dedup — if skills already loaded for this goal, no-op.
|
|
35012
|
-
if int(self.agent_round_index) <= 2:
|
|
35013
|
-
try:
|
|
35014
|
-
self._refresh_loaded_skills_for_execution_focus(trigger="single-worker-pre")
|
|
35015
|
-
except Exception:
|
|
35016
|
-
pass
|
|
35017
35808
|
if level_budget > 0 and int(self.agent_round_index) > int(level_budget):
|
|
35018
35809
|
force_single_tool_rounds = max(force_single_tool_rounds, 2)
|
|
35019
35810
|
if not compact_budget_notified:
|
|
@@ -36525,6 +37316,7 @@ body{padding:18px}
|
|
|
36525
37316
|
"live_run_notice_elapsed": round(float(self.live_run_notice_elapsed or 0.0), 1),
|
|
36526
37317
|
"max_agent_rounds": int(self.max_agent_rounds),
|
|
36527
37318
|
"max_run_seconds": int(self.max_run_seconds),
|
|
37319
|
+
"shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
36528
37320
|
"auto_model_switch": bool(self.auto_model_switch),
|
|
36529
37321
|
"arbiter_enabled": bool(self.arbiter_enabled),
|
|
36530
37322
|
"arbiter_model": str(self.arbiter_model or ""),
|
|
@@ -36704,6 +37496,7 @@ class SessionManager:
|
|
|
36704
37496
|
context_limit_locked: bool = False,
|
|
36705
37497
|
max_rounds: int = MAX_AGENT_ROUNDS,
|
|
36706
37498
|
max_run_seconds: int = MAX_RUN_SECONDS,
|
|
37499
|
+
shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
36707
37500
|
auto_model_switch: bool = False,
|
|
36708
37501
|
arbiter_enabled: bool = True,
|
|
36709
37502
|
arbiter_model: str = "",
|
|
@@ -36749,6 +37542,12 @@ class SessionManager:
|
|
|
36749
37542
|
maximum=MAX_RUN_TIMEOUT_SECONDS,
|
|
36750
37543
|
fallback=MAX_RUN_SECONDS,
|
|
36751
37544
|
)
|
|
37545
|
+
self.shell_command_timeout_seconds = normalize_timeout_seconds(
|
|
37546
|
+
shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
37547
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
37548
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
37549
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
37550
|
+
)
|
|
36752
37551
|
self.auto_model_switch = bool(auto_model_switch)
|
|
36753
37552
|
self.arbiter_enabled = bool(arbiter_enabled)
|
|
36754
37553
|
self.arbiter_model = str(arbiter_model or "").strip()
|
|
@@ -37031,6 +37830,12 @@ class SessionManager:
|
|
|
37031
37830
|
)
|
|
37032
37831
|
sess.execution_mode = normalize_execution_mode(self.execution_mode, default=EXECUTION_MODE_SYNC)
|
|
37033
37832
|
sess.single_advance_prompt_enhance = bool(self.single_advance_prompt_enhance)
|
|
37833
|
+
sess.shell_command_timeout_seconds = normalize_timeout_seconds(
|
|
37834
|
+
self.shell_command_timeout_seconds,
|
|
37835
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
37836
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
37837
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
37838
|
+
)
|
|
37034
37839
|
sess._apply_active_profile()
|
|
37035
37840
|
sess.updated_at = now_ts()
|
|
37036
37841
|
sess._persist()
|
|
@@ -37091,6 +37896,7 @@ class SessionManager:
|
|
|
37091
37896
|
context_limit_locked=self.context_limit_locked,
|
|
37092
37897
|
max_rounds=self.max_rounds,
|
|
37093
37898
|
max_run_seconds=self.max_run_seconds,
|
|
37899
|
+
shell_command_timeout_seconds=self.shell_command_timeout_seconds,
|
|
37094
37900
|
auto_model_switch=self.auto_model_switch,
|
|
37095
37901
|
arbiter_enabled=self.arbiter_enabled,
|
|
37096
37902
|
arbiter_model=self.arbiter_model,
|
|
@@ -37140,6 +37946,7 @@ class SessionManager:
|
|
|
37140
37946
|
context_limit_locked=self.context_limit_locked,
|
|
37141
37947
|
max_rounds=self.max_rounds,
|
|
37142
37948
|
max_run_seconds=self.max_run_seconds,
|
|
37949
|
+
shell_command_timeout_seconds=self.shell_command_timeout_seconds,
|
|
37143
37950
|
auto_model_switch=self.auto_model_switch,
|
|
37144
37951
|
arbiter_enabled=self.arbiter_enabled,
|
|
37145
37952
|
arbiter_model=self.arbiter_model,
|
|
@@ -38761,7 +39568,7 @@ function feedSignature(snap){const feed=Array.isArray(snap?.conversation_feed)?s
|
|
|
38761
39568
|
function boardsSignature(snap){return [snap?.running?1:0,snap?.agent_phase||'',Number(snap?.agent_round_index||0),Number(snap?.queued_user_inputs_count||0),Number(snap?.truncation_count||0),Number(snap?.live_truncation_attempts||0),Number(snap?.live_truncation_tokens||0),snap?.live_truncation_active?1:0,Number(snap?.context_tokens_estimate||0),Number(snap?.context_left_tokens||0),Number(snap?.context_left_percent||0),Number(snap?.render_bridge?.seq||0),(snap?.todos||[]).length,(snap?.tasks||[]).length,(snap?.activity||[]).length,(snap?.operations||[]).length,(snap?.uploads||[]).length].join('|')}
|
|
38762
39569
|
function sessionsSignature(list){const rows=Array.isArray(list)?list:[];const sig=tailSig(rows,6,row=>`${String(row?.id||'')}:${row?.running?1:0}:${Number(row?.message_count||0)}:${Number(row?.updated_at||0)}`);const aid=String(S.activeId||'').trim();let activeSig='-';if(aid){const activeRow=rows.find(row=>String(row?.id||'')===aid);if(activeRow){activeSig=`${aid}:${activeRow?.running?1:0}:${Number(activeRow?.message_count||0)}:${Number(activeRow?.updated_at||0)}`}else{activeSig=`missing:${aid}`}}return `${rows.length}|active=${activeSig}|${sig}`}
|
|
38763
39570
|
function _statInfinite(n){const v=Number(n);return(Number.isFinite(v)&&v>0)?String(v):'∞'}
|
|
38764
|
-
function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
|
|
39571
|
+
function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'shell_command_timeout_seconds'))S.config.shell_command_timeout_seconds=cfg.shell_command_timeout_seconds;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
|
|
38765
39572
|
function renderStats(){const sessions=S.sessions.length;const running=S.sessions.filter(x=>x.running).length;const msgs=S.sessions.reduce((n,x)=>n+x.message_count,0);const model=S.config?.model||'-';const sched=(S.config&&typeof S.config.scheduler==='object')?S.config.scheduler:{};const quota=(S.config&&typeof S.config.session_creation_limit==='object')?S.config.session_creation_limit:{};const runningTotal=Math.max(0,Number(sched?.running_total||0));const maxTasks=Number(sched?.max_user||0);const globalTasks=`${runningTotal}/${_statInfinite(maxTasks)}`;const dailySessions=(quota&&quota.enabled)?`${Math.max(0,Number(quota.used||0))}/${Math.max(0,Number(quota.limit||0))}`:'∞';const compact=[[t('stat_sessions'),sessions],[t('stat_running'),running],[t('stat_messages'),msgs],[t('stat_global_tasks'),globalTasks],[t('stat_daily_sessions'),dailySessions]].map(([k,v])=>`<div class=\"stat compact\"><div class=\"k\">${esc(k)}</div><div class=\"v\">${esc(v)}</div></div>`).join('');const modelHtml=`<div class=\"stat model\"><div class=\"k\">${esc(t('stat_model'))}</div><div class=\"v\">${esc(model)}</div></div>`;E('topStats').innerHTML=`<div class=\"top-stats-primary\">${compact}</div><div class=\"top-stats-model\">${modelHtml}</div>`}
|
|
38766
39573
|
function renderSessions(){const html=S.sessions.map(s=>`<div class=\"session-item${s.id===S.activeId?' active':''}\" data-id=\"${esc(s.id)}\"><div><strong>${esc(s.title)}</strong></div><div class=\"mono\">${s.running?t('running'):t('idle')} · ${s.message_count} msgs</div></div>`).join('');setPanelHtml('sessionList',html||`<div class=\"mono\">${esc(t('no_sessions'))}</div>`);for(const el of document.querySelectorAll('#sessionList .session-item')){el.onclick=()=>selectSession(el.getAttribute('data-id'))}}
|
|
38767
39574
|
function _syncActiveSessionSummaryFromSnapshot(){const sid=String(S.activeId||'').trim();const snap=S.snap;if(!sid||!snap)return false;const rows=Array.isArray(S.sessions)?S.sessions.slice():[];let idx=rows.findIndex(row=>String(row?.id||'')===sid);const running=!!snap?.running;let updatedAt=Number(snap?.updated_at||0);if(!Number.isFinite(updatedAt)||updatedAt<=0){updatedAt=(Date.now()/1000)}let msgCount=Number(snap?.message_count);if(!Number.isFinite(msgCount)||msgCount<0){const arr=Array.isArray(snap?.messages)?snap.messages:[];let cnt=0;for(const row of arr){if(String(row?.role||'').trim()==='tool')continue;cnt+=1}msgCount=cnt}msgCount=Math.max(0,Math.floor(Number(msgCount)||0));const title=String(snap?.title||'').trim();if(idx<0){rows.push({id:sid,title:title||sid,running:running,updated_at:updatedAt,message_count:msgCount});idx=rows.length-1}else{const cur=rows[idx]||{};const next={...cur};let changed=false;if(!!cur.running!==running){next.running=running;changed=true}if(Number(cur.message_count||0)!==msgCount){next.message_count=msgCount;changed=true}if(Number(cur.updated_at||0)!==updatedAt){next.updated_at=updatedAt;changed=true}if(title&&String(cur.title||'')!==title){next.title=title;changed=true}if(!changed)return false;rows[idx]=next}rows.sort((a,b)=>Number(b?.updated_at||0)-Number(a?.updated_at||0));S.sessions=rows;return true}
|
|
@@ -40222,8 +41029,7 @@ function _chatVirtBuildMessageNode(m){
|
|
|
40222
41029
|
const pillsHtml=pills.map(x=>`<span class=\"manager-delegate-pill\">${esc(String(x))}</span>`).join('');
|
|
40223
41030
|
const routeHtml=`<div class=\"manager-delegate-route\"><span class=\"agent-bus-pill manager\">${esc(t('role_manager'))}</span><span class=\"agent-bus-arrow\">→</span><span class=\"agent-bus-pill${targetRole?(' '+targetRole):''}\">${esc(targetLabel)}</span></div>`;
|
|
40224
41031
|
const objectiveHtml=(objective&&instruction&&objective.toLowerCase()===instruction.toLowerCase())?'':(objective?`<div class=\"manager-delegate-line\"><span>${esc(t('event_objective'))}</span><div>${esc(objective)}</div></div>`:'');
|
|
40225
|
-
const
|
|
40226
|
-
const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div class=\"msg-md\">${renderMarkdownCached(instruction,instructionKey)}</div></div>`:'';
|
|
41032
|
+
const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div>${esc(instruction)}</div></div>`:'';
|
|
40227
41033
|
d.innerHTML=`${roleBadge}<div class=\"manager-delegate-card\"><div class=\"manager-delegate-head\">${esc(t('event_manager_delegate_title'))}</div>${routeHtml}<div class=\"manager-delegate-pills\">${pillsHtml}</div>${objectiveHtml}${instructionHtml}</div>`;
|
|
40228
41034
|
return d;
|
|
40229
41035
|
}
|
|
@@ -48574,6 +49380,7 @@ class AppContext:
|
|
|
48574
49380
|
context_limit_locked: bool = False,
|
|
48575
49381
|
max_rounds: int = MAX_AGENT_ROUNDS,
|
|
48576
49382
|
max_run_seconds: int = MAX_RUN_SECONDS,
|
|
49383
|
+
shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
48577
49384
|
auto_model_switch: bool = False,
|
|
48578
49385
|
arbiter_enabled: bool = True,
|
|
48579
49386
|
arbiter_model: str = "",
|
|
@@ -48594,7 +49401,7 @@ class AppContext:
|
|
|
48594
49401
|
self.base_url = base_url
|
|
48595
49402
|
self.model = model
|
|
48596
49403
|
self.thinking = False
|
|
48597
|
-
self.js_lib_root = offline_js_lib_root(
|
|
49404
|
+
self.js_lib_root = offline_js_lib_root(self.workspace)
|
|
48598
49405
|
self.offline_js_summary: dict = {}
|
|
48599
49406
|
try:
|
|
48600
49407
|
self.offline_js_summary = load_offline_js_lib_index(self.js_lib_root)
|
|
@@ -48617,6 +49424,12 @@ class AppContext:
|
|
|
48617
49424
|
maximum=MAX_RUN_TIMEOUT_SECONDS,
|
|
48618
49425
|
fallback=MAX_RUN_SECONDS,
|
|
48619
49426
|
)
|
|
49427
|
+
self.shell_command_timeout_seconds = normalize_timeout_seconds(
|
|
49428
|
+
shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
49429
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
49430
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
49431
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
49432
|
+
)
|
|
48620
49433
|
self.auto_model_switch = bool(auto_model_switch)
|
|
48621
49434
|
self.arbiter_enabled = bool(arbiter_enabled)
|
|
48622
49435
|
self.arbiter_model = str(arbiter_model or "").strip()
|
|
@@ -48785,6 +49598,7 @@ class AppContext:
|
|
|
48785
49598
|
"show_upload_list": bool(getattr(self, "show_upload_list", False)),
|
|
48786
49599
|
"ui_style": normalize_ui_style(getattr(self, "ui_style", DEFAULT_UI_STYLE)),
|
|
48787
49600
|
"js_lib_download_enabled": bool(getattr(self, "js_lib_download_enabled", True)),
|
|
49601
|
+
"shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
48788
49602
|
"daily_session_limit_per_ip": int(getattr(self, "daily_session_limit_per_ip", 0) or 0),
|
|
48789
49603
|
"daily_session_reset_hour": int(getattr(self, "daily_session_reset_hour", 8) or 8),
|
|
48790
49604
|
"validation": dict(self.web_ui_validation or {}),
|
|
@@ -50028,6 +50842,7 @@ class AppContext:
|
|
|
50028
50842
|
self.context_limit_locked,
|
|
50029
50843
|
self.max_rounds,
|
|
50030
50844
|
self.max_run_seconds,
|
|
50845
|
+
self.shell_command_timeout_seconds,
|
|
50031
50846
|
self.auto_model_switch,
|
|
50032
50847
|
self.arbiter_enabled,
|
|
50033
50848
|
self.arbiter_model,
|
|
@@ -51096,6 +51911,7 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51096
51911
|
"download_js_lib_enabled": bool(getattr(self.app, "js_lib_download_enabled", True)),
|
|
51097
51912
|
"request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
|
|
51098
51913
|
"run_timeout": int(mgr.max_run_seconds),
|
|
51914
|
+
"shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
51099
51915
|
}
|
|
51100
51916
|
)
|
|
51101
51917
|
model_cat = mgr.model_catalog()
|
|
@@ -51142,6 +51958,7 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51142
51958
|
"context_token_limit": int(mgr.context_token_limit),
|
|
51143
51959
|
"context_limit_locked": bool(mgr.context_limit_locked),
|
|
51144
51960
|
"run_timeout": int(mgr.max_run_seconds),
|
|
51961
|
+
"shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
51145
51962
|
"auto_model_switch": bool(mgr.auto_model_switch),
|
|
51146
51963
|
"execution_mode": normalize_execution_mode(getattr(mgr, "execution_mode", EXECUTION_MODE_SYNC), default=EXECUTION_MODE_SYNC),
|
|
51147
51964
|
"execution_mode_choices": list(EXECUTION_MODE_CHOICES),
|
|
@@ -51194,7 +52011,7 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51194
52011
|
for hk, hv in probe_headers.items():
|
|
51195
52012
|
if str(hk or "").strip() and str(hv or "").strip():
|
|
51196
52013
|
req.add_header(str(hk), str(hv))
|
|
51197
|
-
with
|
|
52014
|
+
with urlopen(req, timeout=8) as resp:
|
|
51198
52015
|
body_text = resp.read().decode("utf-8", errors="replace")
|
|
51199
52016
|
reachable = True
|
|
51200
52017
|
try:
|
|
@@ -51251,7 +52068,7 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51251
52068
|
for hk, hv in probe_headers.items():
|
|
51252
52069
|
if str(hk or "").strip() and str(hv or "").strip():
|
|
51253
52070
|
base_req.add_header(str(hk), str(hv))
|
|
51254
|
-
with
|
|
52071
|
+
with urlopen(base_req, timeout=8):
|
|
51255
52072
|
pass
|
|
51256
52073
|
reachable = True
|
|
51257
52074
|
except urllib.error.HTTPError as exc:
|
|
@@ -51901,6 +52718,7 @@ class SkillsHandler(BaseHTTPRequestHandler):
|
|
|
51901
52718
|
"show_upload_list": bool(getattr(self.app, "show_upload_list", False)),
|
|
51902
52719
|
"web_ui": web_ui_state,
|
|
51903
52720
|
"run_timeout": int(mgr.max_run_seconds),
|
|
52721
|
+
"shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
51904
52722
|
"request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
|
|
51905
52723
|
}
|
|
51906
52724
|
)
|
|
@@ -52332,6 +53150,25 @@ def main():
|
|
|
52332
53150
|
f"(minimum {MIN_RUN_TIMEOUT_SECONDS}, model-active time excluded)"
|
|
52333
53151
|
),
|
|
52334
53152
|
)
|
|
53153
|
+
parser.add_argument(
|
|
53154
|
+
"--shell_command_timeout",
|
|
53155
|
+
"--shell-command-timeout",
|
|
53156
|
+
"--bash_timeout",
|
|
53157
|
+
"--bash-timeout",
|
|
53158
|
+
"--command_timeout",
|
|
53159
|
+
"--command-timeout",
|
|
53160
|
+
dest="shell_command_timeout",
|
|
53161
|
+
default=None,
|
|
53162
|
+
type=int,
|
|
53163
|
+
help=(
|
|
53164
|
+
"Per-command shell/bash timeout in seconds "
|
|
53165
|
+
f"(default {DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS}; allowed "
|
|
53166
|
+
f"{MIN_SHELL_COMMAND_TIMEOUT_SECONDS}-{MAX_SHELL_COMMAND_TIMEOUT_SECONDS}). "
|
|
53167
|
+
"Independent from the global run timeout. Also configurable via --config keys "
|
|
53168
|
+
"shell_command_timeout / shell_timeout / bash_timeout / command_timeout and env "
|
|
53169
|
+
"AGENT_SHELL_COMMAND_TIMEOUT / AGENT_BASH_TIMEOUT / AGENT_COMMAND_TIMEOUT."
|
|
53170
|
+
),
|
|
53171
|
+
)
|
|
52335
53172
|
parser.add_argument(
|
|
52336
53173
|
"--live_input_delay_write",
|
|
52337
53174
|
default=LIVE_INPUT_DELAY_WRITE_ROUNDS,
|
|
@@ -52481,9 +53318,10 @@ def main():
|
|
|
52481
53318
|
default="",
|
|
52482
53319
|
help=(
|
|
52483
53320
|
"LLM config source (URL or local file path). "
|
|
52484
|
-
"Also reads startup keys like show_upload_list, download_js_lib and "
|
|
53321
|
+
"Also reads startup keys like show_upload_list, download_js_lib, shell_command_timeout and "
|
|
52485
53322
|
"daily_session_limit (aliases: daily_sessions_per_ip / "
|
|
52486
|
-
"max_daily_sessions_per_ip / session_daily_limit
|
|
53323
|
+
"max_daily_sessions_per_ip / session_daily_limit; shell aliases: "
|
|
53324
|
+
"shell_timeout / bash_timeout / command_timeout)."
|
|
52487
53325
|
),
|
|
52488
53326
|
)
|
|
52489
53327
|
parser.add_argument(
|
|
@@ -52618,6 +53456,7 @@ def main():
|
|
|
52618
53456
|
arbiter_enabled=True,
|
|
52619
53457
|
show_upload_list=None,
|
|
52620
53458
|
download_js_lib=None,
|
|
53459
|
+
shell_command_timeout=None,
|
|
52621
53460
|
)
|
|
52622
53461
|
args = parser.parse_args()
|
|
52623
53462
|
ctx_limit_locked = any(str(arg).split("=", 1)[0] == "--ctx_limit" for arg in sys.argv[1:])
|
|
@@ -52647,6 +53486,7 @@ def main():
|
|
|
52647
53486
|
)
|
|
52648
53487
|
resolved_show_upload_list = False
|
|
52649
53488
|
resolved_daily_session_limit_per_ip = 0
|
|
53489
|
+
resolved_shell_command_timeout = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS
|
|
52650
53490
|
external_config: dict = {}
|
|
52651
53491
|
external_config_source = ""
|
|
52652
53492
|
bootstrap_base_url = args.ollama_base_url
|
|
@@ -52673,6 +53513,14 @@ def main():
|
|
|
52673
53513
|
external_daily_session_limit = extract_daily_session_limit_setting(external_config)
|
|
52674
53514
|
if external_daily_session_limit is not None:
|
|
52675
53515
|
resolved_daily_session_limit_per_ip = int(external_daily_session_limit)
|
|
53516
|
+
external_shell_command_timeout = extract_shell_command_timeout_setting(external_config)
|
|
53517
|
+
if external_shell_command_timeout is not None:
|
|
53518
|
+
resolved_shell_command_timeout = normalize_timeout_seconds(
|
|
53519
|
+
external_shell_command_timeout,
|
|
53520
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53521
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53522
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53523
|
+
)
|
|
52676
53524
|
print(f"[web-agent] external config loaded: {external_config_source}")
|
|
52677
53525
|
except Exception as exc:
|
|
52678
53526
|
print(f"[web-agent] invalid --config: {exc}")
|
|
@@ -52686,9 +53534,25 @@ def main():
|
|
|
52686
53534
|
web_ui_daily_session_limit = extract_daily_session_limit_setting(web_ui_config)
|
|
52687
53535
|
if web_ui_daily_session_limit is not None:
|
|
52688
53536
|
resolved_daily_session_limit_per_ip = int(web_ui_daily_session_limit)
|
|
53537
|
+
web_ui_shell_command_timeout = extract_shell_command_timeout_setting(web_ui_config)
|
|
53538
|
+
if web_ui_shell_command_timeout is not None:
|
|
53539
|
+
resolved_shell_command_timeout = normalize_timeout_seconds(
|
|
53540
|
+
web_ui_shell_command_timeout,
|
|
53541
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53542
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53543
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53544
|
+
)
|
|
52689
53545
|
cli_daily_session_limit = getattr(args, "daily_session_limit_per_ip", None)
|
|
52690
53546
|
if cli_daily_session_limit is not None:
|
|
52691
53547
|
resolved_daily_session_limit_per_ip = max(0, int(cli_daily_session_limit or 0))
|
|
53548
|
+
cli_shell_command_timeout = getattr(args, "shell_command_timeout", None)
|
|
53549
|
+
if cli_shell_command_timeout is not None:
|
|
53550
|
+
resolved_shell_command_timeout = normalize_timeout_seconds(
|
|
53551
|
+
cli_shell_command_timeout,
|
|
53552
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53553
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53554
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
53555
|
+
)
|
|
52692
53556
|
raw_ui_style = str(getattr(args, "ui_style", "") or "").strip()
|
|
52693
53557
|
if not raw_ui_style:
|
|
52694
53558
|
raw_ui_style = str(extract_ui_style_setting(external_config) or "").strip()
|
|
@@ -52743,6 +53607,7 @@ def main():
|
|
|
52743
53607
|
f"[web-agent] run_timeout adjusted {requested_run_timeout}->{resolved_run_timeout} "
|
|
52744
53608
|
f"(allowed range {MIN_RUN_TIMEOUT_SECONDS}-{MAX_RUN_TIMEOUT_SECONDS})"
|
|
52745
53609
|
)
|
|
53610
|
+
print(f"[web-agent] shell_command_timeout={int(resolved_shell_command_timeout)}s")
|
|
52746
53611
|
requested_live_input_delay_write = int(args.live_input_delay_write if args.live_input_delay_write is not None else LIVE_INPUT_DELAY_WRITE_ROUNDS)
|
|
52747
53612
|
resolved_live_input_delay_write = max(0, min(20, requested_live_input_delay_write))
|
|
52748
53613
|
if resolved_live_input_delay_write != requested_live_input_delay_write:
|
|
@@ -52925,6 +53790,7 @@ def main():
|
|
|
52925
53790
|
ctx_limit_locked,
|
|
52926
53791
|
resolved_max_rounds,
|
|
52927
53792
|
resolved_run_timeout,
|
|
53793
|
+
resolved_shell_command_timeout,
|
|
52928
53794
|
resolved_auto_model_switch,
|
|
52929
53795
|
resolved_arbiter_enabled,
|
|
52930
53796
|
resolved_arbiter_model,
|