clouds-coder 2026.4.2__tar.gz → 2026.4.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/Clouds_Coder.py +2738 -345
- {clouds_coder-2026.4.2/clouds_coder.egg-info → clouds_coder-2026.4.5}/PKG-INFO +2 -1
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5/clouds_coder.egg-info}/PKG-INFO +2 -1
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/clouds_coder.egg-info/requires.txt +1 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/pyproject.toml +9 -1
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/LICENSE +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/README.md +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/clouds_coder.egg-info/SOURCES.txt +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/clouds_coder.egg-info/dependency_links.txt +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/clouds_coder.egg-info/entry_points.txt +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/clouds_coder.egg-info/top_level.txt +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/setup.cfg +0 -0
- {clouds_coder-2026.4.2 → clouds_coder-2026.4.5}/tests/test_smoke.py +0 -0
|
@@ -25,6 +25,7 @@ import selectors
|
|
|
25
25
|
import signal
|
|
26
26
|
import shutil
|
|
27
27
|
import shlex
|
|
28
|
+
import ssl
|
|
28
29
|
import socket
|
|
29
30
|
import subprocess
|
|
30
31
|
import sys
|
|
@@ -44,15 +45,48 @@ from pathlib import Path, PurePosixPath
|
|
|
44
45
|
from urllib.error import HTTPError, URLError
|
|
45
46
|
from urllib.parse import parse_qs, unquote, urlparse
|
|
46
47
|
from urllib.request import Request, urlopen
|
|
48
|
+
try:
|
|
49
|
+
import certifi as _certifi
|
|
50
|
+
except Exception:
|
|
51
|
+
_certifi = None
|
|
47
52
|
try:
|
|
48
53
|
import yaml as _yaml
|
|
49
54
|
except Exception:
|
|
50
55
|
_yaml = None
|
|
56
|
+
_URL_OPEN_ORIGINAL = urlopen
|
|
57
|
+
_HTTP_SSL_CONTEXT = None
|
|
51
58
|
APP_VERSION = "0.1.1"
|
|
52
59
|
DEFAULT_OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
|
|
53
60
|
DEFAULT_OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5-coder:7b")
|
|
54
61
|
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
55
62
|
|
|
63
|
+
def _shared_http_ssl_context():
|
|
64
|
+
global _HTTP_SSL_CONTEXT
|
|
65
|
+
if _HTTP_SSL_CONTEXT is not None:
|
|
66
|
+
return _HTTP_SSL_CONTEXT
|
|
67
|
+
cafile = str(os.getenv("SSL_CERT_FILE", "") or "").strip()
|
|
68
|
+
if not cafile and _certifi is not None:
|
|
69
|
+
try:
|
|
70
|
+
cafile = str(_certifi.where() or "").strip()
|
|
71
|
+
except Exception:
|
|
72
|
+
cafile = ""
|
|
73
|
+
try:
|
|
74
|
+
ctx = ssl.create_default_context(cafile=cafile or None)
|
|
75
|
+
except Exception:
|
|
76
|
+
ctx = ssl.create_default_context()
|
|
77
|
+
_HTTP_SSL_CONTEXT = ctx
|
|
78
|
+
return ctx
|
|
79
|
+
|
|
80
|
+
def urlopen(url, *args, **kwargs):
|
|
81
|
+
if "context" not in kwargs:
|
|
82
|
+
target = getattr(url, "full_url", url)
|
|
83
|
+
if str(target or "").strip().lower().startswith("https://"):
|
|
84
|
+
try:
|
|
85
|
+
kwargs["context"] = _shared_http_ssl_context()
|
|
86
|
+
except Exception:
|
|
87
|
+
pass
|
|
88
|
+
return _URL_OPEN_ORIGINAL(url, *args, **kwargs)
|
|
89
|
+
|
|
56
90
|
def _resolve_default_agent_workdir() -> Path:
|
|
57
91
|
raw = str(os.getenv("AGENT_WORKDIR", "") or "").strip()
|
|
58
92
|
if raw:
|
|
@@ -158,7 +192,7 @@ REPEATED_TOOL_LOOP_THRESHOLD = 2
|
|
|
158
192
|
BASH_READ_LOOP_THRESHOLD = 3
|
|
159
193
|
HARD_BREAK_TOOL_ERROR_THRESHOLD = 20
|
|
160
194
|
HARD_BREAK_RECOVERY_ROUND_THRESHOLD = 3
|
|
161
|
-
FUSED_FAULT_BREAK_THRESHOLD =
|
|
195
|
+
FUSED_FAULT_BREAK_THRESHOLD = 15
|
|
162
196
|
STALL_SEVERITY_ESCALATION_THRESHOLD = 5
|
|
163
197
|
STALL_SEVERITY_WEIGHT_BASH_READ_LOOP = 2
|
|
164
198
|
STALL_SEVERITY_WEIGHT_REPEATED_TOOL = 3
|
|
@@ -182,6 +216,23 @@ DEFAULT_TIMEOUT_SECONDS = max(
|
|
|
182
216
|
),
|
|
183
217
|
)
|
|
184
218
|
DEFAULT_REQUEST_TIMEOUT = DEFAULT_TIMEOUT_SECONDS
|
|
219
|
+
MIN_SHELL_COMMAND_TIMEOUT_SECONDS = 10
|
|
220
|
+
MAX_SHELL_COMMAND_TIMEOUT_SECONDS = 86_400
|
|
221
|
+
DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS = max(
|
|
222
|
+
MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
223
|
+
min(
|
|
224
|
+
MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
225
|
+
int(
|
|
226
|
+
str(
|
|
227
|
+
os.getenv(
|
|
228
|
+
"AGENT_SHELL_COMMAND_TIMEOUT",
|
|
229
|
+
os.getenv("AGENT_BASH_TIMEOUT", os.getenv("AGENT_COMMAND_TIMEOUT", "240")),
|
|
230
|
+
)
|
|
231
|
+
or "240"
|
|
232
|
+
)
|
|
233
|
+
),
|
|
234
|
+
),
|
|
235
|
+
)
|
|
185
236
|
AUTO_CONTINUE_BUDGET_DEFAULT = 30
|
|
186
237
|
AGENT_MAX_OUTPUT_TOKENS = 16384
|
|
187
238
|
OLLAMA_THINKING_TOOL_BUFFER = 4096
|
|
@@ -196,7 +247,7 @@ WATCHDOG_CONTEXT_NEAR_RATIO = 0.92
|
|
|
196
247
|
WATCHDOG_MAX_DECOMPOSE_STEPS = 12
|
|
197
248
|
WATCHDOG_STEP_MAX_ATTEMPTS = 2
|
|
198
249
|
EMPTY_ACTION_MIN_CONTENT_CHARS = 5
|
|
199
|
-
EMPTY_ACTION_WAKEUP_RETRY_LIMIT =
|
|
250
|
+
EMPTY_ACTION_WAKEUP_RETRY_LIMIT = 5
|
|
200
251
|
THINKING_BUDGET_FORCE_RATIO = 0.85
|
|
201
252
|
# --- Tool timeout configuration ---
|
|
202
253
|
_TOOL_TIMEOUT_MAP = {
|
|
@@ -347,7 +398,13 @@ BLACKBOARD_STATUSES = (
|
|
|
347
398
|
"COMPLETED",
|
|
348
399
|
"PAUSED",
|
|
349
400
|
)
|
|
350
|
-
TASK_COMPLEXITY_LEVELS = ("simple", "complex")
|
|
401
|
+
TASK_COMPLEXITY_LEVELS = ("simple", "moderate", "complex", "expert")
|
|
402
|
+
TASK_COMPLEXITY_RANKS = {
|
|
403
|
+
"simple": 1,
|
|
404
|
+
"moderate": 2,
|
|
405
|
+
"complex": 3,
|
|
406
|
+
"expert": 4,
|
|
407
|
+
}
|
|
351
408
|
TASK_PROFILE_TYPES = (
|
|
352
409
|
"simple_qa",
|
|
353
410
|
"simple_code",
|
|
@@ -384,7 +441,7 @@ TASK_LEVEL_POLICIES: dict[int, dict] = {
|
|
|
384
441
|
"assigned_expert": "developer",
|
|
385
442
|
"round_budget": 16,
|
|
386
443
|
"requires_user_confirmation": False,
|
|
387
|
-
"complexity": "
|
|
444
|
+
"complexity": "moderate",
|
|
388
445
|
},
|
|
389
446
|
4: {
|
|
390
447
|
"name": "complex_collaboration",
|
|
@@ -402,7 +459,7 @@ TASK_LEVEL_POLICIES: dict[int, dict] = {
|
|
|
402
459
|
"assigned_expert": "explorer",
|
|
403
460
|
"round_budget": 0, # 0 means unlimited by tier budget (still guarded by global safeguards).
|
|
404
461
|
"requires_user_confirmation": True,
|
|
405
|
-
"complexity": "
|
|
462
|
+
"complexity": "expert",
|
|
406
463
|
},
|
|
407
464
|
}
|
|
408
465
|
MANAGER_ROUTE_TARGETS = ("explorer", "developer", "reviewer", "finish")
|
|
@@ -469,7 +526,7 @@ TASK_PHASE_ROUTING = {
|
|
|
469
526
|
COMPLEXITY_KEYWORDS = (
|
|
470
527
|
"简单", "复杂", "难", "容易", "快速", "详细", "深入",
|
|
471
528
|
"l1", "l2", "l3", "l4", "l5",
|
|
472
|
-
"simple", "complex", "easy", "hard", "difficult",
|
|
529
|
+
"simple", "moderate", "medium", "complex", "expert", "easy", "hard", "difficult",
|
|
473
530
|
"thorough", "quick", "fast", "lightweight", "heavy",
|
|
474
531
|
)
|
|
475
532
|
USER_COMPLEXITY_SIMPLE_TOKENS = (
|
|
@@ -477,12 +534,23 @@ USER_COMPLEXITY_SIMPLE_TOKENS = (
|
|
|
477
534
|
"low", "simple", "easy", "quick", "fast", "lightweight", "basic", "minimal",
|
|
478
535
|
"l1", "l2",
|
|
479
536
|
)
|
|
537
|
+
USER_COMPLEXITY_MODERATE_TOKENS = (
|
|
538
|
+
"中等复杂度", "中等难度", "适中", "平衡", "标准", "普通", "常规",
|
|
539
|
+
"medium", "mid", "moderate", "balanced", "standard", "normal",
|
|
540
|
+
"l3",
|
|
541
|
+
)
|
|
480
542
|
USER_COMPLEXITY_COMPLEX_TOKENS = (
|
|
481
|
-
"复杂", "深入", "详细", "高复杂度", "高难度", "
|
|
482
|
-
"
|
|
483
|
-
"
|
|
543
|
+
"复杂", "深入", "详细", "高复杂度", "高难度", "中高复杂度",
|
|
544
|
+
"high", "complex", "hard", "difficult", "thorough", "detailed", "deep", "heavy",
|
|
545
|
+
"l4",
|
|
546
|
+
)
|
|
547
|
+
USER_COMPLEXITY_EXPERT_TOKENS = (
|
|
548
|
+
"专家级", "系統級", "系统级", "生产级", "企業級", "企业级", "高风险", "超高复杂度",
|
|
549
|
+
"expert", "advanced", "system-level", "production-ready", "enterprise", "mission-critical",
|
|
550
|
+
"l5",
|
|
484
551
|
)
|
|
485
552
|
PLAN_MODE_EXPLORER_MAX_ROUNDS = 8
|
|
553
|
+
PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS = 3
|
|
486
554
|
# Reviewer debug mode
|
|
487
555
|
REVIEWER_DEBUG_MODE_MAX_ROUNDS = 6
|
|
488
556
|
REVIEWER_DEBUG_TOOL_ALLOWLIST = {
|
|
@@ -492,7 +560,7 @@ REVIEWER_DEBUG_TOOL_ALLOWLIST = {
|
|
|
492
560
|
}
|
|
493
561
|
EXPLORER_STALL_THRESHOLD = 3 # consecutive same-target delegations before forced switch
|
|
494
562
|
DEVELOPER_EDIT_STALL_THRESHOLD = 3 # consecutive edit_file failures on same file before forced strategy change
|
|
495
|
-
PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS =
|
|
563
|
+
PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS = 8192
|
|
496
564
|
PLAN_MODE_MAX_OPTIONS = 3
|
|
497
565
|
PLAN_FILE_RELATIVE_PATH = ".clouds_coder/plan.md"
|
|
498
566
|
PLAN_BUBBLE_MAX_CHARS = 12_000
|
|
@@ -2014,6 +2082,55 @@ def extract_daily_session_limit_setting(raw: object) -> int | None:
|
|
|
2014
2082
|
return None
|
|
2015
2083
|
|
|
2016
2084
|
|
|
2085
|
+
def extract_shell_command_timeout_setting(raw: object) -> int | None:
|
|
2086
|
+
"""Read shell/bash command timeout from config dict.
|
|
2087
|
+
|
|
2088
|
+
Accepted keys:
|
|
2089
|
+
- shell_command_timeout
|
|
2090
|
+
- shell_timeout
|
|
2091
|
+
- bash_timeout
|
|
2092
|
+
- command_timeout
|
|
2093
|
+
Sections searched: top-level, then 'startup' / 'runtime' / 'shell' / 'tools' / 'execution'.
|
|
2094
|
+
Returns a clamped positive integer, or None if no setting is present.
|
|
2095
|
+
"""
|
|
2096
|
+
if not isinstance(raw, dict):
|
|
2097
|
+
return None
|
|
2098
|
+
|
|
2099
|
+
def _parse_timeout(value: object) -> int | None:
|
|
2100
|
+
if value is None or isinstance(value, bool):
|
|
2101
|
+
return None
|
|
2102
|
+
try:
|
|
2103
|
+
text = str(value).strip()
|
|
2104
|
+
if not text:
|
|
2105
|
+
return None
|
|
2106
|
+
return normalize_timeout_seconds(
|
|
2107
|
+
text,
|
|
2108
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
2109
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
2110
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
2111
|
+
)
|
|
2112
|
+
except Exception:
|
|
2113
|
+
return None
|
|
2114
|
+
|
|
2115
|
+
keys = (
|
|
2116
|
+
"shell_command_timeout",
|
|
2117
|
+
"shell_timeout",
|
|
2118
|
+
"bash_timeout",
|
|
2119
|
+
"command_timeout",
|
|
2120
|
+
)
|
|
2121
|
+
for key in keys:
|
|
2122
|
+
if key in raw:
|
|
2123
|
+
return _parse_timeout(raw.get(key))
|
|
2124
|
+
for section_key in ("startup", "runtime", "shell", "tools", "execution"):
|
|
2125
|
+
section = raw.get(section_key)
|
|
2126
|
+
if not isinstance(section, dict):
|
|
2127
|
+
continue
|
|
2128
|
+
for key in keys:
|
|
2129
|
+
if key in section:
|
|
2130
|
+
return _parse_timeout(section.get(key))
|
|
2131
|
+
return None
|
|
2132
|
+
|
|
2133
|
+
|
|
2017
2134
|
class SessionCreationLimitExceeded(RuntimeError):
|
|
2018
2135
|
def __init__(self, status: dict):
|
|
2019
2136
|
self.status = dict(status or {})
|
|
@@ -2986,19 +3103,243 @@ def decompress_text_blob(blob_b64: str) -> str:
|
|
|
2986
3103
|
except Exception:
|
|
2987
3104
|
return ""
|
|
2988
3105
|
|
|
3106
|
+
def normalize_embedded_newlines(text: object) -> str:
|
|
3107
|
+
s = str(text or "")
|
|
3108
|
+
if not s:
|
|
3109
|
+
return ""
|
|
3110
|
+
s = s.replace("\u2028", "\n").replace("\u2029", "\n")
|
|
3111
|
+
s = s.replace("\r\n", "\n").replace("\r", "\n")
|
|
3112
|
+
if "\\n" in s or "\\r" in s or "\\t" in s:
|
|
3113
|
+
s = s.replace("\\r\\n", "\n").replace("\\n", "\n").replace("\\r", "\n").replace("\\t", "\t")
|
|
3114
|
+
return s
|
|
3115
|
+
|
|
3116
|
+
|
|
3117
|
+
def _map_todo_status_token(token: str) -> str:
|
|
3118
|
+
raw = str(token or "").strip().lower().replace("_", " ").replace("-", " ")
|
|
3119
|
+
raw = re.sub(r"\s+", " ", raw)
|
|
3120
|
+
return {
|
|
3121
|
+
"pending": "pending",
|
|
3122
|
+
"待处理": "pending",
|
|
3123
|
+
"待處理": "pending",
|
|
3124
|
+
"未着手": "pending",
|
|
3125
|
+
"in progress": "in_progress",
|
|
3126
|
+
"进行中": "in_progress",
|
|
3127
|
+
"進行中": "in_progress",
|
|
3128
|
+
"completed": "completed",
|
|
3129
|
+
"已完成": "completed",
|
|
3130
|
+
"完了": "completed",
|
|
3131
|
+
"blocked": "pending",
|
|
3132
|
+
}.get(raw, "")
|
|
3133
|
+
|
|
3134
|
+
|
|
3135
|
+
def split_todo_status_text(text: object) -> tuple[str, str]:
|
|
3136
|
+
probe = normalize_embedded_newlines(text).strip()
|
|
3137
|
+
if not probe:
|
|
3138
|
+
return "", ""
|
|
3139
|
+
status = ""
|
|
3140
|
+
marker_prefix = r"(?:[-*•>]+\s*)?"
|
|
3141
|
+
for _ in range(4):
|
|
3142
|
+
before = probe
|
|
3143
|
+
probe = re.sub(r"^\s+", "", probe)
|
|
3144
|
+
matched = False
|
|
3145
|
+
for row_status, pattern in (
|
|
3146
|
+
(
|
|
3147
|
+
"completed",
|
|
3148
|
+
rf"^(?:{marker_prefix})(?:"
|
|
3149
|
+
rf"\[x\]\s*"
|
|
3150
|
+
rf")",
|
|
3151
|
+
),
|
|
3152
|
+
(
|
|
3153
|
+
"in_progress",
|
|
3154
|
+
rf"^(?:{marker_prefix})(?:"
|
|
3155
|
+
rf"\[>\]\s*"
|
|
3156
|
+
rf")",
|
|
3157
|
+
),
|
|
3158
|
+
(
|
|
3159
|
+
"pending",
|
|
3160
|
+
rf"^(?:{marker_prefix})(?:"
|
|
3161
|
+
rf"\[\s*\]\s*"
|
|
3162
|
+
rf")",
|
|
3163
|
+
),
|
|
3164
|
+
):
|
|
3165
|
+
m = re.match(pattern, probe, flags=re.IGNORECASE)
|
|
3166
|
+
if not m:
|
|
3167
|
+
continue
|
|
3168
|
+
status = row_status
|
|
3169
|
+
probe = probe[m.end():].strip()
|
|
3170
|
+
matched = True
|
|
3171
|
+
break
|
|
3172
|
+
if matched:
|
|
3173
|
+
continue
|
|
3174
|
+
m = re.match(
|
|
3175
|
+
rf"^(?:{marker_prefix})"
|
|
3176
|
+
rf"(pending|in[_\-\s]?progress|completed|blocked|"
|
|
3177
|
+
rf"待处理|待處理|未着手|进行中|進行中|已完成|完了)"
|
|
3178
|
+
rf"\s*[::\-\]]\s*",
|
|
3179
|
+
probe,
|
|
3180
|
+
flags=re.IGNORECASE,
|
|
3181
|
+
)
|
|
3182
|
+
if m:
|
|
3183
|
+
mapped = _map_todo_status_token(str(m.group(1) or ""))
|
|
3184
|
+
if mapped:
|
|
3185
|
+
status = mapped
|
|
3186
|
+
probe = probe[m.end():].strip()
|
|
3187
|
+
continue
|
|
3188
|
+
if probe == before:
|
|
3189
|
+
break
|
|
3190
|
+
return status, probe.strip()
|
|
3191
|
+
|
|
3192
|
+
|
|
3193
|
+
def extract_todo_rows_from_text(
|
|
3194
|
+
text: object,
|
|
3195
|
+
*,
|
|
3196
|
+
default_parent_step_id: str = "",
|
|
3197
|
+
limit: int = 12,
|
|
3198
|
+
) -> list[dict]:
|
|
3199
|
+
src = normalize_embedded_newlines(text)
|
|
3200
|
+
if not src.strip():
|
|
3201
|
+
return []
|
|
3202
|
+
out: list[dict] = []
|
|
3203
|
+
seen: set[tuple[str, str, str]] = set()
|
|
3204
|
+
capped = max(1, min(40, int(limit or 12)))
|
|
3205
|
+
parent_step_id = trim(str(default_parent_step_id or "").strip(), 20)
|
|
3206
|
+
for raw_line in src.splitlines():
|
|
3207
|
+
line = trim(str(raw_line or "").strip(), 600)
|
|
3208
|
+
if not line:
|
|
3209
|
+
continue
|
|
3210
|
+
variants: list[str] = []
|
|
3211
|
+
for candidate in (
|
|
3212
|
+
line,
|
|
3213
|
+
re.sub(r"^\s*(?:[-*•>]+\s*)+", "", line).strip(),
|
|
3214
|
+
re.sub(r"^\s*\*\*([^*]+)\*\*\s*([::])\s*", r"\1\2 ", line).strip(),
|
|
3215
|
+
re.sub(r"^\s*(?:[-*•>]+\s*)*\*\*([^*]+)\*\*\s*([::])\s*", r"\1\2 ", line).strip(),
|
|
3216
|
+
):
|
|
3217
|
+
candidate = trim(str(candidate or "").strip(), 600)
|
|
3218
|
+
if candidate and candidate not in variants:
|
|
3219
|
+
variants.append(candidate)
|
|
3220
|
+
matched = False
|
|
3221
|
+
for candidate in variants:
|
|
3222
|
+
status, content = split_todo_status_text(candidate)
|
|
3223
|
+
if not status or not content:
|
|
3224
|
+
continue
|
|
3225
|
+
cleaned = normalize_work_text(content, status) or content
|
|
3226
|
+
cleaned = trim(cleaned.strip(), 400)
|
|
3227
|
+
if not cleaned:
|
|
3228
|
+
continue
|
|
3229
|
+
low = cleaned.lower()
|
|
3230
|
+
if low in {
|
|
3231
|
+
"todo",
|
|
3232
|
+
"todos",
|
|
3233
|
+
"task",
|
|
3234
|
+
"tasks",
|
|
3235
|
+
"subtask",
|
|
3236
|
+
"subtasks",
|
|
3237
|
+
"待办",
|
|
3238
|
+
"待辦",
|
|
3239
|
+
"子任务",
|
|
3240
|
+
"子任務",
|
|
3241
|
+
}:
|
|
3242
|
+
continue
|
|
3243
|
+
row = {"content": cleaned, "status": status}
|
|
3244
|
+
if parent_step_id:
|
|
3245
|
+
row["parent_step_id"] = parent_step_id
|
|
3246
|
+
identity = (
|
|
3247
|
+
status,
|
|
3248
|
+
normalize_work_text(cleaned, status).strip().lower(),
|
|
3249
|
+
parent_step_id,
|
|
3250
|
+
)
|
|
3251
|
+
if identity in seen:
|
|
3252
|
+
matched = True
|
|
3253
|
+
break
|
|
3254
|
+
seen.add(identity)
|
|
3255
|
+
out.append(row)
|
|
3256
|
+
matched = True
|
|
3257
|
+
break
|
|
3258
|
+
if matched and len(out) >= capped:
|
|
3259
|
+
break
|
|
3260
|
+
return out
|
|
3261
|
+
|
|
3262
|
+
|
|
3263
|
+
def infer_todo_status_from_text(text: object, default: str = "pending") -> str:
|
|
3264
|
+
status, content = split_todo_status_text(text)
|
|
3265
|
+
if not content and not status:
|
|
3266
|
+
return default
|
|
3267
|
+
if status:
|
|
3268
|
+
return status
|
|
3269
|
+
return default
|
|
3270
|
+
|
|
3271
|
+
|
|
3272
|
+
def split_structured_todo_content(text: object, limit: int = 7) -> list[str]:
|
|
3273
|
+
src = normalize_embedded_newlines(text).strip()
|
|
3274
|
+
if not src:
|
|
3275
|
+
return []
|
|
3276
|
+
lines = [trim(str(line or "").strip(), 500) for line in src.split("\n")]
|
|
3277
|
+
lines = [line for line in lines if line]
|
|
3278
|
+
if len(lines) <= 1:
|
|
3279
|
+
return [src]
|
|
3280
|
+
major_re = re.compile(r"^(\d+)\.\s+(.+)$")
|
|
3281
|
+
sub_re = re.compile(r"^(\d+)\.(\d+)\s+(.+)$")
|
|
3282
|
+
bullet_re = re.compile(r"^(?:[-*•]\s+)(.+)$")
|
|
3283
|
+
header_major = ""
|
|
3284
|
+
m0 = major_re.match(lines[0])
|
|
3285
|
+
if m0:
|
|
3286
|
+
header_major = str(m0.group(1) or "")
|
|
3287
|
+
picked: list[str] = []
|
|
3288
|
+
for idx, line in enumerate(lines):
|
|
3289
|
+
if idx == 0 and header_major:
|
|
3290
|
+
continue
|
|
3291
|
+
m_sub = sub_re.match(line)
|
|
3292
|
+
if m_sub:
|
|
3293
|
+
major = str(m_sub.group(1) or "")
|
|
3294
|
+
if header_major and major != header_major:
|
|
3295
|
+
if picked:
|
|
3296
|
+
break
|
|
3297
|
+
continue
|
|
3298
|
+
picked.append(f"{major}.{m_sub.group(2)} {trim(str(m_sub.group(3) or '').strip(), 420)}".strip())
|
|
3299
|
+
continue
|
|
3300
|
+
m_bullet = bullet_re.match(line)
|
|
3301
|
+
if m_bullet and (header_major or picked):
|
|
3302
|
+
picked.append(trim(str(m_bullet.group(1) or "").strip(), 420))
|
|
3303
|
+
continue
|
|
3304
|
+
if picked and re.match(r"^\d+\.\s+", line):
|
|
3305
|
+
break
|
|
3306
|
+
if not picked:
|
|
3307
|
+
for line in lines:
|
|
3308
|
+
m_sub = sub_re.match(line)
|
|
3309
|
+
if m_sub:
|
|
3310
|
+
picked.append(f"{m_sub.group(1)}.{m_sub.group(2)} {trim(str(m_sub.group(3) or '').strip(), 420)}".strip())
|
|
3311
|
+
if len(picked) >= max(1, int(limit or 7)):
|
|
3312
|
+
break
|
|
3313
|
+
if not picked:
|
|
3314
|
+
return [src]
|
|
3315
|
+
out: list[str] = []
|
|
3316
|
+
seen: set[str] = set()
|
|
3317
|
+
for line in picked:
|
|
3318
|
+
key = re.sub(r"\s+", " ", str(line or "").strip()).lower()
|
|
3319
|
+
if not key or key in seen:
|
|
3320
|
+
continue
|
|
3321
|
+
seen.add(key)
|
|
3322
|
+
out.append(line)
|
|
3323
|
+
if len(out) >= max(1, int(limit or 7)):
|
|
3324
|
+
break
|
|
3325
|
+
return out or [src]
|
|
3326
|
+
|
|
3327
|
+
|
|
2989
3328
|
def normalize_work_text(text: object, status: str = "") -> str:
|
|
2990
|
-
|
|
3329
|
+
parsed_status, parsed_content = split_todo_status_text(text)
|
|
3330
|
+
s = re.sub(r"\s+", " ", parsed_content or normalize_embedded_newlines(text)).strip()
|
|
2991
3331
|
if not s:
|
|
2992
3332
|
return ""
|
|
2993
|
-
s = re.sub(r"^\[[ x>\-]\]\s*", "", s, flags=re.IGNORECASE)
|
|
2994
3333
|
s = re.sub(
|
|
2995
|
-
r"^(pending|in[_\-\s]?progress|completed|done|blocked
|
|
3334
|
+
r"^(pending|todo|in[_\-\s]?progress|doing|working|completed|done|finished|blocked|"
|
|
3335
|
+
r"待处理|待處理|未着手|进行中|進行中|作業中|已完成|完成|完了)\s*[·::\-\]]\s*",
|
|
2996
3336
|
"",
|
|
2997
3337
|
s,
|
|
2998
3338
|
flags=re.IGNORECASE,
|
|
2999
3339
|
)
|
|
3000
|
-
|
|
3001
|
-
|
|
3340
|
+
status_key = _map_todo_status_token(status) or _map_todo_status_token(parsed_status) or str(status or "").strip().lower()
|
|
3341
|
+
if status_key:
|
|
3342
|
+
status_pattern = re.escape(status_key).replace("_", r"[_\-\s]?")
|
|
3002
3343
|
s = re.sub(
|
|
3003
3344
|
rf"\s*[—-]\s*{status_pattern}\s*$",
|
|
3004
3345
|
"",
|
|
@@ -3429,6 +3770,12 @@ def infer_user_complexity_value(text: str) -> str:
|
|
|
3429
3770
|
low = strip_thinking_content(str(text or "")).strip().lower()
|
|
3430
3771
|
if not low:
|
|
3431
3772
|
return ""
|
|
3773
|
+
for token in USER_COMPLEXITY_EXPERT_TOKENS:
|
|
3774
|
+
if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
|
|
3775
|
+
return "expert"
|
|
3776
|
+
for token in USER_COMPLEXITY_MODERATE_TOKENS:
|
|
3777
|
+
if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
|
|
3778
|
+
return "moderate"
|
|
3432
3779
|
for token in USER_COMPLEXITY_SIMPLE_TOKENS:
|
|
3433
3780
|
if re.search(rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])", low) if token.isascii() else token in low:
|
|
3434
3781
|
return "simple"
|
|
@@ -3437,6 +3784,53 @@ def infer_user_complexity_value(text: str) -> str:
|
|
|
3437
3784
|
return "complex"
|
|
3438
3785
|
return ""
|
|
3439
3786
|
|
|
3787
|
+
def normalize_task_complexity(raw: object, default: str = "simple") -> str:
|
|
3788
|
+
value = str(raw or "").strip().lower()
|
|
3789
|
+
aliases = {
|
|
3790
|
+
"simple": "simple",
|
|
3791
|
+
"low": "simple",
|
|
3792
|
+
"basic": "simple",
|
|
3793
|
+
"minimal": "simple",
|
|
3794
|
+
"moderate": "moderate",
|
|
3795
|
+
"medium": "moderate",
|
|
3796
|
+
"mid": "moderate",
|
|
3797
|
+
"balanced": "moderate",
|
|
3798
|
+
"standard": "moderate",
|
|
3799
|
+
"complex": "complex",
|
|
3800
|
+
"high": "complex",
|
|
3801
|
+
"hard": "complex",
|
|
3802
|
+
"difficult": "complex",
|
|
3803
|
+
"expert": "expert",
|
|
3804
|
+
"advanced": "expert",
|
|
3805
|
+
"system": "expert",
|
|
3806
|
+
"system_level": "expert",
|
|
3807
|
+
"production": "expert",
|
|
3808
|
+
}
|
|
3809
|
+
normalized = aliases.get(value, value)
|
|
3810
|
+
if normalized in TASK_COMPLEXITY_LEVELS:
|
|
3811
|
+
return normalized
|
|
3812
|
+
fallback = str(default or "").strip().lower()
|
|
3813
|
+
if not fallback:
|
|
3814
|
+
return ""
|
|
3815
|
+
return fallback if fallback in TASK_COMPLEXITY_LEVELS else "simple"
|
|
3816
|
+
|
|
3817
|
+
def task_complexity_rank(raw: object, default: str = "simple") -> int:
|
|
3818
|
+
return int(TASK_COMPLEXITY_RANKS.get(normalize_task_complexity(raw, default=default), 1))
|
|
3819
|
+
|
|
3820
|
+
def task_complexity_at_least(raw: object, threshold: str) -> bool:
|
|
3821
|
+
return task_complexity_rank(raw) >= task_complexity_rank(threshold)
|
|
3822
|
+
|
|
3823
|
+
def max_task_complexity(*values: object, default: str = "simple") -> str:
|
|
3824
|
+
best = normalize_task_complexity(default, default=default)
|
|
3825
|
+
best_rank = task_complexity_rank(best, default=default)
|
|
3826
|
+
for value in values:
|
|
3827
|
+
cur = normalize_task_complexity(value, default=default)
|
|
3828
|
+
cur_rank = task_complexity_rank(cur, default=default)
|
|
3829
|
+
if cur_rank > best_rank:
|
|
3830
|
+
best = cur
|
|
3831
|
+
best_rank = cur_rank
|
|
3832
|
+
return best
|
|
3833
|
+
|
|
3440
3834
|
def normalize_openai_compat_provider_name(raw: str) -> str:
|
|
3441
3835
|
value = str(raw or "").strip().lower().replace("-", "_")
|
|
3442
3836
|
aliases = {
|
|
@@ -5236,6 +5630,31 @@ class TodoManager:
|
|
|
5236
5630
|
def update(self, items: list[dict]) -> str:
|
|
5237
5631
|
if not isinstance(items, list):
|
|
5238
5632
|
raise ValueError("items must be array")
|
|
5633
|
+
expanded_items: list[dict] = []
|
|
5634
|
+
for item in items:
|
|
5635
|
+
if isinstance(item, str):
|
|
5636
|
+
raw = {"content": item, "status": "pending"}
|
|
5637
|
+
elif isinstance(item, dict):
|
|
5638
|
+
raw = dict(item)
|
|
5639
|
+
else:
|
|
5640
|
+
try:
|
|
5641
|
+
raw = {"content": str(item).strip(), "status": "pending"}
|
|
5642
|
+
except Exception:
|
|
5643
|
+
continue
|
|
5644
|
+
raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
|
|
5645
|
+
split_rows = split_structured_todo_content(raw_content, limit=7)
|
|
5646
|
+
if len(split_rows) <= 1:
|
|
5647
|
+
expanded_items.append(raw)
|
|
5648
|
+
continue
|
|
5649
|
+
base_status = str(raw.get("status", raw.get("state", "pending")) or "pending").strip().lower()
|
|
5650
|
+
for split_idx, split_content in enumerate(split_rows):
|
|
5651
|
+
split_raw = dict(raw)
|
|
5652
|
+
split_raw["content"] = split_content
|
|
5653
|
+
split_raw["status"] = infer_todo_status_from_text(
|
|
5654
|
+
split_content,
|
|
5655
|
+
default=(base_status if split_idx == 0 else "pending"),
|
|
5656
|
+
)
|
|
5657
|
+
expanded_items.append(split_raw)
|
|
5239
5658
|
validated = []
|
|
5240
5659
|
# Plan-step items (bb:proj: key) keep a single in_progress slot.
|
|
5241
5660
|
# Worker/non-plan items allow one in_progress per owner so sync-mode agents
|
|
@@ -5251,23 +5670,21 @@ class TodoManager:
|
|
|
5251
5670
|
"finish": "completed",
|
|
5252
5671
|
"finished": "completed",
|
|
5253
5672
|
}
|
|
5254
|
-
for idx, item in enumerate(
|
|
5255
|
-
if isinstance(item, str):
|
|
5256
|
-
raw = {"content": item, "status": "pending"}
|
|
5257
|
-
elif isinstance(item, dict):
|
|
5258
|
-
raw = item
|
|
5259
|
-
else:
|
|
5260
|
-
raise ValueError(f"item {idx}: invalid type")
|
|
5673
|
+
for idx, item in enumerate(expanded_items):
|
|
5674
|
+
raw = item if isinstance(item, dict) else {"content": str(item or "").strip(), "status": "pending"}
|
|
5261
5675
|
raw_content = str(raw.get("content", raw.get("text", raw.get("title", "")))).strip()
|
|
5676
|
+
inferred_status = infer_todo_status_from_text(raw_content, default="")
|
|
5262
5677
|
content = normalize_work_text(raw_content)
|
|
5263
5678
|
if not content:
|
|
5264
5679
|
content = raw_content
|
|
5265
5680
|
if not content:
|
|
5266
|
-
|
|
5681
|
+
continue # Skip empty items instead of raising
|
|
5267
5682
|
raw_status = str(raw.get("status", raw.get("state", "pending"))).strip().lower()
|
|
5268
5683
|
status = status_alias.get(raw_status, raw_status or "pending")
|
|
5684
|
+
if inferred_status and status in {"", "pending", "todo"}:
|
|
5685
|
+
status = inferred_status
|
|
5269
5686
|
if status not in {"pending", "in_progress", "completed"}:
|
|
5270
|
-
status = "pending"
|
|
5687
|
+
status = inferred_status or "pending"
|
|
5271
5688
|
content = normalize_work_text(content, status) or content
|
|
5272
5689
|
active_form = str(
|
|
5273
5690
|
raw.get(
|
|
@@ -7051,9 +7468,11 @@ Use this skill when:
|
|
|
7051
7468
|
6. Report rewritten count, copied files, and unresolved URLs.
|
|
7052
7469
|
|
|
7053
7470
|
## Rules
|
|
7471
|
+
- Treat `./js_lib` and `/js_lib/...` as workspace lookup locations only, not final browser-facing URLs.
|
|
7054
7472
|
- Keep `./js` per HTML location (do not hardcode global absolute paths).
|
|
7055
7473
|
- Keep file names deterministic and safe (`[A-Za-z0-9._-]`).
|
|
7056
7474
|
- Preserve existing relative local script paths if already offline-ready.
|
|
7475
|
+
- Final HTML must not point to `/js_lib/...`, `/assets/js_lib/...`, or other virtual asset aliases; copy first, then use plain relative paths.
|
|
7057
7476
|
|
|
7058
7477
|
## Output Contract
|
|
7059
7478
|
Return:
|
|
@@ -12420,12 +12839,12 @@ TOOLS = [
|
|
|
12420
12839
|
),
|
|
12421
12840
|
tool_def("write_file", "Write file content.", {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
|
|
12422
12841
|
tool_def("edit_file", "Edit a file by replacing first match.", {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, ["path", "old_text", "new_text"]),
|
|
12423
|
-
tool_def("TodoWrite", "Update todo list.", {"items": {"type": "array", "items": {
|
|
12842
|
+
tool_def("TodoWrite", "Update todo list. Preferred format: objects with content/status/owner/parent_step_id. String fallback should use only '[ ] task', '[>] task', or '[x] task'.", {"items": {"type": "array", "items": {}}}, ["items"]),
|
|
12424
12843
|
tool_def(
|
|
12425
12844
|
"TodoWriteRescue",
|
|
12426
|
-
"Fallback todo writer
|
|
12845
|
+
"Fallback todo writer. Preferred format: objects with content/status/owner/parent_step_id. String fallback should use only '[ ] task', '[>] task', or '[x] task'.",
|
|
12427
12846
|
{
|
|
12428
|
-
"items": {"type": "array", "items": {
|
|
12847
|
+
"items": {"type": "array", "items": {}},
|
|
12429
12848
|
"in_progress_index": {"type": "integer"},
|
|
12430
12849
|
},
|
|
12431
12850
|
["items"],
|
|
@@ -12694,6 +13113,7 @@ class SessionState:
|
|
|
12694
13113
|
context_limit_locked: bool = False,
|
|
12695
13114
|
max_rounds: int = MAX_AGENT_ROUNDS,
|
|
12696
13115
|
max_run_seconds: int = MAX_RUN_SECONDS,
|
|
13116
|
+
shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
12697
13117
|
auto_model_switch: bool = False,
|
|
12698
13118
|
arbiter_enabled: bool = True,
|
|
12699
13119
|
arbiter_model: str = "",
|
|
@@ -12822,6 +13242,7 @@ class SessionState:
|
|
|
12822
13242
|
self.runtime_complexity_floor = ""
|
|
12823
13243
|
self.runtime_task_level_floor = 0
|
|
12824
13244
|
self.runtime_task_level_ceiling = 0 # 0 = no ceiling; set from plan risk on approval
|
|
13245
|
+
self._todowrite_step_counter: dict[str, int] = {} # Fix 5: track consecutive TodoWrite per step for loop detection
|
|
12825
13246
|
self.runtime_scale_preference = "balanced"
|
|
12826
13247
|
self.runtime_direct_objective = ""
|
|
12827
13248
|
self.runtime_reclassify_goal = ""
|
|
@@ -12855,6 +13276,8 @@ class SessionState:
|
|
|
12855
13276
|
self._cached_llm_complexity = ""
|
|
12856
13277
|
self._cached_complexity_dimensions: dict = {} # scope/steps/skill/output dimensions
|
|
12857
13278
|
self._pending_media_inputs: list[dict] = []
|
|
13279
|
+
self._pending_runtime_updates: list[dict] = []
|
|
13280
|
+
self._deferred_runtime_sync_requested = False
|
|
12858
13281
|
self.tool_retry_counts: dict[str, int] = {}
|
|
12859
13282
|
self.last_auto_title_ts = 0.0
|
|
12860
13283
|
self.live_thinking_text = ""
|
|
@@ -12903,6 +13326,12 @@ class SessionState:
|
|
|
12903
13326
|
maximum=MAX_RUN_TIMEOUT_SECONDS,
|
|
12904
13327
|
fallback=MAX_RUN_SECONDS,
|
|
12905
13328
|
)
|
|
13329
|
+
self.shell_command_timeout_seconds = normalize_timeout_seconds(
|
|
13330
|
+
shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
13331
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
13332
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
13333
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
13334
|
+
)
|
|
12906
13335
|
self.truncation_count = 0
|
|
12907
13336
|
self.last_truncation_ts = 0.0
|
|
12908
13337
|
self.truncation_rescue_task_ids: list[int] = []
|
|
@@ -14345,9 +14774,9 @@ class SessionState:
|
|
|
14345
14774
|
)
|
|
14346
14775
|
if task_type in TASK_PROFILE_TYPES:
|
|
14347
14776
|
self.runtime_task_type = task_type
|
|
14348
|
-
complexity =
|
|
14349
|
-
|
|
14350
|
-
|
|
14777
|
+
complexity = normalize_task_complexity(
|
|
14778
|
+
profile.get("complexity", judgement.get("complexity", self.runtime_task_complexity or "")),
|
|
14779
|
+
default="simple",
|
|
14351
14780
|
)
|
|
14352
14781
|
if complexity in TASK_COMPLEXITY_LEVELS:
|
|
14353
14782
|
self.runtime_task_complexity = complexity
|
|
@@ -14817,12 +15246,15 @@ class SessionState:
|
|
|
14817
15246
|
|
|
14818
15247
|
def _current_plan_step_text(self, board: dict | None = None) -> str:
|
|
14819
15248
|
row = self._current_plan_step_row(board)
|
|
14820
|
-
|
|
15249
|
+
content = normalize_embedded_newlines((row or {}).get("content", "") or "").strip()
|
|
15250
|
+
if "\n" in content:
|
|
15251
|
+
content = content.split("\n", 1)[0].strip()
|
|
15252
|
+
return trim(content, 400)
|
|
14821
15253
|
|
|
14822
15254
|
def _current_plan_step_full_text(self, board: dict | None = None, max_len: int = 1200) -> str:
    """Return the current plan step's full text, passed through ``trim``.

    Prefers the row's ``full_content`` and falls back to ``content`` (then to
    an empty string) when it is missing or empty.  The chosen text is run
    through ``normalize_embedded_newlines`` and stripped before
    ``trim(..., max_len)`` is applied.
    """
    step = self._current_plan_step_row(board) or {}
    raw_text = step.get("full_content", "") or step.get("content", "") or ""
    normalized = normalize_embedded_newlines(raw_text).strip()
    return trim(normalized, max_len)
|
|
14828
15260
|
|
|
@@ -15000,7 +15432,7 @@ class SessionState:
|
|
|
15000
15432
|
pass
|
|
15001
15433
|
t = threading.Thread(target=_llm_match, daemon=True)
|
|
15002
15434
|
t.start()
|
|
15003
|
-
t.join(timeout=
|
|
15435
|
+
t.join(timeout=5.0)
|
|
15004
15436
|
if llm_result:
|
|
15005
15437
|
matched_names = llm_result
|
|
15006
15438
|
self._emit("status", {"summary": f"skill discovery (LLM task analysis): {matched_names} ({trigger})"})
|
|
@@ -15034,7 +15466,7 @@ class SessionState:
|
|
|
15034
15466
|
# --- Path 3: Deferred LLM pickup if still running ---
|
|
15035
15467
|
if not matched_names and t.is_alive():
|
|
15036
15468
|
def _deferred_llm_pickup():
|
|
15037
|
-
t.join(timeout=
|
|
15469
|
+
t.join(timeout=8.0)
|
|
15038
15470
|
if llm_result and not self._loaded_skill_rows():
|
|
15039
15471
|
for name_str in llm_result[:3]:
|
|
15040
15472
|
try:
|
|
@@ -15431,7 +15863,6 @@ class SessionState:
|
|
|
15431
15863
|
"ENGINEERING EXECUTION DISCIPLINE: "
|
|
15432
15864
|
"For coding, bug-fix, architecture, integration, and testing work, proactively use the skill system when a matching skill exists. "
|
|
15433
15865
|
"Do not wait for failure before calling list_skills/load_skill for debugging, API, frontend, parser, or recovery workflows. "
|
|
15434
|
-
"Already-loaded skills appear as <loaded-skill> messages — use them directly without reloading. "
|
|
15435
15866
|
"Use a root-cause-first loop: inspect the exact error or failing behavior, read the implicated file or path, form one concrete hypothesis, apply one bounded fix, then run at least one fix-and-verify cycle before declaring success. "
|
|
15436
15867
|
"If read_file or bash reports a missing path, empty folder, or mismatched filename, stop repeating the same lookup. "
|
|
15437
15868
|
"Reconcile the path against uploads, recent file paths, file explorer entries, and close workspace matches; then either open the closest candidate or create the intended target. "
|
|
@@ -15485,6 +15916,10 @@ class SessionState:
|
|
|
15485
15916
|
f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
|
|
15486
15917
|
f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS require) or pptxgen.bundle.js (browser). "
|
|
15487
15918
|
f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
|
|
15919
|
+
"IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
|
|
15920
|
+
"If an HTML file uses any asset from js_lib, copy that file into a task-local relative asset folder "
|
|
15921
|
+
"(for example './js/' or './assets/vendor/') next to the deliverable, then reference it with a plain relative path in HTML. "
|
|
15922
|
+
"Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases inside final exported HTML. "
|
|
15488
15923
|
f"Task level={runtime_level}, mode={runtime_mode}, "
|
|
15489
15924
|
f"budget={'unlimited' if budget <= 0 else budget}. "
|
|
15490
15925
|
f"Context limit ~{self.context_token_upper_bound} tokens. "
|
|
@@ -19868,11 +20303,17 @@ body{padding:18px}
|
|
|
19868
20303
|
with self.lock:
|
|
19869
20304
|
if self.running:
|
|
19870
20305
|
config_delayed = True
|
|
19871
|
-
|
|
19872
|
-
|
|
19873
|
-
|
|
19874
|
-
|
|
19875
|
-
|
|
20306
|
+
if config_delayed:
|
|
20307
|
+
self._queue_deferred_runtime_update(
|
|
20308
|
+
"llm_config",
|
|
20309
|
+
{"config": cfg_obj, "source": workspace_rel},
|
|
20310
|
+
)
|
|
20311
|
+
loaded_config = self.model_catalog()
|
|
20312
|
+
if isinstance(loaded_config, dict):
|
|
20313
|
+
loaded_config["queued"] = True
|
|
20314
|
+
loaded_config["note"] = (
|
|
20315
|
+
"session is running; llm config queued and will apply after the current run finishes"
|
|
20316
|
+
)
|
|
19876
20317
|
if not config_delayed:
|
|
19877
20318
|
loaded_config = self.load_llm_config(cfg_obj, source=workspace_rel)
|
|
19878
20319
|
self._emit("config_applied", {
|
|
@@ -21038,7 +21479,7 @@ body{padding:18px}
|
|
|
21038
21479
|
return any(x in t for x in markers)
|
|
21039
21480
|
|
|
21040
21481
|
def _llm_classify_task_complexity(self, goal_text: str) -> str:
|
|
21041
|
-
"""LLM semantic pre-screening: classify task
|
|
21482
|
+
"""LLM semantic pre-screening: classify task into 4 complexity bands via 4-dimension analysis. 5s timeout."""
|
|
21042
21483
|
goal = trim(str(goal_text or ""), 400)
|
|
21043
21484
|
if not goal or len(goal) < 6:
|
|
21044
21485
|
return "simple"
|
|
@@ -21055,8 +21496,7 @@ body{padding:18px}
|
|
|
21055
21496
|
f"SKILL: does it need specialized tools, skills, research, or APIs?\n"
|
|
21056
21497
|
f"OUTPUT: what is expected (1=text answer, 2=single file, 3=system/multi-file)?\n\n"
|
|
21057
21498
|
f"Output exactly one line:\n"
|
|
21058
|
-
f"SCOPE:N STEPS:N SKILL:N OUTPUT:N VERDICT:SIMPLE|COMPLEX
|
|
21059
|
-
f"(COMPLEX if any dimension >= 2)"
|
|
21499
|
+
f"SCOPE:N STEPS:N SKILL:N OUTPUT:N VERDICT:SIMPLE|MODERATE|COMPLEX|EXPERT"
|
|
21060
21500
|
)}],
|
|
21061
21501
|
system="/no_think\nAnalyze task dimensions. One line output only.",
|
|
21062
21502
|
max_tokens=40,
|
|
@@ -21071,8 +21511,26 @@ body{padding:18px}
|
|
|
21071
21511
|
dims[dim.lower()] = int(m.group(1))
|
|
21072
21512
|
if dims:
|
|
21073
21513
|
self._cached_complexity_dimensions = dims
|
|
21074
|
-
|
|
21514
|
+
vals = [int(v) for v in dims.values()]
|
|
21515
|
+
max_dim = max(vals) if vals else 1
|
|
21516
|
+
count_ge2 = sum(1 for v in vals if int(v) >= 2)
|
|
21517
|
+
count_ge3 = sum(1 for v in vals if int(v) >= 3)
|
|
21518
|
+
if max_dim <= 1:
|
|
21519
|
+
result_box[0] = "simple"
|
|
21520
|
+
elif max_dim == 2:
|
|
21521
|
+
result_box[0] = "moderate"
|
|
21522
|
+
elif count_ge3 >= 2 or count_ge2 >= 4:
|
|
21523
|
+
result_box[0] = "expert"
|
|
21524
|
+
else:
|
|
21525
|
+
result_box[0] = "complex"
|
|
21526
|
+
if "VERDICT:EXPERT" in answer:
|
|
21527
|
+
result_box[0] = "expert"
|
|
21528
|
+
elif "VERDICT:COMPLEX" in answer:
|
|
21075
21529
|
result_box[0] = "complex"
|
|
21530
|
+
elif "VERDICT:MODERATE" in answer:
|
|
21531
|
+
result_box[0] = "moderate"
|
|
21532
|
+
elif "VERDICT:SIMPLE" in answer:
|
|
21533
|
+
result_box[0] = "simple"
|
|
21076
21534
|
except Exception:
|
|
21077
21535
|
pass
|
|
21078
21536
|
t = threading.Thread(target=_classify, daemon=True)
|
|
@@ -21085,9 +21543,9 @@ body{padding:18px}
|
|
|
21085
21543
|
low = clean.lower()
|
|
21086
21544
|
explicit_complexity = infer_user_complexity_value(clean)
|
|
21087
21545
|
# Use cached LLM complexity result (set by _agent_worker entry point)
|
|
21088
|
-
llm_complexity = str(getattr(self, '_cached_llm_complexity', '') or '')
|
|
21089
|
-
nontrivial = self._looks_nontrivial_request(clean) or llm_complexity
|
|
21090
|
-
direct_question = self._looks_like_direct_question_request(clean) and llm_complexity
|
|
21546
|
+
llm_complexity = normalize_task_complexity(str(getattr(self, '_cached_llm_complexity', '') or ''), default="simple")
|
|
21547
|
+
nontrivial = self._looks_nontrivial_request(clean) or task_complexity_at_least(llm_complexity, "moderate")
|
|
21548
|
+
direct_question = self._looks_like_direct_question_request(clean) and (not task_complexity_at_least(llm_complexity, "moderate"))
|
|
21091
21549
|
code_markers = [
|
|
21092
21550
|
# 代码/编程
|
|
21093
21551
|
"代码", "寫代碼", "写代码", "脚本", "模块", "函数", "class", "bug",
|
|
@@ -21124,6 +21582,7 @@ body{padding:18px}
|
|
|
21124
21582
|
has_code_intent = any(x in low for x in code_markers)
|
|
21125
21583
|
has_research_intent = any(x in low for x in research_markers)
|
|
21126
21584
|
length = len(clean)
|
|
21585
|
+
derived_complexity = max_task_complexity(explicit_complexity, llm_complexity, default="simple")
|
|
21127
21586
|
if direct_question and (not nontrivial) and (not has_code_intent) and length <= 220:
|
|
21128
21587
|
return {
|
|
21129
21588
|
"task_type": "simple_qa",
|
|
@@ -21154,7 +21613,11 @@ body{padding:18px}
|
|
|
21154
21613
|
if has_research_intent and (not has_code_intent):
|
|
21155
21614
|
return {
|
|
21156
21615
|
"task_type": "research",
|
|
21157
|
-
"complexity": explicit_complexity or (
|
|
21616
|
+
"complexity": explicit_complexity or max_task_complexity(
|
|
21617
|
+
derived_complexity,
|
|
21618
|
+
("complex" if length >= 480 else "moderate" if (nontrivial or length >= 280) else "simple"),
|
|
21619
|
+
default="simple",
|
|
21620
|
+
),
|
|
21158
21621
|
"direct_objective": "Collect evidence first, then synthesize a concise actionable answer.",
|
|
21159
21622
|
"recommended_agents": ["explorer", "developer", "reviewer"],
|
|
21160
21623
|
"round_budget": 10 if (nontrivial or length >= 280) else 6,
|
|
@@ -21165,7 +21628,15 @@ body{padding:18px}
|
|
|
21165
21628
|
if nontrivial or has_code_intent or length >= 280:
|
|
21166
21629
|
return {
|
|
21167
21630
|
"task_type": "engineering",
|
|
21168
|
-
"complexity": explicit_complexity or
|
|
21631
|
+
"complexity": explicit_complexity or max_task_complexity(
|
|
21632
|
+
derived_complexity,
|
|
21633
|
+
(
|
|
21634
|
+
"expert"
|
|
21635
|
+
if ((has_code_intent and has_research_intent) or length >= 900)
|
|
21636
|
+
else "complex"
|
|
21637
|
+
),
|
|
21638
|
+
default="moderate",
|
|
21639
|
+
),
|
|
21169
21640
|
"direct_objective": (
|
|
21170
21641
|
"Use blackboard collaboration to implement, validate, and converge with concrete outputs."
|
|
21171
21642
|
),
|
|
@@ -21177,7 +21648,7 @@ body{padding:18px}
|
|
|
21177
21648
|
}
|
|
21178
21649
|
return {
|
|
21179
21650
|
"task_type": "general",
|
|
21180
|
-
"complexity": explicit_complexity or "simple",
|
|
21651
|
+
"complexity": explicit_complexity or derived_complexity or "simple",
|
|
21181
21652
|
"direct_objective": (
|
|
21182
21653
|
"Provide the most direct useful response with minimal orchestration, "
|
|
21183
21654
|
"anchored to the current project context and user goal."
|
|
@@ -21439,6 +21910,66 @@ body{padding:18px}
|
|
|
21439
21910
|
model = str(profile.get("model", self.ollama.model) or self.ollama.model).strip()
|
|
21440
21911
|
return f"{self.active_profile_id}::{model}"
|
|
21441
21912
|
|
|
21913
|
+
def _queue_deferred_runtime_update(self, kind: str, payload: dict) -> int:
    """Queue a runtime update for later application and return the queue size.

    Each entry stores the stripped, lower-cased ``kind``, a shallow copy of
    ``payload`` and the time it was queued.  Only the 16 most recent entries
    are retained.

    Raises:
        ValueError: if ``kind`` is empty after normalization.
    """
    normalized_kind = str(kind or "").strip().lower()
    entry = {
        "kind": normalized_kind,
        "payload": dict(payload or {}),
        "queued_at": float(now_ts()),
    }
    if not normalized_kind:
        raise ValueError("deferred runtime update kind required")
    with self.lock:
        # NOTE(review): the flattened text this block was recovered from does
        # not show where the ``with`` block ends; the bookkeeping and persist
        # calls are assumed to run under the lock -- confirm against the file.
        self._pending_runtime_updates.append(entry)
        # Keep the queue bounded: drop everything but the newest 16 entries.
        self._pending_runtime_updates = self._pending_runtime_updates[-16:]
        queue_size = len(self._pending_runtime_updates)
        self.updated_at = now_ts()
        self._persist()
    return queue_size
|
|
21928
|
+
|
|
21929
|
+
def _apply_deferred_runtime_updates(self) -> list[str]:
    """Apply the runtime updates that were queued while a run was active.

    Nothing happens (and ``[]`` is returned) while ``self.running`` is set or
    when the queue is empty.  Otherwise the whole queue is taken, cleared and
    persisted, and each entry is applied in order:

    * ``llm_config``      -> ``load_llm_config(config, source=...)``
    * ``model_selection`` -> ``set_runtime_selection(selection, override)``

    Entries that cannot be applied (malformed entry, unsupported kind, empty
    config, or an exception from the handler) are reported through a
    ``status`` event and never abort the rest of the batch.  When at least one
    entry was applied, ``_deferred_runtime_sync_requested`` is set.

    Returns:
        One human-readable note per update that was applied.
    """
    with self.lock:
        if self.running or not self._pending_runtime_updates:
            return []
        # NOTE(review): the flattened text this block was recovered from does
        # not show where the ``with`` block ends; clearing and persisting the
        # queue are assumed to happen under the lock -- confirm against the file.
        queued = list(self._pending_runtime_updates)
        self._pending_runtime_updates = []
        self.updated_at = now_ts()
        self._persist()

    def _report(outcome: str, kind: str, detail: object) -> None:
        # Single place that formats the status line for skipped/failed entries.
        self._emit(
            "status",
            {
                "summary": (
                    f"deferred runtime update {outcome} ({kind or 'unknown'}): "
                    f"{trim(str(detail), 180)}"
                )
            },
        )

    applied_notes: list[str] = []
    sync_needed = False
    for item in queued:
        # The queue was already cleared above, so a malformed entry must be
        # skipped rather than allowed to raise and drop the remaining entries.
        if not isinstance(item, dict):
            _report("skipped", "", "malformed queue entry")
            continue
        kind = str(item.get("kind", "") or "").strip().lower()
        raw_payload = item.get("payload")
        payload = raw_payload if isinstance(raw_payload, dict) else {}
        try:
            if kind == "llm_config":
                source = str(payload.get("source", "") or "deferred-config").strip()
                config = payload.get("config", {})
                if not (isinstance(config, dict) and config):
                    _report("skipped", kind, "empty or invalid config")
                    continue
                self.load_llm_config(config, source=source)
                applied_notes.append(f"deferred llm config applied: {trim(source, 120)}")
                sync_needed = True
            elif kind == "model_selection":
                selection = str(payload.get("selection", "") or "").strip()
                model_override = payload.get("model_override")
                self.set_runtime_selection(
                    selection,
                    model_override if isinstance(model_override, str) else None,
                )
                applied_notes.append(f"deferred model switch applied: {trim(selection, 120)}")
                sync_needed = True
            else:
                _report("skipped", kind, "unsupported kind")
        except Exception as exc:
            # Deliberate best-effort boundary: one failing update is reported
            # and the remaining queued updates are still attempted.
            _report("failed", kind, exc)
    if sync_needed:
        self._deferred_runtime_sync_requested = True
    return applied_notes
|
|
21972
|
+
|
|
21442
21973
|
def _global_wait_timeout_seconds(self) -> int:
|
|
21443
21974
|
raw = (
|
|
21444
21975
|
self.max_run_seconds
|
|
@@ -21997,10 +22528,12 @@ body{padding:18px}
|
|
|
21997
22528
|
_stop_process(proc)
|
|
21998
22529
|
meta["error"] = "Error: interrupted by user"
|
|
21999
22530
|
meta["exit_code"] = -130
|
|
22531
|
+
break
|
|
22000
22532
|
elif (not meta.get("error")) and timeout > 0 and elapsed >= timeout:
|
|
22001
22533
|
_stop_process(proc)
|
|
22002
22534
|
meta["error"] = f"Error: timeout ({timeout}s)"
|
|
22003
22535
|
meta["exit_code"] = -1
|
|
22536
|
+
break
|
|
22004
22537
|
try:
|
|
22005
22538
|
label, chunk = io_queue.get(timeout=0.12)
|
|
22006
22539
|
if chunk is None:
|
|
@@ -22085,6 +22618,7 @@ body{padding:18px}
|
|
|
22085
22618
|
if create_group > 0:
|
|
22086
22619
|
popen_kwargs["creationflags"] = create_group
|
|
22087
22620
|
proc = subprocess.Popen(effective_command, **popen_kwargs)
|
|
22621
|
+
self._running_bash_proc = proc
|
|
22088
22622
|
if os.name == "nt":
|
|
22089
22623
|
# Windows: read PIPE output via blocking reader threads + queue.
|
|
22090
22624
|
_collect_with_reader_threads(proc)
|
|
@@ -22110,10 +22644,12 @@ body{padding:18px}
|
|
|
22110
22644
|
_stop_process(proc)
|
|
22111
22645
|
meta["error"] = "Error: interrupted by user"
|
|
22112
22646
|
meta["exit_code"] = -130
|
|
22647
|
+
break
|
|
22113
22648
|
elif timeout > 0 and elapsed >= timeout:
|
|
22114
22649
|
_stop_process(proc)
|
|
22115
22650
|
meta["error"] = f"Error: timeout ({timeout}s)"
|
|
22116
22651
|
meta["exit_code"] = -1
|
|
22652
|
+
break
|
|
22117
22653
|
events = sel.select(timeout=0.12)
|
|
22118
22654
|
for key, _ in events:
|
|
22119
22655
|
stream = key.fileobj
|
|
@@ -22171,6 +22707,8 @@ body{padding:18px}
|
|
|
22171
22707
|
meta["error"] = f"Error: {exc}"
|
|
22172
22708
|
meta["output"] = meta["error"]
|
|
22173
22709
|
meta["exit_code"] = -1
|
|
22710
|
+
finally:
|
|
22711
|
+
self._running_bash_proc = None
|
|
22174
22712
|
meta["duration_ms"] = int((time.time() - start) * 1000)
|
|
22175
22713
|
after = self._git_status_map(cwd)
|
|
22176
22714
|
meta["changed_files"] = self._status_delta(before, after) if before or after else []
|
|
@@ -22181,11 +22719,19 @@ body{padding:18px}
|
|
|
22181
22719
|
str(meta.get("output") or meta.get("error") or "(no output)"),
|
|
22182
22720
|
cwd=cwd,
|
|
22183
22721
|
)
|
|
22184
|
-
|
|
22722
|
+
)
|
|
22185
22723
|
return meta
|
|
22186
22724
|
|
|
22725
|
+
def _shell_command_timeout(self) -> int:
    """Return the shell-command timeout after normalization.

    Reads ``shell_command_timeout_seconds`` from the instance, using
    ``DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS`` when the attribute is absent,
    and hands it to ``normalize_timeout_seconds`` together with the
    shell-command minimum, maximum and fallback constants.
    """
    configured = getattr(
        self,
        "shell_command_timeout_seconds",
        DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
    )
    limits = {
        "minimum": MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
        "maximum": MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
        "fallback": DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
    }
    return normalize_timeout_seconds(configured, **limits)
|
|
22732
|
+
|
|
22187
22733
|
def _run_bash(self, command: str) -> str:
    """Run ``command`` via ``_run_shell_meta`` and return its ``"output"`` entry.

    The command is executed with ``self.files_root`` as the working directory
    argument and the timeout reported by ``_shell_command_timeout()``.
    """
    result_meta = self._run_shell_meta(
        command,
        self.files_root,
        self._shell_command_timeout(),
    )
    return result_meta["output"]
|
|
22189
22735
|
|
|
22190
22736
|
def _fuzzy_resolve_path(self, fp: Path) -> Path:
|
|
22191
22737
|
"""If fp doesn't exist, try stripping spaces from the filename to find a close match.
|
|
@@ -22883,10 +23429,10 @@ body{padding:18px}
|
|
|
22883
23429
|
"3) scaffold semantic HTML; "
|
|
22884
23430
|
"4) apply CSS tokens + responsive layout; "
|
|
22885
23431
|
"5) wire JS state/data interactions; "
|
|
22886
|
-
"6) localize external JS dependencies to ./js from ./js_lib; "
|
|
23432
|
+
"6) localize external JS dependencies to a task-local relative folder such as ./js from ./js_lib, and rewrite final HTML to plain relative paths; "
|
|
22887
23433
|
"7) run QA loop for desktop/mobile/a11y/performance and iterate. "
|
|
22888
23434
|
f"Offline JS libs available now: {libs_hint}. "
|
|
22889
|
-
"Final exported HTML should avoid unresolved CDN-only script src."
|
|
23435
|
+
"Final exported HTML should avoid unresolved CDN-only script src and must not keep '/js_lib/...' or '/assets/js_lib/...' virtual URLs."
|
|
22890
23436
|
)
|
|
22891
23437
|
|
|
22892
23438
|
def _contains_any_keyword(self, text: str, keywords: tuple[str, ...]) -> bool:
|
|
@@ -23050,9 +23596,10 @@ body{padding:18px}
|
|
|
23050
23596
|
) or str(base.get("task_type", "general"))
|
|
23051
23597
|
if task_type not in TASK_PROFILE_TYPES:
|
|
23052
23598
|
task_type = str(base.get("task_type", "general"))
|
|
23053
|
-
complexity =
|
|
23054
|
-
|
|
23055
|
-
|
|
23599
|
+
complexity = normalize_task_complexity(
|
|
23600
|
+
src.get("complexity", base.get("complexity", "simple")),
|
|
23601
|
+
default=str(base.get("complexity", "simple") or "simple"),
|
|
23602
|
+
)
|
|
23056
23603
|
src_direct_objective = trim(str(src.get("direct_objective", "") or "").strip(), 800)
|
|
23057
23604
|
legacy_objectives = {
|
|
23058
23605
|
"Provide the most direct useful response with minimal orchestration.",
|
|
@@ -23089,9 +23636,9 @@ body{padding:18px}
|
|
|
23089
23636
|
if raw_level not in TASK_LEVEL_CHOICES:
|
|
23090
23637
|
if task_type == "simple_qa":
|
|
23091
23638
|
raw_level = 1 if len(str(goal or "")) <= 180 else 2
|
|
23092
|
-
elif task_type in {"simple_code", "research"} and complexity
|
|
23639
|
+
elif task_type in {"simple_code", "research"} and task_complexity_rank(complexity) <= task_complexity_rank("moderate"):
|
|
23093
23640
|
raw_level = 3
|
|
23094
|
-
elif complexity
|
|
23641
|
+
elif task_complexity_at_least(complexity, "complex"):
|
|
23095
23642
|
raw_level = 4
|
|
23096
23643
|
else:
|
|
23097
23644
|
raw_level = 2
|
|
@@ -23180,7 +23727,7 @@ body{padding:18px}
|
|
|
23180
23727
|
goal = str(bb.get("original_goal", "") or "")
|
|
23181
23728
|
current = bb.get("task_profile", {})
|
|
23182
23729
|
profile = self._normalize_task_profile(goal, {} if force else current)
|
|
23183
|
-
if profile.get("complexity")
|
|
23730
|
+
if task_complexity_rank(profile.get("complexity", "simple")) < task_complexity_rank("complex"):
|
|
23184
23731
|
logs = bb.get("execution_logs", []) if isinstance(bb.get("execution_logs"), list) else []
|
|
23185
23732
|
tail = "\n".join(
|
|
23186
23733
|
str((row or {}).get("content", "") or "")
|
|
@@ -23284,10 +23831,16 @@ body{padding:18px}
|
|
|
23284
23831
|
# Project todo gate: coding tasks must pass compile + test
|
|
23285
23832
|
profile = self._ensure_blackboard_task_profile(bb)
|
|
23286
23833
|
task_type = str(profile.get("task_type", "general") or "general")
|
|
23834
|
+
exec_mode = normalize_execution_mode(
|
|
23835
|
+
profile.get("execution_mode", self._effective_execution_mode()),
|
|
23836
|
+
default=self._effective_execution_mode(),
|
|
23837
|
+
)
|
|
23287
23838
|
if task_type in ("simple_code", "engineering"):
|
|
23288
23839
|
for todo in bb.get("project_todos", []):
|
|
23289
23840
|
if todo.get("category") in ("compile_test", "min_test") and todo.get("status") != "completed":
|
|
23290
23841
|
return False, f"project-todo-incomplete:{todo.get('category', '')}"
|
|
23842
|
+
if exec_mode == EXECUTION_MODE_SYNC and not self._manager_feedback_passed_from_blackboard(bb):
|
|
23843
|
+
return False, "sync-review-missing"
|
|
23291
23844
|
return True, "ok"
|
|
23292
23845
|
|
|
23293
23846
|
def _invalidate_stale_approval_if_needed(
|
|
@@ -23505,6 +24058,10 @@ body{padding:18px}
|
|
|
23505
24058
|
def _watchdog_state_fingerprint(self, board: dict | None = None) -> str:
|
|
23506
24059
|
bb = board if isinstance(board, dict) else self._ensure_blackboard()
|
|
23507
24060
|
profile = self._ensure_blackboard_task_profile(bb)
|
|
24061
|
+
step_snapshot = self._active_plan_progress_snapshot(bb)
|
|
24062
|
+
last_reply = bb.get("last_worker_reply", {}) if isinstance(bb.get("last_worker_reply"), dict) else {}
|
|
24063
|
+
last_reply_role = self._sanitize_agent_role(last_reply.get("role", ""))
|
|
24064
|
+
last_reply_text = trim(str(last_reply.get("text", "") or "").strip(), 240)
|
|
23508
24065
|
payload = {
|
|
23509
24066
|
"status": self._normalize_blackboard_status(bb.get("status", "INITIALIZING")),
|
|
23510
24067
|
"goal": trim(str(bb.get("original_goal", "") or "").strip(), 400),
|
|
@@ -23517,6 +24074,16 @@ body{padding:18px}
|
|
|
23517
24074
|
"approved": bool((bb.get("approval", {}) or {}).get("approved", False)),
|
|
23518
24075
|
"task_type": str(profile.get("task_type", "general") or "general"),
|
|
23519
24076
|
"complexity": str(profile.get("complexity", "simple") or "simple"),
|
|
24077
|
+
"plan_step_id": str(step_snapshot.get("step_id", "") or ""),
|
|
24078
|
+
"plan_step_text": trim(str(step_snapshot.get("step_text", "") or "").strip(), 180),
|
|
24079
|
+
"worker_todo_count": int(step_snapshot.get("worker_todo_count", 0) or 0),
|
|
24080
|
+
"worker_todo_completed": int(step_snapshot.get("completed_count", 0) or 0),
|
|
24081
|
+
"worker_todo_in_progress": int(step_snapshot.get("in_progress_count", 0) or 0),
|
|
24082
|
+
"worker_todo_pending": int(step_snapshot.get("pending_count", 0) or 0),
|
|
24083
|
+
"current_subtask": trim(str(step_snapshot.get("current_subtask", "") or "").strip(), 180),
|
|
24084
|
+
"next_pending_subtask": trim(str(step_snapshot.get("next_pending_subtask", "") or "").strip(), 180),
|
|
24085
|
+
"last_worker_reply_role": last_reply_role,
|
|
24086
|
+
"last_worker_reply_text": last_reply_text,
|
|
23520
24087
|
}
|
|
23521
24088
|
raw = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
|
|
23522
24089
|
return hashlib.sha1(raw.encode("utf-8")).hexdigest()
|
|
@@ -24161,6 +24728,7 @@ body{padding:18px}
|
|
|
24161
24728
|
"instruction": "",
|
|
24162
24729
|
"reason": "",
|
|
24163
24730
|
"source": "",
|
|
24731
|
+
"progress_fp": "",
|
|
24164
24732
|
"is_mandatory": False,
|
|
24165
24733
|
"ts": 0.0,
|
|
24166
24734
|
},
|
|
@@ -24216,6 +24784,7 @@ body{padding:18px}
|
|
|
24216
24784
|
"instruction": trim(str(raw_delegate.get("instruction", "") or "").strip(), 1200),
|
|
24217
24785
|
"reason": trim(str(raw_delegate.get("reason", "") or "").strip(), 600),
|
|
24218
24786
|
"source": trim(str(raw_delegate.get("source", "") or "").strip(), 40),
|
|
24787
|
+
"progress_fp": trim(str(raw_delegate.get("progress_fp", "") or "").strip(), 80),
|
|
24219
24788
|
"is_mandatory": _to_bool_like(raw_delegate.get("is_mandatory", False), default=False),
|
|
24220
24789
|
"ts": float(raw_delegate.get("ts", 0.0) or 0.0),
|
|
24221
24790
|
}
|
|
@@ -24353,8 +24922,8 @@ body{padding:18px}
|
|
|
24353
24922
|
for pt in bb_src_todos[:40]:
|
|
24354
24923
|
if not isinstance(pt, dict):
|
|
24355
24924
|
continue
|
|
24356
|
-
raw_content = trim(
|
|
24357
|
-
raw_full = trim(
|
|
24925
|
+
raw_content = trim(normalize_embedded_newlines(pt.get("content", "")), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
|
|
24926
|
+
raw_full = trim(normalize_embedded_newlines(pt.get("full_content", "")), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
|
|
24358
24927
|
# Migration: if full_content is empty but content has sub-steps, auto-split
|
|
24359
24928
|
if not raw_full and raw_content and pt.get("category") == "plan_step":
|
|
24360
24929
|
normalized = _mid_re_norm.sub(r"\n\1", raw_content)
|
|
@@ -24369,6 +24938,7 @@ body{padding:18px}
|
|
|
24369
24938
|
"category": trim(str(pt.get("category", "") or ""), 40),
|
|
24370
24939
|
"plan_step_index": int(pt.get("plan_step_index", -1)) if pt.get("plan_step_index") is not None else -1,
|
|
24371
24940
|
"created_at": float(pt.get("created_at", 0.0) or 0.0),
|
|
24941
|
+
"activated_at": float(pt.get("activated_at", 0.0) or 0.0) if pt.get("activated_at") else None,
|
|
24372
24942
|
"completed_at": float(pt.get("completed_at", 0.0) or 0.0) if pt.get("completed_at") else None,
|
|
24373
24943
|
"completed_by": trim(str(pt.get("completed_by", "") or ""), 40),
|
|
24374
24944
|
"evidence": trim(str(pt.get("evidence", "") or ""), 200),
|
|
@@ -24766,7 +25336,8 @@ body{padding:18px}
|
|
|
24766
25336
|
if not isinstance(fl, dict):
|
|
24767
25337
|
return
|
|
24768
25338
|
delegations = fl.get("repeated_delegations", [])
|
|
24769
|
-
|
|
25339
|
+
progress_fp = self._watchdog_state_fingerprint(bb)
|
|
25340
|
+
fp = hashlib.sha1((str(instruction or "") + "|" + progress_fp).encode("utf-8")).hexdigest()[:12]
|
|
24770
25341
|
for entry in delegations:
|
|
24771
25342
|
if entry.get("instruction_hash") == fp and entry.get("target") == target:
|
|
24772
25343
|
entry["count"] = int(entry.get("count", 1) or 1) + 1
|
|
@@ -24779,6 +25350,7 @@ body{padding:18px}
|
|
|
24779
25350
|
"target": trim(str(target or ""), 40),
|
|
24780
25351
|
"instruction_hash": fp,
|
|
24781
25352
|
"instruction_preview": trim(str(instruction or ""), 200),
|
|
25353
|
+
"progress_fp": progress_fp,
|
|
24782
25354
|
"count": 1,
|
|
24783
25355
|
"first_round": int(getattr(self, "agent_round_index", 0) or 0),
|
|
24784
25356
|
"last_round": int(getattr(self, "agent_round_index", 0) or 0),
|
|
@@ -25363,6 +25935,195 @@ body{padding:18px}
|
|
|
25363
25935
|
return observed_signal or read_back or wrote_files
|
|
25364
25936
|
return wrote_files or read_back or knowledge_signal or observed_signal
|
|
25365
25937
|
|
|
25938
|
+
def _plan_step_activation_ts(self, plan_step: dict) -> float:
|
|
25939
|
+
if not isinstance(plan_step, dict):
|
|
25940
|
+
return 0.0
|
|
25941
|
+
try:
|
|
25942
|
+
activated = float(plan_step.get("activated_at", 0.0) or 0.0)
|
|
25943
|
+
except Exception:
|
|
25944
|
+
activated = 0.0
|
|
25945
|
+
if activated > 0:
|
|
25946
|
+
return activated
|
|
25947
|
+
try:
|
|
25948
|
+
return float(plan_step.get("created_at", 0.0) or 0.0)
|
|
25949
|
+
except Exception:
|
|
25950
|
+
return 0.0
|
|
25951
|
+
|
|
25952
|
+
def _plan_step_blackboard_signals(self, plan_step: dict, board: dict | None = None) -> dict:
    """Collect the evidence signals recorded on the blackboard for ``plan_step``.

    Entries carrying a positive timestamp earlier than the step's activation
    time (``_plan_step_activation_ts``) are ignored; entries without a usable
    timestamp are kept.

    Pass/fail detection works on lower-cased log text.  Positive hints must
    start at a token boundary (so ``"0 errors"`` does not match
    ``"10 errors"`` and ``"success"`` does not match ``"unsuccessful"``), and
    very short hints such as ``"ok"`` must also end at one (so they do not
    match ``"token"``).  A row containing a failure word is ignored, except
    that an explicit zero-count phrase such as ``"0 failed"`` is not treated
    as a failure.

    Args:
        plan_step: Plan-step row; its ``id`` and timestamps are read.
        board: Blackboard dict.  When it is not a dict,
            ``_ensure_blackboard()`` supplies one.

    Returns:
        A dict with ``since_ts``, the booleans ``has_write``, ``has_read``,
        ``has_research``, ``has_exec``, ``has_review``, ``has_compile_pass``
        and ``has_test_pass``, ``recent_files`` (at most four paths) and the
        ``recent_exec_excerpt`` / ``recent_review_excerpt`` /
        ``recent_research_excerpt`` strings.
    """
    import re  # function-scope: keeps the block independent of file-level imports

    bb = board if isinstance(board, dict) else self._ensure_blackboard()
    step_id = trim(str((plan_step or {}).get("id", "") or ""), 20)
    since_ts = self._plan_step_activation_ts(plan_step)

    def _to_ts(value: object) -> float:
        try:
            return float(value or 0.0)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def _predates_step(ts: float) -> bool:
        # Only entries with a real timestamp can be ruled out; the epsilon
        # tolerates float noise around the activation instant.
        return since_ts > 0 and ts > 0 and ts + 1e-6 < since_ts

    def _rows_since(rows: object) -> list[dict]:
        out: list[dict] = []
        if not isinstance(rows, list):
            return out
        for row in rows:
            if not isinstance(row, dict):
                continue
            txt = trim(str(row.get("content", "") or "").strip(), 1200)
            if not txt:
                continue
            ts = _to_ts(row.get("ts", 0.0))
            if _predates_step(ts):
                continue
            out.append({"ts": ts, "content": txt, "actor": trim(str(row.get("actor", "") or ""), 40)})
        return out

    def _recent_excerpt(rows: list[dict], max_chars: int = 120) -> str:
        if not rows:
            return ""
        return trim(str(rows[-1].get("content", "") or "").replace("\r\n", "\n"), max_chars)

    negative_hints = ("error:", "failed", "failure", "traceback", "fatal error", "assertionerror", "exception")
    compile_hints = ("compiled successfully", "build successful", "build succeeded", "syntax ok", "lint passed", "no issues found", "0 errors", "编译成功")
    test_hints = ("test passed", "tests passed", "all tests passed", "0 failed", "100%", "ok", "success", "测试通过")

    def _is_ascii_word_char(ch: str) -> bool:
        return ch.isascii() and (ch.isalnum() or ch == "_")

    def _hint_regex(hints: tuple[str, ...]):
        # ASCII-only boundaries: CJK hints keep plain substring semantics
        # because their neighbours are never ASCII word characters in practice.
        alternatives = []
        for hint in hints:
            head = r"(?<![0-9a-z_])" if _is_ascii_word_char(hint[:1]) else ""
            tail = r"(?![0-9a-z_])" if len(hint) <= 3 and _is_ascii_word_char(hint[-1:]) else ""
            alternatives.append(f"{head}{re.escape(hint)}{tail}")
        return re.compile("|".join(alternatives))

    compile_re = _hint_regex(compile_hints)
    test_re = _hint_regex(test_hints)
    # Positive phrases that themselves contain a failure word ("0 failed");
    # they are blanked out before looking for failure words.
    benign_hints = tuple(
        hint for hint in compile_hints + test_hints
        if any(neg in hint for neg in negative_hints)
    )
    benign_re = _hint_regex(benign_hints) if benign_hints else None

    def _is_negative(low: str) -> bool:
        scrubbed = benign_re.sub(" ", low) if benign_re is not None else low
        return any(neg in scrubbed for neg in negative_hints)

    step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
    step_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
    step_entries = [
        entry for entry in step_entries
        if isinstance(entry, dict) and not _predates_step(_to_ts(entry.get("ts", 0.0)))
    ]

    artifact_rows: list[dict] = []
    raw_artifacts = bb.get("code_artifacts", {}) if isinstance(bb.get("code_artifacts"), dict) else {}
    for path, meta in raw_artifacts.items():
        if not isinstance(meta, dict):
            continue
        ts = _to_ts(meta.get("updated_at", 0.0))
        if _predates_step(ts):
            continue
        artifact_rows.append({
            "path": trim(str(path or "").strip(), 240),
            "summary": trim(str(meta.get("summary", "") or "").strip(), 200),
            "updated_at": ts,
        })

    research_rows = _rows_since(bb.get("research_notes", []))
    exec_rows = _rows_since(bb.get("execution_logs", []))
    review_rows = _rows_since(bb.get("review_feedback", []))

    file_ops = {trim(str(entry.get("op", "") or "").strip(), 40) for entry in step_entries}
    has_write = bool(file_ops & {"write_file", "edit_file"}) or bool(artifact_rows)
    has_read = "read_file" in file_ops

    def _has_positive(rows: list[dict], pattern) -> bool:
        # Only the six most recent rows are inspected, newest first.
        for row in reversed(rows[-6:]):
            low = str(row.get("content", "") or "").lower()
            if not low or _is_negative(low):
                continue
            if pattern.search(low):
                return True
        return False

    def _has_observed(rows: list[dict]) -> bool:
        for row in reversed(rows[-6:]):
            low = str(row.get("content", "") or "").lower()
            if low and not _is_negative(low):
                return True
        return False

    recent_files = [row.get("path", "") for row in artifact_rows[-4:] if row.get("path")]
    if not recent_files:
        recent_files = [
            trim(str(entry.get("path", "") or "").strip(), 240)
            for entry in step_entries[-4:]
            if str(entry.get("path", "") or "").strip()
        ]

    verification_rows = exec_rows + review_rows
    return {
        "since_ts": since_ts,
        "has_write": has_write,
        "has_read": has_read,
        "has_research": bool(research_rows),
        "has_exec": _has_observed(exec_rows),
        "has_review": _has_observed(review_rows),
        "has_compile_pass": _has_positive(verification_rows, compile_re),
        "has_test_pass": _has_positive(verification_rows, test_re),
        # dict.fromkeys() removes duplicates while preserving order.
        "recent_files": list(dict.fromkeys(recent_files))[-4:],
        "recent_exec_excerpt": _recent_excerpt(exec_rows, 140),
        "recent_review_excerpt": _recent_excerpt(review_rows, 140),
        "recent_research_excerpt": _recent_excerpt(research_rows, 140),
    }
|
|
26067
|
+
|
|
26068
|
+
def _plan_step_has_blackboard_evidence(self, plan_step: dict, board: dict | None = None) -> bool:
|
|
26069
|
+
if not isinstance(plan_step, dict):
|
|
26070
|
+
return False
|
|
26071
|
+
sig = self._plan_step_blackboard_signals(plan_step, board)
|
|
26072
|
+
step_text = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "").lower()
|
|
26073
|
+
phase = self._plan_step_phase_hint(step_text)
|
|
26074
|
+
wants_test = phase in ("test", "review") or any(
|
|
26075
|
+
tok in step_text for tok in ("test", "pytest", "unit", "integration", "验证", "測試", "测试", "回归", "assert")
|
|
26076
|
+
)
|
|
26077
|
+
wants_runtime_validation = wants_test or phase == "implement" or any(
|
|
26078
|
+
tok in step_text for tok in ("verify", "validation", "check", "lint", "build", "compile", "运行", "校验", "檢查")
|
|
26079
|
+
)
|
|
26080
|
+
if wants_test:
|
|
26081
|
+
return sig["has_test_pass"] or sig["has_exec"] or sig["has_review"]
|
|
26082
|
+
if phase == "implement":
|
|
26083
|
+
return sig["has_write"] and (
|
|
26084
|
+
sig["has_compile_pass"] or sig["has_test_pass"] or sig["has_exec"] or sig["has_read"] or sig["has_review"]
|
|
26085
|
+
)
|
|
26086
|
+
if phase in ("research", "design"):
|
|
26087
|
+
return sig["has_research"] or sig["has_read"] or sig["has_exec"] or sig["has_write"]
|
|
26088
|
+
if wants_runtime_validation:
|
|
26089
|
+
return sig["has_exec"] or sig["has_read"] or sig["has_write"] or sig["has_review"]
|
|
26090
|
+
return sig["has_write"] or sig["has_read"] or sig["has_research"] or sig["has_exec"] or sig["has_review"]
|
|
26091
|
+
|
|
26092
|
+
def _step_has_accumulated_evidence(self, plan_step: dict, bb: dict | None = None) -> bool:
|
|
26093
|
+
"""Fix 3: Check if step has accumulated evidence across ALL turns (not just current turn).
|
|
26094
|
+
Uses step_files registry + blackboard signals to detect writes/execution during step lifetime."""
|
|
26095
|
+
if not isinstance(plan_step, dict):
|
|
26096
|
+
return False
|
|
26097
|
+
sig = self._plan_step_blackboard_signals(plan_step, bb)
|
|
26098
|
+
return sig["has_write"] or sig["has_exec"] or sig["has_research"]
|
|
26099
|
+
|
|
26100
|
+
def _collect_accumulated_step_evidence(self, plan_step: dict, bb: dict | None = None) -> str:
|
|
26101
|
+
"""Fix 1 support: Collect evidence summary from accumulated step history (across all turns)."""
|
|
26102
|
+
if not isinstance(plan_step, dict):
|
|
26103
|
+
return ""
|
|
26104
|
+
sig = self._plan_step_blackboard_signals(plan_step, bb)
|
|
26105
|
+
parts: list[str] = []
|
|
26106
|
+
if sig.get("recent_files"):
|
|
26107
|
+
parts.append("files: " + ", ".join(sig["recent_files"][:4]))
|
|
26108
|
+
if sig.get("recent_exec_excerpt"):
|
|
26109
|
+
parts.append("exec: " + trim(sig["recent_exec_excerpt"], 80))
|
|
26110
|
+
if sig.get("recent_research_excerpt"):
|
|
26111
|
+
parts.append("research: " + trim(sig["recent_research_excerpt"], 80))
|
|
26112
|
+
return trim("; ".join(parts) or "accumulated-step-evidence", 200)
|
|
26113
|
+
|
|
26114
|
+
def _collect_blackboard_step_evidence(self, plan_step: dict, board: dict | None = None) -> str:
    """Summarise the blackboard signals of ``plan_step`` as one evidence line.

    Fragments are emitted in a fixed order -- files (at most three), logs,
    review, notes -- and only for signals that are present and truthy.  They
    are joined with ``"; "`` and the result is passed through
    ``trim(..., 200)``; with no fragments that input is the empty string.
    """
    signals = self._plan_step_blackboard_signals(plan_step, board)
    fragments: list[str] = []

    files = signals.get("recent_files")
    if files:
        fragments.append("files: " + ", ".join(files[:3]))

    labelled_excerpts = (
        ("logs", "recent_exec_excerpt"),
        ("review", "recent_review_excerpt"),
        ("notes", "recent_research_excerpt"),
    )
    for label, key in labelled_excerpts:
        excerpt = signals.get(key)
        if excerpt:
            fragments.append(f"{label}: {excerpt}")

    return trim("; ".join(fragments), 200)
|
|
26126
|
+
|
|
25366
26127
|
def _has_test_pass_evidence(self, board: dict | None = None) -> bool:
|
|
25367
26128
|
bb = board if isinstance(board, dict) else self._ensure_blackboard()
|
|
25368
26129
|
logs = bb.get("execution_logs", []) if isinstance(bb.get("execution_logs"), list) else []
|
|
@@ -25395,6 +26156,20 @@ body{padding:18px}
|
|
|
25395
26156
|
if todo.get("status") == "completed":
|
|
25396
26157
|
continue
|
|
25397
26158
|
cat = todo.get("category", "")
|
|
26159
|
+
if cat == "plan_step" and todo.get("status") == "in_progress" and not todo.get("activated_at"):
|
|
26160
|
+
step_idx = int(todo.get("plan_step_index", 0) or 0)
|
|
26161
|
+
prior_done_ts = [
|
|
26162
|
+
float(t.get("completed_at", 0.0) or 0.0)
|
|
26163
|
+
for t in todos
|
|
26164
|
+
if t.get("category") == "plan_step"
|
|
26165
|
+
and int(t.get("plan_step_index", 0) or 0) < step_idx
|
|
26166
|
+
and t.get("completed_at")
|
|
26167
|
+
]
|
|
26168
|
+
todo["activated_at"] = (
|
|
26169
|
+
max(prior_done_ts)
|
|
26170
|
+
if prior_done_ts
|
|
26171
|
+
else (float(todo.get("created_at", 0.0) or 0.0) or float(now_ts()))
|
|
26172
|
+
)
|
|
25398
26173
|
if cat == "setup" and (research_count > 0 or code_count > 0):
|
|
25399
26174
|
todo.update(
|
|
25400
26175
|
status="completed",
|
|
@@ -25446,11 +26221,14 @@ body{padding:18px}
|
|
|
25446
26221
|
if t.get("category") == "plan_step"
|
|
25447
26222
|
):
|
|
25448
26223
|
todo["status"] = "in_progress"
|
|
26224
|
+
todo["activated_at"] = float(now_ts())
|
|
25449
26225
|
|
|
25450
26226
|
if not any(t.get("status") == "in_progress" for t in todos):
|
|
25451
26227
|
for t in todos:
|
|
25452
26228
|
if t.get("status") == "pending":
|
|
25453
26229
|
t["status"] = "in_progress"
|
|
26230
|
+
if not t.get("activated_at"):
|
|
26231
|
+
t["activated_at"] = float(now_ts())
|
|
25454
26232
|
break
|
|
25455
26233
|
|
|
25456
26234
|
bb["project_todos"] = todos
|
|
@@ -25516,10 +26294,25 @@ body{padding:18px}
|
|
|
25516
26294
|
break
|
|
25517
26295
|
if not current:
|
|
25518
26296
|
return False
|
|
26297
|
+
# Fix 5c: Reset TodoWrite loop counter on step advancement
|
|
26298
|
+
try:
|
|
26299
|
+
self._todowrite_step_counter.clear()
|
|
26300
|
+
except Exception:
|
|
26301
|
+
pass
|
|
25519
26302
|
current["status"] = "completed"
|
|
25520
26303
|
current["completed_at"] = float(now_ts())
|
|
25521
26304
|
current["completed_by"] = actor
|
|
25522
26305
|
current["evidence"] = trim(str(evidence or "").strip(), 200) or self._ui_text("step_completed_evidence")
|
|
26306
|
+
# Clear single-mode validation gate flags for the completed step
|
|
26307
|
+
try:
|
|
26308
|
+
_completed_id = str(current.get("id", "") or "")
|
|
26309
|
+
for _attr_name in (f"_smvg_{_completed_id}", f"_smvg_ts_{_completed_id}", f"_smvg_n_{_completed_id}", f"_sync_exec_gate_n_{_completed_id}", f"_sync_sv_ts_{_completed_id}"):
|
|
26310
|
+
try:
|
|
26311
|
+
delattr(self, _attr_name)
|
|
26312
|
+
except AttributeError:
|
|
26313
|
+
pass
|
|
26314
|
+
except Exception:
|
|
26315
|
+
pass
|
|
25523
26316
|
# 推进 cursor,激活下一步
|
|
25524
26317
|
cursor = int(bb.get("plan_step_cursor", 0) or 0)
|
|
25525
26318
|
bb["plan_step_cursor"] = cursor + 1
|
|
@@ -25530,6 +26323,7 @@ body{padding:18px}
|
|
|
25530
26323
|
break
|
|
25531
26324
|
if next_step:
|
|
25532
26325
|
next_step["status"] = "in_progress"
|
|
26326
|
+
next_step["activated_at"] = float(now_ts())
|
|
25533
26327
|
step_idx = int(next_step.get("plan_step_index", 0) or 0) + 1
|
|
25534
26328
|
total = int(bb.get("plan_step_total", len(todos)) or len(todos))
|
|
25535
26329
|
self._emit("status", {
|
|
@@ -25635,27 +26429,90 @@ body{padding:18px}
|
|
|
25635
26429
|
isinstance(r, dict) and r.get("ok", False) and str(r.get("name", "")) == "bash"
|
|
25636
26430
|
for r in results
|
|
25637
26431
|
)
|
|
25638
|
-
|
|
26432
|
+
validation_ok_current = self._tool_results_have_validation_evidence(current, results)
|
|
26433
|
+
validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
|
|
26434
|
+
validation_ok = validation_ok_current or validation_ok_blackboard
|
|
26435
|
+
bb_sig = self._plan_step_blackboard_signals(current, bb)
|
|
25639
26436
|
phase_evidence = False
|
|
25640
26437
|
if phase in ("research", "design") and validation_ok:
|
|
25641
26438
|
phase_evidence = True
|
|
25642
|
-
elif phase == "implement" and
|
|
26439
|
+
elif phase == "implement" and (
|
|
26440
|
+
(wrote_files and validation_ok_current)
|
|
26441
|
+
or (bb_sig["has_write"] and validation_ok_blackboard)
|
|
26442
|
+
):
|
|
25643
26443
|
phase_evidence = True
|
|
25644
|
-
elif phase in ("test", "review") and
|
|
26444
|
+
elif phase in ("test", "review") and (
|
|
26445
|
+
(ran_bash_ok and validation_ok_current)
|
|
26446
|
+
or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
|
|
26447
|
+
):
|
|
25645
26448
|
phase_evidence = True
|
|
26449
|
+
todo_progress_signal = any(
|
|
26450
|
+
isinstance(r, dict) and r.get("ok", False)
|
|
26451
|
+
and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
|
|
26452
|
+
for r in results
|
|
26453
|
+
)
|
|
25646
26454
|
# Advance when:
|
|
25647
26455
|
# - Manager requested AND worker produced output, OR
|
|
25648
26456
|
# - All subtasks completed AND worker produced output, OR
|
|
25649
|
-
# - Phase heuristics confirm
|
|
25650
|
-
|
|
25651
|
-
|
|
25652
|
-
|
|
26457
|
+
# - Phase heuristics confirm BUT ONLY if no incomplete subtasks exist
|
|
26458
|
+
# - Fix 3: All subtasks completed + accumulated step evidence (covers TodoWrite-only turns)
|
|
26459
|
+
# CRITICAL: When subtasks exist, phase_evidence alone CANNOT bypass subtask completion.
|
|
26460
|
+
_has_subtasks = bool(self._active_plan_worker_todo_rows(
|
|
26461
|
+
str(current.get("id", "") or ""), role=""
|
|
26462
|
+
))
|
|
26463
|
+
_phase_gate = phase_evidence and (subtasks_all_done or not _has_subtasks)
|
|
26464
|
+
accumulated_evidence_path = (
|
|
26465
|
+
subtasks_all_done
|
|
26466
|
+
and todo_progress_signal
|
|
26467
|
+
and self._step_has_accumulated_evidence(current, bb)
|
|
26468
|
+
)
|
|
26469
|
+
has_strong_evidence = (
|
|
26470
|
+
validation_ok and (
|
|
26471
|
+
(
|
|
26472
|
+
worker_produced_output
|
|
26473
|
+
and (manager_requested or subtasks_all_done or _phase_gate)
|
|
26474
|
+
)
|
|
26475
|
+
or (
|
|
26476
|
+
todo_progress_signal
|
|
26477
|
+
and subtasks_all_done
|
|
26478
|
+
and validation_ok_blackboard
|
|
26479
|
+
)
|
|
26480
|
+
)
|
|
26481
|
+
) or accumulated_evidence_path
|
|
25653
26482
|
if has_strong_evidence:
|
|
26483
|
+
# Sync mode exec gate: when all subtasks done for implement/test/deploy phases,
|
|
26484
|
+
# require at least some execution evidence (bash/test/compile ran at any point).
|
|
26485
|
+
# Manager-requested advancement has its own escape hatch after 10 blocks.
|
|
26486
|
+
_exec_gate_needed = (
|
|
26487
|
+
subtasks_all_done
|
|
26488
|
+
and phase in ("implement", "test", "deploy")
|
|
26489
|
+
)
|
|
26490
|
+
if _exec_gate_needed:
|
|
26491
|
+
# Require model's explicit <step-verified/> tag in agent_messages since step activation
|
|
26492
|
+
_has_verified = self._check_step_verified_tag(current, messages=self.agent_messages)
|
|
26493
|
+
if not _has_verified:
|
|
26494
|
+
_sync_n_flag = f"_sync_exec_gate_n_{str(current.get('id', '') or '')}"
|
|
26495
|
+
_sync_n = int(getattr(self, _sync_n_flag, 0))
|
|
26496
|
+
if _sync_n < 10:
|
|
26497
|
+
setattr(self, _sync_n_flag, _sync_n + 1)
|
|
26498
|
+
# No verified tag yet — push worker to evaluate and emit <step-verified/>
|
|
26499
|
+
self._inject_sync_mode_verification_hint(current, worker_step)
|
|
26500
|
+
return
|
|
26501
|
+
# After 10 blocks, allow advancement to prevent permanent stall
|
|
25654
26502
|
evidence = self._collect_step_evidence(current, worker_step)
|
|
26503
|
+
# Clear sync exec gate counter on successful advance
|
|
26504
|
+
try:
|
|
26505
|
+
_sync_clear = f"_sync_exec_gate_n_{str(current.get('id', '') or '')}"
|
|
26506
|
+
if hasattr(self, _sync_clear):
|
|
26507
|
+
delattr(self, _sync_clear)
|
|
26508
|
+
except Exception:
|
|
26509
|
+
pass
|
|
25655
26510
|
self._advance_plan_step(
|
|
25656
26511
|
evidence=evidence,
|
|
25657
26512
|
actor=str(route.get("target", "developer") or "developer"),
|
|
25658
26513
|
)
|
|
26514
|
+
else:
|
|
26515
|
+
self._inject_rework_if_needed(current, worker_step)
|
|
25659
26516
|
|
|
25660
26517
|
def _worker_step_has_evidence(self, step: dict) -> bool:
|
|
25661
26518
|
"""Check if worker step produced concrete tool outputs."""
|
|
@@ -25671,7 +26528,8 @@ body{padding:18px}
|
|
|
25671
26528
|
|
|
25672
26529
|
def _step_subtasks_all_completed(self, plan_step: dict) -> bool:
|
|
25673
26530
|
"""Check if all worker subtasks linked to this plan step are completed.
|
|
25674
|
-
Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking.
|
|
26531
|
+
Filters out cross-step subtasks (e.g., 2.1 under step 1) to prevent blocking.
|
|
26532
|
+
Fix 6: Also excludes 'next-step intent' items that were added alongside completed items."""
|
|
25675
26533
|
step_id = str(plan_step.get("id", "") or "")
|
|
25676
26534
|
if not step_id:
|
|
25677
26535
|
return False
|
|
@@ -25711,7 +26569,293 @@ body{padding:18px}
|
|
|
25711
26569
|
relevant.append(r)
|
|
25712
26570
|
if relevant:
|
|
25713
26571
|
worker_items = relevant
|
|
25714
|
-
|
|
26572
|
+
# Fix 6: Exclude "next-step intent" pending items when all other items are completed.
|
|
26573
|
+
# When the worker completes step N and creates step N+1 subtasks in the same TodoWrite call,
|
|
26574
|
+
# the new pending items get parent_step_id of step N, blocking its advancement.
|
|
26575
|
+
completed_items = [r for r in worker_items if str(r.get("status", "")).lower() == "completed"]
|
|
26576
|
+
pending_items = [r for r in worker_items if str(r.get("status", "")).lower() != "completed"]
|
|
26577
|
+
if completed_items and pending_items:
|
|
26578
|
+
# Check if pending items are content-wise duplicates of completed items
|
|
26579
|
+
# (indicating the worker re-sent the same items but some got stuck as pending)
|
|
26580
|
+
completed_content = {
|
|
26581
|
+
normalize_work_text(str(r.get("content", ""))).strip().lower()
|
|
26582
|
+
for r in completed_items
|
|
26583
|
+
if str(r.get("content", "") or "").strip()
|
|
26584
|
+
}
|
|
26585
|
+
truly_new_pending = [
|
|
26586
|
+
r for r in pending_items
|
|
26587
|
+
if normalize_work_text(str(r.get("content", ""))).strip().lower() not in completed_content
|
|
26588
|
+
]
|
|
26589
|
+
# If all pending items are duplicates of completed items, they don't block
|
|
26590
|
+
if not truly_new_pending:
|
|
26591
|
+
worker_items = completed_items
|
|
26592
|
+
# If there are truly new pending items but all original items are done,
|
|
26593
|
+
# check if the new items match future plan step content
|
|
26594
|
+
elif truly_new_pending and len(completed_items) >= 2:
|
|
26595
|
+
bb = self._ensure_blackboard()
|
|
26596
|
+
future_step_content = set()
|
|
26597
|
+
found_current = False
|
|
26598
|
+
for t in bb.get("project_todos", []):
|
|
26599
|
+
if not isinstance(t, dict) or t.get("category") != "plan_step":
|
|
26600
|
+
continue
|
|
26601
|
+
if str(t.get("id", "") or "") == step_id:
|
|
26602
|
+
found_current = True
|
|
26603
|
+
continue
|
|
26604
|
+
if found_current:
|
|
26605
|
+
fc = str(t.get("full_content", "") or t.get("content", "") or "").strip().lower()
|
|
26606
|
+
future_step_content.add(fc)
|
|
26607
|
+
for line in fc.split("\n"):
|
|
26608
|
+
sl = line.strip().lower()
|
|
26609
|
+
if sl:
|
|
26610
|
+
future_step_content.add(sl)
|
|
26611
|
+
if future_step_content:
|
|
26612
|
+
_still_blocking = []
|
|
26613
|
+
for pi in truly_new_pending:
|
|
26614
|
+
pc = normalize_work_text(str(pi.get("content", ""))).strip().lower()
|
|
26615
|
+
# Check if this pending item's content appears in any future step
|
|
26616
|
+
is_future = any(pc in fsc or fsc in pc for fsc in future_step_content if len(fsc) > 4)
|
|
26617
|
+
if not is_future:
|
|
26618
|
+
_still_blocking.append(pi)
|
|
26619
|
+
if not _still_blocking:
|
|
26620
|
+
worker_items = completed_items
|
|
26621
|
+
all_marked_done = all(str(r.get("status", "")).lower() == "completed" for r in worker_items)
|
|
26622
|
+
if not all_marked_done:
|
|
26623
|
+
return False
|
|
26624
|
+
# Acceptance verification: check that each "completed" subtask has real evidence
|
|
26625
|
+
# Don't just trust the model's TodoWrite status — verify against accumulated tool outputs
|
|
26626
|
+
if worker_items:
|
|
26627
|
+
bb = self._ensure_blackboard()
|
|
26628
|
+
unverified = self._verify_subtasks_acceptance(worker_items, step_id, bb)
|
|
26629
|
+
if unverified:
|
|
26630
|
+
return False
|
|
26631
|
+
return True
|
|
26632
|
+
|
|
26633
|
+
def _verify_subtasks_acceptance(self, subtasks: list[dict], step_id: str, bb: dict) -> list[str]:
|
|
26634
|
+
"""Verify each completed subtask has real evidence. Returns list of unverified subtask descriptions.
|
|
26635
|
+
Checks step_files and execution_logs against what each subtask's content implies."""
|
|
26636
|
+
import re
|
|
26637
|
+
# Gather accumulated evidence for this step
|
|
26638
|
+
step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
|
|
26639
|
+
step_entries = step_files_raw.get(step_id, []) if step_id and isinstance(step_files_raw.get(step_id), list) else []
|
|
26640
|
+
written_paths = set()
|
|
26641
|
+
for entry in step_entries:
|
|
26642
|
+
if isinstance(entry, dict) and str(entry.get("op", "")) in ("write_file", "edit_file"):
|
|
26643
|
+
written_paths.add(str(entry.get("path", "") or "").strip().lower())
|
|
26644
|
+
# Gather bash execution evidence
|
|
26645
|
+
exec_logs = bb.get("execution_logs", [])
|
|
26646
|
+
if not isinstance(exec_logs, list):
|
|
26647
|
+
exec_logs = []
|
|
26648
|
+
bash_outputs_lower = []
|
|
26649
|
+
for log in exec_logs[-30:]:
|
|
26650
|
+
if isinstance(log, dict):
|
|
26651
|
+
c = str(log.get("content", "") or "").lower()
|
|
26652
|
+
if c:
|
|
26653
|
+
bash_outputs_lower.append(c)
|
|
26654
|
+
all_bash_text = " ".join(bash_outputs_lower)
|
|
26655
|
+
negative_hints = ("error:", "failed", "failure", "traceback", "fatal", "not found",
|
|
26656
|
+
"no such file", "command not found", "permission denied")
|
|
26657
|
+
has_bash_failure = any(neg in all_bash_text for neg in negative_hints)
|
|
26658
|
+
# Define acceptance patterns from subtask content
|
|
26659
|
+
_file_create_re = re.compile(
|
|
26660
|
+
r"(?:创建|生成|编写|写入|create|write|generate|implement|scaffold)\s+(.+?)(?:\s|$|,|。|,|\()",
|
|
26661
|
+
re.IGNORECASE,
|
|
26662
|
+
)
|
|
26663
|
+
_run_test_kw = ("运行", "测试", "验证", "test", "pytest", "verify", "validate",
|
|
26664
|
+
"run", "check", "确认", "检查")
|
|
26665
|
+
_compile_kw = ("编译", "构建", "compile", "build", "cmake", "make", "gcc", "gfortran")
|
|
26666
|
+
_install_kw = ("安装", "install", "pip install", "npm install", "apt install")
|
|
26667
|
+
unverified: list[str] = []
|
|
26668
|
+
for st in subtasks:
|
|
26669
|
+
content = str(st.get("content", "") or "").strip()
|
|
26670
|
+
if not content:
|
|
26671
|
+
continue
|
|
26672
|
+
content_lower = content.lower()
|
|
26673
|
+
# Rule 1: If subtask mentions creating a file, check it was actually written
|
|
26674
|
+
m = _file_create_re.search(content)
|
|
26675
|
+
if m:
|
|
26676
|
+
target = m.group(1).strip().strip("\"'`").lower()
|
|
26677
|
+
# Extract just filename from path-like strings
|
|
26678
|
+
if "/" in target:
|
|
26679
|
+
target_parts = [p for p in target.split("/") if p.strip()]
|
|
26680
|
+
target_name = target_parts[-1] if target_parts else target
|
|
26681
|
+
else:
|
|
26682
|
+
target_name = target
|
|
26683
|
+
if target_name and len(target_name) > 2:
|
|
26684
|
+
found = any(target_name in wp for wp in written_paths)
|
|
26685
|
+
if not found:
|
|
26686
|
+
unverified.append(f"file not created: {target_name}")
|
|
26687
|
+
continue
|
|
26688
|
+
# Rule 2: If subtask mentions testing/running/verifying, check bash was executed
|
|
26689
|
+
if any(kw in content_lower for kw in _run_test_kw):
|
|
26690
|
+
if not bash_outputs_lower:
|
|
26691
|
+
unverified.append(f"no bash execution for: {trim(content, 60)}")
|
|
26692
|
+
continue
|
|
26693
|
+
# Check for test failures in recent bash output
|
|
26694
|
+
if has_bash_failure and any(kw in content_lower for kw in ("test", "测试", "pytest")):
|
|
26695
|
+
# Only block if failure keywords appear near test-related content
|
|
26696
|
+
test_related_failures = any(
|
|
26697
|
+
("test" in line or "pytest" in line or "assert" in line)
|
|
26698
|
+
and any(neg in line for neg in negative_hints)
|
|
26699
|
+
for line in bash_outputs_lower[-10:]
|
|
26700
|
+
)
|
|
26701
|
+
if test_related_failures:
|
|
26702
|
+
unverified.append(f"test failures detected for: {trim(content, 60)}")
|
|
26703
|
+
continue
|
|
26704
|
+
# Rule 3: If subtask mentions compiling/building, check bash + no compile errors
|
|
26705
|
+
if any(kw in content_lower for kw in _compile_kw):
|
|
26706
|
+
if not bash_outputs_lower:
|
|
26707
|
+
unverified.append(f"no bash execution for compile: {trim(content, 60)}")
|
|
26708
|
+
continue
|
|
26709
|
+
compile_failures = any(
|
|
26710
|
+
any(neg in line for neg in ("error:", "failed", "failure"))
|
|
26711
|
+
and any(kw in line for kw in ("compil", "build", "cmake", "make", "link"))
|
|
26712
|
+
for line in bash_outputs_lower[-10:]
|
|
26713
|
+
)
|
|
26714
|
+
if compile_failures:
|
|
26715
|
+
unverified.append(f"compile failures for: {trim(content, 60)}")
|
|
26716
|
+
continue
|
|
26717
|
+
# Rule 4: If subtask mentions installing, check bash was run
|
|
26718
|
+
if any(kw in content_lower for kw in _install_kw):
|
|
26719
|
+
if not bash_outputs_lower:
|
|
26720
|
+
unverified.append(f"no bash for install: {trim(content, 60)}")
|
|
26721
|
+
continue
|
|
26722
|
+
# If none of the specific rules matched, the subtask is considered verified
|
|
26723
|
+
# (generic subtasks like "design" or "analyze" don't need tool evidence)
|
|
26724
|
+
return unverified
|
|
26725
|
+
|
|
26726
|
+
def _inject_rework_if_needed(self, plan_step: dict, worker_step: dict):
    """Push a rework instruction when "completed" subtasks fail acceptance.

    Flow:

    1. Collect the completed worker todo rows of ``plan_step`` and run the
       heuristic check ``self._verify_subtasks_acceptance`` on them.
    2. If the heuristics report failures and no rework was injected for this
       step in the last 30 seconds, ask ``self._llm_verify_subtask_acceptance``
       for a second opinion.  An ``all_passed`` verdict cancels the rework.
    3. Otherwise revert matching todo rows to ``in_progress`` and append a
       ``<step-rework>`` guidance bubble listing up to five failed items.

    ``worker_step`` is accepted for signature compatibility and is not read.
    The whole routine is best-effort: any exception is swallowed so that a
    failure here does not break the caller.
    """
    try:
        step_id = str(plan_step.get("id", "") or "")
        if not step_id:
            return
        rows = self._active_plan_worker_todo_rows(step_id, role="")
        completed_rows = [r for r in rows if str(r.get("status", "")).lower() == "completed"]
        if not completed_rows:
            return
        bb = self._ensure_blackboard()
        failures = self._verify_subtasks_acceptance(completed_rows, step_id, bb)
        if not failures:
            return
        # Rate-limit first: inside the 30 s window nothing would be injected
        # anyway, so skip the LLM round-trip entirely.
        _rework_key = f"_rework_injected_{step_id}"
        _last_rework = getattr(self, _rework_key, 0.0)
        if float(now_ts()) - float(_last_rework) < 30.0:
            return
        # LLM-based acceptance check: semantic analysis over heuristics
        llm_verdict = self._llm_verify_subtask_acceptance(plan_step, completed_rows, bb)
        if llm_verdict.get("all_passed", False):
            return
        # The LLM helper always returns a "rework_items" key (an empty list on
        # error), so a plain .get(key, default) never falls back.  Use the
        # heuristic failures whenever the LLM gave no concrete items.
        rework_items = llm_verdict.get("rework_items") or failures
        setattr(self, _rework_key, float(now_ts()))
        step_label = trim(str(plan_step.get("content", "") or ""), 80)
        rework_text = (
            f"<step-rework>\n"
            f"Step \"{step_label}\" acceptance check FAILED. "
            f"The following subtasks were marked completed but did not pass verification:\n"
        )
        for i, item in enumerate(rework_items[:5]):
            rework_text += f" {i+1}. {trim(str(item), 120)}\n"
        rework_text += (
            "\nACTION REQUIRED: Fix these issues NOW before the step can advance.\n"
            "- For missing files: create them with write_file\n"
            "- For failed tests/builds: run the command again and fix errors\n"
            "- For unverified installs: re-run the install command\n"
            "After fixing, update TodoWrite to reflect the corrected state.\n"
            "</step-rework>"
        )
        # Revert false "completed" status back to in_progress
        _snap = self.todo.snapshot()
        _modified = False
        for row in _snap:
            if not isinstance(row, dict):
                continue
            if str(row.get("parent_step_id", "") or "") != step_id:
                continue
            if str(row.get("status", "")).lower() != "completed":
                continue
            rc = str(row.get("content", "") or "").strip().lower()
            for fail in rework_items:
                fail_lower = str(fail).lower()
                # Loose match: the row's first 20 characters, or one of its first
                # three words longer than 3 characters, appears in the failure text.
                if rc and (rc[:20] in fail_lower or any(w in fail_lower for w in rc.split()[:3] if len(w) > 3)):
                    row["status"] = "in_progress"
                    _modified = True
                    break
        if _modified:
            try:
                self.todo.update(_snap)
            except Exception:
                # Best-effort: the guidance bubble below is still worth sending.
                pass
        target_roles: tuple[str, ...] = ()
        if self._is_multi_agent_mode():
            active_role = str(bb.get("active_agent", "") or "developer")
            if active_role:
                target_roles = (active_role,)
        self._append_plan_guidance_bubble(
            rework_text,
            target_roles=target_roles,
            summary=f"step rework: {len(rework_items)} items failed acceptance",
        )
    except Exception:
        # Deliberate best-effort: rework injection must never break step handling.
        pass
|
|
26804
|
+
|
|
26805
|
+
def _llm_verify_subtask_acceptance(self, plan_step: dict, completed_subtasks: list[dict], bb: dict) -> dict:
    """Ask the LLM whether the given subtasks are really done.

    The prompt lists up to eight subtasks, the last file operations recorded
    under ``bb["step_files"][<step id>]`` and the last execution-log excerpts
    from ``bb["execution_logs"]``.  The reply is expected to contain one JSON
    object; everything between the first ``{`` and the last ``}`` is parsed.

    Returns ``{"all_passed": bool, "rework_items": list[str]}``.  The reply is
    normalised defensively:

    * ``all_passed`` is true only for JSON ``true`` or the string ``"true"``
      (a plain ``bool()`` would treat the string ``"false"`` as true);
    * ``rework_items`` given as a single string becomes a one-item list
      instead of being split into characters; non-list values become ``[]``;
      items are stringified and blank ones dropped.

    Any error (LLM call, JSON parsing, unexpected shapes) yields
    ``{"all_passed": False, "rework_items": []}``.
    """
    try:
        step_id = str(plan_step.get("id", "") or "")
        step_files_raw = bb.get("step_files", {}) if isinstance(bb.get("step_files"), dict) else {}
        step_entries = step_files_raw.get(step_id, []) if step_id else []
        files_summary = []
        for entry in (step_entries[-15:] if isinstance(step_entries, list) else []):
            if isinstance(entry, dict):
                files_summary.append(f"{entry.get('op','?')}: {entry.get('path','?')}")
        exec_logs = bb.get("execution_logs", [])
        recent_exec = []
        for log in (exec_logs[-8:] if isinstance(exec_logs, list) else []):
            if isinstance(log, dict):
                c = trim(str(log.get("content", "") or ""), 200)
                if c:
                    recent_exec.append(c)
        subtask_list = "\n".join(
            f"- [{str(st.get('status','')).upper()}] {trim(str(st.get('content','') or ''), 120)}"
            for st in completed_subtasks[:8]
        )
        prompt = (
            "Analyze whether these subtasks are TRULY completed based on the evidence.\n\n"
            f"SUBTASKS:\n{subtask_list}\n\n"
            f"FILES CREATED/MODIFIED:\n{chr(10).join(files_summary[-10:]) or '(none)'}\n\n"
            f"RECENT EXECUTION OUTPUT:\n{chr(10).join(recent_exec[-5:]) or '(none)'}\n\n"
            "For each subtask, determine if it's genuinely done:\n"
            "- File creation tasks: was the file actually created?\n"
            "- Test/verify tasks: was a test/command actually run? Did it pass?\n"
            "- Build/compile tasks: was compilation attempted? Any errors?\n"
            "- Install tasks: was the install command run?\n\n"
            "Reply ONLY as JSON: {\"all_passed\": true/false, \"rework_items\": [\"description of what failed\"]}\n"
            "If all subtasks pass, return {\"all_passed\": true, \"rework_items\": []}"
        )
        resp = self.ollama.chat(
            [{"role": "user", "content": prompt}],
            system="You are a strict QA reviewer. Verify task completion against evidence. Reply ONLY valid JSON.",
            max_tokens=300,
            think=False,
        )
        import json
        text = str(resp.get("text", "") or "").strip()
        if "{" in text:
            # Tolerate chatter around the JSON object; a missing "}" raises
            # ValueError, which falls through to the default below.
            json_str = text[text.index("{"):text.rindex("}") + 1]
            result = json.loads(json_str)
            if isinstance(result, dict):
                verdict = result.get("all_passed", False)
                if isinstance(verdict, str):
                    # Models sometimes quote booleans; "false" must not read as True.
                    passed = verdict.strip().lower() == "true"
                else:
                    passed = verdict is True
                raw_items = result.get("rework_items", [])
                if isinstance(raw_items, str):
                    # A bare string is one item, not a sequence of characters.
                    raw_items = [raw_items]
                elif not isinstance(raw_items, (list, tuple)):
                    raw_items = []
                items = [str(item).strip() for item in raw_items if item is not None]
                return {
                    "all_passed": passed,
                    "rework_items": [item for item in items if item],
                }
    except Exception:
        # Best-effort: every failure is reported as "not passed, nothing specific".
        pass
    return {"all_passed": False, "rework_items": []}
|
|
25715
26859
|
|
|
25716
26860
|
def _collect_step_evidence(self, plan_step: dict, worker_step: dict) -> str:
|
|
25717
26861
|
"""Collect evidence summary from worker step for plan step completion."""
|
|
@@ -25730,11 +26874,19 @@ body{padding:18px}
|
|
|
25730
26874
|
parts.append(f"bash: {cmd}" + (f" => {out}" if out else ""))
|
|
25731
26875
|
elif name == "read_file":
|
|
25732
26876
|
path = str(r.get("args", {}).get("path", "") or "")
|
|
26877
|
+
# Skip plan-infrastructure reads — not meaningful implementation evidence
|
|
26878
|
+
_p = str(path)
|
|
26879
|
+
if (_p.endswith("plan.md") and ".clouds_coder" in _p) or ".clouds_coder/skills_cache/" in _p:
|
|
26880
|
+
continue
|
|
25733
26881
|
out = self._tool_result_output_excerpt(r, 90)
|
|
25734
26882
|
parts.append(f"read: {path}" + (f" => {out}" if out else ""))
|
|
25735
26883
|
elif name in ("write_to_blackboard", "query_code_library", "query_knowledge_library"):
|
|
25736
26884
|
out = self._tool_result_output_excerpt(r, 100)
|
|
25737
26885
|
parts.append(f"{name}" + (f": {out}" if out else ""))
|
|
26886
|
+
if not parts:
|
|
26887
|
+
bb_evidence = self._collect_blackboard_step_evidence(plan_step)
|
|
26888
|
+
if bb_evidence:
|
|
26889
|
+
return bb_evidence
|
|
25738
26890
|
return trim("; ".join(parts) or "post-execution evidence", 200)
|
|
25739
26891
|
|
|
25740
26892
|
def _get_active_plan_step(self, board: dict | None = None) -> dict | None:
|
|
@@ -25797,10 +26949,342 @@ body{padding:18px}
|
|
|
25797
26949
|
return False
|
|
25798
26950
|
return bool(self._active_plan_worker_todo_rows(step_id, role=role))
|
|
25799
26951
|
|
|
26952
|
+
def _bridge_flat_todos_to_active_plan_step(
|
|
26953
|
+
self,
|
|
26954
|
+
rows: list[dict] | None,
|
|
26955
|
+
board: dict | None = None,
|
|
26956
|
+
) -> tuple[list[dict], bool]:
|
|
26957
|
+
bb = board if isinstance(board, dict) else self._ensure_blackboard()
|
|
26958
|
+
step = self._get_active_plan_step(bb)
|
|
26959
|
+
if not isinstance(step, dict):
|
|
26960
|
+
return (list(rows or []), False)
|
|
26961
|
+
step_id = trim(str(step.get("id", "") or ""), 20)
|
|
26962
|
+
if not step_id:
|
|
26963
|
+
return (list(rows or []), False)
|
|
26964
|
+
snap = [dict(row) for row in (rows or []) if isinstance(row, dict)]
|
|
26965
|
+
if not snap:
|
|
26966
|
+
return (snap, False)
|
|
26967
|
+
worker_owners = {"developer", "explorer", "reviewer"}
|
|
26968
|
+
if any(str(row.get("parent_step_id", "") or "").strip() for row in snap):
|
|
26969
|
+
return (snap, False)
|
|
26970
|
+
if any(
|
|
26971
|
+
str(row.get("owner", "") or "").strip().lower() in worker_owners
|
|
26972
|
+
and str(row.get("parent_step_id", "") or "").strip() == step_id
|
|
26973
|
+
for row in snap
|
|
26974
|
+
):
|
|
26975
|
+
return (snap, False)
|
|
26976
|
+
owner_key = self._current_plan_worker_owner(bb)
|
|
26977
|
+
bridged: list[dict] = []
|
|
26978
|
+
migrated = False
|
|
26979
|
+
for row in snap:
|
|
26980
|
+
key = trim(str(row.get("key", "") or "").strip(), 120)
|
|
26981
|
+
if key.startswith("bb:"):
|
|
26982
|
+
bridged.append(dict(row))
|
|
26983
|
+
continue
|
|
26984
|
+
content = normalize_work_text(str(row.get("content", "") or "")) or str(row.get("content", "") or "").strip()
|
|
26985
|
+
if not content:
|
|
26986
|
+
continue
|
|
26987
|
+
new_row = dict(row)
|
|
26988
|
+
new_row["content"] = content
|
|
26989
|
+
new_row["parent_step_id"] = step_id
|
|
26990
|
+
owner = str(new_row.get("owner", "") or "").strip().lower()
|
|
26991
|
+
if owner not in worker_owners:
|
|
26992
|
+
new_row["owner"] = owner_key
|
|
26993
|
+
bridged.append(new_row)
|
|
26994
|
+
migrated = True
|
|
26995
|
+
return (bridged, migrated)
|
|
26996
|
+
|
|
26997
|
+
def _active_plan_progress_snapshot(self, board: dict | None = None) -> dict:
    """Summarise worker-todo progress for the current plan step.

    Args:
        board: Blackboard dict to read from; when it is not a dict the
            blackboard returned by ``self._ensure_blackboard()`` is used.

    Returns:
        A dict with the keys ``step_id``, ``step_index``, ``step_text``,
        ``expected_count``, ``worker_todo_count``, ``completed_count``,
        ``in_progress_count``, ``pending_count``, ``current_subtask``,
        ``next_pending_subtask`` and ``owners``. When there is no current
        plan step row, every field holds its empty/zero default.
    """
    blackboard = board if isinstance(board, dict) else self._ensure_blackboard()
    active_step = self._current_plan_step_row(blackboard)

    summary = {
        "step_id": "",
        "step_index": 0,
        "step_text": "",
        "expected_count": 0,
        "worker_todo_count": 0,
        "completed_count": 0,
        "in_progress_count": 0,
        "pending_count": 0,
        "current_subtask": "",
        "next_pending_subtask": "",
        "owners": [],
    }
    if not isinstance(active_step, dict):
        return summary

    step_id = trim(str(active_step.get("id", "") or ""), 20)
    worker_rows = []
    if step_id:
        worker_rows = self._active_plan_worker_todo_rows(step_id, role="")
    expected_subtasks = self._extract_plan_step_subtasks(active_step, limit=5)

    # Any status other than "completed" / "in_progress" is counted as pending.
    tally = {"completed": 0, "in_progress": 0, "pending": 0}
    # First non-empty content seen for the in-progress and pending buckets.
    first_text = {"in_progress": "", "pending": ""}
    seen_owners: set[str] = set()
    for todo in worker_rows:
        state = str(todo.get("status", "pending") or "pending").strip().lower()
        text = trim(str(todo.get("content", "") or "").strip(), 220)
        owner_role = self._sanitize_agent_role(todo.get("owner", ""))
        if owner_role:
            seen_owners.add(owner_role)
        bucket = state if state in ("completed", "in_progress") else "pending"
        tally[bucket] += 1
        if bucket != "completed" and text and not first_text[bucket]:
            first_text[bucket] = text

    summary["step_id"] = step_id
    summary["step_index"] = max(0, int(active_step.get("plan_step_index", 0) or 0))
    summary["step_text"] = self._current_plan_step_text(blackboard)
    summary["expected_count"] = len(expected_subtasks)
    summary["worker_todo_count"] = len(worker_rows)
    summary["completed_count"] = tally["completed"]
    summary["in_progress_count"] = tally["in_progress"]
    summary["pending_count"] = tally["pending"]
    summary["current_subtask"] = first_text["in_progress"]
    summary["next_pending_subtask"] = first_text["pending"]
    summary["owners"] = sorted(seen_owners)
    return summary
|
|
27052
|
+
|
|
27053
|
+
def _manager_worker_progress_capsule(self, role: str, step: dict, board: dict | None = None) -> str:
    """Build a single-line ``[worker-progress]`` summary for a worker step.

    The summary is a `` | ``-joined list of ``key=value`` fields: owner,
    status, up to five distinct tool names, the current plan step text,
    todo counts (completed/in_progress/pending, or ``missing/<expected>``
    when no worker todos exist), the current or next subtask, blackboard
    evidence, an ``acceptance=ready`` marker, and the last worker reply
    when it came from the same role.

    Args:
        role: Worker role; falls back to ``"developer"`` when it
            sanitises to an empty value.
        step: Worker step dict (``status`` and ``tool_results`` are read);
            a non-dict value is treated as an empty dict.
        board: Blackboard dict; when not a dict,
            ``self._ensure_blackboard()`` is used.

    Returns:
        The summary string, trimmed to 1600 characters.
    """
    blackboard = board if isinstance(board, dict) else self._ensure_blackboard()
    owner_role = self._sanitize_agent_role(role) or "developer"
    step_info = step if isinstance(step, dict) else {}
    progress = self._active_plan_progress_snapshot(blackboard)

    # Collect up to five distinct tool names in first-seen order.
    used_tools: list[str] = []
    for result in step_info.get("tool_results", []) or []:
        if not isinstance(result, dict):
            continue
        tool = str(result.get("name", "") or "").strip()
        if tool and tool not in used_tools:
            used_tools.append(tool)
            if len(used_tools) >= 5:
                break

    status_text = trim(str(step_info.get("status", "") or ""), 40) or "-"
    fields = [f"[worker-progress] owner={owner_role}", f"status={status_text}"]
    if used_tools:
        fields.append("tools=" + ",".join(used_tools))

    plan_text = trim(str(progress.get("step_text", "") or ""), 180)
    if plan_text:
        fields.append(f"step={plan_text}")

    done = int(progress.get("completed_count", 0) or 0)
    active = int(progress.get("in_progress_count", 0) or 0)
    waiting = int(progress.get("pending_count", 0) or 0)
    if int(progress.get("worker_todo_count", 0) or 0) > 0:
        fields.append(f"todos={done}/{active}/{waiting}")
    else:
        expected_total = int(progress.get("expected_count", 0) or 0)
        if expected_total > 0:
            fields.append(f"todos=missing/{expected_total}")

    # Prefer the in-progress subtask; otherwise advertise the next pending one.
    current_focus = trim(str(progress.get("current_subtask", "") or ""), 160)
    if current_focus:
        fields.append(f"focus={current_focus}")
    else:
        upcoming = str(progress.get("next_pending_subtask", "") or "")
        if upcoming.strip():
            fields.append(f"next={trim(upcoming, 160)}")

    plan_row = self._current_plan_step_row(blackboard)
    if isinstance(plan_row, dict):
        evidence_text = self._collect_blackboard_step_evidence(plan_row, blackboard)
        if evidence_text:
            fields.append(f"evidence={trim(evidence_text, 180)}")
        if self._step_subtasks_all_completed(plan_row) and self._plan_step_has_blackboard_evidence(plan_row, blackboard):
            fields.append("acceptance=ready")

    last_reply = blackboard.get("last_worker_reply")
    if not isinstance(last_reply, dict):
        last_reply = {}
    if self._sanitize_agent_role(last_reply.get("role", "")) == owner_role:
        said = trim(str(last_reply.get("text", "") or "").strip(), 180)
        if said:
            fields.append(f"reply={said}")

    return trim(" | ".join(fields), 1600)
|
|
27105
|
+
|
|
27106
|
+
def _manager_recovery_route_for_repeated_delegate(self, route: dict, board: dict | None = None) -> dict:
    """Rewrite a repeated delegation route into a mandatory recovery route.

    A copy of ``route`` is returned with ``target``, ``instruction``,
    ``reason`` (existing reason plus a ``|loop-recovery-*`` suffix),
    ``source`` (``"loop-recovery"``) and ``is_mandatory`` (``True``) set.

    Three outcomes are possible:
      * no current plan step: route to the developer with a generic
        "continue" instruction;
      * subtasks complete, blackboard evidence present and a reviewer
        available: route to the reviewer for acceptance of this step;
      * otherwise: route to a developer/explorer with an instruction to
        create missing todos, continue the in-progress subtask, or resume
        the next pending one.

    Args:
        route: The route being repeated; ``None``/empty is treated as ``{}``.
        board: Blackboard dict; when not a dict,
            ``self._ensure_blackboard()`` is used.

    Returns:
        The rewritten route dict (the input dict is not mutated).
    """
    blackboard = board if isinstance(board, dict) else self._ensure_blackboard()
    recovered = dict(route or {})
    opening = "Recovery routing after repeated identical delegation. "

    def finish(target: str, instruction: str, reason_tag: str) -> dict:
        # Stamp the fields shared by every recovery outcome onto the copy.
        recovered["target"] = target
        recovered["instruction"] = instruction
        recovered["reason"] = trim(f"{recovered.get('reason', '')}|{reason_tag}", 600)
        recovered["source"] = "loop-recovery"
        recovered["is_mandatory"] = True
        return recovered

    plan_row = self._current_plan_step_row(blackboard)
    if not isinstance(plan_row, dict):
        return finish(
            "developer",
            opening
            + "Continue the current objective with one concrete tool action and write observable progress.",
            "loop-recovery-no-plan-step",
        )

    progress = self._active_plan_progress_snapshot(blackboard)
    step_text = trim(str(progress.get("step_text", "") or ""), 220)
    full_text = self._current_plan_step_full_text(blackboard, max_len=600)
    phase = self._plan_step_phase_hint(full_text)
    expected_count = int(progress.get("expected_count", 0) or 0)
    worker_todo_count = int(progress.get("worker_todo_count", 0) or 0)
    in_progress_text = trim(str(progress.get("current_subtask", "") or ""), 180)
    pending_text = trim(str(progress.get("next_pending_subtask", "") or ""), 180)
    subtasks_done = self._step_subtasks_all_completed(plan_row)
    has_evidence = self._plan_step_has_blackboard_evidence(plan_row, blackboard)

    # With no declared participants the reviewer is assumed to be available.
    profile = self._ensure_blackboard_task_profile(blackboard)
    declared = profile.get("participants", []) if isinstance(profile.get("participants"), list) else []
    roles = [name for name in (self._sanitize_agent_role(entry) for entry in declared) if name]
    reviewer_available = (not roles) or ("reviewer" in roles)

    if subtasks_done and has_evidence and reviewer_available:
        acceptance_instruction = trim(
            opening
            + f"Validate ONLY the current plan step: {step_text}. "
            + "Worker subtasks are complete and blackboard evidence already exists. "
            + "Run acceptance for this step only, record pass/fix with concrete evidence, and do not jump ahead.",
            1200,
        )
        return finish("reviewer", acceptance_instruction, "loop-recovery-acceptance")

    fallback_owner = "explorer" if phase in {"research", "design"} else "developer"
    worker = self._sanitize_agent_role(recovered.get("target", "")) or self._current_plan_worker_owner(blackboard)
    if worker not in {"developer", "explorer"}:
        worker = fallback_owner

    if expected_count > 0 and worker_todo_count == 0:
        action_text = (
            "First call TodoWrite for the current plan step and create the missing worker subtasks "
            "before any more implementation."
        )
    elif in_progress_text:
        action_text = f"Continue ONLY the current in_progress subtask: {in_progress_text}."
    elif pending_text:
        action_text = f"Resume the next pending subtask: {pending_text}."
    else:
        action_text = "Continue the current plan step with one concrete tool action."

    evidence_note = ""
    if has_evidence:
        evidence_note = (
            " Blackboard already contains partial evidence for this step; build on that work instead of restarting."
        )

    execute_instruction = trim(
        opening
        + f"Stay on the current plan step: {step_text}. "
        + f"{action_text} "
        + "Do not branch to a different step or restate the whole plan. "
        + "After the subtask is finished, immediately call TodoWrite to mark it completed and move the next subtask to in_progress."
        + evidence_note,
        1200,
    )
    return finish(worker, execute_instruction, "loop-recovery-execute")
|
|
27188
|
+
|
|
27189
|
+
def _todo_has_plan_steps(self, board: dict | None = None) -> bool:
|
|
27190
|
+
bb = board if isinstance(board, dict) else self._ensure_blackboard()
|
|
27191
|
+
todos = bb.get("project_todos", []) if isinstance(bb.get("project_todos"), list) else []
|
|
27192
|
+
return any(
|
|
27193
|
+
isinstance(todo, dict) and todo.get("category") == "plan_step"
|
|
27194
|
+
for todo in todos
|
|
27195
|
+
)
|
|
27196
|
+
|
|
27197
|
+
def _todo_worker_role_hint(self, role: str = "", board: dict | None = None) -> str:
    """Resolve the worker role to use for todo routing.

    Args:
        role: Requested role; it is sanitised before use.
        board: Blackboard passed through to
            ``self._current_plan_worker_owner`` when a fallback is needed.

    Returns:
        The sanitised role when it is ``developer``, ``explorer`` or
        ``reviewer``; otherwise whatever
        ``self._current_plan_worker_owner(board)`` returns.
    """
    sanitized = self._sanitize_agent_role(role)
    if sanitized not in {"developer", "explorer", "reviewer"}:
        return self._current_plan_worker_owner(board)
    return sanitized
|
|
27202
|
+
|
|
27203
|
+
def _todo_route_kind(self, role: str = "", board: dict | None = None) -> str:
    """Classify how todo updates should be routed.

    Args:
        role: Role of the agent writing todos.
        board: Blackboard dict; when not a dict,
            ``self._ensure_blackboard()`` is used.

    Returns:
        ``"plan_sync"`` / ``"plan_single"`` when plan-step todos exist
        (multi-agent mode or not), ``"pure_sync"`` when there is no plan,
        multi-agent mode is on and the role resolves to a worker role,
        and ``"pure_single"`` in every other case.
    """
    blackboard = board if isinstance(board, dict) else self._ensure_blackboard()

    if self._todo_has_plan_steps(blackboard):
        if self._is_multi_agent_mode():
            return "plan_sync"
        return "plan_single"

    hinted_role = self._todo_worker_role_hint(role, blackboard)
    if not self._is_multi_agent_mode():
        return "pure_single"
    if hinted_role in {"developer", "explorer", "reviewer"}:
        return "pure_sync"
    return "pure_single"
|
|
27211
|
+
|
|
27212
|
+
def _todo_row_kind(self, row: dict | None) -> str:
|
|
27213
|
+
if not isinstance(row, dict):
|
|
27214
|
+
return ""
|
|
27215
|
+
key = str(row.get("key", "") or "").strip()
|
|
27216
|
+
if key.startswith("bb:"):
|
|
27217
|
+
return "system"
|
|
27218
|
+
owner = str(row.get("owner", "") or "").strip().lower()
|
|
27219
|
+
parent_step_id = str(row.get("parent_step_id", "") or "").strip()
|
|
27220
|
+
if owner in {"developer", "explorer", "reviewer"} and parent_step_id:
|
|
27221
|
+
return "plan_worker"
|
|
27222
|
+
if owner in {"developer", "explorer", "reviewer"}:
|
|
27223
|
+
return "owner_worker"
|
|
27224
|
+
return "flat"
|
|
27225
|
+
|
|
27226
|
+
def _todo_route_rows(
    self,
    route_kind: str,
    *,
    rows: list[dict] | None = None,
    role: str = "",
    board: dict | None = None,
) -> list[dict]:
    """Select the todo rows that belong to a given route kind.

    Args:
        route_kind: One of ``plan_single``, ``plan_sync``, ``pure_sync``
            or ``pure_single``; any other value selects nothing.
        rows: Rows to filter; when not a list, ``self.todo.snapshot()``
            is used. Dict rows are shallow-copied, non-dict rows dropped.
        role: Role used to scope ``pure_sync`` rows to one owner.
        board: Blackboard dict; when not a dict,
            ``self._ensure_blackboard()`` is used.

    Returns:
        For plan routes, the ``plan_worker`` rows whose ``parent_step_id``
        matches the active plan step (empty when the step has no id).
        For ``pure_sync``, the ``owner_worker`` rows of the hinted role,
        or all ``owner_worker`` rows when that role owns none.
        For ``pure_single``, the ``flat`` rows.
    """
    blackboard = board if isinstance(board, dict) else self._ensure_blackboard()
    source_rows = rows if isinstance(rows, list) else self.todo.snapshot()
    copies = [dict(entry) for entry in source_rows if isinstance(entry, dict)]

    def rows_of_kind(kind: str) -> list[dict]:
        # Keep only the copied rows that classify as ``kind``.
        return [entry for entry in copies if self._todo_row_kind(entry) == kind]

    if route_kind in {"plan_single", "plan_sync"}:
        active_step = self._get_active_plan_step(blackboard)
        active_id = trim(str((active_step or {}).get("id", "") or ""), 20)
        if not active_id:
            return []
        return [
            entry
            for entry in rows_of_kind("plan_worker")
            if str(entry.get("parent_step_id", "") or "").strip() == active_id
        ]

    if route_kind == "pure_sync":
        hinted_role = self._todo_worker_role_hint(role, blackboard)
        worker_rows = rows_of_kind("owner_worker")
        if hinted_role in {"developer", "explorer", "reviewer"}:
            own_rows = [
                entry
                for entry in worker_rows
                if str(entry.get("owner", "") or "").strip().lower() == hinted_role
            ]
            if own_rows:
                return own_rows
        return worker_rows

    if route_kind == "pure_single":
        return rows_of_kind("flat")

    return []
|
|
27260
|
+
|
|
25800
27261
|
def _todo_runtime_has_worker_rows(self, role: str = "") -> bool:
    """Return ``True`` when the todo route for ``role`` currently has any rows.

    The route kind is resolved with ``self._todo_route_kind`` and the
    matching rows are looked up with ``self._todo_route_rows``.
    """
    matching_rows = self._todo_route_rows(self._todo_route_kind(role=role), role=role)
    return True if matching_rows else False
|
|
27264
|
+
|
|
27265
|
+
def _merge_todo_signal_rows(self, items: list[dict], role: str = "", board: dict | None = None) -> str:
    """Normalise incoming todo rows and hand them to the route-specific merger.

    Each dict item is copied; a worker role fills in a missing ``owner``,
    and on plan routes the active step id fills in a missing
    ``parent_step_id``. Non-dict items are ignored.

    Args:
        items: Incoming todo rows (``None`` is treated as empty).
        role: Role of the agent that produced the rows.
        board: Blackboard dict; when not a dict,
            ``self._ensure_blackboard()`` is used.

    Returns:
        ``self.todo.no_changes_text()`` when nothing usable was supplied,
        otherwise the result of the plan-worker, owner-scoped or flat
        merge, chosen by route kind.
    """
    blackboard = board if isinstance(board, dict) else self._ensure_blackboard()
    sanitized_role = self._sanitize_agent_role(role)
    kind = self._todo_route_kind(role=sanitized_role, board=blackboard)
    on_plan_route = kind in {"plan_single", "plan_sync"}

    active_step = self._get_active_plan_step(blackboard) if on_plan_route else None
    active_id = trim(str((active_step or {}).get("id", "") or ""), 20)

    prepared: list[dict] = []
    for incoming in items or []:
        if not isinstance(incoming, dict):
            continue
        entry = dict(incoming)
        owner_missing = not str(entry.get("owner", "") or "").strip()
        if sanitized_role in {"developer", "explorer", "reviewer"} and owner_missing:
            entry["owner"] = sanitized_role
        parent_missing = not str(entry.get("parent_step_id", "") or "").strip()
        if active_id and parent_missing:
            entry["parent_step_id"] = active_id
        prepared.append(entry)

    if not prepared:
        return self.todo.no_changes_text()
    if on_plan_route:
        return self._merge_plan_worker_todo_items(prepared, role=sanitized_role)
    if kind == "pure_sync":
        return self._merge_owner_scoped_todo_items(prepared, role=sanitized_role)
    return self._merge_flat_todo_items(prepared, role=sanitized_role)
|
|
25804
27288
|
|
|
25805
27289
|
def _plan_worker_todo_identity(self, row: dict | None) -> str:
|
|
25806
27290
|
import re
|
|
@@ -25816,6 +27300,96 @@ body{padding:18px}
|
|
|
25816
27300
|
return f"substep:{match.group(1)}"
|
|
25817
27301
|
return f"text:{content}"
|
|
25818
27302
|
|
|
27303
|
+
def _flat_todo_identity(self, row: dict | None) -> str:
|
|
27304
|
+
import re
|
|
27305
|
+
|
|
27306
|
+
if not isinstance(row, dict):
|
|
27307
|
+
return ""
|
|
27308
|
+
key = trim(str(row.get("key", "") or "").strip(), 120)
|
|
27309
|
+
if key.startswith("bb:"):
|
|
27310
|
+
return f"system:{key}"
|
|
27311
|
+
content = normalize_work_text(str(row.get("content", "") or "")) or str(row.get("content", "") or "")
|
|
27312
|
+
content = re.sub(r"\s+", " ", content.strip().lower())
|
|
27313
|
+
if not content:
|
|
27314
|
+
return ""
|
|
27315
|
+
match = re.match(r"^(\d+\.\d+)\b", content)
|
|
27316
|
+
if match:
|
|
27317
|
+
return f"substep:{match.group(1)}"
|
|
27318
|
+
return f"text:{content}"
|
|
27319
|
+
|
|
27320
|
+
def _merge_flat_todo_items(self, items: list[dict], role: str = "") -> str:
    """Merge incoming flat todo items into the todo list.

    Existing system rows (``bb:`` keys) are kept, incoming ``bb:`` rows
    are passed through untouched, and every other incoming item is
    normalised and merged onto the existing flat row with the same
    identity (see ``_flat_todo_identity``). Only the first incoming item
    per identity is kept.

    Compared with the previous implementation, a null or blank
    ``content`` no longer becomes the literal text ``"None"`` or hides a
    usable ``text`` / ``title`` value: the first non-blank field among
    ``content``, ``text``, ``title`` is used.

    Args:
        items: Incoming todo entries; each is a dict or a plain string
            (treated as ``{"content": <string>}``).
        role: Role of the writer; a ``manager`` writer becomes the owner
            of items that carry no owner.

    Returns:
        Whatever ``self.todo.update(...)`` returns for the merged rows.

    Raises:
        ValueError: If ``items`` is not a list, or an entry is neither a
            string nor a dict.
    """
    if not isinstance(items, list):
        raise ValueError("items must be array")

    def first_text(source: dict, *names: str) -> str:
        # First non-blank value among ``names``; None and blank values fall
        # through so a null field is never rendered as the text "None".
        for name in names:
            value = source.get(name)
            if value is None:
                continue
            text = str(value).strip()
            if text:
                return text
        return ""

    role_key = self._sanitize_agent_role(role)
    existing = self.todo.snapshot()
    route_existing = self._todo_route_rows("pure_single", rows=existing, role=role_key)

    # System rows are always carried over unchanged.
    preserved_system: list[dict] = [
        dict(row) for row in existing if self._todo_row_kind(row) == "system"
    ]

    # Index existing flat rows by identity; the first row per identity wins.
    existing_by_identity: dict[str, dict] = {}
    for row in route_existing:
        if not isinstance(row, dict):
            continue
        identity = self._flat_todo_identity(row)
        if identity and identity not in existing_by_identity:
            existing_by_identity[identity] = dict(row)

    status_alias = {
        "todo": "pending",
        "doing": "in_progress",
        "inprogress": "in_progress",
        "in-progress": "in_progress",
        "done": "completed",
        "finish": "completed",
        "finished": "completed",
    }
    passthrough_rows: list[dict] = []
    merged_rows: list[dict] = []
    seen_identities: set[str] = set()

    for idx, item in enumerate(items):
        if isinstance(item, str):
            raw = {"content": item}
        elif isinstance(item, dict):
            raw = dict(item)
        else:
            raise ValueError(f"item {idx}: invalid type")

        key = trim(str(raw.get("key", "") or "").strip(), 120)
        if key.startswith("bb:"):
            passthrough_rows.append(raw)
            continue

        raw_content = first_text(raw, "content", "text", "title")
        content = normalize_work_text(raw_content) or raw_content
        if not content:
            continue

        normalized: dict[str, object] = {"content": content}
        raw_status = str(raw.get("status", raw.get("state", "")) or "").strip().lower()
        if raw_status:
            normalized["status"] = status_alias.get(raw_status, raw_status)

        owner = str(raw.get("owner", "") or "").strip().lower()
        if owner in {"manager", "explorer", "developer", "reviewer"}:
            normalized["owner"] = owner
        elif role_key == "manager" and owner == "":
            normalized["owner"] = role_key

        active_form = str(raw.get("activeForm", raw.get("active_form", "")) or "").strip()
        if active_form:
            normalized["activeForm"] = active_form

        identity = self._flat_todo_identity(normalized)
        if not identity:
            identity = f"ad-hoc:{idx}:{trim(content, 80)}"
        # Skip duplicates before doing any merge work.
        if identity in seen_identities:
            continue
        seen_identities.add(identity)

        merged = dict(existing_by_identity.get(identity, {}))
        # An update without activeForm clears a stale one from the old row.
        if "activeForm" not in normalized:
            merged.pop("activeForm", None)
        merged.update(normalized)
        merged_rows.append(merged)

    return self.todo.update(preserved_system + passthrough_rows + merged_rows)
|
|
27392
|
+
|
|
25819
27393
|
def _merge_plan_worker_todo_items(self, items: list[dict], role: str = "") -> str:
|
|
25820
27394
|
if not isinstance(items, list):
|
|
25821
27395
|
raise ValueError("items must be array")
|
|
@@ -25833,6 +27407,12 @@ body{padding:18px}
|
|
|
25833
27407
|
for row in existing:
|
|
25834
27408
|
if not isinstance(row, dict):
|
|
25835
27409
|
continue
|
|
27410
|
+
row_kind = self._todo_row_kind(row)
|
|
27411
|
+
if row_kind == "system":
|
|
27412
|
+
preserved.append(dict(row))
|
|
27413
|
+
continue
|
|
27414
|
+
if row_kind != "plan_worker":
|
|
27415
|
+
continue
|
|
25836
27416
|
owner = str(row.get("owner", "") or "").strip().lower()
|
|
25837
27417
|
row_step_id = trim(str(row.get("parent_step_id", "") or ""), 20)
|
|
25838
27418
|
if owner in worker_owners and row_step_id == step_id:
|
|
@@ -25842,10 +27422,13 @@ body{padding:18px}
|
|
|
25842
27422
|
|
|
25843
27423
|
merged_by_identity: dict[str, dict] = {}
|
|
25844
27424
|
ordered_identities: list[str] = []
|
|
27425
|
+
# Fix 2: Compute existing identities for next-step detection
|
|
27426
|
+
_existing_identities: set[str] = set()
|
|
25845
27427
|
for row in target_rows:
|
|
25846
27428
|
identity = self._plan_worker_todo_identity(row)
|
|
25847
27429
|
if not identity:
|
|
25848
27430
|
continue
|
|
27431
|
+
_existing_identities.add(identity)
|
|
25849
27432
|
if identity not in merged_by_identity:
|
|
25850
27433
|
merged_by_identity[identity] = dict(row)
|
|
25851
27434
|
ordered_identities.append(identity)
|
|
@@ -25883,12 +27466,52 @@ body{padding:18px}
|
|
|
25883
27466
|
merged.update(row)
|
|
25884
27467
|
merged["owner"] = str(merged.get("owner", "") or role_key).strip().lower() or role_key
|
|
25885
27468
|
merged["parent_step_id"] = trim(str(merged.get("parent_step_id", "") or step_id), 20) or step_id
|
|
27469
|
+
# Fix 2 support: Timestamp new items for next-step detection
|
|
27470
|
+
if identity not in _existing_identities and "created_at" not in merged:
|
|
27471
|
+
merged["created_at"] = float(now_ts())
|
|
27472
|
+
if str(merged.get("status", "")).lower() == "completed" and "updated_at" not in merged:
|
|
27473
|
+
merged["updated_at"] = float(now_ts())
|
|
25886
27474
|
merged_by_identity[identity] = merged
|
|
25887
27475
|
if identity not in ordered_identities:
|
|
25888
27476
|
ordered_identities.append(identity)
|
|
25889
27477
|
|
|
25890
27478
|
merged_target_rows = [merged_by_identity[i] for i in ordered_identities if i in merged_by_identity]
|
|
25891
|
-
|
|
27479
|
+
|
|
27480
|
+
# Fix 4: Content-based deduplication to prevent duplicate subtasks from accumulating
|
|
27481
|
+
_seen_content: set[str] = set()
|
|
27482
|
+
_deduped_target: list[dict] = []
|
|
27483
|
+
for row in merged_target_rows:
|
|
27484
|
+
_ck = normalize_work_text(str(row.get("content", ""))).strip().lower()
|
|
27485
|
+
if _ck in _seen_content:
|
|
27486
|
+
continue
|
|
27487
|
+
_seen_content.add(_ck)
|
|
27488
|
+
_deduped_target.append(row)
|
|
27489
|
+
merged_target_rows = _deduped_target
|
|
27490
|
+
|
|
27491
|
+
# Fix 2: Detect "next-step intent" — if all existing items are completed,
|
|
27492
|
+
# new pending items that don't match existing identities are for the next step.
|
|
27493
|
+
# Remove their parent_step_id so they don't block current step advancement.
|
|
27494
|
+
_all_existing_done = (
|
|
27495
|
+
bool(target_rows) and
|
|
27496
|
+
all(str(r.get("status", "")).lower() == "completed" for r in target_rows)
|
|
27497
|
+
)
|
|
27498
|
+
if _all_existing_done:
|
|
27499
|
+
for row in merged_target_rows:
|
|
27500
|
+
_rid = self._plan_worker_todo_identity(row)
|
|
27501
|
+
if (_rid and _rid not in _existing_identities
|
|
27502
|
+
and str(row.get("status", "")).lower() != "completed"):
|
|
27503
|
+
row.pop("parent_step_id", None) # Not for current step
|
|
27504
|
+
|
|
27505
|
+
# Insert merged_target_rows right after the active plan step's bb: row in preserved,
|
|
27506
|
+
# so subtasks appear nested under their parent step rather than at the list bottom.
|
|
27507
|
+
_step_key = str(active_step.get("key", "") or "")
|
|
27508
|
+
_insert_idx = len(preserved) # fallback: append at end
|
|
27509
|
+
if _step_key:
|
|
27510
|
+
for _i, _r in enumerate(preserved):
|
|
27511
|
+
if str(_r.get("key", "") or "") == _step_key:
|
|
27512
|
+
_insert_idx = _i + 1
|
|
27513
|
+
break
|
|
27514
|
+
final_rows = preserved[:_insert_idx] + passthrough_rows + merged_target_rows + preserved[_insert_idx:]
|
|
25892
27515
|
return self.todo.update(final_rows)
|
|
25893
27516
|
|
|
25894
27517
|
def _merge_owner_scoped_todo_items(self, items: list[dict], role: str = "") -> str:
|
|
@@ -26115,7 +27738,7 @@ body{padding:18px}
|
|
|
26115
27738
|
content = self._build_plan_todo_reminder_text(plan_step, missing_subtasks=missing_subtasks)
|
|
26116
27739
|
if not content:
|
|
26117
27740
|
return False
|
|
26118
|
-
self.
|
|
27741
|
+
self._append_plan_guidance_bubble(content, summary="todo reminder")
|
|
26119
27742
|
self.last_todo_reminder_ts = now_tick
|
|
26120
27743
|
self.todo_reminder_count += 1
|
|
26121
27744
|
self._emit(
|
|
@@ -26134,7 +27757,7 @@ body{padding:18px}
|
|
|
26134
27757
|
|
|
26135
27758
|
if not isinstance(plan_step, dict):
|
|
26136
27759
|
return []
|
|
26137
|
-
raw =
|
|
27760
|
+
raw = normalize_embedded_newlines(plan_step.get("full_content", "") or plan_step.get("content", "") or "")
|
|
26138
27761
|
if not raw.strip():
|
|
26139
27762
|
return []
|
|
26140
27763
|
lines = [trim(str(line or "").strip(), 300) for line in raw.replace("\r\n", "\n").split("\n")]
|
|
@@ -26264,6 +27887,126 @@ body{padding:18px}
|
|
|
26264
27887
|
self.todo.items = preserved + replacement
|
|
26265
27888
|
return True
|
|
26266
27889
|
|
|
27890
|
+
def _check_step_verified_tag(self, plan_step: dict, *, messages: list | None = None) -> bool:
|
|
27891
|
+
"""Return True if the agent has emitted <step-verified> in any assistant message
|
|
27892
|
+
since this plan step was activated (i.e., after plan_step['activated_at']).
|
|
27893
|
+
Pass messages=self.agent_messages for sync mode; defaults to self.messages."""
|
|
27894
|
+
activated_at = float(plan_step.get("activated_at", 0.0) or 0.0)
|
|
27895
|
+
msg_list = messages if messages is not None else self.messages
|
|
27896
|
+
for msg in reversed(msg_list):
|
|
27897
|
+
if not isinstance(msg, dict):
|
|
27898
|
+
continue
|
|
27899
|
+
msg_ts = float(msg.get("ts", 0.0) or 0.0)
|
|
27900
|
+
# Stop once we reach messages predating step activation
|
|
27901
|
+
if activated_at > 0 and msg_ts > 0 and msg_ts < activated_at:
|
|
27902
|
+
break
|
|
27903
|
+
if msg.get("role") == "assistant":
|
|
27904
|
+
content = str(msg.get("content", "") or "")
|
|
27905
|
+
if "<step-verified" in content:
|
|
27906
|
+
return True
|
|
27907
|
+
return False
|
|
27908
|
+
|
|
27909
|
+
def _single_mode_validation_gate(self, plan_step: dict, tool_results: list[dict]) -> bool:
    """Decide whether a plan step may advance in single-agent mode.

    The gate passes (and remembers that it passed, via a per-step
    attribute on ``self``) when any of the following holds:
      * it already passed for this step;
      * the step's phase hint is ``research`` or ``design``;
      * the gate has already blocked this step 10 times (escape hatch
        against a permanent stall);
      * ``self._check_step_verified_tag`` finds a ``<step-verified`` tag.

    Otherwise the per-step block counter is incremented, a verification
    hint is injected, and the gate blocks.

    Args:
        plan_step: The plan step being gated.
        tool_results: Accepted for interface compatibility; not read here.

    Returns:
        ``True`` to let the step advance, ``False`` to block it.
    """
    step_id = str(plan_step.get("id", "") or "")
    passed_attr = f"_smvg_{step_id}"
    if getattr(self, passed_attr, False):
        return True

    def approve() -> bool:
        # Remember the pass so later rounds short-circuit.
        setattr(self, passed_attr, True)
        return True

    step_text = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "").lower()
    if self._plan_step_phase_hint(step_text) in ("research", "design"):
        return approve()

    counter_attr = f"_smvg_n_{step_id}"
    times_blocked = int(getattr(self, counter_attr, 0))
    if times_blocked >= 10 or self._check_step_verified_tag(plan_step):
        return approve()

    setattr(self, counter_attr, times_blocked + 1)
    self._inject_single_mode_validation_hint(plan_step)
    return False
|
|
27936
|
+
|
|
27937
|
+
def _inject_single_mode_validation_hint(self, plan_step: dict):
    """Append a ``<verification-required>`` user message to ``self.messages``.

    The hint tells the model to run the relevant command, check the
    output against the step's acceptance criteria, and emit
    ``<step-verified/>`` on success. It is rate-limited to once per 20
    seconds per step, and is not appended when one of the last five
    messages already contains ``<verification-required>``.

    Args:
        plan_step: Plan step dict; ``id``, ``content`` and
            ``full_content`` are read.
    """
    stamp_attr = f"_smvg_ts_{str(plan_step.get('id', '') or '')}"
    previous = float(getattr(self, stamp_attr, 0.0))
    if float(now_ts()) - previous < 20.0:
        return
    # The rate-limit stamp is refreshed even if the message is later suppressed.
    setattr(self, stamp_attr, float(now_ts()))

    label = trim(str(plan_step.get("content", "") or ""), 80)
    body = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "")

    # Take the text after the first "ACCEPTANCE:" line prefix (case-insensitive).
    marker = "ACCEPTANCE:"
    stripped_lines = (raw_line.strip() for raw_line in body.splitlines())
    criteria = next(
        (text[len(marker):].strip() for text in stripped_lines if text.upper().startswith(marker)),
        "",
    )

    if self._plan_step_phase_hint(body.lower()) == "test":
        todo_action = "run the tests with bash and evaluate the results"
    else:
        todo_action = "run the build/compile/run command with bash and evaluate the output"
    criteria_suffix = f"\nAcceptance criteria: {criteria}" if criteria else ""

    hint = "\n".join(
        [
            "<verification-required>",
            f'All subtasks for "{label}" are marked complete.{criteria_suffix}',
            "Before this step can advance, you must:",
            f"1. {todo_action}",
            "2. Review the bash output and confirm it meets the acceptance criteria",
            "3. If it passes, emit exactly: <step-verified/>",
            "4. If it fails, fix the issue and retry — do NOT emit <step-verified/> until resolved",
            "</verification-required>",
        ]
    )

    already_hinted = False
    for recent in self.messages[-5:]:
        if isinstance(recent, dict) and "<verification-required>" in str(recent.get("content", "") or ""):
            already_hinted = True
            break
    if not already_hinted:
        self.messages.append({"role": "user", "content": hint, "ts": now_ts()})
|
|
27973
|
+
|
|
27974
|
+
def _inject_sync_mode_verification_hint(self, plan_step: dict, worker_step: dict):
    """Append a ``<verification-required>`` user message to ``self.agent_messages``.

    Sync-mode counterpart of the single-mode hint: rate-limited to once
    per 30 seconds per step, and skipped when one of the last five agent
    messages already contains ``<verification-required>``.

    Args:
        plan_step: Plan step dict; ``id``, ``content`` and
            ``full_content`` are read.
        worker_step: Accepted for interface compatibility; not read here.
    """
    rate_attr = f"_sync_sv_ts_{str(plan_step.get('id', '') or '')}"
    last_sent = float(getattr(self, rate_attr, 0.0))
    if float(now_ts()) - last_sent < 30.0:
        return
    # The rate-limit stamp is refreshed even if the message is later suppressed.
    setattr(self, rate_attr, float(now_ts()))

    title = trim(str(plan_step.get("content", "") or ""), 80)
    details = str(plan_step.get("full_content", "") or plan_step.get("content", "") or "")

    # Use the remainder of the first line that starts with "ACCEPTANCE:".
    prefix = "ACCEPTANCE:"
    accepted_when = ""
    for candidate in map(str.strip, details.splitlines()):
        if candidate.upper().startswith(prefix):
            accepted_when = candidate[len(prefix):].strip()
            break

    is_test_phase = self._plan_step_phase_hint(details.lower()) == "test"
    next_action = (
        "run the tests with bash and evaluate results"
        if is_test_phase
        else "run the build/compile command with bash and evaluate the output"
    )
    criteria_part = ""
    if accepted_when:
        criteria_part = f"\nAcceptance criteria: {accepted_when}"

    notice_lines = (
        "<verification-required>",
        f'All subtasks for "{title}" are marked complete.{criteria_part}',
        "Before this step can advance:",
        f"1. {next_action}",
        "2. Review the output and confirm acceptance criteria are met",
        "3. If it passes, emit exactly: <step-verified/>",
        "4. If it fails, fix and retry — do NOT emit <step-verified/> until resolved",
        "</verification-required>",
    )
    notice = "\n".join(notice_lines)

    tail = [entry for entry in self.agent_messages[-5:] if isinstance(entry, dict)]
    pending_notice = any(
        "<verification-required>" in str(entry.get("content", "") or "") for entry in tail
    )
    if pending_notice:
        return
    self.agent_messages.append({"role": "user", "content": notice, "ts": now_ts()})
|
|
28009
|
+
|
|
26267
28010
|
def _single_agent_plan_step_check(self, tool_results: list[dict]):
|
|
26268
28011
|
"""In single-agent mode, check if current plan step should be advanced based on tool results."""
|
|
26269
28012
|
bb = self._ensure_blackboard()
|
|
@@ -26281,6 +28024,24 @@ body{padding:18px}
|
|
|
26281
28024
|
if not current:
|
|
26282
28025
|
self._sync_todos_from_blackboard(reason="single-agent-round")
|
|
26283
28026
|
return
|
|
28027
|
+
# When a new step is activated with no subtasks yet, require TodoWrite first
|
|
28028
|
+
_cur_step_id = str(current.get("id", "") or "")
|
|
28029
|
+
if _cur_step_id:
|
|
28030
|
+
_existing_subs = self._active_plan_worker_todo_rows(_cur_step_id, role="")
|
|
28031
|
+
if not _existing_subs:
|
|
28032
|
+
_step_label_s = trim(str(current.get("content", "") or ""), 60)
|
|
28033
|
+
_force_tw_msg = (
|
|
28034
|
+
f"<action-required>\n"
|
|
28035
|
+
f"Step \"{_step_label_s}\" has no subtasks yet. "
|
|
28036
|
+
f"Your FIRST action MUST be to call TodoWrite with "
|
|
28037
|
+
f"parent_step_id=\"{_cur_step_id}\" to create this step's subtasks "
|
|
28038
|
+
f"(e.g. N.1, N.2 ...) before executing any other work.\n"
|
|
28039
|
+
f"</action-required>"
|
|
28040
|
+
)
|
|
28041
|
+
_recent_msgs = self.messages[-4:]
|
|
28042
|
+
if not any("<action-required>" in str(m.get("content", "") or "") for m in _recent_msgs if isinstance(m, dict)):
|
|
28043
|
+
self._append_plan_guidance_bubble(_force_tw_msg, summary="action required: create subtasks first")
|
|
28044
|
+
return # Wait for TodoWrite before doing other checks
|
|
26284
28045
|
# Heuristic: check if tool results indicate step completion
|
|
26285
28046
|
step_content = str(current.get("full_content", "") or current.get("content", "") or "").lower()
|
|
26286
28047
|
phase = self._plan_step_phase_hint(step_content)
|
|
@@ -26292,23 +28053,57 @@ body{padding:18px}
|
|
|
26292
28053
|
str(r.get("name", "")) == "bash" and r.get("ok", False)
|
|
26293
28054
|
for r in tool_results
|
|
26294
28055
|
)
|
|
26295
|
-
|
|
28056
|
+
validation_ok_current = self._tool_results_have_validation_evidence(current, tool_results)
|
|
28057
|
+
validation_ok_blackboard = self._plan_step_has_blackboard_evidence(current, bb)
|
|
28058
|
+
validation_ok = validation_ok_current or validation_ok_blackboard
|
|
28059
|
+
bb_sig = self._plan_step_blackboard_signals(current, bb)
|
|
28060
|
+
todo_progress_signal = any(
|
|
28061
|
+
isinstance(r, dict) and r.get("ok", False)
|
|
28062
|
+
and str(r.get("name", "")) in ("TodoWrite", "TodoWriteRescue")
|
|
28063
|
+
for r in tool_results
|
|
28064
|
+
)
|
|
26296
28065
|
# Auto-advance conditions:
|
|
26297
28066
|
should_advance = False
|
|
28067
|
+
_gate_blocked = False # True when validation gate fired and blocked — no other path may advance
|
|
26298
28068
|
# Priority 1: Check if worker subtasks are all completed (most reliable signal)
|
|
26299
28069
|
subtasks_done = self._step_subtasks_all_completed(current)
|
|
26300
|
-
if subtasks_done
|
|
26301
|
-
|
|
26302
|
-
|
|
26303
|
-
|
|
26304
|
-
|
|
26305
|
-
|
|
26306
|
-
|
|
26307
|
-
|
|
26308
|
-
|
|
26309
|
-
|
|
28070
|
+
if subtasks_done:
|
|
28071
|
+
# Validation gate always fires when subtasks are done — even if validation_ok is False.
|
|
28072
|
+
# For research/design phases the gate passes immediately; for implement/test it requires
|
|
28073
|
+
# a successful bash run. This ensures single mode proactively requests verification.
|
|
28074
|
+
_gate_ok = self._single_mode_validation_gate(current, tool_results)
|
|
28075
|
+
if _gate_ok:
|
|
28076
|
+
if validation_ok:
|
|
28077
|
+
should_advance = True
|
|
28078
|
+
elif todo_progress_signal and self._step_has_accumulated_evidence(current, bb):
|
|
28079
|
+
# Accumulated evidence path: subtasks done + TodoWrite progress + history
|
|
28080
|
+
should_advance = True
|
|
28081
|
+
else:
|
|
28082
|
+
_gate_blocked = True # Gate blocked — disable ALL remaining advancement paths
|
|
28083
|
+
# Priority 2: Phase-based heuristics — BUT gate by subtask completion when subtasks exist
|
|
28084
|
+
# CRITICAL: A single write_file must NOT advance when 3+ subtasks remain
|
|
28085
|
+
# Skipped when validation gate has blocked advancement (subtasks_done + gate failed)
|
|
28086
|
+
if not should_advance and not _gate_blocked:
|
|
28087
|
+
_has_subtasks_s = bool(self._active_plan_worker_todo_rows(
|
|
28088
|
+
str(current.get("id", "") or ""), role=""
|
|
28089
|
+
))
|
|
28090
|
+
_can_use_phase_heuristic = subtasks_done or not _has_subtasks_s
|
|
28091
|
+
if _can_use_phase_heuristic:
|
|
28092
|
+
if phase in ("research", "design") and validation_ok:
|
|
28093
|
+
should_advance = True
|
|
28094
|
+
elif phase == "implement" and (
|
|
28095
|
+
(wrote_files and validation_ok_current)
|
|
28096
|
+
or (bb_sig["has_write"] and validation_ok_blackboard)
|
|
28097
|
+
):
|
|
28098
|
+
should_advance = True
|
|
28099
|
+
elif phase in ("test", "review") and (
|
|
28100
|
+
(ran_bash_ok and validation_ok_current)
|
|
28101
|
+
or ((bb_sig["has_exec"] or bb_sig["has_review"]) and validation_ok_blackboard)
|
|
28102
|
+
):
|
|
28103
|
+
should_advance = True
|
|
26310
28104
|
# Also check if the agent explicitly mentioned step completion
|
|
26311
|
-
|
|
28105
|
+
# Also blocked by validation gate when subtasks_done path was blocked
|
|
28106
|
+
if not should_advance and not _gate_blocked:
|
|
26312
28107
|
# Check last assistant message for step completion signals
|
|
26313
28108
|
last_text = ""
|
|
26314
28109
|
for msg in reversed(self.messages[-3:]):
|
|
@@ -26327,7 +28122,25 @@ body{padding:18px}
|
|
|
26327
28122
|
except Exception:
|
|
26328
28123
|
pass
|
|
26329
28124
|
else:
|
|
28125
|
+
self._inject_rework_if_needed(current, {"tool_results": tool_results})
|
|
26330
28126
|
self._sync_todos_from_blackboard(reason="single-agent-round")
|
|
28127
|
+
if todo_progress_signal and not subtasks_done:
|
|
28128
|
+
step_rows = self._active_plan_worker_todo_rows(str(current.get("id", "") or ""), role="")
|
|
28129
|
+
next_row = next(
|
|
28130
|
+
(r for r in step_rows if str(r.get("status", "") or "").strip().lower() == "in_progress"),
|
|
28131
|
+
None,
|
|
28132
|
+
)
|
|
28133
|
+
focus_text = trim(str((next_row or {}).get("content", "") or "").strip(), 180)
|
|
28134
|
+
if focus_text:
|
|
28135
|
+
focus_msg = (
|
|
28136
|
+
"<todo-focus>"
|
|
28137
|
+
f"Continue ONLY the current in_progress subtask: {focus_text}. "
|
|
28138
|
+
"Do not branch away from the active plan step."
|
|
28139
|
+
"</todo-focus>"
|
|
28140
|
+
)
|
|
28141
|
+
recent = self.messages[-6:]
|
|
28142
|
+
if not any(str(msg.get("content", "") or "").strip() == focus_msg for msg in recent if isinstance(msg, dict)):
|
|
28143
|
+
self._append_plan_guidance_bubble(focus_msg, summary="todo focus: continue current subtask")
|
|
26331
28144
|
|
|
26332
28145
|
def _todo_project_rows_from_blackboard(self, board: dict | None = None) -> list[dict]:
|
|
26333
28146
|
bb = board if isinstance(board, dict) else self._ensure_blackboard()
|
|
@@ -26337,7 +28150,9 @@ body{padding:18px}
|
|
|
26337
28150
|
rows = []
|
|
26338
28151
|
for todo in todos:
|
|
26339
28152
|
s = todo.get("status", "pending")
|
|
26340
|
-
c = todo.get("content", "")
|
|
28153
|
+
c = normalize_embedded_newlines(todo.get("content", "") or "")
|
|
28154
|
+
if str(todo.get("category", "") or "") == "plan_step" and "\n" in c:
|
|
28155
|
+
c = c.split("\n", 1)[0].strip()
|
|
26341
28156
|
ev = todo.get("evidence", "")
|
|
26342
28157
|
af = {
|
|
26343
28158
|
"in_progress": self._ui_text("todo_working", content=c),
|
|
@@ -26350,12 +28165,9 @@ body{padding:18px}
|
|
|
26350
28165
|
if bool(self.runtime_reclassify_required):
|
|
26351
28166
|
return
|
|
26352
28167
|
bb = board if isinstance(board, dict) else self._ensure_blackboard()
|
|
26353
|
-
|
|
26354
|
-
has_plan_steps =
|
|
26355
|
-
|
|
26356
|
-
for t in (bb.get("project_todos", []) if isinstance(bb.get("project_todos"), list) else [])
|
|
26357
|
-
)
|
|
26358
|
-
if not self._is_multi_agent_mode() and not has_plan_steps:
|
|
28168
|
+
route_kind = self._todo_route_kind(board=bb)
|
|
28169
|
+
has_plan_steps = route_kind in {"plan_single", "plan_sync"}
|
|
28170
|
+
if route_kind == "pure_single":
|
|
26359
28171
|
return
|
|
26360
28172
|
self._init_project_todos(bb)
|
|
26361
28173
|
self._update_project_todo_status(bb)
|
|
@@ -26367,25 +28179,25 @@ body{padding:18px}
|
|
|
26367
28179
|
pass
|
|
26368
28180
|
system_rows = self._todo_project_rows_from_blackboard(bb)
|
|
26369
28181
|
existing = self.todo.snapshot()
|
|
28182
|
+
bridged_flat_rows = False
|
|
26370
28183
|
worker_rows: list[dict] = []
|
|
26371
28184
|
non_system_rows: list[dict] = []
|
|
26372
|
-
|
|
26373
|
-
|
|
26374
|
-
|
|
26375
|
-
|
|
26376
|
-
|
|
26377
|
-
|
|
26378
|
-
|
|
26379
|
-
|
|
26380
|
-
|
|
26381
|
-
|
|
26382
|
-
|
|
26383
|
-
|
|
26384
|
-
|
|
26385
|
-
|
|
26386
|
-
|
|
26387
|
-
|
|
26388
|
-
non_system_rows = []
|
|
28185
|
+
if route_kind == "plan_single":
|
|
28186
|
+
worker_rows = self._todo_route_rows(route_kind, rows=existing, board=bb)
|
|
28187
|
+
if not worker_rows:
|
|
28188
|
+
flat_rows = self._todo_route_rows("pure_single", rows=existing, board=bb)
|
|
28189
|
+
bridged_rows, bridged_flat_rows = self._bridge_flat_todos_to_active_plan_step(flat_rows, board=bb)
|
|
28190
|
+
if bridged_flat_rows:
|
|
28191
|
+
worker_rows = self._todo_route_rows(route_kind, rows=bridged_rows, board=bb)
|
|
28192
|
+
elif route_kind == "plan_sync":
|
|
28193
|
+
worker_rows = self._todo_route_rows(route_kind, rows=existing, board=bb)
|
|
28194
|
+
elif route_kind == "pure_sync":
|
|
28195
|
+
worker_rows = self._todo_route_rows(
|
|
28196
|
+
route_kind,
|
|
28197
|
+
rows=existing,
|
|
28198
|
+
role=self._todo_worker_role_hint(board=bb),
|
|
28199
|
+
board=bb,
|
|
28200
|
+
)
|
|
26389
28201
|
# Smart trim: keep all active (in_progress/pending) system rows,
|
|
26390
28202
|
# but only recent 3 completed system rows to save capacity for worker subtasks
|
|
26391
28203
|
active_system = [r for r in system_rows if r.get("status") != "completed"]
|
|
@@ -26449,6 +28261,11 @@ body{padding:18px}
|
|
|
26449
28261
|
todo_out = self.todo.update(merged)
|
|
26450
28262
|
except Exception:
|
|
26451
28263
|
return
|
|
28264
|
+
if bridged_flat_rows:
|
|
28265
|
+
self._emit(
|
|
28266
|
+
"status",
|
|
28267
|
+
{"summary": "flat todos attached to current plan step"},
|
|
28268
|
+
)
|
|
26452
28269
|
if todo_out != self.todo.no_changes_text() and reason:
|
|
26453
28270
|
self._emit(
|
|
26454
28271
|
"status",
|
|
@@ -26781,7 +28598,7 @@ body{padding:18px}
|
|
|
26781
28598
|
task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
|
|
26782
28599
|
if task_type in TASK_PROFILE_TYPES:
|
|
26783
28600
|
merged["task_type"] = task_type
|
|
26784
|
-
complexity =
|
|
28601
|
+
complexity = normalize_task_complexity(row.get("complexity", ""), default="")
|
|
26785
28602
|
if complexity in TASK_COMPLEXITY_LEVELS:
|
|
26786
28603
|
merged["complexity"] = complexity
|
|
26787
28604
|
scale = trim(str(row.get("scale_preference", "") or "").strip().lower(), 20)
|
|
@@ -26826,7 +28643,7 @@ body{padding:18px}
|
|
|
26826
28643
|
def _fallback_task_level_decision(self, goal_text: str) -> dict:
|
|
26827
28644
|
profile = self._infer_task_profile(goal_text)
|
|
26828
28645
|
task_type = str(profile.get("task_type", "general") or "general")
|
|
26829
|
-
complexity =
|
|
28646
|
+
complexity = normalize_task_complexity(profile.get("complexity", "simple"), default="simple")
|
|
26830
28647
|
low = str(goal_text or "").lower()
|
|
26831
28648
|
inherit_previous_state = False
|
|
26832
28649
|
if bool(self.runtime_goal_reset_pending):
|
|
@@ -26945,9 +28762,9 @@ body{padding:18px}
|
|
|
26945
28762
|
level = 3
|
|
26946
28763
|
if task_type == "simple_qa":
|
|
26947
28764
|
level = 1 if len(str(goal_text or "")) <= 180 else 2
|
|
26948
|
-
elif complexity
|
|
28765
|
+
elif task_complexity_rank(complexity) <= task_complexity_rank("simple") and task_type in {"general"}:
|
|
26949
28766
|
level = 2
|
|
26950
|
-
elif complexity
|
|
28767
|
+
elif task_complexity_rank(complexity) <= task_complexity_rank("moderate"):
|
|
26951
28768
|
level = 3
|
|
26952
28769
|
elif any(tok in low for tok in ("system-level", "系统级", "blackboard", "orchestrator", "内核", "基础设施")):
|
|
26953
28770
|
level = 5
|
|
@@ -27009,7 +28826,9 @@ body{padding:18px}
|
|
|
27009
28826
|
"SCALE PREFERENCE: Infer fast|balanced|thorough from user wording. "
|
|
27010
28827
|
"User-stated preference overrides your default strategy. "
|
|
27011
28828
|
"Budget controls internal depth/compactness, NOT early-stop messaging to user.\n\n"
|
|
27012
|
-
"
|
|
28829
|
+
"CRITICAL OUTPUT CONTRACT: You MUST output exactly one classify_task_level tool call and no plain-text answer. "
|
|
28830
|
+
"A prose-only response is invalid and will be discarded.\n"
|
|
28831
|
+
"The tool call must include concise judgement, inherit_previous_state, "
|
|
27013
28832
|
"and semantic_confidence (high|medium|low). "
|
|
27014
28833
|
"Use low confidence only when semantic ambiguity is substantial, then set low_confidence_reason briefly.\n"
|
|
27015
28834
|
f"{model_language_instruction(self.ui_language)}"
|
|
@@ -27021,6 +28840,28 @@ body{padding:18px}
|
|
|
27021
28840
|
)
|
|
27022
28841
|
return base
|
|
27023
28842
|
|
|
28843
|
+
def _extract_classify_task_level_row(self, response: dict | None) -> dict:
|
|
28844
|
+
if not isinstance(response, dict):
|
|
28845
|
+
return {}
|
|
28846
|
+
tool_calls = response.get("tool_calls", []) if isinstance(response.get("tool_calls", []), list) else []
|
|
28847
|
+
for tc in tool_calls:
|
|
28848
|
+
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
|
|
28849
|
+
if str(fn.get("name", "") or "").strip() != "classify_task_level":
|
|
28850
|
+
continue
|
|
28851
|
+
args = fn.get("arguments", {}) if isinstance(fn, dict) else {}
|
|
28852
|
+
if isinstance(args, dict):
|
|
28853
|
+
return dict(args)
|
|
28854
|
+
if isinstance(args, str):
|
|
28855
|
+
parsed, _ = parse_tool_arguments_with_error(args)
|
|
28856
|
+
if isinstance(parsed, dict):
|
|
28857
|
+
return dict(parsed)
|
|
28858
|
+
content = str(response.get("content", "") or "").strip()
|
|
28859
|
+
if content:
|
|
28860
|
+
parsed, _ = parse_tool_arguments_with_error(content)
|
|
28861
|
+
if isinstance(parsed, dict) and parsed.get("level") is not None:
|
|
28862
|
+
return dict(parsed)
|
|
28863
|
+
return {}
|
|
28864
|
+
|
|
27024
28865
|
def _skill_aware_reeval_task_level(
|
|
27025
28866
|
self,
|
|
27026
28867
|
goal_text: str,
|
|
@@ -27214,7 +29055,7 @@ body{padding:18px}
|
|
|
27214
29055
|
if low_confidence_mode:
|
|
27215
29056
|
rule_profile = self._infer_task_profile(goal_text)
|
|
27216
29057
|
fallback_task_type = str(rule_profile.get("task_type", "general") or "general")
|
|
27217
|
-
fallback_complexity =
|
|
29058
|
+
fallback_complexity = normalize_task_complexity(rule_profile.get("complexity", "simple"), default="simple")
|
|
27218
29059
|
fallback_objective = trim(str(rule_profile.get("direct_objective", "") or ""), 800)
|
|
27219
29060
|
else:
|
|
27220
29061
|
board_now = self._ensure_blackboard()
|
|
@@ -27225,12 +29066,10 @@ body{padding:18px}
|
|
|
27225
29066
|
)
|
|
27226
29067
|
if fallback_task_type not in TASK_PROFILE_TYPES:
|
|
27227
29068
|
fallback_task_type = "general"
|
|
27228
|
-
fallback_complexity =
|
|
27229
|
-
|
|
27230
|
-
|
|
29069
|
+
fallback_complexity = normalize_task_complexity(
|
|
29070
|
+
self.runtime_task_complexity or board_profile.get("complexity", "simple") or "simple",
|
|
29071
|
+
default="simple",
|
|
27231
29072
|
)
|
|
27232
|
-
if fallback_complexity not in TASK_COMPLEXITY_LEVELS:
|
|
27233
|
-
fallback_complexity = "simple"
|
|
27234
29073
|
fallback_objective = trim(
|
|
27235
29074
|
str(self.runtime_direct_objective or board_profile.get("direct_objective", "") or "").strip(),
|
|
27236
29075
|
800,
|
|
@@ -27242,22 +29081,20 @@ body{padding:18px}
|
|
|
27242
29081
|
task_type = trim(str(row.get("task_type", "") or "").strip().lower(), 40)
|
|
27243
29082
|
if task_type not in TASK_PROFILE_TYPES:
|
|
27244
29083
|
task_type = fallback_task_type
|
|
27245
|
-
complexity =
|
|
27246
|
-
if complexity not in TASK_COMPLEXITY_LEVELS:
|
|
27247
|
-
complexity = fallback_complexity
|
|
29084
|
+
complexity = normalize_task_complexity(row.get("complexity", ""), default=fallback_complexity)
|
|
27248
29085
|
if explicit_complexity in TASK_COMPLEXITY_LEVELS:
|
|
27249
|
-
complexity = explicit_complexity
|
|
29086
|
+
complexity = normalize_task_complexity(explicit_complexity, default=fallback_complexity)
|
|
27250
29087
|
elif preserve_existing_complexity and previous_complexity in TASK_COMPLEXITY_LEVELS:
|
|
27251
|
-
complexity = previous_complexity
|
|
29088
|
+
complexity = normalize_task_complexity(previous_complexity, default=fallback_complexity)
|
|
27252
29089
|
low_confidence_reason = trim(str(row.get("low_confidence_reason", "") or "").strip(), 220)
|
|
27253
29090
|
judgement = trim(str(row.get("judgement", "") or "").strip(), 200) or "manager classified task level"
|
|
27254
29091
|
objective = trim(str(row.get("direct_objective", "") or "").strip(), 800)
|
|
27255
29092
|
if not objective:
|
|
27256
29093
|
objective = fallback_objective
|
|
27257
29094
|
_prev_level_val = int(getattr(self, '_prev_applied_task_level', 0) or 0)
|
|
27258
|
-
_complexity_floor =
|
|
27259
|
-
if _complexity_floor
|
|
27260
|
-
complexity =
|
|
29095
|
+
_complexity_floor = normalize_task_complexity(getattr(self, 'runtime_complexity_floor', '') or '', default="")
|
|
29096
|
+
if _complexity_floor in TASK_COMPLEXITY_LEVELS and task_complexity_rank(complexity) < task_complexity_rank(_complexity_floor):
|
|
29097
|
+
complexity = _complexity_floor
|
|
27261
29098
|
self.runtime_task_level = int(level)
|
|
27262
29099
|
self._prev_applied_task_level = int(level)
|
|
27263
29100
|
self.runtime_execution_mode = mode
|
|
@@ -27449,34 +29286,50 @@ body{padding:18px}
|
|
|
27449
29286
|
retries=max(1, int(MODEL_OUTPUT_RETRY_TIMES)),
|
|
27450
29287
|
media_inputs=media_inputs_round,
|
|
27451
29288
|
)
|
|
27452
|
-
|
|
27453
|
-
|
|
27454
|
-
|
|
27455
|
-
|
|
27456
|
-
|
|
27457
|
-
|
|
27458
|
-
|
|
27459
|
-
|
|
27460
|
-
|
|
27461
|
-
|
|
27462
|
-
|
|
27463
|
-
)
|
|
27464
|
-
|
|
27465
|
-
|
|
27466
|
-
|
|
27467
|
-
|
|
27468
|
-
|
|
27469
|
-
|
|
27470
|
-
|
|
27471
|
-
|
|
27472
|
-
|
|
27473
|
-
|
|
27474
|
-
|
|
27475
|
-
|
|
29289
|
+
row = self._extract_classify_task_level_row(response)
|
|
29290
|
+
if not row:
|
|
29291
|
+
repair_prompt = (
|
|
29292
|
+
"Previous answer was invalid because it did not produce a valid classify_task_level tool call. "
|
|
29293
|
+
"Retry now. Output exactly one classify_task_level tool call and no prose."
|
|
29294
|
+
)
|
|
29295
|
+
repair_response = self._chat_with_same_model_retry(
|
|
29296
|
+
[
|
|
29297
|
+
{"role": "user", "content": prompt, "ts": now_ts()},
|
|
29298
|
+
{"role": "user", "content": repair_prompt, "ts": now_ts()},
|
|
29299
|
+
],
|
|
29300
|
+
tools=self._manager_task_classify_tools(),
|
|
29301
|
+
system=self._manager_classification_system_prompt(),
|
|
29302
|
+
max_tokens=220,
|
|
29303
|
+
think=False,
|
|
29304
|
+
stream_thinking=False,
|
|
29305
|
+
on_thinking_chunk=self._append_live_thinking,
|
|
29306
|
+
pinned_selection=pinned_selection,
|
|
29307
|
+
context_label="manager classify repair",
|
|
29308
|
+
retries=1,
|
|
29309
|
+
media_inputs=media_inputs_round,
|
|
29310
|
+
)
|
|
29311
|
+
row = self._extract_classify_task_level_row(repair_response)
|
|
29312
|
+
if row:
|
|
29313
|
+
row["inherit_previous_state"] = _to_bool_like(
|
|
29314
|
+
row.get("inherit_previous_state", False),
|
|
29315
|
+
default=False,
|
|
29316
|
+
)
|
|
29317
|
+
row["semantic_confidence"] = self._normalize_semantic_confidence(
|
|
29318
|
+
row.get("semantic_confidence", "medium"),
|
|
29319
|
+
default="medium",
|
|
29320
|
+
)
|
|
29321
|
+
if str(row.get("semantic_confidence", "medium")) == "low":
|
|
29322
|
+
# Skill-aware re-evaluation before falling back to keyword heuristic
|
|
29323
|
+
reeval_row = self._skill_aware_reeval_task_level(goal_text, row, pinned_selection)
|
|
29324
|
+
fallback_row = self._fallback_task_level_decision(goal_text)
|
|
29325
|
+
merged = self._merge_task_decision_for_low_confidence(reeval_row, fallback_row)
|
|
29326
|
+
return merged
|
|
29327
|
+
row["source"] = "manager"
|
|
29328
|
+
return row
|
|
27476
29329
|
row = self._fallback_task_level_decision(goal_text)
|
|
27477
29330
|
row["source"] = "fallback-no-toolcall"
|
|
27478
29331
|
row["semantic_confidence"] = "low"
|
|
27479
|
-
row["low_confidence_reason"] = "manager classifier returned no valid tool call"
|
|
29332
|
+
row["low_confidence_reason"] = "manager classifier returned no valid classify_task_level tool call"
|
|
27480
29333
|
return row
|
|
27481
29334
|
|
|
27482
29335
|
# ------------------------------------------------------------------
|
|
@@ -27923,6 +29776,7 @@ body{padding:18px}
|
|
|
27923
29776
|
"IMPORTANT: Previous fix attempts FAILED. You MUST change your approach — "
|
|
27924
29777
|
"do NOT repeat the same instruction. Include the exact error output in your delegation. "
|
|
27925
29778
|
)
|
|
29779
|
+
html_hint = self._html_frontend_boost_instruction()
|
|
27926
29780
|
# Loaded skills constraint for manager
|
|
27927
29781
|
skills_constraint = self._loaded_skills_prompt_hint(for_role="manager")
|
|
27928
29782
|
bb_skills = board.get("loaded_skills", {})
|
|
@@ -27957,6 +29811,7 @@ body{padding:18px}
|
|
|
27957
29811
|
f"{todo_route_note}"
|
|
27958
29812
|
f"{phase_hint}"
|
|
27959
29813
|
f"{failure_hint}"
|
|
29814
|
+
f"{html_hint}"
|
|
27960
29815
|
f"{skills_constraint}"
|
|
27961
29816
|
f"Level={level}, mode={mode}, progress={progress}, "
|
|
27962
29817
|
f"budget={'unlimited' if int(budget) <= 0 else int(budget)}, "
|
|
@@ -28282,7 +30137,7 @@ body{padding:18px}
|
|
|
28282
30137
|
"reason": "conclusive-reply-detected",
|
|
28283
30138
|
"source": "fallback",
|
|
28284
30139
|
}
|
|
28285
|
-
if complexity
|
|
30140
|
+
if task_complexity_rank(complexity) <= task_complexity_rank("moderate") and task_type == "simple_code":
|
|
28286
30141
|
if has_error_log:
|
|
28287
30142
|
return {
|
|
28288
30143
|
"target": "developer",
|
|
@@ -28420,6 +30275,10 @@ body{padding:18px}
|
|
|
28420
30275
|
task_type_low = str(row.get("task_type", "") or "").strip().lower()
|
|
28421
30276
|
# 5a: Merge in-memory routes with persisted routes for detection
|
|
28422
30277
|
bb_for_routes = self._ensure_blackboard()
|
|
30278
|
+
current_progress_fp = self._watchdog_state_fingerprint(bb_for_routes)
|
|
30279
|
+
last_delegate = bb_for_routes.get("last_delegate", {}) if isinstance(bb_for_routes.get("last_delegate"), dict) else {}
|
|
30280
|
+
last_progress_fp = trim(str(last_delegate.get("progress_fp", "") or "").strip(), 80)
|
|
30281
|
+
no_progress_since_last_delegate = bool(last_progress_fp and last_progress_fp == current_progress_fp)
|
|
28423
30282
|
persisted_routes = bb_for_routes.get("persisted_manager_routes", [])
|
|
28424
30283
|
if not isinstance(persisted_routes, list):
|
|
28425
30284
|
persisted_routes = []
|
|
@@ -28431,22 +30290,16 @@ body{padding:18px}
|
|
|
28431
30290
|
if (
|
|
28432
30291
|
isinstance(deleg, dict)
|
|
28433
30292
|
and str(deleg.get("target", "") or "").strip().lower() == target
|
|
30293
|
+
and (
|
|
30294
|
+
not str(deleg.get("progress_fp", "") or "").strip()
|
|
30295
|
+
or str(deleg.get("progress_fp", "") or "").strip() == current_progress_fp
|
|
30296
|
+
)
|
|
28434
30297
|
and int(deleg.get("count", 0) or 0) >= 3
|
|
28435
30298
|
):
|
|
28436
|
-
|
|
28437
|
-
|
|
28438
|
-
|
|
28439
|
-
|
|
28440
|
-
f"Anti-stall: delegation to '{target}' repeated {deleg.get('count')} times with same instruction. "
|
|
28441
|
-
"Forcing finish to break loop."
|
|
28442
|
-
)
|
|
28443
|
-
else:
|
|
28444
|
-
row["target"] = alt_targets[0] if alt_targets else "developer"
|
|
28445
|
-
row["instruction"] = (
|
|
28446
|
-
f"Anti-stall: delegation to '{target}' repeated {deleg.get('count')} times. "
|
|
28447
|
-
f"Switching to {row['target']} with fresh approach."
|
|
28448
|
-
)
|
|
28449
|
-
row["reason"] = f"{row.get('reason', '')}|anti-stall-repeated-delegation"
|
|
30299
|
+
if not no_progress_since_last_delegate:
|
|
30300
|
+
continue
|
|
30301
|
+
row = self._manager_recovery_route_for_repeated_delegate(row, board=bb_for_routes)
|
|
30302
|
+
row["reason"] = trim(f"{row.get('reason', '')}|anti-stall-repeated-delegation", 600)
|
|
28450
30303
|
row["source"] = "anti-stall"
|
|
28451
30304
|
return row
|
|
28452
30305
|
if task_type_low in ("simple_code", "engineering") and target == "explorer":
|
|
@@ -28469,7 +30322,7 @@ body{padding:18px}
|
|
|
28469
30322
|
if target not in AGENT_ROLES:
|
|
28470
30323
|
return row
|
|
28471
30324
|
recent = [str(x.get("target", "") or "").strip().lower() for x in merged_routes[-4:]]
|
|
28472
|
-
if len(recent) >= 3 and recent[-1] == target and recent[-2] == target and recent[-3] == target:
|
|
30325
|
+
if no_progress_since_last_delegate and len(recent) >= 3 and recent[-1] == target and recent[-2] == target and recent[-3] == target:
|
|
28473
30326
|
board = bb_for_routes
|
|
28474
30327
|
low_reason = str(row.get("reason", "") or "").strip().lower()
|
|
28475
30328
|
if "summary" in low_reason and len(board.get("code_artifacts", {}) or {}) > 0:
|
|
@@ -28514,7 +30367,7 @@ body{padding:18px}
|
|
|
28514
30367
|
row["reason"] = f"{row.get('reason', '')}|anti-stall->developer-suggest"
|
|
28515
30368
|
row["source"] = "anti-stall"
|
|
28516
30369
|
return row
|
|
28517
|
-
if len(recent) == 4 and recent[0] == recent[2] and recent[1] == recent[3] and recent[0] != recent[1]:
|
|
30370
|
+
if no_progress_since_last_delegate and len(recent) == 4 and recent[0] == recent[2] and recent[1] == recent[3] and recent[0] != recent[1]:
|
|
28518
30371
|
board = bb_for_routes
|
|
28519
30372
|
if len(board.get("code_artifacts", {}) or {}) > 0:
|
|
28520
30373
|
row["target"] = "finish"
|
|
@@ -28595,9 +30448,7 @@ body{padding:18px}
|
|
|
28595
30448
|
task_type = trim(str(row.get("task_type", default_type) or "").strip().lower(), 40) or default_type
|
|
28596
30449
|
if task_type not in TASK_PROFILE_TYPES:
|
|
28597
30450
|
task_type = default_type
|
|
28598
|
-
complexity =
|
|
28599
|
-
if complexity not in TASK_COMPLEXITY_LEVELS:
|
|
28600
|
-
complexity = default_complexity
|
|
30451
|
+
complexity = normalize_task_complexity(row.get("complexity", default_complexity) or default_complexity, default=default_complexity)
|
|
28601
30452
|
scale_preference = trim(
|
|
28602
30453
|
str(row.get("scale_preference", profile.get("scale_preference", self.runtime_scale_preference)) or "").strip().lower(),
|
|
28603
30454
|
20,
|
|
@@ -29018,6 +30869,19 @@ body{padding:18px}
|
|
|
29018
30869
|
seen.add(low_tail)
|
|
29019
30870
|
keep_lines.append(tail)
|
|
29020
30871
|
continue
|
|
30872
|
+
if low.startswith("tasks to complete:"):
|
|
30873
|
+
continue
|
|
30874
|
+
if re.match(r"^\d+(?:\.\d+)*[.)]\s+", s):
|
|
30875
|
+
continue
|
|
30876
|
+
if re.match(r"^[-*]\s+", s):
|
|
30877
|
+
continue
|
|
30878
|
+
if re.match(
|
|
30879
|
+
r"(?i)^(mkdir\s+-p|run:|create directories:|create project|create directory|initialize project|cmake\b|python\s+-m\s+venv\b|npx\b)",
|
|
30880
|
+
s,
|
|
30881
|
+
):
|
|
30882
|
+
continue
|
|
30883
|
+
if re.match(r"^(创建|初始化|运行|目录结构|项目根目录结构)[::]?", s):
|
|
30884
|
+
continue
|
|
29021
30885
|
norm = re.sub(r"\s+", " ", s).strip().lower()
|
|
29022
30886
|
if norm and norm not in seen:
|
|
29023
30887
|
seen.add(norm)
|
|
@@ -29375,6 +31239,7 @@ body{padding:18px}
|
|
|
29375
31239
|
"instruction": instruction,
|
|
29376
31240
|
"reason": trim(str(route.get("reason", "") or "").strip(), 600),
|
|
29377
31241
|
"source": trim(str(route.get("source", "") or "").strip(), 40),
|
|
31242
|
+
"progress_fp": self._watchdog_state_fingerprint(board),
|
|
29378
31243
|
"task_level": int(task_level),
|
|
29379
31244
|
"execution_mode": execution_mode,
|
|
29380
31245
|
"task_type": task_type,
|
|
@@ -29459,8 +31324,9 @@ body{padding:18px}
|
|
|
29459
31324
|
profile["task_type"] = task_type
|
|
29460
31325
|
if complexity in TASK_COMPLEXITY_LEVELS:
|
|
29461
31326
|
# Floor protection: if plan mode set a floor, do not allow downgrade
|
|
29462
|
-
|
|
29463
|
-
|
|
31327
|
+
_route_complexity_floor = normalize_task_complexity(self.runtime_complexity_floor, default="")
|
|
31328
|
+
if _route_complexity_floor in TASK_COMPLEXITY_LEVELS and task_complexity_rank(complexity) < task_complexity_rank(_route_complexity_floor):
|
|
31329
|
+
complexity = _route_complexity_floor
|
|
29464
31330
|
profile["complexity"] = complexity
|
|
29465
31331
|
profile["scale_preference"] = scale_preference if scale_preference in TASK_SCALE_PREFERENCES else "balanced"
|
|
29466
31332
|
if objective:
|
|
@@ -29830,8 +31696,25 @@ body{padding:18px}
|
|
|
29830
31696
|
)
|
|
29831
31697
|
self._emit("status", {"summary": f"reviewer finish blocked: {gate_reason}"})
|
|
29832
31698
|
else:
|
|
31699
|
+
bb_finish = self._ensure_blackboard()
|
|
31700
|
+
profile_finish = self._ensure_blackboard_task_profile(bb_finish)
|
|
31701
|
+
exec_mode = normalize_execution_mode(
|
|
31702
|
+
profile_finish.get("execution_mode", self._effective_execution_mode()),
|
|
31703
|
+
default=self._effective_execution_mode(),
|
|
31704
|
+
)
|
|
29833
31705
|
approval_note = summary_arg or output or "finish tool acknowledged"
|
|
29834
|
-
|
|
31706
|
+
if exec_mode == EXECUTION_MODE_SYNC:
|
|
31707
|
+
self._blackboard_append_section(
|
|
31708
|
+
"execution_logs",
|
|
31709
|
+
role_key,
|
|
31710
|
+
(
|
|
31711
|
+
"finish requested but deferred: sync mode requires reviewer pass before approval.\n"
|
|
31712
|
+
f"summary: {approval_note}"
|
|
31713
|
+
),
|
|
31714
|
+
)
|
|
31715
|
+
self._emit("status", {"summary": "finish deferred: sync mode requires reviewer approval"})
|
|
31716
|
+
else:
|
|
31717
|
+
self._blackboard_mark_approved(approval_note, role_key)
|
|
29835
31718
|
if not ok and output:
|
|
29836
31719
|
self._blackboard_append_section(
|
|
29837
31720
|
"execution_logs",
|
|
@@ -29869,6 +31752,7 @@ body{padding:18px}
|
|
|
29869
31752
|
role_key = self._sanitize_agent_role(role)
|
|
29870
31753
|
status = str((step or {}).get("status", "") or "")
|
|
29871
31754
|
text = trim(str((step or {}).get("text", "") or "").strip(), BLACKBOARD_MAX_TEXT)
|
|
31755
|
+
tool_results = (step or {}).get("tool_results", []) if isinstance((step or {}).get("tool_results"), list) else []
|
|
29872
31756
|
if role_key and text:
|
|
29873
31757
|
board = self._ensure_blackboard()
|
|
29874
31758
|
board["last_worker_reply"] = {
|
|
@@ -29889,7 +31773,28 @@ body{padding:18px}
|
|
|
29889
31773
|
self._blackboard_set_status("REVIEWING")
|
|
29890
31774
|
if self._reviewer_deems_done(text):
|
|
29891
31775
|
self._blackboard_mark_approved(text, role_key)
|
|
29892
|
-
|
|
31776
|
+
explicit_todo_write = any(
|
|
31777
|
+
isinstance(item, dict) and str(item.get("name", "") or "") in {"TodoWrite", "TodoWriteRescue"}
|
|
31778
|
+
for item in tool_results
|
|
31779
|
+
)
|
|
31780
|
+
if role_key and not explicit_todo_write:
|
|
31781
|
+
source_text = text or self._latest_agent_assistant_text(role_key)
|
|
31782
|
+
if re.search(r"(?m)^\s*(?:[-*•>]+\s*)?\[(?: |>|x)\]\s+\S", source_text or ""):
|
|
31783
|
+
board = self._ensure_blackboard()
|
|
31784
|
+
step_id = trim(str((self._get_active_plan_step(board) or {}).get("id", "") or ""), 20)
|
|
31785
|
+
parsed_rows = extract_todo_rows_from_text(
|
|
31786
|
+
source_text,
|
|
31787
|
+
default_parent_step_id=step_id,
|
|
31788
|
+
limit=12,
|
|
31789
|
+
)
|
|
31790
|
+
if parsed_rows:
|
|
31791
|
+
merged = self._merge_todo_signal_rows(parsed_rows, role=role_key, board=board)
|
|
31792
|
+
if merged != self.todo.no_changes_text():
|
|
31793
|
+
self._emit(
|
|
31794
|
+
"status",
|
|
31795
|
+
{"summary": f"todo synced from canonical {role_key} text"},
|
|
31796
|
+
)
|
|
31797
|
+
for item in tool_results:
|
|
29893
31798
|
if isinstance(item, dict) and bool(item.get("bb_applied", False)):
|
|
29894
31799
|
continue
|
|
29895
31800
|
self._blackboard_update_from_tool_result(role_key, item)
|
|
@@ -30490,6 +32395,7 @@ body{padding:18px}
|
|
|
30490
32395
|
skills_block = self._skills_awareness_block(for_role=role_key)
|
|
30491
32396
|
code_note = self._runtime_code_reference_prompt_block(max_chars=2600)
|
|
30492
32397
|
engineering_note = self._engineering_execution_boost_instruction()
|
|
32398
|
+
html_note = self._html_frontend_boost_instruction()
|
|
30493
32399
|
plan_todo_note = self._plan_todo_discipline_prompt(role=role_key)
|
|
30494
32400
|
base = (
|
|
30495
32401
|
f"You are {self._agent_display_name(role_key)} in a multi-agent coding system. "
|
|
@@ -30498,10 +32404,15 @@ body{padding:18px}
|
|
|
30498
32404
|
f"Structure: flat .js files at $JS_LIB_ROOT/<name>.min.js; "
|
|
30499
32405
|
f"pptxgenjs at $JS_LIB_ROOT/pptxgenjs/dist/pptxgen.cjs.js (CommonJS) or pptxgen.bundle.js (browser). "
|
|
30500
32406
|
f"Do NOT look in node_modules — libs are installed directly under $JS_LIB_ROOT. "
|
|
32407
|
+
"IMPORTANT: '/js_lib/...' is a tool/runtime alias, not a stable final HTML asset URL. "
|
|
32408
|
+
"If an HTML deliverable needs any asset from js_lib, copy it into a task-local relative asset folder "
|
|
32409
|
+
"such as './js/' or './assets/vendor/' next to the deliverable, then reference it with a plain relative path. "
|
|
32410
|
+
"Do not leave '/js_lib/...', '/assets/js_lib/...', or other virtual aliases in final exported HTML. "
|
|
30501
32411
|
"Use blackboard for shared state, ask_colleague for inter-agent communication. "
|
|
30502
32412
|
"Keep outputs concise and action-oriented. "
|
|
30503
32413
|
f"{code_note + ' ' if code_note else ''}"
|
|
30504
32414
|
f"{engineering_note + ' ' if engineering_note else ''}"
|
|
32415
|
+
f"{html_note + ' ' if html_note else ''}"
|
|
30505
32416
|
f"{_detect_os_shell_instruction()} "
|
|
30506
32417
|
f"{model_language_instruction(self.ui_language)} "
|
|
30507
32418
|
)
|
|
@@ -30557,13 +32468,13 @@ body{padding:18px}
|
|
|
30557
32468
|
"The skill's workflow, tools, and file structure OVERRIDE the plan's implementation "
|
|
30558
32469
|
"approach — if the plan says 'use python-pptx' but the skill says 'use PptxGenJS', "
|
|
30559
32470
|
"use PptxGenJS. The skill defines HOW to implement; the plan defines WHAT to do. "
|
|
30560
|
-
"AUTONOMOUS SKILL LOADING: When starting a coding, debugging, or architecture task, "
|
|
30561
|
-
"call list_skills to discover available skills, then load_skill to activate the most relevant ones. "
|
|
30562
|
-
"Load skills BEFORE you start working, not after you're stuck. "
|
|
30563
|
-
"Already-loaded skills appear as <loaded-skill> messages in your context — use them directly without reloading. "
|
|
30564
32471
|
"TODO TRACKING (mandatory): "
|
|
30565
32472
|
"When a plan step is active, follow the current todo subtask order instead of inventing a parallel path. "
|
|
30566
32473
|
"After completing ONE subtask, call TodoWrite immediately — mark that subtask as 'completed' and move the next one to 'in_progress' before doing more work. "
|
|
32474
|
+
"Prefer TodoWrite items as objects with explicit fields: "
|
|
32475
|
+
"{content, status, owner?, parent_step_id?}. "
|
|
32476
|
+
"If you must use strings, use ONLY canonical prefixes: '[ ]', '[>]', '[x]'. "
|
|
32477
|
+
"Do not use emoji markers or free-form localized status labels in TodoWrite payloads. "
|
|
30567
32478
|
"Do not silently batch multiple subtasks and do not delay todo updates until the end of the step. "
|
|
30568
32479
|
"This manual update is critical because skill re-evaluation is triggered by actual todo progress. "
|
|
30569
32480
|
"EDIT METHODOLOGY (follow strictly): "
|
|
@@ -30650,29 +32561,45 @@ body{padding:18px}
|
|
|
30650
32561
|
)
|
|
30651
32562
|
|
|
30652
32563
|
def _todo_write_rescue(self, args: dict) -> str:
|
|
32564
|
+
"""Rescue todo writer — accepts both strings and dicts, auto-normalizes.
|
|
32565
|
+
FIXED: Now preserves status from incoming items (especially 'completed')
|
|
32566
|
+
instead of resetting everything to 'pending'."""
|
|
30653
32567
|
raw_items = args.get("items", [])
|
|
30654
32568
|
if not isinstance(raw_items, list) or not raw_items:
|
|
30655
32569
|
raise ValueError("items must be a non-empty array")
|
|
30656
|
-
limited = raw_items[:7
|
|
32570
|
+
limited = raw_items[:12] # Allow more items (was 7) — plans can have 5+ subtasks
|
|
30657
32571
|
active_step = self._get_active_plan_step()
|
|
30658
32572
|
active_step_id = trim(str((active_step or {}).get("id", "") or ""), 20)
|
|
30659
32573
|
owner_hint = self._current_plan_worker_owner()
|
|
30660
32574
|
clean_items = []
|
|
32575
|
+
_status_alias = {
|
|
32576
|
+
"todo": "pending", "doing": "in_progress", "inprogress": "in_progress",
|
|
32577
|
+
"in-progress": "in_progress", "done": "completed", "finish": "completed",
|
|
32578
|
+
"finished": "completed",
|
|
32579
|
+
}
|
|
30661
32580
|
for idx, item in enumerate(limited):
|
|
30662
32581
|
if isinstance(item, dict):
|
|
30663
32582
|
content = str(item.get("content", item.get("text", item.get("title", "")))).strip()
|
|
30664
32583
|
owner = str(item.get("owner", "") or owner_hint).strip().lower()
|
|
30665
32584
|
parent_step_id = trim(str(item.get("parent_step_id", "") or active_step_id), 20)
|
|
32585
|
+
# Preserve status from incoming dict (critical for subtask state tracking)
|
|
32586
|
+
raw_status = str(item.get("status", item.get("state", "pending"))).strip().lower()
|
|
32587
|
+
status = _status_alias.get(raw_status, raw_status)
|
|
32588
|
+
if status not in {"pending", "in_progress", "completed"}:
|
|
32589
|
+
status = "pending"
|
|
30666
32590
|
else:
|
|
30667
32591
|
content = str(item).strip()
|
|
30668
32592
|
owner = owner_hint
|
|
30669
32593
|
parent_step_id = active_step_id
|
|
32594
|
+
parsed_status, parsed_content = split_todo_status_text(content)
|
|
32595
|
+
status = parsed_status or "pending"
|
|
32596
|
+
content = parsed_content or content
|
|
30670
32597
|
content = normalize_work_text(content) or content
|
|
30671
32598
|
if not content:
|
|
30672
32599
|
continue
|
|
30673
32600
|
row = {
|
|
30674
32601
|
"content": content,
|
|
30675
|
-
"status":
|
|
32602
|
+
"status": status,
|
|
30676
32603
|
}
|
|
30677
32604
|
if owner in {"developer", "explorer", "reviewer"}:
|
|
30678
32605
|
row["owner"] = owner
|
|
@@ -30681,17 +32608,52 @@ body{padding:18px}
|
|
|
30681
32608
|
clean_items.append(row)
|
|
30682
32609
|
if not clean_items:
|
|
30683
32610
|
raise ValueError("no valid todo item text")
|
|
30684
|
-
|
|
30685
|
-
|
|
30686
|
-
|
|
30687
|
-
|
|
30688
|
-
|
|
32611
|
+
# Only apply in_progress_index if NO items already have in_progress status
|
|
32612
|
+
has_in_progress = any(r["status"] == "in_progress" for r in clean_items)
|
|
32613
|
+
if not has_in_progress:
|
|
32614
|
+
in_progress_index = int(args.get("in_progress_index", 0) or 0)
|
|
32615
|
+
if in_progress_index < 0 or in_progress_index >= len(clean_items):
|
|
32616
|
+
in_progress_index = 0
|
|
32617
|
+
# Only set in_progress on a pending item
|
|
32618
|
+
for i, r in enumerate(clean_items):
|
|
32619
|
+
if r["status"] == "pending":
|
|
32620
|
+
if i >= in_progress_index:
|
|
32621
|
+
r["status"] = "in_progress"
|
|
32622
|
+
break
|
|
32623
|
+
route_kind = self._todo_route_kind(role=owner_hint)
|
|
32624
|
+
if route_kind in {"plan_single", "plan_sync"}:
|
|
30689
32625
|
return self._merge_plan_worker_todo_items(clean_items, role=owner_hint)
|
|
30690
|
-
if
|
|
32626
|
+
if route_kind == "pure_sync":
|
|
30691
32627
|
return self._merge_owner_scoped_todo_items(clean_items, role=owner_hint)
|
|
30692
32628
|
return self.todo.update(clean_items)
|
|
30693
32629
|
|
|
30694
|
-
def
|
|
32630
|
+
def _todo_progress_signature(self, rows: list[dict] | None = None) -> list[tuple[str, str, str, str]]:
|
|
32631
|
+
items = rows if isinstance(rows, list) else self.todo.snapshot()
|
|
32632
|
+
sig: list[tuple[str, str, str, str]] = []
|
|
32633
|
+
for row in items:
|
|
32634
|
+
if not isinstance(row, dict):
|
|
32635
|
+
continue
|
|
32636
|
+
sig.append(
|
|
32637
|
+
(
|
|
32638
|
+
normalize_work_text(str(row.get("content", "") or "")).strip().lower(),
|
|
32639
|
+
str(row.get("status", "pending") or "pending").strip().lower(),
|
|
32640
|
+
str(row.get("owner", "") or "").strip().lower(),
|
|
32641
|
+
str(row.get("parent_step_id", "") or "").strip(),
|
|
32642
|
+
)
|
|
32643
|
+
)
|
|
32644
|
+
return sig
|
|
32645
|
+
|
|
32646
|
+
def _todo_progress_changed(self, before_rows: list[dict] | None, after_rows: list[dict] | None) -> bool:
|
|
32647
|
+
return self._todo_progress_signature(before_rows) != self._todo_progress_signature(after_rows)
|
|
32648
|
+
|
|
32649
|
+
def _analyze_todo_result(
|
|
32650
|
+
self,
|
|
32651
|
+
tool_name: str,
|
|
32652
|
+
output: str,
|
|
32653
|
+
*,
|
|
32654
|
+
before_rows: list[dict] | None = None,
|
|
32655
|
+
after_rows: list[dict] | None = None,
|
|
32656
|
+
) -> tuple[str, str]:
|
|
30695
32657
|
txt = str(output or "").strip()
|
|
30696
32658
|
low = txt.lower()
|
|
30697
32659
|
has_worker_rows = self._todo_runtime_has_worker_rows()
|
|
@@ -31155,12 +33117,15 @@ body{padding:18px}
|
|
|
31155
33117
|
except Exception:
|
|
31156
33118
|
token_decoded = token
|
|
31157
33119
|
token_decoded = token_decoded.strip()
|
|
31158
|
-
|
|
31159
|
-
|
|
33120
|
+
for piece in split_structured_todo_content(token_decoded, limit=7):
|
|
33121
|
+
piece_text = str(piece or "").strip()
|
|
33122
|
+
if piece_text and piece_text not in out:
|
|
33123
|
+
out.append(piece_text)
|
|
31160
33124
|
if out:
|
|
31161
33125
|
return out[:7]
|
|
31162
33126
|
# Fallback: parse non-empty lines / bullets
|
|
31163
|
-
|
|
33127
|
+
normalized_text = normalize_embedded_newlines(text)
|
|
33128
|
+
for line in normalized_text.splitlines():
|
|
31164
33129
|
s = line.strip().strip(",")
|
|
31165
33130
|
s = re.sub(r"^[\-\*\d\.\)\s]+", "", s).strip()
|
|
31166
33131
|
if not s:
|
|
@@ -31337,11 +33302,21 @@ body{padding:18px}
|
|
|
31337
33302
|
|
|
31338
33303
|
def _dispatch_tool_inner(self, name: str, args: dict, role_key: str = "") -> str:
|
|
31339
33304
|
"""Inner tool dispatcher — all tool logic lives here."""
|
|
33305
|
+
# Fix 5d: Reset TodoWrite loop counter on non-TodoWrite tool calls
|
|
33306
|
+
if name not in ("TodoWrite", "TodoWriteRescue") and hasattr(self, '_todowrite_step_counter'):
|
|
33307
|
+
try:
|
|
33308
|
+
_rst_step = self._get_active_plan_step()
|
|
33309
|
+
if isinstance(_rst_step, dict):
|
|
33310
|
+
_rst_id = str(_rst_step.get("id", "") or "")
|
|
33311
|
+
if _rst_id:
|
|
33312
|
+
self._todowrite_step_counter.pop(_rst_id, None)
|
|
33313
|
+
except Exception:
|
|
33314
|
+
pass
|
|
31340
33315
|
if name == "bash":
|
|
31341
33316
|
guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), self.files_root)
|
|
31342
33317
|
if guard_error:
|
|
31343
33318
|
return guard_error
|
|
31344
|
-
meta = self._run_shell_meta(args["command"], self.files_root,
|
|
33319
|
+
meta = self._run_shell_meta(args["command"], self.files_root, self._shell_command_timeout())
|
|
31345
33320
|
self._emit(
|
|
31346
33321
|
"command",
|
|
31347
33322
|
{
|
|
@@ -31508,20 +33483,16 @@ body{padding:18px}
|
|
|
31508
33483
|
)
|
|
31509
33484
|
return out
|
|
31510
33485
|
if name == "TodoWrite":
|
|
31511
|
-
# Protect plan_step todos: worker TodoWrite creates sub-items, not replacements
|
|
31512
33486
|
bb = self._ensure_blackboard()
|
|
31513
|
-
|
|
31514
|
-
|
|
31515
|
-
for t in bb.get("project_todos", [])
|
|
31516
|
-
)
|
|
31517
|
-
if has_plan_steps:
|
|
33487
|
+
route_kind = self._todo_route_kind(role=str(role_key or ""), board=bb)
|
|
33488
|
+
if route_kind in {"plan_single", "plan_sync"}:
|
|
31518
33489
|
items = args.get("items", [])
|
|
31519
33490
|
if isinstance(items, list):
|
|
31520
33491
|
for item in items:
|
|
31521
33492
|
if isinstance(item, dict) and not item.get("key", "").startswith("bb:"):
|
|
31522
33493
|
item["owner"] = str(role_key or "developer")
|
|
31523
33494
|
result = self._merge_plan_worker_todo_items(items, role=str(role_key or "developer"))
|
|
31524
|
-
elif
|
|
33495
|
+
elif route_kind == "pure_sync":
|
|
31525
33496
|
items = args.get("items", [])
|
|
31526
33497
|
if isinstance(items, list):
|
|
31527
33498
|
for item in items:
|
|
@@ -31891,7 +33862,7 @@ body{padding:18px}
|
|
|
31891
33862
|
guard_error = self._guard_shell_write_scope(str(args.get("command", "") or ""), wt_path)
|
|
31892
33863
|
if guard_error:
|
|
31893
33864
|
return guard_error
|
|
31894
|
-
meta = self._run_shell_meta(args["command"], wt_path,
|
|
33865
|
+
meta = self._run_shell_meta(args["command"], wt_path, self._shell_command_timeout())
|
|
31895
33866
|
self._emit(
|
|
31896
33867
|
"command",
|
|
31897
33868
|
{
|
|
@@ -32348,6 +34319,18 @@ body{padding:18px}
|
|
|
32348
34319
|
},
|
|
32349
34320
|
)
|
|
32350
34321
|
self._persist()
|
|
34322
|
+
_proc = getattr(self, "_running_bash_proc", None)
|
|
34323
|
+
if _proc is not None:
|
|
34324
|
+
try:
|
|
34325
|
+
if os.name == "posix":
|
|
34326
|
+
try:
|
|
34327
|
+
os.killpg(os.getpgid(_proc.pid), signal.SIGKILL)
|
|
34328
|
+
except Exception:
|
|
34329
|
+
_proc.kill()
|
|
34330
|
+
else:
|
|
34331
|
+
_proc.kill()
|
|
34332
|
+
except Exception:
|
|
34333
|
+
pass
|
|
32351
34334
|
|
|
32352
34335
|
def _reviewer_approval_log_gate(self, board: dict | None = None) -> tuple[bool, str]:
|
|
32353
34336
|
bb = board if isinstance(board, dict) else self._ensure_blackboard()
|
|
@@ -32752,8 +34735,8 @@ body{padding:18px}
|
|
|
32752
34735
|
isinstance(t, dict) and t.get("category") == "plan_step"
|
|
32753
34736
|
for t in board.get("project_todos", [])
|
|
32754
34737
|
)
|
|
32755
|
-
_sync_complexity =
|
|
32756
|
-
if not _sync_has_plan and _sync_complexity
|
|
34738
|
+
_sync_complexity = normalize_task_complexity(profile.get("complexity", "simple"), default="simple")
|
|
34739
|
+
if not _sync_has_plan and task_complexity_at_least(_sync_complexity, "moderate"):
|
|
32757
34740
|
self.messages.append({
|
|
32758
34741
|
"role": "system",
|
|
32759
34742
|
"content": (
|
|
@@ -32860,29 +34843,59 @@ body{padding:18px}
|
|
|
32860
34843
|
self._mark_all_done_silently(note)
|
|
32861
34844
|
self._emit("status", {"summary": "manager decided finish; run paused"})
|
|
32862
34845
|
break
|
|
32863
|
-
# Detect manager
|
|
34846
|
+
# Detect manager loop: same instruction repeated with unchanged progress.
|
|
32864
34847
|
import hashlib as _hl_mgr
|
|
32865
|
-
|
|
34848
|
+
_delegate_progress_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
|
|
34849
|
+
_cur_hash = _hl_mgr.sha1((target + "|" + instruction + "|" + _delegate_progress_fp).encode("utf-8")).hexdigest()[:12]
|
|
32866
34850
|
if _cur_hash == _prev_delegation_hash:
|
|
32867
34851
|
_repeat_delegation_count += 1
|
|
32868
34852
|
else:
|
|
32869
34853
|
_repeat_delegation_count = 0
|
|
32870
34854
|
_prev_delegation_hash = _cur_hash
|
|
32871
|
-
if _repeat_delegation_count >=
|
|
32872
|
-
self._emit("status", {"summary": f"manager stuck: repeated identical delegation x{_repeat_delegation_count + 1}; forcing advance"})
|
|
34855
|
+
if _repeat_delegation_count >= 3:
|
|
32873
34856
|
_bb_stuck = self._ensure_blackboard()
|
|
32874
34857
|
_stuck_step = next(
|
|
32875
34858
|
(t for t in _bb_stuck.get("project_todos", [])
|
|
32876
34859
|
if t.get("category") == "plan_step" and t.get("status") == "in_progress"),
|
|
32877
34860
|
None,
|
|
32878
34861
|
)
|
|
32879
|
-
|
|
32880
|
-
|
|
32881
|
-
|
|
32882
|
-
|
|
32883
|
-
self._mark_all_done_silently("manager stuck: repeated delegation break")
|
|
32884
|
-
break
|
|
34862
|
+
_step_note = trim(str((_stuck_step or {}).get("content", "") or ""), 200)
|
|
34863
|
+
route = self._manager_recovery_route_for_repeated_delegate(route, board=_bb_stuck)
|
|
34864
|
+
target = str(route.get("target", "") or "").strip().lower()
|
|
34865
|
+
instruction = trim(str(route.get("instruction", "") or "").strip(), 1400)
|
|
32885
34866
|
_repeat_delegation_count = 0
|
|
34867
|
+
_prev_delegation_hash = ""
|
|
34868
|
+
self._emit(
|
|
34869
|
+
"status",
|
|
34870
|
+
{
|
|
34871
|
+
"summary": (
|
|
34872
|
+
f"manager loop recovery: repeated identical delegation under unchanged progress; "
|
|
34873
|
+
f"rerouting to {target}"
|
|
34874
|
+
)
|
|
34875
|
+
},
|
|
34876
|
+
)
|
|
34877
|
+
self._append_manager_context(
|
|
34878
|
+
{
|
|
34879
|
+
"role": "system",
|
|
34880
|
+
"content": (
|
|
34881
|
+
"[manager-loop-guard] Repeated identical delegation detected under unchanged progress. "
|
|
34882
|
+
"Do NOT mark the active step completed just because the owner was delegated repeatedly. "
|
|
34883
|
+
"Use a recovery route based on current step evidence and worker todo state."
|
|
34884
|
+
+ (f" Active step: {_step_note}." if _step_note else "")
|
|
34885
|
+
+ (f" Recovery target: {target}." if target else "")
|
|
34886
|
+
),
|
|
34887
|
+
"ts": now_ts(),
|
|
34888
|
+
}
|
|
34889
|
+
)
|
|
34890
|
+
self._blackboard_append_section(
|
|
34891
|
+
"execution_logs",
|
|
34892
|
+
"manager",
|
|
34893
|
+
(
|
|
34894
|
+
"manager repeated identical delegation; applied recovery reroute"
|
|
34895
|
+
+ (f"\nactive_step: {_step_note}" if _step_note else "")
|
|
34896
|
+
+ (f"\nrecovery_target: {target}" if target else "")
|
|
34897
|
+
),
|
|
34898
|
+
)
|
|
32886
34899
|
role = self._sanitize_agent_role(target) or "developer"
|
|
32887
34900
|
self._inject_manager_instruction(
|
|
32888
34901
|
role,
|
|
@@ -32902,13 +34915,6 @@ body{padding:18px}
|
|
|
32902
34915
|
media_inputs_pool=media_inputs_pool,
|
|
32903
34916
|
media_seen_ts_by_role=media_seen_ts_by_role,
|
|
32904
34917
|
)
|
|
32905
|
-
# Sync-mode skill auto-discovery: same mechanism as plan mode's step-completed trigger.
|
|
32906
|
-
# Runs on early rounds for developer/explorer. Uses goal_sig dedup — no re-loading if already loaded.
|
|
32907
|
-
if role in ("developer", "explorer") and rounds_used <= 2:
|
|
32908
|
-
try:
|
|
32909
|
-
self._refresh_loaded_skills_for_execution_focus(trigger=f"sync-worker-pre:{role}")
|
|
32910
|
-
except Exception:
|
|
32911
|
-
pass
|
|
32912
34918
|
board_before_fp = self._watchdog_state_fingerprint(self._ensure_blackboard())
|
|
32913
34919
|
step = self._multi_agent_turn(
|
|
32914
34920
|
role,
|
|
@@ -32918,49 +34924,24 @@ body{padding:18px}
|
|
|
32918
34924
|
self._blackboard_update_from_worker_step(role, step)
|
|
32919
34925
|
# Post-execution plan step advancement (replaces pre-execution advancement)
|
|
32920
34926
|
self._post_execution_plan_step_check(route, step if isinstance(step, dict) else {})
|
|
32921
|
-
|
|
32922
|
-
|
|
32923
|
-
|
|
32924
|
-
|
|
32925
|
-
|
|
32926
|
-
|
|
32927
|
-
|
|
32928
|
-
|
|
32929
|
-
|
|
32930
|
-
|
|
32931
|
-
|
|
32932
|
-
|
|
32933
|
-
)[:400]
|
|
32934
|
-
self._append_agent_context_message(
|
|
32935
|
-
role,
|
|
34927
|
+
progress_capsule = self._manager_worker_progress_capsule(
|
|
34928
|
+
role,
|
|
34929
|
+
step if isinstance(step, dict) else {},
|
|
34930
|
+
self._ensure_blackboard(),
|
|
34931
|
+
)
|
|
34932
|
+
if progress_capsule:
|
|
34933
|
+
recent_mgr = self.manager_context[-4:] if isinstance(self.manager_context, list) else []
|
|
34934
|
+
if not any(
|
|
34935
|
+
isinstance(msg, dict) and str(msg.get("content", "") or "").strip() == progress_capsule
|
|
34936
|
+
for msg in recent_mgr
|
|
34937
|
+
):
|
|
34938
|
+
self._append_manager_context(
|
|
32936
34939
|
{
|
|
32937
|
-
"role": "
|
|
32938
|
-
"content":
|
|
32939
|
-
"<failure-recovery>"
|
|
32940
|
-
f"All tool calls failed in this turn ({', '.join(_failed_tools)}). "
|
|
32941
|
-
f"Errors: {_err_outputs}\n"
|
|
32942
|
-
"Before retrying, STOP and diagnose:\n"
|
|
32943
|
-
"1) If a debugging skill is available, call load_skill('systematic-debugging') and follow its workflow.\n"
|
|
32944
|
-
"2) Read the EXACT error message — identify the root cause, not just the symptom.\n"
|
|
32945
|
-
"3) Form ONE hypothesis about the cause before making any changes.\n"
|
|
32946
|
-
"4) Apply ONE targeted fix, then verify with a test/build command.\n"
|
|
32947
|
-
"5) If still blocked after 2 attempts, report the exact blocker to the user."
|
|
32948
|
-
"</failure-recovery>"
|
|
32949
|
-
),
|
|
34940
|
+
"role": "system",
|
|
34941
|
+
"content": progress_capsule,
|
|
32950
34942
|
"ts": now_ts(),
|
|
32951
|
-
|
|
32952
|
-
},
|
|
32953
|
-
mirror_to_global=False,
|
|
34943
|
+
}
|
|
32954
34944
|
)
|
|
32955
|
-
# Auto-load systematic-debugging if failure involves code errors
|
|
32956
|
-
_code_err_kw = ("bash", "compile", "syntax", "test", "build", "traceback", "error:")
|
|
32957
|
-
if any(kw in _err_outputs.lower() for kw in _code_err_kw):
|
|
32958
|
-
_bb_sk = self._ensure_blackboard().get("loaded_skills", {})
|
|
32959
|
-
if isinstance(_bb_sk, dict) and "systematic-debugging" not in _bb_sk:
|
|
32960
|
-
try:
|
|
32961
|
-
self._load_skill_with_cache("systematic-debugging", load_source="auto:sync-worker-failure")
|
|
32962
|
-
except Exception:
|
|
32963
|
-
pass
|
|
32964
34945
|
# Fix 6b: Pure sync no-plan — read worker-done signal and notify manager
|
|
32965
34946
|
_bb_sync = self._ensure_blackboard()
|
|
32966
34947
|
if _bb_sync.pop("sync_worker_round_done", False):
|
|
@@ -33309,17 +35290,19 @@ body{padding:18px}
|
|
|
33309
35290
|
bb["plan"]["phase"] = "synthesis"
|
|
33310
35291
|
self.blackboard = bb
|
|
33311
35292
|
|
|
33312
|
-
# Synthesis with retry
|
|
35293
|
+
# Synthesis with retry + model fallback + deterministic fallback
|
|
33313
35294
|
proposal = None
|
|
33314
|
-
for _synth_attempt in range(
|
|
35295
|
+
for _synth_attempt in range(PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS):
|
|
33315
35296
|
proposal = self._plan_mode_synthesize_proposal(pinned_selection)
|
|
33316
35297
|
if proposal and proposal.get("options"):
|
|
33317
35298
|
break
|
|
33318
|
-
if _synth_attempt
|
|
35299
|
+
if _synth_attempt < (PLAN_MODE_SYNTHESIS_MAX_ATTEMPTS - 1):
|
|
33319
35300
|
self._emit("status", {"summary": "plan-mode: synthesis retry"})
|
|
33320
35301
|
if not proposal or not proposal.get("options"):
|
|
33321
35302
|
# Last resort: minimal fallback with simpler prompt and higher token budget
|
|
33322
35303
|
proposal = self._synthesis_minimal_fallback(pinned_selection)
|
|
35304
|
+
if not proposal or not proposal.get("options"):
|
|
35305
|
+
proposal = self._synthesis_programmatic_fallback()
|
|
33323
35306
|
if not proposal or not proposal.get("options"):
|
|
33324
35307
|
self._emit("status", {"summary": "plan-mode: synthesis failed, falling back to direct execution"})
|
|
33325
35308
|
self.runtime_plan_mode_needed = False
|
|
@@ -33725,21 +35708,29 @@ body{padding:18px}
|
|
|
33725
35708
|
f"- Option A: Direct workaround — bypass the blocker with an alternative method\n"
|
|
33726
35709
|
f"- Option B: Different path — re-approach the goal from a completely different angle\n"
|
|
33727
35710
|
f"- Option C: Minimal viable + user action items — do what's possible now, list what the user needs to do manually\n\n"
|
|
33728
|
-
f"
|
|
35711
|
+
f"You MUST call the submit_plan_proposal tool exactly once with:\n"
|
|
33729
35712
|
f"- context: brief failure analysis (what was tried, what failed, why)\n"
|
|
33730
35713
|
f"- options: array of 3 options, each with id (A/B/C), title, summary, steps, pros, cons, risk\n"
|
|
33731
|
-
f"- recommended: id of the recommended option\n
|
|
35714
|
+
f"- recommended: id of the recommended option\n"
|
|
35715
|
+
f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n\n"
|
|
33732
35716
|
f"{model_language_instruction(self.ui_language)}"
|
|
33733
35717
|
)
|
|
33734
35718
|
synthesis_ctx = [
|
|
33735
|
-
{
|
|
35719
|
+
{
|
|
35720
|
+
"role": "system",
|
|
35721
|
+
"content": (
|
|
35722
|
+
"You are a recovery planner analyzing execution failures and proposing alternative approaches. "
|
|
35723
|
+
"You MUST call submit_plan_proposal exactly once."
|
|
35724
|
+
),
|
|
35725
|
+
"ts": now_ts(),
|
|
35726
|
+
},
|
|
33736
35727
|
{"role": "user", "content": synthesis_prompt, "ts": now_ts()},
|
|
33737
35728
|
]
|
|
33738
35729
|
try:
|
|
33739
35730
|
response = self._chat_with_same_model_retry(
|
|
33740
35731
|
synthesis_ctx,
|
|
33741
35732
|
tools=self._plan_mode_synthesis_tools(),
|
|
33742
|
-
system="Generate a structured stall recovery plan.
|
|
35733
|
+
system="Generate a structured stall recovery plan. You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
|
|
33743
35734
|
max_tokens=STALL_PLAN_SYNTHESIS_MAX_TOKENS,
|
|
33744
35735
|
think=False,
|
|
33745
35736
|
stream_thinking=False,
|
|
@@ -33748,12 +35739,33 @@ body{padding:18px}
|
|
|
33748
35739
|
context_label="stall-plan synthesis",
|
|
33749
35740
|
retries=MODEL_OUTPUT_RETRY_TIMES,
|
|
33750
35741
|
)
|
|
33751
|
-
|
|
33752
|
-
|
|
33753
|
-
|
|
33754
|
-
|
|
33755
|
-
|
|
33756
|
-
|
|
35742
|
+
proposal = self._extract_plan_proposal_from_response(response)
|
|
35743
|
+
if proposal.get("options"):
|
|
35744
|
+
return proposal
|
|
35745
|
+
repair_response = self._chat_with_same_model_retry(
|
|
35746
|
+
synthesis_ctx + [
|
|
35747
|
+
{
|
|
35748
|
+
"role": "user",
|
|
35749
|
+
"content": (
|
|
35750
|
+
"Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
|
|
35751
|
+
"Retry now. Output exactly one submit_plan_proposal tool call and no prose."
|
|
35752
|
+
),
|
|
35753
|
+
"ts": now_ts(),
|
|
35754
|
+
}
|
|
35755
|
+
],
|
|
35756
|
+
tools=self._plan_mode_synthesis_tools(),
|
|
35757
|
+
system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
|
|
35758
|
+
max_tokens=STALL_PLAN_SYNTHESIS_MAX_TOKENS,
|
|
35759
|
+
think=False,
|
|
35760
|
+
stream_thinking=False,
|
|
35761
|
+
on_thinking_chunk=self._append_live_thinking,
|
|
35762
|
+
pinned_selection=pinned_selection,
|
|
35763
|
+
context_label="stall-plan synthesis repair",
|
|
35764
|
+
retries=1,
|
|
35765
|
+
)
|
|
35766
|
+
proposal = self._extract_plan_proposal_from_response(repair_response)
|
|
35767
|
+
if proposal.get("options"):
|
|
35768
|
+
return proposal
|
|
33757
35769
|
except Exception as exc:
|
|
33758
35770
|
self._emit("status", {"summary": f"stall plan synthesis error: {exc}"})
|
|
33759
35771
|
return {}
|
|
@@ -33824,6 +35836,186 @@ body{padding:18px}
|
|
|
33824
35836
|
lines.append(f"- {trim(str(t), 100)}")
|
|
33825
35837
|
return "\n".join(lines)
|
|
33826
35838
|
|
|
35839
|
+
def _normalize_plan_proposal_option(self, raw: dict, *, fallback_id: str) -> dict | None:
|
|
35840
|
+
if not isinstance(raw, dict):
|
|
35841
|
+
return None
|
|
35842
|
+
opt_id = trim(str(raw.get("id", "") or fallback_id).strip().upper(), 8) or fallback_id
|
|
35843
|
+
title = trim(str(raw.get("title", "") or "").strip(), 200)
|
|
35844
|
+
summary = trim(str(raw.get("summary", "") or "").strip(), 600)
|
|
35845
|
+
steps_raw = raw.get("steps", [])
|
|
35846
|
+
steps: list[str] = []
|
|
35847
|
+
if isinstance(steps_raw, list):
|
|
35848
|
+
for item in steps_raw:
|
|
35849
|
+
text = normalize_embedded_newlines(str(item or "")).strip()
|
|
35850
|
+
if text:
|
|
35851
|
+
steps.append(trim(text, PLAN_STEP_FULL_CONTENT_MAX_CHARS))
|
|
35852
|
+
elif isinstance(steps_raw, str):
|
|
35853
|
+
text = normalize_embedded_newlines(steps_raw).strip()
|
|
35854
|
+
if text:
|
|
35855
|
+
steps.append(trim(text, PLAN_STEP_FULL_CONTENT_MAX_CHARS))
|
|
35856
|
+
pros = trim(str(raw.get("pros", "") or "").strip(), 400)
|
|
35857
|
+
cons = trim(str(raw.get("cons", "") or "").strip(), 400)
|
|
35858
|
+
risk = trim(str(raw.get("risk", "") or "").strip().lower(), 20)
|
|
35859
|
+
if risk not in {"low", "medium", "high"}:
|
|
35860
|
+
risk = "medium"
|
|
35861
|
+
if not title and summary:
|
|
35862
|
+
title = trim(summary.split("\n", 1)[0], 120)
|
|
35863
|
+
if not title and steps:
|
|
35864
|
+
title = trim(steps[0].split("\n", 1)[0], 120)
|
|
35865
|
+
if not summary and steps:
|
|
35866
|
+
summary = trim(steps[0], 300)
|
|
35867
|
+
if not steps:
|
|
35868
|
+
return None
|
|
35869
|
+
return {
|
|
35870
|
+
"id": opt_id,
|
|
35871
|
+
"title": title or f"Option {opt_id}",
|
|
35872
|
+
"summary": summary or title or f"Plan {opt_id}",
|
|
35873
|
+
"steps": steps,
|
|
35874
|
+
"pros": pros,
|
|
35875
|
+
"cons": cons,
|
|
35876
|
+
"risk": risk,
|
|
35877
|
+
}
|
|
35878
|
+
|
|
35879
|
+
def _normalize_plan_proposal_payload(self, raw: object) -> dict:
|
|
35880
|
+
src = raw if isinstance(raw, dict) else {}
|
|
35881
|
+
context = trim(str(src.get("context", "") or "").strip(), 2000)
|
|
35882
|
+
raw_options = src.get("options", [])
|
|
35883
|
+
if isinstance(raw_options, dict):
|
|
35884
|
+
raw_options = [raw_options]
|
|
35885
|
+
if not isinstance(raw_options, list):
|
|
35886
|
+
raw_options = []
|
|
35887
|
+
option_ids = ("A", "B", "C")
|
|
35888
|
+
options: list[dict] = []
|
|
35889
|
+
seen_ids: set[str] = set()
|
|
35890
|
+
for idx, item in enumerate(raw_options[: max(1, PLAN_MODE_MAX_OPTIONS * 2)]):
|
|
35891
|
+
normalized = self._normalize_plan_proposal_option(
|
|
35892
|
+
item,
|
|
35893
|
+
fallback_id=option_ids[min(idx, len(option_ids) - 1)],
|
|
35894
|
+
)
|
|
35895
|
+
if not normalized:
|
|
35896
|
+
continue
|
|
35897
|
+
opt_id = str(normalized.get("id", "") or "").strip().upper() or option_ids[min(idx, len(option_ids) - 1)]
|
|
35898
|
+
if opt_id in seen_ids:
|
|
35899
|
+
opt_id = option_ids[min(len(seen_ids), len(option_ids) - 1)]
|
|
35900
|
+
normalized["id"] = opt_id
|
|
35901
|
+
if opt_id in seen_ids:
|
|
35902
|
+
continue
|
|
35903
|
+
seen_ids.add(opt_id)
|
|
35904
|
+
options.append(normalized)
|
|
35905
|
+
if len(options) >= PLAN_MODE_MAX_OPTIONS:
|
|
35906
|
+
break
|
|
35907
|
+
recommended = trim(str(src.get("recommended", "") or "").strip().upper(), 8)
|
|
35908
|
+
valid_ids = {str(opt.get("id", "") or "").strip().upper() for opt in options}
|
|
35909
|
+
if recommended not in valid_ids:
|
|
35910
|
+
recommended = str(options[0].get("id", "A") or "A") if options else ""
|
|
35911
|
+
return {
|
|
35912
|
+
"context": context,
|
|
35913
|
+
"options": options,
|
|
35914
|
+
"recommended": recommended,
|
|
35915
|
+
}
|
|
35916
|
+
|
|
35917
|
+
def _parse_plan_proposal_from_text(self, text: str) -> dict:
|
|
35918
|
+
raw = str(text or "").strip()
|
|
35919
|
+
if not raw:
|
|
35920
|
+
return {}
|
|
35921
|
+
candidates: list[str] = [raw]
|
|
35922
|
+
fence_matches = re.findall(r"```(?:json)?\s*([\s\S]*?)```", raw, flags=re.IGNORECASE)
|
|
35923
|
+
for block in fence_matches:
|
|
35924
|
+
block_text = str(block or "").strip()
|
|
35925
|
+
if block_text:
|
|
35926
|
+
candidates.append(block_text)
|
|
35927
|
+
start = raw.find("{")
|
|
35928
|
+
end = raw.rfind("}")
|
|
35929
|
+
if start >= 0 and end > start:
|
|
35930
|
+
candidates.append(raw[start : end + 1].strip())
|
|
35931
|
+
for candidate in candidates:
|
|
35932
|
+
repaired = repair_truncated_json_object(candidate)
|
|
35933
|
+
for probe in [candidate, repaired]:
|
|
35934
|
+
if not probe:
|
|
35935
|
+
continue
|
|
35936
|
+
try:
|
|
35937
|
+
parsed = json.loads(probe)
|
|
35938
|
+
except Exception:
|
|
35939
|
+
continue
|
|
35940
|
+
if isinstance(parsed, list):
|
|
35941
|
+
parsed = {"context": "", "options": parsed, "recommended": ""}
|
|
35942
|
+
proposal = self._normalize_plan_proposal_payload(parsed)
|
|
35943
|
+
if proposal.get("options"):
|
|
35944
|
+
return proposal
|
|
35945
|
+
return {}
|
|
35946
|
+
|
|
35947
|
+
def _extract_plan_proposal_from_response(self, response: dict | None) -> dict:
    """Pull a plan proposal out of a chat response.

    ``submit_plan_proposal`` tool calls are inspected first, in order. Their
    arguments may be a dict (used directly) or a string (decoded with
    ``parse_tool_arguments_with_error``). The first call whose normalized
    payload has a truthy ``options`` entry is returned. If no tool call
    qualifies, the response ``content`` is handed to
    ``_parse_plan_proposal_from_text``.

    Args:
        response: Model response mapping; any non-dict value yields ``{}``.

    Returns:
        The normalized proposal, or whatever the text parser returns.
    """
    if not isinstance(response, dict):
        return {}

    calls = response.get("tool_calls", [])
    for call in calls if isinstance(calls, list) else ():
        if not isinstance(call, dict):
            continue

        spec = call.get("function")
        if not isinstance(spec, dict):
            spec = {}
        if str(spec.get("name", "") or "").strip() != "submit_plan_proposal":
            continue

        raw_args = spec.get("arguments", {})
        if isinstance(raw_args, dict):
            payload = raw_args
        elif isinstance(raw_args, str):
            # Only the decoded value is used; the parse error is ignored here.
            payload, _ = parse_tool_arguments_with_error(raw_args)
        else:
            # Any other argument type cannot carry a proposal.
            continue

        candidate = self._normalize_plan_proposal_payload(payload)
        if candidate.get("options"):
            return candidate

    # No usable tool call: try to recover a proposal from the text content.
    return self._parse_plan_proposal_from_text(str(response.get("content", "") or ""))
|
|
35969
|
+
|
|
35970
|
+
def _synthesis_programmatic_fallback(self) -> dict:
    """Build a single-option plan proposal without calling the model.

    The proposal is assembled from the current goal text and up to six
    findings stored under ``plan.findings`` on the blackboard. When at least
    one non-empty finding exists, a five-step template is used and the first
    three findings become the context; otherwise a four-step template is
    used and the goal text becomes the context.

    Returns:
        The result of ``_normalize_plan_proposal_payload`` applied to a
        proposal with exactly one option (id ``"A"``) marked as recommended.
    """
    bb = self._ensure_blackboard()
    goal = trim(str(self.runtime_reclassify_goal or self._latest_user_goal_text() or ""), 1200)

    plan = bb.get("plan")
    findings = plan.get("findings", []) if isinstance(plan, dict) else []
    # This is the deterministic last resort, so a malformed findings value
    # (None, dict, number) must not raise when sliced. Same guard as the
    # minimal fallback uses.
    if not isinstance(findings, list):
        findings = []

    finding_lines: list[str] = []
    for row in findings[:6]:
        if not isinstance(row, dict):
            continue
        content = trim(str(row.get("content", "") or "").strip(), 280)
        if content:
            finding_lines.append(content)

    context = trim(
        (
            "Fallback synthesis generated automatically from the user goal and current research findings. "
            + (" | ".join(finding_lines[:3]) if finding_lines else goal)
        ),
        1800,
    )

    if finding_lines:
        # Findings exist: use the template that starts from reviewing them.
        detailed_steps = [
            "1. Review findings and lock scope\nUse the collected findings to define the exact execution boundary and required inputs.",
            "2. Prepare files and dependencies\nCreate or align the necessary files, folders, and runtime prerequisites for the task.",
            "3. Implement the main work\nExecute the core build/change/generation work for the requested output.",
            "4. Validate with observable evidence\nRun a concrete check and confirm the expected output, exit code, or rendered result.",
            "5. Generate delivery report\nSummarize what was built, how to run it, and the key outputs.",
        ]
    else:
        detailed_steps = [
            "1. Scope and constraints\nClarify the exact deliverable, inputs, and acceptance criteria for this task.",
            "2. Core implementation\nBuild the main artifact for the request using the most direct workable path.",
            "3. Verification\nRun at least one observable validation and capture the result.",
            "4. Delivery report\nSummarize what was built, how to run it, and the key outputs.",
        ]

    proposal = {
        "context": context,
        "options": [
            {
                "id": "A",
                "title": "Direct Execution Plan",
                "summary": trim(goal or "Execute the requested task with a direct, verifiable plan.", 240),
                "steps": detailed_steps,
                "pros": "Deterministic fallback that keeps plan-mode available even when model synthesis formatting is unstable.",
                "cons": "Less tailored than a fully synthesized multi-option proposal.",
                "risk": "medium",
            }
        ],
        "recommended": "A",
    }
    return self._normalize_plan_proposal_payload(proposal)
|
|
36018
|
+
|
|
33827
36019
|
def _plan_mode_synthesize_proposal(self, pinned_selection: str) -> dict:
|
|
33828
36020
|
bb = self._ensure_blackboard()
|
|
33829
36021
|
plan_data = bb.get("plan", {})
|
|
@@ -33859,10 +36051,11 @@ body{padding:18px}
|
|
|
33859
36051
|
f"## Research Findings\n{trim(findings_text, 6000)}\n\n"
|
|
33860
36052
|
f"{skills_section}"
|
|
33861
36053
|
f"## Instructions\n"
|
|
33862
|
-
f"
|
|
36054
|
+
f"You MUST call the submit_plan_proposal tool exactly once with:\n"
|
|
33863
36055
|
f"- context: brief background analysis\n"
|
|
33864
36056
|
f"- options: array of 1-{PLAN_MODE_MAX_OPTIONS} options, each with id (A/B/C), title, summary, steps, pros, cons, risk\n"
|
|
33865
|
-
f"- recommended: id of the recommended option\n
|
|
36057
|
+
f"- recommended: id of the recommended option\n"
|
|
36058
|
+
f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n\n"
|
|
33866
36059
|
f"STEP QUALITY REQUIREMENTS:\n"
|
|
33867
36060
|
f"- Each step must be a concrete, actionable instruction (NOT vague like 'analyze reports')\n"
|
|
33868
36061
|
f"- Include specific file paths (e.g., 'Read uploaded/IEDM_.parsed.md to extract key findings')\n"
|
|
@@ -33934,7 +36127,11 @@ body{padding:18px}
|
|
|
33934
36127
|
response = self._chat_with_same_model_retry(
|
|
33935
36128
|
synthesis_ctx,
|
|
33936
36129
|
tools=self._plan_mode_synthesis_tools(),
|
|
33937
|
-
system=
|
|
36130
|
+
system=(
|
|
36131
|
+
"Generate a structured plan proposal. "
|
|
36132
|
+
"You MUST call submit_plan_proposal exactly once. "
|
|
36133
|
+
"Do not answer with plain text."
|
|
36134
|
+
),
|
|
33938
36135
|
max_tokens=PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS,
|
|
33939
36136
|
think=False,
|
|
33940
36137
|
stream_thinking=False,
|
|
@@ -33943,13 +36140,31 @@ body{padding:18px}
|
|
|
33943
36140
|
context_label="plan-mode synthesis",
|
|
33944
36141
|
retries=MODEL_OUTPUT_RETRY_TIMES,
|
|
33945
36142
|
)
|
|
33946
|
-
|
|
33947
|
-
|
|
33948
|
-
|
|
33949
|
-
|
|
33950
|
-
|
|
33951
|
-
|
|
33952
|
-
|
|
36143
|
+
proposal = self._extract_plan_proposal_from_response(response)
|
|
36144
|
+
if proposal.get("options"):
|
|
36145
|
+
return proposal
|
|
36146
|
+
repair_response = self._chat_with_same_model_retry(
|
|
36147
|
+
synthesis_ctx + [
|
|
36148
|
+
{
|
|
36149
|
+
"role": "user",
|
|
36150
|
+
"content": (
|
|
36151
|
+
"Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
|
|
36152
|
+
"Retry now. Output exactly one submit_plan_proposal tool call and no prose."
|
|
36153
|
+
),
|
|
36154
|
+
"ts": now_ts(),
|
|
36155
|
+
}
|
|
36156
|
+
],
|
|
36157
|
+
tools=self._plan_mode_synthesis_tools(),
|
|
36158
|
+
system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
|
|
36159
|
+
max_tokens=PLAN_MODE_MANAGER_SYNTHESIS_MAX_TOKENS,
|
|
36160
|
+
think=False,
|
|
36161
|
+
stream_thinking=False,
|
|
36162
|
+
on_thinking_chunk=self._append_live_thinking,
|
|
36163
|
+
pinned_selection=pinned_selection,
|
|
36164
|
+
context_label="plan-mode synthesis repair",
|
|
36165
|
+
retries=1,
|
|
36166
|
+
)
|
|
36167
|
+
return self._extract_plan_proposal_from_response(repair_response)
|
|
33953
36168
|
|
|
33954
36169
|
def _synthesis_minimal_fallback(self, pinned_selection: str) -> dict:
|
|
33955
36170
|
"""Last-resort: ask model for a single simple plan with higher max_tokens."""
|
|
@@ -33961,33 +36176,63 @@ body{padding:18px}
|
|
|
33961
36176
|
for f in (findings[:5] if isinstance(findings, list) else [])
|
|
33962
36177
|
)
|
|
33963
36178
|
prompt = (
|
|
33964
|
-
f"Generate ONE simple plan for this task.
|
|
36179
|
+
f"Generate ONE simple plan for this task. You MUST call submit_plan_proposal with exactly 1 option.\n\n"
|
|
33965
36180
|
f"Task: {goal}\n\nFindings: {trim(findings_text, 3000)}\n\n"
|
|
33966
36181
|
f"Return a single option with id='A', title, summary, and 5-10 concrete steps.\n"
|
|
36182
|
+
f"Do NOT answer with prose-only markdown. A response without submit_plan_proposal tool call is invalid.\n"
|
|
33967
36183
|
f"{model_language_instruction(self.ui_language)}"
|
|
33968
36184
|
)
|
|
33969
36185
|
ctx = [
|
|
33970
|
-
{
|
|
36186
|
+
{
|
|
36187
|
+
"role": "system",
|
|
36188
|
+
"content": (
|
|
36189
|
+
"You MUST call submit_plan_proposal exactly once. "
|
|
36190
|
+
"Do not answer with plain text."
|
|
36191
|
+
),
|
|
36192
|
+
"ts": now_ts(),
|
|
36193
|
+
},
|
|
33971
36194
|
{"role": "user", "content": prompt, "ts": now_ts()},
|
|
33972
36195
|
]
|
|
33973
36196
|
try:
|
|
33974
36197
|
response = self._chat_with_same_model_retry(
|
|
33975
36198
|
ctx,
|
|
33976
36199
|
tools=self._plan_mode_synthesis_tools(),
|
|
33977
|
-
system="
|
|
36200
|
+
system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
|
|
33978
36201
|
max_tokens=6000,
|
|
33979
36202
|
think=False,
|
|
33980
36203
|
stream_thinking=False,
|
|
33981
36204
|
on_thinking_chunk=self._append_live_thinking,
|
|
33982
36205
|
pinned_selection=pinned_selection,
|
|
33983
36206
|
context_label="plan-mode minimal fallback",
|
|
33984
|
-
retries=
|
|
36207
|
+
retries=3,
|
|
36208
|
+
)
|
|
36209
|
+
proposal = self._extract_plan_proposal_from_response(response)
|
|
36210
|
+
if proposal.get("options"):
|
|
36211
|
+
return proposal
|
|
36212
|
+
repair_response = self._chat_with_same_model_retry(
|
|
36213
|
+
ctx + [
|
|
36214
|
+
{
|
|
36215
|
+
"role": "user",
|
|
36216
|
+
"content": (
|
|
36217
|
+
"Previous answer was invalid because it did not produce a valid submit_plan_proposal tool call. "
|
|
36218
|
+
"Retry now. Output exactly one submit_plan_proposal tool call and no prose."
|
|
36219
|
+
),
|
|
36220
|
+
"ts": now_ts(),
|
|
36221
|
+
}
|
|
36222
|
+
],
|
|
36223
|
+
tools=self._plan_mode_synthesis_tools(),
|
|
36224
|
+
system="You MUST call submit_plan_proposal exactly once. Do not answer with plain text.",
|
|
36225
|
+
max_tokens=6000,
|
|
36226
|
+
think=False,
|
|
36227
|
+
stream_thinking=False,
|
|
36228
|
+
on_thinking_chunk=self._append_live_thinking,
|
|
36229
|
+
pinned_selection=pinned_selection,
|
|
36230
|
+
context_label="plan-mode minimal fallback repair",
|
|
36231
|
+
retries=1,
|
|
33985
36232
|
)
|
|
33986
|
-
|
|
33987
|
-
|
|
33988
|
-
|
|
33989
|
-
if isinstance(args, dict) and args.get("options"):
|
|
33990
|
-
return dict(args)
|
|
36233
|
+
proposal = self._extract_plan_proposal_from_response(repair_response)
|
|
36234
|
+
if proposal.get("options"):
|
|
36235
|
+
return proposal
|
|
33991
36236
|
except Exception:
|
|
33992
36237
|
pass
|
|
33993
36238
|
return {}
|
|
@@ -34071,7 +36316,7 @@ body{padding:18px}
|
|
|
34071
36316
|
grouped_steps = self._group_plan_steps(raw_steps if isinstance(raw_steps, list) else [])
|
|
34072
36317
|
plan_todos: list[dict] = []
|
|
34073
36318
|
for i, step in enumerate(grouped_steps[:max(1, int(limit))]):
|
|
34074
|
-
step_text = trim(
|
|
36319
|
+
step_text = trim(normalize_embedded_newlines(step).strip(), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
|
|
34075
36320
|
if not step_text:
|
|
34076
36321
|
continue
|
|
34077
36322
|
step_lines = step_text.split("\n")
|
|
@@ -34085,6 +36330,7 @@ body{padding:18px}
|
|
|
34085
36330
|
"category": "plan_step",
|
|
34086
36331
|
"plan_step_index": i,
|
|
34087
36332
|
"created_at": float(now_ts()),
|
|
36333
|
+
"activated_at": float(now_ts()) if not plan_todos else None,
|
|
34088
36334
|
"completed_at": None,
|
|
34089
36335
|
"completed_by": "",
|
|
34090
36336
|
"evidence": "",
|
|
@@ -34197,7 +36443,7 @@ body{padding:18px}
|
|
|
34197
36443
|
_mid_re_exec = _re_exec.compile(r"(?<=\S)\s+(\d+\.\d+\s)")
|
|
34198
36444
|
for t in plan_todos:
|
|
34199
36445
|
idx = int(t.get("plan_step_index", 0) or 0) + 1
|
|
34200
|
-
full =
|
|
36446
|
+
full = normalize_embedded_newlines(t.get("full_content", "") or t.get("content", "")).strip()
|
|
34201
36447
|
# Normalize: split concatenated N.N sub-steps onto own lines
|
|
34202
36448
|
full = _mid_re_exec.sub(r"\n\1", full)
|
|
34203
36449
|
header = full.split("\n")[0] if "\n" in full else full
|
|
@@ -34359,7 +36605,7 @@ body{padding:18px}
|
|
|
34359
36605
|
# Phase 0: Normalize — split mid-string N.N onto own lines
|
|
34360
36606
|
normalized: list[str] = []
|
|
34361
36607
|
for s in raw_steps:
|
|
34362
|
-
text =
|
|
36608
|
+
text = normalize_embedded_newlines(s).strip()
|
|
34363
36609
|
if not text:
|
|
34364
36610
|
continue
|
|
34365
36611
|
fixed = mid_numbered_re.sub(r"\n\1", text)
|
|
@@ -34694,18 +36940,16 @@ body{padding:18px}
|
|
|
34694
36940
|
chosen_title = trim(str(chosen.get("title", "") or choice_id).strip(), 800)
|
|
34695
36941
|
chosen_summary = trim(str(chosen.get("summary", "") or "").strip(), PLAN_STEP_FULL_CONTENT_MAX_CHARS)
|
|
34696
36942
|
# Preserve current complexity unless the user explicitly changes it elsewhere.
|
|
34697
|
-
_current_complexity =
|
|
34698
|
-
|
|
34699
|
-
|
|
34700
|
-
|
|
34701
|
-
|
|
34702
|
-
).strip().lower(),
|
|
34703
|
-
20,
|
|
36943
|
+
_current_complexity = normalize_task_complexity(
|
|
36944
|
+
self.runtime_task_complexity
|
|
36945
|
+
or profile.get("complexity", judgement.get("complexity", ""))
|
|
36946
|
+
or "",
|
|
36947
|
+
default="",
|
|
34704
36948
|
)
|
|
34705
36949
|
if _current_complexity in TASK_COMPLEXITY_LEVELS:
|
|
34706
36950
|
self.runtime_task_complexity = _current_complexity
|
|
34707
36951
|
else:
|
|
34708
|
-
_current_complexity =
|
|
36952
|
+
_current_complexity = normalize_task_complexity(str(self.runtime_task_complexity or "").strip().lower(), default="")
|
|
34709
36953
|
self.runtime_complexity_floor = str(_current_complexity or "complex")
|
|
34710
36954
|
_plan_risk = self._resolve_plan_option_risk(chosen)
|
|
34711
36955
|
try:
|
|
@@ -35007,13 +37251,6 @@ body{padding:18px}
|
|
|
35007
37251
|
self.agent_round_index = int(self.agent_round_index) + 1
|
|
35008
37252
|
self.current_phase = "model-call"
|
|
35009
37253
|
self.current_tool_name = ""
|
|
35010
|
-
# Single-mode skill auto-discovery: same as plan mode. Runs on first 2 rounds only.
|
|
35011
|
-
# Uses goal_sig dedup — if skills already loaded for this goal, no-op.
|
|
35012
|
-
if int(self.agent_round_index) <= 2:
|
|
35013
|
-
try:
|
|
35014
|
-
self._refresh_loaded_skills_for_execution_focus(trigger="single-worker-pre")
|
|
35015
|
-
except Exception:
|
|
35016
|
-
pass
|
|
35017
37254
|
if level_budget > 0 and int(self.agent_round_index) > int(level_budget):
|
|
35018
37255
|
force_single_tool_rounds = max(force_single_tool_rounds, 2)
|
|
35019
37256
|
if not compact_budget_notified:
|
|
@@ -35264,7 +37501,7 @@ body{padding:18px}
|
|
|
35264
37501
|
)
|
|
35265
37502
|
continue
|
|
35266
37503
|
stop_note = (
|
|
35267
|
-
"模型连续多轮仅输出思考而无动作,自动执行已熔断停止(fault_counter>=
|
|
37504
|
+
"模型连续多轮仅输出思考而无动作,自动执行已熔断停止(fault_counter>=15)。"
|
|
35268
37505
|
"请尝试拆分任务,或切换更强的推理模型后继续。"
|
|
35269
37506
|
)
|
|
35270
37507
|
raise CircuitBreakerTriggered(stop_note)
|
|
@@ -35611,6 +37848,7 @@ body{padding:18px}
|
|
|
35611
37848
|
self.current_phase = f"tool:{name}"
|
|
35612
37849
|
self.current_tool_name = name
|
|
35613
37850
|
round_tool_names.append(name)
|
|
37851
|
+
todo_rows_before = self.todo.snapshot() if name in {"TodoWrite", "TodoWriteRescue"} else None
|
|
35614
37852
|
args = tc["function"]["arguments"]
|
|
35615
37853
|
args_error = str(tc.get("args_error", "") or "").strip()
|
|
35616
37854
|
raw_args = tc.get("raw_arguments")
|
|
@@ -35775,15 +38013,41 @@ body{padding:18px}
|
|
|
35775
38013
|
recovery_retry_rounds = 0
|
|
35776
38014
|
if dispatched_name in {"TodoWrite", "TodoWriteRescue"}:
|
|
35777
38015
|
todo_attempted = True
|
|
35778
|
-
|
|
38016
|
+
todo_rows_after = self.todo.snapshot()
|
|
38017
|
+
state, reason = self._analyze_todo_result(
|
|
38018
|
+
dispatched_name,
|
|
38019
|
+
output,
|
|
38020
|
+
before_rows=todo_rows_before,
|
|
38021
|
+
after_rows=todo_rows_after,
|
|
38022
|
+
)
|
|
35779
38023
|
if state == "ok":
|
|
35780
38024
|
used_todo = True
|
|
35781
38025
|
self.todo_write_issue_count = 0
|
|
35782
38026
|
self.todo_last_issue = ""
|
|
38027
|
+
self._emit(
|
|
38028
|
+
"status",
|
|
38029
|
+
{"summary": f"todo updated ({trim(reason, 100)})"},
|
|
38030
|
+
)
|
|
35783
38031
|
else:
|
|
35784
38032
|
self.todo_write_issue_count += 1
|
|
35785
38033
|
self.todo_last_issue = reason
|
|
35786
|
-
|
|
38034
|
+
self._emit(
|
|
38035
|
+
"status",
|
|
38036
|
+
{
|
|
38037
|
+
"summary": (
|
|
38038
|
+
"todo update produced no progress "
|
|
38039
|
+
f"({trim(reason, 100)})"
|
|
38040
|
+
)
|
|
38041
|
+
},
|
|
38042
|
+
)
|
|
38043
|
+
repeat_no_progress = any(
|
|
38044
|
+
token in str(reason or "").lower()
|
|
38045
|
+
for token in ("repeated", "no progress", "without changing")
|
|
38046
|
+
)
|
|
38047
|
+
if self.todo_write_issue_count >= 2 and (
|
|
38048
|
+
not self._todo_runtime_has_worker_rows(single_role)
|
|
38049
|
+
or repeat_no_progress
|
|
38050
|
+
):
|
|
35787
38051
|
self._emit(
|
|
35788
38052
|
"status",
|
|
35789
38053
|
{
|
|
@@ -36118,6 +38382,22 @@ body{padding:18px}
|
|
|
36118
38382
|
self.rounds_without_todo += 1
|
|
36119
38383
|
else:
|
|
36120
38384
|
self.rounds_without_todo += 1
|
|
38385
|
+
concrete_work_without_todo = (
|
|
38386
|
+
not used_todo
|
|
38387
|
+
and self._todo_runtime_has_worker_rows(single_role)
|
|
38388
|
+
and any(
|
|
38389
|
+
isinstance(r, dict)
|
|
38390
|
+
and r.get("ok", False)
|
|
38391
|
+
and str(r.get("name", "") or "") in {
|
|
38392
|
+
"write_file",
|
|
38393
|
+
"edit_file",
|
|
38394
|
+
"bash",
|
|
38395
|
+
"read_file",
|
|
38396
|
+
"write_to_blackboard",
|
|
38397
|
+
}
|
|
38398
|
+
for r in single_round_tool_results
|
|
38399
|
+
)
|
|
38400
|
+
)
|
|
36121
38401
|
if (
|
|
36122
38402
|
todo_attempted
|
|
36123
38403
|
and not used_todo
|
|
@@ -36142,18 +38422,25 @@ body{padding:18px}
|
|
|
36142
38422
|
now_tick = now_ts()
|
|
36143
38423
|
can_remind = (now_tick - self.last_todo_reminder_ts) >= 20
|
|
36144
38424
|
if can_remind and self.todo_reminder_count < 2:
|
|
36145
|
-
if
|
|
36146
|
-
self.
|
|
36147
|
-
|
|
36148
|
-
|
|
36149
|
-
|
|
36150
|
-
|
|
36151
|
-
|
|
38425
|
+
if concrete_work_without_todo:
|
|
38426
|
+
self._append_plan_guidance_bubble(
|
|
38427
|
+
"<reminder>Update your todos now: finish the current subtask in TodoWrite before moving on.</reminder>",
|
|
38428
|
+
summary="todo reminder",
|
|
38429
|
+
)
|
|
38430
|
+
self.last_todo_reminder_ts = now_tick
|
|
38431
|
+
self.todo_reminder_count += 1
|
|
38432
|
+
elif not self._todo_runtime_has_worker_rows(single_role) and self.rounds_without_todo >= 2:
|
|
38433
|
+
self._append_plan_guidance_bubble(
|
|
38434
|
+
"<reminder>Please call TodoWrite now to update the current subtask before continuing. If it fails/repeats, switch to TodoWriteRescue.</reminder>",
|
|
38435
|
+
summary="todo reminder",
|
|
36152
38436
|
)
|
|
36153
38437
|
self.last_todo_reminder_ts = now_tick
|
|
36154
38438
|
self.todo_reminder_count += 1
|
|
36155
38439
|
elif self._todo_should_block_auto_continue("") and self.rounds_without_todo >= 4:
|
|
36156
|
-
self.
|
|
38440
|
+
self._append_plan_guidance_bubble(
|
|
38441
|
+
"<reminder>Update your todos now: finish the current subtask in TodoWrite before moving on.</reminder>",
|
|
38442
|
+
summary="todo reminder",
|
|
38443
|
+
)
|
|
36157
38444
|
self.last_todo_reminder_ts = now_tick
|
|
36158
38445
|
self.todo_reminder_count += 1
|
|
36159
38446
|
if manual_compact:
|
|
@@ -36247,6 +38534,12 @@ body{padding:18px}
|
|
|
36247
38534
|
self._generate_run_completion_summary()
|
|
36248
38535
|
except Exception:
|
|
36249
38536
|
pass
|
|
38537
|
+
try:
|
|
38538
|
+
_applied_runtime_updates = self._apply_deferred_runtime_updates()
|
|
38539
|
+
for _note in _applied_runtime_updates[:6]:
|
|
38540
|
+
self._emit("status", {"summary": _note})
|
|
38541
|
+
except Exception:
|
|
38542
|
+
pass
|
|
36250
38543
|
self._emit("status", {"summary": "run finished"})
|
|
36251
38544
|
cb = self.run_finished_callback
|
|
36252
38545
|
if cb:
|
|
@@ -36525,6 +38818,7 @@ body{padding:18px}
|
|
|
36525
38818
|
"live_run_notice_elapsed": round(float(self.live_run_notice_elapsed or 0.0), 1),
|
|
36526
38819
|
"max_agent_rounds": int(self.max_agent_rounds),
|
|
36527
38820
|
"max_run_seconds": int(self.max_run_seconds),
|
|
38821
|
+
"shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
36528
38822
|
"auto_model_switch": bool(self.auto_model_switch),
|
|
36529
38823
|
"arbiter_enabled": bool(self.arbiter_enabled),
|
|
36530
38824
|
"arbiter_model": str(self.arbiter_model or ""),
|
|
@@ -36704,6 +38998,7 @@ class SessionManager:
|
|
|
36704
38998
|
context_limit_locked: bool = False,
|
|
36705
38999
|
max_rounds: int = MAX_AGENT_ROUNDS,
|
|
36706
39000
|
max_run_seconds: int = MAX_RUN_SECONDS,
|
|
39001
|
+
shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
36707
39002
|
auto_model_switch: bool = False,
|
|
36708
39003
|
arbiter_enabled: bool = True,
|
|
36709
39004
|
arbiter_model: str = "",
|
|
@@ -36749,6 +39044,12 @@ class SessionManager:
|
|
|
36749
39044
|
maximum=MAX_RUN_TIMEOUT_SECONDS,
|
|
36750
39045
|
fallback=MAX_RUN_SECONDS,
|
|
36751
39046
|
)
|
|
39047
|
+
self.shell_command_timeout_seconds = normalize_timeout_seconds(
|
|
39048
|
+
shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
39049
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
39050
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
39051
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
39052
|
+
)
|
|
36752
39053
|
self.auto_model_switch = bool(auto_model_switch)
|
|
36753
39054
|
self.arbiter_enabled = bool(arbiter_enabled)
|
|
36754
39055
|
self.arbiter_model = str(arbiter_model or "").strip()
|
|
@@ -37031,6 +39332,12 @@ class SessionManager:
|
|
|
37031
39332
|
)
|
|
37032
39333
|
sess.execution_mode = normalize_execution_mode(self.execution_mode, default=EXECUTION_MODE_SYNC)
|
|
37033
39334
|
sess.single_advance_prompt_enhance = bool(self.single_advance_prompt_enhance)
|
|
39335
|
+
sess.shell_command_timeout_seconds = normalize_timeout_seconds(
|
|
39336
|
+
self.shell_command_timeout_seconds,
|
|
39337
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
39338
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
39339
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
39340
|
+
)
|
|
37034
39341
|
sess._apply_active_profile()
|
|
37035
39342
|
sess.updated_at = now_ts()
|
|
37036
39343
|
sess._persist()
|
|
@@ -37091,6 +39398,7 @@ class SessionManager:
|
|
|
37091
39398
|
context_limit_locked=self.context_limit_locked,
|
|
37092
39399
|
max_rounds=self.max_rounds,
|
|
37093
39400
|
max_run_seconds=self.max_run_seconds,
|
|
39401
|
+
shell_command_timeout_seconds=self.shell_command_timeout_seconds,
|
|
37094
39402
|
auto_model_switch=self.auto_model_switch,
|
|
37095
39403
|
arbiter_enabled=self.arbiter_enabled,
|
|
37096
39404
|
arbiter_model=self.arbiter_model,
|
|
@@ -37140,6 +39448,7 @@ class SessionManager:
|
|
|
37140
39448
|
context_limit_locked=self.context_limit_locked,
|
|
37141
39449
|
max_rounds=self.max_rounds,
|
|
37142
39450
|
max_run_seconds=self.max_run_seconds,
|
|
39451
|
+
shell_command_timeout_seconds=self.shell_command_timeout_seconds,
|
|
37143
39452
|
auto_model_switch=self.auto_model_switch,
|
|
37144
39453
|
arbiter_enabled=self.arbiter_enabled,
|
|
37145
39454
|
arbiter_model=self.arbiter_model,
|
|
@@ -38188,7 +40497,7 @@ function renderLlmFields(provider){const container=E('llmFieldsContainer');if(!c
|
|
|
38188
40497
|
async function scanOllamaModels(){const urlEl=E('llmF_ollama_url');const sel=E('llmF_ollama_model');const hint=E('ollamaScanHint');const baseUrl=(urlEl?.value||'').trim()||'http://127.0.0.1:11434';if(hint)hint.textContent=t('llm_scanning');try{const res=await fetch('/api/ollama/models?base_url='+encodeURIComponent(baseUrl));const data=await res.json();if(!data.ok||!data.models?.length){if(hint)hint.textContent=t('llm_scan_empty')+(data.error?' ('+data.error+')':'');return}if(sel){sel.innerHTML='';for(const m of data.models){const op=document.createElement('option');op.value=m;op.textContent=m;sel.appendChild(op)}}if(hint)hint.textContent=t('llm_scan_found').replace('{n}',String(data.models.length))}catch(err){if(hint)hint.textContent=t('llm_scan_error')+': '+(err.message||String(err))}}
|
|
38189
40498
|
async function scanOpenAICompatModels(provider){const scanMap={openai_compat:{urlKey:'openai_url',modelKey:'openai_model',keyKey:'openai_key',defaultUrl:'https://api.openai.com/v1'},siliconflow:{urlKey:'siliconflow_url',modelKey:'siliconflow_model',keyKey:'siliconflow_key',defaultUrl:'https://api.siliconflow.cn/v1'},vllm:{urlKey:'vllm_url',modelKey:'vllm_model',keyKey:'vllm_key',defaultUrl:'http://localhost:8000/v1'},lmstudio:{urlKey:'lmstudio_url',modelKey:'lmstudio_model',keyKey:'lmstudio_key',defaultUrl:'http://localhost:1234/v1'},glm:{urlKey:'glm_url',modelKey:'glm_model',keyKey:'glm_key',defaultUrl:'https://open.bigmodel.cn/api/paas/v4'},kimi:{urlKey:'kimi_url',modelKey:'kimi_model',keyKey:'kimi_key',defaultUrl:'https://api.moonshot.cn/v1'},openrouter:{urlKey:'openrouter_url',modelKey:'openrouter_model',keyKey:'openrouter_key',defaultUrl:'https://openrouter.ai/api/v1'},custom_http:{urlKey:'custom_url',modelKey:'custom_model',keyKey:'custom_key',defaultUrl:''}};const normalizedProvider=String(provider||'openai_compat').trim()||'openai_compat';const meta=scanMap[normalizedProvider]||scanMap.openai_compat;const urlEl=E('llmF_'+meta.urlKey);const modelEl=E('llmF_'+meta.modelKey);const hint=E('localScanHint');const baseUrl=(urlEl?.value||'').trim()||meta.defaultUrl||'';const apiKey=(E('llmF_'+meta.keyKey)?.value||'').trim();if(hint)hint.textContent=t('llm_scanning');try{let url='/api/openai_compat/models?provider='+encodeURIComponent(normalizedProvider)+'&base_url='+encodeURIComponent(baseUrl);if(apiKey)url+='&api_key='+encodeURIComponent(apiKey);const res=await fetch(url);const data=await res.json();const models=Array.isArray(data.models)?data.models.filter(Boolean):[];if(!data.ok){if(hint)hint.textContent=t('llm_scan_error')+(data.error?' 
('+data.error+')':'');return}if(models.length){if(modelEl&&!String(modelEl.value||'').trim())modelEl.value=models[0];if(hint)hint.textContent=t('llm_scan_found').replace('{n}',String(models.length))+': '+models.slice(0,3).join(', ');return}if(data.reachable){if(hint)hint.textContent=t('llm_scan_reachable_manual')+(data.error?' ('+data.error+')':'');return}if(hint)hint.textContent=t('llm_scan_empty')+(data.error?' ('+data.error+')':'')}catch(err){if(hint)hint.textContent=t('llm_scan_error')+': '+(err.message||String(err))}}
|
|
38190
40499
|
function collectLlmConfig(){const provider=E('llmProvider')?.value||'ollama';const config={provider:provider};if(provider==='ollama'){config.ollama_url=(E('llmF_ollama_url')?.value||'').trim()||'http://127.0.0.1:11434';config.ollama_model=E('llmF_ollama_model')?.value||''}else if(provider==='custom_http'){const fields=LLM_PROVIDER_FIELDS.custom_http;for(const f of fields){const el=E('llmF_'+f.key);if(!el)continue;if(f.type==='textarea'){config[f.key]=el.value.trim()}else if(f.key==='temperature'){const v=parseFloat(el.value);if(!isNaN(v))config[f.key]=v}else if(f.key==='request_timeout'){const v=parseInt(el.value,10);if(!isNaN(v)&&v>0)config[f.key]=v}else{config[f.key]=el.value.trim()}}}else{const fields=LLM_PROVIDER_FIELDS[provider]||[];for(const f of fields){const el=E('llmF_'+f.key);if(el){const raw=el.value.trim();config[f.key]=(provider!=='custom_http'&&f.type==='url')?(raw||String(f.placeholder||'').trim()):raw}}}config.thinking_stream=E('llmF_thinking_stream')?.value==='true';return config}
|
|
38191
|
-
async function submitLlmConfig(){if(!S.activeId){showError(t('select_session_first'));return}const config=collectLlmConfig();try{const payload={filename:'LLM.config.json',mime:'application/json',content_b64:btoa(unescape(encodeURIComponent(JSON.stringify(config,null,2))))};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'))}else{showError('')}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true});E('llmConfigModal').style.display='none'}catch(err){showError(err.message||String(err))}}
|
|
40500
|
+
async function submitLlmConfig(){if(!S.activeId){showError(t('select_session_first'));return}const config=collectLlmConfig();try{const payload={filename:'LLM.config.json',mime:'application/json',content_b64:btoa(unescape(encodeURIComponent(JSON.stringify(config,null,2))))};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});const note=String(out?.note||out?.model_catalog?.note||'').trim();if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'))}else if(note){showError(note)}else{showError('')}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true});E('llmConfigModal').style.display='none'}catch(err){showError(err.message||String(err))}}
|
|
38192
40501
|
function openLlmConfigModal(){const modal=E('llmConfigModal');if(!modal)return;modal.style.display='flex';const prov=E('llmProvider');if(prov){renderLlmFields(prov.value)}}
|
|
38193
40502
|
const COMPACT_AUTO_REFRESH_COUNT=3;
|
|
38194
40503
|
const COMPACT_AUTO_REFRESH_INTERVAL_MS=260;
|
|
@@ -38761,7 +41070,7 @@ function feedSignature(snap){const feed=Array.isArray(snap?.conversation_feed)?s
|
|
|
38761
41070
|
function boardsSignature(snap){return [snap?.running?1:0,snap?.agent_phase||'',Number(snap?.agent_round_index||0),Number(snap?.queued_user_inputs_count||0),Number(snap?.truncation_count||0),Number(snap?.live_truncation_attempts||0),Number(snap?.live_truncation_tokens||0),snap?.live_truncation_active?1:0,Number(snap?.context_tokens_estimate||0),Number(snap?.context_left_tokens||0),Number(snap?.context_left_percent||0),Number(snap?.render_bridge?.seq||0),(snap?.todos||[]).length,(snap?.tasks||[]).length,(snap?.activity||[]).length,(snap?.operations||[]).length,(snap?.uploads||[]).length].join('|')}
|
|
38762
41071
|
function sessionsSignature(list){const rows=Array.isArray(list)?list:[];const sig=tailSig(rows,6,row=>`${String(row?.id||'')}:${row?.running?1:0}:${Number(row?.message_count||0)}:${Number(row?.updated_at||0)}`);const aid=String(S.activeId||'').trim();let activeSig='-';if(aid){const activeRow=rows.find(row=>String(row?.id||'')===aid);if(activeRow){activeSig=`${aid}:${activeRow?.running?1:0}:${Number(activeRow?.message_count||0)}:${Number(activeRow?.updated_at||0)}`}else{activeSig=`missing:${aid}`}}return `${rows.length}|active=${activeSig}|${sig}`}
|
|
38763
41072
|
function _statInfinite(n){const v=Number(n);return(Number.isFinite(v)&&v>0)?String(v):'∞'}
|
|
38764
|
-
function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
|
|
41073
|
+
function applyRuntimeConfigStats(cfg){if(!cfg||typeof cfg!=='object')return;S.config=S.config||{};if(cfg.scheduler&&typeof cfg.scheduler==='object')S.config.scheduler=cfg.scheduler;if(cfg.session_creation_limit&&typeof cfg.session_creation_limit==='object')S.config.session_creation_limit=cfg.session_creation_limit;if(Object.prototype.hasOwnProperty.call(cfg,'daily_session_limit'))S.config.daily_session_limit=cfg.daily_session_limit;if(Object.prototype.hasOwnProperty.call(cfg,'download_js_lib_enabled'))S.config.download_js_lib_enabled=!!cfg.download_js_lib_enabled;if(Object.prototype.hasOwnProperty.call(cfg,'request_timeout_default'))S.config.request_timeout_default=cfg.request_timeout_default;if(Object.prototype.hasOwnProperty.call(cfg,'run_timeout'))S.config.run_timeout=cfg.run_timeout;if(Object.prototype.hasOwnProperty.call(cfg,'shell_command_timeout_seconds'))S.config.shell_command_timeout_seconds=cfg.shell_command_timeout_seconds;if(Object.prototype.hasOwnProperty.call(cfg,'model')&&String(cfg.model||'').trim())S.config.model=cfg.model}
|
|
38765
41074
|
function renderStats(){const sessions=S.sessions.length;const running=S.sessions.filter(x=>x.running).length;const msgs=S.sessions.reduce((n,x)=>n+x.message_count,0);const model=S.config?.model||'-';const sched=(S.config&&typeof S.config.scheduler==='object')?S.config.scheduler:{};const quota=(S.config&&typeof S.config.session_creation_limit==='object')?S.config.session_creation_limit:{};const runningTotal=Math.max(0,Number(sched?.running_total||0));const maxTasks=Number(sched?.max_user||0);const globalTasks=`${runningTotal}/${_statInfinite(maxTasks)}`;const dailySessions=(quota&&quota.enabled)?`${Math.max(0,Number(quota.used||0))}/${Math.max(0,Number(quota.limit||0))}`:'∞';const compact=[[t('stat_sessions'),sessions],[t('stat_running'),running],[t('stat_messages'),msgs],[t('stat_global_tasks'),globalTasks],[t('stat_daily_sessions'),dailySessions]].map(([k,v])=>`<div class=\"stat compact\"><div class=\"k\">${esc(k)}</div><div class=\"v\">${esc(v)}</div></div>`).join('');const modelHtml=`<div class=\"stat model\"><div class=\"k\">${esc(t('stat_model'))}</div><div class=\"v\">${esc(model)}</div></div>`;E('topStats').innerHTML=`<div class=\"top-stats-primary\">${compact}</div><div class=\"top-stats-model\">${modelHtml}</div>`}
|
|
38766
41075
|
function renderSessions(){const html=S.sessions.map(s=>`<div class=\"session-item${s.id===S.activeId?' active':''}\" data-id=\"${esc(s.id)}\"><div><strong>${esc(s.title)}</strong></div><div class=\"mono\">${s.running?t('running'):t('idle')} · ${s.message_count} msgs</div></div>`).join('');setPanelHtml('sessionList',html||`<div class=\"mono\">${esc(t('no_sessions'))}</div>`);for(const el of document.querySelectorAll('#sessionList .session-item')){el.onclick=()=>selectSession(el.getAttribute('data-id'))}}
|
|
38767
41076
|
function _syncActiveSessionSummaryFromSnapshot(){const sid=String(S.activeId||'').trim();const snap=S.snap;if(!sid||!snap)return false;const rows=Array.isArray(S.sessions)?S.sessions.slice():[];let idx=rows.findIndex(row=>String(row?.id||'')===sid);const running=!!snap?.running;let updatedAt=Number(snap?.updated_at||0);if(!Number.isFinite(updatedAt)||updatedAt<=0){updatedAt=(Date.now()/1000)}let msgCount=Number(snap?.message_count);if(!Number.isFinite(msgCount)||msgCount<0){const arr=Array.isArray(snap?.messages)?snap.messages:[];let cnt=0;for(const row of arr){if(String(row?.role||'').trim()==='tool')continue;cnt+=1}msgCount=cnt}msgCount=Math.max(0,Math.floor(Number(msgCount)||0));const title=String(snap?.title||'').trim();if(idx<0){rows.push({id:sid,title:title||sid,running:running,updated_at:updatedAt,message_count:msgCount});idx=rows.length-1}else{const cur=rows[idx]||{};const next={...cur};let changed=false;if(!!cur.running!==running){next.running=running;changed=true}if(Number(cur.message_count||0)!==msgCount){next.message_count=msgCount;changed=true}if(Number(cur.updated_at||0)!==updatedAt){next.updated_at=updatedAt;changed=true}if(title&&String(cur.title||'')!==title){next.title=title;changed=true}if(!changed)return false;rows[idx]=next}rows.sort((a,b)=>Number(b?.updated_at||0)-Number(a?.updated_at||0));S.sessions=rows;return true}
|
|
@@ -40222,8 +42531,7 @@ function _chatVirtBuildMessageNode(m){
|
|
|
40222
42531
|
const pillsHtml=pills.map(x=>`<span class=\"manager-delegate-pill\">${esc(String(x))}</span>`).join('');
|
|
40223
42532
|
const routeHtml=`<div class=\"manager-delegate-route\"><span class=\"agent-bus-pill manager\">${esc(t('role_manager'))}</span><span class=\"agent-bus-arrow\">→</span><span class=\"agent-bus-pill${targetRole?(' '+targetRole):''}\">${esc(targetLabel)}</span></div>`;
|
|
40224
42533
|
const objectiveHtml=(objective&&instruction&&objective.toLowerCase()===instruction.toLowerCase())?'':(objective?`<div class=\"manager-delegate-line\"><span>${esc(t('event_objective'))}</span><div>${esc(objective)}</div></div>`:'');
|
|
40225
|
-
const
|
|
40226
|
-
const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div class=\"msg-md\">${renderMarkdownCached(instruction,instructionKey)}</div></div>`:'';
|
|
42534
|
+
const instructionHtml=instruction?`<div class=\"manager-delegate-line\"><span>${esc(t('event_instruction'))}</span><div>${esc(instruction)}</div></div>`:'';
|
|
40227
42535
|
d.innerHTML=`${roleBadge}<div class=\"manager-delegate-card\"><div class=\"manager-delegate-head\">${esc(t('event_manager_delegate_title'))}</div>${routeHtml}<div class=\"manager-delegate-pills\">${pillsHtml}</div>${objectiveHtml}${instructionHtml}</div>`;
|
|
40228
42536
|
return d;
|
|
40229
42537
|
}
|
|
@@ -41214,7 +43522,7 @@ async function renameSession(){if(!S.activeId){showError(t('select_session_first
|
|
|
41214
43522
|
async function deleteSession(){if(!S.activeId){showError(t('select_session_first'));return}const deletingId=S.activeId;const ok=confirm(t('delete_confirm'));if(!ok)return;await api('/api/sessions/'+S.activeId,{method:'DELETE'});if(S.previewBySession&&deletingId){delete S.previewBySession[deletingId]}if(S.fileExplorerBySession&&deletingId){delete S.fileExplorerBySession[deletingId]}S.activeId=null;S.snap=null;if(S.es)S.es.close();renderPreviewTabs();renderPreviewVisibility();renderActivePreview(false);await refreshSessions();if(S.sessions.length)await selectSession(S.sessions[0].id)}
|
|
41215
43523
|
async function applyModel(){const sel=E('modelSelect');const btn=E('applyModelBtn');const model=sel?.value||'';if(!model){showError(t('no_model_selected'));return}if(S.staticMode&&S.frozen)resumeAutoUpdates();S.config=S.config||{};const prevModel=String(S.config.model||'');const prevSnapModel=String(S.snap?.model||'');const prevSnapCatalog=(S.snap&&typeof S.snap==='object')?S.snap.llm_model_catalog:undefined;try{S.config.model=model;if(S.snap&&typeof S.snap==='object'){S.snap.model=_modelNameFromSelection(model)||S.snap.model;if(!S.snap.llm_model_catalog||typeof S.snap.llm_model_catalog!=='object')S.snap.llm_model_catalog={};S.snap.llm_model_catalog.selected=model}renderModelControls();renderStats();if(S.snap)renderBoards();if(sel)sel.disabled=true;if(btn)btn.disabled=true;const path=S.activeId?('/api/sessions/'+S.activeId+'/config/model'):'/api/config/model';const changed=await api(path,{method:'POST',body:JSON.stringify({selection:model,model})});if(changed?.note)showError(changed.note);else showError('');if(!applyModelCatalog(changed)){const cat=await loadModelCatalog();if(!applyModelCatalog(cat)){S.config.model=String(changed?.selected||model||'').trim();renderModelControls()}}if(S.snap&&typeof S.snap==='object'){const selected=String(S.config?.model||model||'').trim();const modelName=_modelNameFromSelection(selected);if(modelName)S.snap.model=modelName;if(changed&&typeof changed==='object')S.snap.llm_model_catalog=changed;renderBoards()}scheduleSnapshot({forceFull:true,delayMs:40,allowWhenFrozen:true})}catch(err){S.config.model=prevModel;if(S.snap&&typeof S.snap==='object'){if(prevSnapModel)S.snap.model=prevSnapModel;if(prevSnapCatalog!==undefined)S.snap.llm_model_catalog=prevSnapCatalog;renderBoards()}renderModelControls();renderStats();showError(err.message||String(err))}finally{if(sel)sel.disabled=false;if(btn)btn.disabled=false}}
|
|
41216
43524
|
|
|
41217
|
-
async function uploadLlmConfigFile(file){try{if(!S.activeId){showError(t('select_session_first'));return}if(!file){return}const arr=await file.arrayBuffer();const payload={filename:'LLM.config.json',mime:file.type||'application/json',content_b64:ab2b64(arr)};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'));}else{showError('');const modal=E('llmConfigModal');if(modal)modal.style.display='none'}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true})}catch(err){showError(err.message||String(err))}}
|
|
43525
|
+
async function uploadLlmConfigFile(file){try{if(!S.activeId){showError(t('select_session_first'));return}if(!file){return}const arr=await file.arrayBuffer();const payload={filename:'LLM.config.json',mime:file.type||'application/json',content_b64:ab2b64(arr)};const out=await api('/api/sessions/'+S.activeId+'/uploads',{method:'POST',body:JSON.stringify(payload)});const note=String(out?.note||out?.model_catalog?.note||'').trim();if(!out?.model_catalog){showError(t('config_uploaded_no_profiles'));}else{showError(note||'');const modal=E('llmConfigModal');if(modal)modal.style.display='none'}const cat=out?.model_catalog||await loadModelCatalog();if(!applyModelCatalog(cat)){renderModelControls()}await refreshSnapshot({forceFull:true,allowWhenFrozen:true})}catch(err){showError(err.message||String(err))}}
|
|
41218
43526
|
async function sendMessage(){showError('');const t=E('prompt').value.trim();if(!t||!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();E('prompt').value='';try{await waitForPendingUploads();await api('/api/sessions/'+S.activeId+'/message',{method:'POST',body:JSON.stringify({content:t})});S.lastDeltaTs=Date.now();if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:120,allowWhenFrozen:true})}}catch(err){showError(err.message)}}
|
|
41219
43527
|
async function interruptRun(){if(!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();await api('/api/sessions/'+S.activeId+'/interrupt',{method:'POST'});S.lastDeltaTs=Date.now();if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:140,allowWhenFrozen:true})}}
|
|
41220
43528
|
async function compactNow(){if(!S.activeId)return;if(S.staticMode&&S.frozen)resumeAutoUpdates();await api('/api/sessions/'+S.activeId+'/compact',{method:'POST'});S.lastDeltaTs=Date.now();scheduleCompactRefreshBurst(COMPACT_AUTO_REFRESH_COUNT);if(!S.es||S.es.readyState===2){scheduleSnapshot({forceFull:false,delayMs:180,allowWhenFrozen:true})}}
|
|
@@ -48574,6 +50882,7 @@ class AppContext:
|
|
|
48574
50882
|
context_limit_locked: bool = False,
|
|
48575
50883
|
max_rounds: int = MAX_AGENT_ROUNDS,
|
|
48576
50884
|
max_run_seconds: int = MAX_RUN_SECONDS,
|
|
50885
|
+
shell_command_timeout_seconds: int = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
48577
50886
|
auto_model_switch: bool = False,
|
|
48578
50887
|
arbiter_enabled: bool = True,
|
|
48579
50888
|
arbiter_model: str = "",
|
|
@@ -48594,7 +50903,7 @@ class AppContext:
|
|
|
48594
50903
|
self.base_url = base_url
|
|
48595
50904
|
self.model = model
|
|
48596
50905
|
self.thinking = False
|
|
48597
|
-
self.js_lib_root = offline_js_lib_root(
|
|
50906
|
+
self.js_lib_root = offline_js_lib_root(self.workspace)
|
|
48598
50907
|
self.offline_js_summary: dict = {}
|
|
48599
50908
|
try:
|
|
48600
50909
|
self.offline_js_summary = load_offline_js_lib_index(self.js_lib_root)
|
|
@@ -48617,6 +50926,12 @@ class AppContext:
|
|
|
48617
50926
|
maximum=MAX_RUN_TIMEOUT_SECONDS,
|
|
48618
50927
|
fallback=MAX_RUN_SECONDS,
|
|
48619
50928
|
)
|
|
50929
|
+
self.shell_command_timeout_seconds = normalize_timeout_seconds(
|
|
50930
|
+
shell_command_timeout_seconds if shell_command_timeout_seconds is not None else DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
50931
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
50932
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
50933
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
50934
|
+
)
|
|
48620
50935
|
self.auto_model_switch = bool(auto_model_switch)
|
|
48621
50936
|
self.arbiter_enabled = bool(arbiter_enabled)
|
|
48622
50937
|
self.arbiter_model = str(arbiter_model or "").strip()
|
|
@@ -48785,6 +51100,7 @@ class AppContext:
|
|
|
48785
51100
|
"show_upload_list": bool(getattr(self, "show_upload_list", False)),
|
|
48786
51101
|
"ui_style": normalize_ui_style(getattr(self, "ui_style", DEFAULT_UI_STYLE)),
|
|
48787
51102
|
"js_lib_download_enabled": bool(getattr(self, "js_lib_download_enabled", True)),
|
|
51103
|
+
"shell_command_timeout_seconds": int(getattr(self, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
48788
51104
|
"daily_session_limit_per_ip": int(getattr(self, "daily_session_limit_per_ip", 0) or 0),
|
|
48789
51105
|
"daily_session_reset_hour": int(getattr(self, "daily_session_reset_hour", 8) or 8),
|
|
48790
51106
|
"validation": dict(self.web_ui_validation or {}),
|
|
@@ -49894,6 +52210,14 @@ class AppContext:
|
|
|
49894
52210
|
return started
|
|
49895
52211
|
|
|
49896
52212
|
def _on_session_run_finished(self, user_id: str, session_id: str):
|
|
52213
|
+
try:
|
|
52214
|
+
mgr = self.manager_for_user(user_id)
|
|
52215
|
+
sess = mgr.get(session_id)
|
|
52216
|
+
if sess and bool(getattr(sess, "_deferred_runtime_sync_requested", False)):
|
|
52217
|
+
mgr._sync_from_session(sess, apply_to_all=False)
|
|
52218
|
+
sess._deferred_runtime_sync_requested = False
|
|
52219
|
+
except Exception:
|
|
52220
|
+
pass
|
|
49897
52221
|
if not self.scheduler_limits_enabled():
|
|
49898
52222
|
return
|
|
49899
52223
|
started_rows: list[dict] = []
|
|
@@ -50028,6 +52352,7 @@ class AppContext:
|
|
|
50028
52352
|
self.context_limit_locked,
|
|
50029
52353
|
self.max_rounds,
|
|
50030
52354
|
self.max_run_seconds,
|
|
52355
|
+
self.shell_command_timeout_seconds,
|
|
50031
52356
|
self.auto_model_switch,
|
|
50032
52357
|
self.arbiter_enabled,
|
|
50033
52358
|
self.arbiter_model,
|
|
@@ -51096,6 +53421,7 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51096
53421
|
"download_js_lib_enabled": bool(getattr(self.app, "js_lib_download_enabled", True)),
|
|
51097
53422
|
"request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
|
|
51098
53423
|
"run_timeout": int(mgr.max_run_seconds),
|
|
53424
|
+
"shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
51099
53425
|
}
|
|
51100
53426
|
)
|
|
51101
53427
|
model_cat = mgr.model_catalog()
|
|
@@ -51142,6 +53468,7 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51142
53468
|
"context_token_limit": int(mgr.context_token_limit),
|
|
51143
53469
|
"context_limit_locked": bool(mgr.context_limit_locked),
|
|
51144
53470
|
"run_timeout": int(mgr.max_run_seconds),
|
|
53471
|
+
"shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
51145
53472
|
"auto_model_switch": bool(mgr.auto_model_switch),
|
|
51146
53473
|
"execution_mode": normalize_execution_mode(getattr(mgr, "execution_mode", EXECUTION_MODE_SYNC), default=EXECUTION_MODE_SYNC),
|
|
51147
53474
|
"execution_mode_choices": list(EXECUTION_MODE_CHOICES),
|
|
@@ -51194,7 +53521,7 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51194
53521
|
for hk, hv in probe_headers.items():
|
|
51195
53522
|
if str(hk or "").strip() and str(hv or "").strip():
|
|
51196
53523
|
req.add_header(str(hk), str(hv))
|
|
51197
|
-
with
|
|
53524
|
+
with urlopen(req, timeout=8) as resp:
|
|
51198
53525
|
body_text = resp.read().decode("utf-8", errors="replace")
|
|
51199
53526
|
reachable = True
|
|
51200
53527
|
try:
|
|
@@ -51251,7 +53578,7 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51251
53578
|
for hk, hv in probe_headers.items():
|
|
51252
53579
|
if str(hk or "").strip() and str(hv or "").strip():
|
|
51253
53580
|
base_req.add_header(str(hk), str(hv))
|
|
51254
|
-
with
|
|
53581
|
+
with urlopen(base_req, timeout=8):
|
|
51255
53582
|
pass
|
|
51256
53583
|
reachable = True
|
|
51257
53584
|
except urllib.error.HTTPError as exc:
|
|
@@ -51502,9 +53829,26 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51502
53829
|
if not selection:
|
|
51503
53830
|
return self._send_json({"error": "selection required"}, status=400)
|
|
51504
53831
|
model_override = payload.get("model_override")
|
|
53832
|
+
if bool(getattr(sess, "running", False)):
|
|
53833
|
+
try:
|
|
53834
|
+
sess._queue_deferred_runtime_update(
|
|
53835
|
+
"model_selection",
|
|
53836
|
+
{
|
|
53837
|
+
"selection": selection,
|
|
53838
|
+
"model_override": model_override if isinstance(model_override, str) else "",
|
|
53839
|
+
},
|
|
53840
|
+
)
|
|
53841
|
+
except Exception as exc:
|
|
53842
|
+
return self._send_json({"error": str(exc)}, status=400)
|
|
53843
|
+
queued = sess.model_catalog()
|
|
53844
|
+
queued["queued"] = True
|
|
53845
|
+
queued["note"] = (
|
|
53846
|
+
"session is running; model switch queued and will apply after the current run finishes"
|
|
53847
|
+
)
|
|
53848
|
+
return self._send_json(queued)
|
|
51505
53849
|
try:
|
|
51506
53850
|
out = sess.set_runtime_selection(selection, model_override if isinstance(model_override, str) else None)
|
|
51507
|
-
mgr._sync_from_session(sess, apply_to_all=
|
|
53851
|
+
mgr._sync_from_session(sess, apply_to_all=False)
|
|
51508
53852
|
except Exception as exc:
|
|
51509
53853
|
return self._send_json({"error": str(exc)}, status=400)
|
|
51510
53854
|
return self._send_json(out)
|
|
@@ -51603,9 +53947,9 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51603
53947
|
if len(raw) > 20 * 1024 * 1024:
|
|
51604
53948
|
return self._send_json({"error": "max upload size is 20MB"}, status=413)
|
|
51605
53949
|
meta = sess.add_upload(filename, raw, mime)
|
|
51606
|
-
if isinstance(meta.get("model_catalog"), dict):
|
|
53950
|
+
if isinstance(meta.get("model_catalog"), dict) and not bool(meta.get("model_catalog", {}).get("queued")):
|
|
51607
53951
|
try:
|
|
51608
|
-
mgr._sync_from_session(sess, apply_to_all=
|
|
53952
|
+
mgr._sync_from_session(sess, apply_to_all=False)
|
|
51609
53953
|
except Exception:
|
|
51610
53954
|
pass
|
|
51611
53955
|
return self._send_json(meta, status=201)
|
|
@@ -51699,16 +54043,16 @@ class Handler(BaseHTTPRequestHandler):
|
|
|
51699
54043
|
explicit_complexity = infer_user_complexity_value(
|
|
51700
54044
|
str(body.get("complexity", body.get("task_complexity", "")) or "")
|
|
51701
54045
|
)
|
|
51702
|
-
current_complexity =
|
|
51703
|
-
|
|
51704
|
-
|
|
54046
|
+
current_complexity = normalize_task_complexity(
|
|
54047
|
+
getattr(sess, "runtime_task_complexity", "") or "",
|
|
54048
|
+
default="",
|
|
51705
54049
|
)
|
|
51706
54050
|
if explicit_complexity in TASK_COMPLEXITY_LEVELS:
|
|
51707
|
-
sess.runtime_task_complexity = explicit_complexity
|
|
54051
|
+
sess.runtime_task_complexity = normalize_task_complexity(explicit_complexity, default="")
|
|
51708
54052
|
elif current_complexity in TASK_COMPLEXITY_LEVELS:
|
|
51709
54053
|
sess.runtime_task_complexity = current_complexity
|
|
51710
54054
|
else:
|
|
51711
|
-
sess.runtime_task_complexity =
|
|
54055
|
+
sess.runtime_task_complexity = normalize_task_complexity(policy.get("complexity", "simple"), default="simple")
|
|
51712
54056
|
sess.runtime_scale_preference = "thorough" if level >= 4 else "balanced"
|
|
51713
54057
|
return self._send_json({"task_level": level})
|
|
51714
54058
|
return self._send_json({"error": "not found"}, status=404)
|
|
@@ -51901,6 +54245,7 @@ class SkillsHandler(BaseHTTPRequestHandler):
|
|
|
51901
54245
|
"show_upload_list": bool(getattr(self.app, "show_upload_list", False)),
|
|
51902
54246
|
"web_ui": web_ui_state,
|
|
51903
54247
|
"run_timeout": int(mgr.max_run_seconds),
|
|
54248
|
+
"shell_command_timeout_seconds": int(getattr(mgr, "shell_command_timeout_seconds", DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS) or DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS),
|
|
51904
54249
|
"request_timeout_default": int(DEFAULT_REQUEST_TIMEOUT),
|
|
51905
54250
|
}
|
|
51906
54251
|
)
|
|
@@ -52332,6 +54677,25 @@ def main():
|
|
|
52332
54677
|
f"(minimum {MIN_RUN_TIMEOUT_SECONDS}, model-active time excluded)"
|
|
52333
54678
|
),
|
|
52334
54679
|
)
|
|
54680
|
+
parser.add_argument(
|
|
54681
|
+
"--shell_command_timeout",
|
|
54682
|
+
"--shell-command-timeout",
|
|
54683
|
+
"--bash_timeout",
|
|
54684
|
+
"--bash-timeout",
|
|
54685
|
+
"--command_timeout",
|
|
54686
|
+
"--command-timeout",
|
|
54687
|
+
dest="shell_command_timeout",
|
|
54688
|
+
default=None,
|
|
54689
|
+
type=int,
|
|
54690
|
+
help=(
|
|
54691
|
+
"Per-command shell/bash timeout in seconds "
|
|
54692
|
+
f"(default {DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS}; allowed "
|
|
54693
|
+
f"{MIN_SHELL_COMMAND_TIMEOUT_SECONDS}-{MAX_SHELL_COMMAND_TIMEOUT_SECONDS}). "
|
|
54694
|
+
"Independent from the global run timeout. Also configurable via --config keys "
|
|
54695
|
+
"shell_command_timeout / shell_timeout / bash_timeout / command_timeout and env "
|
|
54696
|
+
"AGENT_SHELL_COMMAND_TIMEOUT / AGENT_BASH_TIMEOUT / AGENT_COMMAND_TIMEOUT."
|
|
54697
|
+
),
|
|
54698
|
+
)
|
|
52335
54699
|
parser.add_argument(
|
|
52336
54700
|
"--live_input_delay_write",
|
|
52337
54701
|
default=LIVE_INPUT_DELAY_WRITE_ROUNDS,
|
|
@@ -52481,9 +54845,10 @@ def main():
|
|
|
52481
54845
|
default="",
|
|
52482
54846
|
help=(
|
|
52483
54847
|
"LLM config source (URL or local file path). "
|
|
52484
|
-
"Also reads startup keys like show_upload_list, download_js_lib and "
|
|
54848
|
+
"Also reads startup keys like show_upload_list, download_js_lib, shell_command_timeout and "
|
|
52485
54849
|
"daily_session_limit (aliases: daily_sessions_per_ip / "
|
|
52486
|
-
"max_daily_sessions_per_ip / session_daily_limit
|
|
54850
|
+
"max_daily_sessions_per_ip / session_daily_limit; shell aliases: "
|
|
54851
|
+
"shell_timeout / bash_timeout / command_timeout)."
|
|
52487
54852
|
),
|
|
52488
54853
|
)
|
|
52489
54854
|
parser.add_argument(
|
|
@@ -52618,6 +54983,7 @@ def main():
|
|
|
52618
54983
|
arbiter_enabled=True,
|
|
52619
54984
|
show_upload_list=None,
|
|
52620
54985
|
download_js_lib=None,
|
|
54986
|
+
shell_command_timeout=None,
|
|
52621
54987
|
)
|
|
52622
54988
|
args = parser.parse_args()
|
|
52623
54989
|
ctx_limit_locked = any(str(arg).split("=", 1)[0] == "--ctx_limit" for arg in sys.argv[1:])
|
|
@@ -52647,6 +55013,7 @@ def main():
|
|
|
52647
55013
|
)
|
|
52648
55014
|
resolved_show_upload_list = False
|
|
52649
55015
|
resolved_daily_session_limit_per_ip = 0
|
|
55016
|
+
resolved_shell_command_timeout = DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS
|
|
52650
55017
|
external_config: dict = {}
|
|
52651
55018
|
external_config_source = ""
|
|
52652
55019
|
bootstrap_base_url = args.ollama_base_url
|
|
@@ -52673,6 +55040,14 @@ def main():
|
|
|
52673
55040
|
external_daily_session_limit = extract_daily_session_limit_setting(external_config)
|
|
52674
55041
|
if external_daily_session_limit is not None:
|
|
52675
55042
|
resolved_daily_session_limit_per_ip = int(external_daily_session_limit)
|
|
55043
|
+
external_shell_command_timeout = extract_shell_command_timeout_setting(external_config)
|
|
55044
|
+
if external_shell_command_timeout is not None:
|
|
55045
|
+
resolved_shell_command_timeout = normalize_timeout_seconds(
|
|
55046
|
+
external_shell_command_timeout,
|
|
55047
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55048
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55049
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55050
|
+
)
|
|
52676
55051
|
print(f"[web-agent] external config loaded: {external_config_source}")
|
|
52677
55052
|
except Exception as exc:
|
|
52678
55053
|
print(f"[web-agent] invalid --config: {exc}")
|
|
@@ -52686,9 +55061,25 @@ def main():
|
|
|
52686
55061
|
web_ui_daily_session_limit = extract_daily_session_limit_setting(web_ui_config)
|
|
52687
55062
|
if web_ui_daily_session_limit is not None:
|
|
52688
55063
|
resolved_daily_session_limit_per_ip = int(web_ui_daily_session_limit)
|
|
55064
|
+
web_ui_shell_command_timeout = extract_shell_command_timeout_setting(web_ui_config)
|
|
55065
|
+
if web_ui_shell_command_timeout is not None:
|
|
55066
|
+
resolved_shell_command_timeout = normalize_timeout_seconds(
|
|
55067
|
+
web_ui_shell_command_timeout,
|
|
55068
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55069
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55070
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55071
|
+
)
|
|
52689
55072
|
cli_daily_session_limit = getattr(args, "daily_session_limit_per_ip", None)
|
|
52690
55073
|
if cli_daily_session_limit is not None:
|
|
52691
55074
|
resolved_daily_session_limit_per_ip = max(0, int(cli_daily_session_limit or 0))
|
|
55075
|
+
cli_shell_command_timeout = getattr(args, "shell_command_timeout", None)
|
|
55076
|
+
if cli_shell_command_timeout is not None:
|
|
55077
|
+
resolved_shell_command_timeout = normalize_timeout_seconds(
|
|
55078
|
+
cli_shell_command_timeout,
|
|
55079
|
+
minimum=MIN_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55080
|
+
maximum=MAX_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55081
|
+
fallback=DEFAULT_SHELL_COMMAND_TIMEOUT_SECONDS,
|
|
55082
|
+
)
|
|
52692
55083
|
raw_ui_style = str(getattr(args, "ui_style", "") or "").strip()
|
|
52693
55084
|
if not raw_ui_style:
|
|
52694
55085
|
raw_ui_style = str(extract_ui_style_setting(external_config) or "").strip()
|
|
@@ -52743,6 +55134,7 @@ def main():
|
|
|
52743
55134
|
f"[web-agent] run_timeout adjusted {requested_run_timeout}->{resolved_run_timeout} "
|
|
52744
55135
|
f"(allowed range {MIN_RUN_TIMEOUT_SECONDS}-{MAX_RUN_TIMEOUT_SECONDS})"
|
|
52745
55136
|
)
|
|
55137
|
+
print(f"[web-agent] shell_command_timeout={int(resolved_shell_command_timeout)}s")
|
|
52746
55138
|
requested_live_input_delay_write = int(args.live_input_delay_write if args.live_input_delay_write is not None else LIVE_INPUT_DELAY_WRITE_ROUNDS)
|
|
52747
55139
|
resolved_live_input_delay_write = max(0, min(20, requested_live_input_delay_write))
|
|
52748
55140
|
if resolved_live_input_delay_write != requested_live_input_delay_write:
|
|
@@ -52925,6 +55317,7 @@ def main():
|
|
|
52925
55317
|
ctx_limit_locked,
|
|
52926
55318
|
resolved_max_rounds,
|
|
52927
55319
|
resolved_run_timeout,
|
|
55320
|
+
resolved_shell_command_timeout,
|
|
52928
55321
|
resolved_auto_model_switch,
|
|
52929
55322
|
resolved_arbiter_enabled,
|
|
52930
55323
|
resolved_arbiter_model,
|