@pushpalsdev/cli 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +291 -44
- package/package.json +1 -1
- package/runtime/configs/backend.toml +1 -1
- package/runtime/configs/default.toml +1 -1
- package/runtime/sandbox/apps/workerpals/.python-version +1 -0
- package/runtime/sandbox/apps/workerpals/Dockerfile.sandbox +71 -0
- package/runtime/sandbox/apps/workerpals/package.json +25 -0
- package/runtime/sandbox/apps/workerpals/pyproject.toml +8 -0
- package/runtime/sandbox/apps/workerpals/src/backends/backend_config.ts +119 -0
- package/runtime/sandbox/apps/workerpals/src/backends/miniswe/miniswe_executor.py +2029 -0
- package/runtime/sandbox/apps/workerpals/src/backends/miniswe_backend.ts +48 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +1259 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +110 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex_backend.ts +67 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openhands/openhands_executor.py +563 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openhands_backend.ts +161 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openhands_task_execute.ts +536 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +746 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/test_settings_resolver.py +60 -0
- package/runtime/sandbox/apps/workerpals/src/backends/task_execute_registry.ts +21 -0
- package/runtime/sandbox/apps/workerpals/src/backends/types.ts +52 -0
- package/runtime/sandbox/apps/workerpals/src/common/execution_utils.ts +149 -0
- package/runtime/sandbox/apps/workerpals/src/common/executor_backend.ts +15 -0
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +210 -0
- package/runtime/sandbox/apps/workerpals/src/common/logger.ts +65 -0
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +9 -0
- package/runtime/sandbox/apps/workerpals/src/common/worktree_cleanup.ts +66 -0
- package/runtime/sandbox/apps/workerpals/src/context_manager.ts +45 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +1842 -0
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +3063 -0
- package/runtime/sandbox/apps/workerpals/src/job_runner.ts +194 -0
- package/runtime/sandbox/apps/workerpals/src/shell_manager.ts +210 -0
- package/runtime/sandbox/apps/workerpals/src/timeout_policy.ts +24 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +1436 -0
- package/runtime/sandbox/apps/workerpals/tsconfig.json +15 -0
- package/runtime/sandbox/apps/workerpals/uv.lock +2014 -0
- package/runtime/sandbox/bun.lock +2591 -0
- package/runtime/sandbox/configs/backend.toml +79 -0
- package/runtime/sandbox/configs/default.toml +260 -0
- package/runtime/sandbox/configs/dev.toml +2 -0
- package/runtime/sandbox/configs/local.example.toml +129 -0
- package/runtime/sandbox/package.json +65 -0
- package/runtime/sandbox/packages/protocol/README.md +168 -0
- package/runtime/sandbox/packages/protocol/package.json +37 -0
- package/runtime/sandbox/packages/protocol/scripts/copy-schemas.js +17 -0
- package/runtime/sandbox/packages/protocol/src/a2a/README.md +52 -0
- package/runtime/sandbox/packages/protocol/src/a2a/mapping.ts +55 -0
- package/runtime/sandbox/packages/protocol/src/index.browser.ts +25 -0
- package/runtime/sandbox/packages/protocol/src/index.ts +25 -0
- package/runtime/sandbox/packages/protocol/src/schemas/approvals.schema.json +6 -0
- package/runtime/sandbox/packages/protocol/src/schemas/envelope.schema.json +96 -0
- package/runtime/sandbox/packages/protocol/src/schemas/events.schema.json +679 -0
- package/runtime/sandbox/packages/protocol/src/schemas/http.schema.json +50 -0
- package/runtime/sandbox/packages/protocol/src/types.ts +267 -0
- package/runtime/sandbox/packages/protocol/src/validate.browser.ts +154 -0
- package/runtime/sandbox/packages/protocol/src/validate.ts +233 -0
- package/runtime/sandbox/packages/protocol/src/version.ts +1 -0
- package/runtime/sandbox/packages/protocol/tsconfig.json +20 -0
- package/runtime/sandbox/packages/shared/package.json +19 -0
- package/runtime/sandbox/packages/shared/src/autonomy_policy.ts +400 -0
- package/runtime/sandbox/packages/shared/src/client_preflight.ts +286 -0
- package/runtime/sandbox/packages/shared/src/communication.ts +313 -0
- package/runtime/sandbox/packages/shared/src/config.ts +2180 -0
- package/runtime/sandbox/packages/shared/src/config_template_parity.ts +70 -0
- package/runtime/sandbox/packages/shared/src/git_backend.ts +205 -0
- package/runtime/sandbox/packages/shared/src/index.ts +101 -0
- package/runtime/sandbox/packages/shared/src/local_network.ts +101 -0
- package/runtime/sandbox/packages/shared/src/localbuddy_runtime.ts +314 -0
- package/runtime/sandbox/packages/shared/src/prompts.ts +64 -0
- package/runtime/sandbox/packages/shared/src/repo.ts +134 -0
- package/runtime/sandbox/packages/shared/src/session_event_visibility.ts +25 -0
- package/runtime/sandbox/packages/shared/src/vision.ts +247 -0
- package/runtime/sandbox/packages/shared/tsconfig.json +16 -0
- package/runtime/sandbox/prompts/workerpals/codex_quality_critic_instruction_prompt.md +14 -0
- package/runtime/sandbox/prompts/workerpals/commit_message_prompt.md +36 -0
- package/runtime/sandbox/prompts/workerpals/commit_message_user_prompt.md +7 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_broker_system_prompt.md +33 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_broker_task_prompt.md +5 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_completion_requirement.md +1 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_context_compaction_retry_prompt.md +1 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_explicit_targets_block.md +2 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_recovery_guidance_base.md +4 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_recovery_guidance_blocker_line.md +1 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_strict_tool_use_guidance.md +6 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_supplemental_guidance_section.md +2 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_timeout_note.md +1 -0
- package/runtime/sandbox/prompts/workerpals/miniswe_toolcall_retry_guidance.md +1 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_default_system_prompt.md +4 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_instruction_wrapper.md +5 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_runtime_policy_appendix.md +5 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_supplemental_guidance_section.md +2 -0
- package/runtime/sandbox/prompts/workerpals/openai_codex_task_execute_system_prompt.md +12 -0
- package/runtime/sandbox/prompts/workerpals/openhands_minimal_security_policy.j2 +8 -0
- package/runtime/sandbox/prompts/workerpals/openhands_minimal_system_prompt.j2 +20 -0
- package/runtime/sandbox/prompts/workerpals/openhands_strict_tool_use_message.md +1 -0
- package/runtime/sandbox/prompts/workerpals/openhands_supplemental_guidance_message.md +2 -0
- package/runtime/sandbox/prompts/workerpals/openhands_task_execute_fallback_system_prompt.md +1 -0
- package/runtime/sandbox/prompts/workerpals/openhands_task_execute_system_prompt.md +21 -0
- package/runtime/sandbox/prompts/workerpals/openhands_task_user_prompt.md +6 -0
- package/runtime/sandbox/prompts/workerpals/openhands_timeout_note.md +1 -0
- package/runtime/sandbox/prompts/workerpals/pr_description.md +42 -0
- package/runtime/sandbox/prompts/workerpals/task_quality_critic_system_prompt.md +9 -0
- package/runtime/sandbox/prompts/workerpals/task_quality_critic_user_prompt.md +17 -0
- package/runtime/sandbox/prompts/workerpals/workerpals_system_prompt.md +115 -0
- package/runtime/sandbox/protocol/schemas/approvals.schema.json +6 -0
- package/runtime/sandbox/protocol/schemas/envelope.schema.json +96 -0
- package/runtime/sandbox/protocol/schemas/events.schema.json +679 -0
- package/runtime/sandbox/protocol/schemas/http.schema.json +50 -0
|
@@ -0,0 +1,2029 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
PushPals -> mini-swe-agent worker wrapper.
|
|
4
|
+
|
|
5
|
+
This script receives a base64-encoded JSON payload from the TS worker,
|
|
6
|
+
executes the requested task through the mini-swe-agent Python SDK, and prints
|
|
7
|
+
one structured result line:
|
|
8
|
+
|
|
9
|
+
__PUSHPALS_OH_RESULT__ {"ok":true,...}
|
|
10
|
+
|
|
11
|
+
The sentinel prefix is intentionally the same as the OpenHands wrapper so that
|
|
12
|
+
the TypeScript host can parse results with a single code path.
|
|
13
|
+
|
|
14
|
+
Production hardening:
|
|
15
|
+
- Detect the common failure mode where the model never emits tool calls
|
|
16
|
+
("No tool calls found", etc.) and retry once with a strict tool-usage hint.
|
|
17
|
+
- If the model still cannot tool-call, return a structured failure that makes
|
|
18
|
+
the root cause obvious to the TS layer (so you can alert / route / fallback).
|
|
19
|
+
|
|
20
|
+
Tool-broker shim:
|
|
21
|
+
- If mini-swe-agent fails because the model doesn't tool-call, fall back to a
|
|
22
|
+
"tool broker" loop that does NOT require native tool/function calling.
|
|
23
|
+
- The broker asks the model to emit a strict JSON "plan of actions" (file ops + safe shell),
|
|
24
|
+
executes them locally, and feeds observations back to the model for a few steps.
|
|
25
|
+
- Broker can be forced on/off with WORKERPALS_MINISWE_TOOL_BROKER=1/0.
|
|
26
|
+
If unset, local endpoints (LM Studio/Ollama-style) default to ON.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import json
|
|
32
|
+
import ast
|
|
33
|
+
import os
|
|
34
|
+
import re
|
|
35
|
+
import shlex
|
|
36
|
+
import sys
|
|
37
|
+
import time
|
|
38
|
+
import traceback
|
|
39
|
+
import fnmatch
|
|
40
|
+
from dataclasses import dataclass
|
|
41
|
+
from pathlib import Path
|
|
42
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
43
|
+
from urllib.error import HTTPError, URLError
|
|
44
|
+
from urllib.request import Request, urlopen
|
|
45
|
+
|
|
46
|
+
# Shared executor infrastructure lives in src/backends/shared.
|
|
47
|
+
_SHARED_DIR = Path(__file__).resolve().parents[1] / "shared"
|
|
48
|
+
if str(_SHARED_DIR) not in sys.path:
|
|
49
|
+
sys.path.insert(0, str(_SHARED_DIR))
|
|
50
|
+
|
|
51
|
+
from executor_base import (
|
|
52
|
+
Logger,
|
|
53
|
+
config_get,
|
|
54
|
+
emit,
|
|
55
|
+
is_no_tool_calls_error,
|
|
56
|
+
log_agent_messages,
|
|
57
|
+
log_git_status,
|
|
58
|
+
looks_local_base_url,
|
|
59
|
+
parse_task_execute_payload,
|
|
60
|
+
resolve_llm_config,
|
|
61
|
+
setting_int,
|
|
62
|
+
setting_str,
|
|
63
|
+
summarize_git_changes,
|
|
64
|
+
to_int,
|
|
65
|
+
to_single_line,
|
|
66
|
+
DEFAULT_TOOLCALL_RETRY_MAX,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# ─── Constants ───────────────────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
# Identity and logging for this executor.
DEFAULT_MINISWE_MODEL = "local-model"
LOG_PREFIX = "[MiniSweExecutor]"
log = Logger(LOG_PREFIX)

# Tool broker defaults (conservative).
# The explicit default stays off; _tool_broker_enabled() turns the broker on
# automatically when the endpoint looks local.
_BROKER_ENABLED_DEFAULT = "0"

# Per-run limits for the broker loop.
_BROKER_MAX_STEPS_DEFAULT = 8
_BROKER_MAX_ACTIONS_PER_STEP_DEFAULT = 10
_BROKER_RUN_RETRY_MAX_DEFAULT = 1

# HTTP behaviour; the *_LOCAL_* variants apply when the base URL looks local.
_BROKER_HTTP_TIMEOUT_SEC_DEFAULT = 90
_BROKER_HTTP_TIMEOUT_SEC_LOCAL_DEFAULT = 120
_BROKER_HTTP_RETRY_MAX_DEFAULT = 1
_BROKER_HTTP_RETRY_MAX_LOCAL_DEFAULT = 2

# Sampling temperature sent with every broker chat-completion request.
_BROKER_TEMPERATURE = 0.0

# Shell execution and observation sizing.
_BROKER_SHELL_TIMEOUT_SEC_DEFAULT = 120
_BROKER_OBSERVATION_MAX_CHARS = 4_000
_BROKER_READ_PREVIEW_CHARS = 800

# Matches ``{{ token }}`` placeholders (letters, digits, underscore).
PROMPT_TOKEN_REGEX = re.compile(r"\{\{\s*([a-zA-Z0-9_]+)\s*\}\}")
_PROMPT_TEMPLATE_CACHE: Dict[str, str] = {}
|
|
91
|
+
|
|
92
|
+
# Safety: a deliberately simple regex denylist for shell commands (adjust as needed).
# Order is preserved because the first matching pattern is the one reported.
_DENY_PATTERNS = [
    # privilege escalation / destructive filesystem operations
    r"\bsudo\b",
    r"\brm\b\s+-rf\b",
    r"\bmkfs\b",
    r"\bdd\b",
    # machine power state
    r"\bshutdown\b",
    r"\breboot\b",
    r"\bpoweroff\b",
    # network access and remote copy
    r"\bcurl\b",
    r"\bwget\b",
    r"\bnc\b",
    r"\bnetcat\b",
    r"\bssh\b",
    r"\bscp\b",
    r"\brsync\b",
    r"\bpython\b\s+-m\s+http\.server\b",
]

# Only these executables may appear as the first token of a broker shell command.
_ALLOWED_BINARIES = {
    "git", "bun", "npm",
    "cat", "tail", "head", "ls", "find", "stat", "wc",
    "rg", "grep", "sed", "awk",
    "printf", "echo", "test",
}

# Git subcommands on the allowlist.
_ALLOWED_GIT_SUBCOMMANDS = {"status", "diff", "show", "log", "grep", "rev-parse", "ls-files"}

# Package runners share the same subcommand allowlist; each gets its own set object.
_ALLOWED_PACKAGE_RUNNERS = {
    runner: {"test", "run", "--version", "-v"} for runner in ("bun", "npm")
}

# Characters that would enable chaining, substitution, or redirection in a shell.
_SHELL_META_CHARS = {";", "|", "&", "$", "`", "(", ")", "<", ">"}

_BROKER_MAX_WRITE_CHARS = 200_000
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# ─── Mini-swe-specific config ───────────────────────────────────────────────
|
|
147
|
+
|
|
148
|
+
def _execution_timeout_ms() -> int:
    """Return the mini-swe execution timeout in milliseconds.

    The raw value is looked up through ``setting_str`` using the
    ``WORKERPALS_MINISWE_TIMEOUT_MS`` name and the
    ``workerpals.miniswe_timeout_ms`` key. A missing or non-integer value
    yields 1,800,000 ms; a parsed value is never allowed below 10,000 ms.
    """
    fallback_ms = 1800000
    configured = setting_str("WORKERPALS_MINISWE_TIMEOUT_MS", "workerpals.miniswe_timeout_ms", "")
    if configured:
        try:
            return max(10000, int(configured))
        except Exception:
            # Unparseable setting: fall through to the default below.
            pass
    return fallback_ms
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _toolcall_retry_max() -> int:
    """Return how many tool-call retries are allowed.

    The environment variable ``WORKERPALS_MINISWE_TOOLCALL_RETRY_MAX`` takes
    precedence over the ``workerpals.miniswe_toolcall_retry_max`` config key.
    Explicit values are clamped to the range 0..3; when neither source is set
    ``DEFAULT_TOOLCALL_RETRY_MAX`` is returned as-is.
    """
    env_value = (os.environ.get("WORKERPALS_MINISWE_TOOLCALL_RETRY_MAX") or "").strip()
    if env_value:
        chosen = env_value
    else:
        chosen = config_get("workerpals.miniswe_toolcall_retry_max", None)
        if chosen is None:
            return DEFAULT_TOOLCALL_RETRY_MAX
    return max(0, min(3, to_int(chosen, DEFAULT_TOOLCALL_RETRY_MAX)))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _parse_boolish(raw: Any) -> Optional[bool]:
    """Interpret a loosely-typed flag value.

    Returns True for 1/true/yes/on, False for 0/false/no/off (case and
    surrounding whitespace ignored), and None for ``None``, blank, or any
    unrecognized value.
    """
    if raw is None:
        return None
    recognized = {
        "1": True, "true": True, "yes": True, "on": True,
        "0": False, "false": False, "no": False, "off": False,
    }
    # Blank strings are simply absent from the table and map to None.
    return recognized.get(str(raw).strip().lower())
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _tool_broker_enabled(base_url: str = "") -> bool:
    """Decide whether the tool-broker fallback is enabled.

    Precedence: the ``WORKERPALS_MINISWE_TOOL_BROKER`` environment variable,
    then the ``workerpals.miniswe_tool_broker`` config key, then automatic
    enablement when ``base_url`` looks local, and finally the built-in default.
    """
    # Sources are wrapped in callables so the config lookup only happens when
    # the environment variable gave no usable answer.
    explicit_sources = (
        lambda: os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER"),
        lambda: config_get("workerpals.miniswe_tool_broker", None),
    )
    for read_source in explicit_sources:
        flag = _parse_boolish(read_source())
        if flag is not None:
            return flag
    if looks_local_base_url(base_url):
        return True
    return _parse_boolish(_BROKER_ENABLED_DEFAULT) is True
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _tool_broker_max_steps() -> int:
    """Return the maximum number of broker steps.

    Only the ``WORKERPALS_MINISWE_TOOL_BROKER_MAX_STEPS`` environment variable
    is consulted; an override is clamped to 1..30, otherwise the default applies.
    """
    override = (os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_MAX_STEPS") or "").strip()
    if not override:
        return _BROKER_MAX_STEPS_DEFAULT
    requested = to_int(override, _BROKER_MAX_STEPS_DEFAULT)
    return max(1, min(30, requested))
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _tool_broker_max_actions_per_step() -> int:
    """Return the maximum number of actions the broker runs per step.

    Only the ``WORKERPALS_MINISWE_TOOL_BROKER_MAX_ACTIONS_PER_STEP``
    environment variable is consulted; an override is clamped to 1..50,
    otherwise the default applies.
    """
    override = (os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_MAX_ACTIONS_PER_STEP") or "").strip()
    if not override:
        return _BROKER_MAX_ACTIONS_PER_STEP_DEFAULT
    requested = to_int(override, _BROKER_MAX_ACTIONS_PER_STEP_DEFAULT)
    return max(1, min(50, requested))
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _tool_broker_shell_timeout_sec() -> int:
    """Return the timeout, in seconds, for broker shell commands.

    Only the ``WORKERPALS_MINISWE_TOOL_BROKER_SHELL_TIMEOUT_SEC`` environment
    variable is consulted; an override is clamped to 5..600, otherwise the
    default applies.
    """
    override = (os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_SHELL_TIMEOUT_SEC") or "").strip()
    if not override:
        return _BROKER_SHELL_TIMEOUT_SEC_DEFAULT
    requested = to_int(override, _BROKER_SHELL_TIMEOUT_SEC_DEFAULT)
    return max(5, min(600, requested))
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _tool_broker_http_timeout_sec(base_url: str = "") -> int:
    """Return the HTTP timeout, in seconds, for broker LLM requests.

    Precedence: ``WORKERPALS_MINISWE_TOOL_BROKER_HTTP_TIMEOUT_SEC`` from the
    environment, then the ``workerpals.miniswe_tool_broker_http_timeout_sec``
    config key (both clamped to 10..600), then a default that is larger when
    ``base_url`` looks local.
    """
    env_value = (os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_HTTP_TIMEOUT_SEC") or "").strip()
    if env_value:
        return max(10, min(600, to_int(env_value, _BROKER_HTTP_TIMEOUT_SEC_DEFAULT)))
    cfg_value = config_get("workerpals.miniswe_tool_broker_http_timeout_sec", None)
    if cfg_value is not None:
        return max(10, min(600, to_int(cfg_value, _BROKER_HTTP_TIMEOUT_SEC_DEFAULT)))
    return (
        _BROKER_HTTP_TIMEOUT_SEC_LOCAL_DEFAULT
        if looks_local_base_url(base_url)
        else _BROKER_HTTP_TIMEOUT_SEC_DEFAULT
    )
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _tool_broker_http_retry_max(base_url: str = "") -> int:
    """Return how many times a broker HTTP request may be retried.

    Precedence: ``WORKERPALS_MINISWE_TOOL_BROKER_HTTP_RETRY_MAX`` from the
    environment, then the ``workerpals.miniswe_tool_broker_http_retry_max``
    config key (both clamped to 0..3), then a default that is larger when
    ``base_url`` looks local.
    """
    env_value = (os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_HTTP_RETRY_MAX") or "").strip()
    if env_value:
        return max(0, min(3, to_int(env_value, _BROKER_HTTP_RETRY_MAX_DEFAULT)))
    cfg_value = config_get("workerpals.miniswe_tool_broker_http_retry_max", None)
    if cfg_value is not None:
        return max(0, min(3, to_int(cfg_value, _BROKER_HTTP_RETRY_MAX_DEFAULT)))
    return (
        _BROKER_HTTP_RETRY_MAX_LOCAL_DEFAULT
        if looks_local_base_url(base_url)
        else _BROKER_HTTP_RETRY_MAX_DEFAULT
    )
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _tool_broker_run_retry_max() -> int:
    """Return how many times a whole broker run may be retried.

    ``WORKERPALS_MINISWE_TOOL_BROKER_RUN_RETRY_MAX`` from the environment wins
    over the ``workerpals.miniswe_tool_broker_run_retry_max`` config key.
    Explicit values are clamped to 0..3; with neither set the default is used.
    """
    env_value = (os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_RUN_RETRY_MAX") or "").strip()
    if env_value:
        chosen = env_value
    else:
        chosen = config_get("workerpals.miniswe_tool_broker_run_retry_max", None)
        if chosen is None:
            return _BROKER_RUN_RETRY_MAX_DEFAULT
    return max(0, min(3, to_int(chosen, _BROKER_RUN_RETRY_MAX_DEFAULT)))
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _build_strict_tool_use_guidance(repo: str) -> str:
    """Render the strict tool-use guidance prompt, passing ``repo`` as a template value."""
    template_values = {"repo": repo}
    return _load_prompt_template(
        "workerpals/miniswe_strict_tool_use_guidance.md",
        template_values,
    )
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ─── Tool Broker Shim ────────────────────────────────────────────────────────
|
|
255
|
+
|
|
256
|
+
def _messages_indicate_missing_tool_calls(messages: Any) -> bool:
    """Report whether a transcript shows the model never produced a tool call.

    True only when ``messages`` is a non-empty list in which no dict entry
    carries a non-empty ``tool_calls`` list and at least one user message
    contains the text "no tool calls found" (case-insensitive).
    """
    if not (isinstance(messages, list) and messages):
        return False
    saw_complaint = False
    for entry in messages:
        if not isinstance(entry, dict):
            continue
        calls = entry.get("tool_calls")
        if isinstance(calls, list) and calls:
            # A single real tool call anywhere settles the answer.
            return False
        sender = str(entry.get("role") or "").strip().lower()
        body = str(entry.get("content") or "").strip().lower()
        if sender == "user" and "no tool calls found" in body:
            saw_complaint = True
    return saw_complaint
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _is_timeout_like_error_text(value: object) -> bool:
    """Return True when the text form of ``value`` mentions "timeout" or "timed out"."""
    lowered = str(value or "").strip().lower()
    return any(marker in lowered for marker in ("timeout", "timed out"))
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _is_broker_timeout_failure(result: object) -> bool:
    """Return True for a failed broker result caused by an LLM request timeout.

    The result must be a dict with a falsy ``ok``, a summary containing
    "tool broker failed: llm request error", and timeout-like ``stderr`` text.
    """
    if not isinstance(result, dict) or bool(result.get("ok")):
        return False
    summary_text = str(result.get("summary") or "").lower()
    stderr_text = str(result.get("stderr") or "")
    if "tool broker failed: llm request error" not in summary_text:
        return False
    return _is_timeout_like_error_text(stderr_text)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _is_broker_incomplete_failure(result: object) -> bool:
    """Return True for a failed broker result that ran out of limits before done=true."""
    marker = "tool broker failed: did not reach done=true before limits"
    if isinstance(result, dict) and not result.get("ok"):
        return marker in str(result.get("summary") or "").lower()
    return False
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@dataclass
class _LLMConfig:
    """Connection settings for the chat-completions endpoint used by the broker."""

    # Model identifier placed in the request payload.
    model: str
    # Bearer token; an empty value means no Authorization header is sent.
    api_key: str
    # Endpoint root, with or without a trailing "/v1".
    base_url: str
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _normalize_openai_base_url(base_url: str) -> str:
    """
    Normalize an OpenAI-compatible base URL so that it ends with ``/v1``.

    Accepts:
    - http://host:1234
    - http://host:1234/
    - http://host:1234/v1
    - http://host:1234/v1/

    Returns "" for an empty/blank input; otherwise the URL with trailing
    slashes removed and ``/v1`` appended unless it is already the final path
    segment.
    """
    base = (base_url or "").strip()
    if not base:
        return ""
    base = base.rstrip("/")
    # Only a real "/v1" path segment counts. A bare "v1" suffix (for example a
    # host named "llm-v1") must still get "/v1" appended, otherwise the
    # request URL would be built without the API version segment.
    if base.endswith("/v1"):
        return base
    return base + "/v1"
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _http_post_json(url: str, payload: Dict[str, Any], api_key: str, timeout_sec: float) -> Dict[str, Any]:
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON response.

    A bearer ``Authorization`` header is added only when ``api_key`` is
    non-empty.

    Raises:
        RuntimeError: on an HTTP error status (the response body is included
            when it can be read), on a URL/connection error, or on timeout.
    """
    body = json.dumps(payload).encode("utf-8")
    request_headers = {"Content-Type": "application/json"}
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"
    request = Request(url, data=body, headers=request_headers, method="POST")
    try:
        with urlopen(request, timeout=timeout_sec) as response:
            text = response.read().decode("utf-8", errors="replace")
            return json.loads(text)
    except HTTPError as e:
        # Best effort: the error body often carries the server's explanation.
        try:
            details = e.read().decode("utf-8", errors="replace")
        except Exception:
            details = ""
        raise RuntimeError(f"HTTP {e.code} {e.reason} for POST {url}\n{details}") from e
    except URLError as e:
        raise RuntimeError(f"URLError for POST {url}: {e}") from e
    except TimeoutError as e:
        raise RuntimeError(f"TimeoutError for POST {url}: timed out after {timeout_sec}s") from e
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _chat_completion(cfg: _LLMConfig, messages: List[Dict[str, str]], timeout_sec: int) -> str:
    """Send a non-streaming chat-completions request and return the reply text.

    Args:
        cfg: model name, API key and base URL of the endpoint.
        messages: chat messages forwarded unchanged in the request payload.
        timeout_sec: HTTP timeout in seconds.

    Returns:
        The first choice's message content, stripped of surrounding whitespace.

    Raises:
        RuntimeError: when no base URL is configured, when the response has no
            usable ``choices`` list, or when the first choice carries no text
            content. Transport errors from ``_http_post_json`` propagate.
    """
    base = _normalize_openai_base_url(cfg.base_url)
    if not base:
        raise RuntimeError("No base_url configured for broker shim (WORKERPALS_LLM_ENDPOINT/BASE_URL).")
    url = base + "/chat/completions"
    payload: Dict[str, Any] = {
        "model": cfg.model,
        "messages": messages,
        "temperature": _BROKER_TEMPERATURE,
        "stream": False,
    }
    obj = _http_post_json(url, payload, cfg.api_key, timeout_sec=float(timeout_sec))

    # Validate every level of the response shape so a malformed reply surfaces
    # as a descriptive RuntimeError rather than an AttributeError/TypeError.
    choices = obj.get("choices") if isinstance(obj, dict) else None
    if not isinstance(choices, list) or not choices:
        raise RuntimeError(f"LLM returned no choices: {to_single_line(obj, 400)}")
    first_choice = choices[0]
    msg = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = msg.get("content") if isinstance(msg, dict) else None
    if not isinstance(content, str):
        raise RuntimeError(f"LLM returned non-text content: {to_single_line(obj, 400)}")
    return content.strip()
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _repo_safe_path(repo: str, rel_path: str) -> Path:
    """Resolve ``rel_path`` against ``repo`` and guarantee it stays inside the repo.

    Relative paths are joined onto the resolved repo root. Absolute paths
    (including Windows drive-letter forms) are resolved on their own and
    accepted only when they land inside the root.

    Raises:
        RuntimeError: for a blank path, a path containing a NUL byte, or a
            path that resolves outside the repo root.
    """
    candidate = str(rel_path or "")
    if not candidate.strip():
        raise RuntimeError("Path is required")
    if "\x00" in candidate:
        raise RuntimeError("Path contains NUL byte")

    repo_root = Path(repo).resolve()
    looks_absolute = Path(candidate).is_absolute() or re.match(r"^[A-Za-z]:[\\/]", candidate)
    resolved = Path(candidate).resolve() if looks_absolute else (repo_root / candidate).resolve()

    # Containment check: the root itself or any descendant of it is fine.
    if resolved != repo_root and repo_root not in resolved.parents:
        raise RuntimeError(f"Refusing to access path outside repo: {candidate}")
    return resolved
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def _normalize_concrete_repo_path(repo: str, path_value: str) -> Optional[str]:
    """
    Convert a concrete (possibly absolute) file path to repo-relative POSIX form.

    Returns "." for the repo root itself and None when the value is not a
    non-blank string or cannot be resolved safely inside the repo.
    """
    if not isinstance(path_value, str):
        return None
    candidate = path_value.strip()
    if not candidate:
        return None
    try:
        repo_root = Path(repo).resolve()
        inside = _repo_safe_path(repo, candidate)
        relative = inside.relative_to(repo_root).as_posix().strip()
    except Exception:
        # Any resolution or containment failure means "not normalizable".
        return None
    return relative or "."
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def _normalize_scope_rel_path(value: Any) -> Optional[str]:
    """Normalize a scope entry to a clean repo-relative POSIX path.

    Backslashes become forward slashes, leading "./" prefixes and trailing
    slashes are dropped, and empty or "." segments are removed. Returns None
    for non-strings, blank values, absolute or drive-letter paths, any path
    containing a ".." segment, or a path with no segments left.
    """
    if not isinstance(value, str):
        return None
    text = value.strip().replace("\\", "/")
    if not text:
        return None
    while text.startswith("./"):
        text = text[2:]
    text = text.rstrip("/")
    if not text or text.startswith("/") or re.match(r"^[A-Za-z]:[\\/]", text):
        return None

    pieces = [piece.strip() for piece in text.split("/")]
    if ".." in pieces:
        # Parent traversal is never allowed in a scope path.
        return None
    kept = [piece for piece in pieces if piece and piece != "."]
    return "/".join(kept) if kept else None
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _extract_write_globs_from_payload(payload: Optional[Dict[str, Any]]) -> List[str]:
    """Collect normalized, de-duplicated write globs from a task payload.

    Reads ``payload["params"]["planning"]["scope"]["writeGlobs"]``. Returns an
    empty list when any level is missing or has the wrong type. Entries that
    fail normalization are skipped; first-seen order is kept.
    """
    node: Any = payload
    for key in ("params", "planning", "scope"):
        if not isinstance(node, dict):
            return []
        node = node.get(key)
    if not isinstance(node, dict):
        return []
    raw_globs = node.get("writeGlobs")
    if not isinstance(raw_globs, list):
        return []

    # A dict keeps insertion order while dropping duplicates.
    unique: Dict[str, None] = {}
    for item in raw_globs:
        cleaned = _normalize_scope_rel_path(item)
        if cleaned:
            unique.setdefault(cleaned, None)
    return list(unique)
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def _assert_write_allowed(repo: str, path: str, write_globs: Optional[List[str]]) -> None:
    """Enforce the write scope for ``path``.

    With no ``write_globs`` every path is allowed. Otherwise the repo-relative
    form of ``path`` must match at least one entry: entries containing any of
    ``*?[]`` are matched case-sensitively with ``fnmatch``; plain entries match
    the exact path or act as a directory prefix.

    Raises:
        RuntimeError: when the path cannot be normalized or matches no entry.
    """
    if not write_globs:
        return
    target = _normalize_concrete_repo_path(repo, path)
    if not target:
        raise RuntimeError(f"Invalid write path for scope enforcement: {path!r}")

    def _permits(pattern: str) -> bool:
        if not pattern:
            return False
        if any(ch in pattern for ch in "*?[]"):
            return fnmatch.fnmatchcase(target, pattern)
        return target == pattern or target.startswith(pattern + "/")

    if any(_permits(str(entry or "").strip()) for entry in write_globs):
        return
    raise RuntimeError(
        "Scope violation: attempted write outside writeGlobs. "
        f"path={target!r} write_globs={write_globs!r}"
    )
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def _read_text_file(repo: str, path: str, max_chars: int = 60000) -> str:
    """Read a repo file as UTF-8 text, replacing undecodable bytes.

    Content longer than ``max_chars`` is cut to that length and suffixed with
    a truncation marker.

    Raises:
        RuntimeError: when the path is outside the repo or does not exist.
    """
    target = _repo_safe_path(repo, path)
    if not target.exists():
        raise RuntimeError(f"File not found: {path}")
    text = target.read_text(encoding="utf-8", errors="replace")
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "\n... (truncated)"
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _write_text_file(repo: str, path: str, content: str, write_globs: Optional[List[str]] = None) -> None:
    """Write ``content`` to a repo file as UTF-8, creating parent directories.

    The write scope is checked first, then the path is confined to the repo;
    either check raises RuntimeError on violation.
    """
    _assert_write_allowed(repo, path, write_globs)
    destination = _repo_safe_path(repo, path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", encoding="utf-8") as handle:
        handle.write(content)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def _append_line(repo: str, path: str, line: str, write_globs: Optional[List[str]] = None) -> None:
    """
    Append one line to the end of a repo file without rewriting the file.

    If the file already has content that does not end in a newline, a newline
    is written first so the new line starts on its own row. Parent directories
    are created as needed; scope and repo containment are checked up front.
    """
    _assert_write_allowed(repo, path, write_globs)
    target = _repo_safe_path(repo, path)
    target.parent.mkdir(parents=True, exist_ok=True)

    missing_trailing_newline = False
    if target.exists() and target.stat().st_size > 0:
        try:
            # Inspect only the final byte instead of reading the whole file.
            with open(target, "rb") as reader:
                reader.seek(-1, os.SEEK_END)
                missing_trailing_newline = reader.read(1) != b"\n"
        except Exception:
            # Best effort: if the probe fails, append without a separator.
            missing_trailing_newline = False

    with open(target, "a", encoding="utf-8") as writer:
        if missing_trailing_newline:
            writer.write("\n")
        writer.write(f"{line}\n")
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def _replace_text_once(
    repo: str,
    path: str,
    old: str,
    new: str,
    write_globs: Optional[List[str]] = None,
) -> int:
    """Replace the first occurrence of ``old`` with ``new`` in a repo file.

    The file is read and rewritten as UTF-8 (undecodable bytes are replaced
    on read). Returns 1 when a replacement was written and 0 when ``old`` was
    not found, in which case the file is left untouched.
    """
    _assert_write_allowed(repo, path, write_globs)
    target = _repo_safe_path(repo, path)
    current = target.read_text(encoding="utf-8", errors="replace")
    if old not in current:
        return 0
    target.write_text(current.replace(old, new, 1), encoding="utf-8")
    return 1
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def _parse_and_validate_shell_command(cmd: str) -> Tuple[Optional[List[str]], str]:
    """
    Tokenize ``cmd`` and decide whether it may be executed.

    Returns ``(args, "")`` when the command is accepted, where ``args`` is the
    ``shlex``-split argument list, or ``(None, reason)`` when it is rejected.

    Checks, in order: empty input, control characters, shell metacharacters
    (``_SHELL_META_CHARS``), tokenization, binary allowlist
    (``_ALLOWED_BINARIES``), package-runner subcommands
    (``_ALLOWED_PACKAGE_RUNNERS``), the regex denylist (``_DENY_PATTERNS``),
    and finally per-binary guardrails for ``find``, ``git``, ``sed`` and
    ``awk``.
    """
    c = (cmd or "").strip()
    if not c:
        return None, "empty command"
    if any(ord(ch) < 32 for ch in c):
        return None, "control characters are not allowed"
    if any(ch in c for ch in _SHELL_META_CHARS):
        return None, "shell metacharacters are not allowed"
    try:
        args = shlex.split(c, posix=True)
    except Exception as exc:
        return None, f"failed to parse command: {exc}"
    if not args:
        return None, "empty parsed command"

    binary = args[0].strip().lower()
    if binary not in _ALLOWED_BINARIES:
        return None, f"binary not allowed: {binary}"

    if binary in _ALLOWED_PACKAGE_RUNNERS:
        if len(args) < 2:
            return None, f"{binary} command requires a subcommand"
        sub = args[1].strip().lower()
        if sub not in _ALLOWED_PACKAGE_RUNNERS[binary]:
            return None, f"{binary} subcommand not allowed: {sub}"
        # Allow script runner only for repo scripts with a simple token.
        if sub == "run":
            if len(args) < 3:
                return None, f"{binary} run requires a script name"
            script_name = str(args[2] or "").strip().lower()
            if not re.match(r"^[a-z0-9:_\-.]+$", script_name):
                return None, f"{binary} run script token is not allowed: {script_name!r}"

    # The denylist is matched against the whole lowercased command line.
    lowered = c.lower()
    for pat in _DENY_PATTERNS:
        if re.search(pat, lowered):
            return None, f"blocked by denylist: {pat}"

    # Additional guardrails for risky allowlisted binaries.
    if binary == "find":
        joined = " ".join(args[1:]).lower()
        if "-exec" in joined or "-delete" in joined:
            return None, "find with -exec/-delete is not allowed"

    if binary == "git" and len(args) >= 2:
        sub = args[1].strip().lower()
        if sub not in _ALLOWED_GIT_SUBCOMMANDS:
            return None, f"git subcommand not allowed: {sub}"
        # NOTE(review): the path checks below look at each argument as a
        # whole, so a path embedded in an option value (``--opt=<path>``) is
        # not inspected.
        for raw_arg in args[2:]:
            arg = str(raw_arg or "").strip()
            if not arg:
                continue
            lower_arg = arg.lower()
            # lower_arg is already lowercased, so this covers -c and -C.
            if lower_arg == "-c":
                return None, f"git option is not allowed: {arg}"
            if lower_arg.startswith("-c"):
                return None, f"git option prefix is not allowed: {arg}"
            if lower_arg.startswith("--git-dir") or lower_arg.startswith("--work-tree"):
                return None, f"git path/work-tree override is not allowed: {arg}"
            if lower_arg == "--no-index":
                return None, "git diff --no-index is not allowed"
            if arg.startswith("/") or re.match(r"^[A-Za-z]:[\\/]", arg):
                return None, f"absolute path-like git arg is not allowed: {arg}"
            normalized = arg.replace("\\", "/")
            while normalized.startswith("./"):
                normalized = normalized[2:]
            if (
                normalized == ".."
                or normalized.startswith("../")
                or "/../" in normalized
            ):
                return None, f"path escape git arg is not allowed: {arg}"
    if binary == "git" and len(args) < 2:
        return None, "git command requires an explicit allowed subcommand"

    if binary == "sed":
        for raw_arg in args[1:]:
            arg = str(raw_arg or "").strip().lower()
            if not arg:
                continue
            # Long form: reject --in-place and every abbreviated spelling of
            # it ("--i", "--in", "--in-p", ...), which option parsers that
            # accept unambiguous prefixes would treat as --in-place.
            if arg.startswith("--i"):
                return None, "sed in-place edits are not allowed"
            # Short form: -i may be clustered behind other short flags
            # (e.g. -ni, -Ei), so look for it anywhere in the cluster. This
            # deliberately over-rejects clusters such as -fscript.sed.
            if arg.startswith("-") and not arg.startswith("--") and "i" in arg[1:]:
                return None, "sed in-place edits are not allowed"

    if binary == "awk":
        joined = " ".join(args[1:]).lower()
        if "system(" in joined:
            return None, "awk system() is not allowed"

    return args, ""
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def _attempt_salvage_rejected_shell_command(cmd: str, error_text: str) -> Optional[str]:
    """
    Try to turn a rejected shell command into one that passes validation.

    Two rewrites are attempted, driven by the text of the rejection/error:
    - a metacharacter rejection keeps only the part of the command before the
      first shell metacharacter;
    - an ``npm test`` / ``npm run ...`` command that was rejected or could not
      be found is re-expressed with ``bun``.

    Returns the rewritten command when it validates, otherwise ``None``.
    """
    original = str(cmd or "").strip()
    if not original:
        return None
    reason = str(error_text or "").lower()

    def _validated(proposal: str) -> Optional[str]:
        # A proposal is only usable when the validator accepts it.
        parsed, _ = _parse_and_validate_shell_command(proposal)
        return proposal if parsed is not None else None

    # If metacharacters were rejected, keep the prefix command before first metachar.
    if "metacharacters" in reason:
        prefix = re.split(r"[;|&$`()<>]", original, maxsplit=1)[0].strip()
        if prefix and prefix != original:
            salvaged = _validated(prefix)
            if salvaged is not None:
                return salvaged

    # If npm was rejected (or missing), try equivalent bun command.
    npm_unavailable = (
        "binary not allowed: npm" in reason or "no such file or directory" in reason
    )
    if npm_unavailable:
        tokens = original.split()
        if (
            len(tokens) >= 2
            and tokens[0].lower() == "npm"
            and tokens[1].lower() in {"test", "run"}
        ):
            return _validated("bun " + " ".join(tokens[1:]))

    return None
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _run_shell(repo: str, cmd: str, max_output: int = 60000, timeout_sec: Optional[int] = None) -> str:
    """
    Validate ``cmd`` and run it inside ``repo`` as an argument list (no shell).

    The result is a string that starts with ``(exit=<returncode>)`` and, when
    the process produced output, continues on the next line with stdout
    followed by stderr. Output longer than ``max_output`` characters is cut
    and marked as truncated.

    Raises ``RuntimeError`` when the command fails validation. When
    ``timeout_sec`` is ``None`` the timeout comes from
    ``_tool_broker_shell_timeout_sec()``.
    """
    argv, rejection = _parse_and_validate_shell_command(cmd)
    if argv is None:
        raise RuntimeError(f"Shell command rejected: {rejection}. cmd={cmd!r}")

    import subprocess

    workdir = str(Path(repo).resolve())
    effective_timeout = (
        _tool_broker_shell_timeout_sec() if timeout_sec is None else timeout_sec
    )
    completed = subprocess.run(
        argv,
        cwd=workdir,
        capture_output=True,
        text=True,
        check=False,
        timeout=effective_timeout,
    )

    combined = completed.stdout or ""
    if completed.stderr:
        combined += "\n" + completed.stderr
    combined = combined.strip()
    if len(combined) > max_output:
        combined = combined[:max_output] + "\n... (truncated)"

    if combined:
        return f"(exit={completed.returncode})\n{combined}"
    return f"(exit={completed.returncode})"
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def _shell_exit_code(output: str) -> Optional[int]:
|
|
688
|
+
m = re.match(r"^\(exit=(\d+)\)", str(output or "").strip())
|
|
689
|
+
if not m:
|
|
690
|
+
return None
|
|
691
|
+
try:
|
|
692
|
+
return int(m.group(1))
|
|
693
|
+
except Exception:
|
|
694
|
+
return None
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _extract_first_json_object(text: str) -> Optional[Dict[str, Any]]:
|
|
698
|
+
"""
|
|
699
|
+
Tries to find and parse a single JSON object from the model response.
|
|
700
|
+
Accepts plain JSON, or JSON inside Markdown fences.
|
|
701
|
+
"""
|
|
702
|
+
if not text:
|
|
703
|
+
return None
|
|
704
|
+
# Strip ```json fences
|
|
705
|
+
fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
|
|
706
|
+
candidate = fenced.group(1).strip() if fenced else text.strip()
|
|
707
|
+
|
|
708
|
+
# Fast path: whole string is JSON
|
|
709
|
+
try:
|
|
710
|
+
obj = json.loads(candidate)
|
|
711
|
+
return obj if isinstance(obj, dict) else None
|
|
712
|
+
except Exception:
|
|
713
|
+
pass
|
|
714
|
+
|
|
715
|
+
def _normalize_common_json_typos(src: str) -> str:
|
|
716
|
+
"""
|
|
717
|
+
Repair a few common, low-risk key/value separator typos in model output.
|
|
718
|
+
|
|
719
|
+
Example:
|
|
720
|
+
{"line','foo"} -> {"line":'foo"}
|
|
721
|
+
{"line","foo"} -> {"line":"foo"}
|
|
722
|
+
"""
|
|
723
|
+
fixed = src
|
|
724
|
+
# Key was opened with double-quote but closed with single-quote before comma.
|
|
725
|
+
fixed = re.sub(
|
|
726
|
+
r'([{\s,])"([A-Za-z_][A-Za-z0-9_]*)\'\s*,\s*(["\'])',
|
|
727
|
+
r'\1"\2": \3',
|
|
728
|
+
fixed,
|
|
729
|
+
)
|
|
730
|
+
# Key is correctly quoted but comma used instead of colon.
|
|
731
|
+
fixed = re.sub(
|
|
732
|
+
r'([{\s,])"([A-Za-z_][A-Za-z0-9_]*)"\s*,\s*(["\'])',
|
|
733
|
+
r'\1"\2": \3',
|
|
734
|
+
fixed,
|
|
735
|
+
)
|
|
736
|
+
return fixed
|
|
737
|
+
|
|
738
|
+
def _try_relaxed_json_parse(src: str) -> Optional[Dict[str, Any]]:
|
|
739
|
+
# Common model drift: single-quoted strings, trailing commas, Python booleans.
|
|
740
|
+
working = src.strip()
|
|
741
|
+
if not working:
|
|
742
|
+
return None
|
|
743
|
+
working = _normalize_common_json_typos(working)
|
|
744
|
+
# Convert single-quoted literals to JSON double-quoted strings when possible.
|
|
745
|
+
working = re.sub(
|
|
746
|
+
r"'([^'\\]*(?:\\.[^'\\]*)*)'",
|
|
747
|
+
lambda m: json.dumps(m.group(1)),
|
|
748
|
+
working,
|
|
749
|
+
)
|
|
750
|
+
# Sometimes malformed model output leaves an extra quote after normalization:
|
|
751
|
+
# "line": "value.""}
|
|
752
|
+
# Repair by dropping the stray quote before delimiters.
|
|
753
|
+
working = re.sub(
|
|
754
|
+
r'("(?:(?:\\.|[^"\\])*)")\s*"\s*([,}\]])',
|
|
755
|
+
r"\1\2",
|
|
756
|
+
working,
|
|
757
|
+
)
|
|
758
|
+
# Remove trailing commas before object/array close.
|
|
759
|
+
working = re.sub(r",(\s*[}\]])", r"\1", working)
|
|
760
|
+
try:
|
|
761
|
+
obj = json.loads(working)
|
|
762
|
+
return obj if isinstance(obj, dict) else None
|
|
763
|
+
except Exception:
|
|
764
|
+
pass
|
|
765
|
+
# As a final fallback, parse Python-literal style payloads.
|
|
766
|
+
py_working = (
|
|
767
|
+
working.replace(": true", ": True")
|
|
768
|
+
.replace(": false", ": False")
|
|
769
|
+
.replace(": null", ": None")
|
|
770
|
+
)
|
|
771
|
+
py_working = re.sub(r"\btrue\b", "True", py_working)
|
|
772
|
+
py_working = re.sub(r"\bfalse\b", "False", py_working)
|
|
773
|
+
py_working = re.sub(r"\bnull\b", "None", py_working)
|
|
774
|
+
try:
|
|
775
|
+
parsed = ast.literal_eval(py_working)
|
|
776
|
+
if isinstance(parsed, dict):
|
|
777
|
+
return parsed
|
|
778
|
+
except Exception:
|
|
779
|
+
return None
|
|
780
|
+
return None
|
|
781
|
+
|
|
782
|
+
relaxed = _try_relaxed_json_parse(candidate)
|
|
783
|
+
if relaxed is not None:
|
|
784
|
+
return relaxed
|
|
785
|
+
|
|
786
|
+
# Heuristic: find first {...} block
|
|
787
|
+
start = candidate.find("{")
|
|
788
|
+
end = candidate.rfind("}")
|
|
789
|
+
if start >= 0 and end > start:
|
|
790
|
+
snippet = candidate[start : end + 1]
|
|
791
|
+
try:
|
|
792
|
+
obj = json.loads(snippet)
|
|
793
|
+
return obj if isinstance(obj, dict) else None
|
|
794
|
+
except Exception:
|
|
795
|
+
relaxed_snippet = _try_relaxed_json_parse(snippet)
|
|
796
|
+
if relaxed_snippet is not None:
|
|
797
|
+
return relaxed_snippet
|
|
798
|
+
return None
|
|
799
|
+
return None
|
|
800
|
+
|
|
801
|
+
|
|
802
|
+
def _truncate_observation(text: str, max_chars: int = _BROKER_OBSERVATION_MAX_CHARS) -> str:
    """
    Shorten ``text`` to roughly ``max_chars`` characters, keeping both ends.

    The text is stripped first. When it fits within the limit it is returned
    as is; otherwise the first half and the last half of the budget are kept,
    joined by a ``...[observation truncated]...`` marker line.

    A limit of zero or less keeps no text at all (only the marker is returned
    for non-empty input).
    """
    t = str(text or "").strip()
    limit = max(0, max_chars)
    if len(t) <= limit:
        return t
    head = limit // 2
    tail = limit - head
    # Slice the tail from an explicit start index: ``t[-tail:]`` would return
    # the whole string when ``tail`` is 0.
    return f"{t[:head]}\n...[observation truncated]...\n{t[len(t) - tail:]}"
|
|
809
|
+
|
|
810
|
+
|
|
811
|
+
def _validate_broker_actions(actions: Any, max_actions: int) -> Tuple[bool, str, List[Dict[str, Any]]]:
|
|
812
|
+
if actions is None:
|
|
813
|
+
return True, "", []
|
|
814
|
+
if not isinstance(actions, list):
|
|
815
|
+
return False, f"Expected actions to be a list, got: {type(actions).__name__}", []
|
|
816
|
+
valid: List[Dict[str, Any]] = []
|
|
817
|
+
for i, act in enumerate(actions[:max_actions], start=1):
|
|
818
|
+
if not isinstance(act, dict):
|
|
819
|
+
return False, f"Action {i}: must be an object", []
|
|
820
|
+
typ = str(act.get("type") or "").strip()
|
|
821
|
+
if not typ:
|
|
822
|
+
return False, f"Action {i}: missing type", []
|
|
823
|
+
if typ == "read_file":
|
|
824
|
+
if not isinstance(act.get("path"), str) or not str(act.get("path")).strip():
|
|
825
|
+
return False, f"Action {i} read_file: path is required", []
|
|
826
|
+
valid.append({"type": typ, "path": str(act.get("path")).strip()})
|
|
827
|
+
continue
|
|
828
|
+
if typ in {"append_line", "append_comment"}:
|
|
829
|
+
if not isinstance(act.get("path"), str) or not str(act.get("path")).strip():
|
|
830
|
+
return False, f"Action {i} {typ}: path is required", []
|
|
831
|
+
line_value = act.get("line")
|
|
832
|
+
if typ == "append_comment" and not isinstance(line_value, str):
|
|
833
|
+
line_value = act.get("comment")
|
|
834
|
+
if not isinstance(line_value, str):
|
|
835
|
+
return False, f"Action {i} {typ}: line/comment must be a string", []
|
|
836
|
+
valid.append(
|
|
837
|
+
{
|
|
838
|
+
"type": "append_line",
|
|
839
|
+
"path": str(act.get("path")).strip(),
|
|
840
|
+
"line": str(line_value),
|
|
841
|
+
}
|
|
842
|
+
)
|
|
843
|
+
continue
|
|
844
|
+
if typ == "replace_text_once":
|
|
845
|
+
if not isinstance(act.get("path"), str) or not str(act.get("path")).strip():
|
|
846
|
+
return False, f"Action {i} replace_text_once: path is required", []
|
|
847
|
+
old = act.get("old")
|
|
848
|
+
new = act.get("new")
|
|
849
|
+
if not isinstance(old, str) or not isinstance(new, str):
|
|
850
|
+
return False, f"Action {i} replace_text_once: old/new must be strings", []
|
|
851
|
+
if not old:
|
|
852
|
+
return False, f"Action {i} replace_text_once: old must be non-empty", []
|
|
853
|
+
valid.append(
|
|
854
|
+
{"type": typ, "path": str(act.get("path")).strip(), "old": old, "new": new}
|
|
855
|
+
)
|
|
856
|
+
continue
|
|
857
|
+
if typ == "write_file":
|
|
858
|
+
if not isinstance(act.get("path"), str) or not str(act.get("path")).strip():
|
|
859
|
+
return False, f"Action {i} write_file: path is required", []
|
|
860
|
+
content = act.get("content")
|
|
861
|
+
if not isinstance(content, str):
|
|
862
|
+
return False, f"Action {i} write_file: content must be a string", []
|
|
863
|
+
if len(content) > _BROKER_MAX_WRITE_CHARS:
|
|
864
|
+
return False, f"Action {i} write_file: content too large ({len(content)} chars)", []
|
|
865
|
+
valid.append({"type": typ, "path": str(act.get("path")).strip(), "content": content})
|
|
866
|
+
continue
|
|
867
|
+
if typ == "run_shell":
|
|
868
|
+
cmd = act.get("command")
|
|
869
|
+
if not isinstance(cmd, str) or not cmd.strip():
|
|
870
|
+
return False, f"Action {i} run_shell: command is required", []
|
|
871
|
+
valid.append({"type": typ, "command": cmd.strip()})
|
|
872
|
+
continue
|
|
873
|
+
return False, f"Action {i}: unknown type {typ!r}", []
|
|
874
|
+
return True, "", valid
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
def _extract_expected_target_paths(instruction: str) -> List[str]:
|
|
878
|
+
targets: List[str] = []
|
|
879
|
+
# lightweight heuristic for common file-target asks
|
|
880
|
+
for m in re.finditer(r"\b([A-Za-z0-9._/\-]+(?:\.[A-Za-z0-9._-]+))\b", instruction or ""):
|
|
881
|
+
token = m.group(1).strip()
|
|
882
|
+
if "/" in token or "." in token:
|
|
883
|
+
lower = token.lower()
|
|
884
|
+
if lower in {"true", "false", "none"}:
|
|
885
|
+
continue
|
|
886
|
+
if token not in targets:
|
|
887
|
+
targets.append(token)
|
|
888
|
+
if len(targets) >= 8:
|
|
889
|
+
break
|
|
890
|
+
return targets
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
def _extract_append_line_directives(instruction: str) -> List[Tuple[str, str]]:
|
|
894
|
+
"""
|
|
895
|
+
Extract simple deterministic directives like:
|
|
896
|
+
In <path> append the bullet line '<text>'
|
|
897
|
+
In <path> append the comment line "<text>"
|
|
898
|
+
"""
|
|
899
|
+
directives: List[Tuple[str, str]] = []
|
|
900
|
+
seen = set()
|
|
901
|
+
text = str(instruction or "")
|
|
902
|
+
patterns = [
|
|
903
|
+
re.compile(
|
|
904
|
+
r"in\s+([A-Za-z0-9._/\-\\]+)\s+append\s+the\s+(?:bullet|comment|text)?\s*line\s+['\"]([^'\"]+)['\"]",
|
|
905
|
+
flags=re.IGNORECASE,
|
|
906
|
+
),
|
|
907
|
+
re.compile(
|
|
908
|
+
r"append\s+the\s+(?:bullet|comment|text)?\s*line\s+['\"]([^'\"]+)['\"]\s+to\s+([A-Za-z0-9._/\-\\]+)",
|
|
909
|
+
flags=re.IGNORECASE,
|
|
910
|
+
),
|
|
911
|
+
]
|
|
912
|
+
for pattern in patterns:
|
|
913
|
+
for match in pattern.finditer(text):
|
|
914
|
+
if pattern is patterns[0]:
|
|
915
|
+
path = str(match.group(1) or "").strip()
|
|
916
|
+
line = str(match.group(2) or "").strip()
|
|
917
|
+
else:
|
|
918
|
+
line = str(match.group(1) or "").strip()
|
|
919
|
+
path = str(match.group(2) or "").strip()
|
|
920
|
+
if not path or not line:
|
|
921
|
+
continue
|
|
922
|
+
key = (path.replace("\\", "/").lower(), line)
|
|
923
|
+
if key in seen:
|
|
924
|
+
continue
|
|
925
|
+
seen.add(key)
|
|
926
|
+
directives.append((path, line))
|
|
927
|
+
return directives
|
|
928
|
+
|
|
929
|
+
|
|
930
|
+
def _extract_explicit_target_paths_from_payload(payload: Optional[Dict[str, Any]]) -> List[str]:
|
|
931
|
+
if not isinstance(payload, dict):
|
|
932
|
+
return []
|
|
933
|
+
out: List[str] = []
|
|
934
|
+
seen = set()
|
|
935
|
+
params = payload.get("params")
|
|
936
|
+
if not isinstance(params, dict):
|
|
937
|
+
return []
|
|
938
|
+
|
|
939
|
+
def add(val: Any) -> None:
|
|
940
|
+
if not isinstance(val, str):
|
|
941
|
+
return
|
|
942
|
+
token = val.strip().replace("\\", "/")
|
|
943
|
+
while token.startswith("./"):
|
|
944
|
+
token = token[2:]
|
|
945
|
+
token = token.rstrip("/")
|
|
946
|
+
if not token or token in seen:
|
|
947
|
+
return
|
|
948
|
+
if token in {".", "/"}:
|
|
949
|
+
return
|
|
950
|
+
seen.add(token)
|
|
951
|
+
out.append(token)
|
|
952
|
+
|
|
953
|
+
add(params.get("targetPath"))
|
|
954
|
+
planning = params.get("planning")
|
|
955
|
+
if isinstance(planning, dict):
|
|
956
|
+
target_paths = planning.get("targetPaths")
|
|
957
|
+
if isinstance(target_paths, list):
|
|
958
|
+
for item in target_paths:
|
|
959
|
+
add(item)
|
|
960
|
+
scope = planning.get("scope")
|
|
961
|
+
if isinstance(scope, dict):
|
|
962
|
+
write_globs = scope.get("writeGlobs")
|
|
963
|
+
if isinstance(write_globs, list):
|
|
964
|
+
for item in write_globs:
|
|
965
|
+
add(item)
|
|
966
|
+
return out
|
|
967
|
+
|
|
968
|
+
|
|
969
|
+
def _target_hint_matches_changed_path(target_hint: str, changed_path: str) -> bool:
|
|
970
|
+
target = str(target_hint or "").strip().replace("\\", "/").rstrip("/")
|
|
971
|
+
changed = str(changed_path or "").strip().replace("\\", "/").rstrip("/")
|
|
972
|
+
if not target or not changed:
|
|
973
|
+
return False
|
|
974
|
+
if target in {".", "/"}:
|
|
975
|
+
return True
|
|
976
|
+
if changed == target:
|
|
977
|
+
return True
|
|
978
|
+
if changed.startswith(target + "/"):
|
|
979
|
+
return True
|
|
980
|
+
if any(ch in target for ch in "*?[]"):
|
|
981
|
+
return fnmatch.fnmatchcase(changed, target)
|
|
982
|
+
return False
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
def _is_git_porcelain_status_command(cmd: str) -> bool:
    """
    Return True when ``cmd`` is an accepted ``git status`` call using ``--porcelain``.

    The command must pass ``_parse_and_validate_shell_command``; its first two
    tokens must be ``git`` and ``status`` (case-insensitive), and some later
    token must start with ``--porcelain``.
    """
    tokens, _ = _parse_and_validate_shell_command(cmd)
    if tokens is None or len(tokens) < 2:
        return False
    if tokens[0].lower() != "git" or tokens[1].lower() != "status":
        return False
    for option in tokens[2:]:
        if option.lower().startswith("--porcelain"):
            return True
    return False
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
def _repo_root_for_prompt_loading() -> Path:
|
|
995
|
+
current = Path(__file__).resolve()
|
|
996
|
+
for parent in current.parents:
|
|
997
|
+
if (parent / "prompts").is_dir():
|
|
998
|
+
return parent
|
|
999
|
+
return current.parents[5]
|
|
1000
|
+
|
|
1001
|
+
|
|
1002
|
+
def _resolve_prompt_file(relative_path: str) -> Path:
    """Return the path of ``relative_path`` under the ``prompts`` directory of the located root."""
    prompts_dir = _repo_root_for_prompt_loading() / "prompts"
    return prompts_dir / relative_path
|
|
1004
|
+
|
|
1005
|
+
|
|
1006
|
+
def _load_prompt_template(
    relative_path: str, replacements: Optional[Dict[str, str]] = None
) -> str:
    """
    Load a prompt template and optionally fill in its placeholders.

    Template text is read once per resolved path and kept in
    ``_PROMPT_TEMPLATE_CACHE``. When ``replacements`` is empty or ``None`` the
    raw template is returned. Otherwise every match of ``PROMPT_TOKEN_REGEX``
    is replaced by the value stored under the key captured in group 1.

    Raises ``FileNotFoundError`` when the template file does not exist and
    ``KeyError`` when the template uses a placeholder that has no replacement.
    """
    prompt_path = _resolve_prompt_file(relative_path)
    cache_key = str(prompt_path)

    cached = _PROMPT_TEMPLATE_CACHE.get(cache_key)
    if cached is None:
        if not prompt_path.exists():
            raise FileNotFoundError(f"Prompt template not found: {prompt_path}")
        cached = prompt_path.read_text(encoding="utf-8")
        _PROMPT_TEMPLATE_CACHE[cache_key] = cached

    if not replacements:
        return cached

    def _substitute(token: re.Match[str]) -> str:
        name = token.group(1)
        if name in replacements:
            return replacements[name]
        raise KeyError(f"Missing prompt replacement '{{{{{name}}}}}' for {prompt_path}")

    return PROMPT_TOKEN_REGEX.sub(_substitute, cached)
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
def _broker_system_prompt(repo: str) -> str:
    """Render the broker system prompt template with ``repo`` supplied as the ``repo`` replacement."""
    template_name = "workerpals/miniswe_broker_system_prompt.md"
    return _load_prompt_template(template_name, {"repo": repo})
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
def _broker_run(
|
|
1035
|
+
repo: str,
|
|
1036
|
+
instruction: str,
|
|
1037
|
+
llm: _LLMConfig,
|
|
1038
|
+
timeout_ms: int,
|
|
1039
|
+
explicit_targets: Optional[List[str]] = None,
|
|
1040
|
+
write_globs: Optional[List[str]] = None,
|
|
1041
|
+
) -> Dict[str, Any]:
|
|
1042
|
+
"""
|
|
1043
|
+
Executes a simple plan/act loop where the model emits JSON actions.
|
|
1044
|
+
"""
|
|
1045
|
+
started = time.time()
|
|
1046
|
+
deadline = started + max(5, int(timeout_ms / 1000))
|
|
1047
|
+
|
|
1048
|
+
max_steps = _tool_broker_max_steps()
|
|
1049
|
+
max_actions = _tool_broker_max_actions_per_step()
|
|
1050
|
+
shell_timeout_sec = _tool_broker_shell_timeout_sec()
|
|
1051
|
+
http_timeout_sec = _tool_broker_http_timeout_sec(llm.base_url)
|
|
1052
|
+
http_retry_max = _tool_broker_http_retry_max(llm.base_url)
|
|
1053
|
+
|
|
1054
|
+
transcript: List[str] = []
|
|
1055
|
+
obs: str = ""
|
|
1056
|
+
edits_made = False
|
|
1057
|
+
shell_validation_ran = False
|
|
1058
|
+
explicit_target_set = {str(t).strip() for t in (explicit_targets or []) if str(t).strip()}
|
|
1059
|
+
allowed_write_globs = [g for g in (write_globs or []) if str(g).strip()]
|
|
1060
|
+
expected_targets = sorted(explicit_target_set) if explicit_target_set else _extract_expected_target_paths(instruction)
|
|
1061
|
+
|
|
1062
|
+
explicit_targets_block = ""
|
|
1063
|
+
completion_requirement = ""
|
|
1064
|
+
if expected_targets:
|
|
1065
|
+
targets_block = "\n".join(f"- {target}" for target in expected_targets[:8])
|
|
1066
|
+
explicit_targets_block = (
|
|
1067
|
+
"\n\n"
|
|
1068
|
+
+ _load_prompt_template(
|
|
1069
|
+
"workerpals/miniswe_explicit_targets_block.md",
|
|
1070
|
+
{"targets_block": targets_block},
|
|
1071
|
+
).strip()
|
|
1072
|
+
)
|
|
1073
|
+
completion_requirement = (
|
|
1074
|
+
"\n\n" + _load_prompt_template("workerpals/miniswe_completion_requirement.md").strip()
|
|
1075
|
+
)
|
|
1076
|
+
|
|
1077
|
+
task_prompt = _load_prompt_template(
|
|
1078
|
+
"workerpals/miniswe_broker_task_prompt.md",
|
|
1079
|
+
{
|
|
1080
|
+
"instruction": instruction,
|
|
1081
|
+
"explicit_targets_block": explicit_targets_block,
|
|
1082
|
+
"completion_requirement": completion_requirement,
|
|
1083
|
+
},
|
|
1084
|
+
).strip()
|
|
1085
|
+
messages: List[Dict[str, str]] = [
|
|
1086
|
+
{"role": "system", "content": _broker_system_prompt(repo)},
|
|
1087
|
+
{"role": "user", "content": task_prompt},
|
|
1088
|
+
]
|
|
1089
|
+
|
|
1090
|
+
def _record(line: str) -> None:
|
|
1091
|
+
transcript.append(line)
|
|
1092
|
+
log.debug(line)
|
|
1093
|
+
|
|
1094
|
+
def _remaining_http_timeout_sec() -> int:
|
|
1095
|
+
remaining = int(deadline - time.time())
|
|
1096
|
+
if remaining <= 0:
|
|
1097
|
+
return 10
|
|
1098
|
+
return max(10, min(http_timeout_sec, remaining))
|
|
1099
|
+
|
|
1100
|
+
def _compact_messages_for_timeout_retry() -> bool:
|
|
1101
|
+
nonlocal messages
|
|
1102
|
+
if len(messages) <= 8:
|
|
1103
|
+
return False
|
|
1104
|
+
head: List[Dict[str, str]] = []
|
|
1105
|
+
if messages and isinstance(messages[0], dict):
|
|
1106
|
+
head.append(messages[0])
|
|
1107
|
+
if len(messages) > 1 and isinstance(messages[1], dict):
|
|
1108
|
+
head.append(messages[1])
|
|
1109
|
+
tail = [m for m in messages[-6:] if isinstance(m, dict)]
|
|
1110
|
+
compacted: List[Dict[str, str]] = list(head)
|
|
1111
|
+
compacted.append(
|
|
1112
|
+
{
|
|
1113
|
+
"role": "user",
|
|
1114
|
+
"content": _load_prompt_template(
|
|
1115
|
+
"workerpals/miniswe_context_compaction_retry_prompt.md"
|
|
1116
|
+
).strip(),
|
|
1117
|
+
}
|
|
1118
|
+
)
|
|
1119
|
+
compacted.extend(tail)
|
|
1120
|
+
if len(compacted) >= len(messages):
|
|
1121
|
+
return False
|
|
1122
|
+
messages = compacted
|
|
1123
|
+
return True
|
|
1124
|
+
|
|
1125
|
+
def _broker_llm_call(step_label: str) -> str:
|
|
1126
|
+
attempt = 0
|
|
1127
|
+
while True:
|
|
1128
|
+
attempt += 1
|
|
1129
|
+
timeout_for_call = _remaining_http_timeout_sec()
|
|
1130
|
+
try:
|
|
1131
|
+
return _chat_completion(llm, messages, timeout_sec=timeout_for_call)
|
|
1132
|
+
except Exception as exc:
|
|
1133
|
+
msg = to_single_line(exc, 400)
|
|
1134
|
+
is_timeout = "timeout" in msg.lower() or "timed out" in msg.lower()
|
|
1135
|
+
if (not is_timeout) or attempt > (http_retry_max + 1) or time.time() >= deadline:
|
|
1136
|
+
raise RuntimeError(
|
|
1137
|
+
f"{step_label} failed after {attempt} attempt(s): {msg}"
|
|
1138
|
+
) from exc
|
|
1139
|
+
compacted = False
|
|
1140
|
+
if attempt >= 2:
|
|
1141
|
+
compacted = _compact_messages_for_timeout_retry()
|
|
1142
|
+
_record(
|
|
1143
|
+
f"[Broker] {step_label} timeout; retry {attempt}/{http_retry_max + 1} "
|
|
1144
|
+
f"(timeout={timeout_for_call}s): {msg}"
|
|
1145
|
+
)
|
|
1146
|
+
if compacted:
|
|
1147
|
+
_record(
|
|
1148
|
+
"[Broker] timeout mitigation: compacted broker message context "
|
|
1149
|
+
"before retry to reduce token load."
|
|
1150
|
+
)
|
|
1151
|
+
time.sleep(min(2.0, 0.25 * attempt))
|
|
1152
|
+
|
|
1153
|
+
def _broker_fail(summary: str, stderr: str, exit_code: int = 3) -> Dict[str, Any]:
|
|
1154
|
+
transcript_text = "\n".join(transcript).strip()
|
|
1155
|
+
stdout = f"Tool broker transcript:\n{transcript_text}" if transcript_text else ""
|
|
1156
|
+
return {
|
|
1157
|
+
"ok": False,
|
|
1158
|
+
"summary": summary,
|
|
1159
|
+
"stdout": stdout,
|
|
1160
|
+
"stderr": stderr,
|
|
1161
|
+
"exitCode": exit_code,
|
|
1162
|
+
}
|
|
1163
|
+
|
|
1164
|
+
def _attempt_append_line_timeout_recovery(step: int, error_text: str) -> Optional[Dict[str, Any]]:
|
|
1165
|
+
lowered = str(error_text or "").lower()
|
|
1166
|
+
if "timeout" not in lowered and "timed out" not in lowered:
|
|
1167
|
+
return None
|
|
1168
|
+
directives = _extract_append_line_directives(instruction)
|
|
1169
|
+
if not directives:
|
|
1170
|
+
return None
|
|
1171
|
+
_record(
|
|
1172
|
+
f"[Broker] timeout recovery: attempting deterministic append-line completion from instruction "
|
|
1173
|
+
f"(step={step}, directives={len(directives)})."
|
|
1174
|
+
)
|
|
1175
|
+
|
|
1176
|
+
applied = 0
|
|
1177
|
+
skipped = 0
|
|
1178
|
+
for raw_path, line in directives:
|
|
1179
|
+
normalized = _normalize_concrete_repo_path(repo, raw_path)
|
|
1180
|
+
if not normalized or normalized in {".", "/"}:
|
|
1181
|
+
skipped += 1
|
|
1182
|
+
continue
|
|
1183
|
+
if expected_targets and not any(
|
|
1184
|
+
_target_hint_matches_changed_path(target, normalized) for target in expected_targets
|
|
1185
|
+
):
|
|
1186
|
+
skipped += 1
|
|
1187
|
+
continue
|
|
1188
|
+
try:
|
|
1189
|
+
existing = _read_text_file(repo, normalized, max_chars=500_000)
|
|
1190
|
+
except Exception:
|
|
1191
|
+
existing = ""
|
|
1192
|
+
if any(existing_line.strip() == line.strip() for existing_line in existing.splitlines()):
|
|
1193
|
+
skipped += 1
|
|
1194
|
+
continue
|
|
1195
|
+
try:
|
|
1196
|
+
_append_line(repo, normalized, line, allowed_write_globs)
|
|
1197
|
+
applied += 1
|
|
1198
|
+
except Exception as exc:
|
|
1199
|
+
_record(
|
|
1200
|
+
f"[Broker] timeout recovery: failed to apply append_line for {normalized}: "
|
|
1201
|
+
f"{to_single_line(exc, 240)}"
|
|
1202
|
+
)
|
|
1203
|
+
return None
|
|
1204
|
+
|
|
1205
|
+
changed_paths = summarize_git_changes(repo)
|
|
1206
|
+
changed_set = {str(p).strip().replace("\\", "/") for p in changed_paths}
|
|
1207
|
+
missing_targets = [
|
|
1208
|
+
t
|
|
1209
|
+
for t in expected_targets
|
|
1210
|
+
if t not in {".", "/"} and not any(_target_hint_matches_changed_path(t, c) for c in changed_set)
|
|
1211
|
+
]
|
|
1212
|
+
if missing_targets:
|
|
1213
|
+
_record(
|
|
1214
|
+
"[Broker] timeout recovery incomplete: expected targets still missing changes: "
|
|
1215
|
+
+ ", ".join(missing_targets)
|
|
1216
|
+
)
|
|
1217
|
+
return None
|
|
1218
|
+
if applied == 0 and not changed_paths:
|
|
1219
|
+
return None
|
|
1220
|
+
|
|
1221
|
+
try:
|
|
1222
|
+
final_status = _run_shell(repo, "git status --porcelain", timeout_sec=shell_timeout_sec)
|
|
1223
|
+
except Exception as exc:
|
|
1224
|
+
final_status = f"(git status failed) {to_single_line(exc, 300)}"
|
|
1225
|
+
|
|
1226
|
+
transcript_text = "\n".join(transcript).strip()
|
|
1227
|
+
stdout = ""
|
|
1228
|
+
if transcript_text:
|
|
1229
|
+
stdout += "Tool broker transcript:\n" + transcript_text + "\n\n"
|
|
1230
|
+
stdout += "Deterministic timeout recovery applied append-line directives.\n"
|
|
1231
|
+
stdout += f"Applied directives: {applied}, skipped: {skipped}\n\n"
|
|
1232
|
+
stdout += "Final verification:\n" + final_status
|
|
1233
|
+
return {
|
|
1234
|
+
"ok": True,
|
|
1235
|
+
"summary": "Executed task via tool broker timeout recovery",
|
|
1236
|
+
"stdout": stdout,
|
|
1237
|
+
"stderr": "",
|
|
1238
|
+
"exitCode": 0,
|
|
1239
|
+
}
|
|
1240
|
+
|
|
1241
|
+
def _attempt_timeout_finalize_from_existing_edits(
|
|
1242
|
+
step: int, error_text: str
|
|
1243
|
+
) -> Optional[Dict[str, Any]]:
|
|
1244
|
+
"""
|
|
1245
|
+
If the model times out after prior edit actions already changed files,
|
|
1246
|
+
finalize deterministically from repo state instead of hard-failing.
|
|
1247
|
+
"""
|
|
1248
|
+
lowered = str(error_text or "").lower()
|
|
1249
|
+
if "timeout" not in lowered and "timed out" not in lowered:
|
|
1250
|
+
return None
|
|
1251
|
+
if not edits_made:
|
|
1252
|
+
return None
|
|
1253
|
+
|
|
1254
|
+
changed_paths = summarize_git_changes(repo)
|
|
1255
|
+
if not changed_paths:
|
|
1256
|
+
return None
|
|
1257
|
+
|
|
1258
|
+
changed_set = {str(p).strip().replace("\\", "/") for p in changed_paths}
|
|
1259
|
+
if expected_targets:
|
|
1260
|
+
expected_set = {str(p).strip().replace("\\", "/") for p in expected_targets}
|
|
1261
|
+
matched = any(
|
|
1262
|
+
_target_hint_matches_changed_path(expected, changed)
|
|
1263
|
+
for expected in expected_set
|
|
1264
|
+
for changed in changed_set
|
|
1265
|
+
)
|
|
1266
|
+
if not matched:
|
|
1267
|
+
_record(
|
|
1268
|
+
"[Broker] timeout finalize skipped: changed files do not match expected targets. "
|
|
1269
|
+
f"expected={sorted(expected_set)} observed={sorted(changed_set)}"
|
|
1270
|
+
)
|
|
1271
|
+
return None
|
|
1272
|
+
|
|
1273
|
+
try:
|
|
1274
|
+
final_status = _run_shell(repo, "git status --porcelain", timeout_sec=shell_timeout_sec)
|
|
1275
|
+
except Exception as exc:
|
|
1276
|
+
final_status = f"(git status failed) {to_single_line(exc, 300)}"
|
|
1277
|
+
|
|
1278
|
+
transcript_text = "\n".join(transcript).strip()
|
|
1279
|
+
stdout = ""
|
|
1280
|
+
if transcript_text:
|
|
1281
|
+
stdout += "Tool broker transcript:\n" + transcript_text + "\n\n"
|
|
1282
|
+
stdout += (
|
|
1283
|
+
"Timeout recovery finalized already-applied edit actions from repository state.\n"
|
|
1284
|
+
f"Recovery trigger: step={step}, error={to_single_line(error_text, 220)}\n\n"
|
|
1285
|
+
"Changed files:\n"
|
|
1286
|
+
+ "\n".join(f"- {p}" for p in changed_paths[:80])
|
|
1287
|
+
+ "\n\nFinal verification:\n"
|
|
1288
|
+
+ final_status
|
|
1289
|
+
)
|
|
1290
|
+
return {
|
|
1291
|
+
"ok": True,
|
|
1292
|
+
"summary": "Executed task via tool broker timeout finalize",
|
|
1293
|
+
"stdout": stdout,
|
|
1294
|
+
"stderr": "",
|
|
1295
|
+
"exitCode": 0,
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
step = 0
|
|
1299
|
+
model_done = False
|
|
1300
|
+
no_edit_steps = 0
|
|
1301
|
+
no_progress_nudges = 0
|
|
1302
|
+
while step < max_steps and time.time() < deadline:
|
|
1303
|
+
step += 1
|
|
1304
|
+
|
|
1305
|
+
if obs:
|
|
1306
|
+
messages.append({"role": "user", "content": f"Observation (from executed actions):\n{obs}\n\nNext JSON only."})
|
|
1307
|
+
|
|
1308
|
+
try:
|
|
1309
|
+
raw = _broker_llm_call(f"step {step} initial call")
|
|
1310
|
+
except Exception as exc:
|
|
1311
|
+
recovered = _attempt_append_line_timeout_recovery(step, str(exc))
|
|
1312
|
+
if recovered:
|
|
1313
|
+
return recovered
|
|
1314
|
+
recovered = _attempt_timeout_finalize_from_existing_edits(step, str(exc))
|
|
1315
|
+
if recovered:
|
|
1316
|
+
return recovered
|
|
1317
|
+
return _broker_fail(
|
|
1318
|
+
"tool broker failed: llm request error",
|
|
1319
|
+
f"Broker LLM request failed at step {step}: {to_single_line(exc, 500)}",
|
|
1320
|
+
)
|
|
1321
|
+
raw_used = raw
|
|
1322
|
+
_record(f"[Broker] Step {step} model output: {to_single_line(raw, 500)}")
|
|
1323
|
+
|
|
1324
|
+
obj = _extract_first_json_object(raw)
|
|
1325
|
+
if not obj:
|
|
1326
|
+
# one reprompt to force JSON
|
|
1327
|
+
messages.append({"role": "user", "content": "Your last response was not valid JSON. Return ONLY the JSON object."})
|
|
1328
|
+
try:
|
|
1329
|
+
raw2 = _broker_llm_call(f"step {step} json-repair call")
|
|
1330
|
+
except Exception as exc:
|
|
1331
|
+
recovered = _attempt_append_line_timeout_recovery(step, str(exc))
|
|
1332
|
+
if recovered:
|
|
1333
|
+
return recovered
|
|
1334
|
+
recovered = _attempt_timeout_finalize_from_existing_edits(step, str(exc))
|
|
1335
|
+
if recovered:
|
|
1336
|
+
return recovered
|
|
1337
|
+
return _broker_fail(
|
|
1338
|
+
"tool broker failed: llm request error",
|
|
1339
|
+
f"Broker JSON-repair request failed at step {step}: {to_single_line(exc, 500)}",
|
|
1340
|
+
)
|
|
1341
|
+
_record(f"[Broker] Step {step} JSON repair output: {to_single_line(raw2, 500)}")
|
|
1342
|
+
obj = _extract_first_json_object(raw2)
|
|
1343
|
+
if not obj:
|
|
1344
|
+
messages.append(
|
|
1345
|
+
{
|
|
1346
|
+
"role": "user",
|
|
1347
|
+
"content": (
|
|
1348
|
+
"Still invalid JSON. Return ONLY one valid JSON object using strict syntax: "
|
|
1349
|
+
'keys must use double quotes, key/value separator must be ":", and top-level '
|
|
1350
|
+
'keys must be exactly: actions, done, note. '
|
|
1351
|
+
'Example format: {"actions":[{"type":"read_file","path":"README.md"}],'
|
|
1352
|
+
'"done":false,"note":"short"}'
|
|
1353
|
+
),
|
|
1354
|
+
}
|
|
1355
|
+
)
|
|
1356
|
+
try:
|
|
1357
|
+
raw3 = _broker_llm_call(f"step {step} hard-json-repair call")
|
|
1358
|
+
except Exception as exc:
|
|
1359
|
+
recovered = _attempt_append_line_timeout_recovery(step, str(exc))
|
|
1360
|
+
if recovered:
|
|
1361
|
+
return recovered
|
|
1362
|
+
recovered = _attempt_timeout_finalize_from_existing_edits(step, str(exc))
|
|
1363
|
+
if recovered:
|
|
1364
|
+
return recovered
|
|
1365
|
+
return _broker_fail(
|
|
1366
|
+
"tool broker failed: llm request error",
|
|
1367
|
+
f"Broker hard JSON-repair request failed at step {step}: {to_single_line(exc, 500)}",
|
|
1368
|
+
)
|
|
1369
|
+
_record(f"[Broker] Step {step} hard JSON repair output: {to_single_line(raw3, 500)}")
|
|
1370
|
+
obj = _extract_first_json_object(raw3)
|
|
1371
|
+
if not obj:
|
|
1372
|
+
return {
|
|
1373
|
+
"ok": False,
|
|
1374
|
+
"summary": "tool broker failed: model did not produce parsable JSON actions",
|
|
1375
|
+
"stderr": "Model output could not be parsed as the required JSON action format.",
|
|
1376
|
+
"exitCode": 3,
|
|
1377
|
+
}
|
|
1378
|
+
raw = raw3
|
|
1379
|
+
else:
|
|
1380
|
+
raw = raw2
|
|
1381
|
+
allowed_top_keys = {"actions", "done", "note"}
|
|
1382
|
+
extras = [k for k in obj.keys() if str(k) not in allowed_top_keys]
|
|
1383
|
+
if extras:
|
|
1384
|
+
messages.append(
|
|
1385
|
+
{
|
|
1386
|
+
"role": "user",
|
|
1387
|
+
"content": (
|
|
1388
|
+
"Your JSON had unsupported top-level keys. "
|
|
1389
|
+
"Return ONLY one JSON object with keys: actions, done, note."
|
|
1390
|
+
),
|
|
1391
|
+
}
|
|
1392
|
+
)
|
|
1393
|
+
try:
|
|
1394
|
+
raw3 = _broker_llm_call(f"step {step} shape-repair call")
|
|
1395
|
+
except Exception as exc:
|
|
1396
|
+
recovered = _attempt_append_line_timeout_recovery(step, str(exc))
|
|
1397
|
+
if recovered:
|
|
1398
|
+
return recovered
|
|
1399
|
+
recovered = _attempt_timeout_finalize_from_existing_edits(step, str(exc))
|
|
1400
|
+
if recovered:
|
|
1401
|
+
return recovered
|
|
1402
|
+
return _broker_fail(
|
|
1403
|
+
"tool broker failed: llm request error",
|
|
1404
|
+
f"Broker shape-repair request failed at step {step}: {to_single_line(exc, 500)}",
|
|
1405
|
+
)
|
|
1406
|
+
_record(f"[Broker] Step {step} shape repair output: {to_single_line(raw3, 500)}")
|
|
1407
|
+
obj2 = _extract_first_json_object(raw3)
|
|
1408
|
+
if not isinstance(obj2, dict):
|
|
1409
|
+
return {
|
|
1410
|
+
"ok": False,
|
|
1411
|
+
"summary": "tool broker failed: invalid response shape",
|
|
1412
|
+
"stderr": f"Unexpected top-level keys in broker JSON: {extras}",
|
|
1413
|
+
"exitCode": 3,
|
|
1414
|
+
}
|
|
1415
|
+
obj = obj2
|
|
1416
|
+
raw_used = raw3
|
|
1417
|
+
extras = [k for k in obj.keys() if str(k) not in allowed_top_keys]
|
|
1418
|
+
if extras:
|
|
1419
|
+
# Recoverable formatting issue: ignore extras rather than hard-fail.
|
|
1420
|
+
obj = {k: obj.get(k) for k in allowed_top_keys}
|
|
1421
|
+
raw_used = json.dumps(obj, ensure_ascii=False)
|
|
1422
|
+
|
|
1423
|
+
actions = obj.get("actions")
|
|
1424
|
+
done = bool(obj.get("done"))
|
|
1425
|
+
|
|
1426
|
+
ok_actions, reason_actions, planned_actions = _validate_broker_actions(actions, max_actions)
|
|
1427
|
+
if not ok_actions:
|
|
1428
|
+
return {
|
|
1429
|
+
"ok": False,
|
|
1430
|
+
"summary": "tool broker failed: invalid actions schema",
|
|
1431
|
+
"stderr": reason_actions,
|
|
1432
|
+
"exitCode": 3,
|
|
1433
|
+
}
|
|
1434
|
+
|
|
1435
|
+
# Execute actions
|
|
1436
|
+
action_logs: List[str] = []
|
|
1437
|
+
step_made_edit = False
|
|
1438
|
+
step_had_shell_rejection = False
|
|
1439
|
+
for i, act in enumerate(planned_actions, start=1):
|
|
1440
|
+
typ = str(act.get("type") or "").strip()
|
|
1441
|
+
try:
|
|
1442
|
+
if typ == "read_file":
|
|
1443
|
+
path = str(act.get("path") or "")
|
|
1444
|
+
content = _read_text_file(repo, path)
|
|
1445
|
+
preview = _truncate_observation(content, max_chars=_BROKER_READ_PREVIEW_CHARS)
|
|
1446
|
+
action_logs.append(
|
|
1447
|
+
f"- read_file {path}: ok ({len(content)} chars total)\n{preview}"
|
|
1448
|
+
)
|
|
1449
|
+
elif typ == "append_line":
|
|
1450
|
+
path = str(act.get("path") or "")
|
|
1451
|
+
line = str(act.get("line") or "")
|
|
1452
|
+
_append_line(repo, path, line, allowed_write_globs)
|
|
1453
|
+
edits_made = True
|
|
1454
|
+
step_made_edit = True
|
|
1455
|
+
action_logs.append(f"- append_line {path}: ok (appended {line!r})")
|
|
1456
|
+
elif typ == "replace_text_once":
|
|
1457
|
+
path = str(act.get("path") or "")
|
|
1458
|
+
old = str(act.get("old") or "")
|
|
1459
|
+
new = str(act.get("new") or "")
|
|
1460
|
+
n = _replace_text_once(repo, path, old, new, allowed_write_globs)
|
|
1461
|
+
edits_made = edits_made or (n > 0)
|
|
1462
|
+
step_made_edit = step_made_edit or (n > 0)
|
|
1463
|
+
action_logs.append(f"- replace_text_once {path}: {n} replacement(s)")
|
|
1464
|
+
elif typ == "write_file":
|
|
1465
|
+
path = str(act.get("path") or "")
|
|
1466
|
+
content = str(act.get("content") or "")
|
|
1467
|
+
_write_text_file(repo, path, content, allowed_write_globs)
|
|
1468
|
+
edits_made = True
|
|
1469
|
+
step_made_edit = True
|
|
1470
|
+
action_logs.append(f"- write_file {path}: ok ({len(content)} chars)")
|
|
1471
|
+
elif typ == "run_shell":
|
|
1472
|
+
cmd = str(act.get("command") or "")
|
|
1473
|
+
out = _run_shell(repo, cmd, timeout_sec=shell_timeout_sec)
|
|
1474
|
+
shell_validation_ran = shell_validation_ran or _is_git_porcelain_status_command(cmd)
|
|
1475
|
+
action_logs.append(f"- run_shell {cmd!r}:\n{out}")
|
|
1476
|
+
else:
|
|
1477
|
+
action_logs.append(f"- action {i}: unknown type {typ!r} (rejected by schema)")
|
|
1478
|
+
except Exception as exc:
|
|
1479
|
+
err = to_single_line(exc, 400)
|
|
1480
|
+
if typ == "run_shell":
|
|
1481
|
+
if "Shell command rejected:" in err:
|
|
1482
|
+
step_had_shell_rejection = True
|
|
1483
|
+
salvage_cmd = _attempt_salvage_rejected_shell_command(
|
|
1484
|
+
str(act.get("command") or ""),
|
|
1485
|
+
err,
|
|
1486
|
+
)
|
|
1487
|
+
if salvage_cmd:
|
|
1488
|
+
try:
|
|
1489
|
+
salvage_out = _run_shell(repo, salvage_cmd, timeout_sec=shell_timeout_sec)
|
|
1490
|
+
shell_validation_ran = shell_validation_ran or _is_git_porcelain_status_command(
|
|
1491
|
+
salvage_cmd,
|
|
1492
|
+
)
|
|
1493
|
+
action_logs.append(
|
|
1494
|
+
f"- run_shell {str(act.get('command') or '')!r}: rejected ({err}); "
|
|
1495
|
+
f"salvage executed {salvage_cmd!r}:\n{salvage_out}"
|
|
1496
|
+
)
|
|
1497
|
+
continue
|
|
1498
|
+
except Exception as salvage_exc:
|
|
1499
|
+
err = f"{err}; salvage failed: {to_single_line(salvage_exc, 260)}"
|
|
1500
|
+
action_logs.append(f"- {typ or 'action'} failed: {err}")
|
|
1501
|
+
|
|
1502
|
+
obs = _truncate_observation("\n".join(action_logs).strip())
|
|
1503
|
+
if step_made_edit:
|
|
1504
|
+
no_edit_steps = 0
|
|
1505
|
+
else:
|
|
1506
|
+
no_edit_steps += 1
|
|
1507
|
+
|
|
1508
|
+
# Feed the raw JSON back as assistant message (helps the model stay consistent)
|
|
1509
|
+
messages.append({"role": "assistant", "content": raw_used})
|
|
1510
|
+
if (
|
|
1511
|
+
not done
|
|
1512
|
+
and not step_made_edit
|
|
1513
|
+
and no_edit_steps >= 2
|
|
1514
|
+
and no_progress_nudges < 2
|
|
1515
|
+
and step < max_steps
|
|
1516
|
+
):
|
|
1517
|
+
no_progress_nudges += 1
|
|
1518
|
+
nudge_lines = [
|
|
1519
|
+
"Progress guard: you have not produced any edit actions yet.",
|
|
1520
|
+
"In your NEXT response, either:",
|
|
1521
|
+
'1) include at least one edit action (`append_line`, `replace_text_once`, or `write_file`) that advances the task, OR',
|
|
1522
|
+
"2) if genuinely blocked, set done=true and explain the blocker in note.",
|
|
1523
|
+
"Do not continue pure exploration.",
|
|
1524
|
+
]
|
|
1525
|
+
if step_had_shell_rejection:
|
|
1526
|
+
nudge_lines.append(
|
|
1527
|
+
"Reminder: run_shell forbids pipes/redirection/chaining; use one simple command."
|
|
1528
|
+
)
|
|
1529
|
+
messages.append({"role": "user", "content": "\n".join(nudge_lines)})
|
|
1530
|
+
_record(
|
|
1531
|
+
f"[Broker] progress guard nudge injected (step={step}, no_edit_steps={no_edit_steps})."
|
|
1532
|
+
)
|
|
1533
|
+
|
|
1534
|
+
if done:
|
|
1535
|
+
_record("[Broker] Model signaled done=true.")
|
|
1536
|
+
model_done = True
|
|
1537
|
+
break
|
|
1538
|
+
|
|
1539
|
+
# Always include a final git status if possible (and safe)
|
|
1540
|
+
try:
|
|
1541
|
+
final_status = _run_shell(repo, "git status --porcelain", timeout_sec=shell_timeout_sec)
|
|
1542
|
+
except Exception as exc:
|
|
1543
|
+
final_status = f"(git status failed) {to_single_line(exc, 300)}"
|
|
1544
|
+
final_status_exit = _shell_exit_code(final_status)
|
|
1545
|
+
|
|
1546
|
+
transcript_text = "\n".join(transcript).strip()
|
|
1547
|
+
stdout = ""
|
|
1548
|
+
if transcript_text:
|
|
1549
|
+
stdout += "Tool broker transcript:\n" + transcript_text + "\n\n"
|
|
1550
|
+
stdout += "Final verification:\n" + final_status
|
|
1551
|
+
changed_paths = summarize_git_changes(repo)
|
|
1552
|
+
|
|
1553
|
+
if not model_done:
|
|
1554
|
+
if edits_made and changed_paths:
|
|
1555
|
+
_record(
|
|
1556
|
+
"[Broker] model never returned done=true, but edits were observed; "
|
|
1557
|
+
"auto-finalizing based on repository state."
|
|
1558
|
+
)
|
|
1559
|
+
stdout += (
|
|
1560
|
+
"\n\nAuto-finalize: model did not return done=true, "
|
|
1561
|
+
"but repository changes were detected."
|
|
1562
|
+
)
|
|
1563
|
+
else:
|
|
1564
|
+
return {
|
|
1565
|
+
"ok": False,
|
|
1566
|
+
"summary": "tool broker failed: did not reach done=true before limits",
|
|
1567
|
+
"stdout": stdout,
|
|
1568
|
+
"stderr": (
|
|
1569
|
+
"Model did not return done=true before max steps/timeout. "
|
|
1570
|
+
"Treating broker run as incomplete."
|
|
1571
|
+
),
|
|
1572
|
+
"exitCode": 3,
|
|
1573
|
+
}
|
|
1574
|
+
if final_status_exit is not None and final_status_exit != 0:
|
|
1575
|
+
return {
|
|
1576
|
+
"ok": False,
|
|
1577
|
+
"summary": "tool broker failed: verification command failed",
|
|
1578
|
+
"stdout": stdout,
|
|
1579
|
+
"stderr": "Final verification command `git status --porcelain` failed.",
|
|
1580
|
+
"exitCode": 3,
|
|
1581
|
+
}
|
|
1582
|
+
if edits_made and not changed_paths:
|
|
1583
|
+
return {
|
|
1584
|
+
"ok": False,
|
|
1585
|
+
"summary": "tool broker failed: model claimed edits but repo has no changes",
|
|
1586
|
+
"stdout": stdout,
|
|
1587
|
+
"stderr": "Broker executed edit actions but git reports no changed files.",
|
|
1588
|
+
"exitCode": 3,
|
|
1589
|
+
}
|
|
1590
|
+
if expected_targets and changed_paths:
|
|
1591
|
+
changed_set = {str(p).strip().replace("\\", "/") for p in changed_paths}
|
|
1592
|
+
expected_set = {str(p).strip().replace("\\", "/") for p in expected_targets}
|
|
1593
|
+
strict_target_match = bool(
|
|
1594
|
+
explicit_target_set
|
|
1595
|
+
and not any(t in {".", "/"} for t in explicit_target_set)
|
|
1596
|
+
and not any(any(ch in t for ch in "*?[]") for t in explicit_target_set)
|
|
1597
|
+
)
|
|
1598
|
+
matched = any(
|
|
1599
|
+
_target_hint_matches_changed_path(expected, changed)
|
|
1600
|
+
for expected in expected_set
|
|
1601
|
+
for changed in changed_set
|
|
1602
|
+
)
|
|
1603
|
+
if expected_set and not matched:
|
|
1604
|
+
msg = (
|
|
1605
|
+
"Expected one of target paths to change, but observed different files. "
|
|
1606
|
+
f"expected={sorted(expected_set)} observed={sorted(changed_set)}"
|
|
1607
|
+
)
|
|
1608
|
+
if strict_target_match:
|
|
1609
|
+
return {
|
|
1610
|
+
"ok": False,
|
|
1611
|
+
"summary": "tool broker failed: changed files do not match explicit target paths",
|
|
1612
|
+
"stdout": stdout + "\n\nChanged files:\n" + "\n".join(f"- {p}" for p in changed_paths),
|
|
1613
|
+
"stderr": msg,
|
|
1614
|
+
"exitCode": 3,
|
|
1615
|
+
}
|
|
1616
|
+
stdout += "\n\nTarget-path mismatch (heuristic, non-fatal):\n" + msg
|
|
1617
|
+
if edits_made and not shell_validation_ran:
|
|
1618
|
+
stdout += (
|
|
1619
|
+
"\n\nValidation note:\n"
|
|
1620
|
+
"Model did not run `git status --porcelain` during broker steps; "
|
|
1621
|
+
"broker-level final verification was used."
|
|
1622
|
+
)
|
|
1623
|
+
|
|
1624
|
+
return {
|
|
1625
|
+
"ok": True,
|
|
1626
|
+
"summary": "Executed task via tool broker shim",
|
|
1627
|
+
"stdout": stdout,
|
|
1628
|
+
"stderr": "",
|
|
1629
|
+
"exitCode": 0,
|
|
1630
|
+
}
|
|
1631
|
+
|
|
1632
|
+
|
|
1633
|
+
# ─── mini-swe-agent execution ───────────────────────────────────────────────
|
|
1634
|
+
|
|
1635
|
+
def _run_miniswe_task(
|
|
1636
|
+
repo: str,
|
|
1637
|
+
instruction: str,
|
|
1638
|
+
payload: Optional[Dict[str, Any]] = None,
|
|
1639
|
+
supplemental_guidance: Optional[List[str]] = None,
|
|
1640
|
+
) -> Dict[str, Any]:
|
|
1641
|
+
"""Execute a task using mini-swe-agent's Python SDK (and optional broker fallback)."""
|
|
1642
|
+
|
|
1643
|
+
try:
|
|
1644
|
+
from minisweagent.agents.default import DefaultAgent
|
|
1645
|
+
from minisweagent.models.litellm_model import LitellmModel
|
|
1646
|
+
from minisweagent.environments.local import LocalEnvironment
|
|
1647
|
+
except ImportError as exc:
|
|
1648
|
+
return {
|
|
1649
|
+
"ok": False,
|
|
1650
|
+
"summary": (
|
|
1651
|
+
"mini-swe-agent is not installed. "
|
|
1652
|
+
"Install with: pip install mini-swe-agent"
|
|
1653
|
+
),
|
|
1654
|
+
"stderr": str(exc),
|
|
1655
|
+
"exitCode": 3,
|
|
1656
|
+
}
|
|
1657
|
+
|
|
1658
|
+
model_name, api_key, base_url = resolve_llm_config(
|
|
1659
|
+
default_model=DEFAULT_MINISWE_MODEL, logger=log,
|
|
1660
|
+
)
|
|
1661
|
+
if not model_name:
|
|
1662
|
+
return {
|
|
1663
|
+
"ok": False,
|
|
1664
|
+
"summary": (
|
|
1665
|
+
"task.execute requires an LLM model for agentic execution. "
|
|
1666
|
+
"Set WORKERPALS_LLM_MODEL."
|
|
1667
|
+
),
|
|
1668
|
+
"stderr": "",
|
|
1669
|
+
"exitCode": 2,
|
|
1670
|
+
}
|
|
1671
|
+
|
|
1672
|
+
if not api_key:
|
|
1673
|
+
if looks_local_base_url(base_url):
|
|
1674
|
+
api_key = "local"
|
|
1675
|
+
else:
|
|
1676
|
+
return {
|
|
1677
|
+
"ok": False,
|
|
1678
|
+
"summary": (
|
|
1679
|
+
"task.execute agent mode requires an API key. "
|
|
1680
|
+
"Set WORKERPALS_LLM_API_KEY."
|
|
1681
|
+
),
|
|
1682
|
+
"stderr": "",
|
|
1683
|
+
"exitCode": 2,
|
|
1684
|
+
}
|
|
1685
|
+
|
|
1686
|
+
timeout_ms = _execution_timeout_ms()
|
|
1687
|
+
timeout_minutes = max(1, round(timeout_ms / 60000))
|
|
1688
|
+
|
|
1689
|
+
def _compose_instruction(extra_guidance: Optional[List[str]] = None) -> str:
|
|
1690
|
+
full = instruction
|
|
1691
|
+
merged_guidance: List[str] = []
|
|
1692
|
+
if supplemental_guidance:
|
|
1693
|
+
merged_guidance.extend([g for g in supplemental_guidance if g and str(g).strip()])
|
|
1694
|
+
if extra_guidance:
|
|
1695
|
+
merged_guidance.extend([g for g in extra_guidance if g and str(g).strip()])
|
|
1696
|
+
if merged_guidance:
|
|
1697
|
+
parts = [str(g).strip() for g in merged_guidance if str(g).strip()]
|
|
1698
|
+
if parts:
|
|
1699
|
+
guidance_section = _load_prompt_template(
|
|
1700
|
+
"workerpals/miniswe_supplemental_guidance_section.md",
|
|
1701
|
+
{"guidance_entries": "\n\n".join(parts)},
|
|
1702
|
+
).strip()
|
|
1703
|
+
full += f"\n\n{guidance_section}"
|
|
1704
|
+
|
|
1705
|
+
timeout_note = _load_prompt_template(
|
|
1706
|
+
"workerpals/miniswe_timeout_note.md",
|
|
1707
|
+
{"timeout_minutes": str(timeout_minutes)},
|
|
1708
|
+
).strip()
|
|
1709
|
+
full += f"\n\n{timeout_note}"
|
|
1710
|
+
return full
|
|
1711
|
+
|
|
1712
|
+
log.info(f"Starting mini-swe-agent execution in {repo}")
|
|
1713
|
+
log.info(f"Model: {model_name}, base_url: {base_url or '(default)'}")
|
|
1714
|
+
log.info(f"Timeout: {timeout_ms}ms ({timeout_minutes}min)")
|
|
1715
|
+
log.debug(f"Instruction: {to_single_line(instruction, 300)}")
|
|
1716
|
+
|
|
1717
|
+
# Pre-run baseline so we can tell whether *anything* changed even if the model/tooling is flaky.
|
|
1718
|
+
baseline_changes = set(summarize_git_changes(repo))
|
|
1719
|
+
explicit_targets = _extract_explicit_target_paths_from_payload(payload)
|
|
1720
|
+
explicit_write_globs = _extract_write_globs_from_payload(payload)
|
|
1721
|
+
|
|
1722
|
+
# Prepare broker config upfront (so we can fall back cleanly)
|
|
1723
|
+
llm_cfg = _LLMConfig(model=model_name, api_key=api_key or "", base_url=base_url or "")
|
|
1724
|
+
|
|
1725
|
+
def _run_broker_with_recovery(extra_guidance: Optional[List[str]] = None) -> Dict[str, Any]:
|
|
1726
|
+
broker_result = _broker_run(
|
|
1727
|
+
repo,
|
|
1728
|
+
instruction=_compose_instruction(extra_guidance=extra_guidance),
|
|
1729
|
+
llm=llm_cfg,
|
|
1730
|
+
timeout_ms=timeout_ms,
|
|
1731
|
+
explicit_targets=explicit_targets,
|
|
1732
|
+
write_globs=explicit_write_globs,
|
|
1733
|
+
)
|
|
1734
|
+
retry_max = _tool_broker_run_retry_max()
|
|
1735
|
+
retry_count = 0
|
|
1736
|
+
while retry_count < retry_max and (
|
|
1737
|
+
_is_broker_timeout_failure(broker_result)
|
|
1738
|
+
or _is_broker_incomplete_failure(broker_result)
|
|
1739
|
+
):
|
|
1740
|
+
retry_count += 1
|
|
1741
|
+
timeout_like = _is_broker_timeout_failure(broker_result)
|
|
1742
|
+
if timeout_like:
|
|
1743
|
+
log.info(
|
|
1744
|
+
"Tool broker timed out while waiting for model output; retrying broker run "
|
|
1745
|
+
f"{retry_count}/{retry_max} with one-pass timeout recovery guidance."
|
|
1746
|
+
)
|
|
1747
|
+
else:
|
|
1748
|
+
log.info(
|
|
1749
|
+
"Tool broker did not converge before limits; retrying broker run "
|
|
1750
|
+
f"{retry_count}/{retry_max} with strict completion guidance."
|
|
1751
|
+
)
|
|
1752
|
+
retry_guidance = [
|
|
1753
|
+
line.strip()
|
|
1754
|
+
for line in _load_prompt_template(
|
|
1755
|
+
"workerpals/miniswe_recovery_guidance_base.md"
|
|
1756
|
+
).splitlines()
|
|
1757
|
+
if line.strip()
|
|
1758
|
+
]
|
|
1759
|
+
if not timeout_like:
|
|
1760
|
+
retry_guidance.append(
|
|
1761
|
+
_load_prompt_template("workerpals/miniswe_recovery_guidance_blocker_line.md").strip()
|
|
1762
|
+
)
|
|
1763
|
+
merged_guidance = list(extra_guidance or [])
|
|
1764
|
+
merged_guidance.extend(retry_guidance)
|
|
1765
|
+
previous = broker_result
|
|
1766
|
+
broker_result = _broker_run(
|
|
1767
|
+
repo,
|
|
1768
|
+
instruction=_compose_instruction(extra_guidance=merged_guidance),
|
|
1769
|
+
llm=llm_cfg,
|
|
1770
|
+
timeout_ms=timeout_ms,
|
|
1771
|
+
explicit_targets=explicit_targets,
|
|
1772
|
+
write_globs=explicit_write_globs,
|
|
1773
|
+
)
|
|
1774
|
+
if not bool(broker_result.get("ok")):
|
|
1775
|
+
prior_detail = to_single_line(
|
|
1776
|
+
previous.get("stderr") or previous.get("summary") or "",
|
|
1777
|
+
300,
|
|
1778
|
+
)
|
|
1779
|
+
if prior_detail:
|
|
1780
|
+
current_stdout = str(broker_result.get("stdout") or "")
|
|
1781
|
+
broker_result["stdout"] = (
|
|
1782
|
+
f"Prior timeout attempt detail: {prior_detail}\n\n{current_stdout}"
|
|
1783
|
+
if current_stdout
|
|
1784
|
+
else f"Prior timeout attempt detail: {prior_detail}"
|
|
1785
|
+
)
|
|
1786
|
+
return broker_result
|
|
1787
|
+
|
|
1788
|
+
exit_info: Dict[str, Any] = {}
|
|
1789
|
+
agent = None
|
|
1790
|
+
agent_messages: List[Dict[str, Any]] = []
|
|
1791
|
+
broker_enabled = _tool_broker_enabled(base_url)
|
|
1792
|
+
prefer_broker_for_scoped_writes = bool(explicit_write_globs)
|
|
1793
|
+
ran_primary_broker = False
|
|
1794
|
+
if prefer_broker_for_scoped_writes and broker_enabled:
|
|
1795
|
+
log.info("Using tool broker shim for strict per-write scope enforcement.")
|
|
1796
|
+
broker_result = _run_broker_with_recovery()
|
|
1797
|
+
if not bool(broker_result.get("ok")):
|
|
1798
|
+
return {
|
|
1799
|
+
"ok": False,
|
|
1800
|
+
"summary": str(broker_result.get("summary") or "tool broker execution failed"),
|
|
1801
|
+
"stdout": str(broker_result.get("stdout") or ""),
|
|
1802
|
+
"stderr": str(broker_result.get("stderr") or ""),
|
|
1803
|
+
"exitCode": to_int(broker_result.get("exitCode"), 3),
|
|
1804
|
+
}
|
|
1805
|
+
exit_info = {"submission": broker_result.get("stdout") or ""}
|
|
1806
|
+
ran_primary_broker = True
|
|
1807
|
+
elif prefer_broker_for_scoped_writes and not broker_enabled:
|
|
1808
|
+
log.info(
|
|
1809
|
+
"Strict write scope requested but tool broker is disabled; "
|
|
1810
|
+
"using native mini-swe path with post-run scope verification."
|
|
1811
|
+
)
|
|
1812
|
+
|
|
1813
|
+
if not ran_primary_broker:
|
|
1814
|
+
try:
|
|
1815
|
+
import yaml
|
|
1816
|
+
from minisweagent import package_dir
|
|
1817
|
+
|
|
1818
|
+
litellm_kwargs: Dict[str, Any] = {}
|
|
1819
|
+
if api_key:
|
|
1820
|
+
litellm_kwargs["api_key"] = api_key
|
|
1821
|
+
if base_url:
|
|
1822
|
+
litellm_kwargs["base_url"] = base_url
|
|
1823
|
+
|
|
1824
|
+
model = LitellmModel(
|
|
1825
|
+
model_name=model_name,
|
|
1826
|
+
model_kwargs=litellm_kwargs,
|
|
1827
|
+
cost_tracking="ignore_errors",
|
|
1828
|
+
)
|
|
1829
|
+
|
|
1830
|
+
env = LocalEnvironment(cwd=repo)
|
|
1831
|
+
|
|
1832
|
+
config_path = package_dir / "config" / "default.yaml"
|
|
1833
|
+
with open(config_path, "r", encoding="utf-8") as f:
|
|
1834
|
+
builtin_config = yaml.safe_load(f)
|
|
1835
|
+
agent_kwargs = builtin_config.get("agent", {}) or {}
|
|
1836
|
+
|
|
1837
|
+
agent_kwargs["cost_limit"] = 0.0 # we manage budget externally
|
|
1838
|
+
agent_kwargs["step_limit"] = setting_int(
|
|
1839
|
+
"WORKERPALS_MINISWE_AGENT_MAX_STEPS",
|
|
1840
|
+
"workerpals.miniswe.agent_max_steps",
|
|
1841
|
+
30,
|
|
1842
|
+
)
|
|
1843
|
+
|
|
1844
|
+
agent = DefaultAgent(model, env, **agent_kwargs)
|
|
1845
|
+
log.info("Agent initialized, running task...")
|
|
1846
|
+
|
|
1847
|
+
toolcall_retry_max = _toolcall_retry_max()
|
|
1848
|
+
attempt = 0
|
|
1849
|
+
while True:
|
|
1850
|
+
try:
|
|
1851
|
+
attempt += 1
|
|
1852
|
+
if attempt > 1:
|
|
1853
|
+
log.info(
|
|
1854
|
+
f"Retrying agent run after tool-call failure (attempt {attempt}/{toolcall_retry_max + 1})."
|
|
1855
|
+
)
|
|
1856
|
+
|
|
1857
|
+
extra_guidance: List[str] = []
|
|
1858
|
+
if attempt > 1:
|
|
1859
|
+
extra_guidance.append(_build_strict_tool_use_guidance(repo))
|
|
1860
|
+
extra_guidance.append(
|
|
1861
|
+
_load_prompt_template("workerpals/miniswe_toolcall_retry_guidance.md").strip()
|
|
1862
|
+
)
|
|
1863
|
+
|
|
1864
|
+
exit_info = agent.run(_compose_instruction(extra_guidance=extra_guidance)) or {}
|
|
1865
|
+
log.info("Agent execution completed.")
|
|
1866
|
+
|
|
1867
|
+
# Log what the agent did
|
|
1868
|
+
if hasattr(agent, "messages") and agent.messages:
|
|
1869
|
+
agent_messages = [msg for msg in agent.messages if isinstance(msg, dict)]
|
|
1870
|
+
log.debug(f"Agent message history ({len(agent.messages)} messages):")
|
|
1871
|
+
log_agent_messages(agent.messages, log)
|
|
1872
|
+
log_git_status(repo, log)
|
|
1873
|
+
break
|
|
1874
|
+
|
|
1875
|
+
except Exception as exc:
|
|
1876
|
+
if is_no_tool_calls_error(exc) and (attempt - 1) < toolcall_retry_max:
|
|
1877
|
+
log.info(
|
|
1878
|
+
"Detected tool-call failure from model/runtime: "
|
|
1879
|
+
f"{to_single_line(exc, 220)}"
|
|
1880
|
+
)
|
|
1881
|
+
continue
|
|
1882
|
+
raise
|
|
1883
|
+
|
|
1884
|
+
except Exception as exc:
|
|
1885
|
+
# If it's a tool-call failure, optionally fall back to broker shim.
|
|
1886
|
+
if is_no_tool_calls_error(exc):
|
|
1887
|
+
if broker_enabled:
|
|
1888
|
+
log.info("mini-swe-agent failed due to missing tool calls; falling back to tool broker shim.")
|
|
1889
|
+
broker_result = _run_broker_with_recovery()
|
|
1890
|
+
if not bool(broker_result.get("ok")):
|
|
1891
|
+
return {
|
|
1892
|
+
"ok": False,
|
|
1893
|
+
"summary": str(broker_result.get("summary") or "tool broker fallback failed"),
|
|
1894
|
+
"stdout": str(broker_result.get("stdout") or ""),
|
|
1895
|
+
"stderr": str(broker_result.get("stderr") or ""),
|
|
1896
|
+
"exitCode": to_int(broker_result.get("exitCode"), 3),
|
|
1897
|
+
}
|
|
1898
|
+
|
|
1899
|
+
# The broker_result itself doesn't include changed-files list; we add it below in the shared post-run path.
|
|
1900
|
+
# We return broker_result as "exit_info-like" output by mapping it into exit_info and continuing.
|
|
1901
|
+
exit_info = {"submission": broker_result.get("stdout") or ""}
|
|
1902
|
+
# Continue into post-run summary construction (changed files etc.) by not returning early.
|
|
1903
|
+
else:
|
|
1904
|
+
return {
|
|
1905
|
+
"ok": False,
|
|
1906
|
+
"summary": "mini-swe-agent could not execute: model did not emit tool calls",
|
|
1907
|
+
"stderr": (
|
|
1908
|
+
"Agentic execution requires a tool-calling-capable model/runtime. "
|
|
1909
|
+
"The model output did not include any tool calls.\n"
|
|
1910
|
+
f"Error: {to_single_line(exc, 600)}\n"
|
|
1911
|
+
"Fix options:\n"
|
|
1912
|
+
"- Use a model/runtime that supports tool calls (function calling), or\n"
|
|
1913
|
+
"- Enable the tool broker shim: WORKERPALS_MINISWE_TOOL_BROKER=1, or\n"
|
|
1914
|
+
"- Switch executor backend."
|
|
1915
|
+
),
|
|
1916
|
+
"exitCode": 3,
|
|
1917
|
+
}
|
|
1918
|
+
else:
|
|
1919
|
+
return {
|
|
1920
|
+
"ok": False,
|
|
1921
|
+
"summary": "mini-swe-agent task execution failed",
|
|
1922
|
+
"stderr": str(exc),
|
|
1923
|
+
"exitCode": 1,
|
|
1924
|
+
}
|
|
1925
|
+
|
|
1926
|
+
if _messages_indicate_missing_tool_calls(agent_messages):
|
|
1927
|
+
if broker_enabled:
|
|
1928
|
+
log.info("mini-swe-agent exited without tool calls; falling back to tool broker shim.")
|
|
1929
|
+
broker_result = _run_broker_with_recovery()
|
|
1930
|
+
if not bool(broker_result.get("ok")):
|
|
1931
|
+
return {
|
|
1932
|
+
"ok": False,
|
|
1933
|
+
"summary": str(broker_result.get("summary") or "tool broker fallback failed"),
|
|
1934
|
+
"stdout": str(broker_result.get("stdout") or ""),
|
|
1935
|
+
"stderr": str(broker_result.get("stderr") or ""),
|
|
1936
|
+
"exitCode": to_int(broker_result.get("exitCode"), 3),
|
|
1937
|
+
}
|
|
1938
|
+
exit_info = {"submission": broker_result.get("stdout") or ""}
|
|
1939
|
+
else:
|
|
1940
|
+
return {
|
|
1941
|
+
"ok": False,
|
|
1942
|
+
"summary": "mini-swe-agent could not execute: model did not emit tool calls",
|
|
1943
|
+
"stderr": (
|
|
1944
|
+
"Agentic execution requires a tool-calling-capable model/runtime. "
|
|
1945
|
+
"The model output did not include any tool calls.\n"
|
|
1946
|
+
"Fix options:\n"
|
|
1947
|
+
"- Enable the tool broker shim: WORKERPALS_MINISWE_TOOL_BROKER=1, or\n"
|
|
1948
|
+
"- Use a model/runtime with function-calling support."
|
|
1949
|
+
),
|
|
1950
|
+
"exitCode": 3,
|
|
1951
|
+
}
|
|
1952
|
+
|
|
1953
|
+
# Extract the agent's conversational output from its message history (or broker transcript).
|
|
1954
|
+
agent_text = ""
|
|
1955
|
+
try:
|
|
1956
|
+
agent_text = str(exit_info.get("submission") or "").strip()
|
|
1957
|
+
if not agent_text and agent is not None and hasattr(agent, "messages"):
|
|
1958
|
+
parts: List[str] = []
|
|
1959
|
+
for msg in agent.messages:
|
|
1960
|
+
if msg.get("role") == "assistant":
|
|
1961
|
+
content = str(msg.get("content") or "").strip()
|
|
1962
|
+
if content:
|
|
1963
|
+
parts.append(content)
|
|
1964
|
+
if parts:
|
|
1965
|
+
agent_text = "\n\n".join(parts)
|
|
1966
|
+
except Exception:
|
|
1967
|
+
pass
|
|
1968
|
+
|
|
1969
|
+
# Post-run: determine what files were changed relative to baseline.
|
|
1970
|
+
changed_paths = summarize_git_changes(repo)
|
|
1971
|
+
delta = [p for p in changed_paths if p not in baseline_changes]
|
|
1972
|
+
effective = delta if delta else changed_paths
|
|
1973
|
+
|
|
1974
|
+
# Build stdout: include agent/broker text output followed by file change info.
|
|
1975
|
+
stdout_parts: List[str] = []
|
|
1976
|
+
if agent_text:
|
|
1977
|
+
stdout_parts.append(agent_text)
|
|
1978
|
+
|
|
1979
|
+
if effective:
|
|
1980
|
+
listed = "\n".join(f"- {path}" for path in effective[:40])
|
|
1981
|
+
if len(effective) > 40:
|
|
1982
|
+
listed += "\n- ..."
|
|
1983
|
+
suffix = ""
|
|
1984
|
+
if delta and len(delta) != len(changed_paths):
|
|
1985
|
+
suffix = f" (delta={len(delta)}, total_status={len(changed_paths)})"
|
|
1986
|
+
stdout_parts.append(f"Changed files:\n{listed}")
|
|
1987
|
+
return {
|
|
1988
|
+
"ok": True,
|
|
1989
|
+
"summary": f"Executed task and modified {len(effective)} file(s){suffix}",
|
|
1990
|
+
"stdout": "\n\n".join(stdout_parts),
|
|
1991
|
+
"stderr": "",
|
|
1992
|
+
"exitCode": 0,
|
|
1993
|
+
}
|
|
1994
|
+
|
|
1995
|
+
if not stdout_parts:
|
|
1996
|
+
stdout_parts.append("No modified files were detected after execution.")
|
|
1997
|
+
|
|
1998
|
+
return {
|
|
1999
|
+
"ok": True,
|
|
2000
|
+
"summary": "Executed task (no file changes detected)",
|
|
2001
|
+
"stdout": "\n\n".join(stdout_parts),
|
|
2002
|
+
"stderr": "",
|
|
2003
|
+
"exitCode": 0,
|
|
2004
|
+
}
|
|
2005
|
+
|
|
2006
|
+
|
|
2007
|
+
# ─── Main entry point ───────────────────────────────────────────────────────
|
|
2008
|
+
|
|
2009
|
+
def main() -> int:
    """Run one task.execute request from the command line.

    Parses the request from ``sys.argv``, executes it with
    ``_run_miniswe_task`` and hands the result dict to ``emit``. Any
    exception raised while parsing or executing becomes a failure result
    that carries the traceback.

    Returns:
        0 when the result is ok, otherwise the result's ``exitCode``
        converted with ``to_int`` (fallback argument 1).
    """
    try:
        request = parse_task_execute_payload(sys.argv, logger=log)
        outcome = _run_miniswe_task(
            request.repo,
            request.instruction,
            request.payload,
            request.supplemental_guidance,
        )
    except Exception as exc:
        # Last-resort boundary: report the crash as a structured result
        # so the failure still goes through emit().
        outcome = {
            "ok": False,
            "summary": "miniswe wrapper crashed while executing task.execute",
            "stdout": "",
            "stderr": traceback.format_exc(),
            "exitCode": 1,
            "error": to_single_line(exc, 300),
        }

    emit(outcome)
    if bool(outcome.get("ok")):
        return 0
    return to_int(outcome.get("exitCode"), 1)
|
|
2026
|
+
|
|
2027
|
+
|
|
2028
|
+
# Script entry point: exit the interpreter with main()'s return code.
if __name__ == "__main__":
    sys.exit(main())
|