@pushpalsdev/cli 1.0.17 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/pushpals-cli.js +542 -23
  2. package/package.json +1 -1
  3. package/runtime/sandbox/apps/workerpals/.python-version +1 -0
  4. package/runtime/sandbox/apps/workerpals/Dockerfile.sandbox +71 -0
  5. package/runtime/sandbox/apps/workerpals/package.json +25 -0
  6. package/runtime/sandbox/apps/workerpals/pyproject.toml +8 -0
  7. package/runtime/sandbox/apps/workerpals/src/backends/backend_config.ts +111 -0
  8. package/runtime/sandbox/apps/workerpals/src/backends/miniswe/miniswe_executor.py +2029 -0
  9. package/runtime/sandbox/apps/workerpals/src/backends/miniswe_backend.ts +48 -0
  10. package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +1259 -0
  11. package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +110 -0
  12. package/runtime/sandbox/apps/workerpals/src/backends/openai_codex_backend.ts +67 -0
  13. package/runtime/sandbox/apps/workerpals/src/backends/openhands/openhands_executor.py +563 -0
  14. package/runtime/sandbox/apps/workerpals/src/backends/openhands_backend.ts +161 -0
  15. package/runtime/sandbox/apps/workerpals/src/backends/openhands_task_execute.ts +536 -0
  16. package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +746 -0
  17. package/runtime/sandbox/apps/workerpals/src/backends/shared/test_settings_resolver.py +60 -0
  18. package/runtime/sandbox/apps/workerpals/src/backends/task_execute_registry.ts +21 -0
  19. package/runtime/sandbox/apps/workerpals/src/backends/types.ts +52 -0
  20. package/runtime/sandbox/apps/workerpals/src/common/execution_utils.ts +149 -0
  21. package/runtime/sandbox/apps/workerpals/src/common/executor_backend.ts +15 -0
  22. package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +210 -0
  23. package/runtime/sandbox/apps/workerpals/src/common/logger.ts +65 -0
  24. package/runtime/sandbox/apps/workerpals/src/common/types.ts +9 -0
  25. package/runtime/sandbox/apps/workerpals/src/common/worktree_cleanup.ts +66 -0
  26. package/runtime/sandbox/apps/workerpals/src/context_manager.ts +45 -0
  27. package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +1842 -0
  28. package/runtime/sandbox/apps/workerpals/src/execute_job.ts +3063 -0
  29. package/runtime/sandbox/apps/workerpals/src/job_runner.ts +194 -0
  30. package/runtime/sandbox/apps/workerpals/src/shell_manager.ts +210 -0
  31. package/runtime/sandbox/apps/workerpals/src/timeout_policy.ts +24 -0
  32. package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +1436 -0
  33. package/runtime/sandbox/apps/workerpals/tsconfig.json +15 -0
  34. package/runtime/sandbox/apps/workerpals/uv.lock +2014 -0
  35. package/runtime/sandbox/bun.lock +2591 -0
  36. package/runtime/sandbox/configs/backend.toml +79 -0
  37. package/runtime/sandbox/configs/default.toml +260 -0
  38. package/runtime/sandbox/configs/dev.toml +2 -0
  39. package/runtime/sandbox/configs/local.example.toml +129 -0
  40. package/runtime/sandbox/package.json +65 -0
  41. package/runtime/sandbox/packages/protocol/README.md +168 -0
  42. package/runtime/sandbox/packages/protocol/package.json +37 -0
  43. package/runtime/sandbox/packages/protocol/scripts/copy-schemas.js +17 -0
  44. package/runtime/sandbox/packages/protocol/src/a2a/README.md +52 -0
  45. package/runtime/sandbox/packages/protocol/src/a2a/mapping.ts +55 -0
  46. package/runtime/sandbox/packages/protocol/src/index.browser.ts +25 -0
  47. package/runtime/sandbox/packages/protocol/src/index.ts +25 -0
  48. package/runtime/sandbox/packages/protocol/src/schemas/approvals.schema.json +6 -0
  49. package/runtime/sandbox/packages/protocol/src/schemas/envelope.schema.json +96 -0
  50. package/runtime/sandbox/packages/protocol/src/schemas/events.schema.json +679 -0
  51. package/runtime/sandbox/packages/protocol/src/schemas/http.schema.json +50 -0
  52. package/runtime/sandbox/packages/protocol/src/types.ts +267 -0
  53. package/runtime/sandbox/packages/protocol/src/validate.browser.ts +154 -0
  54. package/runtime/sandbox/packages/protocol/src/validate.ts +233 -0
  55. package/runtime/sandbox/packages/protocol/src/version.ts +1 -0
  56. package/runtime/sandbox/packages/protocol/tsconfig.json +20 -0
  57. package/runtime/sandbox/packages/shared/package.json +19 -0
  58. package/runtime/sandbox/packages/shared/src/autonomy_policy.ts +400 -0
  59. package/runtime/sandbox/packages/shared/src/client_preflight.ts +297 -0
  60. package/runtime/sandbox/packages/shared/src/communication.ts +313 -0
  61. package/runtime/sandbox/packages/shared/src/config.ts +2201 -0
  62. package/runtime/sandbox/packages/shared/src/config_template_parity.ts +70 -0
  63. package/runtime/sandbox/packages/shared/src/git_backend.ts +205 -0
  64. package/runtime/sandbox/packages/shared/src/index.ts +100 -0
  65. package/runtime/sandbox/packages/shared/src/local_network.ts +101 -0
  66. package/runtime/sandbox/packages/shared/src/localbuddy_runtime.ts +329 -0
  67. package/runtime/sandbox/packages/shared/src/prompts.ts +64 -0
  68. package/runtime/sandbox/packages/shared/src/repo.ts +134 -0
  69. package/runtime/sandbox/packages/shared/src/session_event_visibility.ts +25 -0
  70. package/runtime/sandbox/packages/shared/src/vision.ts +247 -0
  71. package/runtime/sandbox/packages/shared/tsconfig.json +16 -0
  72. package/runtime/sandbox/prompts/workerpals/codex_quality_critic_instruction_prompt.md +14 -0
  73. package/runtime/sandbox/prompts/workerpals/commit_message_prompt.md +36 -0
  74. package/runtime/sandbox/prompts/workerpals/commit_message_user_prompt.md +7 -0
  75. package/runtime/sandbox/prompts/workerpals/miniswe_broker_system_prompt.md +33 -0
  76. package/runtime/sandbox/prompts/workerpals/miniswe_broker_task_prompt.md +5 -0
  77. package/runtime/sandbox/prompts/workerpals/miniswe_completion_requirement.md +1 -0
  78. package/runtime/sandbox/prompts/workerpals/miniswe_context_compaction_retry_prompt.md +1 -0
  79. package/runtime/sandbox/prompts/workerpals/miniswe_explicit_targets_block.md +2 -0
  80. package/runtime/sandbox/prompts/workerpals/miniswe_recovery_guidance_base.md +4 -0
  81. package/runtime/sandbox/prompts/workerpals/miniswe_recovery_guidance_blocker_line.md +1 -0
  82. package/runtime/sandbox/prompts/workerpals/miniswe_strict_tool_use_guidance.md +6 -0
  83. package/runtime/sandbox/prompts/workerpals/miniswe_supplemental_guidance_section.md +2 -0
  84. package/runtime/sandbox/prompts/workerpals/miniswe_timeout_note.md +1 -0
  85. package/runtime/sandbox/prompts/workerpals/miniswe_toolcall_retry_guidance.md +1 -0
  86. package/runtime/sandbox/prompts/workerpals/openai_codex_default_system_prompt.md +4 -0
  87. package/runtime/sandbox/prompts/workerpals/openai_codex_instruction_wrapper.md +5 -0
  88. package/runtime/sandbox/prompts/workerpals/openai_codex_runtime_policy_appendix.md +5 -0
  89. package/runtime/sandbox/prompts/workerpals/openai_codex_supplemental_guidance_section.md +2 -0
  90. package/runtime/sandbox/prompts/workerpals/openai_codex_task_execute_system_prompt.md +12 -0
  91. package/runtime/sandbox/prompts/workerpals/openhands_minimal_security_policy.j2 +8 -0
  92. package/runtime/sandbox/prompts/workerpals/openhands_minimal_system_prompt.j2 +20 -0
  93. package/runtime/sandbox/prompts/workerpals/openhands_strict_tool_use_message.md +1 -0
  94. package/runtime/sandbox/prompts/workerpals/openhands_supplemental_guidance_message.md +2 -0
  95. package/runtime/sandbox/prompts/workerpals/openhands_task_execute_fallback_system_prompt.md +1 -0
  96. package/runtime/sandbox/prompts/workerpals/openhands_task_execute_system_prompt.md +21 -0
  97. package/runtime/sandbox/prompts/workerpals/openhands_task_user_prompt.md +6 -0
  98. package/runtime/sandbox/prompts/workerpals/openhands_timeout_note.md +1 -0
  99. package/runtime/sandbox/prompts/workerpals/pr_description.md +42 -0
  100. package/runtime/sandbox/prompts/workerpals/task_quality_critic_system_prompt.md +9 -0
  101. package/runtime/sandbox/prompts/workerpals/task_quality_critic_user_prompt.md +17 -0
  102. package/runtime/sandbox/prompts/workerpals/workerpals_system_prompt.md +115 -0
  103. package/runtime/sandbox/protocol/schemas/approvals.schema.json +6 -0
  104. package/runtime/sandbox/protocol/schemas/envelope.schema.json +96 -0
  105. package/runtime/sandbox/protocol/schemas/events.schema.json +679 -0
  106. package/runtime/sandbox/protocol/schemas/http.schema.json +50 -0
@@ -0,0 +1,2029 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PushPals -> mini-swe-agent worker wrapper.
4
+
5
+ This script receives a base64-encoded JSON payload from the TS worker,
6
+ executes the requested task through the mini-swe-agent Python SDK, and prints
7
+ one structured result line:
8
+
9
+ __PUSHPALS_OH_RESULT__ {"ok":true,...}
10
+
11
+ The sentinel prefix is intentionally the same as the OpenHands wrapper so that
12
+ the TypeScript host can parse results with a single code path.
13
+
14
+ Production hardening:
15
+ - Detect the common failure mode where the model never emits tool calls
16
+ ("No tool calls found", etc.) and retry once with a strict tool-usage hint.
17
+ - If the model still cannot tool-call, return a structured failure that makes
18
+ the root cause obvious to the TS layer (so you can alert / route / fallback).
19
+
20
+ Tool-broker shim:
21
+ - If mini-swe-agent fails because the model doesn't tool-call, fall back to a
22
+ "tool broker" loop that does NOT require native tool/function calling.
23
+ - The broker asks the model to emit a strict JSON "plan of actions" (file ops + safe shell),
24
+ executes them locally, and feeds observations back to the model for a few steps.
25
+ - Broker can be forced on/off with WORKERPALS_MINISWE_TOOL_BROKER=1/0.
26
+ If unset, local endpoints (LM Studio/Ollama-style) default to ON.
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import json
32
+ import ast
33
+ import os
34
+ import re
35
+ import shlex
36
+ import sys
37
+ import time
38
+ import traceback
39
+ import fnmatch
40
+ from dataclasses import dataclass
41
+ from pathlib import Path
42
+ from typing import Any, Dict, List, Optional, Tuple
43
+ from urllib.error import HTTPError, URLError
44
+ from urllib.request import Request, urlopen
45
+
46
+ # Shared executor infrastructure lives in src/backends/shared.
47
+ _SHARED_DIR = Path(__file__).resolve().parents[1] / "shared"
48
+ if str(_SHARED_DIR) not in sys.path:
49
+ sys.path.insert(0, str(_SHARED_DIR))
50
+
51
+ from executor_base import (
52
+ Logger,
53
+ config_get,
54
+ emit,
55
+ is_no_tool_calls_error,
56
+ log_agent_messages,
57
+ log_git_status,
58
+ looks_local_base_url,
59
+ parse_task_execute_payload,
60
+ resolve_llm_config,
61
+ setting_int,
62
+ setting_str,
63
+ summarize_git_changes,
64
+ to_int,
65
+ to_single_line,
66
+ DEFAULT_TOOLCALL_RETRY_MAX,
67
+ )
68
+
69
+ # ─── Constants ───────────────────────────────────────────────────────────────
70
+
71
# Model identifier constant for this backend.
DEFAULT_MINISWE_MODEL = "local-model"

# Prefix handed to the shared Logger for this module's logger instance.
LOG_PREFIX = "[MiniSweExecutor]"
log = Logger(LOG_PREFIX)
74
+
75
# Tool broker defaults (conservative).
# The explicit default stays off; _tool_broker_enabled() turns the broker on
# automatically when the endpoint looks local.
_BROKER_ENABLED_DEFAULT = "0"
_BROKER_MAX_STEPS_DEFAULT = 8
_BROKER_MAX_ACTIONS_PER_STEP_DEFAULT = 10

# HTTP timeouts / retries: local endpoints get more generous defaults.
_BROKER_HTTP_TIMEOUT_SEC_DEFAULT = 90
_BROKER_HTTP_TIMEOUT_SEC_LOCAL_DEFAULT = 120
_BROKER_HTTP_RETRY_MAX_DEFAULT = 1
_BROKER_HTTP_RETRY_MAX_LOCAL_DEFAULT = 2
_BROKER_RUN_RETRY_MAX_DEFAULT = 1

_BROKER_TEMPERATURE = 0.0
_BROKER_SHELL_TIMEOUT_SEC_DEFAULT = 120
_BROKER_OBSERVATION_MAX_CHARS = 4_000
_BROKER_READ_PREVIEW_CHARS = 800

# Matches ``{{ token }}`` placeholders in prompt templates.
PROMPT_TOKEN_REGEX = re.compile(r"\{\{\s*([a-zA-Z0-9_]+)\s*\}\}")
_PROMPT_TEMPLATE_CACHE: Dict[str, str] = {}

# Safety: very simple denylist for shell commands (can be adjusted).
# Each entry is a regex searched against the lower-cased command line.
_DENY_PATTERNS = [
    r"\bsudo\b",
    r"\brm\b\s+-rf\b",
    r"\bmkfs\b",
    r"\bdd\b",
    r"\bshutdown\b",
    r"\breboot\b",
    r"\bpoweroff\b",
    r"\bcurl\b",
    r"\bwget\b",
    r"\bnc\b",
    r"\bnetcat\b",
    r"\bssh\b",
    r"\bscp\b",
    r"\brsync\b",
    r"\bpython\b\s+-m\s+http\.server\b",
]

# Only these executables may be launched by the broker.
_ALLOWED_BINARIES = {
    "git", "bun", "npm",
    "cat", "tail", "head", "ls", "find",
    "rg", "grep", "sed", "awk",
    "wc", "stat", "printf", "echo", "test",
}

# Git subcommands the broker accepts.
_ALLOWED_GIT_SUBCOMMANDS = {
    "status", "diff", "show", "log", "grep", "rev-parse", "ls-files",
}

# Package runners and the subcommands accepted for each of them.
_ALLOWED_PACKAGE_RUNNERS = {
    "bun": {"test", "run", "--version", "-v"},
    "npm": {"test", "run", "--version", "-v"},
}

# Characters that would enable chaining, substitution or redirection.
_SHELL_META_CHARS = {";", "|", "&", "$", "`", "(", ")", "<", ">"}
_BROKER_MAX_WRITE_CHARS = 200_000
144
+
145
+
146
+ # ─── Mini-swe-specific config ───────────────────────────────────────────────
147
+
148
def _execution_timeout_ms() -> int:
    """Return the mini-swe execution timeout in milliseconds.

    The value is looked up through ``setting_str`` using the
    ``WORKERPALS_MINISWE_TIMEOUT_MS`` / ``workerpals.miniswe_timeout_ms`` names.
    An empty or non-integer value yields the 1,800,000 ms default; a parsed
    value is never allowed below 10,000 ms.
    """
    fallback_ms = 1800000
    configured = setting_str(
        "WORKERPALS_MINISWE_TIMEOUT_MS", "workerpals.miniswe_timeout_ms", ""
    )
    if not configured:
        return fallback_ms
    try:
        requested_ms = int(configured)
    except Exception:
        # Unparseable setting: behave as if it had not been provided.
        return fallback_ms
    # Enforce the 10 second floor.
    return requested_ms if requested_ms > 10000 else 10000
158
+
159
+
160
def _toolcall_retry_max() -> int:
    """Return how many tool-call retries are allowed.

    A non-blank ``WORKERPALS_MINISWE_TOOLCALL_RETRY_MAX`` environment value
    wins; otherwise ``workerpals.miniswe_toolcall_retry_max`` is read via
    ``config_get``. Any supplied value is converted with ``to_int`` and clamped
    to 0..3. When neither source provides a value the shared default is
    returned as-is.
    """
    env_value = (os.environ.get("WORKERPALS_MINISWE_TOOLCALL_RETRY_MAX") or "").strip()
    # The config lookup only happens when the environment gives nothing.
    candidate = env_value or config_get("workerpals.miniswe_toolcall_retry_max", None)
    if candidate is None:
        return DEFAULT_TOOLCALL_RETRY_MAX
    bounded = min(3, to_int(candidate, DEFAULT_TOOLCALL_RETRY_MAX))
    return max(0, bounded)
168
+
169
+
170
def _parse_boolish(raw: Any) -> Optional[bool]:
    """Interpret a loosely typed flag value.

    The value is stringified, trimmed and lower-cased. ``1/true/yes/on`` map to
    ``True`` and ``0/false/no/off`` map to ``False``. ``None``, blank text and
    anything unrecognised map to ``None`` so callers can fall through to the
    next source.
    """
    if raw is None:
        return None
    recognised = {
        "1": True, "true": True, "yes": True, "on": True,
        "0": False, "false": False, "no": False, "off": False,
    }
    # A blank or unknown token is simply absent from the table.
    return recognised.get(str(raw).strip().lower())
181
+
182
+
183
def _tool_broker_enabled(base_url: str = "") -> bool:
    """Decide whether the tool-broker fallback is enabled.

    Sources are consulted in order, first decisive answer wins:
    1. ``WORKERPALS_MINISWE_TOOL_BROKER`` environment variable,
    2. ``workerpals.miniswe_tool_broker`` config value,
    3. ``looks_local_base_url(base_url)`` (truthy means enabled),
    4. the module-level ``_BROKER_ENABLED_DEFAULT``.
    """
    from_env = _parse_boolish(os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER"))
    if from_env is not None:
        return from_env

    from_config = _parse_boolish(config_get("workerpals.miniswe_tool_broker", None))
    if from_config is not None:
        return from_config

    # No explicit choice: enable automatically for local-looking endpoints.
    if looks_local_base_url(base_url):
        return True
    default_choice = _parse_boolish(_BROKER_ENABLED_DEFAULT)
    return default_choice is True
193
+
194
+
195
def _tool_broker_max_steps() -> int:
    """Return the broker step limit.

    Reads ``WORKERPALS_MINISWE_TOOL_BROKER_MAX_STEPS``; a non-blank value is
    converted with ``to_int`` and clamped to 1..30. Otherwise the module
    default is returned.
    """
    override = (os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_MAX_STEPS") or "").strip()
    if not override:
        return _BROKER_MAX_STEPS_DEFAULT
    capped = min(30, to_int(override, _BROKER_MAX_STEPS_DEFAULT))
    return max(1, capped)
200
+
201
+
202
def _tool_broker_max_actions_per_step() -> int:
    """Return the per-step action limit for the broker.

    Reads ``WORKERPALS_MINISWE_TOOL_BROKER_MAX_ACTIONS_PER_STEP``; a non-blank
    value is converted with ``to_int`` and clamped to 1..50. Otherwise the
    module default is returned.
    """
    override = (
        os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_MAX_ACTIONS_PER_STEP") or ""
    ).strip()
    if not override:
        return _BROKER_MAX_ACTIONS_PER_STEP_DEFAULT
    capped = min(50, to_int(override, _BROKER_MAX_ACTIONS_PER_STEP_DEFAULT))
    return max(1, capped)
207
+
208
+
209
def _tool_broker_shell_timeout_sec() -> int:
    """Return the timeout (seconds) for broker shell commands.

    Reads ``WORKERPALS_MINISWE_TOOL_BROKER_SHELL_TIMEOUT_SEC``; a non-blank
    value is converted with ``to_int`` and clamped to 5..600. Otherwise the
    module default is returned.
    """
    override = (
        os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_SHELL_TIMEOUT_SEC") or ""
    ).strip()
    if not override:
        return _BROKER_SHELL_TIMEOUT_SEC_DEFAULT
    capped = min(600, to_int(override, _BROKER_SHELL_TIMEOUT_SEC_DEFAULT))
    return max(5, capped)
214
+
215
+
216
def _tool_broker_http_timeout_sec(base_url: str = "") -> int:
    """Return the HTTP timeout (seconds) for broker LLM requests.

    A non-blank ``WORKERPALS_MINISWE_TOOL_BROKER_HTTP_TIMEOUT_SEC`` value wins,
    then the ``workerpals.miniswe_tool_broker_http_timeout_sec`` config value;
    either is converted with ``to_int`` and clamped to 10..600. With no
    explicit value, local-looking endpoints get the local default and every
    other endpoint gets the general default.
    """
    env_value = (
        os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_HTTP_TIMEOUT_SEC") or ""
    ).strip()
    # Config is only consulted when the environment gives nothing.
    explicit = env_value or config_get(
        "workerpals.miniswe_tool_broker_http_timeout_sec", None
    )
    if explicit is not None:
        capped = min(600, to_int(explicit, _BROKER_HTTP_TIMEOUT_SEC_DEFAULT))
        return max(10, capped)
    if looks_local_base_url(base_url):
        return _BROKER_HTTP_TIMEOUT_SEC_LOCAL_DEFAULT
    return _BROKER_HTTP_TIMEOUT_SEC_DEFAULT
226
+
227
+
228
def _tool_broker_http_retry_max(base_url: str = "") -> int:
    """Return the retry budget for broker LLM HTTP requests.

    A non-blank ``WORKERPALS_MINISWE_TOOL_BROKER_HTTP_RETRY_MAX`` value wins,
    then the ``workerpals.miniswe_tool_broker_http_retry_max`` config value;
    either is converted with ``to_int`` and clamped to 0..3. With no explicit
    value, local-looking endpoints get the local default and every other
    endpoint gets the general default.
    """
    env_value = (
        os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_HTTP_RETRY_MAX") or ""
    ).strip()
    # Config is only consulted when the environment gives nothing.
    explicit = env_value or config_get(
        "workerpals.miniswe_tool_broker_http_retry_max", None
    )
    if explicit is not None:
        capped = min(3, to_int(explicit, _BROKER_HTTP_RETRY_MAX_DEFAULT))
        return max(0, capped)
    if looks_local_base_url(base_url):
        return _BROKER_HTTP_RETRY_MAX_LOCAL_DEFAULT
    return _BROKER_HTTP_RETRY_MAX_DEFAULT
238
+
239
+
240
def _tool_broker_run_retry_max() -> int:
    """Return how many times a whole broker run may be retried.

    A non-blank ``WORKERPALS_MINISWE_TOOL_BROKER_RUN_RETRY_MAX`` value wins,
    then the ``workerpals.miniswe_tool_broker_run_retry_max`` config value;
    either is converted with ``to_int`` and clamped to 0..3. With neither set
    the module default is returned as-is.
    """
    env_value = (
        os.environ.get("WORKERPALS_MINISWE_TOOL_BROKER_RUN_RETRY_MAX") or ""
    ).strip()
    # Config is only consulted when the environment gives nothing.
    explicit = env_value or config_get(
        "workerpals.miniswe_tool_broker_run_retry_max", None
    )
    if explicit is None:
        return _BROKER_RUN_RETRY_MAX_DEFAULT
    capped = min(3, to_int(explicit, _BROKER_RUN_RETRY_MAX_DEFAULT))
    return max(0, capped)
248
+
249
+
250
def _build_strict_tool_use_guidance(repo: str) -> str:
    """Load the strict tool-use guidance prompt, passing ``repo`` as its only value."""
    template_values = {"repo": repo}
    return _load_prompt_template(
        "workerpals/miniswe_strict_tool_use_guidance.md", template_values
    )
252
+
253
+
254
+ # ─── Tool Broker Shim ────────────────────────────────────────────────────────
255
+
256
def _messages_indicate_missing_tool_calls(messages: Any) -> bool:
    """Report whether a transcript shows the model never produced tool calls.

    Returns ``True`` only when ``messages`` is a non-empty list in which no
    dict message carries a non-empty ``tool_calls`` list and at least one
    ``user`` message contains the text "no tool calls found"
    (case-insensitive). Non-dict entries are ignored.
    """
    if not isinstance(messages, list) or not messages:
        return False

    saw_complaint = False
    for entry in messages:
        if not isinstance(entry, dict):
            continue
        calls = entry.get("tool_calls")
        if isinstance(calls, list) and calls:
            # A single real tool call anywhere settles the answer.
            return False
        if saw_complaint:
            continue
        speaker = str(entry.get("role") or "").strip().lower()
        body = str(entry.get("content") or "").strip().lower()
        saw_complaint = speaker == "user" and "no tool calls found" in body
    return saw_complaint
272
+
273
+
274
def _is_timeout_like_error_text(value: object) -> bool:
    """Return True when the stringified value mentions "timeout" or "timed out"."""
    haystack = str(value or "").strip().lower()
    return any(marker in haystack for marker in ("timeout", "timed out"))
277
+
278
+
279
def _is_broker_timeout_failure(result: object) -> bool:
    """Detect a broker result that failed because the LLM request timed out.

    The result must be a dict with a falsy ``ok``, a ``summary`` containing
    "tool broker failed: llm request error" (case-insensitive) and a
    ``stderr`` that looks timeout-related.
    """
    if not isinstance(result, dict) or bool(result.get("ok")):
        return False
    summary_text = str(result.get("summary") or "").lower()
    if "tool broker failed: llm request error" not in summary_text:
        return False
    error_text = str(result.get("stderr") or "")
    return _is_timeout_like_error_text(error_text)
290
+
291
+
292
+ def _is_broker_incomplete_failure(result: object) -> bool:
293
+ if not isinstance(result, dict):
294
+ return False
295
+ if bool(result.get("ok")):
296
+ return False
297
+ summary = str(result.get("summary") or "").lower()
298
+ return "tool broker failed: did not reach done=true before limits" in summary
299
+
300
+
301
@dataclass
class _LLMConfig:
    """Connection settings used by the broker's chat-completion requests."""

    # Model identifier sent as the "model" field of the request payload.
    model: str
    # Bearer token; an empty value means no Authorization header is sent.
    api_key: str
    # Endpoint base URL; normalised to end with /v1 before use.
    base_url: str
306
+
307
+
308
def _normalize_openai_base_url(base_url: str) -> str:
    """Normalise an OpenAI-compatible endpoint so that it ends with ``/v1``.

    Accepts:
      - http://host:1234
      - http://host:1234/
      - http://host:1234/v1
      - http://host:1234/v1/

    Returns "" for empty/blank input; otherwise the URL without trailing
    slashes, with ``/v1`` appended unless that path segment is already last.

    Only a genuine trailing ``/v1`` segment counts as "already versioned".
    The previous bare ``endswith("v1")`` test also matched hosts or paths that
    merely end in those characters (``http://devv1``, ``.../apiv1``) and
    returned them without the ``/v1`` suffix this function promises.
    """
    trimmed = (base_url or "").strip()
    if not trimmed:
        return ""
    trimmed = trimmed.rstrip("/")
    if trimmed.endswith("/v1"):
        return trimmed
    return trimmed + "/v1"
327
+
328
+
329
def _http_post_json(url: str, payload: Dict[str, Any], api_key: str, timeout_sec: float) -> Dict[str, Any]:
    """POST ``payload`` as JSON and return the decoded JSON object.

    Args:
        url: Target URL.
        payload: JSON-serialisable request body.
        api_key: Sent as ``Authorization: Bearer <api_key>`` when non-empty.
        timeout_sec: Timeout passed to ``urlopen``.

    Returns:
        The response body decoded as a JSON object (dict).

    Raises:
        RuntimeError: on HTTP error status, URL/connection error, timeout,
            a response body that is not valid JSON, or a JSON body that is
            not an object. The last two used to escape as a bare
            ``JSONDecodeError`` or as a non-dict return value that broke
            callers expecting ``.get``.
    """
    body = json.dumps(payload).encode("utf-8")
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    request = Request(url, data=body, headers=headers, method="POST")

    try:
        with urlopen(request, timeout=timeout_sec) as resp:
            raw = resp.read().decode("utf-8", errors="replace")
    except HTTPError as e:
        # HTTPError must be handled before URLError (it is a subclass).
        try:
            details = e.read().decode("utf-8", errors="replace")
        except Exception:
            details = ""
        raise RuntimeError(f"HTTP {e.code} {e.reason} for POST {url}\n{details}") from e
    except URLError as e:
        raise RuntimeError(f"URLError for POST {url}: {e}") from e
    except TimeoutError as e:
        raise RuntimeError(f"TimeoutError for POST {url}: timed out after {timeout_sec}s") from e

    # Decode outside the transport try-block so parse failures are reported
    # as what they are, with a short preview of the offending body.
    try:
        parsed = json.loads(raw)
    except ValueError as e:
        preview = " ".join(raw[:400].split())
        raise RuntimeError(f"Invalid JSON response for POST {url}: {e}; body={preview!r}") from e
    if not isinstance(parsed, dict):
        raise RuntimeError(
            f"Unexpected JSON response for POST {url}: expected object, got {type(parsed).__name__}"
        )
    return parsed
349
+
350
+
351
def _chat_completion(cfg: _LLMConfig, messages: List[Dict[str, str]], timeout_sec: int) -> str:
    """Send one non-streaming chat-completion request and return the reply text.

    The request goes to ``<normalised base_url>/chat/completions`` with the
    configured model, the given messages and the broker temperature.

    Raises:
        RuntimeError: when no base URL is configured, when the response has no
            choices, or when the first choice's message content is not a
            string. Errors raised by ``_http_post_json`` propagate unchanged.
    """
    api_base = _normalize_openai_base_url(cfg.base_url)
    if not api_base:
        raise RuntimeError("No base_url configured for broker shim (WORKERPALS_LLM_ENDPOINT/BASE_URL).")

    request_body: Dict[str, Any] = {
        "model": cfg.model,
        "messages": messages,
        "temperature": _BROKER_TEMPERATURE,
        "stream": False,
    }
    response = _http_post_json(
        api_base + "/chat/completions",
        request_body,
        cfg.api_key,
        timeout_sec=float(timeout_sec),
    )

    candidates = response.get("choices") or []
    if not candidates:
        raise RuntimeError(f"LLM returned no choices: {to_single_line(response, 400)}")

    # Only the first choice is used.
    first_message = candidates[0].get("message") or {}
    reply = first_message.get("content")
    if isinstance(reply, str):
        return reply.strip()
    raise RuntimeError(f"LLM returned non-text content: {to_single_line(response, 400)}")
371
+
372
+
373
def _repo_safe_path(repo: str, rel_path: str) -> Path:
    """Resolve ``rel_path`` against ``repo`` and refuse anything outside it.

    Relative paths are joined onto the resolved repo root. Absolute paths
    (POSIX-absolute or drive-letter style) are resolved as given and accepted
    only when they land inside the repo root.

    Returns:
        The resolved path (the repo root itself is allowed).

    Raises:
        RuntimeError: for a blank path, a path containing a NUL byte, or a
            path that resolves outside the repo root.
    """
    requested = str(rel_path or "")
    if not requested.strip():
        raise RuntimeError("Path is required")
    if "\x00" in requested:
        raise RuntimeError("Path contains NUL byte")

    repo_root = Path(repo).resolve()
    looks_absolute = Path(requested).is_absolute() or re.match(r"^[A-Za-z]:[\\/]", requested)
    unresolved = Path(requested) if looks_absolute else repo_root / requested
    resolved = unresolved.resolve()

    # Containment is checked after resolution so ".." segments are already
    # collapsed.
    if resolved != repo_root and repo_root not in resolved.parents:
        raise RuntimeError(f"Refusing to access path outside repo: {requested}")
    return resolved
389
+
390
+
391
def _normalize_concrete_repo_path(repo: str, path_value: str) -> Optional[str]:
    """Convert a concrete (possibly absolute) path to repo-relative POSIX form.

    Returns "." when the path is the repo root itself, and ``None`` when the
    input is not a non-blank string or when resolution fails for any reason
    (including the path lying outside the repo).
    """
    if not isinstance(path_value, str):
        return None
    candidate = path_value.strip()
    if not candidate:
        return None
    try:
        repo_root = Path(repo).resolve()
        inside = _repo_safe_path(repo, candidate)
        relative = inside.relative_to(repo_root).as_posix().strip()
    except Exception:
        # Deliberately best-effort: any failure means "cannot normalise".
        return None
    return relative if relative else "."
410
+
411
+
412
def _normalize_scope_rel_path(value: Any) -> Optional[str]:
    """Normalise a scope entry to a clean repo-relative POSIX path.

    Backslashes become "/", leading "./" prefixes and trailing slashes are
    dropped, and empty or "." segments are removed. Returns ``None`` for
    non-strings, blank values, absolute paths (POSIX or drive-letter), any
    path containing a ".." segment, or a path with no segments left.
    """
    if not isinstance(value, str):
        return None
    text = value.strip().replace("\\", "/")
    if not text:
        return None
    while text.startswith("./"):
        text = text[2:]
    text = text.rstrip("/")
    if not text or text.startswith("/") or re.match(r"^[A-Za-z]:[\\/]", text):
        return None

    pieces = [piece.strip() for piece in text.split("/")]
    if ".." in pieces:
        # Parent traversal is never allowed in a scope entry.
        return None
    kept = [piece for piece in pieces if piece and piece != "."]
    return "/".join(kept) if kept else None
436
+
437
+
438
def _extract_write_globs_from_payload(payload: Optional[Dict[str, Any]]) -> List[str]:
    """Collect the normalised write scope from a task payload.

    Reads ``payload["params"]["planning"]["scope"]["writeGlobs"]``. Returns an
    empty list when any level is missing or has the wrong type. Entries are
    normalised with ``_normalize_scope_rel_path``; invalid entries are dropped
    and duplicates removed while keeping first-seen order.
    """
    node: Any = payload
    for key in ("params", "planning", "scope"):
        if not isinstance(node, dict):
            return []
        node = node.get(key)
    if not isinstance(node, dict):
        return []

    raw_globs = node.get("writeGlobs")
    if not isinstance(raw_globs, list):
        return []

    # A dict keeps insertion order, giving order-preserving de-duplication.
    ordered_unique: Dict[str, None] = {}
    for entry in raw_globs:
        cleaned = _normalize_scope_rel_path(entry)
        if cleaned:
            ordered_unique.setdefault(cleaned, None)
    return list(ordered_unique)
464
+
465
+
466
def _assert_write_allowed(repo: str, path: str, write_globs: Optional[List[str]]) -> None:
    """Raise unless ``path`` falls inside the allowed write scope.

    No scope (``None`` or empty list) means no restriction. Entries containing
    any of ``*?[]`` are matched case-sensitively with ``fnmatch``; every other
    entry matches the path itself or anything underneath it.

    Raises:
        RuntimeError: when the path cannot be normalised, or when no scope
            entry matches it.
    """
    if not write_globs:
        return

    target = _normalize_concrete_repo_path(repo, path)
    if not target:
        raise RuntimeError(f"Invalid write path for scope enforcement: {path!r}")

    for entry in write_globs:
        pattern = str(entry or "").strip()
        if not pattern:
            continue
        has_wildcard = any(ch in pattern for ch in "*?[]")
        if has_wildcard:
            permitted = fnmatch.fnmatchcase(target, pattern)
        else:
            # Plain entries act as a file name or a directory prefix.
            permitted = target == pattern or target.startswith(pattern + "/")
        if permitted:
            return

    raise RuntimeError(
        "Scope violation: attempted write outside writeGlobs. "
        f"path={target!r} write_globs={write_globs!r}"
    )
486
+
487
+
488
def _read_text_file(repo: str, path: str, max_chars: int = 60000) -> str:
    """Read a repo file as UTF-8 text (undecodable bytes replaced).

    Content longer than ``max_chars`` is cut at that length and suffixed with
    a truncation marker.

    Raises:
        RuntimeError: when the path is rejected by ``_repo_safe_path`` or the
            file does not exist.
    """
    target = _repo_safe_path(repo, path)
    if not target.exists():
        raise RuntimeError(f"File not found: {path}")
    text = target.read_text(encoding="utf-8", errors="replace")
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "\n... (truncated)"
496
+
497
+
498
def _write_text_file(repo: str, path: str, content: str, write_globs: Optional[List[str]] = None) -> None:
    """Write ``content`` to a repo file as UTF-8, creating parent directories.

    The write-scope check runs first, then the repo containment check; either
    may raise ``RuntimeError`` before anything is written.
    """
    _assert_write_allowed(repo, path, write_globs)
    target = _repo_safe_path(repo, path)
    parent_dir = target.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
503
+
504
+
505
def _append_line(repo: str, path: str, line: str, write_globs: Optional[List[str]] = None) -> None:
    """Append ``line`` plus a newline to a repo file, opened in append mode.

    The file is not rewritten. If it already has content that does not end
    with a newline, a newline is written first so the new text starts on its
    own line. Parent directories are created as needed. Scope and containment
    checks run first and may raise ``RuntimeError``.
    """
    _assert_write_allowed(repo, path, write_globs)
    target = _repo_safe_path(repo, path)
    target.parent.mkdir(parents=True, exist_ok=True)

    separator = ""
    if target.exists() and target.stat().st_size > 0:
        try:
            # Inspect only the final byte instead of reading the whole file.
            with open(target, "rb") as reader:
                reader.seek(-1, os.SEEK_END)
                if reader.read(1) != b"\n":
                    separator = "\n"
        except Exception:
            # Best effort: if the tail cannot be read, append without a prefix.
            separator = ""

    with open(target, "a", encoding="utf-8") as writer:
        if separator:
            writer.write(separator)
        writer.write(f"{line}\n")
525
+
526
+
527
def _replace_text_once(
    repo: str,
    path: str,
    old: str,
    new: str,
    write_globs: Optional[List[str]] = None,
) -> int:
    """Replace the first occurrence of ``old`` with ``new`` in a repo file.

    The file is read and written as UTF-8 (undecodable bytes are replaced on
    read). Scope and containment checks run first and may raise
    ``RuntimeError``.

    Returns:
        1 when a replacement was written, 0 when ``old`` does not occur (the
        file is then left untouched).
    """
    _assert_write_allowed(repo, path, write_globs)
    target = _repo_safe_path(repo, path)
    original_text = target.read_text(encoding="utf-8", errors="replace")
    if old not in original_text:
        return 0
    # count=1 limits the substitution to the first match.
    target.write_text(original_text.replace(old, new, 1), encoding="utf-8")
    return 1
543
+
544
+
545
def _sed_arg_requests_in_place(raw_arg: str) -> bool:
    """Return True when a sed argument enables in-place editing.

    Covers ``--in-place[=SUFFIX]`` and short-option clusters that contain
    ``i``/``I`` (``-i``, ``-i.bak``, ``-ni``, ``-Ei``).
    """
    arg = str(raw_arg or "").strip()
    if not arg.startswith("-"):
        return False
    if arg.startswith("--"):
        return arg.lower().startswith("--in-place")
    for flag in arg[1:]:
        if flag in "iI":
            return True
        if flag in "efl":
            # -e/-f/-l take a value: the rest of the token is that value.
            return False
    return False


def _parse_and_validate_shell_command(cmd: str) -> Tuple[Optional[List[str]], str]:
    """Tokenise a broker shell command and check it against the safety rules.

    Returns:
        ``(args, "")`` when the command is accepted, where ``args`` is the
        ``shlex``-split argument list; ``(None, reason)`` when it is rejected.

    Checks, in order: non-empty; no control characters; no shell
    metacharacters; parseable; binary on the allowlist; package-runner
    subcommand rules; regex denylist; per-binary guardrails for ``find``,
    ``git``, ``sed`` and ``awk``.

    Guardrails tightened relative to the earlier version:
      - sed: in-place editing hidden in a combined short-option cluster
        (``-ni``, ``-Ei``) is rejected, not only tokens starting with ``-i``.
      - find: ``-ok``/``-okdir`` (run commands) and ``-fprint*``/``-fls``
        (write files) are rejected alongside ``-exec``/``-delete``.
      - git: ``--output[=<file>]`` (writes a file) and ``-O`` /
        ``--open-files-in-pager`` (launches a program) are rejected.
    """
    c = (cmd or "").strip()
    if not c:
        return None, "empty command"
    if any(ord(ch) < 32 for ch in c):
        return None, "control characters are not allowed"
    if any(ch in c for ch in _SHELL_META_CHARS):
        return None, "shell metacharacters are not allowed"
    try:
        args = shlex.split(c, posix=True)
    except Exception as exc:
        return None, f"failed to parse command: {exc}"
    if not args:
        return None, "empty parsed command"

    binary = args[0].strip().lower()
    if binary not in _ALLOWED_BINARIES:
        return None, f"binary not allowed: {binary}"

    if binary in _ALLOWED_PACKAGE_RUNNERS:
        if len(args) < 2:
            return None, f"{binary} command requires a subcommand"
        sub = args[1].strip().lower()
        if sub not in _ALLOWED_PACKAGE_RUNNERS[binary]:
            return None, f"{binary} subcommand not allowed: {sub}"
        # Allow script runner only for repo scripts with a simple token.
        if sub == "run":
            if len(args) < 3:
                return None, f"{binary} run requires a script name"
            script_name = str(args[2] or "").strip().lower()
            if not re.match(r"^[a-z0-9:_\-.]+$", script_name):
                return None, f"{binary} run script token is not allowed: {script_name!r}"

    lowered = c.lower()
    for pat in _DENY_PATTERNS:
        if re.search(pat, lowered):
            return None, f"blocked by denylist: {pat}"

    # Additional guardrails for risky allowlisted binaries.
    if binary == "find":
        joined = " ".join(args[1:]).lower()
        if "-exec" in joined or "-delete" in joined:
            return None, "find with -exec/-delete is not allowed"
        for raw_arg in args[1:]:
            token = str(raw_arg or "").strip().lower()
            # -ok/-okdir run commands; -fprint/-fprint0/-fprintf/-fls write files.
            if token.startswith(("-ok", "-fprint", "-fls")):
                return None, f"find action is not allowed: {token}"

    if binary == "git" and len(args) >= 2:
        sub = args[1].strip().lower()
        if sub not in _ALLOWED_GIT_SUBCOMMANDS:
            return None, f"git subcommand not allowed: {sub}"
        for raw_arg in args[2:]:
            arg = str(raw_arg or "").strip()
            if not arg:
                continue
            lower_arg = arg.lower()
            # lower_arg is lower-cased, so this covers both -c and -C.
            if lower_arg == "-c":
                return None, f"git option is not allowed: {arg}"
            if lower_arg.startswith("-c"):
                return None, f"git option prefix is not allowed: {arg}"
            if lower_arg.startswith("--git-dir") or lower_arg.startswith("--work-tree"):
                return None, f"git path/work-tree override is not allowed: {arg}"
            if lower_arg == "--no-index":
                return None, "git diff --no-index is not allowed"
            if lower_arg == "--output" or lower_arg.startswith("--output="):
                return None, f"git output redirection is not allowed: {arg}"
            if arg.startswith("-O") or lower_arg.startswith("--open-files-in-pager"):
                return None, f"git pager launch option is not allowed: {arg}"
            if arg.startswith("/") or re.match(r"^[A-Za-z]:[\\/]", arg):
                return None, f"absolute path-like git arg is not allowed: {arg}"
            normalized = arg.replace("\\", "/")
            while normalized.startswith("./"):
                normalized = normalized[2:]
            if (
                normalized == ".."
                or normalized.startswith("../")
                or "/../" in normalized
            ):
                return None, f"path escape git arg is not allowed: {arg}"
    if binary == "git" and len(args) < 2:
        return None, "git command requires an explicit allowed subcommand"

    if binary == "sed":
        for raw_arg in args[1:]:
            if _sed_arg_requests_in_place(raw_arg):
                return None, "sed in-place edits are not allowed"

    if binary == "awk":
        joined = " ".join(args[1:]).lower()
        if "system(" in joined:
            return None, "awk system() is not allowed"

    return args, ""
626
+
627
+
628
def _attempt_salvage_rejected_shell_command(cmd: str, error_text: str) -> Optional[str]:
    """Try to turn a rejected model command into an acceptable one.

    Two best-effort repairs are attempted, in order:
      1. If the rejection mentions metacharacters, keep only the text before
         the first metacharacter, provided that prefix validates.
      2. If npm was disallowed or not found, rewrite ``npm test`` /
         ``npm run ...`` to the same command with ``bun``, provided it
         validates.

    Returns the repaired command string, or ``None`` when nothing applies.
    """
    original = str(cmd or "").strip()
    if not original:
        return None
    reason = str(error_text or "").lower()

    # Repair 1: drop everything from the first metacharacter onwards.
    if "metacharacters" in reason:
        prefix = re.split(r"[;|&$`()<>]", original, maxsplit=1)[0].strip()
        if prefix and prefix != original:
            parsed, _ = _parse_and_validate_shell_command(prefix)
            if parsed is not None:
                return prefix

    # Repair 2: swap npm for bun on test/run invocations.
    npm_unusable = (
        "binary not allowed: npm" in reason
        or "no such file or directory" in reason
    )
    if not npm_unusable:
        return None
    tokens = original.split()
    if len(tokens) < 2 or tokens[0].lower() != "npm":
        return None
    if tokens[1].lower() not in {"test", "run"}:
        return None
    bun_command = "bun " + " ".join(tokens[1:])
    parsed, _ = _parse_and_validate_shell_command(bun_command)
    return bun_command if parsed is not None else None
659
+
660
+
661
def _run_shell(repo: str, cmd: str, max_output: int = 60000, timeout_sec: Optional[int] = None) -> str:
    """
    Execute `cmd` inside `repo` as an argument vector, without a shell.

    The command string is first checked by `_parse_and_validate_shell_command`;
    a rejection raises RuntimeError. stdout and stderr are captured, joined
    (stderr after a newline), stripped and cut to `max_output` characters.
    The returned text always begins with "(exit=<returncode>)". When
    `timeout_sec` is None the timeout comes from
    `_tool_broker_shell_timeout_sec()`.
    """
    argv, rejection = _parse_and_validate_shell_command(cmd)
    if argv is None:
        raise RuntimeError(f"Shell command rejected: {rejection}. cmd={cmd!r}")

    import subprocess

    workdir = str(Path(repo).resolve())
    if timeout_sec is not None:
        effective_timeout = timeout_sec
    else:
        effective_timeout = _tool_broker_shell_timeout_sec()

    completed = subprocess.run(
        argv,
        cwd=workdir,
        capture_output=True,
        text=True,
        check=False,
        timeout=effective_timeout,
    )

    pieces = [completed.stdout or ""]
    if completed.stderr:
        pieces.append("\n" + completed.stderr)
    combined = "".join(pieces).strip()
    if len(combined) > max_output:
        combined = combined[:max_output] + "\n... (truncated)"

    header = f"(exit={completed.returncode})"
    return f"{header}\n{combined}" if combined else header
685
+
686
+
687
+ def _shell_exit_code(output: str) -> Optional[int]:
688
+ m = re.match(r"^\(exit=(\d+)\)", str(output or "").strip())
689
+ if not m:
690
+ return None
691
+ try:
692
+ return int(m.group(1))
693
+ except Exception:
694
+ return None
695
+
696
+
697
+ def _extract_first_json_object(text: str) -> Optional[Dict[str, Any]]:
698
+ """
699
+ Tries to find and parse a single JSON object from the model response.
700
+ Accepts plain JSON, or JSON inside Markdown fences.
701
+ """
702
+ if not text:
703
+ return None
704
+ # Strip ```json fences
705
+ fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
706
+ candidate = fenced.group(1).strip() if fenced else text.strip()
707
+
708
+ # Fast path: whole string is JSON
709
+ try:
710
+ obj = json.loads(candidate)
711
+ return obj if isinstance(obj, dict) else None
712
+ except Exception:
713
+ pass
714
+
715
+ def _normalize_common_json_typos(src: str) -> str:
716
+ """
717
+ Repair a few common, low-risk key/value separator typos in model output.
718
+
719
+ Example:
720
+ {"line','foo"} -> {"line":'foo"}
721
+ {"line","foo"} -> {"line":"foo"}
722
+ """
723
+ fixed = src
724
+ # Key was opened with double-quote but closed with single-quote before comma.
725
+ fixed = re.sub(
726
+ r'([{\s,])"([A-Za-z_][A-Za-z0-9_]*)\'\s*,\s*(["\'])',
727
+ r'\1"\2": \3',
728
+ fixed,
729
+ )
730
+ # Key is correctly quoted but comma used instead of colon.
731
+ fixed = re.sub(
732
+ r'([{\s,])"([A-Za-z_][A-Za-z0-9_]*)"\s*,\s*(["\'])',
733
+ r'\1"\2": \3',
734
+ fixed,
735
+ )
736
+ return fixed
737
+
738
+ def _try_relaxed_json_parse(src: str) -> Optional[Dict[str, Any]]:
739
+ # Common model drift: single-quoted strings, trailing commas, Python booleans.
740
+ working = src.strip()
741
+ if not working:
742
+ return None
743
+ working = _normalize_common_json_typos(working)
744
+ # Convert single-quoted literals to JSON double-quoted strings when possible.
745
+ working = re.sub(
746
+ r"'([^'\\]*(?:\\.[^'\\]*)*)'",
747
+ lambda m: json.dumps(m.group(1)),
748
+ working,
749
+ )
750
+ # Sometimes malformed model output leaves an extra quote after normalization:
751
+ # "line": "value.""}
752
+ # Repair by dropping the stray quote before delimiters.
753
+ working = re.sub(
754
+ r'("(?:(?:\\.|[^"\\])*)")\s*"\s*([,}\]])',
755
+ r"\1\2",
756
+ working,
757
+ )
758
+ # Remove trailing commas before object/array close.
759
+ working = re.sub(r",(\s*[}\]])", r"\1", working)
760
+ try:
761
+ obj = json.loads(working)
762
+ return obj if isinstance(obj, dict) else None
763
+ except Exception:
764
+ pass
765
+ # As a final fallback, parse Python-literal style payloads.
766
+ py_working = (
767
+ working.replace(": true", ": True")
768
+ .replace(": false", ": False")
769
+ .replace(": null", ": None")
770
+ )
771
+ py_working = re.sub(r"\btrue\b", "True", py_working)
772
+ py_working = re.sub(r"\bfalse\b", "False", py_working)
773
+ py_working = re.sub(r"\bnull\b", "None", py_working)
774
+ try:
775
+ parsed = ast.literal_eval(py_working)
776
+ if isinstance(parsed, dict):
777
+ return parsed
778
+ except Exception:
779
+ return None
780
+ return None
781
+
782
+ relaxed = _try_relaxed_json_parse(candidate)
783
+ if relaxed is not None:
784
+ return relaxed
785
+
786
+ # Heuristic: find first {...} block
787
+ start = candidate.find("{")
788
+ end = candidate.rfind("}")
789
+ if start >= 0 and end > start:
790
+ snippet = candidate[start : end + 1]
791
+ try:
792
+ obj = json.loads(snippet)
793
+ return obj if isinstance(obj, dict) else None
794
+ except Exception:
795
+ relaxed_snippet = _try_relaxed_json_parse(snippet)
796
+ if relaxed_snippet is not None:
797
+ return relaxed_snippet
798
+ return None
799
+ return None
800
+
801
+
802
def _truncate_observation(text: str, max_chars: int = _BROKER_OBSERVATION_MAX_CHARS) -> str:
    """
    Shorten `text` to roughly `max_chars` characters, keeping both ends.

    The stripped text is returned unchanged when it already fits. Otherwise
    the first half and the last half of the character budget are kept, with
    a "...[observation truncated]..." marker line between them (the marker
    itself is not counted against the budget). A zero or negative budget
    keeps no text at all, only the marker.
    """
    t = str(text or "").strip()
    if len(t) <= max_chars:
        return t
    budget = max(0, max_chars)
    head = budget // 2
    tail = budget - head
    # t[-0:] is the whole string, so an empty tail has to be spelled out.
    tail_text = t[-tail:] if tail else ""
    return f"{t[:head]}\n...[observation truncated]...\n{tail_text}"
809
+
810
+
811
+ def _validate_broker_actions(actions: Any, max_actions: int) -> Tuple[bool, str, List[Dict[str, Any]]]:
812
+ if actions is None:
813
+ return True, "", []
814
+ if not isinstance(actions, list):
815
+ return False, f"Expected actions to be a list, got: {type(actions).__name__}", []
816
+ valid: List[Dict[str, Any]] = []
817
+ for i, act in enumerate(actions[:max_actions], start=1):
818
+ if not isinstance(act, dict):
819
+ return False, f"Action {i}: must be an object", []
820
+ typ = str(act.get("type") or "").strip()
821
+ if not typ:
822
+ return False, f"Action {i}: missing type", []
823
+ if typ == "read_file":
824
+ if not isinstance(act.get("path"), str) or not str(act.get("path")).strip():
825
+ return False, f"Action {i} read_file: path is required", []
826
+ valid.append({"type": typ, "path": str(act.get("path")).strip()})
827
+ continue
828
+ if typ in {"append_line", "append_comment"}:
829
+ if not isinstance(act.get("path"), str) or not str(act.get("path")).strip():
830
+ return False, f"Action {i} {typ}: path is required", []
831
+ line_value = act.get("line")
832
+ if typ == "append_comment" and not isinstance(line_value, str):
833
+ line_value = act.get("comment")
834
+ if not isinstance(line_value, str):
835
+ return False, f"Action {i} {typ}: line/comment must be a string", []
836
+ valid.append(
837
+ {
838
+ "type": "append_line",
839
+ "path": str(act.get("path")).strip(),
840
+ "line": str(line_value),
841
+ }
842
+ )
843
+ continue
844
+ if typ == "replace_text_once":
845
+ if not isinstance(act.get("path"), str) or not str(act.get("path")).strip():
846
+ return False, f"Action {i} replace_text_once: path is required", []
847
+ old = act.get("old")
848
+ new = act.get("new")
849
+ if not isinstance(old, str) or not isinstance(new, str):
850
+ return False, f"Action {i} replace_text_once: old/new must be strings", []
851
+ if not old:
852
+ return False, f"Action {i} replace_text_once: old must be non-empty", []
853
+ valid.append(
854
+ {"type": typ, "path": str(act.get("path")).strip(), "old": old, "new": new}
855
+ )
856
+ continue
857
+ if typ == "write_file":
858
+ if not isinstance(act.get("path"), str) or not str(act.get("path")).strip():
859
+ return False, f"Action {i} write_file: path is required", []
860
+ content = act.get("content")
861
+ if not isinstance(content, str):
862
+ return False, f"Action {i} write_file: content must be a string", []
863
+ if len(content) > _BROKER_MAX_WRITE_CHARS:
864
+ return False, f"Action {i} write_file: content too large ({len(content)} chars)", []
865
+ valid.append({"type": typ, "path": str(act.get("path")).strip(), "content": content})
866
+ continue
867
+ if typ == "run_shell":
868
+ cmd = act.get("command")
869
+ if not isinstance(cmd, str) or not cmd.strip():
870
+ return False, f"Action {i} run_shell: command is required", []
871
+ valid.append({"type": typ, "command": cmd.strip()})
872
+ continue
873
+ return False, f"Action {i}: unknown type {typ!r}", []
874
+ return True, "", valid
875
+
876
+
877
+ def _extract_expected_target_paths(instruction: str) -> List[str]:
878
+ targets: List[str] = []
879
+ # lightweight heuristic for common file-target asks
880
+ for m in re.finditer(r"\b([A-Za-z0-9._/\-]+(?:\.[A-Za-z0-9._-]+))\b", instruction or ""):
881
+ token = m.group(1).strip()
882
+ if "/" in token or "." in token:
883
+ lower = token.lower()
884
+ if lower in {"true", "false", "none"}:
885
+ continue
886
+ if token not in targets:
887
+ targets.append(token)
888
+ if len(targets) >= 8:
889
+ break
890
+ return targets
891
+
892
+
893
+ def _extract_append_line_directives(instruction: str) -> List[Tuple[str, str]]:
894
+ """
895
+ Extract simple deterministic directives like:
896
+ In <path> append the bullet line '<text>'
897
+ In <path> append the comment line "<text>"
898
+ """
899
+ directives: List[Tuple[str, str]] = []
900
+ seen = set()
901
+ text = str(instruction or "")
902
+ patterns = [
903
+ re.compile(
904
+ r"in\s+([A-Za-z0-9._/\-\\]+)\s+append\s+the\s+(?:bullet|comment|text)?\s*line\s+['\"]([^'\"]+)['\"]",
905
+ flags=re.IGNORECASE,
906
+ ),
907
+ re.compile(
908
+ r"append\s+the\s+(?:bullet|comment|text)?\s*line\s+['\"]([^'\"]+)['\"]\s+to\s+([A-Za-z0-9._/\-\\]+)",
909
+ flags=re.IGNORECASE,
910
+ ),
911
+ ]
912
+ for pattern in patterns:
913
+ for match in pattern.finditer(text):
914
+ if pattern is patterns[0]:
915
+ path = str(match.group(1) or "").strip()
916
+ line = str(match.group(2) or "").strip()
917
+ else:
918
+ line = str(match.group(1) or "").strip()
919
+ path = str(match.group(2) or "").strip()
920
+ if not path or not line:
921
+ continue
922
+ key = (path.replace("\\", "/").lower(), line)
923
+ if key in seen:
924
+ continue
925
+ seen.add(key)
926
+ directives.append((path, line))
927
+ return directives
928
+
929
+
930
+ def _extract_explicit_target_paths_from_payload(payload: Optional[Dict[str, Any]]) -> List[str]:
931
+ if not isinstance(payload, dict):
932
+ return []
933
+ out: List[str] = []
934
+ seen = set()
935
+ params = payload.get("params")
936
+ if not isinstance(params, dict):
937
+ return []
938
+
939
+ def add(val: Any) -> None:
940
+ if not isinstance(val, str):
941
+ return
942
+ token = val.strip().replace("\\", "/")
943
+ while token.startswith("./"):
944
+ token = token[2:]
945
+ token = token.rstrip("/")
946
+ if not token or token in seen:
947
+ return
948
+ if token in {".", "/"}:
949
+ return
950
+ seen.add(token)
951
+ out.append(token)
952
+
953
+ add(params.get("targetPath"))
954
+ planning = params.get("planning")
955
+ if isinstance(planning, dict):
956
+ target_paths = planning.get("targetPaths")
957
+ if isinstance(target_paths, list):
958
+ for item in target_paths:
959
+ add(item)
960
+ scope = planning.get("scope")
961
+ if isinstance(scope, dict):
962
+ write_globs = scope.get("writeGlobs")
963
+ if isinstance(write_globs, list):
964
+ for item in write_globs:
965
+ add(item)
966
+ return out
967
+
968
+
969
+ def _target_hint_matches_changed_path(target_hint: str, changed_path: str) -> bool:
970
+ target = str(target_hint or "").strip().replace("\\", "/").rstrip("/")
971
+ changed = str(changed_path or "").strip().replace("\\", "/").rstrip("/")
972
+ if not target or not changed:
973
+ return False
974
+ if target in {".", "/"}:
975
+ return True
976
+ if changed == target:
977
+ return True
978
+ if changed.startswith(target + "/"):
979
+ return True
980
+ if any(ch in target for ch in "*?[]"):
981
+ return fnmatch.fnmatchcase(changed, target)
982
+ return False
983
+
984
+
985
def _is_git_porcelain_status_command(cmd: str) -> bool:
    """
    Report whether `cmd` is an accepted `git status` call using `--porcelain`.

    The command must pass `_parse_and_validate_shell_command`, start with
    `git status` (case-insensitive), and carry at least one later argument
    beginning with `--porcelain`.
    """
    argv, _ = _parse_and_validate_shell_command(cmd)
    if argv is None or len(argv) < 2:
        return False
    if argv[0].lower() != "git" or argv[1].lower() != "status":
        return False
    for option in argv[2:]:
        if option.lower().startswith("--porcelain"):
            return True
    return False
992
+
993
+
994
+ def _repo_root_for_prompt_loading() -> Path:
995
+ current = Path(__file__).resolve()
996
+ for parent in current.parents:
997
+ if (parent / "prompts").is_dir():
998
+ return parent
999
+ return current.parents[5]
1000
+
1001
+
1002
def _resolve_prompt_file(relative_path: str) -> Path:
    """Return the path of `relative_path` under the `prompts/` directory of the prompt root."""
    prompt_root = _repo_root_for_prompt_loading()
    return prompt_root.joinpath("prompts", relative_path)
1004
+
1005
+
1006
def _load_prompt_template(
    relative_path: str, replacements: Optional[Dict[str, str]] = None
) -> str:
    """
    Load a prompt template and fill in its placeholder tokens.

    The file text is read once as UTF-8 and kept in `_PROMPT_TEMPLATE_CACHE`,
    keyed by the resolved path string. Without `replacements` the raw
    template is returned. Otherwise each `PROMPT_TOKEN_REGEX` match is
    replaced by the value stored under its first capture group.

    Raises:
        FileNotFoundError: the template file does not exist.
        KeyError: the template uses a token that `replacements` lacks.
    """
    prompt_path = _resolve_prompt_file(relative_path)
    cache_key = str(prompt_path)

    text = _PROMPT_TEMPLATE_CACHE.get(cache_key)
    if text is None:
        if not prompt_path.exists():
            raise FileNotFoundError(f"Prompt template not found: {prompt_path}")
        text = prompt_path.read_text(encoding="utf-8")
        _PROMPT_TEMPLATE_CACHE[cache_key] = text

    if not replacements:
        return text

    def _substitute(token: re.Match[str]) -> str:
        name = token.group(1)
        if name in replacements:
            return replacements[name]
        raise KeyError(f"Missing prompt replacement '{{{{{name}}}}}' for {prompt_path}")

    return PROMPT_TOKEN_REGEX.sub(_substitute, text)
1028
+
1029
+
1030
def _broker_system_prompt(repo: str) -> str:
    """Render the broker system prompt template with `repo` as its `repo` replacement."""
    replacements = {"repo": repo}
    return _load_prompt_template("workerpals/miniswe_broker_system_prompt.md", replacements)
1032
+
1033
+
1034
+ def _broker_run(
1035
+ repo: str,
1036
+ instruction: str,
1037
+ llm: _LLMConfig,
1038
+ timeout_ms: int,
1039
+ explicit_targets: Optional[List[str]] = None,
1040
+ write_globs: Optional[List[str]] = None,
1041
+ ) -> Dict[str, Any]:
1042
+ """
1043
+ Executes a simple plan/act loop where the model emits JSON actions.
1044
+ """
1045
+ started = time.time()
1046
+ deadline = started + max(5, int(timeout_ms / 1000))
1047
+
1048
+ max_steps = _tool_broker_max_steps()
1049
+ max_actions = _tool_broker_max_actions_per_step()
1050
+ shell_timeout_sec = _tool_broker_shell_timeout_sec()
1051
+ http_timeout_sec = _tool_broker_http_timeout_sec(llm.base_url)
1052
+ http_retry_max = _tool_broker_http_retry_max(llm.base_url)
1053
+
1054
+ transcript: List[str] = []
1055
+ obs: str = ""
1056
+ edits_made = False
1057
+ shell_validation_ran = False
1058
+ explicit_target_set = {str(t).strip() for t in (explicit_targets or []) if str(t).strip()}
1059
+ allowed_write_globs = [g for g in (write_globs or []) if str(g).strip()]
1060
+ expected_targets = sorted(explicit_target_set) if explicit_target_set else _extract_expected_target_paths(instruction)
1061
+
1062
+ explicit_targets_block = ""
1063
+ completion_requirement = ""
1064
+ if expected_targets:
1065
+ targets_block = "\n".join(f"- {target}" for target in expected_targets[:8])
1066
+ explicit_targets_block = (
1067
+ "\n\n"
1068
+ + _load_prompt_template(
1069
+ "workerpals/miniswe_explicit_targets_block.md",
1070
+ {"targets_block": targets_block},
1071
+ ).strip()
1072
+ )
1073
+ completion_requirement = (
1074
+ "\n\n" + _load_prompt_template("workerpals/miniswe_completion_requirement.md").strip()
1075
+ )
1076
+
1077
+ task_prompt = _load_prompt_template(
1078
+ "workerpals/miniswe_broker_task_prompt.md",
1079
+ {
1080
+ "instruction": instruction,
1081
+ "explicit_targets_block": explicit_targets_block,
1082
+ "completion_requirement": completion_requirement,
1083
+ },
1084
+ ).strip()
1085
+ messages: List[Dict[str, str]] = [
1086
+ {"role": "system", "content": _broker_system_prompt(repo)},
1087
+ {"role": "user", "content": task_prompt},
1088
+ ]
1089
+
1090
+ def _record(line: str) -> None:
1091
+ transcript.append(line)
1092
+ log.debug(line)
1093
+
1094
+ def _remaining_http_timeout_sec() -> int:
1095
+ remaining = int(deadline - time.time())
1096
+ if remaining <= 0:
1097
+ return 10
1098
+ return max(10, min(http_timeout_sec, remaining))
1099
+
1100
+ def _compact_messages_for_timeout_retry() -> bool:
1101
+ nonlocal messages
1102
+ if len(messages) <= 8:
1103
+ return False
1104
+ head: List[Dict[str, str]] = []
1105
+ if messages and isinstance(messages[0], dict):
1106
+ head.append(messages[0])
1107
+ if len(messages) > 1 and isinstance(messages[1], dict):
1108
+ head.append(messages[1])
1109
+ tail = [m for m in messages[-6:] if isinstance(m, dict)]
1110
+ compacted: List[Dict[str, str]] = list(head)
1111
+ compacted.append(
1112
+ {
1113
+ "role": "user",
1114
+ "content": _load_prompt_template(
1115
+ "workerpals/miniswe_context_compaction_retry_prompt.md"
1116
+ ).strip(),
1117
+ }
1118
+ )
1119
+ compacted.extend(tail)
1120
+ if len(compacted) >= len(messages):
1121
+ return False
1122
+ messages = compacted
1123
+ return True
1124
+
1125
+ def _broker_llm_call(step_label: str) -> str:
1126
+ attempt = 0
1127
+ while True:
1128
+ attempt += 1
1129
+ timeout_for_call = _remaining_http_timeout_sec()
1130
+ try:
1131
+ return _chat_completion(llm, messages, timeout_sec=timeout_for_call)
1132
+ except Exception as exc:
1133
+ msg = to_single_line(exc, 400)
1134
+ is_timeout = "timeout" in msg.lower() or "timed out" in msg.lower()
1135
+ if (not is_timeout) or attempt > (http_retry_max + 1) or time.time() >= deadline:
1136
+ raise RuntimeError(
1137
+ f"{step_label} failed after {attempt} attempt(s): {msg}"
1138
+ ) from exc
1139
+ compacted = False
1140
+ if attempt >= 2:
1141
+ compacted = _compact_messages_for_timeout_retry()
1142
+ _record(
1143
+ f"[Broker] {step_label} timeout; retry {attempt}/{http_retry_max + 1} "
1144
+ f"(timeout={timeout_for_call}s): {msg}"
1145
+ )
1146
+ if compacted:
1147
+ _record(
1148
+ "[Broker] timeout mitigation: compacted broker message context "
1149
+ "before retry to reduce token load."
1150
+ )
1151
+ time.sleep(min(2.0, 0.25 * attempt))
1152
+
1153
+ def _broker_fail(summary: str, stderr: str, exit_code: int = 3) -> Dict[str, Any]:
1154
+ transcript_text = "\n".join(transcript).strip()
1155
+ stdout = f"Tool broker transcript:\n{transcript_text}" if transcript_text else ""
1156
+ return {
1157
+ "ok": False,
1158
+ "summary": summary,
1159
+ "stdout": stdout,
1160
+ "stderr": stderr,
1161
+ "exitCode": exit_code,
1162
+ }
1163
+
1164
+ def _attempt_append_line_timeout_recovery(step: int, error_text: str) -> Optional[Dict[str, Any]]:
1165
+ lowered = str(error_text or "").lower()
1166
+ if "timeout" not in lowered and "timed out" not in lowered:
1167
+ return None
1168
+ directives = _extract_append_line_directives(instruction)
1169
+ if not directives:
1170
+ return None
1171
+ _record(
1172
+ f"[Broker] timeout recovery: attempting deterministic append-line completion from instruction "
1173
+ f"(step={step}, directives={len(directives)})."
1174
+ )
1175
+
1176
+ applied = 0
1177
+ skipped = 0
1178
+ for raw_path, line in directives:
1179
+ normalized = _normalize_concrete_repo_path(repo, raw_path)
1180
+ if not normalized or normalized in {".", "/"}:
1181
+ skipped += 1
1182
+ continue
1183
+ if expected_targets and not any(
1184
+ _target_hint_matches_changed_path(target, normalized) for target in expected_targets
1185
+ ):
1186
+ skipped += 1
1187
+ continue
1188
+ try:
1189
+ existing = _read_text_file(repo, normalized, max_chars=500_000)
1190
+ except Exception:
1191
+ existing = ""
1192
+ if any(existing_line.strip() == line.strip() for existing_line in existing.splitlines()):
1193
+ skipped += 1
1194
+ continue
1195
+ try:
1196
+ _append_line(repo, normalized, line, allowed_write_globs)
1197
+ applied += 1
1198
+ except Exception as exc:
1199
+ _record(
1200
+ f"[Broker] timeout recovery: failed to apply append_line for {normalized}: "
1201
+ f"{to_single_line(exc, 240)}"
1202
+ )
1203
+ return None
1204
+
1205
+ changed_paths = summarize_git_changes(repo)
1206
+ changed_set = {str(p).strip().replace("\\", "/") for p in changed_paths}
1207
+ missing_targets = [
1208
+ t
1209
+ for t in expected_targets
1210
+ if t not in {".", "/"} and not any(_target_hint_matches_changed_path(t, c) for c in changed_set)
1211
+ ]
1212
+ if missing_targets:
1213
+ _record(
1214
+ "[Broker] timeout recovery incomplete: expected targets still missing changes: "
1215
+ + ", ".join(missing_targets)
1216
+ )
1217
+ return None
1218
+ if applied == 0 and not changed_paths:
1219
+ return None
1220
+
1221
+ try:
1222
+ final_status = _run_shell(repo, "git status --porcelain", timeout_sec=shell_timeout_sec)
1223
+ except Exception as exc:
1224
+ final_status = f"(git status failed) {to_single_line(exc, 300)}"
1225
+
1226
+ transcript_text = "\n".join(transcript).strip()
1227
+ stdout = ""
1228
+ if transcript_text:
1229
+ stdout += "Tool broker transcript:\n" + transcript_text + "\n\n"
1230
+ stdout += "Deterministic timeout recovery applied append-line directives.\n"
1231
+ stdout += f"Applied directives: {applied}, skipped: {skipped}\n\n"
1232
+ stdout += "Final verification:\n" + final_status
1233
+ return {
1234
+ "ok": True,
1235
+ "summary": "Executed task via tool broker timeout recovery",
1236
+ "stdout": stdout,
1237
+ "stderr": "",
1238
+ "exitCode": 0,
1239
+ }
1240
+
1241
+ def _attempt_timeout_finalize_from_existing_edits(
1242
+ step: int, error_text: str
1243
+ ) -> Optional[Dict[str, Any]]:
1244
+ """
1245
+ If the model times out after prior edit actions already changed files,
1246
+ finalize deterministically from repo state instead of hard-failing.
1247
+ """
1248
+ lowered = str(error_text or "").lower()
1249
+ if "timeout" not in lowered and "timed out" not in lowered:
1250
+ return None
1251
+ if not edits_made:
1252
+ return None
1253
+
1254
+ changed_paths = summarize_git_changes(repo)
1255
+ if not changed_paths:
1256
+ return None
1257
+
1258
+ changed_set = {str(p).strip().replace("\\", "/") for p in changed_paths}
1259
+ if expected_targets:
1260
+ expected_set = {str(p).strip().replace("\\", "/") for p in expected_targets}
1261
+ matched = any(
1262
+ _target_hint_matches_changed_path(expected, changed)
1263
+ for expected in expected_set
1264
+ for changed in changed_set
1265
+ )
1266
+ if not matched:
1267
+ _record(
1268
+ "[Broker] timeout finalize skipped: changed files do not match expected targets. "
1269
+ f"expected={sorted(expected_set)} observed={sorted(changed_set)}"
1270
+ )
1271
+ return None
1272
+
1273
+ try:
1274
+ final_status = _run_shell(repo, "git status --porcelain", timeout_sec=shell_timeout_sec)
1275
+ except Exception as exc:
1276
+ final_status = f"(git status failed) {to_single_line(exc, 300)}"
1277
+
1278
+ transcript_text = "\n".join(transcript).strip()
1279
+ stdout = ""
1280
+ if transcript_text:
1281
+ stdout += "Tool broker transcript:\n" + transcript_text + "\n\n"
1282
+ stdout += (
1283
+ "Timeout recovery finalized already-applied edit actions from repository state.\n"
1284
+ f"Recovery trigger: step={step}, error={to_single_line(error_text, 220)}\n\n"
1285
+ "Changed files:\n"
1286
+ + "\n".join(f"- {p}" for p in changed_paths[:80])
1287
+ + "\n\nFinal verification:\n"
1288
+ + final_status
1289
+ )
1290
+ return {
1291
+ "ok": True,
1292
+ "summary": "Executed task via tool broker timeout finalize",
1293
+ "stdout": stdout,
1294
+ "stderr": "",
1295
+ "exitCode": 0,
1296
+ }
1297
+
1298
+ step = 0
1299
+ model_done = False
1300
+ no_edit_steps = 0
1301
+ no_progress_nudges = 0
1302
+ while step < max_steps and time.time() < deadline:
1303
+ step += 1
1304
+
1305
+ if obs:
1306
+ messages.append({"role": "user", "content": f"Observation (from executed actions):\n{obs}\n\nNext JSON only."})
1307
+
1308
+ try:
1309
+ raw = _broker_llm_call(f"step {step} initial call")
1310
+ except Exception as exc:
1311
+ recovered = _attempt_append_line_timeout_recovery(step, str(exc))
1312
+ if recovered:
1313
+ return recovered
1314
+ recovered = _attempt_timeout_finalize_from_existing_edits(step, str(exc))
1315
+ if recovered:
1316
+ return recovered
1317
+ return _broker_fail(
1318
+ "tool broker failed: llm request error",
1319
+ f"Broker LLM request failed at step {step}: {to_single_line(exc, 500)}",
1320
+ )
1321
+ raw_used = raw
1322
+ _record(f"[Broker] Step {step} model output: {to_single_line(raw, 500)}")
1323
+
1324
+ obj = _extract_first_json_object(raw)
1325
+ if not obj:
1326
+ # one reprompt to force JSON
1327
+ messages.append({"role": "user", "content": "Your last response was not valid JSON. Return ONLY the JSON object."})
1328
+ try:
1329
+ raw2 = _broker_llm_call(f"step {step} json-repair call")
1330
+ except Exception as exc:
1331
+ recovered = _attempt_append_line_timeout_recovery(step, str(exc))
1332
+ if recovered:
1333
+ return recovered
1334
+ recovered = _attempt_timeout_finalize_from_existing_edits(step, str(exc))
1335
+ if recovered:
1336
+ return recovered
1337
+ return _broker_fail(
1338
+ "tool broker failed: llm request error",
1339
+ f"Broker JSON-repair request failed at step {step}: {to_single_line(exc, 500)}",
1340
+ )
1341
+ _record(f"[Broker] Step {step} JSON repair output: {to_single_line(raw2, 500)}")
1342
+ obj = _extract_first_json_object(raw2)
1343
+ if not obj:
1344
+ messages.append(
1345
+ {
1346
+ "role": "user",
1347
+ "content": (
1348
+ "Still invalid JSON. Return ONLY one valid JSON object using strict syntax: "
1349
+ 'keys must use double quotes, key/value separator must be ":", and top-level '
1350
+ 'keys must be exactly: actions, done, note. '
1351
+ 'Example format: {"actions":[{"type":"read_file","path":"README.md"}],'
1352
+ '"done":false,"note":"short"}'
1353
+ ),
1354
+ }
1355
+ )
1356
+ try:
1357
+ raw3 = _broker_llm_call(f"step {step} hard-json-repair call")
1358
+ except Exception as exc:
1359
+ recovered = _attempt_append_line_timeout_recovery(step, str(exc))
1360
+ if recovered:
1361
+ return recovered
1362
+ recovered = _attempt_timeout_finalize_from_existing_edits(step, str(exc))
1363
+ if recovered:
1364
+ return recovered
1365
+ return _broker_fail(
1366
+ "tool broker failed: llm request error",
1367
+ f"Broker hard JSON-repair request failed at step {step}: {to_single_line(exc, 500)}",
1368
+ )
1369
+ _record(f"[Broker] Step {step} hard JSON repair output: {to_single_line(raw3, 500)}")
1370
+ obj = _extract_first_json_object(raw3)
1371
+ if not obj:
1372
+ return {
1373
+ "ok": False,
1374
+ "summary": "tool broker failed: model did not produce parsable JSON actions",
1375
+ "stderr": "Model output could not be parsed as the required JSON action format.",
1376
+ "exitCode": 3,
1377
+ }
1378
+ raw = raw3
1379
+ else:
1380
+ raw = raw2
1381
+ allowed_top_keys = {"actions", "done", "note"}
1382
+ extras = [k for k in obj.keys() if str(k) not in allowed_top_keys]
1383
+ if extras:
1384
+ messages.append(
1385
+ {
1386
+ "role": "user",
1387
+ "content": (
1388
+ "Your JSON had unsupported top-level keys. "
1389
+ "Return ONLY one JSON object with keys: actions, done, note."
1390
+ ),
1391
+ }
1392
+ )
1393
+ try:
1394
+ raw3 = _broker_llm_call(f"step {step} shape-repair call")
1395
+ except Exception as exc:
1396
+ recovered = _attempt_append_line_timeout_recovery(step, str(exc))
1397
+ if recovered:
1398
+ return recovered
1399
+ recovered = _attempt_timeout_finalize_from_existing_edits(step, str(exc))
1400
+ if recovered:
1401
+ return recovered
1402
+ return _broker_fail(
1403
+ "tool broker failed: llm request error",
1404
+ f"Broker shape-repair request failed at step {step}: {to_single_line(exc, 500)}",
1405
+ )
1406
+ _record(f"[Broker] Step {step} shape repair output: {to_single_line(raw3, 500)}")
1407
+ obj2 = _extract_first_json_object(raw3)
1408
+ if not isinstance(obj2, dict):
1409
+ return {
1410
+ "ok": False,
1411
+ "summary": "tool broker failed: invalid response shape",
1412
+ "stderr": f"Unexpected top-level keys in broker JSON: {extras}",
1413
+ "exitCode": 3,
1414
+ }
1415
+ obj = obj2
1416
+ raw_used = raw3
1417
+ extras = [k for k in obj.keys() if str(k) not in allowed_top_keys]
1418
+ if extras:
1419
+ # Recoverable formatting issue: ignore extras rather than hard-fail.
1420
+ obj = {k: obj.get(k) for k in allowed_top_keys}
1421
+ raw_used = json.dumps(obj, ensure_ascii=False)
1422
+
1423
+ actions = obj.get("actions")
1424
+ done = bool(obj.get("done"))
1425
+
1426
+ ok_actions, reason_actions, planned_actions = _validate_broker_actions(actions, max_actions)
1427
+ if not ok_actions:
1428
+ return {
1429
+ "ok": False,
1430
+ "summary": "tool broker failed: invalid actions schema",
1431
+ "stderr": reason_actions,
1432
+ "exitCode": 3,
1433
+ }
1434
+
1435
+ # Execute actions
1436
+ action_logs: List[str] = []
1437
+ step_made_edit = False
1438
+ step_had_shell_rejection = False
1439
+ for i, act in enumerate(planned_actions, start=1):
1440
+ typ = str(act.get("type") or "").strip()
1441
+ try:
1442
+ if typ == "read_file":
1443
+ path = str(act.get("path") or "")
1444
+ content = _read_text_file(repo, path)
1445
+ preview = _truncate_observation(content, max_chars=_BROKER_READ_PREVIEW_CHARS)
1446
+ action_logs.append(
1447
+ f"- read_file {path}: ok ({len(content)} chars total)\n{preview}"
1448
+ )
1449
+ elif typ == "append_line":
1450
+ path = str(act.get("path") or "")
1451
+ line = str(act.get("line") or "")
1452
+ _append_line(repo, path, line, allowed_write_globs)
1453
+ edits_made = True
1454
+ step_made_edit = True
1455
+ action_logs.append(f"- append_line {path}: ok (appended {line!r})")
1456
+ elif typ == "replace_text_once":
1457
+ path = str(act.get("path") or "")
1458
+ old = str(act.get("old") or "")
1459
+ new = str(act.get("new") or "")
1460
+ n = _replace_text_once(repo, path, old, new, allowed_write_globs)
1461
+ edits_made = edits_made or (n > 0)
1462
+ step_made_edit = step_made_edit or (n > 0)
1463
+ action_logs.append(f"- replace_text_once {path}: {n} replacement(s)")
1464
+ elif typ == "write_file":
1465
+ path = str(act.get("path") or "")
1466
+ content = str(act.get("content") or "")
1467
+ _write_text_file(repo, path, content, allowed_write_globs)
1468
+ edits_made = True
1469
+ step_made_edit = True
1470
+ action_logs.append(f"- write_file {path}: ok ({len(content)} chars)")
1471
+ elif typ == "run_shell":
1472
+ cmd = str(act.get("command") or "")
1473
+ out = _run_shell(repo, cmd, timeout_sec=shell_timeout_sec)
1474
+ shell_validation_ran = shell_validation_ran or _is_git_porcelain_status_command(cmd)
1475
+ action_logs.append(f"- run_shell {cmd!r}:\n{out}")
1476
+ else:
1477
+ action_logs.append(f"- action {i}: unknown type {typ!r} (rejected by schema)")
1478
+ except Exception as exc:
1479
+ err = to_single_line(exc, 400)
1480
+ if typ == "run_shell":
1481
+ if "Shell command rejected:" in err:
1482
+ step_had_shell_rejection = True
1483
+ salvage_cmd = _attempt_salvage_rejected_shell_command(
1484
+ str(act.get("command") or ""),
1485
+ err,
1486
+ )
1487
+ if salvage_cmd:
1488
+ try:
1489
+ salvage_out = _run_shell(repo, salvage_cmd, timeout_sec=shell_timeout_sec)
1490
+ shell_validation_ran = shell_validation_ran or _is_git_porcelain_status_command(
1491
+ salvage_cmd,
1492
+ )
1493
+ action_logs.append(
1494
+ f"- run_shell {str(act.get('command') or '')!r}: rejected ({err}); "
1495
+ f"salvage executed {salvage_cmd!r}:\n{salvage_out}"
1496
+ )
1497
+ continue
1498
+ except Exception as salvage_exc:
1499
+ err = f"{err}; salvage failed: {to_single_line(salvage_exc, 260)}"
1500
+ action_logs.append(f"- {typ or 'action'} failed: {err}")
1501
+
1502
+ obs = _truncate_observation("\n".join(action_logs).strip())
1503
+ if step_made_edit:
1504
+ no_edit_steps = 0
1505
+ else:
1506
+ no_edit_steps += 1
1507
+
1508
+ # Feed the raw JSON back as assistant message (helps the model stay consistent)
1509
+ messages.append({"role": "assistant", "content": raw_used})
1510
+ if (
1511
+ not done
1512
+ and not step_made_edit
1513
+ and no_edit_steps >= 2
1514
+ and no_progress_nudges < 2
1515
+ and step < max_steps
1516
+ ):
1517
+ no_progress_nudges += 1
1518
+ nudge_lines = [
1519
+ "Progress guard: you have not produced any edit actions yet.",
1520
+ "In your NEXT response, either:",
1521
+ '1) include at least one edit action (`append_line`, `replace_text_once`, or `write_file`) that advances the task, OR',
1522
+ "2) if genuinely blocked, set done=true and explain the blocker in note.",
1523
+ "Do not continue pure exploration.",
1524
+ ]
1525
+ if step_had_shell_rejection:
1526
+ nudge_lines.append(
1527
+ "Reminder: run_shell forbids pipes/redirection/chaining; use one simple command."
1528
+ )
1529
+ messages.append({"role": "user", "content": "\n".join(nudge_lines)})
1530
+ _record(
1531
+ f"[Broker] progress guard nudge injected (step={step}, no_edit_steps={no_edit_steps})."
1532
+ )
1533
+
1534
+ if done:
1535
+ _record("[Broker] Model signaled done=true.")
1536
+ model_done = True
1537
+ break
1538
+
1539
+ # Always include a final git status if possible (and safe)
1540
+ try:
1541
+ final_status = _run_shell(repo, "git status --porcelain", timeout_sec=shell_timeout_sec)
1542
+ except Exception as exc:
1543
+ final_status = f"(git status failed) {to_single_line(exc, 300)}"
1544
+ final_status_exit = _shell_exit_code(final_status)
1545
+
1546
+ transcript_text = "\n".join(transcript).strip()
1547
+ stdout = ""
1548
+ if transcript_text:
1549
+ stdout += "Tool broker transcript:\n" + transcript_text + "\n\n"
1550
+ stdout += "Final verification:\n" + final_status
1551
+ changed_paths = summarize_git_changes(repo)
1552
+
1553
+ if not model_done:
1554
+ if edits_made and changed_paths:
1555
+ _record(
1556
+ "[Broker] model never returned done=true, but edits were observed; "
1557
+ "auto-finalizing based on repository state."
1558
+ )
1559
+ stdout += (
1560
+ "\n\nAuto-finalize: model did not return done=true, "
1561
+ "but repository changes were detected."
1562
+ )
1563
+ else:
1564
+ return {
1565
+ "ok": False,
1566
+ "summary": "tool broker failed: did not reach done=true before limits",
1567
+ "stdout": stdout,
1568
+ "stderr": (
1569
+ "Model did not return done=true before max steps/timeout. "
1570
+ "Treating broker run as incomplete."
1571
+ ),
1572
+ "exitCode": 3,
1573
+ }
1574
+ if final_status_exit is not None and final_status_exit != 0:
1575
+ return {
1576
+ "ok": False,
1577
+ "summary": "tool broker failed: verification command failed",
1578
+ "stdout": stdout,
1579
+ "stderr": "Final verification command `git status --porcelain` failed.",
1580
+ "exitCode": 3,
1581
+ }
1582
+ if edits_made and not changed_paths:
1583
+ return {
1584
+ "ok": False,
1585
+ "summary": "tool broker failed: model claimed edits but repo has no changes",
1586
+ "stdout": stdout,
1587
+ "stderr": "Broker executed edit actions but git reports no changed files.",
1588
+ "exitCode": 3,
1589
+ }
1590
+ if expected_targets and changed_paths:
1591
+ changed_set = {str(p).strip().replace("\\", "/") for p in changed_paths}
1592
+ expected_set = {str(p).strip().replace("\\", "/") for p in expected_targets}
1593
+ strict_target_match = bool(
1594
+ explicit_target_set
1595
+ and not any(t in {".", "/"} for t in explicit_target_set)
1596
+ and not any(any(ch in t for ch in "*?[]") for t in explicit_target_set)
1597
+ )
1598
+ matched = any(
1599
+ _target_hint_matches_changed_path(expected, changed)
1600
+ for expected in expected_set
1601
+ for changed in changed_set
1602
+ )
1603
+ if expected_set and not matched:
1604
+ msg = (
1605
+ "Expected one of target paths to change, but observed different files. "
1606
+ f"expected={sorted(expected_set)} observed={sorted(changed_set)}"
1607
+ )
1608
+ if strict_target_match:
1609
+ return {
1610
+ "ok": False,
1611
+ "summary": "tool broker failed: changed files do not match explicit target paths",
1612
+ "stdout": stdout + "\n\nChanged files:\n" + "\n".join(f"- {p}" for p in changed_paths),
1613
+ "stderr": msg,
1614
+ "exitCode": 3,
1615
+ }
1616
+ stdout += "\n\nTarget-path mismatch (heuristic, non-fatal):\n" + msg
1617
+ if edits_made and not shell_validation_ran:
1618
+ stdout += (
1619
+ "\n\nValidation note:\n"
1620
+ "Model did not run `git status --porcelain` during broker steps; "
1621
+ "broker-level final verification was used."
1622
+ )
1623
+
1624
+ return {
1625
+ "ok": True,
1626
+ "summary": "Executed task via tool broker shim",
1627
+ "stdout": stdout,
1628
+ "stderr": "",
1629
+ "exitCode": 0,
1630
+ }
1631
+
1632
+
1633
+ # ─── mini-swe-agent execution ───────────────────────────────────────────────
1634
+
1635
+ def _run_miniswe_task(
1636
+ repo: str,
1637
+ instruction: str,
1638
+ payload: Optional[Dict[str, Any]] = None,
1639
+ supplemental_guidance: Optional[List[str]] = None,
1640
+ ) -> Dict[str, Any]:
1641
+ """Execute a task using mini-swe-agent's Python SDK (and optional broker fallback)."""
1642
+
1643
+ try:
1644
+ from minisweagent.agents.default import DefaultAgent
1645
+ from minisweagent.models.litellm_model import LitellmModel
1646
+ from minisweagent.environments.local import LocalEnvironment
1647
+ except ImportError as exc:
1648
+ return {
1649
+ "ok": False,
1650
+ "summary": (
1651
+ "mini-swe-agent is not installed. "
1652
+ "Install with: pip install mini-swe-agent"
1653
+ ),
1654
+ "stderr": str(exc),
1655
+ "exitCode": 3,
1656
+ }
1657
+
1658
+ model_name, api_key, base_url = resolve_llm_config(
1659
+ default_model=DEFAULT_MINISWE_MODEL, logger=log,
1660
+ )
1661
+ if not model_name:
1662
+ return {
1663
+ "ok": False,
1664
+ "summary": (
1665
+ "task.execute requires an LLM model for agentic execution. "
1666
+ "Set WORKERPALS_LLM_MODEL."
1667
+ ),
1668
+ "stderr": "",
1669
+ "exitCode": 2,
1670
+ }
1671
+
1672
+ if not api_key:
1673
+ if looks_local_base_url(base_url):
1674
+ api_key = "local"
1675
+ else:
1676
+ return {
1677
+ "ok": False,
1678
+ "summary": (
1679
+ "task.execute agent mode requires an API key. "
1680
+ "Set WORKERPALS_LLM_API_KEY."
1681
+ ),
1682
+ "stderr": "",
1683
+ "exitCode": 2,
1684
+ }
1685
+
1686
+ timeout_ms = _execution_timeout_ms()
1687
+ timeout_minutes = max(1, round(timeout_ms / 60000))
1688
+
1689
+ def _compose_instruction(extra_guidance: Optional[List[str]] = None) -> str:
1690
+ full = instruction
1691
+ merged_guidance: List[str] = []
1692
+ if supplemental_guidance:
1693
+ merged_guidance.extend([g for g in supplemental_guidance if g and str(g).strip()])
1694
+ if extra_guidance:
1695
+ merged_guidance.extend([g for g in extra_guidance if g and str(g).strip()])
1696
+ if merged_guidance:
1697
+ parts = [str(g).strip() for g in merged_guidance if str(g).strip()]
1698
+ if parts:
1699
+ guidance_section = _load_prompt_template(
1700
+ "workerpals/miniswe_supplemental_guidance_section.md",
1701
+ {"guidance_entries": "\n\n".join(parts)},
1702
+ ).strip()
1703
+ full += f"\n\n{guidance_section}"
1704
+
1705
+ timeout_note = _load_prompt_template(
1706
+ "workerpals/miniswe_timeout_note.md",
1707
+ {"timeout_minutes": str(timeout_minutes)},
1708
+ ).strip()
1709
+ full += f"\n\n{timeout_note}"
1710
+ return full
1711
+
1712
+ log.info(f"Starting mini-swe-agent execution in {repo}")
1713
+ log.info(f"Model: {model_name}, base_url: {base_url or '(default)'}")
1714
+ log.info(f"Timeout: {timeout_ms}ms ({timeout_minutes}min)")
1715
+ log.debug(f"Instruction: {to_single_line(instruction, 300)}")
1716
+
1717
+ # Pre-run baseline so we can tell whether *anything* changed even if the model/tooling is flaky.
1718
+ baseline_changes = set(summarize_git_changes(repo))
1719
+ explicit_targets = _extract_explicit_target_paths_from_payload(payload)
1720
+ explicit_write_globs = _extract_write_globs_from_payload(payload)
1721
+
1722
+ # Prepare broker config upfront (so we can fall back cleanly)
1723
+ llm_cfg = _LLMConfig(model=model_name, api_key=api_key or "", base_url=base_url or "")
1724
+
1725
+ def _run_broker_with_recovery(extra_guidance: Optional[List[str]] = None) -> Dict[str, Any]:
1726
+ broker_result = _broker_run(
1727
+ repo,
1728
+ instruction=_compose_instruction(extra_guidance=extra_guidance),
1729
+ llm=llm_cfg,
1730
+ timeout_ms=timeout_ms,
1731
+ explicit_targets=explicit_targets,
1732
+ write_globs=explicit_write_globs,
1733
+ )
1734
+ retry_max = _tool_broker_run_retry_max()
1735
+ retry_count = 0
1736
+ while retry_count < retry_max and (
1737
+ _is_broker_timeout_failure(broker_result)
1738
+ or _is_broker_incomplete_failure(broker_result)
1739
+ ):
1740
+ retry_count += 1
1741
+ timeout_like = _is_broker_timeout_failure(broker_result)
1742
+ if timeout_like:
1743
+ log.info(
1744
+ "Tool broker timed out while waiting for model output; retrying broker run "
1745
+ f"{retry_count}/{retry_max} with one-pass timeout recovery guidance."
1746
+ )
1747
+ else:
1748
+ log.info(
1749
+ "Tool broker did not converge before limits; retrying broker run "
1750
+ f"{retry_count}/{retry_max} with strict completion guidance."
1751
+ )
1752
+ retry_guidance = [
1753
+ line.strip()
1754
+ for line in _load_prompt_template(
1755
+ "workerpals/miniswe_recovery_guidance_base.md"
1756
+ ).splitlines()
1757
+ if line.strip()
1758
+ ]
1759
+ if not timeout_like:
1760
+ retry_guidance.append(
1761
+ _load_prompt_template("workerpals/miniswe_recovery_guidance_blocker_line.md").strip()
1762
+ )
1763
+ merged_guidance = list(extra_guidance or [])
1764
+ merged_guidance.extend(retry_guidance)
1765
+ previous = broker_result
1766
+ broker_result = _broker_run(
1767
+ repo,
1768
+ instruction=_compose_instruction(extra_guidance=merged_guidance),
1769
+ llm=llm_cfg,
1770
+ timeout_ms=timeout_ms,
1771
+ explicit_targets=explicit_targets,
1772
+ write_globs=explicit_write_globs,
1773
+ )
1774
+ if not bool(broker_result.get("ok")):
1775
+ prior_detail = to_single_line(
1776
+ previous.get("stderr") or previous.get("summary") or "",
1777
+ 300,
1778
+ )
1779
+ if prior_detail:
1780
+ current_stdout = str(broker_result.get("stdout") or "")
1781
+ broker_result["stdout"] = (
1782
+ f"Prior timeout attempt detail: {prior_detail}\n\n{current_stdout}"
1783
+ if current_stdout
1784
+ else f"Prior timeout attempt detail: {prior_detail}"
1785
+ )
1786
+ return broker_result
1787
+
1788
+ exit_info: Dict[str, Any] = {}
1789
+ agent = None
1790
+ agent_messages: List[Dict[str, Any]] = []
1791
+ broker_enabled = _tool_broker_enabled(base_url)
1792
+ prefer_broker_for_scoped_writes = bool(explicit_write_globs)
1793
+ ran_primary_broker = False
1794
+ if prefer_broker_for_scoped_writes and broker_enabled:
1795
+ log.info("Using tool broker shim for strict per-write scope enforcement.")
1796
+ broker_result = _run_broker_with_recovery()
1797
+ if not bool(broker_result.get("ok")):
1798
+ return {
1799
+ "ok": False,
1800
+ "summary": str(broker_result.get("summary") or "tool broker execution failed"),
1801
+ "stdout": str(broker_result.get("stdout") or ""),
1802
+ "stderr": str(broker_result.get("stderr") or ""),
1803
+ "exitCode": to_int(broker_result.get("exitCode"), 3),
1804
+ }
1805
+ exit_info = {"submission": broker_result.get("stdout") or ""}
1806
+ ran_primary_broker = True
1807
+ elif prefer_broker_for_scoped_writes and not broker_enabled:
1808
+ log.info(
1809
+ "Strict write scope requested but tool broker is disabled; "
1810
+ "using native mini-swe path with post-run scope verification."
1811
+ )
1812
+
1813
+ if not ran_primary_broker:
1814
+ try:
1815
+ import yaml
1816
+ from minisweagent import package_dir
1817
+
1818
+ litellm_kwargs: Dict[str, Any] = {}
1819
+ if api_key:
1820
+ litellm_kwargs["api_key"] = api_key
1821
+ if base_url:
1822
+ litellm_kwargs["base_url"] = base_url
1823
+
1824
+ model = LitellmModel(
1825
+ model_name=model_name,
1826
+ model_kwargs=litellm_kwargs,
1827
+ cost_tracking="ignore_errors",
1828
+ )
1829
+
1830
+ env = LocalEnvironment(cwd=repo)
1831
+
1832
+ config_path = package_dir / "config" / "default.yaml"
1833
+ with open(config_path, "r", encoding="utf-8") as f:
1834
+ builtin_config = yaml.safe_load(f)
1835
+ agent_kwargs = builtin_config.get("agent", {}) or {}
1836
+
1837
+ agent_kwargs["cost_limit"] = 0.0 # we manage budget externally
1838
+ agent_kwargs["step_limit"] = setting_int(
1839
+ "WORKERPALS_MINISWE_AGENT_MAX_STEPS",
1840
+ "workerpals.miniswe.agent_max_steps",
1841
+ 30,
1842
+ )
1843
+
1844
+ agent = DefaultAgent(model, env, **agent_kwargs)
1845
+ log.info("Agent initialized, running task...")
1846
+
1847
+ toolcall_retry_max = _toolcall_retry_max()
1848
+ attempt = 0
1849
+ while True:
1850
+ try:
1851
+ attempt += 1
1852
+ if attempt > 1:
1853
+ log.info(
1854
+ f"Retrying agent run after tool-call failure (attempt {attempt}/{toolcall_retry_max + 1})."
1855
+ )
1856
+
1857
+ extra_guidance: List[str] = []
1858
+ if attempt > 1:
1859
+ extra_guidance.append(_build_strict_tool_use_guidance(repo))
1860
+ extra_guidance.append(
1861
+ _load_prompt_template("workerpals/miniswe_toolcall_retry_guidance.md").strip()
1862
+ )
1863
+
1864
+ exit_info = agent.run(_compose_instruction(extra_guidance=extra_guidance)) or {}
1865
+ log.info("Agent execution completed.")
1866
+
1867
+ # Log what the agent did
1868
+ if hasattr(agent, "messages") and agent.messages:
1869
+ agent_messages = [msg for msg in agent.messages if isinstance(msg, dict)]
1870
+ log.debug(f"Agent message history ({len(agent.messages)} messages):")
1871
+ log_agent_messages(agent.messages, log)
1872
+ log_git_status(repo, log)
1873
+ break
1874
+
1875
+ except Exception as exc:
1876
+ if is_no_tool_calls_error(exc) and (attempt - 1) < toolcall_retry_max:
1877
+ log.info(
1878
+ "Detected tool-call failure from model/runtime: "
1879
+ f"{to_single_line(exc, 220)}"
1880
+ )
1881
+ continue
1882
+ raise
1883
+
1884
+ except Exception as exc:
1885
+ # If it's a tool-call failure, optionally fall back to broker shim.
1886
+ if is_no_tool_calls_error(exc):
1887
+ if broker_enabled:
1888
+ log.info("mini-swe-agent failed due to missing tool calls; falling back to tool broker shim.")
1889
+ broker_result = _run_broker_with_recovery()
1890
+ if not bool(broker_result.get("ok")):
1891
+ return {
1892
+ "ok": False,
1893
+ "summary": str(broker_result.get("summary") or "tool broker fallback failed"),
1894
+ "stdout": str(broker_result.get("stdout") or ""),
1895
+ "stderr": str(broker_result.get("stderr") or ""),
1896
+ "exitCode": to_int(broker_result.get("exitCode"), 3),
1897
+ }
1898
+
1899
+ # The broker_result itself doesn't include changed-files list; we add it below in the shared post-run path.
1900
+ # We return broker_result as "exit_info-like" output by mapping it into exit_info and continuing.
1901
+ exit_info = {"submission": broker_result.get("stdout") or ""}
1902
+ # Continue into post-run summary construction (changed files etc.) by not returning early.
1903
+ else:
1904
+ return {
1905
+ "ok": False,
1906
+ "summary": "mini-swe-agent could not execute: model did not emit tool calls",
1907
+ "stderr": (
1908
+ "Agentic execution requires a tool-calling-capable model/runtime. "
1909
+ "The model output did not include any tool calls.\n"
1910
+ f"Error: {to_single_line(exc, 600)}\n"
1911
+ "Fix options:\n"
1912
+ "- Use a model/runtime that supports tool calls (function calling), or\n"
1913
+ "- Enable the tool broker shim: WORKERPALS_MINISWE_TOOL_BROKER=1, or\n"
1914
+ "- Switch executor backend."
1915
+ ),
1916
+ "exitCode": 3,
1917
+ }
1918
+ else:
1919
+ return {
1920
+ "ok": False,
1921
+ "summary": "mini-swe-agent task execution failed",
1922
+ "stderr": str(exc),
1923
+ "exitCode": 1,
1924
+ }
1925
+
1926
+ if _messages_indicate_missing_tool_calls(agent_messages):
1927
+ if broker_enabled:
1928
+ log.info("mini-swe-agent exited without tool calls; falling back to tool broker shim.")
1929
+ broker_result = _run_broker_with_recovery()
1930
+ if not bool(broker_result.get("ok")):
1931
+ return {
1932
+ "ok": False,
1933
+ "summary": str(broker_result.get("summary") or "tool broker fallback failed"),
1934
+ "stdout": str(broker_result.get("stdout") or ""),
1935
+ "stderr": str(broker_result.get("stderr") or ""),
1936
+ "exitCode": to_int(broker_result.get("exitCode"), 3),
1937
+ }
1938
+ exit_info = {"submission": broker_result.get("stdout") or ""}
1939
+ else:
1940
+ return {
1941
+ "ok": False,
1942
+ "summary": "mini-swe-agent could not execute: model did not emit tool calls",
1943
+ "stderr": (
1944
+ "Agentic execution requires a tool-calling-capable model/runtime. "
1945
+ "The model output did not include any tool calls.\n"
1946
+ "Fix options:\n"
1947
+ "- Enable the tool broker shim: WORKERPALS_MINISWE_TOOL_BROKER=1, or\n"
1948
+ "- Use a model/runtime with function-calling support."
1949
+ ),
1950
+ "exitCode": 3,
1951
+ }
1952
+
1953
+ # Extract the agent's conversational output from its message history (or broker transcript).
1954
+ agent_text = ""
1955
+ try:
1956
+ agent_text = str(exit_info.get("submission") or "").strip()
1957
+ if not agent_text and agent is not None and hasattr(agent, "messages"):
1958
+ parts: List[str] = []
1959
+ for msg in agent.messages:
1960
+ if msg.get("role") == "assistant":
1961
+ content = str(msg.get("content") or "").strip()
1962
+ if content:
1963
+ parts.append(content)
1964
+ if parts:
1965
+ agent_text = "\n\n".join(parts)
1966
+ except Exception:
1967
+ pass
1968
+
1969
+ # Post-run: determine what files were changed relative to baseline.
1970
+ changed_paths = summarize_git_changes(repo)
1971
+ delta = [p for p in changed_paths if p not in baseline_changes]
1972
+ effective = delta if delta else changed_paths
1973
+
1974
+ # Build stdout: include agent/broker text output followed by file change info.
1975
+ stdout_parts: List[str] = []
1976
+ if agent_text:
1977
+ stdout_parts.append(agent_text)
1978
+
1979
+ if effective:
1980
+ listed = "\n".join(f"- {path}" for path in effective[:40])
1981
+ if len(effective) > 40:
1982
+ listed += "\n- ..."
1983
+ suffix = ""
1984
+ if delta and len(delta) != len(changed_paths):
1985
+ suffix = f" (delta={len(delta)}, total_status={len(changed_paths)})"
1986
+ stdout_parts.append(f"Changed files:\n{listed}")
1987
+ return {
1988
+ "ok": True,
1989
+ "summary": f"Executed task and modified {len(effective)} file(s){suffix}",
1990
+ "stdout": "\n\n".join(stdout_parts),
1991
+ "stderr": "",
1992
+ "exitCode": 0,
1993
+ }
1994
+
1995
+ if not stdout_parts:
1996
+ stdout_parts.append("No modified files were detected after execution.")
1997
+
1998
+ return {
1999
+ "ok": True,
2000
+ "summary": "Executed task (no file changes detected)",
2001
+ "stdout": "\n\n".join(stdout_parts),
2002
+ "stderr": "",
2003
+ "exitCode": 0,
2004
+ }
2005
+
2006
+
2007
+ # ─── Main entry point ───────────────────────────────────────────────────────
2008
+
2009
def main() -> int:
    """Parse the task.execute payload, run it, emit the result, return the exit status.

    Any exception raised while parsing or executing is converted into a failed
    result dict (with the traceback in ``stderr``) so a result is always emitted.

    Returns:
        0 when the result is ``ok``; otherwise the result's ``exitCode``,
        forced to a non-zero value so a failed run can never exit 0.
    """
    try:
        task = parse_task_execute_payload(sys.argv, logger=log)
        result = _run_miniswe_task(
            task.repo, task.instruction, task.payload, task.supplemental_guidance,
        )
    except Exception as exc:
        result = {
            "ok": False,
            "summary": "miniswe wrapper crashed while executing task.execute",
            "stdout": "",
            "stderr": traceback.format_exc(),
            "exitCode": 1,
            "error": to_single_line(exc, 300),
        }
    emit(result)
    if bool(result.get("ok")):
        return 0
    exit_code = to_int(result.get("exitCode"), 1)
    # A failed result carrying exitCode 0 must still exit non-zero.
    return exit_code if exit_code != 0 else 1
2026
+
2027
+
2028
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)