execution-agent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- execution_agent/__init__.py +8 -0
- execution_agent/__main__.py +5 -0
- execution_agent/agent.py +955 -0
- execution_agent/commands_interface.json +7 -0
- execution_agent/config.py +21 -0
- execution_agent/context.py +1565 -0
- execution_agent/docker_helpers_static.py +593 -0
- execution_agent/env.py +61 -0
- execution_agent/exceptions.py +17 -0
- execution_agent/exit_artifacts.py +350 -0
- execution_agent/main.py +1234 -0
- execution_agent/prompt_files/c_guidelines +481 -0
- execution_agent/prompt_files/command_stuck +7 -0
- execution_agent/prompt_files/cpp_guidelines +481 -0
- execution_agent/prompt_files/cycle_instruction +51 -0
- execution_agent/prompt_files/java_guidelines +37 -0
- execution_agent/prompt_files/javascript_guidelines +69 -0
- execution_agent/prompt_files/latest_containter_technology +7 -0
- execution_agent/prompt_files/python_guidelines +48 -0
- execution_agent/prompt_files/remove_progress_bars +1 -0
- execution_agent/prompt_files/rust_guidelines +53 -0
- execution_agent/prompt_files/search_workflows_summary +121 -0
- execution_agent/prompt_files/steps_list.json +32 -0
- execution_agent/prompt_files/summarize_cycle +13 -0
- execution_agent/prompt_files/tools_list +99 -0
- execution_agent/prompt_logging.py +311 -0
- execution_agent/repetition.py +39 -0
- execution_agent/shared_utils.py +507 -0
- execution_agent/state_persistence.py +286 -0
- execution_agent/tools.py +1611 -0
- execution_agent/trace_to_bash.py +281 -0
- execution_agent-0.1.0.dist-info/METADATA +231 -0
- execution_agent-0.1.0.dist-info/RECORD +37 -0
- execution_agent-0.1.0.dist-info/WHEEL +5 -0
- execution_agent-0.1.0.dist-info/entry_points.txt +2 -0
- execution_agent-0.1.0.dist-info/licenses/LICENSE.md +46 -0
- execution_agent-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,593 @@
|
|
|
1
|
+
# execution_agent/docker_helpers_static.py
|
|
2
|
+
"""
|
|
3
|
+
Docker helper functions for the execution agent.
|
|
4
|
+
|
|
5
|
+
This module uses shared_utils for common functionality like exec_in_container,
|
|
6
|
+
XML conversion, etc. to avoid code duplication.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import shlex
|
|
12
|
+
import time
|
|
13
|
+
import uuid
|
|
14
|
+
from typing import Any, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
import docker # type: ignore
|
|
18
|
+
from docker.models.containers import Container as DockerContainer # type: ignore
|
|
19
|
+
except Exception: # pragma: no cover
|
|
20
|
+
docker = None
|
|
21
|
+
DockerContainer = Any # type: ignore
|
|
22
|
+
|
|
23
|
+
# Import shared utilities
|
|
24
|
+
from execution_agent.shared_utils import (
|
|
25
|
+
exec_in_container,
|
|
26
|
+
read_file_tail,
|
|
27
|
+
read_file_from_container,
|
|
28
|
+
convert_xml_to_yaml,
|
|
29
|
+
strip_ansi_codes,
|
|
30
|
+
get_docker_client,
|
|
31
|
+
SCREEN_SESSION,
|
|
32
|
+
RUN_DIR,
|
|
33
|
+
LOG_DIR,
|
|
34
|
+
STUCK_TIMEOUT_SECONDS as NO_OUTPUT_CHANGE_SECONDS,
|
|
35
|
+
POLL_INTERVAL_SECONDS as WAIT_POLL_SECONDS,
|
|
36
|
+
MAX_TAIL_BYTES,
|
|
37
|
+
ANSI_ESCAPE_RE as _ANSI_RE,
|
|
38
|
+
RC_MARKER_RE as _RC_ANY_RX,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
_LOG = logging.getLogger("execution_agent.docker")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ----------------------------
|
|
46
|
+
# Docker client helpers - using shared implementations
|
|
47
|
+
# ----------------------------
|
|
48
|
+
|
|
49
|
+
def _docker_client():
    """Return the Docker client provided by ``shared_utils.get_docker_client()``."""
    client = get_docker_client()
    return client
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def check_image_exists(tag: str) -> bool:
    """
    Report whether ``images.get(tag)`` succeeds on the Docker client.

    Any exception raised by the lookup is logged at debug level and treated
    as "image not present".
    """
    try:
        get_docker_client().images.get(tag)
    except Exception as e:
        _LOG.debug(f"Image {tag} not found: {e}")
        found = False
    else:
        found = True
    return found
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _format_build_chunk(chunk: Any) -> list[str]:
    """Turn one Docker build-log chunk into zero or more readable log lines."""
    if not isinstance(chunk, dict):
        return [str(chunk)]

    lines: list[str] = []
    if chunk.get("stream"):
        lines.extend(str(chunk["stream"]).rstrip("\n").splitlines())
    if chunk.get("status"):
        prog = chunk.get("progress") or ""
        lines.append(f"{chunk['status']} {prog}".rstrip())
    if chunk.get("errorDetail") or chunk.get("error"):
        detail = chunk.get("errorDetail")
        # errorDetail is normally a dict with a "message" key, but do not
        # assume so: this helper also runs inside build_image's except handler.
        message = detail.get("message") if isinstance(detail, dict) else detail
        text = message or chunk.get("error") or (str(detail) if detail else "")
        lines.append(f"ERROR: {text}")
    if chunk.get("aux"):
        lines.append(f"AUX: {chunk['aux']}")
    return lines


def build_image(dockerfile_dir: str, tag: str) -> str:
    """
    Build docker image and return a human-readable build log.

    Args:
        dockerfile_dir: Build context directory passed as ``path``.
        tag: Tag to assign to the built image.

    Returns:
        The build log as one newline-joined string. On success the last line
        is "Build completed successfully. Image: <id>". If the build raises,
        the exception is logged and the returned log ends with
        "BUILD FAILED: <ExceptionType>: <message>"; no exception propagates
        from the build itself.

    Note:
        ``get_docker_client()`` is called outside the ``try``, so an error
        while obtaining the client propagates to the caller.
    """
    client = get_docker_client()
    log_lines: list[str] = [f"Starting build: context='{dockerfile_dir}', tag='{tag}'"]

    try:
        image, logs = client.images.build(path=dockerfile_dir, tag=tag, rm=True, pull=True, nocache=False)
        for chunk in logs:
            log_lines.extend(_format_build_chunk(chunk))

        img_id = getattr(image, "short_id", None) or getattr(image, "id", None) or "unknown"
        log_lines.append(f"Build completed successfully. Image: {img_id}")
        return "\n".join(log_lines)

    except Exception as e:
        _LOG.error(f"Docker build failed: {e}", exc_info=True)

        if hasattr(e, "explanation"):
            log_lines.append(f"ERROR EXPLANATION: {e.explanation}")

        # Some exceptions carry the partial build log; format it exactly like
        # the success path so both logs read the same.
        for entry in getattr(e, "build_log", None) or ():
            log_lines.extend(_format_build_chunk(entry))

        log_lines.append(f"BUILD FAILED: {type(e).__name__}: {e}")
        return "\n".join(log_lines)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ----------------------------
|
|
123
|
+
# Container exec primitives - using shared implementations
|
|
124
|
+
# ----------------------------
|
|
125
|
+
|
|
126
|
+
def _exec(container: DockerContainer, cmd: str, tty: bool = False) -> tuple[int, str]:
    """Execute ``cmd`` in ``container`` by delegating to shared_utils.exec_in_container()."""
    result = exec_in_container(container, cmd, tty=tty)
    return result
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _read_tail(container: DockerContainer, path: str) -> str:
    """Return the tail of ``path`` in the container, capped at MAX_TAIL_BYTES, via shared_utils."""
    tail = read_file_tail(container, path, max_bytes=MAX_TAIL_BYTES)
    return tail
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ----------------------------
|
|
137
|
+
# XML helpers - using shared implementations
|
|
138
|
+
# ----------------------------
|
|
139
|
+
_convert_xml_to_yaml_content = convert_xml_to_yaml
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ----------------------------
|
|
143
|
+
# File read/write in container - now using shared implementations
|
|
144
|
+
# Note: read_file_from_container is imported from shared_utils
|
|
145
|
+
# We keep a local write_string_to_file that delegates to shared_utils
|
|
146
|
+
|
|
147
|
+
from execution_agent.shared_utils import write_file_to_container
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def write_string_to_file(container: DockerContainer, file_content: str, file_path: str) -> Optional[str]:
    """
    Store ``file_content`` at ``file_path`` inside the container.

    Compatibility shim: forwards to shared_utils.write_file_to_container(),
    which takes the path before the content, and returns its result unchanged.
    """
    outcome = write_file_to_container(container, file_path, file_content)
    return outcome
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ----------------------------
|
|
159
|
+
# Package/screen setup (legacy-inspired)
|
|
160
|
+
# ----------------------------
|
|
161
|
+
|
|
162
|
+
def _command_exists(container: DockerContainer, cmd: str) -> bool:
    """Return True when ``command -v <cmd>`` exits with status 0 inside the container."""
    probe = f"command -v {shlex.quote(cmd)} >/dev/null 2>&1"
    status, _unused = _exec(container, probe)
    return status == 0
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _whoami_root(container: DockerContainer) -> bool:
    """Return True when ``id -u`` succeeds in the container and prints ``0``."""
    status, stdout = _exec(container, "id -u")
    if status != 0:
        return False
    return stdout.strip() == "0"
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _has_passwordless_sudo(container: DockerContainer) -> bool:
    """Return True when ``sudo`` is present and ``sudo -n true`` exits with status 0."""
    if _command_exists(container, "sudo"):
        status, _unused = _exec(container, "sudo -n true")
        return status == 0
    return False
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _escalation_prefix(container: DockerContainer) -> Optional[str]:
    """
    Pick the prefix to put in front of commands that need root.

    Returns "" when already root, "sudo -n " when passwordless sudo works,
    and None when neither is available.
    """
    prefix: Optional[str] = None
    if _whoami_root(container):
        prefix = ""
    elif _has_passwordless_sudo(container):
        prefix = "sudo -n "
    return prefix
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _detect_pkg_manager(container: DockerContainer) -> Optional[str]:
    """
    Return the first package manager found in the container, or None.

    Candidates are probed with ``command -v`` in this fixed order:
    microdnf, dnf, yum, apt-get, apk.
    """
    for name in ("microdnf", "dnf", "yum", "apt-get", "apk"):
        status, _unused = _exec(container, f"command -v {name} >/dev/null 2>&1")
        if status == 0:
            return name
    return None
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _install_cmds(pm: str, pkgs: list[str]) -> list[str]:
    """
    Build the shell commands that install ``pkgs`` with package manager ``pm``.

    Package names are shell-quoted. An unknown ``pm`` yields an empty list.
    """
    quoted = " ".join(map(shlex.quote, pkgs))
    by_manager = {
        "apt-get": [
            "apt-get update",
            f"DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends {quoted}",
        ],
        "dnf": [f"dnf -y install {quoted}"],
        "microdnf": [f"microdnf -y install {quoted}"],
        "yum": [f"yum -y install {quoted}"],
        "apk": [f"apk add --no-cache {quoted}"],
    }
    return by_manager.get(pm, [])
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _ensure_run_dir(container: DockerContainer) -> None:
    """Create RUN_DIR inside the container and set its mode to 1777; the exit status is ignored."""
    run_dir = shlex.quote(RUN_DIR)
    _exec(container, f"mkdir -p {run_dir} && chmod 1777 {run_dir}")
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _best_effort_timezone_setup(container: DockerContainer) -> None:
    """
    Try to set the container timezone to Europe/Berlin without ever failing.

    With root or passwordless sudo, /etc/localtime and /etc/timezone are
    updated. Otherwise the zone name is only written to /tmp/timezone.info.
    Both commands end in ``|| true`` and their results are ignored.
    """
    prefix = _escalation_prefix(container)
    if prefix is not None:
        tz_cmd = "TZ=Europe/Berlin && ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone"
        _exec(container, f"{prefix}sh -lc {shlex.quote(tz_cmd)} || true")
        return
    # No way to become root: leave a marker file instead of touching /etc.
    _exec(container, "echo Europe/Berlin > /tmp/timezone.info || true")
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _ensure_packages(container: DockerContainer, pkgs: list[str]) -> Optional[str]:
    """
    Install ``pkgs`` in the container using whichever package manager is found.

    Returns None on success, otherwise a human-readable explanation: no
    package manager, no way to become root, or the first install command
    that exited non-zero together with its output.
    """
    manager = _detect_pkg_manager(container)
    prefix = _escalation_prefix(container)

    if manager is None:
        return "Cannot install packages: no supported package manager detected (microdnf/dnf/yum/apt-get/apk)."
    if prefix is None:
        return "Cannot install packages: need root privileges (not root; passwordless sudo unavailable)."

    install_steps = _install_cmds(manager, pkgs)
    if not install_steps:
        return f"Package manager '{manager}' detected but installer does not support it."

    for step in install_steps:
        full_cmd = f"{prefix}{step}"
        status, output = _exec(container, full_cmd)
        if status != 0:
            # Stop at the first failing step and report it verbatim.
            return f"Package installation failed.\nCommand: {full_cmd}\nExit code: {status}\nOutput:\n{output}"

    return None
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def create_screen_session(container: DockerContainer) -> tuple[bool, str]:
    """
    Install bash, screen and psmisc (plus coreutils on apt-get systems), then
    start a fresh detached screen session running ``bash -l``.

    An existing session with the same name is quit first. After a successful
    start, the run directory and timezone are set up on a best-effort basis.

    Returns:
        (True, confirmation message) on success, or (False, reason) when the
        packages cannot be installed, screen fails to start, or the session
        does not appear in ``screen -ls``.
    """
    packages = ["bash", "screen", "psmisc"]
    if _detect_pkg_manager(container) == "apt-get":
        packages.append("coreutils")

    install_error = _ensure_packages(container, packages)
    if install_error:
        return False, install_error

    session = shlex.quote(SCREEN_SESSION)

    # Drop any previous session of the same name before starting a new one.
    _exec(container, f"screen -S {session} -X quit >/dev/null 2>&1 || true")
    status, output = _exec(container, f"screen -dmS {session} bash -l")
    if status != 0:
        return False, f"Failed to start screen session '{SCREEN_SESSION}'. Output:\n{output}"

    # Confirm the session is actually listed.
    status, listing = _exec(container, "screen -ls")
    listed = status == 0 and f".{SCREEN_SESSION}" in listing
    if not listed:
        return False, f"Screen session '{SCREEN_SESSION}' not found after creation. screen -ls:\n{listing}"

    _ensure_run_dir(container)
    _best_effort_timezone_setup(container)
    return True, f"Screen session '{SCREEN_SESSION}' created."
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _ensure_screen_alive(container: DockerContainer) -> tuple[bool, str]:
    """
    Make sure the screen session responds, recreating it when it does not.

    Returns (True, "ok") if ``screen -Q windows`` succeeds, otherwise the
    result of create_screen_session().
    """
    status, _unused = _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -Q windows")
    if status != 0:
        return create_screen_session(container)
    return True, "ok"
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
# ----------------------------
|
|
292
|
+
# Stateful screen execution (legacy-faithful)
|
|
293
|
+
# ----------------------------
|
|
294
|
+
|
|
295
|
+
def exec_in_screen_and_get_log(container: DockerContainer, cmd: str) -> Tuple[int, str, str, bool]:
    """
    Stateful execution inside a GNU screen session.

    ``cmd`` is written to a script file, sourced by the shell running in the
    screen session, and its output is redirected to a per-run logfile. The
    logfile is polled until the return-code marker for this run appears.

    Returns: (rc, cleaned_output, logfile_path, stuck_flag)
      - Command completed: its exit status, the output found between this
        run's BEGIN/END markers with ANSI sequences stripped, the logfile
        path, False.
      - Stuck, defined as "no output growth/change for
        NO_OUTPUT_CHANGE_SECONDS": 124, a message with the partial output,
        the logfile path, True. The script file is left in place.
      - Screen session could not be ensured: 1, an error message, "", False.
      - Shell-renewal special case: 0, a confirmation message, the logfile
        path (never created in this case), False.
    """
    # ``re`` is not among this module's top-level imports; import it here so
    # the marker regex below does not raise NameError.
    import re

    ok, msg = _ensure_screen_alive(container)
    if not ok:
        return 1, f"Error: could not ensure screen session. {msg}", "", False

    run_id = uuid.uuid4().hex
    BEGIN = f"<<BEGIN:{run_id}>>"
    END = f"<<END:{run_id}>>"
    rc_rx = re.compile(rf"<<RC:{re.escape(run_id)}:(-?\d+)>>")
    delim = f"__PAYLOAD_{run_id}__"

    logfile = f"{LOG_DIR}/screen_exec_stateful_{run_id}.log"
    script = f"{LOG_DIR}/screen_src_{run_id}.sh"
    q_logfile = shlex.quote(logfile)
    q_script = shlex.quote(script)

    def _stuff_single_quoted(text: str) -> None:
        # Type ``text`` plus Enter into the session; embedded single quotes
        # are escaped for the surrounding shell quoting.
        safe = text.replace("'", r"'\''")
        _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -X stuff '{safe}\\r'")

    # Legacy special-case: renew shell
    if cmd.strip() in {'exec "$SHELL" -l', "exec '$SHELL' -l"}:
        _stuff_single_quoted("exec /bin/bash -l")
        time.sleep(0.3)
        return 0, "The shell has been renewed (exec /bin/bash -l).", logfile, False

    # Write the command to a script via a quoted heredoc (no expansion),
    # then start from an empty logfile.
    _exec(
        container,
        f"cat > {q_script} <<'{delim}'\n{cmd}\n{delim}\nchmod +x {q_script}"
    )
    _exec(container, f": > {q_logfile}")

    # Source the script so shell state persists across calls, and bracket
    # its output with per-run markers followed by the exit status.
    payload = (
        f'printf "%s\\n" "{BEGIN}" >> {q_logfile}; '
        f'if . {q_script} >> {q_logfile} 2>&1; then __rc=0; else __rc=$?; fi; '
        f'printf "%s\\n" "{END}" >> {q_logfile}; '
        f'printf "<<RC:{run_id}:%d>>\\n" "$__rc" >> {q_logfile}'
    )
    _stuff_single_quoted(payload)

    last_buf = ""
    # Monotonic clock: the idle timer must not be affected by wall-clock jumps.
    last_change = time.monotonic()
    rc: Optional[int] = None

    while True:
        buf = _read_tail(container, logfile)
        # Compare content, not just length: once the log exceeds
        # MAX_TAIL_BYTES the tail has a constant size even while new output
        # keeps arriving, so a length check would wrongly report "stuck".
        if buf != last_buf:
            last_buf = buf
            last_change = time.monotonic()

        m = rc_rx.search(buf.replace("\r", ""))
        if m:
            rc = int(m.group(1))
            break

        if time.monotonic() - last_change >= NO_OUTPUT_CHANGE_SECONDS:
            clean = _ANSI_RE.sub("", buf)
            return 124, (
                "The command appears stuck/idle (no output change within the timeout).\n\n"
                f"Partial output:\n{clean}\n\n"
                "You can WAIT, TERMINATE, or WRITE:<input> to continue."
            ), logfile, True

        time.sleep(WAIT_POLL_SECONDS)

    final = _read_tail(container, logfile)
    final_noansi = _ANSI_RE.sub("", final)

    bpos = final_noansi.rfind(BEGIN)
    epos = final_noansi.rfind(END)
    if bpos != -1 and epos != -1 and epos > bpos:
        region = final_noansi[bpos + len(BEGIN): epos]
    else:
        # A marker is missing from the tail (e.g. BEGIN scrolled out of it):
        # fall back to everything that is not a marker line.
        region = "\n".join(
            ln for ln in final_noansi.splitlines()
            if not ln.startswith(("<<BEGIN:", "<<END:", "<<RC:"))
        )

    region = "\n".join(ln for ln in region.splitlines() if not ln.startswith("<<RC:")).strip()

    _exec(container, f"rm -f {q_script}")
    return (rc if rc is not None else 0), region, logfile, False
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
# ----------------------------
|
|
392
|
+
# Container lifecycle
|
|
393
|
+
# ----------------------------
|
|
394
|
+
|
|
395
|
+
def start_container(tag: str) -> Optional[DockerContainer]:
    """
    Start a container from the given image tag and ensure screen session exists.

    The container runs ``tail -f /dev/null`` detached with a TTY so it stays
    alive for later exec calls.

    Returns:
        The running container, or None if the container could not be started
        or the screen session could not be created. In the latter case the
        container is force-removed.

    Note:
        The Docker client is obtained outside the ``try``, so an error while
        obtaining it propagates to the caller.
    """
    client = _docker_client()
    try:
        c = client.containers.run(tag, command=["tail", "-f", "/dev/null"], detach=True, tty=True)
    except Exception as e:
        _LOG.error("Failed to start container: %s", e)
        return None

    ok, msg = create_screen_session(c)
    if ok:
        return c

    _LOG.error("Failed to initialize screen session: %s", msg)
    try:
        c.remove(force=True)
    except Exception as e:
        # Still best-effort, but a leaked container should be visible in logs.
        _LOG.warning("Failed to remove container after screen setup failure: %s", e)
    return None
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
# ----------------------------
|
|
419
|
+
# Stuck handling (WAIT/TERMINATE/WRITE:)
|
|
420
|
+
# ----------------------------
|
|
421
|
+
|
|
422
|
+
def handle_stuck_action(agent: Any, command: str) -> Optional[str]:
    """
    If agent.command_stuck is True, interpret command as one of:
      WAIT | TERMINATE | WRITE:<text>

    - WAIT: keep polling the logfile until a return-code marker appears or
      the log stays unchanged for the no-change timeout.
    - TERMINATE: send Ctrl-C twice, quit the screen session and create a new one.
    - WRITE:<text>: type <text> plus Enter into the session, then wait as for WAIT.
    - Anything else: handled like TERMINATE, with a different message.

    Returns a user-facing terminal string if it handled the request. When the
    stuck command finished, the reported return code is parsed from the
    logfile's ``<<RC:<run_id>:<code>>>`` marker (0 if it cannot be parsed).
    Returns None if not in stuck mode, or if the agent has no container or
    logfile (in which case the stuck state is cleared).
    """
    # Function-scope import: this module does not import ``re`` at the top.
    import re

    if not getattr(agent, "command_stuck", False):
        return None

    container = getattr(agent, "container", None)
    logfile = getattr(agent, "current_logfile", None)

    if not container or not logfile:
        agent.command_stuck = False
        agent.current_logfile = None
        return None

    NO_CHANGE_TIMEOUT = 300
    POLL_INTERVAL_SECONDS = 5
    WRITE_GRACE_SECONDS = 2

    # Captures the numeric exit status from a "<<RC:<run_id>:<code>>>" marker.
    rc_value_rx = re.compile(r"<<RC:[^:>\s]+:(-?\d+)>>")

    header = "Output in terminal after executing the command:\n"
    still_idle_msg = (
        header
        + "command waited for more time and there was no change; you can WAIT more, TERMINATE, or WRITE input to command.\n\n"
        + "Return code: 124\n"
    )

    def _read_clean_log() -> str:
        # Whole logfile with ANSI sequences stripped; "" if it cannot be read.
        try:
            raw = read_file_from_container(container, logfile)
            return _ANSI_RE.sub("", raw)
        except Exception:
            return ""

    def _has_rc_marker(s: str) -> bool:
        return bool(_RC_ANY_RX.search(s or ""))

    def _return_code(clean_log: str) -> int:
        # Use the last marker in the log; fall back to 0 if none can be parsed.
        codes = rc_value_rx.findall(clean_log or "")
        return int(codes[-1]) if codes else 0

    def _extract_final_region(clean_log: str) -> str:
        # Prefer the text between the last BEGIN marker and the last END marker.
        bpos = clean_log.rfind("<<BEGIN:")
        epos = clean_log.rfind("<<END:")
        if bpos != -1 and epos != -1 and epos > bpos:
            arrow = clean_log.find(">>", bpos)
            if arrow != -1 and arrow + 2 <= epos:
                return clean_log[arrow + 2 : epos].strip()

        # Otherwise return every line that is not itself a marker.
        lines = [
            ln for ln in clean_log.splitlines()
            if not ln.startswith(("<<BEGIN:", "<<END:", "<<RC:"))
        ]
        return "\n".join(lines).strip()

    def _progress_aware_wait(after_write: bool = False) -> tuple[bool, str, int]:
        # Returns (finished, output, return_code). The timeout budget is
        # refilled whenever the log content changes.
        if after_write:
            time.sleep(WRITE_GRACE_SECONDS)

        remaining = NO_CHANGE_TIMEOUT
        last = _read_clean_log()

        if last and _has_rc_marker(last):
            return True, _extract_final_region(last), _return_code(last)

        while remaining > 0:
            time.sleep(POLL_INTERVAL_SECONDS)
            cur = _read_clean_log()

            if cur and _has_rc_marker(cur):
                return True, _extract_final_region(cur), _return_code(cur)

            if cur != last and cur != "":
                remaining = NO_CHANGE_TIMEOUT
                last = cur
            else:
                remaining -= POLL_INTERVAL_SECONDS

        return False, (last or ""), 124

    def _reset_screen_session() -> None:
        # Best-effort terminate running program and recreate screen
        try:
            _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -p 0 -X stuff $'\\003'")
            time.sleep(0.2)
            _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -p 0 -X stuff $'\\003'")
            time.sleep(0.2)
        except Exception:
            pass
        try:
            _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -X quit || true")
        except Exception:
            pass

        ok, msg = create_screen_session(container)
        if not ok:
            # The stuck state is cleared regardless; record that the new
            # session could not be created.
            _LOG.warning("Failed to recreate screen session after reset: %s", msg)
        agent.command_stuck = False
        agent.current_logfile = None

    cmd = (command or "").strip()

    if cmd == "WAIT":
        finished, output, rc = _progress_aware_wait(after_write=False)
        if finished:
            agent.command_stuck = False
            return (
                header
                + f"Command finished. Output:\n{output}\n\nReturn code: {rc}\n"
            )
        return still_idle_msg

    if cmd == "TERMINATE":
        _reset_screen_session()
        return (
            header
            + "Previous command terminated; fresh screen session is ready.\n\nReturn code: 0\n"
        )

    if cmd.startswith("WRITE:"):
        user_input = cmd.split("WRITE:", 1)[1]
        safe = user_input.replace("'", r"'\''")
        _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -X stuff '{safe}\\r'")
        finished, output, rc = _progress_aware_wait(after_write=True)
        if finished:
            agent.command_stuck = False
            return (
                header
                + f"Command finished after input. Output:\n{output}\n\nReturn code: {rc}\n"
            )
        return still_idle_msg

    # Unknown action => reset to keep system usable
    _reset_screen_session()
    return (
        header
        + "Unknown stuck action. Previous command terminated and screen session reset.\n\nReturn code: 0\n"
    )
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
# ----------------------------
|
|
561
|
+
# Container cleanup (for retry loop)
|
|
562
|
+
# ----------------------------
|
|
563
|
+
|
|
564
|
+
def cleanup_container(container: Optional[DockerContainer], docker_tag: Optional[str] = None) -> None:
    """
    Stop and remove Docker container, and optionally remove the image.
    Handles errors gracefully with warnings; nothing is raised for Docker
    failures.

    Stopping and removing are attempted independently, so a failing
    ``stop()`` does not prevent the forced ``remove()``.

    Args:
        container: Docker container object to clean up (None skips this step)
        docker_tag: Optional image tag to remove after container cleanup
    """
    # Stop and remove container
    if container is not None:
        container_id = container.id if hasattr(container, 'id') else str(container)

        try:
            _LOG.info(f"Stopping container: {container_id}")
            container.stop(timeout=10)
        except Exception as e:
            # Keep going: remove(force=True) below is still attempted.
            _LOG.warning(f"Failed to stop container {container_id}: {e}")

        try:
            _LOG.info(f"Removing container: {container_id}")
            container.remove(force=True)
            _LOG.info(f"Container {container_id} cleaned up successfully")
        except Exception as e:
            _LOG.warning(f"Failed to cleanup container: {e}")

    # Remove Docker image if tag provided
    if docker_tag and docker_tag.strip():
        try:
            client = _docker_client()
            _LOG.info(f"Removing Docker image: {docker_tag}")
            client.images.remove(image=docker_tag, force=True)
            _LOG.info(f"Docker image {docker_tag} removed successfully")
        except Exception as e:
            _LOG.warning(f"Failed to remove Docker image '{docker_tag}': {e}")
|
execution_agent/env.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# execution_agent/env.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Callable, Dict, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ExecutionEnvironment:
    """
    Where the agent's commands actually run.

    Contract:
      execute(cmd) -> {"output": str, "returncode": int, ...}

    Notes:
      - Without a container, commands go through the injected
        shell_interact_fn, which returns (output, cwd).
      - With a container set, execute() still works as a fallback, but the
        tools remain the primary interface (linux_terminal uses the more
        advanced helpers).
    """

    def __init__(
        self,
        *,
        workspace_path: str,
        project_path: str,
        shell_interact_fn: Callable[[str], tuple[str, str]],
    ):
        self._shell_interact_fn = shell_interact_fn
        self.workspace_path = workspace_path
        self.project_path = project_path

        # Stays None (local mode) until set_container() is called;
        # per the original note, tools.write_to_file does that.
        self.container = None

    def set_container(self, container) -> None:
        """Switch this environment to container mode (or back, with None)."""
        self.container = container

    def execute(self, command: str) -> Dict[str, Any]:
        """
        Run ``command`` (stripped; None is treated as "") and return a result dict.

        Local mode reports returncode 0 with the shell's output and cwd.
        Container mode reports the screen helper's return code plus
        "logfile" and "stuck". Any exception is turned into returncode 1
        with a descriptive "output".
        """
        stripped = (command or "").strip()
        if self.container is not None:
            return self._execute_in_container(stripped)
        return self._execute_locally(stripped)

    def _execute_locally(self, cmd: str) -> Dict[str, Any]:
        """Run ``cmd`` through the injected shell function."""
        try:
            out, cwd = self._shell_interact_fn(cmd)
        except Exception as e:
            return {"output": f"Local execution error: {type(e).__name__}: {e}", "returncode": 1}
        return {"output": out or "", "returncode": 0, "cwd": cwd}

    def _execute_in_container(self, cmd: str) -> Dict[str, Any]:
        """Best-effort fallback that runs ``cmd`` through the screen-based helper."""
        try:
            # Imported lazily, inside the try, so an import failure is
            # reported as an execution error instead of propagating.
            from .docker_helpers_static import exec_in_screen_and_get_log

            rc, out, logfile, stuck = exec_in_screen_and_get_log(self.container, cmd)
            result: Dict[str, Any] = {"output": out, "returncode": int(rc)}
            result["logfile"] = logfile
            result["stuck"] = bool(stuck)
            return result
        except Exception as e:
            return {"output": f"Container execution error: {type(e).__name__}: {e}", "returncode": 1}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# execution_agent/exceptions.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class GoalsAccomplished(Exception):
    """Signal that goals_accomplished was called, so the run loop can stop cleanly."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FormatError(Exception):
    """Signal that the model output is invalid or cannot be parsed per our contract."""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BudgetExhausted(Exception):
    """Signal that the agent used up its step budget before accomplishing its goals."""
|