execution-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. execution_agent/__init__.py +8 -0
  2. execution_agent/__main__.py +5 -0
  3. execution_agent/agent.py +955 -0
  4. execution_agent/commands_interface.json +7 -0
  5. execution_agent/config.py +21 -0
  6. execution_agent/context.py +1565 -0
  7. execution_agent/docker_helpers_static.py +593 -0
  8. execution_agent/env.py +61 -0
  9. execution_agent/exceptions.py +17 -0
  10. execution_agent/exit_artifacts.py +350 -0
  11. execution_agent/main.py +1234 -0
  12. execution_agent/prompt_files/c_guidelines +481 -0
  13. execution_agent/prompt_files/command_stuck +7 -0
  14. execution_agent/prompt_files/cpp_guidelines +481 -0
  15. execution_agent/prompt_files/cycle_instruction +51 -0
  16. execution_agent/prompt_files/java_guidelines +37 -0
  17. execution_agent/prompt_files/javascript_guidelines +69 -0
  18. execution_agent/prompt_files/latest_containter_technology +7 -0
  19. execution_agent/prompt_files/python_guidelines +48 -0
  20. execution_agent/prompt_files/remove_progress_bars +1 -0
  21. execution_agent/prompt_files/rust_guidelines +53 -0
  22. execution_agent/prompt_files/search_workflows_summary +121 -0
  23. execution_agent/prompt_files/steps_list.json +32 -0
  24. execution_agent/prompt_files/summarize_cycle +13 -0
  25. execution_agent/prompt_files/tools_list +99 -0
  26. execution_agent/prompt_logging.py +311 -0
  27. execution_agent/repetition.py +39 -0
  28. execution_agent/shared_utils.py +507 -0
  29. execution_agent/state_persistence.py +286 -0
  30. execution_agent/tools.py +1611 -0
  31. execution_agent/trace_to_bash.py +281 -0
  32. execution_agent-0.1.0.dist-info/METADATA +231 -0
  33. execution_agent-0.1.0.dist-info/RECORD +37 -0
  34. execution_agent-0.1.0.dist-info/WHEEL +5 -0
  35. execution_agent-0.1.0.dist-info/entry_points.txt +2 -0
  36. execution_agent-0.1.0.dist-info/licenses/LICENSE.md +46 -0
  37. execution_agent-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,593 @@
1
+ # execution_agent/docker_helpers_static.py
2
+ """
3
+ Docker helper functions for the execution agent.
4
+
5
+ This module uses shared_utils for common functionality like exec_in_container,
6
+ XML conversion, etc. to avoid code duplication.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import shlex
12
+ import time
13
+ import uuid
14
+ from typing import Any, Optional, Tuple
15
+
16
+ try:
17
+ import docker # type: ignore
18
+ from docker.models.containers import Container as DockerContainer # type: ignore
19
+ except Exception: # pragma: no cover
20
+ docker = None
21
+ DockerContainer = Any # type: ignore
22
+
23
+ # Import shared utilities
24
+ from execution_agent.shared_utils import (
25
+ exec_in_container,
26
+ read_file_tail,
27
+ read_file_from_container,
28
+ convert_xml_to_yaml,
29
+ strip_ansi_codes,
30
+ get_docker_client,
31
+ SCREEN_SESSION,
32
+ RUN_DIR,
33
+ LOG_DIR,
34
+ STUCK_TIMEOUT_SECONDS as NO_OUTPUT_CHANGE_SECONDS,
35
+ POLL_INTERVAL_SECONDS as WAIT_POLL_SECONDS,
36
+ MAX_TAIL_BYTES,
37
+ ANSI_ESCAPE_RE as _ANSI_RE,
38
+ RC_MARKER_RE as _RC_ANY_RX,
39
+ )
40
+
41
+
42
+ _LOG = logging.getLogger("execution_agent.docker")
43
+
44
+
45
+ # ----------------------------
46
+ # Docker client helpers - using shared implementations
47
+ # ----------------------------
48
+
49
def _docker_client():
    """Return whatever ``shared_utils.get_docker_client()`` returns.

    Kept as a private alias so the rest of this module has a single local
    entry point for obtaining the client.
    """
    client = get_docker_client()
    return client
52
+
53
+
54
def check_image_exists(tag: str) -> bool:
    """Report whether the Docker client can look up an image named ``tag``.

    Args:
        tag: Image name/tag handed to ``client.images.get``.

    Returns:
        ``True`` when the lookup succeeds. Any exception (while obtaining the
        client or fetching the image) is logged at debug level and reported
        as ``False``.
    """
    found = False
    try:
        get_docker_client().images.get(tag)
    except Exception as err:
        _LOG.debug(f"Image {tag} not found: {err}")
    else:
        found = True
    return found
62
+
63
+
64
def build_image(dockerfile_dir: str, tag: str) -> str:
    """Build an image from ``dockerfile_dir`` and return a readable build log.

    The function never raises for build failures: the exception is logged and
    folded into the returned text, which then ends with a ``BUILD FAILED``
    line. An exception raised while obtaining the Docker client itself is not
    caught here, because the client is fetched before the ``try`` block.

    Args:
        dockerfile_dir: Build context directory passed as ``path``.
        tag: Tag applied to the resulting image.

    Returns:
        Newline-joined log lines describing the build.
    """
    client = get_docker_client()
    lines: list[str] = [f"Starting build: context='{dockerfile_dir}', tag='{tag}'"]

    try:
        image, build_output = client.images.build(
            path=dockerfile_dir,
            tag=tag,
            rm=True,
            pull=True,
            nocache=False,
        )
        for item in build_output:
            if not isinstance(item, dict):
                lines.append(str(item))
                continue
            if item.get("stream"):
                lines.extend(str(item["stream"]).rstrip("\n").splitlines())
            if item.get("status"):
                progress = item.get("progress") or ""
                lines.append(f"{item['status']} {progress}".rstrip())
            if item.get("errorDetail") or item.get("error"):
                # Prefer the structured message, then the plain error string.
                detail = (item.get("errorDetail") or {}).get("message") or item.get("error") or ""
                lines.append(f"ERROR: {detail}")
            if item.get("aux"):
                lines.append(f"AUX: {item['aux']}")

        image_ref = getattr(image, "short_id", None) or getattr(image, "id", None) or "unknown"
        lines.append(f"Build completed successfully. Image: {image_ref}")
        return "\n".join(lines)

    except Exception as e:
        _LOG.error(f"Docker build failed: {e}", exc_info=True)
        failure = f"{type(e).__name__}: {e}"

        if hasattr(e, 'explanation'):
            lines.append(f"ERROR EXPLANATION: {e.explanation}")

        # Some build exceptions carry the partial build log; replay it so the
        # caller sees what happened before the failure.
        if hasattr(e, 'build_log'):
            for record in e.build_log:
                if not isinstance(record, dict):
                    lines.append(str(record))
                    continue
                if record.get("stream"):
                    lines.extend(str(record["stream"]).rstrip("\n").splitlines())
                if record.get("errorDetail"):
                    detail = record["errorDetail"].get("message", str(record["errorDetail"]))
                    lines.append(f"ERROR: {detail}")
                if record.get("error"):
                    lines.append(f"ERROR: {record['error']}")
                if record.get("status"):
                    progress = record.get("progress") or ""
                    lines.append(f"{record['status']} {progress}".rstrip())

        lines.append(f"BUILD FAILED: {failure}")
        return "\n".join(lines)
120
+
121
+
122
+ # ----------------------------
123
+ # Container exec primitives - using shared implementations
124
+ # ----------------------------
125
+
126
def _exec(container: DockerContainer, cmd: str, tty: bool = False) -> tuple[int, str]:
    """Run ``cmd`` inside ``container`` via ``shared_utils.exec_in_container``.

    Args:
        container: Container to run the command in.
        cmd: Command string, forwarded unchanged.
        tty: Forwarded as the ``tty`` keyword argument.

    Returns:
        The ``(exit_code, output)`` pair produced by ``exec_in_container``.
    """
    result = exec_in_container(container, cmd, tty=tty)
    return result
129
+
130
+
131
def _read_tail(container: DockerContainer, path: str) -> str:
    """Return the tail of ``path`` inside ``container``.

    Delegates to ``shared_utils.read_file_tail`` with the module-wide
    ``MAX_TAIL_BYTES`` limit.
    """
    tail = read_file_tail(container, path, max_bytes=MAX_TAIL_BYTES)
    return tail
134
+
135
+
136
+ # ----------------------------
137
+ # XML helpers - using shared implementations
138
+ # ----------------------------
139
+ _convert_xml_to_yaml_content = convert_xml_to_yaml
140
+
141
+
142
+ # ----------------------------
143
+ # File read/write in container - now using shared implementations
144
+ # Note: read_file_from_container is imported from shared_utils
145
+ # We keep a local write_string_to_file that delegates to shared_utils
146
+
147
+ from execution_agent.shared_utils import write_file_to_container
148
+
149
+
150
def write_string_to_file(container: DockerContainer, file_content: str, file_path: str) -> Optional[str]:
    """Write ``file_content`` to ``file_path`` inside ``container``.

    Compatibility wrapper: this function takes (content, path) while
    ``shared_utils.write_file_to_container`` takes (path, content), so the
    arguments are swapped when delegating.

    Returns:
        Whatever ``write_file_to_container`` returns.
    """
    outcome = write_file_to_container(container, file_path, file_content)
    return outcome
156
+
157
+
158
+ # ----------------------------
159
+ # Package/screen setup (legacy-inspired)
160
+ # ----------------------------
161
+
162
def _command_exists(container: DockerContainer, cmd: str) -> bool:
    """Return True when ``command -v <cmd>`` exits with status 0 in the container."""
    probe = f"command -v {shlex.quote(cmd)} >/dev/null 2>&1"
    status, _ignored = _exec(container, probe)
    return status == 0
165
+
166
+
167
def _whoami_root(container: DockerContainer) -> bool:
    """Return True when ``id -u`` succeeds in the container and prints ``0``."""
    status, uid_text = _exec(container, "id -u")
    if status != 0:
        return False
    return uid_text.strip() == "0"
170
+
171
+
172
def _has_passwordless_sudo(container: DockerContainer) -> bool:
    """Return True when ``sudo`` is present and ``sudo -n true`` exits with 0.

    ``-n`` makes sudo fail instead of prompting, so a zero status means no
    password was required.
    """
    if _command_exists(container, "sudo"):
        status, _ignored = _exec(container, "sudo -n true")
        return status == 0
    return False
177
+
178
+
179
def _escalation_prefix(container: DockerContainer) -> Optional[str]:
    """Return the command prefix needed to run privileged commands.

    Returns:
        ``""`` when already running as root, ``"sudo -n "`` when passwordless
        sudo works, and ``None`` when neither is available.
    """
    if _whoami_root(container):
        return ""
    return "sudo -n " if _has_passwordless_sudo(container) else None
185
+
186
+
187
def _detect_pkg_manager(container: DockerContainer) -> Optional[str]:
    """Return the first package manager found in the container, or ``None``.

    Candidates are probed with ``command -v`` in a fixed order:
    microdnf, dnf, yum, apt-get, apk.
    """
    for name in ("microdnf", "dnf", "yum", "apt-get", "apk"):
        status, _ignored = _exec(container, f"command -v {name} >/dev/null 2>&1")
        if status == 0:
            return name
    return None
200
+
201
+
202
def _install_cmds(pm: str, pkgs: list[str]) -> list[str]:
    """Build the shell commands that install ``pkgs`` with package manager ``pm``.

    Args:
        pm: One of ``apt-get``, ``dnf``, ``microdnf``, ``yum`` or ``apk``.
        pkgs: Package names; each one is shell-quoted individually.

    Returns:
        The commands to run in order, or an empty list for an unknown ``pm``.
    """
    quoted = " ".join(map(shlex.quote, pkgs))

    if pm == "apt-get":
        # Refresh the package index first, then install without prompts.
        install = f"DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends {quoted}"
        return ["apt-get update", install]

    if pm in ("dnf", "microdnf", "yum"):
        # These three share the same ``<tool> -y install`` syntax.
        return [f"{pm} -y install {quoted}"]

    if pm == "apk":
        return [f"apk add --no-cache {quoted}"]

    return []
217
+
218
+
219
def _ensure_run_dir(container: DockerContainer) -> None:
    """Create ``RUN_DIR`` in the container and set its mode to 1777.

    The command's exit status is ignored.
    """
    run_dir = shlex.quote(RUN_DIR)
    _exec(container, f"mkdir -p {run_dir} && chmod 1777 {run_dir}")
221
+
222
+
223
def _best_effort_timezone_setup(container: DockerContainer) -> None:
    """Try to set the container timezone to Europe/Berlin; never fail.

    With root or passwordless sudo, ``/etc/localtime`` and ``/etc/timezone``
    are updated. Without privileges only ``/tmp/timezone.info`` is written.
    Both commands end in ``|| true`` and their results are ignored.
    """
    prefix = _escalation_prefix(container)
    if prefix is None:
        _exec(container, "echo Europe/Berlin > /tmp/timezone.info || true")
        return

    tz_script = "TZ=Europe/Berlin && ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone"
    _exec(container, f"{prefix}sh -lc {shlex.quote(tz_script)} || true")
231
+
232
+
233
def _ensure_packages(container: DockerContainer, pkgs: list[str]) -> Optional[str]:
    """Install ``pkgs`` in the container with whichever package manager exists.

    Args:
        container: Container to install into.
        pkgs: Package names to install.

    Returns:
        ``None`` on success, otherwise a human-readable explanation of why the
        installation could not be performed or which command failed.
    """
    manager = _detect_pkg_manager(container)
    prefix = _escalation_prefix(container)

    if manager is None:
        return "Cannot install packages: no supported package manager detected (microdnf/dnf/yum/apt-get/apk)."
    if prefix is None:
        return "Cannot install packages: need root privileges (not root; passwordless sudo unavailable)."

    commands = _install_cmds(manager, pkgs)
    if not commands:
        return f"Package manager '{manager}' detected but installer does not support it."

    for command in commands:
        full_command = f"{prefix}{command}"
        status, output = _exec(container, full_command)
        if status != 0:
            # Stop at the first failing step and report it verbatim.
            return f"Package installation failed.\nCommand: {full_command}\nExit code: {status}\nOutput:\n{output}"

    return None
252
+
253
+
254
def create_screen_session(container: DockerContainer) -> tuple[bool, str]:
    """Install prerequisites and start a fresh detached screen session.

    Installs bash, screen and psmisc (plus coreutils when apt-get is the
    package manager), quits any existing session named ``SCREEN_SESSION``,
    starts a new one running ``bash -l``, and verifies it appears in
    ``screen -ls``. Afterwards the run directory and timezone are set up on a
    best-effort basis.

    Returns:
        ``(True, message)`` on success, ``(False, reason)`` otherwise.
    """
    packages = ["bash", "screen", "psmisc"]
    if _detect_pkg_manager(container) == "apt-get":
        packages.append("coreutils")

    problem = _ensure_packages(container, packages)
    if problem:
        return False, problem

    session = shlex.quote(SCREEN_SESSION)

    # Drop any previous session with the same name; failure here is harmless.
    _exec(container, f"screen -S {session} -X quit >/dev/null 2>&1 || true")
    status, output = _exec(container, f"screen -dmS {session} bash -l")
    if status != 0:
        return False, f"Failed to start screen session '{SCREEN_SESSION}'. Output:\n{output}"

    # Confirm the new session is actually listed.
    status, listing = _exec(container, "screen -ls")
    listed = status == 0 and f".{SCREEN_SESSION}" in listing
    if not listed:
        return False, f"Screen session '{SCREEN_SESSION}' not found after creation. screen -ls:\n{listing}"

    _ensure_run_dir(container)
    _best_effort_timezone_setup(container)
    return True, f"Screen session '{SCREEN_SESSION}' created."
281
+
282
+
283
def _ensure_screen_alive(container: DockerContainer) -> tuple[bool, str]:
    """Make sure the screen session responds, recreating it if it does not.

    Returns:
        ``(True, "ok")`` when ``screen -Q windows`` succeeds for the session;
        otherwise the result of ``create_screen_session``.
    """
    status, _ignored = _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -Q windows")
    if status != 0:
        created, message = create_screen_session(container)
        return created, message
    return True, "ok"
289
+
290
+
291
+ # ----------------------------
292
+ # Stateful screen execution (legacy-faithful)
293
+ # ----------------------------
294
+
295
def exec_in_screen_and_get_log(container: DockerContainer, cmd: str) -> Tuple[int, str, str, bool]:
    """Run ``cmd`` inside the persistent GNU screen session and collect its output.

    The command is written to a script file, sourced in the screen session's
    shell (so shell state such as the working directory persists between
    calls), and its output is redirected to a per-run log file framed by
    ``<<BEGIN:id>>`` / ``<<END:id>>`` / ``<<RC:id:code>>`` markers. The log is
    polled until the RC marker appears or the output stops changing.

    Args:
        container: Container hosting the screen session.
        cmd: Shell text to execute.

    Returns:
        ``(rc, cleaned_output, logfile_path, stuck_flag)``. When the log does
        not change for ``NO_OUTPUT_CHANGE_SECONDS`` the result is
        ``(124, message_with_partial_output, logfile, True)``. When the screen
        session cannot be ensured the result is ``(1, error_message, "", False)``.
    """
    # ``re`` is not among this module's top-level imports; import it here so
    # building the RC-marker pattern below does not raise NameError.
    import re

    ok, msg = _ensure_screen_alive(container)
    if not ok:
        return 1, f"Error: could not ensure screen session. {msg}", "", False

    run_id = uuid.uuid4().hex
    BEGIN = f"<<BEGIN:{run_id}>>"
    END = f"<<END:{run_id}>>"
    rc_rx = re.compile(rf"<<RC:{re.escape(run_id)}:(-?\d+)>>")
    delim = f"__PAYLOAD_{run_id}__"

    logfile = f"{LOG_DIR}/screen_exec_stateful_{run_id}.log"
    script = f"{LOG_DIR}/screen_src_{run_id}.sh"

    def _stuff_single_quoted(text: str) -> None:
        """Type ``text`` plus a carriage return into the screen session."""
        # Escape embedded single quotes for the surrounding '...' shell quoting.
        safe = text.replace("'", r"'\''")
        _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -X stuff '{safe}\\r'")

    # Legacy special-case: renew shell
    if cmd.strip() in {'exec "$SHELL" -l', "exec '$SHELL' -l"}:
        _stuff_single_quoted("exec /bin/bash -l")
        time.sleep(0.3)
        return 0, "The shell has been renewed (exec /bin/bash -l).", logfile, False

    # Write the command to a script via a quoted heredoc (no expansion), then
    # truncate/create the log file.
    _exec(
        container,
        f"cat > {shlex.quote(script)} <<'{delim}'\n{cmd}\n{delim}\nchmod +x {shlex.quote(script)}"
    )
    _exec(container, f": > {shlex.quote(logfile)}")

    # Source the script in the session's shell and frame its output with
    # markers; the RC marker is written last and signals completion.
    payload = (
        f'printf "%s\\n" "{BEGIN}" >> {logfile}; '
        f'if . {script} >> {logfile} 2>&1; then __rc=0; else __rc=$?; fi; '
        f'printf "%s\\n" "{END}" >> {logfile}; '
        f'printf "<<RC:{run_id}:%d>>\\n" "$__rc" >> {logfile}'
    )
    _stuff_single_quoted(payload)

    last_buf = ""
    last_change = time.time()
    rc: Optional[int] = None

    while True:
        buf = _read_tail(container, logfile)
        # Compare content, not just length: once the log exceeds
        # MAX_TAIL_BYTES the tail has a constant size even while new output
        # keeps arriving, so a length check would wrongly report "stuck".
        if buf != last_buf:
            last_buf = buf
            last_change = time.time()

        m = rc_rx.search(buf.replace("\r", ""))
        if m:
            rc = int(m.group(1))
            break

        if time.time() - last_change >= NO_OUTPUT_CHANGE_SECONDS:
            clean = _ANSI_RE.sub("", buf)
            return 124, (
                "The command appears stuck/idle (no output change within the timeout).\n\n"
                f"Partial output:\n{clean}\n\n"
                "You can WAIT, TERMINATE, or WRITE:<input> to continue."
            ), logfile, True

        time.sleep(WAIT_POLL_SECONDS)

    final = _read_tail(container, logfile)
    final_noansi = _ANSI_RE.sub("", final)

    bpos = final_noansi.rfind(BEGIN)
    epos = final_noansi.rfind(END)
    if bpos != -1 and epos != -1 and epos > bpos:
        region = final_noansi[bpos + len(BEGIN): epos]
    else:
        # Markers missing from the tail (e.g. BEGIN scrolled out of the
        # window): keep everything except marker lines.
        region = "\n".join(
            ln
            for ln in final_noansi.splitlines()
            if not ln.startswith(("<<BEGIN:", "<<END:", "<<RC:"))
        )

    region = "\n".join(ln for ln in region.splitlines() if not ln.startswith("<<RC:")).strip()

    _exec(container, f"rm -f {shlex.quote(script)}")
    return (rc if rc is not None else 0), region, logfile, False
389
+
390
+
391
+ # ----------------------------
392
+ # Container lifecycle
393
+ # ----------------------------
394
+
395
def start_container(tag: str) -> Optional[DockerContainer]:
    """Start a detached container from image ``tag`` with a ready screen session.

    The container runs ``tail -f /dev/null`` so it stays alive. If the screen
    session cannot be created, the container is force-removed (errors from the
    removal are ignored).

    Returns:
        The running container, or ``None`` when starting the container or
        initialising the screen session fails (the reason is logged).
    """
    client = _docker_client()
    try:
        started = client.containers.run(
            tag,
            command=["tail", "-f", "/dev/null"],
            detach=True,
            tty=True,
        )
    except Exception as err:
        _LOG.error("Failed to start container: %s", err)
        return None

    ready, message = create_screen_session(started)
    if ready:
        return started

    # Initialisation failed: discard the half-configured container.
    try:
        started.remove(force=True)
    except Exception:
        pass
    _LOG.error("Failed to initialize screen session: %s", message)
    return None
416
+
417
+
418
+ # ----------------------------
419
+ # Stuck handling (WAIT/TERMINATE/WRITE:)
420
+ # ----------------------------
421
+
422
def _stuck_rc_from_log(clean_log: str) -> int:
    """Return the exit status recorded by the last ``<<RC:<id>:<n>>>`` marker.

    Falls back to 0 when no marker of that shape is present, which matches the
    value that was previously always reported.
    """
    import re  # local import: the module's top-level imports do not include ``re``

    codes = re.findall(r"<<RC:[^:>\s]*:(-?\d+)>>", clean_log or "")
    return int(codes[-1]) if codes else 0


def handle_stuck_action(agent: Any, command: str) -> Optional[str]:
    """Handle a follow-up action for a command previously reported as stuck.

    When ``agent.command_stuck`` is true, ``command`` is interpreted as one of
    ``WAIT``, ``TERMINATE`` or ``WRITE:<text>``. Any other text resets the
    screen session so the system stays usable.

    Args:
        agent: Object exposing ``command_stuck``, ``container`` and
            ``current_logfile`` attributes; the latter two are reset here.
        command: The action text.

    Returns:
        A terminal-style message when the request was handled. ``None`` when
        the agent is not in stuck mode, or when it has no container/logfile
        (in which case the stuck state is cleared). When the command finishes,
        the message carries the exit status parsed from the log's RC marker.
    """
    if not getattr(agent, "command_stuck", False):
        return None

    container = getattr(agent, "container", None)
    logfile = getattr(agent, "current_logfile", None)

    if not container or not logfile:
        agent.command_stuck = False
        agent.current_logfile = None
        return None

    NO_CHANGE_TIMEOUT = 300
    POLL_INTERVAL_SECONDS = 5
    WRITE_GRACE_SECONDS = 2

    def _read_clean_log() -> str:
        """Read the whole log with ANSI sequences removed; "" on any error."""
        try:
            raw = read_file_from_container(container, logfile)
            return _ANSI_RE.sub("", raw)
        except Exception:
            return ""

    def _has_rc_marker(s: str) -> bool:
        return bool(_RC_ANY_RX.search(s or ""))

    def _extract_final_region(clean_log: str) -> str:
        """Return the text between the last BEGIN and END markers."""
        bpos = clean_log.rfind("<<BEGIN:")
        epos = clean_log.rfind("<<END:")
        if bpos != -1 and epos != -1 and epos > bpos:
            arrow = clean_log.find(">>", bpos)
            if arrow != -1 and arrow + 2 <= epos:
                return clean_log[arrow + 2 : epos].strip()

        # Markers incomplete: keep everything except marker lines.
        kept = [
            ln
            for ln in clean_log.splitlines()
            if not ln.startswith(("<<BEGIN:", "<<END:", "<<RC:"))
        ]
        return "\n".join(kept).strip()

    def _progress_aware_wait(after_write: bool = False) -> tuple[bool, str, int]:
        """Poll the log until it finishes or stays unchanged for the timeout.

        Returns ``(finished, output, rc)``; ``rc`` is only meaningful when
        ``finished`` is true.
        """
        if after_write:
            time.sleep(WRITE_GRACE_SECONDS)

        remaining = NO_CHANGE_TIMEOUT
        last = _read_clean_log()

        if last and _has_rc_marker(last):
            return True, _extract_final_region(last), _stuck_rc_from_log(last)

        while remaining > 0:
            time.sleep(POLL_INTERVAL_SECONDS)
            cur = _read_clean_log()

            if cur and _has_rc_marker(cur):
                return True, _extract_final_region(cur), _stuck_rc_from_log(cur)

            if cur != last and cur != "":
                # New output: the command is alive, restart the idle budget.
                remaining = NO_CHANGE_TIMEOUT
                last = cur
            else:
                remaining -= POLL_INTERVAL_SECONDS

        return False, (last or ""), 124

    def _reset_screen_session() -> None:
        # Best-effort terminate running program and recreate screen
        try:
            _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -p 0 -X stuff $'\\003'")
            time.sleep(0.2)
            _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -p 0 -X stuff $'\\003'")
            time.sleep(0.2)
        except Exception:
            pass
        try:
            _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -X quit || true")
        except Exception:
            pass

        create_screen_session(container)
        agent.command_stuck = False
        agent.current_logfile = None

    still_idle_message = (
        "Output in terminal after executing the command:\n"
        "command waited for more time and there was no change; you can WAIT more, TERMINATE, or WRITE input to command.\n\n"
        "Return code: 124\n"
    )

    cmd = (command or "").strip()

    if cmd == "WAIT":
        finished, output, rc = _progress_aware_wait(after_write=False)
        if finished:
            agent.command_stuck = False
            return (
                "Output in terminal after executing the command:\n"
                f"Command finished. Output:\n{output}\n\nReturn code: {rc}\n"
            )
        return still_idle_message

    if cmd == "TERMINATE":
        _reset_screen_session()
        return (
            "Output in terminal after executing the command:\n"
            "Previous command terminated; fresh screen session is ready.\n\nReturn code: 0\n"
        )

    if cmd.startswith("WRITE:"):
        user_input = cmd.split("WRITE:", 1)[1]
        # Escape single quotes for the '...' quoting used in the stuff command.
        safe = user_input.replace("'", r"'\''")
        _exec(container, f"screen -S {shlex.quote(SCREEN_SESSION)} -X stuff '{safe}\\r'")
        finished, output, rc = _progress_aware_wait(after_write=True)
        if finished:
            agent.command_stuck = False
            return (
                "Output in terminal after executing the command:\n"
                f"Command finished after input. Output:\n{output}\n\nReturn code: {rc}\n"
            )
        return still_idle_message

    # Unknown action => reset to keep system usable
    _reset_screen_session()
    return (
        "Output in terminal after executing the command:\n"
        "Unknown stuck action. Previous command terminated and screen session reset.\n\nReturn code: 0\n"
    )
558
+
559
+
560
+ # ----------------------------
561
+ # Container cleanup (for retry loop)
562
+ # ----------------------------
563
+
564
def cleanup_container(container: Optional[DockerContainer], docker_tag: Optional[str] = None) -> None:
    """Stop and remove a container and, optionally, remove its image.

    Every step is best-effort: failures are logged as warnings and never
    raised. Stopping and removing are attempted independently, so a failed
    ``stop`` no longer prevents the forced ``remove`` from running.

    Args:
        container: Docker container object to clean up; ``None`` skips the
            container steps.
        docker_tag: Optional image tag to remove after container cleanup;
            empty or whitespace-only values are ignored.
    """
    if container is not None:
        container_id = "<unknown>"

        # Step 1: graceful stop. A failure here must not skip removal.
        try:
            container_id = container.id if hasattr(container, 'id') else str(container)
            _LOG.info(f"Stopping container: {container_id}")
            container.stop(timeout=10)
        except Exception as e:
            _LOG.warning(f"Failed to stop container {container_id}: {e}")

        # Step 2: forced removal, which also covers a still-running container.
        try:
            _LOG.info(f"Removing container: {container_id}")
            container.remove(force=True)
            _LOG.info(f"Container {container_id} cleaned up successfully")
        except Exception as e:
            _LOG.warning(f"Failed to cleanup container: {e}")

    # Remove Docker image if tag provided
    if docker_tag and docker_tag.strip():
        try:
            client = _docker_client()
            _LOG.info(f"Removing Docker image: {docker_tag}")
            client.images.remove(image=docker_tag, force=True)
            _LOG.info(f"Docker image {docker_tag} removed successfully")
        except Exception as e:
            _LOG.warning(f"Failed to remove Docker image '{docker_tag}': {e}")
execution_agent/env.py ADDED
@@ -0,0 +1,61 @@
1
+ # execution_agent/env.py
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import re
6
+ from pathlib import Path
7
+ from typing import Any, Callable, Dict, Optional
8
+
9
+
10
class ExecutionEnvironment:
    """
    Execution substrate for the agent.

    Contract:
        execute(cmd) -> {"output": str, "returncode": int, ...}

    Notes:
        - Without a container, commands go to the injected
          ``shell_interact_fn``, which returns ``(output, cwd)``.
        - Once a container is attached, ``execute`` runs commands through the
          screen-based helper as a fallback; the tools remain the primary
          interface for container execution.
    """

    def __init__(
        self,
        *,
        workspace_path: str,
        project_path: str,
        shell_interact_fn: Callable[[str], tuple[str, str]],
    ):
        """Store the paths and the local shell callback; start with no container."""
        self._shell_interact_fn = shell_interact_fn
        self.workspace_path = workspace_path
        self.project_path = project_path

        # Attached later through set_container(...).
        self.container = None

    def set_container(self, container) -> None:
        """Attach ``container``; later ``execute`` calls run inside it."""
        self.container = container

    def execute(self, command: str) -> Dict[str, Any]:
        """Run ``command`` and return a result dict; never raises.

        The command is stripped first (``None`` becomes ``""``). Errors are
        reported as ``returncode`` 1 with a descriptive ``output``.
        """
        stripped = (command or "").strip()
        if self.container is not None:
            return self._execute_in_container(stripped)
        return self._execute_locally(stripped)

    def _execute_locally(self, cmd: str) -> Dict[str, Any]:
        """Local mode: delegate to the injected shell function."""
        try:
            output, cwd = self._shell_interact_fn(cmd)
        except Exception as e:
            return {"output": f"Local execution error: {type(e).__name__}: {e}", "returncode": 1}
        return {"output": output or "", "returncode": 0, "cwd": cwd}

    def _execute_in_container(self, cmd: str) -> Dict[str, Any]:
        """Container mode (best-effort fallback) via the screen-based helper."""
        try:
            # Imported lazily, inside the try, so an import failure is
            # reported like any other execution error.
            from .docker_helpers_static import exec_in_screen_and_get_log

            rc, output, logfile, stuck = exec_in_screen_and_get_log(self.container, cmd)
            result: Dict[str, Any] = {"output": output, "returncode": int(rc)}
            result["logfile"] = logfile
            result["stuck"] = bool(stuck)
            return result
        except Exception as e:
            return {"output": f"Container execution error: {type(e).__name__}: {e}", "returncode": 1}
@@ -0,0 +1,17 @@
1
+ # execution_agent/exceptions.py
2
+ from __future__ import annotations
3
+
4
+
5
class GoalsAccomplished(Exception):
    """Signal that ``goals_accomplished`` was called so the run loop can stop cleanly."""
8
+
9
+
10
class FormatError(Exception):
    """Signal that the model output is invalid or cannot be parsed per the expected contract."""
13
+
14
+
15
class BudgetExhausted(Exception):
    """Signal that the agent used up its step budget before accomplishing its goals."""