screenforge 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +0 -0
- cli/_version.py +1 -0
- cli/dispatch.py +266 -0
- cli/doctor.py +487 -0
- cli/modes/__init__.py +0 -0
- cli/modes/action.py +262 -0
- cli/modes/default.py +248 -0
- cli/modes/demo.py +162 -0
- cli/modes/dry_run.py +237 -0
- cli/modes/init.py +133 -0
- cli/modes/plan.py +148 -0
- cli/modes/workflow.py +354 -0
- cli/parser.py +305 -0
- cli/reporter.py +207 -0
- cli/session.py +146 -0
- cli/shared.py +427 -0
- cli/shorthand.py +90 -0
- cli/tool_protocol_handlers.py +446 -0
- common/__init__.py +0 -0
- common/adapters/__init__.py +21 -0
- common/adapters/android_adapter.py +273 -0
- common/adapters/base_adapter.py +24 -0
- common/adapters/ios_adapter.py +278 -0
- common/adapters/web_adapter.py +271 -0
- common/ai.py +277 -0
- common/ai_autonomous.py +273 -0
- common/ai_heal.py +222 -0
- common/cache/__init__.py +15 -0
- common/cache/cache_hash.py +57 -0
- common/cache/cache_manager.py +300 -0
- common/cache/cache_stats.py +133 -0
- common/cache/cache_storage.py +79 -0
- common/cache/embedding_loader.py +150 -0
- common/capabilities.py +121 -0
- common/case_memory.py +327 -0
- common/error_codes.py +61 -0
- common/exceptions.py +18 -0
- common/executor.py +1504 -0
- common/failure_diagnosis.py +138 -0
- common/history_manager.py +75 -0
- common/logs.py +168 -0
- common/mcp_server.py +467 -0
- common/preflight.py +496 -0
- common/progress.py +37 -0
- common/run_reporter.py +415 -0
- common/run_resume.py +149 -0
- common/runtime_modes.py +35 -0
- common/tool_protocol.py +196 -0
- common/visual_fallback.py +71 -0
- common/workflow_schema.py +150 -0
- config/__init__.py +0 -0
- config/config.py +167 -0
- config/env_loader.py +76 -0
- screenforge-0.4.0.dist-info/METADATA +43 -0
- screenforge-0.4.0.dist-info/RECORD +64 -0
- screenforge-0.4.0.dist-info/WHEEL +5 -0
- screenforge-0.4.0.dist-info/entry_points.txt +2 -0
- screenforge-0.4.0.dist-info/licenses/LICENSE +21 -0
- screenforge-0.4.0.dist-info/top_level.txt +4 -0
- utils/__init__.py +0 -0
- utils/screenshot_annotator.py +60 -0
- utils/utils_ios.py +195 -0
- utils/utils_web.py +304 -0
- utils/utils_xml.py +218 -0
common/run_reporter.py
ADDED
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sys
|
|
3
|
+
import uuid
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, Optional
|
|
7
|
+
|
|
8
|
+
import config.config as config
|
|
9
|
+
from common.logs import log
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _now_iso() -> str:
|
|
13
|
+
return datetime.now().isoformat(timespec="seconds")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _resolve_project_root() -> Path:
    """Return the absolute project root used to anchor relative script paths.

    ``config.BASE_DIR`` is used when the config module defines it; otherwise
    the current working directory is used.
    """
    fallback = Path.cwd()
    base_dir = getattr(config, "BASE_DIR", fallback)
    return Path(base_dir).resolve()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _build_resume_commands(run_id: str, platform: str) -> Dict[str, str]:
|
|
21
|
+
if not str(run_id).strip() or not str(platform).strip():
|
|
22
|
+
return {}
|
|
23
|
+
|
|
24
|
+
return {
|
|
25
|
+
"plan_only": f"./.venv/bin/python agent_cli.py --resume-run-id {run_id} --platform {platform} --plan-only",
|
|
26
|
+
"dry_run": f"./.venv/bin/python agent_cli.py --resume-run-id {run_id} --platform {platform} --dry-run",
|
|
27
|
+
"run": f"./.venv/bin/python agent_cli.py --resume-run-id {run_id} --platform {platform}",
|
|
28
|
+
"doctor": f"./.venv/bin/python agent_cli.py --doctor --platform {platform}",
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _build_pytest_asset(
    output_script_path: str,
    run_id: str = "",
    platform: str = "",
    manifest_path: str = "",
) -> Dict[str, Any]:
    """Describe the generated pytest script and how to replay it.

    Args:
        output_script_path: Path of the generated script; relative paths are
            resolved against the project root.
        run_id: Run identifier used to build the resume commands.
        platform: Platform name used to build the resume commands.
        manifest_path: Path of the replay manifest, recorded verbatim.

    Returns:
        A dict with ``script_path``, ``pytest_target``, ``pytest_command``,
        ``manifest_path``, ``exists``, ``replay_ready`` and
        ``resume_commands``.
    """
    raw_path = Path(str(output_script_path))
    project_root = _resolve_project_root()
    if raw_path.is_absolute():
        resolved_path = raw_path
    else:
        resolved_path = (project_root / raw_path).resolve()

    try:
        # Prefer a project-relative target so the command works from the root.
        pytest_target = str(resolved_path.relative_to(project_root))
    except ValueError:
        # The script lives outside the project root: keep the path as given.
        pytest_target = str(resolved_path if raw_path.is_absolute() else raw_path)

    # Only a regular file can be replayed. A plain existence check would also
    # accept directories, e.g. an empty script path resolves to the project
    # root itself and would wrongly be reported as replay-ready.
    exists = resolved_path.is_file()
    return {
        "script_path": str(output_script_path),
        "pytest_target": pytest_target,
        "pytest_command": f"./.venv/bin/python -m pytest {pytest_target}",
        "manifest_path": str(manifest_path),
        "exists": exists,
        "replay_ready": exists,
        "resume_commands": _build_resume_commands(run_id, platform),
    }
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _build_failure_analysis(
|
|
60
|
+
run_id: str,
|
|
61
|
+
platform: str,
|
|
62
|
+
execution_mode: str,
|
|
63
|
+
status: str,
|
|
64
|
+
exit_code: int,
|
|
65
|
+
steps_executed: int,
|
|
66
|
+
last_error: str,
|
|
67
|
+
pytest_asset: Dict[str, Any],
|
|
68
|
+
) -> Optional[Dict[str, Any]]:
|
|
69
|
+
if status == "success" and exit_code == 0 and not str(last_error).strip():
|
|
70
|
+
return None
|
|
71
|
+
|
|
72
|
+
error_text = str(last_error or "").strip()
|
|
73
|
+
lowered_error = error_text.lower()
|
|
74
|
+
category = "execution_failure"
|
|
75
|
+
stage = "execution"
|
|
76
|
+
summary = "Execution failed; review context to determine next steps"
|
|
77
|
+
recovery_hint = "Run dry-run to verify current state before retrying."
|
|
78
|
+
recommended_mode = "dry_run"
|
|
79
|
+
retryable = True
|
|
80
|
+
|
|
81
|
+
if any(token in lowered_error for token in ("operation not permitted", "permission denied", " eperm", "environment restricts")):
|
|
82
|
+
category = "environment_restricted"
|
|
83
|
+
stage = "preflight"
|
|
84
|
+
summary = "Runtime environment restricts local device or browser connection"
|
|
85
|
+
recovery_hint = "Run doctor or dry-run in the host terminal to rule out sandbox permission issues."
|
|
86
|
+
recommended_mode = "doctor"
|
|
87
|
+
retryable = False
|
|
88
|
+
elif any(token in lowered_error for token in ("element not found", "not interactable", "locator cannot be resolved", "not found on the current page")):
|
|
89
|
+
category = "locator_resolution"
|
|
90
|
+
stage = "execution"
|
|
91
|
+
summary = "Target element not found or not interactable on the current page"
|
|
92
|
+
recovery_hint = "Run dry-run to verify locator strategy, then retry with resume context."
|
|
93
|
+
recommended_mode = "dry_run"
|
|
94
|
+
elif any(token in lowered_error for token in ("circuit breaker", "consecutive failures", "max step limit")):
|
|
95
|
+
category = "stagnation"
|
|
96
|
+
stage = "planning" if execution_mode in {"plan_only", "dry_run"} else "execution"
|
|
97
|
+
summary = "Execution hit retry limit or max step cap"
|
|
98
|
+
recovery_hint = "Run plan-only to refine strategy, then narrow the goal or add context."
|
|
99
|
+
recommended_mode = "plan_only"
|
|
100
|
+
elif any(token in lowered_error for token in ("api_key", "base_url", "config", "configuration validation failed")):
|
|
101
|
+
category = "configuration"
|
|
102
|
+
stage = "preflight"
|
|
103
|
+
summary = "Required configuration is incomplete or invalid"
|
|
104
|
+
recovery_hint = "Run doctor to fix configuration, then re-run."
|
|
105
|
+
recommended_mode = "doctor"
|
|
106
|
+
retryable = False
|
|
107
|
+
elif steps_executed == 0:
|
|
108
|
+
stage = "startup"
|
|
109
|
+
|
|
110
|
+
resume_commands = pytest_asset.get("resume_commands", {}) or _build_resume_commands(
|
|
111
|
+
run_id, platform
|
|
112
|
+
)
|
|
113
|
+
suggested_commands = []
|
|
114
|
+
if recommended_mode == "doctor":
|
|
115
|
+
suggested_commands.append(
|
|
116
|
+
resume_commands.get(
|
|
117
|
+
"doctor",
|
|
118
|
+
f"./.venv/bin/python agent_cli.py --doctor --platform {platform}",
|
|
119
|
+
)
|
|
120
|
+
)
|
|
121
|
+
elif recommended_mode == "plan_only":
|
|
122
|
+
suggested_commands.append(
|
|
123
|
+
resume_commands.get(
|
|
124
|
+
"plan_only",
|
|
125
|
+
f"./.venv/bin/python agent_cli.py --resume-run-id {run_id} --platform {platform} --plan-only",
|
|
126
|
+
)
|
|
127
|
+
)
|
|
128
|
+
else:
|
|
129
|
+
suggested_commands.append(
|
|
130
|
+
resume_commands.get(
|
|
131
|
+
"dry_run",
|
|
132
|
+
f"./.venv/bin/python agent_cli.py --resume-run-id {run_id} --platform {platform} --dry-run",
|
|
133
|
+
)
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
if pytest_asset.get("replay_ready") and retryable:
|
|
137
|
+
suggested_commands.append(pytest_asset["pytest_command"])
|
|
138
|
+
|
|
139
|
+
recommended_command = suggested_commands[0] if suggested_commands else ""
|
|
140
|
+
|
|
141
|
+
return {
|
|
142
|
+
"category": category,
|
|
143
|
+
"stage": stage,
|
|
144
|
+
"summary": summary,
|
|
145
|
+
"recovery_hint": recovery_hint,
|
|
146
|
+
"retryable": retryable,
|
|
147
|
+
"recommended_mode": recommended_mode,
|
|
148
|
+
"recommended_command": recommended_command,
|
|
149
|
+
"execution_mode": execution_mode,
|
|
150
|
+
"steps_executed": steps_executed,
|
|
151
|
+
"last_error": error_text,
|
|
152
|
+
"suggested_commands": suggested_commands,
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _build_pytest_replay_manifest(
|
|
157
|
+
summary: Dict[str, Any],
|
|
158
|
+
pytest_asset: Dict[str, Any],
|
|
159
|
+
failure_analysis: Optional[Dict[str, Any]],
|
|
160
|
+
summary_path: Path,
|
|
161
|
+
artifacts_path: Path,
|
|
162
|
+
) -> Dict[str, Any]:
|
|
163
|
+
return {
|
|
164
|
+
"run_id": summary.get("run_id", ""),
|
|
165
|
+
"generated_at": summary.get("finished_at", "") or _now_iso(),
|
|
166
|
+
"platform": summary.get("platform", ""),
|
|
167
|
+
"env": summary.get("env", ""),
|
|
168
|
+
"execution_mode": summary.get("execution_mode", ""),
|
|
169
|
+
"status": summary.get("status", ""),
|
|
170
|
+
"exit_code": summary.get("exit_code"),
|
|
171
|
+
"control_summary": summary.get("control_summary", {}) or {},
|
|
172
|
+
"output_script_path": summary.get("output_script_path", ""),
|
|
173
|
+
"summary_path": str(summary_path),
|
|
174
|
+
"artifacts_path": str(artifacts_path),
|
|
175
|
+
"failure_analysis": failure_analysis,
|
|
176
|
+
"pytest_asset": pytest_asset,
|
|
177
|
+
"resume_commands": pytest_asset.get("resume_commands", {}) or {},
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class RunReporter:
    """Write one run's summary, artifact index, step events and replay manifest.

    Each reporter owns a directory ``<base_dir>/<run_id>/`` containing
    ``summary.json``, ``artifacts.json``, ``steps.jsonl`` and, once the run
    is finalized, ``pytest_replay.json``. Screenshots go to a
    ``screenshots/`` sub-directory that is created on first use.
    """

    def __init__(
        self,
        goal: str,
        platform: str,
        env_name: str,
        output_script_path: str,
        json_output: bool = False,
        vision_enabled: bool = False,
        max_steps: int = 0,
        base_dir: Optional[str] = None,
        execution_mode: str = "run",
        resume_from_run_id: str = "",
        control_kind: str = "goal",
        control_label: str = "",
        control_source_ref: str = "",
    ):
        """Create the run directory and write the initial summary and artifact index.

        Args:
            goal: Goal text stored in the summary.
            platform: Platform name stored in the summary.
            env_name: Environment name, stored under the ``env`` key.
            output_script_path: Path of the script the run will generate.
            json_output: When true, every event is also printed to stdout
                as one JSON line.
            vision_enabled: Stored in the summary.
            max_steps: Stored in the summary.
            base_dir: Parent directory for run directories; defaults to
                ``config.RUN_REPORT_BASE_DIR``.
            execution_mode: Mode name stored in the summary.
            resume_from_run_id: Identifier of the run being resumed, if any.
            control_kind: Stored in ``control_summary``.
            control_label: Stored in ``control_summary``; falls back to
                ``goal`` when empty.
            control_source_ref: Stored in ``control_summary`` as
                ``source_ref``.
        """
        # Timestamp plus the first 8 characters of a random UUID.
        self.run_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}"
        self._json_output = json_output
        self._base_dir = Path(base_dir or config.RUN_REPORT_BASE_DIR) / self.run_id
        self._screenshots_dir = self._base_dir / "screenshots"
        self._steps_file = self._base_dir / "steps.jsonl"
        self._summary_file = self._base_dir / "summary.json"
        self._artifacts_file = self._base_dir / "artifacts.json"
        self._pytest_manifest_file = self._base_dir / "pytest_replay.json"
        self._finished = False
        self._output_script_path = str(output_script_path)

        self._base_dir.mkdir(parents=True, exist_ok=True)
        pytest_asset = _build_pytest_asset(
            output_script_path,
            run_id=self.run_id,
            platform=platform,
            manifest_path=str(self._pytest_manifest_file),
        )

        self._artifacts: Dict[str, Any] = {
            "generated_script": {
                "path": str(output_script_path),
                "pytest_target": pytest_asset["pytest_target"],
                "pytest_command": pytest_asset["pytest_command"],
                "manifest_path": pytest_asset["manifest_path"],
                "exists": pytest_asset["exists"],
                "replay_ready": pytest_asset["replay_ready"],
            },
            "pytest_manifest": {
                "path": str(self._pytest_manifest_file),
                # Flipped to True by finalize() once the manifest is on disk.
                "exists": False,
            },
            "steps_file": {"path": str(self._steps_file)},
            "summary_file": {"path": str(self._summary_file)},
            "screenshots": [],
            "videos": [],
        }
        self._summary: Dict[str, Any] = {
            "run_id": self.run_id,
            "goal": goal,
            "platform": platform,
            "env": env_name,
            "execution_mode": execution_mode,
            "resume_from_run_id": resume_from_run_id,
            "resume_context_available": bool(resume_from_run_id),
            "plan_preview": None,
            "dry_run_preview": None,
            "doctor_summary": None,
            "failure_analysis": None,
            "case_memory_entry": None,
            "pytest_asset": pytest_asset,
            "control_summary": {
                "control_kind": control_kind,
                "control_label": control_label or goal,
                "source_ref": control_source_ref,
                "execution_mode": execution_mode,
            },
            "vision_enabled": vision_enabled,
            "max_steps": max_steps,
            "output_script_path": str(output_script_path),
            "status": "running",
            "exit_code": None,
            "steps_executed": 0,
            "last_error": "",
            "started_at": _now_iso(),
            "finished_at": None,
            "artifacts_path": str(self._artifacts_file),
        }
        self._write_json(self._artifacts_file, self._artifacts)
        self._write_json(self._summary_file, self._summary)

    def update_summary(self, **fields: Any) -> None:
        """Merge ``fields`` into the summary and rewrite ``summary.json``."""
        self._summary.update(fields)
        self._write_json(self._summary_file, self._summary)

    def update_control_summary(self, **fields: Any) -> None:
        """Merge ``fields`` into ``control_summary`` and rewrite ``summary.json``."""
        control_summary = self._summary.setdefault("control_summary", {})
        control_summary.update(fields)
        self._write_json(self._summary_file, self._summary)

    @property
    def run_dir(self) -> Path:
        """Directory that holds every file of this run."""
        return self._base_dir

    def emit_event(self, event: str, **payload: Any) -> None:
        """Append an event record to ``steps.jsonl``.

        The record holds a timestamp, the run id, the event name and the
        given payload. With JSON output enabled the same line is printed to
        stdout. Failures are logged as warnings instead of being raised.
        """
        record = {
            "timestamp": _now_iso(),
            "run_id": self.run_id,
            "event": event,
            **payload,
        }
        try:
            # Serialise once and reuse the text for the file and for stdout.
            line = json.dumps(record, ensure_ascii=False)
            with self._steps_file.open("a", encoding="utf-8") as f:
                f.write(line + "\n")
            if self._json_output:
                print(line, file=sys.stdout, flush=True)
        except Exception as e:
            log.warning(f"⚠️ [Warning] Failed to write run event: {e}")

    def save_screenshot(
        self, img_bytes: bytes, step_index: int, name: Optional[str] = None
    ) -> str:
        """Store a screenshot and register it in the artifact index.

        Args:
            img_bytes: Raw image data; nothing is written when it is empty.
            step_index: Step number recorded with the artifact.
            name: File name to use; defaults to ``step_<index>.png`` with a
                zero-padded three-digit index.

        Returns:
            The path of the written file, or ``""`` when there was nothing
            to save or saving failed.
        """
        if not img_bytes:
            return ""

        self._screenshots_dir.mkdir(parents=True, exist_ok=True)
        file_name = name or f"step_{step_index:03d}.png"
        screenshot_path = self._screenshots_dir / file_name
        try:
            screenshot_path.write_bytes(img_bytes)
            artifact = {
                "path": str(screenshot_path),
                "step": step_index,
                "name": file_name,
            }
            self._artifacts["screenshots"].append(artifact)
            self._write_json(self._artifacts_file, self._artifacts)
            self.emit_event(
                "artifact_saved",
                artifact_type="screenshot",
                step=step_index,
                path=str(screenshot_path),
            )
            return str(screenshot_path)
        except Exception as e:
            log.warning(f"⚠️ [Warning] Failed to save screenshot: {e}")
            return ""

    def finalize(
        self,
        status: str,
        exit_code: int,
        steps_executed: int,
        last_error: str = "",
    ) -> None:
        """Record the final outcome and write all run files.

        Refreshes the pytest asset, classifies the failure (if any), updates
        case memory on a best-effort basis, writes the summary, replay
        manifest and artifact index, and emits a ``run_finished`` event.
        Calls after the first completed one are ignored.

        Args:
            status: Final run status.
            exit_code: Final exit code.
            steps_executed: Number of steps that were executed.
            last_error: Last error message, empty when there was none.
        """
        if self._finished:
            return

        # Rebuilt here because the script may not have existed at start-up.
        pytest_asset = _build_pytest_asset(
            self._output_script_path,
            run_id=self.run_id,
            platform=self._summary.get("platform", ""),
            manifest_path=str(self._pytest_manifest_file),
        )
        self._summary["pytest_asset"] = pytest_asset
        self._artifacts["generated_script"].update(
            {
                "pytest_target": pytest_asset["pytest_target"],
                "pytest_command": pytest_asset["pytest_command"],
                "manifest_path": pytest_asset["manifest_path"],
                "exists": pytest_asset["exists"],
                "replay_ready": pytest_asset["replay_ready"],
            }
        )
        failure_analysis = _build_failure_analysis(
            run_id=self.run_id,
            platform=self._summary.get("platform", ""),
            execution_mode=self._summary.get("execution_mode", "run"),
            status=status,
            exit_code=exit_code,
            steps_executed=steps_executed,
            last_error=last_error,
            pytest_asset=pytest_asset,
        )
        self._summary["failure_analysis"] = failure_analysis
        self._summary.update(
            {
                "status": status,
                "exit_code": exit_code,
                "steps_executed": steps_executed,
                "last_error": last_error,
                "finished_at": _now_iso(),
            }
        )
        try:
            from common.case_memory import CaseMemoryStore

            step_records = []
            if self._steps_file.exists():
                with self._steps_file.open("r", encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        step_records.append(json.loads(line))
            self._summary["case_memory_entry"] = CaseMemoryStore().upsert_from_run(
                self._summary,
                step_records,
            )
        except Exception as e:
            # Case memory is best-effort; a failure must not block the report.
            log.warning(f"⚠️ [Warning] Failed to update case memory: {e}")
        replay_manifest = _build_pytest_replay_manifest(
            summary=self._summary,
            pytest_asset=pytest_asset,
            failure_analysis=failure_analysis,
            summary_path=self._summary_file,
            artifacts_path=self._artifacts_file,
        )
        self._write_json(self._summary_file, self._summary)
        # Write the manifest before the index claims it exists, so a failed
        # manifest write never leaves artifacts.json pointing at a missing file.
        self._write_json(self._pytest_manifest_file, replay_manifest)
        self._artifacts["pytest_manifest"]["exists"] = True
        self._write_json(self._artifacts_file, self._artifacts)
        self.emit_event(
            "run_finished",
            status=status,
            exit_code=exit_code,
            steps_executed=steps_executed,
            last_error=last_error,
        )
        self._finished = True

    @staticmethod
    def _write_json(file_path: Path, data: Dict[str, Any]) -> None:
        """Write ``data`` as indented UTF-8 JSON via a sibling ``.tmp`` file.

        The content is written to ``<file_path>.tmp`` first and then moved
        over ``file_path``, so the target is replaced in a single step.
        """
        tmp_path = file_path.with_suffix(file_path.suffix + ".tmp")
        tmp_path.write_text(
            json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8"
        )
        tmp_path.replace(file_path)
|
common/run_resume.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class RunContextLoadError(Exception):
    """Raised when a run directory's stored record is missing or cannot be read."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _read_summary(summary_file: Path) -> Dict[str, Any]:
|
|
11
|
+
if not summary_file.exists():
|
|
12
|
+
raise RunContextLoadError(f"No recoverable run record found: {summary_file.parent}")
|
|
13
|
+
data: Dict[str, Any] = json.loads(summary_file.read_text(encoding="utf-8"))
|
|
14
|
+
return data
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _read_optional_json(file_path: Path) -> Dict[str, Any]:
|
|
18
|
+
if not file_path.exists():
|
|
19
|
+
return {}
|
|
20
|
+
data: Dict[str, Any] = json.loads(file_path.read_text(encoding="utf-8"))
|
|
21
|
+
return data
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _read_steps(steps_file: Path) -> List[Dict[str, object]]:
|
|
25
|
+
if not steps_file.exists():
|
|
26
|
+
return []
|
|
27
|
+
|
|
28
|
+
records: List[Dict[str, object]] = []
|
|
29
|
+
with steps_file.open("r", encoding="utf-8") as f:
|
|
30
|
+
for line in f:
|
|
31
|
+
line = line.strip()
|
|
32
|
+
if not line:
|
|
33
|
+
continue
|
|
34
|
+
records.append(json.loads(line))
|
|
35
|
+
return records
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _build_recommended_next_step(
|
|
39
|
+
failure_analysis: Dict[str, Any] | None,
|
|
40
|
+
resume_commands: Dict[str, Any] | None,
|
|
41
|
+
) -> Dict[str, Any] | None:
|
|
42
|
+
failure_analysis = failure_analysis or {}
|
|
43
|
+
if not failure_analysis:
|
|
44
|
+
return None
|
|
45
|
+
|
|
46
|
+
resume_commands = resume_commands or {}
|
|
47
|
+
recommended_mode = str(failure_analysis.get("recommended_mode", "")).strip()
|
|
48
|
+
if not recommended_mode:
|
|
49
|
+
category = str(failure_analysis.get("category", "")).strip()
|
|
50
|
+
if category in {"configuration", "environment_restricted"}:
|
|
51
|
+
recommended_mode = "doctor"
|
|
52
|
+
elif category == "stagnation":
|
|
53
|
+
recommended_mode = "plan_only"
|
|
54
|
+
else:
|
|
55
|
+
recommended_mode = "dry_run"
|
|
56
|
+
recommended_command = str(failure_analysis.get("recommended_command", "")).strip()
|
|
57
|
+
if not recommended_command and recommended_mode:
|
|
58
|
+
recommended_command = str(resume_commands.get(recommended_mode, "")).strip()
|
|
59
|
+
|
|
60
|
+
return {
|
|
61
|
+
"category": failure_analysis.get("category", ""),
|
|
62
|
+
"stage": failure_analysis.get("stage", ""),
|
|
63
|
+
"summary": failure_analysis.get("summary", ""),
|
|
64
|
+
"retryable": failure_analysis.get("retryable"),
|
|
65
|
+
"recommended_mode": recommended_mode,
|
|
66
|
+
"recommended_command": recommended_command,
|
|
67
|
+
"recovery_hint": failure_analysis.get("recovery_hint", ""),
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def load_run_context(run_dir: Path) -> Dict[str, Any]:
    """Load the information needed to resume a previous run.

    Args:
        run_dir: Directory containing ``summary.json`` and, optionally,
            ``steps.jsonl``.

    Returns:
        A dict with selected summary fields plus ``successful_actions``
        (descriptions of executed actions that succeeded) and
        ``latest_screenshot_path`` (path of the last saved screenshot).

    Raises:
        RunContextLoadError: If the summary is missing or the files cannot
            be read.
    """
    run_dir = Path(run_dir)
    try:
        summary = _read_summary(run_dir / "summary.json")
        steps = _read_steps(run_dir / "steps.jsonl")
    except RunContextLoadError:
        raise
    except Exception as exc:
        raise RunContextLoadError(f"Failed to read run record: {run_dir}") from exc

    successful_actions = []
    latest_screenshot_path: str = ""
    for record in steps:
        event = record.get("event")
        if event == "action_executed":
            # Only strictly-True successes with a description are kept.
            if record.get("success") is True and record.get("action_description"):
                successful_actions.append(record.get("action_description", ""))
        elif event == "artifact_saved" and record.get("artifact_type") == "screenshot":
            # Later screenshots win; an empty path keeps the previous value.
            candidate = record.get("path", "")
            if candidate:
                latest_screenshot_path = str(candidate)

    context: Dict[str, Any] = {
        key: summary.get(key, "")
        for key in ("run_id", "goal", "platform", "env", "status", "last_error")
    }
    for key in ("failure_analysis", "pytest_asset", "control_summary"):
        context[key] = summary.get(key, {}) or {}
    context["successful_actions"] = successful_actions
    context["latest_screenshot_path"] = latest_screenshot_path
    return context
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def load_run_bundle(run_dir: Path) -> Dict[str, Any]:
    """Load every stored file of a run together with derived resume data.

    Args:
        run_dir: Directory containing the run's ``summary.json`` and,
            optionally, ``artifacts.json``, ``pytest_replay.json`` and
            ``steps.jsonl``.

    Returns:
        A dict with ``run_id``, ``run_dir``, ``summary``, ``artifacts``,
        ``pytest_replay``, ``resume_context`` and ``run_assets``.

    Raises:
        RunContextLoadError: If the summary is missing or any of the files
            cannot be read or parsed.
    """
    run_dir = Path(run_dir)
    summary_file = run_dir / "summary.json"
    artifacts_file = run_dir / "artifacts.json"
    pytest_replay_file = run_dir / "pytest_replay.json"

    # Report unreadable or corrupt files the same way load_run_context does,
    # so callers only have to handle RunContextLoadError.
    try:
        summary = _read_summary(summary_file)
        artifacts = _read_optional_json(artifacts_file)
        pytest_replay = _read_optional_json(pytest_replay_file)
    except RunContextLoadError:
        raise
    except Exception as exc:
        raise RunContextLoadError(f"Failed to read run record: {run_dir}") from exc
    resume_context = load_run_context(run_dir)

    pytest_asset: Dict[str, Any] = summary.get("pytest_asset", {}) or {}
    failure_analysis: Dict[str, Any] | None = summary.get("failure_analysis", None)
    # Commands from the replay manifest override those stored in the summary.
    resume_commands: Dict[str, Any] = {
        **(pytest_asset.get("resume_commands", {}) or {}),
        **(pytest_replay.get("resume_commands", {}) or {}),
    }
    run_assets: Dict[str, Any] = {
        "summary_path": str(summary_file),
        "artifacts_path": str(artifacts_file) if artifacts_file.exists() else "",
        # Fall back to the manifest path recorded in the summary when the
        # manifest is not in the run directory.
        "pytest_replay_path": str(pytest_replay_file)
        if pytest_replay_file.exists()
        else str(pytest_asset.get("manifest_path", "") or ""),
        "failure_analysis": failure_analysis,
        "pytest_asset": pytest_asset,
        "resume_commands": resume_commands,
        "recommended_next_step": _build_recommended_next_step(
            failure_analysis,
            resume_commands,
        ),
    }

    return {
        "run_id": summary.get("run_id", ""),
        "run_dir": str(run_dir),
        "summary": summary,
        "artifacts": artifacts,
        "pytest_replay": pytest_replay,
        "resume_context": resume_context,
        "run_assets": run_assets,
    }
|
common/runtime_modes.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Execution mode identifiers.
MODE_RUN = "run"  # default when no mode flag is set
MODE_DOCTOR = "doctor"
MODE_PLAN_ONLY = "plan_only"
MODE_DRY_RUN = "dry_run"
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def validate_mode_conflicts(
    doctor: bool,
    plan_only: bool,
    dry_run: bool,
) -> None:
    """Reject mode flag combinations that cannot be used together.

    Args:
        doctor: Whether ``--doctor`` was given.
        plan_only: Whether ``--plan-only`` was given.
        dry_run: Whether ``--dry-run`` was given.

    Raises:
        ValueError: If ``doctor`` is combined with either other flag, or if
            ``plan_only`` and ``dry_run`` are both set.
    """
    preview_requested = plan_only or dry_run
    if doctor and preview_requested:
        raise ValueError("--doctor cannot be combined with --plan-only or --dry-run")
    if plan_only and dry_run:
        raise ValueError("--plan-only and --dry-run are mutually exclusive")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def resolve_execution_mode(
    doctor: bool,
    plan_only: bool,
    dry_run: bool,
) -> str:
    """Translate the mode flags into a single execution mode identifier.

    Args:
        doctor: Whether ``--doctor`` was given.
        plan_only: Whether ``--plan-only`` was given.
        dry_run: Whether ``--dry-run`` was given.

    Returns:
        The mode constant for the flag that is set, or ``MODE_RUN`` when
        none is.

    Raises:
        ValueError: If the flags conflict with each other.
    """
    validate_mode_conflicts(doctor=doctor, plan_only=plan_only, dry_run=dry_run)

    flag_to_mode = (
        (doctor, MODE_DOCTOR),
        (plan_only, MODE_PLAN_ONLY),
        (dry_run, MODE_DRY_RUN),
    )
    for flag, mode in flag_to_mode:
        if flag:
            return mode
    return MODE_RUN
|