agent-apprenticeship 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +217 -0
- package/bin/agent-apprenticeship.js +131 -0
- package/package.json +30 -0
- package/pyproject.toml +23 -0
- package/src/agent_apprenticeship_trace/__init__.py +2 -0
- package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
- package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
- package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
- package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
- package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
- package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
- package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
- package/src/agent_apprenticeship_trace/certification.py +580 -0
- package/src/agent_apprenticeship_trace/cli.py +2979 -0
- package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
- package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
- package/src/agent_apprenticeship_trace/config.py +609 -0
- package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
- package/src/agent_apprenticeship_trace/env.py +46 -0
- package/src/agent_apprenticeship_trace/evaluator.py +64 -0
- package/src/agent_apprenticeship_trace/grader.py +194 -0
- package/src/agent_apprenticeship_trace/integration_status.py +193 -0
- package/src/agent_apprenticeship_trace/io.py +20 -0
- package/src/agent_apprenticeship_trace/learning.py +627 -0
- package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
- package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
- package/src/agent_apprenticeship_trace/loop.py +111 -0
- package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
- package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
- package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
- package/src/agent_apprenticeship_trace/progress.py +223 -0
- package/src/agent_apprenticeship_trace/public_run.py +1109 -0
- package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
- package/src/agent_apprenticeship_trace/recipes.py +129 -0
- package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
- package/src/agent_apprenticeship_trace/revision.py +21 -0
- package/src/agent_apprenticeship_trace/role_runners.py +7 -0
- package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
- package/src/agent_apprenticeship_trace/schemas.py +273 -0
- package/src/agent_apprenticeship_trace/session_events.py +99 -0
- package/src/agent_apprenticeship_trace/task_intake.py +112 -0
- package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
- package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
- package/src/agent_apprenticeship_trace/training_signals.py +30 -0
- package/src/agent_apprenticeship_trace/validation.py +210 -0
- package/src/agent_apprenticeship_trace/verifier.py +55 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from .actual_outputs_normalizer import ActualOutputsNormalizationContext, repair_actual_outputs_file, write_actual_outputs_normalization
|
|
11
|
+
from .contract_diagnostics import build_contract_diagnostics, diagnostics_text
|
|
12
|
+
from .codex_runner import (
|
|
13
|
+
AttemptResult,
|
|
14
|
+
_attempt_dir,
|
|
15
|
+
_copy_attempt_inputs,
|
|
16
|
+
_deliverables,
|
|
17
|
+
ensure_attempt_outputs,
|
|
18
|
+
)
|
|
19
|
+
from .config import get_settings
|
|
20
|
+
from .command_discovery import resolve_command
|
|
21
|
+
from .env import redact_secrets
|
|
22
|
+
from .io import write_json
|
|
23
|
+
from .recipes import WORKER_AGENT_RECIPES
|
|
24
|
+
from .schemas import ActualOutputs, RawTaskRecord, TaskIntakeSpec
|
|
25
|
+
from .trace_prompt import build_worker_prompt
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class AgentInvocation:
    """Command line selected for one external agent run, or the reason none could be built."""

    # Full argument vector handed to subprocess.run; empty when no usable mode was found.
    argv: list[str]
    # Short label describing the invocation style that was chosen (e.g. "claude -p").
    mode: str
    # Explanation of why the adapter cannot run headlessly; None when argv is usable.
    unsupported_reason: str | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _help_text(command: str, *args: str) -> str:
|
|
36
|
+
try:
|
|
37
|
+
cp = subprocess.run([command, *args, "--help"], cwd=None, text=True, capture_output=True, timeout=5)
|
|
38
|
+
return f"{cp.stdout or ''}\n{cp.stderr or ''}"
|
|
39
|
+
except Exception:
|
|
40
|
+
return ""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _first_json_array(text: str) -> list[object] | None:
|
|
44
|
+
decoder = json.JSONDecoder()
|
|
45
|
+
start = text.find("[")
|
|
46
|
+
while start != -1:
|
|
47
|
+
try:
|
|
48
|
+
value, _ = decoder.raw_decode(text[start:])
|
|
49
|
+
except json.JSONDecodeError:
|
|
50
|
+
start = text.find("[", start + 1)
|
|
51
|
+
continue
|
|
52
|
+
return value if isinstance(value, list) else None
|
|
53
|
+
return None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _openclaw_default_agent_id(command: str) -> str | None:
|
|
57
|
+
try:
|
|
58
|
+
cp = subprocess.run([command, "agents", "list", "--json"], cwd=None, text=True, capture_output=True, timeout=5)
|
|
59
|
+
except Exception:
|
|
60
|
+
return None
|
|
61
|
+
if cp.returncode != 0:
|
|
62
|
+
return None
|
|
63
|
+
agents = _first_json_array(f"{cp.stdout or ''}\n{cp.stderr or ''}")
|
|
64
|
+
if not agents:
|
|
65
|
+
return None
|
|
66
|
+
dict_agents = [a for a in agents if isinstance(a, dict) and a.get("id")]
|
|
67
|
+
for agent in dict_agents:
|
|
68
|
+
if agent.get("isDefault") is True:
|
|
69
|
+
return str(agent["id"])
|
|
70
|
+
return str(dict_agents[0]["id"]) if dict_agents else None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _has_any(text: str, *needles: str) -> bool:
|
|
74
|
+
low = text.lower()
|
|
75
|
+
return any(n.lower() in low for n in needles)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _has_subcommand(help_text: str, command_name: str) -> bool:
|
|
79
|
+
pattern = rf"(?m)^\s{{1,}}{re.escape(command_name)}(?:\s|$)"
|
|
80
|
+
return (
|
|
81
|
+
bool(re.search(pattern, help_text))
|
|
82
|
+
or bool(re.search(rf"(?m)^\s*{re.escape(command_name)}\s+\*", help_text))
|
|
83
|
+
or bool(re.search(rf"(?m)^\s*\S+\s+{re.escape(command_name)}(?:\s|$)", help_text))
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _with_workspace(argv: list[str], help_text: str, workspace: Path) -> list[str]:
|
|
88
|
+
if "--workspace" in help_text:
|
|
89
|
+
return [*argv, "--workspace", str(workspace)]
|
|
90
|
+
if "--cwd" in help_text:
|
|
91
|
+
return [*argv, "--cwd", str(workspace)]
|
|
92
|
+
if "--cd" in help_text:
|
|
93
|
+
return [*argv, "--cd", str(workspace)]
|
|
94
|
+
if "--dir" in help_text:
|
|
95
|
+
return [*argv, "--dir", str(workspace)]
|
|
96
|
+
return argv
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def build_agent_invocation(agent_id: str, command: str, workspace: Path, prompt_file: Path, prompt: str) -> AgentInvocation:
    """Build the headless command line for one supported external agent CLI.

    The installed CLI is probed through its ``--help`` output (and, where
    needed, the help of a subcommand) and only flags that appear in that text
    are used.

    Args:
        agent_id: Adapter key; "cursor", "claude-code", "opencode", "openclaw"
            and "hermes-agent" are handled here.
        command: Executable to invoke.
        workspace: Directory passed via a workspace/cwd style flag when the CLI
            advertises one.
        prompt_file: Path given to ``--prompt-file`` when that flag is available.
        prompt: Prompt text passed inline when no prompt-file flag is available.

    Returns:
        An AgentInvocation. When no usable headless mode is found, ``argv`` is
        empty and ``unsupported_reason`` explains why.
    """
    help_text = _help_text(command)
    if agent_id == "cursor":
        base = _with_workspace([command], help_text, workspace)
        if "--trust" in help_text:
            base.append("--trust")
        if "--force" in help_text:
            base.append("--force")
        # Prefer a prompt file, then inline prompt flags, then the `run` subcommand.
        if "--prompt-file" in help_text:
            return AgentInvocation([*base, "--prompt-file", str(prompt_file)], "cursor-agent --prompt-file")
        if "--prompt" in help_text:
            return AgentInvocation([*base, "--prompt", prompt], "cursor-agent --prompt")
        if " -p" in help_text or "-p," in help_text:
            return AgentInvocation([*base, "-p", prompt], "cursor-agent -p")
        if _has_subcommand(help_text, "run"):
            run_help = _help_text(command, "run")
            if "--prompt-file" in run_help:
                return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), "--prompt-file", str(prompt_file)], "cursor-agent run --prompt-file")
            return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), prompt], "cursor-agent run")
        return AgentInvocation([], "cursor-agent", "Cursor headless mode unavailable: cursor-agent help did not expose --prompt-file, --prompt, -p, or run.")

    if agent_id == "claude-code":
        base = [command]
        if "--permission-mode" in help_text:
            base.extend(["--permission-mode", "bypassPermissions"])
        if "--output-format" in help_text:
            base.extend(["--output-format", "text"])
        if "--max-budget-usd" in help_text:
            base.extend(["--max-budget-usd", "1"])
        if " -p" in help_text or "-p," in help_text:
            return AgentInvocation([*base, "-p", prompt], "claude -p")
        if "--print" in help_text:
            return AgentInvocation([*base, "--print", prompt], "claude --print")
        return AgentInvocation([], "claude", "Claude Code headless mode unavailable: claude --help did not expose -p or --print.")

    if agent_id == "opencode":
        # Only probe `run --help` when the top-level help lists a run subcommand.
        run_help = _help_text(command, "run") if _has_subcommand(help_text, "run") else ""
        if run_help:
            if "--prompt-file" in run_help:
                return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), "--prompt-file", str(prompt_file)], "opencode run --prompt-file")
            if "--prompt" in run_help:
                return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), "--prompt", prompt], "opencode run --prompt")
            return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), prompt], "opencode run")
        return AgentInvocation([], "opencode", "OpenCode headless mode unavailable: opencode --help did not expose a run command.")

    if agent_id == "openclaw":
        if _has_subcommand(help_text, "agent"):
            agent_help = _help_text(command, "agent")
            if "--message" in agent_help or "-m," in agent_help:
                settings = get_settings()
                # A configured worker_agent_model wins; otherwise ask the CLI for its default agent.
                agent_ref = settings.worker_agent_model or _openclaw_default_agent_id(command)
                if not agent_ref and "--agent" in agent_help:
                    return AgentInvocation(
                        [],
                        "openclaw agent",
                        "OpenClaw setup required: no configured OpenClaw agent was found. Run `openclaw setup` or `openclaw agents add`, then rerun the smoke.",
                    )
                argv = [command, "agent"]
                if "--local" in agent_help:
                    argv.append("--local")
                if "--json" in agent_help:
                    argv.append("--json")
                if "--agent" in agent_help and agent_ref:
                    argv.extend(["--agent", agent_ref])
                if "--timeout" in agent_help:
                    argv.extend(["--timeout", str(settings.task_timeout_seconds)])
                argv.extend(["--message", prompt])
                # NOTE(review): the mode label always mentions --local, even when that flag was not appended.
                return AgentInvocation(argv, "openclaw agent --local --message")
            return AgentInvocation([], "openclaw agent", "OpenClaw headless execution is unavailable in this installed version: openclaw agent --help did not expose --message.")
        # No `agent` subcommand: fall back to the first of run/exec/session that is listed.
        for sub in ("run", "exec", "session"):
            if _has_subcommand(help_text, sub):
                sub_help = _help_text(command, sub)
                if "--prompt-file" in sub_help:
                    return AgentInvocation([*_with_workspace([command, sub], sub_help, workspace), "--prompt-file", str(prompt_file)], f"openclaw {sub} --prompt-file")
                if "--prompt" in sub_help:
                    return AgentInvocation([*_with_workspace([command, sub], sub_help, workspace), "--prompt", prompt], f"openclaw {sub} --prompt")
                return AgentInvocation([*_with_workspace([command, sub], sub_help, workspace), prompt], f"openclaw {sub}")
        return AgentInvocation([], "openclaw", "OpenClaw headless execution is unavailable in this installed version: openclaw --help did not expose agent, run, exec, or session.")

    if agent_id == "hermes-agent":
        for sub in ("run", "chat"):
            if _has_subcommand(help_text, sub):
                sub_help = _help_text(command, sub)
                base = [command, sub]
                if "--quiet" in sub_help:
                    base.append("--quiet")
                if "--yolo" in sub_help:
                    base.append("--yolo")
                if "--max-turns" in sub_help:
                    base.extend(["--max-turns", "12"])
                # Prompt delivery preference: file, --prompt, --query, -q, then positional.
                if "--prompt-file" in sub_help:
                    return AgentInvocation([*_with_workspace(base, sub_help, workspace), "--prompt-file", str(prompt_file)], f"hermes {sub} --prompt-file")
                if "--prompt" in sub_help:
                    return AgentInvocation([*_with_workspace(base, sub_help, workspace), "--prompt", prompt], f"hermes {sub} --prompt")
                if "--query" in sub_help:
                    return AgentInvocation([*_with_workspace(base, sub_help, workspace), "--query", prompt], f"hermes {sub} --query")
                if " -q" in sub_help or "-q," in sub_help:
                    return AgentInvocation([*_with_workspace(base, sub_help, workspace), "-q", prompt], f"hermes {sub} -q")
                return AgentInvocation([*_with_workspace(base, sub_help, workspace), prompt], f"hermes {sub}")
        return AgentInvocation([], "hermes", "Hermes Agent headless mode unavailable: hermes --help did not expose run or chat.")

    # Any other adapter id is reported as unsupported rather than raising.
    return AgentInvocation([], agent_id, f"Unsupported Apprentice Agent adapter: {agent_id}")
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def classify_agent_failure(agent_id: str, display_name: str, error: object | None, stdout: str = "", stderr: str = "", returncode: int | None = None) -> str | None:
    """Map a failed external agent run to a short, user-facing explanation.

    The error object, stdout and stderr are lowercased and scanned for known
    phrases. Checks run in a fixed order and the first match wins: command not
    found, headless mode unavailable, timeout, permission error, authentication
    or API key setup, provider/model setup, OpenClaw agent setup, quota or
    credits, non-zero exit code, and finally any remaining error.

    Args:
        agent_id: Adapter key, used to look up the expected command name.
        display_name: Human-readable agent name used in the messages.
        error: Exception (or other object) raised while running, if any.
        stdout: Captured standard output of the agent process.
        stderr: Captured standard error of the agent process.
        returncode: Process exit code, or None when the process never finished.

    Returns:
        A message describing the failure, or None when nothing indicates one.
    """
    text = f"{error or ''}\n{stdout or ''}\n{stderr or ''}".lower()
    command = WORKER_AGENT_RECIPES[agent_id].command_name if agent_id in WORKER_AGENT_RECIPES else agent_id
    # ``text`` is lowercased, so the command must be lowercased too or a
    # mixed-case command name could never match.
    command_lc = str(command).lower()
    if isinstance(error, FileNotFoundError) or f"no such file or directory: '{command_lc}'" in text or f"no such file or directory: {command_lc}" in text:
        return f"Apprentice Agent command not found: {command}"
    if "headless mode unavailable" in text or "headless execution is unavailable" in text:
        # This may echo the CLI's raw stderr, so redact it like the generic
        # operational-error branch below does.
        return redact_secrets(str(error or stderr)).strip()
    if isinstance(error, subprocess.TimeoutExpired) or "timed out" in text or "timeout" in text:
        return f"Apprentice Agent attempt timed out while running {display_name}."
    if _has_any(text, "eperm", "permission denied", "operation not permitted", "read-only file system"):
        return f"Apprentice Agent permission error while running {display_name}: the external CLI could not access a required file or directory."
    if _has_any(
        text,
        "not authenticated",
        "not logged in",
        "login required",
        "please login",
        "please log in",
        "authentication failed",
        "unauthorized",
        "invalid api key",
        "missing api key",
        "api key missing",
        "api key is missing",
        "api key not configured",
        "api key is not configured",
        "provider api key is missing",
        "google generative ai api key is missing",
    ):
        return f"{display_name} setup required: authentication, API key, or account setup is required."
    if _has_any(text, "setup required", "onboarding", "configure first", "model not configured", "provider not configured"):
        return f"{display_name} setup required: complete the provider/model setup before running."
    if agent_id == "openclaw" and _has_any(text, "unknown agent id", "pass --to", "session-id", "choose a session", "no configured openclaw agent"):
        return "OpenClaw setup required: configure an OpenClaw agent with `openclaw setup` or `openclaw agents add`, then rerun the smoke."
    if _has_any(text, "quota", "rate limit", "billing", "insufficient quota", "insufficient credits", "out of credits", "credit limit", "usage limit"):
        return f"{display_name} provider quota or credit limit reached."
    if returncode not in (None, 0):
        return f"Apprentice Agent exited before producing required outputs (exit code {returncode})."
    if error:
        return f"Apprentice Agent operational error: {redact_secrets(str(error))}"
    return None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def run_external_agent_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSpec, attempt_kind: str = "baseline", timeout: int | None = None) -> AttemptResult:
    """Run the configured external agent CLI once and record its outputs.

    Steps, in order: prepare the attempt directory and prompt, resolve and
    invoke the agent command, persist redacted stdout/stderr, collect contract
    diagnostics when the required JSON files are missing, let
    ensure_attempt_outputs produce the trace and actual outputs, annotate both
    with invocation metadata and any operational error, then normalize
    actual_outputs.json.

    Args:
        package_root: Root directory of the task package.
        raw: Raw task record, used to derive the deliverables.
        spec: Task intake spec (instruction and task id are read from it).
        attempt_kind: Attempt label used for the directory and attempt id.
        timeout: Seconds to wait for the agent; falls back to
            settings.task_timeout_seconds when falsy.

    Returns:
        AttemptResult describing the attempt directory, trace and outputs.
    """
    settings = get_settings()
    agent_id = settings.worker_agent
    recipe = WORKER_AGENT_RECIPES[agent_id]
    display = recipe.display_name
    command = settings.worker_agent_command or recipe.command_name
    d = _attempt_dir(package_root, attempt_kind)
    input_files = _copy_attempt_inputs(package_root, d)
    deliverables = _deliverables(raw, spec)
    # The worker-visible rubric is optional; an absent file means an empty rubric.
    rubric_md = (package_root / "rubric" / "worker_visible_rubric.md").read_text() if (package_root / "rubric" / "worker_visible_rubric.md").exists() else ""
    prompt = build_worker_prompt(spec.normalized_instruction, rubric_md, attempt_kind, input_files, deliverables, settings.sensitive_info_masking, workspace_path=str(d))
    prompt_file = d / "prompt.md"
    prompt_file.write_text(prompt)

    run_error: object | None = None
    returncode: int | None = None
    stdout = ""
    stderr = ""
    # Placeholder invocation so later metadata code works even if the command is never resolved.
    invocation = AgentInvocation([], agent_id)
    resolved_command = resolve_command(command)
    if not resolved_command:
        run_error = FileNotFoundError(command)
    else:
        invocation = build_agent_invocation(agent_id, resolved_command, d, prompt_file, prompt)
        if invocation.unsupported_reason:
            run_error = RuntimeError(invocation.unsupported_reason)
        else:
            try:
                cp = subprocess.run(invocation.argv, cwd=d, text=True, capture_output=True, timeout=timeout or settings.task_timeout_seconds)
                returncode = cp.returncode
                stdout = cp.stdout or ""
                stderr = cp.stderr or ""
                if cp.returncode != 0:
                    run_error = RuntimeError(classify_agent_failure(agent_id, display, None, stdout, stderr, cp.returncode) or f"{display} exited with code {cp.returncode}.")
            except Exception as exc:
                # Launch failures and timeouts are recorded, not raised.
                run_error = exc
    (d / "stdout.txt").write_text(redact_secrets(stdout))
    (d / "stderr.txt").write_text(redact_secrets(stderr if stderr else str(run_error or "")))
    # Keep only the last 4000 characters of whichever stream or error is available.
    (d / "final_message.txt").write_text(redact_secrets((stdout or stderr or str(run_error or ""))[-4000:]))
    # Checked before ensure_attempt_outputs runs, i.e. against what the agent itself left behind.
    contract_missing_before_repair = not (d / "agent_trace.json").exists() or not (d / "actual_outputs.json").exists()
    contract_diagnostics = None
    if contract_missing_before_repair:
        command_for_diagnostics = invocation.argv or [command]
        contract_diagnostics = build_contract_diagnostics(
            d,
            command=command_for_diagnostics,
            working_directory=d,
            agent_display_name=display,
            prompt=prompt,
        )
        with (d / "final_message.txt").open("a") as f:
            f.write("\n\n" + diagnostics_text(contract_diagnostics))

    trace, actual, trace_valid = ensure_attempt_outputs(package_root, spec, attempt_kind, prompt, agent_id, run_error if isinstance(run_error, Exception) else None)
    if actual.metadata_json is None:
        actual.metadata_json = {}
    actual.metadata_json["apprentice_agent"] = agent_id
    actual.metadata_json["apprentice_agent_display_name"] = display
    # The prompt text is replaced by a placeholder so it is not duplicated into metadata.
    actual.metadata_json["apprentice_agent_invocation"] = [part if part != prompt else "<prompt>" for part in invocation.argv]
    actual.metadata_json["apprentice_agent_invocation_mode"] = invocation.mode
    if contract_diagnostics:
        actual.metadata_json["apprentice_agent_contract_diagnostics"] = contract_diagnostics
    classified_error = classify_agent_failure(agent_id, display, run_error, stdout, stderr, returncode)
    output_contract_error = (
        f"Apprentice Agent output-contract failure: {display} did not produce required "
        "agent_trace.json and actual_outputs.json."
    )
    if not trace_valid:
        # With an invalid trace, only clearly operational failures keep their own
        # message; everything else is reported as an output-contract failure.
        operational_prefixes = (
            "Apprentice Agent command not found",
            f"{display} setup required",
            f"{display} provider quota",
            "Apprentice Agent timed out",
            "Apprentice Agent attempt timed out",
            "Apprentice Agent permission error",
        )
        if classified_error and (classified_error.startswith(operational_prefixes) or "headless mode unavailable" in classified_error or "headless execution is unavailable" in classified_error):
            op_error = classified_error
        else:
            op_error = output_contract_error
    else:
        op_error = classified_error
    # A non-zero exit after a valid, successful output gets a dedicated message.
    if op_error and returncode not in (None, 0) and trace_valid and actual.status == "success":
        op_error = f"Apprentice Agent exited nonzero after producing required outputs (exit code {returncode})."
    if op_error:
        actual.metadata_json["apprentice_agent_operational_error"] = op_error
        actual.error_message = op_error
    write_json(d / "actual_outputs.json", actual)

    # NOTE(review): unlike actual.metadata_json above, trace.metadata_json is not
    # checked for None here - confirm the schema guarantees a dict.
    trace.metadata_json["apprentice_agent"] = agent_id
    trace.metadata_json["apprentice_agent_invocation"] = [part if part != prompt else "<prompt>" for part in invocation.argv]
    trace.metadata_json["trace_valid"] = trace_valid
    if op_error:
        trace.metadata_json["apprentice_agent_operational_error"] = op_error
    if contract_diagnostics:
        trace.metadata_json["apprentice_agent_contract_diagnostics"] = contract_diagnostics
    write_json(d / "agent_trace.json", trace)
    actual_ctx = ActualOutputsNormalizationContext(task_id=spec.task_id, attempt_id=f"{spec.task_id}_{attempt_kind}", attempt_kind=attempt_kind, package_root=package_root, required_artifacts=deliverables)
    actual_result = repair_actual_outputs_file(d / "actual_outputs.json", actual_ctx)
    write_actual_outputs_normalization(d, actual_result)
    # Prefer the repaired outputs when the repair step produced any.
    if actual_result.actual_outputs is not None:
        actual = ActualOutputs.model_validate(actual_result.actual_outputs)
    return AttemptResult(attempt_dir=str(d), trace_valid=trace_valid, trace=trace, actual_outputs=actual, apprentice_agent=agent_id)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import mimetypes, shutil
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from .schemas import ArtifactRef
|
|
5
|
+
from .io import sha256_file
|
|
6
|
+
from .env import contains_secret
|
|
7
|
+
|
|
8
|
+
def media_type_for(path: Path) -> str:
    """Classify ``path`` as 'text', 'data', 'code' or 'unknown' by its lowercased suffix."""
    categories = {
        'text': ('.txt', '.md', '.log'),
        'data': ('.json', '.jsonl', '.csv', '.yaml', '.yml'),
        'code': ('.py', '.js', '.ts', '.sh', '.html', '.css'),
    }
    suffix = path.suffix.lower()
    for label, suffixes in categories.items():
        if suffix in suffixes:
            return label
    return 'unknown'
|
|
13
|
+
|
|
14
|
+
def artifact_ref(path: Path, package_root: Path, task_id: str, attempt_id: str | None, kind='output', role='other') -> ArtifactRef:
    """Describe ``path`` as an ArtifactRef.

    The package-relative path is used when ``path`` lies under
    ``package_root``; otherwise only the file name is used. Size and content
    hash are filled in only for existing files. Files of 2,000,000 bytes or
    more are not read, so the secret scan for them runs on an empty string.
    """
    if path.is_relative_to(package_root):
        rel = path.relative_to(package_root).as_posix()
    else:
        rel = path.name

    present = path.exists()
    scan_text = ''
    if present and path.stat().st_size < 2_000_000:
        scan_text = path.read_text(errors='ignore')

    guessed_mime = mimetypes.guess_type(path.name)[0]
    return ArtifactRef(
        artifact_id=rel.replace('/', '__'),
        task_id=task_id,
        attempt_id=attempt_id,
        artifact_kind=kind,
        artifact_role=role,
        workspace_path=str(path),
        package_relative_path=rel,
        release_relative_path=None,
        mime_type=guessed_mime,
        media_type=media_type_for(path),
        size_bytes=path.stat().st_size if present else None,
        content_hash=sha256_file(path) if present else None,
        secret_scan_ok=not contains_secret(scan_text),
        metadata_json={},
    )
|
|
18
|
+
|
|
19
|
+
def copy_inputs(input_refs: list[str], dest: Path):
    """Copy every referenced regular file into ``dest``, creating ``dest`` if needed.

    References that do not exist or are not regular files are skipped. Files
    are copied flat, under their own file name.
    """
    dest.mkdir(parents=True, exist_ok=True)
    sources = (Path(ref) for ref in input_refs)
    for source in sources:
        if not (source.exists() and source.is_file()):
            continue
        shutil.copy2(source, dest / source.name)
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import csv, json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
from .io import sha256_file
|
|
6
|
+
|
|
7
|
+
# Suffixes that _preview_one tries to parse or preview; any other suffix is
# reported as metadata-only binary. '.xlsx' is listed so that it reaches the
# dedicated workbook branch rather than being rejected as binary.
TEXT_EXTS={'.txt','.md','.json','.jsonl','.csv','.py','.sh','.html','.xml','.xlsx'}
|
|
8
|
+
|
|
9
|
+
def _read_text(path: Path, limit: int) -> tuple[str, bool]:
|
|
10
|
+
text=path.read_text(errors='replace')
|
|
11
|
+
return text[:limit], len(text)>limit
|
|
12
|
+
|
|
13
|
+
def _preview_one(path: Path, package_ref: str, max_chars: int=4000, csv_rows: int=8) -> dict[str, Any]:
|
|
14
|
+
size=path.stat().st_size if path.exists() else None
|
|
15
|
+
out={'ref': package_ref, 'content_hash': ('sha256:' + sha256_file(path)) if path.exists() else None, 'size_bytes': size, 'media_type': 'unknown', 'preview': None, 'preview_truncated': False}
|
|
16
|
+
ext=path.suffix.lower()
|
|
17
|
+
if not path.exists():
|
|
18
|
+
out.update({'parse_status':'missing'}); return out
|
|
19
|
+
if ext not in TEXT_EXTS:
|
|
20
|
+
out.update({'kind':'binary','media_type':'binary','parse_status':'metadata_only'}); return out
|
|
21
|
+
out['media_type']='text'
|
|
22
|
+
try:
|
|
23
|
+
if ext == '.xlsx':
|
|
24
|
+
try:
|
|
25
|
+
from openpyxl import load_workbook # type: ignore
|
|
26
|
+
except Exception as exc:
|
|
27
|
+
out.update({'kind':'xlsx','media_type':'data','parse_status':'openpyxl_unavailable','preview_error_message':str(exc)[:200]}); return out
|
|
28
|
+
wb=load_workbook(path, data_only=False, read_only=False)
|
|
29
|
+
sheets=[]; formulas=[]; important={'inputs','input','sensitivity','assumptions','summary','model','outputs'}
|
|
30
|
+
for ws in wb.worksheets[:6]:
|
|
31
|
+
rows=[]
|
|
32
|
+
for row in ws.iter_rows(min_row=1, max_row=min(ws.max_row or 1, 20), max_col=min(ws.max_column or 1, 8), values_only=False):
|
|
33
|
+
vals=[]
|
|
34
|
+
for cell in row:
|
|
35
|
+
val=cell.value
|
|
36
|
+
vals.append(val)
|
|
37
|
+
if isinstance(val, str) and val.startswith('=') and len(formulas) < 40:
|
|
38
|
+
formulas.append({'sheet': ws.title, 'cell': cell.coordinate, 'formula': val})
|
|
39
|
+
rows.append(vals)
|
|
40
|
+
sheets.append({'name': ws.title, 'max_row': ws.max_row, 'max_column': ws.max_column, 'headers': rows[0] if rows else [], 'first_rows': rows[1:]})
|
|
41
|
+
out.update({'kind':'xlsx','media_type':'data','parse_status':'parsed_xlsx','sheet_names': wb.sheetnames, 'sheets': sheets, 'formulas_detected': bool(formulas), 'formulas': formulas, 'important_sheet_presence': {name: any(name in s.lower() for s in wb.sheetnames) for name in sorted(important)}, 'preview': json.dumps({'sheet_names': wb.sheetnames, 'sheets': sheets, 'formulas': formulas[:10]}, default=str)[:max_chars], 'preview_truncated': len(wb.sheetnames)>6 or len(formulas)>=40})
|
|
42
|
+
elif ext == '.csv':
|
|
43
|
+
text, trunc=_read_text(path, max_chars)
|
|
44
|
+
with path.open(newline='', errors='replace') as f:
|
|
45
|
+
reader=csv.reader(f); rows=[]
|
|
46
|
+
for i,row in enumerate(reader):
|
|
47
|
+
rows.append(row)
|
|
48
|
+
if i >= csv_rows: break
|
|
49
|
+
out.update({'kind':'csv','media_type':'data','parse_status':'parsed_csv','columns': rows[0] if rows else [], 'row_count': max(0, sum(1 for _ in path.open(errors='replace'))-1), 'first_rows': rows[1:], 'preview': text, 'preview_truncated': trunc})
|
|
50
|
+
elif ext in {'.json','.jsonl'}:
|
|
51
|
+
text, trunc=_read_text(path, max_chars)
|
|
52
|
+
keys=[]; parse_status='parsed_json'
|
|
53
|
+
if ext == '.json':
|
|
54
|
+
try:
|
|
55
|
+
obj=json.loads(path.read_text(errors='replace'))
|
|
56
|
+
keys=sorted(obj.keys()) if isinstance(obj, dict) else []
|
|
57
|
+
except Exception:
|
|
58
|
+
parse_status='json_parse_error'
|
|
59
|
+
else:
|
|
60
|
+
parse_status='jsonl_preview'
|
|
61
|
+
out.update({'kind':'json' if ext == '.json' else 'jsonl','media_type':'data','parse_status':parse_status,'top_level_keys':keys,'preview':text,'preview_truncated':trunc})
|
|
62
|
+
else:
|
|
63
|
+
text, trunc=_read_text(path, max_chars)
|
|
64
|
+
out.update({'kind':'text','parse_status':'text_preview','preview':text,'preview_truncated':trunc})
|
|
65
|
+
except Exception as exc:
|
|
66
|
+
out.update({'parse_status':'preview_error','preview_error_type':type(exc).__name__,'preview_error_message':str(exc)[:300]})
|
|
67
|
+
return out
|
|
68
|
+
|
|
69
|
+
def build_artifact_previews(package_root: Path | None, refs: list[str], max_artifacts: int=12) -> dict[str, Any]:
    """Collect preview records for up to ``max_artifacts`` distinct artifact refs.

    Refs are de-duplicated in first-seen order and resolved against
    ``package_root``; only existing regular files are previewed. The
    ``model_grading_basis`` is 'artifact_content' when some preview has
    content and none is truncated, 'artifact_preview' when there are previews
    otherwise, and 'trace_only' when there are none (or no package root).
    """
    if package_root is None:
        return {
            'artifact_content_refs': [],
            'artifact_content_previews': [],
            'artifact_content_hashes': {},
            'artifact_content_preview_truncated': False,
            'model_grading_basis': 'trace_only',
        }

    unique_refs = list(dict.fromkeys(refs))[:max_artifacts]
    previews = []
    for ref in unique_refs:
        candidate = package_root / ref
        if candidate.exists() and candidate.is_file():
            previews.append(_preview_one(candidate, ref))

    content_refs = [entry['ref'] for entry in previews]
    hashes = {entry['ref']: entry.get('content_hash') for entry in previews}
    truncated = any(bool(entry.get('preview_truncated')) for entry in previews)
    has_content = any(entry.get('preview') for entry in previews)

    if has_content and not truncated:
        basis = 'artifact_content'
    elif previews:
        basis = 'artifact_preview'
    else:
        basis = 'trace_only'

    return {
        'artifact_content_refs': content_refs,
        'artifact_content_previews': previews,
        'artifact_content_hashes': hashes,
        'artifact_content_preview_truncated': truncated,
        'model_grading_basis': basis,
    }
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# File extensions (without the leading dot) that normalize_artifact_ref uses,
# case-insensitively, to find where an artifact path ends.
ARTIFACT_EXTENSIONS = (
    "csv",
    "tsv",
    "json",
    "jsonl",
    "xlsx",
    "xls",
    "md",
    "txt",
    "pdf",
    "docx",
    "pptx",
    "html",
    "xml",
    "yaml",
    "yml",
    "py",
    "js",
    "ts",
    "sql",
    "zip",
    "png",
    "jpg",
    "jpeg",
    "webp",
    "gif",
    "svg",
    "ipynb",
    "log",
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def normalize_artifact_ref(ref: object) -> str:
    """Turn a display-style artifact evidence reference back into a plain path.

    The value is stringified (``None`` becomes ``""``), stripped of quotes and
    whitespace, and backslashes become forward slashes. Known display
    prefixes are dropped, an embedded ``artifacts/...`` path is extracted when
    present, trailing status markers such as `` (truncated)`` are cut, and
    artifact paths are trimmed right after a known file extension.
    """
    value = str(ref) if ref is not None else ""
    value = value.strip().strip('"').strip("'")
    value = value.replace("\\", "/").strip()

    display_prefixes = (
        "artifact_content_previews_truncated:",
        "artifact_content_preview_truncated:",
        "artifact_content_previews:",
        "artifact_content_preview:",
        "artifact_previews_truncated:",
        "artifact_preview_truncated:",
        "artifact_previews:",
        "artifact_preview:",
    )
    for label in display_prefixes:
        if value.startswith(label):
            value = value[len(label):].strip()

    # Pull the artifact path out of any surrounding prose.
    located = re.search(
        r"((?:packages/[^/\s]+/)?attempts/[^/\s]+/artifacts/[^\s\)\],;]+|artifacts/[^\s\)\],;]+)",
        value,
    )
    if located is not None:
        value = located.group(1).strip()

    status_markers = (
        " (parse_error:",
        " (preview_error:",
        " (read_error:",
        " (open_error:",
        " (parse_status:",
        " (text_preview)",
        " (content_preview)",
        " (artifact_preview)",
        " (binary_preview)",
        " (sheet_preview)",
        " (truncated_preview)",
        " (preview_truncated)",
        " (truncated)",
        " (preview)",
    )
    for marker in status_markers:
        head, found, _tail = value.partition(marker)
        if found:
            value = head.strip()

    looks_like_artifact = value.startswith("artifacts/") or "/artifacts/" in value
    if not looks_like_artifact:
        return value

    # Cut anything that trails the first recognized file extension.
    extensions = "|".join(re.escape(ext) for ext in ARTIFACT_EXTENSIONS)
    trimmed = re.match(
        rf"^(.*?\.({extensions}))(?:[:#_\s\)].*)?$",
        value,
        flags=re.IGNORECASE,
    )
    if trimmed is None:
        return value
    return trimmed.group(1).strip()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def is_artifact_evidence_ref(ref: object) -> bool:
    """Return True when ``ref`` points at an artifact file rather than metadata.

    References whose raw or normalized form starts with a metadata prefix
    (trace summary, actual outputs, rubric, score, metadata) are rejected.
    Otherwise the normalized reference must look like a path inside an
    ``artifacts/`` directory.
    """
    raw = str(ref).strip()
    normalized = normalize_artifact_ref(raw)
    metadata_prefixes = (
        "trace_summary_json:",
        "actual_outputs:",
        "actual_outputs.",
        "rubric:",
        "rubric.",
        "score:",
        "score.",
        "metadata_json:",
        "metadata_json.",
    )
    for candidate in (raw, normalized):
        if candidate.startswith(metadata_prefixes):
            return False

    in_artifacts_dir = "/artifacts/" in normalized
    if "attempts/" in normalized and in_artifacts_dir:
        return True
    if normalized.startswith("artifacts/"):
        return True
    return "packages/" in normalized and "/attempts/" in normalized and in_artifacts_dir
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def artifact_ref_resolves(ref: object, existing_refs: set[str]) -> bool:
    """Return True when ``ref`` matches one of ``existing_refs``.

    Both sides are compared with forward slashes and without leading slashes.
    A match is either exact or a whole-path-segment suffix match in either
    direction. An empty normalized reference never resolves.
    """
    target = normalize_artifact_ref(ref).replace("\\", "/").lstrip("/")
    if not target:
        return False

    known = set()
    for item in existing_refs:
        if item:
            known.add(str(item).replace("\\", "/").lstrip("/"))

    if target in known:
        return True

    target_suffix = "/" + target
    for entry in known:
        if entry.endswith(target_suffix) or target.endswith("/" + entry):
            return True
    return False
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def artifact_ref_candidates(ref: object) -> set[str]:
    """Return the alternative spellings under which ``ref`` may be stored.

    The set holds the normalized path, the part after the first
    ``/artifacts/`` (when present) and the bare file name. Empty strings are
    dropped; an empty normalized reference yields an empty set.
    """
    normalized = normalize_artifact_ref(ref).replace("\\", "/").lstrip("/")
    if not normalized:
        return set()

    options = [normalized, Path(normalized).name]
    _before, separator, after = normalized.partition("/artifacts/")
    if separator:
        options.append(after)
    return {option for option in options if option}
|