agent-apprenticeship 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +217 -0
  3. package/bin/agent-apprenticeship.js +131 -0
  4. package/package.json +30 -0
  5. package/pyproject.toml +23 -0
  6. package/src/agent_apprenticeship_trace/__init__.py +2 -0
  7. package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
  8. package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
  9. package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
  10. package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
  11. package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
  12. package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
  13. package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
  14. package/src/agent_apprenticeship_trace/certification.py +580 -0
  15. package/src/agent_apprenticeship_trace/cli.py +2979 -0
  16. package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
  17. package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
  18. package/src/agent_apprenticeship_trace/config.py +609 -0
  19. package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
  20. package/src/agent_apprenticeship_trace/env.py +46 -0
  21. package/src/agent_apprenticeship_trace/evaluator.py +64 -0
  22. package/src/agent_apprenticeship_trace/grader.py +194 -0
  23. package/src/agent_apprenticeship_trace/integration_status.py +193 -0
  24. package/src/agent_apprenticeship_trace/io.py +20 -0
  25. package/src/agent_apprenticeship_trace/learning.py +627 -0
  26. package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
  27. package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
  28. package/src/agent_apprenticeship_trace/loop.py +111 -0
  29. package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
  30. package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
  31. package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
  32. package/src/agent_apprenticeship_trace/progress.py +223 -0
  33. package/src/agent_apprenticeship_trace/public_run.py +1109 -0
  34. package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
  35. package/src/agent_apprenticeship_trace/recipes.py +129 -0
  36. package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
  37. package/src/agent_apprenticeship_trace/revision.py +21 -0
  38. package/src/agent_apprenticeship_trace/role_runners.py +7 -0
  39. package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
  40. package/src/agent_apprenticeship_trace/schemas.py +273 -0
  41. package/src/agent_apprenticeship_trace/session_events.py +99 -0
  42. package/src/agent_apprenticeship_trace/task_intake.py +112 -0
  43. package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
  44. package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
  45. package/src/agent_apprenticeship_trace/training_signals.py +30 -0
  46. package/src/agent_apprenticeship_trace/validation.py +210 -0
  47. package/src/agent_apprenticeship_trace/verifier.py +55 -0
@@ -0,0 +1,348 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import shutil
5
+ import subprocess
6
+ import re
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ from .actual_outputs_normalizer import ActualOutputsNormalizationContext, repair_actual_outputs_file, write_actual_outputs_normalization
11
+ from .contract_diagnostics import build_contract_diagnostics, diagnostics_text
12
+ from .codex_runner import (
13
+ AttemptResult,
14
+ _attempt_dir,
15
+ _copy_attempt_inputs,
16
+ _deliverables,
17
+ ensure_attempt_outputs,
18
+ )
19
+ from .config import get_settings
20
+ from .command_discovery import resolve_command
21
+ from .env import redact_secrets
22
+ from .io import write_json
23
+ from .recipes import WORKER_AGENT_RECIPES
24
+ from .schemas import ActualOutputs, RawTaskRecord, TaskIntakeSpec
25
+ from .trace_prompt import build_worker_prompt
26
+
27
+
28
@dataclass
class AgentInvocation:
    """Command line chosen for one headless run of an external Apprentice Agent CLI."""

    # Full argv to execute; left empty when no usable invocation form was found.
    argv: list[str]
    # Short label naming the invocation form that was selected (e.g. "claude -p").
    mode: str
    # Human-readable explanation set when the CLI cannot be driven headlessly.
    unsupported_reason: str | None = None
33
+
34
+
35
+ def _help_text(command: str, *args: str) -> str:
36
+ try:
37
+ cp = subprocess.run([command, *args, "--help"], cwd=None, text=True, capture_output=True, timeout=5)
38
+ return f"{cp.stdout or ''}\n{cp.stderr or ''}"
39
+ except Exception:
40
+ return ""
41
+
42
+
43
+ def _first_json_array(text: str) -> list[object] | None:
44
+ decoder = json.JSONDecoder()
45
+ start = text.find("[")
46
+ while start != -1:
47
+ try:
48
+ value, _ = decoder.raw_decode(text[start:])
49
+ except json.JSONDecodeError:
50
+ start = text.find("[", start + 1)
51
+ continue
52
+ return value if isinstance(value, list) else None
53
+ return None
54
+
55
+
56
+ def _openclaw_default_agent_id(command: str) -> str | None:
57
+ try:
58
+ cp = subprocess.run([command, "agents", "list", "--json"], cwd=None, text=True, capture_output=True, timeout=5)
59
+ except Exception:
60
+ return None
61
+ if cp.returncode != 0:
62
+ return None
63
+ agents = _first_json_array(f"{cp.stdout or ''}\n{cp.stderr or ''}")
64
+ if not agents:
65
+ return None
66
+ dict_agents = [a for a in agents if isinstance(a, dict) and a.get("id")]
67
+ for agent in dict_agents:
68
+ if agent.get("isDefault") is True:
69
+ return str(agent["id"])
70
+ return str(dict_agents[0]["id"]) if dict_agents else None
71
+
72
+
73
+ def _has_any(text: str, *needles: str) -> bool:
74
+ low = text.lower()
75
+ return any(n.lower() in low for n in needles)
76
+
77
+
78
+ def _has_subcommand(help_text: str, command_name: str) -> bool:
79
+ pattern = rf"(?m)^\s{{1,}}{re.escape(command_name)}(?:\s|$)"
80
+ return (
81
+ bool(re.search(pattern, help_text))
82
+ or bool(re.search(rf"(?m)^\s*{re.escape(command_name)}\s+\*", help_text))
83
+ or bool(re.search(rf"(?m)^\s*\S+\s+{re.escape(command_name)}(?:\s|$)", help_text))
84
+ )
85
+
86
+
87
+ def _with_workspace(argv: list[str], help_text: str, workspace: Path) -> list[str]:
88
+ if "--workspace" in help_text:
89
+ return [*argv, "--workspace", str(workspace)]
90
+ if "--cwd" in help_text:
91
+ return [*argv, "--cwd", str(workspace)]
92
+ if "--cd" in help_text:
93
+ return [*argv, "--cd", str(workspace)]
94
+ if "--dir" in help_text:
95
+ return [*argv, "--dir", str(workspace)]
96
+ return argv
97
+
98
+
99
def build_agent_invocation(agent_id: str, command: str, workspace: Path, prompt_file: Path, prompt: str) -> AgentInvocation:
    """Work out how to run ``command`` headlessly for the given agent adapter.

    The CLI's ``--help`` output (and, where needed, the help of a subcommand)
    is probed for known flags, and the first supported prompt-passing form is
    chosen. Optional flags are only added when the help text mentions them.

    Args:
        agent_id: Adapter key; handled values are "cursor", "claude-code",
            "opencode", "openclaw" and "hermes-agent".
        command: Executable to invoke.
        workspace: Directory passed through a workspace flag when one is advertised.
        prompt_file: File containing the prompt, used for ``--prompt-file`` forms.
        prompt: Prompt text, used for inline forms.

    Returns:
        An ``AgentInvocation``. When no headless form is available (or the
        adapter is unknown) ``argv`` is empty and ``unsupported_reason`` is set.
    """
    # NOTE: the top-level help is fetched before agent_id is validated, so an
    # unknown adapter still triggers one ``--help`` subprocess call.
    help_text = _help_text(command)
    if agent_id == "cursor":
        base = _with_workspace([command], help_text, workspace)
        if "--trust" in help_text:
            base.append("--trust")
        if "--force" in help_text:
            base.append("--force")
        # Prefer passing the prompt by file, then by long flag, then by -p.
        if "--prompt-file" in help_text:
            return AgentInvocation([*base, "--prompt-file", str(prompt_file)], "cursor-agent --prompt-file")
        if "--prompt" in help_text:
            return AgentInvocation([*base, "--prompt", prompt], "cursor-agent --prompt")
        if " -p" in help_text or "-p," in help_text:
            return AgentInvocation([*base, "-p", prompt], "cursor-agent -p")
        if _has_subcommand(help_text, "run"):
            run_help = _help_text(command, "run")
            if "--prompt-file" in run_help:
                return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), "--prompt-file", str(prompt_file)], "cursor-agent run --prompt-file")
            # Fall back to passing the prompt as a positional argument.
            return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), prompt], "cursor-agent run")
        return AgentInvocation([], "cursor-agent", "Cursor headless mode unavailable: cursor-agent help did not expose --prompt-file, --prompt, -p, or run.")

    if agent_id == "claude-code":
        base = [command]
        if "--permission-mode" in help_text:
            base.extend(["--permission-mode", "bypassPermissions"])
        if "--output-format" in help_text:
            base.extend(["--output-format", "text"])
        # The budget cap is hard-coded to "1" whenever the flag is advertised.
        if "--max-budget-usd" in help_text:
            base.extend(["--max-budget-usd", "1"])
        if " -p" in help_text or "-p," in help_text:
            return AgentInvocation([*base, "-p", prompt], "claude -p")
        if "--print" in help_text:
            return AgentInvocation([*base, "--print", prompt], "claude --print")
        return AgentInvocation([], "claude", "Claude Code headless mode unavailable: claude --help did not expose -p or --print.")

    if agent_id == "opencode":
        # An empty run_help (no "run" subcommand, or its help failed) means unsupported.
        run_help = _help_text(command, "run") if _has_subcommand(help_text, "run") else ""
        if run_help:
            if "--prompt-file" in run_help:
                return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), "--prompt-file", str(prompt_file)], "opencode run --prompt-file")
            if "--prompt" in run_help:
                return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), "--prompt", prompt], "opencode run --prompt")
            return AgentInvocation([*_with_workspace([command, "run"], run_help, workspace), prompt], "opencode run")
        return AgentInvocation([], "opencode", "OpenCode headless mode unavailable: opencode --help did not expose a run command.")

    if agent_id == "openclaw":
        if _has_subcommand(help_text, "agent"):
            agent_help = _help_text(command, "agent")
            if "--message" in agent_help or "-m," in agent_help:
                settings = get_settings()
                # A configured worker_agent_model wins over the CLI's own default agent.
                agent_ref = settings.worker_agent_model or _openclaw_default_agent_id(command)
                if not agent_ref and "--agent" in agent_help:
                    return AgentInvocation(
                        [],
                        "openclaw agent",
                        "OpenClaw setup required: no configured OpenClaw agent was found. Run `openclaw setup` or `openclaw agents add`, then rerun the smoke.",
                    )
                argv = [command, "agent"]
                if "--local" in agent_help:
                    argv.append("--local")
                if "--json" in agent_help:
                    argv.append("--json")
                if "--agent" in agent_help and agent_ref:
                    argv.extend(["--agent", agent_ref])
                if "--timeout" in agent_help:
                    argv.extend(["--timeout", str(settings.task_timeout_seconds)])
                # NOTE(review): "--message" is passed even when only "-m," was
                # found in the help text, and the mode label below mentions
                # --local even when it was not added -- confirm both are intended.
                argv.extend(["--message", prompt])
                return AgentInvocation(argv, "openclaw agent --local --message")
            return AgentInvocation([], "openclaw agent", "OpenClaw headless execution is unavailable in this installed version: openclaw agent --help did not expose --message.")
        # No "agent" subcommand: try the other entry points; the first one listed wins.
        for sub in ("run", "exec", "session"):
            if _has_subcommand(help_text, sub):
                sub_help = _help_text(command, sub)
                if "--prompt-file" in sub_help:
                    return AgentInvocation([*_with_workspace([command, sub], sub_help, workspace), "--prompt-file", str(prompt_file)], f"openclaw {sub} --prompt-file")
                if "--prompt" in sub_help:
                    return AgentInvocation([*_with_workspace([command, sub], sub_help, workspace), "--prompt", prompt], f"openclaw {sub} --prompt")
                return AgentInvocation([*_with_workspace([command, sub], sub_help, workspace), prompt], f"openclaw {sub}")
        return AgentInvocation([], "openclaw", "OpenClaw headless execution is unavailable in this installed version: openclaw --help did not expose agent, run, exec, or session.")

    if agent_id == "hermes-agent":
        for sub in ("run", "chat"):
            if _has_subcommand(help_text, sub):
                sub_help = _help_text(command, sub)
                base = [command, sub]
                if "--quiet" in sub_help:
                    base.append("--quiet")
                if "--yolo" in sub_help:
                    base.append("--yolo")
                # The turn limit is hard-coded to "12" whenever the flag is advertised.
                if "--max-turns" in sub_help:
                    base.extend(["--max-turns", "12"])
                if "--prompt-file" in sub_help:
                    return AgentInvocation([*_with_workspace(base, sub_help, workspace), "--prompt-file", str(prompt_file)], f"hermes {sub} --prompt-file")
                if "--prompt" in sub_help:
                    return AgentInvocation([*_with_workspace(base, sub_help, workspace), "--prompt", prompt], f"hermes {sub} --prompt")
                if "--query" in sub_help:
                    return AgentInvocation([*_with_workspace(base, sub_help, workspace), "--query", prompt], f"hermes {sub} --query")
                if " -q" in sub_help or "-q," in sub_help:
                    return AgentInvocation([*_with_workspace(base, sub_help, workspace), "-q", prompt], f"hermes {sub} -q")
                return AgentInvocation([*_with_workspace(base, sub_help, workspace), prompt], f"hermes {sub}")
        return AgentInvocation([], "hermes", "Hermes Agent headless mode unavailable: hermes --help did not expose run or chat.")

    return AgentInvocation([], agent_id, f"Unsupported Apprentice Agent adapter: {agent_id}")
201
+
202
+
203
def classify_agent_failure(agent_id: str, display_name: str, error: object | None, stdout: str = "", stderr: str = "", returncode: int | None = None) -> str | None:
    """Map an agent failure onto a user-facing message, or ``None`` if nothing matched.

    The error, stdout and stderr are joined, lowercased and scanned for known
    phrases. Checks run in a fixed order and the first match wins: command
    not found, headless mode unavailable, timeout, permission error,
    authentication setup, general setup, OpenClaw agent setup, quota limits,
    nonzero exit code, and finally any remaining error.
    """
    haystack = "\n".join((f"{error or ''}", f"{stdout or ''}", f"{stderr or ''}")).lower()

    if agent_id in WORKER_AGENT_RECIPES:
        command = WORKER_AGENT_RECIPES.get(agent_id).command_name
    else:
        command = agent_id

    not_found_markers = (
        f"no such file or directory: '{command}'",
        f"no such file or directory: {command}",
    )
    if isinstance(error, FileNotFoundError) or any(marker in haystack for marker in not_found_markers):
        return f"Apprentice Agent command not found: {command}"

    if "headless mode unavailable" in haystack or "headless execution is unavailable" in haystack:
        # The original message already explains what is missing; pass it through.
        return str(error or stderr).strip()

    if isinstance(error, subprocess.TimeoutExpired) or "timed out" in haystack or "timeout" in haystack:
        return f"Apprentice Agent attempt timed out while running {display_name}."

    permission_needles = ("eperm", "permission denied", "operation not permitted", "read-only file system")
    if _has_any(haystack, *permission_needles):
        return f"Apprentice Agent permission error while running {display_name}: the external CLI could not access a required file or directory."

    auth_needles = (
        "not authenticated",
        "not logged in",
        "login required",
        "please login",
        "please log in",
        "authentication failed",
        "unauthorized",
        "invalid api key",
        "missing api key",
        "api key missing",
        "api key is missing",
        "api key not configured",
        "api key is not configured",
        "provider api key is missing",
        "google generative ai api key is missing",
    )
    if _has_any(haystack, *auth_needles):
        return f"{display_name} setup required: authentication, API key, or account setup is required."

    setup_needles = ("setup required", "onboarding", "configure first", "model not configured", "provider not configured")
    if _has_any(haystack, *setup_needles):
        return f"{display_name} setup required: complete the provider/model setup before running."

    openclaw_needles = ("unknown agent id", "pass --to", "session-id", "choose a session", "no configured openclaw agent")
    if agent_id == "openclaw" and _has_any(haystack, *openclaw_needles):
        return "OpenClaw setup required: configure an OpenClaw agent with `openclaw setup` or `openclaw agents add`, then rerun the smoke."

    quota_needles = ("quota", "rate limit", "billing", "insufficient quota", "insufficient credits", "out of credits", "credit limit", "usage limit")
    if _has_any(haystack, *quota_needles):
        return f"{display_name} provider quota or credit limit reached."

    if returncode is not None and returncode != 0:
        return f"Apprentice Agent exited before producing required outputs (exit code {returncode})."
    if error:
        return f"Apprentice Agent operational error: {redact_secrets(str(error))}"
    return None
244
+
245
+
246
def run_external_agent_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSpec, attempt_kind: str = "baseline", timeout: int | None = None) -> AttemptResult:
    """Run one attempt with the configured external Apprentice Agent CLI.

    Steps, in order: prepare the attempt directory and prompt, resolve and run
    the CLI, persist redacted stdout/stderr, ensure trace and output files
    exist, annotate both with invocation metadata and any operational error,
    then normalise ``actual_outputs.json``.

    Args:
        package_root: Root of the task package.
        raw: Raw task record, used to derive the deliverables.
        spec: Task intake spec supplying the instruction and task id.
        attempt_kind: Label of the attempt; selects the attempt directory.
        timeout: Subprocess timeout in seconds; falls back to
            ``settings.task_timeout_seconds`` when falsy.

    Returns:
        An ``AttemptResult`` describing the attempt directory, the trace, the
        (possibly repaired) actual outputs, and the agent id.
    """
    settings = get_settings()
    agent_id = settings.worker_agent
    recipe = WORKER_AGENT_RECIPES[agent_id]
    display = recipe.display_name
    # An explicit command override in settings wins over the recipe default.
    command = settings.worker_agent_command or recipe.command_name
    d = _attempt_dir(package_root, attempt_kind)
    input_files = _copy_attempt_inputs(package_root, d)
    deliverables = _deliverables(raw, spec)
    rubric_md = (package_root / "rubric" / "worker_visible_rubric.md").read_text() if (package_root / "rubric" / "worker_visible_rubric.md").exists() else ""
    prompt = build_worker_prompt(spec.normalized_instruction, rubric_md, attempt_kind, input_files, deliverables, settings.sensitive_info_masking, workspace_path=str(d))
    prompt_file = d / "prompt.md"
    prompt_file.write_text(prompt)

    run_error: object | None = None
    returncode: int | None = None
    stdout = ""
    stderr = ""
    # Placeholder so invocation metadata can be recorded even if the command is missing.
    invocation = AgentInvocation([], agent_id)
    resolved_command = resolve_command(command)
    if not resolved_command:
        run_error = FileNotFoundError(command)
    else:
        invocation = build_agent_invocation(agent_id, resolved_command, d, prompt_file, prompt)
        if invocation.unsupported_reason:
            run_error = RuntimeError(invocation.unsupported_reason)
        else:
            try:
                cp = subprocess.run(invocation.argv, cwd=d, text=True, capture_output=True, timeout=timeout or settings.task_timeout_seconds)
                returncode = cp.returncode
                stdout = cp.stdout or ""
                stderr = cp.stderr or ""
                if cp.returncode != 0:
                    run_error = RuntimeError(classify_agent_failure(agent_id, display, None, stdout, stderr, cp.returncode) or f"{display} exited with code {cp.returncode}.")
            except Exception as exc:
                # Includes subprocess.TimeoutExpired; stdout/stderr stay empty in that case.
                run_error = exc
    (d / "stdout.txt").write_text(redact_secrets(stdout))
    (d / "stderr.txt").write_text(redact_secrets(stderr if stderr else str(run_error or "")))
    # Only the last 4000 characters of the output are kept as the final message.
    (d / "final_message.txt").write_text(redact_secrets((stdout or stderr or str(run_error or ""))[-4000:]))
    # Checked before ensure_attempt_outputs runs, so this reflects what the agent itself wrote.
    contract_missing_before_repair = not (d / "agent_trace.json").exists() or not (d / "actual_outputs.json").exists()
    contract_diagnostics = None
    if contract_missing_before_repair:
        command_for_diagnostics = invocation.argv or [command]
        contract_diagnostics = build_contract_diagnostics(
            d,
            command=command_for_diagnostics,
            working_directory=d,
            agent_display_name=display,
            prompt=prompt,
        )
        with (d / "final_message.txt").open("a") as f:
            f.write("\n\n" + diagnostics_text(contract_diagnostics))

    trace, actual, trace_valid = ensure_attempt_outputs(package_root, spec, attempt_kind, prompt, agent_id, run_error if isinstance(run_error, Exception) else None)
    if actual.metadata_json is None:
        actual.metadata_json = {}
    actual.metadata_json["apprentice_agent"] = agent_id
    actual.metadata_json["apprentice_agent_display_name"] = display
    # The prompt text is replaced by a placeholder so it is not duplicated into metadata.
    actual.metadata_json["apprentice_agent_invocation"] = [part if part != prompt else "<prompt>" for part in invocation.argv]
    actual.metadata_json["apprentice_agent_invocation_mode"] = invocation.mode
    if contract_diagnostics:
        actual.metadata_json["apprentice_agent_contract_diagnostics"] = contract_diagnostics
    # NOTE(review): classification scans stdout/stderr text even when the run
    # exited 0 with a valid trace, so output that merely mentions e.g.
    # "billing" or "timeout" is reported as an operational error -- confirm intended.
    classified_error = classify_agent_failure(agent_id, display, run_error, stdout, stderr, returncode)
    output_contract_error = (
        f"Apprentice Agent output-contract failure: {display} did not produce required "
        "agent_trace.json and actual_outputs.json."
    )
    if not trace_valid:
        # Only these operational causes are reported as-is; anything else is
        # reported as an output-contract failure.
        operational_prefixes = (
            "Apprentice Agent command not found",
            f"{display} setup required",
            f"{display} provider quota",
            "Apprentice Agent timed out",
            "Apprentice Agent attempt timed out",
            "Apprentice Agent permission error",
        )
        if classified_error and (classified_error.startswith(operational_prefixes) or "headless mode unavailable" in classified_error or "headless execution is unavailable" in classified_error):
            op_error = classified_error
        else:
            op_error = output_contract_error
    else:
        op_error = classified_error
    # A nonzero exit after a valid, successful output gets its own message.
    if op_error and returncode not in (None, 0) and trace_valid and actual.status == "success":
        op_error = f"Apprentice Agent exited nonzero after producing required outputs (exit code {returncode})."
    if op_error:
        actual.metadata_json["apprentice_agent_operational_error"] = op_error
        actual.error_message = op_error
    write_json(d / "actual_outputs.json", actual)

    trace.metadata_json["apprentice_agent"] = agent_id
    trace.metadata_json["apprentice_agent_invocation"] = [part if part != prompt else "<prompt>" for part in invocation.argv]
    trace.metadata_json["trace_valid"] = trace_valid
    if op_error:
        trace.metadata_json["apprentice_agent_operational_error"] = op_error
    if contract_diagnostics:
        trace.metadata_json["apprentice_agent_contract_diagnostics"] = contract_diagnostics
    write_json(d / "agent_trace.json", trace)
    # Re-read and repair the file just written; a repaired version replaces ``actual``.
    actual_ctx = ActualOutputsNormalizationContext(task_id=spec.task_id, attempt_id=f"{spec.task_id}_{attempt_kind}", attempt_kind=attempt_kind, package_root=package_root, required_artifacts=deliverables)
    actual_result = repair_actual_outputs_file(d / "actual_outputs.json", actual_ctx)
    write_actual_outputs_normalization(d, actual_result)
    if actual_result.actual_outputs is not None:
        actual = ActualOutputs.model_validate(actual_result.actual_outputs)
    return AttemptResult(attempt_dir=str(d), trace_valid=trace_valid, trace=trace, actual_outputs=actual, apprentice_agent=agent_id)
@@ -0,0 +1,23 @@
1
+ from __future__ import annotations
2
+ import mimetypes, shutil
3
+ from pathlib import Path
4
+ from .schemas import ArtifactRef
5
+ from .io import sha256_file
6
+ from .env import contains_secret
7
+
8
def media_type_for(path: Path) -> str:
    """Classify a file as 'text', 'data', 'code' or 'unknown' by its suffix (case-insensitive)."""
    suffix = path.suffix.lower()
    if suffix in ('.txt', '.md', '.log'):
        return 'text'
    if suffix in ('.json', '.jsonl', '.csv', '.yaml', '.yml'):
        return 'data'
    if suffix in ('.py', '.js', '.ts', '.sh', '.html', '.css'):
        return 'code'
    return 'unknown'
13
+
14
def artifact_ref(path: Path, package_root: Path, task_id: str, attempt_id: str | None, kind='output', role='other') -> ArtifactRef:
    """Build an ``ArtifactRef`` describing ``path``.

    The package-relative path is used when ``path`` lies under
    ``package_root``; otherwise only the file name is used. Size and content
    hash are recorded only when the file exists.
    """
    if path.is_relative_to(package_root):
        rel = path.relative_to(package_root).as_posix()
    else:
        rel = path.name

    # Only files under 2,000,000 bytes are read for the secret scan.
    # NOTE(review): larger or missing files are scanned as empty text, so
    # secret_scan_ok is True for them without any scan -- confirm intended.
    if path.exists() and path.stat().st_size < 2_000_000:
        text = path.read_text(errors='ignore')
    else:
        text = ''

    guessed_mime = mimetypes.guess_type(path.name)[0]
    media_type = media_type_for(path)
    size_bytes = path.stat().st_size if path.exists() else None
    content_hash = sha256_file(path) if path.exists() else None
    return ArtifactRef(
        artifact_id=rel.replace('/', '__'),
        task_id=task_id,
        attempt_id=attempt_id,
        artifact_kind=kind,
        artifact_role=role,
        workspace_path=str(path),
        package_relative_path=rel,
        release_relative_path=None,
        mime_type=guessed_mime,
        media_type=media_type,
        size_bytes=size_bytes,
        content_hash=content_hash,
        secret_scan_ok=not contains_secret(text),
        metadata_json={},
    )
18
+
19
def copy_inputs(input_refs: list[str], dest: Path):
    """Copy each existing regular file in ``input_refs`` into ``dest``.

    ``dest`` is created if needed. Files are copied flat under their own
    name; references that do not exist or are not regular files are skipped.
    """
    dest.mkdir(parents=True, exist_ok=True)
    for source in map(Path, input_refs):
        if not (source.exists() and source.is_file()):
            continue
        shutil.copy2(source, dest / source.name)
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+ import csv, json
3
+ from pathlib import Path
4
+ from typing import Any
5
+ from .io import sha256_file
6
+
7
# Suffixes that _preview_one attempts to preview; any other suffix is reported
# as binary with metadata only. '.xlsx' is listed because it has its own
# workbook-parsing branch rather than being read as plain text.
TEXT_EXTS={'.txt','.md','.json','.jsonl','.csv','.py','.sh','.html','.xml','.xlsx'}
8
+
9
+ def _read_text(path: Path, limit: int) -> tuple[str, bool]:
10
+ text=path.read_text(errors='replace')
11
+ return text[:limit], len(text)>limit
12
+
13
+ def _preview_one(path: Path, package_ref: str, max_chars: int=4000, csv_rows: int=8) -> dict[str, Any]:
14
+ size=path.stat().st_size if path.exists() else None
15
+ out={'ref': package_ref, 'content_hash': ('sha256:' + sha256_file(path)) if path.exists() else None, 'size_bytes': size, 'media_type': 'unknown', 'preview': None, 'preview_truncated': False}
16
+ ext=path.suffix.lower()
17
+ if not path.exists():
18
+ out.update({'parse_status':'missing'}); return out
19
+ if ext not in TEXT_EXTS:
20
+ out.update({'kind':'binary','media_type':'binary','parse_status':'metadata_only'}); return out
21
+ out['media_type']='text'
22
+ try:
23
+ if ext == '.xlsx':
24
+ try:
25
+ from openpyxl import load_workbook # type: ignore
26
+ except Exception as exc:
27
+ out.update({'kind':'xlsx','media_type':'data','parse_status':'openpyxl_unavailable','preview_error_message':str(exc)[:200]}); return out
28
+ wb=load_workbook(path, data_only=False, read_only=False)
29
+ sheets=[]; formulas=[]; important={'inputs','input','sensitivity','assumptions','summary','model','outputs'}
30
+ for ws in wb.worksheets[:6]:
31
+ rows=[]
32
+ for row in ws.iter_rows(min_row=1, max_row=min(ws.max_row or 1, 20), max_col=min(ws.max_column or 1, 8), values_only=False):
33
+ vals=[]
34
+ for cell in row:
35
+ val=cell.value
36
+ vals.append(val)
37
+ if isinstance(val, str) and val.startswith('=') and len(formulas) < 40:
38
+ formulas.append({'sheet': ws.title, 'cell': cell.coordinate, 'formula': val})
39
+ rows.append(vals)
40
+ sheets.append({'name': ws.title, 'max_row': ws.max_row, 'max_column': ws.max_column, 'headers': rows[0] if rows else [], 'first_rows': rows[1:]})
41
+ out.update({'kind':'xlsx','media_type':'data','parse_status':'parsed_xlsx','sheet_names': wb.sheetnames, 'sheets': sheets, 'formulas_detected': bool(formulas), 'formulas': formulas, 'important_sheet_presence': {name: any(name in s.lower() for s in wb.sheetnames) for name in sorted(important)}, 'preview': json.dumps({'sheet_names': wb.sheetnames, 'sheets': sheets, 'formulas': formulas[:10]}, default=str)[:max_chars], 'preview_truncated': len(wb.sheetnames)>6 or len(formulas)>=40})
42
+ elif ext == '.csv':
43
+ text, trunc=_read_text(path, max_chars)
44
+ with path.open(newline='', errors='replace') as f:
45
+ reader=csv.reader(f); rows=[]
46
+ for i,row in enumerate(reader):
47
+ rows.append(row)
48
+ if i >= csv_rows: break
49
+ out.update({'kind':'csv','media_type':'data','parse_status':'parsed_csv','columns': rows[0] if rows else [], 'row_count': max(0, sum(1 for _ in path.open(errors='replace'))-1), 'first_rows': rows[1:], 'preview': text, 'preview_truncated': trunc})
50
+ elif ext in {'.json','.jsonl'}:
51
+ text, trunc=_read_text(path, max_chars)
52
+ keys=[]; parse_status='parsed_json'
53
+ if ext == '.json':
54
+ try:
55
+ obj=json.loads(path.read_text(errors='replace'))
56
+ keys=sorted(obj.keys()) if isinstance(obj, dict) else []
57
+ except Exception:
58
+ parse_status='json_parse_error'
59
+ else:
60
+ parse_status='jsonl_preview'
61
+ out.update({'kind':'json' if ext == '.json' else 'jsonl','media_type':'data','parse_status':parse_status,'top_level_keys':keys,'preview':text,'preview_truncated':trunc})
62
+ else:
63
+ text, trunc=_read_text(path, max_chars)
64
+ out.update({'kind':'text','parse_status':'text_preview','preview':text,'preview_truncated':trunc})
65
+ except Exception as exc:
66
+ out.update({'parse_status':'preview_error','preview_error_type':type(exc).__name__,'preview_error_message':str(exc)[:300]})
67
+ return out
68
+
69
def build_artifact_previews(package_root: Path | None, refs: list[str], max_artifacts: int=12) -> dict[str, Any]:
    """Collect preview records for the referenced artifacts under ``package_root``.

    Duplicate refs are dropped (first occurrence kept), only the first
    ``max_artifacts`` are considered, and refs that are not existing files
    are skipped. ``model_grading_basis`` is 'artifact_content' when some
    preview has content and none is truncated, 'artifact_preview' when there
    are previews otherwise, and 'trace_only' when there are none.
    """
    if package_root is None:
        return {
            'artifact_content_refs': [],
            'artifact_content_previews': [],
            'artifact_content_hashes': {},
            'artifact_content_preview_truncated': False,
            'model_grading_basis': 'trace_only',
        }

    # NOTE(review): refs are joined onto package_root without checking for
    # absolute paths or "..", so a ref could resolve outside the package --
    # confirm that callers only pass trusted refs.
    unique_refs = list(dict.fromkeys(refs))[:max_artifacts]
    previews = []
    for ref in unique_refs:
        candidate = package_root / ref
        if candidate.exists() and candidate.is_file():
            previews.append(_preview_one(candidate, ref))

    hashes = {item['ref']: item.get('content_hash') for item in previews}
    truncated = any(bool(item.get('preview_truncated')) for item in previews)
    has_content = any(item.get('preview') for item in previews)

    if has_content and not truncated:
        basis = 'artifact_content'
    elif previews:
        basis = 'artifact_preview'
    else:
        basis = 'trace_only'

    return {
        'artifact_content_refs': [item['ref'] for item in previews],
        'artifact_content_previews': previews,
        'artifact_content_hashes': hashes,
        'artifact_content_preview_truncated': truncated,
        'model_grading_basis': basis,
    }
@@ -0,0 +1,142 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+
7
# File extensions (without the leading dot) that normalize_artifact_ref uses
# to find where an artifact path ends, so trailing display text can be cut off.
ARTIFACT_EXTENSIONS = (
    "csv",
    "tsv",
    "json",
    "jsonl",
    "xlsx",
    "xls",
    "md",
    "txt",
    "pdf",
    "docx",
    "pptx",
    "html",
    "xml",
    "yaml",
    "yml",
    "py",
    "js",
    "ts",
    "sql",
    "zip",
    "png",
    "jpg",
    "jpeg",
    "webp",
    "gif",
    "svg",
    "ipynb",
    "log",
)
37
+
38
+
39
def normalize_artifact_ref(ref: object) -> str:
    """Normalize display-style artifact evidence refs back to real paths.

    Steps: trim whitespace and quotes, convert backslashes to slashes, drop
    known ``artifact_*preview*:`` prefixes, extract an embedded artifacts
    path if one is present, cut trailing status markers such as
    `` (truncated)``, and finally cut anything following a known file
    extension in an artifacts path.
    """
    display_prefixes = (
        "artifact_content_previews_truncated:",
        "artifact_content_preview_truncated:",
        "artifact_content_previews:",
        "artifact_content_preview:",
        "artifact_previews_truncated:",
        "artifact_preview_truncated:",
        "artifact_previews:",
        "artifact_preview:",
    )
    status_markers = (
        " (parse_error:",
        " (preview_error:",
        " (read_error:",
        " (open_error:",
        " (parse_status:",
        " (text_preview)",
        " (content_preview)",
        " (artifact_preview)",
        " (binary_preview)",
        " (sheet_preview)",
        " (truncated_preview)",
        " (preview_truncated)",
        " (truncated)",
        " (preview)",
    )

    cleaned = "" if ref is None else str(ref)
    cleaned = cleaned.strip()
    cleaned = cleaned.strip('"')
    cleaned = cleaned.strip("'")
    cleaned = cleaned.replace("\\", "/").strip()

    for display_prefix in display_prefixes:
        if cleaned.startswith(display_prefix):
            cleaned = cleaned[len(display_prefix):].strip()

    # Pull out an attempts/.../artifacts/... or artifacts/... path embedded in other text.
    embedded = re.search(
        r"((?:packages/[^/\s]+/)?attempts/[^/\s]+/artifacts/[^\s\)\],;]+|artifacts/[^\s\)\],;]+)",
        cleaned,
    )
    if embedded:
        cleaned = embedded.group(1).strip()

    for status_marker in status_markers:
        head, found, _tail = cleaned.partition(status_marker)
        if found:
            cleaned = head.strip()

    if "/artifacts/" in cleaned or cleaned.startswith("artifacts/"):
        extensions = "|".join(re.escape(ext) for ext in ARTIFACT_EXTENSIONS)
        # Keep everything up to the first known extension that is followed
        # by a separator character or the end of the string.
        ext_match = re.match(
            rf"^(.*?\.({extensions}))(?:[:#_\s\)].*)?$",
            cleaned,
            flags=re.IGNORECASE,
        )
        if ext_match:
            cleaned = ext_match.group(1).strip()

    return cleaned
95
+
96
+
97
def is_artifact_evidence_ref(ref: object) -> bool:
    """Return True when ``ref`` points at an artifact file rather than metadata.

    Refs starting with a metadata prefix (trace summary, actual outputs,
    rubric, score, metadata_json) are rejected, whether in raw or normalized
    form. Otherwise the normalized ref must look like an artifacts path.
    """
    metadata_prefixes = (
        "trace_summary_json:",
        "actual_outputs:",
        "actual_outputs.",
        "rubric:",
        "rubric.",
        "score:",
        "score.",
        "metadata_json:",
        "metadata_json.",
    )
    raw = str(ref).strip()
    normalized = normalize_artifact_ref(raw)
    if raw.startswith(metadata_prefixes):
        return False
    if normalized.startswith(metadata_prefixes):
        return False

    under_artifacts_dir = "/artifacts/" in normalized
    if "attempts/" in normalized and under_artifacts_dir:
        return True
    if normalized.startswith("artifacts/"):
        return True
    return "packages/" in normalized and "/attempts/" in normalized and under_artifacts_dir
118
+
119
+
120
def artifact_ref_resolves(ref: object, existing_refs: set[str]) -> bool:
    """Return True when ``ref`` matches one of ``existing_refs``.

    Both sides are compared with forward slashes and without leading slashes.
    A match is either exact or a whole-path-segment suffix match in either
    direction.
    """
    wanted = normalize_artifact_ref(ref).replace("\\", "/").lstrip("/")
    if not wanted:
        return False

    known = set()
    for existing in existing_refs:
        if existing:
            known.add(str(existing).replace("\\", "/").lstrip("/"))

    if wanted in known:
        return True
    for candidate in known:
        if candidate.endswith("/" + wanted) or wanted.endswith("/" + candidate):
            return True
    return False
132
+
133
+
134
def artifact_ref_candidates(ref: object) -> set[str]:
    """Return the path variants under which ``ref`` might be recorded.

    The variants are the normalized ref, the part after the first
    ``/artifacts/`` (when present), and the bare file name. Empty variants
    are dropped; an empty ref yields an empty set.
    """
    normalized = normalize_artifact_ref(ref).replace("\\", "/").lstrip("/")
    if not normalized:
        return set()

    variants = [normalized]
    _before, separator, inside_artifacts = normalized.partition("/artifacts/")
    if separator:
        variants.append(inside_artifacts)
    variants.append(Path(normalized).name)
    return {variant for variant in variants if variant}