agent-apprenticeship 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +217 -0
  3. package/bin/agent-apprenticeship.js +131 -0
  4. package/package.json +30 -0
  5. package/pyproject.toml +23 -0
  6. package/src/agent_apprenticeship_trace/__init__.py +2 -0
  7. package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
  8. package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
  9. package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
  10. package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
  11. package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
  12. package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
  13. package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
  14. package/src/agent_apprenticeship_trace/certification.py +580 -0
  15. package/src/agent_apprenticeship_trace/cli.py +2979 -0
  16. package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
  17. package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
  18. package/src/agent_apprenticeship_trace/config.py +609 -0
  19. package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
  20. package/src/agent_apprenticeship_trace/env.py +46 -0
  21. package/src/agent_apprenticeship_trace/evaluator.py +64 -0
  22. package/src/agent_apprenticeship_trace/grader.py +194 -0
  23. package/src/agent_apprenticeship_trace/integration_status.py +193 -0
  24. package/src/agent_apprenticeship_trace/io.py +20 -0
  25. package/src/agent_apprenticeship_trace/learning.py +627 -0
  26. package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
  27. package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
  28. package/src/agent_apprenticeship_trace/loop.py +111 -0
  29. package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
  30. package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
  31. package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
  32. package/src/agent_apprenticeship_trace/progress.py +223 -0
  33. package/src/agent_apprenticeship_trace/public_run.py +1109 -0
  34. package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
  35. package/src/agent_apprenticeship_trace/recipes.py +129 -0
  36. package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
  37. package/src/agent_apprenticeship_trace/revision.py +21 -0
  38. package/src/agent_apprenticeship_trace/role_runners.py +7 -0
  39. package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
  40. package/src/agent_apprenticeship_trace/schemas.py +273 -0
  41. package/src/agent_apprenticeship_trace/session_events.py +99 -0
  42. package/src/agent_apprenticeship_trace/task_intake.py +112 -0
  43. package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
  44. package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
  45. package/src/agent_apprenticeship_trace/training_signals.py +30 -0
  46. package/src/agent_apprenticeship_trace/validation.py +210 -0
  47. package/src/agent_apprenticeship_trace/verifier.py +55 -0
@@ -0,0 +1,428 @@
1
+ from __future__ import annotations
2
+ import shutil, subprocess
3
+ from pathlib import Path
4
+ import re
5
+ from .schemas import AgentTrace, AgentTraceStep, ActualOutputs, RawTaskRecord, TaskIntakeSpec
6
+ from .trace_prompt import build_worker_prompt
7
+ from .io import write_json
8
+ from .config import get_settings
9
+ from .env import redact_secrets
10
+ from .contract_diagnostics import build_contract_diagnostics, diagnostics_text
11
+ from .trace_normalizer import TraceNormalizationContext, repair_agent_trace_file, TraceNormalizationReport
12
+ from .actual_outputs_normalizer import ActualOutputsNormalizationContext, repair_actual_outputs_file, write_actual_outputs_normalization
13
+ from .public_sanitizer import classify_provider_failure, public_error_summary
14
+
15
class AttemptResult(dict):
    """Dictionary-backed result returned by the attempt runners in this module.

    The runners populate it with keyword entries such as ``attempt_dir``,
    ``trace_valid``, ``trace`` and ``actual_outputs``.
    """
16
+
17
+ CODEX_TRUST_RETRY_MESSAGE = "Codex refused to run because the workspace is not a trusted Git directory. Retrying with --skip-git-repo-check if supported."
18
+
19
+ def _attempt_dir(package_root: Path, attempt_kind: str) -> Path:
20
+ p=package_root/'attempts'/attempt_kind
21
+ (p/'artifacts').mkdir(parents=True, exist_ok=True)
22
+ (p/'input').mkdir(parents=True, exist_ok=True)
23
+ return p
24
+
25
+ def _copy_attempt_inputs(package_root: Path, attempt_dir: Path) -> list[str]:
26
+ src=package_root/'input'; dst=attempt_dir/'input'
27
+ dst.mkdir(parents=True, exist_ok=True)
28
+ copied=[]
29
+ if src.exists():
30
+ for f in src.iterdir():
31
+ if f.is_file():
32
+ shutil.copy2(f, dst/f.name); copied.append(f.name)
33
+ elif f.is_dir():
34
+ shutil.copytree(f, dst/f.name, dirs_exist_ok=True); copied.append(f.name)
35
+ return copied
36
+
37
def parse_expected_deliverables(text: str | None) -> list[str]:
    """Extract deliverable file names from a free-form description.

    The text is split on ``+``, ``,``, ``;``, the word ``and`` and newlines.
    Each fragment contributes at most one file name: the first token made of
    letters, digits, ``_``, ``.`` or ``-`` that ends in one of the recognised
    extensions.

    Args:
        text: Description of the expected deliverable(s); may be ``None``.

    Returns:
        File names in order of first appearance, without duplicates. Empty
        when ``text`` is falsy or names no recognised file.
    """
    if not text:
        return []
    fragments = re.split(r'\s*(?:\+|,|;|\band\b|\n)\s*', str(text))
    names = []
    for fragment in fragments:
        fragment = fragment.strip().strip('`*•- ')
        if not fragment:
            continue
        # The trailing \b keeps a short extension from matching as a prefix of
        # a longer one: without it "events.jsonl" was truncated to
        # "events.json" and "tool.python" to "tool.py".
        match = re.search(
            r'[A-Za-z0-9_.-]+\.(?:csv|xlsx|xls|jsonl|json|md|txt|pdf|html|xml|py|zip)\b',
            fragment,
            re.I,
        )
        if match:
            names.append(match.group(0))
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(names))
50
+
51
def _deliverables(raw: RawTaskRecord, spec: TaskIntakeSpec) -> list[str]:
    """Work out which deliverable file names an attempt is expected to produce.

    Resolution order:
      1. File names parsed from ``raw.expected_deliverable`` or, failing that,
         ``raw.raw_payload['expected_deliverable']``.
      2. File names parsed from the file-like entries of
         ``raw.raw_payload['output_requirements']`` (or
         ``spec.output_requirements`` when the payload has none).
      3. The single default ``'output.txt'``.
    """
    declared = raw.expected_deliverable or raw.raw_payload.get('expected_deliverable')
    from_declared = parse_expected_deliverables(declared)
    if from_declared:
        return from_declared

    requirements = raw.raw_payload.get('output_requirements') or spec.output_requirements
    from_requirements = []
    for requirement in requirements or []:
        requirement_text = str(requirement)
        mentions_file = re.search(
            r'\.(?:csv|xlsx|xls|json|jsonl|md|txt|pdf|html|xml|py|zip)\b',
            requirement_text,
            re.I,
        )
        if mentions_file:
            from_requirements.extend(parse_expected_deliverables(requirement_text))
    if from_requirements:
        return from_requirements
    return ['output.txt']
63
+
64
def _fallback_actual(spec: TaskIntakeSpec, attempt_kind: str, error_type: str, error_message: str) -> ActualOutputs:
    """Build a ``failed`` ``ActualOutputs`` record that points at the raw logs.

    The error message is classified with ``classify_provider_failure``. When
    the classification is non-empty its ``error_type`` (if any) replaces the
    supplied one and the message becomes ``public_error_summary``; otherwise
    the message is passed through ``redact_secrets``. The classification is
    also merged into ``metadata_json``.
    """
    classification = classify_provider_failure(error_message)
    resolved_type = classification.get('error_type') or error_type
    if classification:
        safe_message = public_error_summary(error_message)
    else:
        safe_message = redact_secrets(error_message)

    base = f'attempts/{attempt_kind}'
    prompt_ref = f'{base}/prompt.md'
    stdout_ref = f'{base}/stdout.txt'
    stderr_ref = f'{base}/stderr.txt'
    final_ref = f'{base}/final_message.txt'

    return ActualOutputs(
        task_id=spec.task_id,
        attempt_id=f'{spec.task_id}_{attempt_kind}',
        attempt_kind=attempt_kind,
        status='failed',
        output_summary='Attempt failed or did not produce a valid agent trace; raw logs were preserved.',
        primary_output_ref=None,
        deliverable_refs=[],
        final_message_ref=final_ref,
        artifact_refs=[prompt_ref, stdout_ref, stderr_ref, final_ref],
        files_created=[],
        files_modified=[],
        files_deleted=[],
        stdout_ref=stdout_ref,
        stderr_ref=stderr_ref,
        raw_log_refs=[stdout_ref, stderr_ref, final_ref],
        error_type=resolved_type,
        error_message=safe_message,
        metadata_json={'trace_valid': False, 'fallback_actual_outputs_created': True, **classification},
    )
69
+
70
+
71
+ def _apprentice_operational_error(error: object | None, stdout: str = "", stderr: str = "", returncode: int | None = None) -> str | None:
72
+ text = f"{error or ''}\n{stdout or ''}\n{stderr or ''}".lower()
73
+ if isinstance(error, FileNotFoundError) or "no such file or directory: 'codex'" in text or "no such file or directory: codex" in text:
74
+ return "Apprentice Agent command not found: codex"
75
+ if isinstance(error, subprocess.TimeoutExpired) or "timed out" in text or "timeout" in text:
76
+ return "Apprentice Agent timed out while running Codex."
77
+ if _is_codex_workspace_trust_error(stdout, stderr, error):
78
+ return "Codex workspace trust error: Codex refused to run because the workspace is not a trusted Git directory. Use a Codex CLI with --skip-git-repo-check support or run from a trusted Git repository."
79
+ if _is_codex_local_state_permission_error(stdout, stderr, error):
80
+ return "Codex local state permission error: Codex could not initialize its local state or app-server client. Check that CODEX_HOME is writable and Codex can access its state directory."
81
+ auth_markers = [
82
+ "not authenticated",
83
+ "not logged in",
84
+ "login required",
85
+ "please login",
86
+ "please log in",
87
+ "authentication failed",
88
+ "auth failed",
89
+ "unauthorized",
90
+ "http 401",
91
+ "status 401",
92
+ "status code 401",
93
+ "401 unauthorized",
94
+ "missing api key",
95
+ "api key not configured",
96
+ "invalid api key",
97
+ "credential error",
98
+ "credentials not configured",
99
+ ]
100
+ if any(token in text for token in auth_markers):
101
+ return "Apprentice Agent setup error: Codex is not authenticated or configured."
102
+ quota_markers = [
103
+ "quota",
104
+ "rate limit",
105
+ "billing",
106
+ "insufficient quota",
107
+ "insufficient credits",
108
+ "out of credits",
109
+ "credit limit",
110
+ "usage limit",
111
+ ]
112
+ if any(token in text for token in quota_markers):
113
+ return "Apprentice Agent provider quota or usage limit reached."
114
+ if returncode not in (None, 0):
115
+ return f"Apprentice Agent exited before producing required outputs (exit code {returncode})."
116
+ if error:
117
+ return f"Apprentice Agent operational error: {redact_secrets(str(error))}"
118
+ return None
119
+
120
+
121
+ def _is_codex_workspace_trust_error(stdout: str = "", stderr: str = "", error: object | None = None) -> bool:
122
+ text = f"{error or ''}\n{stdout or ''}\n{stderr or ''}".lower()
123
+ return (
124
+ "not inside a trusted directory" in text
125
+ or "--skip-git-repo-check was not specified" in text
126
+ or "trusted git directory" in text
127
+ )
128
+
129
+
130
+ def _is_codex_local_state_permission_error(stdout: str = "", stderr: str = "", error: object | None = None) -> bool:
131
+ text = f"{error or ''}\n{stdout or ''}\n{stderr or ''}".lower()
132
+ return (
133
+ ("codex_state" in text and "readonly database" in text)
134
+ or ("failed to open state db" in text and "readonly database" in text)
135
+ or ("failed to initialize in-process app-server client" in text and "operation not permitted" in text)
136
+ or ("failed to initialize state runtime" in text and "readonly database" in text)
137
+ )
138
+
139
+
140
def codex_exec_help(command: str = "codex") -> str:
    """Return the help text printed by ``<command> exec --help``.

    If running that raises (for example the command is missing or the
    5-second timeout expires), ``<command> --help`` is tried instead. The
    result is stdout and stderr joined by a newline, or an empty string when
    both invocations raise.
    """
    for help_args in (["exec", "--help"], ["--help"]):
        try:
            completed = subprocess.run(
                [command, *help_args],
                cwd=None,
                text=True,
                capture_output=True,
                timeout=5,
            )
            captured_out = getattr(completed, "stdout", "") or ""
            captured_err = getattr(completed, "stderr", "") or ""
            return captured_out + "\n" + captured_err
        except Exception:
            # Best-effort probe: fall through to the next invocation.
            continue
    return ""
150
+
151
+
152
def _codex_exec_supports(flag: str, command: str = "codex") -> bool:
    """Return whether ``flag`` appears in the help text of ``command``.

    This is a plain substring test against ``codex_exec_help(command)``.
    """
    help_text = codex_exec_help(command)
    return flag in help_text
154
+
155
def _fallback_trace(spec: TaskIntakeSpec, attempt_kind: str, prompt: str, actual: ActualOutputs, error_type: str, error_message: str, codex_sandbox: str) -> AgentTrace:
    """Build a minimal two-step failure trace for an attempt without a usable trace.

    Step 1 records the user's request; step 2 is a failed ``error`` step
    carrying the sanitised error text and references to the preserved logs.
    The provider-failure classification of ``error_message`` is merged into
    the trace metadata and, when it names an ``error_type``, overrides the
    one supplied by the caller.
    """
    classification = classify_provider_failure(error_message)
    attempt_id = f'{spec.task_id}_{attempt_kind}'
    role = "reviser" if attempt_kind == "revised" else "worker"
    actor = f'agent:{role}'
    if classification:
        safe_text = public_error_summary(error_message)
    else:
        # Only the tail of the redacted message is kept.
        safe_text = redact_secrets(error_message)[-3000:]
    resolved_type = classification.get('error_type') or error_type

    log_refs = [
        f'attempts/{attempt_kind}/prompt.md',
        f'attempts/{attempt_kind}/stdout.txt',
        f'attempts/{attempt_kind}/stderr.txt',
        f'attempts/{attempt_kind}/final_message.txt',
    ]
    request_step = AgentTraceStep(
        step=1,
        turn=1,
        actor='user',
        action='user_message',
        input=spec.normalized_instruction,
        message_role='direct_request',
    )
    failure_step = AgentTraceStep(
        step=2,
        turn=1,
        actor=actor,
        action='error',
        operation='other',
        tool='codex_cli',
        observation='Codex attempt ended without a valid agent_trace.json or actual_outputs.json.',
        input='Validate required output contract for ./agent_trace.json and ./actual_outputs.json.',
        output=safe_text,
        state_change='A minimal failure trace and failed actual_outputs.json were written by the runner.',
        reasoning='The package must preserve failure evidence without fabricating a successful detailed trace.',
        caused_by=[1],
        causal_type='dependency_on_tool_result',
        success=False,
        step_outcome='failed',
        error_type=resolved_type,
        error_message=safe_text,
        artifact_refs=log_refs,
    )
    steps = [request_step, failure_step]

    return AgentTrace(
        trace_id=f'trace_{attempt_id}_fallback',
        collection_id=None,
        trace_mode='live',
        task=spec.normalized_instruction,
        task_id=spec.task_id,
        attempt_id=attempt_id,
        attempt_kind=attempt_kind if attempt_kind in ['baseline', 'revised'] else 'other',
        agent_tools=['codex_cli', 'Bash', 'python', 'file_read', 'file_write'],
        system_prompt=prompt,
        system_prompt_hash=None,
        skills=['agent_trace_skill'],
        learning='When a live agent cannot write outputs, preserve logs and create a minimal failure trace.',
        termination_reason='agent_blocked',
        steps=steps,
        actual_outputs=actual,
        artifacts=[],
        metadata_json={
            'trace_valid': False,
            'fallback_trace_created': True,
            'codex_sandbox': codex_sandbox,
            **classification,
        },
    )
166
+
167
def _write_fallback_report(d: Path, spec: TaskIntakeSpec, attempt_kind: str, reason: str, raw_parse_error: bool=False) -> None:
    """Write ``trace_normalization_report.json`` describing a fallback trace.

    The report marks the trace as a fallback with two normalized steps and
    zero raw steps, records ``reason`` both as the single validation error
    and in the metadata, and references ``agent_trace.raw.json`` only when
    that file exists in ``d``.
    """
    raw_ref = 'agent_trace.raw.json' if (d / 'agent_trace.raw.json').exists() else None
    report = TraceNormalizationReport(
        task_id=spec.task_id,
        attempt_id=f'{spec.task_id}_{attempt_kind}',
        attempt_kind=attempt_kind,
        raw_trace_ref=raw_ref,
        normalized_trace_ref=None,
        canonical_trace_ref='agent_trace.json',
        trace_schema_valid=True,
        trace_normalized=False,
        trace_lossless=True,
        fallback_trace=True,
        raw_step_count=0,
        normalized_step_count=2,
        discarded_step_count=0,
        raw_trace_parse_error=raw_parse_error,
        validation_errors=[reason],
        metadata_json={'fallback_reason': reason},
    )
    write_json(d / 'trace_normalization_report.json', report)
170
+
171
+
172
def ensure_attempt_outputs(package_root: Path, spec: TaskIntakeSpec, attempt_kind: str, prompt: str, codex_sandbox: str, validation_error: Exception | None=None) -> tuple[AgentTrace, ActualOutputs, bool]:
    """Guarantee that an attempt directory holds valid output and trace files.

    ``actual_outputs.json`` is repaired in place when possible; otherwise a
    failed fallback record is written. ``agent_trace.json`` is then
    normalized from its raw copy when it contains steps; otherwise a minimal
    fallback trace plus a fallback normalization report are written.

    Returns:
        ``(trace, actual_outputs, trace_valid)`` where ``trace_valid`` is the
        normalization report's schema-valid flag, and ``False`` whenever a
        fallback trace was produced.
    """
    d = _attempt_dir(package_root, attempt_kind)
    attempt_id = f'{spec.task_id}_{attempt_kind}'
    actual_path = d / 'actual_outputs.json'
    trace_path = d / 'agent_trace.json'
    raw_path = d / 'agent_trace.raw.json'
    err = validation_error or FileNotFoundError('agent_trace.json missing or invalid')

    def _read_log(name: str) -> str:
        # Logs are optional; a missing file reads as empty text.
        log_path = d / name
        return log_path.read_text(errors='ignore') if log_path.exists() else ''

    # --- actual_outputs.json -------------------------------------------------
    synthetic_raw = RawTaskRecord(
        raw_task_id=spec.task_id,
        source_kind='normalized_spec',
        raw_title=spec.normalized_title,
        raw_description=spec.normalized_instruction,
        raw_payload={'expected_deliverable': spec.expected_agent_deliverable},
    )
    required_artifacts = _deliverables(synthetic_raw, spec)
    actual_ctx = ActualOutputsNormalizationContext(
        task_id=spec.task_id,
        attempt_id=attempt_id,
        attempt_kind=attempt_kind,
        package_root=package_root,
        required_artifacts=required_artifacts,
    )
    actual_result = repair_actual_outputs_file(actual_path, actual_ctx)
    if actual_result.actual_outputs is not None:
        write_actual_outputs_normalization(d, actual_result)
        actual = ActualOutputs.model_validate(actual_result.actual_outputs)
    else:
        actual = _fallback_actual(spec, attempt_kind, type(err).__name__, str(err))
        write_json(actual_path, actual)
        write_json(d / 'actual_outputs_normalization_report.json', actual_result.report)

    # --- agent_trace.json: missing entirely ----------------------------------
    if not trace_path.exists():
        final = _read_log('final_message.txt')
        stderr = _read_log('stderr.txt')
        msg = f'missing_trace\n\nfinal_message:\n{final}\n\nstderr:\n{stderr}'
        trace = _fallback_trace(spec, attempt_kind, prompt, actual, 'FileNotFoundError', msg, codex_sandbox)
        trace.metadata_json['fallback_reason'] = classify_provider_failure(msg).get('fallback_reason') or 'missing_trace'
        write_json(trace_path, trace)
        _write_fallback_report(d, spec, attempt_kind, 'missing_trace')
        return trace, actual, False

    # --- agent_trace.json: present, try to normalize -------------------------
    original_text = trace_path.read_text(errors='ignore')
    if not raw_path.exists():
        # An existing raw copy is never overwritten.
        raw_path.write_text(original_text)
    context = TraceNormalizationContext(
        task_id=spec.task_id,
        attempt_id=attempt_id,
        attempt_kind=attempt_kind,
        task=spec.normalized_instruction,
        actual_outputs=actual,
    )
    result = repair_agent_trace_file(raw_path, context)
    has_steps = result.normalized_trace and (
        result.report.raw_step_count or result.normalized_trace.get('steps')
    )
    if has_steps:
        try:
            AgentTrace.model_validate_json(raw_path.read_text())
        except Exception:
            # Keep a copy of a raw trace that does not validate as-is.
            (d / 'agent_trace.invalid.json').write_text(raw_path.read_text())
        write_json(d / 'agent_trace.normalized.json', result.normalized_trace)
        write_json(d / 'agent_trace.json', result.normalized_trace)
        write_json(d / 'trace_normalization_report.json', result.report)
        normalized = AgentTrace.model_validate(result.normalized_trace)
        return normalized, actual, bool(result.report.trace_schema_valid)

    # --- agent_trace.json: present but unusable ------------------------------
    reason = result.parse_error or 'parseable trace contained no steps'
    final = _read_log('final_message.txt')
    stderr = _read_log('stderr.txt')
    msg = f'{reason}\n\nfinal_message:\n{final}\n\nstderr:\n{stderr}'
    trace = _fallback_trace(spec, attempt_kind, prompt, actual, 'TraceNormalizationError', msg, codex_sandbox)
    default_reason = 'unparseable_trace' if result.fallback_required else 'missing_steps'
    trace.metadata_json['fallback_reason'] = classify_provider_failure(msg).get('fallback_reason') or default_reason
    write_json(trace_path, trace)
    _write_fallback_report(d, spec, attempt_kind, trace.metadata_json['fallback_reason'], result.fallback_required)
    return trace, actual, False
213
+
214
def deterministic_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSpec, attempt_kind='baseline', feedback: str | None=None) -> AttemptResult:
    """Produce a canned, always-successful attempt without running an agent.

    Writes the prompt, placeholder logs, one placeholder artifact per
    expected deliverable, ``actual_outputs.json`` (then normalized), the
    raw/normalized/canonical trace files and a normalization report into the
    attempt directory. ``feedback`` is accepted but not used by this body.

    Returns:
        An ``AttemptResult`` with ``attempt_dir``, ``trace_valid=True``,
        ``trace`` and ``actual_outputs``.
    """
    attempt_id = f'{spec.task_id}_{attempt_kind}'
    d = _attempt_dir(package_root, attempt_kind)
    input_files = _copy_attempt_inputs(package_root, d)
    deliverables = _deliverables(raw, spec)
    settings = get_settings()
    prompt = build_worker_prompt(
        spec.normalized_instruction,
        '',
        attempt_kind,
        input_files,
        deliverables,
        settings.sensitive_info_masking,
        workspace_path=str(d),
    )
    (d / 'prompt.md').write_text(prompt)
    (d / 'stdout.txt').write_text('deterministic runner completed\n')
    (d / 'stderr.txt').write_text('')
    (d / 'final_message.txt').write_text(f'Deterministic {attempt_kind} attempt complete.\n')

    files = []
    for name in deliverables:
        if '.' in name and '/' not in name:
            fname = name
        else:
            # Anything that is not a bare file name becomes a slugged .txt file.
            fname = name.lower().replace(' ', '_') + '.txt'
        (d / 'artifacts' / fname).write_text(f'{attempt_kind} deterministic artifact for {name}\n')
        files.append(f'attempts/{attempt_kind}/artifacts/{fname}')

    stdout_ref = f'attempts/{attempt_kind}/stdout.txt'
    stderr_ref = f'attempts/{attempt_kind}/stderr.txt'
    actual = ActualOutputs(
        task_id=spec.task_id,
        attempt_id=attempt_id,
        attempt_kind=attempt_kind,
        status='success',
        output_summary=f'Deterministic {attempt_kind} outputs for {spec.normalized_title}',
        primary_output_ref=files[0] if files else None,
        deliverable_refs=files,
        final_message_ref=f'attempts/{attempt_kind}/final_message.txt',
        artifact_refs=files,
        files_created=files,
        files_modified=[],
        files_deleted=[],
        stdout_ref=stdout_ref,
        stderr_ref=stderr_ref,
        raw_log_refs=[stdout_ref, stderr_ref],
        error_type=None,
        error_message=None,
        metadata_json={
            'runner': 'deterministic',
            'expected_deliverable_items': deliverables,
            'produced_deliverable_items': [Path(f).name for f in files],
        },
    )

    role = "reviser" if attempt_kind == "revised" else "worker"
    agent_actor = f'agent:{role}'
    steps = [
        AgentTraceStep(
            step=1,
            turn=1,
            actor='user',
            action='user_message',
            input=spec.normalized_instruction,
            message_role='direct_request',
        ),
        AgentTraceStep(
            step=2,
            turn=1,
            actor=agent_actor,
            action='agent_step',
            operation='plan',
            tool=None,
            observation='Task instructions, ./input files, and worker-visible rubric are available.',
            input='Review task and required deliverables.',
            output='Plan created.',
            state_change='The attempt plan identified required artifacts.',
            reasoning='A short plan reduces missed deliverables.',
            caused_by=[1],
            causal_type='user_request',
            success=True,
            step_outcome='progress',
        ),
        AgentTraceStep(
            step=3,
            turn=1,
            actor=agent_actor,
            action='agent_step',
            operation='write',
            tool='file_write',
            observation='Deliverable names are known.',
            input=', '.join(files),
            output='Artifacts written.',
            state_change='Required deterministic artifacts were created under ./artifacts.',
            reasoning='Writing explicit files satisfies the artifact contract.',
            caused_by=[2],
            causal_type='execution_of_plan',
            success=True,
            step_outcome='completed',
            artifact_refs=files,
        ),
    ]
    trace = AgentTrace(
        trace_id=f'trace_{attempt_id}',
        collection_id=None,
        trace_mode='live',
        task=spec.normalized_instruction,
        task_id=spec.task_id,
        attempt_id=attempt_id,
        attempt_kind=attempt_kind if attempt_kind in ['baseline', 'revised'] else 'other',
        agent_tools=['deterministic_runner', 'file_write'],
        system_prompt=prompt,
        system_prompt_hash=None,
        skills=['agent_trace_skill'],
        learning='Materialize required artifacts and keep evaluation outside the trace.',
        termination_reason='task_complete',
        steps=steps,
        actual_outputs=actual,
        artifacts=[],
        metadata_json={'trace_valid': True},
    )

    # Persist the outputs, then run them through the same normalizer that
    # real attempts use. The trace above keeps the pre-normalization record.
    write_json(d / 'actual_outputs.json', actual)
    actual_ctx = ActualOutputsNormalizationContext(
        task_id=spec.task_id,
        attempt_id=attempt_id,
        attempt_kind=attempt_kind,
        package_root=package_root,
        required_artifacts=deliverables,
    )
    actual_result = repair_actual_outputs_file(d / 'actual_outputs.json', actual_ctx)
    write_actual_outputs_normalization(d, actual_result)
    actual = ActualOutputs.model_validate(actual_result.actual_outputs)

    for trace_name in ('agent_trace.raw.json', 'agent_trace.normalized.json', 'agent_trace.json'):
        write_json(d / trace_name, trace)
    report = TraceNormalizationReport(
        task_id=spec.task_id,
        attempt_id=attempt_id,
        attempt_kind=attempt_kind,
        raw_trace_ref='agent_trace.raw.json',
        normalized_trace_ref='agent_trace.normalized.json',
        canonical_trace_ref='agent_trace.json',
        trace_schema_valid=True,
        trace_normalized=False,
        trace_lossless=True,
        fallback_trace=False,
        raw_step_count=len(steps),
        normalized_step_count=len(steps),
        discarded_step_count=0,
    )
    write_json(d / 'trace_normalization_report.json', report)
    return AttemptResult(attempt_dir=str(d), trace_valid=True, trace=trace, actual_outputs=actual)
246
+
247
def codex_command(
    prompt: str,
    sandbox: str,
    workspace: Path | str | None = None,
    *,
    command: str = "codex",
    skip_git_repo_check_supported: bool | None = None,
    ask_for_approval_supported: bool | None = None,
) -> list[str]:
    """Assemble the argument list for a ``<command> exec`` invocation.

    Args:
        prompt: Prompt text; always the final argument.
        sandbox: Value passed to ``--sandbox``.
        workspace: When given, passed to ``--cd``.
        command: Executable to invoke.
        skip_git_repo_check_supported: Whether to add
            ``--skip-git-repo-check``. ``None`` means "probe the CLI help".
        ask_for_approval_supported: Whether to add
            ``--ask-for-approval never``. ``None`` means "probe the CLI help".

    Returns:
        The argument list, suitable for ``subprocess.run``.
    """
    argv = [command, 'exec']
    if workspace is not None:
        argv += ['--cd', str(workspace)]
    argv += ['--sandbox', sandbox]

    # The approval flag is resolved (and probed, if needed) before the
    # git-repo-check flag.
    use_approval_flag = ask_for_approval_supported
    if use_approval_flag is None:
        use_approval_flag = _codex_exec_supports('--ask-for-approval', command)
    if use_approval_flag:
        argv += ['--ask-for-approval', 'never']

    use_skip_flag = skip_git_repo_check_supported
    if use_skip_flag is None:
        use_skip_flag = _codex_exec_supports('--skip-git-repo-check', command)
    if use_skip_flag:
        argv.append('--skip-git-repo-check')

    argv.append(prompt)
    return argv
270
+
271
def run_codex_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSpec, attempt_kind='baseline', timeout=900) -> AttemptResult:
    """Run one attempt through the Codex CLI and persist its outputs.

    Builds the worker prompt, runs ``<codex> exec`` in the attempt
    directory, stores redacted stdout/stderr/final-message logs, then calls
    ``ensure_attempt_outputs`` so that ``agent_trace.json`` and
    ``actual_outputs.json`` always exist. Run metadata, contract diagnostics
    and any operational error are recorded on both documents.

    Args:
        package_root: Root of the task package.
        raw: Raw task record, used to resolve expected deliverables.
        spec: Normalized task specification.
        attempt_kind: Name of the attempt sub-directory (e.g. ``baseline``).
        timeout: Seconds allowed for each Codex invocation.

    Returns:
        An ``AttemptResult`` with ``attempt_dir``, ``trace_valid``,
        ``trace``, ``actual_outputs`` and ``codex_sandbox``.
    """
    d = _attempt_dir(package_root, attempt_kind)
    input_files = _copy_attempt_inputs(package_root, d)
    deliverables = _deliverables(raw, spec)
    settings = get_settings()
    sandbox = settings.codex_sandbox or 'workspace-write'
    codex_executable = settings.worker_agent_command or 'codex'
    rubric_path = package_root / 'rubric' / 'worker_visible_rubric.md'
    rubric_md = rubric_path.read_text() if rubric_path.exists() else ''
    prompt = build_worker_prompt(
        spec.normalized_instruction,
        rubric_md,
        attempt_kind,
        input_files,
        deliverables,
        settings.sensitive_info_masking,
        workspace_path=str(d),
    )
    (d / 'prompt.md').write_text(prompt)

    skip_supported = _codex_exec_supports('--skip-git-repo-check', codex_executable)
    ask_supported = _codex_exec_supports('--ask-for-approval', codex_executable)
    cmd = codex_command(
        prompt,
        sandbox,
        d,
        command=codex_executable,
        skip_git_repo_check_supported=skip_supported,
        ask_for_approval_supported=ask_supported,
    )

    run_error = None
    returncode = None
    stdout = ''
    stderr = ''
    try:
        cp = subprocess.run(cmd, cwd=d, text=True, capture_output=True, timeout=timeout)
        returncode = cp.returncode
        stdout = cp.stdout or ''
        stderr = cp.stderr or ''
        trust_failure = cp.returncode != 0 and _is_codex_workspace_trust_error(stdout, stderr)
        if trust_failure and not skip_supported:
            stderr = f"{stderr}\n{CODEX_TRUST_RETRY_MESSAGE}\nCodex CLI help did not list --skip-git-repo-check, so Agent Apprenticeship could not retry safely."
        elif trust_failure and skip_supported and '--skip-git-repo-check' not in cmd:
            # NOTE(review): cmd is built with the flag whenever skip_supported
            # is true, so this retry branch looks unreachable as written.
            # It is kept unchanged.
            stderr = f"{stderr}\n{CODEX_TRUST_RETRY_MESSAGE}"
            retry_cmd = codex_command(
                prompt,
                sandbox,
                d,
                command=codex_executable,
                skip_git_repo_check_supported=True,
                ask_for_approval_supported=ask_supported,
            )
            cp = subprocess.run(retry_cmd, cwd=d, text=True, capture_output=True, timeout=timeout)
            cmd = retry_cmd
            returncode = cp.returncode
            stdout = (stdout or '') + "\n" + (cp.stdout or '')
            stderr = (stderr or '') + "\n" + (cp.stderr or '')
        (d / 'stdout.txt').write_text(redact_secrets(stdout))
        (d / 'stderr.txt').write_text(redact_secrets(stderr))
        (d / 'final_message.txt').write_text(redact_secrets((stdout or stderr or '')[-4000:]))
        if cp.returncode != 0:
            run_error = RuntimeError(_apprentice_operational_error(None, stdout, stderr, cp.returncode) or f'Codex exited with code {cp.returncode}.')
    except Exception as e:
        # Any failure to run Codex is recorded, not raised, so the fallback
        # outputs below are still produced.
        run_error = e
        (d / 'stdout.txt').write_text('')
        (d / 'stderr.txt').write_text(redact_secrets(str(e)))
        (d / 'final_message.txt').write_text('Codex run failed before producing validated trace.')

    contract_diagnostics = None
    if not (d / 'agent_trace.json').exists() or not (d / 'actual_outputs.json').exists():
        contract_diagnostics = build_contract_diagnostics(d, command=cmd, working_directory=d, agent_display_name='Codex', prompt=prompt)
        with (d / 'final_message.txt').open('a') as f:
            f.write('\n\n' + diagnostics_text(contract_diagnostics))

    trace, actual, trace_valid = ensure_attempt_outputs(package_root, spec, attempt_kind, prompt, sandbox, run_error)
    trace.metadata_json['codex_sandbox'] = sandbox
    # The prompt is replaced by a placeholder so it is not duplicated in metadata.
    trace.metadata_json['codex_command'] = [part if part != prompt else '<prompt>' for part in cmd]
    trace.metadata_json['codex_skip_git_repo_check_supported'] = skip_supported
    trace.metadata_json['codex_skip_git_repo_check_used'] = '--skip-git-repo-check' in cmd
    trace.metadata_json['trace_valid'] = trace_valid
    if contract_diagnostics:
        trace.metadata_json['apprentice_agent_contract_diagnostics'] = contract_diagnostics
    write_json(d / 'agent_trace.json', trace)

    if actual.metadata_json is None:
        actual.metadata_json = {}
    actual.metadata_json['codex_sandbox'] = sandbox
    if contract_diagnostics:
        actual.metadata_json['apprentice_agent_contract_diagnostics'] = contract_diagnostics

    op_error = _apprentice_operational_error(run_error, stdout, stderr, returncode)
    clean_success = (
        run_error is None
        and returncode == 0
        and trace_valid
        and actual.status == 'success'
    )
    if clean_success:
        # The marker scan covers the whole transcript, so ordinary words such
        # as "timeout", "billing" or "quota" in a successful run would
        # otherwise be reported as an operational failure. When every success
        # signal agrees, the scan result is discarded.
        op_error = None
    elif op_error and returncode not in (None, 0) and trace_valid and actual.status == 'success':
        op_error = f"Apprentice Agent exited nonzero after producing required outputs (exit code {returncode})."
    if op_error or not trace_valid:
        actual.metadata_json['apprentice_agent_operational_error'] = op_error or 'Apprentice Agent did not produce a valid agent_trace.json; raw logs were preserved.'
        actual.error_message = op_error or actual.error_message
        trace.metadata_json['apprentice_agent_operational_error'] = actual.metadata_json['apprentice_agent_operational_error']

    write_json(d / 'actual_outputs.json', actual)
    write_json(d / 'agent_trace.json', trace)
    return AttemptResult(attempt_dir=str(d), trace_valid=trace_valid, trace=trace, actual_outputs=actual, codex_sandbox=sandbox)
350
+
351
+
352
def run_custom_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSpec, attempt_kind='baseline', timeout: int | None=None) -> AttemptResult:
    """Run one attempt through a user-configured shell command template.

    The template from settings has ``{workspace}``, ``{prompt_file}``,
    ``{run_dir}`` and ``{task_instruction}`` substituted and is executed
    through the shell in the attempt directory. Logs are stored redacted and
    ``ensure_attempt_outputs`` guarantees that trace/output files exist.

    Args:
        package_root: Root of the task package.
        raw: Raw task record, used to resolve expected deliverables.
        spec: Normalized task specification.
        attempt_kind: Name of the attempt sub-directory (e.g. ``baseline``).
        timeout: Seconds allowed for the command; falls back to
            ``settings.task_timeout_seconds`` when falsy.

    Returns:
        An ``AttemptResult``. When no template is configured it carries
        ``custom_worker_error``; otherwise ``custom_worker_returncode``
        (``None`` if the command never completed).
    """
    settings = get_settings()
    d = _attempt_dir(package_root, attempt_kind)
    input_files = _copy_attempt_inputs(package_root, d)
    deliverables = _deliverables(raw, spec)
    rubric_path = package_root / 'rubric' / 'worker_visible_rubric.md'
    rubric_md = rubric_path.read_text() if rubric_path.exists() else ''
    prompt = build_worker_prompt(spec.normalized_instruction, rubric_md, attempt_kind, input_files, deliverables, settings.sensitive_info_masking, workspace_path=str(d))
    prompt_file = d / 'prompt.md'
    prompt_file.write_text(prompt)
    display_name = settings.custom_worker_display_name or 'Custom'

    template = settings.custom_worker_command_template
    if not template:
        # Configuration error: record it as a failed attempt without raising.
        err = RuntimeError('Custom Apprentice Agent is configured without a command template.')
        (d / 'stdout.txt').write_text('')
        (d / 'stderr.txt').write_text(str(err))
        (d / 'final_message.txt').write_text('Custom Apprentice Agent configuration error.')
        contract_diagnostics = build_contract_diagnostics(d, command='custom-agent', working_directory=d, agent_display_name=display_name, prompt=prompt)
        with (d / 'final_message.txt').open('a') as f:
            f.write('\n\n' + diagnostics_text(contract_diagnostics))
        trace, actual, trace_valid = ensure_attempt_outputs(package_root, spec, attempt_kind, prompt, 'custom', err)
        if actual.metadata_json is None:
            actual.metadata_json = {}
        actual.metadata_json['apprentice_agent_contract_diagnostics'] = contract_diagnostics
        write_json(d / 'actual_outputs.json', actual)
        trace.metadata_json['apprentice_agent_contract_diagnostics'] = contract_diagnostics
        write_json(d / 'agent_trace.json', trace)
        return AttemptResult(attempt_dir=str(d), trace_valid=trace_valid, trace=trace, actual_outputs=actual, custom_worker_error=str(err))

    replacements = {
        'workspace': str(d),
        'prompt_file': str(prompt_file),
        'run_dir': str(package_root.parent.parent),
        'task_instruction': spec.normalized_instruction,
    }
    command = template
    for key, value in replacements.items():
        command = command.replace('{' + key + '}', value)

    run_error = None
    cp = None
    try:
        # SECURITY NOTE(review): the substituted values, including the task
        # instruction text, are inserted into a shell command unquoted.
        # Quoting here would change how existing templates expand, so it is
        # left as is; prefer {prompt_file} over {task_instruction} for text
        # that is not fully trusted.
        cp = subprocess.run(command, cwd=d, text=True, capture_output=True, timeout=timeout or settings.task_timeout_seconds, shell=True)
        stdout = redact_secrets(cp.stdout or '')
        stderr = redact_secrets(cp.stderr or '')
        (d / 'stdout.txt').write_text(stdout)
        (d / 'stderr.txt').write_text(stderr)
        (d / 'final_message.txt').write_text(redact_secrets((cp.stdout or cp.stderr or '')[-4000:]))
        if cp.returncode != 0:
            run_error = RuntimeError(f'Apprentice Agent exited before producing required outputs (exit code {cp.returncode}).')
    except Exception as e:
        # Any failure to run the command is recorded, not raised, so the
        # fallback outputs below are still produced.
        run_error = e
        (d / 'stdout.txt').write_text('')
        (d / 'stderr.txt').write_text(redact_secrets(str(e)))
        (d / 'final_message.txt').write_text('Custom Apprentice Agent failed before producing validated trace.')

    contract_diagnostics = None
    if not (d / 'agent_trace.json').exists() or not (d / 'actual_outputs.json').exists():
        contract_diagnostics = build_contract_diagnostics(d, command=command, working_directory=d, agent_display_name=display_name, prompt=prompt)
        with (d / 'final_message.txt').open('a') as f:
            f.write('\n\n' + diagnostics_text(contract_diagnostics))

    trace, actual, trace_valid = ensure_attempt_outputs(package_root, spec, attempt_kind, prompt, 'custom', run_error)
    trace.metadata_json['custom_worker_display_name'] = settings.custom_worker_display_name
    trace.metadata_json['custom_worker_command_template'] = template
    trace.metadata_json['trace_valid'] = trace_valid
    if contract_diagnostics:
        trace.metadata_json['apprentice_agent_contract_diagnostics'] = contract_diagnostics
    write_json(d / 'agent_trace.json', trace)

    if actual.metadata_json is None:
        actual.metadata_json = {}
    actual.metadata_json['apprentice_agent'] = 'custom'
    if contract_diagnostics:
        actual.metadata_json['apprentice_agent_contract_diagnostics'] = contract_diagnostics
    if run_error:
        # Redacted for parity with stderr.txt: an exception such as
        # subprocess.TimeoutExpired embeds the whole expanded command line.
        actual.metadata_json['apprentice_agent_operational_error'] = redact_secrets(str(run_error))
    write_json(d / 'actual_outputs.json', actual)
    return AttemptResult(
        attempt_dir=str(d),
        trace_valid=trace_valid,
        trace=trace,
        actual_outputs=actual,
        custom_worker_returncode=(cp.returncode if cp else None),
    )
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+
8
# Maps each supported agent id to the executable names tried for it.
# resolve_agent_command() walks each list front to back and stops at the
# first name that resolves, so earlier entries take precedence.
AGENT_COMMAND_CANDIDATES: dict[str, list[str]] = {
    "codex": ["codex"],
    "cursor": ["cursor-agent", "cursor"],
    "claude-code": ["claude"],
    "openclaw": ["openclaw"],
    "opencode": ["opencode"],
    "hermes-agent": ["hermes"],
}
16
+
17
+
18
def common_command_dirs() -> list[Path]:
    """Return the ordered, de-duplicated directories searched for agent CLIs.

    Directories named in ``AA_AGENT_COMMAND_DIRS`` (split on ``os.pathsep``,
    empty entries ignored) come first, followed by a fixed list of
    well-known install locations. ``~`` is expanded in every entry and only
    the first occurrence of a directory is kept. The directories are not
    checked for existence.

    Returns:
        The directory list, or an empty list when ``AA_DISABLE_LOCAL_ENV``
        is set to ``"1"``.
    """
    if os.getenv("AA_DISABLE_LOCAL_ENV") == "1":
        return []

    user_home = Path.home()
    home_relative = (
        ".local/bin",
        ".npm-global/bin",
        ".bun/bin",
        ".cargo/bin",
        ".volta/bin",
        ".yarn/bin",
        "Library/pnpm",
        ".opencode/bin",
        ".hermes/hermes-agent",
    )
    well_known = [Path("/opt/homebrew/bin"), Path("/usr/local/bin")]
    well_known.extend(user_home / rel for rel in home_relative)

    configured = [
        Path(entry)
        for entry in os.getenv("AA_AGENT_COMMAND_DIRS", "").split(os.pathsep)
        if entry
    ]

    # A dict keeps insertion order, so setdefault() retains the first
    # occurrence of each expanded path and drops later duplicates.
    unique: dict[str, Path] = {}
    for raw_dir in (*configured, *well_known):
        resolved = raw_dir.expanduser()
        unique.setdefault(str(resolved), resolved)
    return list(unique.values())
45
+
46
+
47
def explicit_command_dirs() -> list[Path]:
    """Return the directories listed in ``AA_AGENT_COMMAND_DIRS``.

    The variable is split on ``os.pathsep``; empty entries are dropped and
    ``~`` is expanded in each remaining entry. Order and duplicates are
    preserved as given.

    Returns:
        The configured directories, or an empty list when the variable is
        unset/empty or ``AA_DISABLE_LOCAL_ENV`` is set to ``"1"``.
    """
    if os.getenv("AA_DISABLE_LOCAL_ENV") == "1":
        return []
    raw_value = os.getenv("AA_AGENT_COMMAND_DIRS", "")
    entries = filter(None, raw_value.split(os.pathsep))
    return [Path(entry).expanduser() for entry in entries]
51
+
52
+
53
def resolve_command(command: str | None) -> str | None:
    """Resolve *command* to an executable reference, or ``None`` if not found.

    A command containing a path separator is treated as a path: ``~`` is
    expanded and the expanded string is returned when it names an executable
    non-directory. Any other command is treated as a bare name and searched
    for, in order, in:

    1. the directories from ``explicit_command_dirs()`` (full path returned),
    2. ``PATH`` via ``shutil.which`` (the bare name is returned as given),
    3. the directories from ``common_command_dirs()`` (full path returned).

    Args:
        command: Command name or path; ``None`` or ``""`` yields ``None``.

    Returns:
        The resolved command string, or ``None`` when nothing executable
        was found.
    """
    if not command:
        return None

    def _is_executable_file(path: Path) -> bool:
        # Directories normally carry the execute bit as well, so they must be
        # excluded explicitly; this matches what shutil.which() accepts.
        return path.exists() and not path.is_dir() and os.access(path, os.X_OK)

    # os.path.expanduser only touches a leading "~" and, unlike
    # str(Path(...)), keeps a leading "./" so explicitly relative commands
    # are still recognised as paths below.
    expanded = os.path.expanduser(command)
    separators = tuple(sep for sep in (os.path.sep, os.path.altsep) if sep)
    if any(sep in expanded for sep in separators):
        return expanded if _is_executable_file(Path(expanded)) else None

    for directory in explicit_command_dirs():
        candidate = directory / expanded
        if _is_executable_file(candidate):
            return str(candidate)

    if shutil.which(expanded):
        return expanded

    for directory in common_command_dirs():
        candidate = directory / expanded
        if _is_executable_file(candidate):
            return str(candidate)
    return None
72
+
73
+
74
def resolve_agent_command(agent_id: str, configured_command: str | None = None) -> tuple[str | None, str | None]:
    """Find the first resolvable command for *agent_id*.

    The configured command, when given, is tried first, followed by the
    names registered for the agent in ``AGENT_COMMAND_CANDIDATES``.

    Args:
        agent_id: Key into ``AGENT_COMMAND_CANDIDATES``; unknown ids
            contribute no default names.
        configured_command: Optional command that takes precedence over the
            registered names.

    Returns:
        ``(name, resolved)`` for the first name that ``resolve_command``
        resolves. When none resolves, ``(first_name_tried, None)``, or
        ``(None, None)`` if there was nothing to try.
    """
    names: list[str] = []
    if configured_command:
        names.append(configured_command)
    names += AGENT_COMMAND_CANDIDATES.get(agent_id, [])

    for name in names:
        # Empty entries are skipped without attempting resolution.
        location = resolve_command(name) if name else None
        if location:
            return name, location

    first_choice = names[0] if names else None
    return first_choice, None
84
+
85
+
86
def gui_app_hint(agent_id: str) -> str | None:
    """Return a hint when an agent's GUI app exists at its known path.

    Only ``"cursor"`` and ``"claude-code"`` have a known application bundle
    path under ``/Applications``.

    Returns:
        A message naming the bundle when that path exists, otherwise
        ``None`` (including for every other agent id).
    """
    if agent_id == "cursor":
        bundle = Path("/Applications/Cursor.app")
    elif agent_id == "claude-code":
        bundle = Path("/Applications/Claude.app")
    else:
        return None

    if not bundle.exists():
        return None
    return f"{bundle} is installed, but the headless CLI command was not found."