agent-apprenticeship 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +217 -0
- package/bin/agent-apprenticeship.js +131 -0
- package/package.json +30 -0
- package/pyproject.toml +23 -0
- package/src/agent_apprenticeship_trace/__init__.py +2 -0
- package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
- package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
- package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
- package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
- package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
- package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
- package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
- package/src/agent_apprenticeship_trace/certification.py +580 -0
- package/src/agent_apprenticeship_trace/cli.py +2979 -0
- package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
- package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
- package/src/agent_apprenticeship_trace/config.py +609 -0
- package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
- package/src/agent_apprenticeship_trace/env.py +46 -0
- package/src/agent_apprenticeship_trace/evaluator.py +64 -0
- package/src/agent_apprenticeship_trace/grader.py +194 -0
- package/src/agent_apprenticeship_trace/integration_status.py +193 -0
- package/src/agent_apprenticeship_trace/io.py +20 -0
- package/src/agent_apprenticeship_trace/learning.py +627 -0
- package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
- package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
- package/src/agent_apprenticeship_trace/loop.py +111 -0
- package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
- package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
- package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
- package/src/agent_apprenticeship_trace/progress.py +223 -0
- package/src/agent_apprenticeship_trace/public_run.py +1109 -0
- package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
- package/src/agent_apprenticeship_trace/recipes.py +129 -0
- package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
- package/src/agent_apprenticeship_trace/revision.py +21 -0
- package/src/agent_apprenticeship_trace/role_runners.py +7 -0
- package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
- package/src/agent_apprenticeship_trace/schemas.py +273 -0
- package/src/agent_apprenticeship_trace/session_events.py +99 -0
- package/src/agent_apprenticeship_trace/task_intake.py +112 -0
- package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
- package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
- package/src/agent_apprenticeship_trace/training_signals.py +30 -0
- package/src/agent_apprenticeship_trace/validation.py +210 -0
- package/src/agent_apprenticeship_trace/verifier.py +55 -0
|
@@ -0,0 +1,1109 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import shlex
|
|
5
|
+
import shutil
|
|
6
|
+
import os
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from .bundle_exporter import create_contribution_bundle
|
|
11
|
+
from .config import Settings, apprentice_agent_display_name, apprentice_agent_readiness_status, get_settings
|
|
12
|
+
from .io import read_json, write_json
|
|
13
|
+
from .loop import run_task
|
|
14
|
+
from .mentor_checkpoints import write_mentor_checkpoints
|
|
15
|
+
from .progress import ProgressCallback, append_progress_event, update_run_status
|
|
16
|
+
from .recipes import WORKER_AGENT_RECIPES
|
|
17
|
+
from .schemas import RawTaskRecord
|
|
18
|
+
from .session_events import append_session_event, backfill_session_event_task_ids, next_followup_index
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def slugify(text: str, fallback: str = "task") -> str:
    """Turn arbitrary text into a lowercase, hyphen-separated identifier.

    Every run of characters outside ``a-z0-9`` (after lowercasing) collapses
    into a single hyphen. The result is capped at 64 characters and never
    starts or ends with a hyphen. ``fallback`` is returned when nothing
    usable is left.
    """
    hyphenated = re.sub(r"[^a-z0-9]+", "-", text.lower())
    # Strip again after truncating: the 64-char cut can expose a hyphen.
    shortened = hyphenated.strip("-")[:64].strip("-")
    if shortened:
        return shortened
    return fallback
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def make_run_id(instruction: str) -> str:
    """Build a run id from the current UTC timestamp and a slug of the instruction."""
    now_utc = datetime.now(timezone.utc)
    prefix = now_utc.strftime("%Y-%m-%d-%H%M%S")
    return "-".join((prefix, slugify(instruction)))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def run_root_for(run_id: str, settings: Settings | None = None) -> Path:
    """Resolve the directory that holds a run.

    ``run_id`` is treated as a path and returned as-is (after ``~``
    expansion) when it already exists on disk, is absolute, or contains a
    ``/``. Otherwise it is a bare id located under ``<app_home>/runs``.
    """
    if not settings:
        settings = get_settings()
    as_path = Path(run_id).expanduser()
    if as_path.exists():
        return as_path
    if as_path.is_absolute() or "/" in run_id:
        return as_path
    return settings.app_home / "runs" / run_id
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _unique_target(dest_dir: Path, name: str) -> Path:
    """Pick a path inside ``dest_dir`` that does not exist yet.

    Returns ``dest_dir / name`` when that is free; otherwise tries
    ``<stem>-2<suffix>`` up to ``<stem>-999<suffix>``.

    Raises:
        RuntimeError: every candidate name is already taken.
    """
    preferred = dest_dir / name
    if not preferred.exists():
        return preferred
    base, ext = preferred.stem, preferred.suffix
    for counter in range(2, 1000):
        numbered = dest_dir / f"{base}-{counter}{ext}"
        if numbered.exists():
            continue
        return numbered
    raise RuntimeError(f"Could not create unique asset target for {name}")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def copy_assets(assets: list[Path] | None, dest_dir: Path, rel_prefix: str) -> list[str]:
    """Copy task assets into ``dest_dir`` and return their relative references.

    Files are copied with ``shutil.copy2`` and directories with
    ``shutil.copytree``. Name clashes inside ``dest_dir`` are resolved by
    ``_unique_target``. Each returned entry is ``<rel_prefix>/<copied name>``
    with any trailing ``/`` on the prefix removed.

    Raises:
        FileNotFoundError: an asset path does not exist.
    """
    if not assets:
        return []
    dest_dir.mkdir(parents=True, exist_ok=True)
    copied_refs: list[str] = []
    for entry in assets:
        source = Path(entry).expanduser()
        if not source.exists():
            raise FileNotFoundError(f"Asset does not exist: {source}")
        destination = _unique_target(dest_dir, source.name)
        copier = shutil.copytree if source.is_dir() else shutil.copy2
        copier(source, destination)
        copied_refs.append(f"{rel_prefix.rstrip('/')}/{destination.name}")
    return copied_refs
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _asset_abs_refs(run_root: Path, rel_refs: list[str]) -> list[str]:
    """Join each run-relative asset reference onto ``run_root`` and return the paths as strings."""
    return [str(run_root / rel) for rel in rel_refs]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def prompt_to_raw_task(run_id: str, instruction: str, asset_refs: list[str] | None = None) -> RawTaskRecord:
    """Wrap a free-form user prompt in a ``RawTaskRecord``.

    The title is the first line of the stripped instruction, capped at 90
    characters. An empty or whitespace-only instruction falls back to a
    generic title instead of raising.

    Args:
        run_id: Run identifier; its slug is used for the raw-task and task ids.
        instruction: The user's task text, stored verbatim as the description
            and the normalized instruction.
        asset_refs: Optional references to input artifacts. Their base names
            are listed as input requirements.
    """
    refs = asset_refs or []
    # "".splitlines() is [], so indexing [0] directly would raise IndexError
    # and the fallback title could never be reached.
    first_lines = instruction.strip().splitlines()
    title = (first_lines[0][:90] if first_lines else "") or "Agent Apprenticeship task"
    return RawTaskRecord(
        raw_task_id=f"raw_{slugify(run_id)}",
        source_kind="user_prompt",
        raw_title=title,
        raw_description=instruction,
        raw_payload={
            "expected_deliverable": "Completed task deliverables under artifacts/.",
            "input_requirements": [Path(ref).name for ref in refs],
            "output_requirements": ["final deliverable"],
            "privacy_classification": "unknown",
            "task_entrypoint": "direct_prompt",
        },
        input_artifact_refs=refs,
        task_id=f"task_{slugify(run_id)}",
        normalized_title=title,
        normalized_instruction=instruction,
        expected_deliverable="Completed task deliverables under artifacts/.",
        metadata_json={"created_by": "agent-apprenticeship-run"},
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def task_id_for_run_id(run_id: str) -> str:
    """Derive the task id for a run: ``task_`` followed by the slugified run id."""
    return "task_" + slugify(run_id)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _worker_display(settings: Settings) -> str:
    """Return the custom worker display name, or a title-cased form of the worker agent id."""
    custom_name = settings.custom_worker_display_name
    if custom_name:
        return custom_name
    # e.g. "claude-code" -> "Claude Code"
    return settings.worker_agent.replace("-", " ").title()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def apprentice_agent_display(settings: Settings) -> str:
    """Return the Apprentice Agent's display name, as computed by ``apprentice_agent_display_name``."""
    display_name = apprentice_agent_display_name(settings)
    return display_name
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _configured_apprentice_command(settings: Settings, override: str | None = None) -> str | None:
    """Return the executable name configured for the Apprentice Agent.

    Args:
        settings: Active settings. Reads ``worker_agent``,
            ``worker_agent_command`` and ``custom_worker_command_template``.
        override: Runner override; ``"deterministic"`` means no external
            command is involved.

    Returns:
        ``None`` for the deterministic override. For a custom agent, the first
        token of the command template, falling back to
        ``settings.worker_agent_command`` when the template is missing or
        blank. Otherwise the explicit command, the recipe's command name, or
        the agent id itself.
    """
    if override == "deterministic":
        return None
    if settings.worker_agent == "custom":
        # A whitespace-only template is truthy but tokenizes to [], so
        # normalize it before indexing the first token.
        template = (settings.custom_worker_command_template or "").strip()
        if not template:
            return settings.worker_agent_command
        try:
            tokens = shlex.split(template)
        except ValueError:
            # Unbalanced quotes: fall back to plain whitespace splitting.
            tokens = template.split()
        return tokens[0] if tokens else settings.worker_agent_command
    recipe = WORKER_AGENT_RECIPES.get(settings.worker_agent)
    return settings.worker_agent_command or (recipe.command_name if recipe else settings.worker_agent)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def apprentice_agent_readiness(settings: Settings, override: str | None = None) -> tuple[bool, str | None]:
    """Report whether the Apprentice Agent can run.

    Returns ``(True, None)`` for the ``"deterministic"`` override or when the
    readiness status is ``"ready"``. Otherwise returns ``(False, why)``, where
    ``why`` is the status's ``reason`` or, failing that, the status value.
    """
    if override == "deterministic":
        return True, None
    report = apprentice_agent_readiness_status(settings)
    state = report["status"]
    if state == "ready":
        return True, None
    return False, report.get("reason") or str(state)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _human_checkpoint_mode(settings: Settings) -> bool:
    """Return True when the mentor mode is ``expert_led`` or ``hybrid``."""
    mode = settings.mentor_mode
    return mode == "expert_led" or mode == "hybrid"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _checkpoint_auto_approve(settings: Settings) -> bool:
    """Return True when the environment opts in to auto-approval for the current mentor mode.

    ``expert_led`` is governed by ``AA_EXPERT_AUTO_APPROVE`` and ``hybrid`` by
    ``AA_HYBRID_AUTO_APPROVE``. The variable must be exactly ``"1"``. Any
    other mentor mode never auto-approves.
    """
    mode = settings.mentor_mode
    if mode == "expert_led":
        return os.getenv("AA_EXPERT_AUTO_APPROVE") == "1"
    if mode == "hybrid":
        return os.getenv("AA_HYBRID_AUTO_APPROVE") == "1"
    return False
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _checkpoint_review_labels(settings: Settings, revision_requested: bool | None = None) -> tuple[str, str]:
    """Return the ``(pending, complete)`` progress messages for a human review.

    The wording depends on whether the mentor mode is ``hybrid`` (any other
    mode gets the expert wording). When ``revision_requested`` is not
    ``None``, the "complete" message also states the reviewer's decision.
    """
    hybrid = settings.mentor_mode == "hybrid"
    if hybrid:
        pending_label = "Hybrid model draft complete - human approval pending"
    else:
        pending_label = "Expert review pending"

    if revision_requested is None:
        done_label = "Hybrid human approval complete" if hybrid else "Expert review complete"
    elif revision_requested:
        if hybrid:
            done_label = "Hybrid human approval complete - revision requested"
        else:
            done_label = "Expert review complete - revision requested"
    else:
        if hybrid:
            done_label = "Hybrid human approval complete - finish selected"
        else:
            done_label = "Expert review complete - finish selected"
    return pending_label, done_label
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _pre_attempt_checkpoint_callback(
    run_root: Path,
    settings: Settings,
    *,
    progress_callback: ProgressCallback | None,
    followup_index: int | None = None,
):
    """Build the hook that runs just before the Apprentice attempt.

    Returns ``None`` unless the mentor mode requires human checkpoints. The
    returned hook writes the ``task_intake`` and ``rubric`` mentor checkpoints
    and then records an ``apprentice_attempt_started`` progress event. The
    event is tagged with ``followup_index`` when one is given.
    """
    if not _human_checkpoint_mode(settings):
        return None

    def _callback(_pkg: Path) -> None:
        write_mentor_checkpoints(
            run_root,
            settings,
            auto_approve=_checkpoint_auto_approve(settings),
            stages=("task_intake", "rubric"),
            preserve_interactive=followup_index is None,
        )
        if followup_index:
            started_message = f"Follow-up {followup_index} Apprentice attempt started"
            event_metadata = {"followup_index": followup_index}
        else:
            started_message = "Apprentice attempt started"
            event_metadata = None
        append_progress_event(
            run_root,
            "apprentice_attempt_started",
            run_id=run_root.name,
            message=started_message,
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="apprentice_attempt",
            metadata_json=event_metadata,
            callback=progress_callback,
        )

    return _callback
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _revision_decision_callback(
    run_root: Path,
    settings: Settings,
    *,
    progress_callback: ProgressCallback | None,
    followup_index: int | None = None,
):
    """Build the hook that decides whether a revision attempt should run.

    Returns a ``(callback, state)`` pair. When the mentor mode does not use
    human checkpoints the pair is ``(None, {})``. Otherwise ``state`` is a
    dict the callback fills with ``apprentice_completed_emitted`` and
    ``revision_started_emitted`` flags, so the caller can tell which progress
    events were already recorded.

    The callback returns ``True`` only when the revision checkpoint asks for a
    revision and more than one improvement loop is allowed. It returns
    ``False`` straight away on an operational error.
    """
    if not _human_checkpoint_mode(settings):
        return None, {}
    # Shared with the caller: records which events the callback emitted.
    state: dict[str, bool] = {}

    def _callback(pkg: Path) -> bool:
        status, _reason = _session_status_for_package(pkg)
        traced_steps, artifact_count, artifacts_path, operational_error = _package_progress_summary(run_root, pkg)
        # Always report the end of the first attempt, failed or not.
        append_progress_event(
            run_root,
            "apprentice_attempt_completed",
            run_id=run_root.name,
            message=("Apprentice attempt failed - operational error" if operational_error else ("Follow-up %s Apprentice attempt complete" % followup_index if followup_index else "Apprentice attempt complete")),
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="apprentice_attempt_complete",
            traced_steps=traced_steps,
            artifact_count=artifact_count,
            artifacts_path=artifacts_path,
            operational_error=operational_error,
            metadata_json={"followup_index": followup_index} if followup_index else None,
            callback=progress_callback,
        )
        state["apprentice_completed_emitted"] = True
        # An operational failure skips the human review and never triggers a revision.
        if operational_error:
            return False
        pending, _ = _checkpoint_review_labels(settings)
        append_progress_event(
            run_root,
            "mentor_review_started",
            run_id=run_root.name,
            message=pending,
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="expert_review" if settings.mentor_mode == "expert_led" else "hybrid_human_approval",
            metadata_json={"followup_index": followup_index} if followup_index else None,
            callback=progress_callback,
        )
        # Record the pending-review state before the checkpoints are written.
        update_run_status(
            run_root,
            run_status=status,
            task_status=status,
            current_phase="expert_review_pending" if settings.mentor_mode == "expert_led" else "hybrid_human_approval_pending",
            latest_message=pending,
            traced_steps=traced_steps,
            artifact_count=artifact_count,
            artifacts_path=str(artifacts_path),
        )
        write_mentor_checkpoints(
            run_root,
            settings,
            auto_approve=_checkpoint_auto_approve(settings),
            stages=("evaluation", "revision"),
            preserve_interactive=followup_index is None,
        )
        # The decision is read back from the revision checkpoint file, if it exists.
        revision_path = run_root / "mentor_checkpoints" / "revision_checkpoint.json"
        revision = read_json(revision_path) if revision_path.exists() else {}
        # A requested revision only counts when a second loop is permitted.
        revision_requested = bool(revision.get("revision_should_run")) and settings.max_improvement_loops > 1
        _, complete = _checkpoint_review_labels(settings, revision_requested)
        append_progress_event(
            run_root,
            "mentor_review_completed",
            run_id=run_root.name,
            message=complete,
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="expert_review_complete" if settings.mentor_mode == "expert_led" else "hybrid_human_approval_complete",
            task_status=status,
            traced_steps=traced_steps,
            artifact_count=artifact_count,
            artifacts_path=artifacts_path,
            metadata_json={"followup_index": followup_index} if followup_index else None,
            callback=progress_callback,
        )
        if revision_requested:
            append_progress_event(
                run_root,
                "revision_started",
                run_id=run_root.name,
                message=("Follow-up %s revision attempt started" % followup_index if followup_index else "Revision attempt started"),
                current_loop=2,
                maximum_improvement_loops=settings.max_improvement_loops,
                phase="revision_attempt",
                metadata_json={"followup_index": followup_index} if followup_index else None,
                callback=progress_callback,
            )
            state["revision_started_emitted"] = True
        return revision_requested

    return _callback, state
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def runner_for_settings(settings: Settings, override: str | None = None) -> str:
    """Choose the runner name for the Apprentice attempt.

    Precedence: an explicit ``override``, then a ``custom`` worker agent, then
    a ``deterministic`` worker runner, then ``codex``, then any agent that has
    a recipe in ``WORKER_AGENT_RECIPES``.

    Raises:
        RuntimeError: the configured worker agent matches none of the above.
    """
    if override:
        return override
    agent = settings.worker_agent
    if agent == "custom":
        return "custom"
    if settings.worker_runner == "deterministic":
        return "deterministic"
    if agent == "codex":
        return "codex"
    if agent not in WORKER_AGENT_RECIPES:
        raise RuntimeError(f"Unsupported Apprentice Agent: {settings.worker_agent}")
    return agent
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _session_status_for_package(pkg: Path) -> tuple[str, str | None]:
    """Summarize a package's attempts as ``(status, reason)``.

    Reads every ``attempts/*/actual_outputs.json`` under ``pkg``:

    * no outputs, or an unreadable output file -> ``"partial"``
    * any attempt reporting an operational error -> ``"failed"`` with the first error
    * every attempt ``"success"`` -> ``"completed"`` with no reason
    * every attempt failed, timed out or errored -> ``"failed"``
    * anything else -> ``"partial"``
    """
    output_files = sorted((pkg / "attempts").glob("*/actual_outputs.json"))
    if not output_files:
        return "partial", "No attempt outputs were found."

    attempt_statuses: list[str] = []
    errors: list[str] = []
    for output_file in output_files:
        try:
            payload = read_json(output_file)
        except Exception:
            return "partial", f"Could not read {output_file.relative_to(pkg)}."
        meta = payload.get("metadata_json") or {}
        error = meta.get("apprentice_agent_operational_error") or meta.get("worker_agent_operational_error")
        if error:
            errors.append(str(error))
        # A missing status is counted as a failure.
        attempt_statuses.append(str(payload.get("status") or "failed"))

    if errors:
        return "failed", errors[0]
    distinct = set(attempt_statuses)
    if distinct == {"success"}:
        return "completed", None
    if distinct <= {"failed", "timeout", "error"}:
        return "failed", "All recorded attempts failed."
    return "partial", "One or more attempts were partial or failed."
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _sync_run_artifacts(run_root: Path, pkg: Path) -> Path:
    """Mirror the selected attempt's artifacts into ``<run_root>/artifacts``.

    The public artifacts directory is emptied first. The source is the
    ``revised`` attempt when the manifest's ``selected_attempt_id`` ends with
    ``_revised`` and the ``baseline`` attempt otherwise. If that directory is
    missing or empty, the first non-empty ``attempts/*/artifacts`` directory
    (reverse-sorted) is used instead. Files are copied with their relative
    layout preserved.

    Args:
        run_root: Run directory that owns the public ``artifacts`` folder.
        pkg: Package directory containing ``attempts/`` and, optionally,
            ``package_manifest.json``.

    Returns:
        The public artifacts directory (left empty when no source was found).
    """
    public_artifacts = run_root / "artifacts"
    public_artifacts.mkdir(parents=True, exist_ok=True)
    for existing in public_artifacts.iterdir():
        # is_dir() follows symlinks, but shutil.rmtree refuses to operate on a
        # symlink and raises OSError. Remove links with unlink() so a linked
        # directory is detached rather than crashing the sync (or having its
        # target deleted).
        if existing.is_dir() and not existing.is_symlink():
            shutil.rmtree(existing)
        else:
            existing.unlink()
    manifest_path = pkg / "package_manifest.json"
    manifest = read_json(manifest_path) if manifest_path.exists() else {}
    selected = str(manifest.get("selected_attempt_id") or "")
    selected_kind = "revised" if selected.endswith("_revised") else "baseline"
    # Preferred attempt first, then every attempt as a fallback.
    candidates = [pkg / "attempts" / selected_kind / "artifacts"]
    candidates.extend(sorted(pkg.glob("attempts/*/artifacts"), reverse=True))
    source = next((path for path in candidates if path.exists() and any(path.rglob("*"))), None)
    if not source:
        return public_artifacts
    for item in source.rglob("*"):
        if not item.is_file():
            continue
        target = public_artifacts / item.relative_to(source)
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(item, target)
    return public_artifacts
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _package_progress_summary(run_root: Path, pkg: Path) -> tuple[int, int, Path, str | None]:
    """Collect progress figures for a package.

    Returns ``(traced_steps, artifact_count, artifacts_path, operational_error)``:
    the number of trace steps summed over all attempts, the number of artifact
    files over all attempts, the public artifacts directory after syncing, and
    the first operational error or failure message found in the attempts'
    ``actual_outputs.json`` files.
    """
    step_total = 0
    for trace_file in pkg.glob("attempts/*/agent_trace.json"):
        try:
            recorded_steps = read_json(trace_file).get("steps") or []
            step_total += len(recorded_steps)
        except Exception:
            # Best effort: an unreadable or malformed trace contributes no steps.
            continue

    file_total = sum(
        1
        for artifacts_dir in pkg.glob("attempts/*/artifacts")
        if artifacts_dir.exists()
        for entry in artifacts_dir.rglob("*")
        if entry.is_file()
    )

    synced_dir = _sync_run_artifacts(run_root, pkg)

    failure_statuses = {"failed", "timeout", "error"}
    failure = None
    for outputs_file in sorted((pkg / "attempts").glob("*/actual_outputs.json")):
        try:
            outputs = read_json(outputs_file)
        except Exception:
            continue
        meta = outputs.get("metadata_json") or {}
        failure = meta.get("apprentice_agent_operational_error") or meta.get("worker_agent_operational_error")
        if failure:
            break
        if outputs.get("status") in failure_statuses:
            # A failed attempt without an explicit operational error still stops the scan.
            failure = outputs.get("error_message") or outputs.get("error_type")
            break
    return step_total, file_total, synced_dir, failure
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def run_prompt_task(
    instruction: str,
    assets: list[Path] | None = None,
    run_id: str | None = None,
    settings: Settings | None = None,
    runner: str | None = None,
    create_bundle: bool = True,
    progress_callback: ProgressCallback | None = None,
    experience_pack_refs: list[dict] | None = None,
) -> tuple[Path, Path | None]:
    """Run one prompt-driven task end to end and optionally bundle the result.

    Prepares the run workspace, copies assets, runs the Apprentice attempt via
    ``run_task``, records progress and session events, writes ``session.json``
    and, when requested, creates the contribution bundle.

    Args:
        instruction: The user's task text.
        assets: Optional files or directories copied into the task workspace.
        run_id: Run id or path; generated from the instruction when omitted.
        settings: Settings to use; loaded with ``get_settings`` when omitted.
        runner: Optional runner override passed to ``runner_for_settings``.
        create_bundle: Whether to create a contribution bundle at the end.
        progress_callback: Passed as ``callback`` to every progress event.
        experience_pack_refs: Optional experience-pack references recorded in
            the run status, session file and bundle metadata.

    Returns:
        ``(run_root, bundle)`` where ``bundle`` is ``None`` when no bundle was
        created.

    Raises:
        Exception: any error raised while copying assets is re-raised after an
            ``operational_error`` progress event has been recorded.
    """
    settings = settings or get_settings()
    run_id = run_id or make_run_id(instruction)
    run_root = run_root_for(run_id, settings)
    run_root.mkdir(parents=True, exist_ok=True)
    public_artifacts = run_root / "artifacts"
    public_artifacts.mkdir(parents=True, exist_ok=True)
    # "".splitlines() is [], so guard before indexing; otherwise an empty
    # instruction raises IndexError and the fallback title is unreachable.
    instruction_lines = instruction.strip().splitlines()
    title = (instruction_lines[0][:90] if instruction_lines else "") or "Agent Apprenticeship task"
    update_run_status(
        run_root,
        run_id=run_id,
        run_status="running",
        task_status="running",
        current_phase="starting",
        current_loop=1,
        maximum_improvement_loops=settings.max_improvement_loops,
        latest_message="Agent Apprenticeship run started.",
        apprentice_agent=apprentice_agent_display(settings),
        mentor_mode=settings.mentor_mode,
        task_title=title,
        task_workspace_path=str(run_root),
        artifacts_path=str(public_artifacts),
        **_experience_session_fields(experience_pack_refs),
    )
    append_progress_event(
        run_root,
        "run_started",
        run_id=run_id,
        message="Agent Apprenticeship run started",
        current_loop=1,
        maximum_improvement_loops=settings.max_improvement_loops,
        phase="starting",
        run_status="running",
        task_status="running",
        callback=progress_callback,
    )

    # --- Task workspace -------------------------------------------------
    (run_root / "task").mkdir(exist_ok=True)
    append_progress_event(
        run_root,
        "task_workspace_prepared",
        run_id=run_id,
        message="Preparing task workspace",
        current_loop=1,
        maximum_improvement_loops=settings.max_improvement_loops,
        phase="preparing_workspace",
        callback=progress_callback,
    )
    (run_root / "task" / "task_instruction.md").write_text(instruction.rstrip() + "\n")
    task_id = task_id_for_run_id(run_id)

    try:
        asset_refs = copy_assets(
            assets,
            run_root / "task" / "task_instruction_assets",
            "task/task_instruction_assets",
        )
    except Exception as exc:
        # Record the failure for observers, then let the caller see the error.
        append_progress_event(
            run_root,
            "operational_error",
            run_id=run_id,
            message="File-copy failure while preparing task assets",
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="asset_copy_failed",
            run_status="failed",
            task_status="failed",
            operational_error=str(exc),
            callback=progress_callback,
        )
        raise
    append_session_event(
        run_root,
        event_type="task_instruction",
        run_id=run_id,
        task_id=task_id,
        session_id=run_id,
        instruction=instruction,
    )
    if asset_refs:
        append_session_event(
            run_root,
            event_type="task_assets_added",
            run_id=run_id,
            task_id=task_id,
            session_id=run_id,
            assets=asset_refs,
        )

    raw = prompt_to_raw_task(run_id, instruction, _asset_abs_refs(run_root, asset_refs))
    # Initial session snapshot; it is overwritten with the final state below.
    write_json(
        run_root / "session.json",
        {
            "run_id": run_id,
            "session_id": run_id,
            "run_status": "started",
            "task_id": raw.task_id,
            "task_instruction": instruction,
            "task_assets": asset_refs,
            "mentor_mode": settings.mentor_mode,
            "sensitive_info_masking": settings.sensitive_info_masking,
            "max_improvement_loops": settings.max_improvement_loops,
            "apprentice_agent": apprentice_agent_display(settings),
            "model_provider": settings.model_provider,
            **_experience_session_fields(experience_pack_refs),
        },
    )

    # --- Apprentice attempt ----------------------------------------------
    # In human-checkpoint modes the pre-attempt callback emits this event
    # itself, after the intake/rubric checkpoints have been written.
    if not _human_checkpoint_mode(settings):
        append_progress_event(
            run_root,
            "apprentice_attempt_started",
            run_id=run_id,
            message="Apprentice attempt started",
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="apprentice_attempt",
            callback=progress_callback,
        )
    revision_decider, checkpoint_state = _revision_decision_callback(
        run_root,
        settings,
        progress_callback=progress_callback,
    )
    pkg = run_task(
        raw,
        run_root,
        runner=runner_for_settings(settings, runner),
        max_iterations=settings.max_improvement_loops,
        pre_attempt_callback=_pre_attempt_checkpoint_callback(
            run_root,
            settings,
            progress_callback=progress_callback,
        ),
        revision_decision_callback=revision_decider,
    )
    manifest = read_json(pkg / "package_manifest.json") if (pkg / "package_manifest.json").exists() else {}
    run_status, partial_reason = _session_status_for_package(pkg)
    actual_iterations = int(manifest.get("actual_iterations") or 1)
    traced_steps, artifact_count, artifacts_path, operational_error = _package_progress_summary(run_root, pkg)

    # Skip the completion event only when the revision callback already
    # emitted it and nothing went wrong.
    if operational_error or not checkpoint_state.get("apprentice_completed_emitted"):
        append_progress_event(
            run_root,
            "apprentice_attempt_completed",
            run_id=run_id,
            message=("Apprentice attempt failed - operational error" if operational_error else "Apprentice attempt complete"),
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="apprentice_attempt_complete",
            traced_steps=traced_steps,
            artifact_count=artifact_count,
            artifacts_path=artifacts_path,
            operational_error=operational_error,
            callback=progress_callback,
        )
    if operational_error:
        append_progress_event(
            run_root,
            "operational_error",
            run_id=run_id,
            message="Apprentice Agent operational error",
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="operational_error",
            run_status=run_status,
            task_status=run_status,
            operational_error=operational_error,
            artifacts_path=artifacts_path,
            callback=progress_callback,
        )

    # --- Revision and mentor review ---------------------------------------
    # A run that failed operationally gets no mentor review.
    should_run_mentor_review = not (operational_error and run_status == "failed")
    if should_run_mentor_review and actual_iterations > 1:
        if not checkpoint_state.get("revision_started_emitted"):
            append_progress_event(
                run_root,
                "revision_started",
                run_id=run_id,
                message="Revision attempt started",
                current_loop=2,
                maximum_improvement_loops=settings.max_improvement_loops,
                phase="revision_attempt",
                callback=progress_callback,
            )
        append_progress_event(
            run_root,
            "revision_completed",
            run_id=run_id,
            message="Revision attempt complete",
            current_loop=2,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="revision_attempt_complete",
            traced_steps=traced_steps,
            artifact_count=artifact_count,
            artifacts_path=artifacts_path,
            callback=progress_callback,
        )
    if should_run_mentor_review and not _human_checkpoint_mode(settings):
        append_progress_event(
            run_root,
            "mentor_review_started",
            run_id=run_id,
            message="Mentor review started",
            current_loop=actual_iterations,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="mentor_review",
            callback=progress_callback,
        )
        append_progress_event(
            run_root,
            "mentor_review_completed",
            run_id=run_id,
            message=f"Mentor review complete - task {run_status}",
            current_loop=actual_iterations,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="mentor_review_complete",
            task_status=run_status,
            traced_steps=traced_steps,
            artifact_count=artifact_count,
            artifacts_path=artifacts_path,
            callback=progress_callback,
        )
    if should_run_mentor_review and _human_checkpoint_mode(settings):
        write_mentor_checkpoints(
            run_root,
            settings,
            auto_approve=_checkpoint_auto_approve(settings),
            stages=("final_approval",),
        )

    # --- Session bookkeeping ----------------------------------------------
    append_session_event(
        run_root,
        event_type="agent_attempt",
        run_id=run_id,
        task_id=pkg.name,
        session_id=run_id,
        attempt_id=manifest.get("selected_attempt_id"),
        metadata_json={"package_path": str(pkg.relative_to(run_root))},
    )
    backfill_session_event_task_ids(run_root, pkg.name)
    session_data = {
        "run_id": run_id,
        "session_id": run_id,
        "run_status": run_status,
        "task_status": run_status,
        "task_id": pkg.name,
        "task_instruction": instruction,
        "task_assets": asset_refs,
        "latest_package": str(pkg.relative_to(run_root)),
        "latest_attempt_id": manifest.get("selected_attempt_id"),
        "mentor_mode": settings.mentor_mode,
        "sensitive_info_masking": settings.sensitive_info_masking,
        "max_improvement_loops": settings.max_improvement_loops,
        "apprentice_agent": apprentice_agent_display(settings),
        "model_provider": settings.model_provider,
        **_experience_session_fields(experience_pack_refs),
    }
    if partial_reason:
        session_data["status_reason"] = partial_reason
    write_json(
        run_root / "session.json",
        session_data,
    )

    # --- Contribution bundle ----------------------------------------------
    bundle = None
    if create_bundle:
        # The final-approval checkpoint was skipped above when the review was
        # skipped; write it here before bundling.
        if _human_checkpoint_mode(settings) and not should_run_mentor_review:
            write_mentor_checkpoints(
                run_root,
                settings,
                auto_approve=_checkpoint_auto_approve(settings),
                stages=("final_approval",),
            )
        append_progress_event(
            run_root,
            "contribution_bundle_started",
            run_id=run_id,
            message="Contribution Bundle packaging started",
            current_loop=actual_iterations,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="contribution_bundle",
            callback=progress_callback,
        )
        bundle = create_contribution_bundle(run_root, settings=settings)
        _update_bundle_experience_metadata(bundle, experience_pack_refs)
        append_progress_event(
            run_root,
            "contribution_bundle_completed",
            run_id=run_id,
            message="Contribution Bundle ready",
            current_loop=actual_iterations,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="contribution_bundle_complete",
            contribution_bundle_path=bundle,
            callback=progress_callback,
        )
    append_progress_event(
        run_root,
        "run_completed",
        run_id=run_id,
        message=f"Task {run_status}.",
        current_loop=actual_iterations,
        maximum_improvement_loops=settings.max_improvement_loops,
        phase="completed" if run_status == "completed" else run_status,
        run_status=run_status,
        task_status=run_status,
        traced_steps=traced_steps,
        artifact_count=artifact_count,
        artifacts_path=artifacts_path,
        contribution_bundle_path=bundle,
        operational_error=operational_error,
        callback=progress_callback,
    )
    return run_root, bundle
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def _experience_session_fields(experience_pack_refs: list[dict] | None) -> dict:
|
|
705
|
+
refs = experience_pack_refs or []
|
|
706
|
+
if not refs:
|
|
707
|
+
return {}
|
|
708
|
+
return {
|
|
709
|
+
"experience_pack_ids": [ref.get("pack_id") for ref in refs if ref.get("pack_id")],
|
|
710
|
+
"experience_pack_titles": [ref.get("title") for ref in refs if ref.get("title")],
|
|
711
|
+
"experience_pack_sources": [
|
|
712
|
+
source
|
|
713
|
+
for ref in refs
|
|
714
|
+
for source in (ref.get("source_refs") or [])
|
|
715
|
+
],
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
def _update_json_experience_metadata(path: Path, experience_pack_refs: list[dict] | None) -> None:
|
|
720
|
+
if not experience_pack_refs or not path.exists():
|
|
721
|
+
return
|
|
722
|
+
data = read_json(path)
|
|
723
|
+
if not isinstance(data, dict):
|
|
724
|
+
return
|
|
725
|
+
data.update(_experience_session_fields(experience_pack_refs))
|
|
726
|
+
data["experience_learning_status"] = "experience_pack_applied"
|
|
727
|
+
write_json(path, data)
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def _update_bundle_experience_metadata(bundle: Path | None, experience_pack_refs: list[dict] | None) -> None:
|
|
731
|
+
if not bundle or not experience_pack_refs:
|
|
732
|
+
return
|
|
733
|
+
for rel in ("contribution_manifest.json", "session_metadata.json"):
|
|
734
|
+
_update_json_experience_metadata(bundle / rel, experience_pack_refs)
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
def _latest_attempt_id(run_root: Path) -> str | None:
|
|
738
|
+
packages = sorted((run_root / "packages").glob("*")) if (run_root / "packages").exists() else []
|
|
739
|
+
for pkg in reversed(packages):
|
|
740
|
+
manifest = pkg / "package_manifest.json"
|
|
741
|
+
if manifest.exists():
|
|
742
|
+
data = read_json(manifest)
|
|
743
|
+
if data.get("selected_attempt_id"):
|
|
744
|
+
return str(data["selected_attempt_id"])
|
|
745
|
+
return None
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def _session_task_id(run_root: Path) -> str | None:
|
|
749
|
+
session_path = run_root / "session.json"
|
|
750
|
+
if session_path.exists():
|
|
751
|
+
try:
|
|
752
|
+
data = read_json(session_path)
|
|
753
|
+
if data.get("task_id"):
|
|
754
|
+
return str(data["task_id"])
|
|
755
|
+
except Exception:
|
|
756
|
+
pass
|
|
757
|
+
packages = sorted((run_root / "packages").glob("*")) if (run_root / "packages").exists() else []
|
|
758
|
+
return packages[-1].name if packages else None
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def continue_session(
    run_id: str,
    followup_instruction: str,
    assets: list[Path] | None = None,
    run_loop: bool = False,
    settings: Settings | None = None,
    runner: str | None = None,
    progress_callback: ProgressCallback | None = None,
) -> tuple[Path, Path | None]:
    """Record a follow-up instruction on an existing run and optionally re-run the loop.

    The follow-up (and any copied assets) is always recorded as session
    events, and the contribution bundle is always rebuilt at the end. The
    Apprentice loop itself only runs when ``run_loop`` is true.

    Args:
        run_id: Identifier resolved to a run directory via ``run_root_for``.
        followup_instruction: Follow-up text; stored on the ``user_followup``
            session event and, when ``run_loop`` is set, appended to the
            original task instruction to form the new prompt.
        assets: Optional files handed to ``copy_assets`` for this follow-up.
        run_loop: When true, call ``run_task`` on the combined prompt and
            update ``session.json``; when false, only record the follow-up.
        settings: Settings to use; defaults to ``get_settings()``.
        runner: Optional runner name passed to ``runner_for_settings``.
        progress_callback: Forwarded as ``callback`` to every progress event
            emitted here.

    Returns:
        ``(run_root, bundle)`` where ``bundle`` is whatever
        ``create_contribution_bundle`` returned.

    Raises:
        FileNotFoundError: If the resolved run directory does not exist.
        Exception: Any error raised by ``copy_assets`` is re-raised after an
            ``operational_error`` progress event has been appended.
    """
    settings = settings or get_settings()
    run_root = run_root_for(run_id, settings)
    if not run_root.exists():
        raise FileNotFoundError(f"Run not found: {run_id}")
    followup_index = next_followup_index(run_root)
    task_id = _session_task_id(run_root)
    append_progress_event(
        run_root,
        "followup_started",
        run_id=run_root.name,
        message=f"Follow-up {followup_index} received",
        current_loop=1,
        maximum_improvement_loops=settings.max_improvement_loops,
        phase="followup",
        run_status="running",
        task_status="running",
        metadata_json={"followup_index": followup_index},
        callback=progress_callback,
    )
    # Copy follow-up assets into a per-follow-up directory; a failure is
    # reported as a progress event and then propagated to the caller.
    try:
        asset_refs = copy_assets(
            assets,
            run_root / "task" / "task_instruction_assets" / f"followup_{followup_index}",
            f"task/task_instruction_assets/followup_{followup_index}",
        )
    except Exception as exc:
        append_progress_event(
            run_root,
            "operational_error",
            run_id=run_root.name,
            message="File-copy failure while preparing follow-up assets",
            current_loop=1,
            maximum_improvement_loops=settings.max_improvement_loops,
            phase="asset_copy_failed",
            run_status="failed",
            task_status="failed",
            operational_error=str(exc),
            metadata_json={"followup_index": followup_index},
            callback=progress_callback,
        )
        raise
    # Record the follow-up against the most recent attempt found on disk.
    append_session_event(
        run_root,
        event_type="user_followup",
        run_id=run_root.name,
        task_id=task_id,
        session_id=run_root.name,
        feedback_source="user",
        feedback_type="followup_instruction",
        applies_to_attempt=_latest_attempt_id(run_root),
        followup_index=followup_index,
        followup_instruction=followup_instruction,
        followup_assets=asset_refs,
    )
    if asset_refs:
        append_session_event(
            run_root,
            event_type="followup_assets_added",
            run_id=run_root.name,
            task_id=task_id,
            session_id=run_root.name,
            feedback_source="user",
            feedback_type="followup_instruction",
            applies_to_attempt=_latest_attempt_id(run_root),
            followup_index=followup_index,
            followup_assets=asset_refs,
        )
    bundle: Path | None = None
    if run_loop:
        # Build a prompt that carries both the original instruction (from
        # session.json, if present) and the new follow-up text.
        session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
        original = session.get("task_instruction") or ""
        combined = (
            "Continue the same Agent Apprenticeship session.\n\n"
            f"Original task instruction:\n{original}\n\n"
            f"Follow-up instruction {followup_index}:\n{followup_instruction}\n"
        )
        raw = prompt_to_raw_task(
            f"{run_root.name}-followup-{followup_index}",
            combined,
            _asset_abs_refs(run_root, asset_refs),
        )
        # Outside human-checkpoint mode the "attempt started" event is
        # emitted here, before run_task is called.
        if not _human_checkpoint_mode(settings):
            append_progress_event(
                run_root,
                "apprentice_attempt_started",
                run_id=run_root.name,
                message=f"Follow-up {followup_index} Apprentice attempt started",
                current_loop=1,
                maximum_improvement_loops=settings.max_improvement_loops,
                phase="apprentice_attempt",
                metadata_json={"followup_index": followup_index},
                callback=progress_callback,
            )
        # checkpoint_state is consulted below (via its "*_emitted" keys) to
        # avoid emitting progress events a second time.
        revision_decider, checkpoint_state = _revision_decision_callback(
            run_root,
            settings,
            progress_callback=progress_callback,
            followup_index=followup_index,
        )
        pkg = run_task(
            raw,
            run_root,
            runner=runner_for_settings(settings, runner),
            max_iterations=settings.max_improvement_loops,
            pre_attempt_callback=_pre_attempt_checkpoint_callback(
                run_root,
                settings,
                progress_callback=progress_callback,
                followup_index=followup_index,
            ),
            revision_decision_callback=revision_decider,
        )
        manifest = read_json(pkg / "package_manifest.json") if (pkg / "package_manifest.json").exists() else {}
        status, _reason = _session_status_for_package(pkg)
        # A missing or falsy iteration count is treated as a single iteration.
        actual_iterations = int(manifest.get("actual_iterations") or 1)
        traced_steps, artifact_count, artifacts_path, operational_error = _package_progress_summary(run_root, pkg)
        if operational_error or not checkpoint_state.get("apprentice_completed_emitted"):
            append_progress_event(
                run_root,
                "apprentice_attempt_completed",
                run_id=run_root.name,
                message=(f"Follow-up {followup_index} Apprentice attempt failed - operational error" if operational_error else f"Follow-up {followup_index} Apprentice attempt complete"),
                current_loop=1,
                maximum_improvement_loops=settings.max_improvement_loops,
                phase="apprentice_attempt_complete",
                traced_steps=traced_steps,
                artifact_count=artifact_count,
                artifacts_path=artifacts_path,
                operational_error=operational_error,
                metadata_json={"followup_index": followup_index},
                callback=progress_callback,
            )
        if operational_error:
            append_progress_event(
                run_root,
                "operational_error",
                run_id=run_root.name,
                message=f"Follow-up {followup_index} Apprentice Agent operational error",
                current_loop=1,
                maximum_improvement_loops=settings.max_improvement_loops,
                phase="operational_error",
                run_status=status,
                task_status=status,
                operational_error=operational_error,
                artifacts_path=artifacts_path,
                metadata_json={"followup_index": followup_index},
                callback=progress_callback,
            )
        # Mentor-review bookkeeping is skipped only when the attempt both
        # reported an operational error and ended with status "failed".
        should_run_mentor_review = not (operational_error and status == "failed")
        if should_run_mentor_review and actual_iterations > 1:
            if not checkpoint_state.get("revision_started_emitted"):
                append_progress_event(
                    run_root,
                    "revision_started",
                    run_id=run_root.name,
                    message=f"Follow-up {followup_index} revision attempt started",
                    current_loop=2,
                    maximum_improvement_loops=settings.max_improvement_loops,
                    phase="revision_attempt",
                    metadata_json={"followup_index": followup_index},
                    callback=progress_callback,
                )
            # NOTE(review): nesting reconstructed from a flattened listing;
            # "revision_completed" is assumed to sit at this level rather than
            # under the "revision_started_emitted" check above - confirm.
            append_progress_event(
                run_root,
                "revision_completed",
                run_id=run_root.name,
                message=f"Follow-up {followup_index} revision attempt complete",
                current_loop=2,
                maximum_improvement_loops=settings.max_improvement_loops,
                phase="revision_attempt_complete",
                traced_steps=traced_steps,
                artifact_count=artifact_count,
                artifacts_path=artifacts_path,
                metadata_json={"followup_index": followup_index},
                callback=progress_callback,
            )
        if should_run_mentor_review and not _human_checkpoint_mode(settings):
            append_progress_event(
                run_root,
                "mentor_review_completed",
                run_id=run_root.name,
                message=f"Follow-up {followup_index} mentor review complete",
                current_loop=actual_iterations,
                maximum_improvement_loops=settings.max_improvement_loops,
                phase="mentor_review_complete",
                task_status=status,
                traced_steps=traced_steps,
                artifact_count=artifact_count,
                artifacts_path=artifacts_path,
                metadata_json={"followup_index": followup_index},
                callback=progress_callback,
            )
        if should_run_mentor_review and _human_checkpoint_mode(settings):
            write_mentor_checkpoints(
                run_root,
                settings,
                auto_approve=_checkpoint_auto_approve(settings),
                stages=("final_approval",),
            )
        # NOTE(review): assumed to run for every looped follow-up (not only in
        # human-checkpoint mode); nesting reconstructed - confirm.
        append_session_event(
            run_root,
            event_type="agent_attempt",
            run_id=run_root.name,
            task_id=task_id or pkg.name,
            session_id=run_root.name,
            attempt_id=manifest.get("selected_attempt_id"),
            metadata_json={"package_path": str(pkg.relative_to(run_root)), "followup_index": followup_index},
        )
        backfill_session_event_task_ids(run_root, task_id or pkg.name)
        # Values reported on the final "followup_completed" event.
        final_status = status
        final_artifacts_path = artifacts_path
        final_traced_steps = traced_steps
        final_artifact_count = artifact_count
        final_operational_error = operational_error
        # Re-read session.json so the update is applied to the latest contents.
        session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
        session.update(
            {
                "run_status": status,
                "task_status": status,
                "task_id": task_id or pkg.name,
                "latest_package": str(pkg.relative_to(run_root)),
                "latest_attempt_id": manifest.get("selected_attempt_id"),
                "status_reason": _reason if status != "completed" and _reason else None,
            }
        )
        # Drop every key whose value is None (including pre-existing ones)
        # before writing the session back.
        session = {k: v for k, v in session.items() if v is not None}
        write_json(run_root / "session.json", session)
    else:
        # Record-only follow-up: no attempt is run, so the final status is
        # taken from what is already stored on disk.
        backfill_session_event_task_ids(run_root, task_id)
        session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
        final_status = session.get("task_status") or session.get("run_status") or "completed"
        final_artifacts_path = None
        status_file = run_root / "run_status.json"
        if status_file.exists():
            status_data = read_json(status_file)
            final_artifacts_path = status_data.get("artifacts_path")
        final_traced_steps = None
        final_artifact_count = None
        final_operational_error = None
    record_only_message = "Follow-up recorded. No Apprentice Agent loop was run. Use --run-loop to continue work."
    append_progress_event(
        run_root,
        "contribution_bundle_started",
        run_id=run_root.name,
        message="Contribution Bundle update started",
        current_loop=1,
        maximum_improvement_loops=settings.max_improvement_loops,
        phase="contribution_bundle",
        metadata_json={"followup_index": followup_index},
        callback=progress_callback,
    )
    # In human-checkpoint mode the final-approval checkpoint is written again
    # just before bundling, this time with preserve_interactive=False.
    if run_loop and _human_checkpoint_mode(settings):
        write_mentor_checkpoints(
            run_root,
            settings,
            auto_approve=_checkpoint_auto_approve(settings),
            stages=("final_approval",),
            preserve_interactive=False,
        )
    bundle = create_contribution_bundle(run_root, settings=settings)
    append_progress_event(
        run_root,
        "contribution_bundle_completed",
        run_id=run_root.name,
        message="Contribution Bundle updated",
        current_loop=1,
        maximum_improvement_loops=settings.max_improvement_loops,
        phase="contribution_bundle_complete",
        contribution_bundle_path=bundle,
        metadata_json={"followup_index": followup_index},
        callback=progress_callback,
    )
    append_progress_event(
        run_root,
        "followup_completed",
        run_id=run_root.name,
        message=(record_only_message if not run_loop else f"Follow-up {followup_index} complete"),
        current_loop=1,
        maximum_improvement_loops=settings.max_improvement_loops,
        phase="followup_complete",
        run_status=final_status,
        task_status=final_status,
        traced_steps=final_traced_steps,
        artifact_count=final_artifact_count,
        artifacts_path=final_artifacts_path,
        contribution_bundle_path=bundle,
        operational_error=final_operational_error,
        metadata_json={"followup_index": followup_index, "record_only": not run_loop},
        callback=progress_callback,
    )
    return run_root, bundle
|
|
1061
|
+
|
|
1062
|
+
|
|
1063
|
+
def finish_session(run_id: str, settings: Settings | None = None) -> tuple[Path, Path]:
    """Mark a run's session as finished and rebuild its contribution bundle.

    Appends a ``session_finished`` session event, writes ``session.json``
    with ``session_status`` set to ``"finished"`` (keeping any run/task
    status already recorded, else falling back to ``run_status.json`` and
    finally ``"partial"``), writes the final-approval mentor checkpoint,
    creates the contribution bundle, and emits a ``run_completed`` progress
    event.

    Args:
        run_id: Identifier resolved to a run directory via ``run_root_for``.
        settings: Settings to use; defaults to ``get_settings()``.

    Returns:
        ``(run_root, bundle)`` where ``bundle`` is the value returned by
        ``create_contribution_bundle``.

    Raises:
        FileNotFoundError: If the resolved run directory does not exist.
    """
    if not settings:
        settings = get_settings()
    run_root = run_root_for(run_id, settings)
    if not run_root.exists():
        raise FileNotFoundError(f"Run not found: {run_id}")

    session_name = run_root.name
    task_id = _session_task_id(run_root)
    append_session_event(
        run_root,
        event_type="session_finished",
        run_id=session_name,
        task_id=task_id,
        session_id=session_name,
        applies_to_attempt=_latest_attempt_id(run_root),
    )

    session_file = run_root / "session.json"
    if session_file.exists():
        session_record = read_json(session_file)
    else:
        # No session file yet: start from a minimal record.
        session_record = {"run_id": session_name, "session_id": session_name}

    status_file = run_root / "run_status.json"
    status_record = read_json(status_file) if status_file.exists() else {}

    # Prefer statuses already on the session, then run_status.json.
    task_status = session_record.get("task_status") or status_record.get("task_status") or "partial"
    run_status = session_record.get("run_status") or status_record.get("run_status") or task_status

    session_record["run_status"] = run_status
    session_record["task_status"] = task_status
    session_record["session_status"] = "finished"
    if task_id:
        session_record["task_id"] = task_id
    write_json(session_file, session_record)
    backfill_session_event_task_ids(run_root, task_id)

    write_mentor_checkpoints(
        run_root,
        settings,
        auto_approve=_checkpoint_auto_approve(settings),
        stages=("final_approval",),
    )
    bundle = create_contribution_bundle(run_root, settings=settings)

    artifacts_location = status_record.get("artifacts_path") or str(run_root / "artifacts")
    append_progress_event(
        run_root,
        "run_completed",
        run_id=session_name,
        message="Session finished.",
        phase="session_finished",
        run_status=run_status,
        task_status=task_status,
        artifacts_path=artifacts_location,
        contribution_bundle_path=bundle,
    )
    return run_root, bundle
|