agent-apprenticeship 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +217 -0
  3. package/bin/agent-apprenticeship.js +131 -0
  4. package/package.json +30 -0
  5. package/pyproject.toml +23 -0
  6. package/src/agent_apprenticeship_trace/__init__.py +2 -0
  7. package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
  8. package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
  9. package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
  10. package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
  11. package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
  12. package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
  13. package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
  14. package/src/agent_apprenticeship_trace/certification.py +580 -0
  15. package/src/agent_apprenticeship_trace/cli.py +2979 -0
  16. package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
  17. package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
  18. package/src/agent_apprenticeship_trace/config.py +609 -0
  19. package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
  20. package/src/agent_apprenticeship_trace/env.py +46 -0
  21. package/src/agent_apprenticeship_trace/evaluator.py +64 -0
  22. package/src/agent_apprenticeship_trace/grader.py +194 -0
  23. package/src/agent_apprenticeship_trace/integration_status.py +193 -0
  24. package/src/agent_apprenticeship_trace/io.py +20 -0
  25. package/src/agent_apprenticeship_trace/learning.py +627 -0
  26. package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
  27. package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
  28. package/src/agent_apprenticeship_trace/loop.py +111 -0
  29. package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
  30. package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
  31. package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
  32. package/src/agent_apprenticeship_trace/progress.py +223 -0
  33. package/src/agent_apprenticeship_trace/public_run.py +1109 -0
  34. package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
  35. package/src/agent_apprenticeship_trace/recipes.py +129 -0
  36. package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
  37. package/src/agent_apprenticeship_trace/revision.py +21 -0
  38. package/src/agent_apprenticeship_trace/role_runners.py +7 -0
  39. package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
  40. package/src/agent_apprenticeship_trace/schemas.py +273 -0
  41. package/src/agent_apprenticeship_trace/session_events.py +99 -0
  42. package/src/agent_apprenticeship_trace/task_intake.py +112 -0
  43. package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
  44. package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
  45. package/src/agent_apprenticeship_trace/training_signals.py +30 -0
  46. package/src/agent_apprenticeship_trace/validation.py +210 -0
  47. package/src/agent_apprenticeship_trace/verifier.py +55 -0
@@ -0,0 +1,303 @@
1
+ from __future__ import annotations
2
+ from pathlib import Path
3
+ import hashlib, mimetypes, json, shutil
4
+ from .schemas import *
5
+ from .io import write_json, append_jsonl
6
+ from .rubric_generation import worker_visible_markdown
7
+ from .artifact_resolver import artifact_ref_candidates, normalize_artifact_ref
8
+
9
+
10
# Keys describing where a task came from; _drop_source_fields removes them.
SOURCE_FIELD_KEYS = {
    'source_url_or_ref',
    'source_kind',
    'source_url',
    'source_ref',
    'source_license',
}

# Path components that cause is_ignored_release_path to reject a path.
IGNORED_RELEASE_DIR_NAMES = {
    'node_modules',
    '.venv',
    '__pycache__',
    '.pytest_cache',
    '.mypy_cache',
    '.ruff_cache',
    '.git',
    '.cache',
    'dist',
    'build',
}


def is_ignored_release_path(path: Path | str) -> bool:
    """Return True when any component of ``path`` names an ignored directory.

    A component matches when it is listed in ``IGNORED_RELEASE_DIR_NAMES`` or
    contains the text ``pycache`` (compared case-insensitively).
    """
    return any(
        component in IGNORED_RELEASE_DIR_NAMES or 'pycache' in component.lower()
        for component in Path(path).parts
    )
21
+
22
+ FINANCE_INPUTS = {
23
+ 'invoices.csv': """invoice_id,vendor,invoice_date,due_date,currency,amount
24
+ INV-1001,Acme Incorporated,2026-01-05,2026-02-04,USD,1200.00
25
+ INV-1002,Globex LLC,2026-01-08,2026-02-07,EUR,950.00
26
+ INV-1002,Globex LLC,2026-01-08,2026-02-07,EUR,950.00
27
+ INV-1003,Soylent Corp,2026-01-12,2026-02-11,GBP,500.00
28
+ INV-1004,Initech,2026-01-15,2026-02-14,USD,750.00
29
+ INV-1005,Acme Inc,2026-01-20,2026-02-19,JPY,100000
30
+ """,
31
+ 'payments.csv': """payment_id,payment_date,vendor_name,invoice_ref,currency,amount
32
+ PAY-9001,2026-02-03,ACME Inc.,INV-1001,USD,1200.00
33
+ PAY-9002,2026-02-08,Globex Corporation,INV-1002,EUR,500.00
34
+ PAY-9003,2026-02-10,Globex LLC,INV-1002,EUR,450.00
35
+ PAY-9004,2026-02-15,Soylent,INV-1003,GBP,550.00
36
+ PAY-9005,2026-02-18,Initech LLC,INV-9999,USD,300.00
37
+ PAY-9006,2026-02-20,Acme Incorporated,INV-1005,JPY,100000
38
+ """,
39
+ 'vendor_aliases.csv': """canonical_vendor,alias
40
+ Acme Incorporated,ACME Inc.
41
+ Acme Incorporated,Acme Inc
42
+ Globex LLC,Globex Corporation
43
+ Soylent Corp,Soylent
44
+ Initech,Initech LLC
45
+ """,
46
+ 'fx_rates.csv': """currency,usd_rate,effective_date
47
+ USD,1.0000,2026-02-01
48
+ EUR,1.1000,2026-02-01
49
+ GBP,1.2800,2026-02-01
50
+ JPY,0.0068,2026-02-01
51
+ """,
52
+ 'reconciliation_policy.md': """# Reconciliation policy
53
+
54
+ - Convert every invoice and payment amount to USD using `fx_rates.csv`.
55
+ - Resolve vendor names through `vendor_aliases.csv` before matching.
56
+ - Flag duplicate invoice IDs as `duplicate_invoice_id`.
57
+ - A payment can be within 3 calendar days after due date and still be on time.
58
+ - Categorize partial payments as `partial_payment` when total paid is less than invoice amount.
59
+ - Categorize overpayments as `overpayment` when total paid exceeds invoice amount by more than 1 USD.
60
+ - Categorize payments with no matching invoice as `missing_invoice_reference`.
61
+ - Produce vendor totals in USD and an audit summary with assumptions and exception counts.
62
+ """,
63
+ }
64
+
65
def init_package(root: Path, task_id: str) -> Path:
    """Create the directory skeleton for one task package and return its root.

    The package lives at ``root/packages/<task_id>``. Directories that already
    exist are left untouched.
    """
    package_dir = root / 'packages' / task_id
    layout = (
        'task',
        'task/task_instruction_assets',
        'rubric',
        'input',
        'hidden_reference',
        'attempts/baseline/artifacts',
        'attempts/revised/artifacts',
        'grading',
        'feedback',
        'signals',
    )
    for relative in layout:
        package_dir.joinpath(relative).mkdir(parents=True, exist_ok=True)
    return package_dir
70
+
71
def _drop_source_fields(obj):
    """Return a recursive copy of ``obj`` with source keys and None dict values removed.

    Dicts lose every key listed in ``SOURCE_FIELD_KEYS`` and every entry whose
    value is None; lists are rebuilt element by element (None elements are
    kept); any other value is returned as-is.
    """
    if isinstance(obj, dict):
        cleaned = {}
        for key, value in obj.items():
            if key in SOURCE_FIELD_KEYS or value is None:
                continue
            cleaned[key] = _drop_source_fields(value)
        return cleaned
    if isinstance(obj, list):
        return [_drop_source_fields(item) for item in obj]
    return obj
77
+
78
def public_task_record(raw: RawTaskRecord | dict) -> dict:
    """Build the publishable dict form of a raw task record.

    Objects exposing ``model_dump`` are dumped in JSON mode; anything else is
    copied with ``dict()``. Missing ``expected_economic_value*`` fields are
    filled from the legacy pay fields (top level first, then ``raw_payload``),
    the legacy pay keys are removed from the top level and from a dict
    ``raw_payload``, and the result is passed through ``_drop_source_fields``.
    """
    record = raw.model_dump(mode='json') if hasattr(raw, 'model_dump') else dict(raw)
    payload = dict(record.get('raw_payload') or {})

    # (public key, legacy key) pairs, in the order the public keys are filled.
    aliases = (
        ('expected_economic_value', 'expected_pay'),
        ('expected_economic_value_for_agent_apprentice', 'expected_apprentice_pay'),
    )
    for public_key, legacy_key in aliases:
        if record.get(public_key) is None:
            record[public_key] = (
                record.get(legacy_key)
                or payload.get(public_key)
                or payload.get(legacy_key)
            )
    legacy_keys = {legacy_key for _, legacy_key in aliases}
    for _, legacy_key in aliases:
        record.pop(legacy_key, None)

    nested = record.get('raw_payload')
    if isinstance(nested, dict):
        record['raw_payload'] = {
            key: value for key, value in nested.items() if key not in legacy_keys
        }
    return _drop_source_fields(record)
90
+
91
def public_task_intake_spec(spec: TaskIntakeSpec | dict) -> dict:
    """Build the publishable dict form of a task intake spec.

    Missing ``expected_economic_value*`` fields are filled from the matching
    legacy pay field, the legacy pay keys are removed, and the result is
    passed through ``_drop_source_fields``.
    """
    if hasattr(spec, 'model_dump'):
        record = spec.model_dump(mode='json')
    else:
        record = dict(spec)

    aliases = (
        ('expected_economic_value', 'expected_pay'),
        ('expected_economic_value_for_agent_apprentice', 'expected_apprentice_pay'),
    )
    for public_key, legacy_key in aliases:
        if record.get(public_key) is None:
            record[public_key] = record.get(legacy_key)
    for _, legacy_key in aliases:
        record.pop(legacy_key, None)
    return _drop_source_fields(record)
100
+
101
+ def _copy_asset(src: Path, dst_dir: Path) -> Path:
102
+ dst_dir.mkdir(parents=True, exist_ok=True)
103
+ target=dst_dir/src.name
104
+ if src.resolve() == target.resolve() if target.exists() else False:
105
+ return target
106
+ if target.exists():
107
+ stem=target.stem; suffix=target.suffix
108
+ for i in range(2,1000):
109
+ candidate=dst_dir/f'{stem}-{i}{suffix}'
110
+ if not candidate.exists():
111
+ target=candidate; break
112
+ if src.is_dir():
113
+ shutil.copytree(src, target, dirs_exist_ok=True)
114
+ else:
115
+ shutil.copy2(src, target)
116
+ return target
117
+
118
+ def _task_brief(raw: RawTaskRecord, spec: TaskIntakeSpec) -> str:
119
+ lines=[
120
+ f"# {spec.normalized_title}",
121
+ "",
122
+ "## Instruction",
123
+ spec.normalized_instruction,
124
+ "",
125
+ "## Expected deliverable",
126
+ raw.expected_deliverable or raw.raw_payload.get('expected_deliverable') or spec.expected_agent_deliverable,
127
+ "",
128
+ "## Publishable task metadata",
129
+ ]
130
+ for key, value in {
131
+ 'domain': spec.domain,
132
+ 'subdomain': spec.subdomain,
133
+ 'apprenticeship_role': spec.apprenticeship_role,
134
+ 'task_family': spec.task_family,
135
+ 'difficulty_tier': spec.difficulty_tier,
136
+ 'needs_expert_review': spec.needs_expert_review,
137
+ 'expected_economic_value': spec.expected_economic_value or spec.expected_pay,
138
+ 'expected_economic_value_for_agent_apprentice': spec.expected_economic_value_for_agent_apprentice or spec.expected_apprentice_pay,
139
+ }.items():
140
+ if value is not None:
141
+ lines.append(f"- {key}: {value}")
142
+ return "\n".join(lines).rstrip()+"\n"
143
+
144
def materialize_task_inputs(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSpec) -> list[str]:
    """Write the worker-visible inputs into ``package_root/input``.

    Always writes ``task_brief.md`` and ``task.json``. Each entry of
    ``raw.input_artifact_refs`` that exists on disk as a file or directory is
    copied into both the task instruction assets directory and the input
    directory. Each name in ``raw.raw_payload['input_requirements']`` that is
    a key of ``FINANCE_INPUTS`` is written from that fixture.

    Returns the created input names in first-seen order without duplicates.
    """
    input_dir = package_root / 'input'
    asset_dir = package_root / 'task' / 'task_instruction_assets'
    input_dir.mkdir(parents=True, exist_ok=True)

    (input_dir / 'task_brief.md').write_text(_task_brief(raw, spec))
    write_json(input_dir / 'task.json', public_task_record(raw))
    created = ['task_brief.md', 'task.json']

    # Only materialize real local attachment refs or built-in fixture files with exact known names.
    # Instruction fragments are never converted into invented input filenames.
    for ref in raw.input_artifact_refs or []:
        source = Path(ref)
        if not (source.exists() and (source.is_file() or source.is_dir())):
            continue
        _copy_asset(source, asset_dir)
        created.append(_copy_asset(source, input_dir).name)

    requested_names = raw.raw_payload.get('input_requirements') or []
    for name in requested_names:
        if name not in FINANCE_INPUTS:
            continue
        (input_dir / name).write_text(FINANCE_INPUTS[name])
        created.append(name)

    # dict.fromkeys keeps the first occurrence of each name, in order.
    return list(dict.fromkeys(created))
168
+
169
def write_task_package(
    package_root: Path,
    raw: RawTaskRecord,
    spec: TaskIntakeSpec,
    quality: TaskIntakeQualityReport,
    rubric: RubricSpec,
    rubric_quality: RubricQualityReport,
):
    """Write the task, input, rubric, README and manifest files of a package.

    The raw record and intake spec are written in their public forms. The
    rubric object is written to both ``rubric.json`` and
    ``verifier_private_rubric.json``, rendered to ``worker_visible_rubric.md``,
    and each of its ``rubric_items`` is appended to ``rubric_items.jsonl``.
    """
    task_dir = package_root / 'task'
    rubric_dir = package_root / 'rubric'

    write_json(task_dir / 'raw_task_record.json', public_task_record(raw))
    write_json(task_dir / 'task_intake_spec.json', public_task_intake_spec(spec))
    write_json(task_dir / 'task_intake_quality_report.json', quality)

    materialize_task_inputs(package_root, raw, spec)

    write_json(rubric_dir / 'rubric.json', rubric)
    write_json(rubric_dir / 'rubric_quality_report.json', rubric_quality)
    (rubric_dir / 'worker_visible_rubric.md').write_text(worker_visible_markdown(rubric))
    write_json(rubric_dir / 'verifier_private_rubric.json', rubric)
    items_path = rubric_dir / 'rubric_items.jsonl'
    for item in rubric.rubric_items:
        append_jsonl(items_path, item)

    readme_text = f'# Task package {spec.task_id}\n\nLocal agent apprenticeship trace package.\n'
    (package_root / 'README.md').write_text(readme_text)
    write_json(
        package_root / 'manifest.json',
        {'task_id': spec.task_id, 'schema_version': 'aa-package-v0.1'},
    )
176
+
177
+ def _media_type(path: Path, mime: str | None) -> str:
178
+ suffix=path.suffix.lower()
179
+ if mime and mime.startswith('image/'): return 'image'
180
+ if mime and mime.startswith('audio/'): return 'audio'
181
+ if mime and mime.startswith('video/'): return 'video'
182
+ if suffix in {'.py','.js','.ts','.sh','.rs','.go','.java','.sql','.html','.css'}: return 'code'
183
+ if suffix in {'.csv','.json','.jsonl','.yaml','.yml','.xml'}: return 'data'
184
+ if suffix in {'.md','.txt','.log'}: return 'text'
185
+ if suffix in {'.pdf','.doc','.docx'}: return 'document'
186
+ if suffix in {'.zip','.tar','.gz'}: return 'archive'
187
+ return 'unknown'
188
+
189
+ def _sha256(path: Path) -> str:
190
+ h=hashlib.sha256()
191
+ with path.open('rb') as fh:
192
+ for chunk in iter(lambda: fh.read(1024 * 1024), b''):
193
+ h.update(chunk)
194
+ return h.hexdigest()
195
+
196
+ def _artifact_kind(path: Path, media_type: str) -> str:
197
+ rel=path.as_posix()
198
+ if '/artifacts/' in rel:
199
+ return 'worker_output'
200
+ if path.name in {'agent_trace.json','agent_trace.raw.json','agent_trace.normalized.json'}:
201
+ return 'trace'
202
+ if path.name in {'stdout.txt','stderr.txt','final_message.txt'} or media_type == 'text' and path.suffix == '.log':
203
+ return 'log'
204
+ if rel.startswith('input/') or '/input/' in rel:
205
+ return 'input'
206
+ if rel.startswith('grading/') or '/grading/' in rel:
207
+ return 'evaluation'
208
+ if rel.startswith('feedback/') or '/feedback/' in rel:
209
+ return 'feedback'
210
+ return 'package_file'
211
+
212
+ def _attempt_from_package_rel(package_root: Path, rel: Path) -> tuple[str | None, str | None]:
213
+ parts=rel.parts
214
+ if len(parts) >= 2 and parts[0] == 'attempts':
215
+ attempt_kind=parts[1]
216
+ return f"{package_root.name}_{attempt_kind}", attempt_kind
217
+ return None, None
218
+
219
def _trace_artifact_links(package_root: Path) -> dict[str, list[dict]]:
    """Map artifact reference keys to the trace steps that mention them.

    Every ``attempts/*/agent_trace.json`` under ``package_root`` is read. For
    each step, each entry of its ``artifact_refs`` is normalized and indexed
    under all of its candidate spellings, the normalized ref itself, and the
    ``packages/<package name>/<ref>`` form. Each key maps to a de-duplicated
    list of link dicts (attempt id/kind, trace id, step, actor, operation).

    This is best-effort: trace files that cannot be read or parsed, traces
    that are not JSON objects, and steps that are not JSON objects are
    skipped rather than raising. Trace files are visited in sorted order so
    the order of links under a key does not depend on the filesystem.
    """
    links: dict[str, list[dict]] = {}
    for trace_path in sorted(package_root.glob('attempts/*/agent_trace.json')):
        try:
            trace = json.loads(trace_path.read_text())
        except Exception:
            # Deliberate best-effort: an unreadable trace just contributes no links.
            continue
        if not isinstance(trace, dict):
            # Valid JSON but not an object (e.g. a bare list): nothing to index.
            continue

        attempt_id = trace.get('attempt_id')
        # Fall back to the attempt directory name when the trace omits the kind.
        attempt_kind = trace.get('attempt_kind') or trace_path.parent.name
        trace_id = trace.get('trace_id')

        steps = trace.get('steps')
        if not isinstance(steps, list):
            continue
        for step in steps:
            if not isinstance(step, dict):
                continue
            link = {
                'attempt_id': attempt_id,
                'attempt_kind': attempt_kind,
                'trace_id': trace_id,
                'step': step.get('step'),
                'actor': step.get('actor'),
                'operation': step.get('operation'),
            }
            for ref in step.get('artifact_refs') or []:
                normalized = normalize_artifact_ref(ref)
                keys = artifact_ref_candidates(normalized) | {
                    normalized,
                    f'packages/{package_root.name}/{normalized}',
                }
                for key in keys:
                    if not key:
                        continue
                    bucket = links.setdefault(key, [])
                    if link not in bucket:
                        bucket.append(link)
    return links
247
+
248
def _collect_trace_counters(package_root: Path) -> dict[str, int]:
    """Sum the per-attempt ``trace_normalization_report.json`` files into counters.

    Reports that cannot be read or parsed, or that are not JSON objects, are skipped.
    """
    counters = {
        'raw_trace_count': 0,
        'raw_trace_step_count': 0,
        'normalized_trace_count': 0,
        'normalized_trace_step_count': 0,
        'fallback_trace_count': 0,
        'fallback_trace_step_count': 0,
        'discarded_step_count': 0,
        'raw_trace_parse_error_count': 0,
        'trace_normalization_error_count': 0,
        'trace_normalization_partial_count': 0,
        'trace_lossless_count': 0,
        'trace_lossless_failure_count': 0,
    }
    for report_path in sorted(package_root.glob('attempts/*/trace_normalization_report.json')):
        try:
            report = json.loads(report_path.read_text())
        except Exception:
            # Best-effort: an unreadable report contributes nothing.
            continue
        if not isinstance(report, dict):
            continue

        normalized_steps = int(report.get('normalized_step_count') or 0)
        is_fallback = bool(report.get('fallback_trace'))
        is_lossless = bool(report.get('trace_lossless'))

        counters['raw_trace_count'] += 1 if report.get('raw_trace_ref') else 0
        counters['raw_trace_step_count'] += int(report.get('raw_step_count') or 0)
        counters['normalized_trace_count'] += 1 if report.get('normalized_trace_ref') else 0
        counters['normalized_trace_step_count'] += normalized_steps
        counters['fallback_trace_count'] += 1 if is_fallback else 0
        counters['fallback_trace_step_count'] += normalized_steps if is_fallback else 0
        counters['discarded_step_count'] += int(report.get('discarded_step_count') or 0)
        counters['raw_trace_parse_error_count'] += 1 if report.get('raw_trace_parse_error') else 0
        counters['trace_normalization_error_count'] += 1 if report.get('trace_normalization_error') else 0
        counters['trace_normalization_partial_count'] += 1 if report.get('trace_normalization_partial') else 0
        counters['trace_lossless_count'] += 1 if is_lossless else 0
        # A report without a truthy `trace_lossless` counts as a lossless failure.
        counters['trace_lossless_failure_count'] += 0 if is_lossless else 1
    return counters


def write_artifacts_index(package_root: Path):
    """Index every file in the package and record trace counters in the manifest.

    Walks ``package_root`` recursively (in sorted order, so the index is
    reproducible), skipping ignored release paths and ``artifacts_index.json``
    itself. Each file gets an entry with its relative path, artifact kind,
    linked trace steps, producing attempt, size, SHA-256, MIME type and media
    type. The entries are written to ``artifacts_index.json``.

    ``manifest.json`` is updated in place with a ``trace_counters`` entry; if
    it is missing, unreadable or not a JSON object, a minimal manifest is
    written instead.

    Returns the list of index entries.
    """
    trace_links = _trace_artifact_links(package_root)
    package_prefix = f'packages/{package_root.name}'

    paths = []
    for f in sorted(package_root.rglob('*')):
        rel = f.relative_to(package_root)
        if is_ignored_release_path(rel):
            continue
        if not f.is_file() or f.name == 'artifacts_index.json':
            continue

        rel_posix = rel.as_posix()
        mime = mimetypes.guess_type(f.name)[0]
        media_type = _media_type(f, mime)
        attempt_id, attempt_kind = _attempt_from_package_rel(package_root, rel)

        # Look the file up under every spelling a trace step might have used.
        link_keys = artifact_ref_candidates(rel_posix) | {rel_posix, f'{package_prefix}/{rel_posix}'}
        linked = []
        for key in link_keys:
            for link in trace_links.get(key, []):
                if link not in linked:
                    linked.append(link)

        paths.append({
            'package_relative_path': rel_posix,
            'artifact_ref': rel_posix,
            'artifact_kind': _artifact_kind(rel, media_type),
            'linked_trace_steps': linked,
            'produced_by_attempt_id': attempt_id,
            'produced_by_attempt_kind': attempt_kind,
            'size_bytes': f.stat().st_size,
            'content_hash': _sha256(f),
            'mime_type': mime,
            'media_type': media_type,
            'preview_available': media_type in {'text', 'code', 'data', 'document'},
            'preview_truncated': False,
            'artifact_missing': False,
        })

    counters = _collect_trace_counters(package_root)

    manifest_path = package_root / 'manifest.json'
    try:
        manifest = json.loads(manifest_path.read_text())
    except Exception:
        manifest = None
    if not isinstance(manifest, dict):
        # Missing, unreadable or non-object manifest: start from the minimal form.
        manifest = {'task_id': package_root.name, 'schema_version': 'aa-package-v0.1'}
    manifest['trace_counters'] = counters
    write_json(manifest_path, manifest)

    write_json(package_root / 'artifacts_index.json', paths)
    return paths
@@ -0,0 +1,223 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import time
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import Any, Callable, Literal
8
+
9
+ from .io import append_jsonl, read_json, read_jsonl, write_json
10
+
11
+ RUN_TERMINAL_STATUSES = {"completed", "partial", "failed"}
12
+
13
+ RunStatus = Literal["running", "completed", "partial", "failed"]
14
+ TaskStatus = Literal["running", "completed", "partial", "failed"]
15
+ ProgressCallback = Callable[[dict[str, Any], dict[str, Any]], None]
16
+
17
+
18
def utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string ending in ``Z``."""
    stamp = datetime.now(timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
20
+
21
+
22
def run_status_path(run_root: Path) -> Path:
    """Return the location of ``run_status.json`` inside ``run_root``."""
    return run_root.joinpath("run_status.json")
24
+
25
+
26
def progress_events_path(run_root: Path) -> Path:
    """Return the location of ``progress_events.jsonl`` inside ``run_root``."""
    return run_root.joinpath("progress_events.jsonl")
28
+
29
+
30
def read_run_status(run_root: Path) -> dict[str, Any]:
    """Load ``run_status.json`` for the run, or return ``{}`` when it does not exist."""
    status_file = run_status_path(run_root)
    if not status_file.exists():
        return {}
    return read_json(status_file)
33
+
34
+
35
def read_progress_events(run_root: Path) -> list[dict[str, Any]]:
    """Load the run's progress events, or return ``[]`` when the log does not exist."""
    events_file = progress_events_path(run_root)
    if not events_file.exists():
        return []
    return read_jsonl(events_file)
38
+
39
+
40
def _event_index(run_root: Path) -> int:
    """Return the 1-based index for the next progress event.

    The whole event log is re-read on every call to count existing events.
    """
    recorded = read_progress_events(run_root)
    return len(recorded) + 1
42
+
43
+
44
def _base_status(run_id: str, run_root: Path, **kwargs: Any) -> dict[str, Any]:
    """Build the initial status dict for a run that has no status file yet.

    Both ``run_status`` and ``task_status`` start as ``"running"``. Other
    fields come from ``kwargs`` with fallbacks: phase ``"starting"``, loop
    ``1``, message ``"Run starting."``, workspace ``run_root`` and artifacts
    ``run_root/artifacts``. ``worker_agent`` is accepted as a fallback for
    ``apprentice_agent``.
    """
    option = kwargs.get
    timestamp = utc_now()
    workspace = option("task_workspace_path") or run_root
    artifacts = option("artifacts_path") or run_root / "artifacts"
    return {
        "run_id": run_id,
        "run_status": "running",
        "task_status": "running",
        "current_phase": option("current_phase") or "starting",
        "current_loop": option("current_loop") or 1,
        "maximum_improvement_loops": option("maximum_improvement_loops"),
        "latest_message": option("latest_message") or "Run starting.",
        "apprentice_agent": option("apprentice_agent") or option("worker_agent"),
        "mentor_mode": option("mentor_mode"),
        "task_title": option("task_title"),
        "task_workspace_path": str(workspace),
        "artifacts_path": str(artifacts),
        "contribution_bundle_path": option("contribution_bundle_path"),
        "traced_steps": option("traced_steps"),
        "artifact_count": option("artifact_count"),
        "started_at": option("started_at") or timestamp,
        "updated_at": timestamp,
        "last_operational_error": option("last_operational_error"),
    }
66
+
67
+
68
def update_run_status(run_root: Path, **updates: Any) -> dict[str, Any]:
    """Merge ``updates`` into ``run_status.json`` and return the stored dict.

    When a status already exists, only non-None updates are applied and
    ``updated_at`` is refreshed; ``clear_operational_error=True`` removes
    ``last_operational_error``. When none exists, a fresh status is built
    with ``_base_status``. The legacy ``worker_agent`` key is folded into
    ``apprentice_agent`` and never stored as an update.
    """
    run_root.mkdir(parents=True, exist_ok=True)
    existing = read_run_status(run_root)

    pending = dict(updates)
    drop_error = bool(pending.pop("clear_operational_error", False))
    if "worker_agent" in pending:
        legacy_agent = pending.pop("worker_agent")
        # Only used when the caller did not also pass apprentice_agent.
        pending.setdefault("apprentice_agent", legacy_agent)

    run_id = str(pending.get("run_id") or existing.get("run_id") or run_root.name)

    if not existing:
        seed = {key: value for key, value in pending.items() if key != "run_id"}
        data = _base_status(run_id, run_root, **seed)
    else:
        data = dict(existing)
        for key, value in pending.items():
            if value is not None:
                data[key] = value
        if drop_error:
            data.pop("last_operational_error", None)
        if data.get("apprentice_agent"):
            data.pop("worker_agent", None)
        data.setdefault("run_id", run_id)
        data.setdefault("started_at", existing.get("started_at") or utc_now())
        data["updated_at"] = utc_now()

    write_json(run_status_path(run_root), data)
    return data
92
+
93
+
94
def append_progress_event(
    run_root: Path,
    event_type: str,
    *,
    message: str,
    run_id: str | None = None,
    current_loop: int | None = None,
    maximum_improvement_loops: int | None = None,
    phase: str | None = None,
    run_status: RunStatus | None = None,
    task_status: TaskStatus | None = None,
    traced_steps: int | None = None,
    artifact_count: int | None = None,
    artifacts_path: str | Path | None = None,
    contribution_bundle_path: str | Path | None = None,
    operational_error: str | None = None,
    metadata_json: dict[str, Any] | None = None,
    callback: ProgressCallback | None = None,
) -> dict[str, Any]:
    """Append one event to the progress log, then refresh ``run_status.json``.

    The event keeps only fields whose value is not None (``metadata_json``
    defaults to ``{}`` and is therefore always present). The status update
    mirrors the non-None event fields, uses ``phase`` (or ``event_type``) as
    the current phase, and clears the stored operational error when the event
    reports completion without an error. ``callback``, if given, is invoked
    with ``(event, status)``.

    Returns the event dict that was appended.
    """
    run_root.mkdir(parents=True, exist_ok=True)

    artifacts_text = str(artifacts_path) if artifacts_path else None
    bundle_text = str(contribution_bundle_path) if contribution_bundle_path else None

    candidate_fields = {
        "event_type": event_type,
        "event_index": _event_index(run_root),
        "created_at": utc_now(),
        "run_id": run_id or run_root.name,
        "current_loop": current_loop,
        "maximum_improvement_loops": maximum_improvement_loops,
        "phase": phase,
        "message": message,
        "run_status": run_status,
        "task_status": task_status,
        "traced_steps": traced_steps,
        "artifact_count": artifact_count,
        "artifacts_path": artifacts_text,
        "contribution_bundle_path": bundle_text,
        "operational_error": operational_error,
        "metadata_json": metadata_json or {},
    }
    event = {name: value for name, value in candidate_fields.items() if value is not None}
    append_jsonl(progress_events_path(run_root), event)

    status_updates: dict[str, Any] = {
        "run_id": event["run_id"],
        "current_phase": phase or event_type,
        "latest_message": message,
    }
    optional_updates = {
        "current_loop": current_loop,
        "maximum_improvement_loops": maximum_improvement_loops,
        "run_status": run_status,
        "task_status": task_status,
        "traced_steps": traced_steps,
        "artifact_count": artifact_count,
        "artifacts_path": artifacts_text,
        "contribution_bundle_path": bundle_text,
        "last_operational_error": operational_error,
    }
    status_updates.update(
        {name: value for name, value in optional_updates.items() if value is not None}
    )
    if operational_error is None and "completed" in (run_status, task_status):
        status_updates["clear_operational_error"] = True

    status = update_run_status(run_root, **status_updates)
    if callback:
        callback(event, status)
    return event
158
+
159
+
160
def format_progress_event(event: dict[str, Any]) -> str:
    """Render one progress event as a single human-readable line.

    The line is ``<prefix><message>[ - <details>]`` where:

    * the prefix is ``[Follow-up N]`` (when ``metadata_json.followup_index``
      is truthy) followed by ``[loop/max] `` (when both loop fields are
      truthy);
    * the message falls back to the event type, then ``"Progress update"``;
    * the details list traced steps, artifact count and operational error
      when present.

    A non-empty prefix is always separated from the message by a space, so a
    follow-up event without loop information renders as
    ``[Follow-up 2] message`` rather than running the two together.
    ``metadata_json`` values that are not dicts are ignored.
    """
    loop = event.get("current_loop")
    max_loop = event.get("maximum_improvement_loops")

    metadata = event.get("metadata_json")
    followup_index = metadata.get("followup_index") if isinstance(metadata, dict) else None

    followup_prefix = f"[Follow-up {followup_index}]" if followup_index else ""
    loop_prefix = f"[{loop}/{max_loop}] " if loop and max_loop else ""
    prefix = f"{followup_prefix}{loop_prefix}"
    if prefix and not prefix.endswith(" "):
        # Only the follow-up tag is present; it carries no trailing space of its own.
        prefix += " "

    message = event.get("message") or event.get("event_type") or "Progress update"

    details: list[str] = []
    if event.get("traced_steps") is not None:
        details.append(f"{event['traced_steps']} traced steps")
    if event.get("artifact_count") is not None:
        details.append(f"{event['artifact_count']} artifacts")
    if event.get("operational_error"):
        details.append(f"error: {event['operational_error']}")

    suffix = f" - {', '.join(details)}" if details else ""
    # f-string formatting also tolerates a non-string message value.
    return f"{prefix}{message}{suffix}"
177
+
178
+
179
def format_run_status(status: dict[str, Any]) -> str:
    """Render a run status dict as a multi-line summary.

    An empty status yields ``"No run_status.json found."``. The contribution
    bundle and operational error lines appear only when their value is
    truthy; the traced-steps and artifact-count lines appear whenever their
    value is not None (so a count of 0 is shown).
    """
    if not status:
        return "No run_status.json found."

    field = status.get
    agent = field('apprentice_agent') or field('worker_agent')
    lines = [
        f"Run: {field('run_id')}",
        f"Apprentice Agent: {agent}",
        f"Run Status: {field('run_status')}",
        f"Task Status: {field('task_status')}",
        f"Current Phase: {field('current_phase')}",
        f"Loop: {field('current_loop')}/{field('maximum_improvement_loops')}",
        f"Latest: {field('latest_message')}",
        f"Task Workspace: {field('task_workspace_path')}",
        f"Artifacts: {field('artifacts_path')}",
    ]

    # (label, key, show falsy-but-not-None values such as 0)
    optional_lines = (
        ("Contribution Bundle", "contribution_bundle_path", False),
        ("Traced Steps", "traced_steps", True),
        ("Artifacts Indexed", "artifact_count", True),
        ("Operational Error", "last_operational_error", False),
    )
    for label, key, show_falsy in optional_lines:
        value = field(key)
        if value is None:
            continue
        if value or show_falsy:
            lines.append(f"{label}: {value}")
    return "\n".join(lines)
202
+
203
+
204
def watch_progress(
    run_root: Path,
    *,
    interval_seconds: float = 1.0,
    timeout_seconds: float | None = None,
    emit: Callable[[str], None] = print,
) -> None:
    """Poll a run directory and emit each new progress event as a formatted line.

    Returns once the run status is one of ``RUN_TERMINAL_STATUSES`` or, when
    ``timeout_seconds`` is given, once that much time has elapsed.

    Args:
        run_root: Directory holding ``run_status.json`` and the event log.
        interval_seconds: Delay between polls.
        timeout_seconds: Optional overall limit; ``None`` waits indefinitely.
        emit: Called with each formatted event line.
    """
    seen = 0
    started = time.monotonic()
    while True:
        # Read the status BEFORE the events. append_progress_event appends the
        # event first and writes the status afterwards, so any status seen here
        # is followed by an event read that already contains the events that
        # produced it. Reading in the opposite order could observe a terminal
        # status and return without ever emitting the final events.
        status = read_run_status(run_root)
        events = read_progress_events(run_root)
        for event in events[seen:]:
            emit(format_progress_event(event))
        seen = len(events)

        if status.get("run_status") in RUN_TERMINAL_STATUSES:
            return

        if timeout_seconds is None:
            time.sleep(interval_seconds)
            continue
        remaining = timeout_seconds - (time.monotonic() - started)
        if remaining <= 0:
            return
        # Never sleep past the deadline.
        time.sleep(min(interval_seconds, remaining))