agent-apprenticeship 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +217 -0
- package/bin/agent-apprenticeship.js +131 -0
- package/package.json +30 -0
- package/pyproject.toml +23 -0
- package/src/agent_apprenticeship_trace/__init__.py +2 -0
- package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
- package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
- package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
- package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
- package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
- package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
- package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
- package/src/agent_apprenticeship_trace/certification.py +580 -0
- package/src/agent_apprenticeship_trace/cli.py +2979 -0
- package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
- package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
- package/src/agent_apprenticeship_trace/config.py +609 -0
- package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
- package/src/agent_apprenticeship_trace/env.py +46 -0
- package/src/agent_apprenticeship_trace/evaluator.py +64 -0
- package/src/agent_apprenticeship_trace/grader.py +194 -0
- package/src/agent_apprenticeship_trace/integration_status.py +193 -0
- package/src/agent_apprenticeship_trace/io.py +20 -0
- package/src/agent_apprenticeship_trace/learning.py +627 -0
- package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
- package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
- package/src/agent_apprenticeship_trace/loop.py +111 -0
- package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
- package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
- package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
- package/src/agent_apprenticeship_trace/progress.py +223 -0
- package/src/agent_apprenticeship_trace/public_run.py +1109 -0
- package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
- package/src/agent_apprenticeship_trace/recipes.py +129 -0
- package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
- package/src/agent_apprenticeship_trace/revision.py +21 -0
- package/src/agent_apprenticeship_trace/role_runners.py +7 -0
- package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
- package/src/agent_apprenticeship_trace/schemas.py +273 -0
- package/src/agent_apprenticeship_trace/session_events.py +99 -0
- package/src/agent_apprenticeship_trace/task_intake.py +112 -0
- package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
- package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
- package/src/agent_apprenticeship_trace/training_signals.py +30 -0
- package/src/agent_apprenticeship_trace/validation.py +210 -0
- package/src/agent_apprenticeship_trace/verifier.py +55 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import hashlib, mimetypes, json, shutil
|
|
4
|
+
from .schemas import *
|
|
5
|
+
from .io import write_json, append_jsonl
|
|
6
|
+
from .rubric_generation import worker_visible_markdown
|
|
7
|
+
from .artifact_resolver import artifact_ref_candidates, normalize_artifact_ref
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Dictionary keys that _drop_source_fields removes (at any nesting depth) from
# task records before they are written into a package.
SOURCE_FIELD_KEYS={'source_url_or_ref','source_kind','source_url','source_ref','source_license'}
|
|
11
|
+
# Path components that make is_ignored_release_path reject a path outright.
IGNORED_RELEASE_DIR_NAMES = {
    'node_modules',
    '.venv',
    '__pycache__',
    '.pytest_cache',
    '.mypy_cache',
    '.ruff_cache',
    '.git',
    '.cache',
    'dist',
    'build',
}


def is_ignored_release_path(path: Path | str) -> bool:
    """Report whether any component of *path* marks it as ignored.

    A component counts as ignored when it is listed in
    IGNORED_RELEASE_DIR_NAMES or when it contains "pycache" (compared
    case-insensitively). A path with no components is never ignored.
    """
    return any(
        segment in IGNORED_RELEASE_DIR_NAMES or 'pycache' in segment.lower()
        for segment in Path(path).parts
    )
|
|
21
|
+
|
|
22
|
+
# Built-in fixture files, keyed by exact file name. materialize_task_inputs
# writes an entry into the package's input directory when the task's
# `input_requirements` lists that name.
# NOTE(review): INV-1002 appears twice in invoices.csv - presumably deliberate,
# since the policy text asks for duplicate invoice IDs to be flagged; confirm
# before "cleaning" the fixture.
FINANCE_INPUTS = {
    'invoices.csv': """invoice_id,vendor,invoice_date,due_date,currency,amount
INV-1001,Acme Incorporated,2026-01-05,2026-02-04,USD,1200.00
INV-1002,Globex LLC,2026-01-08,2026-02-07,EUR,950.00
INV-1002,Globex LLC,2026-01-08,2026-02-07,EUR,950.00
INV-1003,Soylent Corp,2026-01-12,2026-02-11,GBP,500.00
INV-1004,Initech,2026-01-15,2026-02-14,USD,750.00
INV-1005,Acme Inc,2026-01-20,2026-02-19,JPY,100000
""",
    'payments.csv': """payment_id,payment_date,vendor_name,invoice_ref,currency,amount
PAY-9001,2026-02-03,ACME Inc.,INV-1001,USD,1200.00
PAY-9002,2026-02-08,Globex Corporation,INV-1002,EUR,500.00
PAY-9003,2026-02-10,Globex LLC,INV-1002,EUR,450.00
PAY-9004,2026-02-15,Soylent,INV-1003,GBP,550.00
PAY-9005,2026-02-18,Initech LLC,INV-9999,USD,300.00
PAY-9006,2026-02-20,Acme Incorporated,INV-1005,JPY,100000
""",
    'vendor_aliases.csv': """canonical_vendor,alias
Acme Incorporated,ACME Inc.
Acme Incorporated,Acme Inc
Globex LLC,Globex Corporation
Soylent Corp,Soylent
Initech,Initech LLC
""",
    'fx_rates.csv': """currency,usd_rate,effective_date
USD,1.0000,2026-02-01
EUR,1.1000,2026-02-01
GBP,1.2800,2026-02-01
JPY,0.0068,2026-02-01
""",
    'reconciliation_policy.md': """# Reconciliation policy

- Convert every invoice and payment amount to USD using `fx_rates.csv`.
- Resolve vendor names through `vendor_aliases.csv` before matching.
- Flag duplicate invoice IDs as `duplicate_invoice_id`.
- A payment can be within 3 calendar days after due date and still be on time.
- Categorize partial payments as `partial_payment` when total paid is less than invoice amount.
- Categorize overpayments as `overpayment` when total paid exceeds invoice amount by more than 1 USD.
- Categorize payments with no matching invoice as `missing_invoice_reference`.
- Produce vendor totals in USD and an audit summary with assumptions and exception counts.
""",
}
|
|
64
|
+
|
|
65
|
+
def init_package(root: Path, task_id: str) -> Path:
    """Create the directory skeleton for one task package and return its root.

    The package lives at ``root/packages/<task_id>``. Every sub-directory in
    the layout is created together with any missing parents; directories
    that already exist are left alone.
    """
    package_dir = root / 'packages' / task_id
    layout = (
        'task',
        'task/task_instruction_assets',
        'rubric',
        'input',
        'hidden_reference',
        'attempts/baseline/artifacts',
        'attempts/revised/artifacts',
        'grading',
        'feedback',
        'signals',
    )
    for relative in layout:
        package_dir.joinpath(relative).mkdir(parents=True, exist_ok=True)
    return package_dir
|
|
70
|
+
|
|
71
|
+
def _drop_source_fields(obj):
    """Return a copy of *obj* with source-provenance keys stripped recursively.

    Dicts lose every key listed in SOURCE_FIELD_KEYS and every entry whose
    value is None; lists are rebuilt element by element (None elements are
    kept); any other value is returned unchanged.
    """
    if isinstance(obj, dict):
        cleaned = {}
        for key, value in obj.items():
            if key in SOURCE_FIELD_KEYS or value is None:
                continue
            cleaned[key] = _drop_source_fields(value)
        return cleaned
    if isinstance(obj, list):
        return [_drop_source_fields(item) for item in obj]
    return obj
|
|
77
|
+
|
|
78
|
+
def public_task_record(raw: RawTaskRecord | dict) -> dict:
    """Build the publishable dict form of a raw task record.

    Objects exposing ``model_dump`` are dumped in JSON mode; anything else is
    copied with ``dict(raw)``. The legacy ``expected_pay`` /
    ``expected_apprentice_pay`` values are folded into the
    ``expected_economic_value*`` fields when those are unset, the legacy keys
    are removed (also from a dict ``raw_payload``), and the result is passed
    through _drop_source_fields.
    """
    record = raw.model_dump(mode='json') if hasattr(raw, 'model_dump') else dict(raw)
    payload = dict(record.get('raw_payload') or {})

    # (legacy key, public key) pairs, in the order the public keys are filled.
    renames = (
        ('expected_pay', 'expected_economic_value'),
        ('expected_apprentice_pay', 'expected_economic_value_for_agent_apprentice'),
    )
    for legacy_key, public_key in renames:
        if record.get(public_key) is None:
            # Prefer the record's own legacy value, then the payload's
            # public value, then the payload's legacy value.
            record[public_key] = (
                record.get(legacy_key)
                or payload.get(public_key)
                or payload.get(legacy_key)
            )
    legacy_keys = {legacy_key for legacy_key, _ in renames}
    for legacy_key, _ in renames:
        record.pop(legacy_key, None)

    embedded = record.get('raw_payload')
    if isinstance(embedded, dict):
        record['raw_payload'] = {
            key: value for key, value in embedded.items() if key not in legacy_keys
        }
    return _drop_source_fields(record)
|
|
90
|
+
|
|
91
|
+
def public_task_intake_spec(spec: TaskIntakeSpec | dict) -> dict:
    """Build the publishable dict form of a task intake spec.

    Mirrors public_task_record without the payload fallback: unset
    ``expected_economic_value*`` fields take the value of their legacy
    ``expected_*pay`` counterpart, the legacy keys are removed, and the
    result is passed through _drop_source_fields.
    """
    record = spec.model_dump(mode='json') if hasattr(spec, 'model_dump') else dict(spec)
    renames = (
        ('expected_pay', 'expected_economic_value'),
        ('expected_apprentice_pay', 'expected_economic_value_for_agent_apprentice'),
    )
    for legacy_key, public_key in renames:
        if record.get(public_key) is None:
            record[public_key] = record.get(legacy_key)
    for legacy_key, _ in renames:
        record.pop(legacy_key, None)
    return _drop_source_fields(record)
|
|
100
|
+
|
|
101
|
+
def _copy_asset(src: Path, dst_dir: Path) -> Path:
|
|
102
|
+
dst_dir.mkdir(parents=True, exist_ok=True)
|
|
103
|
+
target=dst_dir/src.name
|
|
104
|
+
if src.resolve() == target.resolve() if target.exists() else False:
|
|
105
|
+
return target
|
|
106
|
+
if target.exists():
|
|
107
|
+
stem=target.stem; suffix=target.suffix
|
|
108
|
+
for i in range(2,1000):
|
|
109
|
+
candidate=dst_dir/f'{stem}-{i}{suffix}'
|
|
110
|
+
if not candidate.exists():
|
|
111
|
+
target=candidate; break
|
|
112
|
+
if src.is_dir():
|
|
113
|
+
shutil.copytree(src, target, dirs_exist_ok=True)
|
|
114
|
+
else:
|
|
115
|
+
shutil.copy2(src, target)
|
|
116
|
+
return target
|
|
117
|
+
|
|
118
|
+
def _task_brief(raw: RawTaskRecord, spec: TaskIntakeSpec) -> str:
|
|
119
|
+
lines=[
|
|
120
|
+
f"# {spec.normalized_title}",
|
|
121
|
+
"",
|
|
122
|
+
"## Instruction",
|
|
123
|
+
spec.normalized_instruction,
|
|
124
|
+
"",
|
|
125
|
+
"## Expected deliverable",
|
|
126
|
+
raw.expected_deliverable or raw.raw_payload.get('expected_deliverable') or spec.expected_agent_deliverable,
|
|
127
|
+
"",
|
|
128
|
+
"## Publishable task metadata",
|
|
129
|
+
]
|
|
130
|
+
for key, value in {
|
|
131
|
+
'domain': spec.domain,
|
|
132
|
+
'subdomain': spec.subdomain,
|
|
133
|
+
'apprenticeship_role': spec.apprenticeship_role,
|
|
134
|
+
'task_family': spec.task_family,
|
|
135
|
+
'difficulty_tier': spec.difficulty_tier,
|
|
136
|
+
'needs_expert_review': spec.needs_expert_review,
|
|
137
|
+
'expected_economic_value': spec.expected_economic_value or spec.expected_pay,
|
|
138
|
+
'expected_economic_value_for_agent_apprentice': spec.expected_economic_value_for_agent_apprentice or spec.expected_apprentice_pay,
|
|
139
|
+
}.items():
|
|
140
|
+
if value is not None:
|
|
141
|
+
lines.append(f"- {key}: {value}")
|
|
142
|
+
return "\n".join(lines).rstrip()+"\n"
|
|
143
|
+
|
|
144
|
+
def materialize_task_inputs(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSpec) -> list[str]:
    """Populate ``<package_root>/input`` and return the names written there.

    Always writes ``task_brief.md`` and ``task.json``. Each entry of
    ``raw.input_artifact_refs`` that exists as a file or directory is copied
    into both ``task/task_instruction_assets`` and ``input``. Each name in the
    payload's ``input_requirements`` that matches a FINANCE_INPUTS key is
    written from the fixture. The returned list is de-duplicated, in
    first-seen order.
    """
    input_dir = package_root / 'input'
    asset_dir = package_root / 'task' / 'task_instruction_assets'
    input_dir.mkdir(parents=True, exist_ok=True)

    (input_dir / 'task_brief.md').write_text(_task_brief(raw, spec))
    written = ['task_brief.md']
    write_json(input_dir / 'task.json', public_task_record(raw))
    written.append('task.json')

    # Only real local attachment refs and built-in fixtures with an exact,
    # known name are materialized; instruction fragments are never turned
    # into invented input filenames.
    for ref in raw.input_artifact_refs or []:
        attachment = Path(ref)
        if not attachment.exists():
            continue
        if not (attachment.is_file() or attachment.is_dir()):
            continue
        _copy_asset(attachment, asset_dir)
        written.append(_copy_asset(attachment, input_dir).name)

    for requirement in raw.raw_payload.get('input_requirements') or []:
        if requirement in FINANCE_INPUTS:
            (input_dir / requirement).write_text(FINANCE_INPUTS[requirement])
            written.append(requirement)

    # dict.fromkeys removes repeats while keeping the first occurrence order.
    return list(dict.fromkeys(written))
|
|
168
|
+
|
|
169
|
+
def write_task_package(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSpec, quality: TaskIntakeQualityReport, rubric: RubricSpec, rubric_quality: RubricQualityReport):
    """Write the task, input, rubric, README and manifest files of a package.

    Task records are written in their public form (public_task_record /
    public_task_intake_spec). The rubric is written to both ``rubric.json``
    and ``verifier_private_rubric.json`` with the same content, and as one
    JSON line per item in ``rubric_items.jsonl``.
    """
    task_dir = package_root / 'task'
    rubric_dir = package_root / 'rubric'

    write_json(task_dir / 'raw_task_record.json', public_task_record(raw))
    write_json(task_dir / 'task_intake_spec.json', public_task_intake_spec(spec))
    write_json(task_dir / 'task_intake_quality_report.json', quality)

    materialize_task_inputs(package_root, raw, spec)

    write_json(rubric_dir / 'rubric.json', rubric)
    write_json(rubric_dir / 'rubric_quality_report.json', rubric_quality)
    (rubric_dir / 'worker_visible_rubric.md').write_text(worker_visible_markdown(rubric))
    write_json(rubric_dir / 'verifier_private_rubric.json', rubric)

    # Every other file here is overwritten on a rerun, but append_jsonl
    # presumably appends (TODO confirm). Start from an empty file so that
    # rewriting a package does not duplicate rubric items.
    items_path = rubric_dir / 'rubric_items.jsonl'
    items_path.unlink(missing_ok=True)
    for item in rubric.rubric_items:
        append_jsonl(items_path, item)

    (package_root / 'README.md').write_text(f'# Task package {spec.task_id}\n\nLocal agent apprenticeship trace package.\n')
    write_json(package_root / 'manifest.json', {'task_id': spec.task_id, 'schema_version': 'aa-package-v0.1'})
|
|
176
|
+
|
|
177
|
+
def _media_type(path: Path, mime: str | None) -> str:
|
|
178
|
+
suffix=path.suffix.lower()
|
|
179
|
+
if mime and mime.startswith('image/'): return 'image'
|
|
180
|
+
if mime and mime.startswith('audio/'): return 'audio'
|
|
181
|
+
if mime and mime.startswith('video/'): return 'video'
|
|
182
|
+
if suffix in {'.py','.js','.ts','.sh','.rs','.go','.java','.sql','.html','.css'}: return 'code'
|
|
183
|
+
if suffix in {'.csv','.json','.jsonl','.yaml','.yml','.xml'}: return 'data'
|
|
184
|
+
if suffix in {'.md','.txt','.log'}: return 'text'
|
|
185
|
+
if suffix in {'.pdf','.doc','.docx'}: return 'document'
|
|
186
|
+
if suffix in {'.zip','.tar','.gz'}: return 'archive'
|
|
187
|
+
return 'unknown'
|
|
188
|
+
|
|
189
|
+
def _sha256(path: Path) -> str:
|
|
190
|
+
h=hashlib.sha256()
|
|
191
|
+
with path.open('rb') as fh:
|
|
192
|
+
for chunk in iter(lambda: fh.read(1024 * 1024), b''):
|
|
193
|
+
h.update(chunk)
|
|
194
|
+
return h.hexdigest()
|
|
195
|
+
|
|
196
|
+
def _artifact_kind(path: Path, media_type: str) -> str:
|
|
197
|
+
rel=path.as_posix()
|
|
198
|
+
if '/artifacts/' in rel:
|
|
199
|
+
return 'worker_output'
|
|
200
|
+
if path.name in {'agent_trace.json','agent_trace.raw.json','agent_trace.normalized.json'}:
|
|
201
|
+
return 'trace'
|
|
202
|
+
if path.name in {'stdout.txt','stderr.txt','final_message.txt'} or media_type == 'text' and path.suffix == '.log':
|
|
203
|
+
return 'log'
|
|
204
|
+
if rel.startswith('input/') or '/input/' in rel:
|
|
205
|
+
return 'input'
|
|
206
|
+
if rel.startswith('grading/') or '/grading/' in rel:
|
|
207
|
+
return 'evaluation'
|
|
208
|
+
if rel.startswith('feedback/') or '/feedback/' in rel:
|
|
209
|
+
return 'feedback'
|
|
210
|
+
return 'package_file'
|
|
211
|
+
|
|
212
|
+
def _attempt_from_package_rel(package_root: Path, rel: Path) -> tuple[str | None, str | None]:
|
|
213
|
+
parts=rel.parts
|
|
214
|
+
if len(parts) >= 2 and parts[0] == 'attempts':
|
|
215
|
+
attempt_kind=parts[1]
|
|
216
|
+
return f"{package_root.name}_{attempt_kind}", attempt_kind
|
|
217
|
+
return None, None
|
|
218
|
+
|
|
219
|
+
def _trace_artifact_links(package_root: Path) -> dict[str, list[dict]]:
    """Map artifact reference keys to the trace steps that mention them.

    Reads every ``attempts/*/agent_trace.json`` under *package_root*. For each
    step, one link dict (attempt id/kind, trace id, step, actor, operation) is
    registered under every key derived from each of the step's
    ``artifact_refs``: the normalized ref, its ``artifact_ref_candidates`` and
    ``packages/<package name>/<normalized ref>``.

    Trace files are handled on a best-effort basis. A file that cannot be read
    or parsed, or whose content does not have the expected shape (top level or
    step not a JSON object, ``steps`` / ``artifact_refs`` not a list), is
    skipped instead of raising.
    """
    links: dict[str, list[dict]] = {}
    for trace_path in package_root.glob('attempts/*/agent_trace.json'):
        try:
            trace = json.loads(trace_path.read_text())
        except Exception:
            continue
        if not isinstance(trace, dict):
            # Valid JSON of the wrong shape (list, string, ...) has no
            # .get(); treat it like an unparseable trace.
            continue
        attempt_id = trace.get('attempt_id')
        attempt_kind = trace.get('attempt_kind') or trace_path.parent.name
        trace_id = trace.get('trace_id')
        steps = trace.get('steps')
        if not isinstance(steps, list):
            continue
        for step in steps:
            if not isinstance(step, dict):
                continue
            link = {
                'attempt_id': attempt_id,
                'attempt_kind': attempt_kind,
                'trace_id': trace_id,
                'step': step.get('step'),
                'actor': step.get('actor'),
                'operation': step.get('operation'),
            }
            refs = step.get('artifact_refs')
            if not isinstance(refs, list):
                continue
            for ref in refs:
                normalized = normalize_artifact_ref(ref)
                keys = artifact_ref_candidates(normalized) | {normalized, f'packages/{package_root.name}/{normalized}'}
                for key in keys:
                    if not key:
                        continue
                    bucket = links.setdefault(key, [])
                    # The same step may reach a key through several refs;
                    # record it once per key.
                    if link not in bucket:
                        bucket.append(link)
    return links
|
|
247
|
+
|
|
248
|
+
def _trace_report_int(report: dict, key: str) -> int:
    """Read ``report[key]`` as an int; missing or non-numeric values count as 0."""
    try:
        return int(report.get(key) or 0)
    except (TypeError, ValueError):
        return 0


def _collect_trace_counters(package_root: Path) -> dict[str, int]:
    """Sum the per-attempt ``trace_normalization_report.json`` files into counters."""
    counters = {
        'raw_trace_count': 0,
        'raw_trace_step_count': 0,
        'normalized_trace_count': 0,
        'normalized_trace_step_count': 0,
        'fallback_trace_count': 0,
        'fallback_trace_step_count': 0,
        'discarded_step_count': 0,
        'raw_trace_parse_error_count': 0,
        'trace_normalization_error_count': 0,
        'trace_normalization_partial_count': 0,
        'trace_lossless_count': 0,
        'trace_lossless_failure_count': 0,
    }
    for report_path in package_root.glob('attempts/*/trace_normalization_report.json'):
        try:
            report = json.loads(report_path.read_text())
        except Exception:
            continue
        if not isinstance(report, dict):
            # Wrong-shaped JSON is skipped like an unreadable report.
            continue
        normalized_steps = _trace_report_int(report, 'normalized_step_count')
        is_fallback = bool(report.get('fallback_trace'))
        is_lossless = bool(report.get('trace_lossless'))
        counters['raw_trace_count'] += 1 if report.get('raw_trace_ref') else 0
        counters['raw_trace_step_count'] += _trace_report_int(report, 'raw_step_count')
        counters['normalized_trace_count'] += 1 if report.get('normalized_trace_ref') else 0
        counters['normalized_trace_step_count'] += normalized_steps
        counters['fallback_trace_count'] += 1 if is_fallback else 0
        counters['fallback_trace_step_count'] += normalized_steps if is_fallback else 0
        counters['discarded_step_count'] += _trace_report_int(report, 'discarded_step_count')
        counters['raw_trace_parse_error_count'] += 1 if report.get('raw_trace_parse_error') else 0
        counters['trace_normalization_error_count'] += 1 if report.get('trace_normalization_error') else 0
        counters['trace_normalization_partial_count'] += 1 if report.get('trace_normalization_partial') else 0
        counters['trace_lossless_count'] += 1 if is_lossless else 0
        counters['trace_lossless_failure_count'] += 0 if is_lossless else 1
    return counters


def write_artifacts_index(package_root: Path):
    """Refresh ``manifest.json`` trace counters and write ``artifacts_index.json``.

    The manifest is updated first, so that the size and hash recorded for
    ``manifest.json`` in the index describe the file as it is left on disk.
    A missing, unreadable or non-object manifest is replaced by a minimal one
    built from the package directory name.

    The index then gets one entry per file under *package_root*, except
    ignored paths (is_ignored_release_path) and ``artifacts_index.json``
    itself. Each entry carries the relative path, artifact kind, linked trace
    steps, producing attempt, size, SHA-256, MIME type and media type.

    Returns the list of index entries that was written.
    """
    counters = _collect_trace_counters(package_root)
    manifest_path = package_root / 'manifest.json'
    try:
        manifest = json.loads(manifest_path.read_text())
    except Exception:
        manifest = None
    if not isinstance(manifest, dict):
        manifest = {'task_id': package_root.name, 'schema_version': 'aa-package-v0.1'}
    manifest['trace_counters'] = counters
    write_json(manifest_path, manifest)

    trace_links = _trace_artifact_links(package_root)
    paths = []
    for f in package_root.rglob('*'):
        rel = f.relative_to(package_root)
        if is_ignored_release_path(rel):
            continue
        if not f.is_file() or f.name == 'artifacts_index.json':
            continue
        rel_posix = rel.as_posix()
        mime = mimetypes.guess_type(f.name)[0]
        media_type = _media_type(f, mime)
        attempt_id, attempt_kind = _attempt_from_package_rel(package_root, rel)
        # A trace may refer to this file by any of these spellings.
        link_keys = artifact_ref_candidates(rel_posix) | {rel_posix, f'packages/{package_root.name}/{rel_posix}'}
        linked = []
        for key in link_keys:
            for link in trace_links.get(key, []):
                if link not in linked:
                    linked.append(link)
        paths.append({
            'package_relative_path': rel_posix,
            'artifact_ref': rel_posix,
            'artifact_kind': _artifact_kind(rel, media_type),
            'linked_trace_steps': linked,
            'produced_by_attempt_id': attempt_id,
            'produced_by_attempt_kind': attempt_kind,
            'size_bytes': f.stat().st_size,
            'content_hash': _sha256(f),
            'mime_type': mime,
            'media_type': media_type,
            'preview_available': media_type in {'text', 'code', 'data', 'document'},
            'preview_truncated': False,
            'artifact_missing': False,
        })
    write_json(package_root / 'artifacts_index.json', paths)
    return paths
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Callable, Literal
|
|
8
|
+
|
|
9
|
+
from .io import append_jsonl, read_json, read_jsonl, write_json
|
|
10
|
+
|
|
11
|
+
# Run statuses at which watch_progress stops polling.
RUN_TERMINAL_STATUSES = {"completed", "partial", "failed"}

# Values accepted for the run-level and task-level status fields.
RunStatus = Literal["running", "completed", "partial", "failed"]
TaskStatus = Literal["running", "completed", "partial", "failed"]
# Signature of the callback append_progress_event invokes with
# (event, updated run status).
ProgressCallback = Callable[[dict[str, Any], dict[str, Any]], None]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string ending in ``Z``."""
    stamp = datetime.now(timezone.utc).isoformat()
    # isoformat() spells the UTC offset as "+00:00"; use the "Z" form instead.
    return stamp.replace("+00:00", "Z")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def run_status_path(run_root: Path) -> Path:
    """Return the path of ``run_status.json`` inside *run_root*."""
    return run_root.joinpath("run_status.json")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def progress_events_path(run_root: Path) -> Path:
    """Return the path of ``progress_events.jsonl`` inside *run_root*."""
    return run_root.joinpath("progress_events.jsonl")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def read_run_status(run_root: Path) -> dict[str, Any]:
    """Load ``run_status.json`` from *run_root*, or ``{}`` when it does not exist."""
    status_file = run_status_path(run_root)
    if not status_file.exists():
        return {}
    return read_json(status_file)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def read_progress_events(run_root: Path) -> list[dict[str, Any]]:
    """Load all events from ``progress_events.jsonl``, or ``[]`` when it does not exist."""
    events_file = progress_events_path(run_root)
    if not events_file.exists():
        return []
    return read_jsonl(events_file)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _event_index(run_root: Path) -> int:
    """Return the 1-based index for the next event: events recorded so far, plus one."""
    recorded = read_progress_events(run_root)
    return len(recorded) + 1
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _base_status(run_id: str, run_root: Path, **kwargs: Any) -> dict[str, Any]:
    """Build the initial status document for a run that has no status file yet.

    Recognised keyword arguments fill the matching fields; anything missing
    falls back to a default ("starting" phase, loop 1, "Run starting."
    message, *run_root* as workspace, ``run_root/artifacts`` as artifacts
    path). ``worker_agent`` is accepted as a fallback for ``apprentice_agent``.
    Keyword arguments with no matching field are ignored.

    ``run_status`` and ``task_status`` are taken from *kwargs* when given and
    default to "running". Without this, a terminal status supplied on the very
    first update would be lost and the run would look as if it were still
    running.
    """
    now = utc_now()
    return {
        "run_id": run_id,
        "run_status": kwargs.get("run_status") or "running",
        "task_status": kwargs.get("task_status") or "running",
        "current_phase": kwargs.get("current_phase") or "starting",
        "current_loop": kwargs.get("current_loop") or 1,
        "maximum_improvement_loops": kwargs.get("maximum_improvement_loops"),
        "latest_message": kwargs.get("latest_message") or "Run starting.",
        "apprentice_agent": kwargs.get("apprentice_agent") or kwargs.get("worker_agent"),
        "mentor_mode": kwargs.get("mentor_mode"),
        "task_title": kwargs.get("task_title"),
        "task_workspace_path": str(kwargs.get("task_workspace_path") or run_root),
        "artifacts_path": str(kwargs.get("artifacts_path") or run_root / "artifacts"),
        "contribution_bundle_path": kwargs.get("contribution_bundle_path"),
        "traced_steps": kwargs.get("traced_steps"),
        "artifact_count": kwargs.get("artifact_count"),
        "started_at": kwargs.get("started_at") or now,
        "updated_at": now,
        "last_operational_error": kwargs.get("last_operational_error"),
    }
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def update_run_status(run_root: Path, **updates: Any) -> dict[str, Any]:
    """Merge *updates* into ``run_status.json`` and return the stored status.

    With an existing status file, non-None updates overwrite the stored
    fields and ``updated_at`` is refreshed. Without one, a fresh document is
    built by _base_status. Two keys get special treatment:
    ``clear_operational_error`` (truthy) removes ``last_operational_error``
    from an existing status, and ``worker_agent`` is folded into
    ``apprentice_agent`` when the latter is not supplied.
    """
    run_root.mkdir(parents=True, exist_ok=True)
    previous = read_run_status(run_root)
    changes = dict(updates)
    wipe_error = bool(changes.pop("clear_operational_error", False))
    if "worker_agent" in changes:
        legacy_agent = changes.pop("worker_agent")
        # Only fills apprentice_agent when the caller did not pass it.
        changes.setdefault("apprentice_agent", legacy_agent)
    run_id = str(changes.get("run_id") or previous.get("run_id") or run_root.name)

    if not previous:
        seed = {key: value for key, value in changes.items() if key != "run_id"}
        status = _base_status(run_id, run_root, **seed)
    else:
        supplied = {key: value for key, value in changes.items() if value is not None}
        status = {**previous, **supplied}
        if wipe_error:
            status.pop("last_operational_error", None)
        if status.get("apprentice_agent"):
            status.pop("worker_agent", None)
        status.setdefault("run_id", run_id)
        status.setdefault("started_at", previous.get("started_at") or utc_now())
        status["updated_at"] = utc_now()

    write_json(run_status_path(run_root), status)
    return status
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def append_progress_event(
    run_root: Path,
    event_type: str,
    *,
    message: str,
    run_id: str | None = None,
    current_loop: int | None = None,
    maximum_improvement_loops: int | None = None,
    phase: str | None = None,
    run_status: RunStatus | None = None,
    task_status: TaskStatus | None = None,
    traced_steps: int | None = None,
    artifact_count: int | None = None,
    artifacts_path: str | Path | None = None,
    contribution_bundle_path: str | Path | None = None,
    operational_error: str | None = None,
    metadata_json: dict[str, Any] | None = None,
    callback: ProgressCallback | None = None,
) -> dict[str, Any]:
    """Record one progress event and bring the run status in line with it.

    The event (fields that are None are omitted) is appended to
    ``progress_events.jsonl``; the same information is then merged into
    ``run_status.json`` through update_run_status. A "completed" run or task
    status with no ``operational_error`` clears any stored error. If
    *callback* is given it is called with ``(event, status)``.

    Returns the event dict that was appended.
    """
    run_root.mkdir(parents=True, exist_ok=True)
    artifacts_text = str(artifacts_path) if artifacts_path else None
    bundle_text = str(contribution_bundle_path) if contribution_bundle_path else None

    fields = {
        "event_type": event_type,
        "event_index": _event_index(run_root),
        "created_at": utc_now(),
        "run_id": run_id or run_root.name,
        "current_loop": current_loop,
        "maximum_improvement_loops": maximum_improvement_loops,
        "phase": phase,
        "message": message,
        "run_status": run_status,
        "task_status": task_status,
        "traced_steps": traced_steps,
        "artifact_count": artifact_count,
        "artifacts_path": artifacts_text,
        "contribution_bundle_path": bundle_text,
        "operational_error": operational_error,
        "metadata_json": metadata_json or {},
    }
    event = {name: value for name, value in fields.items() if value is not None}
    append_jsonl(progress_events_path(run_root), event)

    status_updates: dict[str, Any] = {
        "run_id": event["run_id"],
        "current_phase": phase or event_type,
        "latest_message": message,
    }
    optional_updates = {
        "current_loop": current_loop,
        "maximum_improvement_loops": maximum_improvement_loops,
        "run_status": run_status,
        "task_status": task_status,
        "traced_steps": traced_steps,
        "artifact_count": artifact_count,
        "artifacts_path": artifacts_text,
        "contribution_bundle_path": bundle_text,
        "last_operational_error": operational_error,
    }
    status_updates.update(
        {name: value for name, value in optional_updates.items() if value is not None}
    )
    finished = run_status == "completed" or task_status == "completed"
    if operational_error is None and finished:
        status_updates["clear_operational_error"] = True

    status = update_run_status(run_root, **status_updates)
    if callback:
        callback(event, status)
    return event
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def format_progress_event(event: dict[str, Any]) -> str:
    """Render a progress event as a single human-readable line.

    Layout: ``[Follow-up N][loop/max] message - detail, detail``. The
    follow-up tag appears when ``metadata_json["followup_index"]`` is truthy,
    the loop tag when both loop values are truthy. When at least one tag is
    present, exactly one space separates the tags from the message. The
    message falls back to the event type, then to "Progress update". Details
    list traced steps, artifact count and the operational error when present.
    """
    loop = event.get("current_loop")
    max_loop = event.get("maximum_improvement_loops")
    metadata = event.get("metadata_json")
    # Events are read back from disk; tolerate a metadata field that is not
    # an object.
    followup_index = metadata.get("followup_index") if isinstance(metadata, dict) else None

    tags: list[str] = []
    if followup_index:
        tags.append(f"[Follow-up {followup_index}]")
    if loop and max_loop:
        tags.append(f"[{loop}/{max_loop}]")
    # A single trailing space keeps "[Follow-up 2]" from being glued to the
    # message when no loop tag follows it.
    prefix = "".join(tags) + " " if tags else ""

    message = event.get("message") or event.get("event_type") or "Progress update"
    details: list[str] = []
    if event.get("traced_steps") is not None:
        details.append(f"{event['traced_steps']} traced steps")
    if event.get("artifact_count") is not None:
        details.append(f"{event['artifact_count']} artifacts")
    if event.get("operational_error"):
        details.append(f"error: {event['operational_error']}")
    suffix = f" - {', '.join(details)}" if details else ""
    return f"{prefix}{message}{suffix}"
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def format_run_status(status: dict[str, Any]) -> str:
    """Render a run-status document as a multi-line, human-readable summary.

    An empty status yields "No run_status.json found.". The fixed lines are
    always present. The contribution bundle and operational error lines are
    added when truthy; the traced-steps and artifact-count lines are added
    when not None, so a count of 0 is still shown.
    """
    if not status:
        return "No run_status.json found."

    agent = status.get('apprentice_agent') or status.get('worker_agent')
    report = [
        f"Run: {status.get('run_id')}",
        f"Apprentice Agent: {agent}",
        f"Run Status: {status.get('run_status')}",
        f"Task Status: {status.get('task_status')}",
        f"Current Phase: {status.get('current_phase')}",
        f"Loop: {status.get('current_loop')}/{status.get('maximum_improvement_loops')}",
        f"Latest: {status.get('latest_message')}",
        f"Task Workspace: {status.get('task_workspace_path')}",
        f"Artifacts: {status.get('artifacts_path')}",
    ]

    bundle = status.get("contribution_bundle_path")
    if bundle:
        report.append(f"Contribution Bundle: {bundle}")
    steps = status.get("traced_steps")
    if steps is not None:
        report.append(f"Traced Steps: {steps}")
    indexed = status.get("artifact_count")
    if indexed is not None:
        report.append(f"Artifacts Indexed: {indexed}")
    error = status.get("last_operational_error")
    if error:
        report.append(f"Operational Error: {error}")
    return "\n".join(report)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def watch_progress(
    run_root: Path,
    *,
    interval_seconds: float = 1.0,
    timeout_seconds: float | None = None,
    emit: Callable[[str], None] = print,
) -> None:
    """Poll a run directory and emit each new progress event as a formatted line.

    Args:
        run_root: Run directory holding the status and events files.
        interval_seconds: Pause between polls.
        timeout_seconds: Stop after this many seconds; None means no limit.
        emit: Receives each formatted event line (defaults to ``print``).

    Returns when the run status is one of RUN_TERMINAL_STATUSES or when the
    timeout has elapsed.
    """
    seen = 0
    started = time.monotonic()
    while True:
        # Read the status BEFORE the events. append_progress_event appends
        # the event first and writes the status second, so a terminal status
        # observed here guarantees its event is already in the events file.
        # Reading in the opposite order could exit without printing the
        # final event.
        status = read_run_status(run_root)
        events = read_progress_events(run_root)
        for event in events[seen:]:
            emit(format_progress_event(event))
        seen = len(events)
        if status.get("run_status") in RUN_TERMINAL_STATUSES:
            return
        if timeout_seconds is not None and time.monotonic() - started >= timeout_seconds:
            return
        time.sleep(interval_seconds)
|